• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=FMA
3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4,+fma -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=FMA4
4; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4 -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=FMA4
5; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=AVX512
6
7;
8; Pattern: (fadd (fmul x, y), z) -> (fmadd x, y, z)
9;
10
; Scalar float: a0 * a1 + a2.
; Every run expects the fmul/fadd pair to be selected as one fused
; multiply-add (vfmadd213ss, or vfmaddss on the FMA4 runs). The AVX512 run
; additionally expects a vmovaps copy from %zmm1 to %zmm0 before the return.
11define float @test_f32_fmadd(float %a0, float %a1, float %a2) {
12; FMA-LABEL: test_f32_fmadd:
13; FMA:       # BB#0:
14; FMA-NEXT:    vfmadd213ss %xmm2, %xmm1, %xmm0
15; FMA-NEXT:    retq
16;
17; FMA4-LABEL: test_f32_fmadd:
18; FMA4:       # BB#0:
19; FMA4-NEXT:    vfmaddss %xmm2, %xmm1, %xmm0, %xmm0
20; FMA4-NEXT:    retq
21;
22; AVX512-LABEL: test_f32_fmadd:
23; AVX512:       # BB#0:
24; AVX512-NEXT:    vfmadd213ss %xmm2, %xmm0, %xmm1
25; AVX512-NEXT:    vmovaps %zmm1, %zmm0
26; AVX512-NEXT:    retq
27  %x = fmul float %a0, %a1
28  %res = fadd float %x, %a2
29  ret float %res
30}
31
; 128-bit vector of 4 floats: a0 * a1 + a2.
; Every run expects a single packed fused multiply-add on xmm registers
; and nothing else before the return.
32define <4 x float> @test_4f32_fmadd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
33; FMA-LABEL: test_4f32_fmadd:
34; FMA:       # BB#0:
35; FMA-NEXT:    vfmadd213ps %xmm2, %xmm1, %xmm0
36; FMA-NEXT:    retq
37;
38; FMA4-LABEL: test_4f32_fmadd:
39; FMA4:       # BB#0:
40; FMA4-NEXT:    vfmaddps %xmm2, %xmm1, %xmm0, %xmm0
41; FMA4-NEXT:    retq
42;
43; AVX512-LABEL: test_4f32_fmadd:
44; AVX512:       # BB#0:
45; AVX512-NEXT:    vfmadd213ps %xmm2, %xmm1, %xmm0
46; AVX512-NEXT:    retq
47  %x = fmul <4 x float> %a0, %a1
48  %res = fadd <4 x float> %x, %a2
49  ret <4 x float> %res
50}
51
; 256-bit vector of 8 floats: a0 * a1 + a2.
; Every run expects a single packed fused multiply-add on ymm registers.
52define <8 x float> @test_8f32_fmadd(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
53; FMA-LABEL: test_8f32_fmadd:
54; FMA:       # BB#0:
55; FMA-NEXT:    vfmadd213ps %ymm2, %ymm1, %ymm0
56; FMA-NEXT:    retq
57;
58; FMA4-LABEL: test_8f32_fmadd:
59; FMA4:       # BB#0:
60; FMA4-NEXT:    vfmaddps %ymm2, %ymm1, %ymm0, %ymm0
61; FMA4-NEXT:    retq
62;
63; AVX512-LABEL: test_8f32_fmadd:
64; AVX512:       # BB#0:
65; AVX512-NEXT:    vfmadd213ps %ymm2, %ymm1, %ymm0
66; AVX512-NEXT:    retq
67  %x = fmul <8 x float> %a0, %a1
68  %res = fadd <8 x float> %x, %a2
69  ret <8 x float> %res
70}
71
; Scalar double: a0 * a1 + a2.
; Every run expects one fused multiply-add (vfmadd213sd, or vfmaddsd on the
; FMA4 runs). The AVX512 run additionally expects a vmovaps copy from %zmm1
; to %zmm0 before the return.
72define double @test_f64_fmadd(double %a0, double %a1, double %a2) {
73; FMA-LABEL: test_f64_fmadd:
74; FMA:       # BB#0:
75; FMA-NEXT:    vfmadd213sd %xmm2, %xmm1, %xmm0
76; FMA-NEXT:    retq
77;
78; FMA4-LABEL: test_f64_fmadd:
79; FMA4:       # BB#0:
80; FMA4-NEXT:    vfmaddsd %xmm2, %xmm1, %xmm0, %xmm0
81; FMA4-NEXT:    retq
82;
83; AVX512-LABEL: test_f64_fmadd:
84; AVX512:       # BB#0:
85; AVX512-NEXT:    vfmadd213sd %xmm2, %xmm0, %xmm1
86; AVX512-NEXT:    vmovaps %zmm1, %zmm0
87; AVX512-NEXT:    retq
88  %x = fmul double %a0, %a1
89  %res = fadd double %x, %a2
90  ret double %res
91}
92
; 128-bit vector of 2 doubles: a0 * a1 + a2.
; Every run expects a single packed fused multiply-add on xmm registers.
93define <2 x double> @test_2f64_fmadd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
94; FMA-LABEL: test_2f64_fmadd:
95; FMA:       # BB#0:
96; FMA-NEXT:    vfmadd213pd %xmm2, %xmm1, %xmm0
97; FMA-NEXT:    retq
98;
99; FMA4-LABEL: test_2f64_fmadd:
100; FMA4:       # BB#0:
101; FMA4-NEXT:    vfmaddpd %xmm2, %xmm1, %xmm0, %xmm0
102; FMA4-NEXT:    retq
103;
104; AVX512-LABEL: test_2f64_fmadd:
105; AVX512:       # BB#0:
106; AVX512-NEXT:    vfmadd213pd %xmm2, %xmm1, %xmm0
107; AVX512-NEXT:    retq
108  %x = fmul <2 x double> %a0, %a1
109  %res = fadd <2 x double> %x, %a2
110  ret <2 x double> %res
111}
112
; 256-bit vector of 4 doubles: a0 * a1 + a2.
; Every run expects a single packed fused multiply-add on ymm registers.
113define <4 x double> @test_4f64_fmadd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
114; FMA-LABEL: test_4f64_fmadd:
115; FMA:       # BB#0:
116; FMA-NEXT:    vfmadd213pd %ymm2, %ymm1, %ymm0
117; FMA-NEXT:    retq
118;
119; FMA4-LABEL: test_4f64_fmadd:
120; FMA4:       # BB#0:
121; FMA4-NEXT:    vfmaddpd %ymm2, %ymm1, %ymm0, %ymm0
122; FMA4-NEXT:    retq
123;
124; AVX512-LABEL: test_4f64_fmadd:
125; AVX512:       # BB#0:
126; AVX512-NEXT:    vfmadd213pd %ymm2, %ymm1, %ymm0
127; AVX512-NEXT:    retq
128  %x = fmul <4 x double> %a0, %a1
129  %res = fadd <4 x double> %x, %a2
130  ret <4 x double> %res
131}
132
133;
134; Pattern: (fsub (fmul x, y), z) -> (fmsub x, y, z)
135;
136
; Scalar float: a0 * a1 - a2.
; Every run expects one fused multiply-subtract (vfmsub213ss, or vfmsubss on
; the FMA4 runs). The AVX512 run additionally expects a vmovaps copy from
; %zmm1 to %zmm0 before the return.
137define float @test_f32_fmsub(float %a0, float %a1, float %a2) {
138; FMA-LABEL: test_f32_fmsub:
139; FMA:       # BB#0:
140; FMA-NEXT:    vfmsub213ss %xmm2, %xmm1, %xmm0
141; FMA-NEXT:    retq
142;
143; FMA4-LABEL: test_f32_fmsub:
144; FMA4:       # BB#0:
145; FMA4-NEXT:    vfmsubss %xmm2, %xmm1, %xmm0, %xmm0
146; FMA4-NEXT:    retq
147;
148; AVX512-LABEL: test_f32_fmsub:
149; AVX512:       # BB#0:
150; AVX512-NEXT:    vfmsub213ss %xmm2, %xmm0, %xmm1
151; AVX512-NEXT:    vmovaps %zmm1, %zmm0
152; AVX512-NEXT:    retq
153  %x = fmul float %a0, %a1
154  %res = fsub float %x, %a2
155  ret float %res
156}
157
; 128-bit vector of 4 floats: a0 * a1 - a2.
; Every run expects a single packed fused multiply-subtract on xmm registers.
158define <4 x float> @test_4f32_fmsub(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
159; FMA-LABEL: test_4f32_fmsub:
160; FMA:       # BB#0:
161; FMA-NEXT:    vfmsub213ps %xmm2, %xmm1, %xmm0
162; FMA-NEXT:    retq
163;
164; FMA4-LABEL: test_4f32_fmsub:
165; FMA4:       # BB#0:
166; FMA4-NEXT:    vfmsubps %xmm2, %xmm1, %xmm0, %xmm0
167; FMA4-NEXT:    retq
168;
169; AVX512-LABEL: test_4f32_fmsub:
170; AVX512:       # BB#0:
171; AVX512-NEXT:    vfmsub213ps %xmm2, %xmm1, %xmm0
172; AVX512-NEXT:    retq
173  %x = fmul <4 x float> %a0, %a1
174  %res = fsub <4 x float> %x, %a2
175  ret <4 x float> %res
176}
177
; 256-bit vector of 8 floats: a0 * a1 - a2.
; Every run expects a single packed fused multiply-subtract on ymm registers.
178define <8 x float> @test_8f32_fmsub(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
179; FMA-LABEL: test_8f32_fmsub:
180; FMA:       # BB#0:
181; FMA-NEXT:    vfmsub213ps %ymm2, %ymm1, %ymm0
182; FMA-NEXT:    retq
183;
184; FMA4-LABEL: test_8f32_fmsub:
185; FMA4:       # BB#0:
186; FMA4-NEXT:    vfmsubps %ymm2, %ymm1, %ymm0, %ymm0
187; FMA4-NEXT:    retq
188;
189; AVX512-LABEL: test_8f32_fmsub:
190; AVX512:       # BB#0:
191; AVX512-NEXT:    vfmsub213ps %ymm2, %ymm1, %ymm0
192; AVX512-NEXT:    retq
193  %x = fmul <8 x float> %a0, %a1
194  %res = fsub <8 x float> %x, %a2
195  ret <8 x float> %res
196}
197
; Scalar double: a0 * a1 - a2.
; Every run expects one fused multiply-subtract (vfmsub213sd, or vfmsubsd on
; the FMA4 runs). The AVX512 run additionally expects a vmovaps copy from
; %zmm1 to %zmm0 before the return.
198define double @test_f64_fmsub(double %a0, double %a1, double %a2) {
199; FMA-LABEL: test_f64_fmsub:
200; FMA:       # BB#0:
201; FMA-NEXT:    vfmsub213sd %xmm2, %xmm1, %xmm0
202; FMA-NEXT:    retq
203;
204; FMA4-LABEL: test_f64_fmsub:
205; FMA4:       # BB#0:
206; FMA4-NEXT:    vfmsubsd %xmm2, %xmm1, %xmm0, %xmm0
207; FMA4-NEXT:    retq
208;
209; AVX512-LABEL: test_f64_fmsub:
210; AVX512:       # BB#0:
211; AVX512-NEXT:    vfmsub213sd %xmm2, %xmm0, %xmm1
212; AVX512-NEXT:    vmovaps %zmm1, %zmm0
213; AVX512-NEXT:    retq
214  %x = fmul double %a0, %a1
215  %res = fsub double %x, %a2
216  ret double %res
217}
218
; 128-bit vector of 2 doubles: a0 * a1 - a2.
; Every run expects a single packed fused multiply-subtract on xmm registers.
219define <2 x double> @test_2f64_fmsub(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
220; FMA-LABEL: test_2f64_fmsub:
221; FMA:       # BB#0:
222; FMA-NEXT:    vfmsub213pd %xmm2, %xmm1, %xmm0
223; FMA-NEXT:    retq
224;
225; FMA4-LABEL: test_2f64_fmsub:
226; FMA4:       # BB#0:
227; FMA4-NEXT:    vfmsubpd %xmm2, %xmm1, %xmm0, %xmm0
228; FMA4-NEXT:    retq
229;
230; AVX512-LABEL: test_2f64_fmsub:
231; AVX512:       # BB#0:
232; AVX512-NEXT:    vfmsub213pd %xmm2, %xmm1, %xmm0
233; AVX512-NEXT:    retq
234  %x = fmul <2 x double> %a0, %a1
235  %res = fsub <2 x double> %x, %a2
236  ret <2 x double> %res
237}
238
; 256-bit vector of 4 doubles: a0 * a1 - a2.
; Every run expects a single packed fused multiply-subtract on ymm registers.
239define <4 x double> @test_4f64_fmsub(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
240; FMA-LABEL: test_4f64_fmsub:
241; FMA:       # BB#0:
242; FMA-NEXT:    vfmsub213pd %ymm2, %ymm1, %ymm0
243; FMA-NEXT:    retq
244;
245; FMA4-LABEL: test_4f64_fmsub:
246; FMA4:       # BB#0:
247; FMA4-NEXT:    vfmsubpd %ymm2, %ymm1, %ymm0, %ymm0
248; FMA4-NEXT:    retq
249;
250; AVX512-LABEL: test_4f64_fmsub:
251; AVX512:       # BB#0:
252; AVX512-NEXT:    vfmsub213pd %ymm2, %ymm1, %ymm0
253; AVX512-NEXT:    retq
254  %x = fmul <4 x double> %a0, %a1
255  %res = fsub <4 x double> %x, %a2
256  ret <4 x double> %res
257}
258
259;
260; Pattern: (fsub z, (fmul x, y)) -> (fnmadd x, y, z)
261;
262
; Scalar float: a2 - a0 * a1 (the product is the subtrahend).
; Every run expects one negated fused multiply-add (vfnmadd213ss, or
; vfnmaddss on the FMA4 runs). The AVX512 run additionally expects a vmovaps
; copy from %zmm1 to %zmm0 before the return.
263define float @test_f32_fnmadd(float %a0, float %a1, float %a2) {
264; FMA-LABEL: test_f32_fnmadd:
265; FMA:       # BB#0:
266; FMA-NEXT:    vfnmadd213ss %xmm2, %xmm1, %xmm0
267; FMA-NEXT:    retq
268;
269; FMA4-LABEL: test_f32_fnmadd:
270; FMA4:       # BB#0:
271; FMA4-NEXT:    vfnmaddss %xmm2, %xmm1, %xmm0, %xmm0
272; FMA4-NEXT:    retq
273;
274; AVX512-LABEL: test_f32_fnmadd:
275; AVX512:       # BB#0:
276; AVX512-NEXT:    vfnmadd213ss %xmm2, %xmm0, %xmm1
277; AVX512-NEXT:    vmovaps %zmm1, %zmm0
278; AVX512-NEXT:    retq
279  %x = fmul float %a0, %a1
280  %res = fsub float %a2, %x
281  ret float %res
282}
283
; 128-bit vector of 4 floats: a2 - a0 * a1.
; Every run expects a single packed negated fused multiply-add on xmm
; registers.
284define <4 x float> @test_4f32_fnmadd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
285; FMA-LABEL: test_4f32_fnmadd:
286; FMA:       # BB#0:
287; FMA-NEXT:    vfnmadd213ps %xmm2, %xmm1, %xmm0
288; FMA-NEXT:    retq
289;
290; FMA4-LABEL: test_4f32_fnmadd:
291; FMA4:       # BB#0:
292; FMA4-NEXT:    vfnmaddps %xmm2, %xmm1, %xmm0, %xmm0
293; FMA4-NEXT:    retq
294;
295; AVX512-LABEL: test_4f32_fnmadd:
296; AVX512:       # BB#0:
297; AVX512-NEXT:    vfnmadd213ps %xmm2, %xmm1, %xmm0
298; AVX512-NEXT:    retq
299  %x = fmul <4 x float> %a0, %a1
300  %res = fsub <4 x float> %a2, %x
301  ret <4 x float> %res
302}
303
; 256-bit vector of 8 floats: a2 - a0 * a1.
; Every run expects a single packed negated fused multiply-add on ymm
; registers.
304define <8 x float> @test_8f32_fnmadd(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
305; FMA-LABEL: test_8f32_fnmadd:
306; FMA:       # BB#0:
307; FMA-NEXT:    vfnmadd213ps %ymm2, %ymm1, %ymm0
308; FMA-NEXT:    retq
309;
310; FMA4-LABEL: test_8f32_fnmadd:
311; FMA4:       # BB#0:
312; FMA4-NEXT:    vfnmaddps %ymm2, %ymm1, %ymm0, %ymm0
313; FMA4-NEXT:    retq
314;
315; AVX512-LABEL: test_8f32_fnmadd:
316; AVX512:       # BB#0:
317; AVX512-NEXT:    vfnmadd213ps %ymm2, %ymm1, %ymm0
318; AVX512-NEXT:    retq
319  %x = fmul <8 x float> %a0, %a1
320  %res = fsub <8 x float> %a2, %x
321  ret <8 x float> %res
322}
323
; Scalar double: a2 - a0 * a1 (the product is the subtrahend).
; Every run expects one negated fused multiply-add (vfnmadd213sd, or
; vfnmaddsd on the FMA4 runs). The AVX512 run additionally expects a vmovaps
; copy from %zmm1 to %zmm0 before the return.
324define double @test_f64_fnmadd(double %a0, double %a1, double %a2) {
325; FMA-LABEL: test_f64_fnmadd:
326; FMA:       # BB#0:
327; FMA-NEXT:    vfnmadd213sd %xmm2, %xmm1, %xmm0
328; FMA-NEXT:    retq
329;
330; FMA4-LABEL: test_f64_fnmadd:
331; FMA4:       # BB#0:
332; FMA4-NEXT:    vfnmaddsd %xmm2, %xmm1, %xmm0, %xmm0
333; FMA4-NEXT:    retq
334;
335; AVX512-LABEL: test_f64_fnmadd:
336; AVX512:       # BB#0:
337; AVX512-NEXT:    vfnmadd213sd %xmm2, %xmm0, %xmm1
338; AVX512-NEXT:    vmovaps %zmm1, %zmm0
339; AVX512-NEXT:    retq
340  %x = fmul double %a0, %a1
341  %res = fsub double %a2, %x
342  ret double %res
343}
344
; 128-bit vector of 2 doubles: a2 - a0 * a1.
; Every run expects a single packed negated fused multiply-add on xmm
; registers.
345define <2 x double> @test_2f64_fnmadd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
346; FMA-LABEL: test_2f64_fnmadd:
347; FMA:       # BB#0:
348; FMA-NEXT:    vfnmadd213pd %xmm2, %xmm1, %xmm0
349; FMA-NEXT:    retq
350;
351; FMA4-LABEL: test_2f64_fnmadd:
352; FMA4:       # BB#0:
353; FMA4-NEXT:    vfnmaddpd %xmm2, %xmm1, %xmm0, %xmm0
354; FMA4-NEXT:    retq
355;
356; AVX512-LABEL: test_2f64_fnmadd:
357; AVX512:       # BB#0:
358; AVX512-NEXT:    vfnmadd213pd %xmm2, %xmm1, %xmm0
359; AVX512-NEXT:    retq
360  %x = fmul <2 x double> %a0, %a1
361  %res = fsub <2 x double> %a2, %x
362  ret <2 x double> %res
363}
364
; 256-bit vector of 4 doubles: a2 - a0 * a1.
; Every run expects a single packed negated fused multiply-add on ymm
; registers.
365define <4 x double> @test_4f64_fnmadd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
366; FMA-LABEL: test_4f64_fnmadd:
367; FMA:       # BB#0:
368; FMA-NEXT:    vfnmadd213pd %ymm2, %ymm1, %ymm0
369; FMA-NEXT:    retq
370;
371; FMA4-LABEL: test_4f64_fnmadd:
372; FMA4:       # BB#0:
373; FMA4-NEXT:    vfnmaddpd %ymm2, %ymm1, %ymm0, %ymm0
374; FMA4-NEXT:    retq
375;
376; AVX512-LABEL: test_4f64_fnmadd:
377; AVX512:       # BB#0:
378; AVX512-NEXT:    vfnmadd213pd %ymm2, %ymm1, %ymm0
379; AVX512-NEXT:    retq
380  %x = fmul <4 x double> %a0, %a1
381  %res = fsub <4 x double> %a2, %x
382  ret <4 x double> %res
383}
384
385;
386; Pattern: (fsub (fneg (fmul x, y)), z) -> (fnmsub x, y, z)
387;
388
; Scalar float: (-0.0 - a0 * a1) - a2, i.e. the negated product minus a2.
; The negation is spelled as a subtraction from -0.0. Every run expects one
; negated fused multiply-subtract (vfnmsub213ss, or vfnmsubss on the FMA4
; runs). The AVX512 run additionally expects a vmovaps copy from %zmm1 to
; %zmm0 before the return.
389define float @test_f32_fnmsub(float %a0, float %a1, float %a2) {
390; FMA-LABEL: test_f32_fnmsub:
391; FMA:       # BB#0:
392; FMA-NEXT:    vfnmsub213ss %xmm2, %xmm1, %xmm0
393; FMA-NEXT:    retq
394;
395; FMA4-LABEL: test_f32_fnmsub:
396; FMA4:       # BB#0:
397; FMA4-NEXT:    vfnmsubss %xmm2, %xmm1, %xmm0, %xmm0
398; FMA4-NEXT:    retq
399;
400; AVX512-LABEL: test_f32_fnmsub:
401; AVX512:       # BB#0:
402; AVX512-NEXT:    vfnmsub213ss %xmm2, %xmm0, %xmm1
403; AVX512-NEXT:    vmovaps %zmm1, %zmm0
404; AVX512-NEXT:    retq
405  %x = fmul float %a0, %a1
406  %y = fsub float -0.000000e+00, %x
407  %res = fsub float %y, %a2
408  ret float %res
409}
410
; 128-bit vector of 4 floats: (-0.0 - a0 * a1) - a2.
; The negation is spelled as a subtraction from a -0.0 splat. Every run
; expects a single packed negated fused multiply-subtract on xmm registers.
411define <4 x float> @test_4f32_fnmsub(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
412; FMA-LABEL: test_4f32_fnmsub:
413; FMA:       # BB#0:
414; FMA-NEXT:    vfnmsub213ps %xmm2, %xmm1, %xmm0
415; FMA-NEXT:    retq
416;
417; FMA4-LABEL: test_4f32_fnmsub:
418; FMA4:       # BB#0:
419; FMA4-NEXT:    vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0
420; FMA4-NEXT:    retq
421;
422; AVX512-LABEL: test_4f32_fnmsub:
423; AVX512:       # BB#0:
424; AVX512-NEXT:    vfnmsub213ps %xmm2, %xmm1, %xmm0
425; AVX512-NEXT:    retq
426  %x = fmul <4 x float> %a0, %a1
427  %y = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x
428  %res = fsub <4 x float> %y, %a2
429  ret <4 x float> %res
430}
431
; 256-bit vector of 8 floats: (-0.0 - a0 * a1) - a2.
; The negation is spelled as a subtraction from a -0.0 splat. Every run
; expects a single packed negated fused multiply-subtract on ymm registers.
432define <8 x float> @test_8f32_fnmsub(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
433; FMA-LABEL: test_8f32_fnmsub:
434; FMA:       # BB#0:
435; FMA-NEXT:    vfnmsub213ps %ymm2, %ymm1, %ymm0
436; FMA-NEXT:    retq
437;
438; FMA4-LABEL: test_8f32_fnmsub:
439; FMA4:       # BB#0:
440; FMA4-NEXT:    vfnmsubps %ymm2, %ymm1, %ymm0, %ymm0
441; FMA4-NEXT:    retq
442;
443; AVX512-LABEL: test_8f32_fnmsub:
444; AVX512:       # BB#0:
445; AVX512-NEXT:    vfnmsub213ps %ymm2, %ymm1, %ymm0
446; AVX512-NEXT:    retq
447  %x = fmul <8 x float> %a0, %a1
448  %y = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x
449  %res = fsub <8 x float> %y, %a2
450  ret <8 x float> %res
451}
452
; Scalar double: (-0.0 - a0 * a1) - a2, i.e. the negated product minus a2.
; The negation is spelled as a subtraction from -0.0. Every run expects one
; negated fused multiply-subtract (vfnmsub213sd, or vfnmsubsd on the FMA4
; runs). The AVX512 run additionally expects a vmovaps copy from %zmm1 to
; %zmm0 before the return.
453define double @test_f64_fnmsub(double %a0, double %a1, double %a2) {
454; FMA-LABEL: test_f64_fnmsub:
455; FMA:       # BB#0:
456; FMA-NEXT:    vfnmsub213sd %xmm2, %xmm1, %xmm0
457; FMA-NEXT:    retq
458;
459; FMA4-LABEL: test_f64_fnmsub:
460; FMA4:       # BB#0:
461; FMA4-NEXT:    vfnmsubsd %xmm2, %xmm1, %xmm0, %xmm0
462; FMA4-NEXT:    retq
463;
464; AVX512-LABEL: test_f64_fnmsub:
465; AVX512:       # BB#0:
466; AVX512-NEXT:    vfnmsub213sd %xmm2, %xmm0, %xmm1
467; AVX512-NEXT:    vmovaps %zmm1, %zmm0
468; AVX512-NEXT:    retq
469  %x = fmul double %a0, %a1
470  %y = fsub double -0.000000e+00, %x
471  %res = fsub double %y, %a2
472  ret double %res
473}
474
; 128-bit vector of 2 doubles: (-0.0 - a0 * a1) - a2.
; The negation is spelled as a subtraction from a -0.0 splat. Every run
; expects a single packed negated fused multiply-subtract on xmm registers.
475define <2 x double> @test_2f64_fnmsub(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
476; FMA-LABEL: test_2f64_fnmsub:
477; FMA:       # BB#0:
478; FMA-NEXT:    vfnmsub213pd %xmm2, %xmm1, %xmm0
479; FMA-NEXT:    retq
480;
481; FMA4-LABEL: test_2f64_fnmsub:
482; FMA4:       # BB#0:
483; FMA4-NEXT:    vfnmsubpd %xmm2, %xmm1, %xmm0, %xmm0
484; FMA4-NEXT:    retq
485;
486; AVX512-LABEL: test_2f64_fnmsub:
487; AVX512:       # BB#0:
488; AVX512-NEXT:    vfnmsub213pd %xmm2, %xmm1, %xmm0
489; AVX512-NEXT:    retq
490  %x = fmul <2 x double> %a0, %a1
491  %y = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %x
492  %res = fsub <2 x double> %y, %a2
493  ret <2 x double> %res
494}
495
; 256-bit vector of 4 doubles: (-0.0 - a0 * a1) - a2.
; The negation is spelled as a subtraction from a -0.0 splat. Every run
; expects a single packed negated fused multiply-subtract on ymm registers.
496define <4 x double> @test_4f64_fnmsub(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
497; FMA-LABEL: test_4f64_fnmsub:
498; FMA:       # BB#0:
499; FMA-NEXT:    vfnmsub213pd %ymm2, %ymm1, %ymm0
500; FMA-NEXT:    retq
501;
502; FMA4-LABEL: test_4f64_fnmsub:
503; FMA4:       # BB#0:
504; FMA4-NEXT:    vfnmsubpd %ymm2, %ymm1, %ymm0, %ymm0
505; FMA4-NEXT:    retq
506;
507; AVX512-LABEL: test_4f64_fnmsub:
508; AVX512:       # BB#0:
509; AVX512-NEXT:    vfnmsub213pd %ymm2, %ymm1, %ymm0
510; AVX512-NEXT:    retq
511  %x = fmul <4 x double> %a0, %a1
512  %y = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %x
513  %res = fsub <4 x double> %y, %a2
514  ret <4 x double> %res
515}
516
517;
518; Load Folding Patterns
519;
520
; Fused multiply-add where one multiplicand is loaded through pointer %a0:
; load(a0) * a1 + a2 on 4 floats.
; The FMA and FMA4 runs expect the load to be folded into the fused
; instruction as a (%rdi) memory operand. The AVX512 run currently expects
; the load to stay separate: a vmovaps from (%rdi), the vfmadd213ps, and
; then a vmovaps copy of the result into %zmm0.
521define <4 x float> @test_4f32_fmadd_load(<4 x float>* %a0, <4 x float> %a1, <4 x float> %a2) {
522; FMA-LABEL: test_4f32_fmadd_load:
523; FMA:       # BB#0:
524; FMA-NEXT:    vfmadd132ps (%rdi), %xmm1, %xmm0
525; FMA-NEXT:    retq
526;
527; FMA4-LABEL: test_4f32_fmadd_load:
528; FMA4:       # BB#0:
529; FMA4-NEXT:    vfmaddps %xmm1, (%rdi), %xmm0, %xmm0
530; FMA4-NEXT:    retq
531;
532; AVX512-LABEL: test_4f32_fmadd_load:
533; AVX512:       # BB#0:
534; AVX512-NEXT:    vmovaps (%rdi), %xmm2
535; AVX512-NEXT:    vfmadd213ps %xmm1, %xmm0, %xmm2
536; AVX512-NEXT:    vmovaps %zmm2, %zmm0
537; AVX512-NEXT:    retq
538  %x = load <4 x float>, <4 x float>* %a0
539  %y = fmul <4 x float> %x, %a1
540  %res = fadd <4 x float> %y, %a2
541  ret <4 x float> %res
542}
543
; Fused multiply-subtract where one multiplicand is loaded through pointer
; %a0: load(a0) * a1 - a2 on 2 doubles.
; The FMA and FMA4 runs expect the load to be folded into the fused
; instruction as a (%rdi) memory operand. The AVX512 run currently expects
; the load to stay separate: a vmovapd from (%rdi), the vfmsub213pd, and
; then a vmovaps copy of the result into %zmm0.
544define <2 x double> @test_2f64_fmsub_load(<2 x double>* %a0, <2 x double> %a1, <2 x double> %a2) {
545; FMA-LABEL: test_2f64_fmsub_load:
546; FMA:       # BB#0:
547; FMA-NEXT:    vfmsub132pd (%rdi), %xmm1, %xmm0
548; FMA-NEXT:    retq
549;
550; FMA4-LABEL: test_2f64_fmsub_load:
551; FMA4:       # BB#0:
552; FMA4-NEXT:    vfmsubpd %xmm1, (%rdi), %xmm0, %xmm0
553; FMA4-NEXT:    retq
554;
555; AVX512-LABEL: test_2f64_fmsub_load:
556; AVX512:       # BB#0:
557; AVX512-NEXT:    vmovapd (%rdi), %xmm2
558; AVX512-NEXT:    vfmsub213pd %xmm1, %xmm0, %xmm2
559; AVX512-NEXT:    vmovaps %zmm2, %zmm0
560; AVX512-NEXT:    retq
561  %x = load <2 x double>, <2 x double>* %a0
562  %y = fmul <2 x double> %x, %a1
563  %res = fsub <2 x double> %y, %a2
564  ret <2 x double> %res
565}
566
567;
568; Patterns (+ fneg variants): mul(add(1.0,x),y), mul(sub(1.0,x),y), mul(sub(x,1.0),y)
569;
570
; (x + 1.0) * y, with the sum as the left multiplicand.
; Every run expects the expression to be distributed into x * y + y and
; emitted as one fused multiply-add that uses y (%xmm1) as both a
; multiplicand and the addend; no constant load of 1.0 is expected.
571define <4 x float> @test_v4f32_mul_add_x_one_y(<4 x float> %x, <4 x float> %y) {
572; FMA-LABEL: test_v4f32_mul_add_x_one_y:
573; FMA:       # BB#0:
574; FMA-NEXT:    vfmadd213ps %xmm1, %xmm1, %xmm0
575; FMA-NEXT:    retq
576;
577; FMA4-LABEL: test_v4f32_mul_add_x_one_y:
578; FMA4:       # BB#0:
579; FMA4-NEXT:    vfmaddps %xmm1, %xmm1, %xmm0, %xmm0
580; FMA4-NEXT:    retq
581;
582; AVX512-LABEL: test_v4f32_mul_add_x_one_y:
583; AVX512:       # BB#0:
584; AVX512-NEXT:    vfmadd213ps %xmm1, %xmm1, %xmm0
585; AVX512-NEXT:    retq
586  %a = fadd <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0>
587  %m = fmul <4 x float> %a, %y
588  ret <4 x float> %m
589}
590
; y * (x + 1.0): commuted form of the (x + 1.0) * y test, with the sum as the
; right multiplicand. The expected output is the same single fused
; multiply-add with y (%xmm1) as both multiplicand and addend.
591define <4 x float> @test_v4f32_mul_y_add_x_one(<4 x float> %x, <4 x float> %y) {
592; FMA-LABEL: test_v4f32_mul_y_add_x_one:
593; FMA:       # BB#0:
594; FMA-NEXT:    vfmadd213ps %xmm1, %xmm1, %xmm0
595; FMA-NEXT:    retq
596;
597; FMA4-LABEL: test_v4f32_mul_y_add_x_one:
598; FMA4:       # BB#0:
599; FMA4-NEXT:    vfmaddps %xmm1, %xmm1, %xmm0, %xmm0
600; FMA4-NEXT:    retq
601;
602; AVX512-LABEL: test_v4f32_mul_y_add_x_one:
603; AVX512:       # BB#0:
604; AVX512-NEXT:    vfmadd213ps %xmm1, %xmm1, %xmm0
605; AVX512-NEXT:    retq
606  %a = fadd <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0>
607  %m = fmul <4 x float> %y, %a
608  ret <4 x float> %m
609}
610
; (x + -1.0) * y, with the sum as the left multiplicand.
; Every run expects the expression to be distributed into x * y - y and
; emitted as one fused multiply-subtract that uses y (%xmm1) as both a
; multiplicand and the subtrahend.
611define <4 x float> @test_v4f32_mul_add_x_negone_y(<4 x float> %x, <4 x float> %y) {
612; FMA-LABEL: test_v4f32_mul_add_x_negone_y:
613; FMA:       # BB#0:
614; FMA-NEXT:    vfmsub213ps %xmm1, %xmm1, %xmm0
615; FMA-NEXT:    retq
616;
617; FMA4-LABEL: test_v4f32_mul_add_x_negone_y:
618; FMA4:       # BB#0:
619; FMA4-NEXT:    vfmsubps %xmm1, %xmm1, %xmm0, %xmm0
620; FMA4-NEXT:    retq
621;
622; AVX512-LABEL: test_v4f32_mul_add_x_negone_y:
623; AVX512:       # BB#0:
624; AVX512-NEXT:    vfmsub213ps %xmm1, %xmm1, %xmm0
625; AVX512-NEXT:    retq
626  %a = fadd <4 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0>
627  %m = fmul <4 x float> %a, %y
628  ret <4 x float> %m
629}
630
; y * (x + -1.0): commuted form of the (x + -1.0) * y test. The expected
; output is the same single fused multiply-subtract with y (%xmm1) as both
; multiplicand and subtrahend.
631define <4 x float> @test_v4f32_mul_y_add_x_negone(<4 x float> %x, <4 x float> %y) {
632; FMA-LABEL: test_v4f32_mul_y_add_x_negone:
633; FMA:       # BB#0:
634; FMA-NEXT:    vfmsub213ps %xmm1, %xmm1, %xmm0
635; FMA-NEXT:    retq
636;
637; FMA4-LABEL: test_v4f32_mul_y_add_x_negone:
638; FMA4:       # BB#0:
639; FMA4-NEXT:    vfmsubps %xmm1, %xmm1, %xmm0, %xmm0
640; FMA4-NEXT:    retq
641;
642; AVX512-LABEL: test_v4f32_mul_y_add_x_negone:
643; AVX512:       # BB#0:
644; AVX512-NEXT:    vfmsub213ps %xmm1, %xmm1, %xmm0
645; AVX512-NEXT:    retq
646  %a = fadd <4 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0>
647  %m = fmul <4 x float> %y, %a
648  ret <4 x float> %m
649}
650
; (1.0 - x) * y, with the difference as the left multiplicand.
; Every run expects the expression to be distributed into -(x * y) + y and
; emitted as one negated fused multiply-add that uses y (%xmm1) as both a
; multiplicand and the addend.
651define <4 x float> @test_v4f32_mul_sub_one_x_y(<4 x float> %x, <4 x float> %y) {
652; FMA-LABEL: test_v4f32_mul_sub_one_x_y:
653; FMA:       # BB#0:
654; FMA-NEXT:    vfnmadd213ps %xmm1, %xmm1, %xmm0
655; FMA-NEXT:    retq
656;
657; FMA4-LABEL: test_v4f32_mul_sub_one_x_y:
658; FMA4:       # BB#0:
659; FMA4-NEXT:    vfnmaddps %xmm1, %xmm1, %xmm0, %xmm0
660; FMA4-NEXT:    retq
661;
662; AVX512-LABEL: test_v4f32_mul_sub_one_x_y:
663; AVX512:       # BB#0:
664; AVX512-NEXT:    vfnmadd213ps %xmm1, %xmm1, %xmm0
665; AVX512-NEXT:    retq
666  %s = fsub <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x
667  %m = fmul <4 x float> %s, %y
668  ret <4 x float> %m
669}
670
; y * (1.0 - x): commuted form of the (1.0 - x) * y test. The expected output
; is the same single negated fused multiply-add with y (%xmm1) as both
; multiplicand and addend.
671define <4 x float> @test_v4f32_mul_y_sub_one_x(<4 x float> %x, <4 x float> %y) {
672; FMA-LABEL: test_v4f32_mul_y_sub_one_x:
673; FMA:       # BB#0:
674; FMA-NEXT:    vfnmadd213ps %xmm1, %xmm1, %xmm0
675; FMA-NEXT:    retq
676;
677; FMA4-LABEL: test_v4f32_mul_y_sub_one_x:
678; FMA4:       # BB#0:
679; FMA4-NEXT:    vfnmaddps %xmm1, %xmm1, %xmm0, %xmm0
680; FMA4-NEXT:    retq
681;
682; AVX512-LABEL: test_v4f32_mul_y_sub_one_x:
683; AVX512:       # BB#0:
684; AVX512-NEXT:    vfnmadd213ps %xmm1, %xmm1, %xmm0
685; AVX512-NEXT:    retq
686  %s = fsub <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x
687  %m = fmul <4 x float> %y, %s
688  ret <4 x float> %m
689}
690
; (-1.0 - x) * y, with the difference as the left multiplicand.
; Every run expects the expression to be distributed into -(x * y) - y and
; emitted as one negated fused multiply-subtract that uses y (%xmm1) as both
; a multiplicand and the subtrahend.
691define <4 x float> @test_v4f32_mul_sub_negone_x_y(<4 x float> %x, <4 x float> %y) {
692; FMA-LABEL: test_v4f32_mul_sub_negone_x_y:
693; FMA:       # BB#0:
694; FMA-NEXT:    vfnmsub213ps %xmm1, %xmm1, %xmm0
695; FMA-NEXT:    retq
696;
697; FMA4-LABEL: test_v4f32_mul_sub_negone_x_y:
698; FMA4:       # BB#0:
699; FMA4-NEXT:    vfnmsubps %xmm1, %xmm1, %xmm0, %xmm0
700; FMA4-NEXT:    retq
701;
702; AVX512-LABEL: test_v4f32_mul_sub_negone_x_y:
703; AVX512:       # BB#0:
704; AVX512-NEXT:    vfnmsub213ps %xmm1, %xmm1, %xmm0
705; AVX512-NEXT:    retq
706  %s = fsub <4 x float> <float -1.0, float -1.0, float -1.0, float -1.0>, %x
707  %m = fmul <4 x float> %s, %y
708  ret <4 x float> %m
709}
710
; y * (-1.0 - x): commuted form of the (-1.0 - x) * y test. The expected
; output is the same single negated fused multiply-subtract with y (%xmm1) as
; both multiplicand and subtrahend.
711define <4 x float> @test_v4f32_mul_y_sub_negone_x(<4 x float> %x, <4 x float> %y) {
712; FMA-LABEL: test_v4f32_mul_y_sub_negone_x:
713; FMA:       # BB#0:
714; FMA-NEXT:    vfnmsub213ps %xmm1, %xmm1, %xmm0
715; FMA-NEXT:    retq
716;
717; FMA4-LABEL: test_v4f32_mul_y_sub_negone_x:
718; FMA4:       # BB#0:
719; FMA4-NEXT:    vfnmsubps %xmm1, %xmm1, %xmm0, %xmm0
720; FMA4-NEXT:    retq
721;
722; AVX512-LABEL: test_v4f32_mul_y_sub_negone_x:
723; AVX512:       # BB#0:
724; AVX512-NEXT:    vfnmsub213ps %xmm1, %xmm1, %xmm0
725; AVX512-NEXT:    retq
726  %s = fsub <4 x float> <float -1.0, float -1.0, float -1.0, float -1.0>, %x
727  %m = fmul <4 x float> %y, %s
728  ret <4 x float> %m
729}
730
; (x - 1.0) * y, with the difference as the left multiplicand.
; Every run expects the expression to be distributed into x * y - y and
; emitted as one fused multiply-subtract that uses y (%xmm1) as both a
; multiplicand and the subtrahend.
731define <4 x float> @test_v4f32_mul_sub_x_one_y(<4 x float> %x, <4 x float> %y) {
732; FMA-LABEL: test_v4f32_mul_sub_x_one_y:
733; FMA:       # BB#0:
734; FMA-NEXT:    vfmsub213ps %xmm1, %xmm1, %xmm0
735; FMA-NEXT:    retq
736;
737; FMA4-LABEL: test_v4f32_mul_sub_x_one_y:
738; FMA4:       # BB#0:
739; FMA4-NEXT:    vfmsubps %xmm1, %xmm1, %xmm0, %xmm0
740; FMA4-NEXT:    retq
741;
742; AVX512-LABEL: test_v4f32_mul_sub_x_one_y:
743; AVX512:       # BB#0:
744; AVX512-NEXT:    vfmsub213ps %xmm1, %xmm1, %xmm0
745; AVX512-NEXT:    retq
746  %s = fsub <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0>
747  %m = fmul <4 x float> %s, %y
748  ret <4 x float> %m
749}
750
; y * (x - 1.0): commuted form of the (x - 1.0) * y test. The expected output
; is the same single fused multiply-subtract with y (%xmm1) as both
; multiplicand and subtrahend.
751define <4 x float> @test_v4f32_mul_y_sub_x_one(<4 x float> %x, <4 x float> %y) {
752; FMA-LABEL: test_v4f32_mul_y_sub_x_one:
753; FMA:       # BB#0:
754; FMA-NEXT:    vfmsub213ps %xmm1, %xmm1, %xmm0
755; FMA-NEXT:    retq
756;
757; FMA4-LABEL: test_v4f32_mul_y_sub_x_one:
758; FMA4:       # BB#0:
759; FMA4-NEXT:    vfmsubps %xmm1, %xmm1, %xmm0, %xmm0
760; FMA4-NEXT:    retq
761;
762; AVX512-LABEL: test_v4f32_mul_y_sub_x_one:
763; AVX512:       # BB#0:
764; AVX512-NEXT:    vfmsub213ps %xmm1, %xmm1, %xmm0
765; AVX512-NEXT:    retq
766  %s = fsub <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0>
767  %m = fmul <4 x float> %y, %s
768  ret <4 x float> %m
769}
770
; (x - -1.0) * y, with the difference as the left multiplicand.
; Every run expects the expression to be distributed into x * y + y and
; emitted as one fused multiply-add that uses y (%xmm1) as both a
; multiplicand and the addend.
771define <4 x float> @test_v4f32_mul_sub_x_negone_y(<4 x float> %x, <4 x float> %y) {
772; FMA-LABEL: test_v4f32_mul_sub_x_negone_y:
773; FMA:       # BB#0:
774; FMA-NEXT:    vfmadd213ps %xmm1, %xmm1, %xmm0
775; FMA-NEXT:    retq
776;
777; FMA4-LABEL: test_v4f32_mul_sub_x_negone_y:
778; FMA4:       # BB#0:
779; FMA4-NEXT:    vfmaddps %xmm1, %xmm1, %xmm0, %xmm0
780; FMA4-NEXT:    retq
781;
782; AVX512-LABEL: test_v4f32_mul_sub_x_negone_y:
783; AVX512:       # BB#0:
784; AVX512-NEXT:    vfmadd213ps %xmm1, %xmm1, %xmm0
785; AVX512-NEXT:    retq
786  %s = fsub <4 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0>
787  %m = fmul <4 x float> %s, %y
788  ret <4 x float> %m
789}
790
; y * (x - -1.0): commuted form of the (x - -1.0) * y test. The expected
; output is the same single fused multiply-add with y (%xmm1) as both
; multiplicand and addend.
791define <4 x float> @test_v4f32_mul_y_sub_x_negone(<4 x float> %x, <4 x float> %y) {
792; FMA-LABEL: test_v4f32_mul_y_sub_x_negone:
793; FMA:       # BB#0:
794; FMA-NEXT:    vfmadd213ps %xmm1, %xmm1, %xmm0
795; FMA-NEXT:    retq
796;
797; FMA4-LABEL: test_v4f32_mul_y_sub_x_negone:
798; FMA4:       # BB#0:
799; FMA4-NEXT:    vfmaddps %xmm1, %xmm1, %xmm0, %xmm0
800; FMA4-NEXT:    retq
801;
802; AVX512-LABEL: test_v4f32_mul_y_sub_x_negone:
803; AVX512:       # BB#0:
804; AVX512-NEXT:    vfmadd213ps %xmm1, %xmm1, %xmm0
805; AVX512-NEXT:    retq
806  %s = fsub <4 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0>
807  %m = fmul <4 x float> %y, %s
808  ret <4 x float> %m
809}
810
811;
812; Interpolation Patterns: add(mul(x,t),mul(sub(1.0,t),y))
813;
814
; Scalar float linear interpolation: x * t + y * (1.0 - t).
; Every run expects exactly two fused operations and no standalone subtract
; or multiply: a negated multiply-add producing y - t * y, then a
; multiply-add that adds x * t to it. The AVX512 run additionally expects a
; vmovaps copy of the result from %zmm2 to %zmm0.
815define float @test_f32_interp(float %x, float %y, float %t) {
816; FMA-LABEL: test_f32_interp:
817; FMA:       # BB#0:
818; FMA-NEXT:    vfnmadd213ss %xmm1, %xmm2, %xmm1
819; FMA-NEXT:    vfmadd213ss %xmm1, %xmm2, %xmm0
820; FMA-NEXT:    retq
821;
822; FMA4-LABEL: test_f32_interp:
823; FMA4:       # BB#0:
824; FMA4-NEXT:    vfnmaddss %xmm1, %xmm1, %xmm2, %xmm1
825; FMA4-NEXT:    vfmaddss %xmm1, %xmm2, %xmm0, %xmm0
826; FMA4-NEXT:    retq
827;
828; AVX512-LABEL: test_f32_interp:
829; AVX512:       # BB#0:
830; AVX512-NEXT:    vfnmadd213ss %xmm1, %xmm2, %xmm1
831; AVX512-NEXT:    vfmadd213ss %xmm1, %xmm0, %xmm2
832; AVX512-NEXT:    vmovaps %zmm2, %zmm0
833; AVX512-NEXT:    retq
834  %t1 = fsub float 1.0, %t
835  %tx = fmul float %x, %t
836  %ty = fmul float %y, %t1
837  %r = fadd float %tx, %ty
838  ret float %r
839}
840
; Linear interpolation on 4 floats: x * t + y * (1.0 - t).
; Every run expects two fused operations (a negated multiply-add followed by
; a multiply-add) and no standalone subtract or multiply. The AVX512 run
; additionally expects a leading vmovaps copy of t from %zmm2 to %zmm3.
841define <4 x float> @test_v4f32_interp(<4 x float> %x, <4 x float> %y, <4 x float> %t) {
842; FMA-LABEL: test_v4f32_interp:
843; FMA:       # BB#0:
844; FMA-NEXT:    vfnmadd213ps %xmm1, %xmm2, %xmm1
845; FMA-NEXT:    vfmadd213ps %xmm1, %xmm2, %xmm0
846; FMA-NEXT:    retq
847;
848; FMA4-LABEL: test_v4f32_interp:
849; FMA4:       # BB#0:
850; FMA4-NEXT:    vfnmaddps %xmm1, %xmm1, %xmm2, %xmm1
851; FMA4-NEXT:    vfmaddps %xmm1, %xmm2, %xmm0, %xmm0
852; FMA4-NEXT:    retq
853;
854; AVX512-LABEL: test_v4f32_interp:
855; AVX512:       # BB#0:
856; AVX512-NEXT:    vmovaps %zmm2, %zmm3
857; AVX512-NEXT:    vfnmadd213ps %xmm1, %xmm1, %xmm3
858; AVX512-NEXT:    vfmadd213ps %xmm3, %xmm2, %xmm0
859; AVX512-NEXT:    retq
860  %t1 = fsub <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %t
861  %tx = fmul <4 x float> %x, %t
862  %ty = fmul <4 x float> %y, %t1
863  %r = fadd <4 x float> %tx, %ty
864  ret <4 x float> %r
865}
866
; Linear interpolation on 8 floats: x * t + y * (1.0 - t).
; Every run expects two fused operations on ymm registers (a negated
; multiply-add followed by a multiply-add). The AVX512 run additionally
; expects a leading vmovaps copy of t from %zmm2 to %zmm3.
867define <8 x float> @test_v8f32_interp(<8 x float> %x, <8 x float> %y, <8 x float> %t) {
868; FMA-LABEL: test_v8f32_interp:
869; FMA:       # BB#0:
870; FMA-NEXT:    vfnmadd213ps %ymm1, %ymm2, %ymm1
871; FMA-NEXT:    vfmadd213ps %ymm1, %ymm2, %ymm0
872; FMA-NEXT:    retq
873;
874; FMA4-LABEL: test_v8f32_interp:
875; FMA4:       # BB#0:
876; FMA4-NEXT:    vfnmaddps %ymm1, %ymm1, %ymm2, %ymm1
877; FMA4-NEXT:    vfmaddps %ymm1, %ymm2, %ymm0, %ymm0
878; FMA4-NEXT:    retq
879;
880; AVX512-LABEL: test_v8f32_interp:
881; AVX512:       # BB#0:
882; AVX512-NEXT:    vmovaps %zmm2, %zmm3
883; AVX512-NEXT:    vfnmadd213ps %ymm1, %ymm1, %ymm3
884; AVX512-NEXT:    vfmadd213ps %ymm3, %ymm2, %ymm0
885; AVX512-NEXT:    retq
886  %t1 = fsub <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %t
887  %tx = fmul <8 x float> %x, %t
888  %ty = fmul <8 x float> %y, %t1
889  %r = fadd <8 x float> %tx, %ty
890  ret <8 x float> %r
891}
892
; Scalar double linear interpolation: x * t + y * (1.0 - t).
; Every run expects exactly two fused operations and no standalone subtract
; or multiply: a negated multiply-add producing y - t * y, then a
; multiply-add that adds x * t to it. The AVX512 run additionally expects a
; vmovaps copy of the result from %zmm2 to %zmm0.
893define double @test_f64_interp(double %x, double %y, double %t) {
894; FMA-LABEL: test_f64_interp:
895; FMA:       # BB#0:
896; FMA-NEXT:    vfnmadd213sd %xmm1, %xmm2, %xmm1
897; FMA-NEXT:    vfmadd213sd %xmm1, %xmm2, %xmm0
898; FMA-NEXT:    retq
899;
900; FMA4-LABEL: test_f64_interp:
901; FMA4:       # BB#0:
902; FMA4-NEXT:    vfnmaddsd %xmm1, %xmm1, %xmm2, %xmm1
903; FMA4-NEXT:    vfmaddsd %xmm1, %xmm2, %xmm0, %xmm0
904; FMA4-NEXT:    retq
905;
906; AVX512-LABEL: test_f64_interp:
907; AVX512:       # BB#0:
908; AVX512-NEXT:    vfnmadd213sd %xmm1, %xmm2, %xmm1
909; AVX512-NEXT:    vfmadd213sd %xmm1, %xmm0, %xmm2
910; AVX512-NEXT:    vmovaps %zmm2, %zmm0
911; AVX512-NEXT:    retq
912  %t1 = fsub double 1.0, %t
913  %tx = fmul double %x, %t
914  %ty = fmul double %y, %t1
915  %r = fadd double %tx, %ty
916  ret double %r
917}
918
; Linear interpolation on 2 doubles: x * t + y * (1.0 - t).
; Every run expects two fused operations on xmm registers (a negated
; multiply-add followed by a multiply-add). The AVX512 run additionally
; expects a leading vmovaps copy of t from %zmm2 to %zmm3.
919define <2 x double> @test_v2f64_interp(<2 x double> %x, <2 x double> %y, <2 x double> %t) {
920; FMA-LABEL: test_v2f64_interp:
921; FMA:       # BB#0:
922; FMA-NEXT:    vfnmadd213pd %xmm1, %xmm2, %xmm1
923; FMA-NEXT:    vfmadd213pd %xmm1, %xmm2, %xmm0
924; FMA-NEXT:    retq
925;
926; FMA4-LABEL: test_v2f64_interp:
927; FMA4:       # BB#0:
928; FMA4-NEXT:    vfnmaddpd %xmm1, %xmm1, %xmm2, %xmm1
929; FMA4-NEXT:    vfmaddpd %xmm1, %xmm2, %xmm0, %xmm0
930; FMA4-NEXT:    retq
931;
932; AVX512-LABEL: test_v2f64_interp:
933; AVX512:       # BB#0:
934; AVX512-NEXT:    vmovaps %zmm2, %zmm3
935; AVX512-NEXT:    vfnmadd213pd %xmm1, %xmm1, %xmm3
936; AVX512-NEXT:    vfmadd213pd %xmm3, %xmm2, %xmm0
937; AVX512-NEXT:    retq
938  %t1 = fsub <2 x double> <double 1.0, double 1.0>, %t
939  %tx = fmul <2 x double> %x, %t
940  %ty = fmul <2 x double> %y, %t1
941  %r = fadd <2 x double> %tx, %ty
942  ret <2 x double> %r
943}
944
; Linear interpolation on 4 doubles: x * t + y * (1.0 - t).
; Every run expects two fused operations on ymm registers (a negated
; multiply-add followed by a multiply-add). The AVX512 run additionally
; expects a leading vmovaps copy of t from %zmm2 to %zmm3.
945define <4 x double> @test_v4f64_interp(<4 x double> %x, <4 x double> %y, <4 x double> %t) {
946; FMA-LABEL: test_v4f64_interp:
947; FMA:       # BB#0:
948; FMA-NEXT:    vfnmadd213pd %ymm1, %ymm2, %ymm1
949; FMA-NEXT:    vfmadd213pd %ymm1, %ymm2, %ymm0
950; FMA-NEXT:    retq
951;
952; FMA4-LABEL: test_v4f64_interp:
953; FMA4:       # BB#0:
954; FMA4-NEXT:    vfnmaddpd %ymm1, %ymm1, %ymm2, %ymm1
955; FMA4-NEXT:    vfmaddpd %ymm1, %ymm2, %ymm0, %ymm0
956; FMA4-NEXT:    retq
957;
958; AVX512-LABEL: test_v4f64_interp:
959; AVX512:       # BB#0:
960; AVX512-NEXT:    vmovaps %zmm2, %zmm3
961; AVX512-NEXT:    vfnmadd213pd %ymm1, %ymm1, %ymm3
962; AVX512-NEXT:    vfmadd213pd %ymm3, %ymm2, %ymm0
963; AVX512-NEXT:    retq
964  %t1 = fsub <4 x double> <double 1.0, double 1.0, double 1.0, double 1.0>, %t
965  %tx = fmul <4 x double> %x, %t
966  %ty = fmul <4 x double> %y, %t1
967  %r = fadd <4 x double> %tx, %ty
968  ret <4 x double> %r
969}
970
971;
972; Pattern: (fneg (fma x, y, z)) -> (fma x, -y, -z)
973;
974
; Negated multiply-add: the IR computes -((a0 * a1) + a2), with the negation
; written as an fsub from a -0.0 splat.
; Every prefix expects the whole expression to become one fnmsub-form
; instruction, with no separate negate (xor) instruction.
; The function carries attribute group #0.
975define <4 x float> @test_v4f32_fneg_fmadd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
976; FMA-LABEL: test_v4f32_fneg_fmadd:
977; FMA:       # BB#0:
978; FMA-NEXT:    vfnmsub213ps %xmm2, %xmm1, %xmm0
979; FMA-NEXT:    retq
980;
981; FMA4-LABEL: test_v4f32_fneg_fmadd:
982; FMA4:       # BB#0:
983; FMA4-NEXT:    vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0
984; FMA4-NEXT:    retq
985;
986; AVX512-LABEL: test_v4f32_fneg_fmadd:
987; AVX512:       # BB#0:
988; AVX512-NEXT:    vfnmsub213ps %xmm2, %xmm1, %xmm0
989; AVX512-NEXT:    retq
990  %mul = fmul <4 x float> %a0, %a1
991  %add = fadd <4 x float> %mul, %a2
992  %neg = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %add
993  ret <4 x float> %neg
994}
995
; Negated multiply-subtract: the IR computes -((a0 * a1) - a2), with the
; negation written as an fsub from a -0.0 splat.
; Every prefix expects the whole expression to become one fnmadd-form
; instruction on ymm registers, with no separate negate instruction.
; The function carries attribute group #0.
996define <4 x double> @test_v4f64_fneg_fmsub(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
997; FMA-LABEL: test_v4f64_fneg_fmsub:
998; FMA:       # BB#0:
999; FMA-NEXT:    vfnmadd213pd %ymm2, %ymm1, %ymm0
1000; FMA-NEXT:    retq
1001;
1002; FMA4-LABEL: test_v4f64_fneg_fmsub:
1003; FMA4:       # BB#0:
1004; FMA4-NEXT:    vfnmaddpd %ymm2, %ymm1, %ymm0, %ymm0
1005; FMA4-NEXT:    retq
1006;
1007; AVX512-LABEL: test_v4f64_fneg_fmsub:
1008; AVX512:       # BB#0:
1009; AVX512-NEXT:    vfnmadd213pd %ymm2, %ymm1, %ymm0
1010; AVX512-NEXT:    retq
1011  %mul = fmul <4 x double> %a0, %a1
1012  %sub = fsub <4 x double> %mul, %a2
1013  %neg = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %sub
1014  ret <4 x double> %neg
1015}
1016
; Negation of a negated-multiply-add: the IR computes -((-(a0 * a1)) + a2),
; with both negations written as an fsub from a -0.0 splat.
; Algebraically this is (a0 * a1) - a2, and every prefix expects one
; fmsub-form instruction with no separate negate instructions.
; The function carries attribute group #0.
1017define <4 x float> @test_v4f32_fneg_fnmadd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
1018; FMA-LABEL: test_v4f32_fneg_fnmadd:
1019; FMA:       # BB#0:
1020; FMA-NEXT:    vfmsub213ps %xmm2, %xmm1, %xmm0
1021; FMA-NEXT:    retq
1022;
1023; FMA4-LABEL: test_v4f32_fneg_fnmadd:
1024; FMA4:       # BB#0:
1025; FMA4-NEXT:    vfmsubps %xmm2, %xmm1, %xmm0, %xmm0
1026; FMA4-NEXT:    retq
1027;
1028; AVX512-LABEL: test_v4f32_fneg_fnmadd:
1029; AVX512:       # BB#0:
1030; AVX512-NEXT:    vfmsub213ps %xmm2, %xmm1, %xmm0
1031; AVX512-NEXT:    retq
1032  %mul = fmul <4 x float> %a0, %a1
1033  %neg0 = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %mul
1034  %add = fadd <4 x float> %neg0, %a2
1035  %neg1 = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %add
1036  ret <4 x float> %neg1
1037}
1038
; Negation of a negated-multiply-subtract: the IR computes
; -((-(a0 * a1)) - a2), with both negations written as an fsub from a
; -0.0 splat.
; Algebraically this is (a0 * a1) + a2, and every prefix expects one
; fmadd-form instruction on ymm registers with no separate negate
; instructions.
; The function carries attribute group #0.
1039define <4 x double> @test_v4f64_fneg_fnmsub(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
1040; FMA-LABEL: test_v4f64_fneg_fnmsub:
1041; FMA:       # BB#0:
1042; FMA-NEXT:    vfmadd213pd %ymm2, %ymm1, %ymm0
1043; FMA-NEXT:    retq
1044;
1045; FMA4-LABEL: test_v4f64_fneg_fnmsub:
1046; FMA4:       # BB#0:
1047; FMA4-NEXT:    vfmaddpd %ymm2, %ymm1, %ymm0, %ymm0
1048; FMA4-NEXT:    retq
1049;
1050; AVX512-LABEL: test_v4f64_fneg_fnmsub:
1051; AVX512:       # BB#0:
1052; AVX512-NEXT:    vfmadd213pd %ymm2, %ymm1, %ymm0
1053; AVX512-NEXT:    retq
1054  %mul = fmul <4 x double> %a0, %a1
1055  %neg0 = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %mul
1056  %sub = fsub <4 x double> %neg0, %a2
1057  %neg1 = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %sub
1058  ret <4 x double> %neg1
1059}
1060
1061;
1062; Pattern: (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
1063;
1064
; Two multiplies of the same %x by constant vectors, summed:
; x * <1,2,3,4> + x * <4,3,2,1>.
; Every prefix expects a single vmulps of %x by one constant loaded
; RIP-relative, with no FMA instruction and no second multiply.
; The element-wise sum of the two constants is 5.0 in every lane, which is
; consistent with the AVX512 check using a {1to4} broadcast memory operand.
; The function carries attribute group #0.
1065define <4 x float> @test_v4f32_fma_x_c1_fmul_x_c2(<4 x float> %x) #0 {
1066; FMA-LABEL: test_v4f32_fma_x_c1_fmul_x_c2:
1067; FMA:       # BB#0:
1068; FMA-NEXT:    vmulps {{.*}}(%rip), %xmm0, %xmm0
1069; FMA-NEXT:    retq
1070;
1071; FMA4-LABEL: test_v4f32_fma_x_c1_fmul_x_c2:
1072; FMA4:       # BB#0:
1073; FMA4-NEXT:    vmulps {{.*}}(%rip), %xmm0, %xmm0
1074; FMA4-NEXT:    retq
1075;
1076; AVX512-LABEL: test_v4f32_fma_x_c1_fmul_x_c2:
1077; AVX512:       # BB#0:
1078; AVX512-NEXT:    vmulps {{.*}}(%rip){1to4}, %xmm0, %xmm0
1079; AVX512-NEXT:    retq
1080  %m0 = fmul <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0>
1081  %m1 = fmul <4 x float> %x, <float 4.0, float 3.0, float 2.0, float 1.0>
1082  %a  = fadd <4 x float> %m0, %m1
1083  ret <4 x float> %a
1084}
1085
1086;
1087; Pattern: (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
1088;
1089
; Chained constant multiplies feeding an add: (x * <1,2,3,4>) * <4,3,2,1> + y.
; Every prefix expects a single fmadd-form instruction whose multiplier is
; one constant loaded RIP-relative, with no standalone multiply.
; The checks do not pin the constant's value; the element-wise product of
; the two IR constants would be <4.0, 6.0, 6.0, 4.0>.
; The AVX512 checks accumulate into %xmm1 and then copy the result to the
; return register with vmovaps.
; The function carries attribute group #0.
1090define <4 x float> @test_v4f32_fma_fmul_x_c1_c2_y(<4 x float> %x, <4 x float> %y) #0 {
1091; FMA-LABEL: test_v4f32_fma_fmul_x_c1_c2_y:
1092; FMA:       # BB#0:
1093; FMA-NEXT:    vfmadd132ps {{.*}}(%rip), %xmm1, %xmm0
1094; FMA-NEXT:    retq
1095;
1096; FMA4-LABEL: test_v4f32_fma_fmul_x_c1_c2_y:
1097; FMA4:       # BB#0:
1098; FMA4-NEXT:    vfmaddps %xmm1, {{.*}}(%rip), %xmm0, %xmm0
1099; FMA4-NEXT:    retq
1100;
1101; AVX512-LABEL: test_v4f32_fma_fmul_x_c1_c2_y:
1102; AVX512:       # BB#0:
1103; AVX512-NEXT:    vfmadd231ps {{.*}}(%rip), %xmm0, %xmm1
1104; AVX512-NEXT:    vmovaps %zmm1, %zmm0
1105; AVX512-NEXT:    retq
1106  %m0 = fmul <4 x float> %x,  <float 1.0, float 2.0, float 3.0, float 4.0>
1107  %m1 = fmul <4 x float> %m0, <float 4.0, float 3.0, float 2.0, float 1.0>
1108  %a  = fadd <4 x float> %m1, %y
1109  ret <4 x float> %a
1110}
1111
1112; Pattern: (fneg (fmul x, y)) -> (fnmsub x, y, 0)
1113
; Scalar double negated multiply: -(x * y), where the fmul carries the nsz
; fast-math flag and the negation is an fsub from -0.0.
; Every prefix expects a register zeroed with an xor idiom followed by one
; fnmsub-form instruction that uses the zeroed register as the addend,
; rather than a multiply plus a separate sign flip.
; The AVX512 checks compute into %xmm1 and then copy the result to the
; return register with vmovaps.
; The function carries attribute group #0.
1114define double @test_f64_fneg_fmul(double %x, double %y) #0 {
1115; FMA-LABEL: test_f64_fneg_fmul:
1116; FMA:       # BB#0:
1117; FMA-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
1118; FMA-NEXT:    vfnmsub213sd %xmm2, %xmm1, %xmm0
1119; FMA-NEXT:    retq
1120;
1121; FMA4-LABEL: test_f64_fneg_fmul:
1122; FMA4:       # BB#0:
1123; FMA4-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
1124; FMA4-NEXT:    vfnmsubsd %xmm2, %xmm1, %xmm0, %xmm0
1125; FMA4-NEXT:    retq
1126;
1127; AVX512-LABEL: test_f64_fneg_fmul:
1128; AVX512:       # BB#0:
1129; AVX512-NEXT:    vxorps %xmm2, %xmm2, %xmm2
1130; AVX512-NEXT:    vfnmsub213sd %xmm2, %xmm0, %xmm1
1131; AVX512-NEXT:    vmovaps %zmm1, %zmm0
1132; AVX512-NEXT:    retq
1133  %m = fmul nsz double %x, %y
1134  %n = fsub double -0.0, %m
1135  ret double %n
1136}
1137
; <4 x float> negated multiply: -(x * y), where the fmul carries the nsz
; fast-math flag and the negation is an fsub from a -0.0 splat.
; Every prefix expects a register zeroed with vxorps followed by one
; fnmsub-form instruction that uses the zeroed register as the addend.
; The function carries attribute group #0.
1138define <4 x float> @test_v4f32_fneg_fmul(<4 x float> %x, <4 x float> %y) #0 {
1139; FMA-LABEL: test_v4f32_fneg_fmul:
1140; FMA:       # BB#0:
1141; FMA-NEXT:    vxorps %xmm2, %xmm2, %xmm2
1142; FMA-NEXT:    vfnmsub213ps %xmm2, %xmm1, %xmm0
1143; FMA-NEXT:    retq
1144;
1145; FMA4-LABEL: test_v4f32_fneg_fmul:
1146; FMA4:       # BB#0:
1147; FMA4-NEXT:    vxorps %xmm2, %xmm2, %xmm2
1148; FMA4-NEXT:    vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0
1149; FMA4-NEXT:    retq
1150;
1151; AVX512-LABEL: test_v4f32_fneg_fmul:
1152; AVX512:       # BB#0:
1153; AVX512-NEXT:    vxorps %xmm2, %xmm2, %xmm2
1154; AVX512-NEXT:    vfnmsub213ps %xmm2, %xmm1, %xmm0
1155; AVX512-NEXT:    retq
1156  %m = fmul nsz <4 x float> %x, %y
1157  %n = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %m
1158  ret <4 x float> %n
1159}
1160
; <4 x double> negated multiply: -(x * y), where the fmul carries the nsz
; fast-math flag and the negation is an fsub from a -0.0 splat.
; Every prefix expects a ymm register zeroed with an xor idiom (vxorpd for
; the FMA and FMA4 prefixes, vxorps for AVX512) followed by one fnmsub-form
; instruction that uses the zeroed register as the addend.
; The function carries attribute group #0.
1161define <4 x double> @test_v4f64_fneg_fmul(<4 x double> %x, <4 x double> %y) #0 {
1162; FMA-LABEL: test_v4f64_fneg_fmul:
1163; FMA:       # BB#0:
1164; FMA-NEXT:    vxorpd %ymm2, %ymm2, %ymm2
1165; FMA-NEXT:    vfnmsub213pd %ymm2, %ymm1, %ymm0
1166; FMA-NEXT:    retq
1167;
1168; FMA4-LABEL: test_v4f64_fneg_fmul:
1169; FMA4:       # BB#0:
1170; FMA4-NEXT:    vxorpd %ymm2, %ymm2, %ymm2
1171; FMA4-NEXT:    vfnmsubpd %ymm2, %ymm1, %ymm0, %ymm0
1172; FMA4-NEXT:    retq
1173;
1174; AVX512-LABEL: test_v4f64_fneg_fmul:
1175; AVX512:       # BB#0:
1176; AVX512-NEXT:    vxorps %ymm2, %ymm2, %ymm2
1177; AVX512-NEXT:    vfnmsub213pd %ymm2, %ymm1, %ymm0
1178; AVX512-NEXT:    retq
1179  %m = fmul nsz <4 x double> %x, %y
1180  %n = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %m
1181  ret <4 x double> %n
1182}
1183
; Negative test for the fneg(fmul) fold: same -(x * y) shape on
; <4 x double>, but the fmul has no nsz flag.
; The checks use the common ALL prefix, so every run configuration must
; produce the same code: a plain vmulpd followed by a vxorpd against a
; constant loaded RIP-relative (presumably the sign-bit mask; the check does
; not pin its value). No fnmsub-form instruction is expected.
; The function still carries attribute group #0.
1184define <4 x double> @test_v4f64_fneg_fmul_no_nsz(<4 x double> %x, <4 x double> %y) #0 {
1185; ALL-LABEL: test_v4f64_fneg_fmul_no_nsz:
1186; ALL:       # BB#0:
1187; ALL-NEXT:    vmulpd %ymm1, %ymm0, %ymm0
1188; ALL-NEXT:    vxorpd {{.*}}(%rip), %ymm0, %ymm0
1189; ALL-NEXT:    retq
1190  %m = fmul <4 x double> %x, %y
1191  %n = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %m
1192  ret <4 x double> %n
1193}
1194
; Attribute group #0: sets the "unsafe-fp-math" function attribute to "true"
; on every function in this file declared with a trailing #0.
1195attributes #0 = { "unsafe-fp-math"="true" }
1196