• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=FMA --check-prefix=FMA-INFS
3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4,+fma -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=FMA4 --check-prefix=FMA4-INFS
4; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4 -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=FMA4 --check-prefix=FMA4-INFS
5; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512-INFS
6; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma -fp-contract=fast -enable-no-infs-fp-math | FileCheck %s --check-prefix=ALL --check-prefix=FMA --check-prefix=FMA-NOINFS
7; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4,+fma -fp-contract=fast -enable-no-infs-fp-math | FileCheck %s --check-prefix=ALL --check-prefix=FMA4 --check-prefix=FMA4-NOINFS
8; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4 -fp-contract=fast -enable-no-infs-fp-math | FileCheck %s --check-prefix=ALL --check-prefix=FMA4 --check-prefix=FMA4-NOINFS
9; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl -fp-contract=fast -enable-no-infs-fp-math | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512-NOINFS
10
11;
12; Pattern: (fadd (fmul x, y), z) -> (fmadd x,y,z)
13;
14
; Scalar f32: fadd(fmul(%a0, %a1), %a2) is expected to lower to one fused
; multiply-add (vfmadd213ss for the FMA/AVX512 prefixes, vfmaddss for FMA4).
15define float @test_f32_fmadd(float %a0, float %a1, float %a2) {
16; FMA-LABEL: test_f32_fmadd:
17; FMA:       # %bb.0:
18; FMA-NEXT:    vfmadd213ss %xmm2, %xmm1, %xmm0
19; FMA-NEXT:    retq
20;
21; FMA4-LABEL: test_f32_fmadd:
22; FMA4:       # %bb.0:
23; FMA4-NEXT:    vfmaddss %xmm2, %xmm1, %xmm0, %xmm0
24; FMA4-NEXT:    retq
25;
26; AVX512-LABEL: test_f32_fmadd:
27; AVX512:       # %bb.0:
28; AVX512-NEXT:    vfmadd213ss %xmm2, %xmm1, %xmm0
29; AVX512-NEXT:    retq
30  %x = fmul float %a0, %a1
31  %res = fadd float %x, %a2
32  ret float %res
33}
34
; <4 x float>: fadd(fmul(%a0, %a1), %a2) is expected to lower to one packed
; fused multiply-add on xmm registers (vfmadd213ps, or vfmaddps for FMA4).
35define <4 x float> @test_4f32_fmadd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
36; FMA-LABEL: test_4f32_fmadd:
37; FMA:       # %bb.0:
38; FMA-NEXT:    vfmadd213ps %xmm2, %xmm1, %xmm0
39; FMA-NEXT:    retq
40;
41; FMA4-LABEL: test_4f32_fmadd:
42; FMA4:       # %bb.0:
43; FMA4-NEXT:    vfmaddps %xmm2, %xmm1, %xmm0, %xmm0
44; FMA4-NEXT:    retq
45;
46; AVX512-LABEL: test_4f32_fmadd:
47; AVX512:       # %bb.0:
48; AVX512-NEXT:    vfmadd213ps %xmm2, %xmm1, %xmm0
49; AVX512-NEXT:    retq
50  %x = fmul <4 x float> %a0, %a1
51  %res = fadd <4 x float> %x, %a2
52  ret <4 x float> %res
53}
54
; <8 x float>: fadd(fmul(%a0, %a1), %a2) is expected to lower to one packed
; fused multiply-add on ymm registers (vfmadd213ps, or vfmaddps for FMA4).
55define <8 x float> @test_8f32_fmadd(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
56; FMA-LABEL: test_8f32_fmadd:
57; FMA:       # %bb.0:
58; FMA-NEXT:    vfmadd213ps %ymm2, %ymm1, %ymm0
59; FMA-NEXT:    retq
60;
61; FMA4-LABEL: test_8f32_fmadd:
62; FMA4:       # %bb.0:
63; FMA4-NEXT:    vfmaddps %ymm2, %ymm1, %ymm0, %ymm0
64; FMA4-NEXT:    retq
65;
66; AVX512-LABEL: test_8f32_fmadd:
67; AVX512:       # %bb.0:
68; AVX512-NEXT:    vfmadd213ps %ymm2, %ymm1, %ymm0
69; AVX512-NEXT:    retq
70  %x = fmul <8 x float> %a0, %a1
71  %res = fadd <8 x float> %x, %a2
72  ret <8 x float> %res
73}
74
; Scalar f64: fadd(fmul(%a0, %a1), %a2) is expected to lower to one fused
; multiply-add (vfmadd213sd for the FMA/AVX512 prefixes, vfmaddsd for FMA4).
75define double @test_f64_fmadd(double %a0, double %a1, double %a2) {
76; FMA-LABEL: test_f64_fmadd:
77; FMA:       # %bb.0:
78; FMA-NEXT:    vfmadd213sd %xmm2, %xmm1, %xmm0
79; FMA-NEXT:    retq
80;
81; FMA4-LABEL: test_f64_fmadd:
82; FMA4:       # %bb.0:
83; FMA4-NEXT:    vfmaddsd %xmm2, %xmm1, %xmm0, %xmm0
84; FMA4-NEXT:    retq
85;
86; AVX512-LABEL: test_f64_fmadd:
87; AVX512:       # %bb.0:
88; AVX512-NEXT:    vfmadd213sd %xmm2, %xmm1, %xmm0
89; AVX512-NEXT:    retq
90  %x = fmul double %a0, %a1
91  %res = fadd double %x, %a2
92  ret double %res
93}
94
; <2 x double>: fadd(fmul(%a0, %a1), %a2) is expected to lower to one packed
; fused multiply-add on xmm registers (vfmadd213pd, or vfmaddpd for FMA4).
95define <2 x double> @test_2f64_fmadd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
96; FMA-LABEL: test_2f64_fmadd:
97; FMA:       # %bb.0:
98; FMA-NEXT:    vfmadd213pd %xmm2, %xmm1, %xmm0
99; FMA-NEXT:    retq
100;
101; FMA4-LABEL: test_2f64_fmadd:
102; FMA4:       # %bb.0:
103; FMA4-NEXT:    vfmaddpd %xmm2, %xmm1, %xmm0, %xmm0
104; FMA4-NEXT:    retq
105;
106; AVX512-LABEL: test_2f64_fmadd:
107; AVX512:       # %bb.0:
108; AVX512-NEXT:    vfmadd213pd %xmm2, %xmm1, %xmm0
109; AVX512-NEXT:    retq
110  %x = fmul <2 x double> %a0, %a1
111  %res = fadd <2 x double> %x, %a2
112  ret <2 x double> %res
113}
114
; <4 x double>: fadd(fmul(%a0, %a1), %a2) is expected to lower to one packed
; fused multiply-add on ymm registers (vfmadd213pd, or vfmaddpd for FMA4).
115define <4 x double> @test_4f64_fmadd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
116; FMA-LABEL: test_4f64_fmadd:
117; FMA:       # %bb.0:
118; FMA-NEXT:    vfmadd213pd %ymm2, %ymm1, %ymm0
119; FMA-NEXT:    retq
120;
121; FMA4-LABEL: test_4f64_fmadd:
122; FMA4:       # %bb.0:
123; FMA4-NEXT:    vfmaddpd %ymm2, %ymm1, %ymm0, %ymm0
124; FMA4-NEXT:    retq
125;
126; AVX512-LABEL: test_4f64_fmadd:
127; AVX512:       # %bb.0:
128; AVX512-NEXT:    vfmadd213pd %ymm2, %ymm1, %ymm0
129; AVX512-NEXT:    retq
130  %x = fmul <4 x double> %a0, %a1
131  %res = fadd <4 x double> %x, %a2
132  ret <4 x double> %res
133}
134
135;
136; Pattern: (fsub (fmul x, y), z) -> (fmsub x, y, z)
137;
138
; Scalar f32: fsub(fmul(%a0, %a1), %a2) is expected to lower to one fused
; multiply-subtract (vfmsub213ss for FMA/AVX512, vfmsubss for FMA4).
139define float @test_f32_fmsub(float %a0, float %a1, float %a2) {
140; FMA-LABEL: test_f32_fmsub:
141; FMA:       # %bb.0:
142; FMA-NEXT:    vfmsub213ss %xmm2, %xmm1, %xmm0
143; FMA-NEXT:    retq
144;
145; FMA4-LABEL: test_f32_fmsub:
146; FMA4:       # %bb.0:
147; FMA4-NEXT:    vfmsubss %xmm2, %xmm1, %xmm0, %xmm0
148; FMA4-NEXT:    retq
149;
150; AVX512-LABEL: test_f32_fmsub:
151; AVX512:       # %bb.0:
152; AVX512-NEXT:    vfmsub213ss %xmm2, %xmm1, %xmm0
153; AVX512-NEXT:    retq
154  %x = fmul float %a0, %a1
155  %res = fsub float %x, %a2
156  ret float %res
157}
158
; <4 x float>: fsub(fmul(%a0, %a1), %a2) is expected to lower to one packed
; fused multiply-subtract on xmm registers (vfmsub213ps, or vfmsubps for FMA4).
159define <4 x float> @test_4f32_fmsub(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
160; FMA-LABEL: test_4f32_fmsub:
161; FMA:       # %bb.0:
162; FMA-NEXT:    vfmsub213ps %xmm2, %xmm1, %xmm0
163; FMA-NEXT:    retq
164;
165; FMA4-LABEL: test_4f32_fmsub:
166; FMA4:       # %bb.0:
167; FMA4-NEXT:    vfmsubps %xmm2, %xmm1, %xmm0, %xmm0
168; FMA4-NEXT:    retq
169;
170; AVX512-LABEL: test_4f32_fmsub:
171; AVX512:       # %bb.0:
172; AVX512-NEXT:    vfmsub213ps %xmm2, %xmm1, %xmm0
173; AVX512-NEXT:    retq
174  %x = fmul <4 x float> %a0, %a1
175  %res = fsub <4 x float> %x, %a2
176  ret <4 x float> %res
177}
178
; <8 x float>: fsub(fmul(%a0, %a1), %a2) is expected to lower to one packed
; fused multiply-subtract on ymm registers (vfmsub213ps, or vfmsubps for FMA4).
179define <8 x float> @test_8f32_fmsub(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
180; FMA-LABEL: test_8f32_fmsub:
181; FMA:       # %bb.0:
182; FMA-NEXT:    vfmsub213ps %ymm2, %ymm1, %ymm0
183; FMA-NEXT:    retq
184;
185; FMA4-LABEL: test_8f32_fmsub:
186; FMA4:       # %bb.0:
187; FMA4-NEXT:    vfmsubps %ymm2, %ymm1, %ymm0, %ymm0
188; FMA4-NEXT:    retq
189;
190; AVX512-LABEL: test_8f32_fmsub:
191; AVX512:       # %bb.0:
192; AVX512-NEXT:    vfmsub213ps %ymm2, %ymm1, %ymm0
193; AVX512-NEXT:    retq
194  %x = fmul <8 x float> %a0, %a1
195  %res = fsub <8 x float> %x, %a2
196  ret <8 x float> %res
197}
198
; Scalar f64: fsub(fmul(%a0, %a1), %a2) is expected to lower to one fused
; multiply-subtract (vfmsub213sd for FMA/AVX512, vfmsubsd for FMA4).
199define double @test_f64_fmsub(double %a0, double %a1, double %a2) {
200; FMA-LABEL: test_f64_fmsub:
201; FMA:       # %bb.0:
202; FMA-NEXT:    vfmsub213sd %xmm2, %xmm1, %xmm0
203; FMA-NEXT:    retq
204;
205; FMA4-LABEL: test_f64_fmsub:
206; FMA4:       # %bb.0:
207; FMA4-NEXT:    vfmsubsd %xmm2, %xmm1, %xmm0, %xmm0
208; FMA4-NEXT:    retq
209;
210; AVX512-LABEL: test_f64_fmsub:
211; AVX512:       # %bb.0:
212; AVX512-NEXT:    vfmsub213sd %xmm2, %xmm1, %xmm0
213; AVX512-NEXT:    retq
214  %x = fmul double %a0, %a1
215  %res = fsub double %x, %a2
216  ret double %res
217}
218
; <2 x double>: fsub(fmul(%a0, %a1), %a2) is expected to lower to one packed
; fused multiply-subtract on xmm registers (vfmsub213pd, or vfmsubpd for FMA4).
219define <2 x double> @test_2f64_fmsub(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
220; FMA-LABEL: test_2f64_fmsub:
221; FMA:       # %bb.0:
222; FMA-NEXT:    vfmsub213pd %xmm2, %xmm1, %xmm0
223; FMA-NEXT:    retq
224;
225; FMA4-LABEL: test_2f64_fmsub:
226; FMA4:       # %bb.0:
227; FMA4-NEXT:    vfmsubpd %xmm2, %xmm1, %xmm0, %xmm0
228; FMA4-NEXT:    retq
229;
230; AVX512-LABEL: test_2f64_fmsub:
231; AVX512:       # %bb.0:
232; AVX512-NEXT:    vfmsub213pd %xmm2, %xmm1, %xmm0
233; AVX512-NEXT:    retq
234  %x = fmul <2 x double> %a0, %a1
235  %res = fsub <2 x double> %x, %a2
236  ret <2 x double> %res
237}
238
; <4 x double>: fsub(fmul(%a0, %a1), %a2) is expected to lower to one packed
; fused multiply-subtract on ymm registers (vfmsub213pd, or vfmsubpd for FMA4).
239define <4 x double> @test_4f64_fmsub(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
240; FMA-LABEL: test_4f64_fmsub:
241; FMA:       # %bb.0:
242; FMA-NEXT:    vfmsub213pd %ymm2, %ymm1, %ymm0
243; FMA-NEXT:    retq
244;
245; FMA4-LABEL: test_4f64_fmsub:
246; FMA4:       # %bb.0:
247; FMA4-NEXT:    vfmsubpd %ymm2, %ymm1, %ymm0, %ymm0
248; FMA4-NEXT:    retq
249;
250; AVX512-LABEL: test_4f64_fmsub:
251; AVX512:       # %bb.0:
252; AVX512-NEXT:    vfmsub213pd %ymm2, %ymm1, %ymm0
253; AVX512-NEXT:    retq
254  %x = fmul <4 x double> %a0, %a1
255  %res = fsub <4 x double> %x, %a2
256  ret <4 x double> %res
257}
258
259;
260; Pattern: (fsub z, (fmul x, y)) -> (fnmadd x, y, z)
261;
262
; Scalar f32: fsub(%a2, fmul(%a0, %a1)) is expected to lower to one negated
; fused multiply-add (vfnmadd213ss for FMA/AVX512, vfnmaddss for FMA4).
263define float @test_f32_fnmadd(float %a0, float %a1, float %a2) {
264; FMA-LABEL: test_f32_fnmadd:
265; FMA:       # %bb.0:
266; FMA-NEXT:    vfnmadd213ss %xmm2, %xmm1, %xmm0
267; FMA-NEXT:    retq
268;
269; FMA4-LABEL: test_f32_fnmadd:
270; FMA4:       # %bb.0:
271; FMA4-NEXT:    vfnmaddss %xmm2, %xmm1, %xmm0, %xmm0
272; FMA4-NEXT:    retq
273;
274; AVX512-LABEL: test_f32_fnmadd:
275; AVX512:       # %bb.0:
276; AVX512-NEXT:    vfnmadd213ss %xmm2, %xmm1, %xmm0
277; AVX512-NEXT:    retq
278  %x = fmul float %a0, %a1
279  %res = fsub float %a2, %x
280  ret float %res
281}
282
; <4 x float>: fsub(%a2, fmul(%a0, %a1)) is expected to lower to one packed
; negated fused multiply-add on xmm registers (vfnmadd213ps, or vfnmaddps for FMA4).
283define <4 x float> @test_4f32_fnmadd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
284; FMA-LABEL: test_4f32_fnmadd:
285; FMA:       # %bb.0:
286; FMA-NEXT:    vfnmadd213ps %xmm2, %xmm1, %xmm0
287; FMA-NEXT:    retq
288;
289; FMA4-LABEL: test_4f32_fnmadd:
290; FMA4:       # %bb.0:
291; FMA4-NEXT:    vfnmaddps %xmm2, %xmm1, %xmm0, %xmm0
292; FMA4-NEXT:    retq
293;
294; AVX512-LABEL: test_4f32_fnmadd:
295; AVX512:       # %bb.0:
296; AVX512-NEXT:    vfnmadd213ps %xmm2, %xmm1, %xmm0
297; AVX512-NEXT:    retq
298  %x = fmul <4 x float> %a0, %a1
299  %res = fsub <4 x float> %a2, %x
300  ret <4 x float> %res
301}
302
; <8 x float>: fsub(%a2, fmul(%a0, %a1)) is expected to lower to one packed
; negated fused multiply-add on ymm registers (vfnmadd213ps, or vfnmaddps for FMA4).
303define <8 x float> @test_8f32_fnmadd(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
304; FMA-LABEL: test_8f32_fnmadd:
305; FMA:       # %bb.0:
306; FMA-NEXT:    vfnmadd213ps %ymm2, %ymm1, %ymm0
307; FMA-NEXT:    retq
308;
309; FMA4-LABEL: test_8f32_fnmadd:
310; FMA4:       # %bb.0:
311; FMA4-NEXT:    vfnmaddps %ymm2, %ymm1, %ymm0, %ymm0
312; FMA4-NEXT:    retq
313;
314; AVX512-LABEL: test_8f32_fnmadd:
315; AVX512:       # %bb.0:
316; AVX512-NEXT:    vfnmadd213ps %ymm2, %ymm1, %ymm0
317; AVX512-NEXT:    retq
318  %x = fmul <8 x float> %a0, %a1
319  %res = fsub <8 x float> %a2, %x
320  ret <8 x float> %res
321}
322
; Scalar f64: fsub(%a2, fmul(%a0, %a1)) is expected to lower to one negated
; fused multiply-add (vfnmadd213sd for FMA/AVX512, vfnmaddsd for FMA4).
323define double @test_f64_fnmadd(double %a0, double %a1, double %a2) {
324; FMA-LABEL: test_f64_fnmadd:
325; FMA:       # %bb.0:
326; FMA-NEXT:    vfnmadd213sd %xmm2, %xmm1, %xmm0
327; FMA-NEXT:    retq
328;
329; FMA4-LABEL: test_f64_fnmadd:
330; FMA4:       # %bb.0:
331; FMA4-NEXT:    vfnmaddsd %xmm2, %xmm1, %xmm0, %xmm0
332; FMA4-NEXT:    retq
333;
334; AVX512-LABEL: test_f64_fnmadd:
335; AVX512:       # %bb.0:
336; AVX512-NEXT:    vfnmadd213sd %xmm2, %xmm1, %xmm0
337; AVX512-NEXT:    retq
338  %x = fmul double %a0, %a1
339  %res = fsub double %a2, %x
340  ret double %res
341}
342
; <2 x double>: fsub(%a2, fmul(%a0, %a1)) is expected to lower to one packed
; negated fused multiply-add on xmm registers (vfnmadd213pd, or vfnmaddpd for FMA4).
343define <2 x double> @test_2f64_fnmadd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
344; FMA-LABEL: test_2f64_fnmadd:
345; FMA:       # %bb.0:
346; FMA-NEXT:    vfnmadd213pd %xmm2, %xmm1, %xmm0
347; FMA-NEXT:    retq
348;
349; FMA4-LABEL: test_2f64_fnmadd:
350; FMA4:       # %bb.0:
351; FMA4-NEXT:    vfnmaddpd %xmm2, %xmm1, %xmm0, %xmm0
352; FMA4-NEXT:    retq
353;
354; AVX512-LABEL: test_2f64_fnmadd:
355; AVX512:       # %bb.0:
356; AVX512-NEXT:    vfnmadd213pd %xmm2, %xmm1, %xmm0
357; AVX512-NEXT:    retq
358  %x = fmul <2 x double> %a0, %a1
359  %res = fsub <2 x double> %a2, %x
360  ret <2 x double> %res
361}
362
; <4 x double>: fsub(%a2, fmul(%a0, %a1)) is expected to lower to one packed
; negated fused multiply-add on ymm registers (vfnmadd213pd, or vfnmaddpd for FMA4).
363define <4 x double> @test_4f64_fnmadd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
364; FMA-LABEL: test_4f64_fnmadd:
365; FMA:       # %bb.0:
366; FMA-NEXT:    vfnmadd213pd %ymm2, %ymm1, %ymm0
367; FMA-NEXT:    retq
368;
369; FMA4-LABEL: test_4f64_fnmadd:
370; FMA4:       # %bb.0:
371; FMA4-NEXT:    vfnmaddpd %ymm2, %ymm1, %ymm0, %ymm0
372; FMA4-NEXT:    retq
373;
374; AVX512-LABEL: test_4f64_fnmadd:
375; AVX512:       # %bb.0:
376; AVX512-NEXT:    vfnmadd213pd %ymm2, %ymm1, %ymm0
377; AVX512-NEXT:    retq
378  %x = fmul <4 x double> %a0, %a1
379  %res = fsub <4 x double> %a2, %x
380  ret <4 x double> %res
381}
382
383;
384; Pattern: (fsub (fneg (fmul x, y)), z) -> (fnmsub x, y, z)
385;
386
; Scalar f32: the product is negated by subtracting it from -0.0, then %a2 is
; subtracted. The three IR ops are expected to lower to one vfnmsub213ss
; (FMA/AVX512) or vfnmsubss (FMA4).
387define float @test_f32_fnmsub(float %a0, float %a1, float %a2) {
388; FMA-LABEL: test_f32_fnmsub:
389; FMA:       # %bb.0:
390; FMA-NEXT:    vfnmsub213ss %xmm2, %xmm1, %xmm0
391; FMA-NEXT:    retq
392;
393; FMA4-LABEL: test_f32_fnmsub:
394; FMA4:       # %bb.0:
395; FMA4-NEXT:    vfnmsubss %xmm2, %xmm1, %xmm0, %xmm0
396; FMA4-NEXT:    retq
397;
398; AVX512-LABEL: test_f32_fnmsub:
399; AVX512:       # %bb.0:
400; AVX512-NEXT:    vfnmsub213ss %xmm2, %xmm1, %xmm0
401; AVX512-NEXT:    retq
402  %x = fmul float %a0, %a1
403  %y = fsub float -0.000000e+00, %x
404  %res = fsub float %y, %a2
405  ret float %res
406}
407
; <4 x float>: the product is negated by subtracting it from a -0.0 splat, then
; %a2 is subtracted. Expected to lower to one vfnmsub213ps (FMA/AVX512) or
; vfnmsubps (FMA4) on xmm registers.
408define <4 x float> @test_4f32_fnmsub(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
409; FMA-LABEL: test_4f32_fnmsub:
410; FMA:       # %bb.0:
411; FMA-NEXT:    vfnmsub213ps %xmm2, %xmm1, %xmm0
412; FMA-NEXT:    retq
413;
414; FMA4-LABEL: test_4f32_fnmsub:
415; FMA4:       # %bb.0:
416; FMA4-NEXT:    vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0
417; FMA4-NEXT:    retq
418;
419; AVX512-LABEL: test_4f32_fnmsub:
420; AVX512:       # %bb.0:
421; AVX512-NEXT:    vfnmsub213ps %xmm2, %xmm1, %xmm0
422; AVX512-NEXT:    retq
423  %x = fmul <4 x float> %a0, %a1
424  %y = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x
425  %res = fsub <4 x float> %y, %a2
426  ret <4 x float> %res
427}
428
; <8 x float>: the product is negated by subtracting it from a -0.0 splat, then
; %a2 is subtracted. Expected to lower to one vfnmsub213ps (FMA/AVX512) or
; vfnmsubps (FMA4) on ymm registers.
429define <8 x float> @test_8f32_fnmsub(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
430; FMA-LABEL: test_8f32_fnmsub:
431; FMA:       # %bb.0:
432; FMA-NEXT:    vfnmsub213ps %ymm2, %ymm1, %ymm0
433; FMA-NEXT:    retq
434;
435; FMA4-LABEL: test_8f32_fnmsub:
436; FMA4:       # %bb.0:
437; FMA4-NEXT:    vfnmsubps %ymm2, %ymm1, %ymm0, %ymm0
438; FMA4-NEXT:    retq
439;
440; AVX512-LABEL: test_8f32_fnmsub:
441; AVX512:       # %bb.0:
442; AVX512-NEXT:    vfnmsub213ps %ymm2, %ymm1, %ymm0
443; AVX512-NEXT:    retq
444  %x = fmul <8 x float> %a0, %a1
445  %y = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x
446  %res = fsub <8 x float> %y, %a2
447  ret <8 x float> %res
448}
449
; Scalar f64: the product is negated by subtracting it from -0.0, then %a2 is
; subtracted. The three IR ops are expected to lower to one vfnmsub213sd
; (FMA/AVX512) or vfnmsubsd (FMA4).
450define double @test_f64_fnmsub(double %a0, double %a1, double %a2) {
451; FMA-LABEL: test_f64_fnmsub:
452; FMA:       # %bb.0:
453; FMA-NEXT:    vfnmsub213sd %xmm2, %xmm1, %xmm0
454; FMA-NEXT:    retq
455;
456; FMA4-LABEL: test_f64_fnmsub:
457; FMA4:       # %bb.0:
458; FMA4-NEXT:    vfnmsubsd %xmm2, %xmm1, %xmm0, %xmm0
459; FMA4-NEXT:    retq
460;
461; AVX512-LABEL: test_f64_fnmsub:
462; AVX512:       # %bb.0:
463; AVX512-NEXT:    vfnmsub213sd %xmm2, %xmm1, %xmm0
464; AVX512-NEXT:    retq
465  %x = fmul double %a0, %a1
466  %y = fsub double -0.000000e+00, %x
467  %res = fsub double %y, %a2
468  ret double %res
469}
470
; <2 x double>: the product is negated by subtracting it from a -0.0 splat, then
; %a2 is subtracted. Expected to lower to one vfnmsub213pd (FMA/AVX512) or
; vfnmsubpd (FMA4) on xmm registers.
471define <2 x double> @test_2f64_fnmsub(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
472; FMA-LABEL: test_2f64_fnmsub:
473; FMA:       # %bb.0:
474; FMA-NEXT:    vfnmsub213pd %xmm2, %xmm1, %xmm0
475; FMA-NEXT:    retq
476;
477; FMA4-LABEL: test_2f64_fnmsub:
478; FMA4:       # %bb.0:
479; FMA4-NEXT:    vfnmsubpd %xmm2, %xmm1, %xmm0, %xmm0
480; FMA4-NEXT:    retq
481;
482; AVX512-LABEL: test_2f64_fnmsub:
483; AVX512:       # %bb.0:
484; AVX512-NEXT:    vfnmsub213pd %xmm2, %xmm1, %xmm0
485; AVX512-NEXT:    retq
486  %x = fmul <2 x double> %a0, %a1
487  %y = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %x
488  %res = fsub <2 x double> %y, %a2
489  ret <2 x double> %res
490}
491
; <4 x double>: the product is negated by subtracting it from a -0.0 splat, then
; %a2 is subtracted. Expected to lower to one vfnmsub213pd (FMA/AVX512) or
; vfnmsubpd (FMA4) on ymm registers.
492define <4 x double> @test_4f64_fnmsub(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
493; FMA-LABEL: test_4f64_fnmsub:
494; FMA:       # %bb.0:
495; FMA-NEXT:    vfnmsub213pd %ymm2, %ymm1, %ymm0
496; FMA-NEXT:    retq
497;
498; FMA4-LABEL: test_4f64_fnmsub:
499; FMA4:       # %bb.0:
500; FMA4-NEXT:    vfnmsubpd %ymm2, %ymm1, %ymm0, %ymm0
501; FMA4-NEXT:    retq
502;
503; AVX512-LABEL: test_4f64_fnmsub:
504; AVX512:       # %bb.0:
505; AVX512-NEXT:    vfnmsub213pd %ymm2, %ymm1, %ymm0
506; AVX512-NEXT:    retq
507  %x = fmul <4 x double> %a0, %a1
508  %y = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %x
509  %res = fsub <4 x double> %y, %a2
510  ret <4 x double> %res
511}
512
513;
514; Load Folding Patterns
515;
516
; The multiplicand is loaded from the pointer %a0. The load is expected to fold
; into the FMA as a (%rdi) memory operand: the FMA/AVX512 prefixes pick the 132
; form (vfmadd132ps), FMA4 uses vfmaddps with the memory operand in the middle.
517define <4 x float> @test_4f32_fmadd_load(<4 x float>* %a0, <4 x float> %a1, <4 x float> %a2) {
518; FMA-LABEL: test_4f32_fmadd_load:
519; FMA:       # %bb.0:
520; FMA-NEXT:    vfmadd132ps (%rdi), %xmm1, %xmm0
521; FMA-NEXT:    retq
522;
523; FMA4-LABEL: test_4f32_fmadd_load:
524; FMA4:       # %bb.0:
525; FMA4-NEXT:    vfmaddps %xmm1, (%rdi), %xmm0, %xmm0
526; FMA4-NEXT:    retq
527;
528; AVX512-LABEL: test_4f32_fmadd_load:
529; AVX512:       # %bb.0:
530; AVX512-NEXT:    vfmadd132ps (%rdi), %xmm1, %xmm0
531; AVX512-NEXT:    retq
532  %x = load <4 x float>, <4 x float>* %a0
533  %y = fmul <4 x float> %x, %a1
534  %res = fadd <4 x float> %y, %a2
535  ret <4 x float> %res
536}
537
; The multiplicand is loaded from the pointer %a0. The load is expected to fold
; into the fused multiply-subtract as a (%rdi) memory operand: the FMA/AVX512
; prefixes pick vfmsub132pd, FMA4 uses vfmsubpd with the memory operand in the middle.
538define <2 x double> @test_2f64_fmsub_load(<2 x double>* %a0, <2 x double> %a1, <2 x double> %a2) {
539; FMA-LABEL: test_2f64_fmsub_load:
540; FMA:       # %bb.0:
541; FMA-NEXT:    vfmsub132pd (%rdi), %xmm1, %xmm0
542; FMA-NEXT:    retq
543;
544; FMA4-LABEL: test_2f64_fmsub_load:
545; FMA4:       # %bb.0:
546; FMA4-NEXT:    vfmsubpd %xmm1, (%rdi), %xmm0, %xmm0
547; FMA4-NEXT:    retq
548;
549; AVX512-LABEL: test_2f64_fmsub_load:
550; AVX512:       # %bb.0:
551; AVX512-NEXT:    vfmsub132pd (%rdi), %xmm1, %xmm0
552; AVX512-NEXT:    retq
553  %x = load <2 x double>, <2 x double>* %a0
554  %y = fmul <2 x double> %x, %a1
555  %res = fsub <2 x double> %y, %a2
556  ret <2 x double> %res
557}
558
559;
560; Patterns (+ fneg variants): mul(add(1.0,x),y), mul(sub(1.0,x),y), mul(sub(x,1.0),y)
561;
562
; (x + 1.0) * y. The *-INFS prefixes (RUN lines without -enable-no-infs-fp-math)
; expect the add and multiply to stay separate (vaddps of a constant-pool
; operand, then vmulps). The *-NOINFS prefixes expect a single fused
; multiply-add that uses %xmm1 for two of its operands (presumably x*y + y,
; with %y arriving in %xmm1).
563define <4 x float> @test_v4f32_mul_add_x_one_y(<4 x float> %x, <4 x float> %y) {
564; FMA-INFS-LABEL: test_v4f32_mul_add_x_one_y:
565; FMA-INFS:       # %bb.0:
566; FMA-INFS-NEXT:    vaddps {{.*}}(%rip), %xmm0, %xmm0
567; FMA-INFS-NEXT:    vmulps %xmm1, %xmm0, %xmm0
568; FMA-INFS-NEXT:    retq
569;
570; FMA4-INFS-LABEL: test_v4f32_mul_add_x_one_y:
571; FMA4-INFS:       # %bb.0:
572; FMA4-INFS-NEXT:    vaddps {{.*}}(%rip), %xmm0, %xmm0
573; FMA4-INFS-NEXT:    vmulps %xmm1, %xmm0, %xmm0
574; FMA4-INFS-NEXT:    retq
575;
576; AVX512-INFS-LABEL: test_v4f32_mul_add_x_one_y:
577; AVX512-INFS:       # %bb.0:
578; AVX512-INFS-NEXT:    vaddps {{.*}}(%rip){1to4}, %xmm0, %xmm0
579; AVX512-INFS-NEXT:    vmulps %xmm1, %xmm0, %xmm0
580; AVX512-INFS-NEXT:    retq
581;
582; FMA-NOINFS-LABEL: test_v4f32_mul_add_x_one_y:
583; FMA-NOINFS:       # %bb.0:
584; FMA-NOINFS-NEXT:    vfmadd213ps %xmm1, %xmm1, %xmm0
585; FMA-NOINFS-NEXT:    retq
586;
587; FMA4-NOINFS-LABEL: test_v4f32_mul_add_x_one_y:
588; FMA4-NOINFS:       # %bb.0:
589; FMA4-NOINFS-NEXT:    vfmaddps %xmm1, %xmm1, %xmm0, %xmm0
590; FMA4-NOINFS-NEXT:    retq
591;
592; AVX512-NOINFS-LABEL: test_v4f32_mul_add_x_one_y:
593; AVX512-NOINFS:       # %bb.0:
594; AVX512-NOINFS-NEXT:    vfmadd213ps %xmm1, %xmm1, %xmm0
595; AVX512-NOINFS-NEXT:    retq
596  %a = fadd <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0>
597  %m = fmul <4 x float> %a, %y
598  ret <4 x float> %m
599}
600
; y * (x + 1.0): same as the (x + 1.0) * y case with the fmul operands swapped.
; The *-INFS prefixes expect vaddps followed by vmulps (note the swapped vmulps
; source order); the *-NOINFS prefixes expect the same single fused
; multiply-add that uses %xmm1 for two of its operands.
601define <4 x float> @test_v4f32_mul_y_add_x_one(<4 x float> %x, <4 x float> %y) {
602; FMA-INFS-LABEL: test_v4f32_mul_y_add_x_one:
603; FMA-INFS:       # %bb.0:
604; FMA-INFS-NEXT:    vaddps {{.*}}(%rip), %xmm0, %xmm0
605; FMA-INFS-NEXT:    vmulps %xmm0, %xmm1, %xmm0
606; FMA-INFS-NEXT:    retq
607;
608; FMA4-INFS-LABEL: test_v4f32_mul_y_add_x_one:
609; FMA4-INFS:       # %bb.0:
610; FMA4-INFS-NEXT:    vaddps {{.*}}(%rip), %xmm0, %xmm0
611; FMA4-INFS-NEXT:    vmulps %xmm0, %xmm1, %xmm0
612; FMA4-INFS-NEXT:    retq
613;
614; AVX512-INFS-LABEL: test_v4f32_mul_y_add_x_one:
615; AVX512-INFS:       # %bb.0:
616; AVX512-INFS-NEXT:    vaddps {{.*}}(%rip){1to4}, %xmm0, %xmm0
617; AVX512-INFS-NEXT:    vmulps %xmm0, %xmm1, %xmm0
618; AVX512-INFS-NEXT:    retq
619;
620; FMA-NOINFS-LABEL: test_v4f32_mul_y_add_x_one:
621; FMA-NOINFS:       # %bb.0:
622; FMA-NOINFS-NEXT:    vfmadd213ps %xmm1, %xmm1, %xmm0
623; FMA-NOINFS-NEXT:    retq
624;
625; FMA4-NOINFS-LABEL: test_v4f32_mul_y_add_x_one:
626; FMA4-NOINFS:       # %bb.0:
627; FMA4-NOINFS-NEXT:    vfmaddps %xmm1, %xmm1, %xmm0, %xmm0
628; FMA4-NOINFS-NEXT:    retq
629;
630; AVX512-NOINFS-LABEL: test_v4f32_mul_y_add_x_one:
631; AVX512-NOINFS:       # %bb.0:
632; AVX512-NOINFS-NEXT:    vfmadd213ps %xmm1, %xmm1, %xmm0
633; AVX512-NOINFS-NEXT:    retq
634  %a = fadd <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0>
635  %m = fmul <4 x float> %y, %a
636  ret <4 x float> %m
637}
638
; (x + -1.0) * y. The *-INFS prefixes expect vaddps of a constant-pool operand
; followed by vmulps. The *-NOINFS prefixes expect a single fused
; multiply-subtract that uses %xmm1 for two of its operands (presumably
; x*y - y, with %y arriving in %xmm1).
639define <4 x float> @test_v4f32_mul_add_x_negone_y(<4 x float> %x, <4 x float> %y) {
640; FMA-INFS-LABEL: test_v4f32_mul_add_x_negone_y:
641; FMA-INFS:       # %bb.0:
642; FMA-INFS-NEXT:    vaddps {{.*}}(%rip), %xmm0, %xmm0
643; FMA-INFS-NEXT:    vmulps %xmm1, %xmm0, %xmm0
644; FMA-INFS-NEXT:    retq
645;
646; FMA4-INFS-LABEL: test_v4f32_mul_add_x_negone_y:
647; FMA4-INFS:       # %bb.0:
648; FMA4-INFS-NEXT:    vaddps {{.*}}(%rip), %xmm0, %xmm0
649; FMA4-INFS-NEXT:    vmulps %xmm1, %xmm0, %xmm0
650; FMA4-INFS-NEXT:    retq
651;
652; AVX512-INFS-LABEL: test_v4f32_mul_add_x_negone_y:
653; AVX512-INFS:       # %bb.0:
654; AVX512-INFS-NEXT:    vaddps {{.*}}(%rip){1to4}, %xmm0, %xmm0
655; AVX512-INFS-NEXT:    vmulps %xmm1, %xmm0, %xmm0
656; AVX512-INFS-NEXT:    retq
657;
658; FMA-NOINFS-LABEL: test_v4f32_mul_add_x_negone_y:
659; FMA-NOINFS:       # %bb.0:
660; FMA-NOINFS-NEXT:    vfmsub213ps %xmm1, %xmm1, %xmm0
661; FMA-NOINFS-NEXT:    retq
662;
663; FMA4-NOINFS-LABEL: test_v4f32_mul_add_x_negone_y:
664; FMA4-NOINFS:       # %bb.0:
665; FMA4-NOINFS-NEXT:    vfmsubps %xmm1, %xmm1, %xmm0, %xmm0
666; FMA4-NOINFS-NEXT:    retq
667;
668; AVX512-NOINFS-LABEL: test_v4f32_mul_add_x_negone_y:
669; AVX512-NOINFS:       # %bb.0:
670; AVX512-NOINFS-NEXT:    vfmsub213ps %xmm1, %xmm1, %xmm0
671; AVX512-NOINFS-NEXT:    retq
672  %a = fadd <4 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0>
673  %m = fmul <4 x float> %a, %y
674  ret <4 x float> %m
675}
676
; y * (x + -1.0): same as the (x + -1.0) * y case with the fmul operands
; swapped. The *-INFS prefixes expect vaddps followed by vmulps (swapped vmulps
; source order); the *-NOINFS prefixes expect the same single fused
; multiply-subtract that uses %xmm1 for two of its operands.
677define <4 x float> @test_v4f32_mul_y_add_x_negone(<4 x float> %x, <4 x float> %y) {
678; FMA-INFS-LABEL: test_v4f32_mul_y_add_x_negone:
679; FMA-INFS:       # %bb.0:
680; FMA-INFS-NEXT:    vaddps {{.*}}(%rip), %xmm0, %xmm0
681; FMA-INFS-NEXT:    vmulps %xmm0, %xmm1, %xmm0
682; FMA-INFS-NEXT:    retq
683;
684; FMA4-INFS-LABEL: test_v4f32_mul_y_add_x_negone:
685; FMA4-INFS:       # %bb.0:
686; FMA4-INFS-NEXT:    vaddps {{.*}}(%rip), %xmm0, %xmm0
687; FMA4-INFS-NEXT:    vmulps %xmm0, %xmm1, %xmm0
688; FMA4-INFS-NEXT:    retq
689;
690; AVX512-INFS-LABEL: test_v4f32_mul_y_add_x_negone:
691; AVX512-INFS:       # %bb.0:
692; AVX512-INFS-NEXT:    vaddps {{.*}}(%rip){1to4}, %xmm0, %xmm0
693; AVX512-INFS-NEXT:    vmulps %xmm0, %xmm1, %xmm0
694; AVX512-INFS-NEXT:    retq
695;
696; FMA-NOINFS-LABEL: test_v4f32_mul_y_add_x_negone:
697; FMA-NOINFS:       # %bb.0:
698; FMA-NOINFS-NEXT:    vfmsub213ps %xmm1, %xmm1, %xmm0
699; FMA-NOINFS-NEXT:    retq
700;
701; FMA4-NOINFS-LABEL: test_v4f32_mul_y_add_x_negone:
702; FMA4-NOINFS:       # %bb.0:
703; FMA4-NOINFS-NEXT:    vfmsubps %xmm1, %xmm1, %xmm0, %xmm0
704; FMA4-NOINFS-NEXT:    retq
705;
706; AVX512-NOINFS-LABEL: test_v4f32_mul_y_add_x_negone:
707; AVX512-NOINFS:       # %bb.0:
708; AVX512-NOINFS-NEXT:    vfmsub213ps %xmm1, %xmm1, %xmm0
709; AVX512-NOINFS-NEXT:    retq
710  %a = fadd <4 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0>
711  %m = fmul <4 x float> %y, %a
712  ret <4 x float> %m
713}
714
; (1.0 - x) * y. The *-INFS prefixes expect the 1.0 splat to be materialized
; into %xmm2 (vmovaps, or vbroadcastss for AVX512), then vsubps and vmulps.
; The *-NOINFS prefixes expect a single negated fused multiply-add that uses
; %xmm1 for two of its operands (presumably -(x*y) + y, with %y in %xmm1).
715define <4 x float> @test_v4f32_mul_sub_one_x_y(<4 x float> %x, <4 x float> %y) {
716; FMA-INFS-LABEL: test_v4f32_mul_sub_one_x_y:
717; FMA-INFS:       # %bb.0:
718; FMA-INFS-NEXT:    vmovaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
719; FMA-INFS-NEXT:    vsubps %xmm0, %xmm2, %xmm0
720; FMA-INFS-NEXT:    vmulps %xmm1, %xmm0, %xmm0
721; FMA-INFS-NEXT:    retq
722;
723; FMA4-INFS-LABEL: test_v4f32_mul_sub_one_x_y:
724; FMA4-INFS:       # %bb.0:
725; FMA4-INFS-NEXT:    vmovaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
726; FMA4-INFS-NEXT:    vsubps %xmm0, %xmm2, %xmm0
727; FMA4-INFS-NEXT:    vmulps %xmm1, %xmm0, %xmm0
728; FMA4-INFS-NEXT:    retq
729;
730; AVX512-INFS-LABEL: test_v4f32_mul_sub_one_x_y:
731; AVX512-INFS:       # %bb.0:
732; AVX512-INFS-NEXT:    vbroadcastss {{.*#+}} xmm2 = [1,1,1,1]
733; AVX512-INFS-NEXT:    vsubps %xmm0, %xmm2, %xmm0
734; AVX512-INFS-NEXT:    vmulps %xmm1, %xmm0, %xmm0
735; AVX512-INFS-NEXT:    retq
736;
737; FMA-NOINFS-LABEL: test_v4f32_mul_sub_one_x_y:
738; FMA-NOINFS:       # %bb.0:
739; FMA-NOINFS-NEXT:    vfnmadd213ps %xmm1, %xmm1, %xmm0
740; FMA-NOINFS-NEXT:    retq
741;
742; FMA4-NOINFS-LABEL: test_v4f32_mul_sub_one_x_y:
743; FMA4-NOINFS:       # %bb.0:
744; FMA4-NOINFS-NEXT:    vfnmaddps %xmm1, %xmm1, %xmm0, %xmm0
745; FMA4-NOINFS-NEXT:    retq
746;
747; AVX512-NOINFS-LABEL: test_v4f32_mul_sub_one_x_y:
748; AVX512-NOINFS:       # %bb.0:
749; AVX512-NOINFS-NEXT:    vfnmadd213ps %xmm1, %xmm1, %xmm0
750; AVX512-NOINFS-NEXT:    retq
751  %s = fsub <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x
752  %m = fmul <4 x float> %s, %y
753  ret <4 x float> %m
754}
755
; y * (1.0 - x): same as the (1.0 - x) * y case with the fmul operands swapped.
; The *-INFS prefixes expect constant materialization, vsubps, then vmulps
; (swapped vmulps source order); the *-NOINFS prefixes expect the same single
; negated fused multiply-add that uses %xmm1 for two of its operands.
756define <4 x float> @test_v4f32_mul_y_sub_one_x(<4 x float> %x, <4 x float> %y) {
757; FMA-INFS-LABEL: test_v4f32_mul_y_sub_one_x:
758; FMA-INFS:       # %bb.0:
759; FMA-INFS-NEXT:    vmovaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
760; FMA-INFS-NEXT:    vsubps %xmm0, %xmm2, %xmm0
761; FMA-INFS-NEXT:    vmulps %xmm0, %xmm1, %xmm0
762; FMA-INFS-NEXT:    retq
763;
764; FMA4-INFS-LABEL: test_v4f32_mul_y_sub_one_x:
765; FMA4-INFS:       # %bb.0:
766; FMA4-INFS-NEXT:    vmovaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
767; FMA4-INFS-NEXT:    vsubps %xmm0, %xmm2, %xmm0
768; FMA4-INFS-NEXT:    vmulps %xmm0, %xmm1, %xmm0
769; FMA4-INFS-NEXT:    retq
770;
771; AVX512-INFS-LABEL: test_v4f32_mul_y_sub_one_x:
772; AVX512-INFS:       # %bb.0:
773; AVX512-INFS-NEXT:    vbroadcastss {{.*#+}} xmm2 = [1,1,1,1]
774; AVX512-INFS-NEXT:    vsubps %xmm0, %xmm2, %xmm0
775; AVX512-INFS-NEXT:    vmulps %xmm0, %xmm1, %xmm0
776; AVX512-INFS-NEXT:    retq
777;
778; FMA-NOINFS-LABEL: test_v4f32_mul_y_sub_one_x:
779; FMA-NOINFS:       # %bb.0:
780; FMA-NOINFS-NEXT:    vfnmadd213ps %xmm1, %xmm1, %xmm0
781; FMA-NOINFS-NEXT:    retq
782;
783; FMA4-NOINFS-LABEL: test_v4f32_mul_y_sub_one_x:
784; FMA4-NOINFS:       # %bb.0:
785; FMA4-NOINFS-NEXT:    vfnmaddps %xmm1, %xmm1, %xmm0, %xmm0
786; FMA4-NOINFS-NEXT:    retq
787;
788; AVX512-NOINFS-LABEL: test_v4f32_mul_y_sub_one_x:
789; AVX512-NOINFS:       # %bb.0:
790; AVX512-NOINFS-NEXT:    vfnmadd213ps %xmm1, %xmm1, %xmm0
791; AVX512-NOINFS-NEXT:    retq
792  %s = fsub <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x
793  %m = fmul <4 x float> %y, %s
794  ret <4 x float> %m
795}
796
; (-1.0 - x) * y. The *-INFS prefixes expect the -1.0 splat to be materialized
; into %xmm2 (vmovaps, or vbroadcastss for AVX512), then vsubps and vmulps.
; The *-NOINFS prefixes expect a single negated fused multiply-subtract that
; uses %xmm1 for two of its operands (presumably -(x*y) - y, with %y in %xmm1).
797define <4 x float> @test_v4f32_mul_sub_negone_x_y(<4 x float> %x, <4 x float> %y) {
798; FMA-INFS-LABEL: test_v4f32_mul_sub_negone_x_y:
799; FMA-INFS:       # %bb.0:
800; FMA-INFS-NEXT:    vmovaps {{.*#+}} xmm2 = [-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00]
801; FMA-INFS-NEXT:    vsubps %xmm0, %xmm2, %xmm0
802; FMA-INFS-NEXT:    vmulps %xmm1, %xmm0, %xmm0
803; FMA-INFS-NEXT:    retq
804;
805; FMA4-INFS-LABEL: test_v4f32_mul_sub_negone_x_y:
806; FMA4-INFS:       # %bb.0:
807; FMA4-INFS-NEXT:    vmovaps {{.*#+}} xmm2 = [-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00]
808; FMA4-INFS-NEXT:    vsubps %xmm0, %xmm2, %xmm0
809; FMA4-INFS-NEXT:    vmulps %xmm1, %xmm0, %xmm0
810; FMA4-INFS-NEXT:    retq
811;
812; AVX512-INFS-LABEL: test_v4f32_mul_sub_negone_x_y:
813; AVX512-INFS:       # %bb.0:
814; AVX512-INFS-NEXT:    vbroadcastss {{.*#+}} xmm2 = [-1,-1,-1,-1]
815; AVX512-INFS-NEXT:    vsubps %xmm0, %xmm2, %xmm0
816; AVX512-INFS-NEXT:    vmulps %xmm1, %xmm0, %xmm0
817; AVX512-INFS-NEXT:    retq
818;
819; FMA-NOINFS-LABEL: test_v4f32_mul_sub_negone_x_y:
820; FMA-NOINFS:       # %bb.0:
821; FMA-NOINFS-NEXT:    vfnmsub213ps %xmm1, %xmm1, %xmm0
822; FMA-NOINFS-NEXT:    retq
823;
824; FMA4-NOINFS-LABEL: test_v4f32_mul_sub_negone_x_y:
825; FMA4-NOINFS:       # %bb.0:
826; FMA4-NOINFS-NEXT:    vfnmsubps %xmm1, %xmm1, %xmm0, %xmm0
827; FMA4-NOINFS-NEXT:    retq
828;
829; AVX512-NOINFS-LABEL: test_v4f32_mul_sub_negone_x_y:
830; AVX512-NOINFS:       # %bb.0:
831; AVX512-NOINFS-NEXT:    vfnmsub213ps %xmm1, %xmm1, %xmm0
832; AVX512-NOINFS-NEXT:    retq
833  %s = fsub <4 x float> <float -1.0, float -1.0, float -1.0, float -1.0>, %x
834  %m = fmul <4 x float> %s, %y
835  ret <4 x float> %m
836}
837
; y * (-1.0 - x): same as the (-1.0 - x) * y case with the fmul operands
; swapped. The *-INFS prefixes expect constant materialization, vsubps, then
; vmulps (swapped vmulps source order); the *-NOINFS prefixes expect the same
; single negated fused multiply-subtract that uses %xmm1 for two of its operands.
838define <4 x float> @test_v4f32_mul_y_sub_negone_x(<4 x float> %x, <4 x float> %y) {
839; FMA-INFS-LABEL: test_v4f32_mul_y_sub_negone_x:
840; FMA-INFS:       # %bb.0:
841; FMA-INFS-NEXT:    vmovaps {{.*#+}} xmm2 = [-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00]
842; FMA-INFS-NEXT:    vsubps %xmm0, %xmm2, %xmm0
843; FMA-INFS-NEXT:    vmulps %xmm0, %xmm1, %xmm0
844; FMA-INFS-NEXT:    retq
845;
846; FMA4-INFS-LABEL: test_v4f32_mul_y_sub_negone_x:
847; FMA4-INFS:       # %bb.0:
848; FMA4-INFS-NEXT:    vmovaps {{.*#+}} xmm2 = [-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00]
849; FMA4-INFS-NEXT:    vsubps %xmm0, %xmm2, %xmm0
850; FMA4-INFS-NEXT:    vmulps %xmm0, %xmm1, %xmm0
851; FMA4-INFS-NEXT:    retq
852;
853; AVX512-INFS-LABEL: test_v4f32_mul_y_sub_negone_x:
854; AVX512-INFS:       # %bb.0:
855; AVX512-INFS-NEXT:    vbroadcastss {{.*#+}} xmm2 = [-1,-1,-1,-1]
856; AVX512-INFS-NEXT:    vsubps %xmm0, %xmm2, %xmm0
857; AVX512-INFS-NEXT:    vmulps %xmm0, %xmm1, %xmm0
858; AVX512-INFS-NEXT:    retq
859;
860; FMA-NOINFS-LABEL: test_v4f32_mul_y_sub_negone_x:
861; FMA-NOINFS:       # %bb.0:
862; FMA-NOINFS-NEXT:    vfnmsub213ps %xmm1, %xmm1, %xmm0
863; FMA-NOINFS-NEXT:    retq
864;
865; FMA4-NOINFS-LABEL: test_v4f32_mul_y_sub_negone_x:
866; FMA4-NOINFS:       # %bb.0:
867; FMA4-NOINFS-NEXT:    vfnmsubps %xmm1, %xmm1, %xmm0, %xmm0
868; FMA4-NOINFS-NEXT:    retq
869;
870; AVX512-NOINFS-LABEL: test_v4f32_mul_y_sub_negone_x:
871; AVX512-NOINFS:       # %bb.0:
872; AVX512-NOINFS-NEXT:    vfnmsub213ps %xmm1, %xmm1, %xmm0
873; AVX512-NOINFS-NEXT:    retq
874  %s = fsub <4 x float> <float -1.0, float -1.0, float -1.0, float -1.0>, %x
875  %m = fmul <4 x float> %y, %s
876  ret <4 x float> %m
877}
878
; (x - 1.0) * y. The *-INFS prefixes expect vsubps of a constant-pool operand
; followed by vmulps. The *-NOINFS prefixes expect a single fused
; multiply-subtract that uses %xmm1 for two of its operands (presumably
; x*y - y, with %y arriving in %xmm1).
879define <4 x float> @test_v4f32_mul_sub_x_one_y(<4 x float> %x, <4 x float> %y) {
880; FMA-INFS-LABEL: test_v4f32_mul_sub_x_one_y:
881; FMA-INFS:       # %bb.0:
882; FMA-INFS-NEXT:    vsubps {{.*}}(%rip), %xmm0, %xmm0
883; FMA-INFS-NEXT:    vmulps %xmm1, %xmm0, %xmm0
884; FMA-INFS-NEXT:    retq
885;
886; FMA4-INFS-LABEL: test_v4f32_mul_sub_x_one_y:
887; FMA4-INFS:       # %bb.0:
888; FMA4-INFS-NEXT:    vsubps {{.*}}(%rip), %xmm0, %xmm0
889; FMA4-INFS-NEXT:    vmulps %xmm1, %xmm0, %xmm0
890; FMA4-INFS-NEXT:    retq
891;
892; AVX512-INFS-LABEL: test_v4f32_mul_sub_x_one_y:
893; AVX512-INFS:       # %bb.0:
894; AVX512-INFS-NEXT:    vsubps {{.*}}(%rip){1to4}, %xmm0, %xmm0
895; AVX512-INFS-NEXT:    vmulps %xmm1, %xmm0, %xmm0
896; AVX512-INFS-NEXT:    retq
897;
898; FMA-NOINFS-LABEL: test_v4f32_mul_sub_x_one_y:
899; FMA-NOINFS:       # %bb.0:
900; FMA-NOINFS-NEXT:    vfmsub213ps %xmm1, %xmm1, %xmm0
901; FMA-NOINFS-NEXT:    retq
902;
903; FMA4-NOINFS-LABEL: test_v4f32_mul_sub_x_one_y:
904; FMA4-NOINFS:       # %bb.0:
905; FMA4-NOINFS-NEXT:    vfmsubps %xmm1, %xmm1, %xmm0, %xmm0
906; FMA4-NOINFS-NEXT:    retq
907;
908; AVX512-NOINFS-LABEL: test_v4f32_mul_sub_x_one_y:
909; AVX512-NOINFS:       # %bb.0:
910; AVX512-NOINFS-NEXT:    vfmsub213ps %xmm1, %xmm1, %xmm0
911; AVX512-NOINFS-NEXT:    retq
912  %s = fsub <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0>
913  %m = fmul <4 x float> %s, %y
914  ret <4 x float> %m
915}
916
; Computes y * (x - 1.0) on <4 x float> (fmul operands commuted relative to
; test_v4f32_mul_sub_x_one_y).
; The no-infs runs expect a single fmsub (x*y - y); the default runs expect
; a separate vsubps followed by vmulps.
917define <4 x float> @test_v4f32_mul_y_sub_x_one(<4 x float> %x, <4 x float> %y) {
918; FMA-INFS-LABEL: test_v4f32_mul_y_sub_x_one:
919; FMA-INFS:       # %bb.0:
920; FMA-INFS-NEXT:    vsubps {{.*}}(%rip), %xmm0, %xmm0
921; FMA-INFS-NEXT:    vmulps %xmm0, %xmm1, %xmm0
922; FMA-INFS-NEXT:    retq
923;
924; FMA4-INFS-LABEL: test_v4f32_mul_y_sub_x_one:
925; FMA4-INFS:       # %bb.0:
926; FMA4-INFS-NEXT:    vsubps {{.*}}(%rip), %xmm0, %xmm0
927; FMA4-INFS-NEXT:    vmulps %xmm0, %xmm1, %xmm0
928; FMA4-INFS-NEXT:    retq
929;
930; AVX512-INFS-LABEL: test_v4f32_mul_y_sub_x_one:
931; AVX512-INFS:       # %bb.0:
932; AVX512-INFS-NEXT:    vsubps {{.*}}(%rip){1to4}, %xmm0, %xmm0
933; AVX512-INFS-NEXT:    vmulps %xmm0, %xmm1, %xmm0
934; AVX512-INFS-NEXT:    retq
935;
936; FMA-NOINFS-LABEL: test_v4f32_mul_y_sub_x_one:
937; FMA-NOINFS:       # %bb.0:
938; FMA-NOINFS-NEXT:    vfmsub213ps %xmm1, %xmm1, %xmm0
939; FMA-NOINFS-NEXT:    retq
940;
941; FMA4-NOINFS-LABEL: test_v4f32_mul_y_sub_x_one:
942; FMA4-NOINFS:       # %bb.0:
943; FMA4-NOINFS-NEXT:    vfmsubps %xmm1, %xmm1, %xmm0, %xmm0
944; FMA4-NOINFS-NEXT:    retq
945;
946; AVX512-NOINFS-LABEL: test_v4f32_mul_y_sub_x_one:
947; AVX512-NOINFS:       # %bb.0:
948; AVX512-NOINFS-NEXT:    vfmsub213ps %xmm1, %xmm1, %xmm0
949; AVX512-NOINFS-NEXT:    retq
950  %s = fsub <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0>
951  %m = fmul <4 x float> %y, %s
952  ret <4 x float> %m
953}
954
; Computes (x - (-1.0)) * y on <4 x float>.
; The no-infs runs expect a single fmadd (x*y + y); the default runs expect
; a separate vsubps followed by vmulps.
955define <4 x float> @test_v4f32_mul_sub_x_negone_y(<4 x float> %x, <4 x float> %y) {
956; FMA-INFS-LABEL: test_v4f32_mul_sub_x_negone_y:
957; FMA-INFS:       # %bb.0:
958; FMA-INFS-NEXT:    vsubps {{.*}}(%rip), %xmm0, %xmm0
959; FMA-INFS-NEXT:    vmulps %xmm1, %xmm0, %xmm0
960; FMA-INFS-NEXT:    retq
961;
962; FMA4-INFS-LABEL: test_v4f32_mul_sub_x_negone_y:
963; FMA4-INFS:       # %bb.0:
964; FMA4-INFS-NEXT:    vsubps {{.*}}(%rip), %xmm0, %xmm0
965; FMA4-INFS-NEXT:    vmulps %xmm1, %xmm0, %xmm0
966; FMA4-INFS-NEXT:    retq
967;
968; AVX512-INFS-LABEL: test_v4f32_mul_sub_x_negone_y:
969; AVX512-INFS:       # %bb.0:
970; AVX512-INFS-NEXT:    vsubps {{.*}}(%rip){1to4}, %xmm0, %xmm0
971; AVX512-INFS-NEXT:    vmulps %xmm1, %xmm0, %xmm0
972; AVX512-INFS-NEXT:    retq
973;
974; FMA-NOINFS-LABEL: test_v4f32_mul_sub_x_negone_y:
975; FMA-NOINFS:       # %bb.0:
976; FMA-NOINFS-NEXT:    vfmadd213ps %xmm1, %xmm1, %xmm0
977; FMA-NOINFS-NEXT:    retq
978;
979; FMA4-NOINFS-LABEL: test_v4f32_mul_sub_x_negone_y:
980; FMA4-NOINFS:       # %bb.0:
981; FMA4-NOINFS-NEXT:    vfmaddps %xmm1, %xmm1, %xmm0, %xmm0
982; FMA4-NOINFS-NEXT:    retq
983;
984; AVX512-NOINFS-LABEL: test_v4f32_mul_sub_x_negone_y:
985; AVX512-NOINFS:       # %bb.0:
986; AVX512-NOINFS-NEXT:    vfmadd213ps %xmm1, %xmm1, %xmm0
987; AVX512-NOINFS-NEXT:    retq
988  %s = fsub <4 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0>
989  %m = fmul <4 x float> %s, %y
990  ret <4 x float> %m
991}
992
; Computes y * (x - (-1.0)) on <4 x float> (fmul operands commuted relative to
; test_v4f32_mul_sub_x_negone_y).
; The no-infs runs expect a single fmadd (x*y + y); the default runs expect
; a separate vsubps followed by vmulps.
993define <4 x float> @test_v4f32_mul_y_sub_x_negone(<4 x float> %x, <4 x float> %y) {
994; FMA-INFS-LABEL: test_v4f32_mul_y_sub_x_negone:
995; FMA-INFS:       # %bb.0:
996; FMA-INFS-NEXT:    vsubps {{.*}}(%rip), %xmm0, %xmm0
997; FMA-INFS-NEXT:    vmulps %xmm0, %xmm1, %xmm0
998; FMA-INFS-NEXT:    retq
999;
1000; FMA4-INFS-LABEL: test_v4f32_mul_y_sub_x_negone:
1001; FMA4-INFS:       # %bb.0:
1002; FMA4-INFS-NEXT:    vsubps {{.*}}(%rip), %xmm0, %xmm0
1003; FMA4-INFS-NEXT:    vmulps %xmm0, %xmm1, %xmm0
1004; FMA4-INFS-NEXT:    retq
1005;
1006; AVX512-INFS-LABEL: test_v4f32_mul_y_sub_x_negone:
1007; AVX512-INFS:       # %bb.0:
1008; AVX512-INFS-NEXT:    vsubps {{.*}}(%rip){1to4}, %xmm0, %xmm0
1009; AVX512-INFS-NEXT:    vmulps %xmm0, %xmm1, %xmm0
1010; AVX512-INFS-NEXT:    retq
1011;
1012; FMA-NOINFS-LABEL: test_v4f32_mul_y_sub_x_negone:
1013; FMA-NOINFS:       # %bb.0:
1014; FMA-NOINFS-NEXT:    vfmadd213ps %xmm1, %xmm1, %xmm0
1015; FMA-NOINFS-NEXT:    retq
1016;
1017; FMA4-NOINFS-LABEL: test_v4f32_mul_y_sub_x_negone:
1018; FMA4-NOINFS:       # %bb.0:
1019; FMA4-NOINFS-NEXT:    vfmaddps %xmm1, %xmm1, %xmm0, %xmm0
1020; FMA4-NOINFS-NEXT:    retq
1021;
1022; AVX512-NOINFS-LABEL: test_v4f32_mul_y_sub_x_negone:
1023; AVX512-NOINFS:       # %bb.0:
1024; AVX512-NOINFS-NEXT:    vfmadd213ps %xmm1, %xmm1, %xmm0
1025; AVX512-NOINFS-NEXT:    retq
1026  %s = fsub <4 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0>
1027  %m = fmul <4 x float> %y, %s
1028  ret <4 x float> %m
1029}
1030
1031;
1032; Interpolation Patterns: add(mul(x,t),mul(sub(1.0,t),y))
1033;
1034
; Scalar float interpolation: x*t + y*(1.0 - t).
; The default runs expect vsubss + vmulss for the y*(1.0 - t) term and one
; fmadd for the final sum. The no-infs runs expect that term to become an
; fnmadd (y - t*y), giving two fused ops and no separate sub/mul.
1035define float @test_f32_interp(float %x, float %y, float %t) {
1036; FMA-INFS-LABEL: test_f32_interp:
1037; FMA-INFS:       # %bb.0:
1038; FMA-INFS-NEXT:    vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
1039; FMA-INFS-NEXT:    vsubss %xmm2, %xmm3, %xmm3
1040; FMA-INFS-NEXT:    vmulss %xmm3, %xmm1, %xmm1
1041; FMA-INFS-NEXT:    vfmadd213ss %xmm1, %xmm2, %xmm0
1042; FMA-INFS-NEXT:    retq
1043;
1044; FMA4-INFS-LABEL: test_f32_interp:
1045; FMA4-INFS:       # %bb.0:
1046; FMA4-INFS-NEXT:    vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
1047; FMA4-INFS-NEXT:    vsubss %xmm2, %xmm3, %xmm3
1048; FMA4-INFS-NEXT:    vmulss %xmm3, %xmm1, %xmm1
1049; FMA4-INFS-NEXT:    vfmaddss %xmm1, %xmm2, %xmm0, %xmm0
1050; FMA4-INFS-NEXT:    retq
1051;
1052; AVX512-INFS-LABEL: test_f32_interp:
1053; AVX512-INFS:       # %bb.0:
1054; AVX512-INFS-NEXT:    vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
1055; AVX512-INFS-NEXT:    vsubss %xmm2, %xmm3, %xmm3
1056; AVX512-INFS-NEXT:    vmulss %xmm3, %xmm1, %xmm1
1057; AVX512-INFS-NEXT:    vfmadd213ss %xmm1, %xmm2, %xmm0
1058; AVX512-INFS-NEXT:    retq
1059;
1060; FMA-NOINFS-LABEL: test_f32_interp:
1061; FMA-NOINFS:       # %bb.0:
1062; FMA-NOINFS-NEXT:    vfnmadd213ss %xmm1, %xmm2, %xmm1
1063; FMA-NOINFS-NEXT:    vfmadd213ss %xmm1, %xmm2, %xmm0
1064; FMA-NOINFS-NEXT:    retq
1065;
1066; FMA4-NOINFS-LABEL: test_f32_interp:
1067; FMA4-NOINFS:       # %bb.0:
1068; FMA4-NOINFS-NEXT:    vfnmaddss %xmm1, %xmm1, %xmm2, %xmm1
1069; FMA4-NOINFS-NEXT:    vfmaddss %xmm1, %xmm2, %xmm0, %xmm0
1070; FMA4-NOINFS-NEXT:    retq
1071;
1072; AVX512-NOINFS-LABEL: test_f32_interp:
1073; AVX512-NOINFS:       # %bb.0:
1074; AVX512-NOINFS-NEXT:    vfnmadd213ss %xmm1, %xmm2, %xmm1
1075; AVX512-NOINFS-NEXT:    vfmadd213ss %xmm1, %xmm2, %xmm0
1076; AVX512-NOINFS-NEXT:    retq
1077  %t1 = fsub float 1.0, %t
1078  %tx = fmul float %x, %t
1079  %ty = fmul float %y, %t1
1080  %r = fadd float %tx, %ty
1081  ret float %r
1082}
1083
; <4 x float> interpolation: x*t + y*(1.0 - t).
; The default runs expect vsubps + vmulps for the y*(1.0 - t) term and one
; fmadd for the final sum. The no-infs runs expect that term to become an
; fnmadd (y - t*y), giving two fused ops and no separate sub/mul.
1084define <4 x float> @test_v4f32_interp(<4 x float> %x, <4 x float> %y, <4 x float> %t) {
1085; FMA-INFS-LABEL: test_v4f32_interp:
1086; FMA-INFS:       # %bb.0:
1087; FMA-INFS-NEXT:    vmovaps {{.*#+}} xmm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
1088; FMA-INFS-NEXT:    vsubps %xmm2, %xmm3, %xmm3
1089; FMA-INFS-NEXT:    vmulps %xmm3, %xmm1, %xmm1
1090; FMA-INFS-NEXT:    vfmadd213ps %xmm1, %xmm2, %xmm0
1091; FMA-INFS-NEXT:    retq
1092;
1093; FMA4-INFS-LABEL: test_v4f32_interp:
1094; FMA4-INFS:       # %bb.0:
1095; FMA4-INFS-NEXT:    vmovaps {{.*#+}} xmm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
1096; FMA4-INFS-NEXT:    vsubps %xmm2, %xmm3, %xmm3
1097; FMA4-INFS-NEXT:    vmulps %xmm3, %xmm1, %xmm1
1098; FMA4-INFS-NEXT:    vfmaddps %xmm1, %xmm2, %xmm0, %xmm0
1099; FMA4-INFS-NEXT:    retq
1100;
1101; AVX512-INFS-LABEL: test_v4f32_interp:
1102; AVX512-INFS:       # %bb.0:
1103; AVX512-INFS-NEXT:    vbroadcastss {{.*#+}} xmm3 = [1,1,1,1]
1104; AVX512-INFS-NEXT:    vsubps %xmm2, %xmm3, %xmm3
1105; AVX512-INFS-NEXT:    vmulps %xmm3, %xmm1, %xmm1
1106; AVX512-INFS-NEXT:    vfmadd213ps %xmm1, %xmm2, %xmm0
1107; AVX512-INFS-NEXT:    retq
1108;
1109; FMA-NOINFS-LABEL: test_v4f32_interp:
1110; FMA-NOINFS:       # %bb.0:
1111; FMA-NOINFS-NEXT:    vfnmadd213ps %xmm1, %xmm2, %xmm1
1112; FMA-NOINFS-NEXT:    vfmadd213ps %xmm1, %xmm2, %xmm0
1113; FMA-NOINFS-NEXT:    retq
1114;
1115; FMA4-NOINFS-LABEL: test_v4f32_interp:
1116; FMA4-NOINFS:       # %bb.0:
1117; FMA4-NOINFS-NEXT:    vfnmaddps %xmm1, %xmm1, %xmm2, %xmm1
1118; FMA4-NOINFS-NEXT:    vfmaddps %xmm1, %xmm2, %xmm0, %xmm0
1119; FMA4-NOINFS-NEXT:    retq
1120;
1121; AVX512-NOINFS-LABEL: test_v4f32_interp:
1122; AVX512-NOINFS:       # %bb.0:
1123; AVX512-NOINFS-NEXT:    vfnmadd213ps %xmm1, %xmm2, %xmm1
1124; AVX512-NOINFS-NEXT:    vfmadd213ps %xmm1, %xmm2, %xmm0
1125; AVX512-NOINFS-NEXT:    retq
1126  %t1 = fsub <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %t
1127  %tx = fmul <4 x float> %x, %t
1128  %ty = fmul <4 x float> %y, %t1
1129  %r = fadd <4 x float> %tx, %ty
1130  ret <4 x float> %r
1131}
1132
; <8 x float> interpolation: x*t + y*(1.0 - t), checked on ymm registers.
; The default runs expect vsubps + vmulps for the y*(1.0 - t) term and one
; fmadd for the final sum. The no-infs runs expect that term to become an
; fnmadd (y - t*y), giving two fused ops and no separate sub/mul.
1133define <8 x float> @test_v8f32_interp(<8 x float> %x, <8 x float> %y, <8 x float> %t) {
1134; FMA-INFS-LABEL: test_v8f32_interp:
1135; FMA-INFS:       # %bb.0:
1136; FMA-INFS-NEXT:    vmovaps {{.*#+}} ymm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
1137; FMA-INFS-NEXT:    vsubps %ymm2, %ymm3, %ymm3
1138; FMA-INFS-NEXT:    vmulps %ymm3, %ymm1, %ymm1
1139; FMA-INFS-NEXT:    vfmadd213ps %ymm1, %ymm2, %ymm0
1140; FMA-INFS-NEXT:    retq
1141;
1142; FMA4-INFS-LABEL: test_v8f32_interp:
1143; FMA4-INFS:       # %bb.0:
1144; FMA4-INFS-NEXT:    vmovaps {{.*#+}} ymm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
1145; FMA4-INFS-NEXT:    vsubps %ymm2, %ymm3, %ymm3
1146; FMA4-INFS-NEXT:    vmulps %ymm3, %ymm1, %ymm1
1147; FMA4-INFS-NEXT:    vfmaddps %ymm1, %ymm2, %ymm0, %ymm0
1148; FMA4-INFS-NEXT:    retq
1149;
1150; AVX512-INFS-LABEL: test_v8f32_interp:
1151; AVX512-INFS:       # %bb.0:
1152; AVX512-INFS-NEXT:    vbroadcastss {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1]
1153; AVX512-INFS-NEXT:    vsubps %ymm2, %ymm3, %ymm3
1154; AVX512-INFS-NEXT:    vmulps %ymm3, %ymm1, %ymm1
1155; AVX512-INFS-NEXT:    vfmadd213ps %ymm1, %ymm2, %ymm0
1156; AVX512-INFS-NEXT:    retq
1157;
1158; FMA-NOINFS-LABEL: test_v8f32_interp:
1159; FMA-NOINFS:       # %bb.0:
1160; FMA-NOINFS-NEXT:    vfnmadd213ps %ymm1, %ymm2, %ymm1
1161; FMA-NOINFS-NEXT:    vfmadd213ps %ymm1, %ymm2, %ymm0
1162; FMA-NOINFS-NEXT:    retq
1163;
1164; FMA4-NOINFS-LABEL: test_v8f32_interp:
1165; FMA4-NOINFS:       # %bb.0:
1166; FMA4-NOINFS-NEXT:    vfnmaddps %ymm1, %ymm1, %ymm2, %ymm1
1167; FMA4-NOINFS-NEXT:    vfmaddps %ymm1, %ymm2, %ymm0, %ymm0
1168; FMA4-NOINFS-NEXT:    retq
1169;
1170; AVX512-NOINFS-LABEL: test_v8f32_interp:
1171; AVX512-NOINFS:       # %bb.0:
1172; AVX512-NOINFS-NEXT:    vfnmadd213ps %ymm1, %ymm2, %ymm1
1173; AVX512-NOINFS-NEXT:    vfmadd213ps %ymm1, %ymm2, %ymm0
1174; AVX512-NOINFS-NEXT:    retq
1175  %t1 = fsub <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %t
1176  %tx = fmul <8 x float> %x, %t
1177  %ty = fmul <8 x float> %y, %t1
1178  %r = fadd <8 x float> %tx, %ty
1179  ret <8 x float> %r
1180}
1181
; Scalar double interpolation: x*t + y*(1.0 - t).
; The default runs expect vsubsd + vmulsd for the y*(1.0 - t) term and one
; fmadd for the final sum. The no-infs runs expect that term to become an
; fnmadd (y - t*y), giving two fused ops and no separate sub/mul.
1182define double @test_f64_interp(double %x, double %y, double %t) {
1183; FMA-INFS-LABEL: test_f64_interp:
1184; FMA-INFS:       # %bb.0:
1185; FMA-INFS-NEXT:    vmovsd {{.*#+}} xmm3 = mem[0],zero
1186; FMA-INFS-NEXT:    vsubsd %xmm2, %xmm3, %xmm3
1187; FMA-INFS-NEXT:    vmulsd %xmm3, %xmm1, %xmm1
1188; FMA-INFS-NEXT:    vfmadd213sd %xmm1, %xmm2, %xmm0
1189; FMA-INFS-NEXT:    retq
1190;
1191; FMA4-INFS-LABEL: test_f64_interp:
1192; FMA4-INFS:       # %bb.0:
1193; FMA4-INFS-NEXT:    vmovsd {{.*#+}} xmm3 = mem[0],zero
1194; FMA4-INFS-NEXT:    vsubsd %xmm2, %xmm3, %xmm3
1195; FMA4-INFS-NEXT:    vmulsd %xmm3, %xmm1, %xmm1
1196; FMA4-INFS-NEXT:    vfmaddsd %xmm1, %xmm2, %xmm0, %xmm0
1197; FMA4-INFS-NEXT:    retq
1198;
1199; AVX512-INFS-LABEL: test_f64_interp:
1200; AVX512-INFS:       # %bb.0:
1201; AVX512-INFS-NEXT:    vmovsd {{.*#+}} xmm3 = mem[0],zero
1202; AVX512-INFS-NEXT:    vsubsd %xmm2, %xmm3, %xmm3
1203; AVX512-INFS-NEXT:    vmulsd %xmm3, %xmm1, %xmm1
1204; AVX512-INFS-NEXT:    vfmadd213sd %xmm1, %xmm2, %xmm0
1205; AVX512-INFS-NEXT:    retq
1206;
1207; FMA-NOINFS-LABEL: test_f64_interp:
1208; FMA-NOINFS:       # %bb.0:
1209; FMA-NOINFS-NEXT:    vfnmadd213sd %xmm1, %xmm2, %xmm1
1210; FMA-NOINFS-NEXT:    vfmadd213sd %xmm1, %xmm2, %xmm0
1211; FMA-NOINFS-NEXT:    retq
1212;
1213; FMA4-NOINFS-LABEL: test_f64_interp:
1214; FMA4-NOINFS:       # %bb.0:
1215; FMA4-NOINFS-NEXT:    vfnmaddsd %xmm1, %xmm1, %xmm2, %xmm1
1216; FMA4-NOINFS-NEXT:    vfmaddsd %xmm1, %xmm2, %xmm0, %xmm0
1217; FMA4-NOINFS-NEXT:    retq
1218;
1219; AVX512-NOINFS-LABEL: test_f64_interp:
1220; AVX512-NOINFS:       # %bb.0:
1221; AVX512-NOINFS-NEXT:    vfnmadd213sd %xmm1, %xmm2, %xmm1
1222; AVX512-NOINFS-NEXT:    vfmadd213sd %xmm1, %xmm2, %xmm0
1223; AVX512-NOINFS-NEXT:    retq
1224  %t1 = fsub double 1.0, %t
1225  %tx = fmul double %x, %t
1226  %ty = fmul double %y, %t1
1227  %r = fadd double %tx, %ty
1228  ret double %r
1229}
1230
; <2 x double> interpolation: x*t + y*(1.0 - t).
; The default runs expect vsubpd + vmulpd for the y*(1.0 - t) term and one
; fmadd for the final sum. The no-infs runs expect that term to become an
; fnmadd (y - t*y), giving two fused ops and no separate sub/mul.
1231define <2 x double> @test_v2f64_interp(<2 x double> %x, <2 x double> %y, <2 x double> %t) {
1232; FMA-INFS-LABEL: test_v2f64_interp:
1233; FMA-INFS:       # %bb.0:
1234; FMA-INFS-NEXT:    vmovapd {{.*#+}} xmm3 = [1.000000e+00,1.000000e+00]
1235; FMA-INFS-NEXT:    vsubpd %xmm2, %xmm3, %xmm3
1236; FMA-INFS-NEXT:    vmulpd %xmm3, %xmm1, %xmm1
1237; FMA-INFS-NEXT:    vfmadd213pd %xmm1, %xmm2, %xmm0
1238; FMA-INFS-NEXT:    retq
1239;
1240; FMA4-INFS-LABEL: test_v2f64_interp:
1241; FMA4-INFS:       # %bb.0:
1242; FMA4-INFS-NEXT:    vmovapd {{.*#+}} xmm3 = [1.000000e+00,1.000000e+00]
1243; FMA4-INFS-NEXT:    vsubpd %xmm2, %xmm3, %xmm3
1244; FMA4-INFS-NEXT:    vmulpd %xmm3, %xmm1, %xmm1
1245; FMA4-INFS-NEXT:    vfmaddpd %xmm1, %xmm2, %xmm0, %xmm0
1246; FMA4-INFS-NEXT:    retq
1247;
1248; AVX512-INFS-LABEL: test_v2f64_interp:
1249; AVX512-INFS:       # %bb.0:
1250; AVX512-INFS-NEXT:    vmovapd {{.*#+}} xmm3 = [1.000000e+00,1.000000e+00]
1251; AVX512-INFS-NEXT:    vsubpd %xmm2, %xmm3, %xmm3
1252; AVX512-INFS-NEXT:    vmulpd %xmm3, %xmm1, %xmm1
1253; AVX512-INFS-NEXT:    vfmadd213pd %xmm1, %xmm2, %xmm0
1254; AVX512-INFS-NEXT:    retq
1255;
1256; FMA-NOINFS-LABEL: test_v2f64_interp:
1257; FMA-NOINFS:       # %bb.0:
1258; FMA-NOINFS-NEXT:    vfnmadd213pd %xmm1, %xmm2, %xmm1
1259; FMA-NOINFS-NEXT:    vfmadd213pd %xmm1, %xmm2, %xmm0
1260; FMA-NOINFS-NEXT:    retq
1261;
1262; FMA4-NOINFS-LABEL: test_v2f64_interp:
1263; FMA4-NOINFS:       # %bb.0:
1264; FMA4-NOINFS-NEXT:    vfnmaddpd %xmm1, %xmm1, %xmm2, %xmm1
1265; FMA4-NOINFS-NEXT:    vfmaddpd %xmm1, %xmm2, %xmm0, %xmm0
1266; FMA4-NOINFS-NEXT:    retq
1267;
1268; AVX512-NOINFS-LABEL: test_v2f64_interp:
1269; AVX512-NOINFS:       # %bb.0:
1270; AVX512-NOINFS-NEXT:    vfnmadd213pd %xmm1, %xmm2, %xmm1
1271; AVX512-NOINFS-NEXT:    vfmadd213pd %xmm1, %xmm2, %xmm0
1272; AVX512-NOINFS-NEXT:    retq
1273  %t1 = fsub <2 x double> <double 1.0, double 1.0>, %t
1274  %tx = fmul <2 x double> %x, %t
1275  %ty = fmul <2 x double> %y, %t1
1276  %r = fadd <2 x double> %tx, %ty
1277  ret <2 x double> %r
1278}
1279
; <4 x double> interpolation: x*t + y*(1.0 - t), checked on ymm registers.
; The default runs expect vsubpd + vmulpd for the y*(1.0 - t) term and one
; fmadd for the final sum. The no-infs runs expect that term to become an
; fnmadd (y - t*y), giving two fused ops and no separate sub/mul.
1280define <4 x double> @test_v4f64_interp(<4 x double> %x, <4 x double> %y, <4 x double> %t) {
1281; FMA-INFS-LABEL: test_v4f64_interp:
1282; FMA-INFS:       # %bb.0:
1283; FMA-INFS-NEXT:    vmovapd {{.*#+}} ymm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
1284; FMA-INFS-NEXT:    vsubpd %ymm2, %ymm3, %ymm3
1285; FMA-INFS-NEXT:    vmulpd %ymm3, %ymm1, %ymm1
1286; FMA-INFS-NEXT:    vfmadd213pd %ymm1, %ymm2, %ymm0
1287; FMA-INFS-NEXT:    retq
1288;
1289; FMA4-INFS-LABEL: test_v4f64_interp:
1290; FMA4-INFS:       # %bb.0:
1291; FMA4-INFS-NEXT:    vmovapd {{.*#+}} ymm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
1292; FMA4-INFS-NEXT:    vsubpd %ymm2, %ymm3, %ymm3
1293; FMA4-INFS-NEXT:    vmulpd %ymm3, %ymm1, %ymm1
1294; FMA4-INFS-NEXT:    vfmaddpd %ymm1, %ymm2, %ymm0, %ymm0
1295; FMA4-INFS-NEXT:    retq
1296;
1297; AVX512-INFS-LABEL: test_v4f64_interp:
1298; AVX512-INFS:       # %bb.0:
1299; AVX512-INFS-NEXT:    vbroadcastsd {{.*#+}} ymm3 = [1,1,1,1]
1300; AVX512-INFS-NEXT:    vsubpd %ymm2, %ymm3, %ymm3
1301; AVX512-INFS-NEXT:    vmulpd %ymm3, %ymm1, %ymm1
1302; AVX512-INFS-NEXT:    vfmadd213pd %ymm1, %ymm2, %ymm0
1303; AVX512-INFS-NEXT:    retq
1304;
1305; FMA-NOINFS-LABEL: test_v4f64_interp:
1306; FMA-NOINFS:       # %bb.0:
1307; FMA-NOINFS-NEXT:    vfnmadd213pd %ymm1, %ymm2, %ymm1
1308; FMA-NOINFS-NEXT:    vfmadd213pd %ymm1, %ymm2, %ymm0
1309; FMA-NOINFS-NEXT:    retq
1310;
1311; FMA4-NOINFS-LABEL: test_v4f64_interp:
1312; FMA4-NOINFS:       # %bb.0:
1313; FMA4-NOINFS-NEXT:    vfnmaddpd %ymm1, %ymm1, %ymm2, %ymm1
1314; FMA4-NOINFS-NEXT:    vfmaddpd %ymm1, %ymm2, %ymm0, %ymm0
1315; FMA4-NOINFS-NEXT:    retq
1316;
1317; AVX512-NOINFS-LABEL: test_v4f64_interp:
1318; AVX512-NOINFS:       # %bb.0:
1319; AVX512-NOINFS-NEXT:    vfnmadd213pd %ymm1, %ymm2, %ymm1
1320; AVX512-NOINFS-NEXT:    vfmadd213pd %ymm1, %ymm2, %ymm0
1321; AVX512-NOINFS-NEXT:    retq
1322  %t1 = fsub <4 x double> <double 1.0, double 1.0, double 1.0, double 1.0>, %t
1323  %tx = fmul <4 x double> %x, %t
1324  %ty = fmul <4 x double> %y, %t1
1325  %r = fadd <4 x double> %tx, %ty
1326  ret <4 x double> %r
1327}
1328
1329;
1330; Pattern: (fneg (fma x, y, z)) -> (fma x, -y, -z)
1331;
1332
; Computes -((a0 * a1) + a2), with the negation written as (fsub -0.0, v).
; Every run configuration expects one fnmsub instruction and no separate
; negate. Uses attribute group #0.
1333define <4 x float> @test_v4f32_fneg_fmadd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
1334; FMA-LABEL: test_v4f32_fneg_fmadd:
1335; FMA:       # %bb.0:
1336; FMA-NEXT:    vfnmsub213ps %xmm2, %xmm1, %xmm0
1337; FMA-NEXT:    retq
1338;
1339; FMA4-LABEL: test_v4f32_fneg_fmadd:
1340; FMA4:       # %bb.0:
1341; FMA4-NEXT:    vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0
1342; FMA4-NEXT:    retq
1343;
1344; AVX512-LABEL: test_v4f32_fneg_fmadd:
1345; AVX512:       # %bb.0:
1346; AVX512-NEXT:    vfnmsub213ps %xmm2, %xmm1, %xmm0
1347; AVX512-NEXT:    retq
1348  %mul = fmul <4 x float> %a0, %a1
1349  %add = fadd <4 x float> %mul, %a2
1350  %neg = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %add
1351  ret <4 x float> %neg
1352}
1353
; Computes -((a0 * a1) - a2), with the negation written as (fsub -0.0, v).
; Every run configuration expects one fnmadd instruction and no separate
; negate. Uses attribute group #0.
1354define <4 x double> @test_v4f64_fneg_fmsub(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
1355; FMA-LABEL: test_v4f64_fneg_fmsub:
1356; FMA:       # %bb.0:
1357; FMA-NEXT:    vfnmadd213pd %ymm2, %ymm1, %ymm0
1358; FMA-NEXT:    retq
1359;
1360; FMA4-LABEL: test_v4f64_fneg_fmsub:
1361; FMA4:       # %bb.0:
1362; FMA4-NEXT:    vfnmaddpd %ymm2, %ymm1, %ymm0, %ymm0
1363; FMA4-NEXT:    retq
1364;
1365; AVX512-LABEL: test_v4f64_fneg_fmsub:
1366; AVX512:       # %bb.0:
1367; AVX512-NEXT:    vfnmadd213pd %ymm2, %ymm1, %ymm0
1368; AVX512-NEXT:    retq
1369  %mul = fmul <4 x double> %a0, %a1
1370  %sub = fsub <4 x double> %mul, %a2
1371  %neg = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %sub
1372  ret <4 x double> %neg
1373}
1374
; Computes -(-(a0 * a1) + a2), with both negations written as (fsub -0.0, v).
; Every run configuration expects one fmsub instruction and no separate
; negate. Uses attribute group #0.
1375define <4 x float> @test_v4f32_fneg_fnmadd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
1376; FMA-LABEL: test_v4f32_fneg_fnmadd:
1377; FMA:       # %bb.0:
1378; FMA-NEXT:    vfmsub213ps %xmm2, %xmm1, %xmm0
1379; FMA-NEXT:    retq
1380;
1381; FMA4-LABEL: test_v4f32_fneg_fnmadd:
1382; FMA4:       # %bb.0:
1383; FMA4-NEXT:    vfmsubps %xmm2, %xmm1, %xmm0, %xmm0
1384; FMA4-NEXT:    retq
1385;
1386; AVX512-LABEL: test_v4f32_fneg_fnmadd:
1387; AVX512:       # %bb.0:
1388; AVX512-NEXT:    vfmsub213ps %xmm2, %xmm1, %xmm0
1389; AVX512-NEXT:    retq
1390  %mul = fmul <4 x float> %a0, %a1
1391  %neg0 = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %mul
1392  %add = fadd <4 x float> %neg0, %a2
1393  %neg1 = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %add
1394  ret <4 x float> %neg1
1395}
1396
; Computes -(-(a0 * a1) - a2), with both negations written as (fsub -0.0, v).
; Every run configuration expects one fmadd instruction and no separate
; negate. Uses attribute group #0.
1397define <4 x double> @test_v4f64_fneg_fnmsub(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
1398; FMA-LABEL: test_v4f64_fneg_fnmsub:
1399; FMA:       # %bb.0:
1400; FMA-NEXT:    vfmadd213pd %ymm2, %ymm1, %ymm0
1401; FMA-NEXT:    retq
1402;
1403; FMA4-LABEL: test_v4f64_fneg_fnmsub:
1404; FMA4:       # %bb.0:
1405; FMA4-NEXT:    vfmaddpd %ymm2, %ymm1, %ymm0, %ymm0
1406; FMA4-NEXT:    retq
1407;
1408; AVX512-LABEL: test_v4f64_fneg_fnmsub:
1409; AVX512:       # %bb.0:
1410; AVX512-NEXT:    vfmadd213pd %ymm2, %ymm1, %ymm0
1411; AVX512-NEXT:    retq
1412  %mul = fmul <4 x double> %a0, %a1
1413  %neg0 = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %mul
1414  %sub = fsub <4 x double> %neg0, %a2
1415  %neg1 = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %sub
1416  ret <4 x double> %neg1
1417}
1418
1419;
1420; Pattern: (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
1421;
1422
; Computes x*c1 + x*c2 with c1 = <1,2,3,4> and c2 = <4,3,2,1>.
; Every run configuration expects a single vmulps of x by a constant-pool
; operand and no fused op. The element-wise sum c1+c2 is <5,5,5,5>; the
; AVX512 runs expect that operand as a {1to4} broadcast.
; Uses attribute group #0.
1423define <4 x float> @test_v4f32_fma_x_c1_fmul_x_c2(<4 x float> %x) #0 {
1424; FMA-LABEL: test_v4f32_fma_x_c1_fmul_x_c2:
1425; FMA:       # %bb.0:
1426; FMA-NEXT:    vmulps {{.*}}(%rip), %xmm0, %xmm0
1427; FMA-NEXT:    retq
1428;
1429; FMA4-LABEL: test_v4f32_fma_x_c1_fmul_x_c2:
1430; FMA4:       # %bb.0:
1431; FMA4-NEXT:    vmulps {{.*}}(%rip), %xmm0, %xmm0
1432; FMA4-NEXT:    retq
1433;
1434; AVX512-LABEL: test_v4f32_fma_x_c1_fmul_x_c2:
1435; AVX512:       # %bb.0:
1436; AVX512-NEXT:    vmulps {{.*}}(%rip){1to4}, %xmm0, %xmm0
1437; AVX512-NEXT:    retq
1438  %m0 = fmul <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0>
1439  %m1 = fmul <4 x float> %x, <float 4.0, float 3.0, float 2.0, float 1.0>
1440  %a  = fadd <4 x float> %m0, %m1
1441  ret <4 x float> %a
1442}
1443
1444;
1445; Pattern: (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
1446;
1447
; Computes ((x * c1) * c2) + y with c1 = <1,2,3,4> and c2 = <4,3,2,1>.
; Every run configuration expects a single fmadd taking one constant-pool
; operand, with no separate vmulps for either constant multiply.
; Uses attribute group #0.
1448define <4 x float> @test_v4f32_fma_fmul_x_c1_c2_y(<4 x float> %x, <4 x float> %y) #0 {
1449; FMA-LABEL: test_v4f32_fma_fmul_x_c1_c2_y:
1450; FMA:       # %bb.0:
1451; FMA-NEXT:    vfmadd132ps {{.*}}(%rip), %xmm1, %xmm0
1452; FMA-NEXT:    retq
1453;
1454; FMA4-LABEL: test_v4f32_fma_fmul_x_c1_c2_y:
1455; FMA4:       # %bb.0:
1456; FMA4-NEXT:    vfmaddps %xmm1, {{.*}}(%rip), %xmm0, %xmm0
1457; FMA4-NEXT:    retq
1458;
1459; AVX512-LABEL: test_v4f32_fma_fmul_x_c1_c2_y:
1460; AVX512:       # %bb.0:
1461; AVX512-NEXT:    vfmadd132ps {{.*}}(%rip), %xmm1, %xmm0
1462; AVX512-NEXT:    retq
1463  %m0 = fmul <4 x float> %x,  <float 1.0, float 2.0, float 3.0, float 4.0>
1464  %m1 = fmul <4 x float> %m0, <float 4.0, float 3.0, float 2.0, float 1.0>
1465  %a  = fadd <4 x float> %m1, %y
1466  ret <4 x float> %a
1467}
1468
1469; Pattern: (fneg (fmul x, y)) -> (fnmsub x, y, 0), tested with nsz on the fmul; the _no_nsz variant checks the unfused fmul + xor sequence
1470
; Computes -(x * y) on scalar double, where the fmul carries the nsz flag and
; the negation is written as (fsub -0.0, v).
; Every run configuration expects a zeroed register (vxorpd) followed by one
; fnmsub, rather than a multiply plus a sign flip. Uses attribute group #0.
1471define double @test_f64_fneg_fmul(double %x, double %y) #0 {
1472; FMA-LABEL: test_f64_fneg_fmul:
1473; FMA:       # %bb.0:
1474; FMA-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
1475; FMA-NEXT:    vfnmsub213sd %xmm2, %xmm1, %xmm0
1476; FMA-NEXT:    retq
1477;
1478; FMA4-LABEL: test_f64_fneg_fmul:
1479; FMA4:       # %bb.0:
1480; FMA4-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
1481; FMA4-NEXT:    vfnmsubsd %xmm2, %xmm1, %xmm0, %xmm0
1482; FMA4-NEXT:    retq
1483;
1484; AVX512-LABEL: test_f64_fneg_fmul:
1485; AVX512:       # %bb.0:
1486; AVX512-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
1487; AVX512-NEXT:    vfnmsub213sd %xmm2, %xmm1, %xmm0
1488; AVX512-NEXT:    retq
1489  %m = fmul nsz double %x, %y
1490  %n = fsub double -0.0, %m
1491  ret double %n
1492}
1493
; Computes -(x * y) on <4 x float>, where the fmul carries the nsz flag and
; the negation is written as (fsub -0.0, v).
; Every run configuration expects a zeroed register (vxorps) followed by one
; fnmsub, rather than a multiply plus a sign flip. Uses attribute group #0.
1494define <4 x float> @test_v4f32_fneg_fmul(<4 x float> %x, <4 x float> %y) #0 {
1495; FMA-LABEL: test_v4f32_fneg_fmul:
1496; FMA:       # %bb.0:
1497; FMA-NEXT:    vxorps %xmm2, %xmm2, %xmm2
1498; FMA-NEXT:    vfnmsub213ps %xmm2, %xmm1, %xmm0
1499; FMA-NEXT:    retq
1500;
1501; FMA4-LABEL: test_v4f32_fneg_fmul:
1502; FMA4:       # %bb.0:
1503; FMA4-NEXT:    vxorps %xmm2, %xmm2, %xmm2
1504; FMA4-NEXT:    vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0
1505; FMA4-NEXT:    retq
1506;
1507; AVX512-LABEL: test_v4f32_fneg_fmul:
1508; AVX512:       # %bb.0:
1509; AVX512-NEXT:    vxorps %xmm2, %xmm2, %xmm2
1510; AVX512-NEXT:    vfnmsub213ps %xmm2, %xmm1, %xmm0
1511; AVX512-NEXT:    retq
1512  %m = fmul nsz <4 x float> %x, %y
1513  %n = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %m
1514  ret <4 x float> %n
1515}
1516
; Computes -(x * y) on <4 x double>, where the fmul carries the nsz flag and
; the negation is written as (fsub -0.0, v).
; Every run configuration expects a zeroing vxorpd (written on xmm2) followed
; by one ymm fnmsub, rather than a multiply plus a sign flip.
; Uses attribute group #0.
1517define <4 x double> @test_v4f64_fneg_fmul(<4 x double> %x, <4 x double> %y) #0 {
1518; FMA-LABEL: test_v4f64_fneg_fmul:
1519; FMA:       # %bb.0:
1520; FMA-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
1521; FMA-NEXT:    vfnmsub213pd %ymm2, %ymm1, %ymm0
1522; FMA-NEXT:    retq
1523;
1524; FMA4-LABEL: test_v4f64_fneg_fmul:
1525; FMA4:       # %bb.0:
1526; FMA4-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
1527; FMA4-NEXT:    vfnmsubpd %ymm2, %ymm1, %ymm0, %ymm0
1528; FMA4-NEXT:    retq
1529;
1530; AVX512-LABEL: test_v4f64_fneg_fmul:
1531; AVX512:       # %bb.0:
1532; AVX512-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
1533; AVX512-NEXT:    vfnmsub213pd %ymm2, %ymm1, %ymm0
1534; AVX512-NEXT:    retq
1535  %m = fmul nsz <4 x double> %x, %y
1536  %n = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %m
1537  ret <4 x double> %n
1538}
1539
; Same computation as test_v4f64_fneg_fmul, -(x * y) on <4 x double>, but the
; fmul has no nsz flag.
; Every run configuration expects the unfused form here: a vmulpd followed by
; a vxorpd against a constant-pool operand, with no fnmsub.
; Uses attribute group #0.
1540define <4 x double> @test_v4f64_fneg_fmul_no_nsz(<4 x double> %x, <4 x double> %y) #0 {
1541; FMA-LABEL: test_v4f64_fneg_fmul_no_nsz:
1542; FMA:       # %bb.0:
1543; FMA-NEXT:    vmulpd %ymm1, %ymm0, %ymm0
1544; FMA-NEXT:    vxorpd {{.*}}(%rip), %ymm0, %ymm0
1545; FMA-NEXT:    retq
1546;
1547; FMA4-LABEL: test_v4f64_fneg_fmul_no_nsz:
1548; FMA4:       # %bb.0:
1549; FMA4-NEXT:    vmulpd %ymm1, %ymm0, %ymm0
1550; FMA4-NEXT:    vxorpd {{.*}}(%rip), %ymm0, %ymm0
1551; FMA4-NEXT:    retq
1552;
1553; AVX512-LABEL: test_v4f64_fneg_fmul_no_nsz:
1554; AVX512:       # %bb.0:
1555; AVX512-NEXT:    vmulpd %ymm1, %ymm0, %ymm0
1556; AVX512-NEXT:    vxorpd {{.*}}(%rip){1to4}, %ymm0, %ymm0
1557; AVX512-NEXT:    retq
1558  %m = fmul <4 x double> %x, %y
1559  %n = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %m
1560  ret <4 x double> %n
1561}
1562
; Attribute group #0 sets the "unsafe-fp-math" function attribute to "true".
; It applies to every function above whose define line ends in #0.
1563attributes #0 = { "unsafe-fp-math"="true" }
1564