; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma -fp-contract=fast | FileCheck %s --check-prefix=FMA
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4,+fma -fp-contract=fast | FileCheck %s --check-prefix=FMA4
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4 -fp-contract=fast | FileCheck %s --check-prefix=FMA4
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq -fp-contract=fast | FileCheck %s --check-prefix=AVX512

;
; Pattern: (fadd (fmul x, y), z) -> (fmadd x,y,z)
;

define <16 x float> @test_16f32_fmadd(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; FMA-LABEL: test_16f32_fmadd:
; FMA:       # BB#0:
; FMA-NEXT:    vfmadd213ps %ymm4, %ymm2, %ymm0
; FMA-NEXT:    vfmadd213ps %ymm5, %ymm3, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_16f32_fmadd:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmaddps %ymm4, %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vfmaddps %ymm5, %ymm3, %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_16f32_fmadd:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfmadd213ps %zmm2, %zmm1, %zmm0
; AVX512-NEXT:    retq
  %x = fmul <16 x float> %a0, %a1
  %res = fadd <16 x float> %x, %a2
  ret <16 x float> %res
}

define <8 x double> @test_8f64_fmadd(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; FMA-LABEL: test_8f64_fmadd:
; FMA:       # BB#0:
; FMA-NEXT:    vfmadd213pd %ymm4, %ymm2, %ymm0
; FMA-NEXT:    vfmadd213pd %ymm5, %ymm3, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_8f64_fmadd:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmaddpd %ymm4, %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vfmaddpd %ymm5, %ymm3, %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_8f64_fmadd:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfmadd213pd %zmm2, %zmm1, %zmm0
; AVX512-NEXT:    retq
  %x = fmul <8 x double> %a0, %a1
  %res = fadd <8 x double> %x, %a2
  ret <8 x double> %res
}

;
; Pattern: (fsub (fmul x, y), z) -> (fmsub x, y, z)
;

define <16 x float> @test_16f32_fmsub(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; FMA-LABEL: test_16f32_fmsub:
; FMA:       # BB#0:
; FMA-NEXT:    vfmsub213ps %ymm4, %ymm2, %ymm0
; FMA-NEXT:    vfmsub213ps %ymm5, %ymm3, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_16f32_fmsub:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmsubps %ymm4, %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vfmsubps %ymm5, %ymm3, %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_16f32_fmsub:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfmsub213ps %zmm2, %zmm1, %zmm0
; AVX512-NEXT:    retq
  %x = fmul <16 x float> %a0, %a1
  %res = fsub <16 x float> %x, %a2
  ret <16 x float> %res
}

define <8 x double> @test_8f64_fmsub(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; FMA-LABEL: test_8f64_fmsub:
; FMA:       # BB#0:
; FMA-NEXT:    vfmsub213pd %ymm4, %ymm2, %ymm0
; FMA-NEXT:    vfmsub213pd %ymm5, %ymm3, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_8f64_fmsub:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmsubpd %ymm4, %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vfmsubpd %ymm5, %ymm3, %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_8f64_fmsub:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfmsub213pd %zmm2, %zmm1, %zmm0
; AVX512-NEXT:    retq
  %x = fmul <8 x double> %a0, %a1
  %res = fsub <8 x double> %x, %a2
  ret <8 x double> %res
}

;
; Pattern: (fsub z, (fmul x, y)) -> (fnmadd x, y, z)
;

define <16 x float> @test_16f32_fnmadd(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; FMA-LABEL: test_16f32_fnmadd:
; FMA:       # BB#0:
; FMA-NEXT:    vfnmadd213ps %ymm4, %ymm2, %ymm0
; FMA-NEXT:    vfnmadd213ps %ymm5, %ymm3, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_16f32_fnmadd:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfnmaddps %ymm4, %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vfnmaddps %ymm5, %ymm3, %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_16f32_fnmadd:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfnmadd213ps %zmm2, %zmm1, %zmm0
; AVX512-NEXT:    retq
  %x = fmul <16 x float> %a0, %a1
  %res = fsub <16 x float> %a2, %x
  ret <16 x float> %res
}

define <8 x double> @test_8f64_fnmadd(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; FMA-LABEL: test_8f64_fnmadd:
; FMA:       # BB#0:
; FMA-NEXT:    vfnmadd213pd %ymm4, %ymm2, %ymm0
; FMA-NEXT:    vfnmadd213pd %ymm5, %ymm3, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_8f64_fnmadd:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfnmaddpd %ymm4, %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vfnmaddpd %ymm5, %ymm3, %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_8f64_fnmadd:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfnmadd213pd %zmm2, %zmm1, %zmm0
; AVX512-NEXT:    retq
  %x = fmul <8 x double> %a0, %a1
  %res = fsub <8 x double> %a2, %x
  ret <8 x double> %res
}

;
; Pattern: (fsub (fneg (fmul x, y)), z) -> (fnmsub x, y, z)
;

define <16 x float> @test_16f32_fnmsub(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; FMA-LABEL: test_16f32_fnmsub:
; FMA:       # BB#0:
; FMA-NEXT:    vfnmsub213ps %ymm4, %ymm2, %ymm0
; FMA-NEXT:    vfnmsub213ps %ymm5, %ymm3, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_16f32_fnmsub:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfnmsubps %ymm4, %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vfnmsubps %ymm5, %ymm3, %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_16f32_fnmsub:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfnmsub213ps %zmm2, %zmm1, %zmm0
; AVX512-NEXT:    retq
  %x = fmul <16 x float> %a0, %a1
  %y = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x
  %res = fsub <16 x float> %y, %a2
  ret <16 x float> %res
}

define <8 x double> @test_8f64_fnmsub(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; FMA-LABEL: test_8f64_fnmsub:
; FMA:       # BB#0:
; FMA-NEXT:    vfnmsub213pd %ymm4, %ymm2, %ymm0
; FMA-NEXT:    vfnmsub213pd %ymm5, %ymm3, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_8f64_fnmsub:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfnmsubpd %ymm4, %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vfnmsubpd %ymm5, %ymm3, %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_8f64_fnmsub:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfnmsub213pd %zmm2, %zmm1, %zmm0
; AVX512-NEXT:    retq
  %x = fmul <8 x double> %a0, %a1
  %y = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %x
  %res = fsub <8 x double> %y, %a2
  ret <8 x double> %res
}

;
; Load Folding Patterns
;

define <16 x float> @test_16f32_fmadd_load(<16 x float>* %a0, <16 x float> %a1, <16 x float> %a2) {
; FMA-LABEL: test_16f32_fmadd_load:
; FMA:       # BB#0:
; FMA-NEXT:    vfmadd132ps (%rdi), %ymm2, %ymm0
; FMA-NEXT:    vfmadd132ps 32(%rdi), %ymm3, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_16f32_fmadd_load:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmaddps %ymm2, (%rdi), %ymm0, %ymm0
; FMA4-NEXT:    vfmaddps %ymm3, 32(%rdi), %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_16f32_fmadd_load:
; AVX512:       # BB#0:
; AVX512-NEXT:    vmovaps (%rdi), %zmm2
; AVX512-NEXT:    vfmadd213ps %zmm1, %zmm0, %zmm2
; AVX512-NEXT:    vmovaps %zmm2, %zmm0
; AVX512-NEXT:    retq
  %x = load <16 x float>, <16 x float>* %a0
  %y = fmul <16 x float> %x, %a1
  %res = fadd <16 x float> %y, %a2
  ret <16 x float> %res
}

define <8 x double> @test_8f64_fmsub_load(<8 x double>* %a0, <8 x double> %a1, <8 x double> %a2) {
; FMA-LABEL: test_8f64_fmsub_load:
; FMA:       # BB#0:
; FMA-NEXT:    vfmsub132pd (%rdi), %ymm2, %ymm0
; FMA-NEXT:    vfmsub132pd 32(%rdi), %ymm3, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_8f64_fmsub_load:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmsubpd %ymm2, (%rdi), %ymm0, %ymm0
; FMA4-NEXT:    vfmsubpd %ymm3, 32(%rdi), %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_8f64_fmsub_load:
; AVX512:       # BB#0:
; AVX512-NEXT:    vmovapd (%rdi), %zmm2
; AVX512-NEXT:    vfmsub213pd %zmm1, %zmm0, %zmm2
; AVX512-NEXT:    vmovaps %zmm2, %zmm0
; AVX512-NEXT:    retq
  %x = load <8 x double>, <8 x double>* %a0
  %y = fmul <8 x double> %x, %a1
  %res = fsub <8 x double> %y, %a2
  ret <8 x double> %res
}

;
; Patterns (+ fneg variants): mul(add(1.0,x),y), mul(sub(1.0,x),y), mul(sub(x,1.0),y)
;

define <16 x float> @test_v16f32_mul_add_x_one_y(<16 x float> %x, <16 x float> %y) {
; FMA-LABEL: test_v16f32_mul_add_x_one_y:
; FMA:       # BB#0:
; FMA-NEXT:    vfmadd213ps %ymm2, %ymm2, %ymm0
; FMA-NEXT:    vfmadd213ps %ymm3, %ymm3, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v16f32_mul_add_x_one_y:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmaddps %ymm2, %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vfmaddps %ymm3, %ymm3, %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v16f32_mul_add_x_one_y:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfmadd213ps %zmm1, %zmm1, %zmm0
; AVX512-NEXT:    retq
  %a = fadd <16 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>
  %m = fmul <16 x float> %a, %y
  ret <16 x float> %m
}

define <8 x double> @test_v8f64_mul_y_add_x_one(<8 x double> %x, <8 x double> %y) {
; FMA-LABEL: test_v8f64_mul_y_add_x_one:
; FMA:       # BB#0:
; FMA-NEXT:    vfmadd213pd %ymm2, %ymm2, %ymm0
; FMA-NEXT:    vfmadd213pd %ymm3, %ymm3, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v8f64_mul_y_add_x_one:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmaddpd %ymm2, %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vfmaddpd %ymm3, %ymm3, %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v8f64_mul_y_add_x_one:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfmadd213pd %zmm1, %zmm1, %zmm0
; AVX512-NEXT:    retq
  %a = fadd <8 x double> %x, <double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0>
  %m = fmul <8 x double> %y, %a
  ret <8 x double> %m
}

define <16 x float> @test_v16f32_mul_add_x_negone_y(<16 x float> %x, <16 x float> %y) {
; FMA-LABEL: test_v16f32_mul_add_x_negone_y:
; FMA:       # BB#0:
; FMA-NEXT:    vfmsub213ps %ymm2, %ymm2, %ymm0
; FMA-NEXT:    vfmsub213ps %ymm3, %ymm3, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v16f32_mul_add_x_negone_y:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmsubps %ymm2, %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vfmsubps %ymm3, %ymm3, %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v16f32_mul_add_x_negone_y:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfmsub213ps %zmm1, %zmm1, %zmm0
; AVX512-NEXT:    retq
  %a = fadd <16 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0>
  %m = fmul <16 x float> %a, %y
  ret <16 x float> %m
}

define <8 x double> @test_v8f64_mul_y_add_x_negone(<8 x double> %x, <8 x double> %y) {
; FMA-LABEL: test_v8f64_mul_y_add_x_negone:
; FMA:       # BB#0:
; FMA-NEXT:    vfmsub213pd %ymm2, %ymm2, %ymm0
; FMA-NEXT:    vfmsub213pd %ymm3, %ymm3, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v8f64_mul_y_add_x_negone:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmsubpd %ymm2, %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vfmsubpd %ymm3, %ymm3, %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v8f64_mul_y_add_x_negone:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfmsub213pd %zmm1, %zmm1, %zmm0
; AVX512-NEXT:    retq
  %a = fadd <8 x double> %x, <double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0>
  %m = fmul <8 x double> %y, %a
  ret <8 x double> %m
}

define <16 x float> @test_v16f32_mul_sub_one_x_y(<16 x float> %x, <16 x float> %y) {
; FMA-LABEL: test_v16f32_mul_sub_one_x_y:
; FMA:       # BB#0:
; FMA-NEXT:    vfnmadd213ps %ymm2, %ymm2, %ymm0
; FMA-NEXT:    vfnmadd213ps %ymm3, %ymm3, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v16f32_mul_sub_one_x_y:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfnmaddps %ymm2, %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vfnmaddps %ymm3, %ymm3, %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v16f32_mul_sub_one_x_y:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfnmadd213ps %zmm1, %zmm1, %zmm0
; AVX512-NEXT:    retq
  %s = fsub <16 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x
  %m = fmul <16 x float> %s, %y
  ret <16 x float> %m
}

define <8 x double> @test_v8f64_mul_y_sub_one_x(<8 x double> %x, <8 x double> %y) {
; FMA-LABEL: test_v8f64_mul_y_sub_one_x:
; FMA:       # BB#0:
; FMA-NEXT:    vfnmadd213pd %ymm2, %ymm2, %ymm0
; FMA-NEXT:    vfnmadd213pd %ymm3, %ymm3, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v8f64_mul_y_sub_one_x:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfnmaddpd %ymm2, %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vfnmaddpd %ymm3, %ymm3, %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v8f64_mul_y_sub_one_x:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfnmadd213pd %zmm1, %zmm1, %zmm0
; AVX512-NEXT:    retq
  %s = fsub <8 x double> <double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0>, %x
  %m = fmul <8 x double> %y, %s
  ret <8 x double> %m
}

define <16 x float> @test_v16f32_mul_sub_negone_x_y(<16 x float> %x, <16 x float> %y) {
; FMA-LABEL: test_v16f32_mul_sub_negone_x_y:
; FMA:       # BB#0:
; FMA-NEXT:    vfnmsub213ps %ymm2, %ymm2, %ymm0
; FMA-NEXT:    vfnmsub213ps %ymm3, %ymm3, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v16f32_mul_sub_negone_x_y:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfnmsubps %ymm2, %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vfnmsubps %ymm3, %ymm3, %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v16f32_mul_sub_negone_x_y:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfnmsub213ps %zmm1, %zmm1, %zmm0
; AVX512-NEXT:    retq
  %s = fsub <16 x float> <float -1.0, float -1.0, float -1.0, float -1.0,float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0>, %x
  %m = fmul <16 x float> %s, %y
  ret <16 x float> %m
}

define <8 x double> @test_v8f64_mul_y_sub_negone_x(<8 x double> %x, <8 x double> %y) {
; FMA-LABEL: test_v8f64_mul_y_sub_negone_x:
; FMA:       # BB#0:
; FMA-NEXT:    vfnmsub213pd %ymm2, %ymm2, %ymm0
; FMA-NEXT:    vfnmsub213pd %ymm3, %ymm3, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v8f64_mul_y_sub_negone_x:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfnmsubpd %ymm2, %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vfnmsubpd %ymm3, %ymm3, %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v8f64_mul_y_sub_negone_x:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfnmsub213pd %zmm1, %zmm1, %zmm0
; AVX512-NEXT:    retq
  %s = fsub <8 x double> <double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0>, %x
  %m = fmul <8 x double> %y, %s
  ret <8 x double> %m
}

define <16 x float> @test_v16f32_mul_sub_x_one_y(<16 x float> %x, <16 x float> %y) {
; FMA-LABEL: test_v16f32_mul_sub_x_one_y:
; FMA:       # BB#0:
; FMA-NEXT:    vfmsub213ps %ymm2, %ymm2, %ymm0
; FMA-NEXT:    vfmsub213ps %ymm3, %ymm3, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v16f32_mul_sub_x_one_y:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmsubps %ymm2, %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vfmsubps %ymm3, %ymm3, %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v16f32_mul_sub_x_one_y:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfmsub213ps %zmm1, %zmm1, %zmm0
; AVX512-NEXT:    retq
  %s = fsub <16 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>
  %m = fmul <16 x float> %s, %y
  ret <16 x float> %m
}

define <8 x double> @test_v8f64_mul_y_sub_x_one(<8 x double> %x, <8 x double> %y) {
; FMA-LABEL: test_v8f64_mul_y_sub_x_one:
; FMA:       # BB#0:
; FMA-NEXT:    vfmsub213pd %ymm2, %ymm2, %ymm0
; FMA-NEXT:    vfmsub213pd %ymm3, %ymm3, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v8f64_mul_y_sub_x_one:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmsubpd %ymm2, %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vfmsubpd %ymm3, %ymm3, %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v8f64_mul_y_sub_x_one:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfmsub213pd %zmm1, %zmm1, %zmm0
; AVX512-NEXT:    retq
  %s = fsub <8 x double> %x, <double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0>
  %m = fmul <8 x double> %y, %s
  ret <8 x double> %m
}

define <16 x float> @test_v16f32_mul_sub_x_negone_y(<16 x float> %x, <16 x float> %y) {
; FMA-LABEL: test_v16f32_mul_sub_x_negone_y:
; FMA:       # BB#0:
; FMA-NEXT:    vfmadd213ps %ymm2, %ymm2, %ymm0
; FMA-NEXT:    vfmadd213ps %ymm3, %ymm3, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v16f32_mul_sub_x_negone_y:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmaddps %ymm2, %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vfmaddps %ymm3, %ymm3, %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v16f32_mul_sub_x_negone_y:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfmadd213ps %zmm1, %zmm1, %zmm0
; AVX512-NEXT:    retq
  %s = fsub <16 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0>
  %m = fmul <16 x float> %s, %y
  ret <16 x float> %m
}

define <8 x double> @test_v8f64_mul_y_sub_x_negone(<8 x double> %x, <8 x double> %y) {
; FMA-LABEL: test_v8f64_mul_y_sub_x_negone:
; FMA:       # BB#0:
; FMA-NEXT:    vfmadd213pd %ymm2, %ymm2, %ymm0
; FMA-NEXT:    vfmadd213pd %ymm3, %ymm3, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v8f64_mul_y_sub_x_negone:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmaddpd %ymm2, %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vfmaddpd %ymm3, %ymm3, %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v8f64_mul_y_sub_x_negone:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfmadd213pd %zmm1, %zmm1, %zmm0
; AVX512-NEXT:    retq
  %s = fsub <8 x double> %x, <double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0>
  %m = fmul <8 x double> %y, %s
  ret <8 x double> %m
}

;
; Interpolation Patterns: add(mul(x,t),mul(sub(1.0,t),y))
;

define <16 x float> @test_v16f32_interp(<16 x float> %x, <16 x float> %y, <16 x float> %t) {
; FMA-LABEL: test_v16f32_interp:
; FMA:       # BB#0:
; FMA-NEXT:    vfnmadd213ps %ymm3, %ymm5, %ymm3
; FMA-NEXT:    vfnmadd213ps %ymm2, %ymm4, %ymm2
; FMA-NEXT:    vfmadd213ps %ymm2, %ymm4, %ymm0
; FMA-NEXT:    vfmadd213ps %ymm3, %ymm5, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v16f32_interp:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfnmaddps %ymm3, %ymm3, %ymm5, %ymm3
; FMA4-NEXT:    vfnmaddps %ymm2, %ymm2, %ymm4, %ymm2
; FMA4-NEXT:    vfmaddps %ymm2, %ymm4, %ymm0, %ymm0
; FMA4-NEXT:    vfmaddps %ymm3, %ymm5, %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v16f32_interp:
; AVX512:       # BB#0:
; AVX512-NEXT:    vmovaps %zmm2, %zmm3
; AVX512-NEXT:    vfnmadd213ps %zmm1, %zmm1, %zmm3
; AVX512-NEXT:    vfmadd213ps %zmm3, %zmm2, %zmm0
; AVX512-NEXT:    retq
  %t1 = fsub <16 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %t
  %tx = fmul <16 x float> %x, %t
  %ty = fmul <16 x float> %y, %t1
  %r = fadd <16 x float> %tx, %ty
  ret <16 x float> %r
}

define <8 x double> @test_v8f64_interp(<8 x double> %x, <8 x double> %y, <8 x double> %t) {
; FMA-LABEL: test_v8f64_interp:
; FMA:       # BB#0:
; FMA-NEXT:    vfnmadd213pd %ymm3, %ymm5, %ymm3
; FMA-NEXT:    vfnmadd213pd %ymm2, %ymm4, %ymm2
; FMA-NEXT:    vfmadd213pd %ymm2, %ymm4, %ymm0
; FMA-NEXT:    vfmadd213pd %ymm3, %ymm5, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v8f64_interp:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfnmaddpd %ymm3, %ymm3, %ymm5, %ymm3
; FMA4-NEXT:    vfnmaddpd %ymm2, %ymm2, %ymm4, %ymm2
; FMA4-NEXT:    vfmaddpd %ymm2, %ymm4, %ymm0, %ymm0
; FMA4-NEXT:    vfmaddpd %ymm3, %ymm5, %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v8f64_interp:
; AVX512:       # BB#0:
; AVX512-NEXT:    vmovaps %zmm2, %zmm3
; AVX512-NEXT:    vfnmadd213pd %zmm1, %zmm1, %zmm3
; AVX512-NEXT:    vfmadd213pd %zmm3, %zmm2, %zmm0
; AVX512-NEXT:    retq
  %t1 = fsub <8 x double> <double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0>, %t
  %tx = fmul <8 x double> %x, %t
  %ty = fmul <8 x double> %y, %t1
  %r = fadd <8 x double> %tx, %ty
  ret <8 x double> %r
}

;
; Pattern: (fneg (fma x, y, z)) -> (fma x, -y, -z)
;

define <16 x float> @test_v16f32_fneg_fmadd(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) #0 {
; FMA-LABEL: test_v16f32_fneg_fmadd:
; FMA:       # BB#0:
; FMA-NEXT:    vfnmsub213ps %ymm4, %ymm2, %ymm0
; FMA-NEXT:    vfnmsub213ps %ymm5, %ymm3, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v16f32_fneg_fmadd:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfnmsubps %ymm4, %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vfnmsubps %ymm5, %ymm3, %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v16f32_fneg_fmadd:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfnmsub213ps %zmm2, %zmm1, %zmm0
; AVX512-NEXT:    retq
  %mul = fmul <16 x float> %a0, %a1
  %add = fadd <16 x float> %mul, %a2
  %neg = fsub <16 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %add
  ret <16 x float> %neg
}

define <8 x double> @test_v8f64_fneg_fmsub(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) #0 {
; FMA-LABEL: test_v8f64_fneg_fmsub:
; FMA:       # BB#0:
; FMA-NEXT:    vfnmadd213pd %ymm4, %ymm2, %ymm0
; FMA-NEXT:    vfnmadd213pd %ymm5, %ymm3, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v8f64_fneg_fmsub:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfnmaddpd %ymm4, %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vfnmaddpd %ymm5, %ymm3, %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v8f64_fneg_fmsub:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfnmadd213pd %zmm2, %zmm1, %zmm0
; AVX512-NEXT:    retq
  %mul = fmul <8 x double> %a0, %a1
  %sub = fsub <8 x double> %mul, %a2
  %neg = fsub <8 x double> <double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0>, %sub
  ret <8 x double> %neg
}

define <16 x float> @test_v16f32_fneg_fnmadd(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) #0 {
; FMA-LABEL: test_v16f32_fneg_fnmadd:
; FMA:       # BB#0:
; FMA-NEXT:    vfmsub213ps %ymm4, %ymm2, %ymm0
; FMA-NEXT:    vfmsub213ps %ymm5, %ymm3, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v16f32_fneg_fnmadd:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmsubps %ymm4, %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vfmsubps %ymm5, %ymm3, %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v16f32_fneg_fnmadd:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfmsub213ps %zmm2, %zmm1, %zmm0
; AVX512-NEXT:    retq
  %mul = fmul <16 x float> %a0, %a1
  %neg0 = fsub <16 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %mul
  %add = fadd <16 x float> %neg0, %a2
  %neg1 = fsub <16 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %add
  ret <16 x float> %neg1
}

define <8 x double> @test_v8f64_fneg_fnmsub(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) #0 {
; FMA-LABEL: test_v8f64_fneg_fnmsub:
; FMA:       # BB#0:
; FMA-NEXT:    vfmadd213pd %ymm4, %ymm2, %ymm0
; FMA-NEXT:    vfmadd213pd %ymm5, %ymm3, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v8f64_fneg_fnmsub:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmaddpd %ymm4, %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vfmaddpd %ymm5, %ymm3, %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v8f64_fneg_fnmsub:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfmadd213pd %zmm2, %zmm1, %zmm0
; AVX512-NEXT:    retq
  %mul = fmul <8 x double> %a0, %a1
  %neg0 = fsub <8 x double> <double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0>, %mul
  %sub = fsub <8 x double> %neg0, %a2
  %neg1 = fsub <8 x double> <double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0>, %sub
  ret <8 x double> %neg1
}

;
; Pattern: (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
;

define <16 x float> @test_v16f32_fma_x_c1_fmul_x_c2(<16 x float> %x) #0 {
; FMA-LABEL: test_v16f32_fma_x_c1_fmul_x_c2:
; FMA:       # BB#0:
; FMA-NEXT:    vmulps {{.*}}(%rip), %ymm0, %ymm0
; FMA-NEXT:    vmulps {{.*}}(%rip), %ymm1, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v16f32_fma_x_c1_fmul_x_c2:
; FMA4:       # BB#0:
; FMA4-NEXT:    vmulps {{.*}}(%rip), %ymm0, %ymm0
; FMA4-NEXT:    vmulps {{.*}}(%rip), %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v16f32_fma_x_c1_fmul_x_c2:
; AVX512:       # BB#0:
; AVX512-NEXT:    vmulps {{.*}}(%rip), %zmm0, %zmm0
; AVX512-NEXT:    retq
  %m0 = fmul <16 x float> %x, <float 17.0, float 16.0, float 15.0, float 14.0, float 13.0, float 12.0, float 11.0, float 10.0, float 9.0, float 8.0, float 7.0, float 6.0, float 5.0, float 4.0, float 3.0, float 2.0>
  %m1 = fmul <16 x float> %x, <float 16.0, float 15.0, float 14.0, float 13.0, float 12.0, float 11.0, float 10.0, float 9.0, float 8.0, float 7.0, float 6.0, float 5.0, float 4.0, float 3.0, float 2.0, float 1.0>
  %a  = fadd <16 x float> %m0, %m1
  ret <16 x float> %a
}

;
; Pattern: (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
;

define <16 x float> @test_v16f32_fma_fmul_x_c1_c2_y(<16 x float> %x, <16 x float> %y) #0 {
; FMA-LABEL: test_v16f32_fma_fmul_x_c1_c2_y:
; FMA:       # BB#0:
; FMA-NEXT:    vfmadd132ps {{.*}}(%rip), %ymm2, %ymm0
; FMA-NEXT:    vfmadd132ps {{.*}}(%rip), %ymm3, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v16f32_fma_fmul_x_c1_c2_y:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmaddps %ymm2, {{.*}}(%rip), %ymm0, %ymm0
; FMA4-NEXT:    vfmaddps %ymm3, {{.*}}(%rip), %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v16f32_fma_fmul_x_c1_c2_y:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfmadd231ps {{.*}}(%rip), %zmm0, %zmm1
; AVX512-NEXT:    vmovaps %zmm1, %zmm0
; AVX512-NEXT:    retq
  %m0 = fmul <16 x float> %x,  <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0, float 16.0>
  %m1 = fmul <16 x float> %m0, <float 16.0, float 15.0, float 14.0, float 13.0, float 12.0, float 11.0, float 10.0, float 9.0, float 8.0, float 7.0, float 6.0, float 5.0, float 4.0, float 3.0, float 2.0, float 1.0>
  %a  = fadd <16 x float> %m1, %y
  ret <16 x float> %a
}


; Pattern: (fneg (fmul x, y)) -> (fnmsub x, y, 0)

define <16 x float> @test_v16f32_fneg_fmul(<16 x float> %x, <16 x float> %y) #0 {
; FMA-LABEL: test_v16f32_fneg_fmul:
; FMA:       # BB#0:
; FMA-NEXT:    vxorps %ymm4, %ymm4, %ymm4
; FMA-NEXT:    vfnmsub213ps %ymm4, %ymm2, %ymm0
; FMA-NEXT:    vfnmsub213ps %ymm4, %ymm3, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v16f32_fneg_fmul:
; FMA4:       # BB#0:
; FMA4-NEXT:    vxorps %ymm4, %ymm4, %ymm4
; FMA4-NEXT:    vfnmsubps %ymm4, %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vfnmsubps %ymm4, %ymm3, %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v16f32_fneg_fmul:
; AVX512:       # BB#0:
; AVX512-NEXT:    vpxord %zmm2, %zmm2, %zmm2
; AVX512-NEXT:    vfnmsub213ps %zmm2, %zmm1, %zmm0
; AVX512-NEXT:    retq
  %m = fmul nsz <16 x float> %x, %y
  %n = fsub <16 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %m
  ret <16 x float> %n
}

define <8 x double> @test_v8f64_fneg_fmul(<8 x double> %x, <8 x double> %y) #0 {
; FMA-LABEL: test_v8f64_fneg_fmul:
; FMA:       # BB#0:
; FMA-NEXT:    vxorpd %ymm4, %ymm4, %ymm4
; FMA-NEXT:    vfnmsub213pd %ymm4, %ymm2, %ymm0
; FMA-NEXT:    vfnmsub213pd %ymm4, %ymm3, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v8f64_fneg_fmul:
; FMA4:       # BB#0:
; FMA4-NEXT:    vxorpd %ymm4, %ymm4, %ymm4
; FMA4-NEXT:    vfnmsubpd %ymm4, %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vfnmsubpd %ymm4, %ymm3, %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v8f64_fneg_fmul:
; AVX512:       # BB#0:
; AVX512-NEXT:    vpxord %zmm2, %zmm2, %zmm2
; AVX512-NEXT:    vfnmsub213pd %zmm2, %zmm1, %zmm0
; AVX512-NEXT:    retq
  %m = fmul nsz <8 x double> %x, %y
  %n = fsub <8 x double> <double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0>, %m
  ret <8 x double> %n
}

define <8 x double> @test_v8f64_fneg_fmul_no_nsz(<8 x double> %x, <8 x double> %y) #0 {
; FMA-LABEL: test_v8f64_fneg_fmul_no_nsz:
; FMA:       # BB#0:
; FMA-NEXT:    vmulpd %ymm3, %ymm1, %ymm1
; FMA-NEXT:    vmulpd %ymm2, %ymm0, %ymm0
; FMA-NEXT:    vmovapd {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; FMA-NEXT:    vxorpd %ymm2, %ymm0, %ymm0
; FMA-NEXT:    vxorpd %ymm2, %ymm1, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v8f64_fneg_fmul_no_nsz:
; FMA4:       # BB#0:
; FMA4-NEXT:    vmulpd %ymm3, %ymm1, %ymm1
; FMA4-NEXT:    vmulpd %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vmovapd {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; FMA4-NEXT:    vxorpd %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vxorpd %ymm2, %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v8f64_fneg_fmul_no_nsz:
; AVX512:       # BB#0:
; AVX512-NEXT:    vmulpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vxorpd {{.*}}(%rip), %zmm0, %zmm0
; AVX512-NEXT:    retq
  %m = fmul <8 x double> %x, %y
  %n = fsub <8 x double> <double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0>, %m
  ret <8 x double> %n
}

attributes #0 = { "unsafe-fp-math"="true" }