; RUN: llc < %s -mtriple=x86_64-unknown-unknown -march=x86-64 -mcpu=corei7-avx -mattr=+fma | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FMA
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -march=x86-64 -mcpu=core-avx2 -mattr=+fma,+avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FMA
; RUN: llc < %s -mtriple=x86_64-pc-windows -march=x86-64 -mcpu=core-avx2 -mattr=+fma,+avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FMA-WIN
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -march=x86-64 -mcpu=corei7-avx -mattr=+fma4 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FMA4
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=bdver2 -mattr=+avx,-fma | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FMA4
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=bdver2 -mattr=-fma4 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FMA

; VFMADD
; Lowering of @llvm.x86.fma.vfmadd.ss with operands in argument order
; (%a0, %a1, %a2). Expected: vfmadd213ss on the FMA runs; two vmovaps loads
; followed by vfmadd132ss with a (%rdx) memory operand on the Windows run;
; the four-operand vfmaddss on the FMA4 runs.
define <4 x float> @test_x86_fma_vfmadd_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmadd_ss:
; CHECK-NEXT:  # BB#0:
;
; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vfmadd132ss (%rdx), %xmm1, %xmm0
;
; CHECK-FMA-NEXT:    vfmadd213ss %xmm2, %xmm1, %xmm0
;
; CHECK-FMA4-NEXT: vfmaddss %xmm2, %xmm1, %xmm0, %xmm0
;
; CHECK-NEXT: retq
  %res = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
  ret <4 x float> %res
}

; Lowering of @llvm.x86.fma.vfmadd.ss with the first two operands swapped
; (%a1, %a0, %a2). The FMA runs expect the result to be formed in %xmm1 and
; then copied to %xmm0; the Windows run expects the memory operand to be
; (%rcx); the FMA4 runs expect the swapped sources in the four-operand form.
define <4 x float> @test_x86_fma_vfmadd_bac_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmadd_bac_ss:
; CHECK-NEXT:  # BB#0:
;
; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vfmadd132ss (%rcx), %xmm1, %xmm0
;
; CHECK-FMA-NEXT:    vfmadd213ss %xmm2, %xmm0, %xmm1
; CHECK-FMA-NEXT:    vmovaps	%xmm1, %xmm0
;
; CHECK-FMA4-NEXT: vfmaddss %xmm2, %xmm0, %xmm1, %xmm0
;
; CHECK-NEXT: retq
  %res = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %a1, <4 x float> %a0, <4 x float> %a2)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>)

; Lowering of @llvm.x86.fma.vfmadd.sd with operands in argument order
; (%a0, %a1, %a2). Expected: vfmadd213sd on the FMA runs; two vmovapd loads
; followed by vfmadd132sd with a (%rdx) memory operand on the Windows run;
; the four-operand vfmaddsd on the FMA4 runs.
define <2 x double> @test_x86_fma_vfmadd_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmadd_sd:
; CHECK-NEXT:  # BB#0:
;
; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vfmadd132sd (%rdx), %xmm1, %xmm0
;
; CHECK-FMA-NEXT:    vfmadd213sd %xmm2, %xmm1, %xmm0
;
; CHECK-FMA4-NEXT: vfmaddsd %xmm2, %xmm1, %xmm0, %xmm0
;
; CHECK-NEXT: retq
  %res = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
  ret <2 x double> %res
}

; Lowering of @llvm.x86.fma.vfmadd.sd with the first two operands swapped
; (%a1, %a0, %a2). The FMA runs expect the result to be formed in %xmm1 and
; then copied to %xmm0; the Windows run expects the memory operand to be
; (%rcx); the FMA4 runs expect the swapped sources in the four-operand form.
define <2 x double> @test_x86_fma_vfmadd_bac_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmadd_bac_sd:
; CHECK-NEXT:  # BB#0:
;
; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vfmadd132sd (%rcx), %xmm1, %xmm0
;
; CHECK-FMA-NEXT:    vfmadd213sd %xmm2, %xmm0, %xmm1
; CHECK-FMA-NEXT:    vmovapd	%xmm1, %xmm0
;
; CHECK-FMA4-NEXT: vfmaddsd %xmm2, %xmm0, %xmm1, %xmm0
;
; CHECK-NEXT: retq
  %res = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %a1, <2 x double> %a0, <2 x double> %a2)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double>, <2 x double>, <2 x double>)

; Lowering of @llvm.x86.fma.vfmadd.ps on <4 x float> operands. Expected:
; vfmadd213ps with register operands on the FMA runs; two vmovaps loads and
; vfmadd213ps with a (%r8) memory operand on the Windows run; the
; four-operand vfmaddps on the FMA4 runs.
define <4 x float> @test_x86_fma_vfmadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmadd_ps:
; CHECK-NEXT:  # BB#0:
;
; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %xmm{{0|1}}
; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %xmm{{0|1}}
; CHECK-FMA-WIN-NEXT: vfmadd213ps (%r8), %xmm1, %xmm0
;
; CHECK-FMA-NEXT:    vfmadd213ps %xmm2, %xmm1, %xmm0
;
; CHECK-FMA4-NEXT: vfmaddps %xmm2, %xmm1, %xmm0, %xmm0
;
; CHECK-NEXT: retq
  %res = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float>, <4 x float>, <4 x float>)

; Lowering of @llvm.x86.fma.vfmadd.pd on <2 x double> operands. Expected:
; vfmadd213pd with register operands on the FMA runs; two vmovapd loads and
; vfmadd213pd with a (%r8) memory operand on the Windows run; the
; four-operand vfmaddpd on the FMA4 runs.
define <2 x double> @test_x86_fma_vfmadd_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmadd_pd:
; CHECK-NEXT:  # BB#0:
;
; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %xmm{{0|1}}
; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %xmm{{0|1}}
; CHECK-FMA-WIN-NEXT: vfmadd213pd (%r8), %xmm1, %xmm0
;
; CHECK-FMA-NEXT:    vfmadd213pd %xmm2, %xmm1, %xmm0
;
; CHECK-FMA4-NEXT: vfmaddpd %xmm2, %xmm1, %xmm0, %xmm0
;
; CHECK-NEXT: retq
  %res = call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double>, <2 x double>, <2 x double>)

; Lowering of @llvm.x86.fma.vfmadd.ps.256 on <8 x float> (%ymm) operands.
; Expected: vfmadd213ps with register operands on the FMA runs; two vmovaps
; loads and vfmadd213ps with a (%r8) memory operand on the Windows run; the
; four-operand vfmaddps on the FMA4 runs.
define <8 x float> @test_x86_fma_vfmadd_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmadd_ps_256:
; CHECK-NEXT:  # BB#0:
;
; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %ymm{{0|1}}
; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %ymm{{0|1}}
; CHECK-FMA-WIN-NEXT: vfmadd213ps (%r8), %ymm1, %ymm0
;
; CHECK-FMA-NEXT:    vfmadd213ps %ymm2, %ymm1, %ymm0
;
; CHECK-FMA4-NEXT: vfmaddps %ymm2, %ymm1, %ymm0, %ymm0
;
; CHECK-NEXT: retq
  %res = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>)

; Lowering of @llvm.x86.fma.vfmadd.pd.256 on <4 x double> (%ymm) operands.
; Expected: vfmadd213pd with register operands on the FMA runs; two vmovapd
; loads and vfmadd213pd with a (%r8) memory operand on the Windows run; the
; four-operand vfmaddpd on the FMA4 runs.
define <4 x double> @test_x86_fma_vfmadd_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmadd_pd_256:
; CHECK-NEXT:  # BB#0:
;
; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %ymm{{0|1}}
; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %ymm{{0|1}}
; CHECK-FMA-WIN-NEXT: vfmadd213pd (%r8), %ymm1, %ymm0
;
; CHECK-FMA-NEXT:    vfmadd213pd %ymm2, %ymm1, %ymm0
;
; CHECK-FMA4-NEXT: vfmaddpd %ymm2, %ymm1, %ymm0, %ymm0
;
; CHECK-NEXT: retq
  %res = call <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double>, <4 x double>, <4 x double>)

; VFMSUB
; Lowering of @llvm.x86.fma.vfmsub.ss with operands in argument order
; (%a0, %a1, %a2). Expected: vfmsub213ss on the FMA runs; two vmovaps loads
; followed by vfmsub132ss with a (%rdx) memory operand on the Windows run;
; the four-operand vfmsubss on the FMA4 runs.
define <4 x float> @test_x86_fma_vfmsub_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmsub_ss:
; CHECK-NEXT:  # BB#0:
;
; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vfmsub132ss (%rdx), %xmm1, %xmm0
;
; CHECK-FMA-NEXT:    vfmsub213ss %xmm2, %xmm1, %xmm0
;
; CHECK-FMA4-NEXT: vfmsubss %xmm2, %xmm1, %xmm0, %xmm0
;
; CHECK-NEXT: retq
  %res = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
  ret <4 x float> %res
}

; Lowering of @llvm.x86.fma.vfmsub.ss with the first two operands swapped
; (%a1, %a0, %a2). The FMA runs expect the result to be formed in %xmm1 and
; then copied to %xmm0; the Windows run expects the memory operand to be
; (%rcx); the FMA4 runs expect the swapped sources in the four-operand form.
define <4 x float> @test_x86_fma_vfmsub_bac_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmsub_bac_ss:
; CHECK-NEXT:  # BB#0:
;
; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vfmsub132ss (%rcx), %xmm1, %xmm0
;
; CHECK-FMA-NEXT:    vfmsub213ss %xmm2, %xmm0, %xmm1
; CHECK-FMA-NEXT:    vmovaps	%xmm1, %xmm0
;
; CHECK-FMA4-NEXT: vfmsubss %xmm2, %xmm0, %xmm1, %xmm0
;
; CHECK-NEXT: retq
  %res = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %a1, <4 x float> %a0, <4 x float> %a2)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float>, <4 x float>, <4 x float>)

; Lowering of @llvm.x86.fma.vfmsub.sd with operands in argument order
; (%a0, %a1, %a2). Expected: vfmsub213sd on the FMA runs; two vmovapd loads
; followed by vfmsub132sd with a (%rdx) memory operand on the Windows run;
; the four-operand vfmsubsd on the FMA4 runs.
define <2 x double> @test_x86_fma_vfmsub_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmsub_sd:
; CHECK-NEXT:  # BB#0:
;
; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vfmsub132sd (%rdx), %xmm1, %xmm0
;
; CHECK-FMA-NEXT:    vfmsub213sd %xmm2, %xmm1, %xmm0
;
; CHECK-FMA4-NEXT: vfmsubsd %xmm2, %xmm1, %xmm0, %xmm0
;
; CHECK-NEXT: retq
  %res = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
  ret <2 x double> %res
}

; Lowering of @llvm.x86.fma.vfmsub.sd with the first two operands swapped
; (%a1, %a0, %a2). The FMA runs expect the result to be formed in %xmm1 and
; then copied to %xmm0; the Windows run expects the memory operand to be
; (%rcx); the FMA4 runs expect the swapped sources in the four-operand form.
define <2 x double> @test_x86_fma_vfmsub_bac_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmsub_bac_sd:
; CHECK-NEXT:  # BB#0:
;
; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vfmsub132sd (%rcx), %xmm1, %xmm0
;
; CHECK-FMA-NEXT:    vfmsub213sd %xmm2, %xmm0, %xmm1
; CHECK-FMA-NEXT:    vmovapd	%xmm1, %xmm0
;
; CHECK-FMA4-NEXT: vfmsubsd %xmm2, %xmm0, %xmm1, %xmm0
;
; CHECK-NEXT: retq
  %res = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %a1, <2 x double> %a0, <2 x double> %a2)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double>, <2 x double>, <2 x double>)

; Lowering of @llvm.x86.fma.vfmsub.ps on <4 x float> operands. Expected:
; vfmsub213ps with register operands on the FMA runs; two vmovaps loads and
; vfmsub213ps with a (%r8) memory operand on the Windows run; the
; four-operand vfmsubps on the FMA4 runs.
define <4 x float> @test_x86_fma_vfmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmsub_ps:
; CHECK-NEXT:  # BB#0:
;
; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %xmm{{0|1}}
; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %xmm{{0|1}}
; CHECK-FMA-WIN-NEXT: vfmsub213ps (%r8), %xmm1, %xmm0
;
; CHECK-FMA-NEXT:    vfmsub213ps %xmm2, %xmm1, %xmm0
;
; CHECK-FMA4-NEXT: vfmsubps %xmm2, %xmm1, %xmm0, %xmm0
;
; CHECK-NEXT: retq
  %res = call <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float>, <4 x float>, <4 x float>)

; Lowering of @llvm.x86.fma.vfmsub.pd on <2 x double> operands. Expected:
; vfmsub213pd with register operands on the FMA runs; two vmovapd loads and
; vfmsub213pd with a (%r8) memory operand on the Windows run; the
; four-operand vfmsubpd on the FMA4 runs.
define <2 x double> @test_x86_fma_vfmsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmsub_pd:
; CHECK-NEXT:  # BB#0:
;
; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %xmm{{0|1}}
; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %xmm{{0|1}}
; CHECK-FMA-WIN-NEXT: vfmsub213pd (%r8), %xmm1, %xmm0
;
; CHECK-FMA-NEXT:    vfmsub213pd %xmm2, %xmm1, %xmm0
;
; CHECK-FMA4-NEXT: vfmsubpd %xmm2, %xmm1, %xmm0, %xmm0
;
; CHECK-NEXT: retq
  %res = call <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double>, <2 x double>, <2 x double>)

; Lowering of @llvm.x86.fma.vfmsub.ps.256 on <8 x float> (%ymm) operands.
; Expected: vfmsub213ps with register operands on the FMA runs; two vmovaps
; loads and vfmsub213ps with a (%r8) memory operand on the Windows run; the
; four-operand vfmsubps on the FMA4 runs.
define <8 x float> @test_x86_fma_vfmsub_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmsub_ps_256:
; CHECK-NEXT:  # BB#0:
;
; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %ymm{{0|1}}
; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %ymm{{0|1}}
; CHECK-FMA-WIN-NEXT: vfmsub213ps (%r8), %ymm1, %ymm0
;
; CHECK-FMA-NEXT:    vfmsub213ps %ymm2, %ymm1, %ymm0
;
; CHECK-FMA4-NEXT: vfmsubps %ymm2, %ymm1, %ymm0, %ymm0
;
; CHECK-NEXT: retq
  %res = call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float>, <8 x float>, <8 x float>)

; Lowering of @llvm.x86.fma.vfmsub.pd.256 on <4 x double> (%ymm) operands.
; Expected: vfmsub213pd with register operands on the FMA runs; two vmovapd
; loads and vfmsub213pd with a (%r8) memory operand on the Windows run; the
; four-operand vfmsubpd on the FMA4 runs.
define <4 x double> @test_x86_fma_vfmsub_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmsub_pd_256:
; CHECK-NEXT:  # BB#0:
;
; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %ymm{{0|1}}
; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %ymm{{0|1}}
; CHECK-FMA-WIN-NEXT: vfmsub213pd (%r8), %ymm1, %ymm0
;
; CHECK-FMA-NEXT:    vfmsub213pd %ymm2, %ymm1, %ymm0
;
; CHECK-FMA4-NEXT: vfmsubpd %ymm2, %ymm1, %ymm0, %ymm0
;
; CHECK-NEXT: retq
  %res = call <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double>, <4 x double>, <4 x double>)

; VFNMADD
; Lowering of @llvm.x86.fma.vfnmadd.ss with operands in argument order
; (%a0, %a1, %a2). Expected: vfnmadd213ss on the FMA runs; two vmovaps loads
; followed by vfnmadd132ss with a (%rdx) memory operand on the Windows run;
; the four-operand vfnmaddss on the FMA4 runs.
define <4 x float> @test_x86_fma_vfnmadd_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfnmadd_ss:
; CHECK-NEXT:  # BB#0:
;
; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vfnmadd132ss (%rdx), %xmm1, %xmm0
;
; CHECK-FMA-NEXT:    vfnmadd213ss %xmm2, %xmm1, %xmm0
;
; CHECK-FMA4-NEXT: vfnmaddss %xmm2, %xmm1, %xmm0, %xmm0
;
; CHECK-NEXT: retq
  %res = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
  ret <4 x float> %res
}

; Lowering of @llvm.x86.fma.vfnmadd.ss with the first two operands swapped
; (%a1, %a0, %a2). The FMA runs expect the result to be formed in %xmm1 and
; then copied to %xmm0; the Windows run expects the memory operand to be
; (%rcx); the FMA4 runs expect the swapped sources in the four-operand form.
define <4 x float> @test_x86_fma_vfnmadd_bac_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfnmadd_bac_ss:
; CHECK-NEXT:  # BB#0:
;
; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vfnmadd132ss (%rcx), %xmm1, %xmm0
;
; CHECK-FMA-NEXT:    vfnmadd213ss %xmm2, %xmm0, %xmm1
; CHECK-FMA-NEXT:    vmovaps	%xmm1, %xmm0
;
; CHECK-FMA4-NEXT: vfnmaddss %xmm2, %xmm0, %xmm1, %xmm0
;
; CHECK-NEXT: retq
  %res = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %a1, <4 x float> %a0, <4 x float> %a2)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float>, <4 x float>, <4 x float>)

; Lowering of @llvm.x86.fma.vfnmadd.sd with operands in argument order
; (%a0, %a1, %a2). Expected: vfnmadd213sd on the FMA runs; two vmovapd loads
; followed by vfnmadd132sd with a (%rdx) memory operand on the Windows run;
; the four-operand vfnmaddsd on the FMA4 runs.
define <2 x double> @test_x86_fma_vfnmadd_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfnmadd_sd:
; CHECK-NEXT:  # BB#0:
;
; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vfnmadd132sd (%rdx), %xmm1, %xmm0
;
; CHECK-FMA-NEXT:    vfnmadd213sd %xmm2, %xmm1, %xmm0
;
; CHECK-FMA4-NEXT: vfnmaddsd %xmm2, %xmm1, %xmm0, %xmm0
;
; CHECK-NEXT: retq
  %res = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
  ret <2 x double> %res
}

; Lowering of @llvm.x86.fma.vfnmadd.sd with the first two operands swapped
; (%a1, %a0, %a2). The FMA runs expect the result to be formed in %xmm1 and
; then copied to %xmm0; the Windows run expects the memory operand to be
; (%rcx); the FMA4 runs expect the swapped sources in the four-operand form.
define <2 x double> @test_x86_fma_vfnmadd_bac_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfnmadd_bac_sd:
; CHECK-NEXT:  # BB#0:
;
; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vfnmadd132sd (%rcx), %xmm1, %xmm0
;
; CHECK-FMA-NEXT:    vfnmadd213sd %xmm2, %xmm0, %xmm1
; CHECK-FMA-NEXT:    vmovapd	%xmm1, %xmm0
;
; CHECK-FMA4-NEXT: vfnmaddsd %xmm2, %xmm0, %xmm1, %xmm0
;
; CHECK-NEXT: retq
  %res = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %a1, <2 x double> %a0, <2 x double> %a2)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double>, <2 x double>, <2 x double>)

; Lowering of @llvm.x86.fma.vfnmadd.ps on <4 x float> operands. Expected:
; vfnmadd213ps with register operands on the FMA runs; two vmovaps loads and
; vfnmadd213ps with a (%r8) memory operand on the Windows run; the
; four-operand vfnmaddps on the FMA4 runs.
define <4 x float> @test_x86_fma_vfnmadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfnmadd_ps:
; CHECK-NEXT:  # BB#0:
;
; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %xmm{{0|1}}
; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %xmm{{0|1}}
; CHECK-FMA-WIN-NEXT: vfnmadd213ps (%r8), %xmm1, %xmm0
;
; CHECK-FMA-NEXT:    vfnmadd213ps %xmm2, %xmm1, %xmm0
;
; CHECK-FMA4-NEXT: vfnmaddps %xmm2, %xmm1, %xmm0, %xmm0
;
; CHECK-NEXT: retq
  %res = call <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float>, <4 x float>, <4 x float>)

; Lowering of @llvm.x86.fma.vfnmadd.pd on <2 x double> operands. Expected:
; vfnmadd213pd with register operands on the FMA runs; two vmovapd loads and
; vfnmadd213pd with a (%r8) memory operand on the Windows run; the
; four-operand vfnmaddpd on the FMA4 runs.
define <2 x double> @test_x86_fma_vfnmadd_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfnmadd_pd:
; CHECK-NEXT:  # BB#0:
;
; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %xmm{{0|1}}
; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %xmm{{0|1}}
; CHECK-FMA-WIN-NEXT: vfnmadd213pd (%r8), %xmm1, %xmm0
;
; CHECK-FMA-NEXT:    vfnmadd213pd %xmm2, %xmm1, %xmm0
;
; CHECK-FMA4-NEXT: vfnmaddpd %xmm2, %xmm1, %xmm0, %xmm0
;
; CHECK-NEXT: retq
  %res = call <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double>, <2 x double>, <2 x double>)

; Lowering of @llvm.x86.fma.vfnmadd.ps.256 on <8 x float> (%ymm) operands.
; Expected: vfnmadd213ps with register operands on the FMA runs; two vmovaps
; loads and vfnmadd213ps with a (%r8) memory operand on the Windows run; the
; four-operand vfnmaddps on the FMA4 runs.
define <8 x float> @test_x86_fma_vfnmadd_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfnmadd_ps_256:
; CHECK-NEXT:  # BB#0:
;
; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %ymm{{0|1}}
; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %ymm{{0|1}}
; CHECK-FMA-WIN-NEXT: vfnmadd213ps (%r8), %ymm1, %ymm0
;
; CHECK-FMA-NEXT:    vfnmadd213ps %ymm2, %ymm1, %ymm0
;
; CHECK-FMA4-NEXT: vfnmaddps %ymm2, %ymm1, %ymm0, %ymm0
;
; CHECK-NEXT: retq
  %res = call <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float>, <8 x float>, <8 x float>)

; Lowering of @llvm.x86.fma.vfnmadd.pd.256 on <4 x double> (%ymm) operands.
; Expected: vfnmadd213pd with register operands on the FMA runs; two vmovapd
; loads and vfnmadd213pd with a (%r8) memory operand on the Windows run; the
; four-operand vfnmaddpd on the FMA4 runs.
define <4 x double> @test_x86_fma_vfnmadd_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfnmadd_pd_256:
; CHECK-NEXT:  # BB#0:
;
; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %ymm{{0|1}}
; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %ymm{{0|1}}
; CHECK-FMA-WIN-NEXT: vfnmadd213pd (%r8), %ymm1, %ymm0
;
; CHECK-FMA-NEXT:    vfnmadd213pd %ymm2, %ymm1, %ymm0
;
; CHECK-FMA4-NEXT: vfnmaddpd %ymm2, %ymm1, %ymm0, %ymm0
;
; CHECK-NEXT: retq
  %res = call <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double>, <4 x double>, <4 x double>)

; VFNMSUB
; Lowering of @llvm.x86.fma.vfnmsub.ss with operands in argument order
; (%a0, %a1, %a2). Expected: vfnmsub213ss on the FMA runs; two vmovaps loads
; followed by vfnmsub132ss with a (%rdx) memory operand on the Windows run;
; the four-operand vfnmsubss on the FMA4 runs.
define <4 x float> @test_x86_fma_vfnmsub_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfnmsub_ss:
; CHECK-NEXT:  # BB#0:
;
; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vfnmsub132ss (%rdx), %xmm1, %xmm0
;
; CHECK-FMA-NEXT:    vfnmsub213ss %xmm2, %xmm1, %xmm0
;
; CHECK-FMA4-NEXT: vfnmsubss %xmm2, %xmm1, %xmm0, %xmm0
;
; CHECK-NEXT: retq
  %res = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
  ret <4 x float> %res
}

; Lowering of @llvm.x86.fma.vfnmsub.ss with the first two operands swapped
; (%a1, %a0, %a2). The FMA runs expect the result to be formed in %xmm1 and
; then copied to %xmm0; the Windows run expects the memory operand to be
; (%rcx); the FMA4 runs expect the swapped sources in the four-operand form.
define <4 x float> @test_x86_fma_vfnmsub_bac_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfnmsub_bac_ss:
; CHECK-NEXT:  # BB#0:
;
; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vfnmsub132ss (%rcx), %xmm1, %xmm0
;
; CHECK-FMA-NEXT:    vfnmsub213ss %xmm2, %xmm0, %xmm1
; CHECK-FMA-NEXT:    vmovaps	%xmm1, %xmm0
;
; CHECK-FMA4-NEXT: vfnmsubss %xmm2, %xmm0, %xmm1, %xmm0
;
; CHECK-NEXT: retq
  %res = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %a1, <4 x float> %a0, <4 x float> %a2)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float>, <4 x float>, <4 x float>)

; Lowering of @llvm.x86.fma.vfnmsub.sd with operands in argument order
; (%a0, %a1, %a2). Expected: vfnmsub213sd on the FMA runs; two vmovapd loads
; followed by vfnmsub132sd with a (%rdx) memory operand on the Windows run;
; the four-operand vfnmsubsd on the FMA4 runs.
define <2 x double> @test_x86_fma_vfnmsub_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfnmsub_sd:
; CHECK-NEXT:  # BB#0:
;
; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vfnmsub132sd (%rdx), %xmm1, %xmm0
;
; CHECK-FMA-NEXT:    vfnmsub213sd %xmm2, %xmm1, %xmm0
;
; CHECK-FMA4-NEXT: vfnmsubsd %xmm2, %xmm1, %xmm0, %xmm0
;
; CHECK-NEXT: retq
  %res = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
  ret <2 x double> %res
}

; Lowering of @llvm.x86.fma.vfnmsub.sd with the first two operands swapped
; (%a1, %a0, %a2). The FMA runs expect the result to be formed in %xmm1 and
; then copied to %xmm0; the Windows run expects the memory operand to be
; (%rcx); the FMA4 runs expect the swapped sources in the four-operand form.
define <2 x double> @test_x86_fma_vfnmsub_bac_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfnmsub_bac_sd:
; CHECK-NEXT:  # BB#0:
;
; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vfnmsub132sd (%rcx), %xmm1, %xmm0
;
; CHECK-FMA-NEXT:    vfnmsub213sd %xmm2, %xmm0, %xmm1
; CHECK-FMA-NEXT:    vmovapd	%xmm1, %xmm0
;
; CHECK-FMA4-NEXT: vfnmsubsd %xmm2, %xmm0, %xmm1, %xmm0
;
; CHECK-NEXT: retq
  %res = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %a1, <2 x double> %a0, <2 x double> %a2)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double>, <2 x double>, <2 x double>)

; Lowering of @llvm.x86.fma.vfnmsub.ps on <4 x float> operands. Expected:
; vfnmsub213ps with register operands on the FMA runs; two vmovaps loads and
; vfnmsub213ps with a (%r8) memory operand on the Windows run; the
; four-operand vfnmsubps on the FMA4 runs.
define <4 x float> @test_x86_fma_vfnmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfnmsub_ps:
; CHECK-NEXT:  # BB#0:
;
; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %xmm{{0|1}}
; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %xmm{{0|1}}
; CHECK-FMA-WIN-NEXT: vfnmsub213ps (%r8), %xmm1, %xmm0
;
; CHECK-FMA-NEXT:    vfnmsub213ps %xmm2, %xmm1, %xmm0
;
; CHECK-FMA4-NEXT: vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0
;
; CHECK-NEXT: retq
  %res = call <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float>, <4 x float>, <4 x float>)

; Lowering of @llvm.x86.fma.vfnmsub.pd on <2 x double> operands. Expected:
; vfnmsub213pd with register operands on the FMA runs; two vmovapd loads and
; vfnmsub213pd with a (%r8) memory operand on the Windows run; the
; four-operand vfnmsubpd on the FMA4 runs.
define <2 x double> @test_x86_fma_vfnmsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfnmsub_pd:
; CHECK-NEXT:  # BB#0:
;
; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %xmm{{0|1}}
; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %xmm{{0|1}}
; CHECK-FMA-WIN-NEXT: vfnmsub213pd (%r8), %xmm1, %xmm0
;
; CHECK-FMA-NEXT:    vfnmsub213pd %xmm2, %xmm1, %xmm0
;
; CHECK-FMA4-NEXT: vfnmsubpd %xmm2, %xmm1, %xmm0, %xmm0
;
; CHECK-NEXT: retq
  %res = call <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double>, <2 x double>, <2 x double>)

; Lowering of @llvm.x86.fma.vfnmsub.ps.256 on <8 x float> (%ymm) operands.
; Expected: vfnmsub213ps with register operands on the FMA runs; two vmovaps
; loads and vfnmsub213ps with a (%r8) memory operand on the Windows run; the
; four-operand vfnmsubps on the FMA4 runs.
define <8 x float> @test_x86_fma_vfnmsub_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfnmsub_ps_256:
; CHECK-NEXT:  # BB#0:
;
; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %ymm{{0|1}}
; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %ymm{{0|1}}
; CHECK-FMA-WIN-NEXT: vfnmsub213ps (%r8), %ymm1, %ymm0
;
; CHECK-FMA-NEXT:    vfnmsub213ps %ymm2, %ymm1, %ymm0
;
; CHECK-FMA4-NEXT: vfnmsubps %ymm2, %ymm1, %ymm0, %ymm0
;
; CHECK-NEXT: retq
  %res = call <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float>, <8 x float>, <8 x float>)

; Lowering of @llvm.x86.fma.vfnmsub.pd.256 on <4 x double> (%ymm) operands.
; Expected: vfnmsub213pd with register operands on the FMA runs; two vmovapd
; loads and vfnmsub213pd with a (%r8) memory operand on the Windows run; the
; four-operand vfnmsubpd on the FMA4 runs.
define <4 x double> @test_x86_fma_vfnmsub_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfnmsub_pd_256:
; CHECK-NEXT:  # BB#0:
;
; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %ymm{{0|1}}
; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %ymm{{0|1}}
; CHECK-FMA-WIN-NEXT: vfnmsub213pd (%r8), %ymm1, %ymm0
;
; CHECK-FMA-NEXT:    vfnmsub213pd %ymm2, %ymm1, %ymm0
;
; CHECK-FMA4-NEXT: vfnmsubpd %ymm2, %ymm1, %ymm0, %ymm0
;
; CHECK-NEXT: retq
  %res = call <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double>, <4 x double>, <4 x double>)

; VFMADDSUB
; Lowering of @llvm.x86.fma.vfmaddsub.ps on <4 x float> operands. Expected:
; vfmaddsub213ps with register operands on the FMA runs; two vmovaps loads
; and vfmaddsub213ps with a (%r8) memory operand on the Windows run; the
; four-operand vfmaddsubps on the FMA4 runs.
define <4 x float> @test_x86_fma_vfmaddsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmaddsub_ps:
; CHECK-NEXT:  # BB#0:
;
; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %xmm{{0|1}}
; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %xmm{{0|1}}
; CHECK-FMA-WIN-NEXT: vfmaddsub213ps (%r8), %xmm1, %xmm0
;
; CHECK-FMA-NEXT:    vfmaddsub213ps %xmm2, %xmm1, %xmm0
;
; CHECK-FMA4-NEXT: vfmaddsubps %xmm2, %xmm1, %xmm0, %xmm0
;
; CHECK-NEXT: retq
  %res = call <4 x float> @llvm.x86.fma.vfmaddsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.fma.vfmaddsub.ps(<4 x float>, <4 x float>, <4 x float>)

; Lowering of @llvm.x86.fma.vfmaddsub.pd on <2 x double> operands. Expected:
; vfmaddsub213pd with register operands on the FMA runs; two vmovapd loads
; and vfmaddsub213pd with a (%r8) memory operand on the Windows run; the
; four-operand vfmaddsubpd on the FMA4 runs.
define <2 x double> @test_x86_fma_vfmaddsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmaddsub_pd:
; CHECK-NEXT:  # BB#0:
;
; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %xmm{{0|1}}
; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %xmm{{0|1}}
; CHECK-FMA-WIN-NEXT: vfmaddsub213pd (%r8), %xmm1, %xmm0
;
; CHECK-FMA-NEXT:    vfmaddsub213pd %xmm2, %xmm1, %xmm0
;
; CHECK-FMA4-NEXT: vfmaddsubpd %xmm2, %xmm1, %xmm0, %xmm0
;
; CHECK-NEXT: retq
  %res = call <2 x double> @llvm.x86.fma.vfmaddsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.fma.vfmaddsub.pd(<2 x double>, <2 x double>, <2 x double>)

; Lowering of @llvm.x86.fma.vfmaddsub.ps.256 on <8 x float> (%ymm) operands.
; Expected: vfmaddsub213ps with register operands on the FMA runs; two
; vmovaps loads and vfmaddsub213ps with a (%r8) memory operand on the
; Windows run; the four-operand vfmaddsubps on the FMA4 runs.
define <8 x float> @test_x86_fma_vfmaddsub_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmaddsub_ps_256:
; CHECK-NEXT:  # BB#0:
;
; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %ymm{{0|1}}
; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %ymm{{0|1}}
; CHECK-FMA-WIN-NEXT: vfmaddsub213ps (%r8), %ymm1, %ymm0
;
; CHECK-FMA-NEXT:    vfmaddsub213ps %ymm2, %ymm1, %ymm0
;
; CHECK-FMA4-NEXT: vfmaddsubps %ymm2, %ymm1, %ymm0, %ymm0
;
; CHECK-NEXT: retq
  %res = call <8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float>, <8 x float>, <8 x float>)

; Lowering of @llvm.x86.fma.vfmaddsub.pd.256 on <4 x double> (%ymm)
; operands. Expected: vfmaddsub213pd with register operands on the FMA runs;
; two vmovapd loads and vfmaddsub213pd with a (%r8) memory operand on the
; Windows run; the four-operand vfmaddsubpd on the FMA4 runs.
define <4 x double> @test_x86_fma_vfmaddsub_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmaddsub_pd_256:
; CHECK-NEXT:  # BB#0:
;
; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %ymm{{0|1}}
; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %ymm{{0|1}}
; CHECK-FMA-WIN-NEXT: vfmaddsub213pd (%r8), %ymm1, %ymm0
;
; CHECK-FMA-NEXT:    vfmaddsub213pd %ymm2, %ymm1, %ymm0
;
; CHECK-FMA4-NEXT: vfmaddsubpd %ymm2, %ymm1, %ymm0, %ymm0
;
; CHECK-NEXT: retq
  %res = call <4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double>, <4 x double>, <4 x double>)

; VFMSUBADD
; Lowering of @llvm.x86.fma.vfmsubadd.ps on <4 x float> operands. Expected:
; vfmsubadd213ps with register operands on the FMA runs; two vmovaps loads
; and vfmsubadd213ps with a (%r8) memory operand on the Windows run; the
; four-operand vfmsubaddps on the FMA4 runs.
define <4 x float> @test_x86_fma_vfmsubadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmsubadd_ps:
; CHECK-NEXT:  # BB#0:
;
; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %xmm{{0|1}}
; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %xmm{{0|1}}
; CHECK-FMA-WIN-NEXT: vfmsubadd213ps (%r8), %xmm1, %xmm0
;
; CHECK-FMA-NEXT:    vfmsubadd213ps %xmm2, %xmm1, %xmm0
;
; CHECK-FMA4-NEXT: vfmsubaddps %xmm2, %xmm1, %xmm0, %xmm0
;
; CHECK-NEXT: retq
  %res = call <4 x float> @llvm.x86.fma.vfmsubadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.fma.vfmsubadd.ps(<4 x float>, <4 x float>, <4 x float>)

; Lowering of @llvm.x86.fma.vfmsubadd.pd on <2 x double> operands. Expected:
; vfmsubadd213pd with register operands on the FMA runs; two vmovapd loads
; and vfmsubadd213pd with a (%r8) memory operand on the Windows run; the
; four-operand vfmsubaddpd on the FMA4 runs.
define <2 x double> @test_x86_fma_vfmsubadd_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmsubadd_pd:
; CHECK-NEXT:  # BB#0:
;
; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %xmm{{0|1}}
; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %xmm{{0|1}}
; CHECK-FMA-WIN-NEXT: vfmsubadd213pd (%r8), %xmm1, %xmm0
;
; CHECK-FMA-NEXT:    vfmsubadd213pd %xmm2, %xmm1, %xmm0
;
; CHECK-FMA4-NEXT: vfmsubaddpd %xmm2, %xmm1, %xmm0, %xmm0
;
; CHECK-NEXT: retq
  %res = call <2 x double> @llvm.x86.fma.vfmsubadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.fma.vfmsubadd.pd(<2 x double>, <2 x double>, <2 x double>)

; Lowering of @llvm.x86.fma.vfmsubadd.ps.256 on <8 x float> (%ymm) operands.
; Expected: vfmsubadd213ps with register operands on the FMA runs; two
; vmovaps loads and vfmsubadd213ps with a (%r8) memory operand on the
; Windows run; the four-operand vfmsubaddps on the FMA4 runs.
define <8 x float> @test_x86_fma_vfmsubadd_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmsubadd_ps_256:
; CHECK-NEXT:  # BB#0:
;
; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %ymm{{0|1}}
; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %ymm{{0|1}}
; CHECK-FMA-WIN-NEXT: vfmsubadd213ps (%r8), %ymm1, %ymm0
;
; CHECK-FMA-NEXT:    vfmsubadd213ps %ymm2, %ymm1, %ymm0
;
; CHECK-FMA4-NEXT: vfmsubaddps %ymm2, %ymm1, %ymm0, %ymm0
;
; CHECK-NEXT: retq
  %res = call <8 x float> @llvm.x86.fma.vfmsubadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.fma.vfmsubadd.ps.256(<8 x float>, <8 x float>, <8 x float>)

; Lowering of @llvm.x86.fma.vfmsubadd.pd.256 on <4 x double> (%ymm)
; operands. Expected: vfmsubadd213pd with register operands on the FMA runs;
; two vmovapd loads and vfmsubadd213pd with a (%r8) memory operand on the
; Windows run; the four-operand vfmsubaddpd on the FMA4 runs.
define <4 x double> @test_x86_fma_vfmsubadd_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmsubadd_pd_256:
; CHECK-NEXT:  # BB#0:
;
; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %ymm{{0|1}}
; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %ymm{{0|1}}
; CHECK-FMA-WIN-NEXT: vfmsubadd213pd (%r8), %ymm1, %ymm0
;
; CHECK-FMA-NEXT:    vfmsubadd213pd %ymm2, %ymm1, %ymm0
;
; CHECK-FMA4-NEXT: vfmsubaddpd %ymm2, %ymm1, %ymm0, %ymm0
;
; CHECK-NEXT: retq
  %res = call <4 x double> @llvm.x86.fma.vfmsubadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.fma.vfmsubadd.pd.256(<4 x double>, <4 x double>, <4 x double>)

; Attribute group #0: applied to the test functions in this file via the
; trailing "#0" on their define lines.
attributes #0 = { nounwind }