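; Codegen tests for the llvm.x86.fma.* intrinsics when the same value is passed
; for more than one operand. Each test function is named after the operand
; order it uses (baa, aba or bba) and pins the exact instruction sequence.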
; The same expectations are verified under three subtarget configurations, all
; on the x86_64-pc-win32 triple: -mcpu=core-avx2, -mattr=+fma, and -mcpu=bdver2
; with FMA4 disabled. The expected assembly in this file reads %a through
; (%rcx) and %b through (%rdx).
; RUN: llc < %s -mtriple=x86_64-pc-win32 -mcpu=core-avx2 | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-pc-win32 -mattr=+fma | FileCheck %s
; RUN: llc < %s -mcpu=bdver2 -mtriple=x86_64-pc-win32 -mattr=-fma4 | FileCheck %s

; Attribute group referenced as #0 by the test functions.
attributes #0 = { nounwind }
; Tests for @llvm.x86.fma.vfmadd.ss on <4 x float> operands. The baa/aba/bba
; infix in each function name gives the order in which %a and %b are passed.
declare <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>) nounwind readnone

; Call operands (%b, %a, %a): both arguments are expected in registers before a
; register-only vfmadd213ss; the alternation accepts the two loads in either order.
define <4 x float> @test_x86_fmadd_baa_ss(<4 x float> %a, <4 x float> %b) #0 {
; CHECK-LABEL: test_x86_fmadd_baa_ss:
; CHECK:       # BB#0:
; CHECK-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
; CHECK-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
; CHECK-NEXT: vfmadd213ss %xmm1, %xmm1, %xmm0
; CHECK-NEXT: retq
  %res = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
  ret <4 x float> %res
}

; Call operands (%a, %b, %a): one load from (%rcx), then vfmadd132ss with
; (%rdx) as its memory operand.
define <4 x float> @test_x86_fmadd_aba_ss(<4 x float> %a, <4 x float> %b) #0 {
; CHECK-LABEL: test_x86_fmadd_aba_ss:
; CHECK:       # BB#0:
; CHECK-NEXT: vmovaps	(%rcx), %xmm0
; CHECK-NEXT: vfmadd132ss (%rdx), %xmm0, %xmm0
; CHECK-NEXT: retq
  %res = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
  ret <4 x float> %res
}

; Call operands (%b, %b, %a): one load from (%rdx), then vfmadd213ss with
; (%rcx) as its memory operand.
define <4 x float> @test_x86_fmadd_bba_ss(<4 x float> %a, <4 x float> %b) #0 {
; CHECK-LABEL: test_x86_fmadd_bba_ss:
; CHECK:       # BB#0:
; CHECK-NEXT: vmovaps	(%rdx), %xmm0
; CHECK-NEXT: vfmadd213ss (%rcx), %xmm0, %xmm0
; CHECK-NEXT: retq
  %res = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
  ret <4 x float> %res
}
; Tests for @llvm.x86.fma.vfmadd.ps on <4 x float> operands. The baa/aba/bba
; infix in each function name gives the order in which %a and %b are passed.
declare <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone

; Call operands (%b, %a, %a): one load from (%rcx), then vfmadd132ps with
; (%rdx) as its memory operand.
define <4 x float> @test_x86_fmadd_baa_ps(<4 x float> %a, <4 x float> %b) #0 {
; CHECK-LABEL: test_x86_fmadd_baa_ps:
; CHECK:       # BB#0:
; CHECK-NEXT: vmovaps	(%rcx), %xmm0
; CHECK-NEXT: vfmadd132ps (%rdx), %xmm0, %xmm0
; CHECK-NEXT: retq
  %res = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
  ret <4 x float> %res
}

; Call operands (%a, %b, %a): one load from (%rcx), then vfmadd231ps with
; (%rdx) as its memory operand.
define <4 x float> @test_x86_fmadd_aba_ps(<4 x float> %a, <4 x float> %b) #0 {
; CHECK-LABEL: test_x86_fmadd_aba_ps:
; CHECK:       # BB#0:
; CHECK-NEXT: vmovaps	(%rcx), %xmm0
; CHECK-NEXT: vfmadd231ps (%rdx), %xmm0, %xmm0
; CHECK-NEXT: retq
  %res = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
  ret <4 x float> %res
}

; Call operands (%b, %b, %a): one load from (%rdx), then vfmadd213ps with
; (%rcx) as its memory operand.
define <4 x float> @test_x86_fmadd_bba_ps(<4 x float> %a, <4 x float> %b) #0 {
; CHECK-LABEL: test_x86_fmadd_bba_ps:
; CHECK:       # BB#0:
; CHECK-NEXT: vmovaps	(%rdx), %xmm0
; CHECK-NEXT: vfmadd213ps (%rcx), %xmm0, %xmm0
; CHECK-NEXT: retq
  %res = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
  ret <4 x float> %res
}
; Tests for @llvm.x86.fma.vfmadd.ps.256 on <8 x float> operands (ymm registers
; in the expected output). The baa/aba/bba infix gives the operand order.
declare <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone

; Call operands (%b, %a, %a): one load from (%rcx), then vfmadd132ps with
; (%rdx) as its memory operand.
define <8 x float> @test_x86_fmadd_baa_ps_y(<8 x float> %a, <8 x float> %b) #0 {
; CHECK-LABEL: test_x86_fmadd_baa_ps_y:
; CHECK:       # BB#0:
; CHECK-NEXT: vmovaps	(%rcx), %ymm0
; CHECK-NEXT: vfmadd132ps (%rdx), %ymm0, %ymm0
; CHECK-NEXT: retq
  %res = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %b, <8 x float> %a, <8 x float> %a) nounwind
  ret <8 x float> %res
}

; Call operands (%a, %b, %a): one load from (%rcx), then vfmadd231ps with
; (%rdx) as its memory operand.
define <8 x float> @test_x86_fmadd_aba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
; CHECK-LABEL: test_x86_fmadd_aba_ps_y:
; CHECK:       # BB#0:
; CHECK-NEXT: vmovaps	(%rcx), %ymm0
; CHECK-NEXT: vfmadd231ps (%rdx), %ymm0, %ymm0
; CHECK-NEXT: retq
  %res = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %a) nounwind
  ret <8 x float> %res
}

; Call operands (%b, %b, %a): one load from (%rdx), then vfmadd213ps with
; (%rcx) as its memory operand.
define <8 x float> @test_x86_fmadd_bba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
; CHECK-LABEL: test_x86_fmadd_bba_ps_y:
; CHECK:       # BB#0:
; CHECK-NEXT: vmovaps	(%rdx), %ymm0
; CHECK-NEXT: vfmadd213ps (%rcx), %ymm0, %ymm0
; CHECK-NEXT: retq
  %res = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %b, <8 x float> %b, <8 x float> %a) nounwind
  ret <8 x float> %res
}
; Tests for @llvm.x86.fma.vfmadd.sd on <2 x double> operands. The baa/aba/bba
; infix in each function name gives the order in which %a and %b are passed.
declare <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone

; Call operands (%b, %a, %a): both arguments are expected in registers before a
; register-only vfmadd213sd; the alternation accepts the two loads in either order.
define <2 x double> @test_x86_fmadd_baa_sd(<2 x double> %a, <2 x double> %b) #0 {
; CHECK-LABEL: test_x86_fmadd_baa_sd:
; CHECK:       # BB#0:
; CHECK-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
; CHECK-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
; CHECK-NEXT: vfmadd213sd %xmm1, %xmm1, %xmm0
; CHECK-NEXT: retq
  %res = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
  ret <2 x double> %res
}

; Call operands (%a, %b, %a): one load from (%rcx), then vfmadd132sd with
; (%rdx) as its memory operand.
define <2 x double> @test_x86_fmadd_aba_sd(<2 x double> %a, <2 x double> %b) #0 {
; CHECK-LABEL: test_x86_fmadd_aba_sd:
; CHECK:       # BB#0:
; CHECK-NEXT: vmovapd	(%rcx), %xmm0
; CHECK-NEXT: vfmadd132sd (%rdx), %xmm0, %xmm0
; CHECK-NEXT: retq
  %res = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
  ret <2 x double> %res
}

; Call operands (%b, %b, %a): one load from (%rdx), then vfmadd213sd with
; (%rcx) as its memory operand.
define <2 x double> @test_x86_fmadd_bba_sd(<2 x double> %a, <2 x double> %b) #0 {
; CHECK-LABEL: test_x86_fmadd_bba_sd:
; CHECK:       # BB#0:
; CHECK-NEXT: vmovapd	(%rdx), %xmm0
; CHECK-NEXT: vfmadd213sd (%rcx), %xmm0, %xmm0
; CHECK-NEXT: retq
  %res = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
  ret <2 x double> %res
}
; Tests for @llvm.x86.fma.vfmadd.pd on <2 x double> operands. The baa/aba/bba
; infix in each function name gives the order in which %a and %b are passed.
declare <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone

; Call operands (%b, %a, %a): one load from (%rcx), then vfmadd132pd with
; (%rdx) as its memory operand.
define <2 x double> @test_x86_fmadd_baa_pd(<2 x double> %a, <2 x double> %b) #0 {
; CHECK-LABEL: test_x86_fmadd_baa_pd:
; CHECK:       # BB#0:
; CHECK-NEXT: vmovapd	(%rcx), %xmm0
; CHECK-NEXT: vfmadd132pd (%rdx), %xmm0, %xmm0
; CHECK-NEXT: retq
  %res = call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
  ret <2 x double> %res
}

; Call operands (%a, %b, %a): one load from (%rcx), then vfmadd231pd with
; (%rdx) as its memory operand.
define <2 x double> @test_x86_fmadd_aba_pd(<2 x double> %a, <2 x double> %b) #0 {
; CHECK-LABEL: test_x86_fmadd_aba_pd:
; CHECK:       # BB#0:
; CHECK-NEXT: vmovapd	(%rcx), %xmm0
; CHECK-NEXT: vfmadd231pd (%rdx), %xmm0, %xmm0
; CHECK-NEXT: retq
  %res = call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
  ret <2 x double> %res
}

; Call operands (%b, %b, %a): one load from (%rdx), then vfmadd213pd with
; (%rcx) as its memory operand.
define <2 x double> @test_x86_fmadd_bba_pd(<2 x double> %a, <2 x double> %b) #0 {
; CHECK-LABEL: test_x86_fmadd_bba_pd:
; CHECK:       # BB#0:
; CHECK-NEXT: vmovapd	(%rdx), %xmm0
; CHECK-NEXT: vfmadd213pd (%rcx), %xmm0, %xmm0
; CHECK-NEXT: retq
  %res = call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
  ret <2 x double> %res
}
; Tests for @llvm.x86.fma.vfmadd.pd.256 on <4 x double> operands (ymm registers
; in the expected output). The baa/aba/bba infix gives the operand order.
declare <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone

; Call operands (%b, %a, %a): one load from (%rcx), then vfmadd132pd with
; (%rdx) as its memory operand.
define <4 x double> @test_x86_fmadd_baa_pd_y(<4 x double> %a, <4 x double> %b) #0 {
; CHECK-LABEL: test_x86_fmadd_baa_pd_y:
; CHECK:       # BB#0:
; CHECK-NEXT: vmovapd	(%rcx), %ymm0
; CHECK-NEXT: vfmadd132pd (%rdx), %ymm0, %ymm0
; CHECK-NEXT: retq
  %res = call <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double> %b, <4 x double> %a, <4 x double> %a) nounwind
  ret <4 x double> %res
}

; Call operands (%a, %b, %a): one load from (%rcx), then vfmadd231pd with
; (%rdx) as its memory operand.
define <4 x double> @test_x86_fmadd_aba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
; CHECK-LABEL: test_x86_fmadd_aba_pd_y:
; CHECK:       # BB#0:
; CHECK-NEXT: vmovapd	(%rcx), %ymm0
; CHECK-NEXT: vfmadd231pd (%rdx), %ymm0, %ymm0
; CHECK-NEXT: retq
  %res = call <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %a) nounwind
  ret <4 x double> %res
}

; Call operands (%b, %b, %a): one load from (%rdx), then vfmadd213pd with
; (%rcx) as its memory operand.
define <4 x double> @test_x86_fmadd_bba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
; CHECK-LABEL: test_x86_fmadd_bba_pd_y:
; CHECK:       # BB#0:
; CHECK-NEXT: vmovapd	(%rdx), %ymm0
; CHECK-NEXT: vfmadd213pd (%rcx), %ymm0, %ymm0
; CHECK-NEXT: retq
  %res = call <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double> %b, <4 x double> %b, <4 x double> %a) nounwind
  ret <4 x double> %res
}
; Tests for @llvm.x86.fma.vfnmadd.ss on <4 x float> operands. The baa/aba/bba
; infix in each function name gives the order in which %a and %b are passed.
declare <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float>, <4 x float>, <4 x float>) nounwind readnone

; Call operands (%b, %a, %a): both arguments are expected in registers before a
; register-only vfnmadd213ss; the alternation accepts the two loads in either order.
define <4 x float> @test_x86_fnmadd_baa_ss(<4 x float> %a, <4 x float> %b) #0 {
; CHECK-LABEL: test_x86_fnmadd_baa_ss:
; CHECK:       # BB#0:
; CHECK-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
; CHECK-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
; CHECK-NEXT: vfnmadd213ss %xmm1, %xmm1, %xmm0
; CHECK-NEXT: retq
  %res = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
  ret <4 x float> %res
}

; Call operands (%a, %b, %a): one load from (%rcx), then vfnmadd132ss with
; (%rdx) as its memory operand.
define <4 x float> @test_x86_fnmadd_aba_ss(<4 x float> %a, <4 x float> %b) #0 {
; CHECK-LABEL: test_x86_fnmadd_aba_ss:
; CHECK:       # BB#0:
; CHECK-NEXT: vmovaps	(%rcx), %xmm0
; CHECK-NEXT: vfnmadd132ss (%rdx), %xmm0, %xmm0
; CHECK-NEXT: retq
  %res = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
  ret <4 x float> %res
}

; Call operands (%b, %b, %a): one load from (%rdx), then vfnmadd213ss with
; (%rcx) as its memory operand.
define <4 x float> @test_x86_fnmadd_bba_ss(<4 x float> %a, <4 x float> %b) #0 {
; CHECK-LABEL: test_x86_fnmadd_bba_ss:
; CHECK:       # BB#0:
; CHECK-NEXT: vmovaps	(%rdx), %xmm0
; CHECK-NEXT: vfnmadd213ss (%rcx), %xmm0, %xmm0
; CHECK-NEXT: retq
  %res = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
  ret <4 x float> %res
}
; Tests for @llvm.x86.fma.vfnmadd.ps on <4 x float> operands. The baa/aba/bba
; infix in each function name gives the order in which %a and %b are passed.
declare <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone

; Call operands (%b, %a, %a): one load from (%rcx), then vfnmadd132ps with
; (%rdx) as its memory operand.
define <4 x float> @test_x86_fnmadd_baa_ps(<4 x float> %a, <4 x float> %b) #0 {
; CHECK-LABEL: test_x86_fnmadd_baa_ps:
; CHECK:       # BB#0:
; CHECK-NEXT: vmovaps	(%rcx), %xmm0
; CHECK-NEXT: vfnmadd132ps (%rdx), %xmm0, %xmm0
; CHECK-NEXT: retq
  %res = call <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
  ret <4 x float> %res
}

; Call operands (%a, %b, %a): one load from (%rcx), then vfnmadd231ps with
; (%rdx) as its memory operand.
define <4 x float> @test_x86_fnmadd_aba_ps(<4 x float> %a, <4 x float> %b) #0 {
; CHECK-LABEL: test_x86_fnmadd_aba_ps:
; CHECK:       # BB#0:
; CHECK-NEXT: vmovaps	(%rcx), %xmm0
; CHECK-NEXT: vfnmadd231ps (%rdx), %xmm0, %xmm0
; CHECK-NEXT: retq
  %res = call <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
  ret <4 x float> %res
}

; Call operands (%b, %b, %a): one load from (%rdx), then vfnmadd213ps with
; (%rcx) as its memory operand.
define <4 x float> @test_x86_fnmadd_bba_ps(<4 x float> %a, <4 x float> %b) #0 {
; CHECK-LABEL: test_x86_fnmadd_bba_ps:
; CHECK:       # BB#0:
; CHECK-NEXT: vmovaps	(%rdx), %xmm0
; CHECK-NEXT: vfnmadd213ps (%rcx), %xmm0, %xmm0
; CHECK-NEXT: retq
  %res = call <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
  ret <4 x float> %res
}
; Tests for @llvm.x86.fma.vfnmadd.ps.256 on <8 x float> operands (ymm registers
; in the expected output). The baa/aba/bba infix gives the operand order.
declare <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone

; Call operands (%b, %a, %a): one load from (%rcx), then vfnmadd132ps with
; (%rdx) as its memory operand.
define <8 x float> @test_x86_fnmadd_baa_ps_y(<8 x float> %a, <8 x float> %b) #0 {
; CHECK-LABEL: test_x86_fnmadd_baa_ps_y:
; CHECK:       # BB#0:
; CHECK-NEXT: vmovaps	(%rcx), %ymm0
; CHECK-NEXT: vfnmadd132ps (%rdx), %ymm0, %ymm0
; CHECK-NEXT: retq
  %res = call <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float> %b, <8 x float> %a, <8 x float> %a) nounwind
  ret <8 x float> %res
}

; Call operands (%a, %b, %a): one load from (%rcx), then vfnmadd231ps with
; (%rdx) as its memory operand.
define <8 x float> @test_x86_fnmadd_aba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
; CHECK-LABEL: test_x86_fnmadd_aba_ps_y:
; CHECK:       # BB#0:
; CHECK-NEXT: vmovaps	(%rcx), %ymm0
; CHECK-NEXT: vfnmadd231ps (%rdx), %ymm0, %ymm0
; CHECK-NEXT: retq
  %res = call <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %a) nounwind
  ret <8 x float> %res
}

; Call operands (%b, %b, %a): one load from (%rdx), then vfnmadd213ps with
; (%rcx) as its memory operand.
define <8 x float> @test_x86_fnmadd_bba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
; CHECK-LABEL: test_x86_fnmadd_bba_ps_y:
; CHECK:       # BB#0:
; CHECK-NEXT: vmovaps	(%rdx), %ymm0
; CHECK-NEXT: vfnmadd213ps (%rcx), %ymm0, %ymm0
; CHECK-NEXT: retq
  %res = call <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float> %b, <8 x float> %b, <8 x float> %a) nounwind
  ret <8 x float> %res
}
; Tests for @llvm.x86.fma.vfnmadd.sd on <2 x double> operands. The baa/aba/bba
; infix in each function name gives the order in which %a and %b are passed.
declare <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone

; Call operands (%b, %a, %a): both arguments are expected in registers before a
; register-only vfnmadd213sd; the alternation accepts the two loads in either order.
define <2 x double> @test_x86_fnmadd_baa_sd(<2 x double> %a, <2 x double> %b) #0 {
; CHECK-LABEL: test_x86_fnmadd_baa_sd:
; CHECK:       # BB#0:
; CHECK-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
; CHECK-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
; CHECK-NEXT: vfnmadd213sd %xmm1, %xmm1, %xmm0
; CHECK-NEXT: retq
  %res = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
  ret <2 x double> %res
}

; Call operands (%a, %b, %a): one load from (%rcx), then vfnmadd132sd with
; (%rdx) as its memory operand.
define <2 x double> @test_x86_fnmadd_aba_sd(<2 x double> %a, <2 x double> %b) #0 {
; CHECK-LABEL: test_x86_fnmadd_aba_sd:
; CHECK:       # BB#0:
; CHECK-NEXT: vmovapd	(%rcx), %xmm0
; CHECK-NEXT: vfnmadd132sd (%rdx), %xmm0, %xmm0
; CHECK-NEXT: retq
  %res = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
  ret <2 x double> %res
}

; Call operands (%b, %b, %a): one load from (%rdx), then vfnmadd213sd with
; (%rcx) as its memory operand.
define <2 x double> @test_x86_fnmadd_bba_sd(<2 x double> %a, <2 x double> %b) #0 {
; CHECK-LABEL: test_x86_fnmadd_bba_sd:
; CHECK:       # BB#0:
; CHECK-NEXT: vmovapd	(%rdx), %xmm0
; CHECK-NEXT: vfnmadd213sd (%rcx), %xmm0, %xmm0
; CHECK-NEXT: retq
  %res = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
  ret <2 x double> %res
}
; Tests for @llvm.x86.fma.vfnmadd.pd on <2 x double> operands. The baa/aba/bba
; infix in each function name gives the order in which %a and %b are passed.
declare <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone

; Call operands (%b, %a, %a): one load from (%rcx), then vfnmadd132pd with
; (%rdx) as its memory operand.
define <2 x double> @test_x86_fnmadd_baa_pd(<2 x double> %a, <2 x double> %b) #0 {
; CHECK-LABEL: test_x86_fnmadd_baa_pd:
; CHECK:       # BB#0:
; CHECK-NEXT: vmovapd	(%rcx), %xmm0
; CHECK-NEXT: vfnmadd132pd (%rdx), %xmm0, %xmm0
; CHECK-NEXT: retq
  %res = call <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
  ret <2 x double> %res
}

; Call operands (%a, %b, %a): one load from (%rcx), then vfnmadd231pd with
; (%rdx) as its memory operand.
define <2 x double> @test_x86_fnmadd_aba_pd(<2 x double> %a, <2 x double> %b) #0 {
; CHECK-LABEL: test_x86_fnmadd_aba_pd:
; CHECK:       # BB#0:
; CHECK-NEXT: vmovapd	(%rcx), %xmm0
; CHECK-NEXT: vfnmadd231pd (%rdx), %xmm0, %xmm0
; CHECK-NEXT: retq
  %res = call <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
  ret <2 x double> %res
}

; Call operands (%b, %b, %a): one load from (%rdx), then vfnmadd213pd with
; (%rcx) as its memory operand.
define <2 x double> @test_x86_fnmadd_bba_pd(<2 x double> %a, <2 x double> %b) #0 {
; CHECK-LABEL: test_x86_fnmadd_bba_pd:
; CHECK:       # BB#0:
; CHECK-NEXT: vmovapd	(%rdx), %xmm0
; CHECK-NEXT: vfnmadd213pd (%rcx), %xmm0, %xmm0
; CHECK-NEXT: retq
  %res = call <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
  ret <2 x double> %res
}
; Tests for @llvm.x86.fma.vfnmadd.pd.256 on <4 x double> operands (ymm registers
; in the expected output). The baa/aba/bba infix gives the operand order.
declare <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone

; Call operands (%b, %a, %a): one load from (%rcx), then vfnmadd132pd with
; (%rdx) as its memory operand.
define <4 x double> @test_x86_fnmadd_baa_pd_y(<4 x double> %a, <4 x double> %b) #0 {
; CHECK-LABEL: test_x86_fnmadd_baa_pd_y:
; CHECK:       # BB#0:
; CHECK-NEXT: vmovapd	(%rcx), %ymm0
; CHECK-NEXT: vfnmadd132pd (%rdx), %ymm0, %ymm0
; CHECK-NEXT: retq
  %res = call <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double> %b, <4 x double> %a, <4 x double> %a) nounwind
  ret <4 x double> %res
}

; Call operands (%a, %b, %a): one load from (%rcx), then vfnmadd231pd with
; (%rdx) as its memory operand.
define <4 x double> @test_x86_fnmadd_aba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
; CHECK-LABEL: test_x86_fnmadd_aba_pd_y:
; CHECK:       # BB#0:
; CHECK-NEXT: vmovapd	(%rcx), %ymm0
; CHECK-NEXT: vfnmadd231pd (%rdx), %ymm0, %ymm0
; CHECK-NEXT: retq
  %res = call <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %a) nounwind
  ret <4 x double> %res
}

; Call operands (%b, %b, %a): one load from (%rdx), then vfnmadd213pd with
; (%rcx) as its memory operand.
define <4 x double> @test_x86_fnmadd_bba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
; CHECK-LABEL: test_x86_fnmadd_bba_pd_y:
; CHECK:       # BB#0:
; CHECK-NEXT: vmovapd	(%rdx), %ymm0
; CHECK-NEXT: vfnmadd213pd (%rcx), %ymm0, %ymm0
; CHECK-NEXT: retq
  %res = call <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double> %b, <4 x double> %b, <4 x double> %a) nounwind
  ret <4 x double> %res
}
; Tests for @llvm.x86.fma.vfmsub.ss on <4 x float> operands. The baa/aba/bba
; infix in each function name gives the order in which %a and %b are passed.
declare <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float>, <4 x float>, <4 x float>) nounwind readnone

; Call operands (%b, %a, %a): both arguments are expected in registers before a
; register-only vfmsub213ss; the alternation accepts the two loads in either order.
define <4 x float> @test_x86_fmsub_baa_ss(<4 x float> %a, <4 x float> %b) #0 {
; CHECK-LABEL: test_x86_fmsub_baa_ss:
; CHECK:       # BB#0:
; CHECK-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
; CHECK-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
; CHECK-NEXT: vfmsub213ss %xmm1, %xmm1, %xmm0
; CHECK-NEXT: retq
  %res = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
  ret <4 x float> %res
}

; Call operands (%a, %b, %a): one load from (%rcx), then vfmsub132ss with
; (%rdx) as its memory operand.
define <4 x float> @test_x86_fmsub_aba_ss(<4 x float> %a, <4 x float> %b) #0 {
; CHECK-LABEL: test_x86_fmsub_aba_ss:
; CHECK:       # BB#0:
; CHECK-NEXT: vmovaps	(%rcx), %xmm0
; CHECK-NEXT: vfmsub132ss (%rdx), %xmm0, %xmm0
; CHECK-NEXT: retq
  %res = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
  ret <4 x float> %res
}

; Call operands (%b, %b, %a): one load from (%rdx), then vfmsub213ss with
; (%rcx) as its memory operand.
define <4 x float> @test_x86_fmsub_bba_ss(<4 x float> %a, <4 x float> %b) #0 {
; CHECK-LABEL: test_x86_fmsub_bba_ss:
; CHECK:       # BB#0:
; CHECK-NEXT: vmovaps	(%rdx), %xmm0
; CHECK-NEXT: vfmsub213ss (%rcx), %xmm0, %xmm0
; CHECK-NEXT: retq
  %res = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
  ret <4 x float> %res
}
; Tests for @llvm.x86.fma.vfmsub.ps on <4 x float> operands. The baa/aba/bba
; infix in each function name gives the order in which %a and %b are passed.
declare <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone

; Call operands (%b, %a, %a): one load from (%rcx), then vfmsub132ps with
; (%rdx) as its memory operand.
define <4 x float> @test_x86_fmsub_baa_ps(<4 x float> %a, <4 x float> %b) #0 {
; CHECK-LABEL: test_x86_fmsub_baa_ps:
; CHECK:       # BB#0:
; CHECK-NEXT: vmovaps	(%rcx), %xmm0
; CHECK-NEXT: vfmsub132ps (%rdx), %xmm0, %xmm0
; CHECK-NEXT: retq
  %res = call <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
  ret <4 x float> %res
}

; Call operands (%a, %b, %a): one load from (%rcx), then vfmsub231ps with
; (%rdx) as its memory operand.
define <4 x float> @test_x86_fmsub_aba_ps(<4 x float> %a, <4 x float> %b) #0 {
; CHECK-LABEL: test_x86_fmsub_aba_ps:
; CHECK:       # BB#0:
; CHECK-NEXT: vmovaps	(%rcx), %xmm0
; CHECK-NEXT: vfmsub231ps (%rdx), %xmm0, %xmm0
; CHECK-NEXT: retq
  %res = call <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
  ret <4 x float> %res
}

; Call operands (%b, %b, %a): one load from (%rdx), then vfmsub213ps with
; (%rcx) as its memory operand.
define <4 x float> @test_x86_fmsub_bba_ps(<4 x float> %a, <4 x float> %b) #0 {
; CHECK-LABEL: test_x86_fmsub_bba_ps:
; CHECK:       # BB#0:
; CHECK-NEXT: vmovaps	(%rdx), %xmm0
; CHECK-NEXT: vfmsub213ps (%rcx), %xmm0, %xmm0
; CHECK-NEXT: retq
  %res = call <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
  ret <4 x float> %res
}
; Tests for @llvm.x86.fma.vfmsub.ps.256 on <8 x float> operands (ymm registers
; in the expected output). The baa/aba/bba infix gives the operand order.
declare <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone

; Call operands (%b, %a, %a): one load from (%rcx), then vfmsub132ps with
; (%rdx) as its memory operand.
define <8 x float> @test_x86_fmsub_baa_ps_y(<8 x float> %a, <8 x float> %b) #0 {
; CHECK-LABEL: test_x86_fmsub_baa_ps_y:
; CHECK:       # BB#0:
; CHECK-NEXT: vmovaps	(%rcx), %ymm0
; CHECK-NEXT: vfmsub132ps (%rdx), %ymm0, %ymm0
; CHECK-NEXT: retq
  %res = call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %b, <8 x float> %a, <8 x float> %a) nounwind
  ret <8 x float> %res
}

; Call operands (%a, %b, %a): one load from (%rcx), then vfmsub231ps with
; (%rdx) as its memory operand.
define <8 x float> @test_x86_fmsub_aba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
; CHECK-LABEL: test_x86_fmsub_aba_ps_y:
; CHECK:       # BB#0:
; CHECK-NEXT: vmovaps	(%rcx), %ymm0
; CHECK-NEXT: vfmsub231ps (%rdx), %ymm0, %ymm0
; CHECK-NEXT: retq
  %res = call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %a) nounwind
  ret <8 x float> %res
}

; Call operands (%b, %b, %a): one load from (%rdx), then vfmsub213ps with
; (%rcx) as its memory operand.
define <8 x float> @test_x86_fmsub_bba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
; CHECK-LABEL: test_x86_fmsub_bba_ps_y:
; CHECK:       # BB#0:
; CHECK-NEXT: vmovaps	(%rdx), %ymm0
; CHECK-NEXT: vfmsub213ps (%rcx), %ymm0, %ymm0
; CHECK-NEXT: retq
  %res = call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %b, <8 x float> %b, <8 x float> %a) nounwind
  ret <8 x float> %res
}
; Tests for @llvm.x86.fma.vfmsub.sd on <2 x double> operands. The baa/aba/bba
; infix in each function name gives the order in which %a and %b are passed.
declare <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone

; Call operands (%b, %a, %a): both arguments are expected in registers before a
; register-only vfmsub213sd; the alternation accepts the two loads in either order.
define <2 x double> @test_x86_fmsub_baa_sd(<2 x double> %a, <2 x double> %b) #0 {
; CHECK-LABEL: test_x86_fmsub_baa_sd:
; CHECK:       # BB#0:
; CHECK-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
; CHECK-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
; CHECK-NEXT: vfmsub213sd %xmm1, %xmm1, %xmm0
; CHECK-NEXT: retq
  %res = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
  ret <2 x double> %res
}

; Call operands (%a, %b, %a): one load from (%rcx), then vfmsub132sd with
; (%rdx) as its memory operand.
define <2 x double> @test_x86_fmsub_aba_sd(<2 x double> %a, <2 x double> %b) #0 {
; CHECK-LABEL: test_x86_fmsub_aba_sd:
; CHECK:       # BB#0:
; CHECK-NEXT: vmovapd	(%rcx), %xmm0
; CHECK-NEXT: vfmsub132sd (%rdx), %xmm0, %xmm0
; CHECK-NEXT: retq
  %res = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
  ret <2 x double> %res
}

; Call operands (%b, %b, %a): one load from (%rdx), then vfmsub213sd with
; (%rcx) as its memory operand.
define <2 x double> @test_x86_fmsub_bba_sd(<2 x double> %a, <2 x double> %b) #0 {
; CHECK-LABEL: test_x86_fmsub_bba_sd:
; CHECK:       # BB#0:
; CHECK-NEXT: vmovapd	(%rdx), %xmm0
; CHECK-NEXT: vfmsub213sd (%rcx), %xmm0, %xmm0
; CHECK-NEXT: retq
  %res = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
  ret <2 x double> %res
}
; Tests for @llvm.x86.fma.vfmsub.pd on <2 x double> operands. The baa/aba/bba
; infix in each function name gives the order in which %a and %b are passed.
declare <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone

; Call operands (%b, %a, %a): one load from (%rcx), then vfmsub132pd with
; (%rdx) as its memory operand.
define <2 x double> @test_x86_fmsub_baa_pd(<2 x double> %a, <2 x double> %b) #0 {
; CHECK-LABEL: test_x86_fmsub_baa_pd:
; CHECK:       # BB#0:
; CHECK-NEXT: vmovapd	(%rcx), %xmm0
; CHECK-NEXT: vfmsub132pd (%rdx), %xmm0, %xmm0
; CHECK-NEXT: retq
  %res = call <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
  ret <2 x double> %res
}

; Call operands (%a, %b, %a): one load from (%rcx), then vfmsub231pd with
; (%rdx) as its memory operand.
define <2 x double> @test_x86_fmsub_aba_pd(<2 x double> %a, <2 x double> %b) #0 {
; CHECK-LABEL: test_x86_fmsub_aba_pd:
; CHECK:       # BB#0:
; CHECK-NEXT: vmovapd	(%rcx), %xmm0
; CHECK-NEXT: vfmsub231pd (%rdx), %xmm0, %xmm0
; CHECK-NEXT: retq
  %res = call <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
  ret <2 x double> %res
}

; Call operands (%b, %b, %a): one load from (%rdx), then vfmsub213pd with
; (%rcx) as its memory operand.
define <2 x double> @test_x86_fmsub_bba_pd(<2 x double> %a, <2 x double> %b) #0 {
; CHECK-LABEL: test_x86_fmsub_bba_pd:
; CHECK:       # BB#0:
; CHECK-NEXT: vmovapd	(%rdx), %xmm0
; CHECK-NEXT: vfmsub213pd (%rcx), %xmm0, %xmm0
; CHECK-NEXT: retq
  %res = call <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
  ret <2 x double> %res
}
; Tests for @llvm.x86.fma.vfmsub.pd.256 on <4 x double> operands (ymm registers
; in the expected output). The baa/aba/bba infix gives the operand order.
declare <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone

; Call operands (%b, %a, %a): one load from (%rcx), then vfmsub132pd with
; (%rdx) as its memory operand.
define <4 x double> @test_x86_fmsub_baa_pd_y(<4 x double> %a, <4 x double> %b) #0 {
; CHECK-LABEL: test_x86_fmsub_baa_pd_y:
; CHECK:       # BB#0:
; CHECK-NEXT: vmovapd	(%rcx), %ymm0
; CHECK-NEXT: vfmsub132pd (%rdx), %ymm0, %ymm0
; CHECK-NEXT: retq
  %res = call <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double> %b, <4 x double> %a, <4 x double> %a) nounwind
  ret <4 x double> %res
}

; Call operands (%a, %b, %a): one load from (%rcx), then vfmsub231pd with
; (%rdx) as its memory operand.
define <4 x double> @test_x86_fmsub_aba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
; CHECK-LABEL: test_x86_fmsub_aba_pd_y:
; CHECK:       # BB#0:
; CHECK-NEXT: vmovapd	(%rcx), %ymm0
; CHECK-NEXT: vfmsub231pd (%rdx), %ymm0, %ymm0
; CHECK-NEXT: retq
  %res = call <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %a) nounwind
  ret <4 x double> %res
}

; Call operands (%b, %b, %a): one load from (%rdx), then vfmsub213pd with
; (%rcx) as its memory operand.
define <4 x double> @test_x86_fmsub_bba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
; CHECK-LABEL: test_x86_fmsub_bba_pd_y:
; CHECK:       # BB#0:
; CHECK-NEXT: vmovapd	(%rdx), %ymm0
; CHECK-NEXT: vfmsub213pd (%rcx), %ymm0, %ymm0
; CHECK-NEXT: retq
  %res = call <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double> %b, <4 x double> %b, <4 x double> %a) nounwind
  ret <4 x double> %res
}
; Tests for @llvm.x86.fma.vfnmsub.ss on <4 x float> operands. The baa/aba/bba
; infix in each function name gives the order in which %a and %b are passed.
declare <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float>, <4 x float>, <4 x float>) nounwind readnone

; Call operands (%b, %a, %a): both arguments are expected in registers before a
; register-only vfnmsub213ss; the alternation accepts the two loads in either order.
define <4 x float> @test_x86_fnmsub_baa_ss(<4 x float> %a, <4 x float> %b) #0 {
; CHECK-LABEL: test_x86_fnmsub_baa_ss:
; CHECK:       # BB#0:
; CHECK-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
; CHECK-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
; CHECK-NEXT: vfnmsub213ss %xmm1, %xmm1, %xmm0
; CHECK-NEXT: retq
  %res = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
  ret <4 x float> %res
}

; Call operands (%a, %b, %a): one load from (%rcx), then vfnmsub132ss with
; (%rdx) as its memory operand.
define <4 x float> @test_x86_fnmsub_aba_ss(<4 x float> %a, <4 x float> %b) #0 {
; CHECK-LABEL: test_x86_fnmsub_aba_ss:
; CHECK:       # BB#0:
; CHECK-NEXT: vmovaps	(%rcx), %xmm0
; CHECK-NEXT: vfnmsub132ss (%rdx), %xmm0, %xmm0
; CHECK-NEXT: retq
  %res = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
  ret <4 x float> %res
}

; Call operands (%b, %b, %a): one load from (%rdx), then vfnmsub213ss with
; (%rcx) as its memory operand.
define <4 x float> @test_x86_fnmsub_bba_ss(<4 x float> %a, <4 x float> %b) #0 {
; CHECK-LABEL: test_x86_fnmsub_bba_ss:
; CHECK:       # BB#0:
; CHECK-NEXT: vmovaps	(%rdx), %xmm0
; CHECK-NEXT: vfnmsub213ss (%rcx), %xmm0, %xmm0
; CHECK-NEXT: retq
  %res = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
  ret <4 x float> %res
}
; Tests for @llvm.x86.fma.vfnmsub.ps on <4 x float> operands. The baa/aba/bba
; infix in each function name gives the order in which %a and %b are passed.
declare <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone

; Call operands (%b, %a, %a): one load from (%rcx), then vfnmsub132ps with
; (%rdx) as its memory operand.
define <4 x float> @test_x86_fnmsub_baa_ps(<4 x float> %a, <4 x float> %b) #0 {
; CHECK-LABEL: test_x86_fnmsub_baa_ps:
; CHECK:       # BB#0:
; CHECK-NEXT: vmovaps	(%rcx), %xmm0
; CHECK-NEXT: vfnmsub132ps (%rdx), %xmm0, %xmm0
; CHECK-NEXT: retq
  %res = call <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
  ret <4 x float> %res
}

; Call operands (%a, %b, %a): one load from (%rcx), then vfnmsub231ps with
; (%rdx) as its memory operand.
define <4 x float> @test_x86_fnmsub_aba_ps(<4 x float> %a, <4 x float> %b) #0 {
; CHECK-LABEL: test_x86_fnmsub_aba_ps:
; CHECK:       # BB#0:
; CHECK-NEXT: vmovaps	(%rcx), %xmm0
; CHECK-NEXT: vfnmsub231ps (%rdx), %xmm0, %xmm0
; CHECK-NEXT: retq
  %res = call <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
  ret <4 x float> %res
}

; Call operands (%b, %b, %a): one load from (%rdx), then vfnmsub213ps with
; (%rcx) as its memory operand.
define <4 x float> @test_x86_fnmsub_bba_ps(<4 x float> %a, <4 x float> %b) #0 {
; CHECK-LABEL: test_x86_fnmsub_bba_ps:
; CHECK:       # BB#0:
; CHECK-NEXT: vmovaps	(%rdx), %xmm0
; CHECK-NEXT: vfnmsub213ps (%rcx), %xmm0, %xmm0
; CHECK-NEXT: retq
  %res = call <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
  ret <4 x float> %res
}
; Tests for @llvm.x86.fma.vfnmsub.ps.256 on <8 x float> operands (ymm registers
; in the expected output). The baa/aba/bba infix gives the operand order.
declare <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone

; Call operands (%b, %a, %a): one load from (%rcx), then vfnmsub132ps with
; (%rdx) as its memory operand.
define <8 x float> @test_x86_fnmsub_baa_ps_y(<8 x float> %a, <8 x float> %b) #0 {
; CHECK-LABEL: test_x86_fnmsub_baa_ps_y:
; CHECK:       # BB#0:
; CHECK-NEXT: vmovaps	(%rcx), %ymm0
; CHECK-NEXT: vfnmsub132ps (%rdx), %ymm0, %ymm0
; CHECK-NEXT: retq
  %res = call <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float> %b, <8 x float> %a, <8 x float> %a) nounwind
  ret <8 x float> %res
}

; Call operands (%a, %b, %a): one load from (%rcx), then vfnmsub231ps with
; (%rdx) as its memory operand.
define <8 x float> @test_x86_fnmsub_aba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
; CHECK-LABEL: test_x86_fnmsub_aba_ps_y:
; CHECK:       # BB#0:
; CHECK-NEXT: vmovaps	(%rcx), %ymm0
; CHECK-NEXT: vfnmsub231ps (%rdx), %ymm0, %ymm0
; CHECK-NEXT: retq
  %res = call <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %a) nounwind
  ret <8 x float> %res
}

; Call operands (%b, %b, %a): one load from (%rdx), then vfnmsub213ps with
; (%rcx) as its memory operand.
define <8 x float> @test_x86_fnmsub_bba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
; CHECK-LABEL: test_x86_fnmsub_bba_ps_y:
; CHECK:       # BB#0:
; CHECK-NEXT: vmovaps	(%rdx), %ymm0
; CHECK-NEXT: vfnmsub213ps (%rcx), %ymm0, %ymm0
; CHECK-NEXT: retq
  %res = call <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float> %b, <8 x float> %b, <8 x float> %a) nounwind
  ret <8 x float> %res
}
; Tests for @llvm.x86.fma.vfnmsub.sd on <2 x double> operands. The baa/aba/bba
; infix in each function name gives the order in which %a and %b are passed.
declare <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone

; Call operands (%b, %a, %a): both arguments are expected in registers before a
; register-only vfnmsub213sd; the alternation accepts the two loads in either order.
define <2 x double> @test_x86_fnmsub_baa_sd(<2 x double> %a, <2 x double> %b) #0 {
; CHECK-LABEL: test_x86_fnmsub_baa_sd:
; CHECK:       # BB#0:
; CHECK-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
; CHECK-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
; CHECK-NEXT: vfnmsub213sd %xmm1, %xmm1, %xmm0
; CHECK-NEXT: retq
  %res = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
  ret <2 x double> %res
}

; Call operands (%a, %b, %a): one load from (%rcx), then vfnmsub132sd with
; (%rdx) as its memory operand.
define <2 x double> @test_x86_fnmsub_aba_sd(<2 x double> %a, <2 x double> %b) #0 {
; CHECK-LABEL: test_x86_fnmsub_aba_sd:
; CHECK:       # BB#0:
; CHECK-NEXT: vmovapd	(%rcx), %xmm0
; CHECK-NEXT: vfnmsub132sd (%rdx), %xmm0, %xmm0
; CHECK-NEXT: retq
  %res = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
  ret <2 x double> %res
}

; Call operands (%b, %b, %a): one load from (%rdx), then vfnmsub213sd with
; (%rcx) as its memory operand.
define <2 x double> @test_x86_fnmsub_bba_sd(<2 x double> %a, <2 x double> %b) #0 {
; CHECK-LABEL: test_x86_fnmsub_bba_sd:
; CHECK:       # BB#0:
; CHECK-NEXT: vmovapd	(%rdx), %xmm0
; CHECK-NEXT: vfnmsub213sd (%rcx), %xmm0, %xmm0
; CHECK-NEXT: retq
  %res = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
  ret <2 x double> %res
}
; Tests for @llvm.x86.fma.vfnmsub.pd on <2 x double> operands. The baa/aba/bba
; infix in each function name gives the order in which %a and %b are passed.
declare <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone

; Call operands (%b, %a, %a): one load from (%rcx), then vfnmsub132pd with
; (%rdx) as its memory operand.
define <2 x double> @test_x86_fnmsub_baa_pd(<2 x double> %a, <2 x double> %b) #0 {
; CHECK-LABEL: test_x86_fnmsub_baa_pd:
; CHECK:       # BB#0:
; CHECK-NEXT: vmovapd	(%rcx), %xmm0
; CHECK-NEXT: vfnmsub132pd (%rdx), %xmm0, %xmm0
; CHECK-NEXT: retq
  %res = call <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
  ret <2 x double> %res
}

; Call operands (%a, %b, %a): one load from (%rcx), then vfnmsub231pd with
; (%rdx) as its memory operand.
define <2 x double> @test_x86_fnmsub_aba_pd(<2 x double> %a, <2 x double> %b) #0 {
; CHECK-LABEL: test_x86_fnmsub_aba_pd:
; CHECK:       # BB#0:
; CHECK-NEXT: vmovapd	(%rcx), %xmm0
; CHECK-NEXT: vfnmsub231pd (%rdx), %xmm0, %xmm0
; CHECK-NEXT: retq
  %res = call <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
  ret <2 x double> %res
}

; Call operands (%b, %b, %a): one load from (%rdx), then vfnmsub213pd with
; (%rcx) as its memory operand.
define <2 x double> @test_x86_fnmsub_bba_pd(<2 x double> %a, <2 x double> %b) #0 {
; CHECK-LABEL: test_x86_fnmsub_bba_pd:
; CHECK:       # BB#0:
; CHECK-NEXT: vmovapd	(%rdx), %xmm0
; CHECK-NEXT: vfnmsub213pd (%rcx), %xmm0, %xmm0
; CHECK-NEXT: retq
  %res = call <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
  ret <2 x double> %res
}
; Tests for @llvm.x86.fma.vfnmsub.pd.256 on <4 x double> operands (ymm registers
; in the expected output). The baa/aba/bba infix gives the operand order.
declare <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone

; Call operands (%b, %a, %a): one load from (%rcx), then vfnmsub132pd with
; (%rdx) as its memory operand.
define <4 x double> @test_x86_fnmsub_baa_pd_y(<4 x double> %a, <4 x double> %b) #0 {
; CHECK-LABEL: test_x86_fnmsub_baa_pd_y:
; CHECK:       # BB#0:
; CHECK-NEXT: vmovapd	(%rcx), %ymm0
; CHECK-NEXT: vfnmsub132pd (%rdx), %ymm0, %ymm0
; CHECK-NEXT: retq
  %res = call <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double> %b, <4 x double> %a, <4 x double> %a) nounwind
  ret <4 x double> %res
}

; Call operands (%a, %b, %a): one load from (%rcx), then vfnmsub231pd with
; (%rdx) as its memory operand.
define <4 x double> @test_x86_fnmsub_aba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
; CHECK-LABEL: test_x86_fnmsub_aba_pd_y:
; CHECK:       # BB#0:
; CHECK-NEXT: vmovapd	(%rcx), %ymm0
; CHECK-NEXT: vfnmsub231pd (%rdx), %ymm0, %ymm0
; CHECK-NEXT: retq
  %res = call <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %a) nounwind
  ret <4 x double> %res
}

; Call operands (%b, %b, %a): one load from (%rdx), then vfnmsub213pd with
; (%rcx) as its memory operand.
define <4 x double> @test_x86_fnmsub_bba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
; CHECK-LABEL: test_x86_fnmsub_bba_pd_y:
; CHECK:       # BB#0:
; CHECK-NEXT: vmovapd	(%rdx), %ymm0
; CHECK-NEXT: vfnmsub213pd (%rcx), %ymm0, %ymm0
; CHECK-NEXT: retq
  %res = call <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double> %b, <4 x double> %b, <4 x double> %a) nounwind
  ret <4 x double> %res
}