• Home
  • Raw
  • Download

Lines Matching +full:win +full:- +full:llvm

1 … llc < %s -mtriple=x86_64-unknown-unknown -march=x86-64 -mcpu=corei7-avx -mattr=+fma | FileCheck %…
2 …lc < %s -mtriple=x86_64-unknown-unknown -march=x86-64 -mcpu=core-avx2 -mattr=+fma,+avx2 | FileChec…
3 …c < %s -mtriple=x86_64-pc-windows -march=x86-64 -mcpu=core-avx2 -mattr=+fma,+avx2 | FileCheck %s -…
4 … llc < %s -mtriple=x86_64-unknown-unknown -march=x86-64 -mcpu=corei7-avx -mattr=+fma4 | FileCheck …
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=bdver2 -mattr=+avx,-fma | FileCheck %s --chec…
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=bdver2 -mattr=-fma4 | FileCheck %s --check-pr…
10 ; CHECK-LABEL: test_x86_fma_vfmadd_ss:
11 ; CHECK-NEXT: # BB#0:
13 ; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
14 ; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
15 ; CHECK-FMA-WIN-NEXT: vfmadd132ss (%rdx), %xmm1, %xmm0
17 ; CHECK-FMA-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0
19 ; CHECK-FMA4-NEXT: vfmaddss %xmm2, %xmm1, %xmm0, %xmm0
21 ; CHECK-NEXT: retq
22 %res = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
27 ; CHECK-LABEL: test_x86_fma_vfmadd_bac_ss:
28 ; CHECK-NEXT: # BB#0:
30 ; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
31 ; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
32 ; CHECK-FMA-WIN-NEXT: vfmadd132ss (%rcx), %xmm1, %xmm0
34 ; CHECK-FMA-NEXT: vfmadd213ss %xmm2, %xmm0, %xmm1
35 ; CHECK-FMA-NEXT: vmovaps %xmm1, %xmm0
37 ; CHECK-FMA4-NEXT: vfmaddss %xmm2, %xmm0, %xmm1, %xmm0
38 ; CHECK-NEXT: retq
39 %res = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %a1, <4 x float> %a0, <4 x float> %a2)
42 declare <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>)
45 ; CHECK-LABEL: test_x86_fma_vfmadd_sd:
46 ; CHECK-NEXT: # BB#0:
48 ; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
49 ; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
50 ; CHECK-FMA-WIN-NEXT: vfmadd132sd (%rdx), %xmm1, %xmm0
52 ; CHECK-FMA-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm0
54 ; CHECK-FMA4-NEXT: vfmaddsd %xmm2, %xmm1, %xmm0, %xmm0
56 ; CHECK-NEXT: retq
57 …%res = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> …
62 ; CHECK-LABEL: test_x86_fma_vfmadd_bac_sd:
63 ; CHECK-NEXT: # BB#0:
65 ; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
66 ; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
67 ; CHECK-FMA-WIN-NEXT: vfmadd132sd (%rcx), %xmm1, %xmm0
69 ; CHECK-FMA-NEXT: vfmadd213sd %xmm2, %xmm0, %xmm1
70 ; CHECK-FMA-NEXT: vmovapd %xmm1, %xmm0
72 ; CHECK-FMA4-NEXT: vfmaddsd %xmm2, %xmm0, %xmm1, %xmm0
74 ; CHECK-NEXT: retq
75 …%res = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %a1, <2 x double> %a0, <2 x double> …
78 declare <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double>, <2 x double>, <2 x double>)
81 ; CHECK-LABEL: test_x86_fma_vfmadd_ps:
82 ; CHECK-NEXT: # BB#0:
84 ; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %xmm{{0|1}}
85 ; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %xmm{{0|1}}
86 ; CHECK-FMA-WIN-NEXT: vfmadd213ps (%r8), %xmm1, %xmm0
88 ; CHECK-FMA-NEXT: vfmadd213ps %xmm2, %xmm1, %xmm0
90 ; CHECK-FMA4-NEXT: vfmaddps %xmm2, %xmm1, %xmm0, %xmm0
92 ; CHECK-NEXT: retq
93 %res = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
96 declare <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float>, <4 x float>, <4 x float>)
99 ; CHECK-LABEL: test_x86_fma_vfmadd_pd:
100 ; CHECK-NEXT: # BB#0:
102 ; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %xmm{{0|1}}
103 ; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %xmm{{0|1}}
104 ; CHECK-FMA-WIN-NEXT: vfmadd213pd (%r8), %xmm1, %xmm0
106 ; CHECK-FMA-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm0
108 ; CHECK-FMA4-NEXT: vfmaddpd %xmm2, %xmm1, %xmm0, %xmm0
110 ; CHECK-NEXT: retq
111 …%res = call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> …
114 declare <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double>, <2 x double>, <2 x double>)
117 ; CHECK-LABEL: test_x86_fma_vfmadd_ps_256:
118 ; CHECK-NEXT: # BB#0:
120 ; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %ymm{{0|1}}
121 ; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %ymm{{0|1}}
122 ; CHECK-FMA-WIN-NEXT: vfmadd213ps (%r8), %ymm1, %ymm0
124 ; CHECK-FMA-NEXT: vfmadd213ps %ymm2, %ymm1, %ymm0
126 ; CHECK-FMA4-NEXT: vfmaddps %ymm2, %ymm1, %ymm0, %ymm0
128 ; CHECK-NEXT: retq
129 …%res = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> …
132 declare <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>)
135 ; CHECK-LABEL: test_x86_fma_vfmadd_pd_256:
136 ; CHECK-NEXT: # BB#0:
138 ; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %ymm{{0|1}}
139 ; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %ymm{{0|1}}
140 ; CHECK-FMA-WIN-NEXT: vfmadd213pd (%r8), %ymm1, %ymm0
142 ; CHECK-FMA-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0
144 ; CHECK-FMA4-NEXT: vfmaddpd %ymm2, %ymm1, %ymm0, %ymm0
146 ; CHECK-NEXT: retq
147 …%res = call <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x doub…
150 declare <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double>, <4 x double>, <4 x double>)
154 ; CHECK-LABEL: test_x86_fma_vfmsub_ss:
155 ; CHECK-NEXT: # BB#0:
157 ; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
158 ; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
159 ; CHECK-FMA-WIN-NEXT: vfmsub132ss (%rdx), %xmm1, %xmm0
161 ; CHECK-FMA-NEXT: vfmsub213ss %xmm2, %xmm1, %xmm0
163 ; CHECK-FMA4-NEXT: vfmsubss %xmm2, %xmm1, %xmm0, %xmm0
165 ; CHECK-NEXT: retq
166 %res = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
171 ; CHECK-LABEL: test_x86_fma_vfmsub_bac_ss:
172 ; CHECK-NEXT: # BB#0:
174 ; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
175 ; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
176 ; CHECK-FMA-WIN-NEXT: vfmsub132ss (%rcx), %xmm1, %xmm0
178 ; CHECK-FMA-NEXT: vfmsub213ss %xmm2, %xmm0, %xmm1
179 ; CHECK-FMA-NEXT: vmovaps %xmm1, %xmm0
181 ; CHECK-FMA4-NEXT: vfmsubss %xmm2, %xmm0, %xmm1, %xmm0
183 ; CHECK-NEXT: retq
184 %res = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %a1, <4 x float> %a0, <4 x float> %a2)
187 declare <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float>, <4 x float>, <4 x float>)
190 ; CHECK-LABEL: test_x86_fma_vfmsub_sd:
191 ; CHECK-NEXT: # BB#0:
193 ; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
194 ; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
195 ; CHECK-FMA-WIN-NEXT: vfmsub132sd (%rdx), %xmm1, %xmm0
197 ; CHECK-FMA-NEXT: vfmsub213sd %xmm2, %xmm1, %xmm0
199 ; CHECK-FMA4-NEXT: vfmsubsd %xmm2, %xmm1, %xmm0, %xmm0
201 ; CHECK-NEXT: retq
202 …%res = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> …
207 ; CHECK-LABEL: test_x86_fma_vfmsub_bac_sd:
208 ; CHECK-NEXT: # BB#0:
210 ; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
211 ; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
212 ; CHECK-FMA-WIN-NEXT: vfmsub132sd (%rcx), %xmm1, %xmm0
214 ; CHECK-FMA-NEXT: vfmsub213sd %xmm2, %xmm0, %xmm1
215 ; CHECK-FMA-NEXT: vmovapd %xmm1, %xmm0
217 ; CHECK-FMA4-NEXT: vfmsubsd %xmm2, %xmm0, %xmm1, %xmm0
219 ; CHECK-NEXT: retq
220 …%res = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %a1, <2 x double> %a0, <2 x double> …
223 declare <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double>, <2 x double>, <2 x double>)
226 ; CHECK-LABEL: test_x86_fma_vfmsub_ps:
227 ; CHECK-NEXT: # BB#0:
229 ; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %xmm{{0|1}}
230 ; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %xmm{{0|1}}
231 ; CHECK-FMA-WIN-NEXT: vfmsub213ps (%r8), %xmm1, %xmm0
233 ; CHECK-FMA-NEXT: vfmsub213ps %xmm2, %xmm1, %xmm0
235 ; CHECK-FMA4-NEXT: vfmsubps %xmm2, %xmm1, %xmm0, %xmm0
237 ; CHECK-NEXT: retq
238 %res = call <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
241 declare <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float>, <4 x float>, <4 x float>)
244 ; CHECK-LABEL: test_x86_fma_vfmsub_pd:
245 ; CHECK-NEXT: # BB#0:
247 ; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %xmm{{0|1}}
248 ; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %xmm{{0|1}}
249 ; CHECK-FMA-WIN-NEXT: vfmsub213pd (%r8), %xmm1, %xmm0
251 ; CHECK-FMA-NEXT: vfmsub213pd %xmm2, %xmm1, %xmm0
253 ; CHECK-FMA4-NEXT: vfmsubpd %xmm2, %xmm1, %xmm0, %xmm0
255 ; CHECK-NEXT: retq
256 …%res = call <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> …
259 declare <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double>, <2 x double>, <2 x double>)
262 ; CHECK-LABEL: test_x86_fma_vfmsub_ps_256:
263 ; CHECK-NEXT: # BB#0:
265 ; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %ymm{{0|1}}
266 ; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %ymm{{0|1}}
267 ; CHECK-FMA-WIN-NEXT: vfmsub213ps (%r8), %ymm1, %ymm0
269 ; CHECK-FMA-NEXT: vfmsub213ps %ymm2, %ymm1, %ymm0
271 ; CHECK-FMA4-NEXT: vfmsubps %ymm2, %ymm1, %ymm0, %ymm0
273 ; CHECK-NEXT: retq
274 …%res = call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> …
277 declare <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float>, <8 x float>, <8 x float>)
280 ; CHECK-LABEL: test_x86_fma_vfmsub_pd_256:
281 ; CHECK-NEXT: # BB#0:
283 ; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %ymm{{0|1}}
284 ; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %ymm{{0|1}}
285 ; CHECK-FMA-WIN-NEXT: vfmsub213pd (%r8), %ymm1, %ymm0
287 ; CHECK-FMA-NEXT: vfmsub213pd %ymm2, %ymm1, %ymm0
289 ; CHECK-FMA4-NEXT: vfmsubpd %ymm2, %ymm1, %ymm0, %ymm0
291 ; CHECK-NEXT: retq
292 …%res = call <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x doub…
295 declare <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double>, <4 x double>, <4 x double>)
299 ; CHECK-LABEL: test_x86_fma_vfnmadd_ss:
300 ; CHECK-NEXT: # BB#0:
302 ; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
303 ; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
304 ; CHECK-FMA-WIN-NEXT: vfnmadd132ss (%rdx), %xmm1, %xmm0
306 ; CHECK-FMA-NEXT: vfnmadd213ss %xmm2, %xmm1, %xmm0
308 ; CHECK-FMA4-NEXT: vfnmaddss %xmm2, %xmm1, %xmm0, %xmm0
310 ; CHECK-NEXT: retq
311 …%res = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
316 ; CHECK-LABEL: test_x86_fma_vfnmadd_bac_ss:
317 ; CHECK-NEXT: # BB#0:
319 ; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
320 ; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
321 ; CHECK-FMA-WIN-NEXT: vfnmadd132ss (%rcx), %xmm1, %xmm0
323 ; CHECK-FMA-NEXT: vfnmadd213ss %xmm2, %xmm0, %xmm1
324 ; CHECK-FMA-NEXT: vmovaps %xmm1, %xmm0
326 ; CHECK-FMA4-NEXT: vfnmaddss %xmm2, %xmm0, %xmm1, %xmm0
328 ; CHECK-NEXT: retq
329 …%res = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %a1, <4 x float> %a0, <4 x float> %a2)
332 declare <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float>, <4 x float>, <4 x float>)
335 ; CHECK-LABEL: test_x86_fma_vfnmadd_sd:
336 ; CHECK-NEXT: # BB#0:
338 ; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
339 ; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
340 ; CHECK-FMA-WIN-NEXT: vfnmadd132sd (%rdx), %xmm1, %xmm0
342 ; CHECK-FMA-NEXT: vfnmadd213sd %xmm2, %xmm1, %xmm0
344 ; CHECK-FMA4-NEXT: vfnmaddsd %xmm2, %xmm1, %xmm0, %xmm0
346 ; CHECK-NEXT: retq
347 …%res = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %a0, <2 x double> %a1, <2 x double>…
352 ; CHECK-LABEL: test_x86_fma_vfnmadd_bac_sd:
353 ; CHECK-NEXT: # BB#0:
355 ; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
356 ; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
357 ; CHECK-FMA-WIN-NEXT: vfnmadd132sd (%rcx), %xmm1, %xmm0
359 ; CHECK-FMA-NEXT: vfnmadd213sd %xmm2, %xmm0, %xmm1
360 ; CHECK-FMA-NEXT: vmovapd %xmm1, %xmm0
362 ; CHECK-FMA4-NEXT: vfnmaddsd %xmm2, %xmm0, %xmm1, %xmm0
364 ; CHECK-NEXT: retq
365 …%res = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %a1, <2 x double> %a0, <2 x double>…
368 declare <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double>, <2 x double>, <2 x double>)
371 ; CHECK-LABEL: test_x86_fma_vfnmadd_ps:
372 ; CHECK-NEXT: # BB#0:
374 ; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %xmm{{0|1}}
375 ; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %xmm{{0|1}}
376 ; CHECK-FMA-WIN-NEXT: vfnmadd213ps (%r8), %xmm1, %xmm0
378 ; CHECK-FMA-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0
380 ; CHECK-FMA4-NEXT: vfnmaddps %xmm2, %xmm1, %xmm0, %xmm0
382 ; CHECK-NEXT: retq
383 …%res = call <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
386 declare <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float>, <4 x float>, <4 x float>)
389 ; CHECK-LABEL: test_x86_fma_vfnmadd_pd:
390 ; CHECK-NEXT: # BB#0:
392 ; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %xmm{{0|1}}
393 ; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %xmm{{0|1}}
394 ; CHECK-FMA-WIN-NEXT: vfnmadd213pd (%r8), %xmm1, %xmm0
396 ; CHECK-FMA-NEXT: vfnmadd213pd %xmm2, %xmm1, %xmm0
398 ; CHECK-FMA4-NEXT: vfnmaddpd %xmm2, %xmm1, %xmm0, %xmm0
400 ; CHECK-NEXT: retq
401 …%res = call <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double>…
404 declare <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double>, <2 x double>, <2 x double>)
407 ; CHECK-LABEL: test_x86_fma_vfnmadd_ps_256:
408 ; CHECK-NEXT: # BB#0:
410 ; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %ymm{{0|1}}
411 ; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %ymm{{0|1}}
412 ; CHECK-FMA-WIN-NEXT: vfnmadd213ps (%r8), %ymm1, %ymm0
414 ; CHECK-FMA-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0
416 ; CHECK-FMA4-NEXT: vfnmaddps %ymm2, %ymm1, %ymm0, %ymm0
418 ; CHECK-NEXT: retq
419 …%res = call <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>…
422 declare <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float>, <8 x float>, <8 x float>)
425 ; CHECK-LABEL: test_x86_fma_vfnmadd_pd_256:
426 ; CHECK-NEXT: # BB#0:
428 ; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %ymm{{0|1}}
429 ; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %ymm{{0|1}}
430 ; CHECK-FMA-WIN-NEXT: vfnmadd213pd (%r8), %ymm1, %ymm0
432 ; CHECK-FMA-NEXT: vfnmadd213pd %ymm2, %ymm1, %ymm0
434 ; CHECK-FMA4-NEXT: vfnmaddpd %ymm2, %ymm1, %ymm0, %ymm0
436 ; CHECK-NEXT: retq
437 …%res = call <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x dou…
440 declare <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double>, <4 x double>, <4 x double>)
444 ; CHECK-LABEL: test_x86_fma_vfnmsub_ss:
445 ; CHECK-NEXT: # BB#0:
447 ; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
448 ; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
449 ; CHECK-FMA-WIN-NEXT: vfnmsub132ss (%rdx), %xmm1, %xmm0
451 ; CHECK-FMA-NEXT: vfnmsub213ss %xmm2, %xmm1, %xmm0
453 ; CHECK-FMA4-NEXT: vfnmsubss %xmm2, %xmm1, %xmm0, %xmm0
455 ; CHECK-NEXT: retq
456 …%res = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
461 ; CHECK-LABEL: test_x86_fma_vfnmsub_bac_ss:
462 ; CHECK-NEXT: # BB#0:
464 ; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
465 ; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
466 ; CHECK-FMA-WIN-NEXT: vfnmsub132ss (%rcx), %xmm1, %xmm0
468 ; CHECK-FMA-NEXT: vfnmsub213ss %xmm2, %xmm0, %xmm1
469 ; CHECK-FMA-NEXT: vmovaps %xmm1, %xmm0
471 ; CHECK-FMA4-NEXT: vfnmsubss %xmm2, %xmm0, %xmm1, %xmm0
473 ; CHECK-NEXT: retq
474 …%res = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %a1, <4 x float> %a0, <4 x float> %a2)
477 declare <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float>, <4 x float>, <4 x float>)
480 ; CHECK-LABEL: test_x86_fma_vfnmsub_sd:
481 ; CHECK-NEXT: # BB#0:
483 ; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
484 ; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
485 ; CHECK-FMA-WIN-NEXT: vfnmsub132sd (%rdx), %xmm1, %xmm0
487 ; CHECK-FMA-NEXT: vfnmsub213sd %xmm2, %xmm1, %xmm0
489 ; CHECK-FMA4-NEXT: vfnmsubsd %xmm2, %xmm1, %xmm0, %xmm0
491 ; CHECK-NEXT: retq
492 …%res = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %a0, <2 x double> %a1, <2 x double>…
497 ; CHECK-LABEL: test_x86_fma_vfnmsub_bac_sd:
498 ; CHECK-NEXT: # BB#0:
500 ; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
501 ; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
502 ; CHECK-FMA-WIN-NEXT: vfnmsub132sd (%rcx), %xmm1, %xmm0
504 ; CHECK-FMA-NEXT: vfnmsub213sd %xmm2, %xmm0, %xmm1
505 ; CHECK-FMA-NEXT: vmovapd %xmm1, %xmm0
507 ; CHECK-FMA4-NEXT: vfnmsubsd %xmm2, %xmm0, %xmm1, %xmm0
509 ; CHECK-NEXT: retq
510 …%res = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %a1, <2 x double> %a0, <2 x double>…
513 declare <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double>, <2 x double>, <2 x double>)
516 ; CHECK-LABEL: test_x86_fma_vfnmsub_ps:
517 ; CHECK-NEXT: # BB#0:
519 ; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %xmm{{0|1}}
520 ; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %xmm{{0|1}}
521 ; CHECK-FMA-WIN-NEXT: vfnmsub213ps (%r8), %xmm1, %xmm0
523 ; CHECK-FMA-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0
525 ; CHECK-FMA4-NEXT: vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0
527 ; CHECK-NEXT: retq
528 …%res = call <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
531 declare <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float>, <4 x float>, <4 x float>)
534 ; CHECK-LABEL: test_x86_fma_vfnmsub_pd:
535 ; CHECK-NEXT: # BB#0:
537 ; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %xmm{{0|1}}
538 ; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %xmm{{0|1}}
539 ; CHECK-FMA-WIN-NEXT: vfnmsub213pd (%r8), %xmm1, %xmm0
541 ; CHECK-FMA-NEXT: vfnmsub213pd %xmm2, %xmm1, %xmm0
543 ; CHECK-FMA4-NEXT: vfnmsubpd %xmm2, %xmm1, %xmm0, %xmm0
545 ; CHECK-NEXT: retq
546 …%res = call <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x double>…
549 declare <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double>, <2 x double>, <2 x double>)
552 ; CHECK-LABEL: test_x86_fma_vfnmsub_ps_256:
553 ; CHECK-NEXT: # BB#0:
555 ; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %ymm{{0|1}}
556 ; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %ymm{{0|1}}
557 ; CHECK-FMA-WIN-NEXT: vfnmsub213ps (%r8), %ymm1, %ymm0
559 ; CHECK-FMA-NEXT: vfnmsub213ps %ymm2, %ymm1, %ymm0
561 ; CHECK-FMA4-NEXT: vfnmsubps %ymm2, %ymm1, %ymm0, %ymm0
563 ; CHECK-NEXT: retq
564 …%res = call <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>…
567 declare <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float>, <8 x float>, <8 x float>)
570 ; CHECK-LABEL: test_x86_fma_vfnmsub_pd_256:
571 ; CHECK-NEXT: # BB#0:
573 ; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %ymm{{0|1}}
574 ; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %ymm{{0|1}}
575 ; CHECK-FMA-WIN-NEXT: vfnmsub213pd (%r8), %ymm1, %ymm0
577 ; CHECK-FMA-NEXT: vfnmsub213pd %ymm2, %ymm1, %ymm0
579 ; CHECK-FMA4-NEXT: vfnmsubpd %ymm2, %ymm1, %ymm0, %ymm0
581 ; CHECK-NEXT: retq
582 …%res = call <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x dou…
585 declare <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double>, <4 x double>, <4 x double>)
589 ; CHECK-LABEL: test_x86_fma_vfmaddsub_ps:
590 ; CHECK-NEXT: # BB#0:
592 ; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %xmm{{0|1}}
593 ; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %xmm{{0|1}}
594 ; CHECK-FMA-WIN-NEXT: vfmaddsub213ps (%r8), %xmm1, %xmm0
596 ; CHECK-FMA-NEXT: vfmaddsub213ps %xmm2, %xmm1, %xmm0
598 ; CHECK-FMA4-NEXT: vfmaddsubps %xmm2, %xmm1, %xmm0, %xmm0
600 ; CHECK-NEXT: retq
601 …%res = call <4 x float> @llvm.x86.fma.vfmaddsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %…
604 declare <4 x float> @llvm.x86.fma.vfmaddsub.ps(<4 x float>, <4 x float>, <4 x float>)
607 ; CHECK-LABEL: test_x86_fma_vfmaddsub_pd:
608 ; CHECK-NEXT: # BB#0:
610 ; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %xmm{{0|1}}
611 ; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %xmm{{0|1}}
612 ; CHECK-FMA-WIN-NEXT: vfmaddsub213pd (%r8), %xmm1, %xmm0
614 ; CHECK-FMA-NEXT: vfmaddsub213pd %xmm2, %xmm1, %xmm0
616 ; CHECK-FMA4-NEXT: vfmaddsubpd %xmm2, %xmm1, %xmm0, %xmm0
618 ; CHECK-NEXT: retq
619 …%res = call <2 x double> @llvm.x86.fma.vfmaddsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x doubl…
622 declare <2 x double> @llvm.x86.fma.vfmaddsub.pd(<2 x double>, <2 x double>, <2 x double>)
625 ; CHECK-LABEL: test_x86_fma_vfmaddsub_ps_256:
626 ; CHECK-NEXT: # BB#0:
628 ; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %ymm{{0|1}}
629 ; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %ymm{{0|1}}
630 ; CHECK-FMA-WIN-NEXT: vfmaddsub213ps (%r8), %ymm1, %ymm0
632 ; CHECK-FMA-NEXT: vfmaddsub213ps %ymm2, %ymm1, %ymm0
634 ; CHECK-FMA4-NEXT: vfmaddsubps %ymm2, %ymm1, %ymm0, %ymm0
636 ; CHECK-NEXT: retq
637 …%res = call <8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x floa…
640 declare <8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float>, <8 x float>, <8 x float>)
643 ; CHECK-LABEL: test_x86_fma_vfmaddsub_pd_256:
644 ; CHECK-NEXT: # BB#0:
646 ; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %ymm{{0|1}}
647 ; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %ymm{{0|1}}
648 ; CHECK-FMA-WIN-NEXT: vfmaddsub213pd (%r8), %ymm1, %ymm0
650 ; CHECK-FMA-NEXT: vfmaddsub213pd %ymm2, %ymm1, %ymm0
652 ; CHECK-FMA4-NEXT: vfmaddsubpd %ymm2, %ymm1, %ymm0, %ymm0
654 ; CHECK-NEXT: retq
655 …%res = call <4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x d…
658 declare <4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double>, <4 x double>, <4 x double>)
662 ; CHECK-LABEL: test_x86_fma_vfmsubadd_ps:
663 ; CHECK-NEXT: # BB#0:
665 ; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %xmm{{0|1}}
666 ; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %xmm{{0|1}}
667 ; CHECK-FMA-WIN-NEXT: vfmsubadd213ps (%r8), %xmm1, %xmm0
669 ; CHECK-FMA-NEXT: vfmsubadd213ps %xmm2, %xmm1, %xmm0
671 ; CHECK-FMA4-NEXT: vfmsubaddps %xmm2, %xmm1, %xmm0, %xmm0
673 ; CHECK-NEXT: retq
674 …%res = call <4 x float> @llvm.x86.fma.vfmsubadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %…
677 declare <4 x float> @llvm.x86.fma.vfmsubadd.ps(<4 x float>, <4 x float>, <4 x float>)
680 ; CHECK-LABEL: test_x86_fma_vfmsubadd_pd:
681 ; CHECK-NEXT: # BB#0:
683 ; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %xmm{{0|1}}
684 ; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %xmm{{0|1}}
685 ; CHECK-FMA-WIN-NEXT: vfmsubadd213pd (%r8), %xmm1, %xmm0
687 ; CHECK-FMA-NEXT: vfmsubadd213pd %xmm2, %xmm1, %xmm0
689 ; CHECK-FMA4-NEXT: vfmsubaddpd %xmm2, %xmm1, %xmm0, %xmm0
691 ; CHECK-NEXT: retq
692 …%res = call <2 x double> @llvm.x86.fma.vfmsubadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x doubl…
695 declare <2 x double> @llvm.x86.fma.vfmsubadd.pd(<2 x double>, <2 x double>, <2 x double>)
698 ; CHECK-LABEL: test_x86_fma_vfmsubadd_ps_256:
699 ; CHECK-NEXT: # BB#0:
701 ; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %ymm{{0|1}}
702 ; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %ymm{{0|1}}
703 ; CHECK-FMA-WIN-NEXT: vfmsubadd213ps (%r8), %ymm1, %ymm0
705 ; CHECK-FMA-NEXT: vfmsubadd213ps %ymm2, %ymm1, %ymm0
707 ; CHECK-FMA4-NEXT: vfmsubaddps %ymm2, %ymm1, %ymm0, %ymm0
709 ; CHECK-NEXT: retq
710 …%res = call <8 x float> @llvm.x86.fma.vfmsubadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x floa…
713 declare <8 x float> @llvm.x86.fma.vfmsubadd.ps.256(<8 x float>, <8 x float>, <8 x float>)
716 ; CHECK-LABEL: test_x86_fma_vfmsubadd_pd_256:
717 ; CHECK-NEXT: # BB#0:
719 ; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %ymm{{0|1}}
720 ; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %ymm{{0|1}}
721 ; CHECK-FMA-WIN-NEXT: vfmsubadd213pd (%r8), %ymm1, %ymm0
723 ; CHECK-FMA-NEXT: vfmsubadd213pd %ymm2, %ymm1, %ymm0
725 ; CHECK-FMA4-NEXT: vfmsubaddpd %ymm2, %ymm1, %ymm0, %ymm0
727 ; CHECK-NEXT: retq
728 …%res = call <4 x double> @llvm.x86.fma.vfmsubadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x d…
731 declare <4 x double> @llvm.x86.fma.vfmsubadd.pd.256(<4 x double>, <4 x double>, <4 x double>)