; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f | FileCheck %s

; Merge-masked 512-bit vfmadd intrinsics (operands: three vectors, mask, i32 rounding operand).
declare <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
declare <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)

; All-ones mask (i16 -1), rounding operand 4: expects a bare vfnmadd213ps with no write-mask.
define <16 x float> @test_x86_vfnmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; CHECK-LABEL: test_x86_vfnmadd_ps_z:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vfnmadd213ps %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfnmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
  ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.mask.vfnmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) nounwind readnone

; Variable i16 mask, rounding operand 4: expects the mask moved into %k1 and a {%k1}-masked vfnmadd213ps.
define <16 x float> @test_mask_vfnmadd_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
; CHECK-LABEL: test_mask_vfnmadd_ps:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfnmadd213ps %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfnmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4) nounwind
  ret <16 x float> %res
}

; All-ones mask (i8 -1), rounding operand 4: expects a bare vfnmadd213pd with no write-mask.
define <8 x double> @test_x86_vfnmadd_pd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_x86_vfnmadd_pd_z:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vfnmadd213pd %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
  ret <8 x double> %res
}
declare <8 x double> @llvm.x86.avx512.mask.vfnmadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) nounwind readnone

; Variable i8 mask, rounding operand 4: expects the mask moved into %k1 and a {%k1}-masked vfnmadd213pd.
define <8 x double> @test_mask_vfnmadd_pd(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfnmadd_pd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfnmadd213pd %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
  ret <8 x double> %res
}

; All-ones mask (i16 -1), rounding operand 4: expects a bare vfnmsub213ps with no write-mask.
define <16 x float> @test_x86_vfnmsubps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; CHECK-LABEL: test_x86_vfnmsubps_z:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vfnmsub213ps %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
  ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) nounwind readnone

; Variable i16 mask, rounding operand 4: expects the mask moved into %k1 and a {%k1}-masked vfnmsub213ps.
define <16 x float> @test_mask_vfnmsub_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
; CHECK-LABEL: test_mask_vfnmsub_ps:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfnmsub213ps %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4) nounwind
  ret <16 x float> %res
}

; All-ones mask (i8 -1), rounding operand 4: expects a bare vfnmsub213pd with no write-mask.
define <8 x double> @test_x86_vfnmsubpd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_x86_vfnmsubpd_z:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vfnmsub213pd %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
  ret <8 x double> %res
}
declare <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) nounwind readnone

; Variable i8 mask, rounding operand 4: expects the mask moved into %k1 and a {%k1}-masked vfnmsub213pd.
define <8 x double> @test_mask_vfnmsub_pd(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfnmsub_pd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfnmsub213pd %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
  ret <8 x double> %res
}

; All-ones mask (i16 -1), rounding operand 4: expects a bare vfmaddsub213ps with no write-mask.
define <16 x float> @test_x86_vfmaddsubps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; CHECK-LABEL: test_x86_vfmaddsubps_z:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vfmaddsub213ps %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
  ret <16 x float> %res
}

; Variable i16 mask, rounding operand 4: expects the mask moved into %k1 and a {%k1}-masked vfmaddsub213ps.
define <16 x float> @test_mask_fmaddsub_ps(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask) {
; CHECK-LABEL: test_mask_fmaddsub_ps:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmaddsub213ps %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask, i32 4)
  ret <16 x float> %res
}

declare <16 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) nounwind readnone

; All-ones mask (i8 -1), rounding operand 4: expects a bare vfmaddsub213pd with no write-mask.
define <8 x double> @test_x86_vfmaddsubpd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_x86_vfmaddsubpd_z:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vfmaddsub213pd %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
  ret <8 x double> %res
}
declare <8 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) nounwind readnone

; Variable i8 mask, rounding operand 4: expects the mask moved into %k1 and a {%k1}-masked vfmaddsub213pd.
define <8 x double> @test_mask_vfmaddsub_pd(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfmaddsub_pd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmaddsub213pd %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
  ret <8 x double> %res
}

; Adds a masked call (rounding operand 4) to an unmasked call (rounding operand 0):
; expects a {%k1}-masked vfmaddsub213pd on a copy of %zmm0 plus an unmasked {rn-sae} form.
define <8 x double>@test_int_x86_avx512_mask_vfmaddsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask_vfmaddsub_pd_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm3
; CHECK-NEXT:    vfmaddsub213pd %zmm2, %zmm1, %zmm3 {%k1}
; CHECK-NEXT:    vfmaddsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddpd %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}

declare <8 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)

; mask3 variant: the masked call is expected as a {%k1}-masked vfmaddsub231pd into a copy of %zmm2,
; the unmasked rounding-operand-0 call as a {rn-sae} vfmaddsub213pd; the two results are added.
define <8 x double>@test_int_x86_avx512_mask3_vfmaddsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmaddsub_pd_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm2, %zmm3
; CHECK-NEXT:    vfmaddsub231pd %zmm1, %zmm0, %zmm3 {%k1}
; CHECK-NEXT:    vfmaddsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddpd %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}

declare <8 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)

; maskz variant: the masked call is expected as a {%k1} {z} vfmaddsub213pd on a copy of %zmm0,
; the unmasked rounding-operand-0 call as a {rn-sae} form; the two results are added.
define <8 x double>@test_int_x86_avx512_maskz_vfmaddsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_maskz_vfmaddsub_pd_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm3
; CHECK-NEXT:    vfmaddsub213pd %zmm2, %zmm1, %zmm3 {%k1} {z}
; CHECK-NEXT:    vfmaddsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddpd %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}

; Adds a masked call (rounding operand 4) to an unmasked call (rounding operand 0):
; expects a {%k1}-masked vfmaddsub213ps on a copy of %zmm0 plus an unmasked {rn-sae} form.
define <16 x float>@test_int_x86_avx512_mask_vfmaddsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask_vfmaddsub_ps_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm3
; CHECK-NEXT:    vfmaddsub213ps %zmm2, %zmm1, %zmm3 {%k1}
; CHECK-NEXT:    vfmaddsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddps %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
  %res2 = fadd <16 x float> %res, %res1
  ret <16 x float> %res2
}

declare <16 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)

; mask3 variant: the masked call is expected as a {%k1}-masked vfmaddsub231ps into a copy of %zmm2,
; the unmasked rounding-operand-0 call as a {rn-sae} vfmaddsub213ps; the two results are added.
define <16 x float>@test_int_x86_avx512_mask3_vfmaddsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmaddsub_ps_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm2, %zmm3
; CHECK-NEXT:    vfmaddsub231ps %zmm1, %zmm0, %zmm3 {%k1}
; CHECK-NEXT:    vfmaddsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddps %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  %res1 = call <16 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
  %res2 = fadd <16 x float> %res, %res1
  ret <16 x float> %res2
}

declare <16 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)

; maskz variant: the masked call is expected as a {%k1} {z} vfmaddsub213ps on a copy of %zmm0,
; the unmasked rounding-operand-0 call as a {rn-sae} form; the two results are added.
define <16 x float>@test_int_x86_avx512_maskz_vfmaddsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_maskz_vfmaddsub_ps_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm3
; CHECK-NEXT:    vfmaddsub213ps %zmm2, %zmm1, %zmm3 {%k1} {z}
; CHECK-NEXT:    vfmaddsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddps %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  %res1 = call <16 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
  %res2 = fadd <16 x float> %res, %res1
  ret <16 x float> %res2
}

declare <8 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)

; mask3 variant: the masked call is expected as a {%k1}-masked vfmsubadd231pd into a copy of %zmm2,
; the unmasked rounding-operand-0 call as a {rn-sae} vfmsubadd213pd; the two results are added.
define <8 x double>@test_int_x86_avx512_mask3_vfmsubadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsubadd_pd_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm2, %zmm3
; CHECK-NEXT:    vfmsubadd231pd %zmm1, %zmm0, %zmm3 {%k1}
; CHECK-NEXT:    vfmsubadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddpd %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}

declare <16 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)

; mask3 variant: the masked call is expected as a {%k1}-masked vfmsubadd231ps into a copy of %zmm2,
; the unmasked rounding-operand-0 call as a {rn-sae} vfmsubadd213ps; the two results are added.
define <16 x float>@test_int_x86_avx512_mask3_vfmsubadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsubadd_ps_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm2, %zmm3
; CHECK-NEXT:    vfmsubadd231ps %zmm1, %zmm0, %zmm3 {%k1}
; CHECK-NEXT:    vfmsubadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddps %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  %res1 = call <16 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
  %res2 = fadd <16 x float> %res, %res1
  ret <16 x float> %res2
}

; Variable mask, rounding operand 0: expects {rn-sae} on a {%k1}-masked vfmadd213ps.
define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rne(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_rne:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 0) nounwind
  ret <16 x float> %res
}

; Variable mask, rounding operand 1: expects {rd-sae} on a {%k1}-masked vfmadd213ps.
define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rtn(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_rtn:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmadd213ps {rd-sae}, %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 1) nounwind
  ret <16 x float> %res
}

; Variable mask, rounding operand 2: expects {ru-sae} on a {%k1}-masked vfmadd213ps.
define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rtp(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_rtp:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmadd213ps {ru-sae}, %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 2) nounwind
  ret <16 x float> %res
}

; Variable mask, rounding operand 3: expects {rz-sae} on a {%k1}-masked vfmadd213ps.
define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rtz(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_rtz:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmadd213ps {rz-sae}, %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 3) nounwind
  ret <16 x float> %res
}

; Variable mask, rounding operand 4: expects a {%k1}-masked vfmadd213ps with no rounding annotation.
define <16 x float> @test_mask_round_vfmadd512_ps_rrb_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_current:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmadd213ps %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4) nounwind
  ret <16 x float> %res
}

; All-ones mask, rounding operand 0: expects an unmasked vfmadd213ps with {rn-sae}.
define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rne(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_rne:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vfmadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 0) nounwind
  ret <16 x float> %res
}

; All-ones mask, rounding operand 1: expects an unmasked vfmadd213ps with {rd-sae}.
define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rtn(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_rtn:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vfmadd213ps {rd-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 1) nounwind
  ret <16 x float> %res
}

; All-ones mask, rounding operand 2: expects an unmasked vfmadd213ps with {ru-sae}.
define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rtp(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_rtp:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vfmadd213ps {ru-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 2) nounwind
  ret <16 x float> %res
}

; All-ones mask, rounding operand 3: expects an unmasked vfmadd213ps with {rz-sae}.
define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rtz(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_rtz:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vfmadd213ps {rz-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 3) nounwind
  ret <16 x float> %res
}

; All-ones mask, rounding operand 4: expects a bare vfmadd213ps with no mask or rounding annotation.
define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_current:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vfmadd213ps %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
  ret <16 x float> %res
}

declare <8 x double> @llvm.x86.avx512.mask3.vfmsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)

; mask3 variant: the masked call is expected as a {%k1}-masked vfmsub231pd into a copy of %zmm2,
; the unmasked rounding-operand-0 call as a {rn-sae} vfmsub213pd; the two results are added.
define <8 x double>@test_int_x86_avx512_mask3_vfmsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsub_pd_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm2, %zmm3
; CHECK-NEXT:    vfmsub231pd %zmm1, %zmm0, %zmm3 {%k1}
; CHECK-NEXT:    vfmsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddpd %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask3.vfmsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.mask3.vfmsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}

declare <16 x float> @llvm.x86.avx512.mask3.vfmsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)

; mask3 variant: the masked call is expected as a {%k1}-masked vfmsub231ps into a copy of %zmm2,
; the unmasked rounding-operand-0 call as a {rn-sae} vfmsub213ps; the two results are added.
define <16 x float>@test_int_x86_avx512_mask3_vfmsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsub_ps_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm2, %zmm3
; CHECK-NEXT:    vfmsub231ps %zmm1, %zmm0, %zmm3 {%k1}
; CHECK-NEXT:    vfmsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddps %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask3.vfmsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  %res1 = call <16 x float> @llvm.x86.avx512.mask3.vfmsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
  %res2 = fadd <16 x float> %res, %res1
  ret <16 x float> %res2
}

; Variable mask, rounding operand 0: expects {rn-sae} on a {%k1}-masked vfmadd213pd.
define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rne(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_rne:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 0) nounwind
  ret <8 x double> %res
}

; Variable mask, rounding operand 1: expects {rd-sae} on a {%k1}-masked vfmadd213pd.
define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rtn(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_rtn:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmadd213pd {rd-sae}, %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 1) nounwind
  ret <8 x double> %res
}

; Variable mask, rounding operand 2: expects {ru-sae} on a {%k1}-masked vfmadd213pd.
define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rtp(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_rtp:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmadd213pd {ru-sae}, %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 2) nounwind
  ret <8 x double> %res
}

; Variable mask, rounding operand 3: expects {rz-sae} on a {%k1}-masked vfmadd213pd.
define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rtz(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_rtz:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmadd213pd {rz-sae}, %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 3) nounwind
  ret <8 x double> %res
}

; Variable mask, rounding operand 4: expects a {%k1}-masked vfmadd213pd with no rounding annotation.
define <8 x double> @test_mask_round_vfmadd512_pd_rrb_current(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_current:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmadd213pd %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
  ret <8 x double> %res
}

; All-ones mask, rounding operand 0: expects an unmasked vfmadd213pd with {rn-sae}.
define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rne(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_rne:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vfmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 0) nounwind
  ret <8 x double> %res
}

; All-ones mask, rounding operand 1: expects an unmasked vfmadd213pd with {rd-sae}.
define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rtn(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_rtn:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vfmadd213pd {rd-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 1) nounwind
  ret <8 x double> %res
}

; All-ones mask, rounding operand 2: expects an unmasked vfmadd213pd with {ru-sae}.
define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rtp(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_rtp:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vfmadd213pd {ru-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 2) nounwind
  ret <8 x double> %res
}

; All-ones mask, rounding operand 3: expects an unmasked vfmadd213pd with {rz-sae}.
define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rtz(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_rtz:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vfmadd213pd {rz-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 3) nounwind
  ret <8 x double> %res
}

; All-ones mask, rounding operand 4: expects a bare vfmadd213pd with no mask or rounding annotation.
define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_current(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_current:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vfmadd213pd %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
  ret <8 x double> %res
}

; Adds a masked call (rounding operand 4) to an unmasked call (rounding operand 0):
; expects a {%k1}-masked vfmadd213pd on a copy of %zmm0 plus an unmasked {rn-sae} form.
define <8 x double>@test_int_x86_avx512_mask_vfmadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask_vfmadd_pd_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm3
; CHECK-NEXT:    vfmadd213pd %zmm2, %zmm1, %zmm3 {%k1}
; CHECK-NEXT:    vfmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddpd %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}

declare <8 x double> @llvm.x86.avx512.mask3.vfmadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)

; mask3 variant: the masked call is expected as a {%k1}-masked vfmadd231pd into a copy of %zmm2,
; the unmasked rounding-operand-0 call as a {rn-sae} vfmadd213pd; the two results are added.
define <8 x double>@test_int_x86_avx512_mask3_vfmadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_pd_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm2, %zmm3
; CHECK-NEXT:    vfmadd231pd %zmm1, %zmm0, %zmm3 {%k1}
; CHECK-NEXT:    vfmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddpd %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask3.vfmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.mask3.vfmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}

declare <8 x double> @llvm.x86.avx512.maskz.vfmadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)

; maskz variant: the masked call is expected as a {%k1} {z} vfmadd213pd on a copy of %zmm0,
; the unmasked rounding-operand-0 call as a {rn-sae} form; the two results are added.
define <8 x double>@test_int_x86_avx512_maskz_vfmadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_maskz_vfmadd_pd_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm3
; CHECK-NEXT:    vfmadd213pd %zmm2, %zmm1, %zmm3 {%k1} {z}
; CHECK-NEXT:    vfmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddpd %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.maskz.vfmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.maskz.vfmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}

; Adds a masked call (rounding operand 4) to an unmasked call (rounding operand 0):
; expects a {%k1}-masked vfmadd213ps on a copy of %zmm0 plus an unmasked {rn-sae} form.
define <16 x float>@test_int_x86_avx512_mask_vfmadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask_vfmadd_ps_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm3
; CHECK-NEXT:    vfmadd213ps %zmm2, %zmm1, %zmm3 {%k1}
; CHECK-NEXT:    vfmadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddps %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
  %res2 = fadd <16 x float> %res, %res1
  ret <16 x float> %res2
}

declare <16 x float> @llvm.x86.avx512.mask3.vfmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)

; mask3 variant: the masked call is expected as a {%k1}-masked vfmadd231ps into a copy of %zmm2,
; the unmasked rounding-operand-0 call as a {rn-sae} vfmadd213ps; the two results are added.
define <16 x float>@test_int_x86_avx512_mask3_vfmadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_ps_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm2, %zmm3
; CHECK-NEXT:    vfmadd231ps %zmm1, %zmm0, %zmm3 {%k1}
; CHECK-NEXT:    vfmadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddps %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask3.vfmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  %res1 = call <16 x float> @llvm.x86.avx512.mask3.vfmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
  %res2 = fadd <16 x float> %res, %res1
  ret <16 x float> %res2
}

declare <16 x float> @llvm.x86.avx512.maskz.vfmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)

; maskz variant: the masked call is expected as a {%k1} {z} vfmadd213ps on a copy of %zmm0,
; the unmasked rounding-operand-0 call as a {rn-sae} form; the two results are added.
define <16 x float>@test_int_x86_avx512_maskz_vfmadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_maskz_vfmadd_ps_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm3
; CHECK-NEXT:    vfmadd213ps %zmm2, %zmm1, %zmm3 {%k1} {z}
; CHECK-NEXT:    vfmadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddps %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.maskz.vfmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  %res1 = call <16 x float> @llvm.x86.avx512.maskz.vfmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
  %res2 = fadd <16 x float> %res, %res1
  ret <16 x float> %res2
}


; Variable mask, rounding operand 0: expects {rn-sae} on a {%k1}-masked vfnmsub213pd.
define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rne(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_rne:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfnmsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 0) nounwind
  ret <8 x double> %res
}

; Variable mask, rounding operand 1: expects {rd-sae} on a {%k1}-masked vfnmsub213pd.
define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rtn(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_rtn:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfnmsub213pd {rd-sae}, %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 1) nounwind
  ret <8 x double> %res
}

; Write-masked mask.vfnmsub.pd.512 call with mask %mask and trailing i32 2.
; Expected assembly: kmovw of the mask into %k1, then a single vfnmsub213pd
; carrying {ru-sae} and the {%k1} write-mask.
603define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rtp(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
604; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_rtp:
605; CHECK:       ## BB#0:
606; CHECK-NEXT:    kmovw %edi, %k1
607; CHECK-NEXT:    vfnmsub213pd {ru-sae}, %zmm2, %zmm1, %zmm0 {%k1}
608; CHECK-NEXT:    retq
609  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 2) nounwind
610  ret <8 x double> %res
611}
612
; Write-masked mask.vfnmsub.pd.512 call with mask %mask and trailing i32 3.
; Expected assembly: kmovw of the mask into %k1, then a single vfnmsub213pd
; carrying {rz-sae} and the {%k1} write-mask.
613define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rtz(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
614; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_rtz:
615; CHECK:       ## BB#0:
616; CHECK-NEXT:    kmovw %edi, %k1
617; CHECK-NEXT:    vfnmsub213pd {rz-sae}, %zmm2, %zmm1, %zmm0 {%k1}
618; CHECK-NEXT:    retq
619  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 3) nounwind
620  ret <8 x double> %res
621}
622
; Write-masked mask.vfnmsub.pd.512 call with mask %mask and trailing i32 4.
; Expected assembly: kmovw of the mask into %k1, then a single vfnmsub213pd
; with the {%k1} write-mask and no {..-sae} annotation.
623define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_current(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
624; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_current:
625; CHECK:       ## BB#0:
626; CHECK-NEXT:    kmovw %edi, %k1
627; CHECK-NEXT:    vfnmsub213pd %zmm2, %zmm1, %zmm0 {%k1}
628; CHECK-NEXT:    retq
629  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
630  ret <8 x double> %res
631}
632
; mask.vfnmsub.pd.512 call with a constant all-ones mask (i8 -1) and trailing
; i32 0. Expected assembly: one unmasked vfnmsub213pd carrying {rn-sae}, with
; no kmovw.
633define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rne(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
634; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_rne:
635; CHECK:       ## BB#0:
636; CHECK-NEXT:    vfnmsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0
637; CHECK-NEXT:    retq
638  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 0) nounwind
639  ret <8 x double> %res
640}
641
; mask.vfnmsub.pd.512 call with a constant all-ones mask (i8 -1) and trailing
; i32 1. Expected assembly: one unmasked vfnmsub213pd carrying {rd-sae}, with
; no kmovw.
642define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rtn(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
643; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_rtn:
644; CHECK:       ## BB#0:
645; CHECK-NEXT:    vfnmsub213pd {rd-sae}, %zmm2, %zmm1, %zmm0
646; CHECK-NEXT:    retq
647  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 1) nounwind
648  ret <8 x double> %res
649}
650
; mask.vfnmsub.pd.512 call with a constant all-ones mask (i8 -1) and trailing
; i32 2. Expected assembly: one unmasked vfnmsub213pd carrying {ru-sae}, with
; no kmovw.
651define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rtp(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
652; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_rtp:
653; CHECK:       ## BB#0:
654; CHECK-NEXT:    vfnmsub213pd {ru-sae}, %zmm2, %zmm1, %zmm0
655; CHECK-NEXT:    retq
656  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 2) nounwind
657  ret <8 x double> %res
658}
659
; mask.vfnmsub.pd.512 call with a constant all-ones mask (i8 -1) and trailing
; i32 3. Expected assembly: one unmasked vfnmsub213pd carrying {rz-sae}, with
; no kmovw.
660define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rtz(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
661; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_rtz:
662; CHECK:       ## BB#0:
663; CHECK-NEXT:    vfnmsub213pd {rz-sae}, %zmm2, %zmm1, %zmm0
664; CHECK-NEXT:    retq
665  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 3) nounwind
666  ret <8 x double> %res
667}
668
; mask.vfnmsub.pd.512 call with a constant all-ones mask (i8 -1) and trailing
; i32 4. Expected assembly: one unmasked vfnmsub213pd with no {..-sae}
; annotation and no kmovw.
669define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_current(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
670; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_current:
671; CHECK:       ## BB#0:
672; CHECK-NEXT:    vfnmsub213pd %zmm2, %zmm1, %zmm0
673; CHECK-NEXT:    retq
674  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
675  ret <8 x double> %res
676}
677
; Calls the mask.vfnmsub.pd.512 intrinsic twice on the same three vectors and
; returns the fadd of both results:
;   * mask %x3, trailing i32 4 -> expected as vfnmsub213pd with {%k1}, written
;     into a copy of %zmm0 (%zmm3);
;   * mask i8 -1, trailing i32 0 -> expected as an unmasked vfnmsub213pd
;     carrying {rn-sae}.
678define <8 x double>@test_int_x86_avx512_mask_vfnmsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
679; CHECK-LABEL: test_int_x86_avx512_mask_vfnmsub_pd_512:
680; CHECK:       ## BB#0:
681; CHECK-NEXT:    kmovw %edi, %k1
682; CHECK-NEXT:    vmovaps %zmm0, %zmm3
683; CHECK-NEXT:    vfnmsub213pd %zmm2, %zmm1, %zmm3 {%k1}
684; CHECK-NEXT:    vfnmsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0
685; CHECK-NEXT:    vaddpd %zmm0, %zmm3, %zmm0
686; CHECK-NEXT:    retq
687  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
688  %res1 = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
689  %res2 = fadd <8 x double> %res, %res1
690  ret <8 x double> %res2
691}
692
; Declaration of the "mask3" 512-bit vfnmsub.pd intrinsic used by the test
; below: three <8 x double> operands, an i8 mask and a trailing i32 operand.
693declare <8 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
694
; Calls the mask3.vfnmsub.pd.512 intrinsic twice on the same three vectors and
; returns the fadd of both results:
;   * mask %x3, trailing i32 4 -> expected as the 231 form (vfnmsub231pd) with
;     {%k1}, written into a copy of %zmm2 (%zmm3);
;   * mask i8 -1, trailing i32 0 -> expected as an unmasked vfnmsub213pd
;     carrying {rn-sae}.
695define <8 x double>@test_int_x86_avx512_mask3_vfnmsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
696; CHECK-LABEL: test_int_x86_avx512_mask3_vfnmsub_pd_512:
697; CHECK:       ## BB#0:
698; CHECK-NEXT:    kmovw %edi, %k1
699; CHECK-NEXT:    vmovaps %zmm2, %zmm3
700; CHECK-NEXT:    vfnmsub231pd %zmm1, %zmm0, %zmm3 {%k1}
701; CHECK-NEXT:    vfnmsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0
702; CHECK-NEXT:    vaddpd %zmm0, %zmm3, %zmm0
703; CHECK-NEXT:    retq
704  %res = call <8 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
705  %res1 = call <8 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
706  %res2 = fadd <8 x double> %res, %res1
707  ret <8 x double> %res2
708}
709
; Calls the mask.vfnmsub.ps.512 intrinsic twice on the same three vectors and
; returns the fadd of both results:
;   * mask %x3, trailing i32 4 -> expected as vfnmsub213ps with {%k1}, written
;     into a copy of %zmm0 (%zmm3);
;   * mask i16 -1, trailing i32 0 -> expected as an unmasked vfnmsub213ps
;     carrying {rn-sae}.
710define <16 x float>@test_int_x86_avx512_mask_vfnmsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
711; CHECK-LABEL: test_int_x86_avx512_mask_vfnmsub_ps_512:
712; CHECK:       ## BB#0:
713; CHECK-NEXT:    kmovw %edi, %k1
714; CHECK-NEXT:    vmovaps %zmm0, %zmm3
715; CHECK-NEXT:    vfnmsub213ps %zmm2, %zmm1, %zmm3 {%k1}
716; CHECK-NEXT:    vfnmsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0
717; CHECK-NEXT:    vaddps %zmm0, %zmm3, %zmm0
718; CHECK-NEXT:    retq
719  %res = call <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
720  %res1 = call <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
721  %res2 = fadd <16 x float> %res, %res1
722  ret <16 x float> %res2
723}
724
; Declaration of the "mask3" 512-bit vfnmsub.ps intrinsic used by the test
; below: three <16 x float> operands, an i16 mask and a trailing i32 operand.
725declare <16 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
726
; Calls the mask3.vfnmsub.ps.512 intrinsic twice on the same three vectors and
; returns the fadd of both results:
;   * mask %x3, trailing i32 4 -> expected as the 231 form (vfnmsub231ps) with
;     {%k1}, written into a copy of %zmm2 (%zmm3);
;   * mask i16 -1, trailing i32 0 -> expected as an unmasked vfnmsub213ps
;     carrying {rn-sae}.
727define <16 x float>@test_int_x86_avx512_mask3_vfnmsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
728; CHECK-LABEL: test_int_x86_avx512_mask3_vfnmsub_ps_512:
729; CHECK:       ## BB#0:
730; CHECK-NEXT:    kmovw %edi, %k1
731; CHECK-NEXT:    vmovaps %zmm2, %zmm3
732; CHECK-NEXT:    vfnmsub231ps %zmm1, %zmm0, %zmm3 {%k1}
733; CHECK-NEXT:    vfnmsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0
734; CHECK-NEXT:    vaddps %zmm0, %zmm3, %zmm0
735; CHECK-NEXT:    retq
736  %res = call <16 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
737  %res1 = call <16 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
738  %res2 = fadd <16 x float> %res, %res1
739  ret <16 x float> %res2
740}
741
; Calls the mask.vfnmadd.pd.512 intrinsic twice on the same three vectors and
; returns the fadd of both results:
;   * mask %x3, trailing i32 4 -> expected as vfnmadd213pd with {%k1}, written
;     into a copy of %zmm0 (%zmm3);
;   * mask i8 -1, trailing i32 0 -> expected as an unmasked vfnmadd213pd
;     carrying {rn-sae}.
742define <8 x double>@test_int_x86_avx512_mask_vfnmadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
743; CHECK-LABEL: test_int_x86_avx512_mask_vfnmadd_pd_512:
744; CHECK:       ## BB#0:
745; CHECK-NEXT:    kmovw %edi, %k1
746; CHECK-NEXT:    vmovaps %zmm0, %zmm3
747; CHECK-NEXT:    vfnmadd213pd %zmm2, %zmm1, %zmm3 {%k1}
748; CHECK-NEXT:    vfnmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0
749; CHECK-NEXT:    vaddpd %zmm0, %zmm3, %zmm0
750; CHECK-NEXT:    retq
751  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
752  %res1 = call <8 x double> @llvm.x86.avx512.mask.vfnmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
753  %res2 = fadd <8 x double> %res, %res1
754  ret <8 x double> %res2
755}
756
; Calls the mask.vfnmadd.ps.512 intrinsic twice on the same three vectors and
; returns the fadd of both results:
;   * mask %x3, trailing i32 4 -> expected as vfnmadd213ps with {%k1}, written
;     into a copy of %zmm0 (%zmm3);
;   * mask i16 -1, trailing i32 0 -> expected as an unmasked vfnmadd213ps
;     carrying {rn-sae}.
757define <16 x float>@test_int_x86_avx512_mask_vfnmadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
758; CHECK-LABEL: test_int_x86_avx512_mask_vfnmadd_ps_512:
759; CHECK:       ## BB#0:
760; CHECK-NEXT:    kmovw %edi, %k1
761; CHECK-NEXT:    vmovaps %zmm0, %zmm3
762; CHECK-NEXT:    vfnmadd213ps %zmm2, %zmm1, %zmm3 {%k1}
763; CHECK-NEXT:    vfnmadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0
764; CHECK-NEXT:    vaddps %zmm0, %zmm3, %zmm0
765; CHECK-NEXT:    retq
766  %res = call <16 x float> @llvm.x86.avx512.mask.vfnmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
767  %res1 = call <16 x float> @llvm.x86.avx512.mask.vfnmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
768  %res2 = fadd <16 x float> %res, %res1
769  ret <16 x float> %res2
770}
771