; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f --show-mc-encoding | FileCheck %s

; Declarations of the masked 512-bit vfmadd intrinsics (vector operands, mask, rounding operand).
declare <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
declare <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)

; All-ones mask, rounding operand 4: the call must select vfnmadd213ps on zmm registers.
define <16 x float> @test_x86_vfnmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
  ; CHECK-LABEL: test_x86_vfnmadd_ps_z:
  ; CHECK: vfnmadd213ps %zmm
  %res = call <16 x float> @llvm.x86.avx512.mask.vfnmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
  ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.mask.vfnmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) nounwind readnone

; Variable mask, rounding operand 4: the selected vfnmadd213ps must carry the {%k1} write-mask,
; so the pattern below requires the mask annotation rather than just the mnemonic.
define <16 x float> @test_mask_vfnmadd_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
  ; CHECK-LABEL: test_mask_vfnmadd_ps:
  ; CHECK: vfnmadd213ps %zmm{{.*}} {%k1}
  %res = call <16 x float> @llvm.x86.avx512.mask.vfnmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4) nounwind
  ret <16 x float> %res
}

; All-ones mask, rounding operand 4: the call must select vfnmadd213pd on zmm registers.
define <8 x double> @test_x86_vfnmadd_pd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
  ; CHECK-LABEL: test_x86_vfnmadd_pd_z:
  ; CHECK: vfnmadd213pd %zmm
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
  ret <8 x double> %res
}
declare <8 x double> @llvm.x86.avx512.mask.vfnmadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) nounwind readnone

; Variable mask, rounding operand 4: the selected vfnmadd213pd must carry the {%k1} write-mask,
; so the pattern below requires the mask annotation rather than just the mnemonic.
define <8 x double> @test_mask_vfnmadd_pd(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
  ; CHECK-LABEL: test_mask_vfnmadd_pd:
  ; CHECK: vfnmadd213pd %zmm{{.*}} {%k1}
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
  ret <8 x double> %res
}

; All-ones mask, rounding operand 4: the call must select vfnmsub213ps on zmm registers.
define <16 x float> @test_x86_vfnmsubps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
  ; CHECK-LABEL: test_x86_vfnmsubps_z:
  ; CHECK: vfnmsub213ps %zmm
  %res = call <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
  ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) nounwind readnone

; Variable mask, rounding operand 4: the selected vfnmsub213ps must carry the {%k1} write-mask,
; so the pattern below requires the mask annotation rather than just the mnemonic.
define <16 x float> @test_mask_vfnmsub_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
  ; CHECK-LABEL: test_mask_vfnmsub_ps:
  ; CHECK: vfnmsub213ps %zmm{{.*}} {%k1}
  %res = call <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4) nounwind
  ret <16 x float> %res
}

; All-ones mask, rounding operand 4: the call must select vfnmsub213pd on zmm registers.
define <8 x double> @test_x86_vfnmsubpd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
  ; CHECK-LABEL: test_x86_vfnmsubpd_z:
  ; CHECK: vfnmsub213pd %zmm
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
  ret <8 x double> %res
}
declare <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) nounwind readnone

; Variable mask, rounding operand 4: the selected vfnmsub213pd must carry the {%k1} write-mask,
; so the pattern below requires the mask annotation rather than just the mnemonic.
define <8 x double> @test_mask_vfnmsub_pd(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
  ; CHECK-LABEL: test_mask_vfnmsub_pd:
  ; CHECK: vfnmsub213pd %zmm{{.*}} {%k1}
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
  ret <8 x double> %res
}

; All-ones mask, rounding operand 4: the call must select vfmaddsub213ps on zmm registers.
define <16 x float> @test_x86_vfmaddsubps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
  ; CHECK-LABEL: test_x86_vfmaddsubps_z:
  ; CHECK: vfmaddsub213ps %zmm
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
  ret <16 x float> %res
}

; Variable mask, rounding operand 4: pins the exact masked vfmaddsub213ps operands and encoding bytes.
define <16 x float> @test_mask_fmaddsub_ps(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask) {
; CHECK-LABEL: test_mask_fmaddsub_ps:
; CHECK: vfmaddsub213ps %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0xa6,0xc2]
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask, i32 4)
  ret <16 x float> %res
}

declare <16 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) nounwind readnone

; All-ones mask, rounding operand 4: the call must select vfmaddsub213pd on zmm registers.
define <8 x double> @test_x86_vfmaddsubpd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
  ; CHECK-LABEL: test_x86_vfmaddsubpd_z:
  ; CHECK: vfmaddsub213pd %zmm
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
  ret <8 x double> %res
}
declare <8 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) nounwind readnone

; Variable mask, rounding operand 4: the selected vfmaddsub213pd must carry the {%k1} write-mask,
; so the pattern below requires the mask annotation rather than just the mnemonic.
define <8 x double> @test_mask_vfmaddsub_pd(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
  ; CHECK-LABEL: test_mask_vfmaddsub_pd:
  ; CHECK: vfmaddsub213pd %zmm{{.*}} {%k1}
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
  ret <8 x double> %res
}

; Calls the mask intrinsic twice (variable mask with rounding operand 4, then all-ones mask with
; rounding operand 0) and adds the results, so both the masked and the {rn-sae} forms are emitted.
define <8 x double>@test_int_x86_avx512_mask_vfmaddsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask_vfmaddsub_pd_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movzbl %dil, %eax
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm3
; CHECK-NEXT:    vfmaddsub213pd %zmm2, %zmm1, %zmm3 {%k1}
; CHECK-NEXT:    vfmaddsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddpd %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}

declare <8 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)

; mask3 variant: the masked call is expected as the 231 form writing a copy of %zmm2; the second,
; all-ones-mask call with rounding operand 0 is expected as the {rn-sae} 213 form.
define <8 x double>@test_int_x86_avx512_mask3_vfmaddsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmaddsub_pd_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movzbl %dil, %eax
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vmovaps %zmm2, %zmm3
; CHECK-NEXT:    vfmaddsub231pd %zmm1, %zmm0, %zmm3 {%k1}
; CHECK-NEXT:    vfmaddsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddpd %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}

declare <8 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)

; maskz variant: the masked call is expected with both {%k1} and {z}; the second, all-ones-mask
; call with rounding operand 0 is expected as the {rn-sae} form.
define <8 x double>@test_int_x86_avx512_maskz_vfmaddsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_maskz_vfmaddsub_pd_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movzbl %dil, %eax
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm3
; CHECK-NEXT:    vfmaddsub213pd %zmm2, %zmm1, %zmm3 {%k1} {z}
; CHECK-NEXT:    vfmaddsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddpd %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}

; Calls the mask intrinsic twice (variable mask with rounding operand 4, then all-ones mask with
; rounding operand 0) and adds the results, so both the masked and the {rn-sae} forms are emitted.
define <16 x float>@test_int_x86_avx512_mask_vfmaddsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask_vfmaddsub_ps_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm3
; CHECK-NEXT:    vfmaddsub213ps %zmm2, %zmm1, %zmm3 {%k1}
; CHECK-NEXT:    vfmaddsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddps %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
  %res2 = fadd <16 x float> %res, %res1
  ret <16 x float> %res2
}

declare <16 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)

; mask3 variant: the masked call is expected as the 231 form writing a copy of %zmm2; the second,
; all-ones-mask call with rounding operand 0 is expected as the {rn-sae} 213 form.
define <16 x float>@test_int_x86_avx512_mask3_vfmaddsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmaddsub_ps_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm2, %zmm3
; CHECK-NEXT:    vfmaddsub231ps %zmm1, %zmm0, %zmm3 {%k1}
; CHECK-NEXT:    vfmaddsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddps %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  %res1 = call <16 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
  %res2 = fadd <16 x float> %res, %res1
  ret <16 x float> %res2
}

declare <16 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)

; maskz variant: the masked call is expected with both {%k1} and {z}; the second, all-ones-mask
; call with rounding operand 0 is expected as the {rn-sae} form.
define <16 x float>@test_int_x86_avx512_maskz_vfmaddsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_maskz_vfmaddsub_ps_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm3
; CHECK-NEXT:    vfmaddsub213ps %zmm2, %zmm1, %zmm3 {%k1} {z}
; CHECK-NEXT:    vfmaddsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddps %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  %res1 = call <16 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
  %res2 = fadd <16 x float> %res, %res1
  ret <16 x float> %res2
}

declare <8 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)

; mask3 variant: the masked call is expected as the 231 form writing a copy of %zmm2; the second,
; all-ones-mask call with rounding operand 0 is expected as the {rn-sae} 213 form.
define <8 x double>@test_int_x86_avx512_mask3_vfmsubadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsubadd_pd_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movzbl %dil, %eax
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vmovaps %zmm2, %zmm3
; CHECK-NEXT:    vfmsubadd231pd %zmm1, %zmm0, %zmm3 {%k1}
; CHECK-NEXT:    vfmsubadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddpd %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}

declare <16 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)

; mask3 variant: the masked call is expected as the 231 form writing a copy of %zmm2; the second,
; all-ones-mask call with rounding operand 0 is expected as the {rn-sae} 213 form.
define <16 x float>@test_int_x86_avx512_mask3_vfmsubadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsubadd_ps_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm2, %zmm3
; CHECK-NEXT:    vfmsubadd231ps %zmm1, %zmm0, %zmm3 {%k1}
; CHECK-NEXT:    vfmsubadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddps %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  %res1 = call <16 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
  %res2 = fadd <16 x float> %res, %res1
  ret <16 x float> %res2
}

; Variable mask, rounding operand 0: expects the {rn-sae} masked form and its exact encoding.
define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rne(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
  ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_rne:
  ; CHECK: vfmadd213ps  {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x19,0xa8,0xc2]
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 0) nounwind
  ret <16 x float> %res
}

; Variable mask, rounding operand 1: expects the {rd-sae} masked form and its exact encoding.
define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rtn(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
  ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_rtn:
  ; CHECK: vfmadd213ps  {rd-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x39,0xa8,0xc2]
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 1) nounwind
  ret <16 x float> %res
}

; Variable mask, rounding operand 2: expects the {ru-sae} masked form and its exact encoding.
define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rtp(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
  ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_rtp:
  ; CHECK: vfmadd213ps  {ru-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x59,0xa8,0xc2]
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 2) nounwind
  ret <16 x float> %res
}

; Variable mask, rounding operand 3: expects the {rz-sae} masked form and its exact encoding.
define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rtz(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
  ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_rtz:
  ; CHECK: vfmadd213ps  {rz-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x79,0xa8,0xc2]
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 3) nounwind
  ret <16 x float> %res
}

; Variable mask, rounding operand 4: expects the masked form with no embedded-rounding annotation.
define <16 x float> @test_mask_round_vfmadd512_ps_rrb_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
  ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_current:
  ; CHECK: vfmadd213ps  %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0xa8,0xc2]
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4) nounwind
  ret <16 x float> %res
}

; All-ones mask, rounding operand 0: expects the unmasked {rn-sae} form and its exact encoding.
define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rne(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
  ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_rne:
  ; CHECK: vfmadd213ps  {rn-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x18,0xa8,0xc2]
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 0) nounwind
  ret <16 x float> %res
}

; All-ones mask, rounding operand 1: expects the unmasked {rd-sae} form and its exact encoding.
define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rtn(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
  ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_rtn:
  ; CHECK: vfmadd213ps  {rd-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x38,0xa8,0xc2]
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 1) nounwind
  ret <16 x float> %res
}

; All-ones mask, rounding operand 2: expects the unmasked {ru-sae} form and its exact encoding.
define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rtp(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
  ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_rtp:
  ; CHECK: vfmadd213ps  {ru-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x58,0xa8,0xc2]
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 2) nounwind
  ret <16 x float> %res
}

; All-ones mask, rounding operand 3: expects the unmasked {rz-sae} form and its exact encoding.
define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rtz(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
  ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_rtz:
  ; CHECK: vfmadd213ps  {rz-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x78,0xa8,0xc2]
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 3) nounwind
  ret <16 x float> %res
}

; All-ones mask, rounding operand 4: expects the unmasked form with no embedded-rounding annotation.
define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
  ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_current:
  ; CHECK: vfmadd213ps  %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x48,0xa8,0xc2]
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
  ret <16 x float> %res
}

declare <8 x double> @llvm.x86.avx512.mask3.vfmsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)

; mask3 variant: the masked call is expected as the 231 form writing a copy of %zmm2; the second,
; all-ones-mask call with rounding operand 0 is expected as the {rn-sae} 213 form.
define <8 x double>@test_int_x86_avx512_mask3_vfmsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsub_pd_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movzbl %dil, %eax
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vmovaps %zmm2, %zmm3
; CHECK-NEXT:    vfmsub231pd %zmm1, %zmm0, %zmm3 {%k1}
; CHECK-NEXT:    vfmsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddpd %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask3.vfmsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.mask3.vfmsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}

declare <16 x float> @llvm.x86.avx512.mask3.vfmsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)

; mask3 variant: the masked call is expected as the 231 form writing a copy of %zmm2; the second,
; all-ones-mask call with rounding operand 0 is expected as the {rn-sae} 213 form.
define <16 x float>@test_int_x86_avx512_mask3_vfmsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsub_ps_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm2, %zmm3
; CHECK-NEXT:    vfmsub231ps %zmm1, %zmm0, %zmm3 {%k1}
; CHECK-NEXT:    vfmsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddps %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask3.vfmsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  %res1 = call <16 x float> @llvm.x86.avx512.mask3.vfmsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
  %res2 = fadd <16 x float> %res, %res1
  ret <16 x float> %res2
}

; Variable mask, rounding operand 0: expects the {rn-sae} masked form and its exact encoding.
define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rne(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
  ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_rne:
  ; CHECK: vfmadd213pd  {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x19,0xa8,0xc2]
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 0) nounwind
  ret <8 x double> %res
}

; Variable mask, rounding operand 1: expects the {rd-sae} masked form and its exact encoding.
define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rtn(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
  ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_rtn:
  ; CHECK: vfmadd213pd  {rd-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x39,0xa8,0xc2]
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 1) nounwind
  ret <8 x double> %res
}

; Variable mask, rounding operand 2: expects the {ru-sae} masked form and its exact encoding.
define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rtp(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
  ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_rtp:
  ; CHECK: vfmadd213pd  {ru-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x59,0xa8,0xc2]
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 2) nounwind
  ret <8 x double> %res
}

; Variable mask, rounding operand 3: expects the {rz-sae} masked form and its exact encoding.
define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rtz(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
  ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_rtz:
  ; CHECK: vfmadd213pd  {rz-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x79,0xa8,0xc2]
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 3) nounwind
  ret <8 x double> %res
}

; Variable mask, rounding operand 4: expects the masked form with no embedded-rounding annotation.
define <8 x double> @test_mask_round_vfmadd512_pd_rrb_current(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
  ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_current:
  ; CHECK: vfmadd213pd  %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0xa8,0xc2]
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
  ret <8 x double> %res
}

; All-ones mask, rounding operand 0: expects the unmasked {rn-sae} form and its exact encoding.
define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rne(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
  ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_rne:
  ; CHECK: vfmadd213pd  {rn-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x18,0xa8,0xc2]
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 0) nounwind
  ret <8 x double> %res
}

; All-ones mask, rounding operand 1: expects the unmasked {rd-sae} form and its exact encoding.
define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rtn(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
  ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_rtn:
  ; CHECK: vfmadd213pd  {rd-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x38,0xa8,0xc2]
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 1) nounwind
  ret <8 x double> %res
}

; All-ones mask, rounding operand 2: expects the unmasked {ru-sae} form and its exact encoding.
define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rtp(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
  ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_rtp:
  ; CHECK: vfmadd213pd  {ru-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x58,0xa8,0xc2]
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 2) nounwind
  ret <8 x double> %res
}

; All-ones mask, rounding operand 3: expects the unmasked {rz-sae} form and its exact encoding.
define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rtz(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
  ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_rtz:
  ; CHECK: vfmadd213pd  {rz-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x78,0xa8,0xc2]
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 3) nounwind
  ret <8 x double> %res
}

; All-ones mask, rounding operand 4: expects the unmasked form with no embedded-rounding annotation.
define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_current(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
  ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_current:
  ; CHECK: vfmadd213pd  %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x48,0xa8,0xc2]
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
  ret <8 x double> %res
}

; Calls the mask intrinsic twice (variable mask with rounding operand 4, then all-ones mask with
; rounding operand 0) and adds the results, so both the masked and the {rn-sae} forms are emitted.
define <8 x double>@test_int_x86_avx512_mask_vfmadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask_vfmadd_pd_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movzbl %dil, %eax
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm3
; CHECK-NEXT:    vfmadd213pd %zmm2, %zmm1, %zmm3 {%k1}
; CHECK-NEXT:    vfmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddpd %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}

declare <8 x double> @llvm.x86.avx512.mask3.vfmadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)

; mask3 variant: the masked call is expected as the 231 form writing a copy of %zmm2; the second,
; all-ones-mask call with rounding operand 0 is expected as the {rn-sae} 213 form.
define <8 x double>@test_int_x86_avx512_mask3_vfmadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_pd_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movzbl %dil, %eax
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vmovaps %zmm2, %zmm3
; CHECK-NEXT:    vfmadd231pd %zmm1, %zmm0, %zmm3 {%k1}
; CHECK-NEXT:    vfmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddpd %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask3.vfmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.mask3.vfmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}

declare <8 x double> @llvm.x86.avx512.maskz.vfmadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)

; maskz variant: the masked call is expected with both {%k1} and {z}; the second, all-ones-mask
; call with rounding operand 0 is expected as the {rn-sae} form.
define <8 x double>@test_int_x86_avx512_maskz_vfmadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_maskz_vfmadd_pd_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movzbl %dil, %eax
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm3
; CHECK-NEXT:    vfmadd213pd %zmm2, %zmm1, %zmm3 {%k1} {z}
; CHECK-NEXT:    vfmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddpd %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.maskz.vfmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.maskz.vfmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}

; Calls the mask intrinsic twice (variable mask with rounding operand 4, then all-ones mask with
; rounding operand 0) and adds the results, so both the masked and the {rn-sae} forms are emitted.
define <16 x float>@test_int_x86_avx512_mask_vfmadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask_vfmadd_ps_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm3
; CHECK-NEXT:    vfmadd213ps %zmm2, %zmm1, %zmm3 {%k1}
; CHECK-NEXT:    vfmadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddps %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
  %res2 = fadd <16 x float> %res, %res1
  ret <16 x float> %res2
}

declare <16 x float> @llvm.x86.avx512.mask3.vfmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)

; mask3 variant: the masked call is expected as the 231 form writing a copy of %zmm2; the second,
; all-ones-mask call with rounding operand 0 is expected as the {rn-sae} 213 form.
define <16 x float>@test_int_x86_avx512_mask3_vfmadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_ps_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm2, %zmm3
; CHECK-NEXT:    vfmadd231ps %zmm1, %zmm0, %zmm3 {%k1}
; CHECK-NEXT:    vfmadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddps %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask3.vfmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  %res1 = call <16 x float> @llvm.x86.avx512.mask3.vfmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
  %res2 = fadd <16 x float> %res, %res1
  ret <16 x float> %res2
}

declare <16 x float> @llvm.x86.avx512.maskz.vfmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)

; maskz variant: the masked call is expected with both {%k1} and {z}; the second, all-ones-mask
; call with rounding operand 0 is expected as the {rn-sae} form.
define <16 x float>@test_int_x86_avx512_maskz_vfmadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_maskz_vfmadd_ps_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm3
; CHECK-NEXT:    vfmadd213ps %zmm2, %zmm1, %zmm3 {%k1} {z}
; CHECK-NEXT:    vfmadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddps %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.maskz.vfmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  %res1 = call <16 x float> @llvm.x86.avx512.maskz.vfmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
  %res2 = fadd <16 x float> %res, %res1
  ret <16 x float> %res2
}


; Variable mask, rounding operand 0: expects the {rn-sae} masked form and its exact encoding.
define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rne(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
  ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_rne:
  ; CHECK: vfnmsub213pd  {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x19,0xae,0xc2]
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 0) nounwind
  ret <8 x double> %res
}

; Variable mask, rounding operand 1: expects the {rd-sae} masked form and its exact encoding.
define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rtn(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
  ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_rtn:
  ; CHECK: vfnmsub213pd  {rd-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x39,0xae,0xc2]
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 1) nounwind
  ret <8 x double> %res
}

; Variable mask, rounding operand 2: expects the {ru-sae} masked form and its exact encoding.
define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rtp(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
  ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_rtp:
  ; CHECK: vfnmsub213pd  {ru-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x59,0xae,0xc2]
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 2) nounwind
  ret <8 x double> %res
}

; Variable mask, rounding operand 3: expects the {rz-sae} masked form and its exact encoding.
define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rtz(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
  ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_rtz:
  ; CHECK: vfnmsub213pd  {rz-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x79,0xae,0xc2]
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 3) nounwind
  ret <8 x double> %res
}

; Variable mask, rounding operand 4: expects the masked form with no embedded-rounding annotation.
define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_current(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
  ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_current:
  ; CHECK: vfnmsub213pd  %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0xae,0xc2]
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
  ret <8 x double> %res
}

; 512-bit double-precision vfnmsub called with the constant all-ones mask
; (i8 -1) and the trailing i32 operand set to 0. The expected assembly carries
; {rn-sae} and no {%k1} write-mask operand, with the encoding bytes pinned.
545define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rne(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
546  ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_rne
547  ; CHECK: vfnmsub213pd  {rn-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x18,0xae,0xc2]
548  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 0) nounwind
549  ret <8 x double> %res
550}
551
; 512-bit double-precision vfnmsub called with the constant all-ones mask
; (i8 -1) and the trailing i32 operand set to 1. The expected assembly carries
; {rd-sae} and no {%k1} write-mask operand, with the encoding bytes pinned.
552define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rtn(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
553  ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_rtn
554  ; CHECK: vfnmsub213pd  {rd-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x38,0xae,0xc2]
555  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 1) nounwind
556  ret <8 x double> %res
557}
558
; 512-bit double-precision vfnmsub called with the constant all-ones mask
; (i8 -1) and the trailing i32 operand set to 2. The expected assembly carries
; {ru-sae} and no {%k1} write-mask operand, with the encoding bytes pinned.
559define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rtp(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
560  ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_rtp
561  ; CHECK: vfnmsub213pd  {ru-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x58,0xae,0xc2]
562  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 2) nounwind
563  ret <8 x double> %res
564}
565
; 512-bit double-precision vfnmsub called with the constant all-ones mask
; (i8 -1) and the trailing i32 operand set to 3. The expected assembly carries
; {rz-sae} and no {%k1} write-mask operand, with the encoding bytes pinned.
566define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rtz(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
567  ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_rtz
568  ; CHECK: vfnmsub213pd  {rz-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x78,0xae,0xc2]
569  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 3) nounwind
570  ret <8 x double> %res
571}
572
; 512-bit double-precision vfnmsub called with the constant all-ones mask
; (i8 -1) and the trailing i32 operand set to 4. The expected assembly has
; neither a {..-sae} annotation nor a {%k1} write-mask operand; the encoding
; bytes are pinned.
573define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_current(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
574  ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_current
575  ; CHECK: vfnmsub213pd  %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x48,0xae,0xc2]
576  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
577  ret <8 x double> %res
578}
579
; Calls @llvm.x86.avx512.mask.vfnmsub.pd.512 twice on the same three operands:
; once with the variable mask %x3 and i32 4, once with the all-ones mask (i8 -1)
; and i32 0, then adds the two results with fadd.
; The expected instruction sequence is matched line by line: the i8 mask is
; zero-extended and moved into %k1, %zmm0 is copied to %zmm3 so the masked 213
; form can write %zmm3, the unmasked {rn-sae} form writes %zmm0, and vaddpd
; combines both.
580define <8 x double>@test_int_x86_avx512_mask_vfnmsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
581; CHECK-LABEL: test_int_x86_avx512_mask_vfnmsub_pd_512:
582; CHECK:       ## BB#0:
583; CHECK-NEXT:    movzbl %dil, %eax
584; CHECK-NEXT:    kmovw %eax, %k1
585; CHECK-NEXT:    vmovaps %zmm0, %zmm3
586; CHECK-NEXT:    vfnmsub213pd %zmm2, %zmm1, %zmm3 {%k1}
587; CHECK-NEXT:    vfnmsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0
588; CHECK-NEXT:    vaddpd %zmm0, %zmm3, %zmm0
589; CHECK-NEXT:    retq
590  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
591  %res1 = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
592  %res2 = fadd <8 x double> %res, %res1
593  ret <8 x double> %res2
594}
595
; Declaration of the mask3 flavour of the 512-bit double-precision vfnmsub
; intrinsic: three <8 x double> operands, an i8 mask, and a trailing i32 operand
; (the test that follows passes 4 and 0 for it).
596declare <8 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
597
; Calls @llvm.x86.avx512.mask3.vfnmsub.pd.512 twice on the same three operands:
; once with the variable mask %x3 and i32 4, once with the all-ones mask (i8 -1)
; and i32 0, then adds the two results with fadd.
; In the expected sequence the masked call copies %zmm2 (not %zmm0) into %zmm3
; and is emitted as the 231 form writing %zmm3 under {%k1}; the all-ones call is
; emitted as the 213 form with {rn-sae} writing %zmm0, followed by vaddpd.
598define <8 x double>@test_int_x86_avx512_mask3_vfnmsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
599; CHECK-LABEL: test_int_x86_avx512_mask3_vfnmsub_pd_512:
600; CHECK:       ## BB#0:
601; CHECK-NEXT:    movzbl %dil, %eax
602; CHECK-NEXT:    kmovw %eax, %k1
603; CHECK-NEXT:    vmovaps %zmm2, %zmm3
604; CHECK-NEXT:    vfnmsub231pd %zmm1, %zmm0, %zmm3 {%k1}
605; CHECK-NEXT:    vfnmsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0
606; CHECK-NEXT:    vaddpd %zmm0, %zmm3, %zmm0
607; CHECK-NEXT:    retq
608  %res = call <8 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
609  %res1 = call <8 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
610  %res2 = fadd <8 x double> %res, %res1
611  ret <8 x double> %res2
612}
613
; Single-precision counterpart of the mask vfnmsub test: calls
; @llvm.x86.avx512.mask.vfnmsub.ps.512 with the variable i16 mask %x3 and i32 4,
; then with the all-ones mask (i16 -1) and i32 0, and adds the results.
; The expected sequence moves the mask into %k1 with a single kmovw from %edi,
; copies %zmm0 to %zmm3 for the masked 213 form, emits the unmasked {rn-sae}
; form into %zmm0, and finishes with vaddps.
614define <16 x float>@test_int_x86_avx512_mask_vfnmsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
615; CHECK-LABEL: test_int_x86_avx512_mask_vfnmsub_ps_512:
616; CHECK:       ## BB#0:
617; CHECK-NEXT:    kmovw %edi, %k1
618; CHECK-NEXT:    vmovaps %zmm0, %zmm3
619; CHECK-NEXT:    vfnmsub213ps %zmm2, %zmm1, %zmm3 {%k1}
620; CHECK-NEXT:    vfnmsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0
621; CHECK-NEXT:    vaddps %zmm0, %zmm3, %zmm0
622; CHECK-NEXT:    retq
623  %res = call <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
624  %res1 = call <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
625  %res2 = fadd <16 x float> %res, %res1
626  ret <16 x float> %res2
627}
628
; Declaration of the mask3 flavour of the 512-bit single-precision vfnmsub
; intrinsic: three <16 x float> operands, an i16 mask, and a trailing i32
; operand (the test that follows passes 4 and 0 for it).
629declare <16 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
630
; Single-precision counterpart of the mask3 vfnmsub test: calls
; @llvm.x86.avx512.mask3.vfnmsub.ps.512 with the variable i16 mask %x3 and
; i32 4, then with the all-ones mask (i16 -1) and i32 0, and adds the results.
; In the expected sequence the masked call copies %zmm2 into %zmm3 and is
; emitted as the 231 form writing %zmm3 under {%k1}; the all-ones call is
; emitted as the 213 form with {rn-sae} writing %zmm0, followed by vaddps.
631define <16 x float>@test_int_x86_avx512_mask3_vfnmsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
632; CHECK-LABEL: test_int_x86_avx512_mask3_vfnmsub_ps_512:
633; CHECK:       ## BB#0:
634; CHECK-NEXT:    kmovw %edi, %k1
635; CHECK-NEXT:    vmovaps %zmm2, %zmm3
636; CHECK-NEXT:    vfnmsub231ps %zmm1, %zmm0, %zmm3 {%k1}
637; CHECK-NEXT:    vfnmsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0
638; CHECK-NEXT:    vaddps %zmm0, %zmm3, %zmm0
639; CHECK-NEXT:    retq
640  %res = call <16 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
641  %res1 = call <16 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
642  %res2 = fadd <16 x float> %res, %res1
643  ret <16 x float> %res2
644}
645
; Calls @llvm.x86.avx512.mask.vfnmadd.pd.512 twice on the same three operands:
; once with the variable mask %x3 and i32 4, once with the all-ones mask (i8 -1)
; and i32 0, then adds the two results with fadd.
; The expected sequence zero-extends the i8 mask and moves it into %k1, copies
; %zmm0 to %zmm3 for the masked vfnmadd213pd, emits the unmasked {rn-sae} form
; into %zmm0, and finishes with vaddpd.
646define <8 x double>@test_int_x86_avx512_mask_vfnmadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
647; CHECK-LABEL: test_int_x86_avx512_mask_vfnmadd_pd_512:
648; CHECK:       ## BB#0:
649; CHECK-NEXT:    movzbl %dil, %eax
650; CHECK-NEXT:    kmovw %eax, %k1
651; CHECK-NEXT:    vmovaps %zmm0, %zmm3
652; CHECK-NEXT:    vfnmadd213pd %zmm2, %zmm1, %zmm3 {%k1}
653; CHECK-NEXT:    vfnmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0
654; CHECK-NEXT:    vaddpd %zmm0, %zmm3, %zmm0
655; CHECK-NEXT:    retq
656  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
657  %res1 = call <8 x double> @llvm.x86.avx512.mask.vfnmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
658  %res2 = fadd <8 x double> %res, %res1
659  ret <8 x double> %res2
660}
661
; Single-precision counterpart of the mask vfnmadd test: calls
; @llvm.x86.avx512.mask.vfnmadd.ps.512 with the variable i16 mask %x3 and i32 4,
; then with the all-ones mask (i16 -1) and i32 0, and adds the results.
; The expected sequence moves the mask into %k1 with a single kmovw from %edi,
; copies %zmm0 to %zmm3 for the masked vfnmadd213ps, emits the unmasked
; {rn-sae} form into %zmm0, and finishes with vaddps.
662define <16 x float>@test_int_x86_avx512_mask_vfnmadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
663; CHECK-LABEL: test_int_x86_avx512_mask_vfnmadd_ps_512:
664; CHECK:       ## BB#0:
665; CHECK-NEXT:    kmovw %edi, %k1
666; CHECK-NEXT:    vmovaps %zmm0, %zmm3
667; CHECK-NEXT:    vfnmadd213ps %zmm2, %zmm1, %zmm3 {%k1}
668; CHECK-NEXT:    vfnmadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0
669; CHECK-NEXT:    vaddps %zmm0, %zmm3, %zmm0
670; CHECK-NEXT:    retq
671  %res = call <16 x float> @llvm.x86.avx512.mask.vfnmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
672  %res1 = call <16 x float> @llvm.x86.avx512.mask.vfnmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
673  %res2 = fadd <16 x float> %res, %res1
674  ret <16 x float> %res2
675}
676