; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512dq | FileCheck %s
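
; Each test calls its intrinsic with the supplied mask and again with an
; all-ones mask (-1), then combines the results so FileCheck can verify both
; the masked and unmasked instruction forms. Where present, the trailing i32
; operand selects embedded rounding / exception suppression and appears to
; follow the _MM_FROUND_* encoding: 0 = {rn-sae}, 2 = {ru-sae}, 4 = use the
; current MXCSR rounding mode (no suffix), 8 = {sae} only.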

declare <8 x i64> @llvm.x86.avx512.mask.cvtpd2qq.512(<8 x double>, <8 x i64>, i8, i32)

define <8 x i64>@test_int_x86_avx512_mask_cvt_pd2qq_512(<8 x double> %x0, <8 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2qq_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vcvtpd2qq {ru-sae}, %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vcvtpd2qq {rn-sae}, %zmm0, %zmm0
; CHECK-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.cvtpd2qq.512(<8 x double> %x0, <8 x i64> %x1, i8 %x2, i32 2)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvtpd2qq.512(<8 x double> %x0, <8 x i64> %x1, i8 -1, i32 0)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}

declare <8 x i64> @llvm.x86.avx512.mask.cvtpd2uqq.512(<8 x double>, <8 x i64>, i8, i32)

define <8 x i64>@test_int_x86_avx512_mask_cvt_pd2uqq_512(<8 x double> %x0, <8 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2uqq_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vcvtpd2uqq {ru-sae}, %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vcvtpd2uqq {rn-sae}, %zmm0, %zmm0
; CHECK-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.cvtpd2uqq.512(<8 x double> %x0, <8 x i64> %x1, i8 %x2, i32 2)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvtpd2uqq.512(<8 x double> %x0, <8 x i64> %x1, i8 -1, i32 0)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}

declare <8 x i64> @llvm.x86.avx512.mask.cvtps2qq.512(<8 x float>, <8 x i64>, i8, i32)

define <8 x i64>@test_int_x86_avx512_mask_cvt_ps2qq_512(<8 x float> %x0, <8 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2qq_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vcvtps2qq {ru-sae}, %ymm0, %zmm1 {%k1}
; CHECK-NEXT:    vcvtps2qq {rn-sae}, %ymm0, %zmm0
; CHECK-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.cvtps2qq.512(<8 x float> %x0, <8 x i64> %x1, i8 %x2, i32 2)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvtps2qq.512(<8 x float> %x0, <8 x i64> %x1, i8 -1, i32 0)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}

declare <8 x i64> @llvm.x86.avx512.mask.cvtps2uqq.512(<8 x float>, <8 x i64>, i8, i32)

define <8 x i64>@test_int_x86_avx512_mask_cvt_ps2uqq_512(<8 x float> %x0, <8 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2uqq_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vcvtps2uqq {ru-sae}, %ymm0, %zmm1 {%k1}
; CHECK-NEXT:    vcvtps2uqq {rn-sae}, %ymm0, %zmm0
; CHECK-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.cvtps2uqq.512(<8 x float> %x0, <8 x i64> %x1, i8 %x2, i32 2)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvtps2uqq.512(<8 x float> %x0, <8 x i64> %x1, i8 -1, i32 0)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}

declare <8 x double> @llvm.x86.avx512.mask.cvtqq2pd.512(<8 x i64>, <8 x double>, i8, i32)

define <8 x double>@test_int_x86_avx512_mask_cvt_qq2pd_512(<8 x i64> %x0, <8 x double> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2pd_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vcvtqq2pd %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vcvtqq2pd {rn-sae}, %zmm0, %zmm0
; CHECK-NEXT:    vaddpd %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.cvtqq2pd.512(<8 x i64> %x0, <8 x double> %x1, i8 %x2, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.cvtqq2pd.512(<8 x i64> %x0, <8 x double> %x1, i8 -1, i32 0)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}

declare <8 x float> @llvm.x86.avx512.mask.cvtqq2ps.512(<8 x i64>, <8 x float>, i8, i32)

define <8 x float>@test_int_x86_avx512_mask_cvt_qq2ps_512(<8 x i64> %x0, <8 x float> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2ps_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vcvtqq2ps %zmm0, %ymm1 {%k1}
; CHECK-NEXT:    vcvtqq2ps {rn-sae}, %zmm0, %ymm0
; CHECK-NEXT:    vaddps %ymm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x float> @llvm.x86.avx512.mask.cvtqq2ps.512(<8 x i64> %x0, <8 x float> %x1, i8 %x2, i32 4)
  %res1 = call <8 x float> @llvm.x86.avx512.mask.cvtqq2ps.512(<8 x i64> %x0, <8 x float> %x1, i8 -1, i32 0)
  %res2 = fadd <8 x float> %res, %res1
  ret <8 x float> %res2
}

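; The truncating vcvtt* conversions always round toward zero, so no embedded
; rounding override is expected: i32 4 should emit the plain instruction and
; i32 8 the {sae} (suppress-all-exceptions) form.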
declare <8 x i64> @llvm.x86.avx512.mask.cvttpd2qq.512(<8 x double>, <8 x i64>, i8, i32)

define <8 x i64>@test_int_x86_avx512_mask_cvtt_pd2qq_512(<8 x double> %x0, <8 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_pd2qq_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vcvttpd2qq %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vcvttpd2qq {sae}, %zmm0, %zmm0
; CHECK-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.cvttpd2qq.512(<8 x double> %x0, <8 x i64> %x1, i8 %x2, i32 4)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvttpd2qq.512(<8 x double> %x0, <8 x i64> %x1, i8 -1, i32 8)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}

declare <8 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.512(<8 x double>, <8 x i64>, i8, i32)

define <8 x i64>@test_int_x86_avx512_mask_cvtt_pd2uqq_512(<8 x double> %x0, <8 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_pd2uqq_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vcvttpd2uqq %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vcvttpd2uqq {sae}, %zmm0, %zmm0
; CHECK-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.512(<8 x double> %x0, <8 x i64> %x1, i8 %x2, i32 4)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.512(<8 x double> %x0, <8 x i64> %x1, i8 -1, i32 8)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}

declare <8 x i64> @llvm.x86.avx512.mask.cvttps2qq.512(<8 x float>, <8 x i64>, i8, i32)

define <8 x i64>@test_int_x86_avx512_mask_cvtt_ps2qq_512(<8 x float> %x0, <8 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vcvttps2qq %ymm0, %zmm1 {%k1}
; CHECK-NEXT:    vcvttps2qq {sae}, %ymm0, %zmm0
; CHECK-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.cvttps2qq.512(<8 x float> %x0, <8 x i64> %x1, i8 %x2, i32 4)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvttps2qq.512(<8 x float> %x0, <8 x i64> %x1, i8 -1, i32 8)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}

declare <8 x i64> @llvm.x86.avx512.mask.cvttps2uqq.512(<8 x float>, <8 x i64>, i8, i32)

define <8 x i64>@test_int_x86_avx512_mask_cvtt_ps2uqq_512(<8 x float> %x0, <8 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vcvttps2uqq %ymm0, %zmm1 {%k1}
; CHECK-NEXT:    vcvttps2uqq {sae}, %ymm0, %zmm0
; CHECK-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.cvttps2uqq.512(<8 x float> %x0, <8 x i64> %x1, i8 %x2, i32 4)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvttps2uqq.512(<8 x float> %x0, <8 x i64> %x1, i8 -1, i32 8)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}

declare <8 x double> @llvm.x86.avx512.mask.cvtuqq2pd.512(<8 x i64>, <8 x double>, i8, i32)

define <8 x double>@test_int_x86_avx512_mask_cvt_uqq2pd_512(<8 x i64> %x0, <8 x double> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2pd_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vcvtuqq2pd %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vcvtuqq2pd {rn-sae}, %zmm0, %zmm0
; CHECK-NEXT:    vaddpd %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.cvtuqq2pd.512(<8 x i64> %x0, <8 x double> %x1, i8 %x2, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.cvtuqq2pd.512(<8 x i64> %x0, <8 x double> %x1, i8 -1, i32 0)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}

declare <8 x float> @llvm.x86.avx512.mask.cvtuqq2ps.512(<8 x i64>, <8 x float>, i8, i32)

define <8 x float>@test_int_x86_avx512_mask_cvt_uqq2ps_512(<8 x i64> %x0, <8 x float> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2ps_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vcvtuqq2ps %zmm0, %ymm1 {%k1}
; CHECK-NEXT:    vcvtuqq2ps {rn-sae}, %zmm0, %ymm0
; CHECK-NEXT:    vaddps %ymm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x float> @llvm.x86.avx512.mask.cvtuqq2ps.512(<8 x i64> %x0, <8 x float> %x1, i8 %x2, i32 4)
  %res1 = call <8 x float> @llvm.x86.avx512.mask.cvtuqq2ps.512(<8 x i64> %x0, <8 x float> %x1, i8 -1, i32 0)
  %res2 = fadd <8 x float> %res, %res1
  ret <8 x float> %res2
}

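; vreduce* and vrange* take an extra immediate control byte. These tests do
; not interpret that byte; they only check that the immediate, the mask, and
; an optional {sae} survive through to the emitted instruction.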
declare <8 x double> @llvm.x86.avx512.mask.reduce.pd.512(<8 x double>, i32, <8 x double>, i8, i32)

define <8 x double>@test_int_x86_avx512_mask_reduce_pd_512(<8 x double> %x0, <8 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_reduce_pd_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vreducepd $8, %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vreducepd $4, {sae}, %zmm0, %zmm0
; CHECK-NEXT:    vaddpd %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.reduce.pd.512(<8 x double> %x0, i32 8, <8 x double> %x2, i8 %x3, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.reduce.pd.512(<8 x double> %x0, i32 4, <8 x double> %x2, i8 -1, i32 8)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}

declare <16 x float> @llvm.x86.avx512.mask.reduce.ps.512(<16 x float>, i32, <16 x float>, i16, i32)

define <16 x float>@test_int_x86_avx512_mask_reduce_ps_512(<16 x float> %x0, <16 x float> %x2, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_reduce_ps_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vreduceps $44, {sae}, %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vreduceps $11, %zmm0, %zmm0
; CHECK-NEXT:    vaddps %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.reduce.ps.512(<16 x float> %x0, i32 44, <16 x float> %x2, i16 %x3, i32 8)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.reduce.ps.512(<16 x float> %x0, i32 11, <16 x float> %x2, i16 -1, i32 4)
  %res2 = fadd <16 x float> %res, %res1
  ret <16 x float> %res2
}

declare <8 x double> @llvm.x86.avx512.mask.range.pd.512(<8 x double>, <8 x double>, i32, <8 x double>, i8, i32)

define <8 x double>@test_int_x86_avx512_mask_range_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_range_pd_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vrangepd $8, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vrangepd $4, {sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    vaddpd %zmm0, %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.range.pd.512(<8 x double> %x0, <8 x double> %x1, i32 8, <8 x double> %x3, i8 %x4, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.range.pd.512(<8 x double> %x0, <8 x double> %x1, i32 4, <8 x double> %x3, i8 -1, i32 8)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}

declare <16 x float> @llvm.x86.avx512.mask.range.ps.512(<16 x float>, <16 x float>, i32, <16 x float>, i16, i32)

define <16 x float>@test_int_x86_avx512_mask_range_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x3, i16 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_range_ps_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vrangeps $88, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vrangeps $4, {sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    vaddps %zmm0, %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.range.ps.512(<16 x float> %x0, <16 x float> %x1, i32 88, <16 x float> %x3, i16 %x4, i32 4)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.range.ps.512(<16 x float> %x0, <16 x float> %x1, i32 4, <16 x float> %x3, i16 -1, i32 8)
  %res2 = fadd <16 x float> %res, %res1
  ret <16 x float> %res2
}

declare <4 x float> @llvm.x86.avx512.mask.reduce.ss(<4 x float>, <4 x float>,<4 x float>, i8, i32, i32)

define <4 x float>@test_int_x86_avx512_mask_reduce_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_reduce_ss:
; CHECK:       ## BB#0:
; CHECK-NEXT:    andl $1, %edi
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vreducess $4, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vreducess $4, {sae}, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vaddps %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.avx512.mask.reduce.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4, i32 4, i32 4)
  %res1 = call <4 x float> @llvm.x86.avx512.mask.reduce.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 -1, i32 4, i32 8)
  %res2 = fadd <4 x float> %res, %res1
  ret <4 x float> %res2
}

declare <4 x float> @llvm.x86.avx512.mask.range.ss(<4 x float>, <4 x float>,<4 x float>, i8, i32, i32)

define <4 x float>@test_int_x86_avx512_mask_range_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_range_ss:
; CHECK:       ## BB#0:
; CHECK-NEXT:    andl $1, %edi
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vrangess $4, {sae}, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vrangess $4, {sae}, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vaddps %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.avx512.mask.range.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4, i32 4, i32 8)
  %res1 = call <4 x float> @llvm.x86.avx512.mask.range.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 -1, i32 4, i32 8)
  %res2 = fadd <4 x float> %res, %res1
  ret <4 x float> %res2
}

declare <2 x double> @llvm.x86.avx512.mask.reduce.sd(<2 x double>, <2 x double>,<2 x double>, i8, i32, i32)

define <2 x double>@test_int_x86_avx512_mask_reduce_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_reduce_sd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    andl $1, %edi
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vreducesd $4, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vreducesd $4, {sae}, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vaddpd %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.avx512.mask.reduce.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4, i32 4, i32 4)
  %res1 = call <2 x double> @llvm.x86.avx512.mask.reduce.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 -1, i32 4, i32 8)
  %res2 = fadd <2 x double> %res, %res1
  ret <2 x double> %res2
}

declare <2 x double> @llvm.x86.avx512.mask.range.sd(<2 x double>, <2 x double>,<2 x double>, i8, i32, i32)

define <2 x double>@test_int_x86_avx512_mask_range_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_range_sd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    andl $1, %edi
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vrangesd $4, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vrangesd $4, {sae}, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vaddpd %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.avx512.mask.range.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4, i32 4, i32 4)
  %res1 = call <2 x double> @llvm.x86.avx512.mask.range.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 -1, i32 4, i32 8)
  %res2 = fadd <2 x double> %res, %res1
  ret <2 x double> %res2
}


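; Subvector extracts and inserts: each test exercises the merge-masked,
; zero-masked ({z}), and unmasked forms of the same intrinsic.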
declare <2 x double> @llvm.x86.avx512.mask.vextractf64x2.512(<8 x double>, i32, <2 x double>, i8)

define <2 x double>@test_int_x86_avx512_mask_vextractf64x2_512(<8 x double> %x0, <2 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vextractf64x2_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vextractf64x2 $1, %zmm0, %xmm1 {%k1}
; CHECK-NEXT:    vextractf64x2 $1, %zmm0, %xmm2 {%k1} {z}
; CHECK-NEXT:    vextractf64x2 $1, %zmm0, %xmm0
; CHECK-NEXT:    vaddpd %xmm0, %xmm1, %xmm0
; CHECK-NEXT:    vaddpd %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.avx512.mask.vextractf64x2.512(<8 x double> %x0,i32 1, <2 x double> %x2, i8 %x3)
  %res2 = call <2 x double> @llvm.x86.avx512.mask.vextractf64x2.512(<8 x double> %x0,i32 1, <2 x double> zeroinitializer, i8 %x3)
  %res1 = call <2 x double> @llvm.x86.avx512.mask.vextractf64x2.512(<8 x double> %x0,i32 1, <2 x double> zeroinitializer, i8 -1)
  %res3 = fadd <2 x double> %res, %res1
  %res4 = fadd <2 x double> %res2, %res3
  ret <2 x double> %res4
}

declare <8 x float> @llvm.x86.avx512.mask.vextractf32x8.512(<16 x float>, i32, <8 x float>, i8)

define <8 x float>@test_int_x86_avx512_mask_vextractf32x8(<16 x float> %x0, <8 x float> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vextractf32x8:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vextractf32x8 $1, %zmm0, %ymm1 {%k1}
; CHECK-NEXT:    vextractf32x8 $1, %zmm0, %ymm2 {%k1} {z}
; CHECK-NEXT:    vextractf32x8 $1, %zmm0, %ymm0
; CHECK-NEXT:    vaddps %ymm0, %ymm1, %ymm0
; CHECK-NEXT:    vaddps %ymm0, %ymm2, %ymm0
; CHECK-NEXT:    retq
  %res  = call <8 x float> @llvm.x86.avx512.mask.vextractf32x8.512(<16 x float> %x0,i32 1, <8 x float> %x2, i8 %x3)
  %res2 = call <8 x float> @llvm.x86.avx512.mask.vextractf32x8.512(<16 x float> %x0,i32 1, <8 x float> zeroinitializer, i8 %x3)
  %res1 = call <8 x float> @llvm.x86.avx512.mask.vextractf32x8.512(<16 x float> %x0,i32 1, <8 x float> zeroinitializer, i8 -1)
  %res3 = fadd <8 x float> %res, %res1
  %res4 = fadd <8 x float> %res2, %res3
  ret <8 x float> %res4
}

declare <16 x float> @llvm.x86.avx512.mask.insertf32x8.512(<16 x float>, <8 x float>, i32, <16 x float>, i16)

define <16 x float>@test_int_x86_avx512_mask_insertf32x8_512(<16 x float> %x0, <8 x float> %x1, <16 x float> %x3, i16 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_insertf32x8_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vinsertf32x8 $1, %ymm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vinsertf32x8 $1, %ymm1, %zmm0, %zmm3 {%k1} {z}
; CHECK-NEXT:    vinsertf32x8 $1, %ymm1, %zmm0, %zmm0
; CHECK-NEXT:    vaddps %zmm3, %zmm2, %zmm1
; CHECK-NEXT:    vaddps %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.insertf32x8.512(<16 x float> %x0, <8 x float> %x1, i32 1, <16 x float> %x3, i16 %x4)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.insertf32x8.512(<16 x float> %x0, <8 x float> %x1, i32 1, <16 x float> zeroinitializer, i16 %x4)
  %res2 = call <16 x float> @llvm.x86.avx512.mask.insertf32x8.512(<16 x float> %x0, <8 x float> %x1, i32 1, <16 x float> %x3, i16 -1)
  %res3 = fadd <16 x float> %res, %res1
  %res4 = fadd <16 x float> %res2, %res3
  ret <16 x float> %res4
}

declare <8 x double> @llvm.x86.avx512.mask.insertf64x2.512(<8 x double>, <2 x double>, i32, <8 x double>, i8)

define <8 x double>@test_int_x86_avx512_mask_insertf64x2_512(<8 x double> %x0, <2 x double> %x1,<8 x double> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_insertf64x2_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vinsertf64x2 $1, %xmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vinsertf64x2 $1, %xmm1, %zmm0, %zmm3 {%k1} {z}
; CHECK-NEXT:    vinsertf64x2 $1, %xmm1, %zmm0, %zmm0
; CHECK-NEXT:    vaddpd %zmm3, %zmm2, %zmm1
; CHECK-NEXT:    vaddpd %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.insertf64x2.512(<8 x double> %x0, <2 x double> %x1, i32 1, <8 x double> %x3, i8 %x4)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.insertf64x2.512(<8 x double> %x0, <2 x double> %x1, i32 1, <8 x double> zeroinitializer, i8 %x4)
  %res2 = call <8 x double> @llvm.x86.avx512.mask.insertf64x2.512(<8 x double> %x0, <2 x double> %x1, i32 1, <8 x double> %x3, i8 -1)
  %res3 = fadd <8 x double> %res, %res1
  %res4 = fadd <8 x double> %res3, %res2
  ret <8 x double> %res4
}

declare <16 x i32> @llvm.x86.avx512.mask.inserti32x8.512(<16 x i32>, <8 x i32>, i32, <16 x i32>, i16)

define <16 x i32>@test_int_x86_avx512_mask_inserti32x8_512(<16 x i32> %x0, <8 x i32> %x1, <16 x i32> %x3, i16 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_inserti32x8_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vinserti32x8 $1, %ymm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vinserti32x8 $1, %ymm1, %zmm0, %zmm3 {%k1} {z}
; CHECK-NEXT:    vinserti32x8 $1, %ymm1, %zmm0, %zmm0
; CHECK-NEXT:    vpaddd %zmm3, %zmm2, %zmm1
; CHECK-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.inserti32x8.512(<16 x i32> %x0, <8 x i32> %x1, i32 1, <16 x i32> %x3, i16 %x4)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.inserti32x8.512(<16 x i32> %x0, <8 x i32> %x1, i32 1, <16 x i32> zeroinitializer, i16 %x4)
  %res2 = call <16 x i32> @llvm.x86.avx512.mask.inserti32x8.512(<16 x i32> %x0, <8 x i32> %x1, i32 1, <16 x i32> %x3, i16 -1)
  %res3 = add <16 x i32> %res, %res1
  %res4 = add <16 x i32> %res3, %res2
  ret <16 x i32> %res4
}

declare <8 x i64> @llvm.x86.avx512.mask.inserti64x2.512(<8 x i64>, <2 x i64>, i32, <8 x i64>, i8)

define <8 x i64>@test_int_x86_avx512_mask_inserti64x2_512(<8 x i64> %x0, <2 x i64> %x1, <8 x i64> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_inserti64x2_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vinserti64x2 $1, %xmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vinserti64x2 $1, %xmm1, %zmm0, %zmm3 {%k1} {z}
; CHECK-NEXT:    vinserti64x2 $1, %xmm1, %zmm0, %zmm0
; CHECK-NEXT:    vpaddq %zmm3, %zmm2, %zmm1
; CHECK-NEXT:    vpaddq %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.inserti64x2.512(<8 x i64> %x0, <2 x i64> %x1, i32 1, <8 x i64> %x3, i8 %x4)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.inserti64x2.512(<8 x i64> %x0, <2 x i64> %x1, i32 1, <8 x i64> zeroinitializer, i8 %x4)
  %res2 = call <8 x i64> @llvm.x86.avx512.mask.inserti64x2.512(<8 x i64> %x0, <2 x i64> %x1, i32 1, <8 x i64> %x3, i8 -1)
  %res3 = add <8 x i64> %res, %res1
  %res4 = add <8 x i64> %res2, %res3
  ret <8 x i64> %res4
}

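; vfpclass* tests an imm8 bitmask of floating-point classes (QNaN, +/-0,
; +/-Inf, denormal, negative, SNaN; exact bit assignment per the Intel SDM)
; and writes the per-lane result into a mask register.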
declare i8 @llvm.x86.avx512.mask.fpclass.pd.512(<8 x double>, i32, i8)

define i8 @test_int_x86_avx512_mask_fpclass_pd_512(<8 x double> %x0, i8 %x1) {
; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_pd_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vfpclasspd $2, %zmm0, %k0 {%k1}
; CHECK-NEXT:    kmovb %k0, %ecx
; CHECK-NEXT:    vfpclasspd $4, %zmm0, %k0
; CHECK-NEXT:    kmovb %k0, %eax
; CHECK-NEXT:    addb %cl, %al
; CHECK-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT:    retq
    %res = call i8 @llvm.x86.avx512.mask.fpclass.pd.512(<8 x double> %x0, i32 2, i8 %x1)
    %res1 = call i8 @llvm.x86.avx512.mask.fpclass.pd.512(<8 x double> %x0, i32 4, i8 -1)
    %res2 = add i8 %res, %res1
    ret i8 %res2
}
declare i16 @llvm.x86.avx512.mask.fpclass.ps.512(<16 x float>, i32, i16)

define i16@test_int_x86_avx512_mask_fpclass_ps_512(<16 x float> %x0, i16 %x1) {
; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_ps_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfpclassps $4, %zmm0, %k0 {%k1}
; CHECK-NEXT:    kmovw %k0, %ecx
; CHECK-NEXT:    vfpclassps $4, %zmm0, %k0
; CHECK-NEXT:    kmovw %k0, %eax
; CHECK-NEXT:    addl %ecx, %eax
; CHECK-NEXT:    ## kill: %AX<def> %AX<kill> %EAX<kill>
; CHECK-NEXT:    retq
    %res = call i16 @llvm.x86.avx512.mask.fpclass.ps.512(<16 x float> %x0, i32 4, i16 %x1)
    %res1 = call i16 @llvm.x86.avx512.mask.fpclass.ps.512(<16 x float> %x0, i32 4, i16 -1)
    %res2 = add i16 %res, %res1
    ret i16 %res2
}

declare i8 @llvm.x86.avx512.mask.fpclass.sd(<2 x double>, i32, i8)

define i8 @test_int_x86_avx512_mask_fpclass_sd(<2 x double> %x0, i8 %x1) {
; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_sd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    andl $1, %edi
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfpclasssd $2, %xmm0, %k0 {%k1}
; CHECK-NEXT:    kmovw %k0, %eax
; CHECK-NEXT:    testb %al, %al
; CHECK-NEXT:    je LBB28_2
; CHECK-NEXT:  ## BB#1:
; CHECK-NEXT:    movb $-1, %al
; CHECK-NEXT:  LBB28_2:
; CHECK-NEXT:    vfpclasssd $4, %xmm0, %k0
; CHECK-NEXT:    kmovw %k0, %ecx
; CHECK-NEXT:    testb %cl, %cl
; CHECK-NEXT:    je LBB28_4
; CHECK-NEXT:  ## BB#3:
; CHECK-NEXT:    movb $-1, %cl
; CHECK-NEXT:  LBB28_4:
; CHECK-NEXT:    addb %cl, %al
; CHECK-NEXT:    ## kill: %AL<def> %AL<kill> %AX<kill>
; CHECK-NEXT:    retq
  %res = call i8 @llvm.x86.avx512.mask.fpclass.sd(<2 x double> %x0, i32 2, i8 %x1)
  %res1 = call i8 @llvm.x86.avx512.mask.fpclass.sd(<2 x double> %x0, i32 4, i8 -1)
  %res2 = add i8 %res, %res1
  ret i8 %res2
}

declare i8 @llvm.x86.avx512.mask.fpclass.ss(<4 x float>, i32, i8)

define i8 @test_int_x86_avx512_mask_fpclass_ss(<4 x float> %x0, i8 %x1) {
; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_ss:
; CHECK:       ## BB#0:
; CHECK-NEXT:    andl $1, %edi
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfpclassss $4, %xmm0, %k0 {%k1}
; CHECK-NEXT:    kmovw %k0, %eax
; CHECK-NEXT:    testb %al, %al
; CHECK-NEXT:    je LBB29_2
; CHECK-NEXT:  ## BB#1:
; CHECK-NEXT:    movb $-1, %al
; CHECK-NEXT:  LBB29_2:
; CHECK-NEXT:    vfpclassss $4, %xmm0, %k0
; CHECK-NEXT:    kmovw %k0, %ecx
; CHECK-NEXT:    testb %cl, %cl
; CHECK-NEXT:    je LBB29_4
; CHECK-NEXT:  ## BB#3:
; CHECK-NEXT:    movb $-1, %cl
; CHECK-NEXT:  LBB29_4:
; CHECK-NEXT:    addb %cl, %al
; CHECK-NEXT:    ## kill: %AL<def> %AL<kill> %AX<kill>
; CHECK-NEXT:    retq
  %res = call i8 @llvm.x86.avx512.mask.fpclass.ss(<4 x float> %x0, i32 4, i8 %x1)
  %res1 = call i8 @llvm.x86.avx512.mask.fpclass.ss(<4 x float> %x0, i32 4, i8 -1)
  %res2 = add i8 %res, %res1
  ret i8 %res2
}

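; vbroadcast*32x2 replicates the two low 32-bit elements of the source
; vector across the whole destination.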
declare <16 x float> @llvm.x86.avx512.mask.broadcastf32x2.512(<4 x float>, <16 x float>, i16)

define <16 x float>@test_int_x86_avx512_mask_broadcastf32x2_512(<4 x float> %x0, <16 x float> %x2, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_broadcastf32x2_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vbroadcastf32x2 %xmm0, %zmm1 {%k1}
; CHECK-NEXT:    vbroadcastf32x2 %xmm0, %zmm2 {%k1} {z}
; CHECK-NEXT:    vbroadcastf32x2 %xmm0, %zmm0
; CHECK-NEXT:    vaddps %zmm2, %zmm1, %zmm1
; CHECK-NEXT:    vaddps %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x2.512(<4 x float>  %x0, <16 x float> %x2, i16 %x3)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x2.512(<4 x float> %x0, <16 x float> zeroinitializer, i16 %x3)
  %res2 = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x2.512(<4 x float> %x0, <16 x float> %x2, i16 -1)
  %res3 = fadd <16 x float> %res, %res1
  %res4 = fadd <16 x float> %res3, %res2
  ret <16 x float> %res4
}

declare <16 x i32> @llvm.x86.avx512.mask.broadcasti32x2.512(<4 x i32>, <16 x i32>, i16)

define <16 x i32>@test_int_x86_avx512_mask_broadcasti32x2_512(<4 x i32> %x0, <16 x i32> %x2, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_broadcasti32x2_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vbroadcasti32x2 %xmm0, %zmm1 {%k1}
; CHECK-NEXT:    vbroadcasti32x2 %xmm0, %zmm2 {%k1} {z}
; CHECK-NEXT:    vbroadcasti32x2 %xmm0, %zmm0
; CHECK-NEXT:    vpaddd %zmm2, %zmm1, %zmm1
; CHECK-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x2.512(<4 x i32>  %x0, <16 x i32> %x2, i16 %x3)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x2.512(<4 x i32> %x0, <16 x i32> zeroinitializer, i16 %x3)
  %res2 = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x2.512(<4 x i32> %x0, <16 x i32> %x2, i16 -1)
  %res3 = add <16 x i32> %res, %res1
  %res4 = add <16 x i32> %res3, %res2
  ret <16 x i32> %res4
}

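; vpmovd2m/vpmovq2m copy each element's sign bit into the corresponding mask
; bit; vpmovm2d/vpmovm2q expand a mask back into all-ones/all-zeros elements.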
declare i16 @llvm.x86.avx512.cvtd2mask.512(<16 x i32>)

define i16@test_int_x86_avx512_cvtd2mask_512(<16 x i32> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvtd2mask_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpmovd2m %zmm0, %k0
; CHECK-NEXT:    kmovw %k0, %eax
; CHECK-NEXT:    ## kill: %AX<def> %AX<kill> %EAX<kill>
; CHECK-NEXT:    retq
  %res = call i16 @llvm.x86.avx512.cvtd2mask.512(<16 x i32> %x0)
  ret i16 %res
}

declare i8 @llvm.x86.avx512.cvtq2mask.512(<8 x i64>)

define i8@test_int_x86_avx512_cvtq2mask_512(<8 x i64> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvtq2mask_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpmovq2m %zmm0, %k0
; CHECK-NEXT:    kmovb %k0, %eax
; CHECK-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT:    retq
  %res = call i8 @llvm.x86.avx512.cvtq2mask.512(<8 x i64> %x0)
  ret i8 %res
}

declare <16 x i32> @llvm.x86.avx512.cvtmask2d.512(i16)

define <16 x i32>@test_int_x86_avx512_cvtmask2d_512(i16 %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvtmask2d_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k0
; CHECK-NEXT:    vpmovm2d %k0, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x i32> @llvm.x86.avx512.cvtmask2d.512(i16 %x0)
  ret <16 x i32> %res
}

declare <8 x i64> @llvm.x86.avx512.cvtmask2q.512(i8)

define <8 x i64>@test_int_x86_avx512_cvtmask2q_512(i8 %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvtmask2q_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k0
; CHECK-NEXT:    vpmovm2q %k0, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.cvtmask2q.512(i8 %x0)
  ret <8 x i64> %res
}

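; The 256-bit and 128-bit subvector broadcasts from a register are lowered to
; vshuff*/vshufi* shuffles that repeat the low lanes, as the shuffle decodes
; in the checks below show.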
declare <16 x float> @llvm.x86.avx512.mask.broadcastf32x8.512(<8 x float>, <16 x float>, i16)

define <16 x float>@test_int_x86_avx512_mask_broadcastf32x8_512(<8 x float> %x0, <16 x float> %x2, i16 %mask) {
; CHECK-LABEL: test_int_x86_avx512_mask_broadcastf32x8_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vshuff32x4 {{.*#+}} zmm2 {%k1} {z} = zmm0[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
; CHECK-NEXT:    vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
; CHECK-NEXT:    vshuff32x4 {{.*#+}} zmm0 = zmm0[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
; CHECK-NEXT:    vaddps %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    vaddps %zmm0, %zmm2, %zmm0
; CHECK-NEXT:    retq

  %res1 = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x8.512(<8 x float> %x0, <16 x float> %x2, i16 -1)
  %res2 = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x8.512(<8 x float> %x0, <16 x float> %x2, i16 %mask)
  %res3 = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x8.512(<8 x float> %x0, <16 x float> zeroinitializer, i16 %mask)
  %res4 = fadd <16 x float> %res1, %res2
  %res5 = fadd <16 x float> %res3, %res4
  ret <16 x float> %res5
}

declare <8 x double> @llvm.x86.avx512.mask.broadcastf64x2.512(<2 x double>, <8 x double>, i8)

define <8 x double>@test_int_x86_avx512_mask_broadcastf64x2_512(<2 x double> %x0, <8 x double> %x2, i8 %mask) {
; CHECK-LABEL: test_int_x86_avx512_mask_broadcastf64x2_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    ## kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vshuff64x2 {{.*#+}} zmm2 {%k1} {z} = zmm0[0,1,0,1,0,1,0,1]
; CHECK-NEXT:    vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[0,1,0,1,0,1,0,1]
; CHECK-NEXT:    vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,0,1,0,1,0,1]
; CHECK-NEXT:    vaddpd %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    vaddpd %zmm0, %zmm2, %zmm0
; CHECK-NEXT:    retq

  %res1 = call <8 x double> @llvm.x86.avx512.mask.broadcastf64x2.512(<2 x double> %x0, <8 x double> %x2, i8 -1)
  %res2 = call <8 x double> @llvm.x86.avx512.mask.broadcastf64x2.512(<2 x double> %x0, <8 x double> %x2, i8 %mask)
  %res3 = call <8 x double> @llvm.x86.avx512.mask.broadcastf64x2.512(<2 x double> %x0, <8 x double> zeroinitializer, i8 %mask)
  %res4 = fadd <8 x double> %res1, %res2
  %res5 = fadd <8 x double> %res3, %res4
  ret <8 x double> %res5
}

declare <16 x i32> @llvm.x86.avx512.mask.broadcasti32x8.512(<8 x i32>, <16 x i32>, i16)

define <16 x i32>@test_int_x86_avx512_mask_broadcasti32x8_512(<8 x i32> %x0, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: test_int_x86_avx512_mask_broadcasti32x8_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vshufi32x4 {{.*#+}} zmm2 {%k1} {z} = zmm0[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
; CHECK-NEXT:    vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
; CHECK-NEXT:    vshufi32x4 {{.*#+}} zmm0 = zmm0[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
; CHECK-NEXT:    vpaddd %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    vpaddd %zmm0, %zmm2, %zmm0
; CHECK-NEXT:    retq

  %res1 = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x8.512(<8 x i32> %x0, <16 x i32> %x2, i16 -1)
  %res2 = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x8.512(<8 x i32> %x0, <16 x i32> %x2, i16 %mask)
  %res3 = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x8.512(<8 x i32> %x0, <16 x i32> zeroinitializer, i16 %mask)
  %res4 = add <16 x i32> %res1, %res2
  %res5 = add <16 x i32> %res3, %res4
  ret <16 x i32> %res5
}

declare <8 x i64> @llvm.x86.avx512.mask.broadcasti64x2.512(<2 x i64>, <8 x i64>, i8)

define <8 x i64>@test_int_x86_avx512_mask_broadcasti64x2_512(<2 x i64> %x0, <8 x i64> %x2, i8 %mask) {
; CHECK-LABEL: test_int_x86_avx512_mask_broadcasti64x2_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    ## kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vshufi64x2 {{.*#+}} zmm2 {%k1} {z} = zmm0[0,1,0,1,0,1,0,1]
; CHECK-NEXT:    vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[0,1,0,1,0,1,0,1]
; CHECK-NEXT:    vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,0,1,0,1,0,1]
; CHECK-NEXT:    vpaddq %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    vpaddq %zmm0, %zmm2, %zmm0
; CHECK-NEXT:    retq

  %res1 = call <8 x i64> @llvm.x86.avx512.mask.broadcasti64x2.512(<2 x i64> %x0, <8 x i64> %x2, i8 -1)
  %res2 = call <8 x i64> @llvm.x86.avx512.mask.broadcasti64x2.512(<2 x i64> %x0, <8 x i64> %x2, i8 %mask)
  %res3 = call <8 x i64> @llvm.x86.avx512.mask.broadcasti64x2.512(<2 x i64> %x0, <8 x i64> zeroinitializer, i8 %mask)
  %res4 = add <8 x i64> %res1, %res2
  %res5 = add <8 x i64> %res3, %res4
  ret <8 x i64> %res5
}
