; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512dq | FileCheck %s

declare <8 x i64> @llvm.x86.avx512.mask.cvtpd2qq.512(<8 x double>, <8 x i64>, i8, i32)

; Masked cvtpd2qq with {ru-sae} rounding plus an unmasked {rn-sae} form.
define <8 x i64>@test_int_x86_avx512_mask_cvt_pd2qq_512(<8 x double> %x0, <8 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2qq_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vcvtpd2qq {ru-sae}, %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vcvtpd2qq {rn-sae}, %zmm0, %zmm0
; CHECK-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.cvtpd2qq.512(<8 x double> %x0, <8 x i64> %x1, i8 %x2, i32 2)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvtpd2qq.512(<8 x double> %x0, <8 x i64> %x1, i8 -1, i32 0)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}

declare <8 x i64> @llvm.x86.avx512.mask.cvtpd2uqq.512(<8 x double>, <8 x i64>, i8, i32)

; Masked cvtpd2uqq with {ru-sae} rounding plus an unmasked {rn-sae} form.
define <8 x i64>@test_int_x86_avx512_mask_cvt_pd2uqq_512(<8 x double> %x0, <8 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2uqq_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vcvtpd2uqq {ru-sae}, %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vcvtpd2uqq {rn-sae}, %zmm0, %zmm0
; CHECK-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.cvtpd2uqq.512(<8 x double> %x0, <8 x i64> %x1, i8 %x2, i32 2)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvtpd2uqq.512(<8 x double> %x0, <8 x i64> %x1, i8 -1, i32 0)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}

declare <8 x i64> @llvm.x86.avx512.mask.cvtps2qq.512(<8 x float>, <8 x i64>, i8, i32)

; Masked cvtps2qq (ymm source, zmm dest) with rounding-mode operands.
define <8 x i64>@test_int_x86_avx512_mask_cvt_ps2qq_512(<8 x float> %x0, <8 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2qq_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vcvtps2qq {ru-sae}, %ymm0, %zmm1 {%k1}
; CHECK-NEXT:    vcvtps2qq {rn-sae}, %ymm0, %zmm0
; CHECK-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.cvtps2qq.512(<8 x float> %x0, <8 x i64> %x1, i8 %x2, i32 2)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvtps2qq.512(<8 x float> %x0, <8 x i64> %x1, i8 -1, i32 0)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}

declare <8 x i64> @llvm.x86.avx512.mask.cvtps2uqq.512(<8 x float>, <8 x i64>, i8, i32)

; Masked cvtps2uqq (ymm source, zmm dest) with rounding-mode operands.
define <8 x i64>@test_int_x86_avx512_mask_cvt_ps2uqq_512(<8 x float> %x0, <8 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2uqq_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vcvtps2uqq {ru-sae}, %ymm0, %zmm1 {%k1}
; CHECK-NEXT:    vcvtps2uqq {rn-sae}, %ymm0, %zmm0
; CHECK-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.cvtps2uqq.512(<8 x float> %x0, <8 x i64> %x1, i8 %x2, i32 2)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvtps2uqq.512(<8 x float> %x0, <8 x i64> %x1, i8 -1, i32 0)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}

declare <8 x double> @llvm.x86.avx512.mask.cvtqq2pd.512(<8 x i64>, <8 x double>, i8, i32)

; Masked cvtqq2pd: current-rounding masked form plus unmasked {rn-sae} form.
define <8 x double>@test_int_x86_avx512_mask_cvt_qq2pd_512(<8 x i64> %x0, <8 x double> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2pd_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vcvtqq2pd %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vcvtqq2pd {rn-sae}, %zmm0, %zmm0
; CHECK-NEXT:    vaddpd %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.cvtqq2pd.512(<8 x i64> %x0, <8 x double> %x1, i8 %x2, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.cvtqq2pd.512(<8 x i64> %x0, <8 x double> %x1, i8 -1, i32 0)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}

declare <8 x float> @llvm.x86.avx512.mask.cvtqq2ps.512(<8 x i64>, <8 x float>, i8, i32)

; Masked cvtqq2ps (zmm source, ymm dest) plus unmasked {rn-sae} form.
define <8 x float>@test_int_x86_avx512_mask_cvt_qq2ps_512(<8 x i64> %x0, <8 x float> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2ps_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vcvtqq2ps %zmm0, %ymm1 {%k1}
; CHECK-NEXT:    vcvtqq2ps {rn-sae}, %zmm0, %ymm0
; CHECK-NEXT:    vaddps %ymm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x float> @llvm.x86.avx512.mask.cvtqq2ps.512(<8 x i64> %x0, <8 x float> %x1, i8 %x2, i32 4)
  %res1 = call <8 x float> @llvm.x86.avx512.mask.cvtqq2ps.512(<8 x i64> %x0, <8 x float> %x1, i8 -1, i32 0)
  %res2 = fadd <8 x float> %res, %res1
  ret <8 x float> %res2
}

declare <8 x i64> @llvm.x86.avx512.mask.cvttpd2qq.512(<8 x double>, <8 x i64>, i8, i32)

; Truncating cvttpd2qq: masked current-rounding form plus unmasked {sae} form.
define <8 x i64>@test_int_x86_avx512_mask_cvtt_pd2qq_512(<8 x double> %x0, <8 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_pd2qq_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vcvttpd2qq %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vcvttpd2qq {sae}, %zmm0, %zmm0
; CHECK-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.cvttpd2qq.512(<8 x double> %x0, <8 x i64> %x1, i8 %x2, i32 4)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvttpd2qq.512(<8 x double> %x0, <8 x i64> %x1, i8 -1, i32 8)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}

declare <8 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.512(<8 x double>, <8 x i64>, i8, i32)

; Truncating cvttpd2uqq: masked current-rounding form plus unmasked {sae} form.
define <8 x i64>@test_int_x86_avx512_mask_cvtt_pd2uqq_512(<8 x double> %x0, <8 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_pd2uqq_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vcvttpd2uqq %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vcvttpd2uqq {sae}, %zmm0, %zmm0
; CHECK-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.512(<8 x double> %x0, <8 x i64> %x1, i8 %x2, i32 4)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.512(<8 x double> %x0, <8 x i64> %x1, i8 -1, i32 8)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}

declare <8 x i64> @llvm.x86.avx512.mask.cvttps2qq.512(<8 x float>, <8 x i64>, i8, i32)

; Truncating cvttps2qq (ymm source): masked plus unmasked {sae} form.
define <8 x i64>@test_int_x86_avx512_mask_cvtt_ps2qq_512(<8 x float> %x0, <8 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vcvttps2qq %ymm0, %zmm1 {%k1}
; CHECK-NEXT:    vcvttps2qq {sae}, %ymm0, %zmm0
; CHECK-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.cvttps2qq.512(<8 x float> %x0, <8 x i64> %x1, i8 %x2, i32 4)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvttps2qq.512(<8 x float> %x0, <8 x i64> %x1, i8 -1, i32 8)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}

declare <8 x i64> @llvm.x86.avx512.mask.cvttps2uqq.512(<8 x float>, <8 x i64>, i8, i32)

; Truncating cvttps2uqq (ymm source): masked plus unmasked {sae} form.
define <8 x i64>@test_int_x86_avx512_mask_cvtt_ps2uqq_512(<8 x float> %x0, <8 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vcvttps2uqq %ymm0, %zmm1 {%k1}
; CHECK-NEXT:    vcvttps2uqq {sae}, %ymm0, %zmm0
; CHECK-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.cvttps2uqq.512(<8 x float> %x0, <8 x i64> %x1, i8 %x2, i32 4)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvttps2uqq.512(<8 x float> %x0, <8 x i64> %x1, i8 -1, i32 8)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}

declare <8 x double> @llvm.x86.avx512.mask.cvtuqq2pd.512(<8 x i64>, <8 x double>, i8, i32)

; Masked cvtuqq2pd plus unmasked {rn-sae} form.
define <8 x double>@test_int_x86_avx512_mask_cvt_uqq2pd_512(<8 x i64> %x0, <8 x double> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2pd_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vcvtuqq2pd %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vcvtuqq2pd {rn-sae}, %zmm0, %zmm0
; CHECK-NEXT:    vaddpd %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.cvtuqq2pd.512(<8 x i64> %x0, <8 x double> %x1, i8 %x2, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.cvtuqq2pd.512(<8 x i64> %x0, <8 x double> %x1, i8 -1, i32 0)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}

declare <8 x float> @llvm.x86.avx512.mask.cvtuqq2ps.512(<8 x i64>, <8 x float>, i8, i32)

; Masked cvtuqq2ps (zmm source, ymm dest) plus unmasked {rn-sae} form.
define <8 x float>@test_int_x86_avx512_mask_cvt_uqq2ps_512(<8 x i64> %x0, <8 x float> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2ps_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vcvtuqq2ps %zmm0, %ymm1 {%k1}
; CHECK-NEXT:    vcvtuqq2ps {rn-sae}, %zmm0, %ymm0
; CHECK-NEXT:    vaddps %ymm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x float> @llvm.x86.avx512.mask.cvtuqq2ps.512(<8 x i64> %x0, <8 x float> %x1, i8 %x2, i32 4)
  %res1 = call <8 x float> @llvm.x86.avx512.mask.cvtuqq2ps.512(<8 x i64> %x0, <8 x float> %x1, i8 -1, i32 0)
  %res2 = fadd <8 x float> %res, %res1
  ret <8 x float> %res2
}

declare <8 x double> @llvm.x86.avx512.mask.reduce.pd.512(<8 x double>, i32, <8 x double>, i8, i32)
; Masked vreducepd plus unmasked {sae} form.
; CHECK-LABEL: @test_int_x86_avx512_mask_reduce_pd_512
; CHECK-NOT: call
; CHECK: kmov
; CHECK: vreducepd {{.*}}{%k1}
; CHECK: vreducepd
; CHECK: {sae}
define <8 x double>@test_int_x86_avx512_mask_reduce_pd_512(<8 x double> %x0, <8 x double> %x2, i8 %x3) {
  %res = call <8 x double> @llvm.x86.avx512.mask.reduce.pd.512(<8 x double> %x0, i32 8, <8 x double> %x2, i8 %x3, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.reduce.pd.512(<8 x double> %x0, i32 4, <8 x double> %x2, i8 -1, i32 8)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}

declare <16 x float> @llvm.x86.avx512.mask.reduce.ps.512(<16 x float>, i32, <16 x float>, i16, i32)
; Masked vreduceps with {sae} plus unmasked form. (Typo "CKECK" fixed: the
; {%k1} directive was previously ignored by FileCheck.)
; CHECK-LABEL: @test_int_x86_avx512_mask_reduce_ps_512
; CHECK-NOT: call
; CHECK: kmov
; CHECK: vreduceps
; CHECK: {sae}
; CHECK: {%k1}
; CHECK: vreduceps
define <16 x float>@test_int_x86_avx512_mask_reduce_ps_512(<16 x float> %x0, <16 x float> %x2, i16 %x3) {
  %res = call <16 x float> @llvm.x86.avx512.mask.reduce.ps.512(<16 x float> %x0, i32 44, <16 x float> %x2, i16 %x3, i32 8)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.reduce.ps.512(<16 x float> %x0, i32 11, <16 x float> %x2, i16 -1, i32 4)
  %res2 = fadd <16 x float> %res, %res1
  ret <16 x float> %res2
}

declare <8 x double> @llvm.x86.avx512.mask.range.pd.512(<8 x double>, <8 x double>, i32, <8 x double>, i8, i32)
; Masked vrangepd plus unmasked {sae} form. (Typo "CKECK" fixed: the {%k1}
; directive was previously ignored by FileCheck.)
; CHECK-LABEL: @test_int_x86_avx512_mask_range_pd_512
; CHECK-NOT: call
; CHECK: kmov
; CHECK: vrangepd
; CHECK: {%k1}
; CHECK: vrangepd
; CHECK: {sae}
define <8 x double>@test_int_x86_avx512_mask_range_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x3, i8 %x4) {
  %res = call <8 x double> @llvm.x86.avx512.mask.range.pd.512(<8 x double> %x0, <8 x double> %x1, i32 8, <8 x double> %x3, i8 %x4, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.range.pd.512(<8 x double> %x0, <8 x double> %x1, i32 4, <8 x double> %x3, i8 -1, i32 8)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}

declare <16 x float> @llvm.x86.avx512.mask.range.ps.512(<16 x float>, <16 x float>, i32, <16 x float>, i16, i32)

; Masked vrangeps plus unmasked {sae} form. (Typo "CKECK" fixed: the {%k1}
; directive was previously ignored by FileCheck.)
; CHECK-LABEL: @test_int_x86_avx512_mask_range_ps_512
; CHECK-NOT: call
; CHECK: kmov
; CHECK: vrangeps
; CHECK: {%k1}
; CHECK: vrangeps
; CHECK: {sae}
define <16 x float>@test_int_x86_avx512_mask_range_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x3, i16 %x4) {
  %res = call <16 x float> @llvm.x86.avx512.mask.range.ps.512(<16 x float> %x0, <16 x float> %x1, i32 88, <16 x float> %x3, i16 %x4, i32 4)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.range.ps.512(<16 x float> %x0, <16 x float> %x1, i32 4, <16 x float> %x3, i16 -1, i32 8)
  %res2 = fadd <16 x float> %res, %res1
  ret <16 x float> %res2
}

declare <4 x float> @llvm.x86.avx512.mask.reduce.ss(<4 x float>, <4 x float>,<4 x float>, i8, i32, i32)

; Masked vreducess plus unmasked {sae} form. (Typo "CKECK" fixed: the {%k1}
; directive was previously ignored by FileCheck.)
; CHECK-LABEL: @test_int_x86_avx512_mask_reduce_ss
; CHECK-NOT: call
; CHECK: kmov
; CHECK: vreducess
; CHECK: {%k1}
; CHECK: vreducess
; CHECK: {sae}
define <4 x float>@test_int_x86_avx512_mask_reduce_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4) {
  %res = call <4 x float> @llvm.x86.avx512.mask.reduce.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4, i32 4, i32 4)
  %res1 = call <4 x float> @llvm.x86.avx512.mask.reduce.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 -1, i32 4, i32 8)
  %res2 = fadd <4 x float> %res, %res1
  ret <4 x float> %res2
}

declare <4 x float> @llvm.x86.avx512.mask.range.ss(<4 x float>, <4 x float>,<4 x float>, i8, i32, i32)
; Masked vrangess with {sae} plus unmasked {sae} form. (Typo "CKECK" fixed:
; the {%k1} directive was previously ignored by FileCheck.)
; CHECK-LABEL: @test_int_x86_avx512_mask_range_ss
; CHECK-NOT: call
; CHECK: kmov
; CHECK: vrangess
; CHECK: {sae}
; CHECK: {%k1}
; CHECK: vrangess
; CHECK: {sae}
define <4 x float>@test_int_x86_avx512_mask_range_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4) {
  %res = call <4 x float> @llvm.x86.avx512.mask.range.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4, i32 4, i32 8)
  %res1 = call <4 x float> @llvm.x86.avx512.mask.range.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 -1, i32 4, i32 8)
  %res2 = fadd <4 x float> %res, %res1
  ret <4 x float> %res2
}

declare <2 x double> @llvm.x86.avx512.mask.reduce.sd(<2 x double>, <2 x double>,<2 x double>, i8, i32, i32)

; Masked vreducesd plus unmasked {sae} form. (Typo "CKECK" fixed: the {%k1}
; directive was previously ignored by FileCheck.)
; CHECK-LABEL: @test_int_x86_avx512_mask_reduce_sd
; CHECK-NOT: call
; CHECK: kmov
; CHECK: vreducesd
; CHECK: {%k1}
; CHECK: vreducesd
; CHECK: {sae}
define <2 x double>@test_int_x86_avx512_mask_reduce_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4) {
  %res = call <2 x double> @llvm.x86.avx512.mask.reduce.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4, i32 4, i32 4)
  %res1 = call <2 x double> @llvm.x86.avx512.mask.reduce.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 -1, i32 4, i32 8)
  %res2 = fadd <2 x double> %res, %res1
  ret <2 x double> %res2
}

declare <2 x double> @llvm.x86.avx512.mask.range.sd(<2 x double>, <2 x double>,<2 x double>, i8, i32, i32)
; Masked vrangesd plus unmasked {sae} form. (Typo "CKECK" fixed: the {%k1}
; directive was previously ignored by FileCheck.)
; CHECK-LABEL: @test_int_x86_avx512_mask_range_sd
; CHECK-NOT: call
; CHECK: kmov
; CHECK: vrangesd
; CHECK: {%k1}
; CHECK: vrangesd
; CHECK: {sae}
define <2 x double>@test_int_x86_avx512_mask_range_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4) {
  %res = call <2 x double> @llvm.x86.avx512.mask.range.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4, i32 4, i32 4)
  %res1 = call <2 x double> @llvm.x86.avx512.mask.range.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 -1, i32 4, i32 8)
  %res2 = fadd <2 x double> %res, %res1
  ret <2 x double> %res2
}


declare <2 x double> @llvm.x86.avx512.mask.vextractf64x2.512(<8 x double>, i32, <2 x double>, i8)

; vextractf64x2 in merge-masked, zero-masked, and unmasked forms.
define <2 x double>@test_int_x86_avx512_mask_vextractf64x2_512(<8 x double> %x0, <2 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vextractf64x2_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vextractf64x2 $1, %zmm0, %xmm1 {%k1}
; CHECK-NEXT:    vextractf64x2 $1, %zmm0, %xmm2 {%k1} {z}
; CHECK-NEXT:    vextractf64x2 $1, %zmm0, %xmm0
; CHECK-NEXT:    vaddpd %xmm0, %xmm1, %xmm0
; CHECK-NEXT:    vaddpd %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.avx512.mask.vextractf64x2.512(<8 x double> %x0,i32 1, <2 x double> %x2, i8 %x3)
  %res2 = call <2 x double> @llvm.x86.avx512.mask.vextractf64x2.512(<8 x double> %x0,i32 1, <2 x double> zeroinitializer, i8 %x3)
  %res1 = call <2 x double> @llvm.x86.avx512.mask.vextractf64x2.512(<8 x double> %x0,i32 1, <2 x double> zeroinitializer, i8 -1)
  %res3 = fadd <2 x double> %res, %res1
  %res4 = fadd <2 x double> %res2, %res3
  ret <2 x double> %res4
}

declare <8 x float> @llvm.x86.avx512.mask.vextractf32x8.512(<16 x float>, i32, <8 x float>, i8)

; vextractf32x8 in merge-masked, zero-masked, and unmasked forms.
define <8 x float>@test_int_x86_avx512_mask_vextractf32x8(<16 x float> %x0, <8 x float> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vextractf32x8:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vextractf32x8 $1, %zmm0, %ymm1 {%k1}
; CHECK-NEXT:    vextractf32x8 $1, %zmm0, %ymm2 {%k1} {z}
; CHECK-NEXT:    vextractf32x8 $1, %zmm0, %ymm0
; CHECK-NEXT:    vaddps %ymm0, %ymm1, %ymm0
; CHECK-NEXT:    vaddps %ymm0, %ymm2, %ymm0
; CHECK-NEXT:    retq
  %res  = call <8 x float> @llvm.x86.avx512.mask.vextractf32x8.512(<16 x float> %x0,i32 1, <8 x float> %x2, i8 %x3)
  %res2 = call <8 x float> @llvm.x86.avx512.mask.vextractf32x8.512(<16 x float> %x0,i32 1, <8 x float> zeroinitializer, i8 %x3)
  %res1 = call <8 x float> @llvm.x86.avx512.mask.vextractf32x8.512(<16 x float> %x0,i32 1, <8 x float> zeroinitializer, i8 -1)
  %res3 = fadd <8 x float> %res, %res1
  %res4 = fadd <8 x float> %res2, %res3
  ret <8 x float> %res4
}

declare <16 x float> @llvm.x86.avx512.mask.insertf32x8.512(<16 x float>, <8 x float>, i32, <16 x float>, i16)

; vinsertf32x8 in merge-masked, zero-masked, and unmasked forms.
define <16 x float>@test_int_x86_avx512_mask_insertf32x8_512(<16 x float> %x0, <8 x float> %x1, <16 x float> %x3, i16 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_insertf32x8_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vinsertf32x8 $1, %ymm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vinsertf32x8 $1, %ymm1, %zmm0, %zmm3 {%k1} {z}
; CHECK-NEXT:    vinsertf32x8 $1, %ymm1, %zmm0, %zmm0
; CHECK-NEXT:    vaddps %zmm3, %zmm2, %zmm1
; CHECK-NEXT:    vaddps %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.insertf32x8.512(<16 x float> %x0, <8 x float> %x1, i32 1, <16 x float> %x3, i16 %x4)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.insertf32x8.512(<16 x float> %x0, <8 x float> %x1, i32 1, <16 x float> zeroinitializer, i16 %x4)
  %res2 = call <16 x float> @llvm.x86.avx512.mask.insertf32x8.512(<16 x float> %x0, <8 x float> %x1, i32 1, <16 x float> %x3, i16 -1)
  %res3 = fadd <16 x float> %res, %res1
  %res4 = fadd <16 x float> %res2, %res3
  ret <16 x float> %res4
}

declare <8 x double> @llvm.x86.avx512.mask.insertf64x2.512(<8 x double>, <2 x double>, i32, <8 x double>, i8)

; vinsertf64x2 in merge-masked, zero-masked, and unmasked forms.
define <8 x double>@test_int_x86_avx512_mask_insertf64x2_512(<8 x double> %x0, <2 x double> %x1,<8 x double> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_insertf64x2_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vinsertf64x2 $1, %xmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vinsertf64x2 $1, %xmm1, %zmm0, %zmm3 {%k1} {z}
; CHECK-NEXT:    vinsertf64x2 $1, %xmm1, %zmm0, %zmm0
; CHECK-NEXT:    vaddpd %zmm3, %zmm2, %zmm1
; CHECK-NEXT:    vaddpd %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.insertf64x2.512(<8 x double> %x0, <2 x double> %x1, i32 1, <8 x double> %x3, i8 %x4)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.insertf64x2.512(<8 x double> %x0, <2 x double> %x1, i32 1, <8 x double> zeroinitializer, i8 %x4)
  %res2 = call <8 x double> @llvm.x86.avx512.mask.insertf64x2.512(<8 x double> %x0, <2 x double> %x1, i32 1, <8 x double> %x3, i8 -1)
  %res3 = fadd <8 x double> %res, %res1
  %res4 = fadd <8 x double> %res3, %res2
  ret <8 x double> %res4
}

declare <16 x i32> @llvm.x86.avx512.mask.inserti32x8.512(<16 x i32>, <8 x i32>, i32, <16 x i32>, i16)

; vinserti32x8 in merge-masked, zero-masked, and unmasked forms.
define <16 x i32>@test_int_x86_avx512_mask_inserti32x8_512(<16 x i32> %x0, <8 x i32> %x1, <16 x i32> %x3, i16 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_inserti32x8_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vinserti32x8 $1, %ymm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vinserti32x8 $1, %ymm1, %zmm0, %zmm3 {%k1} {z}
; CHECK-NEXT:    vinserti32x8 $1, %ymm1, %zmm0, %zmm0
; CHECK-NEXT:    vpaddd %zmm3, %zmm2, %zmm1
; CHECK-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.inserti32x8.512(<16 x i32> %x0, <8 x i32> %x1, i32 1, <16 x i32> %x3, i16 %x4)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.inserti32x8.512(<16 x i32> %x0, <8 x i32> %x1, i32 1, <16 x i32> zeroinitializer, i16 %x4)
  %res2 = call <16 x i32> @llvm.x86.avx512.mask.inserti32x8.512(<16 x i32> %x0, <8 x i32> %x1, i32 1, <16 x i32> %x3, i16 -1)
  %res3 = add <16 x i32> %res, %res1
  %res4 = add <16 x i32> %res3, %res2
  ret <16 x i32> %res4
}

declare <8 x i64> @llvm.x86.avx512.mask.inserti64x2.512(<8 x i64>, <2 x i64>, i32, <8 x i64>, i8)

; vinserti64x2 in merge-masked, zero-masked, and unmasked forms.
define <8 x i64>@test_int_x86_avx512_mask_inserti64x2_512(<8 x i64> %x0, <2 x i64> %x1, <8 x i64> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_inserti64x2_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vinserti64x2 $1, %xmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vinserti64x2 $1, %xmm1, %zmm0, %zmm3 {%k1} {z}
; CHECK-NEXT:    vinserti64x2 $1, %xmm1, %zmm0, %zmm0
; CHECK-NEXT:    vpaddq %zmm3, %zmm2, %zmm1
; CHECK-NEXT:    vpaddq %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.inserti64x2.512(<8 x i64> %x0, <2 x i64> %x1, i32 1, <8 x i64> %x3, i8 %x4)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.inserti64x2.512(<8 x i64> %x0, <2 x i64> %x1, i32 1, <8 x i64> zeroinitializer, i8 %x4)
  %res2 = call <8 x i64> @llvm.x86.avx512.mask.inserti64x2.512(<8 x i64> %x0, <2 x i64> %x1, i32 1, <8 x i64> %x3, i8 -1)
  %res3 = add <8 x i64> %res, %res1
  %res4 = add <8 x i64> %res2, %res3
  ret <8 x i64> %res4
}

declare i8 @llvm.x86.avx512.mask.fpclass.pd.512(<8 x double>, i32, i8)

; Masked and unmasked vfpclasspd producing a mask-register result.
; CHECK-LABEL: @test_int_x86_avx512_mask_fpclass_pd_512
; CHECK-NOT: call
; CHECK: kmov
; CHECK: vfpclasspd
; CHECK: {%k1}
; CHECK: vfpclasspd
; CHECK: kmovb   %k0
define i8 @test_int_x86_avx512_mask_fpclass_pd_512(<8 x double> %x0, i8 %x1) {
	%res = call i8 @llvm.x86.avx512.mask.fpclass.pd.512(<8 x double> %x0, i32 2, i8 %x1)
	%res1 = call i8 @llvm.x86.avx512.mask.fpclass.pd.512(<8 x double> %x0, i32 4, i8 -1)
	%res2 = add i8 %res, %res1
	ret i8 %res2
}
declare i16 @llvm.x86.avx512.mask.fpclass.ps.512(<16 x float>, i32, i16)

; Masked and unmasked vfpclassps producing a mask-register result.
; CHECK-LABEL: @test_int_x86_avx512_mask_fpclass_ps_512
; CHECK-NOT: call
; CHECK: kmov
; CHECK: vfpclassps
; CHECK: vfpclassps
; CHECK: {%k1}
; CHECK: kmov
define i16@test_int_x86_avx512_mask_fpclass_ps_512(<16 x float> %x0, i16 %x1) {
	%res = call i16 @llvm.x86.avx512.mask.fpclass.ps.512(<16 x float> %x0, i32 4, i16 %x1)
	%res1 = call i16 @llvm.x86.avx512.mask.fpclass.ps.512(<16 x float> %x0, i32 4, i16 -1)
	%res2 = add i16 %res, %res1
	ret i16 %res2
}

declare i8 @llvm.x86.avx512.mask.fpclass.sd(<2 x double>, i32, i8)

; Masked and unmasked scalar vfpclasssd.
; CHECK-LABEL: @test_int_x86_avx512_mask_fpclass_sd
; CHECK-NOT: call
; CHECK: kmov
; CHECK: vfpclasssd
; CHECK: %k0 {%k1}
; CHECK: vfpclasssd
; CHECK: %k0
define i8 @test_int_x86_avx512_mask_fpclass_sd(<2 x double> %x0, i8 %x1) {
  %res = call i8 @llvm.x86.avx512.mask.fpclass.sd(<2 x double> %x0, i32 2, i8 %x1)
  %res1 = call i8 @llvm.x86.avx512.mask.fpclass.sd(<2 x double> %x0, i32 4, i8 -1)
  %res2 = add i8 %res, %res1
  ret i8 %res2
}

declare i8 @llvm.x86.avx512.mask.fpclass.ss(<4 x float>, i32, i8)

; Masked and unmasked scalar vfpclassss.
; CHECK-LABEL: @test_int_x86_avx512_mask_fpclass_ss
; CHECK-NOT: call
; CHECK: kmovw
; CHECK: vfpclassss
; CHECK: %k0
; CHECK: {%k1}
; CHECK: kmovw
; CHECK: vfpclassss
; CHECK: %k0
define i8 @test_int_x86_avx512_mask_fpclass_ss(<4 x float> %x0, i8 %x1) {
  %res = call i8 @llvm.x86.avx512.mask.fpclass.ss(<4 x float> %x0, i32 4, i8 %x1)
  %res1 = call i8 @llvm.x86.avx512.mask.fpclass.ss(<4 x float> %x0, i32 4, i8 -1)
  %res2 = add i8 %res, %res1
  ret i8 %res2
}

declare <16 x float> @llvm.x86.avx512.mask.broadcastf32x2.512(<4 x float>, <16 x float>, i16)

; vbroadcastf32x2 in merge-masked, zero-masked, and unmasked forms.
define <16 x float>@test_int_x86_avx512_mask_broadcastf32x2_512(<4 x float> %x0, <16 x float> %x2, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_broadcastf32x2_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vbroadcastf32x2 %xmm0, %zmm1 {%k1}
; CHECK-NEXT:    vbroadcastf32x2 %xmm0, %zmm2 {%k1} {z}
; CHECK-NEXT:    vbroadcastf32x2 %xmm0, %zmm0
; CHECK-NEXT:    vaddps %zmm2, %zmm1, %zmm1
; CHECK-NEXT:    vaddps %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x2.512(<4 x float>  %x0, <16 x float> %x2, i16 %x3)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x2.512(<4 x float> %x0, <16 x float> zeroinitializer, i16 %x3)
  %res2 = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x2.512(<4 x float> %x0, <16 x float> %x2, i16 -1)
  %res3 = fadd <16 x float> %res, %res1
  %res4 = fadd <16 x float> %res3, %res2
  ret <16 x float> %res4
}

declare <16 x i32> @llvm.x86.avx512.mask.broadcasti32x2.512(<4 x i32>, <16 x i32>, i16)

; vbroadcasti32x2 in merge-masked, zero-masked, and unmasked forms.
define <16 x i32>@test_int_x86_avx512_mask_broadcasti32x2_512(<4 x i32> %x0, <16 x i32> %x2, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_broadcasti32x2_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vbroadcasti32x2 %xmm0, %zmm1 {%k1}
; CHECK-NEXT:    vbroadcasti32x2 %xmm0, %zmm2 {%k1} {z}
; CHECK-NEXT:    vbroadcasti32x2 %xmm0, %zmm0
; CHECK-NEXT:    vpaddd %zmm2, %zmm1, %zmm1
; CHECK-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x2.512(<4 x i32>  %x0, <16 x i32> %x2, i16 %x3)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x2.512(<4 x i32> %x0, <16 x i32> zeroinitializer, i16 %x3)
  %res2 = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x2.512(<4 x i32> %x0, <16 x i32> %x2, i16 -1)
  %res3 = add <16 x i32> %res, %res1
  %res4 = add <16 x i32> %res3, %res2
  ret <16 x i32> %res4
}
