• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512BW
3; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -mcpu=knl -mattr=+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512F-32
4
; Unmasked byte equality compare on 512-bit vectors: calls the pcmpeq.b.512
; mask intrinsic with an all-ones mask (i64 -1) and returns the i64 result.
; The x86_64 checks expect vpcmpeqb into %k0 followed by kmovq into %rax.
; The i386 checks expect the 64-bit mask to be stored to the stack with kmovq
; and read back as two 32-bit loads into %eax and %edx.
5define i64 @test_pcmpeq_b(<64 x i8> %a, <64 x i8> %b) {
6; AVX512BW-LABEL: test_pcmpeq_b:
7; AVX512BW:       ## BB#0:
8; AVX512BW-NEXT:    vpcmpeqb %zmm1, %zmm0, %k0
9; AVX512BW-NEXT:    kmovq %k0, %rax
10; AVX512BW-NEXT:    retq
11;
12; AVX512F-32-LABEL: test_pcmpeq_b:
13; AVX512F-32:       # BB#0:
14; AVX512F-32-NEXT:    subl $12, %esp
15; AVX512F-32-NEXT:  .Ltmp0:
16; AVX512F-32-NEXT:    .cfi_def_cfa_offset 16
17; AVX512F-32-NEXT:    vpcmpeqb %zmm1, %zmm0, %k0
18; AVX512F-32-NEXT:    kmovq %k0, (%esp)
19; AVX512F-32-NEXT:    movl (%esp), %eax
20; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %edx
21; AVX512F-32-NEXT:    addl $12, %esp
22; AVX512F-32-NEXT:    retl
23  %res = call i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8> %a, <64 x i8> %b, i64 -1)
24  ret i64 %res
25}
26
; Masked byte equality compare on 512-bit vectors: forwards the caller-supplied
; i64 %mask to the pcmpeq.b.512 mask intrinsic and returns the i64 result.
; The x86_64 checks expect the mask to be moved from %rdi into %k1 and the
; compare to be issued under {%k1}.
; The i386 checks expect two 32-bit kmovd loads from the stack combined with
; kunpckdq into %k1, and the 64-bit result returned through %eax/%edx after a
; kmovq store to the stack.
27define i64 @test_mask_pcmpeq_b(<64 x i8> %a, <64 x i8> %b, i64 %mask) {
28; AVX512BW-LABEL: test_mask_pcmpeq_b:
29; AVX512BW:       ## BB#0:
30; AVX512BW-NEXT:    kmovq %rdi, %k1
31; AVX512BW-NEXT:    vpcmpeqb %zmm1, %zmm0, %k0 {%k1}
32; AVX512BW-NEXT:    kmovq %k0, %rax
33; AVX512BW-NEXT:    retq
34;
35; AVX512F-32-LABEL: test_mask_pcmpeq_b:
36; AVX512F-32:       # BB#0:
37; AVX512F-32-NEXT:    subl $12, %esp
38; AVX512F-32-NEXT:  .Ltmp1:
39; AVX512F-32-NEXT:    .cfi_def_cfa_offset 16
40; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
41; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
42; AVX512F-32-NEXT:    kunpckdq %k0, %k1, %k1
43; AVX512F-32-NEXT:    vpcmpeqb %zmm1, %zmm0, %k0 {%k1}
44; AVX512F-32-NEXT:    kmovq %k0, (%esp)
45; AVX512F-32-NEXT:    movl (%esp), %eax
46; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %edx
47; AVX512F-32-NEXT:    addl $12, %esp
48; AVX512F-32-NEXT:    retl
49  %res = call i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8> %a, <64 x i8> %b, i64 %mask)
50  ret i64 %res
51}
52
; Declaration of the byte equality mask intrinsic called by the two functions
; above: two <64 x i8> operands and an i64 mask operand, returning an i64.
53declare i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8>, <64 x i8>, i64)
54
; Unmasked word equality compare on 512-bit vectors: calls the pcmpeq.w.512
; mask intrinsic with an all-ones mask (i32 -1) and returns the i32 result.
; Both runs expect the same two instructions before the return: vpcmpeqw into
; %k0 and kmovd into %eax, with no stack adjustment.
55define i32 @test_pcmpeq_w(<32 x i16> %a, <32 x i16> %b) {
56; AVX512BW-LABEL: test_pcmpeq_w:
57; AVX512BW:       ## BB#0:
58; AVX512BW-NEXT:    vpcmpeqw %zmm1, %zmm0, %k0
59; AVX512BW-NEXT:    kmovd %k0, %eax
60; AVX512BW-NEXT:    retq
61;
62; AVX512F-32-LABEL: test_pcmpeq_w:
63; AVX512F-32:       # BB#0:
64; AVX512F-32-NEXT:    vpcmpeqw %zmm1, %zmm0, %k0
65; AVX512F-32-NEXT:    kmovd %k0, %eax
66; AVX512F-32-NEXT:    retl
67  %res = call i32 @llvm.x86.avx512.mask.pcmpeq.w.512(<32 x i16> %a, <32 x i16> %b, i32 -1)
68  ret i32 %res
69}
70
; Masked word equality compare on 512-bit vectors: forwards the caller-supplied
; i32 %mask to the pcmpeq.w.512 mask intrinsic and returns the i32 result.
; The x86_64 checks expect the mask to be moved from %edi into %k1; the i386
; checks expect it to be loaded into %k1 from the stack with kmovd. Both then
; expect vpcmpeqw under {%k1} and kmovd of the result into %eax.
71define i32 @test_mask_pcmpeq_w(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
72; AVX512BW-LABEL: test_mask_pcmpeq_w:
73; AVX512BW:       ## BB#0:
74; AVX512BW-NEXT:    kmovd %edi, %k1
75; AVX512BW-NEXT:    vpcmpeqw %zmm1, %zmm0, %k0 {%k1}
76; AVX512BW-NEXT:    kmovd %k0, %eax
77; AVX512BW-NEXT:    retq
78;
79; AVX512F-32-LABEL: test_mask_pcmpeq_w:
80; AVX512F-32:       # BB#0:
81; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
82; AVX512F-32-NEXT:    vpcmpeqw %zmm1, %zmm0, %k0 {%k1}
83; AVX512F-32-NEXT:    kmovd %k0, %eax
84; AVX512F-32-NEXT:    retl
85  %res = call i32 @llvm.x86.avx512.mask.pcmpeq.w.512(<32 x i16> %a, <32 x i16> %b, i32 %mask)
86  ret i32 %res
87}
88
; Declaration of the word equality mask intrinsic called by the two functions
; above: two <32 x i16> operands and an i32 mask operand, returning an i32.
89declare i32 @llvm.x86.avx512.mask.pcmpeq.w.512(<32 x i16>, <32 x i16>, i32)
90
; Unmasked byte greater-than compare on 512-bit vectors: calls the pcmpgt.b.512
; mask intrinsic with an all-ones mask (i64 -1) and returns the i64 result.
; The x86_64 checks expect vpcmpgtb into %k0 followed by kmovq into %rax.
; The i386 checks expect the 64-bit mask to be stored to the stack with kmovq
; and read back as two 32-bit loads into %eax and %edx.
91define i64 @test_pcmpgt_b(<64 x i8> %a, <64 x i8> %b) {
92; AVX512BW-LABEL: test_pcmpgt_b:
93; AVX512BW:       ## BB#0:
94; AVX512BW-NEXT:    vpcmpgtb %zmm1, %zmm0, %k0
95; AVX512BW-NEXT:    kmovq %k0, %rax
96; AVX512BW-NEXT:    retq
97;
98; AVX512F-32-LABEL: test_pcmpgt_b:
99; AVX512F-32:       # BB#0:
100; AVX512F-32-NEXT:    subl $12, %esp
101; AVX512F-32-NEXT:  .Ltmp2:
102; AVX512F-32-NEXT:    .cfi_def_cfa_offset 16
103; AVX512F-32-NEXT:    vpcmpgtb %zmm1, %zmm0, %k0
104; AVX512F-32-NEXT:    kmovq %k0, (%esp)
105; AVX512F-32-NEXT:    movl (%esp), %eax
106; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %edx
107; AVX512F-32-NEXT:    addl $12, %esp
108; AVX512F-32-NEXT:    retl
109  %res = call i64 @llvm.x86.avx512.mask.pcmpgt.b.512(<64 x i8> %a, <64 x i8> %b, i64 -1)
110  ret i64 %res
111}
112
; Masked byte greater-than compare on 512-bit vectors: forwards the
; caller-supplied i64 %mask to the pcmpgt.b.512 mask intrinsic and returns the
; i64 result.
; The x86_64 checks expect the mask to be moved from %rdi into %k1 and the
; compare to be issued under {%k1}.
; The i386 checks expect two 32-bit kmovd loads from the stack combined with
; kunpckdq into %k1, and the 64-bit result returned through %eax/%edx after a
; kmovq store to the stack.
113define i64 @test_mask_pcmpgt_b(<64 x i8> %a, <64 x i8> %b, i64 %mask) {
114; AVX512BW-LABEL: test_mask_pcmpgt_b:
115; AVX512BW:       ## BB#0:
116; AVX512BW-NEXT:    kmovq %rdi, %k1
117; AVX512BW-NEXT:    vpcmpgtb %zmm1, %zmm0, %k0 {%k1}
118; AVX512BW-NEXT:    kmovq %k0, %rax
119; AVX512BW-NEXT:    retq
120;
121; AVX512F-32-LABEL: test_mask_pcmpgt_b:
122; AVX512F-32:       # BB#0:
123; AVX512F-32-NEXT:    subl $12, %esp
124; AVX512F-32-NEXT:  .Ltmp3:
125; AVX512F-32-NEXT:    .cfi_def_cfa_offset 16
126; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
127; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
128; AVX512F-32-NEXT:    kunpckdq %k0, %k1, %k1
129; AVX512F-32-NEXT:    vpcmpgtb %zmm1, %zmm0, %k0 {%k1}
130; AVX512F-32-NEXT:    kmovq %k0, (%esp)
131; AVX512F-32-NEXT:    movl (%esp), %eax
132; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %edx
133; AVX512F-32-NEXT:    addl $12, %esp
134; AVX512F-32-NEXT:    retl
135  %res = call i64 @llvm.x86.avx512.mask.pcmpgt.b.512(<64 x i8> %a, <64 x i8> %b, i64 %mask)
136  ret i64 %res
137}
138
; Declaration of the byte greater-than mask intrinsic called by the two
; functions above: two <64 x i8> operands and an i64 mask operand, returning
; an i64.
139declare i64 @llvm.x86.avx512.mask.pcmpgt.b.512(<64 x i8>, <64 x i8>, i64)
140
; Unmasked word greater-than compare on 512-bit vectors: calls the pcmpgt.w.512
; mask intrinsic with an all-ones mask (i32 -1) and returns the i32 result.
; Both runs expect the same two instructions before the return: vpcmpgtw into
; %k0 and kmovd into %eax, with no stack adjustment.
141define i32 @test_pcmpgt_w(<32 x i16> %a, <32 x i16> %b) {
142; AVX512BW-LABEL: test_pcmpgt_w:
143; AVX512BW:       ## BB#0:
144; AVX512BW-NEXT:    vpcmpgtw %zmm1, %zmm0, %k0
145; AVX512BW-NEXT:    kmovd %k0, %eax
146; AVX512BW-NEXT:    retq
147;
148; AVX512F-32-LABEL: test_pcmpgt_w:
149; AVX512F-32:       # BB#0:
150; AVX512F-32-NEXT:    vpcmpgtw %zmm1, %zmm0, %k0
151; AVX512F-32-NEXT:    kmovd %k0, %eax
152; AVX512F-32-NEXT:    retl
153  %res = call i32 @llvm.x86.avx512.mask.pcmpgt.w.512(<32 x i16> %a, <32 x i16> %b, i32 -1)
154  ret i32 %res
155}
156
; Masked word greater-than compare on 512-bit vectors: forwards the
; caller-supplied i32 %mask to the pcmpgt.w.512 mask intrinsic and returns the
; i32 result.
; The x86_64 checks expect the mask to be moved from %edi into %k1; the i386
; checks expect it to be loaded into %k1 from the stack with kmovd. Both then
; expect vpcmpgtw under {%k1} and kmovd of the result into %eax.
157define i32 @test_mask_pcmpgt_w(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
158; AVX512BW-LABEL: test_mask_pcmpgt_w:
159; AVX512BW:       ## BB#0:
160; AVX512BW-NEXT:    kmovd %edi, %k1
161; AVX512BW-NEXT:    vpcmpgtw %zmm1, %zmm0, %k0 {%k1}
162; AVX512BW-NEXT:    kmovd %k0, %eax
163; AVX512BW-NEXT:    retq
164;
165; AVX512F-32-LABEL: test_mask_pcmpgt_w:
166; AVX512F-32:       # BB#0:
167; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
168; AVX512F-32-NEXT:    vpcmpgtw %zmm1, %zmm0, %k0 {%k1}
169; AVX512F-32-NEXT:    kmovd %k0, %eax
170; AVX512F-32-NEXT:    retl
171  %res = call i32 @llvm.x86.avx512.mask.pcmpgt.w.512(<32 x i16> %a, <32 x i16> %b, i32 %mask)
172  ret i32 %res
173}
174
; Declaration of the word greater-than mask intrinsic called by the two
; functions above: two <32 x i16> operands and an i32 mask operand, returning
; an i32.
175declare i32 @llvm.x86.avx512.mask.pcmpgt.w.512(<32 x i16>, <32 x i16>, i32)
176
; Unmasked generic byte compare on 512-bit vectors: calls the cmp.b.512 mask
; intrinsic eight times, once per i32 immediate 0 through 7, each with an
; all-ones mask (i64 -1), and returns the i64 sum of the eight results.
; In immediate order 0..7 the checks expect vpcmpeqb, vpcmpltb, vpcmpleb,
; vpcmpunordb, vpcmpneqb, vpcmpnltb, vpcmpnleb and vpcmpordb.
; The x86_64 checks accumulate with kmovq/addq in 64-bit registers; the i386
; checks store each mask to the stack with kmovq and accumulate the two halves
; with addl/adcl into %eax/%edx.
177define i64 @test_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1) {
178; AVX512BW-LABEL: test_cmp_b_512:
179; AVX512BW:       ## BB#0:
180; AVX512BW-NEXT:    vpcmpeqb %zmm1, %zmm0, %k0
181; AVX512BW-NEXT:    kmovq %k0, %rax
182; AVX512BW-NEXT:    vpcmpltb %zmm1, %zmm0, %k0
183; AVX512BW-NEXT:    kmovq %k0, %rcx
184; AVX512BW-NEXT:    addq %rax, %rcx
185; AVX512BW-NEXT:    vpcmpleb %zmm1, %zmm0, %k0
186; AVX512BW-NEXT:    kmovq %k0, %rax
187; AVX512BW-NEXT:    addq %rcx, %rax
188; AVX512BW-NEXT:    vpcmpunordb %zmm1, %zmm0, %k0
189; AVX512BW-NEXT:    kmovq %k0, %rcx
190; AVX512BW-NEXT:    addq %rax, %rcx
191; AVX512BW-NEXT:    vpcmpneqb %zmm1, %zmm0, %k0
192; AVX512BW-NEXT:    kmovq %k0, %rax
193; AVX512BW-NEXT:    addq %rcx, %rax
194; AVX512BW-NEXT:    vpcmpnltb %zmm1, %zmm0, %k0
195; AVX512BW-NEXT:    kmovq %k0, %rcx
196; AVX512BW-NEXT:    addq %rax, %rcx
197; AVX512BW-NEXT:    vpcmpnleb %zmm1, %zmm0, %k0
198; AVX512BW-NEXT:    kmovq %k0, %rdx
199; AVX512BW-NEXT:    addq %rcx, %rdx
200; AVX512BW-NEXT:    vpcmpordb %zmm1, %zmm0, %k0
201; AVX512BW-NEXT:    kmovq %k0, %rax
202; AVX512BW-NEXT:    addq %rdx, %rax
203; AVX512BW-NEXT:    retq
204;
205; AVX512F-32-LABEL: test_cmp_b_512:
206; AVX512F-32:       # BB#0:
207; AVX512F-32-NEXT:    subl $68, %esp
208; AVX512F-32-NEXT:  .Ltmp4:
209; AVX512F-32-NEXT:    .cfi_def_cfa_offset 72
210; AVX512F-32-NEXT:    vpcmpeqb %zmm1, %zmm0, %k0
211; AVX512F-32-NEXT:    kmovq %k0, {{[0-9]+}}(%esp)
212; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
213; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %edx
214; AVX512F-32-NEXT:    vpcmpltb %zmm1, %zmm0, %k0
215; AVX512F-32-NEXT:    kmovq %k0, {{[0-9]+}}(%esp)
216; AVX512F-32-NEXT:    addl {{[0-9]+}}(%esp), %eax
217; AVX512F-32-NEXT:    adcl {{[0-9]+}}(%esp), %edx
218; AVX512F-32-NEXT:    vpcmpleb %zmm1, %zmm0, %k0
219; AVX512F-32-NEXT:    kmovq %k0, {{[0-9]+}}(%esp)
220; AVX512F-32-NEXT:    addl {{[0-9]+}}(%esp), %eax
221; AVX512F-32-NEXT:    adcl {{[0-9]+}}(%esp), %edx
222; AVX512F-32-NEXT:    vpcmpunordb %zmm1, %zmm0, %k0
223; AVX512F-32-NEXT:    kmovq %k0, {{[0-9]+}}(%esp)
224; AVX512F-32-NEXT:    addl {{[0-9]+}}(%esp), %eax
225; AVX512F-32-NEXT:    adcl {{[0-9]+}}(%esp), %edx
226; AVX512F-32-NEXT:    vpcmpneqb %zmm1, %zmm0, %k0
227; AVX512F-32-NEXT:    kmovq %k0, {{[0-9]+}}(%esp)
228; AVX512F-32-NEXT:    addl {{[0-9]+}}(%esp), %eax
229; AVX512F-32-NEXT:    adcl {{[0-9]+}}(%esp), %edx
230; AVX512F-32-NEXT:    vpcmpnltb %zmm1, %zmm0, %k0
231; AVX512F-32-NEXT:    kmovq %k0, {{[0-9]+}}(%esp)
232; AVX512F-32-NEXT:    addl {{[0-9]+}}(%esp), %eax
233; AVX512F-32-NEXT:    adcl {{[0-9]+}}(%esp), %edx
234; AVX512F-32-NEXT:    vpcmpnleb %zmm1, %zmm0, %k0
235; AVX512F-32-NEXT:    kmovq %k0, {{[0-9]+}}(%esp)
236; AVX512F-32-NEXT:    addl {{[0-9]+}}(%esp), %eax
237; AVX512F-32-NEXT:    adcl {{[0-9]+}}(%esp), %edx
238; AVX512F-32-NEXT:    vpcmpordb %zmm1, %zmm0, %k0
239; AVX512F-32-NEXT:    kmovq %k0, (%esp)
240; AVX512F-32-NEXT:    addl (%esp), %eax
241; AVX512F-32-NEXT:    adcl {{[0-9]+}}(%esp), %edx
242; AVX512F-32-NEXT:    addl $68, %esp
243; AVX512F-32-NEXT:    retl
244  %res0 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 -1)
245  %res1 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 -1)
246  %ret1 = add i64 %res0, %res1
247  %res2 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 2, i64 -1)
248  %ret2 = add i64 %ret1, %res2
249  %res3 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 3, i64 -1)
250  %ret3 = add i64 %ret2, %res3
251  %res4 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 4, i64 -1)
252  %ret4 = add i64 %ret3, %res4
253  %res5 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 5, i64 -1)
254  %ret5 = add i64 %ret4, %res5
255  %res6 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 6, i64 -1)
256  %ret6 = add i64 %ret5, %res6
257  %res7 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 7, i64 -1)
258  %ret7 = add i64 %ret6, %res7
259  ret i64 %ret7
260}
261
; Masked generic byte compare on 512-bit vectors: calls the cmp.b.512 mask
; intrinsic eight times, once per i32 immediate 0 through 7, each with the
; caller-supplied i64 %mask, and returns the i64 sum of the eight results.
; In immediate order 0..7 the checks expect vpcmpeqb, vpcmpltb, vpcmpleb,
; vpcmpunordb, vpcmpneqb, vpcmpnltb, vpcmpnleb and vpcmpordb, all under {%k1}.
; The x86_64 checks expect the mask to be set up once with kmovq from %rdi; the
; i386 checks expect it to be built once from two kmovd stack loads joined by
; kunpckdq, with results accumulated through the stack using addl/adcl.
262define i64 @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) {
263; AVX512BW-LABEL: test_mask_cmp_b_512:
264; AVX512BW:       ## BB#0:
265; AVX512BW-NEXT:    kmovq %rdi, %k1
266; AVX512BW-NEXT:    vpcmpeqb %zmm1, %zmm0, %k0 {%k1}
267; AVX512BW-NEXT:    kmovq %k0, %rax
268; AVX512BW-NEXT:    vpcmpltb %zmm1, %zmm0, %k0 {%k1}
269; AVX512BW-NEXT:    kmovq %k0, %rcx
270; AVX512BW-NEXT:    addq %rax, %rcx
271; AVX512BW-NEXT:    vpcmpleb %zmm1, %zmm0, %k0 {%k1}
272; AVX512BW-NEXT:    kmovq %k0, %rax
273; AVX512BW-NEXT:    addq %rcx, %rax
274; AVX512BW-NEXT:    vpcmpunordb %zmm1, %zmm0, %k0 {%k1}
275; AVX512BW-NEXT:    kmovq %k0, %rcx
276; AVX512BW-NEXT:    addq %rax, %rcx
277; AVX512BW-NEXT:    vpcmpneqb %zmm1, %zmm0, %k0 {%k1}
278; AVX512BW-NEXT:    kmovq %k0, %rax
279; AVX512BW-NEXT:    addq %rcx, %rax
280; AVX512BW-NEXT:    vpcmpnltb %zmm1, %zmm0, %k0 {%k1}
281; AVX512BW-NEXT:    kmovq %k0, %rcx
282; AVX512BW-NEXT:    addq %rax, %rcx
283; AVX512BW-NEXT:    vpcmpnleb %zmm1, %zmm0, %k0 {%k1}
284; AVX512BW-NEXT:    kmovq %k0, %rdx
285; AVX512BW-NEXT:    addq %rcx, %rdx
286; AVX512BW-NEXT:    vpcmpordb %zmm1, %zmm0, %k0 {%k1}
287; AVX512BW-NEXT:    kmovq %k0, %rax
288; AVX512BW-NEXT:    addq %rdx, %rax
289; AVX512BW-NEXT:    retq
290;
291; AVX512F-32-LABEL: test_mask_cmp_b_512:
292; AVX512F-32:       # BB#0:
293; AVX512F-32-NEXT:    subl $68, %esp
294; AVX512F-32-NEXT:  .Ltmp5:
295; AVX512F-32-NEXT:    .cfi_def_cfa_offset 72
296; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
297; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
298; AVX512F-32-NEXT:    kunpckdq %k0, %k1, %k1
299; AVX512F-32-NEXT:    vpcmpeqb %zmm1, %zmm0, %k0 {%k1}
300; AVX512F-32-NEXT:    kmovq %k0, (%esp)
301; AVX512F-32-NEXT:    movl (%esp), %eax
302; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %edx
303; AVX512F-32-NEXT:    vpcmpltb %zmm1, %zmm0, %k0 {%k1}
304; AVX512F-32-NEXT:    kmovq %k0, {{[0-9]+}}(%esp)
305; AVX512F-32-NEXT:    addl {{[0-9]+}}(%esp), %eax
306; AVX512F-32-NEXT:    adcl {{[0-9]+}}(%esp), %edx
307; AVX512F-32-NEXT:    vpcmpleb %zmm1, %zmm0, %k0 {%k1}
308; AVX512F-32-NEXT:    kmovq %k0, {{[0-9]+}}(%esp)
309; AVX512F-32-NEXT:    addl {{[0-9]+}}(%esp), %eax
310; AVX512F-32-NEXT:    adcl {{[0-9]+}}(%esp), %edx
311; AVX512F-32-NEXT:    vpcmpunordb %zmm1, %zmm0, %k0 {%k1}
312; AVX512F-32-NEXT:    kmovq %k0, {{[0-9]+}}(%esp)
313; AVX512F-32-NEXT:    addl {{[0-9]+}}(%esp), %eax
314; AVX512F-32-NEXT:    adcl {{[0-9]+}}(%esp), %edx
315; AVX512F-32-NEXT:    vpcmpneqb %zmm1, %zmm0, %k0 {%k1}
316; AVX512F-32-NEXT:    kmovq %k0, {{[0-9]+}}(%esp)
317; AVX512F-32-NEXT:    addl {{[0-9]+}}(%esp), %eax
318; AVX512F-32-NEXT:    adcl {{[0-9]+}}(%esp), %edx
319; AVX512F-32-NEXT:    vpcmpnltb %zmm1, %zmm0, %k0 {%k1}
320; AVX512F-32-NEXT:    kmovq %k0, {{[0-9]+}}(%esp)
321; AVX512F-32-NEXT:    addl {{[0-9]+}}(%esp), %eax
322; AVX512F-32-NEXT:    adcl {{[0-9]+}}(%esp), %edx
323; AVX512F-32-NEXT:    vpcmpnleb %zmm1, %zmm0, %k0 {%k1}
324; AVX512F-32-NEXT:    kmovq %k0, {{[0-9]+}}(%esp)
325; AVX512F-32-NEXT:    addl {{[0-9]+}}(%esp), %eax
326; AVX512F-32-NEXT:    adcl {{[0-9]+}}(%esp), %edx
327; AVX512F-32-NEXT:    vpcmpordb %zmm1, %zmm0, %k0 {%k1}
328; AVX512F-32-NEXT:    kmovq %k0, {{[0-9]+}}(%esp)
329; AVX512F-32-NEXT:    addl {{[0-9]+}}(%esp), %eax
330; AVX512F-32-NEXT:    adcl {{[0-9]+}}(%esp), %edx
331; AVX512F-32-NEXT:    addl $68, %esp
332; AVX512F-32-NEXT:    retl
333  %res0 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 %mask)
334  %res1 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 %mask)
335  %ret1 = add i64 %res0, %res1
336  %res2 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 2, i64 %mask)
337  %ret2 = add i64 %ret1, %res2
338  %res3 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 3, i64 %mask)
339  %ret3 = add i64 %ret2, %res3
340  %res4 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 4, i64 %mask)
341  %ret4 = add i64 %ret3, %res4
342  %res5 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 5, i64 %mask)
343  %ret5 = add i64 %ret4, %res5
344  %res6 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 6, i64 %mask)
345  %ret6 = add i64 %ret5, %res6
346  %res7 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 7, i64 %mask)
347  %ret7 = add i64 %ret6, %res7
348  ret i64 %ret7
349}
350
; Declaration of the generic byte compare mask intrinsic called by the two
; functions above: two <64 x i8> operands, an i32 operand (the callers pass the
; constants 0 through 7) and an i64 mask operand, returning an i64. Declared
; nounwind readnone.
351declare i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8>, <64 x i8>, i32, i64) nounwind readnone
352
; Unmasked generic byte compare via the ucmp.b.512 mask intrinsic: calls it
; eight times, once per i32 immediate 0 through 7, each with an all-ones mask
; (i64 -1), and returns the i64 sum of the eight results.
; In immediate order 0..7 the checks expect vpcmpequb, vpcmpltub, vpcmpleub,
; vpcmpunordub, vpcmpnequb, vpcmpnltub, vpcmpnleub and vpcmpordub.
; The x86_64 checks accumulate with kmovq/addq in 64-bit registers; the i386
; checks store each mask to the stack with kmovq and accumulate the two halves
; with addl/adcl into %eax/%edx.
353define i64 @test_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1) {
354; AVX512BW-LABEL: test_ucmp_b_512:
355; AVX512BW:       ## BB#0:
356; AVX512BW-NEXT:    vpcmpequb %zmm1, %zmm0, %k0
357; AVX512BW-NEXT:    kmovq %k0, %rax
358; AVX512BW-NEXT:    vpcmpltub %zmm1, %zmm0, %k0
359; AVX512BW-NEXT:    kmovq %k0, %rcx
360; AVX512BW-NEXT:    addq %rax, %rcx
361; AVX512BW-NEXT:    vpcmpleub %zmm1, %zmm0, %k0
362; AVX512BW-NEXT:    kmovq %k0, %rax
363; AVX512BW-NEXT:    addq %rcx, %rax
364; AVX512BW-NEXT:    vpcmpunordub %zmm1, %zmm0, %k0
365; AVX512BW-NEXT:    kmovq %k0, %rcx
366; AVX512BW-NEXT:    addq %rax, %rcx
367; AVX512BW-NEXT:    vpcmpnequb %zmm1, %zmm0, %k0
368; AVX512BW-NEXT:    kmovq %k0, %rax
369; AVX512BW-NEXT:    addq %rcx, %rax
370; AVX512BW-NEXT:    vpcmpnltub %zmm1, %zmm0, %k0
371; AVX512BW-NEXT:    kmovq %k0, %rcx
372; AVX512BW-NEXT:    addq %rax, %rcx
373; AVX512BW-NEXT:    vpcmpnleub %zmm1, %zmm0, %k0
374; AVX512BW-NEXT:    kmovq %k0, %rdx
375; AVX512BW-NEXT:    addq %rcx, %rdx
376; AVX512BW-NEXT:    vpcmpordub %zmm1, %zmm0, %k0
377; AVX512BW-NEXT:    kmovq %k0, %rax
378; AVX512BW-NEXT:    addq %rdx, %rax
379; AVX512BW-NEXT:    retq
380;
381; AVX512F-32-LABEL: test_ucmp_b_512:
382; AVX512F-32:       # BB#0:
383; AVX512F-32-NEXT:    subl $68, %esp
384; AVX512F-32-NEXT:  .Ltmp6:
385; AVX512F-32-NEXT:    .cfi_def_cfa_offset 72
386; AVX512F-32-NEXT:    vpcmpequb %zmm1, %zmm0, %k0
387; AVX512F-32-NEXT:    kmovq %k0, {{[0-9]+}}(%esp)
388; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
389; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %edx
390; AVX512F-32-NEXT:    vpcmpltub %zmm1, %zmm0, %k0
391; AVX512F-32-NEXT:    kmovq %k0, {{[0-9]+}}(%esp)
392; AVX512F-32-NEXT:    addl {{[0-9]+}}(%esp), %eax
393; AVX512F-32-NEXT:    adcl {{[0-9]+}}(%esp), %edx
394; AVX512F-32-NEXT:    vpcmpleub %zmm1, %zmm0, %k0
395; AVX512F-32-NEXT:    kmovq %k0, {{[0-9]+}}(%esp)
396; AVX512F-32-NEXT:    addl {{[0-9]+}}(%esp), %eax
397; AVX512F-32-NEXT:    adcl {{[0-9]+}}(%esp), %edx
398; AVX512F-32-NEXT:    vpcmpunordub %zmm1, %zmm0, %k0
399; AVX512F-32-NEXT:    kmovq %k0, {{[0-9]+}}(%esp)
400; AVX512F-32-NEXT:    addl {{[0-9]+}}(%esp), %eax
401; AVX512F-32-NEXT:    adcl {{[0-9]+}}(%esp), %edx
402; AVX512F-32-NEXT:    vpcmpnequb %zmm1, %zmm0, %k0
403; AVX512F-32-NEXT:    kmovq %k0, {{[0-9]+}}(%esp)
404; AVX512F-32-NEXT:    addl {{[0-9]+}}(%esp), %eax
405; AVX512F-32-NEXT:    adcl {{[0-9]+}}(%esp), %edx
406; AVX512F-32-NEXT:    vpcmpnltub %zmm1, %zmm0, %k0
407; AVX512F-32-NEXT:    kmovq %k0, {{[0-9]+}}(%esp)
408; AVX512F-32-NEXT:    addl {{[0-9]+}}(%esp), %eax
409; AVX512F-32-NEXT:    adcl {{[0-9]+}}(%esp), %edx
410; AVX512F-32-NEXT:    vpcmpnleub %zmm1, %zmm0, %k0
411; AVX512F-32-NEXT:    kmovq %k0, {{[0-9]+}}(%esp)
412; AVX512F-32-NEXT:    addl {{[0-9]+}}(%esp), %eax
413; AVX512F-32-NEXT:    adcl {{[0-9]+}}(%esp), %edx
414; AVX512F-32-NEXT:    vpcmpordub %zmm1, %zmm0, %k0
415; AVX512F-32-NEXT:    kmovq %k0, (%esp)
416; AVX512F-32-NEXT:    addl (%esp), %eax
417; AVX512F-32-NEXT:    adcl {{[0-9]+}}(%esp), %edx
418; AVX512F-32-NEXT:    addl $68, %esp
419; AVX512F-32-NEXT:    retl
420  %res0 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 -1)
421  %res1 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 -1)
422  %ret1 = add i64 %res0, %res1
423  %res2 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 2, i64 -1)
424  %ret2 = add i64 %ret1, %res2
425  %res3 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 3, i64 -1)
426  %ret3 = add i64 %ret2, %res3
427  %res4 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 4, i64 -1)
428  %ret4 = add i64 %ret3, %res4
429  %res5 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 5, i64 -1)
430  %ret5 = add i64 %ret4, %res5
431  %res6 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 6, i64 -1)
432  %ret6 = add i64 %ret5, %res6
433  %res7 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 7, i64 -1)
434  %ret7 = add i64 %ret6, %res7
435  ret i64 %ret7
436}
437
; Masked generic byte compare via the ucmp.b.512 mask intrinsic: calls it eight
; times, once per i32 immediate 0 through 7, each with the caller-supplied i64
; %mask, and returns the i64 sum of the eight results.
; In immediate order 0..7 the checks expect vpcmpequb, vpcmpltub, vpcmpleub,
; vpcmpunordub, vpcmpnequb, vpcmpnltub, vpcmpnleub and vpcmpordub, all under
; {%k1}.
; The x86_64 checks expect the mask to be set up once with kmovq from %rdi; the
; i386 checks expect it to be built once from two kmovd stack loads joined by
; kunpckdq, with results accumulated through the stack using addl/adcl.
438define i64 @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) {
439; AVX512BW-LABEL: test_mask_x86_avx512_ucmp_b_512:
440; AVX512BW:       ## BB#0:
441; AVX512BW-NEXT:    kmovq %rdi, %k1
442; AVX512BW-NEXT:    vpcmpequb %zmm1, %zmm0, %k0 {%k1}
443; AVX512BW-NEXT:    kmovq %k0, %rax
444; AVX512BW-NEXT:    vpcmpltub %zmm1, %zmm0, %k0 {%k1}
445; AVX512BW-NEXT:    kmovq %k0, %rcx
446; AVX512BW-NEXT:    addq %rax, %rcx
447; AVX512BW-NEXT:    vpcmpleub %zmm1, %zmm0, %k0 {%k1}
448; AVX512BW-NEXT:    kmovq %k0, %rax
449; AVX512BW-NEXT:    addq %rcx, %rax
450; AVX512BW-NEXT:    vpcmpunordub %zmm1, %zmm0, %k0 {%k1}
451; AVX512BW-NEXT:    kmovq %k0, %rcx
452; AVX512BW-NEXT:    addq %rax, %rcx
453; AVX512BW-NEXT:    vpcmpnequb %zmm1, %zmm0, %k0 {%k1}
454; AVX512BW-NEXT:    kmovq %k0, %rax
455; AVX512BW-NEXT:    addq %rcx, %rax
456; AVX512BW-NEXT:    vpcmpnltub %zmm1, %zmm0, %k0 {%k1}
457; AVX512BW-NEXT:    kmovq %k0, %rcx
458; AVX512BW-NEXT:    addq %rax, %rcx
459; AVX512BW-NEXT:    vpcmpnleub %zmm1, %zmm0, %k0 {%k1}
460; AVX512BW-NEXT:    kmovq %k0, %rdx
461; AVX512BW-NEXT:    addq %rcx, %rdx
462; AVX512BW-NEXT:    vpcmpordub %zmm1, %zmm0, %k0 {%k1}
463; AVX512BW-NEXT:    kmovq %k0, %rax
464; AVX512BW-NEXT:    addq %rdx, %rax
465; AVX512BW-NEXT:    retq
466;
467; AVX512F-32-LABEL: test_mask_x86_avx512_ucmp_b_512:
468; AVX512F-32:       # BB#0:
469; AVX512F-32-NEXT:    subl $68, %esp
470; AVX512F-32-NEXT:  .Ltmp7:
471; AVX512F-32-NEXT:    .cfi_def_cfa_offset 72
472; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
473; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
474; AVX512F-32-NEXT:    kunpckdq %k0, %k1, %k1
475; AVX512F-32-NEXT:    vpcmpequb %zmm1, %zmm0, %k0 {%k1}
476; AVX512F-32-NEXT:    kmovq %k0, (%esp)
477; AVX512F-32-NEXT:    movl (%esp), %eax
478; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %edx
479; AVX512F-32-NEXT:    vpcmpltub %zmm1, %zmm0, %k0 {%k1}
480; AVX512F-32-NEXT:    kmovq %k0, {{[0-9]+}}(%esp)
481; AVX512F-32-NEXT:    addl {{[0-9]+}}(%esp), %eax
482; AVX512F-32-NEXT:    adcl {{[0-9]+}}(%esp), %edx
483; AVX512F-32-NEXT:    vpcmpleub %zmm1, %zmm0, %k0 {%k1}
484; AVX512F-32-NEXT:    kmovq %k0, {{[0-9]+}}(%esp)
485; AVX512F-32-NEXT:    addl {{[0-9]+}}(%esp), %eax
486; AVX512F-32-NEXT:    adcl {{[0-9]+}}(%esp), %edx
487; AVX512F-32-NEXT:    vpcmpunordub %zmm1, %zmm0, %k0 {%k1}
488; AVX512F-32-NEXT:    kmovq %k0, {{[0-9]+}}(%esp)
489; AVX512F-32-NEXT:    addl {{[0-9]+}}(%esp), %eax
490; AVX512F-32-NEXT:    adcl {{[0-9]+}}(%esp), %edx
491; AVX512F-32-NEXT:    vpcmpnequb %zmm1, %zmm0, %k0 {%k1}
492; AVX512F-32-NEXT:    kmovq %k0, {{[0-9]+}}(%esp)
493; AVX512F-32-NEXT:    addl {{[0-9]+}}(%esp), %eax
494; AVX512F-32-NEXT:    adcl {{[0-9]+}}(%esp), %edx
495; AVX512F-32-NEXT:    vpcmpnltub %zmm1, %zmm0, %k0 {%k1}
496; AVX512F-32-NEXT:    kmovq %k0, {{[0-9]+}}(%esp)
497; AVX512F-32-NEXT:    addl {{[0-9]+}}(%esp), %eax
498; AVX512F-32-NEXT:    adcl {{[0-9]+}}(%esp), %edx
499; AVX512F-32-NEXT:    vpcmpnleub %zmm1, %zmm0, %k0 {%k1}
500; AVX512F-32-NEXT:    kmovq %k0, {{[0-9]+}}(%esp)
501; AVX512F-32-NEXT:    addl {{[0-9]+}}(%esp), %eax
502; AVX512F-32-NEXT:    adcl {{[0-9]+}}(%esp), %edx
503; AVX512F-32-NEXT:    vpcmpordub %zmm1, %zmm0, %k0 {%k1}
504; AVX512F-32-NEXT:    kmovq %k0, {{[0-9]+}}(%esp)
505; AVX512F-32-NEXT:    addl {{[0-9]+}}(%esp), %eax
506; AVX512F-32-NEXT:    adcl {{[0-9]+}}(%esp), %edx
507; AVX512F-32-NEXT:    addl $68, %esp
508; AVX512F-32-NEXT:    retl
509  %res0 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 %mask)
510  %res1 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 %mask)
511  %ret1 = add i64 %res0, %res1
512  %res2 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 2, i64 %mask)
513  %ret2 = add i64 %ret1, %res2
514  %res3 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 3, i64 %mask)
515  %ret3 = add i64 %ret2, %res3
516  %res4 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 4, i64 %mask)
517  %ret4 = add i64 %ret3, %res4
518  %res5 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 5, i64 %mask)
519  %ret5 = add i64 %ret4, %res5
520  %res6 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 6, i64 %mask)
521  %ret6 = add i64 %ret5, %res6
522  %res7 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 7, i64 %mask)
523  %ret7 = add i64 %ret6, %res7
524  ret i64 %ret7
525}
526
; Declaration of the ucmp byte compare mask intrinsic called by the two
; functions above: two <64 x i8> operands, an i32 operand (the callers pass the
; constants 0 through 7) and an i64 mask operand, returning an i64. Declared
; nounwind readnone.
527declare i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8>, <64 x i8>, i32, i64) nounwind readnone
528
; Unmasked generic word compare on 512-bit vectors: calls the cmp.w.512 mask
; intrinsic eight times, once per i32 immediate 0 through 7, each with an
; all-ones mask (i32 -1), and returns the i32 sum of the eight results.
; In immediate order 0..7 the checks expect vpcmpeqw, vpcmpltw, vpcmplew,
; vpcmpunordw, vpcmpneqw, vpcmpnltw, vpcmpnlew and vpcmpordw.
; Both runs expect the same kmovd/addl accumulation in 32-bit registers and
; differ only in the return instruction (retq vs. retl) and the block comment
; marker.
529define i32 @test_cmp_w_512(<32 x i16> %a0, <32 x i16> %a1) {
530; AVX512BW-LABEL: test_cmp_w_512:
531; AVX512BW:       ## BB#0:
532; AVX512BW-NEXT:    vpcmpeqw %zmm1, %zmm0, %k0
533; AVX512BW-NEXT:    kmovd %k0, %eax
534; AVX512BW-NEXT:    vpcmpltw %zmm1, %zmm0, %k0
535; AVX512BW-NEXT:    kmovd %k0, %ecx
536; AVX512BW-NEXT:    addl %eax, %ecx
537; AVX512BW-NEXT:    vpcmplew %zmm1, %zmm0, %k0
538; AVX512BW-NEXT:    kmovd %k0, %eax
539; AVX512BW-NEXT:    addl %ecx, %eax
540; AVX512BW-NEXT:    vpcmpunordw %zmm1, %zmm0, %k0
541; AVX512BW-NEXT:    kmovd %k0, %ecx
542; AVX512BW-NEXT:    addl %eax, %ecx
543; AVX512BW-NEXT:    vpcmpneqw %zmm1, %zmm0, %k0
544; AVX512BW-NEXT:    kmovd %k0, %eax
545; AVX512BW-NEXT:    addl %ecx, %eax
546; AVX512BW-NEXT:    vpcmpnltw %zmm1, %zmm0, %k0
547; AVX512BW-NEXT:    kmovd %k0, %ecx
548; AVX512BW-NEXT:    addl %eax, %ecx
549; AVX512BW-NEXT:    vpcmpnlew %zmm1, %zmm0, %k0
550; AVX512BW-NEXT:    kmovd %k0, %edx
551; AVX512BW-NEXT:    addl %ecx, %edx
552; AVX512BW-NEXT:    vpcmpordw %zmm1, %zmm0, %k0
553; AVX512BW-NEXT:    kmovd %k0, %eax
554; AVX512BW-NEXT:    addl %edx, %eax
555; AVX512BW-NEXT:    retq
556;
557; AVX512F-32-LABEL: test_cmp_w_512:
558; AVX512F-32:       # BB#0:
559; AVX512F-32-NEXT:    vpcmpeqw %zmm1, %zmm0, %k0
560; AVX512F-32-NEXT:    kmovd %k0, %eax
561; AVX512F-32-NEXT:    vpcmpltw %zmm1, %zmm0, %k0
562; AVX512F-32-NEXT:    kmovd %k0, %ecx
563; AVX512F-32-NEXT:    addl %eax, %ecx
564; AVX512F-32-NEXT:    vpcmplew %zmm1, %zmm0, %k0
565; AVX512F-32-NEXT:    kmovd %k0, %eax
566; AVX512F-32-NEXT:    addl %ecx, %eax
567; AVX512F-32-NEXT:    vpcmpunordw %zmm1, %zmm0, %k0
568; AVX512F-32-NEXT:    kmovd %k0, %ecx
569; AVX512F-32-NEXT:    addl %eax, %ecx
570; AVX512F-32-NEXT:    vpcmpneqw %zmm1, %zmm0, %k0
571; AVX512F-32-NEXT:    kmovd %k0, %eax
572; AVX512F-32-NEXT:    addl %ecx, %eax
573; AVX512F-32-NEXT:    vpcmpnltw %zmm1, %zmm0, %k0
574; AVX512F-32-NEXT:    kmovd %k0, %ecx
575; AVX512F-32-NEXT:    addl %eax, %ecx
576; AVX512F-32-NEXT:    vpcmpnlew %zmm1, %zmm0, %k0
577; AVX512F-32-NEXT:    kmovd %k0, %edx
578; AVX512F-32-NEXT:    addl %ecx, %edx
579; AVX512F-32-NEXT:    vpcmpordw %zmm1, %zmm0, %k0
580; AVX512F-32-NEXT:    kmovd %k0, %eax
581; AVX512F-32-NEXT:    addl %edx, %eax
582; AVX512F-32-NEXT:    retl
583  %res0 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 -1)
584  %res1 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 1, i32 -1)
585  %ret1 = add i32 %res0, %res1
586  %res2 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 2, i32 -1)
587  %ret2 = add i32 %ret1, %res2
588  %res3 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 3, i32 -1)
589  %ret3 = add i32 %ret2, %res3
590  %res4 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 4, i32 -1)
591  %ret4 = add i32 %ret3, %res4
592  %res5 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 5, i32 -1)
593  %ret5 = add i32 %ret4, %res5
594  %res6 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 6, i32 -1)
595  %ret6 = add i32 %ret5, %res6
596  %res7 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 7, i32 -1)
597  %ret7 = add i32 %ret6, %res7
598  ret i32 %ret7
599}
600
; Masked generic word compare on 512-bit vectors: calls the cmp.w.512 mask
; intrinsic eight times, once per i32 immediate 0 through 7, each with the
; caller-supplied i32 %mask, and returns the i32 sum of the eight results.
; In immediate order 0..7 the checks expect vpcmpeqw, vpcmpltw, vpcmplew,
; vpcmpunordw, vpcmpneqw, vpcmpnltw, vpcmpnlew and vpcmpordw, all under {%k1}.
; The x86_64 checks expect the mask to be moved once from %edi into %k1; the
; i386 checks expect it to be loaded once from the stack with kmovd. The
; kmovd/addl accumulation is the same in both runs.
601define i32 @test_mask_cmp_w_512(<32 x i16> %a0, <32 x i16> %a1, i32 %mask) {
602; AVX512BW-LABEL: test_mask_cmp_w_512:
603; AVX512BW:       ## BB#0:
604; AVX512BW-NEXT:    kmovd %edi, %k1
605; AVX512BW-NEXT:    vpcmpeqw %zmm1, %zmm0, %k0 {%k1}
606; AVX512BW-NEXT:    kmovd %k0, %eax
607; AVX512BW-NEXT:    vpcmpltw %zmm1, %zmm0, %k0 {%k1}
608; AVX512BW-NEXT:    kmovd %k0, %ecx
609; AVX512BW-NEXT:    addl %eax, %ecx
610; AVX512BW-NEXT:    vpcmplew %zmm1, %zmm0, %k0 {%k1}
611; AVX512BW-NEXT:    kmovd %k0, %eax
612; AVX512BW-NEXT:    addl %ecx, %eax
613; AVX512BW-NEXT:    vpcmpunordw %zmm1, %zmm0, %k0 {%k1}
614; AVX512BW-NEXT:    kmovd %k0, %ecx
615; AVX512BW-NEXT:    addl %eax, %ecx
616; AVX512BW-NEXT:    vpcmpneqw %zmm1, %zmm0, %k0 {%k1}
617; AVX512BW-NEXT:    kmovd %k0, %eax
618; AVX512BW-NEXT:    addl %ecx, %eax
619; AVX512BW-NEXT:    vpcmpnltw %zmm1, %zmm0, %k0 {%k1}
620; AVX512BW-NEXT:    kmovd %k0, %ecx
621; AVX512BW-NEXT:    addl %eax, %ecx
622; AVX512BW-NEXT:    vpcmpnlew %zmm1, %zmm0, %k0 {%k1}
623; AVX512BW-NEXT:    kmovd %k0, %edx
624; AVX512BW-NEXT:    addl %ecx, %edx
625; AVX512BW-NEXT:    vpcmpordw %zmm1, %zmm0, %k0 {%k1}
626; AVX512BW-NEXT:    kmovd %k0, %eax
627; AVX512BW-NEXT:    addl %edx, %eax
628; AVX512BW-NEXT:    retq
629;
630; AVX512F-32-LABEL: test_mask_cmp_w_512:
631; AVX512F-32:       # BB#0:
632; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
633; AVX512F-32-NEXT:    vpcmpeqw %zmm1, %zmm0, %k0 {%k1}
634; AVX512F-32-NEXT:    kmovd %k0, %eax
635; AVX512F-32-NEXT:    vpcmpltw %zmm1, %zmm0, %k0 {%k1}
636; AVX512F-32-NEXT:    kmovd %k0, %ecx
637; AVX512F-32-NEXT:    addl %eax, %ecx
638; AVX512F-32-NEXT:    vpcmplew %zmm1, %zmm0, %k0 {%k1}
639; AVX512F-32-NEXT:    kmovd %k0, %eax
640; AVX512F-32-NEXT:    addl %ecx, %eax
641; AVX512F-32-NEXT:    vpcmpunordw %zmm1, %zmm0, %k0 {%k1}
642; AVX512F-32-NEXT:    kmovd %k0, %ecx
643; AVX512F-32-NEXT:    addl %eax, %ecx
644; AVX512F-32-NEXT:    vpcmpneqw %zmm1, %zmm0, %k0 {%k1}
645; AVX512F-32-NEXT:    kmovd %k0, %eax
646; AVX512F-32-NEXT:    addl %ecx, %eax
647; AVX512F-32-NEXT:    vpcmpnltw %zmm1, %zmm0, %k0 {%k1}
648; AVX512F-32-NEXT:    kmovd %k0, %ecx
649; AVX512F-32-NEXT:    addl %eax, %ecx
650; AVX512F-32-NEXT:    vpcmpnlew %zmm1, %zmm0, %k0 {%k1}
651; AVX512F-32-NEXT:    kmovd %k0, %edx
652; AVX512F-32-NEXT:    addl %ecx, %edx
653; AVX512F-32-NEXT:    vpcmpordw %zmm1, %zmm0, %k0 {%k1}
654; AVX512F-32-NEXT:    kmovd %k0, %eax
655; AVX512F-32-NEXT:    addl %edx, %eax
656; AVX512F-32-NEXT:    retl
657  %res0 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 %mask)
658  %res1 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 1, i32 %mask)
659  %ret1 = add i32 %res0, %res1
660  %res2 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 2, i32 %mask)
661  %ret2 = add i32 %ret1, %res2
662  %res3 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 3, i32 %mask)
663  %ret3 = add i32 %ret2, %res3
664  %res4 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 4, i32 %mask)
665  %ret4 = add i32 %ret3, %res4
666  %res5 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 5, i32 %mask)
667  %ret5 = add i32 %ret4, %res5
668  %res6 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 6, i32 %mask)
669  %ret6 = add i32 %ret5, %res6
670  %res7 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 7, i32 %mask)
671  %ret7 = add i32 %ret6, %res7
672  ret i32 %ret7
673}
674
; Declaration of the generic word compare mask intrinsic called by the two
; functions above: two <32 x i16> operands, an i32 operand (the callers pass the
; constants 0 through 7) and an i32 mask operand, returning an i32. Declared
; nounwind readnone.
675declare i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16>, <32 x i16>, i32, i32) nounwind readnone
676
; test_ucmp_w_512 - calls llvm.x86.avx512.mask.ucmp.w.512 once for each
; predicate immediate 0 through 7 with an all-ones mask (i32 -1) and returns
; the running sum of the eight i32 results, so every call's value feeds the
; returned value. The expectations below list one unmasked vpcmp*uw compare
; per immediate, each followed by a kmovd of %k0 into a general register.
677define i32 @test_ucmp_w_512(<32 x i16> %a0, <32 x i16> %a1) {
678; AVX512BW-LABEL: test_ucmp_w_512:
679; AVX512BW:       ## BB#0:
680; AVX512BW-NEXT:    vpcmpequw %zmm1, %zmm0, %k0
681; AVX512BW-NEXT:    kmovd %k0, %eax
682; AVX512BW-NEXT:    vpcmpltuw %zmm1, %zmm0, %k0
683; AVX512BW-NEXT:    kmovd %k0, %ecx
684; AVX512BW-NEXT:    addl %eax, %ecx
685; AVX512BW-NEXT:    vpcmpleuw %zmm1, %zmm0, %k0
686; AVX512BW-NEXT:    kmovd %k0, %eax
687; AVX512BW-NEXT:    addl %ecx, %eax
688; AVX512BW-NEXT:    vpcmpunorduw %zmm1, %zmm0, %k0
689; AVX512BW-NEXT:    kmovd %k0, %ecx
690; AVX512BW-NEXT:    addl %eax, %ecx
691; AVX512BW-NEXT:    vpcmpnequw %zmm1, %zmm0, %k0
692; AVX512BW-NEXT:    kmovd %k0, %eax
693; AVX512BW-NEXT:    addl %ecx, %eax
694; AVX512BW-NEXT:    vpcmpnltuw %zmm1, %zmm0, %k0
695; AVX512BW-NEXT:    kmovd %k0, %ecx
696; AVX512BW-NEXT:    addl %eax, %ecx
697; AVX512BW-NEXT:    vpcmpnleuw %zmm1, %zmm0, %k0
698; AVX512BW-NEXT:    kmovd %k0, %edx
699; AVX512BW-NEXT:    addl %ecx, %edx
700; AVX512BW-NEXT:    vpcmporduw %zmm1, %zmm0, %k0
701; AVX512BW-NEXT:    kmovd %k0, %eax
702; AVX512BW-NEXT:    addl %edx, %eax
703; AVX512BW-NEXT:    retq
704;
705; AVX512F-32-LABEL: test_ucmp_w_512:
706; AVX512F-32:       # BB#0:
707; AVX512F-32-NEXT:    vpcmpequw %zmm1, %zmm0, %k0
708; AVX512F-32-NEXT:    kmovd %k0, %eax
709; AVX512F-32-NEXT:    vpcmpltuw %zmm1, %zmm0, %k0
710; AVX512F-32-NEXT:    kmovd %k0, %ecx
711; AVX512F-32-NEXT:    addl %eax, %ecx
712; AVX512F-32-NEXT:    vpcmpleuw %zmm1, %zmm0, %k0
713; AVX512F-32-NEXT:    kmovd %k0, %eax
714; AVX512F-32-NEXT:    addl %ecx, %eax
715; AVX512F-32-NEXT:    vpcmpunorduw %zmm1, %zmm0, %k0
716; AVX512F-32-NEXT:    kmovd %k0, %ecx
717; AVX512F-32-NEXT:    addl %eax, %ecx
718; AVX512F-32-NEXT:    vpcmpnequw %zmm1, %zmm0, %k0
719; AVX512F-32-NEXT:    kmovd %k0, %eax
720; AVX512F-32-NEXT:    addl %ecx, %eax
721; AVX512F-32-NEXT:    vpcmpnltuw %zmm1, %zmm0, %k0
722; AVX512F-32-NEXT:    kmovd %k0, %ecx
723; AVX512F-32-NEXT:    addl %eax, %ecx
724; AVX512F-32-NEXT:    vpcmpnleuw %zmm1, %zmm0, %k0
725; AVX512F-32-NEXT:    kmovd %k0, %edx
726; AVX512F-32-NEXT:    addl %ecx, %edx
727; AVX512F-32-NEXT:    vpcmporduw %zmm1, %zmm0, %k0
728; AVX512F-32-NEXT:    kmovd %k0, %eax
729; AVX512F-32-NEXT:    addl %edx, %eax
730; AVX512F-32-NEXT:    retl
731  %res0 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 -1)
732  %res1 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 1, i32 -1)
733  %ret1 = add i32 %res0, %res1
734  %res2 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 2, i32 -1)
735  %ret2 = add i32 %ret1, %res2
736  %res3 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 3, i32 -1)
737  %ret3 = add i32 %ret2, %res3
738  %res4 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 4, i32 -1)
739  %ret4 = add i32 %ret3, %res4
740  %res5 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 5, i32 -1)
741  %ret5 = add i32 %ret4, %res5
742  %res6 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 6, i32 -1)
743  %ret6 = add i32 %ret5, %res6
744  %res7 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 7, i32 -1)
745  %ret7 = add i32 %ret6, %res7
746  ret i32 %ret7
747}
748
; test_mask_ucmp_w_512 - same eight-predicate sweep (immediates 0 through 7)
; of llvm.x86.avx512.mask.ucmp.w.512 as the unmasked variant, but every call
; passes the caller-supplied %mask. The expectations load the mask into %k1
; once (from %edi on x86-64, from the stack on i386) and apply {%k1} to each
; vpcmp*uw compare; the eight i32 results are summed and returned.
749define i32 @test_mask_ucmp_w_512(<32 x i16> %a0, <32 x i16> %a1, i32 %mask) {
750; AVX512BW-LABEL: test_mask_ucmp_w_512:
751; AVX512BW:       ## BB#0:
752; AVX512BW-NEXT:    kmovd %edi, %k1
753; AVX512BW-NEXT:    vpcmpequw %zmm1, %zmm0, %k0 {%k1}
754; AVX512BW-NEXT:    kmovd %k0, %eax
755; AVX512BW-NEXT:    vpcmpltuw %zmm1, %zmm0, %k0 {%k1}
756; AVX512BW-NEXT:    kmovd %k0, %ecx
757; AVX512BW-NEXT:    addl %eax, %ecx
758; AVX512BW-NEXT:    vpcmpleuw %zmm1, %zmm0, %k0 {%k1}
759; AVX512BW-NEXT:    kmovd %k0, %eax
760; AVX512BW-NEXT:    addl %ecx, %eax
761; AVX512BW-NEXT:    vpcmpunorduw %zmm1, %zmm0, %k0 {%k1}
762; AVX512BW-NEXT:    kmovd %k0, %ecx
763; AVX512BW-NEXT:    addl %eax, %ecx
764; AVX512BW-NEXT:    vpcmpnequw %zmm1, %zmm0, %k0 {%k1}
765; AVX512BW-NEXT:    kmovd %k0, %eax
766; AVX512BW-NEXT:    addl %ecx, %eax
767; AVX512BW-NEXT:    vpcmpnltuw %zmm1, %zmm0, %k0 {%k1}
768; AVX512BW-NEXT:    kmovd %k0, %ecx
769; AVX512BW-NEXT:    addl %eax, %ecx
770; AVX512BW-NEXT:    vpcmpnleuw %zmm1, %zmm0, %k0 {%k1}
771; AVX512BW-NEXT:    kmovd %k0, %edx
772; AVX512BW-NEXT:    addl %ecx, %edx
773; AVX512BW-NEXT:    vpcmporduw %zmm1, %zmm0, %k0 {%k1}
774; AVX512BW-NEXT:    kmovd %k0, %eax
775; AVX512BW-NEXT:    addl %edx, %eax
776; AVX512BW-NEXT:    retq
777;
778; AVX512F-32-LABEL: test_mask_ucmp_w_512:
779; AVX512F-32:       # BB#0:
780; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
781; AVX512F-32-NEXT:    vpcmpequw %zmm1, %zmm0, %k0 {%k1}
782; AVX512F-32-NEXT:    kmovd %k0, %eax
783; AVX512F-32-NEXT:    vpcmpltuw %zmm1, %zmm0, %k0 {%k1}
784; AVX512F-32-NEXT:    kmovd %k0, %ecx
785; AVX512F-32-NEXT:    addl %eax, %ecx
786; AVX512F-32-NEXT:    vpcmpleuw %zmm1, %zmm0, %k0 {%k1}
787; AVX512F-32-NEXT:    kmovd %k0, %eax
788; AVX512F-32-NEXT:    addl %ecx, %eax
789; AVX512F-32-NEXT:    vpcmpunorduw %zmm1, %zmm0, %k0 {%k1}
790; AVX512F-32-NEXT:    kmovd %k0, %ecx
791; AVX512F-32-NEXT:    addl %eax, %ecx
792; AVX512F-32-NEXT:    vpcmpnequw %zmm1, %zmm0, %k0 {%k1}
793; AVX512F-32-NEXT:    kmovd %k0, %eax
794; AVX512F-32-NEXT:    addl %ecx, %eax
795; AVX512F-32-NEXT:    vpcmpnltuw %zmm1, %zmm0, %k0 {%k1}
796; AVX512F-32-NEXT:    kmovd %k0, %ecx
797; AVX512F-32-NEXT:    addl %eax, %ecx
798; AVX512F-32-NEXT:    vpcmpnleuw %zmm1, %zmm0, %k0 {%k1}
799; AVX512F-32-NEXT:    kmovd %k0, %edx
800; AVX512F-32-NEXT:    addl %ecx, %edx
801; AVX512F-32-NEXT:    vpcmporduw %zmm1, %zmm0, %k0 {%k1}
802; AVX512F-32-NEXT:    kmovd %k0, %eax
803; AVX512F-32-NEXT:    addl %edx, %eax
804; AVX512F-32-NEXT:    retl
805  %res0 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 %mask)
806  %res1 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 1, i32 %mask)
807  %ret1 = add i32 %res0, %res1
808  %res2 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 2, i32 %mask)
809  %ret2 = add i32 %ret1, %res2
810  %res3 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 3, i32 %mask)
811  %ret3 = add i32 %ret2, %res3
812  %res4 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 4, i32 %mask)
813  %ret4 = add i32 %ret3, %res4
814  %res5 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 5, i32 %mask)
815  %ret5 = add i32 %ret4, %res5
816  %res6 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 6, i32 %mask)
817  %ret6 = add i32 %ret5, %res6
818  %res7 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 7, i32 %mask)
819  %ret7 = add i32 %ret6, %res7
820  ret i32 %ret7
821}
822
823declare i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16>, <32 x i16>, i32, i32) nounwind readnone
824
825declare <64 x i8> @llvm.x86.avx512.mask.blend.b.512(<64 x i8>, <64 x i8>, i64) nounwind readonly
826
; test_x86_mask_blend_w_512 - passes %a1, %a2 and the i32 %mask to
; llvm.x86.avx512.mask.blend.w.512 and returns the result. Note the mask is
; the first function argument here, so x86-64 reads it from %edi. The
; expected code is a kmovd into %k1 followed by a single vpblendmw {%k1}.
827define <32 x i16> @test_x86_mask_blend_w_512(i32 %mask, <32 x i16> %a1, <32 x i16> %a2) {
828; AVX512BW-LABEL: test_x86_mask_blend_w_512:
829; AVX512BW:       ## BB#0:
830; AVX512BW-NEXT:    kmovd %edi, %k1
831; AVX512BW-NEXT:    vpblendmw %zmm1, %zmm0, %zmm0 {%k1}
832; AVX512BW-NEXT:    retq
833;
834; AVX512F-32-LABEL: test_x86_mask_blend_w_512:
835; AVX512F-32:       # BB#0:
836; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
837; AVX512F-32-NEXT:    vpblendmw %zmm1, %zmm0, %zmm0 {%k1}
838; AVX512F-32-NEXT:    retl
839    %res = call <32 x i16> @llvm.x86.avx512.mask.blend.w.512(<32 x i16> %a1, <32 x i16> %a2, i32 %mask) ; <<32 x i16>> [#uses=1]
840  ret <32 x i16> %res
841}
842declare <32 x i16> @llvm.x86.avx512.mask.blend.w.512(<32 x i16>, <32 x i16>, i32) nounwind readonly
843
; test_x86_mask_blend_b_512 - byte-element blend: passes %a1, %a2 and the
; i64 mask %a0 to llvm.x86.avx512.mask.blend.b.512. x86-64 is expected to
; move the mask with one kmovq from %rdi; i386 is expected to load two
; 32-bit halves from the stack with kmovd and join them with kunpckdq before
; the vpblendmb {%k1}.
844define <64 x i8> @test_x86_mask_blend_b_512(i64 %a0, <64 x i8> %a1, <64 x i8> %a2) {
845; AVX512BW-LABEL: test_x86_mask_blend_b_512:
846; AVX512BW:       ## BB#0:
847; AVX512BW-NEXT:    kmovq %rdi, %k1
848; AVX512BW-NEXT:    vpblendmb %zmm1, %zmm0, %zmm0 {%k1}
849; AVX512BW-NEXT:    retq
850;
851; AVX512F-32-LABEL: test_x86_mask_blend_b_512:
852; AVX512F-32:       # BB#0:
853; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
854; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
855; AVX512F-32-NEXT:    kunpckdq %k0, %k1, %k1
856; AVX512F-32-NEXT:    vpblendmb %zmm1, %zmm0, %zmm0 {%k1}
857; AVX512F-32-NEXT:    retl
858  %res = call <64 x i8> @llvm.x86.avx512.mask.blend.b.512(<64 x i8> %a1, <64 x i8> %a2, i64 %a0) ; <<64 x i8>> [#uses=1]
859  ret <64 x i8> %res
860}
861
; test_mask_packs_epi32_rr_512 - register/register form of
; llvm.x86.avx512.mask.packssdw.512 with a zeroinitializer pass-through and
; an all-ones mask (i32 -1). The expected code is a single vpackssdw with no
; mask annotation on either target.
862define <32 x i16> @test_mask_packs_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) {
863; AVX512BW-LABEL: test_mask_packs_epi32_rr_512:
864; AVX512BW:       ## BB#0:
865; AVX512BW-NEXT:    vpackssdw %zmm1, %zmm0, %zmm0
866; AVX512BW-NEXT:    retq
867;
868; AVX512F-32-LABEL: test_mask_packs_epi32_rr_512:
869; AVX512F-32:       # BB#0:
870; AVX512F-32-NEXT:    vpackssdw %zmm1, %zmm0, %zmm0
871; AVX512F-32-NEXT:    retl
872  %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1)
873  ret <32 x i16> %res
874}
875
; test_mask_packs_epi32_rrk_512 - register/register packssdw with a
; caller-supplied %passThru vector and i32 %mask. The expected code loads the
; mask into %k1, issues vpackssdw with {%k1} (no {z}) writing %zmm2, then
; copies %zmm2 to %zmm0 for the return.
876define <32 x i16> @test_mask_packs_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) {
877; AVX512BW-LABEL: test_mask_packs_epi32_rrk_512:
878; AVX512BW:       ## BB#0:
879; AVX512BW-NEXT:    kmovd %edi, %k1
880; AVX512BW-NEXT:    vpackssdw %zmm1, %zmm0, %zmm2 {%k1}
881; AVX512BW-NEXT:    vmovaps %zmm2, %zmm0
882; AVX512BW-NEXT:    retq
883;
884; AVX512F-32-LABEL: test_mask_packs_epi32_rrk_512:
885; AVX512F-32:       # BB#0:
886; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
887; AVX512F-32-NEXT:    vpackssdw %zmm1, %zmm0, %zmm2 {%k1}
888; AVX512F-32-NEXT:    vmovaps %zmm2, %zmm0
889; AVX512F-32-NEXT:    retl
890  %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask)
891  ret <32 x i16> %res
892}
893
; test_mask_packs_epi32_rrkz_512 - register/register packssdw with a
; zeroinitializer pass-through and a caller-supplied i32 %mask. The expected
; code uses the zeroing form, vpackssdw ... {%k1} {z}, writing %zmm0 directly.
894define <32 x i16> @test_mask_packs_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, i32 %mask) {
895; AVX512BW-LABEL: test_mask_packs_epi32_rrkz_512:
896; AVX512BW:       ## BB#0:
897; AVX512BW-NEXT:    kmovd %edi, %k1
898; AVX512BW-NEXT:    vpackssdw %zmm1, %zmm0, %zmm0 {%k1} {z}
899; AVX512BW-NEXT:    retq
900;
901; AVX512F-32-LABEL: test_mask_packs_epi32_rrkz_512:
902; AVX512F-32:       # BB#0:
903; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
904; AVX512F-32-NEXT:    vpackssdw %zmm1, %zmm0, %zmm0 {%k1} {z}
905; AVX512F-32-NEXT:    retl
906  %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask)
907  ret <32 x i16> %res
908}
909
; test_mask_packs_epi32_rm_512 - register/memory packssdw: the second source
; vector is loaded from %ptr_b, with a zeroinitializer pass-through and an
; all-ones mask. The expected code folds the load into the vpackssdw memory
; operand; i386 first fetches the pointer from the stack into %eax.
910define <32 x i16> @test_mask_packs_epi32_rm_512(<16 x i32> %a, <16 x i32>* %ptr_b) {
911; AVX512BW-LABEL: test_mask_packs_epi32_rm_512:
912; AVX512BW:       ## BB#0:
913; AVX512BW-NEXT:    vpackssdw (%rdi), %zmm0, %zmm0
914; AVX512BW-NEXT:    retq
915;
916; AVX512F-32-LABEL: test_mask_packs_epi32_rm_512:
917; AVX512F-32:       # BB#0:
918; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
919; AVX512F-32-NEXT:    vpackssdw (%eax), %zmm0, %zmm0
920; AVX512F-32-NEXT:    retl
921  %b = load <16 x i32>, <16 x i32>* %ptr_b
922  %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1)
923  ret <32 x i16> %res
924}
925
; test_mask_packs_epi32_rmk_512 - register/memory packssdw with a
; caller-supplied %passThru and i32 %mask. The expected code folds the load
; of %ptr_b into vpackssdw, applies {%k1} (no {z}) while writing %zmm1, and
; then copies %zmm1 to %zmm0. On x86-64 the mask arrives in %esi because the
; pointer occupies %rdi.
926define <32 x i16> @test_mask_packs_epi32_rmk_512(<16 x i32> %a, <16 x i32>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
927; AVX512BW-LABEL: test_mask_packs_epi32_rmk_512:
928; AVX512BW:       ## BB#0:
929; AVX512BW-NEXT:    kmovd %esi, %k1
930; AVX512BW-NEXT:    vpackssdw (%rdi), %zmm0, %zmm1 {%k1}
931; AVX512BW-NEXT:    vmovaps %zmm1, %zmm0
932; AVX512BW-NEXT:    retq
933;
934; AVX512F-32-LABEL: test_mask_packs_epi32_rmk_512:
935; AVX512F-32:       # BB#0:
936; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
937; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
938; AVX512F-32-NEXT:    vpackssdw (%eax), %zmm0, %zmm1 {%k1}
939; AVX512F-32-NEXT:    vmovaps %zmm1, %zmm0
940; AVX512F-32-NEXT:    retl
941  %b = load <16 x i32>, <16 x i32>* %ptr_b
942  %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask)
943  ret <32 x i16> %res
944}
945
; test_mask_packs_epi32_rmkz_512 - register/memory packssdw with a
; zeroinitializer pass-through and a caller-supplied i32 %mask. The expected
; code folds the load of %ptr_b and uses the zeroing form {%k1} {z}.
946define <32 x i16> @test_mask_packs_epi32_rmkz_512(<16 x i32> %a, <16 x i32>* %ptr_b, i32 %mask) {
947; AVX512BW-LABEL: test_mask_packs_epi32_rmkz_512:
948; AVX512BW:       ## BB#0:
949; AVX512BW-NEXT:    kmovd %esi, %k1
950; AVX512BW-NEXT:    vpackssdw (%rdi), %zmm0, %zmm0 {%k1} {z}
951; AVX512BW-NEXT:    retq
952;
953; AVX512F-32-LABEL: test_mask_packs_epi32_rmkz_512:
954; AVX512F-32:       # BB#0:
955; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
956; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
957; AVX512F-32-NEXT:    vpackssdw (%eax), %zmm0, %zmm0 {%k1} {z}
958; AVX512F-32-NEXT:    retl
959  %b = load <16 x i32>, <16 x i32>* %ptr_b
960  %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask)
961  ret <32 x i16> %res
962}
963
; test_mask_packs_epi32_rmb_512 - broadcast-memory packssdw: a single i32 is
; loaded from %ptr_b and splatted to all 16 lanes (insertelement into lane 0
; followed by a shufflevector with an all-zero index mask). With a
; zeroinitializer pass-through and an all-ones mask, the expected code folds
; the splat into a {1to16} embedded-broadcast memory operand.
964define <32 x i16> @test_mask_packs_epi32_rmb_512(<16 x i32> %a, i32* %ptr_b) {
965; AVX512BW-LABEL: test_mask_packs_epi32_rmb_512:
966; AVX512BW:       ## BB#0:
967; AVX512BW-NEXT:    vpackssdw (%rdi){1to16}, %zmm0, %zmm0
968; AVX512BW-NEXT:    retq
969;
970; AVX512F-32-LABEL: test_mask_packs_epi32_rmb_512:
971; AVX512F-32:       # BB#0:
972; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
973; AVX512F-32-NEXT:    vpackssdw (%eax){1to16}, %zmm0, %zmm0
974; AVX512F-32-NEXT:    retl
975  %q = load i32, i32* %ptr_b
976  %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
977  %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
978  %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1)
979  ret <32 x i16> %res
980}
981
; test_mask_packs_epi32_rmbk_512 - broadcast-memory packssdw with a
; caller-supplied %passThru and i32 %mask. The scalar loaded from %ptr_b is
; splatted to all 16 lanes in IR; the expected code uses a {1to16} memory
; operand with {%k1} (no {z}) writing %zmm1, then copies %zmm1 to %zmm0.
982define <32 x i16> @test_mask_packs_epi32_rmbk_512(<16 x i32> %a, i32* %ptr_b, <32 x i16> %passThru, i32 %mask) {
983; AVX512BW-LABEL: test_mask_packs_epi32_rmbk_512:
984; AVX512BW:       ## BB#0:
985; AVX512BW-NEXT:    kmovd %esi, %k1
986; AVX512BW-NEXT:    vpackssdw (%rdi){1to16}, %zmm0, %zmm1 {%k1}
987; AVX512BW-NEXT:    vmovaps %zmm1, %zmm0
988; AVX512BW-NEXT:    retq
989;
990; AVX512F-32-LABEL: test_mask_packs_epi32_rmbk_512:
991; AVX512F-32:       # BB#0:
992; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
993; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
994; AVX512F-32-NEXT:    vpackssdw (%eax){1to16}, %zmm0, %zmm1 {%k1}
995; AVX512F-32-NEXT:    vmovaps %zmm1, %zmm0
996; AVX512F-32-NEXT:    retl
997  %q = load i32, i32* %ptr_b
998  %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
999  %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
1000  %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask)
1001  ret <32 x i16> %res
1002}
1003
; test_mask_packs_epi32_rmbkz_512 - broadcast-memory packssdw with a
; zeroinitializer pass-through and a caller-supplied i32 %mask. The scalar
; loaded from %ptr_b is splatted in IR; the expected code uses a {1to16}
; memory operand together with the zeroing form {%k1} {z}.
1004define <32 x i16> @test_mask_packs_epi32_rmbkz_512(<16 x i32> %a, i32* %ptr_b, i32 %mask) {
1005; AVX512BW-LABEL: test_mask_packs_epi32_rmbkz_512:
1006; AVX512BW:       ## BB#0:
1007; AVX512BW-NEXT:    kmovd %esi, %k1
1008; AVX512BW-NEXT:    vpackssdw (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z}
1009; AVX512BW-NEXT:    retq
1010;
1011; AVX512F-32-LABEL: test_mask_packs_epi32_rmbkz_512:
1012; AVX512F-32:       # BB#0:
1013; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1014; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
1015; AVX512F-32-NEXT:    vpackssdw (%eax){1to16}, %zmm0, %zmm0 {%k1} {z}
1016; AVX512F-32-NEXT:    retl
1017  %q = load i32, i32* %ptr_b
1018  %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
1019  %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
1020  %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask)
1021  ret <32 x i16> %res
1022}
1023
1024declare <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32>, <16 x i32>, <32 x i16>, i32)
1025
; test_mask_packs_epi16_rr_512 - register/register form of
; llvm.x86.avx512.mask.packsswb.512 (<32 x i16> sources, <64 x i8> result)
; with a zeroinitializer pass-through and an all-ones i64 mask. The expected
; code is a single vpacksswb with no mask annotation.
1026define <64 x i8> @test_mask_packs_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
1027; AVX512BW-LABEL: test_mask_packs_epi16_rr_512:
1028; AVX512BW:       ## BB#0:
1029; AVX512BW-NEXT:    vpacksswb %zmm1, %zmm0, %zmm0
1030; AVX512BW-NEXT:    retq
1031;
1032; AVX512F-32-LABEL: test_mask_packs_epi16_rr_512:
1033; AVX512F-32:       # BB#0:
1034; AVX512F-32-NEXT:    vpacksswb %zmm1, %zmm0, %zmm0
1035; AVX512F-32-NEXT:    retl
1036  %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1)
1037  ret <64 x i8> %res
1038}
1039
; test_mask_packs_epi16_rrk_512 - register/register packsswb with a
; caller-supplied %passThru and i64 %mask. x86-64 is expected to move the
; mask with one kmovq; i386 is expected to load two 32-bit halves with kmovd
; and join them with kunpckdq. The pack then uses {%k1} (no {z}) writing
; %zmm2, which is copied to %zmm0.
1040define <64 x i8> @test_mask_packs_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) {
1041; AVX512BW-LABEL: test_mask_packs_epi16_rrk_512:
1042; AVX512BW:       ## BB#0:
1043; AVX512BW-NEXT:    kmovq %rdi, %k1
1044; AVX512BW-NEXT:    vpacksswb %zmm1, %zmm0, %zmm2 {%k1}
1045; AVX512BW-NEXT:    vmovaps %zmm2, %zmm0
1046; AVX512BW-NEXT:    retq
1047;
1048; AVX512F-32-LABEL: test_mask_packs_epi16_rrk_512:
1049; AVX512F-32:       # BB#0:
1050; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
1051; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
1052; AVX512F-32-NEXT:    kunpckdq %k0, %k1, %k1
1053; AVX512F-32-NEXT:    vpacksswb %zmm1, %zmm0, %zmm2 {%k1}
1054; AVX512F-32-NEXT:    vmovaps %zmm2, %zmm0
1055; AVX512F-32-NEXT:    retl
1056  %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask)
1057  ret <64 x i8> %res
1058}
1059
; test_mask_packs_epi16_rrkz_512 - register/register packsswb with a
; zeroinitializer pass-through and a caller-supplied i64 %mask. The expected
; code uses the zeroing form {%k1} {z}; i386 builds the 64-bit mask from two
; kmovd loads joined by kunpckdq.
1060define <64 x i8> @test_mask_packs_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i64 %mask) {
1061; AVX512BW-LABEL: test_mask_packs_epi16_rrkz_512:
1062; AVX512BW:       ## BB#0:
1063; AVX512BW-NEXT:    kmovq %rdi, %k1
1064; AVX512BW-NEXT:    vpacksswb %zmm1, %zmm0, %zmm0 {%k1} {z}
1065; AVX512BW-NEXT:    retq
1066;
1067; AVX512F-32-LABEL: test_mask_packs_epi16_rrkz_512:
1068; AVX512F-32:       # BB#0:
1069; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
1070; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
1071; AVX512F-32-NEXT:    kunpckdq %k0, %k1, %k1
1072; AVX512F-32-NEXT:    vpacksswb %zmm1, %zmm0, %zmm0 {%k1} {z}
1073; AVX512F-32-NEXT:    retl
1074  %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 %mask)
1075  ret <64 x i8> %res
1076}
1077
; test_mask_packs_epi16_rm_512 - register/memory packsswb: the second source
; is loaded from %ptr_b, with a zeroinitializer pass-through and an all-ones
; i64 mask. The expected code folds the load into the vpacksswb memory
; operand with no mask annotation.
1078define <64 x i8> @test_mask_packs_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
1079; AVX512BW-LABEL: test_mask_packs_epi16_rm_512:
1080; AVX512BW:       ## BB#0:
1081; AVX512BW-NEXT:    vpacksswb (%rdi), %zmm0, %zmm0
1082; AVX512BW-NEXT:    retq
1083;
1084; AVX512F-32-LABEL: test_mask_packs_epi16_rm_512:
1085; AVX512F-32:       # BB#0:
1086; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1087; AVX512F-32-NEXT:    vpacksswb (%eax), %zmm0, %zmm0
1088; AVX512F-32-NEXT:    retl
1089  %b = load <32 x i16>, <32 x i16>* %ptr_b
1090  %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1)
1091  ret <64 x i8> %res
1092}
1093
; test_mask_packs_epi16_rmk_512 - register/memory packsswb with a
; caller-supplied %passThru and i64 %mask. The expected code folds the load
; of %ptr_b, applies {%k1} (no {z}) while writing %zmm1, and copies %zmm1 to
; %zmm0. x86-64 takes the mask from %rsi with kmovq; i386 builds it from two
; kmovd loads joined by kunpckdq.
1094define <64 x i8> @test_mask_packs_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <64 x i8> %passThru, i64 %mask) {
1095; AVX512BW-LABEL: test_mask_packs_epi16_rmk_512:
1096; AVX512BW:       ## BB#0:
1097; AVX512BW-NEXT:    kmovq %rsi, %k1
1098; AVX512BW-NEXT:    vpacksswb (%rdi), %zmm0, %zmm1 {%k1}
1099; AVX512BW-NEXT:    vmovaps %zmm1, %zmm0
1100; AVX512BW-NEXT:    retq
1101;
1102; AVX512F-32-LABEL: test_mask_packs_epi16_rmk_512:
1103; AVX512F-32:       # BB#0:
1104; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1105; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
1106; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
1107; AVX512F-32-NEXT:    kunpckdq %k0, %k1, %k1
1108; AVX512F-32-NEXT:    vpacksswb (%eax), %zmm0, %zmm1 {%k1}
1109; AVX512F-32-NEXT:    vmovaps %zmm1, %zmm0
1110; AVX512F-32-NEXT:    retl
1111  %b = load <32 x i16>, <32 x i16>* %ptr_b
1112  %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask)
1113  ret <64 x i8> %res
1114}
1115
; test_mask_packs_epi16_rmkz_512 - register/memory packsswb with a
; zeroinitializer pass-through and a caller-supplied i64 %mask. The expected
; code folds the load of %ptr_b and uses the zeroing form {%k1} {z}.
1116define <64 x i8> @test_mask_packs_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i64 %mask) {
1117; AVX512BW-LABEL: test_mask_packs_epi16_rmkz_512:
1118; AVX512BW:       ## BB#0:
1119; AVX512BW-NEXT:    kmovq %rsi, %k1
1120; AVX512BW-NEXT:    vpacksswb (%rdi), %zmm0, %zmm0 {%k1} {z}
1121; AVX512BW-NEXT:    retq
1122;
1123; AVX512F-32-LABEL: test_mask_packs_epi16_rmkz_512:
1124; AVX512F-32:       # BB#0:
1125; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1126; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
1127; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
1128; AVX512F-32-NEXT:    kunpckdq %k0, %k1, %k1
1129; AVX512F-32-NEXT:    vpacksswb (%eax), %zmm0, %zmm0 {%k1} {z}
1130; AVX512F-32-NEXT:    retl
1131  %b = load <32 x i16>, <32 x i16>* %ptr_b
1132  %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 %mask)
1133  ret <64 x i8> %res
1134}
1135
1136declare <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16>, <32 x i16>, <64 x i8>, i64)
1137
1138
; test_mask_packus_epi32_rr_512 - register/register form of
; llvm.x86.avx512.mask.packusdw.512 with a zeroinitializer pass-through and
; an all-ones mask (i32 -1). The expected code is a single vpackusdw with no
; mask annotation on either target.
1139define <32 x i16> @test_mask_packus_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) {
1140; AVX512BW-LABEL: test_mask_packus_epi32_rr_512:
1141; AVX512BW:       ## BB#0:
1142; AVX512BW-NEXT:    vpackusdw %zmm1, %zmm0, %zmm0
1143; AVX512BW-NEXT:    retq
1144;
1145; AVX512F-32-LABEL: test_mask_packus_epi32_rr_512:
1146; AVX512F-32:       # BB#0:
1147; AVX512F-32-NEXT:    vpackusdw %zmm1, %zmm0, %zmm0
1148; AVX512F-32-NEXT:    retl
1149  %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1)
1150  ret <32 x i16> %res
1151}
1152
; test_mask_packus_epi32_rrk_512 - register/register packusdw with a
; caller-supplied %passThru and i32 %mask. The expected code loads the mask
; into %k1, issues vpackusdw with {%k1} (no {z}) writing %zmm2, then copies
; %zmm2 to %zmm0 for the return.
1153define <32 x i16> @test_mask_packus_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) {
1154; AVX512BW-LABEL: test_mask_packus_epi32_rrk_512:
1155; AVX512BW:       ## BB#0:
1156; AVX512BW-NEXT:    kmovd %edi, %k1
1157; AVX512BW-NEXT:    vpackusdw %zmm1, %zmm0, %zmm2 {%k1}
1158; AVX512BW-NEXT:    vmovaps %zmm2, %zmm0
1159; AVX512BW-NEXT:    retq
1160;
1161; AVX512F-32-LABEL: test_mask_packus_epi32_rrk_512:
1162; AVX512F-32:       # BB#0:
1163; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
1164; AVX512F-32-NEXT:    vpackusdw %zmm1, %zmm0, %zmm2 {%k1}
1165; AVX512F-32-NEXT:    vmovaps %zmm2, %zmm0
1166; AVX512F-32-NEXT:    retl
1167  %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask)
1168  ret <32 x i16> %res
1169}
1170
; test_mask_packus_epi32_rrkz_512 - register/register packusdw with a
; zeroinitializer pass-through and a caller-supplied i32 %mask. The expected
; code uses the zeroing form, vpackusdw ... {%k1} {z}, writing %zmm0 directly.
1171define <32 x i16> @test_mask_packus_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, i32 %mask) {
1172; AVX512BW-LABEL: test_mask_packus_epi32_rrkz_512:
1173; AVX512BW:       ## BB#0:
1174; AVX512BW-NEXT:    kmovd %edi, %k1
1175; AVX512BW-NEXT:    vpackusdw %zmm1, %zmm0, %zmm0 {%k1} {z}
1176; AVX512BW-NEXT:    retq
1177;
1178; AVX512F-32-LABEL: test_mask_packus_epi32_rrkz_512:
1179; AVX512F-32:       # BB#0:
1180; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
1181; AVX512F-32-NEXT:    vpackusdw %zmm1, %zmm0, %zmm0 {%k1} {z}
1182; AVX512F-32-NEXT:    retl
1183  %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask)
1184  ret <32 x i16> %res
1185}
1186
; test_mask_packus_epi32_rm_512 - register/memory packusdw: the second source
; vector is loaded from %ptr_b, with a zeroinitializer pass-through and an
; all-ones mask. The expected code folds the load into the vpackusdw memory
; operand; i386 first fetches the pointer from the stack into %eax.
1187define <32 x i16> @test_mask_packus_epi32_rm_512(<16 x i32> %a, <16 x i32>* %ptr_b) {
1188; AVX512BW-LABEL: test_mask_packus_epi32_rm_512:
1189; AVX512BW:       ## BB#0:
1190; AVX512BW-NEXT:    vpackusdw (%rdi), %zmm0, %zmm0
1191; AVX512BW-NEXT:    retq
1192;
1193; AVX512F-32-LABEL: test_mask_packus_epi32_rm_512:
1194; AVX512F-32:       # BB#0:
1195; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1196; AVX512F-32-NEXT:    vpackusdw (%eax), %zmm0, %zmm0
1197; AVX512F-32-NEXT:    retl
1198  %b = load <16 x i32>, <16 x i32>* %ptr_b
1199  %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1)
1200  ret <32 x i16> %res
1201}
1202
; test_mask_packus_epi32_rmk_512 - register/memory packusdw with a
; caller-supplied %passThru and i32 %mask. The expected code folds the load
; of %ptr_b into vpackusdw, applies {%k1} (no {z}) while writing %zmm1, and
; then copies %zmm1 to %zmm0. On x86-64 the mask arrives in %esi because the
; pointer occupies %rdi.
1203define <32 x i16> @test_mask_packus_epi32_rmk_512(<16 x i32> %a, <16 x i32>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
1204; AVX512BW-LABEL: test_mask_packus_epi32_rmk_512:
1205; AVX512BW:       ## BB#0:
1206; AVX512BW-NEXT:    kmovd %esi, %k1
1207; AVX512BW-NEXT:    vpackusdw (%rdi), %zmm0, %zmm1 {%k1}
1208; AVX512BW-NEXT:    vmovaps %zmm1, %zmm0
1209; AVX512BW-NEXT:    retq
1210;
1211; AVX512F-32-LABEL: test_mask_packus_epi32_rmk_512:
1212; AVX512F-32:       # BB#0:
1213; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1214; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
1215; AVX512F-32-NEXT:    vpackusdw (%eax), %zmm0, %zmm1 {%k1}
1216; AVX512F-32-NEXT:    vmovaps %zmm1, %zmm0
1217; AVX512F-32-NEXT:    retl
1218  %b = load <16 x i32>, <16 x i32>* %ptr_b
1219  %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask)
1220  ret <32 x i16> %res
1221}
1222
; test_mask_packus_epi32_rmkz_512 - register/memory packusdw with a
; zeroinitializer pass-through and a caller-supplied i32 %mask. The expected
; code folds the load of %ptr_b and uses the zeroing form {%k1} {z}.
1223define <32 x i16> @test_mask_packus_epi32_rmkz_512(<16 x i32> %a, <16 x i32>* %ptr_b, i32 %mask) {
1224; AVX512BW-LABEL: test_mask_packus_epi32_rmkz_512:
1225; AVX512BW:       ## BB#0:
1226; AVX512BW-NEXT:    kmovd %esi, %k1
1227; AVX512BW-NEXT:    vpackusdw (%rdi), %zmm0, %zmm0 {%k1} {z}
1228; AVX512BW-NEXT:    retq
1229;
1230; AVX512F-32-LABEL: test_mask_packus_epi32_rmkz_512:
1231; AVX512F-32:       # BB#0:
1232; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1233; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
1234; AVX512F-32-NEXT:    vpackusdw (%eax), %zmm0, %zmm0 {%k1} {z}
1235; AVX512F-32-NEXT:    retl
1236  %b = load <16 x i32>, <16 x i32>* %ptr_b
1237  %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask)
1238  ret <32 x i16> %res
1239}
1240
; test_mask_packus_epi32_rmb_512 - broadcast-memory packusdw: a single i32 is
; loaded from %ptr_b and splatted to all 16 lanes (insertelement into lane 0
; followed by a shufflevector with an all-zero index mask). With a
; zeroinitializer pass-through and an all-ones mask, the expected code folds
; the splat into a {1to16} embedded-broadcast memory operand.
1241define <32 x i16> @test_mask_packus_epi32_rmb_512(<16 x i32> %a, i32* %ptr_b) {
1242; AVX512BW-LABEL: test_mask_packus_epi32_rmb_512:
1243; AVX512BW:       ## BB#0:
1244; AVX512BW-NEXT:    vpackusdw (%rdi){1to16}, %zmm0, %zmm0
1245; AVX512BW-NEXT:    retq
1246;
1247; AVX512F-32-LABEL: test_mask_packus_epi32_rmb_512:
1248; AVX512F-32:       # BB#0:
1249; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1250; AVX512F-32-NEXT:    vpackusdw (%eax){1to16}, %zmm0, %zmm0
1251; AVX512F-32-NEXT:    retl
1252  %q = load i32, i32* %ptr_b
1253  %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
1254  %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
1255  %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1)
1256  ret <32 x i16> %res
1257}
1258
; test_mask_packus_epi32_rmbk_512 - broadcast-memory packusdw with a
; caller-supplied %passThru and i32 %mask. The scalar loaded from %ptr_b is
; splatted to all 16 lanes in IR; the expected code uses a {1to16} memory
; operand with {%k1} (no {z}) writing %zmm1, then copies %zmm1 to %zmm0.
1259define <32 x i16> @test_mask_packus_epi32_rmbk_512(<16 x i32> %a, i32* %ptr_b, <32 x i16> %passThru, i32 %mask) {
1260; AVX512BW-LABEL: test_mask_packus_epi32_rmbk_512:
1261; AVX512BW:       ## BB#0:
1262; AVX512BW-NEXT:    kmovd %esi, %k1
1263; AVX512BW-NEXT:    vpackusdw (%rdi){1to16}, %zmm0, %zmm1 {%k1}
1264; AVX512BW-NEXT:    vmovaps %zmm1, %zmm0
1265; AVX512BW-NEXT:    retq
1266;
1267; AVX512F-32-LABEL: test_mask_packus_epi32_rmbk_512:
1268; AVX512F-32:       # BB#0:
1269; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1270; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
1271; AVX512F-32-NEXT:    vpackusdw (%eax){1to16}, %zmm0, %zmm1 {%k1}
1272; AVX512F-32-NEXT:    vmovaps %zmm1, %zmm0
1273; AVX512F-32-NEXT:    retl
1274  %q = load i32, i32* %ptr_b
1275  %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
1276  %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
1277  %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask)
1278  ret <32 x i16> %res
1279}
1280
; test_mask_packus_epi32_rmbkz_512 - broadcast-memory packusdw with a
; zeroinitializer pass-through and a caller-supplied i32 %mask. The scalar
; loaded from %ptr_b is splatted in IR; the expected code uses a {1to16}
; memory operand together with the zeroing form {%k1} {z}.
1281define <32 x i16> @test_mask_packus_epi32_rmbkz_512(<16 x i32> %a, i32* %ptr_b, i32 %mask) {
1282; AVX512BW-LABEL: test_mask_packus_epi32_rmbkz_512:
1283; AVX512BW:       ## BB#0:
1284; AVX512BW-NEXT:    kmovd %esi, %k1
1285; AVX512BW-NEXT:    vpackusdw (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z}
1286; AVX512BW-NEXT:    retq
1287;
1288; AVX512F-32-LABEL: test_mask_packus_epi32_rmbkz_512:
1289; AVX512F-32:       # BB#0:
1290; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1291; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
1292; AVX512F-32-NEXT:    vpackusdw (%eax){1to16}, %zmm0, %zmm0 {%k1} {z}
1293; AVX512F-32-NEXT:    retl
1294  %q = load i32, i32* %ptr_b
1295  %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
1296  %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
1297  %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask)
1298  ret <32 x i16> %res
1299}
1300
1301declare <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32>, <16 x i32>, <32 x i16>, i32)
1302
; test_mask_packus_epi16_rr_512 - register/register form of
; llvm.x86.avx512.mask.packuswb.512 (<32 x i16> sources, <64 x i8> result)
; with a zeroinitializer pass-through and an all-ones i64 mask. The expected
; code is a single vpackuswb with no mask annotation.
1303define <64 x i8> @test_mask_packus_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
1304; AVX512BW-LABEL: test_mask_packus_epi16_rr_512:
1305; AVX512BW:       ## BB#0:
1306; AVX512BW-NEXT:    vpackuswb %zmm1, %zmm0, %zmm0
1307; AVX512BW-NEXT:    retq
1308;
1309; AVX512F-32-LABEL: test_mask_packus_epi16_rr_512:
1310; AVX512F-32:       # BB#0:
1311; AVX512F-32-NEXT:    vpackuswb %zmm1, %zmm0, %zmm0
1312; AVX512F-32-NEXT:    retl
1313  %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1)
1314  ret <64 x i8> %res
1315}
1316
; test_mask_packus_epi16_rrk_512 - register/register packuswb with a
; caller-supplied %passThru and i64 %mask. x86-64 is expected to move the
; mask with one kmovq; i386 is expected to load two 32-bit halves with kmovd
; and join them with kunpckdq. The pack then uses {%k1} (no {z}) writing
; %zmm2, which is copied to %zmm0.
1317define <64 x i8> @test_mask_packus_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) {
1318; AVX512BW-LABEL: test_mask_packus_epi16_rrk_512:
1319; AVX512BW:       ## BB#0:
1320; AVX512BW-NEXT:    kmovq %rdi, %k1
1321; AVX512BW-NEXT:    vpackuswb %zmm1, %zmm0, %zmm2 {%k1}
1322; AVX512BW-NEXT:    vmovaps %zmm2, %zmm0
1323; AVX512BW-NEXT:    retq
1324;
1325; AVX512F-32-LABEL: test_mask_packus_epi16_rrk_512:
1326; AVX512F-32:       # BB#0:
1327; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
1328; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
1329; AVX512F-32-NEXT:    kunpckdq %k0, %k1, %k1
1330; AVX512F-32-NEXT:    vpackuswb %zmm1, %zmm0, %zmm2 {%k1}
1331; AVX512F-32-NEXT:    vmovaps %zmm2, %zmm0
1332; AVX512F-32-NEXT:    retl
1333  %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask)
1334  ret <64 x i8> %res
1335}
1336
; test_mask_packus_epi16_rrkz_512 - register/register packuswb with a
; zeroinitializer pass-through and a caller-supplied i64 %mask. The expected
; code uses the zeroing form {%k1} {z}; i386 builds the 64-bit mask from two
; kmovd loads joined by kunpckdq.
1337define <64 x i8> @test_mask_packus_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i64 %mask) {
1338; AVX512BW-LABEL: test_mask_packus_epi16_rrkz_512:
1339; AVX512BW:       ## BB#0:
1340; AVX512BW-NEXT:    kmovq %rdi, %k1
1341; AVX512BW-NEXT:    vpackuswb %zmm1, %zmm0, %zmm0 {%k1} {z}
1342; AVX512BW-NEXT:    retq
1343;
1344; AVX512F-32-LABEL: test_mask_packus_epi16_rrkz_512:
1345; AVX512F-32:       # BB#0:
1346; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
1347; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
1348; AVX512F-32-NEXT:    kunpckdq %k0, %k1, %k1
1349; AVX512F-32-NEXT:    vpackuswb %zmm1, %zmm0, %zmm0 {%k1} {z}
1350; AVX512F-32-NEXT:    retl
1351  %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 %mask)
1352  ret <64 x i8> %res
1353}
1354
; test_mask_packus_epi16_rm_512 - register/memory packuswb: the second source
; is loaded from %ptr_b, with a zeroinitializer pass-through and an all-ones
; i64 mask. The expected code folds the load into the vpackuswb memory
; operand with no mask annotation.
1355define <64 x i8> @test_mask_packus_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
1356; AVX512BW-LABEL: test_mask_packus_epi16_rm_512:
1357; AVX512BW:       ## BB#0:
1358; AVX512BW-NEXT:    vpackuswb (%rdi), %zmm0, %zmm0
1359; AVX512BW-NEXT:    retq
1360;
1361; AVX512F-32-LABEL: test_mask_packus_epi16_rm_512:
1362; AVX512F-32:       # BB#0:
1363; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1364; AVX512F-32-NEXT:    vpackuswb (%eax), %zmm0, %zmm0
1365; AVX512F-32-NEXT:    retl
1366  %b = load <32 x i16>, <32 x i16>* %ptr_b
1367  %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1)
1368  ret <64 x i8> %res
1369}
1370
; test_mask_packus_epi16_rmk_512 - register/memory packuswb with a
; caller-supplied %passThru and i64 %mask. The expected code folds the load
; of %ptr_b, applies {%k1} (no {z}) while writing %zmm1, and copies %zmm1 to
; %zmm0. x86-64 takes the mask from %rsi with kmovq; i386 builds it from two
; kmovd loads joined by kunpckdq.
1371define <64 x i8> @test_mask_packus_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <64 x i8> %passThru, i64 %mask) {
1372; AVX512BW-LABEL: test_mask_packus_epi16_rmk_512:
1373; AVX512BW:       ## BB#0:
1374; AVX512BW-NEXT:    kmovq %rsi, %k1
1375; AVX512BW-NEXT:    vpackuswb (%rdi), %zmm0, %zmm1 {%k1}
1376; AVX512BW-NEXT:    vmovaps %zmm1, %zmm0
1377; AVX512BW-NEXT:    retq
1378;
1379; AVX512F-32-LABEL: test_mask_packus_epi16_rmk_512:
1380; AVX512F-32:       # BB#0:
1381; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1382; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
1383; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
1384; AVX512F-32-NEXT:    kunpckdq %k0, %k1, %k1
1385; AVX512F-32-NEXT:    vpackuswb (%eax), %zmm0, %zmm1 {%k1}
1386; AVX512F-32-NEXT:    vmovaps %zmm1, %zmm0
1387; AVX512F-32-NEXT:    retl
1388  %b = load <32 x i16>, <32 x i16>* %ptr_b
1389  %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask)
1390  ret <64 x i8> %res
1391}
1392
; Zero-masked memory form of packuswb: %b is loaded from %ptr_b, the
; pass-through is zeroinitializer and the mask is the i64 %mask. The checks
; expect vpackuswb with a memory operand and {%k1} {z}.
1393define <64 x i8> @test_mask_packus_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i64 %mask) {
1394; AVX512BW-LABEL: test_mask_packus_epi16_rmkz_512:
1395; AVX512BW:       ## BB#0:
1396; AVX512BW-NEXT:    kmovq %rsi, %k1
1397; AVX512BW-NEXT:    vpackuswb (%rdi), %zmm0, %zmm0 {%k1} {z}
1398; AVX512BW-NEXT:    retq
1399;
1400; AVX512F-32-LABEL: test_mask_packus_epi16_rmkz_512:
1401; AVX512F-32:       # BB#0:
1402; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1403; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
1404; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
1405; AVX512F-32-NEXT:    kunpckdq %k0, %k1, %k1
1406; AVX512F-32-NEXT:    vpackuswb (%eax), %zmm0, %zmm0 {%k1} {z}
1407; AVX512F-32-NEXT:    retl
1408  %b = load <32 x i16>, <32 x i16>* %ptr_b
1409  %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 %mask)
1410  ret <64 x i8> %res
1411}
1412
; Masked packuswb intrinsic called by the packus tests: two <32 x i16>
; sources, a <64 x i8> pass-through value, and an i64 mask.
1413declare <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16>, <32 x i16>, <64 x i8>, i64)
1414
; Unmasked register form of padds.w: the intrinsic is called with an all-ones
; mask (i32 -1) and a zeroinitializer pass-through. Both runs are expected to
; emit a plain vpaddsw with no mask register.
1415define <32 x i16> @test_mask_adds_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
1416; AVX512BW-LABEL: test_mask_adds_epi16_rr_512:
1417; AVX512BW:       ## BB#0:
1418; AVX512BW-NEXT:    vpaddsw %zmm1, %zmm0, %zmm0
1419; AVX512BW-NEXT:    retq
1420;
1421; AVX512F-32-LABEL: test_mask_adds_epi16_rr_512:
1422; AVX512F-32:       # BB#0:
1423; AVX512F-32-NEXT:    vpaddsw %zmm1, %zmm0, %zmm0
1424; AVX512F-32-NEXT:    retl
1425  %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
1426  ret <32 x i16> %res
1427}
1428
; Merge-masked register form of padds.w: the intrinsic receives %passThru and
; the i32 %mask. The checks expect the mask moved into k1 with kmovd, a masked
; vpaddsw writing zmm2, and a copy of zmm2 into zmm0.
1429define <32 x i16> @test_mask_adds_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
1430; AVX512BW-LABEL: test_mask_adds_epi16_rrk_512:
1431; AVX512BW:       ## BB#0:
1432; AVX512BW-NEXT:    kmovd %edi, %k1
1433; AVX512BW-NEXT:    vpaddsw %zmm1, %zmm0, %zmm2 {%k1}
1434; AVX512BW-NEXT:    vmovaps %zmm2, %zmm0
1435; AVX512BW-NEXT:    retq
1436;
1437; AVX512F-32-LABEL: test_mask_adds_epi16_rrk_512:
1438; AVX512F-32:       # BB#0:
1439; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
1440; AVX512F-32-NEXT:    vpaddsw %zmm1, %zmm0, %zmm2 {%k1}
1441; AVX512F-32-NEXT:    vmovaps %zmm2, %zmm0
1442; AVX512F-32-NEXT:    retl
1443  %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
1444  ret <32 x i16> %res
1445}
1446
; Zero-masked register form of padds.w: the pass-through is zeroinitializer
; and the mask is the i32 %mask. The checks expect vpaddsw with {%k1} {z}
; writing zmm0 directly.
1447define <32 x i16> @test_mask_adds_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
1448; AVX512BW-LABEL: test_mask_adds_epi16_rrkz_512:
1449; AVX512BW:       ## BB#0:
1450; AVX512BW-NEXT:    kmovd %edi, %k1
1451; AVX512BW-NEXT:    vpaddsw %zmm1, %zmm0, %zmm0 {%k1} {z}
1452; AVX512BW-NEXT:    retq
1453;
1454; AVX512F-32-LABEL: test_mask_adds_epi16_rrkz_512:
1455; AVX512F-32:       # BB#0:
1456; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
1457; AVX512F-32-NEXT:    vpaddsw %zmm1, %zmm0, %zmm0 {%k1} {z}
1458; AVX512F-32-NEXT:    retl
1459  %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
1460  ret <32 x i16> %res
1461}
1462
; Unmasked memory form of padds.w: %b is loaded from %ptr_b and the mask is
; all-ones (i32 -1). The checks expect the load folded into vpaddsw as a
; memory operand.
1463define <32 x i16> @test_mask_adds_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
1464; AVX512BW-LABEL: test_mask_adds_epi16_rm_512:
1465; AVX512BW:       ## BB#0:
1466; AVX512BW-NEXT:    vpaddsw (%rdi), %zmm0, %zmm0
1467; AVX512BW-NEXT:    retq
1468;
1469; AVX512F-32-LABEL: test_mask_adds_epi16_rm_512:
1470; AVX512F-32:       # BB#0:
1471; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1472; AVX512F-32-NEXT:    vpaddsw (%eax), %zmm0, %zmm0
1473; AVX512F-32-NEXT:    retl
1474  %b = load <32 x i16>, <32 x i16>* %ptr_b
1475  %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
1476  ret <32 x i16> %res
1477}
1478
; Merge-masked memory form of padds.w: %b is loaded from %ptr_b, and the
; intrinsic receives %passThru and the i32 %mask. The checks expect a masked
; vpaddsw with a memory operand writing zmm1, then a copy of zmm1 into zmm0.
1479define <32 x i16> @test_mask_adds_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
1480; AVX512BW-LABEL: test_mask_adds_epi16_rmk_512:
1481; AVX512BW:       ## BB#0:
1482; AVX512BW-NEXT:    kmovd %esi, %k1
1483; AVX512BW-NEXT:    vpaddsw (%rdi), %zmm0, %zmm1 {%k1}
1484; AVX512BW-NEXT:    vmovaps %zmm1, %zmm0
1485; AVX512BW-NEXT:    retq
1486;
1487; AVX512F-32-LABEL: test_mask_adds_epi16_rmk_512:
1488; AVX512F-32:       # BB#0:
1489; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1490; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
1491; AVX512F-32-NEXT:    vpaddsw (%eax), %zmm0, %zmm1 {%k1}
1492; AVX512F-32-NEXT:    vmovaps %zmm1, %zmm0
1493; AVX512F-32-NEXT:    retl
1494  %b = load <32 x i16>, <32 x i16>* %ptr_b
1495  %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
1496  ret <32 x i16> %res
1497}
1498
; Zero-masked memory form of padds.w: %b is loaded from %ptr_b, the
; pass-through is zeroinitializer and the mask is the i32 %mask. The checks
; expect vpaddsw with a memory operand and {%k1} {z}.
1499define <32 x i16> @test_mask_adds_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
1500; AVX512BW-LABEL: test_mask_adds_epi16_rmkz_512:
1501; AVX512BW:       ## BB#0:
1502; AVX512BW-NEXT:    kmovd %esi, %k1
1503; AVX512BW-NEXT:    vpaddsw (%rdi), %zmm0, %zmm0 {%k1} {z}
1504; AVX512BW-NEXT:    retq
1505;
1506; AVX512F-32-LABEL: test_mask_adds_epi16_rmkz_512:
1507; AVX512F-32:       # BB#0:
1508; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1509; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
1510; AVX512F-32-NEXT:    vpaddsw (%eax), %zmm0, %zmm0 {%k1} {z}
1511; AVX512F-32-NEXT:    retl
1512  %b = load <32 x i16>, <32 x i16>* %ptr_b
1513  %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
1514  ret <32 x i16> %res
1515}
1516
; Masked padds.w intrinsic called by the adds_epi16 tests: two <32 x i16>
; sources, a <32 x i16> pass-through value, and an i32 mask.
1517declare <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
1518
; Unmasked register form of psubs.w: the intrinsic is called with an all-ones
; mask (i32 -1) and a zeroinitializer pass-through. Both runs are expected to
; emit a plain vpsubsw with no mask register.
1519define <32 x i16> @test_mask_subs_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
1520; AVX512BW-LABEL: test_mask_subs_epi16_rr_512:
1521; AVX512BW:       ## BB#0:
1522; AVX512BW-NEXT:    vpsubsw %zmm1, %zmm0, %zmm0
1523; AVX512BW-NEXT:    retq
1524;
1525; AVX512F-32-LABEL: test_mask_subs_epi16_rr_512:
1526; AVX512F-32:       # BB#0:
1527; AVX512F-32-NEXT:    vpsubsw %zmm1, %zmm0, %zmm0
1528; AVX512F-32-NEXT:    retl
1529  %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
1530  ret <32 x i16> %res
1531}
1532
; Merge-masked register form of psubs.w: the intrinsic receives %passThru and
; the i32 %mask. The checks expect the mask moved into k1 with kmovd, a masked
; vpsubsw writing zmm2, and a copy of zmm2 into zmm0.
1533define <32 x i16> @test_mask_subs_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
1534; AVX512BW-LABEL: test_mask_subs_epi16_rrk_512:
1535; AVX512BW:       ## BB#0:
1536; AVX512BW-NEXT:    kmovd %edi, %k1
1537; AVX512BW-NEXT:    vpsubsw %zmm1, %zmm0, %zmm2 {%k1}
1538; AVX512BW-NEXT:    vmovaps %zmm2, %zmm0
1539; AVX512BW-NEXT:    retq
1540;
1541; AVX512F-32-LABEL: test_mask_subs_epi16_rrk_512:
1542; AVX512F-32:       # BB#0:
1543; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
1544; AVX512F-32-NEXT:    vpsubsw %zmm1, %zmm0, %zmm2 {%k1}
1545; AVX512F-32-NEXT:    vmovaps %zmm2, %zmm0
1546; AVX512F-32-NEXT:    retl
1547  %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
1548  ret <32 x i16> %res
1549}
1550
; Zero-masked register form of psubs.w: the pass-through is zeroinitializer
; and the mask is the i32 %mask. The checks expect vpsubsw with {%k1} {z}
; writing zmm0 directly.
1551define <32 x i16> @test_mask_subs_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
1552; AVX512BW-LABEL: test_mask_subs_epi16_rrkz_512:
1553; AVX512BW:       ## BB#0:
1554; AVX512BW-NEXT:    kmovd %edi, %k1
1555; AVX512BW-NEXT:    vpsubsw %zmm1, %zmm0, %zmm0 {%k1} {z}
1556; AVX512BW-NEXT:    retq
1557;
1558; AVX512F-32-LABEL: test_mask_subs_epi16_rrkz_512:
1559; AVX512F-32:       # BB#0:
1560; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
1561; AVX512F-32-NEXT:    vpsubsw %zmm1, %zmm0, %zmm0 {%k1} {z}
1562; AVX512F-32-NEXT:    retl
1563  %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
1564  ret <32 x i16> %res
1565}
1566
; Unmasked memory form of psubs.w: %b is loaded from %ptr_b and the mask is
; all-ones (i32 -1). The checks expect the load folded into vpsubsw as a
; memory operand.
1567define <32 x i16> @test_mask_subs_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
1568; AVX512BW-LABEL: test_mask_subs_epi16_rm_512:
1569; AVX512BW:       ## BB#0:
1570; AVX512BW-NEXT:    vpsubsw (%rdi), %zmm0, %zmm0
1571; AVX512BW-NEXT:    retq
1572;
1573; AVX512F-32-LABEL: test_mask_subs_epi16_rm_512:
1574; AVX512F-32:       # BB#0:
1575; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1576; AVX512F-32-NEXT:    vpsubsw (%eax), %zmm0, %zmm0
1577; AVX512F-32-NEXT:    retl
1578  %b = load <32 x i16>, <32 x i16>* %ptr_b
1579  %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
1580  ret <32 x i16> %res
1581}
1582
; Merge-masked memory form of psubs.w: %b is loaded from %ptr_b, and the
; intrinsic receives %passThru and the i32 %mask. The checks expect a masked
; vpsubsw with a memory operand writing zmm1, then a copy of zmm1 into zmm0.
1583define <32 x i16> @test_mask_subs_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
1584; AVX512BW-LABEL: test_mask_subs_epi16_rmk_512:
1585; AVX512BW:       ## BB#0:
1586; AVX512BW-NEXT:    kmovd %esi, %k1
1587; AVX512BW-NEXT:    vpsubsw (%rdi), %zmm0, %zmm1 {%k1}
1588; AVX512BW-NEXT:    vmovaps %zmm1, %zmm0
1589; AVX512BW-NEXT:    retq
1590;
1591; AVX512F-32-LABEL: test_mask_subs_epi16_rmk_512:
1592; AVX512F-32:       # BB#0:
1593; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1594; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
1595; AVX512F-32-NEXT:    vpsubsw (%eax), %zmm0, %zmm1 {%k1}
1596; AVX512F-32-NEXT:    vmovaps %zmm1, %zmm0
1597; AVX512F-32-NEXT:    retl
1598  %b = load <32 x i16>, <32 x i16>* %ptr_b
1599  %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
1600  ret <32 x i16> %res
1601}
1602
; Zero-masked memory form of psubs.w: %b is loaded from %ptr_b, the
; pass-through is zeroinitializer and the mask is the i32 %mask. The checks
; expect vpsubsw with a memory operand and {%k1} {z}.
1603define <32 x i16> @test_mask_subs_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
1604; AVX512BW-LABEL: test_mask_subs_epi16_rmkz_512:
1605; AVX512BW:       ## BB#0:
1606; AVX512BW-NEXT:    kmovd %esi, %k1
1607; AVX512BW-NEXT:    vpsubsw (%rdi), %zmm0, %zmm0 {%k1} {z}
1608; AVX512BW-NEXT:    retq
1609;
1610; AVX512F-32-LABEL: test_mask_subs_epi16_rmkz_512:
1611; AVX512F-32:       # BB#0:
1612; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1613; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
1614; AVX512F-32-NEXT:    vpsubsw (%eax), %zmm0, %zmm0 {%k1} {z}
1615; AVX512F-32-NEXT:    retl
1616  %b = load <32 x i16>, <32 x i16>* %ptr_b
1617  %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
1618  ret <32 x i16> %res
1619}
1620
; Masked psubs.w intrinsic called by the subs_epi16 tests: two <32 x i16>
; sources, a <32 x i16> pass-through value, and an i32 mask.
1621declare <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
1622
; Unmasked register form of paddus.w: the intrinsic is called with an all-ones
; mask (i32 -1) and a zeroinitializer pass-through. Both runs are expected to
; emit a plain vpaddusw with no mask register.
1623define <32 x i16> @test_mask_adds_epu16_rr_512(<32 x i16> %a, <32 x i16> %b) {
1624; AVX512BW-LABEL: test_mask_adds_epu16_rr_512:
1625; AVX512BW:       ## BB#0:
1626; AVX512BW-NEXT:    vpaddusw %zmm1, %zmm0, %zmm0
1627; AVX512BW-NEXT:    retq
1628;
1629; AVX512F-32-LABEL: test_mask_adds_epu16_rr_512:
1630; AVX512F-32:       # BB#0:
1631; AVX512F-32-NEXT:    vpaddusw %zmm1, %zmm0, %zmm0
1632; AVX512F-32-NEXT:    retl
1633  %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
1634  ret <32 x i16> %res
1635}
1636
; Merge-masked register form of paddus.w: the intrinsic receives %passThru and
; the i32 %mask. The checks expect the mask moved into k1 with kmovd, a masked
; vpaddusw writing zmm2, and a copy of zmm2 into zmm0.
1637define <32 x i16> @test_mask_adds_epu16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
1638; AVX512BW-LABEL: test_mask_adds_epu16_rrk_512:
1639; AVX512BW:       ## BB#0:
1640; AVX512BW-NEXT:    kmovd %edi, %k1
1641; AVX512BW-NEXT:    vpaddusw %zmm1, %zmm0, %zmm2 {%k1}
1642; AVX512BW-NEXT:    vmovaps %zmm2, %zmm0
1643; AVX512BW-NEXT:    retq
1644;
1645; AVX512F-32-LABEL: test_mask_adds_epu16_rrk_512:
1646; AVX512F-32:       # BB#0:
1647; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
1648; AVX512F-32-NEXT:    vpaddusw %zmm1, %zmm0, %zmm2 {%k1}
1649; AVX512F-32-NEXT:    vmovaps %zmm2, %zmm0
1650; AVX512F-32-NEXT:    retl
1651  %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
1652  ret <32 x i16> %res
1653}
1654
; Zero-masked register form of paddus.w: the pass-through is zeroinitializer
; and the mask is the i32 %mask. The checks expect vpaddusw with {%k1} {z}
; writing zmm0 directly.
1655define <32 x i16> @test_mask_adds_epu16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
1656; AVX512BW-LABEL: test_mask_adds_epu16_rrkz_512:
1657; AVX512BW:       ## BB#0:
1658; AVX512BW-NEXT:    kmovd %edi, %k1
1659; AVX512BW-NEXT:    vpaddusw %zmm1, %zmm0, %zmm0 {%k1} {z}
1660; AVX512BW-NEXT:    retq
1661;
1662; AVX512F-32-LABEL: test_mask_adds_epu16_rrkz_512:
1663; AVX512F-32:       # BB#0:
1664; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
1665; AVX512F-32-NEXT:    vpaddusw %zmm1, %zmm0, %zmm0 {%k1} {z}
1666; AVX512F-32-NEXT:    retl
1667  %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
1668  ret <32 x i16> %res
1669}
1670
; Unmasked memory form of paddus.w: %b is loaded from %ptr_b and the mask is
; all-ones (i32 -1). The checks expect the load folded into vpaddusw as a
; memory operand.
1671define <32 x i16> @test_mask_adds_epu16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
1672; AVX512BW-LABEL: test_mask_adds_epu16_rm_512:
1673; AVX512BW:       ## BB#0:
1674; AVX512BW-NEXT:    vpaddusw (%rdi), %zmm0, %zmm0
1675; AVX512BW-NEXT:    retq
1676;
1677; AVX512F-32-LABEL: test_mask_adds_epu16_rm_512:
1678; AVX512F-32:       # BB#0:
1679; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1680; AVX512F-32-NEXT:    vpaddusw (%eax), %zmm0, %zmm0
1681; AVX512F-32-NEXT:    retl
1682  %b = load <32 x i16>, <32 x i16>* %ptr_b
1683  %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
1684  ret <32 x i16> %res
1685}
1686
; Merge-masked memory form of paddus.w: %b is loaded from %ptr_b, and the
; intrinsic receives %passThru and the i32 %mask. The checks expect a masked
; vpaddusw with a memory operand writing zmm1, then a copy of zmm1 into zmm0.
1687define <32 x i16> @test_mask_adds_epu16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
1688; AVX512BW-LABEL: test_mask_adds_epu16_rmk_512:
1689; AVX512BW:       ## BB#0:
1690; AVX512BW-NEXT:    kmovd %esi, %k1
1691; AVX512BW-NEXT:    vpaddusw (%rdi), %zmm0, %zmm1 {%k1}
1692; AVX512BW-NEXT:    vmovaps %zmm1, %zmm0
1693; AVX512BW-NEXT:    retq
1694;
1695; AVX512F-32-LABEL: test_mask_adds_epu16_rmk_512:
1696; AVX512F-32:       # BB#0:
1697; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1698; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
1699; AVX512F-32-NEXT:    vpaddusw (%eax), %zmm0, %zmm1 {%k1}
1700; AVX512F-32-NEXT:    vmovaps %zmm1, %zmm0
1701; AVX512F-32-NEXT:    retl
1702  %b = load <32 x i16>, <32 x i16>* %ptr_b
1703  %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
1704  ret <32 x i16> %res
1705}
1706
; Zero-masked memory form of paddus.w: %b is loaded from %ptr_b, the
; pass-through is zeroinitializer and the mask is the i32 %mask. The checks
; expect vpaddusw with a memory operand and {%k1} {z}.
1707define <32 x i16> @test_mask_adds_epu16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
1708; AVX512BW-LABEL: test_mask_adds_epu16_rmkz_512:
1709; AVX512BW:       ## BB#0:
1710; AVX512BW-NEXT:    kmovd %esi, %k1
1711; AVX512BW-NEXT:    vpaddusw (%rdi), %zmm0, %zmm0 {%k1} {z}
1712; AVX512BW-NEXT:    retq
1713;
1714; AVX512F-32-LABEL: test_mask_adds_epu16_rmkz_512:
1715; AVX512F-32:       # BB#0:
1716; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1717; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
1718; AVX512F-32-NEXT:    vpaddusw (%eax), %zmm0, %zmm0 {%k1} {z}
1719; AVX512F-32-NEXT:    retl
1720  %b = load <32 x i16>, <32 x i16>* %ptr_b
1721  %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
1722  ret <32 x i16> %res
1723}
1724
; Masked paddus.w intrinsic called by the adds_epu16 tests: two <32 x i16>
; sources, a <32 x i16> pass-through value, and an i32 mask.
1725declare <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
1726
; Unmasked register form of psubus.w: the intrinsic is called with an all-ones
; mask (i32 -1) and a zeroinitializer pass-through. Both runs are expected to
; emit a plain vpsubusw with no mask register.
1727define <32 x i16> @test_mask_subs_epu16_rr_512(<32 x i16> %a, <32 x i16> %b) {
1728; AVX512BW-LABEL: test_mask_subs_epu16_rr_512:
1729; AVX512BW:       ## BB#0:
1730; AVX512BW-NEXT:    vpsubusw %zmm1, %zmm0, %zmm0
1731; AVX512BW-NEXT:    retq
1732;
1733; AVX512F-32-LABEL: test_mask_subs_epu16_rr_512:
1734; AVX512F-32:       # BB#0:
1735; AVX512F-32-NEXT:    vpsubusw %zmm1, %zmm0, %zmm0
1736; AVX512F-32-NEXT:    retl
1737  %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
1738  ret <32 x i16> %res
1739}
1740
; Merge-masked register form of psubus.w: the intrinsic receives %passThru and
; the i32 %mask. The checks expect the mask moved into k1 with kmovd, a masked
; vpsubusw writing zmm2, and a copy of zmm2 into zmm0.
1741define <32 x i16> @test_mask_subs_epu16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
1742; AVX512BW-LABEL: test_mask_subs_epu16_rrk_512:
1743; AVX512BW:       ## BB#0:
1744; AVX512BW-NEXT:    kmovd %edi, %k1
1745; AVX512BW-NEXT:    vpsubusw %zmm1, %zmm0, %zmm2 {%k1}
1746; AVX512BW-NEXT:    vmovaps %zmm2, %zmm0
1747; AVX512BW-NEXT:    retq
1748;
1749; AVX512F-32-LABEL: test_mask_subs_epu16_rrk_512:
1750; AVX512F-32:       # BB#0:
1751; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
1752; AVX512F-32-NEXT:    vpsubusw %zmm1, %zmm0, %zmm2 {%k1}
1753; AVX512F-32-NEXT:    vmovaps %zmm2, %zmm0
1754; AVX512F-32-NEXT:    retl
1755  %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
1756  ret <32 x i16> %res
1757}
1758
; Zero-masked register form of psubus.w: the pass-through is zeroinitializer
; and the mask is the i32 %mask. The checks expect vpsubusw with {%k1} {z}
; writing zmm0 directly.
1759define <32 x i16> @test_mask_subs_epu16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
1760; AVX512BW-LABEL: test_mask_subs_epu16_rrkz_512:
1761; AVX512BW:       ## BB#0:
1762; AVX512BW-NEXT:    kmovd %edi, %k1
1763; AVX512BW-NEXT:    vpsubusw %zmm1, %zmm0, %zmm0 {%k1} {z}
1764; AVX512BW-NEXT:    retq
1765;
1766; AVX512F-32-LABEL: test_mask_subs_epu16_rrkz_512:
1767; AVX512F-32:       # BB#0:
1768; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
1769; AVX512F-32-NEXT:    vpsubusw %zmm1, %zmm0, %zmm0 {%k1} {z}
1770; AVX512F-32-NEXT:    retl
1771  %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
1772  ret <32 x i16> %res
1773}
1774
; Unmasked memory form of psubus.w: %b is loaded from %ptr_b and the mask is
; all-ones (i32 -1). The checks expect the load folded into vpsubusw as a
; memory operand.
1775define <32 x i16> @test_mask_subs_epu16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
1776; AVX512BW-LABEL: test_mask_subs_epu16_rm_512:
1777; AVX512BW:       ## BB#0:
1778; AVX512BW-NEXT:    vpsubusw (%rdi), %zmm0, %zmm0
1779; AVX512BW-NEXT:    retq
1780;
1781; AVX512F-32-LABEL: test_mask_subs_epu16_rm_512:
1782; AVX512F-32:       # BB#0:
1783; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1784; AVX512F-32-NEXT:    vpsubusw (%eax), %zmm0, %zmm0
1785; AVX512F-32-NEXT:    retl
1786  %b = load <32 x i16>, <32 x i16>* %ptr_b
1787  %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
1788  ret <32 x i16> %res
1789}
1790
; Merge-masked memory form of psubus.w: %b is loaded from %ptr_b, and the
; intrinsic receives %passThru and the i32 %mask. The checks expect a masked
; vpsubusw with a memory operand writing zmm1, then a copy of zmm1 into zmm0.
1791define <32 x i16> @test_mask_subs_epu16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
1792; AVX512BW-LABEL: test_mask_subs_epu16_rmk_512:
1793; AVX512BW:       ## BB#0:
1794; AVX512BW-NEXT:    kmovd %esi, %k1
1795; AVX512BW-NEXT:    vpsubusw (%rdi), %zmm0, %zmm1 {%k1}
1796; AVX512BW-NEXT:    vmovaps %zmm1, %zmm0
1797; AVX512BW-NEXT:    retq
1798;
1799; AVX512F-32-LABEL: test_mask_subs_epu16_rmk_512:
1800; AVX512F-32:       # BB#0:
1801; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1802; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
1803; AVX512F-32-NEXT:    vpsubusw (%eax), %zmm0, %zmm1 {%k1}
1804; AVX512F-32-NEXT:    vmovaps %zmm1, %zmm0
1805; AVX512F-32-NEXT:    retl
1806  %b = load <32 x i16>, <32 x i16>* %ptr_b
1807  %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
1808  ret <32 x i16> %res
1809}
1810
; Zero-masked memory form of psubus.w: %b is loaded from %ptr_b, the
; pass-through is zeroinitializer and the mask is the i32 %mask. The checks
; expect vpsubusw with a memory operand and {%k1} {z}.
1811define <32 x i16> @test_mask_subs_epu16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
1812; AVX512BW-LABEL: test_mask_subs_epu16_rmkz_512:
1813; AVX512BW:       ## BB#0:
1814; AVX512BW-NEXT:    kmovd %esi, %k1
1815; AVX512BW-NEXT:    vpsubusw (%rdi), %zmm0, %zmm0 {%k1} {z}
1816; AVX512BW-NEXT:    retq
1817;
1818; AVX512F-32-LABEL: test_mask_subs_epu16_rmkz_512:
1819; AVX512F-32:       # BB#0:
1820; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1821; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
1822; AVX512F-32-NEXT:    vpsubusw (%eax), %zmm0, %zmm0 {%k1} {z}
1823; AVX512F-32-NEXT:    retl
1824  %b = load <32 x i16>, <32 x i16>* %ptr_b
1825  %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
1826  ret <32 x i16> %res
1827}
1828
; Masked psubus.w intrinsic called by the subs_epu16 tests: two <32 x i16>
; sources, a <32 x i16> pass-through value, and an i32 mask.
1829declare <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
1830
; Masked pmaxs.b intrinsic: two <64 x i8> sources, a <64 x i8> third vector
; operand (passed as %x2 below), and an i64 mask.
1831declare <64 x i8> @llvm.x86.avx512.mask.pmaxs.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
1832
; Calls pmaxs.b twice on the same operands, once with the variable mask %x3
; and once with an all-ones mask (i64 -1), then adds the two results. The
; checks expect one masked vpmaxsb writing zmm2, one unmasked vpmaxsb, and a
; vpaddb combining them. The 32-bit run builds the mask with kunpckdq.
1833define <64 x i8>@test_int_x86_avx512_mask_pmaxs_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
1834; AVX512BW-LABEL: test_int_x86_avx512_mask_pmaxs_b_512:
1835; AVX512BW:       ## BB#0:
1836; AVX512BW-NEXT:    kmovq %rdi, %k1
1837; AVX512BW-NEXT:    vpmaxsb %zmm1, %zmm0, %zmm2 {%k1}
1838; AVX512BW-NEXT:    vpmaxsb %zmm1, %zmm0, %zmm0
1839; AVX512BW-NEXT:    vpaddb %zmm0, %zmm2, %zmm0
1840; AVX512BW-NEXT:    retq
1841;
1842; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmaxs_b_512:
1843; AVX512F-32:       # BB#0:
1844; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
1845; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
1846; AVX512F-32-NEXT:    kunpckdq %k0, %k1, %k1
1847; AVX512F-32-NEXT:    vpmaxsb %zmm1, %zmm0, %zmm2 {%k1}
1848; AVX512F-32-NEXT:    vpmaxsb %zmm1, %zmm0, %zmm0
1849; AVX512F-32-NEXT:    vpaddb %zmm0, %zmm2, %zmm0
1850; AVX512F-32-NEXT:    retl
1851  %res = call <64 x i8> @llvm.x86.avx512.mask.pmaxs.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
1852  %res1 = call <64 x i8> @llvm.x86.avx512.mask.pmaxs.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
1853  %res2 = add <64 x i8> %res, %res1
1854  ret <64 x i8> %res2
1855}
1856
; Masked pmaxs.w intrinsic: two <32 x i16> sources, a <32 x i16> third vector
; operand (passed as %x2 below), and an i32 mask.
1857declare <32 x i16> @llvm.x86.avx512.mask.pmaxs.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
1858
; Calls pmaxs.w twice on the same operands, once with the variable mask %x3
; and once with an all-ones mask (i32 -1), then adds the two results. The
; checks expect one masked vpmaxsw writing zmm2, one unmasked vpmaxsw, and a
; vpaddw combining them.
1859define <32 x i16>@test_int_x86_avx512_mask_pmaxs_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
1860; AVX512BW-LABEL: test_int_x86_avx512_mask_pmaxs_w_512:
1861; AVX512BW:       ## BB#0:
1862; AVX512BW-NEXT:    kmovd %edi, %k1
1863; AVX512BW-NEXT:    vpmaxsw %zmm1, %zmm0, %zmm2 {%k1}
1864; AVX512BW-NEXT:    vpmaxsw %zmm1, %zmm0, %zmm0
1865; AVX512BW-NEXT:    vpaddw %zmm0, %zmm2, %zmm0
1866; AVX512BW-NEXT:    retq
1867;
1868; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmaxs_w_512:
1869; AVX512F-32:       # BB#0:
1870; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
1871; AVX512F-32-NEXT:    vpmaxsw %zmm1, %zmm0, %zmm2 {%k1}
1872; AVX512F-32-NEXT:    vpmaxsw %zmm1, %zmm0, %zmm0
1873; AVX512F-32-NEXT:    vpaddw %zmm0, %zmm2, %zmm0
1874; AVX512F-32-NEXT:    retl
1875  %res = call <32 x i16> @llvm.x86.avx512.mask.pmaxs.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
1876  %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmaxs.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
1877  %res2 = add <32 x i16> %res, %res1
1878  ret <32 x i16> %res2
1879}
1880
; Masked pmaxu.b intrinsic: two <64 x i8> sources, a <64 x i8> third vector
; operand (passed as %x2 below), and an i64 mask.
1881declare <64 x i8> @llvm.x86.avx512.mask.pmaxu.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
1882
; Calls pmaxu.b twice on the same operands, once with the variable mask %x3
; and once with an all-ones mask (i64 -1), then adds the two results. The
; checks expect one masked vpmaxub writing zmm2, one unmasked vpmaxub, and a
; vpaddb combining them. The 32-bit run builds the mask with kunpckdq.
1883define <64 x i8>@test_int_x86_avx512_mask_pmaxu_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
1884; AVX512BW-LABEL: test_int_x86_avx512_mask_pmaxu_b_512:
1885; AVX512BW:       ## BB#0:
1886; AVX512BW-NEXT:    kmovq %rdi, %k1
1887; AVX512BW-NEXT:    vpmaxub %zmm1, %zmm0, %zmm2 {%k1}
1888; AVX512BW-NEXT:    vpmaxub %zmm1, %zmm0, %zmm0
1889; AVX512BW-NEXT:    vpaddb %zmm0, %zmm2, %zmm0
1890; AVX512BW-NEXT:    retq
1891;
1892; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmaxu_b_512:
1893; AVX512F-32:       # BB#0:
1894; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
1895; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
1896; AVX512F-32-NEXT:    kunpckdq %k0, %k1, %k1
1897; AVX512F-32-NEXT:    vpmaxub %zmm1, %zmm0, %zmm2 {%k1}
1898; AVX512F-32-NEXT:    vpmaxub %zmm1, %zmm0, %zmm0
1899; AVX512F-32-NEXT:    vpaddb %zmm0, %zmm2, %zmm0
1900; AVX512F-32-NEXT:    retl
1901  %res = call <64 x i8> @llvm.x86.avx512.mask.pmaxu.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
1902  %res1 = call <64 x i8> @llvm.x86.avx512.mask.pmaxu.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
1903  %res2 = add <64 x i8> %res, %res1
1904  ret <64 x i8> %res2
1905}
1906
; Masked pmaxu.w intrinsic: two <32 x i16> sources, a <32 x i16> third vector
; operand (passed as %x2 below), and an i32 mask.
1907declare <32 x i16> @llvm.x86.avx512.mask.pmaxu.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
1908
; Calls pmaxu.w twice on the same operands, once with the variable mask %x3
; and once with an all-ones mask (i32 -1), then adds the two results. The
; checks expect one masked vpmaxuw writing zmm2, one unmasked vpmaxuw, and a
; vpaddw combining them.
1909define <32 x i16>@test_int_x86_avx512_mask_pmaxu_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
1910; AVX512BW-LABEL: test_int_x86_avx512_mask_pmaxu_w_512:
1911; AVX512BW:       ## BB#0:
1912; AVX512BW-NEXT:    kmovd %edi, %k1
1913; AVX512BW-NEXT:    vpmaxuw %zmm1, %zmm0, %zmm2 {%k1}
1914; AVX512BW-NEXT:    vpmaxuw %zmm1, %zmm0, %zmm0
1915; AVX512BW-NEXT:    vpaddw %zmm0, %zmm2, %zmm0
1916; AVX512BW-NEXT:    retq
1917;
1918; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmaxu_w_512:
1919; AVX512F-32:       # BB#0:
1920; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
1921; AVX512F-32-NEXT:    vpmaxuw %zmm1, %zmm0, %zmm2 {%k1}
1922; AVX512F-32-NEXT:    vpmaxuw %zmm1, %zmm0, %zmm0
1923; AVX512F-32-NEXT:    vpaddw %zmm0, %zmm2, %zmm0
1924; AVX512F-32-NEXT:    retl
1925  %res = call <32 x i16> @llvm.x86.avx512.mask.pmaxu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
1926  %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmaxu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
1927  %res2 = add <32 x i16> %res, %res1
1928  ret <32 x i16> %res2
1929}
1930
; Masked pmins.b intrinsic: two <64 x i8> sources, a <64 x i8> third vector
; operand (passed as %x2 below), and an i64 mask.
1931declare <64 x i8> @llvm.x86.avx512.mask.pmins.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
1932
; Calls pmins.b twice on the same operands, once with the variable mask %x3
; and once with an all-ones mask (i64 -1), then adds the two results. The
; checks expect one masked vpminsb writing zmm2, one unmasked vpminsb, and a
; vpaddb combining them. The 32-bit run builds the mask with kunpckdq.
1933define <64 x i8>@test_int_x86_avx512_mask_pmins_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
1934; AVX512BW-LABEL: test_int_x86_avx512_mask_pmins_b_512:
1935; AVX512BW:       ## BB#0:
1936; AVX512BW-NEXT:    kmovq %rdi, %k1
1937; AVX512BW-NEXT:    vpminsb %zmm1, %zmm0, %zmm2 {%k1}
1938; AVX512BW-NEXT:    vpminsb %zmm1, %zmm0, %zmm0
1939; AVX512BW-NEXT:    vpaddb %zmm0, %zmm2, %zmm0
1940; AVX512BW-NEXT:    retq
1941;
1942; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmins_b_512:
1943; AVX512F-32:       # BB#0:
1944; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
1945; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
1946; AVX512F-32-NEXT:    kunpckdq %k0, %k1, %k1
1947; AVX512F-32-NEXT:    vpminsb %zmm1, %zmm0, %zmm2 {%k1}
1948; AVX512F-32-NEXT:    vpminsb %zmm1, %zmm0, %zmm0
1949; AVX512F-32-NEXT:    vpaddb %zmm0, %zmm2, %zmm0
1950; AVX512F-32-NEXT:    retl
1951  %res = call <64 x i8> @llvm.x86.avx512.mask.pmins.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
1952  %res1 = call <64 x i8> @llvm.x86.avx512.mask.pmins.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
1953  %res2 = add <64 x i8> %res, %res1
1954  ret <64 x i8> %res2
1955}
1956
; Masked pmins.w intrinsic: two <32 x i16> sources, a <32 x i16> third vector
; operand (passed as %x2 below), and an i32 mask.
1957declare <32 x i16> @llvm.x86.avx512.mask.pmins.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
1958
; Calls pmins.w twice on the same operands, once with the variable mask %x3
; and once with an all-ones mask (i32 -1), then adds the two results. The
; checks expect one masked vpminsw writing zmm2, one unmasked vpminsw, and a
; vpaddw combining them.
1959define <32 x i16>@test_int_x86_avx512_mask_pmins_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
1960; AVX512BW-LABEL: test_int_x86_avx512_mask_pmins_w_512:
1961; AVX512BW:       ## BB#0:
1962; AVX512BW-NEXT:    kmovd %edi, %k1
1963; AVX512BW-NEXT:    vpminsw %zmm1, %zmm0, %zmm2 {%k1}
1964; AVX512BW-NEXT:    vpminsw %zmm1, %zmm0, %zmm0
1965; AVX512BW-NEXT:    vpaddw %zmm0, %zmm2, %zmm0
1966; AVX512BW-NEXT:    retq
1967;
1968; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmins_w_512:
1969; AVX512F-32:       # BB#0:
1970; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
1971; AVX512F-32-NEXT:    vpminsw %zmm1, %zmm0, %zmm2 {%k1}
1972; AVX512F-32-NEXT:    vpminsw %zmm1, %zmm0, %zmm0
1973; AVX512F-32-NEXT:    vpaddw %zmm0, %zmm2, %zmm0
1974; AVX512F-32-NEXT:    retl
1975  %res = call <32 x i16> @llvm.x86.avx512.mask.pmins.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
1976  %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmins.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
1977  %res2 = add <32 x i16> %res, %res1
1978  ret <32 x i16> %res2
1979}
1980
; Masked pminu.b intrinsic: two <64 x i8> sources, a <64 x i8> third vector
; operand (passed as %x2 below), and an i64 mask.
1981declare <64 x i8> @llvm.x86.avx512.mask.pminu.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
1982
; Calls pminu.b twice on the same operands, once with the variable mask %x3
; and once with an all-ones mask (i64 -1), then adds the two results. The
; checks expect one masked vpminub writing zmm2, one unmasked vpminub, and a
; vpaddb combining them. The 32-bit run builds the mask with kunpckdq.
1983define <64 x i8>@test_int_x86_avx512_mask_pminu_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
1984; AVX512BW-LABEL: test_int_x86_avx512_mask_pminu_b_512:
1985; AVX512BW:       ## BB#0:
1986; AVX512BW-NEXT:    kmovq %rdi, %k1
1987; AVX512BW-NEXT:    vpminub %zmm1, %zmm0, %zmm2 {%k1}
1988; AVX512BW-NEXT:    vpminub %zmm1, %zmm0, %zmm0
1989; AVX512BW-NEXT:    vpaddb %zmm0, %zmm2, %zmm0
1990; AVX512BW-NEXT:    retq
1991;
1992; AVX512F-32-LABEL: test_int_x86_avx512_mask_pminu_b_512:
1993; AVX512F-32:       # BB#0:
1994; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
1995; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
1996; AVX512F-32-NEXT:    kunpckdq %k0, %k1, %k1
1997; AVX512F-32-NEXT:    vpminub %zmm1, %zmm0, %zmm2 {%k1}
1998; AVX512F-32-NEXT:    vpminub %zmm1, %zmm0, %zmm0
1999; AVX512F-32-NEXT:    vpaddb %zmm0, %zmm2, %zmm0
2000; AVX512F-32-NEXT:    retl
2001  %res = call <64 x i8> @llvm.x86.avx512.mask.pminu.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
2002  %res1 = call <64 x i8> @llvm.x86.avx512.mask.pminu.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
2003  %res2 = add <64 x i8> %res, %res1
2004  ret <64 x i8> %res2
2005}
2006
; Masked pminu.w intrinsic: two <32 x i16> sources, a <32 x i16> third vector
; operand (passed as %x2 below), and an i32 mask.
2007declare <32 x i16> @llvm.x86.avx512.mask.pminu.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
2008
; Calls pminu.w twice on the same operands, once with the variable mask %x3
; and once with an all-ones mask (i32 -1), then adds the two results. The
; checks expect one masked vpminuw writing zmm2, one unmasked vpminuw, and a
; vpaddw combining them.
2009define <32 x i16>@test_int_x86_avx512_mask_pminu_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
2010; AVX512BW-LABEL: test_int_x86_avx512_mask_pminu_w_512:
2011; AVX512BW:       ## BB#0:
2012; AVX512BW-NEXT:    kmovd %edi, %k1
2013; AVX512BW-NEXT:    vpminuw %zmm1, %zmm0, %zmm2 {%k1}
2014; AVX512BW-NEXT:    vpminuw %zmm1, %zmm0, %zmm0
2015; AVX512BW-NEXT:    vpaddw %zmm0, %zmm2, %zmm0
2016; AVX512BW-NEXT:    retq
2017;
2018; AVX512F-32-LABEL: test_int_x86_avx512_mask_pminu_w_512:
2019; AVX512F-32:       # BB#0:
2020; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
2021; AVX512F-32-NEXT:    vpminuw %zmm1, %zmm0, %zmm2 {%k1}
2022; AVX512F-32-NEXT:    vpminuw %zmm1, %zmm0, %zmm0
2023; AVX512F-32-NEXT:    vpaddw %zmm0, %zmm2, %zmm0
2024; AVX512F-32-NEXT:    retl
2025  %res = call <32 x i16> @llvm.x86.avx512.mask.pminu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
2026  %res1 = call <32 x i16> @llvm.x86.avx512.mask.pminu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
2027  %res2 = add <32 x i16> %res, %res1
2028  ret <32 x i16> %res2
2029}
2030
; Merge-masked vpermt2var.hi intrinsic: three <32 x i16> vector operands and
; an i32 mask.
2031declare <32 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
2032
; Calls mask.vpermt2var.hi twice on the same operands, once with the variable
; mask %x3 and once with an all-ones mask (i32 -1), then adds the two results.
; The checks expect zmm1 to be copied into zmm3 first, a masked vpermt2w
; targeting zmm3, an unmasked vpermt2w targeting zmm1, and a vpaddw of the two.
2033define <32 x i16>@test_int_x86_avx512_mask_vpermt2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
2034; AVX512BW-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_512:
2035; AVX512BW:       ## BB#0:
2036; AVX512BW-NEXT:    kmovd %edi, %k1
2037; AVX512BW-NEXT:    vmovaps %zmm1, %zmm3
2038; AVX512BW-NEXT:    vpermt2w %zmm2, %zmm0, %zmm3 {%k1}
2039; AVX512BW-NEXT:    vpermt2w %zmm2, %zmm0, %zmm1
2040; AVX512BW-NEXT:    vpaddw %zmm1, %zmm3, %zmm0
2041; AVX512BW-NEXT:    retq
2042;
2043; AVX512F-32-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_512:
2044; AVX512F-32:       # BB#0:
2045; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
2046; AVX512F-32-NEXT:    vmovaps %zmm1, %zmm3
2047; AVX512F-32-NEXT:    vpermt2w %zmm2, %zmm0, %zmm3 {%k1}
2048; AVX512F-32-NEXT:    vpermt2w %zmm2, %zmm0, %zmm1
2049; AVX512F-32-NEXT:    vpaddw %zmm1, %zmm3, %zmm0
2050; AVX512F-32-NEXT:    retl
2051  %res = call <32 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
2052  %res1 = call <32 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
2053  %res2 = add <32 x i16> %res, %res1
2054  ret <32 x i16> %res2
2055}
2056
; Zero-masked (maskz) vpermt2var.hi intrinsic: three <32 x i16> vector
; operands and an i32 mask.
2057declare <32 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
2058
; Calls maskz.vpermt2var.hi twice on the same operands, once with the variable
; mask %x3 and once with an all-ones mask (i32 -1), then adds the two results.
; The checks expect zmm1 to be copied into zmm3 first, a vpermt2w with
; {%k1} {z} targeting zmm3, an unmasked vpermt2w targeting zmm1, and a vpaddw.
2059define <32 x i16>@test_int_x86_avx512_maskz_vpermt2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
2060; AVX512BW-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_512:
2061; AVX512BW:       ## BB#0:
2062; AVX512BW-NEXT:    kmovd %edi, %k1
2063; AVX512BW-NEXT:    vmovaps %zmm1, %zmm3
2064; AVX512BW-NEXT:    vpermt2w %zmm2, %zmm0, %zmm3 {%k1} {z}
2065; AVX512BW-NEXT:    vpermt2w %zmm2, %zmm0, %zmm1
2066; AVX512BW-NEXT:    vpaddw %zmm1, %zmm3, %zmm0
2067; AVX512BW-NEXT:    retq
2068;
2069; AVX512F-32-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_512:
2070; AVX512F-32:       # BB#0:
2071; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
2072; AVX512F-32-NEXT:    vmovaps %zmm1, %zmm3
2073; AVX512F-32-NEXT:    vpermt2w %zmm2, %zmm0, %zmm3 {%k1} {z}
2074; AVX512F-32-NEXT:    vpermt2w %zmm2, %zmm0, %zmm1
2075; AVX512F-32-NEXT:    vpaddw %zmm1, %zmm3, %zmm0
2076; AVX512F-32-NEXT:    retl
2077  %res = call <32 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
2078  %res1 = call <32 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
2079  %res2 = add <32 x i16> %res, %res1
2080  ret <32 x i16> %res2
2081}
2082
; Test for llvm.x86.avx512.mask.vpermi2var.hi.512. The intrinsic is called once
; with the variable mask %x3 and once with an all-ones mask (-1), and the two
; results are added. The checks expect, for both RUN lines, a vmovaps copy, a
; {%k1}-masked vpermi2w, an unmasked vpermi2w and a final vpaddw.
2083declare <32 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
2084
2085define <32 x i16>@test_int_x86_avx512_mask_vpermi2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
2086; AVX512BW-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_512:
2087; AVX512BW:       ## BB#0:
2088; AVX512BW-NEXT:    kmovd %edi, %k1
2089; AVX512BW-NEXT:    vmovaps %zmm1, %zmm3
2090; AVX512BW-NEXT:    vpermi2w %zmm2, %zmm0, %zmm3 {%k1}
2091; AVX512BW-NEXT:    vpermi2w %zmm2, %zmm0, %zmm1
2092; AVX512BW-NEXT:    vpaddw %zmm1, %zmm3, %zmm0
2093; AVX512BW-NEXT:    retq
2094;
2095; AVX512F-32-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_512:
2096; AVX512F-32:       # BB#0:
2097; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
2098; AVX512F-32-NEXT:    vmovaps %zmm1, %zmm3
2099; AVX512F-32-NEXT:    vpermi2w %zmm2, %zmm0, %zmm3 {%k1}
2100; AVX512F-32-NEXT:    vpermi2w %zmm2, %zmm0, %zmm1
2101; AVX512F-32-NEXT:    vpaddw %zmm1, %zmm3, %zmm0
2102; AVX512F-32-NEXT:    retl
2103  %res = call <32 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
2104  %res1 = call <32 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
2105  %res2 = add <32 x i16> %res, %res1
2106  ret <32 x i16> %res2
2107}
2108
; Test for llvm.x86.avx512.mask.pavg.b.512 (i64 mask). The intrinsic is called
; once with the variable mask %x3 and once with an all-ones mask (-1), and the
; two results are added. The checks expect a {%k1}-masked vpavgb, an unmasked
; vpavgb and a vpaddb. On the i386 RUN line the mask is expected to be built
; from two kmovd loads joined by kunpckdq.
2109declare <64 x i8> @llvm.x86.avx512.mask.pavg.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
2110
2111define <64 x i8>@test_int_x86_avx512_mask_pavg_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
2112; AVX512BW-LABEL: test_int_x86_avx512_mask_pavg_b_512:
2113; AVX512BW:       ## BB#0:
2114; AVX512BW-NEXT:    kmovq %rdi, %k1
2115; AVX512BW-NEXT:    vpavgb %zmm1, %zmm0, %zmm2 {%k1}
2116; AVX512BW-NEXT:    vpavgb %zmm1, %zmm0, %zmm0
2117; AVX512BW-NEXT:    vpaddb %zmm0, %zmm2, %zmm0
2118; AVX512BW-NEXT:    retq
2119;
2120; AVX512F-32-LABEL: test_int_x86_avx512_mask_pavg_b_512:
2121; AVX512F-32:       # BB#0:
2122; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
2123; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
2124; AVX512F-32-NEXT:    kunpckdq %k0, %k1, %k1
2125; AVX512F-32-NEXT:    vpavgb %zmm1, %zmm0, %zmm2 {%k1}
2126; AVX512F-32-NEXT:    vpavgb %zmm1, %zmm0, %zmm0
2127; AVX512F-32-NEXT:    vpaddb %zmm0, %zmm2, %zmm0
2128; AVX512F-32-NEXT:    retl
2129  %res = call <64 x i8> @llvm.x86.avx512.mask.pavg.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
2130  %res1 = call <64 x i8> @llvm.x86.avx512.mask.pavg.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
2131  %res2 = add <64 x i8> %res, %res1
2132  ret <64 x i8> %res2
2133}
2134
; Test for llvm.x86.avx512.mask.pavg.w.512 (i32 mask). The intrinsic is called
; once with the variable mask %x3 and once with an all-ones mask (-1), and the
; two results are added. The checks expect, for both RUN lines, a {%k1}-masked
; vpavgw, an unmasked vpavgw and a final vpaddw.
2135declare <32 x i16> @llvm.x86.avx512.mask.pavg.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
2136
2137define <32 x i16>@test_int_x86_avx512_mask_pavg_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
2138; AVX512BW-LABEL: test_int_x86_avx512_mask_pavg_w_512:
2139; AVX512BW:       ## BB#0:
2140; AVX512BW-NEXT:    kmovd %edi, %k1
2141; AVX512BW-NEXT:    vpavgw %zmm1, %zmm0, %zmm2 {%k1}
2142; AVX512BW-NEXT:    vpavgw %zmm1, %zmm0, %zmm0
2143; AVX512BW-NEXT:    vpaddw %zmm0, %zmm2, %zmm0
2144; AVX512BW-NEXT:    retq
2145;
2146; AVX512F-32-LABEL: test_int_x86_avx512_mask_pavg_w_512:
2147; AVX512F-32:       # BB#0:
2148; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
2149; AVX512F-32-NEXT:    vpavgw %zmm1, %zmm0, %zmm2 {%k1}
2150; AVX512F-32-NEXT:    vpavgw %zmm1, %zmm0, %zmm0
2151; AVX512F-32-NEXT:    vpaddw %zmm0, %zmm2, %zmm0
2152; AVX512F-32-NEXT:    retl
2153  %res = call <32 x i16> @llvm.x86.avx512.mask.pavg.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
2154  %res1 = call <32 x i16> @llvm.x86.avx512.mask.pavg.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
2155  %res2 = add <32 x i16> %res, %res1
2156  ret <32 x i16> %res2
2157}
2158
; Test for llvm.x86.avx512.mask.pshuf.b.512 (i64 mask). The intrinsic is called
; once with the variable mask %x3 and once with an all-ones mask (-1), and the
; two results are added. The checks expect a {%k1}-masked vpshufb, an unmasked
; vpshufb and a vpaddb. On the i386 RUN line the mask is expected to be built
; from two kmovd loads joined by kunpckdq.
2159declare <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
2160
2161define <64 x i8>@test_int_x86_avx512_mask_pshuf_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
2162; AVX512BW-LABEL: test_int_x86_avx512_mask_pshuf_b_512:
2163; AVX512BW:       ## BB#0:
2164; AVX512BW-NEXT:    kmovq %rdi, %k1
2165; AVX512BW-NEXT:    vpshufb %zmm1, %zmm0, %zmm2 {%k1}
2166; AVX512BW-NEXT:    vpshufb %zmm1, %zmm0, %zmm0
2167; AVX512BW-NEXT:    vpaddb %zmm0, %zmm2, %zmm0
2168; AVX512BW-NEXT:    retq
2169;
2170; AVX512F-32-LABEL: test_int_x86_avx512_mask_pshuf_b_512:
2171; AVX512F-32:       # BB#0:
2172; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
2173; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
2174; AVX512F-32-NEXT:    kunpckdq %k0, %k1, %k1
2175; AVX512F-32-NEXT:    vpshufb %zmm1, %zmm0, %zmm2 {%k1}
2176; AVX512F-32-NEXT:    vpshufb %zmm1, %zmm0, %zmm0
2177; AVX512F-32-NEXT:    vpaddb %zmm0, %zmm2, %zmm0
2178; AVX512F-32-NEXT:    retl
2179  %res = call <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
2180  %res1 = call <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
2181  %res2 = add <64 x i8> %res, %res1
2182  ret <64 x i8> %res2
2183}
2184
; Test for llvm.x86.avx512.mask.pabs.w.512 (two vector operands, i32 mask).
; The intrinsic is called once with the variable mask %x2 and once with an
; all-ones mask (-1), and the two results are added. The checks expect, for
; both RUN lines, a {%k1}-masked vpabsw, an unmasked vpabsw and a vpaddw.
2185declare <32 x i16> @llvm.x86.avx512.mask.pabs.w.512(<32 x i16>, <32 x i16>, i32)
2186
2187define <32 x i16>@test_int_x86_avx512_mask_pabs_w_512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2) {
2188; AVX512BW-LABEL: test_int_x86_avx512_mask_pabs_w_512:
2189; AVX512BW:       ## BB#0:
2190; AVX512BW-NEXT:    kmovd %edi, %k1
2191; AVX512BW-NEXT:    vpabsw %zmm0, %zmm1 {%k1}
2192; AVX512BW-NEXT:    vpabsw %zmm0, %zmm0
2193; AVX512BW-NEXT:    vpaddw %zmm0, %zmm1, %zmm0
2194; AVX512BW-NEXT:    retq
2195;
2196; AVX512F-32-LABEL: test_int_x86_avx512_mask_pabs_w_512:
2197; AVX512F-32:       # BB#0:
2198; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
2199; AVX512F-32-NEXT:    vpabsw %zmm0, %zmm1 {%k1}
2200; AVX512F-32-NEXT:    vpabsw %zmm0, %zmm0
2201; AVX512F-32-NEXT:    vpaddw %zmm0, %zmm1, %zmm0
2202; AVX512F-32-NEXT:    retl
2203  %res = call <32 x i16> @llvm.x86.avx512.mask.pabs.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2)
2204  %res1 = call <32 x i16> @llvm.x86.avx512.mask.pabs.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 -1)
2205  %res2 = add <32 x i16> %res, %res1
2206  ret <32 x i16> %res2
2207}
2208
; Test for llvm.x86.avx512.mask.pabs.b.512 (two vector operands, i64 mask).
; The intrinsic is called once with the variable mask %x2 and once with an
; all-ones mask (-1), and the two results are added. The checks expect a
; {%k1}-masked vpabsb, an unmasked vpabsb and a vpaddb. On the i386 RUN line
; the mask is expected to be built from two kmovd loads joined by kunpckdq.
2209declare <64 x i8> @llvm.x86.avx512.mask.pabs.b.512(<64 x i8>, <64 x i8>, i64)
2210
2211define <64 x i8>@test_int_x86_avx512_mask_pabs_b_512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2) {
2212; AVX512BW-LABEL: test_int_x86_avx512_mask_pabs_b_512:
2213; AVX512BW:       ## BB#0:
2214; AVX512BW-NEXT:    kmovq %rdi, %k1
2215; AVX512BW-NEXT:    vpabsb %zmm0, %zmm1 {%k1}
2216; AVX512BW-NEXT:    vpabsb %zmm0, %zmm0
2217; AVX512BW-NEXT:    vpaddb %zmm0, %zmm1, %zmm0
2218; AVX512BW-NEXT:    retq
2219;
2220; AVX512F-32-LABEL: test_int_x86_avx512_mask_pabs_b_512:
2221; AVX512F-32:       # BB#0:
2222; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
2223; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
2224; AVX512F-32-NEXT:    kunpckdq %k0, %k1, %k1
2225; AVX512F-32-NEXT:    vpabsb %zmm0, %zmm1 {%k1}
2226; AVX512F-32-NEXT:    vpabsb %zmm0, %zmm0
2227; AVX512F-32-NEXT:    vpaddb %zmm0, %zmm1, %zmm0
2228; AVX512F-32-NEXT:    retl
2229  %res = call <64 x i8> @llvm.x86.avx512.mask.pabs.b.512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2)
2230  %res1 = call <64 x i8> @llvm.x86.avx512.mask.pabs.b.512(<64 x i8> %x0, <64 x i8> %x1, i64 -1)
2231  %res2 = add <64 x i8> %res, %res1
2232  ret <64 x i8> %res2
2233}
2234
; Test for llvm.x86.avx512.mask.pmulhu.w.512 (i32 mask). The intrinsic is
; called once with the variable mask %x3 and once with an all-ones mask (-1),
; and the two results are added. The checks expect, for both RUN lines, a
; {%k1}-masked vpmulhuw, an unmasked vpmulhuw and a final vpaddw.
2235declare <32 x i16> @llvm.x86.avx512.mask.pmulhu.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
2236
2237define <32 x i16>@test_int_x86_avx512_mask_pmulhu_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
2238; AVX512BW-LABEL: test_int_x86_avx512_mask_pmulhu_w_512:
2239; AVX512BW:       ## BB#0:
2240; AVX512BW-NEXT:    kmovd %edi, %k1
2241; AVX512BW-NEXT:    vpmulhuw %zmm1, %zmm0, %zmm2 {%k1}
2242; AVX512BW-NEXT:    vpmulhuw %zmm1, %zmm0, %zmm0
2243; AVX512BW-NEXT:    vpaddw %zmm0, %zmm2, %zmm0
2244; AVX512BW-NEXT:    retq
2245;
2246; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmulhu_w_512:
2247; AVX512F-32:       # BB#0:
2248; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
2249; AVX512F-32-NEXT:    vpmulhuw %zmm1, %zmm0, %zmm2 {%k1}
2250; AVX512F-32-NEXT:    vpmulhuw %zmm1, %zmm0, %zmm0
2251; AVX512F-32-NEXT:    vpaddw %zmm0, %zmm2, %zmm0
2252; AVX512F-32-NEXT:    retl
2253  %res = call <32 x i16> @llvm.x86.avx512.mask.pmulhu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
2254  %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmulhu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
2255  %res2 = add <32 x i16> %res, %res1
2256  ret <32 x i16> %res2
2257}
2258
; Test for llvm.x86.avx512.mask.pmulh.w.512 (i32 mask). The intrinsic is called
; once with the variable mask %x3 and once with an all-ones mask (-1), and the
; two results are added. The checks expect, for both RUN lines, a {%k1}-masked
; vpmulhw, an unmasked vpmulhw and a final vpaddw.
2259declare <32 x i16> @llvm.x86.avx512.mask.pmulh.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
2260
2261define <32 x i16>@test_int_x86_avx512_mask_pmulh_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
2262; AVX512BW-LABEL: test_int_x86_avx512_mask_pmulh_w_512:
2263; AVX512BW:       ## BB#0:
2264; AVX512BW-NEXT:    kmovd %edi, %k1
2265; AVX512BW-NEXT:    vpmulhw %zmm1, %zmm0, %zmm2 {%k1}
2266; AVX512BW-NEXT:    vpmulhw %zmm1, %zmm0, %zmm0
2267; AVX512BW-NEXT:    vpaddw %zmm0, %zmm2, %zmm0
2268; AVX512BW-NEXT:    retq
2269;
2270; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmulh_w_512:
2271; AVX512F-32:       # BB#0:
2272; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
2273; AVX512F-32-NEXT:    vpmulhw %zmm1, %zmm0, %zmm2 {%k1}
2274; AVX512F-32-NEXT:    vpmulhw %zmm1, %zmm0, %zmm0
2275; AVX512F-32-NEXT:    vpaddw %zmm0, %zmm2, %zmm0
2276; AVX512F-32-NEXT:    retl
2277  %res = call <32 x i16> @llvm.x86.avx512.mask.pmulh.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
2278  %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmulh.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
2279  %res2 = add <32 x i16> %res, %res1
2280  ret <32 x i16> %res2
2281}
2282
; Test for llvm.x86.avx512.mask.pmul.hr.sw.512 (i32 mask). The intrinsic is
; called once with the variable mask %x3 and once with an all-ones mask (-1),
; and the two results are added. The checks expect, for both RUN lines, a
; {%k1}-masked vpmulhrsw, an unmasked vpmulhrsw and a final vpaddw.
2283declare <32 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
2284
2285define <32 x i16>@test_int_x86_avx512_mask_pmulhr_sw_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
2286; AVX512BW-LABEL: test_int_x86_avx512_mask_pmulhr_sw_512:
2287; AVX512BW:       ## BB#0:
2288; AVX512BW-NEXT:    kmovd %edi, %k1
2289; AVX512BW-NEXT:    vpmulhrsw %zmm1, %zmm0, %zmm2 {%k1}
2290; AVX512BW-NEXT:    vpmulhrsw %zmm1, %zmm0, %zmm0
2291; AVX512BW-NEXT:    vpaddw %zmm0, %zmm2, %zmm0
2292; AVX512BW-NEXT:    retq
2293;
2294; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmulhr_sw_512:
2295; AVX512F-32:       # BB#0:
2296; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
2297; AVX512F-32-NEXT:    vpmulhrsw %zmm1, %zmm0, %zmm2 {%k1}
2298; AVX512F-32-NEXT:    vpmulhrsw %zmm1, %zmm0, %zmm0
2299; AVX512F-32-NEXT:    vpaddw %zmm0, %zmm2, %zmm0
2300; AVX512F-32-NEXT:    retl
2301  %res = call <32 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
2302  %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
2303  %res2 = add <32 x i16> %res, %res1
2304  ret <32 x i16> %res2
2305}
2306
; Test for llvm.x86.avx512.mask.pmov.wb.512 (<32 x i16> source, <32 x i8>
; result). The intrinsic is called three times: with an all-ones mask (-1) and
; %x1, with the variable mask %x2 and %x1, and with the variable mask %x2 and
; zeroinitializer as the second operand. The three results are added. The
; checks expect a {%k1} vpmovwb, a {%k1} {z} vpmovwb, an unmasked vpmovwb and
; two vpaddb instructions for both RUN lines.
2307declare <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16>, <32 x i8>, i32)
2308
2309define <32 x i8>@test_int_x86_avx512_mask_pmov_wb_512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) {
2310; AVX512BW-LABEL: test_int_x86_avx512_mask_pmov_wb_512:
2311; AVX512BW:       ## BB#0:
2312; AVX512BW-NEXT:    kmovd %edi, %k1
2313; AVX512BW-NEXT:    vpmovwb %zmm0, %ymm1 {%k1}
2314; AVX512BW-NEXT:    vpmovwb %zmm0, %ymm2 {%k1} {z}
2315; AVX512BW-NEXT:    vpmovwb %zmm0, %ymm0
2316; AVX512BW-NEXT:    vpaddb %ymm1, %ymm0, %ymm0
2317; AVX512BW-NEXT:    vpaddb %ymm2, %ymm0, %ymm0
2318; AVX512BW-NEXT:    retq
2319;
2320; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmov_wb_512:
2321; AVX512F-32:       # BB#0:
2322; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
2323; AVX512F-32-NEXT:    vpmovwb %zmm0, %ymm1 {%k1}
2324; AVX512F-32-NEXT:    vpmovwb %zmm0, %ymm2 {%k1} {z}
2325; AVX512F-32-NEXT:    vpmovwb %zmm0, %ymm0
2326; AVX512F-32-NEXT:    vpaddb %ymm1, %ymm0, %ymm0
2327; AVX512F-32-NEXT:    vpaddb %ymm2, %ymm0, %ymm0
2328; AVX512F-32-NEXT:    retl
2329    %res0 = call <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 -1)
2330    %res1 = call <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2)
2331    %res2 = call <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16> %x0, <32 x i8> zeroinitializer, i32 %x2)
2332    %res3 = add <32 x i8> %res0, %res1
2333    %res4 = add <32 x i8> %res3, %res2
2334    ret <32 x i8> %res4
2335}
2336
; Test for llvm.x86.avx512.mask.pmov.wb.mem.512, which takes a pointer, a
; <32 x i16> vector and an i32 mask and returns void. The intrinsic is called
; on %ptr once with an all-ones mask (-1) and once with the variable mask %x2.
; The checks expect an unmasked vpmovwb to memory followed by a {%k1}-masked
; vpmovwb to the same address for both RUN lines.
2337declare void @llvm.x86.avx512.mask.pmov.wb.mem.512(i8* %ptr, <32 x i16>, i32)
2338
2339define void @test_int_x86_avx512_mask_pmov_wb_mem_512(i8* %ptr, <32 x i16> %x1, i32 %x2) {
2340; AVX512BW-LABEL: test_int_x86_avx512_mask_pmov_wb_mem_512:
2341; AVX512BW:       ## BB#0:
2342; AVX512BW-NEXT:    kmovd %esi, %k1
2343; AVX512BW-NEXT:    vpmovwb %zmm0, (%rdi)
2344; AVX512BW-NEXT:    vpmovwb %zmm0, (%rdi) {%k1}
2345; AVX512BW-NEXT:    retq
2346;
2347; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmov_wb_mem_512:
2348; AVX512F-32:       # BB#0:
2349; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
2350; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
2351; AVX512F-32-NEXT:    vpmovwb %zmm0, (%eax)
2352; AVX512F-32-NEXT:    vpmovwb %zmm0, (%eax) {%k1}
2353; AVX512F-32-NEXT:    retl
2354    call void @llvm.x86.avx512.mask.pmov.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 -1)
2355    call void @llvm.x86.avx512.mask.pmov.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 %x2)
2356    ret void
2357}
2358
; Test for llvm.x86.avx512.mask.pmovs.wb.512 (<32 x i16> source, <32 x i8>
; result). The intrinsic is called three times: with an all-ones mask (-1) and
; %x1, with the variable mask %x2 and %x1, and with the variable mask %x2 and
; zeroinitializer as the second operand. The three results are added. The
; checks expect a {%k1} vpmovswb, a {%k1} {z} vpmovswb, an unmasked vpmovswb
; and two vpaddb instructions for both RUN lines.
2359declare <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16>, <32 x i8>, i32)
2360
2361define <32 x i8>@test_int_x86_avx512_mask_pmovs_wb_512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) {
2362; AVX512BW-LABEL: test_int_x86_avx512_mask_pmovs_wb_512:
2363; AVX512BW:       ## BB#0:
2364; AVX512BW-NEXT:    kmovd %edi, %k1
2365; AVX512BW-NEXT:    vpmovswb %zmm0, %ymm1 {%k1}
2366; AVX512BW-NEXT:    vpmovswb %zmm0, %ymm2 {%k1} {z}
2367; AVX512BW-NEXT:    vpmovswb %zmm0, %ymm0
2368; AVX512BW-NEXT:    vpaddb %ymm1, %ymm0, %ymm0
2369; AVX512BW-NEXT:    vpaddb %ymm2, %ymm0, %ymm0
2370; AVX512BW-NEXT:    retq
2371;
2372; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmovs_wb_512:
2373; AVX512F-32:       # BB#0:
2374; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
2375; AVX512F-32-NEXT:    vpmovswb %zmm0, %ymm1 {%k1}
2376; AVX512F-32-NEXT:    vpmovswb %zmm0, %ymm2 {%k1} {z}
2377; AVX512F-32-NEXT:    vpmovswb %zmm0, %ymm0
2378; AVX512F-32-NEXT:    vpaddb %ymm1, %ymm0, %ymm0
2379; AVX512F-32-NEXT:    vpaddb %ymm2, %ymm0, %ymm0
2380; AVX512F-32-NEXT:    retl
2381    %res0 = call <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 -1)
2382    %res1 = call <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2)
2383    %res2 = call <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16> %x0, <32 x i8> zeroinitializer, i32 %x2)
2384    %res3 = add <32 x i8> %res0, %res1
2385    %res4 = add <32 x i8> %res3, %res2
2386    ret <32 x i8> %res4
2387}
2388
; Test for llvm.x86.avx512.mask.pmovs.wb.mem.512, which takes a pointer, a
; <32 x i16> vector and an i32 mask and returns void. The intrinsic is called
; on %ptr once with an all-ones mask (-1) and once with the variable mask %x2.
; The checks expect an unmasked vpmovswb to memory, then the mask move into
; %k1, then a {%k1}-masked vpmovswb to the same address, for both RUN lines.
2389declare void @llvm.x86.avx512.mask.pmovs.wb.mem.512(i8* %ptr, <32 x i16>, i32)
2390
2391define void @test_int_x86_avx512_mask_pmovs_wb_mem_512(i8* %ptr, <32 x i16> %x1, i32 %x2) {
2392; AVX512BW-LABEL: test_int_x86_avx512_mask_pmovs_wb_mem_512:
2393; AVX512BW:       ## BB#0:
2394; AVX512BW-NEXT:    vpmovswb %zmm0, (%rdi)
2395; AVX512BW-NEXT:    kmovd %esi, %k1
2396; AVX512BW-NEXT:    vpmovswb %zmm0, (%rdi) {%k1}
2397; AVX512BW-NEXT:    retq
2398;
2399; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmovs_wb_mem_512:
2400; AVX512F-32:       # BB#0:
2401; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
2402; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
2403; AVX512F-32-NEXT:    vpmovswb %zmm0, (%ecx)
2404; AVX512F-32-NEXT:    kmovd %eax, %k1
2405; AVX512F-32-NEXT:    vpmovswb %zmm0, (%ecx) {%k1}
2406; AVX512F-32-NEXT:    retl
2407    call void @llvm.x86.avx512.mask.pmovs.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 -1)
2408    call void @llvm.x86.avx512.mask.pmovs.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 %x2)
2409    ret void
2410}
2411
; Test for llvm.x86.avx512.mask.pmovus.wb.512 (<32 x i16> source, <32 x i8>
; result). The intrinsic is called three times: with an all-ones mask (-1) and
; %x1, with the variable mask %x2 and %x1, and with the variable mask %x2 and
; zeroinitializer as the second operand. The three results are added. The
; checks expect a {%k1} vpmovuswb, a {%k1} {z} vpmovuswb, an unmasked vpmovuswb
; and two vpaddb instructions for both RUN lines.
2412declare <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16>, <32 x i8>, i32)
2413
2414define <32 x i8>@test_int_x86_avx512_mask_pmovus_wb_512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) {
2415; AVX512BW-LABEL: test_int_x86_avx512_mask_pmovus_wb_512:
2416; AVX512BW:       ## BB#0:
2417; AVX512BW-NEXT:    kmovd %edi, %k1
2418; AVX512BW-NEXT:    vpmovuswb %zmm0, %ymm1 {%k1}
2419; AVX512BW-NEXT:    vpmovuswb %zmm0, %ymm2 {%k1} {z}
2420; AVX512BW-NEXT:    vpmovuswb %zmm0, %ymm0
2421; AVX512BW-NEXT:    vpaddb %ymm1, %ymm0, %ymm0
2422; AVX512BW-NEXT:    vpaddb %ymm2, %ymm0, %ymm0
2423; AVX512BW-NEXT:    retq
2424;
2425; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmovus_wb_512:
2426; AVX512F-32:       # BB#0:
2427; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
2428; AVX512F-32-NEXT:    vpmovuswb %zmm0, %ymm1 {%k1}
2429; AVX512F-32-NEXT:    vpmovuswb %zmm0, %ymm2 {%k1} {z}
2430; AVX512F-32-NEXT:    vpmovuswb %zmm0, %ymm0
2431; AVX512F-32-NEXT:    vpaddb %ymm1, %ymm0, %ymm0
2432; AVX512F-32-NEXT:    vpaddb %ymm2, %ymm0, %ymm0
2433; AVX512F-32-NEXT:    retl
2434    %res0 = call <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 -1)
2435    %res1 = call <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2)
2436    %res2 = call <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16> %x0, <32 x i8> zeroinitializer, i32 %x2)
2437    %res3 = add <32 x i8> %res0, %res1
2438    %res4 = add <32 x i8> %res3, %res2
2439    ret <32 x i8> %res4
2440}
2441
; Test for llvm.x86.avx512.mask.pmovus.wb.mem.512, which takes a pointer, a
; <32 x i16> vector and an i32 mask and returns void. The intrinsic is called
; on %ptr once with an all-ones mask (-1) and once with the variable mask %x2.
; The checks expect an unmasked vpmovuswb to memory, then the mask move into
; %k1, then a {%k1}-masked vpmovuswb to the same address, for both RUN lines.
2442declare void @llvm.x86.avx512.mask.pmovus.wb.mem.512(i8* %ptr, <32 x i16>, i32)
2443
2444define void @test_int_x86_avx512_mask_pmovus_wb_mem_512(i8* %ptr, <32 x i16> %x1, i32 %x2) {
2445; AVX512BW-LABEL: test_int_x86_avx512_mask_pmovus_wb_mem_512:
2446; AVX512BW:       ## BB#0:
2447; AVX512BW-NEXT:    vpmovuswb %zmm0, (%rdi)
2448; AVX512BW-NEXT:    kmovd %esi, %k1
2449; AVX512BW-NEXT:    vpmovuswb %zmm0, (%rdi) {%k1}
2450; AVX512BW-NEXT:    retq
2451;
2452; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmovus_wb_mem_512:
2453; AVX512F-32:       # BB#0:
2454; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
2455; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
2456; AVX512F-32-NEXT:    vpmovuswb %zmm0, (%ecx)
2457; AVX512F-32-NEXT:    kmovd %eax, %k1
2458; AVX512F-32-NEXT:    vpmovuswb %zmm0, (%ecx) {%k1}
2459; AVX512F-32-NEXT:    retl
2460    call void @llvm.x86.avx512.mask.pmovus.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 -1)
2461    call void @llvm.x86.avx512.mask.pmovus.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 %x2)
2462    ret void
2463}
2464
; Test for llvm.x86.avx512.mask.pmaddubs.w.512, which takes two <64 x i8>
; vectors, a <32 x i16> vector and an i32 mask and returns <32 x i16>. The
; intrinsic is called once with the variable mask %x3 and once with an
; all-ones mask (-1), and the two results are added. The checks expect, for
; both RUN lines, a {%k1}-masked vpmaddubsw, an unmasked vpmaddubsw and a
; final vpaddw.
2465declare <32 x i16> @llvm.x86.avx512.mask.pmaddubs.w.512(<64 x i8>, <64 x i8>, <32 x i16>, i32)
2466
2467define <32 x i16>@test_int_x86_avx512_mask_pmaddubs_w_512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x2, i32 %x3) {
2468; AVX512BW-LABEL: test_int_x86_avx512_mask_pmaddubs_w_512:
2469; AVX512BW:       ## BB#0:
2470; AVX512BW-NEXT:    kmovd %edi, %k1
2471; AVX512BW-NEXT:    vpmaddubsw %zmm1, %zmm0, %zmm2 {%k1}
2472; AVX512BW-NEXT:    vpmaddubsw %zmm1, %zmm0, %zmm0
2473; AVX512BW-NEXT:    vpaddw %zmm0, %zmm2, %zmm0
2474; AVX512BW-NEXT:    retq
2475;
2476; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmaddubs_w_512:
2477; AVX512F-32:       # BB#0:
2478; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
2479; AVX512F-32-NEXT:    vpmaddubsw %zmm1, %zmm0, %zmm2 {%k1}
2480; AVX512F-32-NEXT:    vpmaddubsw %zmm1, %zmm0, %zmm0
2481; AVX512F-32-NEXT:    vpaddw %zmm0, %zmm2, %zmm0
2482; AVX512F-32-NEXT:    retl
2483  %res = call <32 x i16> @llvm.x86.avx512.mask.pmaddubs.w.512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x2, i32 %x3)
2484  %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmaddubs.w.512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x2, i32 -1)
2485  %res2 = add <32 x i16> %res, %res1
2486  ret <32 x i16> %res2
2487}
2488
; Test for llvm.x86.avx512.mask.pmaddw.d.512, which takes two <32 x i16>
; vectors, a <16 x i32> vector and an i16 mask and returns <16 x i32>. The
; intrinsic is called once with the variable mask %x3 and once with an
; all-ones mask (-1), and the two results are added. The checks expect, for
; both RUN lines, the mask to be moved with kmovw, then a {%k1}-masked
; vpmaddwd, an unmasked vpmaddwd and a final vpaddd.
2489declare <16 x i32> @llvm.x86.avx512.mask.pmaddw.d.512(<32 x i16>, <32 x i16>, <16 x i32>, i16)
2490
2491define <16 x i32>@test_int_x86_avx512_mask_pmaddw_d_512(<32 x i16> %x0, <32 x i16> %x1, <16 x i32> %x2, i16 %x3) {
2492; AVX512BW-LABEL: test_int_x86_avx512_mask_pmaddw_d_512:
2493; AVX512BW:       ## BB#0:
2494; AVX512BW-NEXT:    kmovw %edi, %k1
2495; AVX512BW-NEXT:    vpmaddwd %zmm1, %zmm0, %zmm2 {%k1}
2496; AVX512BW-NEXT:    vpmaddwd %zmm1, %zmm0, %zmm0
2497; AVX512BW-NEXT:    vpaddd %zmm0, %zmm2, %zmm0
2498; AVX512BW-NEXT:    retq
2499;
2500; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmaddw_d_512:
2501; AVX512F-32:       # BB#0:
2502; AVX512F-32-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
2503; AVX512F-32-NEXT:    vpmaddwd %zmm1, %zmm0, %zmm2 {%k1}
2504; AVX512F-32-NEXT:    vpmaddwd %zmm1, %zmm0, %zmm0
2505; AVX512F-32-NEXT:    vpaddd %zmm0, %zmm2, %zmm0
2506; AVX512F-32-NEXT:    retl
2507  %res = call <16 x i32> @llvm.x86.avx512.mask.pmaddw.d.512(<32 x i16> %x0, <32 x i16> %x1, <16 x i32> %x2, i16 %x3)
2508  %res1 = call <16 x i32> @llvm.x86.avx512.mask.pmaddw.d.512(<32 x i16> %x0, <32 x i16> %x1, <16 x i32> %x2, i16 -1)
2509  %res2 = add <16 x i32> %res, %res1
2510  ret <16 x i32> %res2
2511}
2512
; Test for llvm.x86.avx512.mask.punpckhb.w.512 (i64 mask). The intrinsic is
; called once with the variable mask %x3 and once with an all-ones mask (-1),
; and the two results are added. The checks expect two vpunpckhbw instructions
; followed by a vpaddb; each vpunpckhbw is matched through the {{.*#+}} pattern
; and the element list that follows it. On the i386 RUN line the mask is
; expected to be built from two kmovd loads joined by kunpckdq.
; NOTE(review): the expected text for the first (masked) vpunpckhbw lists k1
; where the unmasked one lists zmm1 as the second element source; this looks
; like an artifact of the generated output rather than a real operand - confirm
; before regenerating or hand-editing these lines.
2513declare <64 x i8> @llvm.x86.avx512.mask.punpckhb.w.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
2514
2515define <64 x i8>@test_int_x86_avx512_mask_punpckhb_w_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
2516; AVX512BW-LABEL: test_int_x86_avx512_mask_punpckhb_w_512:
2517; AVX512BW:       ## BB#0:
2518; AVX512BW-NEXT:    kmovq %rdi, %k1
2519; AVX512BW-NEXT:    vpunpckhbw {{.*#+}} zmm2 = zmm2[8],k1[8],zmm2[9],k1[9],zmm2[10],k1[10],zmm2[11],k1[11],zmm2[12],k1[12],zmm2[13],k1[13],zmm2[14],k1[14],zmm2[15],k1[15],zmm2[24],k1[24],zmm2[25],k1[25],zmm2[26],k1[26],zmm2[27],k1[27],zmm2[28],k1[28],zmm2[29],k1[29],zmm2[30],k1[30],zmm2[31],k1[31],zmm2[40],k1[40],zmm2[41],k1[41],zmm2[42],k1[42],zmm2[43],k1[43],zmm2[44],k1[44],zmm2[45],k1[45],zmm2[46],k1[46],zmm2[47],k1[47],zmm2[56],k1[56],zmm2[57],k1[57],zmm2[58],k1[58],zmm2[59],k1[59],zmm2[60],k1[60],zmm2[61],k1[61],zmm2[62],k1[62],zmm2[63],k1[63]
2520; AVX512BW-NEXT:    vpunpckhbw {{.*#+}} zmm0 = zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31],zmm0[40],zmm1[40],zmm0[41],zmm1[41],zmm0[42],zmm1[42],zmm0[43],zmm1[43],zmm0[44],zmm1[44],zmm0[45],zmm1[45],zmm0[46],zmm1[46],zmm0[47],zmm1[47],zmm0[56],zmm1[56],zmm0[57],zmm1[57],zmm0[58],zmm1[58],zmm0[59],zmm1[59],zmm0[60],zmm1[60],zmm0[61],zmm1[61],zmm0[62],zmm1[62],zmm0[63],zmm1[63]
2521; AVX512BW-NEXT:    vpaddb %zmm0, %zmm2, %zmm0
2522; AVX512BW-NEXT:    retq
2523;
2524; AVX512F-32-LABEL: test_int_x86_avx512_mask_punpckhb_w_512:
2525; AVX512F-32:       # BB#0:
2526; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
2527; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
2528; AVX512F-32-NEXT:    kunpckdq %k0, %k1, %k1
2529; AVX512F-32-NEXT:    vpunpckhbw {{.*#+}} zmm2 = zmm2[8],k1[8],zmm2[9],k1[9],zmm2[10],k1[10],zmm2[11],k1[11],zmm2[12],k1[12],zmm2[13],k1[13],zmm2[14],k1[14],zmm2[15],k1[15],zmm2[24],k1[24],zmm2[25],k1[25],zmm2[26],k1[26],zmm2[27],k1[27],zmm2[28],k1[28],zmm2[29],k1[29],zmm2[30],k1[30],zmm2[31],k1[31],zmm2[40],k1[40],zmm2[41],k1[41],zmm2[42],k1[42],zmm2[43],k1[43],zmm2[44],k1[44],zmm2[45],k1[45],zmm2[46],k1[46],zmm2[47],k1[47],zmm2[56],k1[56],zmm2[57],k1[57],zmm2[58],k1[58],zmm2[59],k1[59],zmm2[60],k1[60],zmm2[61],k1[61],zmm2[62],k1[62],zmm2[63],k1[63]
2530; AVX512F-32-NEXT:    vpunpckhbw {{.*#+}} zmm0 = zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31],zmm0[40],zmm1[40],zmm0[41],zmm1[41],zmm0[42],zmm1[42],zmm0[43],zmm1[43],zmm0[44],zmm1[44],zmm0[45],zmm1[45],zmm0[46],zmm1[46],zmm0[47],zmm1[47],zmm0[56],zmm1[56],zmm0[57],zmm1[57],zmm0[58],zmm1[58],zmm0[59],zmm1[59],zmm0[60],zmm1[60],zmm0[61],zmm1[61],zmm0[62],zmm1[62],zmm0[63],zmm1[63]
2531; AVX512F-32-NEXT:    vpaddb %zmm0, %zmm2, %zmm0
2532; AVX512F-32-NEXT:    retl
2533  %res = call <64 x i8> @llvm.x86.avx512.mask.punpckhb.w.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
2534  %res1 = call <64 x i8> @llvm.x86.avx512.mask.punpckhb.w.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
2535  %res2 = add <64 x i8> %res, %res1
2536  ret <64 x i8> %res2
2537}
2538
; Test for llvm.x86.avx512.mask.punpcklb.w.512 (i64 mask). The intrinsic is
; called once with the variable mask %x3 and once with an all-ones mask (-1),
; and the two results are added. The checks expect two vpunpcklbw instructions
; followed by a vpaddb; each vpunpcklbw is matched through the {{.*#+}} pattern
; and the element list that follows it. On the i386 RUN line the mask is
; expected to be built from two kmovd loads joined by kunpckdq.
; NOTE(review): the expected text for the first (masked) vpunpcklbw lists k1
; where the unmasked one lists zmm1 as the second element source; this looks
; like an artifact of the generated output rather than a real operand - confirm
; before regenerating or hand-editing these lines.
2539declare <64 x i8> @llvm.x86.avx512.mask.punpcklb.w.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
2540
2541define <64 x i8>@test_int_x86_avx512_mask_punpcklb_w_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
2542; AVX512BW-LABEL: test_int_x86_avx512_mask_punpcklb_w_512:
2543; AVX512BW:       ## BB#0:
2544; AVX512BW-NEXT:    kmovq %rdi, %k1
2545; AVX512BW-NEXT:    vpunpcklbw {{.*#+}} zmm2 = zmm2[0],k1[0],zmm2[1],k1[1],zmm2[2],k1[2],zmm2[3],k1[3],zmm2[4],k1[4],zmm2[5],k1[5],zmm2[6],k1[6],zmm2[7],k1[7],zmm2[16],k1[16],zmm2[17],k1[17],zmm2[18],k1[18],zmm2[19],k1[19],zmm2[20],k1[20],zmm2[21],k1[21],zmm2[22],k1[22],zmm2[23],k1[23],zmm2[32],k1[32],zmm2[33],k1[33],zmm2[34],k1[34],zmm2[35],k1[35],zmm2[36],k1[36],zmm2[37],k1[37],zmm2[38],k1[38],zmm2[39],k1[39],zmm2[48],k1[48],zmm2[49],k1[49],zmm2[50],k1[50],zmm2[51],k1[51],zmm2[52],k1[52],zmm2[53],k1[53],zmm2[54],k1[54],zmm2[55],k1[55]
2546; AVX512BW-NEXT:    vpunpcklbw {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[32],zmm1[32],zmm0[33],zmm1[33],zmm0[34],zmm1[34],zmm0[35],zmm1[35],zmm0[36],zmm1[36],zmm0[37],zmm1[37],zmm0[38],zmm1[38],zmm0[39],zmm1[39],zmm0[48],zmm1[48],zmm0[49],zmm1[49],zmm0[50],zmm1[50],zmm0[51],zmm1[51],zmm0[52],zmm1[52],zmm0[53],zmm1[53],zmm0[54],zmm1[54],zmm0[55],zmm1[55]
2547; AVX512BW-NEXT:    vpaddb %zmm0, %zmm2, %zmm0
2548; AVX512BW-NEXT:    retq
2549;
2550; AVX512F-32-LABEL: test_int_x86_avx512_mask_punpcklb_w_512:
2551; AVX512F-32:       # BB#0:
2552; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
2553; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
2554; AVX512F-32-NEXT:    kunpckdq %k0, %k1, %k1
2555; AVX512F-32-NEXT:    vpunpcklbw {{.*#+}} zmm2 = zmm2[0],k1[0],zmm2[1],k1[1],zmm2[2],k1[2],zmm2[3],k1[3],zmm2[4],k1[4],zmm2[5],k1[5],zmm2[6],k1[6],zmm2[7],k1[7],zmm2[16],k1[16],zmm2[17],k1[17],zmm2[18],k1[18],zmm2[19],k1[19],zmm2[20],k1[20],zmm2[21],k1[21],zmm2[22],k1[22],zmm2[23],k1[23],zmm2[32],k1[32],zmm2[33],k1[33],zmm2[34],k1[34],zmm2[35],k1[35],zmm2[36],k1[36],zmm2[37],k1[37],zmm2[38],k1[38],zmm2[39],k1[39],zmm2[48],k1[48],zmm2[49],k1[49],zmm2[50],k1[50],zmm2[51],k1[51],zmm2[52],k1[52],zmm2[53],k1[53],zmm2[54],k1[54],zmm2[55],k1[55]
2556; AVX512F-32-NEXT:    vpunpcklbw {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[32],zmm1[32],zmm0[33],zmm1[33],zmm0[34],zmm1[34],zmm0[35],zmm1[35],zmm0[36],zmm1[36],zmm0[37],zmm1[37],zmm0[38],zmm1[38],zmm0[39],zmm1[39],zmm0[48],zmm1[48],zmm0[49],zmm1[49],zmm0[50],zmm1[50],zmm0[51],zmm1[51],zmm0[52],zmm1[52],zmm0[53],zmm1[53],zmm0[54],zmm1[54],zmm0[55],zmm1[55]
2557; AVX512F-32-NEXT:    vpaddb %zmm0, %zmm2, %zmm0
2558; AVX512F-32-NEXT:    retl
2559  %res = call <64 x i8> @llvm.x86.avx512.mask.punpcklb.w.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
2560  %res1 = call <64 x i8> @llvm.x86.avx512.mask.punpcklb.w.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
2561  %res2 = add <64 x i8> %res, %res1
2562  ret <64 x i8> %res2
2563}
2564
; Test for llvm.x86.avx512.mask.punpckhw.d.512 (i32 mask). The intrinsic is
; called once with the variable mask %x3 and once with an all-ones mask (-1),
; and the two results are added. The checks expect, for both RUN lines, two
; vpunpckhwd instructions followed by a vpaddw; each vpunpckhwd is matched
; through the {{.*#+}} pattern and the element list that follows it.
; NOTE(review): the expected text for the first (masked) vpunpckhwd lists k1
; where the unmasked one lists zmm1 as the second element source; this looks
; like an artifact of the generated output rather than a real operand - confirm
; before regenerating or hand-editing these lines.
2565declare <32 x i16> @llvm.x86.avx512.mask.punpckhw.d.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
2566
2567define <32 x i16>@test_int_x86_avx512_mask_punpckhw_d_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
2568; AVX512BW-LABEL: test_int_x86_avx512_mask_punpckhw_d_512:
2569; AVX512BW:       ## BB#0:
2570; AVX512BW-NEXT:    kmovd %edi, %k1
2571; AVX512BW-NEXT:    vpunpckhwd {{.*#+}} zmm2 = zmm2[4],k1[4],zmm2[5],k1[5],zmm2[6],k1[6],zmm2[7],k1[7],zmm2[12],k1[12],zmm2[13],k1[13],zmm2[14],k1[14],zmm2[15],k1[15],zmm2[20],k1[20],zmm2[21],k1[21],zmm2[22],k1[22],zmm2[23],k1[23],zmm2[28],k1[28],zmm2[29],k1[29],zmm2[30],k1[30],zmm2[31],k1[31]
2572; AVX512BW-NEXT:    vpunpckhwd {{.*#+}} zmm0 = zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31]
2573; AVX512BW-NEXT:    vpaddw %zmm0, %zmm2, %zmm0
2574; AVX512BW-NEXT:    retq
2575;
2576; AVX512F-32-LABEL: test_int_x86_avx512_mask_punpckhw_d_512:
2577; AVX512F-32:       # BB#0:
2578; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
2579; AVX512F-32-NEXT:    vpunpckhwd {{.*#+}} zmm2 = zmm2[4],k1[4],zmm2[5],k1[5],zmm2[6],k1[6],zmm2[7],k1[7],zmm2[12],k1[12],zmm2[13],k1[13],zmm2[14],k1[14],zmm2[15],k1[15],zmm2[20],k1[20],zmm2[21],k1[21],zmm2[22],k1[22],zmm2[23],k1[23],zmm2[28],k1[28],zmm2[29],k1[29],zmm2[30],k1[30],zmm2[31],k1[31]
2580; AVX512F-32-NEXT:    vpunpckhwd {{.*#+}} zmm0 = zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31]
2581; AVX512F-32-NEXT:    vpaddw %zmm0, %zmm2, %zmm0
2582; AVX512F-32-NEXT:    retl
2583  %res = call <32 x i16> @llvm.x86.avx512.mask.punpckhw.d.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
2584  %res1 = call <32 x i16> @llvm.x86.avx512.mask.punpckhw.d.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
2585  %res2 = add <32 x i16> %res, %res1
2586  ret <32 x i16> %res2
2587}
2588
; Test for llvm.x86.avx512.mask.punpcklw.d.512 (i32 mask). The intrinsic is
; called once with the variable mask %x3 and once with an all-ones mask (-1),
; and the two results are added. The checks expect, for both RUN lines, two
; vpunpcklwd instructions followed by a vpaddw; each vpunpcklwd is matched
; through the {{.*#+}} pattern and the element list that follows it.
; NOTE(review): the expected text for the first (masked) vpunpcklwd lists k1
; where the unmasked one lists zmm1 as the second element source; this looks
; like an artifact of the generated output rather than a real operand - confirm
; before regenerating or hand-editing these lines.
2589declare <32 x i16> @llvm.x86.avx512.mask.punpcklw.d.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
2590
2591define <32 x i16>@test_int_x86_avx512_mask_punpcklw_d_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
2592; AVX512BW-LABEL: test_int_x86_avx512_mask_punpcklw_d_512:
2593; AVX512BW:       ## BB#0:
2594; AVX512BW-NEXT:    kmovd %edi, %k1
2595; AVX512BW-NEXT:    vpunpcklwd {{.*#+}} zmm2 = zmm2[0],k1[0],zmm2[1],k1[1],zmm2[2],k1[2],zmm2[3],k1[3],zmm2[8],k1[8],zmm2[9],k1[9],zmm2[10],k1[10],zmm2[11],k1[11],zmm2[16],k1[16],zmm2[17],k1[17],zmm2[18],k1[18],zmm2[19],k1[19],zmm2[24],k1[24],zmm2[25],k1[25],zmm2[26],k1[26],zmm2[27],k1[27]
2596; AVX512BW-NEXT:    vpunpcklwd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27]
2597; AVX512BW-NEXT:    vpaddw %zmm0, %zmm2, %zmm0
2598; AVX512BW-NEXT:    retq
2599;
2600; AVX512F-32-LABEL: test_int_x86_avx512_mask_punpcklw_d_512:
2601; AVX512F-32:       # BB#0:
2602; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
2603; AVX512F-32-NEXT:    vpunpcklwd {{.*#+}} zmm2 = zmm2[0],k1[0],zmm2[1],k1[1],zmm2[2],k1[2],zmm2[3],k1[3],zmm2[8],k1[8],zmm2[9],k1[9],zmm2[10],k1[10],zmm2[11],k1[11],zmm2[16],k1[16],zmm2[17],k1[17],zmm2[18],k1[18],zmm2[19],k1[19],zmm2[24],k1[24],zmm2[25],k1[25],zmm2[26],k1[26],zmm2[27],k1[27]
2604; AVX512F-32-NEXT:    vpunpcklwd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27]
2605; AVX512F-32-NEXT:    vpaddw %zmm0, %zmm2, %zmm0
2606; AVX512F-32-NEXT:    retl
2607  %res = call <32 x i16> @llvm.x86.avx512.mask.punpcklw.d.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
2608  %res1 = call <32 x i16> @llvm.x86.avx512.mask.punpcklw.d.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
2609  %res2 = add <32 x i16> %res, %res1
2610  ret <32 x i16> %res2
2611}
2612
2613declare <64 x i8> @llvm.x86.avx512.mask.palignr.512(<64 x i8>, <64 x i8>, i32, <64 x i8>, i64)
2614
; Codegen test for @llvm.x86.avx512.mask.palignr.512 with immediate 2.
; Three calls on %x0/%x1 are summed into the return value:
;   %res  - fourth operand %x3, i64 mask %x4 (expected as a {%k1} form)
;   %res1 - fourth operand zeroinitializer, mask %x4 (expected as {%k1} {z})
;   %res2 - fourth operand %x3, all-ones mask (expected with no mask operand)
; Expected output: the x86_64 run moves the mask from %rdi with one kmovq; the
; i386 run builds the 64-bit mask from two 32-bit stack loads joined by
; kunpckdq, and emits the unmasked vpalignr first.
2615define <64 x i8>@test_int_x86_avx512_mask_palignr_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x3, i64 %x4) {
2616; AVX512BW-LABEL: test_int_x86_avx512_mask_palignr_512:
2617; AVX512BW:       ## BB#0:
2618; AVX512BW-NEXT:    kmovq %rdi, %k1
2619; AVX512BW-NEXT:    vpalignr $2, %zmm1, %zmm0, %zmm2 {%k1}
2620; AVX512BW-NEXT:    vpalignr $2, %zmm1, %zmm0, %zmm3 {%k1} {z}
2621; AVX512BW-NEXT:    vpalignr $2, %zmm1, %zmm0, %zmm0
2622; AVX512BW-NEXT:    vpaddb %zmm3, %zmm2, %zmm1
2623; AVX512BW-NEXT:    vpaddb %zmm0, %zmm1, %zmm0
2624; AVX512BW-NEXT:    retq
2625;
2626; AVX512F-32-LABEL: test_int_x86_avx512_mask_palignr_512:
2627; AVX512F-32:       # BB#0:
2628; AVX512F-32-NEXT:    vpalignr $2, %zmm1, %zmm0, %zmm3
2629; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
2630; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
2631; AVX512F-32-NEXT:    kunpckdq %k0, %k1, %k1
2632; AVX512F-32-NEXT:    vpalignr $2, %zmm1, %zmm0, %zmm2 {%k1}
2633; AVX512F-32-NEXT:    vpalignr $2, %zmm1, %zmm0, %zmm0 {%k1} {z}
2634; AVX512F-32-NEXT:    vpaddb %zmm0, %zmm2, %zmm0
2635; AVX512F-32-NEXT:    vpaddb %zmm3, %zmm0, %zmm0
2636; AVX512F-32-NEXT:    retl
; The three calls differ only in the fourth operand and the mask; the sum is (%res + %res1) + %res2.
2637  %res = call <64 x i8> @llvm.x86.avx512.mask.palignr.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <64 x i8> %x3, i64 %x4)
2638  %res1 = call <64 x i8> @llvm.x86.avx512.mask.palignr.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <64 x i8> zeroinitializer, i64 %x4)
2639  %res2 = call <64 x i8> @llvm.x86.avx512.mask.palignr.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <64 x i8> %x3, i64 -1)
2640  %res3 = add <64 x i8> %res, %res1
2641  %res4 = add <64 x i8> %res3, %res2
2642  ret <64 x i8> %res4
2643}
2644
2645declare <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8>, <64 x i8>, i32, <32 x i16>, i32)
2646
; Codegen test for @llvm.x86.avx512.mask.dbpsadbw.512 with immediate 2.
; Inputs are <64 x i8> vectors %x0/%x1; results are <32 x i16>.
; Three calls are summed into the return value:
;   %res  - fourth operand %x3, i32 mask %x4 (expected as a {%k1} form)
;   %res1 - fourth operand zeroinitializer, mask %x4 (expected as {%k1} {z})
;   %res2 - fourth operand %x3, all-ones mask (expected with no mask operand)
; Expected output is the same instruction sequence for both runs, except that
; the x86_64 run takes the mask from %edi and the i386 run loads it from the
; stack.
2647define <32 x i16>@test_int_x86_avx512_mask_dbpsadbw_512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x3, i32 %x4) {
2648; AVX512BW-LABEL: test_int_x86_avx512_mask_dbpsadbw_512:
2649; AVX512BW:       ## BB#0:
2650; AVX512BW-NEXT:    kmovd %edi, %k1
2651; AVX512BW-NEXT:    vdbpsadbw $2, %zmm1, %zmm0, %zmm2 {%k1}
2652; AVX512BW-NEXT:    vdbpsadbw $2, %zmm1, %zmm0, %zmm3 {%k1} {z}
2653; AVX512BW-NEXT:    vdbpsadbw $2, %zmm1, %zmm0, %zmm0
2654; AVX512BW-NEXT:    vpaddw %zmm3, %zmm2, %zmm1
2655; AVX512BW-NEXT:    vpaddw %zmm0, %zmm1, %zmm0
2656; AVX512BW-NEXT:    retq
2657;
2658; AVX512F-32-LABEL: test_int_x86_avx512_mask_dbpsadbw_512:
2659; AVX512F-32:       # BB#0:
2660; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
2661; AVX512F-32-NEXT:    vdbpsadbw $2, %zmm1, %zmm0, %zmm2 {%k1}
2662; AVX512F-32-NEXT:    vdbpsadbw $2, %zmm1, %zmm0, %zmm3 {%k1} {z}
2663; AVX512F-32-NEXT:    vdbpsadbw $2, %zmm1, %zmm0, %zmm0
2664; AVX512F-32-NEXT:    vpaddw %zmm3, %zmm2, %zmm1
2665; AVX512F-32-NEXT:    vpaddw %zmm0, %zmm1, %zmm0
2666; AVX512F-32-NEXT:    retl
; The three calls differ only in the fourth operand and the mask; the sum is (%res + %res1) + %res2.
2667  %res = call <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <32 x i16> %x3, i32 %x4)
2668  %res1 = call <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <32 x i16> zeroinitializer, i32 %x4)
2669  %res2 = call <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <32 x i16> %x3, i32 -1)
2670  %res3 = add <32 x i16> %res, %res1
2671  %res4 = add <32 x i16> %res3, %res2
2672  ret <32 x i16> %res4
2673}
2674
2675declare <8 x i64> @llvm.x86.avx512.psll.dq.512(<8 x i64>, i32)
2676
; Codegen test for @llvm.x86.avx512.psll.dq.512.
; The intrinsic is called on %x0 with immediates 8 and 4 and the two results
; are added. Both runs expect vpslldq $8, vpslldq $4 and a vpaddq; only the
; return instruction differs.
; Note: despite "mask" in the function name, neither this function nor the
; intrinsic takes a mask operand.
2677define <8 x i64>@test_int_x86_avx512_mask_psll_dq_512(<8 x i64> %x0) {
2678; AVX512BW-LABEL: test_int_x86_avx512_mask_psll_dq_512:
2679; AVX512BW:       ## BB#0:
2680; AVX512BW-NEXT:    vpslldq $8, %zmm0, %zmm1
2681; AVX512BW-NEXT:    vpslldq $4, %zmm0, %zmm0
2682; AVX512BW-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
2683; AVX512BW-NEXT:    retq
2684;
2685; AVX512F-32-LABEL: test_int_x86_avx512_mask_psll_dq_512:
2686; AVX512F-32:       # BB#0:
2687; AVX512F-32-NEXT:    vpslldq $8, %zmm0, %zmm1
2688; AVX512F-32-NEXT:    vpslldq $4, %zmm0, %zmm0
2689; AVX512F-32-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
2690; AVX512F-32-NEXT:    retl
; Same source vector, two different immediates; the results are summed.
2691  %res = call <8 x i64> @llvm.x86.avx512.psll.dq.512(<8 x i64> %x0, i32 8)
2692  %res1 = call <8 x i64> @llvm.x86.avx512.psll.dq.512(<8 x i64> %x0, i32 4)
2693  %res2 = add <8 x i64> %res, %res1
2694  ret <8 x i64> %res2
2695}
2696
2697declare <8 x i64> @llvm.x86.avx512.psrl.dq.512(<8 x i64>, i32)
2698
; Codegen test for @llvm.x86.avx512.psrl.dq.512.
; The intrinsic is called on %x0 with immediates 8 and 4 and the two results
; are added. Both runs expect vpsrldq $8, vpsrldq $4 and a vpaddq; only the
; return instruction differs.
; Note: despite "mask" in the function name, neither this function nor the
; intrinsic takes a mask operand.
2699define <8 x i64>@test_int_x86_avx512_mask_psrl_dq_512(<8 x i64> %x0) {
2700; AVX512BW-LABEL: test_int_x86_avx512_mask_psrl_dq_512:
2701; AVX512BW:       ## BB#0:
2702; AVX512BW-NEXT:    vpsrldq $8, %zmm0, %zmm1
2703; AVX512BW-NEXT:    vpsrldq $4, %zmm0, %zmm0
2704; AVX512BW-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
2705; AVX512BW-NEXT:    retq
2706;
2707; AVX512F-32-LABEL: test_int_x86_avx512_mask_psrl_dq_512:
2708; AVX512F-32:       # BB#0:
2709; AVX512F-32-NEXT:    vpsrldq $8, %zmm0, %zmm1
2710; AVX512F-32-NEXT:    vpsrldq $4, %zmm0, %zmm0
2711; AVX512F-32-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
2712; AVX512F-32-NEXT:    retl
; Same source vector, two different immediates; the results are summed.
2713  %res = call <8 x i64> @llvm.x86.avx512.psrl.dq.512(<8 x i64> %x0, i32 8)
2714  %res1 = call <8 x i64> @llvm.x86.avx512.psrl.dq.512(<8 x i64> %x0, i32 4)
2715  %res2 = add <8 x i64> %res, %res1
2716  ret <8 x i64> %res2
2717}
2718declare  <8 x i64> @llvm.x86.avx512.psad.bw.512(<64 x i8>, <64 x i8>)
2719
; Codegen test for @llvm.x86.avx512.psad.bw.512, which takes two <64 x i8>
; vectors and returns <8 x i64>.
; The intrinsic is called on (%x0, %x1) and on (%x0, %x2), and the two results
; are added. Both runs expect two vpsadbw followed by a vpaddq; only the
; return instruction differs.
; Note: despite "mask" in the function name, neither this function nor the
; intrinsic takes a mask operand.
2720define  <8 x i64>@test_int_x86_avx512_mask_psadb_w_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2){
2721; AVX512BW-LABEL: test_int_x86_avx512_mask_psadb_w_512:
2722; AVX512BW:       ## BB#0:
2723; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm1
2724; AVX512BW-NEXT:    vpsadbw %zmm2, %zmm0, %zmm0
2725; AVX512BW-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
2726; AVX512BW-NEXT:    retq
2727;
2728; AVX512F-32-LABEL: test_int_x86_avx512_mask_psadb_w_512:
2729; AVX512F-32:       # BB#0:
2730; AVX512F-32-NEXT:    vpsadbw %zmm1, %zmm0, %zmm1
2731; AVX512F-32-NEXT:    vpsadbw %zmm2, %zmm0, %zmm0
2732; AVX512F-32-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
2733; AVX512F-32-NEXT:    retl
; %x0 is the first operand of both calls; only the second operand differs.
2734  %res = call  <8 x i64> @llvm.x86.avx512.psad.bw.512(<64 x i8> %x0, <64 x i8> %x1)
2735  %res1 = call  <8 x i64> @llvm.x86.avx512.psad.bw.512(<64 x i8> %x0, <64 x i8> %x2)
2736  %res2 = add  <8 x i64> %res, %res1
2737  ret  <8 x i64> %res2
2738}
2739
2740declare i32 @llvm.x86.avx512.kunpck.wd(i32, i32)
2741
; Codegen test for @llvm.x86.avx512.kunpck.wd: passes the two i32 arguments
; straight to the intrinsic and returns its i32 result.
; Expected output: both operands are moved into mask registers %k0/%k1 with
; kmovd, combined by kunpckwd, and the result is moved to %eax. The x86_64 run
; reads the operands from %edi/%esi; the i386 run loads them from the stack.
2742define i32@test_int_x86_avx512_kunpck_wd(i32 %x0, i32 %x1) {
2743; AVX512BW-LABEL: test_int_x86_avx512_kunpck_wd:
2744; AVX512BW:       ## BB#0:
2745; AVX512BW-NEXT:    kmovd %edi, %k0
2746; AVX512BW-NEXT:    kmovd %esi, %k1
2747; AVX512BW-NEXT:    kunpckwd %k1, %k0, %k0
2748; AVX512BW-NEXT:    kmovd %k0, %eax
2749; AVX512BW-NEXT:    retq
2750;
2751; AVX512F-32-LABEL: test_int_x86_avx512_kunpck_wd:
2752; AVX512F-32:       # BB#0:
2753; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
2754; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
2755; AVX512F-32-NEXT:    kunpckwd %k1, %k0, %k0
2756; AVX512F-32-NEXT:    kmovd %k0, %eax
2757; AVX512F-32-NEXT:    retl
2758  %res = call i32 @llvm.x86.avx512.kunpck.wd(i32 %x0, i32 %x1)
2759  ret i32 %res
2760}
2761
2762declare i64 @llvm.x86.avx512.kunpck.dq(i64, i64)
2763
; Codegen test for @llvm.x86.avx512.kunpck.dq: passes the two i64 arguments
; straight to the intrinsic and returns its i64 result.
; Note: the function name ends in "qd" while the intrinsic is "kunpck.dq".
; Expected output, x86_64 run: operands are moved from %rdi/%rsi into %k0/%k1
; with kmovq, combined by kunpckdq, and the result is moved to %rax.
; Expected output, i386 run: 12 bytes of stack are reserved, two 32-bit values
; are loaded from the stack with kmovd and combined by kunpckdq, and the
; 64-bit result is stored to the stack and reloaded as the %eax/%edx pair.
2764define i64@test_int_x86_avx512_kunpck_qd(i64 %x0, i64 %x1) {
2765; AVX512BW-LABEL: test_int_x86_avx512_kunpck_qd:
2766; AVX512BW:       ## BB#0:
2767; AVX512BW-NEXT:    kmovq %rdi, %k0
2768; AVX512BW-NEXT:    kmovq %rsi, %k1
2769; AVX512BW-NEXT:    kunpckdq %k1, %k0, %k0
2770; AVX512BW-NEXT:    kmovq %k0, %rax
2771; AVX512BW-NEXT:    retq
2772;
2773; AVX512F-32-LABEL: test_int_x86_avx512_kunpck_qd:
2774; AVX512F-32:       # BB#0:
2775; AVX512F-32-NEXT:    subl $12, %esp
2776; AVX512F-32-NEXT:  .Ltmp8:
2777; AVX512F-32-NEXT:    .cfi_def_cfa_offset 16
2778; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
2779; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
2780; AVX512F-32-NEXT:    kunpckdq %k0, %k1, %k0
2781; AVX512F-32-NEXT:    kmovq %k0, (%esp)
2782; AVX512F-32-NEXT:    movl (%esp), %eax
2783; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %edx
2784; AVX512F-32-NEXT:    addl $12, %esp
2785; AVX512F-32-NEXT:    retl
2786  %res = call i64 @llvm.x86.avx512.kunpck.dq(i64 %x0, i64 %x1)
2787  ret i64 %res
2788}
2789