• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=x86_64-apple-darwin -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=KNL
3; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
4; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=x86_64-apple-darwin -mattr=+avx512bw  | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512BW
5; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=x86_64-apple-darwin -mattr=+avx512dq  | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512DQ
6; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=i686-apple-darwin -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq | FileCheck %s --check-prefix=X86
7
8
; mask16: bitcasts an i16 to <16 x i1>, inverts every lane by xor with an
; all-ones vector, and bitcasts the result back to i16. The expected output
; for every run is a plain scalar NOT with no mask-register instructions.
9define i16 @mask16(i16 %x) {
10; CHECK-LABEL: mask16:
11; CHECK:       ## %bb.0:
12; CHECK-NEXT:    notl %edi
13; CHECK-NEXT:    movl %edi, %eax
14; CHECK-NEXT:    retq
15;
16; X86-LABEL: mask16:
17; X86:       ## %bb.0:
18; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
19; X86-NEXT:    notl %eax
20; X86-NEXT:    ## kill: def $ax killed $ax killed $eax
21; X86-NEXT:    retl
22  %m0 = bitcast i16 %x to <16 x i1>
23  %m1 = xor <16 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
24  %ret = bitcast <16 x i1> %m1 to i16
25  ret i16 %ret
26}
27
; mask16_zext: same 16-lane mask inversion as mask16, but the i16 result is
; zero-extended to i32 before being returned. The 64-bit runs expect
; notl + movzwl; the i686 run expects a movzwl load followed by xorl $65535.
28define i32 @mask16_zext(i16 %x) {
29; CHECK-LABEL: mask16_zext:
30; CHECK:       ## %bb.0:
31; CHECK-NEXT:    notl %edi
32; CHECK-NEXT:    movzwl %di, %eax
33; CHECK-NEXT:    retq
34;
35; X86-LABEL: mask16_zext:
36; X86:       ## %bb.0:
37; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
38; X86-NEXT:    xorl $65535, %eax ## imm = 0xFFFF
39; X86-NEXT:    retl
40  %m0 = bitcast i16 %x to <16 x i1>
41  %m1 = xor <16 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
42  %m2 = bitcast <16 x i1> %m1 to i16
43  %ret = zext i16 %m2 to i32
44  ret i32 %ret
45}
46
; mask8: bitcasts an i8 to <8 x i1>, inverts every lane by xor with an
; all-ones vector, and bitcasts back to i8. All runs expect a scalar notb.
47define i8 @mask8(i8 %x) {
48; CHECK-LABEL: mask8:
49; CHECK:       ## %bb.0:
50; CHECK-NEXT:    notb %dil
51; CHECK-NEXT:    movl %edi, %eax
52; CHECK-NEXT:    retq
53;
54; X86-LABEL: mask8:
55; X86:       ## %bb.0:
56; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
57; X86-NEXT:    notb %al
58; X86-NEXT:    retl
59  %m0 = bitcast i8 %x to <8 x i1>
60  %m1 = xor <8 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
61  %ret = bitcast <8 x i1> %m1 to i8
62  ret i8 %ret
63}
64
; mask8_zext: same 8-lane mask inversion as mask8, with the i8 result
; zero-extended to i32. All runs expect notb followed by movzbl.
65define i32 @mask8_zext(i8 %x) {
66; CHECK-LABEL: mask8_zext:
67; CHECK:       ## %bb.0:
68; CHECK-NEXT:    notb %dil
69; CHECK-NEXT:    movzbl %dil, %eax
70; CHECK-NEXT:    retq
71;
72; X86-LABEL: mask8_zext:
73; X86:       ## %bb.0:
74; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
75; X86-NEXT:    notb %al
76; X86-NEXT:    movzbl %al, %eax
77; X86-NEXT:    retl
78  %m0 = bitcast i8 %x to <8 x i1>
79  %m1 = xor <8 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
80  %m2 = bitcast <8 x i1> %m1 to i8
81  %ret = zext i8 %m2 to i32
82  ret i32 %ret
83}
84
; mask16_mem: loads an i16 from %ptr, inverts it as a <16 x i1> mask, and
; stores the result back to the same address. Unlike the register variants,
; every run expects the mask-register sequence kmovw / knotw / kmovw.
85define void @mask16_mem(i16* %ptr) {
86; CHECK-LABEL: mask16_mem:
87; CHECK:       ## %bb.0:
88; CHECK-NEXT:    kmovw (%rdi), %k0
89; CHECK-NEXT:    knotw %k0, %k0
90; CHECK-NEXT:    kmovw %k0, (%rdi)
91; CHECK-NEXT:    retq
92;
93; X86-LABEL: mask16_mem:
94; X86:       ## %bb.0:
95; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
96; X86-NEXT:    kmovw (%eax), %k0
97; X86-NEXT:    knotw %k0, %k0
98; X86-NEXT:    kmovw %k0, (%eax)
99; X86-NEXT:    retl
100  %x = load i16, i16* %ptr, align 4
101  %m0 = bitcast i16 %x to <16 x i1>
102  %m1 = xor <16 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
103  %ret = bitcast <16 x i1> %m1 to i16
104  store i16 %ret, i16* %ptr, align 4
105  ret void
106}
107
; mask8_mem: loads an i8 from %ptr, inverts it as an <8 x i1> mask, and
; stores it back. The expected code differs per run: the KNL and AVX512BW
; prefixes expect a single in-memory notb, while the SKX, AVX512DQ and i686
; prefixes expect kmovb / knotb / kmovb.
108define void @mask8_mem(i8* %ptr) {
109; KNL-LABEL: mask8_mem:
110; KNL:       ## %bb.0:
111; KNL-NEXT:    notb (%rdi)
112; KNL-NEXT:    retq
113;
114; SKX-LABEL: mask8_mem:
115; SKX:       ## %bb.0:
116; SKX-NEXT:    kmovb (%rdi), %k0
117; SKX-NEXT:    knotb %k0, %k0
118; SKX-NEXT:    kmovb %k0, (%rdi)
119; SKX-NEXT:    retq
120;
121; AVX512BW-LABEL: mask8_mem:
122; AVX512BW:       ## %bb.0:
123; AVX512BW-NEXT:    notb (%rdi)
124; AVX512BW-NEXT:    retq
125;
126; AVX512DQ-LABEL: mask8_mem:
127; AVX512DQ:       ## %bb.0:
128; AVX512DQ-NEXT:    kmovb (%rdi), %k0
129; AVX512DQ-NEXT:    knotb %k0, %k0
130; AVX512DQ-NEXT:    kmovb %k0, (%rdi)
131; AVX512DQ-NEXT:    retq
132;
133; X86-LABEL: mask8_mem:
134; X86:       ## %bb.0:
135; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
136; X86-NEXT:    kmovb (%eax), %k0
137; X86-NEXT:    knotb %k0, %k0
138; X86-NEXT:    kmovb %k0, (%eax)
139; X86-NEXT:    retl
140  %x = load i8, i8* %ptr, align 4
141  %m0 = bitcast i8 %x to <8 x i1>
142  %m1 = xor <8 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
143  %ret = bitcast <8 x i1> %m1 to i8
144  store i8 %ret, i8* %ptr, align 4
145  ret void
146}
147
; mand16: treats two i16 arguments as <16 x i1> masks and returns
; (a AND b) OR (a XOR b) as an i16. With both masks arriving in general
; registers, every run expects scalar andl / xorl / orl and no k-register ops.
148define i16 @mand16(i16 %x, i16 %y) {
149; CHECK-LABEL: mand16:
150; CHECK:       ## %bb.0:
151; CHECK-NEXT:    movl %edi, %eax
152; CHECK-NEXT:    xorl %esi, %eax
153; CHECK-NEXT:    andl %esi, %edi
154; CHECK-NEXT:    orl %eax, %edi
155; CHECK-NEXT:    movl %edi, %eax
156; CHECK-NEXT:    retq
157;
158; X86-LABEL: mand16:
159; X86:       ## %bb.0:
160; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
161; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
162; X86-NEXT:    movl %eax, %edx
163; X86-NEXT:    andl %ecx, %edx
164; X86-NEXT:    xorl %ecx, %eax
165; X86-NEXT:    orl %edx, %eax
166; X86-NEXT:    ## kill: def $ax killed $ax killed $eax
167; X86-NEXT:    retl
168  %ma = bitcast i16 %x to <16 x i1>
169  %mb = bitcast i16 %y to <16 x i1>
170  %mc = and <16 x i1> %ma, %mb
171  %md = xor <16 x i1> %ma, %mb
172  %me = or <16 x i1> %mc, %md
173  %ret = bitcast <16 x i1> %me to i16
174  ret i16 %ret
175}
176
; mand16_mem: same (a AND b) OR (a XOR b) computation as mand16, but the two
; <16 x i1> operands are loaded from memory. Every run expects kmovw loads
; into mask registers followed by kandw / kxorw / korw; the runs differ only
; in the final move to %eax (kmovw vs. kmovd).
177define i16 @mand16_mem(<16 x i1>* %x, <16 x i1>* %y) {
178; KNL-LABEL: mand16_mem:
179; KNL:       ## %bb.0:
180; KNL-NEXT:    kmovw (%rdi), %k0
181; KNL-NEXT:    kmovw (%rsi), %k1
182; KNL-NEXT:    kandw %k1, %k0, %k2
183; KNL-NEXT:    kxorw %k1, %k0, %k0
184; KNL-NEXT:    korw %k0, %k2, %k0
185; KNL-NEXT:    kmovw %k0, %eax
186; KNL-NEXT:    ## kill: def $ax killed $ax killed $eax
187; KNL-NEXT:    retq
188;
189; SKX-LABEL: mand16_mem:
190; SKX:       ## %bb.0:
191; SKX-NEXT:    kmovw (%rdi), %k0
192; SKX-NEXT:    kmovw (%rsi), %k1
193; SKX-NEXT:    kandw %k1, %k0, %k2
194; SKX-NEXT:    kxorw %k1, %k0, %k0
195; SKX-NEXT:    korw %k0, %k2, %k0
196; SKX-NEXT:    kmovd %k0, %eax
197; SKX-NEXT:    ## kill: def $ax killed $ax killed $eax
198; SKX-NEXT:    retq
199;
200; AVX512BW-LABEL: mand16_mem:
201; AVX512BW:       ## %bb.0:
202; AVX512BW-NEXT:    kmovw (%rdi), %k0
203; AVX512BW-NEXT:    kmovw (%rsi), %k1
204; AVX512BW-NEXT:    kandw %k1, %k0, %k2
205; AVX512BW-NEXT:    kxorw %k1, %k0, %k0
206; AVX512BW-NEXT:    korw %k0, %k2, %k0
207; AVX512BW-NEXT:    kmovd %k0, %eax
208; AVX512BW-NEXT:    ## kill: def $ax killed $ax killed $eax
209; AVX512BW-NEXT:    retq
210;
211; AVX512DQ-LABEL: mand16_mem:
212; AVX512DQ:       ## %bb.0:
213; AVX512DQ-NEXT:    kmovw (%rdi), %k0
214; AVX512DQ-NEXT:    kmovw (%rsi), %k1
215; AVX512DQ-NEXT:    kandw %k1, %k0, %k2
216; AVX512DQ-NEXT:    kxorw %k1, %k0, %k0
217; AVX512DQ-NEXT:    korw %k0, %k2, %k0
218; AVX512DQ-NEXT:    kmovw %k0, %eax
219; AVX512DQ-NEXT:    ## kill: def $ax killed $ax killed $eax
220; AVX512DQ-NEXT:    retq
221;
222; X86-LABEL: mand16_mem:
223; X86:       ## %bb.0:
224; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
225; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
226; X86-NEXT:    kmovw (%ecx), %k0
227; X86-NEXT:    kmovw (%eax), %k1
228; X86-NEXT:    kandw %k1, %k0, %k2
229; X86-NEXT:    kxorw %k1, %k0, %k0
230; X86-NEXT:    korw %k0, %k2, %k0
231; X86-NEXT:    kmovd %k0, %eax
232; X86-NEXT:    ## kill: def $ax killed $ax killed $eax
233; X86-NEXT:    retl
234  %ma = load <16 x i1>, <16 x i1>* %x
235  %mb = load <16 x i1>, <16 x i1>* %y
236  %mc = and <16 x i1> %ma, %mb
237  %md = xor <16 x i1> %ma, %mb
238  %me = or <16 x i1> %mc, %md
239  %ret = bitcast <16 x i1> %me to i16
240  ret i16 %ret
241}
242
; shuf_test1: bitcasts an i16 to <16 x i1>, shuffles out lanes 8..15 into an
; <8 x i1>, and returns that as an i8 (i.e. the upper half of the mask).
; The 64-bit runs expect a kshiftrw by 8 on a mask register; the i686 run
; expects only a single byte load from the stack.
243define i8 @shuf_test1(i16 %v) nounwind {
244; KNL-LABEL: shuf_test1:
245; KNL:       ## %bb.0:
246; KNL-NEXT:    kmovw %edi, %k0
247; KNL-NEXT:    kshiftrw $8, %k0, %k0
248; KNL-NEXT:    kmovw %k0, %eax
249; KNL-NEXT:    ## kill: def $al killed $al killed $eax
250; KNL-NEXT:    retq
251;
252; SKX-LABEL: shuf_test1:
253; SKX:       ## %bb.0:
254; SKX-NEXT:    kmovd %edi, %k0
255; SKX-NEXT:    kshiftrw $8, %k0, %k0
256; SKX-NEXT:    kmovd %k0, %eax
257; SKX-NEXT:    ## kill: def $al killed $al killed $eax
258; SKX-NEXT:    retq
259;
260; AVX512BW-LABEL: shuf_test1:
261; AVX512BW:       ## %bb.0:
262; AVX512BW-NEXT:    kmovd %edi, %k0
263; AVX512BW-NEXT:    kshiftrw $8, %k0, %k0
264; AVX512BW-NEXT:    kmovd %k0, %eax
265; AVX512BW-NEXT:    ## kill: def $al killed $al killed $eax
266; AVX512BW-NEXT:    retq
267;
268; AVX512DQ-LABEL: shuf_test1:
269; AVX512DQ:       ## %bb.0:
270; AVX512DQ-NEXT:    kmovw %edi, %k0
271; AVX512DQ-NEXT:    kshiftrw $8, %k0, %k0
272; AVX512DQ-NEXT:    kmovw %k0, %eax
273; AVX512DQ-NEXT:    ## kill: def $al killed $al killed $eax
274; AVX512DQ-NEXT:    retq
275;
276; X86-LABEL: shuf_test1:
277; X86:       ## %bb.0:
278; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
279; X86-NEXT:    retl
280   %v1 = bitcast i16 %v to <16 x i1>
281   %mask = shufflevector <16 x i1> %v1, <16 x i1> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
282   %mask1 = bitcast <8 x i1> %mask to i8
283   ret i8 %mask1
284}
285
; zext_test1: unsigned-greater-than compare of two <16 x i32> vectors,
; extracts lane 5 of the resulting mask, and zero-extends that bit to i32.
; Every run expects vpcmpnleud, a kshiftrw by 5, a move to %eax and andl $1.
286define i32 @zext_test1(<16 x i32> %a, <16 x i32> %b) {
287; KNL-LABEL: zext_test1:
288; KNL:       ## %bb.0:
289; KNL-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0
290; KNL-NEXT:    kshiftrw $5, %k0, %k0
291; KNL-NEXT:    kmovw %k0, %eax
292; KNL-NEXT:    andl $1, %eax
293; KNL-NEXT:    vzeroupper
294; KNL-NEXT:    retq
295;
296; SKX-LABEL: zext_test1:
297; SKX:       ## %bb.0:
298; SKX-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0
299; SKX-NEXT:    kshiftrw $5, %k0, %k0
300; SKX-NEXT:    kmovd %k0, %eax
301; SKX-NEXT:    andl $1, %eax
302; SKX-NEXT:    vzeroupper
303; SKX-NEXT:    retq
304;
305; AVX512BW-LABEL: zext_test1:
306; AVX512BW:       ## %bb.0:
307; AVX512BW-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0
308; AVX512BW-NEXT:    kshiftrw $5, %k0, %k0
309; AVX512BW-NEXT:    kmovd %k0, %eax
310; AVX512BW-NEXT:    andl $1, %eax
311; AVX512BW-NEXT:    vzeroupper
312; AVX512BW-NEXT:    retq
313;
314; AVX512DQ-LABEL: zext_test1:
315; AVX512DQ:       ## %bb.0:
316; AVX512DQ-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0
317; AVX512DQ-NEXT:    kshiftrw $5, %k0, %k0
318; AVX512DQ-NEXT:    kmovw %k0, %eax
319; AVX512DQ-NEXT:    andl $1, %eax
320; AVX512DQ-NEXT:    vzeroupper
321; AVX512DQ-NEXT:    retq
322;
323; X86-LABEL: zext_test1:
324; X86:       ## %bb.0:
325; X86-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0
326; X86-NEXT:    kshiftrw $5, %k0, %k0
327; X86-NEXT:    kmovd %k0, %eax
328; X86-NEXT:    andl $1, %eax
329; X86-NEXT:    vzeroupper
330; X86-NEXT:    retl
331  %cmp_res = icmp ugt <16 x i32> %a, %b
332  %cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5
333  %res = zext i1 %cmp_res.i1 to i32
334  ret i32 %res
335}
336
; zext_test2: same compare-and-extract of lane 5 as zext_test1, but the bit
; is zero-extended to i16. The expected code matches zext_test1 apart from
; the extra sub-register kill annotation on $ax before the return.
337define i16 @zext_test2(<16 x i32> %a, <16 x i32> %b) {
338; KNL-LABEL: zext_test2:
339; KNL:       ## %bb.0:
340; KNL-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0
341; KNL-NEXT:    kshiftrw $5, %k0, %k0
342; KNL-NEXT:    kmovw %k0, %eax
343; KNL-NEXT:    andl $1, %eax
344; KNL-NEXT:    ## kill: def $ax killed $ax killed $eax
345; KNL-NEXT:    vzeroupper
346; KNL-NEXT:    retq
347;
348; SKX-LABEL: zext_test2:
349; SKX:       ## %bb.0:
350; SKX-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0
351; SKX-NEXT:    kshiftrw $5, %k0, %k0
352; SKX-NEXT:    kmovd %k0, %eax
353; SKX-NEXT:    andl $1, %eax
354; SKX-NEXT:    ## kill: def $ax killed $ax killed $eax
355; SKX-NEXT:    vzeroupper
356; SKX-NEXT:    retq
357;
358; AVX512BW-LABEL: zext_test2:
359; AVX512BW:       ## %bb.0:
360; AVX512BW-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0
361; AVX512BW-NEXT:    kshiftrw $5, %k0, %k0
362; AVX512BW-NEXT:    kmovd %k0, %eax
363; AVX512BW-NEXT:    andl $1, %eax
364; AVX512BW-NEXT:    ## kill: def $ax killed $ax killed $eax
365; AVX512BW-NEXT:    vzeroupper
366; AVX512BW-NEXT:    retq
367;
368; AVX512DQ-LABEL: zext_test2:
369; AVX512DQ:       ## %bb.0:
370; AVX512DQ-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0
371; AVX512DQ-NEXT:    kshiftrw $5, %k0, %k0
372; AVX512DQ-NEXT:    kmovw %k0, %eax
373; AVX512DQ-NEXT:    andl $1, %eax
374; AVX512DQ-NEXT:    ## kill: def $ax killed $ax killed $eax
375; AVX512DQ-NEXT:    vzeroupper
376; AVX512DQ-NEXT:    retq
377;
378; X86-LABEL: zext_test2:
379; X86:       ## %bb.0:
380; X86-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0
381; X86-NEXT:    kshiftrw $5, %k0, %k0
382; X86-NEXT:    kmovd %k0, %eax
383; X86-NEXT:    andl $1, %eax
384; X86-NEXT:    ## kill: def $ax killed $ax killed $eax
385; X86-NEXT:    vzeroupper
386; X86-NEXT:    retl
387  %cmp_res = icmp ugt <16 x i32> %a, %b
388  %cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5
389  %res = zext i1 %cmp_res.i1 to i16
390  ret i16 %res
391}
392
; zext_test3: same compare-and-extract of lane 5 as zext_test1, but the bit
; is zero-extended to i8. The expected code isolates the bit with a byte-wide
; andb $1 on %al instead of the 32-bit andl used by the wider variants.
393define i8 @zext_test3(<16 x i32> %a, <16 x i32> %b) {
394; KNL-LABEL: zext_test3:
395; KNL:       ## %bb.0:
396; KNL-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0
397; KNL-NEXT:    kshiftrw $5, %k0, %k0
398; KNL-NEXT:    kmovw %k0, %eax
399; KNL-NEXT:    andb $1, %al
400; KNL-NEXT:    ## kill: def $al killed $al killed $eax
401; KNL-NEXT:    vzeroupper
402; KNL-NEXT:    retq
403;
404; SKX-LABEL: zext_test3:
405; SKX:       ## %bb.0:
406; SKX-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0
407; SKX-NEXT:    kshiftrw $5, %k0, %k0
408; SKX-NEXT:    kmovd %k0, %eax
409; SKX-NEXT:    andb $1, %al
410; SKX-NEXT:    ## kill: def $al killed $al killed $eax
411; SKX-NEXT:    vzeroupper
412; SKX-NEXT:    retq
413;
414; AVX512BW-LABEL: zext_test3:
415; AVX512BW:       ## %bb.0:
416; AVX512BW-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0
417; AVX512BW-NEXT:    kshiftrw $5, %k0, %k0
418; AVX512BW-NEXT:    kmovd %k0, %eax
419; AVX512BW-NEXT:    andb $1, %al
420; AVX512BW-NEXT:    ## kill: def $al killed $al killed $eax
421; AVX512BW-NEXT:    vzeroupper
422; AVX512BW-NEXT:    retq
423;
424; AVX512DQ-LABEL: zext_test3:
425; AVX512DQ:       ## %bb.0:
426; AVX512DQ-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0
427; AVX512DQ-NEXT:    kshiftrw $5, %k0, %k0
428; AVX512DQ-NEXT:    kmovw %k0, %eax
429; AVX512DQ-NEXT:    andb $1, %al
430; AVX512DQ-NEXT:    ## kill: def $al killed $al killed $eax
431; AVX512DQ-NEXT:    vzeroupper
432; AVX512DQ-NEXT:    retq
433;
434; X86-LABEL: zext_test3:
435; X86:       ## %bb.0:
436; X86-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0
437; X86-NEXT:    kshiftrw $5, %k0, %k0
438; X86-NEXT:    kmovd %k0, %eax
439; X86-NEXT:    andb $1, %al
440; X86-NEXT:    ## kill: def $al killed $al killed $eax
441; X86-NEXT:    vzeroupper
442; X86-NEXT:    retl
443  %cmp_res = icmp ugt <16 x i32> %a, %b
444  %cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5
445  %res = zext i1 %cmp_res.i1 to i8
446  ret i8 %res
447}
448
; conv1: stores an all-true <8 x i1> constant through %R, then round-trips a
; second constant (lane 0 false, lanes 1..7 true) through a stack slot and
; returns it bitcast to i8. Every run expects byte-immediate stores: $-1 to
; (%R), $-2 to the stack slot, and $-2 as the return value.
449define i8 @conv1(<8 x i1>* %R) {
450; CHECK-LABEL: conv1:
451; CHECK:       ## %bb.0: ## %entry
452; CHECK-NEXT:    movb $-1, (%rdi)
453; CHECK-NEXT:    movb $-2, -{{[0-9]+}}(%rsp)
454; CHECK-NEXT:    movb $-2, %al
455; CHECK-NEXT:    retq
456;
457; X86-LABEL: conv1:
458; X86:       ## %bb.0: ## %entry
459; X86-NEXT:    subl $12, %esp
460; X86-NEXT:    .cfi_def_cfa_offset 16
461; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
462; X86-NEXT:    movb $-1, (%eax)
463; X86-NEXT:    movb $-2, (%esp)
464; X86-NEXT:    movb $-2, %al
465; X86-NEXT:    addl $12, %esp
466; X86-NEXT:    retl
467entry:
468  store <8 x i1> <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, <8 x i1>* %R
469
470  %maskPtr = alloca <8 x i1>
471  store <8 x i1> <i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, <8 x i1>* %maskPtr
472  %mask = load <8 x i1>, <8 x i1>* %maskPtr
473  %mask_convert = bitcast <8 x i1> %mask to i8
474  ret i8 %mask_convert
475}
476
; test4: computes two signed-greater-than masks over <4 x i64> inputs, then
; compares the two <4 x i1> masks against each other with a signed icmp and
; sign-extends the result to <4 x i32>. Every run expects the mask-vs-mask
; compare to become a masked compare pair: vpcmpleq produces %k1, which
; predicates the following vpcmpgtq. Runs without 256-bit mask compares
; (KNL, AVX512BW, AVX512DQ prefixes) are expected to widen operands to zmm.
477define <4 x i32> @test4(<4 x i64> %x, <4 x i64> %y, <4 x i64> %x1, <4 x i64> %y1) {
478; KNL-LABEL: test4:
479; KNL:       ## %bb.0:
480; KNL-NEXT:    ## kill: def $ymm3 killed $ymm3 def $zmm3
481; KNL-NEXT:    ## kill: def $ymm2 killed $ymm2 def $zmm2
482; KNL-NEXT:    ## kill: def $ymm1 killed $ymm1 def $zmm1
483; KNL-NEXT:    ## kill: def $ymm0 killed $ymm0 def $zmm0
484; KNL-NEXT:    vpcmpleq %zmm1, %zmm0, %k1
485; KNL-NEXT:    vpcmpgtq %zmm3, %zmm2, %k1 {%k1}
486; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
487; KNL-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
488; KNL-NEXT:    vzeroupper
489; KNL-NEXT:    retq
490;
491; SKX-LABEL: test4:
492; SKX:       ## %bb.0:
493; SKX-NEXT:    vpcmpleq %ymm1, %ymm0, %k1
494; SKX-NEXT:    vpcmpgtq %ymm3, %ymm2, %k0 {%k1}
495; SKX-NEXT:    vpmovm2d %k0, %xmm0
496; SKX-NEXT:    vzeroupper
497; SKX-NEXT:    retq
498;
499; AVX512BW-LABEL: test4:
500; AVX512BW:       ## %bb.0:
501; AVX512BW-NEXT:    ## kill: def $ymm3 killed $ymm3 def $zmm3
502; AVX512BW-NEXT:    ## kill: def $ymm2 killed $ymm2 def $zmm2
503; AVX512BW-NEXT:    ## kill: def $ymm1 killed $ymm1 def $zmm1
504; AVX512BW-NEXT:    ## kill: def $ymm0 killed $ymm0 def $zmm0
505; AVX512BW-NEXT:    vpcmpleq %zmm1, %zmm0, %k1
506; AVX512BW-NEXT:    vpcmpgtq %zmm3, %zmm2, %k1 {%k1}
507; AVX512BW-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
508; AVX512BW-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
509; AVX512BW-NEXT:    vzeroupper
510; AVX512BW-NEXT:    retq
511;
512; AVX512DQ-LABEL: test4:
513; AVX512DQ:       ## %bb.0:
514; AVX512DQ-NEXT:    ## kill: def $ymm3 killed $ymm3 def $zmm3
515; AVX512DQ-NEXT:    ## kill: def $ymm2 killed $ymm2 def $zmm2
516; AVX512DQ-NEXT:    ## kill: def $ymm1 killed $ymm1 def $zmm1
517; AVX512DQ-NEXT:    ## kill: def $ymm0 killed $ymm0 def $zmm0
518; AVX512DQ-NEXT:    vpcmpleq %zmm1, %zmm0, %k1
519; AVX512DQ-NEXT:    vpcmpgtq %zmm3, %zmm2, %k0 {%k1}
520; AVX512DQ-NEXT:    vpmovm2d %k0, %zmm0
521; AVX512DQ-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
522; AVX512DQ-NEXT:    vzeroupper
523; AVX512DQ-NEXT:    retq
524;
525; X86-LABEL: test4:
526; X86:       ## %bb.0:
527; X86-NEXT:    vpcmpleq %ymm1, %ymm0, %k1
528; X86-NEXT:    vpcmpgtq %ymm3, %ymm2, %k0 {%k1}
529; X86-NEXT:    vpmovm2d %k0, %xmm0
530; X86-NEXT:    vzeroupper
531; X86-NEXT:    retl
532  %x_gt_y = icmp sgt <4 x i64> %x, %y
533  %x1_gt_y1 = icmp sgt <4 x i64> %x1, %y1
534  %res = icmp sgt <4 x i1>%x_gt_y, %x1_gt_y1
535  %resse = sext <4 x i1>%res to <4 x i32>
536  ret <4 x i32> %resse
537}
538
; test5: like test4 but on <2 x i64> inputs. Note that %x_gt_y actually holds
; a signed less-than result (the value name is misleading), %x1_gt_y1 holds a
; signed greater-than result, and the two <2 x i1> masks are then compared
; with a signed less-than before being sign-extended to <2 x i64>. Every run
; expects a vpcmpleq whose mask predicates a following vpcmpgtq.
539define <2 x i64> @test5(<2 x i64> %x, <2 x i64> %y, <2 x i64> %x1, <2 x i64> %y1) {
540; KNL-LABEL: test5:
541; KNL:       ## %bb.0:
542; KNL-NEXT:    ## kill: def $xmm3 killed $xmm3 def $zmm3
543; KNL-NEXT:    ## kill: def $xmm2 killed $xmm2 def $zmm2
544; KNL-NEXT:    ## kill: def $xmm1 killed $xmm1 def $zmm1
545; KNL-NEXT:    ## kill: def $xmm0 killed $xmm0 def $zmm0
546; KNL-NEXT:    vpcmpleq %zmm3, %zmm2, %k1
547; KNL-NEXT:    vpcmpgtq %zmm0, %zmm1, %k1 {%k1}
548; KNL-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
549; KNL-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
550; KNL-NEXT:    vzeroupper
551; KNL-NEXT:    retq
552;
553; SKX-LABEL: test5:
554; SKX:       ## %bb.0:
555; SKX-NEXT:    vpcmpleq %xmm3, %xmm2, %k1
556; SKX-NEXT:    vpcmpgtq %xmm0, %xmm1, %k0 {%k1}
557; SKX-NEXT:    vpmovm2q %k0, %xmm0
558; SKX-NEXT:    retq
559;
560; AVX512BW-LABEL: test5:
561; AVX512BW:       ## %bb.0:
562; AVX512BW-NEXT:    ## kill: def $xmm3 killed $xmm3 def $zmm3
563; AVX512BW-NEXT:    ## kill: def $xmm2 killed $xmm2 def $zmm2
564; AVX512BW-NEXT:    ## kill: def $xmm1 killed $xmm1 def $zmm1
565; AVX512BW-NEXT:    ## kill: def $xmm0 killed $xmm0 def $zmm0
566; AVX512BW-NEXT:    vpcmpleq %zmm3, %zmm2, %k1
567; AVX512BW-NEXT:    vpcmpgtq %zmm0, %zmm1, %k1 {%k1}
568; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
569; AVX512BW-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
570; AVX512BW-NEXT:    vzeroupper
571; AVX512BW-NEXT:    retq
572;
573; AVX512DQ-LABEL: test5:
574; AVX512DQ:       ## %bb.0:
575; AVX512DQ-NEXT:    ## kill: def $xmm3 killed $xmm3 def $zmm3
576; AVX512DQ-NEXT:    ## kill: def $xmm2 killed $xmm2 def $zmm2
577; AVX512DQ-NEXT:    ## kill: def $xmm1 killed $xmm1 def $zmm1
578; AVX512DQ-NEXT:    ## kill: def $xmm0 killed $xmm0 def $zmm0
579; AVX512DQ-NEXT:    vpcmpleq %zmm3, %zmm2, %k1
580; AVX512DQ-NEXT:    vpcmpgtq %zmm0, %zmm1, %k0 {%k1}
581; AVX512DQ-NEXT:    vpmovm2q %k0, %zmm0
582; AVX512DQ-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
583; AVX512DQ-NEXT:    vzeroupper
584; AVX512DQ-NEXT:    retq
585;
586; X86-LABEL: test5:
587; X86:       ## %bb.0:
588; X86-NEXT:    vpcmpleq %xmm3, %xmm2, %k1
589; X86-NEXT:    vpcmpgtq %xmm0, %xmm1, %k0 {%k1}
590; X86-NEXT:    vpmovm2q %k0, %xmm0
591; X86-NEXT:    retl
592  %x_gt_y = icmp slt <2 x i64> %x, %y
593  %x1_gt_y1 = icmp sgt <2 x i64> %x1, %y1
594  %res = icmp slt <2 x i1>%x_gt_y, %x1_gt_y1
595  %resse = sext <2 x i1>%res to <2 x i64>
596  ret <2 x i64> %resse
597}

; test6: ANDs a <16 x i1> argument with an alternating true/false constant,
; bitcasts the result to i16, compares it with zero and branches to one of
; two blocks that both just return.
; NOTE(review): this function carries no autogenerated assertions. Its
; 'define' used to share a line with the closing brace of test5, which
; presumably hid it from utils/update_llc_test_checks.py. Now that the
; 'define' starts its own line, re-run that script to generate assertions.
define void @test6(<16 x i1> %mask)  {
598allocas:
599  %a= and <16 x i1> %mask, <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>
600  %b = bitcast <16 x i1> %a to i16
601  %c = icmp eq i16 %b, 0
602  br i1 %c, label %true, label %false
603
604true:
605  ret void
606
607false:
608  ret void
609}
; test7: ORs an <8 x i1> argument with an alternating true/false constant
; (lanes 0, 2, 4, 6 set), bitcasts to i8, compares with zero and branches to
; one of two blocks that both just return. Every run expects the argument to
; be converted to a mask register, moved to %eax, and combined with the
; constant as a scalar "orb $85"; no compare or branch appears in the
; expected output.
610define void @test7(<8 x i1> %mask)  {
611; KNL-LABEL: test7:
612; KNL:       ## %bb.0: ## %allocas
613; KNL-NEXT:    vpmovsxwq %xmm0, %zmm0
614; KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
615; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k0
616; KNL-NEXT:    kmovw %k0, %eax
617; KNL-NEXT:    orb $85, %al
618; KNL-NEXT:    vzeroupper
619; KNL-NEXT:    retq
620;
621; SKX-LABEL: test7:
622; SKX:       ## %bb.0: ## %allocas
623; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
624; SKX-NEXT:    vpmovw2m %xmm0, %k0
625; SKX-NEXT:    kmovd %k0, %eax
626; SKX-NEXT:    orb $85, %al
627; SKX-NEXT:    retq
628;
629; AVX512BW-LABEL: test7:
630; AVX512BW:       ## %bb.0: ## %allocas
631; AVX512BW-NEXT:    vpsllw $15, %xmm0, %xmm0
632; AVX512BW-NEXT:    vpmovw2m %zmm0, %k0
633; AVX512BW-NEXT:    kmovd %k0, %eax
634; AVX512BW-NEXT:    orb $85, %al
635; AVX512BW-NEXT:    vzeroupper
636; AVX512BW-NEXT:    retq
637;
638; AVX512DQ-LABEL: test7:
639; AVX512DQ:       ## %bb.0: ## %allocas
640; AVX512DQ-NEXT:    vpmovsxwq %xmm0, %zmm0
641; AVX512DQ-NEXT:    vpsllq $63, %zmm0, %zmm0
642; AVX512DQ-NEXT:    vpmovq2m %zmm0, %k0
643; AVX512DQ-NEXT:    kmovw %k0, %eax
644; AVX512DQ-NEXT:    orb $85, %al
645; AVX512DQ-NEXT:    vzeroupper
646; AVX512DQ-NEXT:    retq
647;
648; X86-LABEL: test7:
649; X86:       ## %bb.0: ## %allocas
650; X86-NEXT:    vpsllw $15, %xmm0, %xmm0
651; X86-NEXT:    vpmovw2m %xmm0, %k0
652; X86-NEXT:    kmovd %k0, %eax
653; X86-NEXT:    orb $85, %al
654; X86-NEXT:    retl
655allocas:
656  %a= or <8 x i1> %mask, <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>
657  %b = bitcast <8 x i1> %a to i8
658  %c = icmp eq i8 %b, 0
659  br i1 %c, label %true, label %false
660
661true:
662  ret void
663
664false:
665  ret void
666}
; test8: uses a scalar signed compare of %a1 and %b1 to select between two
; <16 x i1> masks, then sign-extends the chosen mask to <16 x i8>. The first
; mask is "%a > 0" (signed); the second is "%b < 0" as an unsigned compare,
; and the expected code materialises that arm as an all-zero mask (kxorw).
; Every run expects the select to be lowered as a conditional branch (jg).
667define <16 x i8> @test8(<16 x i32>%a, <16 x i32>%b, i32 %a1, i32 %b1) {
668; KNL-LABEL: test8:
669; KNL:       ## %bb.0:
670; KNL-NEXT:    cmpl %esi, %edi
671; KNL-NEXT:    jg LBB17_1
672; KNL-NEXT:  ## %bb.2:
673; KNL-NEXT:    kxorw %k0, %k0, %k1
674; KNL-NEXT:    jmp LBB17_3
675; KNL-NEXT:  LBB17_1:
676; KNL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
677; KNL-NEXT:    vpcmpgtd %zmm1, %zmm0, %k1
678; KNL-NEXT:  LBB17_3:
679; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
680; KNL-NEXT:    vpmovdb %zmm0, %xmm0
681; KNL-NEXT:    vzeroupper
682; KNL-NEXT:    retq
683;
684; SKX-LABEL: test8:
685; SKX:       ## %bb.0:
686; SKX-NEXT:    cmpl %esi, %edi
687; SKX-NEXT:    jg LBB17_1
688; SKX-NEXT:  ## %bb.2:
689; SKX-NEXT:    kxorw %k0, %k0, %k0
690; SKX-NEXT:    vpmovm2b %k0, %xmm0
691; SKX-NEXT:    vzeroupper
692; SKX-NEXT:    retq
693; SKX-NEXT:  LBB17_1:
694; SKX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
695; SKX-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0
696; SKX-NEXT:    vpmovm2b %k0, %xmm0
697; SKX-NEXT:    vzeroupper
698; SKX-NEXT:    retq
699;
700; AVX512BW-LABEL: test8:
701; AVX512BW:       ## %bb.0:
702; AVX512BW-NEXT:    cmpl %esi, %edi
703; AVX512BW-NEXT:    jg LBB17_1
704; AVX512BW-NEXT:  ## %bb.2:
705; AVX512BW-NEXT:    kxorw %k0, %k0, %k0
706; AVX512BW-NEXT:    vpmovm2b %k0, %zmm0
707; AVX512BW-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
708; AVX512BW-NEXT:    vzeroupper
709; AVX512BW-NEXT:    retq
710; AVX512BW-NEXT:  LBB17_1:
711; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
712; AVX512BW-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0
713; AVX512BW-NEXT:    vpmovm2b %k0, %zmm0
714; AVX512BW-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
715; AVX512BW-NEXT:    vzeroupper
716; AVX512BW-NEXT:    retq
717;
718; AVX512DQ-LABEL: test8:
719; AVX512DQ:       ## %bb.0:
720; AVX512DQ-NEXT:    cmpl %esi, %edi
721; AVX512DQ-NEXT:    jg LBB17_1
722; AVX512DQ-NEXT:  ## %bb.2:
723; AVX512DQ-NEXT:    kxorw %k0, %k0, %k0
724; AVX512DQ-NEXT:    jmp LBB17_3
725; AVX512DQ-NEXT:  LBB17_1:
726; AVX512DQ-NEXT:    vpxor %xmm1, %xmm1, %xmm1
727; AVX512DQ-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0
728; AVX512DQ-NEXT:  LBB17_3:
729; AVX512DQ-NEXT:    vpmovm2d %k0, %zmm0
730; AVX512DQ-NEXT:    vpmovdb %zmm0, %xmm0
731; AVX512DQ-NEXT:    vzeroupper
732; AVX512DQ-NEXT:    retq
733;
734; X86-LABEL: test8:
735; X86:       ## %bb.0:
736; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
737; X86-NEXT:    cmpl {{[0-9]+}}(%esp), %eax
738; X86-NEXT:    jg LBB17_1
739; X86-NEXT:  ## %bb.2:
740; X86-NEXT:    kxorw %k0, %k0, %k0
741; X86-NEXT:    vpmovm2b %k0, %xmm0
742; X86-NEXT:    vzeroupper
743; X86-NEXT:    retl
744; X86-NEXT:  LBB17_1:
745; X86-NEXT:    vpxor %xmm1, %xmm1, %xmm1
746; X86-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0
747; X86-NEXT:    vpmovm2b %k0, %xmm0
748; X86-NEXT:    vzeroupper
749; X86-NEXT:    retl
750  %cond = icmp sgt i32 %a1, %b1
751  %cmp1 = icmp sgt <16 x i32> %a, zeroinitializer
752  %cmp2 = icmp ult <16 x i32> %b, zeroinitializer
753  %mix = select i1 %cond, <16 x i1> %cmp1, <16 x i1> %cmp2
754  %res = sext <16 x i1> %mix to <16 x i8>
755  ret <16 x i8> %res
756}
; test9: selects between two <16 x i1> vector arguments based on a scalar
; signed compare of %a1 and %b1, and returns the chosen mask. Every run
; expects the select to be lowered as a conditional branch (jg) that picks
; which argument register is converted to a mask and back to a vector.
757define <16 x i1> @test9(<16 x i1>%a, <16 x i1>%b, i32 %a1, i32 %b1) {
758; KNL-LABEL: test9:
759; KNL:       ## %bb.0:
760; KNL-NEXT:    cmpl %esi, %edi
761; KNL-NEXT:    jg LBB18_1
762; KNL-NEXT:  ## %bb.2:
763; KNL-NEXT:    vpmovsxbd %xmm1, %zmm0
764; KNL-NEXT:    jmp LBB18_3
765; KNL-NEXT:  LBB18_1:
766; KNL-NEXT:    vpmovsxbd %xmm0, %zmm0
767; KNL-NEXT:  LBB18_3:
768; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
769; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k1
770; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
771; KNL-NEXT:    vpmovdb %zmm0, %xmm0
772; KNL-NEXT:    vzeroupper
773; KNL-NEXT:    retq
774;
775; SKX-LABEL: test9:
776; SKX:       ## %bb.0:
777; SKX-NEXT:    cmpl %esi, %edi
778; SKX-NEXT:    jg LBB18_1
779; SKX-NEXT:  ## %bb.2:
780; SKX-NEXT:    vpsllw $7, %xmm1, %xmm0
781; SKX-NEXT:    jmp LBB18_3
782; SKX-NEXT:  LBB18_1:
783; SKX-NEXT:    vpsllw $7, %xmm0, %xmm0
784; SKX-NEXT:  LBB18_3:
785; SKX-NEXT:    vpmovb2m %xmm0, %k0
786; SKX-NEXT:    vpmovm2b %k0, %xmm0
787; SKX-NEXT:    retq
788;
789; AVX512BW-LABEL: test9:
790; AVX512BW:       ## %bb.0:
791; AVX512BW-NEXT:    cmpl %esi, %edi
792; AVX512BW-NEXT:    jg LBB18_1
793; AVX512BW-NEXT:  ## %bb.2:
794; AVX512BW-NEXT:    vpsllw $7, %xmm1, %xmm0
795; AVX512BW-NEXT:    jmp LBB18_3
796; AVX512BW-NEXT:  LBB18_1:
797; AVX512BW-NEXT:    vpsllw $7, %xmm0, %xmm0
798; AVX512BW-NEXT:  LBB18_3:
799; AVX512BW-NEXT:    vpmovb2m %zmm0, %k0
800; AVX512BW-NEXT:    vpmovm2b %k0, %zmm0
801; AVX512BW-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
802; AVX512BW-NEXT:    vzeroupper
803; AVX512BW-NEXT:    retq
804;
805; AVX512DQ-LABEL: test9:
806; AVX512DQ:       ## %bb.0:
807; AVX512DQ-NEXT:    cmpl %esi, %edi
808; AVX512DQ-NEXT:    jg LBB18_1
809; AVX512DQ-NEXT:  ## %bb.2:
810; AVX512DQ-NEXT:    vpmovsxbd %xmm1, %zmm0
811; AVX512DQ-NEXT:    jmp LBB18_3
812; AVX512DQ-NEXT:  LBB18_1:
813; AVX512DQ-NEXT:    vpmovsxbd %xmm0, %zmm0
814; AVX512DQ-NEXT:  LBB18_3:
815; AVX512DQ-NEXT:    vpslld $31, %zmm0, %zmm0
816; AVX512DQ-NEXT:    vpmovd2m %zmm0, %k0
817; AVX512DQ-NEXT:    vpmovm2d %k0, %zmm0
818; AVX512DQ-NEXT:    vpmovdb %zmm0, %xmm0
819; AVX512DQ-NEXT:    vzeroupper
820; AVX512DQ-NEXT:    retq
821;
822; X86-LABEL: test9:
823; X86:       ## %bb.0:
824; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
825; X86-NEXT:    cmpl {{[0-9]+}}(%esp), %eax
826; X86-NEXT:    jg LBB18_1
827; X86-NEXT:  ## %bb.2:
828; X86-NEXT:    vpsllw $7, %xmm1, %xmm0
829; X86-NEXT:    jmp LBB18_3
830; X86-NEXT:  LBB18_1:
831; X86-NEXT:    vpsllw $7, %xmm0, %xmm0
832; X86-NEXT:  LBB18_3:
833; X86-NEXT:    vpmovb2m %xmm0, %k0
834; X86-NEXT:    vpmovm2b %k0, %xmm0
835; X86-NEXT:    retl
836  %mask = icmp sgt i32 %a1, %b1
837  %c = select i1 %mask, <16 x i1>%a, <16 x i1>%b
838  ret <16 x i1>%c
839}

; test10: the <8 x i1> counterpart of test9 -- selects between two mask
; vector arguments based on a scalar signed compare of %a1 and %b1.
; NOTE(review): this function carries no autogenerated assertions. Its
; 'define' used to share a line with the closing brace of test9, which
; presumably hid it from utils/update_llc_test_checks.py. Now that the
; 'define' starts its own line, re-run that script to generate assertions.
define <8 x i1> @test10(<8 x i1>%a, <8 x i1>%b, i32 %a1, i32 %b1) {
840  %mask = icmp sgt i32 %a1, %b1
841  %c = select i1 %mask, <8 x i1>%a, <8 x i1>%b
842  ret <8 x i1>%c
843}
844
; test11: the <4 x i1> counterpart of test9 -- selects between two mask
; vector arguments based on a scalar signed compare of %a1 and %b1. Every run
; expects a conditional branch (jg) choosing which argument gets shifted by
; 31, followed by a vector-to-mask and mask-to-vector round trip.
845define <4 x i1> @test11(<4 x i1>%a, <4 x i1>%b, i32 %a1, i32 %b1) {
846; KNL-LABEL: test11:
847; KNL:       ## %bb.0:
848; KNL-NEXT:    cmpl %esi, %edi
849; KNL-NEXT:    jg LBB20_1
850; KNL-NEXT:  ## %bb.2:
851; KNL-NEXT:    vpslld $31, %xmm1, %xmm0
852; KNL-NEXT:    jmp LBB20_3
853; KNL-NEXT:  LBB20_1:
854; KNL-NEXT:    vpslld $31, %xmm0, %xmm0
855; KNL-NEXT:  LBB20_3:
856; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k1
857; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
858; KNL-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
859; KNL-NEXT:    vzeroupper
860; KNL-NEXT:    retq
861;
862; SKX-LABEL: test11:
863; SKX:       ## %bb.0:
864; SKX-NEXT:    cmpl %esi, %edi
865; SKX-NEXT:    jg LBB20_1
866; SKX-NEXT:  ## %bb.2:
867; SKX-NEXT:    vpslld $31, %xmm1, %xmm0
868; SKX-NEXT:    jmp LBB20_3
869; SKX-NEXT:  LBB20_1:
870; SKX-NEXT:    vpslld $31, %xmm0, %xmm0
871; SKX-NEXT:  LBB20_3:
872; SKX-NEXT:    vpmovd2m %xmm0, %k0
873; SKX-NEXT:    vpmovm2d %k0, %xmm0
874; SKX-NEXT:    retq
875;
876; AVX512BW-LABEL: test11:
877; AVX512BW:       ## %bb.0:
878; AVX512BW-NEXT:    cmpl %esi, %edi
879; AVX512BW-NEXT:    jg LBB20_1
880; AVX512BW-NEXT:  ## %bb.2:
881; AVX512BW-NEXT:    vpslld $31, %xmm1, %xmm0
882; AVX512BW-NEXT:    jmp LBB20_3
883; AVX512BW-NEXT:  LBB20_1:
884; AVX512BW-NEXT:    vpslld $31, %xmm0, %xmm0
885; AVX512BW-NEXT:  LBB20_3:
886; AVX512BW-NEXT:    vptestmd %zmm0, %zmm0, %k1
887; AVX512BW-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
888; AVX512BW-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
889; AVX512BW-NEXT:    vzeroupper
890; AVX512BW-NEXT:    retq
891;
892; AVX512DQ-LABEL: test11:
893; AVX512DQ:       ## %bb.0:
894; AVX512DQ-NEXT:    cmpl %esi, %edi
895; AVX512DQ-NEXT:    jg LBB20_1
896; AVX512DQ-NEXT:  ## %bb.2:
897; AVX512DQ-NEXT:    vpslld $31, %xmm1, %xmm0
898; AVX512DQ-NEXT:    jmp LBB20_3
899; AVX512DQ-NEXT:  LBB20_1:
900; AVX512DQ-NEXT:    vpslld $31, %xmm0, %xmm0
901; AVX512DQ-NEXT:  LBB20_3:
902; AVX512DQ-NEXT:    vpmovd2m %zmm0, %k0
903; AVX512DQ-NEXT:    vpmovm2d %k0, %zmm0
904; AVX512DQ-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
905; AVX512DQ-NEXT:    vzeroupper
906; AVX512DQ-NEXT:    retq
907;
908; X86-LABEL: test11:
909; X86:       ## %bb.0:
910; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
911; X86-NEXT:    cmpl {{[0-9]+}}(%esp), %eax
912; X86-NEXT:    jg LBB20_1
913; X86-NEXT:  ## %bb.2:
914; X86-NEXT:    vpslld $31, %xmm1, %xmm0
915; X86-NEXT:    jmp LBB20_3
916; X86-NEXT:  LBB20_1:
917; X86-NEXT:    vpslld $31, %xmm0, %xmm0
918; X86-NEXT:  LBB20_3:
919; X86-NEXT:    vpmovd2m %xmm0, %k0
920; X86-NEXT:    vpmovm2d %k0, %xmm0
921; X86-NEXT:    retl
922  %mask = icmp sgt i32 %a1, %b1
923  %c = select i1 %mask, <4 x i1>%a, <4 x i1>%b
924  ret <4 x i1>%c
925}
926
; Constant-folding test: the i16 constant 21845 (0x5555) is reinterpreted as a
; <16 x i1> mask and element 0 is used as a select condition. Bit 0 of 0x5555
; is set, so the expected code simply returns the first argument (%x).
927define i32 @test12(i32 %x, i32 %y)  {
928; CHECK-LABEL: test12:
929; CHECK:       ## %bb.0:
930; CHECK-NEXT:    movl %edi, %eax
931; CHECK-NEXT:    retq
932;
933; X86-LABEL: test12:
934; X86:       ## %bb.0:
935; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
936; X86-NEXT:    retl
937  %a = bitcast i16 21845 to <16 x i1>
938  %b = extractelement <16 x i1> %a, i32 0
939  %c = select i1 %b, i32 %x, i32 %y
940  ret i32 %c
941}
942
; Same shape as test12 but extracts element 3 of the 0x5555 mask. That bit is
; clear, so the expected 64-bit code returns the second argument (%y, in %esi).
943define i32 @test13(i32 %x, i32 %y)  {
944; CHECK-LABEL: test13:
945; CHECK:       ## %bb.0:
946; CHECK-NEXT:    movl %esi, %eax
947; CHECK-NEXT:    retq
948;
949; X86-LABEL: test13:
950; X86:       ## %bb.0:
951; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
952; X86-NEXT:    retl
953  %a = bitcast i16 21845 to <16 x i1>
954  %b = extractelement <16 x i1> %a, i32 3
955  %c = select i1 %b, i32 %x, i32 %y
956  ret i32 %c
957}
958
959; Make sure we don't crash on a large vector.
; The mask here is an i128 constant viewed as <128 x i1>; element 3 selects
; between %x and %y. The expected 64-bit code returns the first argument, i.e.
; the extract is folded to a constant rather than materialised as a vector.
960define i32 @test13_crash(i32 %x, i32 %y)  {
961; CHECK-LABEL: test13_crash:
962; CHECK:       ## %bb.0:
963; CHECK-NEXT:    movl %edi, %eax
964; CHECK-NEXT:    retq
965;
966; X86-LABEL: test13_crash:
967; X86:       ## %bb.0:
968; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
969; X86-NEXT:    retl
970  %a = bitcast i128 2184568686868686868686868686 to <128 x i1>
971  %b = extractelement <128 x i1> %a, i32 3
972  %c = select i1 %b, i32 %x, i32 %y
973  ret i32 %c
974}
975
; Extracts element 2 of the constant 0x5555 mask (a set bit) and inserts it at
; index 1 of the constant vector <1,0,0,1>. Everything is constant, so the
; expected code is a single constant load of [1,1,0,1].
976define <4 x i1> @test14()  {
977; CHECK-LABEL: test14:
978; CHECK:       ## %bb.0:
979; CHECK-NEXT:    vmovaps {{.*#+}} xmm0 = [1,1,0,1]
980; CHECK-NEXT:    retq
981;
982; X86-LABEL: test14:
983; X86:       ## %bb.0:
984; X86-NEXT:    vmovaps {{.*#+}} xmm0 = [1,1,0,1]
985; X86-NEXT:    retl
986  %a = bitcast i16 21845 to <16 x i1>
987  %b = extractelement <16 x i1> %a, i32 2
988  %c = insertelement <4 x i1> <i1 true, i1 false, i1 false, i1 true>, i1 %b, i32 1
989  ret <4 x i1> %c
990}
991
; Scalar-condition select between two constant <16 x i1> masks (0x5555 and 1).
; In every run the expected code picks between the two immediates with a
; conditional move in a general-purpose register and only then moves the result
; into a mask register for expansion to the <16 x i1> return value.
992define <16 x i1> @test15(i32 %x, i32 %y)  {
993; KNL-LABEL: test15:
994; KNL:       ## %bb.0:
995; KNL-NEXT:    cmpl %esi, %edi
996; KNL-NEXT:    movl $21845, %eax ## imm = 0x5555
997; KNL-NEXT:    movl $1, %ecx
998; KNL-NEXT:    cmovgl %eax, %ecx
999; KNL-NEXT:    kmovw %ecx, %k1
1000; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
1001; KNL-NEXT:    vpmovdb %zmm0, %xmm0
1002; KNL-NEXT:    vzeroupper
1003; KNL-NEXT:    retq
1004;
1005; SKX-LABEL: test15:
1006; SKX:       ## %bb.0:
1007; SKX-NEXT:    cmpl %esi, %edi
1008; SKX-NEXT:    movl $21845, %eax ## imm = 0x5555
1009; SKX-NEXT:    movl $1, %ecx
1010; SKX-NEXT:    cmovgl %eax, %ecx
1011; SKX-NEXT:    kmovd %ecx, %k0
1012; SKX-NEXT:    vpmovm2b %k0, %xmm0
1013; SKX-NEXT:    retq
1014;
1015; AVX512BW-LABEL: test15:
1016; AVX512BW:       ## %bb.0:
1017; AVX512BW-NEXT:    cmpl %esi, %edi
1018; AVX512BW-NEXT:    movl $21845, %eax ## imm = 0x5555
1019; AVX512BW-NEXT:    movl $1, %ecx
1020; AVX512BW-NEXT:    cmovgl %eax, %ecx
1021; AVX512BW-NEXT:    kmovd %ecx, %k0
1022; AVX512BW-NEXT:    vpmovm2b %k0, %zmm0
1023; AVX512BW-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
1024; AVX512BW-NEXT:    vzeroupper
1025; AVX512BW-NEXT:    retq
1026;
1027; AVX512DQ-LABEL: test15:
1028; AVX512DQ:       ## %bb.0:
1029; AVX512DQ-NEXT:    cmpl %esi, %edi
1030; AVX512DQ-NEXT:    movl $21845, %eax ## imm = 0x5555
1031; AVX512DQ-NEXT:    movl $1, %ecx
1032; AVX512DQ-NEXT:    cmovgl %eax, %ecx
1033; AVX512DQ-NEXT:    kmovw %ecx, %k0
1034; AVX512DQ-NEXT:    vpmovm2d %k0, %zmm0
1035; AVX512DQ-NEXT:    vpmovdb %zmm0, %xmm0
1036; AVX512DQ-NEXT:    vzeroupper
1037; AVX512DQ-NEXT:    retq
1038;
1039; X86-LABEL: test15:
1040; X86:       ## %bb.0:
1041; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
1042; X86-NEXT:    cmpl {{[0-9]+}}(%esp), %eax
1043; X86-NEXT:    movl $21845, %eax ## imm = 0x5555
1044; X86-NEXT:    movl $1, %ecx
1045; X86-NEXT:    cmovgl %eax, %ecx
1046; X86-NEXT:    kmovd %ecx, %k0
1047; X86-NEXT:    vpmovm2b %k0, %xmm0
1048; X86-NEXT:    retl
1049  %a = bitcast i16 21845 to <16 x i1>
1050  %b = bitcast i16 1 to <16 x i1>
1051  %mask = icmp sgt i32 %x, %y
1052  %c = select i1 %mask, <16 x i1> %a, <16 x i1> %b
1053  ret <16 x i1> %c
1054}
1055
; Reinterprets an i64 as a <64 x i1> mask, forces element 5 to true, and
; sign-extends the mask to <64 x i8>. The runs with avx512bw are expected to
; work on one 64-bit mask register (kmovq/kshiftrq/kxorq); the KNL and
; AVX512DQ runs are expected to split the mask into four 16-bit pieces and
; patch only the low piece that contains element 5.
1056define <64 x i8> @test16(i64 %x) {
1057;
1058; KNL-LABEL: test16:
1059; KNL:       ## %bb.0:
1060; KNL-NEXT:    movq %rdi, %rax
1061; KNL-NEXT:    movl %edi, %ecx
1062; KNL-NEXT:    kmovw %edi, %k0
1063; KNL-NEXT:    shrq $32, %rdi
1064; KNL-NEXT:    shrq $48, %rax
1065; KNL-NEXT:    shrl $16, %ecx
1066; KNL-NEXT:    kmovw %ecx, %k1
1067; KNL-NEXT:    kmovw %eax, %k2
1068; KNL-NEXT:    kmovw %edi, %k3
1069; KNL-NEXT:    movb $1, %al
1070; KNL-NEXT:    kmovw %eax, %k4
1071; KNL-NEXT:    kshiftrw $5, %k0, %k5
1072; KNL-NEXT:    kxorw %k4, %k5, %k4
1073; KNL-NEXT:    kshiftlw $15, %k4, %k4
1074; KNL-NEXT:    kshiftrw $10, %k4, %k4
1075; KNL-NEXT:    kxorw %k4, %k0, %k4
1076; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k3} {z}
1077; KNL-NEXT:    vpmovdb %zmm0, %xmm0
1078; KNL-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
1079; KNL-NEXT:    vpmovdb %zmm1, %xmm1
1080; KNL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm1
1081; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k4} {z}
1082; KNL-NEXT:    vpmovdb %zmm0, %xmm0
1083; KNL-NEXT:    vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
1084; KNL-NEXT:    vpmovdb %zmm2, %xmm2
1085; KNL-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
1086; KNL-NEXT:    retq
1087;
1088; SKX-LABEL: test16:
1089; SKX:       ## %bb.0:
1090; SKX-NEXT:    kmovq %rdi, %k0
1091; SKX-NEXT:    movb $1, %al
1092; SKX-NEXT:    kmovd %eax, %k1
1093; SKX-NEXT:    kshiftrq $5, %k0, %k2
1094; SKX-NEXT:    kxorq %k1, %k2, %k1
1095; SKX-NEXT:    kshiftlq $63, %k1, %k1
1096; SKX-NEXT:    kshiftrq $58, %k1, %k1
1097; SKX-NEXT:    kxorq %k1, %k0, %k0
1098; SKX-NEXT:    vpmovm2b %k0, %zmm0
1099; SKX-NEXT:    retq
1100;
1101; AVX512BW-LABEL: test16:
1102; AVX512BW:       ## %bb.0:
1103; AVX512BW-NEXT:    kmovq %rdi, %k0
1104; AVX512BW-NEXT:    movb $1, %al
1105; AVX512BW-NEXT:    kmovd %eax, %k1
1106; AVX512BW-NEXT:    kshiftrq $5, %k0, %k2
1107; AVX512BW-NEXT:    kxorq %k1, %k2, %k1
1108; AVX512BW-NEXT:    kshiftlq $63, %k1, %k1
1109; AVX512BW-NEXT:    kshiftrq $58, %k1, %k1
1110; AVX512BW-NEXT:    kxorq %k1, %k0, %k0
1111; AVX512BW-NEXT:    vpmovm2b %k0, %zmm0
1112; AVX512BW-NEXT:    retq
1113;
1114; AVX512DQ-LABEL: test16:
1115; AVX512DQ:       ## %bb.0:
1116; AVX512DQ-NEXT:    movq %rdi, %rax
1117; AVX512DQ-NEXT:    movl %edi, %ecx
1118; AVX512DQ-NEXT:    kmovw %edi, %k0
1119; AVX512DQ-NEXT:    shrq $32, %rdi
1120; AVX512DQ-NEXT:    shrq $48, %rax
1121; AVX512DQ-NEXT:    shrl $16, %ecx
1122; AVX512DQ-NEXT:    kmovw %ecx, %k1
1123; AVX512DQ-NEXT:    kmovw %eax, %k2
1124; AVX512DQ-NEXT:    kmovw %edi, %k3
1125; AVX512DQ-NEXT:    movb $1, %al
1126; AVX512DQ-NEXT:    kmovw %eax, %k4
1127; AVX512DQ-NEXT:    kshiftrw $5, %k0, %k5
1128; AVX512DQ-NEXT:    kxorw %k4, %k5, %k4
1129; AVX512DQ-NEXT:    kshiftlw $15, %k4, %k4
1130; AVX512DQ-NEXT:    kshiftrw $10, %k4, %k4
1131; AVX512DQ-NEXT:    kxorw %k4, %k0, %k0
1132; AVX512DQ-NEXT:    vpmovm2d %k3, %zmm0
1133; AVX512DQ-NEXT:    vpmovdb %zmm0, %xmm0
1134; AVX512DQ-NEXT:    vpmovm2d %k2, %zmm1
1135; AVX512DQ-NEXT:    vpmovdb %zmm1, %xmm1
1136; AVX512DQ-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm1
1137; AVX512DQ-NEXT:    vpmovm2d %k0, %zmm0
1138; AVX512DQ-NEXT:    vpmovdb %zmm0, %xmm0
1139; AVX512DQ-NEXT:    vpmovm2d %k1, %zmm2
1140; AVX512DQ-NEXT:    vpmovdb %zmm2, %xmm2
1141; AVX512DQ-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
1142; AVX512DQ-NEXT:    retq
1143;
1144; X86-LABEL: test16:
1145; X86:       ## %bb.0:
1146; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k0
1147; X86-NEXT:    movb $1, %al
1148; X86-NEXT:    kmovd %eax, %k1
1149; X86-NEXT:    kshiftrq $5, %k0, %k2
1150; X86-NEXT:    kxorq %k1, %k2, %k1
1151; X86-NEXT:    kshiftlq $63, %k1, %k1
1152; X86-NEXT:    kshiftrq $58, %k1, %k1
1153; X86-NEXT:    kxorq %k1, %k0, %k0
1154; X86-NEXT:    vpmovm2b %k0, %zmm0
1155; X86-NEXT:    retl
1156  %a = bitcast i64 %x to <64 x i1>
1157  %b = insertelement <64 x i1>%a, i1 true, i32 5
1158  %c = sext <64 x i1>%b to <64 x i8>
1159  ret <64 x i8>%c
1160}
1161
; Variant of test16 where the bit inserted at element 5 of the <64 x i1> mask
; is not a constant but the result of a signed compare (%y > %z). The expected
; code materialises the compare with setg and merges it into the mask with the
; same shift/xor sequence used for the constant case.
1162define <64 x i8> @test17(i64 %x, i32 %y, i32 %z) {
1163;
1164; KNL-LABEL: test17:
1165; KNL:       ## %bb.0:
1166; KNL-NEXT:    movq %rdi, %rax
1167; KNL-NEXT:    movl %edi, %ecx
1168; KNL-NEXT:    kmovw %edi, %k0
1169; KNL-NEXT:    shrq $32, %rdi
1170; KNL-NEXT:    shrq $48, %rax
1171; KNL-NEXT:    shrl $16, %ecx
1172; KNL-NEXT:    kmovw %ecx, %k1
1173; KNL-NEXT:    kmovw %eax, %k2
1174; KNL-NEXT:    kmovw %edi, %k3
1175; KNL-NEXT:    cmpl %edx, %esi
1176; KNL-NEXT:    setg %al
1177; KNL-NEXT:    kshiftrw $5, %k0, %k4
1178; KNL-NEXT:    kmovw %eax, %k5
1179; KNL-NEXT:    kxorw %k5, %k4, %k4
1180; KNL-NEXT:    kshiftlw $15, %k4, %k4
1181; KNL-NEXT:    kshiftrw $10, %k4, %k4
1182; KNL-NEXT:    kxorw %k4, %k0, %k4
1183; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k3} {z}
1184; KNL-NEXT:    vpmovdb %zmm0, %xmm0
1185; KNL-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
1186; KNL-NEXT:    vpmovdb %zmm1, %xmm1
1187; KNL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm1
1188; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k4} {z}
1189; KNL-NEXT:    vpmovdb %zmm0, %xmm0
1190; KNL-NEXT:    vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
1191; KNL-NEXT:    vpmovdb %zmm2, %xmm2
1192; KNL-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
1193; KNL-NEXT:    retq
1194;
1195; SKX-LABEL: test17:
1196; SKX:       ## %bb.0:
1197; SKX-NEXT:    kmovq %rdi, %k0
1198; SKX-NEXT:    cmpl %edx, %esi
1199; SKX-NEXT:    setg %al
1200; SKX-NEXT:    kmovd %eax, %k1
1201; SKX-NEXT:    kshiftrq $5, %k0, %k2
1202; SKX-NEXT:    kxorq %k1, %k2, %k1
1203; SKX-NEXT:    kshiftlq $63, %k1, %k1
1204; SKX-NEXT:    kshiftrq $58, %k1, %k1
1205; SKX-NEXT:    kxorq %k1, %k0, %k0
1206; SKX-NEXT:    vpmovm2b %k0, %zmm0
1207; SKX-NEXT:    retq
1208;
1209; AVX512BW-LABEL: test17:
1210; AVX512BW:       ## %bb.0:
1211; AVX512BW-NEXT:    kmovq %rdi, %k0
1212; AVX512BW-NEXT:    cmpl %edx, %esi
1213; AVX512BW-NEXT:    setg %al
1214; AVX512BW-NEXT:    kmovd %eax, %k1
1215; AVX512BW-NEXT:    kshiftrq $5, %k0, %k2
1216; AVX512BW-NEXT:    kxorq %k1, %k2, %k1
1217; AVX512BW-NEXT:    kshiftlq $63, %k1, %k1
1218; AVX512BW-NEXT:    kshiftrq $58, %k1, %k1
1219; AVX512BW-NEXT:    kxorq %k1, %k0, %k0
1220; AVX512BW-NEXT:    vpmovm2b %k0, %zmm0
1221; AVX512BW-NEXT:    retq
1222;
1223; AVX512DQ-LABEL: test17:
1224; AVX512DQ:       ## %bb.0:
1225; AVX512DQ-NEXT:    movq %rdi, %rax
1226; AVX512DQ-NEXT:    movl %edi, %ecx
1227; AVX512DQ-NEXT:    kmovw %edi, %k0
1228; AVX512DQ-NEXT:    shrq $32, %rdi
1229; AVX512DQ-NEXT:    shrq $48, %rax
1230; AVX512DQ-NEXT:    shrl $16, %ecx
1231; AVX512DQ-NEXT:    kmovw %ecx, %k1
1232; AVX512DQ-NEXT:    kmovw %eax, %k2
1233; AVX512DQ-NEXT:    kmovw %edi, %k3
1234; AVX512DQ-NEXT:    cmpl %edx, %esi
1235; AVX512DQ-NEXT:    setg %al
1236; AVX512DQ-NEXT:    kshiftrw $5, %k0, %k4
1237; AVX512DQ-NEXT:    kmovw %eax, %k5
1238; AVX512DQ-NEXT:    kxorw %k5, %k4, %k4
1239; AVX512DQ-NEXT:    kshiftlw $15, %k4, %k4
1240; AVX512DQ-NEXT:    kshiftrw $10, %k4, %k4
1241; AVX512DQ-NEXT:    kxorw %k4, %k0, %k0
1242; AVX512DQ-NEXT:    vpmovm2d %k3, %zmm0
1243; AVX512DQ-NEXT:    vpmovdb %zmm0, %xmm0
1244; AVX512DQ-NEXT:    vpmovm2d %k2, %zmm1
1245; AVX512DQ-NEXT:    vpmovdb %zmm1, %xmm1
1246; AVX512DQ-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm1
1247; AVX512DQ-NEXT:    vpmovm2d %k0, %zmm0
1248; AVX512DQ-NEXT:    vpmovdb %zmm0, %xmm0
1249; AVX512DQ-NEXT:    vpmovm2d %k1, %zmm2
1250; AVX512DQ-NEXT:    vpmovdb %zmm2, %xmm2
1251; AVX512DQ-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
1252; AVX512DQ-NEXT:    retq
1253;
1254; X86-LABEL: test17:
1255; X86:       ## %bb.0:
1256; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
1257; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k0
1258; X86-NEXT:    cmpl {{[0-9]+}}(%esp), %eax
1259; X86-NEXT:    setg %al
1260; X86-NEXT:    kmovd %eax, %k1
1261; X86-NEXT:    kshiftrq $5, %k0, %k2
1262; X86-NEXT:    kxorq %k1, %k2, %k1
1263; X86-NEXT:    kshiftlq $63, %k1, %k1
1264; X86-NEXT:    kshiftrq $58, %k1, %k1
1265; X86-NEXT:    kxorq %k1, %k0, %k0
1266; X86-NEXT:    vpmovm2b %k0, %zmm0
1267; X86-NEXT:    retl
1268  %a = bitcast i64 %x to <64 x i1>
1269  %b = icmp sgt i32 %y, %z
1270  %c = insertelement <64 x i1>%a, i1 %b, i32 5
1271  %d = sext <64 x i1>%c to <64 x i8>
1272  ret <64 x i8>%d
1273}
1274
; Moves individual bits between two masks of different widths: elements 8 and 9
; of the <16 x i1> view of %y are inserted into the <8 x i1> view of %a at
; indices 7 and 6 respectively. The expected code does this entirely with
; mask-register shifts, xors and ors.
1275define <8 x i1> @test18(i8 %a, i16 %y) {
1276; KNL-LABEL: test18:
1277; KNL:       ## %bb.0:
1278; KNL-NEXT:    kmovw %edi, %k1
1279; KNL-NEXT:    kmovw %esi, %k2
1280; KNL-NEXT:    kshiftrw $8, %k2, %k0
1281; KNL-NEXT:    kshiftrw $9, %k2, %k2
1282; KNL-NEXT:    kshiftrw $6, %k1, %k3
1283; KNL-NEXT:    kxorw %k2, %k3, %k2
1284; KNL-NEXT:    kshiftlw $15, %k2, %k2
1285; KNL-NEXT:    kshiftrw $9, %k2, %k2
1286; KNL-NEXT:    kxorw %k2, %k1, %k1
1287; KNL-NEXT:    kshiftlw $9, %k1, %k1
1288; KNL-NEXT:    kshiftrw $9, %k1, %k1
1289; KNL-NEXT:    kshiftlw $7, %k0, %k0
1290; KNL-NEXT:    korw %k0, %k1, %k1
1291; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
1292; KNL-NEXT:    vpmovdw %zmm0, %ymm0
1293; KNL-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $ymm0
1294; KNL-NEXT:    vzeroupper
1295; KNL-NEXT:    retq
1296;
1297; SKX-LABEL: test18:
1298; SKX:       ## %bb.0:
1299; SKX-NEXT:    kmovd %edi, %k1
1300; SKX-NEXT:    kmovd %esi, %k2
1301; SKX-NEXT:    kshiftrw $8, %k2, %k0
1302; SKX-NEXT:    kshiftrw $9, %k2, %k2
1303; SKX-NEXT:    kshiftrb $6, %k1, %k3
1304; SKX-NEXT:    kxorb %k2, %k3, %k2
1305; SKX-NEXT:    kshiftlb $7, %k2, %k2
1306; SKX-NEXT:    kshiftrb $1, %k2, %k2
1307; SKX-NEXT:    kxorb %k2, %k1, %k1
1308; SKX-NEXT:    kshiftlb $1, %k1, %k1
1309; SKX-NEXT:    kshiftrb $1, %k1, %k1
1310; SKX-NEXT:    kshiftlb $7, %k0, %k0
1311; SKX-NEXT:    korb %k0, %k1, %k0
1312; SKX-NEXT:    vpmovm2w %k0, %xmm0
1313; SKX-NEXT:    retq
1314;
1315; AVX512BW-LABEL: test18:
1316; AVX512BW:       ## %bb.0:
1317; AVX512BW-NEXT:    kmovd %edi, %k1
1318; AVX512BW-NEXT:    kmovd %esi, %k2
1319; AVX512BW-NEXT:    kshiftrw $8, %k2, %k0
1320; AVX512BW-NEXT:    kshiftrw $9, %k2, %k2
1321; AVX512BW-NEXT:    kshiftrw $6, %k1, %k3
1322; AVX512BW-NEXT:    kxorw %k2, %k3, %k2
1323; AVX512BW-NEXT:    kshiftlw $15, %k2, %k2
1324; AVX512BW-NEXT:    kshiftrw $9, %k2, %k2
1325; AVX512BW-NEXT:    kxorw %k2, %k1, %k1
1326; AVX512BW-NEXT:    kshiftlw $9, %k1, %k1
1327; AVX512BW-NEXT:    kshiftrw $9, %k1, %k1
1328; AVX512BW-NEXT:    kshiftlw $7, %k0, %k0
1329; AVX512BW-NEXT:    korw %k0, %k1, %k0
1330; AVX512BW-NEXT:    vpmovm2w %k0, %zmm0
1331; AVX512BW-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
1332; AVX512BW-NEXT:    vzeroupper
1333; AVX512BW-NEXT:    retq
1334;
1335; AVX512DQ-LABEL: test18:
1336; AVX512DQ:       ## %bb.0:
1337; AVX512DQ-NEXT:    kmovw %edi, %k1
1338; AVX512DQ-NEXT:    kmovw %esi, %k2
1339; AVX512DQ-NEXT:    kshiftrw $8, %k2, %k0
1340; AVX512DQ-NEXT:    kshiftrw $9, %k2, %k2
1341; AVX512DQ-NEXT:    kshiftrb $6, %k1, %k3
1342; AVX512DQ-NEXT:    kxorb %k2, %k3, %k2
1343; AVX512DQ-NEXT:    kshiftlb $7, %k2, %k2
1344; AVX512DQ-NEXT:    kshiftrb $1, %k2, %k2
1345; AVX512DQ-NEXT:    kxorb %k2, %k1, %k1
1346; AVX512DQ-NEXT:    kshiftlb $1, %k1, %k1
1347; AVX512DQ-NEXT:    kshiftrb $1, %k1, %k1
1348; AVX512DQ-NEXT:    kshiftlb $7, %k0, %k0
1349; AVX512DQ-NEXT:    korb %k0, %k1, %k0
1350; AVX512DQ-NEXT:    vpmovm2d %k0, %zmm0
1351; AVX512DQ-NEXT:    vpmovdw %zmm0, %ymm0
1352; AVX512DQ-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $ymm0
1353; AVX512DQ-NEXT:    vzeroupper
1354; AVX512DQ-NEXT:    retq
1355;
1356; X86-LABEL: test18:
1357; X86:       ## %bb.0:
1358; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k0
1359; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
1360; X86-NEXT:    kshiftrw $9, %k1, %k2
1361; X86-NEXT:    kshiftrw $8, %k1, %k1
1362; X86-NEXT:    kshiftlb $7, %k1, %k1
1363; X86-NEXT:    kshiftrb $6, %k0, %k3
1364; X86-NEXT:    kxorb %k2, %k3, %k2
1365; X86-NEXT:    kshiftlb $7, %k2, %k2
1366; X86-NEXT:    kshiftrb $1, %k2, %k2
1367; X86-NEXT:    kxorb %k2, %k0, %k0
1368; X86-NEXT:    kshiftlb $1, %k0, %k0
1369; X86-NEXT:    kshiftrb $1, %k0, %k0
1370; X86-NEXT:    korb %k1, %k0, %k0
1371; X86-NEXT:    vpmovm2w %k0, %xmm0
1372; X86-NEXT:    retl
1373  %b = bitcast i8 %a to <8 x i1>
1374  %b1 = bitcast i16 %y to <16 x i1>
1375  %el1 = extractelement <16 x i1>%b1, i32 8
1376  %el2 = extractelement <16 x i1>%b1, i32 9
1377  %c = insertelement <8 x i1>%b, i1 %el1, i32 7
1378  %d = insertelement <8 x i1>%c, i1 %el2, i32 6
1379  ret <8 x i1>%d
1380}
; Zero-masking select: keeps the lanes of %x whose <32 x i1> mask bit is set
; and zeroes the rest. Runs with avx512bw are expected to turn the mask into a
; k-register and use a zero-masked vmovdqu16; the KNL and AVX512DQ runs are
; expected to widen the mask to words and AND it with each 256-bit half.
1381define <32 x i16> @test21(<32 x i16> %x , <32 x i1> %mask) nounwind readnone {
1382; KNL-LABEL: test21:
1383; KNL:       ## %bb.0:
1384; KNL-NEXT:    vextracti128 $1, %ymm2, %xmm3
1385; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero,xmm3[8],zero,xmm3[9],zero,xmm3[10],zero,xmm3[11],zero,xmm3[12],zero,xmm3[13],zero,xmm3[14],zero,xmm3[15],zero
1386; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero
1387; KNL-NEXT:    vpsllw $15, %ymm2, %ymm2
1388; KNL-NEXT:    vpsraw $15, %ymm2, %ymm2
1389; KNL-NEXT:    vpand %ymm0, %ymm2, %ymm0
1390; KNL-NEXT:    vpsllw $15, %ymm3, %ymm2
1391; KNL-NEXT:    vpsraw $15, %ymm2, %ymm2
1392; KNL-NEXT:    vpand %ymm1, %ymm2, %ymm1
1393; KNL-NEXT:    retq
1394;
1395; SKX-LABEL: test21:
1396; SKX:       ## %bb.0:
1397; SKX-NEXT:    vpsllw $7, %ymm1, %ymm1
1398; SKX-NEXT:    vpmovb2m %ymm1, %k1
1399; SKX-NEXT:    vmovdqu16 %zmm0, %zmm0 {%k1} {z}
1400; SKX-NEXT:    retq
1401;
1402; AVX512BW-LABEL: test21:
1403; AVX512BW:       ## %bb.0:
1404; AVX512BW-NEXT:    vpsllw $7, %ymm1, %ymm1
1405; AVX512BW-NEXT:    vpmovb2m %zmm1, %k1
1406; AVX512BW-NEXT:    vmovdqu16 %zmm0, %zmm0 {%k1} {z}
1407; AVX512BW-NEXT:    retq
1408;
1409; AVX512DQ-LABEL: test21:
1410; AVX512DQ:       ## %bb.0:
1411; AVX512DQ-NEXT:    vextracti128 $1, %ymm2, %xmm3
1412; AVX512DQ-NEXT:    vpmovzxbw {{.*#+}} ymm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero,xmm3[8],zero,xmm3[9],zero,xmm3[10],zero,xmm3[11],zero,xmm3[12],zero,xmm3[13],zero,xmm3[14],zero,xmm3[15],zero
1413; AVX512DQ-NEXT:    vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero
1414; AVX512DQ-NEXT:    vpsllw $15, %ymm2, %ymm2
1415; AVX512DQ-NEXT:    vpsraw $15, %ymm2, %ymm2
1416; AVX512DQ-NEXT:    vpand %ymm0, %ymm2, %ymm0
1417; AVX512DQ-NEXT:    vpsllw $15, %ymm3, %ymm2
1418; AVX512DQ-NEXT:    vpsraw $15, %ymm2, %ymm2
1419; AVX512DQ-NEXT:    vpand %ymm1, %ymm2, %ymm1
1420; AVX512DQ-NEXT:    retq
1421;
1422; X86-LABEL: test21:
1423; X86:       ## %bb.0:
1424; X86-NEXT:    vpsllw $7, %ymm1, %ymm1
1425; X86-NEXT:    vpmovb2m %ymm1, %k1
1426; X86-NEXT:    vmovdqu16 %zmm0, %zmm0 {%k1} {z}
1427; X86-NEXT:    retl
1428  %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
1429  ret <32 x i16> %ret
1430}
1431
; Stores a <4 x i1> argument to memory unchanged. In every run the expected
; code converts the vector to a mask register and writes a single byte to
; %addr, either directly with kmovb or through a general-purpose register.
1432define void @test22(<4 x i1> %a, <4 x i1>* %addr) {
1433; KNL-LABEL: test22:
1434; KNL:       ## %bb.0:
1435; KNL-NEXT:    vpslld $31, %xmm0, %xmm0
1436; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k0
1437; KNL-NEXT:    kmovw %k0, %eax
1438; KNL-NEXT:    movb %al, (%rdi)
1439; KNL-NEXT:    vzeroupper
1440; KNL-NEXT:    retq
1441;
1442; SKX-LABEL: test22:
1443; SKX:       ## %bb.0:
1444; SKX-NEXT:    vpslld $31, %xmm0, %xmm0
1445; SKX-NEXT:    vpmovd2m %xmm0, %k0
1446; SKX-NEXT:    kmovb %k0, (%rdi)
1447; SKX-NEXT:    retq
1448;
1449; AVX512BW-LABEL: test22:
1450; AVX512BW:       ## %bb.0:
1451; AVX512BW-NEXT:    vpslld $31, %xmm0, %xmm0
1452; AVX512BW-NEXT:    vptestmd %zmm0, %zmm0, %k0
1453; AVX512BW-NEXT:    kmovd %k0, %eax
1454; AVX512BW-NEXT:    movb %al, (%rdi)
1455; AVX512BW-NEXT:    vzeroupper
1456; AVX512BW-NEXT:    retq
1457;
1458; AVX512DQ-LABEL: test22:
1459; AVX512DQ:       ## %bb.0:
1460; AVX512DQ-NEXT:    vpslld $31, %xmm0, %xmm0
1461; AVX512DQ-NEXT:    vpmovd2m %zmm0, %k0
1462; AVX512DQ-NEXT:    kmovb %k0, (%rdi)
1463; AVX512DQ-NEXT:    vzeroupper
1464; AVX512DQ-NEXT:    retq
1465;
1466; X86-LABEL: test22:
1467; X86:       ## %bb.0:
1468; X86-NEXT:    vpslld $31, %xmm0, %xmm0
1469; X86-NEXT:    vpmovd2m %xmm0, %k0
1470; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
1471; X86-NEXT:    kmovb %k0, (%eax)
1472; X86-NEXT:    retl
1473  store <4 x i1> %a, <4 x i1>* %addr
1474  ret void
1475}
1476
; Same as test22 for a <2 x i1> argument: the vector is converted to a mask
; (using the quadword forms vpsllq / vptestmq / vpmovq2m) and stored as one byte.
1477define void @test23(<2 x i1> %a, <2 x i1>* %addr) {
1478; KNL-LABEL: test23:
1479; KNL:       ## %bb.0:
1480; KNL-NEXT:    vpsllq $63, %xmm0, %xmm0
1481; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k0
1482; KNL-NEXT:    kmovw %k0, %eax
1483; KNL-NEXT:    movb %al, (%rdi)
1484; KNL-NEXT:    vzeroupper
1485; KNL-NEXT:    retq
1486;
1487; SKX-LABEL: test23:
1488; SKX:       ## %bb.0:
1489; SKX-NEXT:    vpsllq $63, %xmm0, %xmm0
1490; SKX-NEXT:    vpmovq2m %xmm0, %k0
1491; SKX-NEXT:    kmovb %k0, (%rdi)
1492; SKX-NEXT:    retq
1493;
1494; AVX512BW-LABEL: test23:
1495; AVX512BW:       ## %bb.0:
1496; AVX512BW-NEXT:    vpsllq $63, %xmm0, %xmm0
1497; AVX512BW-NEXT:    vptestmq %zmm0, %zmm0, %k0
1498; AVX512BW-NEXT:    kmovd %k0, %eax
1499; AVX512BW-NEXT:    movb %al, (%rdi)
1500; AVX512BW-NEXT:    vzeroupper
1501; AVX512BW-NEXT:    retq
1502;
1503; AVX512DQ-LABEL: test23:
1504; AVX512DQ:       ## %bb.0:
1505; AVX512DQ-NEXT:    vpsllq $63, %xmm0, %xmm0
1506; AVX512DQ-NEXT:    vpmovq2m %zmm0, %k0
1507; AVX512DQ-NEXT:    kmovb %k0, (%rdi)
1508; AVX512DQ-NEXT:    vzeroupper
1509; AVX512DQ-NEXT:    retq
1510;
1511; X86-LABEL: test23:
1512; X86:       ## %bb.0:
1513; X86-NEXT:    vpsllq $63, %xmm0, %xmm0
1514; X86-NEXT:    vpmovq2m %xmm0, %k0
1515; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
1516; X86-NEXT:    kmovb %k0, (%eax)
1517; X86-NEXT:    retl
1518  store <2 x i1> %a, <2 x i1>* %addr
1519  ret void
1520}
1521
; Inverts a <1 x i1> argument (xor with constant 1) and stores it to %ptr.
; The argument arrives in a general-purpose register / stack slot; the expected
; code performs the inversion in mask registers (kxnorw to build all-ones,
; then kxorw) before storing one byte.
1522define void @store_v1i1(<1 x i1> %c , <1 x i1>* %ptr) {
1523; KNL-LABEL: store_v1i1:
1524; KNL:       ## %bb.0:
1525; KNL-NEXT:    kmovw %edi, %k0
1526; KNL-NEXT:    kxnorw %k0, %k0, %k1
1527; KNL-NEXT:    kxorw %k1, %k0, %k0
1528; KNL-NEXT:    kmovw %k0, %eax
1529; KNL-NEXT:    movb %al, (%rsi)
1530; KNL-NEXT:    retq
1531;
1532; SKX-LABEL: store_v1i1:
1533; SKX:       ## %bb.0:
1534; SKX-NEXT:    kmovd %edi, %k0
1535; SKX-NEXT:    kxnorw %k0, %k0, %k1
1536; SKX-NEXT:    kxorw %k1, %k0, %k0
1537; SKX-NEXT:    kmovb %k0, (%rsi)
1538; SKX-NEXT:    retq
1539;
1540; AVX512BW-LABEL: store_v1i1:
1541; AVX512BW:       ## %bb.0:
1542; AVX512BW-NEXT:    kmovd %edi, %k0
1543; AVX512BW-NEXT:    kxnorw %k0, %k0, %k1
1544; AVX512BW-NEXT:    kxorw %k1, %k0, %k0
1545; AVX512BW-NEXT:    kmovd %k0, %eax
1546; AVX512BW-NEXT:    movb %al, (%rsi)
1547; AVX512BW-NEXT:    retq
1548;
1549; AVX512DQ-LABEL: store_v1i1:
1550; AVX512DQ:       ## %bb.0:
1551; AVX512DQ-NEXT:    kmovw %edi, %k0
1552; AVX512DQ-NEXT:    kxnorw %k0, %k0, %k1
1553; AVX512DQ-NEXT:    kxorw %k1, %k0, %k0
1554; AVX512DQ-NEXT:    kmovb %k0, (%rsi)
1555; AVX512DQ-NEXT:    retq
1556;
1557; X86-LABEL: store_v1i1:
1558; X86:       ## %bb.0:
1559; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
1560; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
1561; X86-NEXT:    kxnorw %k0, %k0, %k1
1562; X86-NEXT:    kxorw %k1, %k0, %k0
1563; X86-NEXT:    kmovb %k0, (%eax)
1564; X86-NEXT:    retl
1565  %x = xor <1 x i1> %c, <i1 1>
1566  store <1 x i1> %x, <1 x i1>*  %ptr, align 4
1567  ret void
1568}
1569
; Inverts a <2 x i1> argument (xor with all-ones) and stores it to %ptr.
; Where the mask is produced by a test instruction, the expected code folds the
; inversion into vptestnmq; where it is produced by vpmovq2m, an explicit
; knotw follows.
1570define void @store_v2i1(<2 x i1> %c , <2 x i1>* %ptr) {
1571; KNL-LABEL: store_v2i1:
1572; KNL:       ## %bb.0:
1573; KNL-NEXT:    vpsllq $63, %xmm0, %xmm0
1574; KNL-NEXT:    vptestnmq %zmm0, %zmm0, %k0
1575; KNL-NEXT:    kmovw %k0, %eax
1576; KNL-NEXT:    movb %al, (%rdi)
1577; KNL-NEXT:    vzeroupper
1578; KNL-NEXT:    retq
1579;
1580; SKX-LABEL: store_v2i1:
1581; SKX:       ## %bb.0:
1582; SKX-NEXT:    vpsllq $63, %xmm0, %xmm0
1583; SKX-NEXT:    vpmovq2m %xmm0, %k0
1584; SKX-NEXT:    knotw %k0, %k0
1585; SKX-NEXT:    kmovb %k0, (%rdi)
1586; SKX-NEXT:    retq
1587;
1588; AVX512BW-LABEL: store_v2i1:
1589; AVX512BW:       ## %bb.0:
1590; AVX512BW-NEXT:    vpsllq $63, %xmm0, %xmm0
1591; AVX512BW-NEXT:    vptestnmq %zmm0, %zmm0, %k0
1592; AVX512BW-NEXT:    kmovd %k0, %eax
1593; AVX512BW-NEXT:    movb %al, (%rdi)
1594; AVX512BW-NEXT:    vzeroupper
1595; AVX512BW-NEXT:    retq
1596;
1597; AVX512DQ-LABEL: store_v2i1:
1598; AVX512DQ:       ## %bb.0:
1599; AVX512DQ-NEXT:    vpsllq $63, %xmm0, %xmm0
1600; AVX512DQ-NEXT:    vpmovq2m %zmm0, %k0
1601; AVX512DQ-NEXT:    knotw %k0, %k0
1602; AVX512DQ-NEXT:    kmovb %k0, (%rdi)
1603; AVX512DQ-NEXT:    vzeroupper
1604; AVX512DQ-NEXT:    retq
1605;
1606; X86-LABEL: store_v2i1:
1607; X86:       ## %bb.0:
1608; X86-NEXT:    vpsllq $63, %xmm0, %xmm0
1609; X86-NEXT:    vpmovq2m %xmm0, %k0
1610; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
1611; X86-NEXT:    knotw %k0, %k0
1612; X86-NEXT:    kmovb %k0, (%eax)
1613; X86-NEXT:    retl
1614  %x = xor <2 x i1> %c, <i1 1, i1 1>
1615  store <2 x i1> %x, <2 x i1>*  %ptr, align 4
1616  ret void
1617}
1618
; Inverts a <4 x i1> argument (xor with all-ones) and stores it to %ptr.
; Mirrors store_v2i1 with doubleword elements: the inversion is either folded
; into vptestnmd or emitted as knotw after vpmovd2m.
1619define void @store_v4i1(<4 x i1> %c , <4 x i1>* %ptr) {
1620; KNL-LABEL: store_v4i1:
1621; KNL:       ## %bb.0:
1622; KNL-NEXT:    vpslld $31, %xmm0, %xmm0
1623; KNL-NEXT:    vptestnmd %zmm0, %zmm0, %k0
1624; KNL-NEXT:    kmovw %k0, %eax
1625; KNL-NEXT:    movb %al, (%rdi)
1626; KNL-NEXT:    vzeroupper
1627; KNL-NEXT:    retq
1628;
1629; SKX-LABEL: store_v4i1:
1630; SKX:       ## %bb.0:
1631; SKX-NEXT:    vpslld $31, %xmm0, %xmm0
1632; SKX-NEXT:    vpmovd2m %xmm0, %k0
1633; SKX-NEXT:    knotw %k0, %k0
1634; SKX-NEXT:    kmovb %k0, (%rdi)
1635; SKX-NEXT:    retq
1636;
1637; AVX512BW-LABEL: store_v4i1:
1638; AVX512BW:       ## %bb.0:
1639; AVX512BW-NEXT:    vpslld $31, %xmm0, %xmm0
1640; AVX512BW-NEXT:    vptestnmd %zmm0, %zmm0, %k0
1641; AVX512BW-NEXT:    kmovd %k0, %eax
1642; AVX512BW-NEXT:    movb %al, (%rdi)
1643; AVX512BW-NEXT:    vzeroupper
1644; AVX512BW-NEXT:    retq
1645;
1646; AVX512DQ-LABEL: store_v4i1:
1647; AVX512DQ:       ## %bb.0:
1648; AVX512DQ-NEXT:    vpslld $31, %xmm0, %xmm0
1649; AVX512DQ-NEXT:    vpmovd2m %zmm0, %k0
1650; AVX512DQ-NEXT:    knotw %k0, %k0
1651; AVX512DQ-NEXT:    kmovb %k0, (%rdi)
1652; AVX512DQ-NEXT:    vzeroupper
1653; AVX512DQ-NEXT:    retq
1654;
1655; X86-LABEL: store_v4i1:
1656; X86:       ## %bb.0:
1657; X86-NEXT:    vpslld $31, %xmm0, %xmm0
1658; X86-NEXT:    vpmovd2m %xmm0, %k0
1659; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
1660; X86-NEXT:    knotw %k0, %k0
1661; X86-NEXT:    kmovb %k0, (%eax)
1662; X86-NEXT:    retl
1663  %x = xor <4 x i1> %c, <i1 1, i1 1, i1 1, i1 1>
1664  store <4 x i1> %x, <4 x i1>*  %ptr, align 4
1665  ret void
1666}
1667
; Inverts an <8 x i1> argument (xor with all-ones) and stores it to %ptr as a
; single byte. Runs with avx512bw are expected to build the mask from word
; elements (vpsllw / vpmovw2m); the KNL and AVX512DQ runs first sign-extend the
; words to quadwords in a 512-bit register.
1668define void @store_v8i1(<8 x i1> %c , <8 x i1>* %ptr) {
1669; KNL-LABEL: store_v8i1:
1670; KNL:       ## %bb.0:
1671; KNL-NEXT:    vpmovsxwq %xmm0, %zmm0
1672; KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
1673; KNL-NEXT:    vptestnmq %zmm0, %zmm0, %k0
1674; KNL-NEXT:    kmovw %k0, %eax
1675; KNL-NEXT:    movb %al, (%rdi)
1676; KNL-NEXT:    vzeroupper
1677; KNL-NEXT:    retq
1678;
1679; SKX-LABEL: store_v8i1:
1680; SKX:       ## %bb.0:
1681; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
1682; SKX-NEXT:    vpmovw2m %xmm0, %k0
1683; SKX-NEXT:    knotb %k0, %k0
1684; SKX-NEXT:    kmovb %k0, (%rdi)
1685; SKX-NEXT:    retq
1686;
1687; AVX512BW-LABEL: store_v8i1:
1688; AVX512BW:       ## %bb.0:
1689; AVX512BW-NEXT:    vpsllw $15, %xmm0, %xmm0
1690; AVX512BW-NEXT:    vpmovw2m %zmm0, %k0
1691; AVX512BW-NEXT:    knotw %k0, %k0
1692; AVX512BW-NEXT:    kmovd %k0, %eax
1693; AVX512BW-NEXT:    movb %al, (%rdi)
1694; AVX512BW-NEXT:    vzeroupper
1695; AVX512BW-NEXT:    retq
1696;
1697; AVX512DQ-LABEL: store_v8i1:
1698; AVX512DQ:       ## %bb.0:
1699; AVX512DQ-NEXT:    vpmovsxwq %xmm0, %zmm0
1700; AVX512DQ-NEXT:    vpsllq $63, %zmm0, %zmm0
1701; AVX512DQ-NEXT:    vpmovq2m %zmm0, %k0
1702; AVX512DQ-NEXT:    knotb %k0, %k0
1703; AVX512DQ-NEXT:    kmovb %k0, (%rdi)
1704; AVX512DQ-NEXT:    vzeroupper
1705; AVX512DQ-NEXT:    retq
1706;
1707; X86-LABEL: store_v8i1:
1708; X86:       ## %bb.0:
1709; X86-NEXT:    vpsllw $15, %xmm0, %xmm0
1710; X86-NEXT:    vpmovw2m %xmm0, %k0
1711; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
1712; X86-NEXT:    knotb %k0, %k0
1713; X86-NEXT:    kmovb %k0, (%eax)
1714; X86-NEXT:    retl
1715  %x = xor <8 x i1> %c, <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>
1716  store <8 x i1> %x, <8 x i1>*  %ptr, align 4
1717  ret void
1718}
1719
; Inverts a <16 x i1> argument (xor with all-ones) and stores it to %ptr.
; A 16-bit mask fits kmovw exactly, so every run is expected to store straight
; from the mask register without going through a general-purpose register.
1720define void @store_v16i1(<16 x i1> %c , <16 x i1>* %ptr) {
1721; KNL-LABEL: store_v16i1:
1722; KNL:       ## %bb.0:
1723; KNL-NEXT:    vpmovsxbd %xmm0, %zmm0
1724; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
1725; KNL-NEXT:    vptestnmd %zmm0, %zmm0, %k0
1726; KNL-NEXT:    kmovw %k0, (%rdi)
1727; KNL-NEXT:    vzeroupper
1728; KNL-NEXT:    retq
1729;
1730; SKX-LABEL: store_v16i1:
1731; SKX:       ## %bb.0:
1732; SKX-NEXT:    vpsllw $7, %xmm0, %xmm0
1733; SKX-NEXT:    vpmovb2m %xmm0, %k0
1734; SKX-NEXT:    knotw %k0, %k0
1735; SKX-NEXT:    kmovw %k0, (%rdi)
1736; SKX-NEXT:    retq
1737;
1738; AVX512BW-LABEL: store_v16i1:
1739; AVX512BW:       ## %bb.0:
1740; AVX512BW-NEXT:    vpsllw $7, %xmm0, %xmm0
1741; AVX512BW-NEXT:    vpmovb2m %zmm0, %k0
1742; AVX512BW-NEXT:    knotw %k0, %k0
1743; AVX512BW-NEXT:    kmovw %k0, (%rdi)
1744; AVX512BW-NEXT:    vzeroupper
1745; AVX512BW-NEXT:    retq
1746;
1747; AVX512DQ-LABEL: store_v16i1:
1748; AVX512DQ:       ## %bb.0:
1749; AVX512DQ-NEXT:    vpmovsxbd %xmm0, %zmm0
1750; AVX512DQ-NEXT:    vpslld $31, %zmm0, %zmm0
1751; AVX512DQ-NEXT:    vpmovd2m %zmm0, %k0
1752; AVX512DQ-NEXT:    knotw %k0, %k0
1753; AVX512DQ-NEXT:    kmovw %k0, (%rdi)
1754; AVX512DQ-NEXT:    vzeroupper
1755; AVX512DQ-NEXT:    retq
1756;
1757; X86-LABEL: store_v16i1:
1758; X86:       ## %bb.0:
1759; X86-NEXT:    vpsllw $7, %xmm0, %xmm0
1760; X86-NEXT:    vpmovb2m %xmm0, %k0
1761; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
1762; X86-NEXT:    knotw %k0, %k0
1763; X86-NEXT:    kmovw %k0, (%eax)
1764; X86-NEXT:    retl
1765  %x = xor <16 x i1> %c, <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>
1766  store <16 x i1> %x, <16 x i1>*  %ptr, align 4
1767  ret void
1768}
1769
1770;void f2(int);
1771;void f1(int c)
1772;{
1773;  static int v = 0;
1774;  if (v == 0)
1775;    v = 1;
1776;  else
1777;    v = 0;
1778;  f2(v);
1779;}
1780
; Storage for the function-local static `v` of f1 in the C snippet above,
; represented here as an i1 global initialised to false.
1781@f1.v = internal unnamed_addr global i1 false, align 4
1782
; IR form of the C function f1 quoted above: loads the i1 global @f1.v, flips
; it, stores it back, and passes the zero-extended new value to f2 in a tail
; call. The expected code does the toggle with a byte load, xor $1 and a byte
; store; the 64-bit runs end in a tail-call jump while the 32-bit run passes
; the argument on the stack and uses an ordinary call.
1783define void @f1(i32 %c) {
1784; CHECK-LABEL: f1:
1785; CHECK:       ## %bb.0: ## %entry
1786; CHECK-NEXT:    movzbl {{.*}}(%rip), %edi
1787; CHECK-NEXT:    xorl $1, %edi
1788; CHECK-NEXT:    movb %dil, {{.*}}(%rip)
1789; CHECK-NEXT:    jmp _f2 ## TAILCALL
1790;
1791; X86-LABEL: f1:
1792; X86:       ## %bb.0: ## %entry
1793; X86-NEXT:    subl $12, %esp
1794; X86-NEXT:    .cfi_def_cfa_offset 16
1795; X86-NEXT:    movzbl _f1.v, %eax
1796; X86-NEXT:    xorl $1, %eax
1797; X86-NEXT:    movb %al, _f1.v
1798; X86-NEXT:    movl %eax, (%esp)
1799; X86-NEXT:    calll _f2
1800; X86-NEXT:    addl $12, %esp
1801; X86-NEXT:    retl
1802entry:
1803  %.b1 = load i1, i1* @f1.v, align 4
1804  %not..b1 = xor i1 %.b1, true
1805  store i1 %not..b1, i1* @f1.v, align 4
1806  %0 = zext i1 %not..b1 to i32
1807  tail call void @f2(i32 %0) #2
1808  ret void
1809}
1810
; External callee of f1; only its signature matters to the test.
; NOTE(review): attribute group #1 is defined outside this excerpt.
1811declare void @f2(i32) #1
1812
; Truncates an i16 to i1 and stores it through %y. The expected code masks the
; value down to its low bit (and with 1) and writes it as a full byte.
1813define void @store_i16_i1(i16 %x, i1 *%y) {
1814; CHECK-LABEL: store_i16_i1:
1815; CHECK:       ## %bb.0:
1816; CHECK-NEXT:    andl $1, %edi
1817; CHECK-NEXT:    movb %dil, (%rsi)
1818; CHECK-NEXT:    retq
1819;
1820; X86-LABEL: store_i16_i1:
1821; X86:       ## %bb.0:
1822; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
1823; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
1824; X86-NEXT:    andl $1, %ecx
1825; X86-NEXT:    movb %cl, (%eax)
1826; X86-NEXT:    retl
1827  %c = trunc i16 %x to i1
1828  store i1 %c, i1* %y
1829  ret void
1830}
1831
; store_i8_i1: truncate an i8 argument to i1 and store it through %y.
; Expected code keeps only bit 0 and writes a single byte; the i686 run does
; the masking with byte-sized instructions (movb/andb).
1832define void @store_i8_i1(i8 %x, i1 *%y) {
1833; CHECK-LABEL: store_i8_i1:
1834; CHECK:       ## %bb.0:
1835; CHECK-NEXT:    andl $1, %edi
1836; CHECK-NEXT:    movb %dil, (%rsi)
1837; CHECK-NEXT:    retq
1838;
1839; X86-LABEL: store_i8_i1:
1840; X86:       ## %bb.0:
1841; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
1842; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
1843; X86-NEXT:    andb $1, %cl
1844; X86-NEXT:    movb %cl, (%eax)
1845; X86-NEXT:    retl
1846  %c = trunc i8 %x to i1
1847  store i1 %c, i1* %y
1848  ret void
1849}
1850
; test_build_vec_v32i1: select lanes of %x with a constant <32 x i1> mask,
; zeroing the unselected lanes.
; Expected code: runs with avx512bw materialize the mask as the 32-bit
; immediate 0x59455495, move it into a k-register and apply a zero-masked
; vmovdqu16. Runs without avx512bw instead AND each 256-bit half with a
; RIP-relative memory operand.
1851define <32 x i16> @test_build_vec_v32i1(<32 x i16> %x) {
1852; KNL-LABEL: test_build_vec_v32i1:
1853; KNL:       ## %bb.0:
1854; KNL-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
1855; KNL-NEXT:    vandps {{.*}}(%rip), %ymm1, %ymm1
1856; KNL-NEXT:    retq
1857;
1858; SKX-LABEL: test_build_vec_v32i1:
1859; SKX:       ## %bb.0:
1860; SKX-NEXT:    movl $1497715861, %eax ## imm = 0x59455495
1861; SKX-NEXT:    kmovd %eax, %k1
1862; SKX-NEXT:    vmovdqu16 %zmm0, %zmm0 {%k1} {z}
1863; SKX-NEXT:    retq
1864;
1865; AVX512BW-LABEL: test_build_vec_v32i1:
1866; AVX512BW:       ## %bb.0:
1867; AVX512BW-NEXT:    movl $1497715861, %eax ## imm = 0x59455495
1868; AVX512BW-NEXT:    kmovd %eax, %k1
1869; AVX512BW-NEXT:    vmovdqu16 %zmm0, %zmm0 {%k1} {z}
1870; AVX512BW-NEXT:    retq
1871;
1872; AVX512DQ-LABEL: test_build_vec_v32i1:
1873; AVX512DQ:       ## %bb.0:
1874; AVX512DQ-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
1875; AVX512DQ-NEXT:    vandps {{.*}}(%rip), %ymm1, %ymm1
1876; AVX512DQ-NEXT:    retq
1877;
1878; X86-LABEL: test_build_vec_v32i1:
1879; X86:       ## %bb.0:
1880; X86-NEXT:    movl $1497715861, %eax ## imm = 0x59455495
1881; X86-NEXT:    kmovd %eax, %k1
1882; X86-NEXT:    vmovdqu16 %zmm0, %zmm0 {%k1} {z}
1883; X86-NEXT:    retl
1884  %ret = select <32 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false>, <32 x i16> %x, <32 x i16> zeroinitializer
1885  ret <32 x i16> %ret
1886}
1887
; test_build_vec_v64i1: select bytes of %x with a constant <64 x i1> mask,
; zeroing the unselected bytes.
; Expected code: runs with avx512bw emit a single 512-bit vpshufb that keeps
; the selected bytes in place and zeroes the others. Runs without avx512bw
; AND each 256-bit half with a RIP-relative memory operand.
1888define <64 x i8> @test_build_vec_v64i1(<64 x i8> %x) {
1889; KNL-LABEL: test_build_vec_v64i1:
1890; KNL:       ## %bb.0:
1891; KNL-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
1892; KNL-NEXT:    vandps {{.*}}(%rip), %ymm1, %ymm1
1893; KNL-NEXT:    retq
1894;
1895; SKX-LABEL: test_build_vec_v64i1:
1896; SKX:       ## %bb.0:
1897; SKX-NEXT:    vpshufb {{.*#+}} zmm0 = zero,zero,zmm0[2],zero,zero,zero,zmm0[6],zero,zmm0[8],zero,zmm0[10],zero,zmm0[12],zero,zero,zmm0[15],zero,zero,zmm0[18],zero,zmm0[20],zero,zmm0[22],zero,zmm0[24],zero,zero,zmm0[27],zero,zero,zmm0[30],zero,zmm0[32],zero,zmm0[34],zero,zero,zero,zmm0[38],zero,zmm0[40],zero,zero,zmm0[43,44],zero,zmm0[46],zero,zmm0[48],zero,zmm0[50],zero,zero,zero,zmm0[54],zero,zmm0[56],zero,zero,zmm0[59,60],zero,zmm0[62],zero
1898; SKX-NEXT:    retq
1899;
1900; AVX512BW-LABEL: test_build_vec_v64i1:
1901; AVX512BW:       ## %bb.0:
1902; AVX512BW-NEXT:    vpshufb {{.*#+}} zmm0 = zero,zero,zmm0[2],zero,zero,zero,zmm0[6],zero,zmm0[8],zero,zmm0[10],zero,zmm0[12],zero,zero,zmm0[15],zero,zero,zmm0[18],zero,zmm0[20],zero,zmm0[22],zero,zmm0[24],zero,zero,zmm0[27],zero,zero,zmm0[30],zero,zmm0[32],zero,zmm0[34],zero,zero,zero,zmm0[38],zero,zmm0[40],zero,zero,zmm0[43,44],zero,zmm0[46],zero,zmm0[48],zero,zmm0[50],zero,zero,zero,zmm0[54],zero,zmm0[56],zero,zero,zmm0[59,60],zero,zmm0[62],zero
1903; AVX512BW-NEXT:    retq
1904;
1905; AVX512DQ-LABEL: test_build_vec_v64i1:
1906; AVX512DQ:       ## %bb.0:
1907; AVX512DQ-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
1908; AVX512DQ-NEXT:    vandps {{.*}}(%rip), %ymm1, %ymm1
1909; AVX512DQ-NEXT:    retq
1910;
1911; X86-LABEL: test_build_vec_v64i1:
1912; X86:       ## %bb.0:
1913; X86-NEXT:    vpshufb {{.*#+}} zmm0 = zero,zero,zmm0[2],zero,zero,zero,zmm0[6],zero,zmm0[8],zero,zmm0[10],zero,zmm0[12],zero,zero,zmm0[15],zero,zero,zmm0[18],zero,zmm0[20],zero,zmm0[22],zero,zmm0[24],zero,zero,zmm0[27],zero,zero,zmm0[30],zero,zmm0[32],zero,zmm0[34],zero,zero,zero,zmm0[38],zero,zmm0[40],zero,zero,zmm0[43,44],zero,zmm0[46],zero,zmm0[48],zero,zmm0[50],zero,zero,zero,zmm0[54],zero,zmm0[56],zero,zero,zmm0[59,60],zero,zmm0[62],zero
1914; X86-NEXT:    retl
1915  %ret = select <64 x i1> <i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false>, <64 x i8> %x, <64 x i8> zeroinitializer
1916  ret <64 x i8> %ret
1917}
1918
; ktest_1: branch on whether an 8-lane compare mask is all zero.
; Two <8 x double> vectors are loaded from %base and %base + one double (both
; align 1). %sel1 = (%in > val1) drives a select that zero-fills val2,
; %sel2 = (%in < that result), and the AND of the two masks is bitcast to i8
; and compared against zero to pick which address receives %in.
; Expected code: the select becomes a zero-masked load and the AND becomes a
; write mask on the second compare. Runs with avx512dq test the mask with
; kortestb; the avx512f-only and avx512bw-only runs copy it to a GPR and use
; testb. The stores carry no align attribute and are expected as vmovapd.
1919define void @ktest_1(<8 x double> %in, double * %base) {
1920; KNL-LABEL: ktest_1:
1921; KNL:       ## %bb.0:
1922; KNL-NEXT:    vmovupd (%rdi), %zmm1
1923; KNL-NEXT:    vcmpltpd %zmm0, %zmm1, %k1
1924; KNL-NEXT:    vmovupd 8(%rdi), %zmm1 {%k1} {z}
1925; KNL-NEXT:    vcmpltpd %zmm1, %zmm0, %k0 {%k1}
1926; KNL-NEXT:    kmovw %k0, %eax
1927; KNL-NEXT:    testb %al, %al
1928; KNL-NEXT:    je LBB42_2
1929; KNL-NEXT:  ## %bb.1: ## %L1
1930; KNL-NEXT:    vmovapd %zmm0, (%rdi)
1931; KNL-NEXT:    vzeroupper
1932; KNL-NEXT:    retq
1933; KNL-NEXT:  LBB42_2: ## %L2
1934; KNL-NEXT:    vmovapd %zmm0, 8(%rdi)
1935; KNL-NEXT:    vzeroupper
1936; KNL-NEXT:    retq
1937;
1938; SKX-LABEL: ktest_1:
1939; SKX:       ## %bb.0:
1940; SKX-NEXT:    vmovupd (%rdi), %zmm1
1941; SKX-NEXT:    vcmpltpd %zmm0, %zmm1, %k1
1942; SKX-NEXT:    vmovupd 8(%rdi), %zmm1 {%k1} {z}
1943; SKX-NEXT:    vcmpltpd %zmm1, %zmm0, %k0 {%k1}
1944; SKX-NEXT:    kortestb %k0, %k0
1945; SKX-NEXT:    je LBB42_2
1946; SKX-NEXT:  ## %bb.1: ## %L1
1947; SKX-NEXT:    vmovapd %zmm0, (%rdi)
1948; SKX-NEXT:    vzeroupper
1949; SKX-NEXT:    retq
1950; SKX-NEXT:  LBB42_2: ## %L2
1951; SKX-NEXT:    vmovapd %zmm0, 8(%rdi)
1952; SKX-NEXT:    vzeroupper
1953; SKX-NEXT:    retq
1954;
1955; AVX512BW-LABEL: ktest_1:
1956; AVX512BW:       ## %bb.0:
1957; AVX512BW-NEXT:    vmovupd (%rdi), %zmm1
1958; AVX512BW-NEXT:    vcmpltpd %zmm0, %zmm1, %k1
1959; AVX512BW-NEXT:    vmovupd 8(%rdi), %zmm1 {%k1} {z}
1960; AVX512BW-NEXT:    vcmpltpd %zmm1, %zmm0, %k0 {%k1}
1961; AVX512BW-NEXT:    kmovd %k0, %eax
1962; AVX512BW-NEXT:    testb %al, %al
1963; AVX512BW-NEXT:    je LBB42_2
1964; AVX512BW-NEXT:  ## %bb.1: ## %L1
1965; AVX512BW-NEXT:    vmovapd %zmm0, (%rdi)
1966; AVX512BW-NEXT:    vzeroupper
1967; AVX512BW-NEXT:    retq
1968; AVX512BW-NEXT:  LBB42_2: ## %L2
1969; AVX512BW-NEXT:    vmovapd %zmm0, 8(%rdi)
1970; AVX512BW-NEXT:    vzeroupper
1971; AVX512BW-NEXT:    retq
1972;
1973; AVX512DQ-LABEL: ktest_1:
1974; AVX512DQ:       ## %bb.0:
1975; AVX512DQ-NEXT:    vmovupd (%rdi), %zmm1
1976; AVX512DQ-NEXT:    vcmpltpd %zmm0, %zmm1, %k1
1977; AVX512DQ-NEXT:    vmovupd 8(%rdi), %zmm1 {%k1} {z}
1978; AVX512DQ-NEXT:    vcmpltpd %zmm1, %zmm0, %k0 {%k1}
1979; AVX512DQ-NEXT:    kortestb %k0, %k0
1980; AVX512DQ-NEXT:    je LBB42_2
1981; AVX512DQ-NEXT:  ## %bb.1: ## %L1
1982; AVX512DQ-NEXT:    vmovapd %zmm0, (%rdi)
1983; AVX512DQ-NEXT:    vzeroupper
1984; AVX512DQ-NEXT:    retq
1985; AVX512DQ-NEXT:  LBB42_2: ## %L2
1986; AVX512DQ-NEXT:    vmovapd %zmm0, 8(%rdi)
1987; AVX512DQ-NEXT:    vzeroupper
1988; AVX512DQ-NEXT:    retq
1989;
1990; X86-LABEL: ktest_1:
1991; X86:       ## %bb.0:
1992; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
1993; X86-NEXT:    vmovupd (%eax), %zmm1
1994; X86-NEXT:    vcmpltpd %zmm0, %zmm1, %k1
1995; X86-NEXT:    vmovupd 8(%eax), %zmm1 {%k1} {z}
1996; X86-NEXT:    vcmpltpd %zmm1, %zmm0, %k0 {%k1}
1997; X86-NEXT:    kortestb %k0, %k0
1998; X86-NEXT:    je LBB42_2
1999; X86-NEXT:  ## %bb.1: ## %L1
2000; X86-NEXT:    vmovapd %zmm0, (%eax)
2001; X86-NEXT:    vzeroupper
2002; X86-NEXT:    retl
2003; X86-NEXT:  LBB42_2: ## %L2
2004; X86-NEXT:    vmovapd %zmm0, 8(%eax)
2005; X86-NEXT:    vzeroupper
2006; X86-NEXT:    retl
  ; Two vector views of %base that overlap: offset 0 and offset one double.
2007  %addr1 = getelementptr double, double * %base, i64 0
2008  %addr2 = getelementptr double, double * %base, i64 1
2009
2010  %vaddr1 = bitcast double* %addr1 to <8 x double>*
2011  %vaddr2 = bitcast double* %addr2 to <8 x double>*
2012
2013  %val1 = load <8 x double>, <8 x double> *%vaddr1, align 1
2014  %val2 = load <8 x double>, <8 x double> *%vaddr2, align 1
2015
  ; Build the combined mask: lanes where %in > val1 and %in < selected val2.
2016  %sel1 = fcmp ogt <8 x double>%in, %val1
2017  %val3 = select <8 x i1> %sel1, <8 x double> %val2, <8 x double> zeroinitializer
2018  %sel2 = fcmp olt <8 x double> %in, %val3
2019  %sel3 = and <8 x i1> %sel1, %sel2
2020
  ; Treat the mask as an integer; an all-zero mask takes the L2 path.
2021  %int_sel3 = bitcast <8 x i1> %sel3 to i8
2022  %res = icmp eq i8 %int_sel3, zeroinitializer
2023  br i1 %res, label %L2, label %L1
2024L1:
2025  store <8 x double> %in, <8 x double>* %vaddr1
2026  br label %End
2027L2:
2028  store <8 x double> %in, <8 x double>* %vaddr2
2029  br label %End
2030End:
2031  ret void
2032}
2033
; ktest_2: 32-lane variant of the zero-mask branch test, using OR.
; Two <32 x float> vectors are loaded from %base and %base + one float (both
; align 1). %sel1 = (%in > val1) drives a select that zero-fills val2,
; %sel2 = (%in < that result), and the OR of the two masks is bitcast to i32
; and compared against zero to pick which address receives %in.
; Expected code: each 512-bit half is compared separately. Runs with avx512bw
; join the 16-bit halves with kunpckwd and test both masks with one kortestd.
; Runs without avx512bw OR each half with korw, move the halves to GPRs and
; combine them with shll $16 / orl, whose flags feed the branch.
2034define void @ktest_2(<32 x float> %in, float * %base) {
2035;
2036; KNL-LABEL: ktest_2:
2037; KNL:       ## %bb.0:
2038; KNL-NEXT:    vmovups (%rdi), %zmm2
2039; KNL-NEXT:    vmovups 64(%rdi), %zmm3
2040; KNL-NEXT:    vcmpltps %zmm1, %zmm3, %k1
2041; KNL-NEXT:    vcmpltps %zmm0, %zmm2, %k2
2042; KNL-NEXT:    vmovups 4(%rdi), %zmm2 {%k2} {z}
2043; KNL-NEXT:    vmovups 68(%rdi), %zmm3 {%k1} {z}
2044; KNL-NEXT:    vcmpltps %zmm3, %zmm1, %k0
2045; KNL-NEXT:    vcmpltps %zmm2, %zmm0, %k3
2046; KNL-NEXT:    korw %k3, %k2, %k2
2047; KNL-NEXT:    kmovw %k2, %eax
2048; KNL-NEXT:    korw %k0, %k1, %k0
2049; KNL-NEXT:    kmovw %k0, %ecx
2050; KNL-NEXT:    shll $16, %ecx
2051; KNL-NEXT:    orl %eax, %ecx
2052; KNL-NEXT:    je LBB43_2
2053; KNL-NEXT:  ## %bb.1: ## %L1
2054; KNL-NEXT:    vmovaps %zmm0, (%rdi)
2055; KNL-NEXT:    vmovaps %zmm1, 64(%rdi)
2056; KNL-NEXT:    vzeroupper
2057; KNL-NEXT:    retq
2058; KNL-NEXT:  LBB43_2: ## %L2
2059; KNL-NEXT:    vmovaps %zmm0, 4(%rdi)
2060; KNL-NEXT:    vmovaps %zmm1, 68(%rdi)
2061; KNL-NEXT:    vzeroupper
2062; KNL-NEXT:    retq
2063;
2064; SKX-LABEL: ktest_2:
2065; SKX:       ## %bb.0:
2066; SKX-NEXT:    vmovups (%rdi), %zmm2
2067; SKX-NEXT:    vmovups 64(%rdi), %zmm3
2068; SKX-NEXT:    vcmpltps %zmm0, %zmm2, %k1
2069; SKX-NEXT:    vcmpltps %zmm1, %zmm3, %k2
2070; SKX-NEXT:    kunpckwd %k1, %k2, %k0
2071; SKX-NEXT:    vmovups 68(%rdi), %zmm2 {%k2} {z}
2072; SKX-NEXT:    vmovups 4(%rdi), %zmm3 {%k1} {z}
2073; SKX-NEXT:    vcmpltps %zmm3, %zmm0, %k1
2074; SKX-NEXT:    vcmpltps %zmm2, %zmm1, %k2
2075; SKX-NEXT:    kunpckwd %k1, %k2, %k1
2076; SKX-NEXT:    kortestd %k1, %k0
2077; SKX-NEXT:    je LBB43_2
2078; SKX-NEXT:  ## %bb.1: ## %L1
2079; SKX-NEXT:    vmovaps %zmm0, (%rdi)
2080; SKX-NEXT:    vmovaps %zmm1, 64(%rdi)
2081; SKX-NEXT:    vzeroupper
2082; SKX-NEXT:    retq
2083; SKX-NEXT:  LBB43_2: ## %L2
2084; SKX-NEXT:    vmovaps %zmm0, 4(%rdi)
2085; SKX-NEXT:    vmovaps %zmm1, 68(%rdi)
2086; SKX-NEXT:    vzeroupper
2087; SKX-NEXT:    retq
2088;
2089; AVX512BW-LABEL: ktest_2:
2090; AVX512BW:       ## %bb.0:
2091; AVX512BW-NEXT:    vmovups (%rdi), %zmm2
2092; AVX512BW-NEXT:    vmovups 64(%rdi), %zmm3
2093; AVX512BW-NEXT:    vcmpltps %zmm0, %zmm2, %k1
2094; AVX512BW-NEXT:    vcmpltps %zmm1, %zmm3, %k2
2095; AVX512BW-NEXT:    kunpckwd %k1, %k2, %k0
2096; AVX512BW-NEXT:    vmovups 68(%rdi), %zmm2 {%k2} {z}
2097; AVX512BW-NEXT:    vmovups 4(%rdi), %zmm3 {%k1} {z}
2098; AVX512BW-NEXT:    vcmpltps %zmm3, %zmm0, %k1
2099; AVX512BW-NEXT:    vcmpltps %zmm2, %zmm1, %k2
2100; AVX512BW-NEXT:    kunpckwd %k1, %k2, %k1
2101; AVX512BW-NEXT:    kortestd %k1, %k0
2102; AVX512BW-NEXT:    je LBB43_2
2103; AVX512BW-NEXT:  ## %bb.1: ## %L1
2104; AVX512BW-NEXT:    vmovaps %zmm0, (%rdi)
2105; AVX512BW-NEXT:    vmovaps %zmm1, 64(%rdi)
2106; AVX512BW-NEXT:    vzeroupper
2107; AVX512BW-NEXT:    retq
2108; AVX512BW-NEXT:  LBB43_2: ## %L2
2109; AVX512BW-NEXT:    vmovaps %zmm0, 4(%rdi)
2110; AVX512BW-NEXT:    vmovaps %zmm1, 68(%rdi)
2111; AVX512BW-NEXT:    vzeroupper
2112; AVX512BW-NEXT:    retq
2113;
2114; AVX512DQ-LABEL: ktest_2:
2115; AVX512DQ:       ## %bb.0:
2116; AVX512DQ-NEXT:    vmovups (%rdi), %zmm2
2117; AVX512DQ-NEXT:    vmovups 64(%rdi), %zmm3
2118; AVX512DQ-NEXT:    vcmpltps %zmm1, %zmm3, %k1
2119; AVX512DQ-NEXT:    vcmpltps %zmm0, %zmm2, %k2
2120; AVX512DQ-NEXT:    vmovups 4(%rdi), %zmm2 {%k2} {z}
2121; AVX512DQ-NEXT:    vmovups 68(%rdi), %zmm3 {%k1} {z}
2122; AVX512DQ-NEXT:    vcmpltps %zmm3, %zmm1, %k0
2123; AVX512DQ-NEXT:    vcmpltps %zmm2, %zmm0, %k3
2124; AVX512DQ-NEXT:    korw %k3, %k2, %k2
2125; AVX512DQ-NEXT:    kmovw %k2, %eax
2126; AVX512DQ-NEXT:    korw %k0, %k1, %k0
2127; AVX512DQ-NEXT:    kmovw %k0, %ecx
2128; AVX512DQ-NEXT:    shll $16, %ecx
2129; AVX512DQ-NEXT:    orl %eax, %ecx
2130; AVX512DQ-NEXT:    je LBB43_2
2131; AVX512DQ-NEXT:  ## %bb.1: ## %L1
2132; AVX512DQ-NEXT:    vmovaps %zmm0, (%rdi)
2133; AVX512DQ-NEXT:    vmovaps %zmm1, 64(%rdi)
2134; AVX512DQ-NEXT:    vzeroupper
2135; AVX512DQ-NEXT:    retq
2136; AVX512DQ-NEXT:  LBB43_2: ## %L2
2137; AVX512DQ-NEXT:    vmovaps %zmm0, 4(%rdi)
2138; AVX512DQ-NEXT:    vmovaps %zmm1, 68(%rdi)
2139; AVX512DQ-NEXT:    vzeroupper
2140; AVX512DQ-NEXT:    retq
2141;
2142; X86-LABEL: ktest_2:
2143; X86:       ## %bb.0:
2144; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
2145; X86-NEXT:    vmovups (%eax), %zmm2
2146; X86-NEXT:    vmovups 64(%eax), %zmm3
2147; X86-NEXT:    vcmpltps %zmm0, %zmm2, %k1
2148; X86-NEXT:    vcmpltps %zmm1, %zmm3, %k2
2149; X86-NEXT:    kunpckwd %k1, %k2, %k0
2150; X86-NEXT:    vmovups 68(%eax), %zmm2 {%k2} {z}
2151; X86-NEXT:    vmovups 4(%eax), %zmm3 {%k1} {z}
2152; X86-NEXT:    vcmpltps %zmm3, %zmm0, %k1
2153; X86-NEXT:    vcmpltps %zmm2, %zmm1, %k2
2154; X86-NEXT:    kunpckwd %k1, %k2, %k1
2155; X86-NEXT:    kortestd %k1, %k0
2156; X86-NEXT:    je LBB43_2
2157; X86-NEXT:  ## %bb.1: ## %L1
2158; X86-NEXT:    vmovaps %zmm0, (%eax)
2159; X86-NEXT:    vmovaps %zmm1, 64(%eax)
2160; X86-NEXT:    vzeroupper
2161; X86-NEXT:    retl
2162; X86-NEXT:  LBB43_2: ## %L2
2163; X86-NEXT:    vmovaps %zmm0, 4(%eax)
2164; X86-NEXT:    vmovaps %zmm1, 68(%eax)
2165; X86-NEXT:    vzeroupper
2166; X86-NEXT:    retl
  ; Two vector views of %base that overlap: offset 0 and offset one float.
2167  %addr1 = getelementptr float, float * %base, i64 0
2168  %addr2 = getelementptr float, float * %base, i64 1
2169
2170  %vaddr1 = bitcast float* %addr1 to <32 x float>*
2171  %vaddr2 = bitcast float* %addr2 to <32 x float>*
2172
2173  %val1 = load <32 x float>, <32 x float> *%vaddr1, align 1
2174  %val2 = load <32 x float>, <32 x float> *%vaddr2, align 1
2175
  ; Build the combined mask: lanes where %in > val1 or %in < selected val2.
2176  %sel1 = fcmp ogt <32 x float>%in, %val1
2177  %val3 = select <32 x i1> %sel1, <32 x float> %val2, <32 x float> zeroinitializer
2178  %sel2 = fcmp olt <32 x float> %in, %val3
2179  %sel3 = or <32 x i1> %sel1, %sel2
2180
  ; Treat the mask as an integer; an all-zero mask takes the L2 path.
2181  %int_sel3 = bitcast <32 x i1> %sel3 to i32
2182  %res = icmp eq i32 %int_sel3, zeroinitializer
2183  br i1 %res, label %L2, label %L1
2184L1:
2185  store <32 x float> %in, <32 x float>* %vaddr1
2186  br label %End
2187L2:
2188  store <32 x float> %in, <32 x float>* %vaddr2
2189  br label %End
2190End:
2191  ret void
2192}
2193
; load_8i1: load a <8 x i1> mask from memory and sign-extend it to <8 x i64>.
; Expected code: runs with avx512dq load the mask directly with kmovb and
; expand it with vpmovm2q. Runs without avx512dq load the byte into a GPR,
; move it to a k-register and build the lanes with a zero-masked vpternlogq.
2194define <8 x i64> @load_8i1(<8 x i1>* %a) {
2195; KNL-LABEL: load_8i1:
2196; KNL:       ## %bb.0:
2197; KNL-NEXT:    movzbl (%rdi), %eax
2198; KNL-NEXT:    kmovw %eax, %k1
2199; KNL-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
2200; KNL-NEXT:    retq
2201;
2202; SKX-LABEL: load_8i1:
2203; SKX:       ## %bb.0:
2204; SKX-NEXT:    kmovb (%rdi), %k0
2205; SKX-NEXT:    vpmovm2q %k0, %zmm0
2206; SKX-NEXT:    retq
2207;
2208; AVX512BW-LABEL: load_8i1:
2209; AVX512BW:       ## %bb.0:
2210; AVX512BW-NEXT:    movzbl (%rdi), %eax
2211; AVX512BW-NEXT:    kmovd %eax, %k1
2212; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
2213; AVX512BW-NEXT:    retq
2214;
2215; AVX512DQ-LABEL: load_8i1:
2216; AVX512DQ:       ## %bb.0:
2217; AVX512DQ-NEXT:    kmovb (%rdi), %k0
2218; AVX512DQ-NEXT:    vpmovm2q %k0, %zmm0
2219; AVX512DQ-NEXT:    retq
2220;
2221; X86-LABEL: load_8i1:
2222; X86:       ## %bb.0:
2223; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
2224; X86-NEXT:    kmovb (%eax), %k0
2225; X86-NEXT:    vpmovm2q %k0, %zmm0
2226; X86-NEXT:    retl
2227  %b = load <8 x i1>, <8 x i1>* %a
2228  %c = sext <8 x i1> %b to <8 x i64>
2229  ret <8 x i64> %c
2230}
2231
; load_16i1: load a <16 x i1> mask from memory and sign-extend it to
; <16 x i32>.
; Expected code: every run loads the mask with kmovw from memory. Runs with
; avx512dq expand it with vpmovm2d; the others use a zero-masked vpternlogd.
2232define <16 x i32> @load_16i1(<16 x i1>* %a) {
2233; KNL-LABEL: load_16i1:
2234; KNL:       ## %bb.0:
2235; KNL-NEXT:    kmovw (%rdi), %k1
2236; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
2237; KNL-NEXT:    retq
2238;
2239; SKX-LABEL: load_16i1:
2240; SKX:       ## %bb.0:
2241; SKX-NEXT:    kmovw (%rdi), %k0
2242; SKX-NEXT:    vpmovm2d %k0, %zmm0
2243; SKX-NEXT:    retq
2244;
2245; AVX512BW-LABEL: load_16i1:
2246; AVX512BW:       ## %bb.0:
2247; AVX512BW-NEXT:    kmovw (%rdi), %k1
2248; AVX512BW-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
2249; AVX512BW-NEXT:    retq
2250;
2251; AVX512DQ-LABEL: load_16i1:
2252; AVX512DQ:       ## %bb.0:
2253; AVX512DQ-NEXT:    kmovw (%rdi), %k0
2254; AVX512DQ-NEXT:    vpmovm2d %k0, %zmm0
2255; AVX512DQ-NEXT:    retq
2256;
2257; X86-LABEL: load_16i1:
2258; X86:       ## %bb.0:
2259; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
2260; X86-NEXT:    kmovw (%eax), %k0
2261; X86-NEXT:    vpmovm2d %k0, %zmm0
2262; X86-NEXT:    retl
2263  %b = load <16 x i1>, <16 x i1>* %a
2264  %c = sext <16 x i1> %b to <16 x i32>
2265  ret <16 x i32> %c
2266}
2267
; load_2i1: load a <2 x i1> mask from memory and sign-extend it to <2 x i16>.
; Expected code produces the result in quadword lanes (vpmovm2q or
; vpternlogq). Runs with avx512vl write xmm0 directly. Runs without it
; compute in zmm0, use only the xmm0 part and emit vzeroupper before
; returning.
2268define <2 x i16> @load_2i1(<2 x i1>* %a) {
2269; KNL-LABEL: load_2i1:
2270; KNL:       ## %bb.0:
2271; KNL-NEXT:    movzbl (%rdi), %eax
2272; KNL-NEXT:    kmovw %eax, %k1
2273; KNL-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
2274; KNL-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
2275; KNL-NEXT:    vzeroupper
2276; KNL-NEXT:    retq
2277;
2278; SKX-LABEL: load_2i1:
2279; SKX:       ## %bb.0:
2280; SKX-NEXT:    kmovb (%rdi), %k0
2281; SKX-NEXT:    vpmovm2q %k0, %xmm0
2282; SKX-NEXT:    retq
2283;
2284; AVX512BW-LABEL: load_2i1:
2285; AVX512BW:       ## %bb.0:
2286; AVX512BW-NEXT:    movzbl (%rdi), %eax
2287; AVX512BW-NEXT:    kmovd %eax, %k1
2288; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
2289; AVX512BW-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
2290; AVX512BW-NEXT:    vzeroupper
2291; AVX512BW-NEXT:    retq
2292;
2293; AVX512DQ-LABEL: load_2i1:
2294; AVX512DQ:       ## %bb.0:
2295; AVX512DQ-NEXT:    kmovb (%rdi), %k0
2296; AVX512DQ-NEXT:    vpmovm2q %k0, %zmm0
2297; AVX512DQ-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
2298; AVX512DQ-NEXT:    vzeroupper
2299; AVX512DQ-NEXT:    retq
2300;
2301; X86-LABEL: load_2i1:
2302; X86:       ## %bb.0:
2303; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
2304; X86-NEXT:    kmovb (%eax), %k0
2305; X86-NEXT:    vpmovm2q %k0, %xmm0
2306; X86-NEXT:    retl
2307  %b = load <2 x i1>, <2 x i1>* %a
2308  %c = sext <2 x i1> %b to <2 x i16>
2309  ret <2 x i16> %c
2310}
2311
; load_4i1: load a <4 x i1> mask from memory and sign-extend it to <4 x i16>.
; Expected code produces the result in doubleword lanes (vpmovm2d or
; vpternlogd). Runs with avx512vl write xmm0 directly. Runs without it
; compute in zmm0, use only the xmm0 part and emit vzeroupper before
; returning.
2312define <4 x i16> @load_4i1(<4 x i1>* %a) {
2313; KNL-LABEL: load_4i1:
2314; KNL:       ## %bb.0:
2315; KNL-NEXT:    movzbl (%rdi), %eax
2316; KNL-NEXT:    kmovw %eax, %k1
2317; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
2318; KNL-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
2319; KNL-NEXT:    vzeroupper
2320; KNL-NEXT:    retq
2321;
2322; SKX-LABEL: load_4i1:
2323; SKX:       ## %bb.0:
2324; SKX-NEXT:    kmovb (%rdi), %k0
2325; SKX-NEXT:    vpmovm2d %k0, %xmm0
2326; SKX-NEXT:    retq
2327;
2328; AVX512BW-LABEL: load_4i1:
2329; AVX512BW:       ## %bb.0:
2330; AVX512BW-NEXT:    movzbl (%rdi), %eax
2331; AVX512BW-NEXT:    kmovd %eax, %k1
2332; AVX512BW-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
2333; AVX512BW-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
2334; AVX512BW-NEXT:    vzeroupper
2335; AVX512BW-NEXT:    retq
2336;
2337; AVX512DQ-LABEL: load_4i1:
2338; AVX512DQ:       ## %bb.0:
2339; AVX512DQ-NEXT:    kmovb (%rdi), %k0
2340; AVX512DQ-NEXT:    vpmovm2d %k0, %zmm0
2341; AVX512DQ-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
2342; AVX512DQ-NEXT:    vzeroupper
2343; AVX512DQ-NEXT:    retq
2344;
2345; X86-LABEL: load_4i1:
2346; X86:       ## %bb.0:
2347; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
2348; X86-NEXT:    kmovb (%eax), %k0
2349; X86-NEXT:    vpmovm2d %k0, %xmm0
2350; X86-NEXT:    retl
2351  %b = load <4 x i1>, <4 x i1>* %a
2352  %c = sext <4 x i1> %b to <4 x i16>
2353  ret <4 x i16> %c
2354}
2355
; load_32i1: load a <32 x i1> mask from memory and sign-extend it to
; <32 x i16>.
; Expected code: runs with avx512bw load all 32 bits with kmovd and expand
; them with vpmovm2w. Runs without avx512bw load two 16-bit halves with
; kmovw, expand each to doublewords and narrow to words with vpmovdw, and
; return the result in ymm0/ymm1.
2356define <32 x i16> @load_32i1(<32 x i1>* %a) {
2357; KNL-LABEL: load_32i1:
2358; KNL:       ## %bb.0:
2359; KNL-NEXT:    kmovw (%rdi), %k1
2360; KNL-NEXT:    kmovw 2(%rdi), %k2
2361; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
2362; KNL-NEXT:    vpmovdw %zmm0, %ymm0
2363; KNL-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
2364; KNL-NEXT:    vpmovdw %zmm1, %ymm1
2365; KNL-NEXT:    retq
2366;
2367; SKX-LABEL: load_32i1:
2368; SKX:       ## %bb.0:
2369; SKX-NEXT:    kmovd (%rdi), %k0
2370; SKX-NEXT:    vpmovm2w %k0, %zmm0
2371; SKX-NEXT:    retq
2372;
2373; AVX512BW-LABEL: load_32i1:
2374; AVX512BW:       ## %bb.0:
2375; AVX512BW-NEXT:    kmovd (%rdi), %k0
2376; AVX512BW-NEXT:    vpmovm2w %k0, %zmm0
2377; AVX512BW-NEXT:    retq
2378;
2379; AVX512DQ-LABEL: load_32i1:
2380; AVX512DQ:       ## %bb.0:
2381; AVX512DQ-NEXT:    kmovw (%rdi), %k0
2382; AVX512DQ-NEXT:    kmovw 2(%rdi), %k1
2383; AVX512DQ-NEXT:    vpmovm2d %k0, %zmm0
2384; AVX512DQ-NEXT:    vpmovdw %zmm0, %ymm0
2385; AVX512DQ-NEXT:    vpmovm2d %k1, %zmm1
2386; AVX512DQ-NEXT:    vpmovdw %zmm1, %ymm1
2387; AVX512DQ-NEXT:    retq
2388;
2389; X86-LABEL: load_32i1:
2390; X86:       ## %bb.0:
2391; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
2392; X86-NEXT:    kmovd (%eax), %k0
2393; X86-NEXT:    vpmovm2w %k0, %zmm0
2394; X86-NEXT:    retl
2395  %b = load <32 x i1>, <32 x i1>* %a
2396  %c = sext <32 x i1> %b to <32 x i16>
2397  ret <32 x i16> %c
2398}
2399
; load_64i1: load a <64 x i1> mask from memory and sign-extend it to
; <64 x i8>.
; Expected code: runs with avx512bw load all 64 bits with kmovq and expand
; them with vpmovm2b. Runs without avx512bw load four 16-bit pieces with
; kmovw, expand each to doublewords, narrow to bytes with vpmovdb, and pair
; the 128-bit results into ymm0/ymm1 with vinserti128.
2400define <64 x i8> @load_64i1(<64 x i1>* %a) {
2401; KNL-LABEL: load_64i1:
2402; KNL:       ## %bb.0:
2403; KNL-NEXT:    kmovw (%rdi), %k1
2404; KNL-NEXT:    kmovw 2(%rdi), %k2
2405; KNL-NEXT:    kmovw 4(%rdi), %k3
2406; KNL-NEXT:    kmovw 6(%rdi), %k4
2407; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
2408; KNL-NEXT:    vpmovdb %zmm0, %xmm0
2409; KNL-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
2410; KNL-NEXT:    vpmovdb %zmm1, %xmm1
2411; KNL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
2412; KNL-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k3} {z}
2413; KNL-NEXT:    vpmovdb %zmm1, %xmm1
2414; KNL-NEXT:    vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k4} {z}
2415; KNL-NEXT:    vpmovdb %zmm2, %xmm2
2416; KNL-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
2417; KNL-NEXT:    retq
2418;
2419; SKX-LABEL: load_64i1:
2420; SKX:       ## %bb.0:
2421; SKX-NEXT:    kmovq (%rdi), %k0
2422; SKX-NEXT:    vpmovm2b %k0, %zmm0
2423; SKX-NEXT:    retq
2424;
2425; AVX512BW-LABEL: load_64i1:
2426; AVX512BW:       ## %bb.0:
2427; AVX512BW-NEXT:    kmovq (%rdi), %k0
2428; AVX512BW-NEXT:    vpmovm2b %k0, %zmm0
2429; AVX512BW-NEXT:    retq
2430;
2431; AVX512DQ-LABEL: load_64i1:
2432; AVX512DQ:       ## %bb.0:
2433; AVX512DQ-NEXT:    kmovw (%rdi), %k0
2434; AVX512DQ-NEXT:    kmovw 2(%rdi), %k1
2435; AVX512DQ-NEXT:    kmovw 4(%rdi), %k2
2436; AVX512DQ-NEXT:    kmovw 6(%rdi), %k3
2437; AVX512DQ-NEXT:    vpmovm2d %k0, %zmm0
2438; AVX512DQ-NEXT:    vpmovdb %zmm0, %xmm0
2439; AVX512DQ-NEXT:    vpmovm2d %k1, %zmm1
2440; AVX512DQ-NEXT:    vpmovdb %zmm1, %xmm1
2441; AVX512DQ-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
2442; AVX512DQ-NEXT:    vpmovm2d %k2, %zmm1
2443; AVX512DQ-NEXT:    vpmovdb %zmm1, %xmm1
2444; AVX512DQ-NEXT:    vpmovm2d %k3, %zmm2
2445; AVX512DQ-NEXT:    vpmovdb %zmm2, %xmm2
2446; AVX512DQ-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
2447; AVX512DQ-NEXT:    retq
2448;
2449; X86-LABEL: load_64i1:
2450; X86:       ## %bb.0:
2451; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
2452; X86-NEXT:    kmovq (%eax), %k0
2453; X86-NEXT:    vpmovm2b %k0, %zmm0
2454; X86-NEXT:    retl
2455  %b = load <64 x i1>, <64 x i1>* %a
2456  %c = sext <64 x i1> %b to <64 x i8>
2457  ret <64 x i8> %c
2458}
2459
; store_8i1: store a <8 x i1> argument to memory.
; Expected code: the argument arrives in xmm0 as word elements. Bit 0 of each
; lane is shifted to the top bit and converted to a k-register mask
; (vpmovw2m with avx512bw; after widening to quadwords, vptestmq or vpmovq2m
; otherwise). Runs with avx512dq store the mask with kmovb; the others copy
; it to a GPR and store one byte with movb.
2460define void @store_8i1(<8 x i1>* %a, <8 x i1> %v) {
2461; KNL-LABEL: store_8i1:
2462; KNL:       ## %bb.0:
2463; KNL-NEXT:    vpmovsxwq %xmm0, %zmm0
2464; KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
2465; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k0
2466; KNL-NEXT:    kmovw %k0, %eax
2467; KNL-NEXT:    movb %al, (%rdi)
2468; KNL-NEXT:    vzeroupper
2469; KNL-NEXT:    retq
2470;
2471; SKX-LABEL: store_8i1:
2472; SKX:       ## %bb.0:
2473; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
2474; SKX-NEXT:    vpmovw2m %xmm0, %k0
2475; SKX-NEXT:    kmovb %k0, (%rdi)
2476; SKX-NEXT:    retq
2477;
2478; AVX512BW-LABEL: store_8i1:
2479; AVX512BW:       ## %bb.0:
2480; AVX512BW-NEXT:    vpsllw $15, %xmm0, %xmm0
2481; AVX512BW-NEXT:    vpmovw2m %zmm0, %k0
2482; AVX512BW-NEXT:    kmovd %k0, %eax
2483; AVX512BW-NEXT:    movb %al, (%rdi)
2484; AVX512BW-NEXT:    vzeroupper
2485; AVX512BW-NEXT:    retq
2486;
2487; AVX512DQ-LABEL: store_8i1:
2488; AVX512DQ:       ## %bb.0:
2489; AVX512DQ-NEXT:    vpmovsxwq %xmm0, %zmm0
2490; AVX512DQ-NEXT:    vpsllq $63, %zmm0, %zmm0
2491; AVX512DQ-NEXT:    vpmovq2m %zmm0, %k0
2492; AVX512DQ-NEXT:    kmovb %k0, (%rdi)
2493; AVX512DQ-NEXT:    vzeroupper
2494; AVX512DQ-NEXT:    retq
2495;
2496; X86-LABEL: store_8i1:
2497; X86:       ## %bb.0:
2498; X86-NEXT:    vpsllw $15, %xmm0, %xmm0
2499; X86-NEXT:    vpmovw2m %xmm0, %k0
2500; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
2501; X86-NEXT:    kmovb %k0, (%eax)
2502; X86-NEXT:    retl
2503  store <8 x i1> %v, <8 x i1>* %a
2504  ret void
2505}
2506
; store_8i1_1: truncate a <8 x i16> argument to <8 x i1> and store it.
; Expected code uses the same instructions as store_8i1: bit 0 of each word
; is shifted to the top bit, converted to a k-register mask, and stored as
; one byte. Only the position of the pointer load differs on i686.
2507define void @store_8i1_1(<8 x i1>* %a, <8 x i16> %v) {
2508; KNL-LABEL: store_8i1_1:
2509; KNL:       ## %bb.0:
2510; KNL-NEXT:    vpmovsxwq %xmm0, %zmm0
2511; KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
2512; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k0
2513; KNL-NEXT:    kmovw %k0, %eax
2514; KNL-NEXT:    movb %al, (%rdi)
2515; KNL-NEXT:    vzeroupper
2516; KNL-NEXT:    retq
2517;
2518; SKX-LABEL: store_8i1_1:
2519; SKX:       ## %bb.0:
2520; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
2521; SKX-NEXT:    vpmovw2m %xmm0, %k0
2522; SKX-NEXT:    kmovb %k0, (%rdi)
2523; SKX-NEXT:    retq
2524;
2525; AVX512BW-LABEL: store_8i1_1:
2526; AVX512BW:       ## %bb.0:
2527; AVX512BW-NEXT:    vpsllw $15, %xmm0, %xmm0
2528; AVX512BW-NEXT:    vpmovw2m %zmm0, %k0
2529; AVX512BW-NEXT:    kmovd %k0, %eax
2530; AVX512BW-NEXT:    movb %al, (%rdi)
2531; AVX512BW-NEXT:    vzeroupper
2532; AVX512BW-NEXT:    retq
2533;
2534; AVX512DQ-LABEL: store_8i1_1:
2535; AVX512DQ:       ## %bb.0:
2536; AVX512DQ-NEXT:    vpmovsxwq %xmm0, %zmm0
2537; AVX512DQ-NEXT:    vpsllq $63, %zmm0, %zmm0
2538; AVX512DQ-NEXT:    vpmovq2m %zmm0, %k0
2539; AVX512DQ-NEXT:    kmovb %k0, (%rdi)
2540; AVX512DQ-NEXT:    vzeroupper
2541; AVX512DQ-NEXT:    retq
2542;
2543; X86-LABEL: store_8i1_1:
2544; X86:       ## %bb.0:
2545; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
2546; X86-NEXT:    vpsllw $15, %xmm0, %xmm0
2547; X86-NEXT:    vpmovw2m %xmm0, %k0
2548; X86-NEXT:    kmovb %k0, (%eax)
2549; X86-NEXT:    retl
2550  %v1 = trunc <8 x i16> %v to <8 x i1>
2551  store <8 x i1> %v1, <8 x i1>* %a
2552  ret void
2553}
2554
; store_16i1: store a <16 x i1> argument to memory.
; Expected code: the argument arrives in xmm0 as byte elements. Runs with
; avx512bw shift bit 0 of each byte to the top bit (vpsllw $7) and convert
; with vpmovb2m. Runs without it sign-extend to doublewords, shift by 31 and
; convert with vptestmd or vpmovd2m. All runs store the 16-bit mask with
; kmovw.
2555define void @store_16i1(<16 x i1>* %a, <16 x i1> %v) {
2556; KNL-LABEL: store_16i1:
2557; KNL:       ## %bb.0:
2558; KNL-NEXT:    vpmovsxbd %xmm0, %zmm0
2559; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
2560; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k0
2561; KNL-NEXT:    kmovw %k0, (%rdi)
2562; KNL-NEXT:    vzeroupper
2563; KNL-NEXT:    retq
2564;
2565; SKX-LABEL: store_16i1:
2566; SKX:       ## %bb.0:
2567; SKX-NEXT:    vpsllw $7, %xmm0, %xmm0
2568; SKX-NEXT:    vpmovb2m %xmm0, %k0
2569; SKX-NEXT:    kmovw %k0, (%rdi)
2570; SKX-NEXT:    retq
2571;
2572; AVX512BW-LABEL: store_16i1:
2573; AVX512BW:       ## %bb.0:
2574; AVX512BW-NEXT:    vpsllw $7, %xmm0, %xmm0
2575; AVX512BW-NEXT:    vpmovb2m %zmm0, %k0
2576; AVX512BW-NEXT:    kmovw %k0, (%rdi)
2577; AVX512BW-NEXT:    vzeroupper
2578; AVX512BW-NEXT:    retq
2579;
2580; AVX512DQ-LABEL: store_16i1:
2581; AVX512DQ:       ## %bb.0:
2582; AVX512DQ-NEXT:    vpmovsxbd %xmm0, %zmm0
2583; AVX512DQ-NEXT:    vpslld $31, %zmm0, %zmm0
2584; AVX512DQ-NEXT:    vpmovd2m %zmm0, %k0
2585; AVX512DQ-NEXT:    kmovw %k0, (%rdi)
2586; AVX512DQ-NEXT:    vzeroupper
2587; AVX512DQ-NEXT:    retq
2588;
2589; X86-LABEL: store_16i1:
2590; X86:       ## %bb.0:
2591; X86-NEXT:    vpsllw $7, %xmm0, %xmm0
2592; X86-NEXT:    vpmovb2m %xmm0, %k0
2593; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
2594; X86-NEXT:    kmovw %k0, (%eax)
2595; X86-NEXT:    retl
2596  store <16 x i1> %v, <16 x i1>* %a
2597  ret void
2598}
2599
; store_32i1: store a <32 x i1> argument to memory.
; Expected code: the argument arrives in ymm0 as byte elements. Runs with
; avx512bw shift bit 0 of each byte to the top bit, convert with vpmovb2m and
; store 32 bits with kmovd. Runs without it process the two 128-bit halves
; separately (sign-extend to doublewords, shift by 31, convert to a mask) and
; store two 16-bit masks at offsets 0 and 2.
2600define void @store_32i1(<32 x i1>* %a, <32 x i1> %v) {
2601; KNL-LABEL: store_32i1:
2602; KNL:       ## %bb.0:
2603; KNL-NEXT:    vpmovsxbd %xmm0, %zmm1
2604; KNL-NEXT:    vpslld $31, %zmm1, %zmm1
2605; KNL-NEXT:    vptestmd %zmm1, %zmm1, %k0
2606; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm0
2607; KNL-NEXT:    vpmovsxbd %xmm0, %zmm0
2608; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
2609; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k1
2610; KNL-NEXT:    kmovw %k1, 2(%rdi)
2611; KNL-NEXT:    kmovw %k0, (%rdi)
2612; KNL-NEXT:    vzeroupper
2613; KNL-NEXT:    retq
2614;
2615; SKX-LABEL: store_32i1:
2616; SKX:       ## %bb.0:
2617; SKX-NEXT:    vpsllw $7, %ymm0, %ymm0
2618; SKX-NEXT:    vpmovb2m %ymm0, %k0
2619; SKX-NEXT:    kmovd %k0, (%rdi)
2620; SKX-NEXT:    vzeroupper
2621; SKX-NEXT:    retq
2622;
2623; AVX512BW-LABEL: store_32i1:
2624; AVX512BW:       ## %bb.0:
2625; AVX512BW-NEXT:    vpsllw $7, %ymm0, %ymm0
2626; AVX512BW-NEXT:    vpmovb2m %zmm0, %k0
2627; AVX512BW-NEXT:    kmovd %k0, (%rdi)
2628; AVX512BW-NEXT:    vzeroupper
2629; AVX512BW-NEXT:    retq
2630;
2631; AVX512DQ-LABEL: store_32i1:
2632; AVX512DQ:       ## %bb.0:
2633; AVX512DQ-NEXT:    vpmovsxbd %xmm0, %zmm1
2634; AVX512DQ-NEXT:    vpslld $31, %zmm1, %zmm1
2635; AVX512DQ-NEXT:    vpmovd2m %zmm1, %k0
2636; AVX512DQ-NEXT:    vextracti128 $1, %ymm0, %xmm0
2637; AVX512DQ-NEXT:    vpmovsxbd %xmm0, %zmm0
2638; AVX512DQ-NEXT:    vpslld $31, %zmm0, %zmm0
2639; AVX512DQ-NEXT:    vpmovd2m %zmm0, %k1
2640; AVX512DQ-NEXT:    kmovw %k1, 2(%rdi)
2641; AVX512DQ-NEXT:    kmovw %k0, (%rdi)
2642; AVX512DQ-NEXT:    vzeroupper
2643; AVX512DQ-NEXT:    retq
2644;
2645; X86-LABEL: store_32i1:
2646; X86:       ## %bb.0:
2647; X86-NEXT:    vpsllw $7, %ymm0, %ymm0
2648; X86-NEXT:    vpmovb2m %ymm0, %k0
2649; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
2650; X86-NEXT:    kmovd %k0, (%eax)
2651; X86-NEXT:    vzeroupper
2652; X86-NEXT:    retl
2653  store <32 x i1> %v, <32 x i1>* %a
2654  ret void
2655}
2656
; store_32i1_1: truncate a <32 x i16> argument to <32 x i1> and store it.
; Expected code: runs with avx512bw shift bit 0 of each word to the top bit
; across the whole zmm0, convert with vpmovw2m and store 32 bits with kmovd.
; Runs without it receive the argument in ymm0/ymm1, sign-extend each half to
; doublewords, shift by 31, convert to a mask, and store two 16-bit masks at
; offsets 0 and 2.
2657define void @store_32i1_1(<32 x i1>* %a, <32 x i16> %v) {
2658; KNL-LABEL: store_32i1_1:
2659; KNL:       ## %bb.0:
2660; KNL-NEXT:    vpmovsxwd %ymm0, %zmm0
2661; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
2662; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k0
2663; KNL-NEXT:    vpmovsxwd %ymm1, %zmm0
2664; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
2665; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k1
2666; KNL-NEXT:    kmovw %k1, 2(%rdi)
2667; KNL-NEXT:    kmovw %k0, (%rdi)
2668; KNL-NEXT:    vzeroupper
2669; KNL-NEXT:    retq
2670;
2671; SKX-LABEL: store_32i1_1:
2672; SKX:       ## %bb.0:
2673; SKX-NEXT:    vpsllw $15, %zmm0, %zmm0
2674; SKX-NEXT:    vpmovw2m %zmm0, %k0
2675; SKX-NEXT:    kmovd %k0, (%rdi)
2676; SKX-NEXT:    vzeroupper
2677; SKX-NEXT:    retq
2678;
2679; AVX512BW-LABEL: store_32i1_1:
2680; AVX512BW:       ## %bb.0:
2681; AVX512BW-NEXT:    vpsllw $15, %zmm0, %zmm0
2682; AVX512BW-NEXT:    vpmovw2m %zmm0, %k0
2683; AVX512BW-NEXT:    kmovd %k0, (%rdi)
2684; AVX512BW-NEXT:    vzeroupper
2685; AVX512BW-NEXT:    retq
2686;
2687; AVX512DQ-LABEL: store_32i1_1:
2688; AVX512DQ:       ## %bb.0:
2689; AVX512DQ-NEXT:    vpmovsxwd %ymm0, %zmm0
2690; AVX512DQ-NEXT:    vpslld $31, %zmm0, %zmm0
2691; AVX512DQ-NEXT:    vpmovd2m %zmm0, %k0
2692; AVX512DQ-NEXT:    vpmovsxwd %ymm1, %zmm0
2693; AVX512DQ-NEXT:    vpslld $31, %zmm0, %zmm0
2694; AVX512DQ-NEXT:    vpmovd2m %zmm0, %k1
2695; AVX512DQ-NEXT:    kmovw %k1, 2(%rdi)
2696; AVX512DQ-NEXT:    kmovw %k0, (%rdi)
2697; AVX512DQ-NEXT:    vzeroupper
2698; AVX512DQ-NEXT:    retq
2699;
2700; X86-LABEL: store_32i1_1:
2701; X86:       ## %bb.0:
2702; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
2703; X86-NEXT:    vpsllw $15, %zmm0, %zmm0
2704; X86-NEXT:    vpmovw2m %zmm0, %k0
2705; X86-NEXT:    kmovd %k0, (%eax)
2706; X86-NEXT:    vzeroupper
2707; X86-NEXT:    retl
2708  %v1 = trunc <32 x i16> %v to <32 x i1>
2709  store <32 x i1> %v1, <32 x i1>* %a
2710  ret void
2711}
2712
2713
; Store a <64 x i1> argument to memory.
; The runs with AVX512BW enabled expect one vpmovb2m and a single 64-bit kmovq
; store. The KNL and AVX512DQ runs expect four 16-lane masks stored with kmovw
; at offsets 0, 2, 4 and 6.
2714define void @store_64i1(<64 x i1>* %a, <64 x i1> %v) {
2715;
2716; KNL-LABEL: store_64i1:
2717; KNL:       ## %bb.0:
2718; KNL-NEXT:    vpmovsxbd %xmm0, %zmm0
2719; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
2720; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k0
2721; KNL-NEXT:    vpmovsxbd %xmm1, %zmm0
2722; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
2723; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k1
2724; KNL-NEXT:    vpmovsxbd %xmm2, %zmm0
2725; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
2726; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k2
2727; KNL-NEXT:    vpmovsxbd %xmm3, %zmm0
2728; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
2729; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k3
2730; KNL-NEXT:    kmovw %k3, 6(%rdi)
2731; KNL-NEXT:    kmovw %k2, 4(%rdi)
2732; KNL-NEXT:    kmovw %k1, 2(%rdi)
2733; KNL-NEXT:    kmovw %k0, (%rdi)
2734; KNL-NEXT:    vzeroupper
2735; KNL-NEXT:    retq
2736;
2737; SKX-LABEL: store_64i1:
2738; SKX:       ## %bb.0:
2739; SKX-NEXT:    vpsllw $7, %zmm0, %zmm0
2740; SKX-NEXT:    vpmovb2m %zmm0, %k0
2741; SKX-NEXT:    kmovq %k0, (%rdi)
2742; SKX-NEXT:    vzeroupper
2743; SKX-NEXT:    retq
2744;
2745; AVX512BW-LABEL: store_64i1:
2746; AVX512BW:       ## %bb.0:
2747; AVX512BW-NEXT:    vpsllw $7, %zmm0, %zmm0
2748; AVX512BW-NEXT:    vpmovb2m %zmm0, %k0
2749; AVX512BW-NEXT:    kmovq %k0, (%rdi)
2750; AVX512BW-NEXT:    vzeroupper
2751; AVX512BW-NEXT:    retq
2752;
2753; AVX512DQ-LABEL: store_64i1:
2754; AVX512DQ:       ## %bb.0:
2755; AVX512DQ-NEXT:    vpmovsxbd %xmm0, %zmm0
2756; AVX512DQ-NEXT:    vpslld $31, %zmm0, %zmm0
2757; AVX512DQ-NEXT:    vpmovd2m %zmm0, %k0
2758; AVX512DQ-NEXT:    vpmovsxbd %xmm1, %zmm0
2759; AVX512DQ-NEXT:    vpslld $31, %zmm0, %zmm0
2760; AVX512DQ-NEXT:    vpmovd2m %zmm0, %k1
2761; AVX512DQ-NEXT:    vpmovsxbd %xmm2, %zmm0
2762; AVX512DQ-NEXT:    vpslld $31, %zmm0, %zmm0
2763; AVX512DQ-NEXT:    vpmovd2m %zmm0, %k2
2764; AVX512DQ-NEXT:    vpmovsxbd %xmm3, %zmm0
2765; AVX512DQ-NEXT:    vpslld $31, %zmm0, %zmm0
2766; AVX512DQ-NEXT:    vpmovd2m %zmm0, %k3
2767; AVX512DQ-NEXT:    kmovw %k3, 6(%rdi)
2768; AVX512DQ-NEXT:    kmovw %k2, 4(%rdi)
2769; AVX512DQ-NEXT:    kmovw %k1, 2(%rdi)
2770; AVX512DQ-NEXT:    kmovw %k0, (%rdi)
2771; AVX512DQ-NEXT:    vzeroupper
2772; AVX512DQ-NEXT:    retq
2773;
2774; X86-LABEL: store_64i1:
2775; X86:       ## %bb.0:
2776; X86-NEXT:    vpsllw $7, %zmm0, %zmm0
2777; X86-NEXT:    vpmovb2m %zmm0, %k0
2778; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
2779; X86-NEXT:    kmovq %k0, (%eax)
2780; X86-NEXT:    vzeroupper
2781; X86-NEXT:    retl
2782  store <64 x i1> %v, <64 x i1>* %a
2783  ret void
2784}
2785
; Compare a <16 x i32> vector against zero, keep only the low 8 mask lanes,
; bitcast them to i8, zero-extend to i32 and double the result.
; The runs with AVX512DQ enabled expect the mask to be read with kmovb and no
; separate zero-extension instruction. The KNL and AVX512BW runs expect
; kmovw/kmovd followed by an explicit movzbl.
2786define i32 @test_bitcast_v8i1_zext(<16 x i32> %a) {
2787; KNL-LABEL: test_bitcast_v8i1_zext:
2788; KNL:       ## %bb.0:
2789; KNL-NEXT:    vptestnmd %zmm0, %zmm0, %k0
2790; KNL-NEXT:    kmovw %k0, %eax
2791; KNL-NEXT:    movzbl %al, %eax
2792; KNL-NEXT:    addl %eax, %eax
2793; KNL-NEXT:    vzeroupper
2794; KNL-NEXT:    retq
2795;
2796; SKX-LABEL: test_bitcast_v8i1_zext:
2797; SKX:       ## %bb.0:
2798; SKX-NEXT:    vptestnmd %zmm0, %zmm0, %k0
2799; SKX-NEXT:    kmovb %k0, %eax
2800; SKX-NEXT:    addl %eax, %eax
2801; SKX-NEXT:    vzeroupper
2802; SKX-NEXT:    retq
2803;
2804; AVX512BW-LABEL: test_bitcast_v8i1_zext:
2805; AVX512BW:       ## %bb.0:
2806; AVX512BW-NEXT:    vptestnmd %zmm0, %zmm0, %k0
2807; AVX512BW-NEXT:    kmovd %k0, %eax
2808; AVX512BW-NEXT:    movzbl %al, %eax
2809; AVX512BW-NEXT:    addl %eax, %eax
2810; AVX512BW-NEXT:    vzeroupper
2811; AVX512BW-NEXT:    retq
2812;
2813; AVX512DQ-LABEL: test_bitcast_v8i1_zext:
2814; AVX512DQ:       ## %bb.0:
2815; AVX512DQ-NEXT:    vptestnmd %zmm0, %zmm0, %k0
2816; AVX512DQ-NEXT:    kmovb %k0, %eax
2817; AVX512DQ-NEXT:    addl %eax, %eax
2818; AVX512DQ-NEXT:    vzeroupper
2819; AVX512DQ-NEXT:    retq
2820;
2821; X86-LABEL: test_bitcast_v8i1_zext:
2822; X86:       ## %bb.0:
2823; X86-NEXT:    vptestnmd %zmm0, %zmm0, %k0
2824; X86-NEXT:    kmovb %k0, %eax
2825; X86-NEXT:    addl %eax, %eax
2826; X86-NEXT:    vzeroupper
2827; X86-NEXT:    retl
2828   %v1 = icmp eq <16 x i32> %a, zeroinitializer
   ; Keep lanes 0-7 of the 16-lane compare result.
2829   %mask = shufflevector <16 x i1> %v1, <16 x i1> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
2830   %mask1 = bitcast <8 x i1> %mask to i8
2831   %val = zext i8 %mask1 to i32
2832   %val1 = add i32 %val, %val
2833   ret i32 %val1
2834}
2835
; Compare a <16 x i32> vector against zero, bitcast the full 16-lane mask to
; i16, zero-extend to i32 and double the result.
; Every run expects the mask to be read with kmovw directly followed by the
; add, with no separate zero-extension instruction.
2836define i32 @test_bitcast_v16i1_zext(<16 x i32> %a) {
2837; CHECK-LABEL: test_bitcast_v16i1_zext:
2838; CHECK:       ## %bb.0:
2839; CHECK-NEXT:    vptestnmd %zmm0, %zmm0, %k0
2840; CHECK-NEXT:    kmovw %k0, %eax
2841; CHECK-NEXT:    addl %eax, %eax
2842; CHECK-NEXT:    vzeroupper
2843; CHECK-NEXT:    retq
2844;
2845; X86-LABEL: test_bitcast_v16i1_zext:
2846; X86:       ## %bb.0:
2847; X86-NEXT:    vptestnmd %zmm0, %zmm0, %k0
2848; X86-NEXT:    kmovw %k0, %eax
2849; X86-NEXT:    addl %eax, %eax
2850; X86-NEXT:    vzeroupper
2851; X86-NEXT:    retl
2852   %v1 = icmp eq <16 x i32> %a, zeroinitializer
2853   %mask1 = bitcast <16 x i1> %v1 to i16
2854   %val = zext i16 %mask1 to i32
2855   %val1 = add i32 %val, %val
2856   ret i32 %val1
2857}
2858
; Add two <16 x i1> vectors obtained by bitcasting the i16 arguments.
; Every run expects the lane-wise i1 addition to be emitted as a mask XOR
; (kxorw); only the kmov variants used to move values in and out differ.
2859define i16 @test_v16i1_add(i16 %x, i16 %y) {
2860; KNL-LABEL: test_v16i1_add:
2861; KNL:       ## %bb.0:
2862; KNL-NEXT:    kmovw %edi, %k0
2863; KNL-NEXT:    kmovw %esi, %k1
2864; KNL-NEXT:    kxorw %k1, %k0, %k0
2865; KNL-NEXT:    kmovw %k0, %eax
2866; KNL-NEXT:    ## kill: def $ax killed $ax killed $eax
2867; KNL-NEXT:    retq
2868;
2869; SKX-LABEL: test_v16i1_add:
2870; SKX:       ## %bb.0:
2871; SKX-NEXT:    kmovd %edi, %k0
2872; SKX-NEXT:    kmovd %esi, %k1
2873; SKX-NEXT:    kxorw %k1, %k0, %k0
2874; SKX-NEXT:    kmovd %k0, %eax
2875; SKX-NEXT:    ## kill: def $ax killed $ax killed $eax
2876; SKX-NEXT:    retq
2877;
2878; AVX512BW-LABEL: test_v16i1_add:
2879; AVX512BW:       ## %bb.0:
2880; AVX512BW-NEXT:    kmovd %edi, %k0
2881; AVX512BW-NEXT:    kmovd %esi, %k1
2882; AVX512BW-NEXT:    kxorw %k1, %k0, %k0
2883; AVX512BW-NEXT:    kmovd %k0, %eax
2884; AVX512BW-NEXT:    ## kill: def $ax killed $ax killed $eax
2885; AVX512BW-NEXT:    retq
2886;
2887; AVX512DQ-LABEL: test_v16i1_add:
2888; AVX512DQ:       ## %bb.0:
2889; AVX512DQ-NEXT:    kmovw %edi, %k0
2890; AVX512DQ-NEXT:    kmovw %esi, %k1
2891; AVX512DQ-NEXT:    kxorw %k1, %k0, %k0
2892; AVX512DQ-NEXT:    kmovw %k0, %eax
2893; AVX512DQ-NEXT:    ## kill: def $ax killed $ax killed $eax
2894; AVX512DQ-NEXT:    retq
2895;
2896; X86-LABEL: test_v16i1_add:
2897; X86:       ## %bb.0:
2898; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k0
2899; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
2900; X86-NEXT:    kxorw %k1, %k0, %k0
2901; X86-NEXT:    kmovd %k0, %eax
2902; X86-NEXT:    ## kill: def $ax killed $ax killed $eax
2903; X86-NEXT:    retl
2904  %m0 = bitcast i16 %x to <16 x i1>
2905  %m1 = bitcast i16 %y to <16 x i1>
2906  %m2 = add <16 x i1> %m0,  %m1
2907  %ret = bitcast <16 x i1> %m2 to i16
2908  ret i16 %ret
2909}
2910
; Subtract two <16 x i1> vectors obtained by bitcasting the i16 arguments.
; Every run expects the lane-wise i1 subtraction to be emitted as a mask XOR
; (kxorw), the same instruction the checks expect for the i1 addition test.
2911define i16 @test_v16i1_sub(i16 %x, i16 %y) {
2912; KNL-LABEL: test_v16i1_sub:
2913; KNL:       ## %bb.0:
2914; KNL-NEXT:    kmovw %edi, %k0
2915; KNL-NEXT:    kmovw %esi, %k1
2916; KNL-NEXT:    kxorw %k1, %k0, %k0
2917; KNL-NEXT:    kmovw %k0, %eax
2918; KNL-NEXT:    ## kill: def $ax killed $ax killed $eax
2919; KNL-NEXT:    retq
2920;
2921; SKX-LABEL: test_v16i1_sub:
2922; SKX:       ## %bb.0:
2923; SKX-NEXT:    kmovd %edi, %k0
2924; SKX-NEXT:    kmovd %esi, %k1
2925; SKX-NEXT:    kxorw %k1, %k0, %k0
2926; SKX-NEXT:    kmovd %k0, %eax
2927; SKX-NEXT:    ## kill: def $ax killed $ax killed $eax
2928; SKX-NEXT:    retq
2929;
2930; AVX512BW-LABEL: test_v16i1_sub:
2931; AVX512BW:       ## %bb.0:
2932; AVX512BW-NEXT:    kmovd %edi, %k0
2933; AVX512BW-NEXT:    kmovd %esi, %k1
2934; AVX512BW-NEXT:    kxorw %k1, %k0, %k0
2935; AVX512BW-NEXT:    kmovd %k0, %eax
2936; AVX512BW-NEXT:    ## kill: def $ax killed $ax killed $eax
2937; AVX512BW-NEXT:    retq
2938;
2939; AVX512DQ-LABEL: test_v16i1_sub:
2940; AVX512DQ:       ## %bb.0:
2941; AVX512DQ-NEXT:    kmovw %edi, %k0
2942; AVX512DQ-NEXT:    kmovw %esi, %k1
2943; AVX512DQ-NEXT:    kxorw %k1, %k0, %k0
2944; AVX512DQ-NEXT:    kmovw %k0, %eax
2945; AVX512DQ-NEXT:    ## kill: def $ax killed $ax killed $eax
2946; AVX512DQ-NEXT:    retq
2947;
2948; X86-LABEL: test_v16i1_sub:
2949; X86:       ## %bb.0:
2950; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k0
2951; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
2952; X86-NEXT:    kxorw %k1, %k0, %k0
2953; X86-NEXT:    kmovd %k0, %eax
2954; X86-NEXT:    ## kill: def $ax killed $ax killed $eax
2955; X86-NEXT:    retl
2956  %m0 = bitcast i16 %x to <16 x i1>
2957  %m1 = bitcast i16 %y to <16 x i1>
2958  %m2 = sub <16 x i1> %m0,  %m1
2959  %ret = bitcast <16 x i1> %m2 to i16
2960  ret i16 %ret
2961}
2962
; Multiply two <16 x i1> vectors obtained by bitcasting the i16 arguments.
; Every run expects the lane-wise i1 multiplication to be emitted as a mask
; AND (kandw); only the kmov variants used to move values in and out differ.
2963define i16 @test_v16i1_mul(i16 %x, i16 %y) {
2964; KNL-LABEL: test_v16i1_mul:
2965; KNL:       ## %bb.0:
2966; KNL-NEXT:    kmovw %edi, %k0
2967; KNL-NEXT:    kmovw %esi, %k1
2968; KNL-NEXT:    kandw %k1, %k0, %k0
2969; KNL-NEXT:    kmovw %k0, %eax
2970; KNL-NEXT:    ## kill: def $ax killed $ax killed $eax
2971; KNL-NEXT:    retq
2972;
2973; SKX-LABEL: test_v16i1_mul:
2974; SKX:       ## %bb.0:
2975; SKX-NEXT:    kmovd %edi, %k0
2976; SKX-NEXT:    kmovd %esi, %k1
2977; SKX-NEXT:    kandw %k1, %k0, %k0
2978; SKX-NEXT:    kmovd %k0, %eax
2979; SKX-NEXT:    ## kill: def $ax killed $ax killed $eax
2980; SKX-NEXT:    retq
2981;
2982; AVX512BW-LABEL: test_v16i1_mul:
2983; AVX512BW:       ## %bb.0:
2984; AVX512BW-NEXT:    kmovd %edi, %k0
2985; AVX512BW-NEXT:    kmovd %esi, %k1
2986; AVX512BW-NEXT:    kandw %k1, %k0, %k0
2987; AVX512BW-NEXT:    kmovd %k0, %eax
2988; AVX512BW-NEXT:    ## kill: def $ax killed $ax killed $eax
2989; AVX512BW-NEXT:    retq
2990;
2991; AVX512DQ-LABEL: test_v16i1_mul:
2992; AVX512DQ:       ## %bb.0:
2993; AVX512DQ-NEXT:    kmovw %edi, %k0
2994; AVX512DQ-NEXT:    kmovw %esi, %k1
2995; AVX512DQ-NEXT:    kandw %k1, %k0, %k0
2996; AVX512DQ-NEXT:    kmovw %k0, %eax
2997; AVX512DQ-NEXT:    ## kill: def $ax killed $ax killed $eax
2998; AVX512DQ-NEXT:    retq
2999;
3000; X86-LABEL: test_v16i1_mul:
3001; X86:       ## %bb.0:
3002; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k0
3003; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
3004; X86-NEXT:    kandw %k1, %k0, %k0
3005; X86-NEXT:    kmovd %k0, %eax
3006; X86-NEXT:    ## kill: def $ax killed $ax killed $eax
3007; X86-NEXT:    retl
3008  %m0 = bitcast i16 %x to <16 x i1>
3009  %m1 = bitcast i16 %y to <16 x i1>
3010  %m2 = mul <16 x i1> %m0,  %m1
3011  %ret = bitcast <16 x i1> %m2 to i16
3012  ret i16 %ret
3013}
3014
; Add two <8 x i1> vectors obtained by bitcasting the i8 arguments.
; The lane-wise i1 addition is expected to become a mask XOR. The runs with
; AVX512DQ enabled expect the byte form (kxorb); the KNL and AVX512BW runs
; expect the 16-bit form (kxorw).
3015define i8 @test_v8i1_add(i8 %x, i8 %y) {
3016; KNL-LABEL: test_v8i1_add:
3017; KNL:       ## %bb.0:
3018; KNL-NEXT:    kmovw %edi, %k0
3019; KNL-NEXT:    kmovw %esi, %k1
3020; KNL-NEXT:    kxorw %k1, %k0, %k0
3021; KNL-NEXT:    kmovw %k0, %eax
3022; KNL-NEXT:    ## kill: def $al killed $al killed $eax
3023; KNL-NEXT:    retq
3024;
3025; SKX-LABEL: test_v8i1_add:
3026; SKX:       ## %bb.0:
3027; SKX-NEXT:    kmovd %edi, %k0
3028; SKX-NEXT:    kmovd %esi, %k1
3029; SKX-NEXT:    kxorb %k1, %k0, %k0
3030; SKX-NEXT:    kmovd %k0, %eax
3031; SKX-NEXT:    ## kill: def $al killed $al killed $eax
3032; SKX-NEXT:    retq
3033;
3034; AVX512BW-LABEL: test_v8i1_add:
3035; AVX512BW:       ## %bb.0:
3036; AVX512BW-NEXT:    kmovd %edi, %k0
3037; AVX512BW-NEXT:    kmovd %esi, %k1
3038; AVX512BW-NEXT:    kxorw %k1, %k0, %k0
3039; AVX512BW-NEXT:    kmovd %k0, %eax
3040; AVX512BW-NEXT:    ## kill: def $al killed $al killed $eax
3041; AVX512BW-NEXT:    retq
3042;
3043; AVX512DQ-LABEL: test_v8i1_add:
3044; AVX512DQ:       ## %bb.0:
3045; AVX512DQ-NEXT:    kmovw %edi, %k0
3046; AVX512DQ-NEXT:    kmovw %esi, %k1
3047; AVX512DQ-NEXT:    kxorb %k1, %k0, %k0
3048; AVX512DQ-NEXT:    kmovw %k0, %eax
3049; AVX512DQ-NEXT:    ## kill: def $al killed $al killed $eax
3050; AVX512DQ-NEXT:    retq
3051;
3052; X86-LABEL: test_v8i1_add:
3053; X86:       ## %bb.0:
3054; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k0
3055; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1
3056; X86-NEXT:    kxorb %k1, %k0, %k0
3057; X86-NEXT:    kmovd %k0, %eax
3058; X86-NEXT:    ## kill: def $al killed $al killed $eax
3059; X86-NEXT:    retl
3060  %m0 = bitcast i8 %x to <8 x i1>
3061  %m1 = bitcast i8 %y to <8 x i1>
3062  %m2 = add <8 x i1> %m0,  %m1
3063  %ret = bitcast <8 x i1> %m2 to i8
3064  ret i8 %ret
3065}
3066
; Subtract two <8 x i1> vectors obtained by bitcasting the i8 arguments.
; The lane-wise i1 subtraction is expected to become a mask XOR. The runs with
; AVX512DQ enabled expect the byte form (kxorb); the KNL and AVX512BW runs
; expect the 16-bit form (kxorw).
3067define i8 @test_v8i1_sub(i8 %x, i8 %y) {
3068; KNL-LABEL: test_v8i1_sub:
3069; KNL:       ## %bb.0:
3070; KNL-NEXT:    kmovw %edi, %k0
3071; KNL-NEXT:    kmovw %esi, %k1
3072; KNL-NEXT:    kxorw %k1, %k0, %k0
3073; KNL-NEXT:    kmovw %k0, %eax
3074; KNL-NEXT:    ## kill: def $al killed $al killed $eax
3075; KNL-NEXT:    retq
3076;
3077; SKX-LABEL: test_v8i1_sub:
3078; SKX:       ## %bb.0:
3079; SKX-NEXT:    kmovd %edi, %k0
3080; SKX-NEXT:    kmovd %esi, %k1
3081; SKX-NEXT:    kxorb %k1, %k0, %k0
3082; SKX-NEXT:    kmovd %k0, %eax
3083; SKX-NEXT:    ## kill: def $al killed $al killed $eax
3084; SKX-NEXT:    retq
3085;
3086; AVX512BW-LABEL: test_v8i1_sub:
3087; AVX512BW:       ## %bb.0:
3088; AVX512BW-NEXT:    kmovd %edi, %k0
3089; AVX512BW-NEXT:    kmovd %esi, %k1
3090; AVX512BW-NEXT:    kxorw %k1, %k0, %k0
3091; AVX512BW-NEXT:    kmovd %k0, %eax
3092; AVX512BW-NEXT:    ## kill: def $al killed $al killed $eax
3093; AVX512BW-NEXT:    retq
3094;
3095; AVX512DQ-LABEL: test_v8i1_sub:
3096; AVX512DQ:       ## %bb.0:
3097; AVX512DQ-NEXT:    kmovw %edi, %k0
3098; AVX512DQ-NEXT:    kmovw %esi, %k1
3099; AVX512DQ-NEXT:    kxorb %k1, %k0, %k0
3100; AVX512DQ-NEXT:    kmovw %k0, %eax
3101; AVX512DQ-NEXT:    ## kill: def $al killed $al killed $eax
3102; AVX512DQ-NEXT:    retq
3103;
3104; X86-LABEL: test_v8i1_sub:
3105; X86:       ## %bb.0:
3106; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k0
3107; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1
3108; X86-NEXT:    kxorb %k1, %k0, %k0
3109; X86-NEXT:    kmovd %k0, %eax
3110; X86-NEXT:    ## kill: def $al killed $al killed $eax
3111; X86-NEXT:    retl
3112  %m0 = bitcast i8 %x to <8 x i1>
3113  %m1 = bitcast i8 %y to <8 x i1>
3114  %m2 = sub <8 x i1> %m0,  %m1
3115  %ret = bitcast <8 x i1> %m2 to i8
3116  ret i8 %ret
3117}
3118
; Multiply two <8 x i1> vectors obtained by bitcasting the i8 arguments.
; The lane-wise i1 multiplication is expected to become a mask AND. The runs
; with AVX512DQ enabled expect the byte form (kandb); the KNL and AVX512BW
; runs expect the 16-bit form (kandw).
3119define i8 @test_v8i1_mul(i8 %x, i8 %y) {
3120; KNL-LABEL: test_v8i1_mul:
3121; KNL:       ## %bb.0:
3122; KNL-NEXT:    kmovw %edi, %k0
3123; KNL-NEXT:    kmovw %esi, %k1
3124; KNL-NEXT:    kandw %k1, %k0, %k0
3125; KNL-NEXT:    kmovw %k0, %eax
3126; KNL-NEXT:    ## kill: def $al killed $al killed $eax
3127; KNL-NEXT:    retq
3128;
3129; SKX-LABEL: test_v8i1_mul:
3130; SKX:       ## %bb.0:
3131; SKX-NEXT:    kmovd %edi, %k0
3132; SKX-NEXT:    kmovd %esi, %k1
3133; SKX-NEXT:    kandb %k1, %k0, %k0
3134; SKX-NEXT:    kmovd %k0, %eax
3135; SKX-NEXT:    ## kill: def $al killed $al killed $eax
3136; SKX-NEXT:    retq
3137;
3138; AVX512BW-LABEL: test_v8i1_mul:
3139; AVX512BW:       ## %bb.0:
3140; AVX512BW-NEXT:    kmovd %edi, %k0
3141; AVX512BW-NEXT:    kmovd %esi, %k1
3142; AVX512BW-NEXT:    kandw %k1, %k0, %k0
3143; AVX512BW-NEXT:    kmovd %k0, %eax
3144; AVX512BW-NEXT:    ## kill: def $al killed $al killed $eax
3145; AVX512BW-NEXT:    retq
3146;
3147; AVX512DQ-LABEL: test_v8i1_mul:
3148; AVX512DQ:       ## %bb.0:
3149; AVX512DQ-NEXT:    kmovw %edi, %k0
3150; AVX512DQ-NEXT:    kmovw %esi, %k1
3151; AVX512DQ-NEXT:    kandb %k1, %k0, %k0
3152; AVX512DQ-NEXT:    kmovw %k0, %eax
3153; AVX512DQ-NEXT:    ## kill: def $al killed $al killed $eax
3154; AVX512DQ-NEXT:    retq
3155;
3156; X86-LABEL: test_v8i1_mul:
3157; X86:       ## %bb.0:
3158; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k0
3159; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1
3160; X86-NEXT:    kandb %k1, %k0, %k0
3161; X86-NEXT:    kmovd %k0, %eax
3162; X86-NEXT:    ## kill: def $al killed $al killed $eax
3163; X86-NEXT:    retl
3164  %m0 = bitcast i8 %x to <8 x i1>
3165  %m1 = bitcast i8 %y to <8 x i1>
3166  %m2 = mul <8 x i1> %m0,  %m1
3167  %ret = bitcast <8 x i1> %m2 to i8
3168  ret i8 %ret
3169}
3170
3171; Make sure we don't emit a ktest for signed comparisons.
; The 16-lane mask (both inputs equal to zero) is bitcast to i16 and branched
; on with a signed `icmp sgt ..., 0`. Every run expects the mask to be moved
; to a general-purpose register and tested with testw + jle; @foo is called
; only on the path where the signed comparison is false.
3172define void @ktest_signed(<16 x i32> %x, <16 x i32> %y) {
3173; KNL-LABEL: ktest_signed:
3174; KNL:       ## %bb.0:
3175; KNL-NEXT:    pushq %rax
3176; KNL-NEXT:    .cfi_def_cfa_offset 16
3177; KNL-NEXT:    vporq %zmm1, %zmm0, %zmm0
3178; KNL-NEXT:    vptestnmd %zmm0, %zmm0, %k0
3179; KNL-NEXT:    kmovw %k0, %eax
3180; KNL-NEXT:    testw %ax, %ax
3181; KNL-NEXT:    jle LBB64_1
3182; KNL-NEXT:  ## %bb.2: ## %bb.2
3183; KNL-NEXT:    popq %rax
3184; KNL-NEXT:    vzeroupper
3185; KNL-NEXT:    retq
3186; KNL-NEXT:  LBB64_1: ## %bb.1
3187; KNL-NEXT:    vzeroupper
3188; KNL-NEXT:    callq _foo
3189; KNL-NEXT:    popq %rax
3190; KNL-NEXT:    retq
3191;
3192; SKX-LABEL: ktest_signed:
3193; SKX:       ## %bb.0:
3194; SKX-NEXT:    pushq %rax
3195; SKX-NEXT:    .cfi_def_cfa_offset 16
3196; SKX-NEXT:    vporq %zmm1, %zmm0, %zmm0
3197; SKX-NEXT:    vptestnmd %zmm0, %zmm0, %k0
3198; SKX-NEXT:    kmovd %k0, %eax
3199; SKX-NEXT:    testw %ax, %ax
3200; SKX-NEXT:    jle LBB64_1
3201; SKX-NEXT:  ## %bb.2: ## %bb.2
3202; SKX-NEXT:    popq %rax
3203; SKX-NEXT:    vzeroupper
3204; SKX-NEXT:    retq
3205; SKX-NEXT:  LBB64_1: ## %bb.1
3206; SKX-NEXT:    vzeroupper
3207; SKX-NEXT:    callq _foo
3208; SKX-NEXT:    popq %rax
3209; SKX-NEXT:    retq
3210;
3211; AVX512BW-LABEL: ktest_signed:
3212; AVX512BW:       ## %bb.0:
3213; AVX512BW-NEXT:    pushq %rax
3214; AVX512BW-NEXT:    .cfi_def_cfa_offset 16
3215; AVX512BW-NEXT:    vporq %zmm1, %zmm0, %zmm0
3216; AVX512BW-NEXT:    vptestnmd %zmm0, %zmm0, %k0
3217; AVX512BW-NEXT:    kmovd %k0, %eax
3218; AVX512BW-NEXT:    testw %ax, %ax
3219; AVX512BW-NEXT:    jle LBB64_1
3220; AVX512BW-NEXT:  ## %bb.2: ## %bb.2
3221; AVX512BW-NEXT:    popq %rax
3222; AVX512BW-NEXT:    vzeroupper
3223; AVX512BW-NEXT:    retq
3224; AVX512BW-NEXT:  LBB64_1: ## %bb.1
3225; AVX512BW-NEXT:    vzeroupper
3226; AVX512BW-NEXT:    callq _foo
3227; AVX512BW-NEXT:    popq %rax
3228; AVX512BW-NEXT:    retq
3229;
3230; AVX512DQ-LABEL: ktest_signed:
3231; AVX512DQ:       ## %bb.0:
3232; AVX512DQ-NEXT:    pushq %rax
3233; AVX512DQ-NEXT:    .cfi_def_cfa_offset 16
3234; AVX512DQ-NEXT:    vporq %zmm1, %zmm0, %zmm0
3235; AVX512DQ-NEXT:    vptestnmd %zmm0, %zmm0, %k0
3236; AVX512DQ-NEXT:    kmovw %k0, %eax
3237; AVX512DQ-NEXT:    testw %ax, %ax
3238; AVX512DQ-NEXT:    jle LBB64_1
3239; AVX512DQ-NEXT:  ## %bb.2: ## %bb.2
3240; AVX512DQ-NEXT:    popq %rax
3241; AVX512DQ-NEXT:    vzeroupper
3242; AVX512DQ-NEXT:    retq
3243; AVX512DQ-NEXT:  LBB64_1: ## %bb.1
3244; AVX512DQ-NEXT:    vzeroupper
3245; AVX512DQ-NEXT:    callq _foo
3246; AVX512DQ-NEXT:    popq %rax
3247; AVX512DQ-NEXT:    retq
3248;
3249; X86-LABEL: ktest_signed:
3250; X86:       ## %bb.0:
3251; X86-NEXT:    subl $12, %esp
3252; X86-NEXT:    .cfi_def_cfa_offset 16
3253; X86-NEXT:    vporq %zmm1, %zmm0, %zmm0
3254; X86-NEXT:    vptestnmd %zmm0, %zmm0, %k0
3255; X86-NEXT:    kmovd %k0, %eax
3256; X86-NEXT:    testw %ax, %ax
3257; X86-NEXT:    jle LBB64_1
3258; X86-NEXT:  ## %bb.2: ## %bb.2
3259; X86-NEXT:    addl $12, %esp
3260; X86-NEXT:    vzeroupper
3261; X86-NEXT:    retl
3262; X86-NEXT:  LBB64_1: ## %bb.1
3263; X86-NEXT:    vzeroupper
3264; X86-NEXT:    calll _foo
3265; X86-NEXT:    addl $12, %esp
3266; X86-NEXT:    retl
3267  %a = icmp eq <16 x i32> %x, zeroinitializer
3268  %b = icmp eq <16 x i32> %y, zeroinitializer
3269  %c = and <16 x i1> %a, %b
3270  %d = bitcast <16 x i1> %c to i16
3271  %e = icmp sgt i16 %d, 0
3272  br i1 %e, label %bb.2, label %bb.1
3273bb.1:
3274  call void @foo()
3275  br label %bb.2
3276bb.2:
3277  ret void
3278}
3279declare void @foo()
3280
3281; Make sure we can use the C flag from kortest to check for all ones.
; The 16-lane mask (both inputs equal to zero) is bitcast to i16 and compared
; for equality with -1, i.e. all 16 lanes set. Every run expects kortestw on
; the mask followed by jb, which skips the call to @foo when the comparison
; is true.
3282define void @ktest_allones(<16 x i32> %x, <16 x i32> %y) {
3283; CHECK-LABEL: ktest_allones:
3284; CHECK:       ## %bb.0:
3285; CHECK-NEXT:    pushq %rax
3286; CHECK-NEXT:    .cfi_def_cfa_offset 16
3287; CHECK-NEXT:    vporq %zmm1, %zmm0, %zmm0
3288; CHECK-NEXT:    vptestnmd %zmm0, %zmm0, %k0
3289; CHECK-NEXT:    kortestw %k0, %k0
3290; CHECK-NEXT:    jb LBB65_2
3291; CHECK-NEXT:  ## %bb.1: ## %bb.1
3292; CHECK-NEXT:    vzeroupper
3293; CHECK-NEXT:    callq _foo
3294; CHECK-NEXT:  LBB65_2: ## %bb.2
3295; CHECK-NEXT:    popq %rax
3296; CHECK-NEXT:    vzeroupper
3297; CHECK-NEXT:    retq
3298;
3299; X86-LABEL: ktest_allones:
3300; X86:       ## %bb.0:
3301; X86-NEXT:    subl $12, %esp
3302; X86-NEXT:    .cfi_def_cfa_offset 16
3303; X86-NEXT:    vporq %zmm1, %zmm0, %zmm0
3304; X86-NEXT:    vptestnmd %zmm0, %zmm0, %k0
3305; X86-NEXT:    kortestw %k0, %k0
3306; X86-NEXT:    jb LBB65_2
3307; X86-NEXT:  ## %bb.1: ## %bb.1
3308; X86-NEXT:    vzeroupper
3309; X86-NEXT:    calll _foo
3310; X86-NEXT:  LBB65_2: ## %bb.2
3311; X86-NEXT:    addl $12, %esp
3312; X86-NEXT:    vzeroupper
3313; X86-NEXT:    retl
3314  %a = icmp eq <16 x i32> %x, zeroinitializer
3315  %b = icmp eq <16 x i32> %y, zeroinitializer
3316  %c = and <16 x i1> %a, %b
3317  %d = bitcast <16 x i1> %c to i16
3318  %e = icmp eq i16 %d, -1
3319  br i1 %e, label %bb.2, label %bb.1
3320bb.1:
3321  call void @foo()
3322  br label %bb.2
3323bb.2:
3324  ret void
3325}
3326
3327; This is derived from an intrinsic test where v4i1 mask was created by _mm_cmp_epi32_mask, then it was passed to _mm512_mask_blend_epi32 which uses a v16i1 mask.
3328; The widening happens in the scalar domain between the intrinsics. The middle end optimized it to this.
; A <4 x i1> compare result is widened to <16 x i1> with two shufflevectors
; and used to select between %f (mask bit set) and %e (mask bit clear).
; The runs with AVX512VL enabled expect a 128-bit vpcmpeqd writing %k1
; directly. The other runs expect a 512-bit compare followed by a
; kshiftlw/kshiftrw $12 pair before the masked blend.
3329define <8 x i64> @mask_widening(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c, <2 x i64> %d, <8 x i64> %e, <8 x i64> %f) {
3330; KNL-LABEL: mask_widening:
3331; KNL:       ## %bb.0: ## %entry
3332; KNL-NEXT:    ## kill: def $xmm1 killed $xmm1 def $zmm1
3333; KNL-NEXT:    ## kill: def $xmm0 killed $xmm0 def $zmm0
3334; KNL-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
3335; KNL-NEXT:    kshiftlw $12, %k0, %k0
3336; KNL-NEXT:    kshiftrw $12, %k0, %k1
3337; KNL-NEXT:    vpblendmd %zmm5, %zmm4, %zmm0 {%k1}
3338; KNL-NEXT:    retq
3339;
3340; SKX-LABEL: mask_widening:
3341; SKX:       ## %bb.0: ## %entry
3342; SKX-NEXT:    vpcmpeqd %xmm1, %xmm0, %k1
3343; SKX-NEXT:    vpblendmd %zmm5, %zmm4, %zmm0 {%k1}
3344; SKX-NEXT:    retq
3345;
3346; AVX512BW-LABEL: mask_widening:
3347; AVX512BW:       ## %bb.0: ## %entry
3348; AVX512BW-NEXT:    ## kill: def $xmm1 killed $xmm1 def $zmm1
3349; AVX512BW-NEXT:    ## kill: def $xmm0 killed $xmm0 def $zmm0
3350; AVX512BW-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
3351; AVX512BW-NEXT:    kshiftlw $12, %k0, %k0
3352; AVX512BW-NEXT:    kshiftrw $12, %k0, %k1
3353; AVX512BW-NEXT:    vpblendmd %zmm5, %zmm4, %zmm0 {%k1}
3354; AVX512BW-NEXT:    retq
3355;
3356; AVX512DQ-LABEL: mask_widening:
3357; AVX512DQ:       ## %bb.0: ## %entry
3358; AVX512DQ-NEXT:    ## kill: def $xmm1 killed $xmm1 def $zmm1
3359; AVX512DQ-NEXT:    ## kill: def $xmm0 killed $xmm0 def $zmm0
3360; AVX512DQ-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
3361; AVX512DQ-NEXT:    kshiftlw $12, %k0, %k0
3362; AVX512DQ-NEXT:    kshiftrw $12, %k0, %k1
3363; AVX512DQ-NEXT:    vpblendmd %zmm5, %zmm4, %zmm0 {%k1}
3364; AVX512DQ-NEXT:    retq
3365;
3366; X86-LABEL: mask_widening:
3367; X86:       ## %bb.0: ## %entry
3368; X86-NEXT:    pushl %ebp
3369; X86-NEXT:    .cfi_def_cfa_offset 8
3370; X86-NEXT:    .cfi_offset %ebp, -8
3371; X86-NEXT:    movl %esp, %ebp
3372; X86-NEXT:    .cfi_def_cfa_register %ebp
3373; X86-NEXT:    andl $-64, %esp
3374; X86-NEXT:    subl $64, %esp
3375; X86-NEXT:    vpcmpeqd %xmm1, %xmm0, %k1
3376; X86-NEXT:    vmovdqa64 8(%ebp), %zmm0
3377; X86-NEXT:    vmovdqa32 72(%ebp), %zmm0 {%k1}
3378; X86-NEXT:    movl %ebp, %esp
3379; X86-NEXT:    popl %ebp
3380; X86-NEXT:    retl
3381entry:
3382  %0 = bitcast <2 x i64> %a to <4 x i32>
3383  %1 = bitcast <2 x i64> %b to <4 x i32>
3384  %2 = icmp eq <4 x i32> %0, %1
  ; Widen 4 -> 8 lanes: lanes 4-7 come from the zeroinitializer operand.
3385  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
3386  %4 = bitcast <8 x i64> %f to <16 x i32>
3387  %5 = bitcast <8 x i64> %e to <16 x i32>
  ; Widen 8 -> 16 lanes: index 8 selects element 0 of the second operand,
  ; which is the constant false, so lanes 8-15 are all false.
3388  %6 = shufflevector <8 x i1> %3, <8 x i1> <i1 false, i1 undef, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
3389  %7 = select <16 x i1> %6, <16 x i32> %4, <16 x i32> %5
3390  %8 = bitcast <16 x i32> %7 to <8 x i64>
3391  ret <8 x i64> %8
3392}
3393
; Store a constant <64 x i1> vector. The x86-64 runs expect the constant to be
; materialized as one 64-bit integer immediate (0xDFFFFF7BFFFFEFFD) and stored
; with a single movq. The i686 run expects two 32-bit immediate stores: the
; high half 0xDFFFFF7B at offset 4 and the low half 0xFFFFEFFD at offset 0.
3394define void @store_v64i1_constant(<64 x i1>* %R) {
3395; CHECK-LABEL: store_v64i1_constant:
3396; CHECK:       ## %bb.0: ## %entry
3397; CHECK-NEXT:    movabsq $-2305843576149381123, %rax ## imm = 0xDFFFFF7BFFFFEFFD
3398; CHECK-NEXT:    movq %rax, (%rdi)
3399; CHECK-NEXT:    retq
3400;
3401; X86-LABEL: store_v64i1_constant:
3402; X86:       ## %bb.0: ## %entry
3403; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
3404; X86-NEXT:    movl $-536871045, 4(%eax) ## imm = 0xDFFFFF7B
3405; X86-NEXT:    movl $-4099, (%eax) ## imm = 0xFFFFEFFD
3406; X86-NEXT:    retl
3407entry:
3408  store <64 x i1> <i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1>, <64 x i1>* %R
3409  ret void
3410}
3411
; Store the constant <2 x i1> vector <1, 0>. Every run expects a single byte
; store of the immediate 1 (lane 0 set, lane 1 clear).
3412define void @store_v2i1_constant(<2 x i1>* %R) {
3413; CHECK-LABEL: store_v2i1_constant:
3414; CHECK:       ## %bb.0: ## %entry
3415; CHECK-NEXT:    movb $1, (%rdi)
3416; CHECK-NEXT:    retq
3417;
3418; X86-LABEL: store_v2i1_constant:
3419; X86:       ## %bb.0: ## %entry
3420; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
3421; X86-NEXT:    movb $1, (%eax)
3422; X86-NEXT:    retl
3423entry:
3424  store <2 x i1> <i1 1, i1 0>, <2 x i1>* %R
3425  ret void
3426}
3427
; Store the constant <4 x i1> vector <1, 0, 1, 0>. Every run expects a single
; byte store of the immediate 5 (lanes 0 and 2 set).
3428define void @store_v4i1_constant(<4 x i1>* %R) {
3429; CHECK-LABEL: store_v4i1_constant:
3430; CHECK:       ## %bb.0: ## %entry
3431; CHECK-NEXT:    movb $5, (%rdi)
3432; CHECK-NEXT:    retq
3433;
3434; X86-LABEL: store_v4i1_constant:
3435; X86:       ## %bb.0: ## %entry
3436; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
3437; X86-NEXT:    movb $5, (%eax)
3438; X86-NEXT:    retl
3439entry:
3440  store <4 x i1> <i1 1, i1 0, i1 1, i1 0>, <4 x i1>* %R
3441  ret void
3442}
3443
3444; Make sure we bring the -1 constant into the mask domain.
; Two compare masks are bitcast to i16, one of them is inverted with a scalar
; `xor ..., -1`, and the AND of the two is bitcast back to <16 x i1> to drive
; a masked store. Every run expects no separate NOT instruction: the inverted
; `icmp ule` appears as vpcmpnleud, and its result masks the vptestmd that
; implements the `icmp ne` on the AND of arguments 1 and 2.
3445define void @mask_not_cast(i8*, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>) {
3446; CHECK-LABEL: mask_not_cast:
3447; CHECK:       ## %bb.0:
3448; CHECK-NEXT:    vpcmpnleud %zmm3, %zmm2, %k1
3449; CHECK-NEXT:    vptestmd %zmm0, %zmm1, %k1 {%k1}
3450; CHECK-NEXT:    vmovdqu32 %zmm0, (%rdi) {%k1}
3451; CHECK-NEXT:    vzeroupper
3452; CHECK-NEXT:    retq
3453;
3454; X86-LABEL: mask_not_cast:
3455; X86:       ## %bb.0:
3456; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
3457; X86-NEXT:    vpcmpnleud %zmm3, %zmm2, %k1
3458; X86-NEXT:    vptestmd %zmm0, %zmm1, %k1 {%k1}
3459; X86-NEXT:    vmovdqu32 %zmm0, (%eax) {%k1}
3460; X86-NEXT:    vzeroupper
3461; X86-NEXT:    retl
3462  %6 = and <8 x i64> %2, %1
3463  %7 = bitcast <8 x i64> %6 to <16 x i32>
3464  %8 = icmp ne <16 x i32> %7, zeroinitializer
3465  %9 = bitcast <16 x i1> %8 to i16
3466  %10 = bitcast <8 x i64> %3 to <16 x i32>
3467  %11 = bitcast <8 x i64> %4 to <16 x i32>
3468  %12 = icmp ule <16 x i32> %10, %11
3469  %13 = bitcast <16 x i1> %12 to i16
  ; Invert the ule mask in the scalar domain, then combine with the ne mask.
3470  %14 = xor i16 %13, -1
3471  %15 = and i16 %14, %9
3472  %16 = bitcast <8 x i64> %1 to <16 x i32>
3473  %17 = bitcast i8* %0 to <16 x i32>*
3474  %18 = bitcast i16 %15 to <16 x i1>
3475  tail call void @llvm.masked.store.v16i32.p0v16i32(<16 x i32> %16, <16 x i32>* %17, i32 1, <16 x i1> %18) #2
3476  ret void
3477}
3478declare void @llvm.masked.store.v16i32.p0v16i32(<16 x i32>, <16 x i32>*, i32, <16 x i1>)
3479