; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -stack-symbol-ordering=0 -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=KNL
; RUN: llc < %s -stack-symbol-ordering=0 -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX

; mask16: bitwise NOT of a 16-bit mask passed as i16.
; The argument is reinterpreted as <16 x i1>, xor'ed with an all-ones vector
; and cast back to i16. The shared checks expect kmovw / knotw / kmovw.
define i16 @mask16(i16 %x) {
; CHECK-LABEL: mask16:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k0
; CHECK-NEXT:    knotw %k0, %k0
; CHECK-NEXT:    kmovw %k0, %eax
; CHECK-NEXT:    ## kill: %AX<def> %AX<kill> %EAX<kill>
; CHECK-NEXT:    retq
  %m0 = bitcast i16 %x to <16 x i1>
  %m1 = xor <16 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
  %ret = bitcast <16 x i1> %m1 to i16
  ret i16 %ret
}

; mask16_zext: same 16-bit mask inversion as mask16, but the i16 result is
; zero-extended to i32 before being returned. The checks expect the same
; kmovw / knotw / kmovw sequence with no extra extension instruction.
define i32 @mask16_zext(i16 %x) {
; CHECK-LABEL: mask16_zext:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k0
; CHECK-NEXT:    knotw %k0, %k0
; CHECK-NEXT:    kmovw %k0, %eax
; CHECK-NEXT:    retq
  %m0 = bitcast i16 %x to <16 x i1>
  %m1 = xor <16 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
  %m2 = bitcast <16 x i1> %m1 to i16
  %ret = zext i16 %m2 to i32
  ret i32 %ret
}

; mask8: bitwise NOT of an 8-bit mask passed as i8 (via <8 x i1>).
; The knl run expects the 16-bit mask instructions (kmovw/knotw); the skx run
; expects the byte forms (kmovb/knotb).
define i8 @mask8(i8 %x) {
; KNL-LABEL: mask8:
; KNL:       ## BB#0:
; KNL-NEXT:    kmovw %edi, %k0
; KNL-NEXT:    knotw %k0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
; KNL-NEXT:    retq
;
; SKX-LABEL: mask8:
; SKX:       ## BB#0:
; SKX-NEXT:    kmovb %edi, %k0
; SKX-NEXT:    knotb %k0, %k0
; SKX-NEXT:    kmovb %k0, %eax
; SKX-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
; SKX-NEXT:    retq
  %m0 = bitcast i8 %x to <8 x i1>
  %m1 = xor <8 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
  %ret = bitcast <8 x i1> %m1 to i8
  ret i8 %ret
}

; mask8_zext: 8-bit mask inversion as in mask8, with the i8 result
; zero-extended to i32. The knl run expects word mask ops, the skx run
; expects byte mask ops.
define i32 @mask8_zext(i8 %x) {
; KNL-LABEL: mask8_zext:
; KNL:       ## BB#0:
; KNL-NEXT:    kmovw %edi, %k0
; KNL-NEXT:    knotw %k0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    retq
;
; SKX-LABEL: mask8_zext:
; SKX:       ## BB#0:
; SKX-NEXT:    kmovb %edi, %k0
; SKX-NEXT:    knotb %k0, %k0
; SKX-NEXT:    kmovb %k0, %eax
; SKX-NEXT:    retq
  %m0 = bitcast i8 %x to <8 x i1>
  %m1 = xor <8 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
  %m2 = bitcast <8 x i1> %m1 to i8
  %ret = zext i8 %m2 to i32
  ret i32 %ret
}

; mask16_mem: load an i16 from %ptr, invert it as a <16 x i1> mask and store
; the result back to the same address. The checks expect the load and store
; to be folded into kmovw with a memory operand.
define void @mask16_mem(i16* %ptr) {
; CHECK-LABEL: mask16_mem:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw (%rdi), %k0
; CHECK-NEXT:    knotw %k0, %k0
; CHECK-NEXT:    kmovw %k0, (%rdi)
; CHECK-NEXT:    retq
  %x = load i16, i16* %ptr, align 4
  %m0 = bitcast i16 %x to <16 x i1>
  %m1 = xor <16 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
  %ret = bitcast <16 x i1> %m1 to i16
  store i16 %ret, i16* %ptr, align 4
  ret void
}

; mask8_mem: load an i8 from %ptr, invert it as an <8 x i1> mask and store it
; back. The knl run expects the byte to go through a GPR (movzbl / movb)
; around word mask ops; the skx run expects kmovb with memory operands.
define void @mask8_mem(i8* %ptr) {
; KNL-LABEL: mask8_mem:
; KNL:       ## BB#0:
; KNL-NEXT:    movzbl (%rdi), %eax
; KNL-NEXT:    kmovw %eax, %k0
; KNL-NEXT:    knotw %k0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    movb %al, (%rdi)
; KNL-NEXT:    retq
;
; SKX-LABEL: mask8_mem:
; SKX:       ## BB#0:
; SKX-NEXT:    kmovb (%rdi), %k0
; SKX-NEXT:    knotb %k0, %k0
; SKX-NEXT:    kmovb %k0, (%rdi)
; SKX-NEXT:    retq
  %x = load i8, i8* %ptr, align 4
  %m0 = bitcast i8 %x to <8 x i1>
  %m1 = xor <8 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
  %ret = bitcast <8 x i1> %m1 to i8
  store i8 %ret, i8* %ptr, align 4
  ret void
}

; mand16: computes (x & y) | (x ^ y) on two i16 values viewed as <16 x i1>.
; Both inputs arrive in general-purpose registers, and the checks expect the
; whole expression to stay in scalar integer instructions (no k-registers).
define i16 @mand16(i16 %x, i16 %y) {
; CHECK-LABEL: mand16:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    xorl %esi, %eax
; CHECK-NEXT:    andl %esi, %edi
; CHECK-NEXT:    orl %eax, %edi
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    retq
  %ma = bitcast i16 %x to <16 x i1>
  %mb = bitcast i16 %y to <16 x i1>
  %mc = and <16 x i1> %ma, %mb
  %md = xor <16 x i1> %ma, %mb
  %me = or <16 x i1> %mc, %md
  %ret = bitcast <16 x i1> %me to i16
  ret i16 %ret
}

; mand16_mem: same (a & b) | (a ^ b) expression as mand16, but the operands
; are loaded as <16 x i1> from memory. Here the checks expect the mask-register
; forms kandw / kxorw / korw.
define i16 @mand16_mem(<16 x i1>* %x, <16 x i1>* %y) {
; CHECK-LABEL: mand16_mem:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw (%rdi), %k0
; CHECK-NEXT:    kmovw (%rsi), %k1
; CHECK-NEXT:    kandw %k1, %k0, %k2
; CHECK-NEXT:    kxorw %k1, %k0, %k0
; CHECK-NEXT:    korw %k0, %k2, %k0
; CHECK-NEXT:    kmovw %k0, %eax
; CHECK-NEXT:    ## kill: %AX<def> %AX<kill> %EAX<kill>
; CHECK-NEXT:    retq
  %ma = load <16 x i1>, <16 x i1>* %x
  %mb = load <16 x i1>, <16 x i1>* %y
  %mc = and <16 x i1> %ma, %mb
  %md = xor <16 x i1> %ma, %mb
  %me = or <16 x i1> %mc, %md
  %ret = bitcast <16 x i1> %me to i16
  ret i16 %ret
}

; shuf_test1: takes elements 8..15 of a <16 x i1> (bitcast from i16) with a
; shufflevector and returns them as an i8. The checks expect a single
; kshiftrw by 8 to perform the extraction.
define i8 @shuf_test1(i16 %v) nounwind {
; KNL-LABEL: shuf_test1:
; KNL:       ## BB#0:
; KNL-NEXT:    kmovw %edi, %k0
; KNL-NEXT:    kshiftrw $8, %k0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
; KNL-NEXT:    retq
;
; SKX-LABEL: shuf_test1:
; SKX:       ## BB#0:
; SKX-NEXT:    kmovw %edi, %k0
; SKX-NEXT:    kshiftrw $8, %k0, %k0
; SKX-NEXT:    kmovb %k0, %eax
; SKX-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
; SKX-NEXT:    retq
   %v1 = bitcast i16 %v to <16 x i1>
   %mask = shufflevector <16 x i1> %v1, <16 x i1> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
   %mask1 = bitcast <8 x i1> %mask to i8
   ret i8 %mask1
}

; zext_test1: extracts element 5 of an unsigned-greater-than compare on
; <16 x i32> and zero-extends that i1 to i32. The checks expect the bit to be
; isolated with kshiftlw $10 followed by kshiftrw $15.
define i32 @zext_test1(<16 x i32> %a, <16 x i32> %b) {
; CHECK-LABEL: zext_test1:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0
; CHECK-NEXT:    kshiftlw $10, %k0, %k0
; CHECK-NEXT:    kshiftrw $15, %k0, %k0
; CHECK-NEXT:    kmovw %k0, %eax
; CHECK-NEXT:    retq
  %cmp_res = icmp ugt <16 x i32> %a, %b
  %cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5
  %res = zext i1 %cmp_res.i1 to i32
  ret i32 %res
}

; zext_test2: same as zext_test1 (element 5 of an icmp ugt on <16 x i32>),
; but the extracted i1 is zero-extended to i16.
define i16 @zext_test2(<16 x i32> %a, <16 x i32> %b) {
; CHECK-LABEL: zext_test2:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0
; CHECK-NEXT:    kshiftlw $10, %k0, %k0
; CHECK-NEXT:    kshiftrw $15, %k0, %k0
; CHECK-NEXT:    kmovw %k0, %eax
; CHECK-NEXT:    retq
  %cmp_res = icmp ugt <16 x i32> %a, %b
  %cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5
  %res = zext i1 %cmp_res.i1 to i16
  ret i16 %res
}

; zext_test3: same as zext_test1 (element 5 of an icmp ugt on <16 x i32>),
; but the extracted i1 is zero-extended to i8.
define i8 @zext_test3(<16 x i32> %a, <16 x i32> %b) {
; CHECK-LABEL: zext_test3:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0
; CHECK-NEXT:    kshiftlw $10, %k0, %k0
; CHECK-NEXT:    kshiftrw $15, %k0, %k0
; CHECK-NEXT:    kmovw %k0, %eax
; CHECK-NEXT:    ## kill: %AL<def> %AL<kill> %AX<kill>
; CHECK-NEXT:    retq
  %cmp_res = icmp ugt <16 x i32> %a, %b
  %cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5
  %res = zext i1 %cmp_res.i1 to i8
  ret i8 %res
}

; conv1: stores an all-true <8 x i1> to %R, then round-trips the constant
; <0,1,1,1,1,1,1,1> through a stack slot and returns it bitcast to i8.
; The checks expect the returned value to be the immediate -2 (only element 0
; is false), with the stack store still emitted.
define i8 @conv1(<8 x i1>* %R) {
; KNL-LABEL: conv1:
; KNL:       ## BB#0: ## %entry
; KNL-NEXT:    kxnorw %k0, %k0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    movb %al, (%rdi)
; KNL-NEXT:    movb $-2, -{{[0-9]+}}(%rsp)
; KNL-NEXT:    movb $-2, %al
; KNL-NEXT:    retq
;
; SKX-LABEL: conv1:
; SKX:       ## BB#0: ## %entry
; SKX-NEXT:    kxnorw %k0, %k0, %k0
; SKX-NEXT:    kmovb %k0, (%rdi)
; SKX-NEXT:    movb $-2, -{{[0-9]+}}(%rsp)
; SKX-NEXT:    movb $-2, %al
; SKX-NEXT:    retq
entry:
  store <8 x i1> <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, <8 x i1>* %R

  %maskPtr = alloca <8 x i1>
  store <8 x i1> <i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, <8 x i1>* %maskPtr
  %mask = load <8 x i1>, <8 x i1>* %maskPtr
  %mask_convert = bitcast <8 x i1> %mask to i8
  ret i8 %mask_convert
}

; test4: signed-greater-than compare between two <4 x i1> values, each itself
; the result of an icmp sgt on <4 x i64>; the result is sign-extended to
; <4 x i32>. The skx run expects a masked compare (knotw + vpcmpgtq {%k1});
; the knl run expects the compare to be done on widened vector registers.
define <4 x i32> @test4(<4 x i64> %x, <4 x i64> %y, <4 x i64> %x1, <4 x i64> %y1) {
; KNL-LABEL: test4:
; KNL:       ## BB#0:
; KNL-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
; KNL-NEXT:    vpmovqd %zmm0, %ymm0
; KNL-NEXT:    vpslld $31, %xmm0, %xmm0
; KNL-NEXT:    vpsrad $31, %xmm0, %xmm0
; KNL-NEXT:    vpcmpgtq %ymm3, %ymm2, %ymm1
; KNL-NEXT:    vpmovqd %zmm1, %ymm1
; KNL-NEXT:    vpslld $31, %xmm1, %xmm1
; KNL-NEXT:    vpsrad $31, %xmm1, %xmm1
; KNL-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test4:
; SKX:       ## BB#0:
; SKX-NEXT:    vpcmpgtq %ymm3, %ymm2, %k0
; SKX-NEXT:    knotw %k0, %k1
; SKX-NEXT:    vpcmpgtq %ymm1, %ymm0, %k0 {%k1}
; SKX-NEXT:    vpmovm2d %k0, %xmm0
; SKX-NEXT:    retq
  %x_gt_y = icmp sgt <4 x i64> %x, %y
  %x1_gt_y1 = icmp sgt <4 x i64> %x1, %y1
  %res = icmp sgt <4 x i1>%x_gt_y, %x1_gt_y1
  %resse = sext <4 x i1>%res to <4 x i32>
  ret <4 x i32> %resse
}

; test5: signed-less-than compare between two <2 x i1> values; the first is
; an icmp slt on <2 x i64> (despite the %x_gt_y name), the second an icmp sgt.
; The result is sign-extended to <2 x i64>.
define <2 x i64> @test5(<2 x i64> %x, <2 x i64> %y, <2 x i64> %x1, <2 x i64> %y1) {
; KNL-LABEL: test5:
; KNL:       ## BB#0:
; KNL-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; KNL-NEXT:    vpcmpgtq %xmm3, %xmm2, %xmm1
; KNL-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test5:
; SKX:       ## BB#0:
; SKX-NEXT:    vpcmpgtq %xmm0, %xmm1, %k0
; SKX-NEXT:    knotw %k0, %k1
; SKX-NEXT:    vpcmpgtq %xmm3, %xmm2, %k0 {%k1}
; SKX-NEXT:    vpmovm2q %k0, %xmm0
; SKX-NEXT:    retq
  %x_gt_y = icmp slt <2 x i64> %x, %y
  %x1_gt_y1 = icmp sgt <2 x i64> %x1, %y1
  %res = icmp slt <2 x i1>%x_gt_y, %x1_gt_y1
  %resse = sext <2 x i1>%res to <2 x i64>
  ret <2 x i64> %resse
}

; test6: ANDs a <16 x i1> argument with a constant that is true at even
; indices, bitcasts the result to i16 and branches on whether it is zero.
; Both successors simply return.
; NOTE(review): this function carries no FileCheck assertions, so it only
; verifies that llc accepts the input. Its define line was previously fused
; onto the closing brace of test5, which may be why the generator skipped it;
; re-run utils/update_llc_test_checks.py to produce the checks.
define void @test6(<16 x i1> %mask)  {
allocas:
  %a= and <16 x i1> %mask, <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>
  %b = bitcast <16 x i1> %a to i16
  %c = icmp eq i16 %b, 0
  br i1 %c, label %true, label %false

true:
  ret void

false:
  ret void
}
; test7: ORs an <8 x i1> argument with a constant that is true at even
; indices (materialised as the immediate 85 in the checks), bitcasts to i8 and
; branches on whether the result is zero. Both successors simply return.
; The skx run expects ktestb for the zero test; the knl run expects the mask
; to be moved to a GPR and tested with testb.
define void @test7(<8 x i1> %mask)  {
; KNL-LABEL: test7:
; KNL:       ## BB#0: ## %allocas
; KNL-NEXT:    vpmovsxwq %xmm0, %zmm0
; KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k0
; KNL-NEXT:    movb $85, %al
; KNL-NEXT:    kmovw %eax, %k1
; KNL-NEXT:    korw %k1, %k0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    testb %al, %al
; KNL-NEXT:    retq
;
; SKX-LABEL: test7:
; SKX:       ## BB#0: ## %allocas
; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
; SKX-NEXT:    vpmovw2m %xmm0, %k0
; SKX-NEXT:    movb $85, %al
; SKX-NEXT:    kmovb %eax, %k1
; SKX-NEXT:    korb %k1, %k0, %k0
; SKX-NEXT:    ktestb %k0, %k0
; SKX-NEXT:    retq
allocas:
  %a= or <8 x i1> %mask, <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>
  %b = bitcast <8 x i1> %a to i8
  %c = icmp eq i8 %b, 0
  br i1 %c, label %true, label %false

true:
  ret void

false:
  ret void
}
; test8: selects, on the scalar condition %a1 > %b1 (signed), between two
; <16 x i1> compare results (%a sgt 0 and %b ult 0) and sign-extends the
; chosen mask to <16 x i8>. The checks expect the scalar select to become a
; branch, with one vector compare on each path.
define <16 x i8> @test8(<16 x i32>%a, <16 x i32>%b, i32 %a1, i32 %b1) {
; KNL-LABEL: test8:
; KNL:       ## BB#0:
; KNL-NEXT:    vpxord %zmm2, %zmm2, %zmm2
; KNL-NEXT:    cmpl %esi, %edi
; KNL-NEXT:    jg LBB17_1
; KNL-NEXT:  ## BB#2:
; KNL-NEXT:    vpcmpltud %zmm2, %zmm1, %k1
; KNL-NEXT:    jmp LBB17_3
; KNL-NEXT:  LBB17_1:
; KNL-NEXT:    vpcmpgtd %zmm2, %zmm0, %k1
; KNL-NEXT:  LBB17_3:
; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0
; KNL-NEXT:    vmovdqa32 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT:    vpmovdb %zmm0, %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test8:
; SKX:       ## BB#0:
; SKX-NEXT:    vpxord %zmm2, %zmm2, %zmm2
; SKX-NEXT:    cmpl %esi, %edi
; SKX-NEXT:    jg LBB17_1
; SKX-NEXT:  ## BB#2:
; SKX-NEXT:    vpcmpltud %zmm2, %zmm1, %k0
; SKX-NEXT:    vpmovm2b %k0, %xmm0
; SKX-NEXT:    retq
; SKX-NEXT:  LBB17_1:
; SKX-NEXT:    vpcmpgtd %zmm2, %zmm0, %k0
; SKX-NEXT:    vpmovm2b %k0, %xmm0
; SKX-NEXT:    retq
  %cond = icmp sgt i32 %a1, %b1
  %cmp1 = icmp sgt <16 x i32> %a, zeroinitializer
  %cmp2 = icmp ult <16 x i32> %b, zeroinitializer
  %mix = select i1 %cond, <16 x i1> %cmp1, <16 x i1> %cmp2
  %res = sext <16 x i1> %mix to <16 x i8>
  ret <16 x i8> %res
}
; test9: selects between two <16 x i1> arguments on the scalar condition
; %a1 > %b1 (signed) and returns the chosen vector. The checks expect a
; branch that picks the source register before the common mask conversion.
define <16 x i1> @test9(<16 x i1>%a, <16 x i1>%b, i32 %a1, i32 %b1) {
; KNL-LABEL: test9:
; KNL:       ## BB#0:
; KNL-NEXT:    cmpl %esi, %edi
; KNL-NEXT:    jg LBB18_1
; KNL-NEXT:  ## BB#2:
; KNL-NEXT:    vpmovsxbd %xmm1, %zmm0
; KNL-NEXT:    jmp LBB18_3
; KNL-NEXT:  LBB18_1:
; KNL-NEXT:    vpmovsxbd %xmm0, %zmm0
; KNL-NEXT:  LBB18_3:
; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k1
; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0
; KNL-NEXT:    vmovdqa32 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT:    vpmovdb %zmm0, %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test9:
; SKX:       ## BB#0:
; SKX-NEXT:    cmpl %esi, %edi
; SKX-NEXT:    jg LBB18_1
; SKX-NEXT:  ## BB#2:
; SKX-NEXT:    vpsllw $7, %xmm1, %xmm0
; SKX-NEXT:    jmp LBB18_3
; SKX-NEXT:  LBB18_1:
; SKX-NEXT:    vpsllw $7, %xmm0, %xmm0
; SKX-NEXT:  LBB18_3:
; SKX-NEXT:    vpmovb2m %xmm0, %k0
; SKX-NEXT:    vpmovm2b %k0, %xmm0
; SKX-NEXT:    retq
  %mask = icmp sgt i32 %a1, %b1
  %c = select i1 %mask, <16 x i1>%a, <16 x i1>%b
  ret <16 x i1>%c
}

; test10: the <8 x i1> variant of test9 - selects between two mask arguments
; on the scalar condition %a1 > %b1 (signed).
; NOTE(review): this function carries no FileCheck assertions, so it only
; verifies that llc accepts the input. Its define line was previously fused
; onto the closing brace of test9, which may be why the generator skipped it;
; re-run utils/update_llc_test_checks.py to produce the checks.
define <8 x i1> @test10(<8 x i1>%a, <8 x i1>%b, i32 %a1, i32 %b1) {
  %mask = icmp sgt i32 %a1, %b1
  %c = select i1 %mask, <8 x i1>%a, <8 x i1>%b
  ret <8 x i1>%c
}

; test11: the <4 x i1> variant of test9 - selects between two mask arguments
; on the scalar condition %a1 > %b1 (signed). The knl run expects a plain
; conditional register move of the vector; the skx run expects a round trip
; through a mask register (vptestmd / vpmovm2d).
define <4 x i1> @test11(<4 x i1>%a, <4 x i1>%b, i32 %a1, i32 %b1) {
; KNL-LABEL: test11:
; KNL:       ## BB#0:
; KNL-NEXT:    cmpl %esi, %edi
; KNL-NEXT:    jg LBB20_2
; KNL-NEXT:  ## BB#1:
; KNL-NEXT:    vmovaps %zmm1, %zmm0
; KNL-NEXT:  LBB20_2:
; KNL-NEXT:    retq
;
; SKX-LABEL: test11:
; SKX:       ## BB#0:
; SKX-NEXT:    cmpl %esi, %edi
; SKX-NEXT:    jg LBB20_1
; SKX-NEXT:  ## BB#2:
; SKX-NEXT:    vpslld $31, %xmm1, %xmm0
; SKX-NEXT:    jmp LBB20_3
; SKX-NEXT:  LBB20_1:
; SKX-NEXT:    vpslld $31, %xmm0, %xmm0
; SKX-NEXT:  LBB20_3:
; SKX-NEXT:    vptestmd %xmm0, %xmm0, %k0
; SKX-NEXT:    vpmovm2d %k0, %xmm0
; SKX-NEXT:    retq
  %mask = icmp sgt i32 %a1, %b1
  %c = select i1 %mask, <4 x i1>%a, <4 x i1>%b
  ret <4 x i1>%c
}

; test12: extracts element 0 of the constant mask 21845 (0x5555) viewed as
; <16 x i1> and uses it to select between %x and %y. Element 0 is set, so the
; checks expect the function to fold to returning %x (movl %edi).
define i32 @test12(i32 %x, i32 %y)  {
; CHECK-LABEL: test12:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    retq
  %a = bitcast i16 21845 to <16 x i1>
  %b = extractelement <16 x i1> %a, i32 0
  %c = select i1 %b, i32 %x, i32 %y
  ret i32 %c
}

; test13: like test12 but extracts element 3 of the constant mask 21845
; (0x5555). Element 3 is clear, so the checks expect the function to fold to
; returning %y (movl %esi).
define i32 @test13(i32 %x, i32 %y)  {
; CHECK-LABEL: test13:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movl %esi, %eax
; CHECK-NEXT:    retq
  %a = bitcast i16 21845 to <16 x i1>
  %b = extractelement <16 x i1> %a, i32 3
  %c = select i1 %b, i32 %x, i32 %y
  ret i32 %c
}

; test14: extracts element 2 of the constant mask 21845 (0x5555) and inserts
; it at index 1 of the constant <true, false, false, true>, returning the
; resulting <4 x i1>.
; NOTE(review): this function carries no FileCheck assertions, so it only
; verifies that llc accepts the input. Its define line was previously fused
; onto the closing brace of test13, which may be why the generator skipped it;
; re-run utils/update_llc_test_checks.py to produce the checks.
define <4 x i1> @test14()  {
  %a = bitcast i16 21845 to <16 x i1>
  %b = extractelement <16 x i1> %a, i32 2
  %c = insertelement <4 x i1> <i1 true, i1 false, i1 false, i1 true>, i1 %b, i32 1
  ret <4 x i1> %c
}

; test15: selects between two constant <16 x i1> masks (bit patterns 21845
; and 1) on the scalar condition %x > %y (signed). The checks expect the
; choice to be made on 16-bit immediates with cmovgw before the value is moved
; into a mask register.
define <16 x i1> @test15(i32 %x, i32 %y)  {
; KNL-LABEL: test15:
; KNL:       ## BB#0:
; KNL-NEXT:    cmpl %esi, %edi
; KNL-NEXT:    movw $21845, %ax ## imm = 0x5555
; KNL-NEXT:    movw $1, %cx
; KNL-NEXT:    cmovgw %ax, %cx
; KNL-NEXT:    kmovw %ecx, %k1
; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0
; KNL-NEXT:    vmovdqa32 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT:    vpmovdb %zmm0, %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test15:
; SKX:       ## BB#0:
; SKX-NEXT:    cmpl %esi, %edi
; SKX-NEXT:    movw $21845, %ax ## imm = 0x5555
; SKX-NEXT:    movw $1, %cx
; SKX-NEXT:    cmovgw %ax, %cx
; SKX-NEXT:    kmovw %ecx, %k0
; SKX-NEXT:    vpmovm2b %k0, %xmm0
; SKX-NEXT:    retq
  %a = bitcast i16 21845 to <16 x i1>
  %b = bitcast i16 1 to <16 x i1>
  %mask = icmp sgt i32 %x, %y
  %c = select i1 %mask, <16 x i1> %a, <16 x i1> %b
  ret <16 x i1> %c
}

; test16: views an i64 as <64 x i1>, forces element 5 to true and sign-extends
; the mask to <64 x i8>. The skx run expects the insert to be done with 64-bit
; mask shifts and korq; the knl run expects the mask to be spilled to the
; stack and rebuilt in 16-bit pieces.
define <64 x i8> @test16(i64 %x) {
;
; KNL-LABEL: test16:
; KNL:       ## BB#0:
; KNL-NEXT:    pushq %rbp
; KNL-NEXT:  Ltmp0:
; KNL-NEXT:    .cfi_def_cfa_offset 16
; KNL-NEXT:  Ltmp1:
; KNL-NEXT:    .cfi_offset %rbp, -16
; KNL-NEXT:    movq %rsp, %rbp
; KNL-NEXT:  Ltmp2:
; KNL-NEXT:    .cfi_def_cfa_register %rbp
; KNL-NEXT:    andq $-32, %rsp
; KNL-NEXT:    subq $64, %rsp
; KNL-NEXT:    movl %edi, (%rsp)
; KNL-NEXT:    shrq $32, %rdi
; KNL-NEXT:    movl %edi, {{[0-9]+}}(%rsp)
; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0
; KNL-NEXT:    kmovw (%rsp), %k1
; KNL-NEXT:    vmovdqa32 %zmm0, %zmm1 {%k1} {z}
; KNL-NEXT:    vpmovdb %zmm1, %xmm1
; KNL-NEXT:    kmovw {{[0-9]+}}(%rsp), %k1
; KNL-NEXT:    vmovdqa32 %zmm0, %zmm2 {%k1} {z}
; KNL-NEXT:    vpmovdb %zmm2, %xmm2
; KNL-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm2
; KNL-NEXT:    movl $1, %eax
; KNL-NEXT:    vpinsrb $5, %eax, %xmm1, %xmm1
; KNL-NEXT:    vpblendd {{.*#+}} ymm2 = ymm1[0,1,2,3],ymm2[4,5,6,7]
; KNL-NEXT:    kmovw {{[0-9]+}}(%rsp), %k1
; KNL-NEXT:    vmovdqa32 %zmm0, %zmm1 {%k1} {z}
; KNL-NEXT:    vpmovdb %zmm1, %xmm1
; KNL-NEXT:    kmovw {{[0-9]+}}(%rsp), %k1
; KNL-NEXT:    vmovdqa32 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT:    vpmovdb %zmm0, %xmm0
; KNL-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm1
; KNL-NEXT:    vpsllw $7, %ymm2, %ymm0
; KNL-NEXT:    vpand {{.*}}(%rip), %ymm0, %ymm0
; KNL-NEXT:    vpxor %ymm2, %ymm2, %ymm2
; KNL-NEXT:    vpcmpgtb %ymm0, %ymm2, %ymm0
; KNL-NEXT:    movq %rbp, %rsp
; KNL-NEXT:    popq %rbp
; KNL-NEXT:    retq
;
; SKX-LABEL: test16:
; SKX:       ## BB#0:
; SKX-NEXT:    kmovq %rdi, %k0
; SKX-NEXT:    kxnorw %k0, %k0, %k1
; SKX-NEXT:    kshiftrw $15, %k1, %k1
; SKX-NEXT:    kshiftlq $5, %k1, %k1
; SKX-NEXT:    korq %k1, %k0, %k0
; SKX-NEXT:    vpmovm2b %k0, %zmm0
; SKX-NEXT:    retq
  %a = bitcast i64 %x to <64 x i1>
  %b = insertelement <64 x i1>%a, i1 true, i32 5
  %c = sext <64 x i1>%b to <64 x i8>
  ret <64 x i8>%c
}

; test17: like test16, but the bit inserted at element 5 of the <64 x i1> is
; the run-time result of %y > %z (signed) rather than a constant. The result
; is sign-extended to <64 x i8>.
define <64 x i8> @test17(i64 %x, i32 %y, i32 %z) {
;
; KNL-LABEL: test17:
; KNL:       ## BB#0:
; KNL-NEXT:    pushq %rbp
; KNL-NEXT:  Ltmp3:
; KNL-NEXT:    .cfi_def_cfa_offset 16
; KNL-NEXT:  Ltmp4:
; KNL-NEXT:    .cfi_offset %rbp, -16
; KNL-NEXT:    movq %rsp, %rbp
; KNL-NEXT:  Ltmp5:
; KNL-NEXT:    .cfi_def_cfa_register %rbp
; KNL-NEXT:    andq $-32, %rsp
; KNL-NEXT:    subq $64, %rsp
; KNL-NEXT:    movl %edi, (%rsp)
; KNL-NEXT:    shrq $32, %rdi
; KNL-NEXT:    movl %edi, {{[0-9]+}}(%rsp)
; KNL-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1
; KNL-NEXT:    kmovw (%rsp), %k1
; KNL-NEXT:    vmovdqa32 %zmm1, %zmm0 {%k1} {z}
; KNL-NEXT:    vpmovdb %zmm0, %xmm0
; KNL-NEXT:    kmovw {{[0-9]+}}(%rsp), %k1
; KNL-NEXT:    vmovdqa32 %zmm1, %zmm2 {%k1} {z}
; KNL-NEXT:    vpmovdb %zmm2, %xmm2
; KNL-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm2
; KNL-NEXT:    xorl %eax, %eax
; KNL-NEXT:    cmpl %edx, %esi
; KNL-NEXT:    setg %al
; KNL-NEXT:    vpinsrb $5, %eax, %xmm0, %xmm0
; KNL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm2[4,5,6,7]
; KNL-NEXT:    vpsllw $7, %ymm0, %ymm0
; KNL-NEXT:    vpand {{.*}}(%rip), %ymm0, %ymm0
; KNL-NEXT:    vpxor %ymm2, %ymm2, %ymm2
; KNL-NEXT:    vpcmpgtb %ymm0, %ymm2, %ymm0
; KNL-NEXT:    kmovw {{[0-9]+}}(%rsp), %k1
; KNL-NEXT:    vmovdqa32 %zmm1, %zmm2 {%k1} {z}
; KNL-NEXT:    vpmovdb %zmm2, %xmm2
; KNL-NEXT:    kmovw {{[0-9]+}}(%rsp), %k1
; KNL-NEXT:    vmovdqa32 %zmm1, %zmm1 {%k1} {z}
; KNL-NEXT:    vpmovdb %zmm1, %xmm1
; KNL-NEXT:    vinsertf128 $1, %xmm1, %ymm2, %ymm1
; KNL-NEXT:    movq %rbp, %rsp
; KNL-NEXT:    popq %rbp
; KNL-NEXT:    retq
;
; SKX-LABEL: test17:
; SKX:       ## BB#0:
; SKX-NEXT:    kmovq %rdi, %k0
; SKX-NEXT:    cmpl %edx, %esi
; SKX-NEXT:    setg %al
; SKX-NEXT:    kmovw %eax, %k1
; SKX-NEXT:    kshiftlq $5, %k1, %k1
; SKX-NEXT:    korq %k1, %k0, %k0
; SKX-NEXT:    vpmovm2b %k0, %zmm0
; SKX-NEXT:    retq
  %a = bitcast i64 %x to <64 x i1>
  %b = icmp sgt i32 %y, %z
  %c = insertelement <64 x i1>%a, i1 %b, i32 5
  %d = sext <64 x i1>%c to <64 x i8>
  ret <64 x i8>%d
}

; test18: views %a as <8 x i1> and %y as <16 x i1>, then copies element 8 of
; the wide mask into index 7 and element 9 into index 6 of the narrow mask.
; The checks expect each bit to be isolated and repositioned with k-register
; shift pairs and merged with kor.
define <8 x i1> @test18(i8 %a, i16 %y) {
; KNL-LABEL: test18:
; KNL:       ## BB#0:
; KNL-NEXT:    kmovw %edi, %k0
; KNL-NEXT:    kmovw %esi, %k1
; KNL-NEXT:    kshiftlw $7, %k1, %k2
; KNL-NEXT:    kshiftrw $15, %k2, %k2
; KNL-NEXT:    kshiftlw $6, %k1, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kshiftlw $6, %k1, %k1
; KNL-NEXT:    korw %k1, %k0, %k0
; KNL-NEXT:    kshiftlw $7, %k2, %k1
; KNL-NEXT:    korw %k1, %k0, %k1
; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0
; KNL-NEXT:    vmovdqa64 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT:    vpmovqw %zmm0, %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test18:
; SKX:       ## BB#0:
; SKX-NEXT:    kmovb %edi, %k0
; SKX-NEXT:    kmovw %esi, %k1
; SKX-NEXT:    kshiftlw $6, %k1, %k2
; SKX-NEXT:    kshiftrw $15, %k2, %k2
; SKX-NEXT:    kshiftlw $7, %k1, %k1
; SKX-NEXT:    kshiftrw $15, %k1, %k1
; SKX-NEXT:    kshiftlb $7, %k1, %k1
; SKX-NEXT:    kshiftlb $6, %k2, %k2
; SKX-NEXT:    korb %k2, %k0, %k0
; SKX-NEXT:    korb %k1, %k0, %k0
; SKX-NEXT:    vpmovm2w %k0, %xmm0
; SKX-NEXT:    retq
  %b = bitcast i8 %a to <8 x i1>
  %b1 = bitcast i16 %y to <16 x i1>
  %el1 = extractelement <16 x i1>%b1, i32 8
  %el2 = extractelement <16 x i1>%b1, i32 9
  %c = insertelement <8 x i1>%b, i1 %el1, i32 7
  %d = insertelement <8 x i1>%c, i1 %el2, i32 6
  ret <8 x i1>%d
}
; test21: zero-masks a <32 x i16> vector with a <32 x i1> argument (select
; between %x and zero). The skx run expects a single zero-masked vmovdqu16;
; the knl run expects the mask to be expanded to word lanes and applied with
; vpand on each 256-bit half.
define <32 x i16> @test21(<32 x i16> %x , <32 x i1> %mask) nounwind readnone {
; KNL-LABEL: test21:
; KNL:       ## BB#0:
; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm3 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero
; KNL-NEXT:    vpsllw $15, %ymm3, %ymm3
; KNL-NEXT:    vpsraw $15, %ymm3, %ymm3
; KNL-NEXT:    vpand %ymm0, %ymm3, %ymm0
; KNL-NEXT:    vextracti128 $1, %ymm2, %xmm2
; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero
; KNL-NEXT:    vpsllw $15, %ymm2, %ymm2
; KNL-NEXT:    vpsraw $15, %ymm2, %ymm2
; KNL-NEXT:    vpand %ymm1, %ymm2, %ymm1
; KNL-NEXT:    retq
;
; SKX-LABEL: test21:
; SKX:       ## BB#0:
; SKX-NEXT:    vpsllw $7, %ymm1, %ymm1
; SKX-NEXT:    vpmovb2m %ymm1, %k1
; SKX-NEXT:    vmovdqu16 %zmm0, %zmm0 {%k1} {z}
; SKX-NEXT:    retq
  %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
  ret <32 x i16> %ret
}

; test22: stores a <4 x i1> argument to memory. The checks expect the vector
; to be converted to a mask register and written as a single byte (via a GPR
; on knl, via kmovb on skx).
define void @test22(<4 x i1> %a, <4 x i1>* %addr) {
; KNL-LABEL: test22:
; KNL:       ## BB#0:
; KNL-NEXT:    ## kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; KNL-NEXT:    vpslld $31, %ymm0, %ymm0
; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    movb %al, (%rdi)
; KNL-NEXT:    retq
;
; SKX-LABEL: test22:
; SKX:       ## BB#0:
; SKX-NEXT:    vpslld $31, %xmm0, %xmm0
; SKX-NEXT:    vptestmd %xmm0, %xmm0, %k0
; SKX-NEXT:    kmovb %k0, (%rdi)
; SKX-NEXT:    retq
  store <4 x i1> %a, <4 x i1>* %addr
  ret void
}

; test23: stores a <2 x i1> argument to memory. The checks expect the vector
; to be converted to a mask register and written as a single byte (via a GPR
; on knl, via kmovb on skx).
define void @test23(<2 x i1> %a, <2 x i1>* %addr) {
; KNL-LABEL: test23:
; KNL:       ## BB#0:
; KNL-NEXT:    ## kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
; KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    movb %al, (%rdi)
; KNL-NEXT:    retq
;
; SKX-LABEL: test23:
; SKX:       ## BB#0:
; SKX-NEXT:    vpsllq $63, %xmm0, %xmm0
; SKX-NEXT:    vptestmq %xmm0, %xmm0, %k0
; SKX-NEXT:    kmovb %k0, (%rdi)
; SKX-NEXT:    retq
  store <2 x i1> %a, <2 x i1>* %addr
  ret void
}

; store_v1i1: inverts a <1 x i1> argument (xor with true) and stores it to
; %ptr. The checks expect the inversion to be done as a kxorw against a
; one-bit constant built with kxnorw + kshiftrw $15.
define void @store_v1i1(<1 x i1> %c , <1 x i1>* %ptr) {
; KNL-LABEL: store_v1i1:
; KNL:       ## BB#0:
; KNL-NEXT:    andl $1, %edi
; KNL-NEXT:    kmovw %edi, %k0
; KNL-NEXT:    kxnorw %k0, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kxorw %k1, %k0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    movb %al, (%rsi)
; KNL-NEXT:    retq
;
; SKX-LABEL: store_v1i1:
; SKX:       ## BB#0:
; SKX-NEXT:    andl $1, %edi
; SKX-NEXT:    kmovw %edi, %k0
; SKX-NEXT:    kxnorw %k0, %k0, %k1
; SKX-NEXT:    kshiftrw $15, %k1, %k1
; SKX-NEXT:    kxorw %k1, %k0, %k0
; SKX-NEXT:    kmovb %k0, (%rsi)
; SKX-NEXT:    retq
  %x = xor <1 x i1> %c, <i1 1>
  store <1 x i1> %x, <1 x i1>*  %ptr, align 4
  ret void
}

; store_v2i1: inverts a <2 x i1> argument (xor with all-true) and stores it
; to %ptr. The knl run expects the inversion as a vector vpxor before the
; mask conversion; the skx run expects knotw after it.
define void @store_v2i1(<2 x i1> %c , <2 x i1>* %ptr) {
; KNL-LABEL: store_v2i1:
; KNL:       ## BB#0:
; KNL-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
; KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    movb %al, (%rdi)
; KNL-NEXT:    retq
;
; SKX-LABEL: store_v2i1:
; SKX:       ## BB#0:
; SKX-NEXT:    vpsllq $63, %xmm0, %xmm0
; SKX-NEXT:    vptestmq %xmm0, %xmm0, %k0
; SKX-NEXT:    knotw %k0, %k0
; SKX-NEXT:    kmovb %k0, (%rdi)
; SKX-NEXT:    retq
  %x = xor <2 x i1> %c, <i1 1, i1 1>
  store <2 x i1> %x, <2 x i1>*  %ptr, align 4
  ret void
}

; store_v4i1: inverts a <4 x i1> argument (xor with all-true) and stores it
; to %ptr. The knl run expects the inversion as a vector vpxor with a
; broadcast constant; the skx run expects knotw on the mask register.
define void @store_v4i1(<4 x i1> %c , <4 x i1>* %ptr) {
; KNL-LABEL: store_v4i1:
; KNL:       ## BB#0:
; KNL-NEXT:    vpbroadcastd {{.*}}(%rip), %xmm1
; KNL-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; KNL-NEXT:    vpslld $31, %ymm0, %ymm0
; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    movb %al, (%rdi)
; KNL-NEXT:    retq
;
; SKX-LABEL: store_v4i1:
; SKX:       ## BB#0:
; SKX-NEXT:    vpslld $31, %xmm0, %xmm0
; SKX-NEXT:    vptestmd %xmm0, %xmm0, %k0
; SKX-NEXT:    knotw %k0, %k0
; SKX-NEXT:    kmovb %k0, (%rdi)
; SKX-NEXT:    retq
  %x = xor <4 x i1> %c, <i1 1, i1 1, i1 1, i1 1>
  store <4 x i1> %x, <4 x i1>*  %ptr, align 4
  ret void
}

; store_v8i1: inverts an <8 x i1> argument (xor with all-true) and stores it
; to %ptr. Both runs expect the inversion on the mask register (knotw on knl,
; knotb on skx) after the vector-to-mask conversion.
define void @store_v8i1(<8 x i1> %c , <8 x i1>* %ptr) {
; KNL-LABEL: store_v8i1:
; KNL:       ## BB#0:
; KNL-NEXT:    vpmovsxwq %xmm0, %zmm0
; KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k0
; KNL-NEXT:    knotw %k0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    movb %al, (%rdi)
; KNL-NEXT:    retq
;
; SKX-LABEL: store_v8i1:
; SKX:       ## BB#0:
; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
; SKX-NEXT:    vpmovw2m %xmm0, %k0
; SKX-NEXT:    knotb %k0, %k0
; SKX-NEXT:    kmovb %k0, (%rdi)
; SKX-NEXT:    retq
  %x = xor <8 x i1> %c, <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>
  store <8 x i1> %x, <8 x i1>*  %ptr, align 4
  ret void
}

; store_v16i1: inverts a <16 x i1> argument (xor with all-true) and stores it
; to %ptr. Both runs expect knotw on the mask register followed by a kmovw
; store straight to memory.
define void @store_v16i1(<16 x i1> %c , <16 x i1>* %ptr) {
; KNL-LABEL: store_v16i1:
; KNL:       ## BB#0:
; KNL-NEXT:    vpmovsxbd %xmm0, %zmm0
; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT:    knotw %k0, %k0
; KNL-NEXT:    kmovw %k0, (%rdi)
; KNL-NEXT:    retq
;
; SKX-LABEL: store_v16i1:
; SKX:       ## BB#0:
; SKX-NEXT:    vpsllw $7, %xmm0, %xmm0
; SKX-NEXT:    vpmovb2m %xmm0, %k0
; SKX-NEXT:    knotw %k0, %k0
; SKX-NEXT:    kmovw %k0, (%rdi)
; SKX-NEXT:    retq
  %x = xor <16 x i1> %c, <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>
  store <16 x i1> %x, <16 x i1>*  %ptr, align 4
  ret void
}

; f1: IR for the C function quoted below. The static flag is modelled as the
; i1 global @f1.v; the function loads it, inverts it, stores it back, and
; tail-calls f2 with the new value zero-extended to i32.
;
;void f2(int);
;void f1(int c)
;{
;  static int v = 0;
;  if (v == 0)
;    v = 1;
;  else
;    v = 0;
;  f2(v);
;}

@f1.v = internal unnamed_addr global i1 false, align 4

define void @f1(i32 %c) {
; KNL-LABEL: f1:
; KNL:       ## BB#0: ## %entry
; KNL-NEXT:    movzbl {{.*}}(%rip), %edi
; KNL-NEXT:    movl %edi, %eax
; KNL-NEXT:    andl $1, %eax
; KNL-NEXT:    kmovw %eax, %k0
; KNL-NEXT:    kxnorw %k0, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kxorw %k1, %k0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    movb %al, {{.*}}(%rip)
; KNL-NEXT:    xorl $1, %edi
; KNL-NEXT:    jmp _f2 ## TAILCALL
;
; SKX-LABEL: f1:
; SKX:       ## BB#0: ## %entry
; SKX-NEXT:    movzbl {{.*}}(%rip), %edi
; SKX-NEXT:    movl %edi, %eax
; SKX-NEXT:    andl $1, %eax
; SKX-NEXT:    kmovw %eax, %k0
; SKX-NEXT:    kxnorw %k0, %k0, %k1
; SKX-NEXT:    kshiftrw $15, %k1, %k1
; SKX-NEXT:    kxorw %k1, %k0, %k0
; SKX-NEXT:    kmovb %k0, {{.*}}(%rip)
; SKX-NEXT:    xorl $1, %edi
; SKX-NEXT:    jmp _f2 ## TAILCALL
entry:
  %.b1 = load i1, i1* @f1.v, align 4
  %not..b1 = xor i1 %.b1, true
  store i1 %not..b1, i1* @f1.v, align 4
  %0 = zext i1 %not..b1 to i32
  tail call void @f2(i32 %0) #2
  ret void
}

declare void @f2(i32) #1

; store_i16_i1: truncates an i16 to i1 and stores it through %y. The checks
; expect the value to be masked with 1 and written as a byte, with no mask
; registers involved.
define void @store_i16_i1(i16 %x, i1 *%y) {
; CHECK-LABEL: store_i16_i1:
; CHECK:       ## BB#0:
; CHECK-NEXT:    andl $1, %edi
; CHECK-NEXT:    movb %dil, (%rsi)
; CHECK-NEXT:    retq
  %c = trunc i16 %x to i1
  store i1 %c, i1* %y
  ret void
}

; store_i8_i1: truncates an i8 to i1 and stores it through %y. The checks
; expect the value to be masked with 1 and written as a byte, with no mask
; registers involved.
define void @store_i8_i1(i8 %x, i1 *%y) {
; CHECK-LABEL: store_i8_i1:
; CHECK:       ## BB#0:
; CHECK-NEXT:    andl $1, %edi
; CHECK-NEXT:    movb %dil, (%rsi)
; CHECK-NEXT:    retq
  %c = trunc i8 %x to i1
  store i1 %c, i1* %y
  ret void
}

; test_build_vec_v32i1: zero-masks a <32 x i16> vector with a constant
; <32 x i1> mask. The skx run expects the mask to be materialised as a 32-bit
; immediate moved into a k-register with kmovd; the knl run expects it to be
; loaded from memory and applied with vpand on each 256-bit half.
define <32 x i16> @test_build_vec_v32i1(<32 x i16> %x) {
; KNL-LABEL: test_build_vec_v32i1:
; KNL:       ## BB#0:
; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
; KNL-NEXT:    vpsllw $15, %ymm2, %ymm2
; KNL-NEXT:    vpsraw $15, %ymm2, %ymm2
; KNL-NEXT:    vpand %ymm0, %ymm2, %ymm0
; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
; KNL-NEXT:    vpsllw $15, %ymm2, %ymm2
; KNL-NEXT:    vpsraw $15, %ymm2, %ymm2
; KNL-NEXT:    vpand %ymm1, %ymm2, %ymm1
; KNL-NEXT:    retq
;
; SKX-LABEL: test_build_vec_v32i1:
; SKX:       ## BB#0:
; SKX-NEXT:    movl $1497715861, %eax ## imm = 0x59455495
; SKX-NEXT:    kmovd %eax, %k1
; SKX-NEXT:    vmovdqu16 %zmm0, %zmm0 {%k1} {z}
; SKX-NEXT:    retq
  %ret = select <32 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false>, <32 x i16> %x, <32 x i16> zeroinitializer
  ret <32 x i16> %ret
}

; @test_build_vec_v64i1 -- select between %x and zero using a constant
; <64 x i1> mask, i.e. zero out the byte lanes whose mask element is false.
; The SKX run expects the constant mask as a 64-bit immediate (element 0 of
; the mask is bit 0 of 0x5945594549549544), moved into %k1 with kmovq and
; applied with a single zero-masking vmovdqu8.
; The KNL run expects each ymm half of %x to be ANDed with a constant loaded
; RIP-relative from memory.
937define <64 x i8> @test_build_vec_v64i1(<64 x i8> %x) {
938; KNL-LABEL: test_build_vec_v64i1:
939; KNL:       ## BB#0:
940; KNL-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
941; KNL-NEXT:    vandps {{.*}}(%rip), %ymm1, %ymm1
942; KNL-NEXT:    retq
943;
944; SKX-LABEL: test_build_vec_v64i1:
945; SKX:       ## BB#0:
946; SKX-NEXT:    movabsq $6432645796886517060, %rax ## imm = 0x5945594549549544
947; SKX-NEXT:    kmovq %rax, %k1
948; SKX-NEXT:    vmovdqu8 %zmm0, %zmm0 {%k1} {z}
949; SKX-NEXT:    retq
950  %ret = select <64 x i1> <i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false>, <64 x i8> %x, <64 x i8> zeroinitializer
951  ret <64 x i8> %ret
952}
953
; @ktest_1 -- branch on whether an <8 x i1> mask is all-zero.
; Two unaligned <8 x double> vectors are loaded, one at %base and one at
; %base + 1 double (byte offset 8 in the expected code). The mask is
;   (%in > val1) AND (%in < select(%in > val1, val2, 0))
; and the function stores %in to the first address if any mask bit is set,
; otherwise to the second address.
; Both runs expect the select to become a zero-masking load and the second
; compare to be write-masked by %k1, with no separate mask AND instruction.
; They differ in the zero test: the KNL run expects the mask to be moved to a
; GPR and tested with testb, while the SKX run expects ktestb on the mask
; register directly.
954define void @ktest_1(<8 x double> %in, double * %base) {
955; KNL-LABEL: ktest_1:
956; KNL:       ## BB#0:
957; KNL-NEXT:    vmovupd (%rdi), %zmm1
958; KNL-NEXT:    vcmpltpd %zmm0, %zmm1, %k1
959; KNL-NEXT:    vmovupd 8(%rdi), %zmm1 {%k1} {z}
960; KNL-NEXT:    vcmpltpd %zmm1, %zmm0, %k0 {%k1}
961; KNL-NEXT:    kmovw %k0, %eax
962; KNL-NEXT:    testb %al, %al
963; KNL-NEXT:    je LBB41_2
964; KNL-NEXT:  ## BB#1: ## %L1
965; KNL-NEXT:    vmovapd %zmm0, (%rdi)
966; KNL-NEXT:    retq
967; KNL-NEXT:  LBB41_2: ## %L2
968; KNL-NEXT:    vmovapd %zmm0, 8(%rdi)
969; KNL-NEXT:    retq
970;
971; SKX-LABEL: ktest_1:
972; SKX:       ## BB#0:
973; SKX-NEXT:    vmovupd (%rdi), %zmm1
974; SKX-NEXT:    vcmpltpd %zmm0, %zmm1, %k1
975; SKX-NEXT:    vmovupd 8(%rdi), %zmm1 {%k1} {z}
976; SKX-NEXT:    vcmpltpd %zmm1, %zmm0, %k0 {%k1}
977; SKX-NEXT:    ktestb %k0, %k0
978; SKX-NEXT:    je LBB41_2
979; SKX-NEXT:  ## BB#1: ## %L1
980; SKX-NEXT:    vmovapd %zmm0, (%rdi)
981; SKX-NEXT:    retq
982; SKX-NEXT:  LBB41_2: ## %L2
983; SKX-NEXT:    vmovapd %zmm0, 8(%rdi)
984; SKX-NEXT:    retq
985  %addr1 = getelementptr double, double * %base, i64 0
986  %addr2 = getelementptr double, double * %base, i64 1
987
988  %vaddr1 = bitcast double* %addr1 to <8 x double>*
989  %vaddr2 = bitcast double* %addr2 to <8 x double>*
990
991  %val1 = load <8 x double>, <8 x double> *%vaddr1, align 1
992  %val2 = load <8 x double>, <8 x double> *%vaddr2, align 1
993
994  %sel1 = fcmp ogt <8 x double>%in, %val1
995  %val3 = select <8 x i1> %sel1, <8 x double> %val2, <8 x double> zeroinitializer
996  %sel2 = fcmp olt <8 x double> %in, %val3
997  %sel3 = and <8 x i1> %sel1, %sel2
998
999  %int_sel3 = bitcast <8 x i1> %sel3 to i8 ; view the 8-lane mask as an integer
1000  %res = icmp eq i8 %int_sel3, zeroinitializer ; true when no lane is set
1001  br i1 %res, label %L2, label %L1
1002L1:
1003  store <8 x double> %in, <8 x double>* %vaddr1
1004  br label %End
1005L2:
1006  store <8 x double> %in, <8 x double>* %vaddr2
1007  br label %End
1008End:
1009  ret void
1010}
1011
; @ktest_2 -- 32-lane variant of the mask zero test, using OR instead of AND.
; Two unaligned <32 x float> vectors are loaded, one at %base and one at
; %base + 1 float (byte offsets 4 and 68 for the two zmm halves in the
; expected code). The mask is
;   (%in > val1) OR (%in < select(%in > val1, val2, 0))
; bitcast to i32; %in is stored to the first address if any bit is set,
; otherwise to the second address.
; The SKX run expects each 32-lane mask to be formed from two 16-lane compares
; joined with kunpckwd, then combined with kord and tested with ktestd.
; The KNL run expects a much longer sequence: every 16-lane compare result is
; unpacked one bit at a time (kshiftlw / kshiftrw / kmovw / vpinsrb) into byte
; vectors, the byte vectors are ORed, converted back to two 16-bit masks that
; are spilled to an aligned stack frame, and the branch is taken on
; cmpl $0, (%rsp).
1012define void @ktest_2(<32 x float> %in, float * %base) {
1013;
1014; KNL-LABEL: ktest_2:
1015; KNL:       ## BB#0:
1016; KNL-NEXT:    pushq %rbp
1017; KNL-NEXT:  Ltmp6:
1018; KNL-NEXT:    .cfi_def_cfa_offset 16
1019; KNL-NEXT:  Ltmp7:
1020; KNL-NEXT:    .cfi_offset %rbp, -16
1021; KNL-NEXT:    movq %rsp, %rbp
1022; KNL-NEXT:  Ltmp8:
1023; KNL-NEXT:    .cfi_def_cfa_register %rbp
1024; KNL-NEXT:    andq $-32, %rsp
1025; KNL-NEXT:    subq $32, %rsp
1026; KNL-NEXT:    vmovups (%rdi), %zmm2
1027; KNL-NEXT:    vmovups 64(%rdi), %zmm3
1028; KNL-NEXT:    vcmpltps %zmm1, %zmm3, %k1
1029; KNL-NEXT:    kshiftlw $14, %k1, %k0
1030; KNL-NEXT:    kshiftrw $15, %k0, %k0
1031; KNL-NEXT:    kmovw %k0, %eax
1032; KNL-NEXT:    kshiftlw $15, %k1, %k0
1033; KNL-NEXT:    kshiftrw $15, %k0, %k0
1034; KNL-NEXT:    kmovw %k0, %ecx
1035; KNL-NEXT:    vmovd %ecx, %xmm3
1036; KNL-NEXT:    vpinsrb $1, %eax, %xmm3, %xmm3
1037; KNL-NEXT:    kshiftlw $13, %k1, %k0
1038; KNL-NEXT:    kshiftrw $15, %k0, %k0
1039; KNL-NEXT:    kmovw %k0, %eax
1040; KNL-NEXT:    vpinsrb $2, %eax, %xmm3, %xmm3
1041; KNL-NEXT:    kshiftlw $12, %k1, %k0
1042; KNL-NEXT:    kshiftrw $15, %k0, %k0
1043; KNL-NEXT:    kmovw %k0, %eax
1044; KNL-NEXT:    vpinsrb $3, %eax, %xmm3, %xmm3
1045; KNL-NEXT:    kshiftlw $11, %k1, %k0
1046; KNL-NEXT:    kshiftrw $15, %k0, %k0
1047; KNL-NEXT:    kmovw %k0, %eax
1048; KNL-NEXT:    vpinsrb $4, %eax, %xmm3, %xmm3
1049; KNL-NEXT:    kshiftlw $10, %k1, %k0
1050; KNL-NEXT:    kshiftrw $15, %k0, %k0
1051; KNL-NEXT:    kmovw %k0, %eax
1052; KNL-NEXT:    vpinsrb $5, %eax, %xmm3, %xmm3
1053; KNL-NEXT:    kshiftlw $9, %k1, %k0
1054; KNL-NEXT:    kshiftrw $15, %k0, %k0
1055; KNL-NEXT:    kmovw %k0, %eax
1056; KNL-NEXT:    vpinsrb $6, %eax, %xmm3, %xmm3
1057; KNL-NEXT:    kshiftlw $8, %k1, %k0
1058; KNL-NEXT:    kshiftrw $15, %k0, %k0
1059; KNL-NEXT:    kmovw %k0, %eax
1060; KNL-NEXT:    vpinsrb $7, %eax, %xmm3, %xmm3
1061; KNL-NEXT:    kshiftlw $7, %k1, %k0
1062; KNL-NEXT:    kshiftrw $15, %k0, %k0
1063; KNL-NEXT:    kmovw %k0, %eax
1064; KNL-NEXT:    vpinsrb $8, %eax, %xmm3, %xmm3
1065; KNL-NEXT:    kshiftlw $6, %k1, %k0
1066; KNL-NEXT:    kshiftrw $15, %k0, %k0
1067; KNL-NEXT:    kmovw %k0, %eax
1068; KNL-NEXT:    vpinsrb $9, %eax, %xmm3, %xmm3
1069; KNL-NEXT:    kshiftlw $5, %k1, %k0
1070; KNL-NEXT:    kshiftrw $15, %k0, %k0
1071; KNL-NEXT:    kmovw %k0, %eax
1072; KNL-NEXT:    vpinsrb $10, %eax, %xmm3, %xmm3
1073; KNL-NEXT:    kshiftlw $4, %k1, %k0
1074; KNL-NEXT:    kshiftrw $15, %k0, %k0
1075; KNL-NEXT:    kmovw %k0, %eax
1076; KNL-NEXT:    vpinsrb $11, %eax, %xmm3, %xmm3
1077; KNL-NEXT:    kshiftlw $3, %k1, %k0
1078; KNL-NEXT:    kshiftrw $15, %k0, %k0
1079; KNL-NEXT:    kmovw %k0, %eax
1080; KNL-NEXT:    vpinsrb $12, %eax, %xmm3, %xmm3
1081; KNL-NEXT:    kshiftlw $2, %k1, %k0
1082; KNL-NEXT:    kshiftrw $15, %k0, %k0
1083; KNL-NEXT:    kmovw %k0, %eax
1084; KNL-NEXT:    vpinsrb $13, %eax, %xmm3, %xmm3
1085; KNL-NEXT:    kshiftlw $1, %k1, %k0
1086; KNL-NEXT:    kshiftrw $15, %k0, %k0
1087; KNL-NEXT:    kmovw %k0, %eax
1088; KNL-NEXT:    vpinsrb $14, %eax, %xmm3, %xmm3
1089; KNL-NEXT:    kshiftlw $0, %k1, %k0
1090; KNL-NEXT:    kshiftrw $15, %k0, %k0
1091; KNL-NEXT:    kmovw %k0, %eax
1092; KNL-NEXT:    vpinsrb $15, %eax, %xmm3, %xmm3
1093; KNL-NEXT:    vcmpltps %zmm0, %zmm2, %k2
1094; KNL-NEXT:    kshiftlw $14, %k2, %k0
1095; KNL-NEXT:    kshiftrw $15, %k0, %k0
1096; KNL-NEXT:    kmovw %k0, %eax
1097; KNL-NEXT:    kshiftlw $15, %k2, %k0
1098; KNL-NEXT:    kshiftrw $15, %k0, %k0
1099; KNL-NEXT:    kmovw %k0, %ecx
1100; KNL-NEXT:    vmovd %ecx, %xmm2
1101; KNL-NEXT:    vpinsrb $1, %eax, %xmm2, %xmm2
1102; KNL-NEXT:    kshiftlw $13, %k2, %k0
1103; KNL-NEXT:    kshiftrw $15, %k0, %k0
1104; KNL-NEXT:    kmovw %k0, %eax
1105; KNL-NEXT:    vpinsrb $2, %eax, %xmm2, %xmm2
1106; KNL-NEXT:    kshiftlw $12, %k2, %k0
1107; KNL-NEXT:    kshiftrw $15, %k0, %k0
1108; KNL-NEXT:    kmovw %k0, %eax
1109; KNL-NEXT:    vpinsrb $3, %eax, %xmm2, %xmm2
1110; KNL-NEXT:    kshiftlw $11, %k2, %k0
1111; KNL-NEXT:    kshiftrw $15, %k0, %k0
1112; KNL-NEXT:    kmovw %k0, %eax
1113; KNL-NEXT:    vpinsrb $4, %eax, %xmm2, %xmm2
1114; KNL-NEXT:    kshiftlw $10, %k2, %k0
1115; KNL-NEXT:    kshiftrw $15, %k0, %k0
1116; KNL-NEXT:    kmovw %k0, %eax
1117; KNL-NEXT:    vpinsrb $5, %eax, %xmm2, %xmm2
1118; KNL-NEXT:    kshiftlw $9, %k2, %k0
1119; KNL-NEXT:    kshiftrw $15, %k0, %k0
1120; KNL-NEXT:    kmovw %k0, %eax
1121; KNL-NEXT:    vpinsrb $6, %eax, %xmm2, %xmm2
1122; KNL-NEXT:    kshiftlw $8, %k2, %k0
1123; KNL-NEXT:    kshiftrw $15, %k0, %k0
1124; KNL-NEXT:    kmovw %k0, %eax
1125; KNL-NEXT:    vpinsrb $7, %eax, %xmm2, %xmm2
1126; KNL-NEXT:    kshiftlw $7, %k2, %k0
1127; KNL-NEXT:    kshiftrw $15, %k0, %k0
1128; KNL-NEXT:    kmovw %k0, %eax
1129; KNL-NEXT:    vpinsrb $8, %eax, %xmm2, %xmm2
1130; KNL-NEXT:    kshiftlw $6, %k2, %k0
1131; KNL-NEXT:    kshiftrw $15, %k0, %k0
1132; KNL-NEXT:    kmovw %k0, %eax
1133; KNL-NEXT:    vpinsrb $9, %eax, %xmm2, %xmm2
1134; KNL-NEXT:    kshiftlw $5, %k2, %k0
1135; KNL-NEXT:    kshiftrw $15, %k0, %k0
1136; KNL-NEXT:    kmovw %k0, %eax
1137; KNL-NEXT:    vpinsrb $10, %eax, %xmm2, %xmm2
1138; KNL-NEXT:    kshiftlw $4, %k2, %k0
1139; KNL-NEXT:    kshiftrw $15, %k0, %k0
1140; KNL-NEXT:    kmovw %k0, %eax
1141; KNL-NEXT:    vpinsrb $11, %eax, %xmm2, %xmm2
1142; KNL-NEXT:    kshiftlw $3, %k2, %k0
1143; KNL-NEXT:    kshiftrw $15, %k0, %k0
1144; KNL-NEXT:    kmovw %k0, %eax
1145; KNL-NEXT:    vpinsrb $12, %eax, %xmm2, %xmm2
1146; KNL-NEXT:    kshiftlw $2, %k2, %k0
1147; KNL-NEXT:    kshiftrw $15, %k0, %k0
1148; KNL-NEXT:    kmovw %k0, %eax
1149; KNL-NEXT:    vpinsrb $13, %eax, %xmm2, %xmm2
1150; KNL-NEXT:    kshiftlw $1, %k2, %k0
1151; KNL-NEXT:    kshiftrw $15, %k0, %k0
1152; KNL-NEXT:    kmovw %k0, %eax
1153; KNL-NEXT:    vpinsrb $14, %eax, %xmm2, %xmm2
1154; KNL-NEXT:    kshiftlw $0, %k2, %k0
1155; KNL-NEXT:    kshiftrw $15, %k0, %k0
1156; KNL-NEXT:    kmovw %k0, %eax
1157; KNL-NEXT:    vpinsrb $15, %eax, %xmm2, %xmm2
1158; KNL-NEXT:    vinserti128 $1, %xmm3, %ymm2, %ymm2
1159; KNL-NEXT:    vpsllw $7, %ymm2, %ymm2
1160; KNL-NEXT:    vpand {{.*}}(%rip), %ymm2, %ymm2
1161; KNL-NEXT:    vpxor %ymm3, %ymm3, %ymm3
1162; KNL-NEXT:    vpcmpgtb %ymm2, %ymm3, %ymm2
1163; KNL-NEXT:    vmovups 4(%rdi), %zmm3 {%k2} {z}
1164; KNL-NEXT:    vmovups 68(%rdi), %zmm4 {%k1} {z}
1165; KNL-NEXT:    vcmpltps %zmm4, %zmm1, %k0
1166; KNL-NEXT:    kshiftlw $14, %k0, %k1
1167; KNL-NEXT:    kshiftrw $15, %k1, %k1
1168; KNL-NEXT:    kmovw %k1, %eax
1169; KNL-NEXT:    kshiftlw $15, %k0, %k1
1170; KNL-NEXT:    kshiftrw $15, %k1, %k1
1171; KNL-NEXT:    kmovw %k1, %ecx
1172; KNL-NEXT:    vmovd %ecx, %xmm4
1173; KNL-NEXT:    vpinsrb $1, %eax, %xmm4, %xmm4
1174; KNL-NEXT:    kshiftlw $13, %k0, %k1
1175; KNL-NEXT:    kshiftrw $15, %k1, %k1
1176; KNL-NEXT:    kmovw %k1, %eax
1177; KNL-NEXT:    vpinsrb $2, %eax, %xmm4, %xmm4
1178; KNL-NEXT:    kshiftlw $12, %k0, %k1
1179; KNL-NEXT:    kshiftrw $15, %k1, %k1
1180; KNL-NEXT:    kmovw %k1, %eax
1181; KNL-NEXT:    vpinsrb $3, %eax, %xmm4, %xmm4
1182; KNL-NEXT:    kshiftlw $11, %k0, %k1
1183; KNL-NEXT:    kshiftrw $15, %k1, %k1
1184; KNL-NEXT:    kmovw %k1, %eax
1185; KNL-NEXT:    vpinsrb $4, %eax, %xmm4, %xmm4
1186; KNL-NEXT:    kshiftlw $10, %k0, %k1
1187; KNL-NEXT:    kshiftrw $15, %k1, %k1
1188; KNL-NEXT:    kmovw %k1, %eax
1189; KNL-NEXT:    vpinsrb $5, %eax, %xmm4, %xmm4
1190; KNL-NEXT:    kshiftlw $9, %k0, %k1
1191; KNL-NEXT:    kshiftrw $15, %k1, %k1
1192; KNL-NEXT:    kmovw %k1, %eax
1193; KNL-NEXT:    vpinsrb $6, %eax, %xmm4, %xmm4
1194; KNL-NEXT:    kshiftlw $8, %k0, %k1
1195; KNL-NEXT:    kshiftrw $15, %k1, %k1
1196; KNL-NEXT:    kmovw %k1, %eax
1197; KNL-NEXT:    vpinsrb $7, %eax, %xmm4, %xmm4
1198; KNL-NEXT:    kshiftlw $7, %k0, %k1
1199; KNL-NEXT:    kshiftrw $15, %k1, %k1
1200; KNL-NEXT:    kmovw %k1, %eax
1201; KNL-NEXT:    vpinsrb $8, %eax, %xmm4, %xmm4
1202; KNL-NEXT:    kshiftlw $6, %k0, %k1
1203; KNL-NEXT:    kshiftrw $15, %k1, %k1
1204; KNL-NEXT:    kmovw %k1, %eax
1205; KNL-NEXT:    vpinsrb $9, %eax, %xmm4, %xmm4
1206; KNL-NEXT:    kshiftlw $5, %k0, %k1
1207; KNL-NEXT:    kshiftrw $15, %k1, %k1
1208; KNL-NEXT:    kmovw %k1, %eax
1209; KNL-NEXT:    vpinsrb $10, %eax, %xmm4, %xmm4
1210; KNL-NEXT:    kshiftlw $4, %k0, %k1
1211; KNL-NEXT:    kshiftrw $15, %k1, %k1
1212; KNL-NEXT:    kmovw %k1, %eax
1213; KNL-NEXT:    vpinsrb $11, %eax, %xmm4, %xmm4
1214; KNL-NEXT:    kshiftlw $3, %k0, %k1
1215; KNL-NEXT:    kshiftrw $15, %k1, %k1
1216; KNL-NEXT:    kmovw %k1, %eax
1217; KNL-NEXT:    vpinsrb $12, %eax, %xmm4, %xmm4
1218; KNL-NEXT:    kshiftlw $2, %k0, %k1
1219; KNL-NEXT:    kshiftrw $15, %k1, %k1
1220; KNL-NEXT:    kmovw %k1, %eax
1221; KNL-NEXT:    vpinsrb $13, %eax, %xmm4, %xmm4
1222; KNL-NEXT:    kshiftlw $1, %k0, %k1
1223; KNL-NEXT:    kshiftrw $15, %k1, %k1
1224; KNL-NEXT:    kmovw %k1, %eax
1225; KNL-NEXT:    vpinsrb $14, %eax, %xmm4, %xmm4
1226; KNL-NEXT:    kshiftlw $0, %k0, %k0
1227; KNL-NEXT:    kshiftrw $15, %k0, %k0
1228; KNL-NEXT:    kmovw %k0, %eax
1229; KNL-NEXT:    vpinsrb $15, %eax, %xmm4, %xmm4
1230; KNL-NEXT:    vcmpltps %zmm3, %zmm0, %k0
1231; KNL-NEXT:    kshiftlw $14, %k0, %k1
1232; KNL-NEXT:    kshiftrw $15, %k1, %k1
1233; KNL-NEXT:    kmovw %k1, %eax
1234; KNL-NEXT:    kshiftlw $15, %k0, %k1
1235; KNL-NEXT:    kshiftrw $15, %k1, %k1
1236; KNL-NEXT:    kmovw %k1, %ecx
1237; KNL-NEXT:    vmovd %ecx, %xmm3
1238; KNL-NEXT:    vpinsrb $1, %eax, %xmm3, %xmm3
1239; KNL-NEXT:    kshiftlw $13, %k0, %k1
1240; KNL-NEXT:    kshiftrw $15, %k1, %k1
1241; KNL-NEXT:    kmovw %k1, %eax
1242; KNL-NEXT:    vpinsrb $2, %eax, %xmm3, %xmm3
1243; KNL-NEXT:    kshiftlw $12, %k0, %k1
1244; KNL-NEXT:    kshiftrw $15, %k1, %k1
1245; KNL-NEXT:    kmovw %k1, %eax
1246; KNL-NEXT:    vpinsrb $3, %eax, %xmm3, %xmm3
1247; KNL-NEXT:    kshiftlw $11, %k0, %k1
1248; KNL-NEXT:    kshiftrw $15, %k1, %k1
1249; KNL-NEXT:    kmovw %k1, %eax
1250; KNL-NEXT:    vpinsrb $4, %eax, %xmm3, %xmm3
1251; KNL-NEXT:    kshiftlw $10, %k0, %k1
1252; KNL-NEXT:    kshiftrw $15, %k1, %k1
1253; KNL-NEXT:    kmovw %k1, %eax
1254; KNL-NEXT:    vpinsrb $5, %eax, %xmm3, %xmm3
1255; KNL-NEXT:    kshiftlw $9, %k0, %k1
1256; KNL-NEXT:    kshiftrw $15, %k1, %k1
1257; KNL-NEXT:    kmovw %k1, %eax
1258; KNL-NEXT:    vpinsrb $6, %eax, %xmm3, %xmm3
1259; KNL-NEXT:    kshiftlw $8, %k0, %k1
1260; KNL-NEXT:    kshiftrw $15, %k1, %k1
1261; KNL-NEXT:    kmovw %k1, %eax
1262; KNL-NEXT:    vpinsrb $7, %eax, %xmm3, %xmm3
1263; KNL-NEXT:    kshiftlw $7, %k0, %k1
1264; KNL-NEXT:    kshiftrw $15, %k1, %k1
1265; KNL-NEXT:    kmovw %k1, %eax
1266; KNL-NEXT:    vpinsrb $8, %eax, %xmm3, %xmm3
1267; KNL-NEXT:    kshiftlw $6, %k0, %k1
1268; KNL-NEXT:    kshiftrw $15, %k1, %k1
1269; KNL-NEXT:    kmovw %k1, %eax
1270; KNL-NEXT:    vpinsrb $9, %eax, %xmm3, %xmm3
1271; KNL-NEXT:    kshiftlw $5, %k0, %k1
1272; KNL-NEXT:    kshiftrw $15, %k1, %k1
1273; KNL-NEXT:    kmovw %k1, %eax
1274; KNL-NEXT:    vpinsrb $10, %eax, %xmm3, %xmm3
1275; KNL-NEXT:    kshiftlw $4, %k0, %k1
1276; KNL-NEXT:    kshiftrw $15, %k1, %k1
1277; KNL-NEXT:    kmovw %k1, %eax
1278; KNL-NEXT:    vpinsrb $11, %eax, %xmm3, %xmm3
1279; KNL-NEXT:    kshiftlw $3, %k0, %k1
1280; KNL-NEXT:    kshiftrw $15, %k1, %k1
1281; KNL-NEXT:    kmovw %k1, %eax
1282; KNL-NEXT:    vpinsrb $12, %eax, %xmm3, %xmm3
1283; KNL-NEXT:    kshiftlw $2, %k0, %k1
1284; KNL-NEXT:    kshiftrw $15, %k1, %k1
1285; KNL-NEXT:    kmovw %k1, %eax
1286; KNL-NEXT:    vpinsrb $13, %eax, %xmm3, %xmm3
1287; KNL-NEXT:    kshiftlw $1, %k0, %k1
1288; KNL-NEXT:    kshiftrw $15, %k1, %k1
1289; KNL-NEXT:    kmovw %k1, %eax
1290; KNL-NEXT:    vpinsrb $14, %eax, %xmm3, %xmm3
1291; KNL-NEXT:    kshiftlw $0, %k0, %k0
1292; KNL-NEXT:    kshiftrw $15, %k0, %k0
1293; KNL-NEXT:    kmovw %k0, %eax
1294; KNL-NEXT:    vpinsrb $15, %eax, %xmm3, %xmm3
1295; KNL-NEXT:    vinserti128 $1, %xmm4, %ymm3, %ymm3
1296; KNL-NEXT:    vpor %ymm3, %ymm2, %ymm2
1297; KNL-NEXT:    vextracti128 $1, %ymm2, %xmm3
1298; KNL-NEXT:    vpmovsxbd %xmm3, %zmm3
1299; KNL-NEXT:    vpslld $31, %zmm3, %zmm3
1300; KNL-NEXT:    vptestmd %zmm3, %zmm3, %k0
1301; KNL-NEXT:    kmovw %k0, {{[0-9]+}}(%rsp)
1302; KNL-NEXT:    vpmovsxbd %xmm2, %zmm2
1303; KNL-NEXT:    vpslld $31, %zmm2, %zmm2
1304; KNL-NEXT:    vptestmd %zmm2, %zmm2, %k0
1305; KNL-NEXT:    kmovw %k0, (%rsp)
1306; KNL-NEXT:    cmpl $0, (%rsp)
1307; KNL-NEXT:    je LBB42_2
1308; KNL-NEXT:  ## BB#1: ## %L1
1309; KNL-NEXT:    vmovaps %zmm0, (%rdi)
1310; KNL-NEXT:    vmovaps %zmm1, 64(%rdi)
1311; KNL-NEXT:    jmp LBB42_3
1312; KNL-NEXT:  LBB42_2: ## %L2
1313; KNL-NEXT:    vmovaps %zmm0, 4(%rdi)
1314; KNL-NEXT:    vmovaps %zmm1, 68(%rdi)
1315; KNL-NEXT:  LBB42_3: ## %End
1316; KNL-NEXT:    movq %rbp, %rsp
1317; KNL-NEXT:    popq %rbp
1318; KNL-NEXT:    retq
1319;
1320; SKX-LABEL: ktest_2:
1321; SKX:       ## BB#0:
1322; SKX-NEXT:    vmovups 64(%rdi), %zmm2
1323; SKX-NEXT:    vmovups (%rdi), %zmm3
1324; SKX-NEXT:    vcmpltps %zmm0, %zmm3, %k1
1325; SKX-NEXT:    vcmpltps %zmm1, %zmm2, %k2
1326; SKX-NEXT:    kunpckwd %k1, %k2, %k0
1327; SKX-NEXT:    vmovups 68(%rdi), %zmm2 {%k2} {z}
1328; SKX-NEXT:    vmovups 4(%rdi), %zmm3 {%k1} {z}
1329; SKX-NEXT:    vcmpltps %zmm3, %zmm0, %k1
1330; SKX-NEXT:    vcmpltps %zmm2, %zmm1, %k2
1331; SKX-NEXT:    kunpckwd %k1, %k2, %k1
1332; SKX-NEXT:    kord %k1, %k0, %k0
1333; SKX-NEXT:    ktestd %k0, %k0
1334; SKX-NEXT:    je LBB42_2
1335; SKX-NEXT:  ## BB#1: ## %L1
1336; SKX-NEXT:    vmovaps %zmm0, (%rdi)
1337; SKX-NEXT:    vmovaps %zmm1, 64(%rdi)
1338; SKX-NEXT:    retq
1339; SKX-NEXT:  LBB42_2: ## %L2
1340; SKX-NEXT:    vmovaps %zmm0, 4(%rdi)
1341; SKX-NEXT:    vmovaps %zmm1, 68(%rdi)
1342; SKX-NEXT:    retq
1343  %addr1 = getelementptr float, float * %base, i64 0
1344  %addr2 = getelementptr float, float * %base, i64 1
1345
1346  %vaddr1 = bitcast float* %addr1 to <32 x float>*
1347  %vaddr2 = bitcast float* %addr2 to <32 x float>*
1348
1349  %val1 = load <32 x float>, <32 x float> *%vaddr1, align 1
1350  %val2 = load <32 x float>, <32 x float> *%vaddr2, align 1
1351
1352  %sel1 = fcmp ogt <32 x float>%in, %val1
1353  %val3 = select <32 x i1> %sel1, <32 x float> %val2, <32 x float> zeroinitializer
1354  %sel2 = fcmp olt <32 x float> %in, %val3
1355  %sel3 = or <32 x i1> %sel1, %sel2
1356
1357  %int_sel3 = bitcast <32 x i1> %sel3 to i32 ; view the 32-lane mask as an integer
1358  %res = icmp eq i32 %int_sel3, zeroinitializer ; true when no lane is set
1359  br i1 %res, label %L2, label %L1
1360L1:
1361  store <32 x float> %in, <32 x float>* %vaddr1
1362  br label %End
1363L2:
1364  store <32 x float> %in, <32 x float>* %vaddr2
1365  br label %End
1366End:
1367  ret void
1368}
1369
; @load_8i1 -- load an <8 x i1> mask from memory and sign-extend it to
; <8 x i64> (each lane becomes all-ones or zero).
; The KNL run expects the mask byte to be loaded into a GPR (movzbl), moved to
; %k1, and applied as a zero-mask over an all-ones vector built with
; vpternlogd $255.
; The SKX run expects a direct kmovb load into a mask register followed by
; vpmovm2q.
1370define <8 x i64> @load_8i1(<8 x i1>* %a) {
1371; KNL-LABEL: load_8i1:
1372; KNL:       ## BB#0:
1373; KNL-NEXT:    movzbl (%rdi), %eax
1374; KNL-NEXT:    kmovw %eax, %k1
1375; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0
1376; KNL-NEXT:    vmovdqa64 %zmm0, %zmm0 {%k1} {z}
1377; KNL-NEXT:    retq
1378;
1379; SKX-LABEL: load_8i1:
1380; SKX:       ## BB#0:
1381; SKX-NEXT:    kmovb (%rdi), %k0
1382; SKX-NEXT:    vpmovm2q %k0, %zmm0
1383; SKX-NEXT:    retq
1384  %b = load <8 x i1>, <8 x i1>* %a
1385  %c = sext <8 x i1> %b to <8 x i64>
1386  ret <8 x i64> %c
1387}
1388
; @load_16i1 -- load a <16 x i1> mask from memory and sign-extend it to
; <16 x i32>.
; Both runs expect the mask to be loaded straight into a mask register with
; kmovw. The KNL run then zero-masks an all-ones vector (vpternlogd $255 +
; vmovdqa32 {z}); the SKX run expects a single vpmovm2d.
1389define <16 x i32> @load_16i1(<16 x i1>* %a) {
1390; KNL-LABEL: load_16i1:
1391; KNL:       ## BB#0:
1392; KNL-NEXT:    kmovw (%rdi), %k1
1393; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0
1394; KNL-NEXT:    vmovdqa32 %zmm0, %zmm0 {%k1} {z}
1395; KNL-NEXT:    retq
1396;
1397; SKX-LABEL: load_16i1:
1398; SKX:       ## BB#0:
1399; SKX-NEXT:    kmovw (%rdi), %k0
1400; SKX-NEXT:    vpmovm2d %k0, %zmm0
1401; SKX-NEXT:    retq
1402  %b = load <16 x i1>, <16 x i1>* %a
1403  %c = sext <16 x i1> %b to <16 x i32>
1404  ret <16 x i32> %c
1405}
1406
; @load_2i1 -- load a <2 x i1> mask from memory and sign-extend it to
; <2 x i16>.
; In both runs the expected code produces the result in 64-bit lanes: the KNL
; run zero-masks an all-ones zmm with vmovdqa64 and returns its xmm0
; sub-register, while the SKX run expects kmovb followed by vpmovm2q into
; %xmm0.
1407define <2 x i16> @load_2i1(<2 x i1>* %a) {
1408; KNL-LABEL: load_2i1:
1409; KNL:       ## BB#0:
1410; KNL-NEXT:    movzbl (%rdi), %eax
1411; KNL-NEXT:    kmovw %eax, %k1
1412; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0
1413; KNL-NEXT:    vmovdqa64 %zmm0, %zmm0 {%k1} {z}
1414; KNL-NEXT:    ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
1415; KNL-NEXT:    retq
1416;
1417; SKX-LABEL: load_2i1:
1418; SKX:       ## BB#0:
1419; SKX-NEXT:    kmovb (%rdi), %k0
1420; SKX-NEXT:    vpmovm2q %k0, %xmm0
1421; SKX-NEXT:    retq
1422  %b = load <2 x i1>, <2 x i1>* %a
1423  %c = sext <2 x i1> %b to <2 x i16>
1424  ret <2 x i16> %c
1425}
1426
; @load_4i1 -- load a <4 x i1> mask from memory and sign-extend it to
; <4 x i16>.
; In both runs the expected code produces the result in 32-bit lanes of
; %xmm0: the KNL run zero-masks an all-ones zmm with vmovdqa64 and then
; narrows it with vpmovqd, while the SKX run expects kmovb followed by
; vpmovm2d into %xmm0.
1427define <4 x i16> @load_4i1(<4 x i1>* %a) {
1428; KNL-LABEL: load_4i1:
1429; KNL:       ## BB#0:
1430; KNL-NEXT:    movzbl (%rdi), %eax
1431; KNL-NEXT:    kmovw %eax, %k1
1432; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0
1433; KNL-NEXT:    vmovdqa64 %zmm0, %zmm0 {%k1} {z}
1434; KNL-NEXT:    vpmovqd %zmm0, %ymm0
1435; KNL-NEXT:    ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
1436; KNL-NEXT:    retq
1437;
1438; SKX-LABEL: load_4i1:
1439; SKX:       ## BB#0:
1440; SKX-NEXT:    kmovb (%rdi), %k0
1441; SKX-NEXT:    vpmovm2d %k0, %xmm0
1442; SKX-NEXT:    retq
1443  %b = load <4 x i1>, <4 x i1>* %a
1444  %c = sext <4 x i1> %b to <4 x i16>
1445  ret <4 x i16> %c
1446}
1447
; @load_32i1 -- load a <32 x i1> mask from memory and sign-extend it to
; <32 x i16>.
; The KNL run expects the mask to be read as two 16-bit pieces, at (%rdi) and
; 2(%rdi); each piece zero-masks an all-ones zmm and is narrowed with vpmovdw,
; giving the result in %ymm0 and %ymm1.
; The SKX run expects a single 32-bit kmovd load followed by vpmovm2w.
1448define <32 x i16> @load_32i1(<32 x i1>* %a) {
1449; KNL-LABEL: load_32i1:
1450; KNL:       ## BB#0:
1451; KNL-NEXT:    kmovw (%rdi), %k1
1452; KNL-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1
1453; KNL-NEXT:    vmovdqa32 %zmm1, %zmm0 {%k1} {z}
1454; KNL-NEXT:    vpmovdw %zmm0, %ymm0
1455; KNL-NEXT:    kmovw 2(%rdi), %k1
1456; KNL-NEXT:    vmovdqa32 %zmm1, %zmm1 {%k1} {z}
1457; KNL-NEXT:    vpmovdw %zmm1, %ymm1
1458; KNL-NEXT:    retq
1459;
1460; SKX-LABEL: load_32i1:
1461; SKX:       ## BB#0:
1462; SKX-NEXT:    kmovd (%rdi), %k0
1463; SKX-NEXT:    vpmovm2w %k0, %zmm0
1464; SKX-NEXT:    retq
1465  %b = load <32 x i1>, <32 x i1>* %a
1466  %c = sext <32 x i1> %b to <32 x i16>
1467  ret <32 x i16> %c
1468}
1469
; @load_64i1 -- load a <64 x i1> mask from memory and sign-extend it to
; <64 x i8>.
; The KNL run expects the mask to be read as four 16-bit pieces, at byte
; offsets 0, 2, 4 and 6; each piece zero-masks an all-ones zmm and is narrowed
; to bytes with vpmovdb, and the xmm results are paired with vinsertf128 into
; %ymm0 and %ymm1.
; The SKX run expects a single 64-bit kmovq load followed by vpmovm2b.
1470define <64 x i8> @load_64i1(<64 x i1>* %a) {
1471; KNL-LABEL: load_64i1:
1472; KNL:       ## BB#0:
1473; KNL-NEXT:    kmovw (%rdi), %k1
1474; KNL-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1
1475; KNL-NEXT:    vmovdqa32 %zmm1, %zmm0 {%k1} {z}
1476; KNL-NEXT:    vpmovdb %zmm0, %xmm0
1477; KNL-NEXT:    kmovw 2(%rdi), %k1
1478; KNL-NEXT:    vmovdqa32 %zmm1, %zmm2 {%k1} {z}
1479; KNL-NEXT:    vpmovdb %zmm2, %xmm2
1480; KNL-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
1481; KNL-NEXT:    kmovw 4(%rdi), %k1
1482; KNL-NEXT:    vmovdqa32 %zmm1, %zmm2 {%k1} {z}
1483; KNL-NEXT:    vpmovdb %zmm2, %xmm2
1484; KNL-NEXT:    kmovw 6(%rdi), %k1
1485; KNL-NEXT:    vmovdqa32 %zmm1, %zmm1 {%k1} {z}
1486; KNL-NEXT:    vpmovdb %zmm1, %xmm1
1487; KNL-NEXT:    vinsertf128 $1, %xmm1, %ymm2, %ymm1
1488; KNL-NEXT:    retq
1489;
1490; SKX-LABEL: load_64i1:
1491; SKX:       ## BB#0:
1492; SKX-NEXT:    kmovq (%rdi), %k0
1493; SKX-NEXT:    vpmovm2b %k0, %zmm0
1494; SKX-NEXT:    retq
1495  %b = load <64 x i1>, <64 x i1>* %a
1496  %c = sext <64 x i1> %b to <64 x i8>
1497  ret <64 x i8> %c
1498}
1499
; @store_8i1 -- store an <8 x i1> argument to memory.
; The argument arrives in %xmm0 with 16-bit lanes (the expected code widens or
; shifts words). The KNL run expects the lanes to be widened to 64 bits, the
; low bit moved into the sign position (vpsllq $63), turned into a mask with
; vptestmq, and the mask written as one byte via a GPR.
; The SKX run expects vpsllw $15 + vpmovw2m and a direct kmovb store.
1500define void @store_8i1(<8 x i1>* %a, <8 x i1> %v) {
1501; KNL-LABEL: store_8i1:
1502; KNL:       ## BB#0:
1503; KNL-NEXT:    vpmovsxwq %xmm0, %zmm0
1504; KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
1505; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k0
1506; KNL-NEXT:    kmovw %k0, %eax
1507; KNL-NEXT:    movb %al, (%rdi)
1508; KNL-NEXT:    retq
1509;
1510; SKX-LABEL: store_8i1:
1511; SKX:       ## BB#0:
1512; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
1513; SKX-NEXT:    vpmovw2m %xmm0, %k0
1514; SKX-NEXT:    kmovb %k0, (%rdi)
1515; SKX-NEXT:    retq
1516  store <8 x i1> %v, <8 x i1>* %a
1517  ret void
1518}
1519
; @store_8i1_1 -- truncate an <8 x i16> argument to <8 x i1> and store the
; mask to memory.
; Apart from the function label, the expected instruction sequences are the
; same as for @store_8i1, which takes the <8 x i1> value directly.
1520define void @store_8i1_1(<8 x i1>* %a, <8 x i16> %v) {
1521; KNL-LABEL: store_8i1_1:
1522; KNL:       ## BB#0:
1523; KNL-NEXT:    vpmovsxwq %xmm0, %zmm0
1524; KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
1525; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k0
1526; KNL-NEXT:    kmovw %k0, %eax
1527; KNL-NEXT:    movb %al, (%rdi)
1528; KNL-NEXT:    retq
1529;
1530; SKX-LABEL: store_8i1_1:
1531; SKX:       ## BB#0:
1532; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
1533; SKX-NEXT:    vpmovw2m %xmm0, %k0
1534; SKX-NEXT:    kmovb %k0, (%rdi)
1535; SKX-NEXT:    retq
1536  %v1 = trunc <8 x i16> %v to <8 x i1> ; keep only bit 0 of every lane
1537  store <8 x i1> %v1, <8 x i1>* %a
1538  ret void
1539}
1540
; @store_16i1 -- store a <16 x i1> argument to memory.
; The argument arrives in %xmm0 with byte lanes. The KNL run expects the bytes
; to be widened to 32 bits, the low bit moved into the sign position
; (vpslld $31), turned into a mask with vptestmd, and stored with kmovw.
; The SKX run expects vpsllw $7 + vpmovb2m and a kmovw store.
1541define void @store_16i1(<16 x i1>* %a, <16 x i1> %v) {
1542; KNL-LABEL: store_16i1:
1543; KNL:       ## BB#0:
1544; KNL-NEXT:    vpmovsxbd %xmm0, %zmm0
1545; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
1546; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k0
1547; KNL-NEXT:    kmovw %k0, (%rdi)
1548; KNL-NEXT:    retq
1549;
1550; SKX-LABEL: store_16i1:
1551; SKX:       ## BB#0:
1552; SKX-NEXT:    vpsllw $7, %xmm0, %xmm0
1553; SKX-NEXT:    vpmovb2m %xmm0, %k0
1554; SKX-NEXT:    kmovw %k0, (%rdi)
1555; SKX-NEXT:    retq
1556  store <16 x i1> %v, <16 x i1>* %a
1557  ret void
1558}
1559
; @store_32i1 -- store a <32 x i1> argument to memory.
; The argument arrives in %ymm0 with byte lanes. The KNL run expects the
; vector to be split into two xmm halves, each converted to a 16-bit mask
; (vpmovsxbd / vpslld $31 / vptestmd); the upper half is stored at 2(%rdi) and
; the lower half at (%rdi).
; The SKX run expects vpsllw $7 + vpmovb2m on the whole ymm and one kmovd
; store.
1560define void @store_32i1(<32 x i1>* %a, <32 x i1> %v) {
1561; KNL-LABEL: store_32i1:
1562; KNL:       ## BB#0:
1563; KNL-NEXT:    vextractf128 $1, %ymm0, %xmm1
1564; KNL-NEXT:    vpmovsxbd %xmm1, %zmm1
1565; KNL-NEXT:    vpslld $31, %zmm1, %zmm1
1566; KNL-NEXT:    vptestmd %zmm1, %zmm1, %k0
1567; KNL-NEXT:    kmovw %k0, 2(%rdi)
1568; KNL-NEXT:    vpmovsxbd %xmm0, %zmm0
1569; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
1570; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k0
1571; KNL-NEXT:    kmovw %k0, (%rdi)
1572; KNL-NEXT:    retq
1573;
1574; SKX-LABEL: store_32i1:
1575; SKX:       ## BB#0:
1576; SKX-NEXT:    vpsllw $7, %ymm0, %ymm0
1577; SKX-NEXT:    vpmovb2m %ymm0, %k0
1578; SKX-NEXT:    kmovd %k0, (%rdi)
1579; SKX-NEXT:    retq
1580  store <32 x i1> %v, <32 x i1>* %a
1581  ret void
1582}
1583
; @store_32i1_1 -- truncate a <32 x i16> argument to <32 x i1> and store the
; mask to memory.
; The KNL run expects each ymm half of the argument to be narrowed to bytes
; first (vpmovsxwd + vpmovdb) and then converted to a 16-bit mask the same way
; as in @store_32i1, with the second half stored at 2(%rdi) and the first at
; (%rdi).
; The SKX run expects vpsllw $15 + vpmovw2m on the whole zmm and one kmovd
; store.
1584define void @store_32i1_1(<32 x i1>* %a, <32 x i16> %v) {
1585; KNL-LABEL: store_32i1_1:
1586; KNL:       ## BB#0:
1587; KNL-NEXT:    vpmovsxwd %ymm0, %zmm0
1588; KNL-NEXT:    vpmovdb %zmm0, %xmm0
1589; KNL-NEXT:    vpmovsxwd %ymm1, %zmm1
1590; KNL-NEXT:    vpmovdb %zmm1, %xmm1
1591; KNL-NEXT:    vpmovsxbd %xmm1, %zmm1
1592; KNL-NEXT:    vpslld $31, %zmm1, %zmm1
1593; KNL-NEXT:    vptestmd %zmm1, %zmm1, %k0
1594; KNL-NEXT:    kmovw %k0, 2(%rdi)
1595; KNL-NEXT:    vpmovsxbd %xmm0, %zmm0
1596; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
1597; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k0
1598; KNL-NEXT:    kmovw %k0, (%rdi)
1599; KNL-NEXT:    retq
1600;
1601; SKX-LABEL: store_32i1_1:
1602; SKX:       ## BB#0:
1603; SKX-NEXT:    vpsllw $15, %zmm0, %zmm0
1604; SKX-NEXT:    vpmovw2m %zmm0, %k0
1605; SKX-NEXT:    kmovd %k0, (%rdi)
1606; SKX-NEXT:    retq
1607  %v1 = trunc <32 x i16> %v to <32 x i1> ; keep only bit 0 of every lane
1608  store <32 x i1> %v1, <32 x i1>* %a
1609  ret void
1610}
1611
1612
1613define void @store_64i1(<64 x i1>* %a, <64 x i1> %v) {
1614;
1615; KNL-LABEL: store_64i1:
1616; KNL:       ## BB#0:
1617; KNL-NEXT:    pushq %rbp
1618; KNL-NEXT:  Ltmp9:
1619; KNL-NEXT:    .cfi_def_cfa_offset 16
1620; KNL-NEXT:    pushq %r15
1621; KNL-NEXT:  Ltmp10:
1622; KNL-NEXT:    .cfi_def_cfa_offset 24
1623; KNL-NEXT:    pushq %r14
1624; KNL-NEXT:  Ltmp11:
1625; KNL-NEXT:    .cfi_def_cfa_offset 32
1626; KNL-NEXT:    pushq %r13
1627; KNL-NEXT:  Ltmp12:
1628; KNL-NEXT:    .cfi_def_cfa_offset 40
1629; KNL-NEXT:    pushq %r12
1630; KNL-NEXT:  Ltmp13:
1631; KNL-NEXT:    .cfi_def_cfa_offset 48
1632; KNL-NEXT:    pushq %rbx
1633; KNL-NEXT:  Ltmp14:
1634; KNL-NEXT:    .cfi_def_cfa_offset 56
1635; KNL-NEXT:  Ltmp15:
1636; KNL-NEXT:    .cfi_offset %rbx, -56
1637; KNL-NEXT:  Ltmp16:
1638; KNL-NEXT:    .cfi_offset %r12, -48
1639; KNL-NEXT:  Ltmp17:
1640; KNL-NEXT:    .cfi_offset %r13, -40
1641; KNL-NEXT:  Ltmp18:
1642; KNL-NEXT:    .cfi_offset %r14, -32
1643; KNL-NEXT:  Ltmp19:
1644; KNL-NEXT:    .cfi_offset %r15, -24
1645; KNL-NEXT:  Ltmp20:
1646; KNL-NEXT:    .cfi_offset %rbp, -16
1647; KNL-NEXT:    vpmovsxbd %xmm0, %zmm0
1648; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
1649; KNL-NEXT:    vpmovsxbd %xmm1, %zmm1
1650; KNL-NEXT:    vpslld $31, %zmm1, %zmm1
1651; KNL-NEXT:    vpmovsxbd %xmm2, %zmm2
1652; KNL-NEXT:    vpslld $31, %zmm2, %zmm2
1653; KNL-NEXT:    vpmovsxbd %xmm3, %zmm3
1654; KNL-NEXT:    vpslld $31, %zmm3, %zmm3
1655; KNL-NEXT:    vptestmd %zmm3, %zmm3, %k0
1656; KNL-NEXT:    kshiftlw $14, %k0, %k1
1657; KNL-NEXT:    kshiftrw $15, %k1, %k1
1658; KNL-NEXT:    kmovw %k1, %r8d
1659; KNL-NEXT:    kshiftlw $15, %k0, %k1
1660; KNL-NEXT:    kshiftrw $15, %k1, %k1
1661; KNL-NEXT:    kmovw %k1, %r9d
1662; KNL-NEXT:    kshiftlw $13, %k0, %k1
1663; KNL-NEXT:    kshiftrw $15, %k1, %k1
1664; KNL-NEXT:    kmovw %k1, %r10d
1665; KNL-NEXT:    kshiftlw $12, %k0, %k1
1666; KNL-NEXT:    kshiftrw $15, %k1, %k1
1667; KNL-NEXT:    kmovw %k1, %r11d
1668; KNL-NEXT:    kshiftlw $11, %k0, %k1
1669; KNL-NEXT:    kshiftrw $15, %k1, %k1
1670; KNL-NEXT:    kmovw %k1, %r14d
1671; KNL-NEXT:    kshiftlw $10, %k0, %k1
1672; KNL-NEXT:    kshiftrw $15, %k1, %k1
1673; KNL-NEXT:    kmovw %k1, %r15d
1674; KNL-NEXT:    kshiftlw $9, %k0, %k1
1675; KNL-NEXT:    kshiftrw $15, %k1, %k1
1676; KNL-NEXT:    kmovw %k1, %r12d
1677; KNL-NEXT:    kshiftlw $8, %k0, %k1
1678; KNL-NEXT:    kshiftrw $15, %k1, %k1
1679; KNL-NEXT:    kmovw %k1, %r13d
1680; KNL-NEXT:    kshiftlw $7, %k0, %k1
1681; KNL-NEXT:    kshiftrw $15, %k1, %k1
1682; KNL-NEXT:    kmovw %k1, %ebx
1683; KNL-NEXT:    kshiftlw $6, %k0, %k1
1684; KNL-NEXT:    kshiftrw $15, %k1, %k1
1685; KNL-NEXT:    kmovw %k1, %ebp
1686; KNL-NEXT:    kshiftlw $5, %k0, %k1
1687; KNL-NEXT:    kshiftrw $15, %k1, %k1
1688; KNL-NEXT:    kmovw %k1, %eax
1689; KNL-NEXT:    kshiftlw $4, %k0, %k1
1690; KNL-NEXT:    kshiftrw $15, %k1, %k1
1691; KNL-NEXT:    kmovw %k1, %ecx
1692; KNL-NEXT:    kshiftlw $3, %k0, %k1
1693; KNL-NEXT:    kshiftrw $15, %k1, %k1
1694; KNL-NEXT:    kmovw %k1, %edx
1695; KNL-NEXT:    kshiftlw $2, %k0, %k1
1696; KNL-NEXT:    kshiftrw $15, %k1, %k1
1697; KNL-NEXT:    kmovw %k1, %esi
1698; KNL-NEXT:    kshiftlw $1, %k0, %k1
1699; KNL-NEXT:    kshiftrw $15, %k1, %k1
1700; KNL-NEXT:    vmovd %r9d, %xmm3
1701; KNL-NEXT:    kmovw %k1, %r9d
1702; KNL-NEXT:    vptestmd %zmm2, %zmm2, %k2
1703; KNL-NEXT:    kshiftlw $0, %k0, %k0
1704; KNL-NEXT:    kshiftrw $15, %k0, %k0
1705; KNL-NEXT:    vpinsrb $1, %r8d, %xmm3, %xmm2
1706; KNL-NEXT:    vpinsrb $2, %r10d, %xmm2, %xmm2
1707; KNL-NEXT:    vpinsrb $3, %r11d, %xmm2, %xmm2
1708; KNL-NEXT:    vpinsrb $4, %r14d, %xmm2, %xmm2
1709; KNL-NEXT:    vpinsrb $5, %r15d, %xmm2, %xmm2
1710; KNL-NEXT:    vpinsrb $6, %r12d, %xmm2, %xmm2
1711; KNL-NEXT:    vpinsrb $7, %r13d, %xmm2, %xmm2
1712; KNL-NEXT:    vpinsrb $8, %ebx, %xmm2, %xmm2
1713; KNL-NEXT:    vpinsrb $9, %ebp, %xmm2, %xmm2
1714; KNL-NEXT:    vpinsrb $10, %eax, %xmm2, %xmm2
1715; KNL-NEXT:    vpinsrb $11, %ecx, %xmm2, %xmm2
1716; KNL-NEXT:    vpinsrb $12, %edx, %xmm2, %xmm2
1717; KNL-NEXT:    vpinsrb $13, %esi, %xmm2, %xmm2
1718; KNL-NEXT:    vpinsrb $14, %r9d, %xmm2, %xmm2
1719; KNL-NEXT:    kmovw %k0, %eax
1720; KNL-NEXT:    vpinsrb $15, %eax, %xmm2, %xmm2
1721; KNL-NEXT:    vpmovsxbd %xmm2, %zmm2
1722; KNL-NEXT:    vpslld $31, %zmm2, %zmm2
1723; KNL-NEXT:    vptestmd %zmm2, %zmm2, %k0
1724; KNL-NEXT:    kmovw %k0, 6(%rdi)
1725; KNL-NEXT:    kshiftlw $14, %k2, %k0
1726; KNL-NEXT:    kshiftrw $15, %k0, %k0
1727; KNL-NEXT:    kmovw %k0, %r8d
1728; KNL-NEXT:    kshiftlw $15, %k2, %k0
1729; KNL-NEXT:    kshiftrw $15, %k0, %k0
1730; KNL-NEXT:    kmovw %k0, %r10d
1731; KNL-NEXT:    kshiftlw $13, %k2, %k0
1732; KNL-NEXT:    kshiftrw $15, %k0, %k0
1733; KNL-NEXT:    kmovw %k0, %r9d
1734; KNL-NEXT:    kshiftlw $12, %k2, %k0
1735; KNL-NEXT:    kshiftrw $15, %k0, %k0
1736; KNL-NEXT:    kmovw %k0, %r11d
1737; KNL-NEXT:    kshiftlw $11, %k2, %k0
1738; KNL-NEXT:    kshiftrw $15, %k0, %k0
1739; KNL-NEXT:    kmovw %k0, %r14d
1740; KNL-NEXT:    kshiftlw $10, %k2, %k0
1741; KNL-NEXT:    kshiftrw $15, %k0, %k0
1742; KNL-NEXT:    kmovw %k0, %r15d
1743; KNL-NEXT:    kshiftlw $9, %k2, %k0
1744; KNL-NEXT:    kshiftrw $15, %k0, %k0
1745; KNL-NEXT:    kmovw %k0, %r12d
1746; KNL-NEXT:    kshiftlw $8, %k2, %k0
1747; KNL-NEXT:    kshiftrw $15, %k0, %k0
1748; KNL-NEXT:    kmovw %k0, %r13d
1749; KNL-NEXT:    kshiftlw $7, %k2, %k0
1750; KNL-NEXT:    kshiftrw $15, %k0, %k0
1751; KNL-NEXT:    kmovw %k0, %edx
1752; KNL-NEXT:    kshiftlw $6, %k2, %k0
1753; KNL-NEXT:    kshiftrw $15, %k0, %k0
1754; KNL-NEXT:    kmovw %k0, %esi
1755; KNL-NEXT:    kshiftlw $5, %k2, %k0
1756; KNL-NEXT:    kshiftrw $15, %k0, %k0
1757; KNL-NEXT:    kmovw %k0, %ebp
1758; KNL-NEXT:    kshiftlw $4, %k2, %k0
1759; KNL-NEXT:    kshiftrw $15, %k0, %k0
1760; KNL-NEXT:    kmovw %k0, %ebx
1761; KNL-NEXT:    kshiftlw $3, %k2, %k0
1762; KNL-NEXT:    kshiftrw $15, %k0, %k0
1763; KNL-NEXT:    kmovw %k0, %eax
1764; KNL-NEXT:    kshiftlw $2, %k2, %k0
1765; KNL-NEXT:    kshiftrw $15, %k0, %k0
1766; KNL-NEXT:    kmovw %k0, %ecx
1767; KNL-NEXT:    kshiftlw $1, %k2, %k0
1768; KNL-NEXT:    kshiftrw $15, %k0, %k0
1769; KNL-NEXT:    vmovd %r10d, %xmm2
1770; KNL-NEXT:    kmovw %k0, %r10d
1771; KNL-NEXT:    vptestmd %zmm1, %zmm1, %k1
1772; KNL-NEXT:    kshiftlw $0, %k2, %k0
1773; KNL-NEXT:    kshiftrw $15, %k0, %k0
1774; KNL-NEXT:    vpinsrb $1, %r8d, %xmm2, %xmm1
1775; KNL-NEXT:    vpinsrb $2, %r9d, %xmm1, %xmm1
1776; KNL-NEXT:    vpinsrb $3, %r11d, %xmm1, %xmm1
1777; KNL-NEXT:    vpinsrb $4, %r14d, %xmm1, %xmm1
1778; KNL-NEXT:    vpinsrb $5, %r15d, %xmm1, %xmm1
1779; KNL-NEXT:    vpinsrb $6, %r12d, %xmm1, %xmm1
1780; KNL-NEXT:    vpinsrb $7, %r13d, %xmm1, %xmm1
1781; KNL-NEXT:    vpinsrb $8, %edx, %xmm1, %xmm1
1782; KNL-NEXT:    vpinsrb $9, %esi, %xmm1, %xmm1
1783; KNL-NEXT:    vpinsrb $10, %ebp, %xmm1, %xmm1
1784; KNL-NEXT:    vpinsrb $11, %ebx, %xmm1, %xmm1
1785; KNL-NEXT:    vpinsrb $12, %eax, %xmm1, %xmm1
1786; KNL-NEXT:    vpinsrb $13, %ecx, %xmm1, %xmm1
1787; KNL-NEXT:    vpinsrb $14, %r10d, %xmm1, %xmm1
1788; KNL-NEXT:    kmovw %k0, %eax
1789; KNL-NEXT:    vpinsrb $15, %eax, %xmm1, %xmm1
1790; KNL-NEXT:    vpmovsxbd %xmm1, %zmm1
1791; KNL-NEXT:    vpslld $31, %zmm1, %zmm1
1792; KNL-NEXT:    vptestmd %zmm1, %zmm1, %k0
1793; KNL-NEXT:    kmovw %k0, 4(%rdi)
1794; KNL-NEXT:    kshiftlw $14, %k1, %k0
1795; KNL-NEXT:    kshiftrw $15, %k0, %k0
1796; KNL-NEXT:    kmovw %k0, %r8d
1797; KNL-NEXT:    kshiftlw $15, %k1, %k0
1798; KNL-NEXT:    kshiftrw $15, %k0, %k0
1799; KNL-NEXT:    kmovw %k0, %r10d
1800; KNL-NEXT:    kshiftlw $13, %k1, %k0
1801; KNL-NEXT:    kshiftrw $15, %k0, %k0
1802; KNL-NEXT:    kmovw %k0, %r9d
1803; KNL-NEXT:    kshiftlw $12, %k1, %k0
1804; KNL-NEXT:    kshiftrw $15, %k0, %k0
1805; KNL-NEXT:    kmovw %k0, %r11d
1806; KNL-NEXT:    kshiftlw $11, %k1, %k0
1807; KNL-NEXT:    kshiftrw $15, %k0, %k0
1808; KNL-NEXT:    kmovw %k0, %r14d
1809; KNL-NEXT:    kshiftlw $10, %k1, %k0
1810; KNL-NEXT:    kshiftrw $15, %k0, %k0
1811; KNL-NEXT:    kmovw %k0, %r15d
1812; KNL-NEXT:    kshiftlw $9, %k1, %k0
1813; KNL-NEXT:    kshiftrw $15, %k0, %k0
1814; KNL-NEXT:    kmovw %k0, %r12d
1815; KNL-NEXT:    kshiftlw $8, %k1, %k0
1816; KNL-NEXT:    kshiftrw $15, %k0, %k0
1817; KNL-NEXT:    kmovw %k0, %r13d
1818; KNL-NEXT:    kshiftlw $7, %k1, %k0
1819; KNL-NEXT:    kshiftrw $15, %k0, %k0
1820; KNL-NEXT:    kmovw %k0, %edx
1821; KNL-NEXT:    kshiftlw $6, %k1, %k0
1822; KNL-NEXT:    kshiftrw $15, %k0, %k0
1823; KNL-NEXT:    kmovw %k0, %esi
1824; KNL-NEXT:    kshiftlw $5, %k1, %k0
1825; KNL-NEXT:    kshiftrw $15, %k0, %k0
1826; KNL-NEXT:    kmovw %k0, %ebp
1827; KNL-NEXT:    kshiftlw $4, %k1, %k0
1828; KNL-NEXT:    kshiftrw $15, %k0, %k0
1829; KNL-NEXT:    kmovw %k0, %ebx
1830; KNL-NEXT:    kshiftlw $3, %k1, %k0
1831; KNL-NEXT:    kshiftrw $15, %k0, %k0
1832; KNL-NEXT:    kmovw %k0, %eax
1833; KNL-NEXT:    kshiftlw $2, %k1, %k0
1834; KNL-NEXT:    kshiftrw $15, %k0, %k0
1835; KNL-NEXT:    kmovw %k0, %ecx
1836; KNL-NEXT:    kshiftlw $1, %k1, %k0
1837; KNL-NEXT:    kshiftrw $15, %k0, %k0
1838; KNL-NEXT:    vmovd %r10d, %xmm1
1839; KNL-NEXT:    kmovw %k0, %r10d
1840; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k0
1841; KNL-NEXT:    kshiftlw $0, %k1, %k1
1842; KNL-NEXT:    kshiftrw $15, %k1, %k1
1843; KNL-NEXT:    vpinsrb $1, %r8d, %xmm1, %xmm0
1844; KNL-NEXT:    vpinsrb $2, %r9d, %xmm0, %xmm0
1845; KNL-NEXT:    vpinsrb $3, %r11d, %xmm0, %xmm0
1846; KNL-NEXT:    vpinsrb $4, %r14d, %xmm0, %xmm0
1847; KNL-NEXT:    vpinsrb $5, %r15d, %xmm0, %xmm0
1848; KNL-NEXT:    vpinsrb $6, %r12d, %xmm0, %xmm0
1849; KNL-NEXT:    vpinsrb $7, %r13d, %xmm0, %xmm0
1850; KNL-NEXT:    vpinsrb $8, %edx, %xmm0, %xmm0
1851; KNL-NEXT:    vpinsrb $9, %esi, %xmm0, %xmm0
1852; KNL-NEXT:    vpinsrb $10, %ebp, %xmm0, %xmm0
1853; KNL-NEXT:    vpinsrb $11, %ebx, %xmm0, %xmm0
1854; KNL-NEXT:    vpinsrb $12, %eax, %xmm0, %xmm0
1855; KNL-NEXT:    vpinsrb $13, %ecx, %xmm0, %xmm0
1856; KNL-NEXT:    vpinsrb $14, %r10d, %xmm0, %xmm0
1857; KNL-NEXT:    kmovw %k1, %eax
1858; KNL-NEXT:    vpinsrb $15, %eax, %xmm0, %xmm0
1859; KNL-NEXT:    vpmovsxbd %xmm0, %zmm0
1860; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
1861; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k1
1862; KNL-NEXT:    kmovw %k1, 2(%rdi)
1863; KNL-NEXT:    kshiftlw $14, %k0, %k1
1864; KNL-NEXT:    kshiftrw $15, %k1, %k1
1865; KNL-NEXT:    kmovw %k1, %r8d
1866; KNL-NEXT:    kshiftlw $15, %k0, %k1
1867; KNL-NEXT:    kshiftrw $15, %k1, %k1
1868; KNL-NEXT:    kmovw %k1, %r9d
1869; KNL-NEXT:    kshiftlw $13, %k0, %k1
1870; KNL-NEXT:    kshiftrw $15, %k1, %k1
1871; KNL-NEXT:    kmovw %k1, %r10d
1872; KNL-NEXT:    kshiftlw $12, %k0, %k1
1873; KNL-NEXT:    kshiftrw $15, %k1, %k1
1874; KNL-NEXT:    kmovw %k1, %r11d
1875; KNL-NEXT:    kshiftlw $11, %k0, %k1
1876; KNL-NEXT:    kshiftrw $15, %k1, %k1
1877; KNL-NEXT:    kmovw %k1, %r14d
1878; KNL-NEXT:    kshiftlw $10, %k0, %k1
1879; KNL-NEXT:    kshiftrw $15, %k1, %k1
1880; KNL-NEXT:    kmovw %k1, %r15d
1881; KNL-NEXT:    kshiftlw $9, %k0, %k1
1882; KNL-NEXT:    kshiftrw $15, %k1, %k1
1883; KNL-NEXT:    kmovw %k1, %r12d
1884; KNL-NEXT:    kshiftlw $8, %k0, %k1
1885; KNL-NEXT:    kshiftrw $15, %k1, %k1
1886; KNL-NEXT:    kmovw %k1, %r13d
1887; KNL-NEXT:    kshiftlw $7, %k0, %k1
1888; KNL-NEXT:    kshiftrw $15, %k1, %k1
1889; KNL-NEXT:    kmovw %k1, %edx
1890; KNL-NEXT:    kshiftlw $6, %k0, %k1
1891; KNL-NEXT:    kshiftrw $15, %k1, %k1
1892; KNL-NEXT:    kmovw %k1, %esi
1893; KNL-NEXT:    kshiftlw $5, %k0, %k1
1894; KNL-NEXT:    kshiftrw $15, %k1, %k1
1895; KNL-NEXT:    kmovw %k1, %ebp
1896; KNL-NEXT:    kshiftlw $4, %k0, %k1
1897; KNL-NEXT:    kshiftrw $15, %k1, %k1
1898; KNL-NEXT:    kmovw %k1, %ebx
1899; KNL-NEXT:    kshiftlw $3, %k0, %k1
1900; KNL-NEXT:    kshiftrw $15, %k1, %k1
1901; KNL-NEXT:    kmovw %k1, %eax
1902; KNL-NEXT:    kshiftlw $2, %k0, %k1
1903; KNL-NEXT:    kshiftrw $15, %k1, %k1
1904; KNL-NEXT:    kmovw %k1, %ecx
1905; KNL-NEXT:    kshiftlw $1, %k0, %k1
1906; KNL-NEXT:    kshiftrw $15, %k1, %k1
1907; KNL-NEXT:    vmovd %r9d, %xmm0
1908; KNL-NEXT:    kmovw %k1, %r9d
1909; KNL-NEXT:    vpinsrb $1, %r8d, %xmm0, %xmm0
1910; KNL-NEXT:    vpinsrb $2, %r10d, %xmm0, %xmm0
1911; KNL-NEXT:    vpinsrb $3, %r11d, %xmm0, %xmm0
1912; KNL-NEXT:    vpinsrb $4, %r14d, %xmm0, %xmm0
1913; KNL-NEXT:    vpinsrb $5, %r15d, %xmm0, %xmm0
1914; KNL-NEXT:    vpinsrb $6, %r12d, %xmm0, %xmm0
1915; KNL-NEXT:    vpinsrb $7, %r13d, %xmm0, %xmm0
1916; KNL-NEXT:    vpinsrb $8, %edx, %xmm0, %xmm0
1917; KNL-NEXT:    vpinsrb $9, %esi, %xmm0, %xmm0
1918; KNL-NEXT:    vpinsrb $10, %ebp, %xmm0, %xmm0
1919; KNL-NEXT:    vpinsrb $11, %ebx, %xmm0, %xmm0
1920; KNL-NEXT:    vpinsrb $12, %eax, %xmm0, %xmm0
1921; KNL-NEXT:    kshiftlw $0, %k0, %k0
1922; KNL-NEXT:    kshiftrw $15, %k0, %k0
1923; KNL-NEXT:    vpinsrb $13, %ecx, %xmm0, %xmm0
1924; KNL-NEXT:    vpinsrb $14, %r9d, %xmm0, %xmm0
1925; KNL-NEXT:    kmovw %k0, %eax
1926; KNL-NEXT:    vpinsrb $15, %eax, %xmm0, %xmm0
1927; KNL-NEXT:    vpmovsxbd %xmm0, %zmm0
1928; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
1929; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k0
1930; KNL-NEXT:    kmovw %k0, (%rdi)
1931; KNL-NEXT:    popq %rbx
1932; KNL-NEXT:    popq %r12
1933; KNL-NEXT:    popq %r13
1934; KNL-NEXT:    popq %r14
1935; KNL-NEXT:    popq %r15
1936; KNL-NEXT:    popq %rbp
1937; KNL-NEXT:    retq
1938;
1939; SKX-LABEL: store_64i1:
1940; SKX:       ## BB#0:
1941; SKX-NEXT:    vpsllw $7, %zmm0, %zmm0
1942; SKX-NEXT:    vpmovb2m %zmm0, %k0
1943; SKX-NEXT:    kmovq %k0, (%rdi)
1944; SKX-NEXT:    retq
1945  store <64 x i1> %v, <64 x i1>* %a
1946  ret void
1947}
1948