• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s --check-prefix=ALL --check-prefix=KNL
3; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s --check-prefix=ALL --check-prefix=SKX
4
; Masked zero-extend from memory: loads <8 x i8> through %i (align 1), widens
; it to <8 x i16>, and returns zero in every lane whose %mask bit is clear.
; The KNL run expects the mask to be applied with vpsllw/vpsraw + vpand; the
; SKX run expects one zero-masked ({%k1} {z}) vpmovzxbw fed by vpmovw2m.
; NOTE(review): declared readnone yet the body loads through %i - confirm
; whether readonly was meant.
5define <8 x i16> @zext_8x8mem_to_8x16(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
6; KNL-LABEL: zext_8x8mem_to_8x16:
7; KNL:       ## BB#0:
8; KNL-NEXT:    vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
9; KNL-NEXT:    vpsllw $15, %xmm0, %xmm0
10; KNL-NEXT:    vpsraw $15, %xmm0, %xmm0
11; KNL-NEXT:    vpand %xmm1, %xmm0, %xmm0
12; KNL-NEXT:    retq
13;
14; SKX-LABEL: zext_8x8mem_to_8x16:
15; SKX:       ## BB#0:
16; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
17; SKX-NEXT:    vpmovw2m %xmm0, %k1
18; SKX-NEXT:    vpmovzxbw {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
19; SKX-NEXT:    retq
20  %a   = load <8 x i8>,<8 x i8> *%i,align 1
21  %x   = zext <8 x i8> %a to <8 x i16>
22  %ret = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> zeroinitializer
23  ret <8 x i16> %ret
24}
25
; Masked sign-extend from memory: loads <8 x i8> through %i (align 1), widens
; it to <8 x i16>, and returns zero in every lane whose %mask bit is clear.
; The KNL run expects vpmovsxbw from (%rdi) followed by vpsllw/vpsraw + vpand;
; the SKX run expects one zero-masked ({%k1} {z}) vpmovsxbw fed by vpmovw2m.
; NOTE(review): declared readnone yet the body loads through %i - confirm
; whether readonly was meant.
26define <8 x i16> @sext_8x8mem_to_8x16(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
27; KNL-LABEL: sext_8x8mem_to_8x16:
28; KNL:       ## BB#0:
29; KNL-NEXT:    vpmovsxbw (%rdi), %xmm1
30; KNL-NEXT:    vpsllw $15, %xmm0, %xmm0
31; KNL-NEXT:    vpsraw $15, %xmm0, %xmm0
32; KNL-NEXT:    vpand %xmm1, %xmm0, %xmm0
33; KNL-NEXT:    retq
34;
35; SKX-LABEL: sext_8x8mem_to_8x16:
36; SKX:       ## BB#0:
37; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
38; SKX-NEXT:    vpmovw2m %xmm0, %k1
39; SKX-NEXT:    vpmovsxbw (%rdi), %xmm0 {%k1} {z}
40; SKX-NEXT:    retq
41  %a   = load <8 x i8>,<8 x i8> *%i,align 1
42  %x   = sext <8 x i8> %a to <8 x i16>
43  %ret = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> zeroinitializer
44  ret <8 x i16> %ret
45}
46
47
; Masked zero-extend from memory: loads <16 x i8> through %i (align 1), widens
; it to <16 x i16>, and returns zero in every lane whose %mask bit is clear.
; The KNL run expects the byte mask in xmm0 to be widened to ymm0 first, then
; vpsllw/vpsraw + vpand; the SKX run expects vpmovb2m into %k1 and one
; zero-masked ({%k1} {z}) vpmovzxbw.
; NOTE(review): declared readnone yet the body loads through %i - confirm
; whether readonly was meant.
48define <16 x i16> @zext_16x8mem_to_16x16(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone {
49; KNL-LABEL: zext_16x8mem_to_16x16:
50; KNL:       ## BB#0:
51; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
52; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
53; KNL-NEXT:    vpsllw $15, %ymm0, %ymm0
54; KNL-NEXT:    vpsraw $15, %ymm0, %ymm0
55; KNL-NEXT:    vpand %ymm1, %ymm0, %ymm0
56; KNL-NEXT:    retq
57;
58; SKX-LABEL: zext_16x8mem_to_16x16:
59; SKX:       ## BB#0:
60; SKX-NEXT:    vpsllw $7, %xmm0, %xmm0
61; SKX-NEXT:    vpmovb2m %xmm0, %k1
62; SKX-NEXT:    vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
63; SKX-NEXT:    retq
64  %a   = load <16 x i8>,<16 x i8> *%i,align 1
65  %x   = zext <16 x i8> %a to <16 x i16>
66  %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer
67  ret <16 x i16> %ret
68}
69
; Masked sign-extend from memory: loads <16 x i8> through %i (align 1), widens
; it to <16 x i16>, and returns zero in every lane whose %mask bit is clear.
; The KNL run expects the byte mask in xmm0 to be widened to ymm0, the data
; loaded with vpmovsxbw, then vpsllw/vpsraw + vpand; the SKX run expects
; vpmovb2m into %k1 and one zero-masked ({%k1} {z}) vpmovsxbw.
; NOTE(review): declared readnone yet the body loads through %i - confirm
; whether readonly was meant.
70define <16 x i16> @sext_16x8mem_to_16x16(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone {
71; KNL-LABEL: sext_16x8mem_to_16x16:
72; KNL:       ## BB#0:
73; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
74; KNL-NEXT:    vpmovsxbw (%rdi), %ymm1
75; KNL-NEXT:    vpsllw $15, %ymm0, %ymm0
76; KNL-NEXT:    vpsraw $15, %ymm0, %ymm0
77; KNL-NEXT:    vpand %ymm1, %ymm0, %ymm0
78; KNL-NEXT:    retq
79;
80; SKX-LABEL: sext_16x8mem_to_16x16:
81; SKX:       ## BB#0:
82; SKX-NEXT:    vpsllw $7, %xmm0, %xmm0
83; SKX-NEXT:    vpmovb2m %xmm0, %k1
84; SKX-NEXT:    vpmovsxbw (%rdi), %ymm0 {%k1} {z}
85; SKX-NEXT:    retq
86  %a   = load <16 x i8>,<16 x i8> *%i,align 1
87  %x   = sext <16 x i8> %a to <16 x i16>
88  %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer
89  ret <16 x i16> %ret
90}
91
; Unmasked zero-extend of a register operand: <16 x i8> %a to <16 x i16>.
; Both runs share one expectation (the common check prefix): a single
; vpmovzxbw from xmm0 to ymm0.
92define <16 x i16> @zext_16x8_to_16x16(<16 x i8> %a ) nounwind readnone {
93; ALL-LABEL: zext_16x8_to_16x16:
94; ALL:       ## BB#0:
95; ALL-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
96; ALL-NEXT:    retq
97  %x   = zext <16 x i8> %a to <16 x i16>
98  ret <16 x i16> %x
99}
100
; Masked zero-extend of a register operand: widens <16 x i8> %a to <16 x i16>
; and returns zero in every lane whose %mask bit is clear.
; The KNL run expects both the mask (xmm1) and the data (xmm0) to be widened
; with vpmovzxbw, then vpsllw/vpsraw + vpand; the SKX run expects vpmovb2m
; into %k1 and one zero-masked ({%k1} {z}) vpmovzxbw.
101define <16 x i16> @zext_16x8_to_16x16_mask(<16 x i8> %a ,<16 x i1> %mask) nounwind readnone {
102; KNL-LABEL: zext_16x8_to_16x16_mask:
103; KNL:       ## BB#0:
104; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
105; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
106; KNL-NEXT:    vpsllw $15, %ymm1, %ymm1
107; KNL-NEXT:    vpsraw $15, %ymm1, %ymm1
108; KNL-NEXT:    vpand %ymm0, %ymm1, %ymm0
109; KNL-NEXT:    retq
110;
111; SKX-LABEL: zext_16x8_to_16x16_mask:
112; SKX:       ## BB#0:
113; SKX-NEXT:    vpsllw $7, %xmm1, %xmm1
114; SKX-NEXT:    vpmovb2m %xmm1, %k1
115; SKX-NEXT:    vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
116; SKX-NEXT:    retq
117  %x   = zext <16 x i8> %a to <16 x i16>
118  %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer
119  ret <16 x i16> %ret
120}
121
; Unmasked sign-extend of a register operand: <16 x i8> %a to <16 x i16>.
; Both runs share one expectation (the common check prefix): a single
; vpmovsxbw from xmm0 to ymm0.
122define <16 x i16> @sext_16x8_to_16x16(<16 x i8> %a ) nounwind readnone {
123; ALL-LABEL: sext_16x8_to_16x16:
124; ALL:       ## BB#0:
125; ALL-NEXT:    vpmovsxbw %xmm0, %ymm0
126; ALL-NEXT:    retq
127  %x   = sext <16 x i8> %a to <16 x i16>
128  ret <16 x i16> %x
129}
130
; Masked sign-extend of a register operand: widens <16 x i8> %a to <16 x i16>
; and returns zero in every lane whose %mask bit is clear.
; The KNL run expects the mask (xmm1) widened with vpmovzxbw, the data widened
; with vpmovsxbw, then vpsllw/vpsraw + vpand; the SKX run expects vpmovb2m
; into %k1 and one zero-masked ({%k1} {z}) vpmovsxbw.
131define <16 x i16> @sext_16x8_to_16x16_mask(<16 x i8> %a ,<16 x i1> %mask) nounwind readnone {
132; KNL-LABEL: sext_16x8_to_16x16_mask:
133; KNL:       ## BB#0:
134; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
135; KNL-NEXT:    vpmovsxbw %xmm0, %ymm0
136; KNL-NEXT:    vpsllw $15, %ymm1, %ymm1
137; KNL-NEXT:    vpsraw $15, %ymm1, %ymm1
138; KNL-NEXT:    vpand %ymm0, %ymm1, %ymm0
139; KNL-NEXT:    retq
140;
141; SKX-LABEL: sext_16x8_to_16x16_mask:
142; SKX:       ## BB#0:
143; SKX-NEXT:    vpsllw $7, %xmm1, %xmm1
144; SKX-NEXT:    vpmovb2m %xmm1, %k1
145; SKX-NEXT:    vpmovsxbw %xmm0, %ymm0 {%k1} {z}
146; SKX-NEXT:    retq
147  %x   = sext <16 x i8> %a to <16 x i16>
148  %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer
149  ret <16 x i16> %ret
150}
151
; Masked zero-extend from memory: loads <32 x i8> through %i (align 1), widens
; it to <32 x i16>, and returns zero in every lane whose %mask bit is clear.
; The KNL run expects the work to be split into two ymm halves (the upper mask
; half is taken with vextracti128), each masked with vpsllw/vpsraw + vpand;
; the SKX run expects vpmovb2m into %k1 and one zero-masked ({%k1} {z})
; vpmovzxbw producing zmm0.
; NOTE(review): declared readnone yet the body loads through %i - confirm
; whether readonly was meant.
152define <32 x i16> @zext_32x8mem_to_32x16(<32 x i8> *%i , <32 x i1> %mask) nounwind readnone {
153; KNL-LABEL: zext_32x8mem_to_32x16:
154; KNL:       ## BB#0:
155; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
156; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
157; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
158; KNL-NEXT:    vpsllw $15, %ymm3, %ymm3
159; KNL-NEXT:    vpsraw $15, %ymm3, %ymm3
160; KNL-NEXT:    vpand %ymm2, %ymm3, %ymm2
161; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm0
162; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
163; KNL-NEXT:    vpsllw $15, %ymm0, %ymm0
164; KNL-NEXT:    vpsraw $15, %ymm0, %ymm0
165; KNL-NEXT:    vpand %ymm1, %ymm0, %ymm1
166; KNL-NEXT:    vmovaps %zmm2, %zmm0
167; KNL-NEXT:    retq
168;
169; SKX-LABEL: zext_32x8mem_to_32x16:
170; SKX:       ## BB#0:
171; SKX-NEXT:    vpsllw $7, %ymm0, %ymm0
172; SKX-NEXT:    vpmovb2m %ymm0, %k1
173; SKX-NEXT:    vpmovzxbw {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero,mem[16],zero,mem[17],zero,mem[18],zero,mem[19],zero,mem[20],zero,mem[21],zero,mem[22],zero,mem[23],zero,mem[24],zero,mem[25],zero,mem[26],zero,mem[27],zero,mem[28],zero,mem[29],zero,mem[30],zero,mem[31],zero
174; SKX-NEXT:    retq
175  %a   = load <32 x i8>,<32 x i8> *%i,align 1
176  %x   = zext <32 x i8> %a to <32 x i16>
177  %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
178  ret <32 x i16> %ret
179}
180
; Masked sign-extend from memory: loads <32 x i8> through %i (align 1), widens
; it to <32 x i16>, and returns zero in every lane whose %mask bit is clear.
; The KNL run expects two ymm halves loaded with vpmovsxbw from (%rdi) and
; 16(%rdi), each masked with vpsllw/vpsraw + vpand; the SKX run expects
; vpmovb2m into %k1 and one zero-masked ({%k1} {z}) vpmovsxbw producing zmm0.
; NOTE(review): declared readnone yet the body loads through %i - confirm
; whether readonly was meant.
181define <32 x i16> @sext_32x8mem_to_32x16(<32 x i8> *%i , <32 x i1> %mask) nounwind readnone {
182; KNL-LABEL: sext_32x8mem_to_32x16:
183; KNL:       ## BB#0:
184; KNL-NEXT:    vpmovsxbw 16(%rdi), %ymm1
185; KNL-NEXT:    vpmovsxbw (%rdi), %ymm2
186; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
187; KNL-NEXT:    vpsllw $15, %ymm3, %ymm3
188; KNL-NEXT:    vpsraw $15, %ymm3, %ymm3
189; KNL-NEXT:    vpand %ymm2, %ymm3, %ymm2
190; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm0
191; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
192; KNL-NEXT:    vpsllw $15, %ymm0, %ymm0
193; KNL-NEXT:    vpsraw $15, %ymm0, %ymm0
194; KNL-NEXT:    vpand %ymm1, %ymm0, %ymm1
195; KNL-NEXT:    vmovaps %zmm2, %zmm0
196; KNL-NEXT:    retq
197;
198; SKX-LABEL: sext_32x8mem_to_32x16:
199; SKX:       ## BB#0:
200; SKX-NEXT:    vpsllw $7, %ymm0, %ymm0
201; SKX-NEXT:    vpmovb2m %ymm0, %k1
202; SKX-NEXT:    vpmovsxbw (%rdi), %zmm0 {%k1} {z}
203; SKX-NEXT:    retq
204  %a   = load <32 x i8>,<32 x i8> *%i,align 1
205  %x   = sext <32 x i8> %a to <32 x i16>
206  %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
207  ret <32 x i16> %ret
208}
209
; Unmasked zero-extend of a register operand: <32 x i8> %a to <32 x i16>.
; The KNL run expects two ymm-wide vpmovzxbw operations (the upper half is
; taken with vextracti128); the SKX run expects a single vpmovzxbw from ymm0
; to zmm0.
210define <32 x i16> @zext_32x8_to_32x16(<32 x i8> %a ) nounwind readnone {
211; KNL-LABEL: zext_32x8_to_32x16:
212; KNL:       ## BB#0:
213; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
214; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm0
215; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
216; KNL-NEXT:    vmovaps %zmm2, %zmm0
217; KNL-NEXT:    retq
218;
219; SKX-LABEL: zext_32x8_to_32x16:
220; SKX:       ## BB#0:
221; SKX-NEXT:    vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
222; SKX-NEXT:    retq
223  %x   = zext <32 x i8> %a to <32 x i16>
224  ret <32 x i16> %x
225}
226
; Masked zero-extend of a register operand: widens <32 x i8> %a to <32 x i16>
; and returns zero in every lane whose %mask bit is clear.
; The KNL run expects data and mask to be split into two ymm halves (upper
; halves via vextracti128), each masked with vpsllw/vpsraw + vpand; the SKX
; run expects vpmovb2m into %k1 and one zero-masked ({%k1} {z}) vpmovzxbw.
227define <32 x i16> @zext_32x8_to_32x16_mask(<32 x i8> %a ,<32 x i1> %mask) nounwind readnone {
228; KNL-LABEL: zext_32x8_to_32x16_mask:
229; KNL:       ## BB#0:
230; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm2
231; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero
232; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
233; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm3 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
234; KNL-NEXT:    vpsllw $15, %ymm3, %ymm3
235; KNL-NEXT:    vpsraw $15, %ymm3, %ymm3
236; KNL-NEXT:    vpand %ymm0, %ymm3, %ymm0
237; KNL-NEXT:    vextracti128 $1, %ymm1, %xmm1
238; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
239; KNL-NEXT:    vpsllw $15, %ymm1, %ymm1
240; KNL-NEXT:    vpsraw $15, %ymm1, %ymm1
241; KNL-NEXT:    vpand %ymm2, %ymm1, %ymm1
242; KNL-NEXT:    retq
243;
244; SKX-LABEL: zext_32x8_to_32x16_mask:
245; SKX:       ## BB#0:
246; SKX-NEXT:    vpsllw $7, %ymm1, %ymm1
247; SKX-NEXT:    vpmovb2m %ymm1, %k1
248; SKX-NEXT:    vpmovzxbw {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
249; SKX-NEXT:    retq
250  %x   = zext <32 x i8> %a to <32 x i16>
251  %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
252  ret <32 x i16> %ret
253}
254
; Unmasked sign-extend of a register operand: <32 x i8> %a to <32 x i16>.
; The KNL run expects two ymm-wide vpmovsxbw operations (the upper half is
; taken with vextracti128); the SKX run expects a single vpmovsxbw from ymm0
; to zmm0.
255define <32 x i16> @sext_32x8_to_32x16(<32 x i8> %a ) nounwind readnone {
256; KNL-LABEL: sext_32x8_to_32x16:
257; KNL:       ## BB#0:
258; KNL-NEXT:    vpmovsxbw %xmm0, %ymm2
259; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm0
260; KNL-NEXT:    vpmovsxbw %xmm0, %ymm1
261; KNL-NEXT:    vmovaps %zmm2, %zmm0
262; KNL-NEXT:    retq
263;
264; SKX-LABEL: sext_32x8_to_32x16:
265; SKX:       ## BB#0:
266; SKX-NEXT:    vpmovsxbw %ymm0, %zmm0
267; SKX-NEXT:    retq
268  %x   = sext <32 x i8> %a to <32 x i16>
269  ret <32 x i16> %x
270}
271
; Masked sign-extend of a register operand: widens <32 x i8> %a to <32 x i16>
; and returns zero in every lane whose %mask bit is clear.
; The KNL run expects data and mask to be split into two ymm halves (upper
; halves via vextracti128), each masked with vpsllw/vpsraw + vpand; the SKX
; run expects vpmovb2m into %k1 and one zero-masked ({%k1} {z}) vpmovsxbw.
272define <32 x i16> @sext_32x8_to_32x16_mask(<32 x i8> %a ,<32 x i1> %mask) nounwind readnone {
273; KNL-LABEL: sext_32x8_to_32x16_mask:
274; KNL:       ## BB#0:
275; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm2
276; KNL-NEXT:    vpmovsxbw %xmm2, %ymm2
277; KNL-NEXT:    vpmovsxbw %xmm0, %ymm0
278; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm3 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
279; KNL-NEXT:    vpsllw $15, %ymm3, %ymm3
280; KNL-NEXT:    vpsraw $15, %ymm3, %ymm3
281; KNL-NEXT:    vpand %ymm0, %ymm3, %ymm0
282; KNL-NEXT:    vextracti128 $1, %ymm1, %xmm1
283; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
284; KNL-NEXT:    vpsllw $15, %ymm1, %ymm1
285; KNL-NEXT:    vpsraw $15, %ymm1, %ymm1
286; KNL-NEXT:    vpand %ymm2, %ymm1, %ymm1
287; KNL-NEXT:    retq
288;
289; SKX-LABEL: sext_32x8_to_32x16_mask:
290; SKX:       ## BB#0:
291; SKX-NEXT:    vpsllw $7, %ymm1, %ymm1
292; SKX-NEXT:    vpmovb2m %ymm1, %k1
293; SKX-NEXT:    vpmovsxbw %ymm0, %zmm0 {%k1} {z}
294; SKX-NEXT:    retq
295  %x   = sext <32 x i8> %a to <32 x i16>
296  %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
297  ret <32 x i16> %ret
298}
299
; Masked zero-extend from memory: loads <4 x i8> through %i (align 1), widens
; it to <4 x i32>, and returns zero in every lane whose %mask bit is clear.
; The KNL run expects vpslld/vpsrad on the mask followed by vpmovzxbd + vpand;
; the SKX run expects vptestmd into %k1 and one zero-masked ({%k1} {z})
; vpmovzxbd.
; NOTE(review): declared readnone yet the body loads through %i - confirm
; whether readonly was meant.
300define <4 x i32> @zext_4x8mem_to_4x32(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone {
301; KNL-LABEL: zext_4x8mem_to_4x32:
302; KNL:       ## BB#0:
303; KNL-NEXT:    vpslld $31, %xmm0, %xmm0
304; KNL-NEXT:    vpsrad $31, %xmm0, %xmm0
305; KNL-NEXT:    vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
306; KNL-NEXT:    vpand %xmm1, %xmm0, %xmm0
307; KNL-NEXT:    retq
308;
309; SKX-LABEL: zext_4x8mem_to_4x32:
310; SKX:       ## BB#0:
311; SKX-NEXT:    vpslld $31, %xmm0, %xmm0
312; SKX-NEXT:    vptestmd %xmm0, %xmm0, %k1
313; SKX-NEXT:    vpmovzxbd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
314; SKX-NEXT:    retq
315  %a   = load <4 x i8>,<4 x i8> *%i,align 1
316  %x   = zext <4 x i8> %a to <4 x i32>
317  %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
318  ret <4 x i32> %ret
319}
320
; Masked sign-extend from memory: loads <4 x i8> through %i (align 1), widens
; it to <4 x i32>, and returns zero in every lane whose %mask bit is clear.
; The KNL run expects vpslld/vpsrad on the mask followed by vpmovsxbd + vpand;
; the SKX run expects vptestmd into %k1 and one zero-masked ({%k1} {z})
; vpmovsxbd.
; NOTE(review): declared readnone yet the body loads through %i - confirm
; whether readonly was meant.
321define <4 x i32> @sext_4x8mem_to_4x32(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone {
322; KNL-LABEL: sext_4x8mem_to_4x32:
323; KNL:       ## BB#0:
324; KNL-NEXT:    vpslld $31, %xmm0, %xmm0
325; KNL-NEXT:    vpsrad $31, %xmm0, %xmm0
326; KNL-NEXT:    vpmovsxbd (%rdi), %xmm1
327; KNL-NEXT:    vpand %xmm1, %xmm0, %xmm0
328; KNL-NEXT:    retq
329;
330; SKX-LABEL: sext_4x8mem_to_4x32:
331; SKX:       ## BB#0:
332; SKX-NEXT:    vpslld $31, %xmm0, %xmm0
333; SKX-NEXT:    vptestmd %xmm0, %xmm0, %k1
334; SKX-NEXT:    vpmovsxbd (%rdi), %xmm0 {%k1} {z}
335; SKX-NEXT:    retq
336  %a   = load <4 x i8>,<4 x i8> *%i,align 1
337  %x   = sext <4 x i8> %a to <4 x i32>
338  %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
339  ret <4 x i32> %ret
340}
341
; Masked zero-extend from memory: loads <8 x i8> through %i (align 1), widens
; it to <8 x i32>, and returns zero in every lane whose %mask bit is clear.
; The KNL run expects the mask to be widened to zmm (vpmovsxwq, vpsllq,
; vptestmq into %k1), an unmasked vpmovzxbd, and a zmm-wide vpblendmd against
; a zeroed register; the SKX run expects vpmovw2m into %k1 and one zero-masked
; ({%k1} {z}) vpmovzxbd.
; NOTE(review): declared readnone yet the body loads through %i - confirm
; whether readonly was meant.
342define <8 x i32> @zext_8x8mem_to_8x32(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
343; KNL-LABEL: zext_8x8mem_to_8x32:
344; KNL:       ## BB#0:
345; KNL-NEXT:    vpmovsxwq %xmm0, %zmm0
346; KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
347; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k1
348; KNL-NEXT:    vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
349; KNL-NEXT:    vpxor %ymm1, %ymm1, %ymm1
350; KNL-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
351; KNL-NEXT:    ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
352; KNL-NEXT:    retq
353;
354; SKX-LABEL: zext_8x8mem_to_8x32:
355; SKX:       ## BB#0:
356; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
357; SKX-NEXT:    vpmovw2m %xmm0, %k1
358; SKX-NEXT:    vpmovzxbd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
359; SKX-NEXT:    retq
360  %a   = load <8 x i8>,<8 x i8> *%i,align 1
361  %x   = zext <8 x i8> %a to <8 x i32>
362  %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
363  ret <8 x i32> %ret
364}
365
; Masked sign-extend from memory: loads <8 x i8> through %i (align 1), widens
; it to <8 x i32>, and returns zero in every lane whose %mask bit is clear.
; The KNL run expects the mask to be widened to zmm (vpmovsxwq, vpsllq,
; vptestmq into %k1), an unmasked vpmovsxbd, and a zmm-wide vpblendmd against
; a zeroed register; the SKX run expects vpmovw2m into %k1 and one zero-masked
; ({%k1} {z}) vpmovsxbd.
; NOTE(review): declared readnone yet the body loads through %i - confirm
; whether readonly was meant.
366define <8 x i32> @sext_8x8mem_to_8x32(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
367; KNL-LABEL: sext_8x8mem_to_8x32:
368; KNL:       ## BB#0:
369; KNL-NEXT:    vpmovsxwq %xmm0, %zmm0
370; KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
371; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k1
372; KNL-NEXT:    vpmovsxbd (%rdi), %ymm0
373; KNL-NEXT:    vpxor %ymm1, %ymm1, %ymm1
374; KNL-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
375; KNL-NEXT:    ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
376; KNL-NEXT:    retq
377;
378; SKX-LABEL: sext_8x8mem_to_8x32:
379; SKX:       ## BB#0:
380; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
381; SKX-NEXT:    vpmovw2m %xmm0, %k1
382; SKX-NEXT:    vpmovsxbd (%rdi), %ymm0 {%k1} {z}
383; SKX-NEXT:    retq
384  %a   = load <8 x i8>,<8 x i8> *%i,align 1
385  %x   = sext <8 x i8> %a to <8 x i32>
386  %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
387  ret <8 x i32> %ret
388}
389
; Masked zero-extend from memory: loads <16 x i8> through %i (align 1), widens
; it to <16 x i32>, and returns zero in every lane whose %mask bit is clear.
; Both runs expect one zero-masked ({%k1} {z}) vpmovzxbd into zmm0; they differ
; only in how %k1 is built (vpmovsxbd/vpslld/vptestmd in the KNL run,
; vpsllw/vpmovb2m in the SKX run).
; NOTE(review): declared readnone yet the body loads through %i - confirm
; whether readonly was meant.
390define <16 x i32> @zext_16x8mem_to_16x32(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone {
391; KNL-LABEL: zext_16x8mem_to_16x32:
392; KNL:       ## BB#0:
393; KNL-NEXT:    vpmovsxbd %xmm0, %zmm0
394; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
395; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k1
396; KNL-NEXT:    vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero
397; KNL-NEXT:    retq
398;
399; SKX-LABEL: zext_16x8mem_to_16x32:
400; SKX:       ## BB#0:
401; SKX-NEXT:    vpsllw $7, %xmm0, %xmm0
402; SKX-NEXT:    vpmovb2m %xmm0, %k1
403; SKX-NEXT:    vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero
404; SKX-NEXT:    retq
405  %a   = load <16 x i8>,<16 x i8> *%i,align 1
406  %x   = zext <16 x i8> %a to <16 x i32>
407  %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
408  ret <16 x i32> %ret
409}
410
; Masked sign-extend from memory: loads <16 x i8> through %i (align 1), widens
; it to <16 x i32>, and returns zero in every lane whose %mask bit is clear.
; Both runs expect one zero-masked ({%k1} {z}) vpmovsxbd from (%rdi) into zmm0;
; they differ only in how %k1 is built (vpmovsxbd/vpslld/vptestmd in the KNL
; run, vpsllw/vpmovb2m in the SKX run).
; NOTE(review): declared readnone yet the body loads through %i - confirm
; whether readonly was meant.
411define <16 x i32> @sext_16x8mem_to_16x32(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone {
412; KNL-LABEL: sext_16x8mem_to_16x32:
413; KNL:       ## BB#0:
414; KNL-NEXT:    vpmovsxbd %xmm0, %zmm0
415; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
416; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k1
417; KNL-NEXT:    vpmovsxbd (%rdi), %zmm0 {%k1} {z}
418; KNL-NEXT:    retq
419;
420; SKX-LABEL: sext_16x8mem_to_16x32:
421; SKX:       ## BB#0:
422; SKX-NEXT:    vpsllw $7, %xmm0, %xmm0
423; SKX-NEXT:    vpmovb2m %xmm0, %k1
424; SKX-NEXT:    vpmovsxbd (%rdi), %zmm0 {%k1} {z}
425; SKX-NEXT:    retq
426  %a   = load <16 x i8>,<16 x i8> *%i,align 1
427  %x   = sext <16 x i8> %a to <16 x i32>
428  %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
429  ret <16 x i32> %ret
430}
431
; Masked zero-extend of a register operand: widens <16 x i8> %a to <16 x i32>
; and returns zero in every lane whose %mask bit is clear.
; Both runs expect one zero-masked ({%k1} {z}) vpmovzxbd from xmm0 into zmm0;
; they differ only in how %k1 is built from the mask in xmm1
; (vpmovsxbd/vpslld/vptestmd in the KNL run, vpsllw/vpmovb2m in the SKX run).
432define <16 x i32> @zext_16x8_to_16x32_mask(<16 x i8> %a , <16 x i1> %mask) nounwind readnone {
433; KNL-LABEL: zext_16x8_to_16x32_mask:
434; KNL:       ## BB#0:
435; KNL-NEXT:    vpmovsxbd %xmm1, %zmm1
436; KNL-NEXT:    vpslld $31, %zmm1, %zmm1
437; KNL-NEXT:    vptestmd %zmm1, %zmm1, %k1
438; KNL-NEXT:    vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
439; KNL-NEXT:    retq
440;
441; SKX-LABEL: zext_16x8_to_16x32_mask:
442; SKX:       ## BB#0:
443; SKX-NEXT:    vpsllw $7, %xmm1, %xmm1
444; SKX-NEXT:    vpmovb2m %xmm1, %k1
445; SKX-NEXT:    vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
446; SKX-NEXT:    retq
447  %x   = zext <16 x i8> %a to <16 x i32>
448  %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
449  ret <16 x i32> %ret
450}
451
; Masked sign-extend of a register operand: widens <16 x i8> %a to <16 x i32>
; and returns zero in every lane whose %mask bit is clear.
; Both runs expect one zero-masked ({%k1} {z}) vpmovsxbd from xmm0 into zmm0;
; they differ only in how %k1 is built from the mask in xmm1
; (vpmovsxbd/vpslld/vptestmd in the KNL run, vpsllw/vpmovb2m in the SKX run).
452define <16 x i32> @sext_16x8_to_16x32_mask(<16 x i8> %a , <16 x i1> %mask) nounwind readnone {
453; KNL-LABEL: sext_16x8_to_16x32_mask:
454; KNL:       ## BB#0:
455; KNL-NEXT:    vpmovsxbd %xmm1, %zmm1
456; KNL-NEXT:    vpslld $31, %zmm1, %zmm1
457; KNL-NEXT:    vptestmd %zmm1, %zmm1, %k1
458; KNL-NEXT:    vpmovsxbd %xmm0, %zmm0 {%k1} {z}
459; KNL-NEXT:    retq
460;
461; SKX-LABEL: sext_16x8_to_16x32_mask:
462; SKX:       ## BB#0:
463; SKX-NEXT:    vpsllw $7, %xmm1, %xmm1
464; SKX-NEXT:    vpmovb2m %xmm1, %k1
465; SKX-NEXT:    vpmovsxbd %xmm0, %zmm0 {%k1} {z}
466; SKX-NEXT:    retq
467  %x   = sext <16 x i8> %a to <16 x i32>
468  %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
469  ret <16 x i32> %ret
470}
471
; Unmasked zero-extend of a register operand: <16 x i8> %i to <16 x i32>.
; Both runs share one expectation (the common check prefix): a single
; vpmovzxbd from xmm0 to zmm0.
472define <16 x i32> @zext_16x8_to_16x32(<16 x i8> %i) nounwind readnone {
473; ALL-LABEL: zext_16x8_to_16x32:
474; ALL:       ## BB#0:
475; ALL-NEXT:    vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
476; ALL-NEXT:    retq
477  %x = zext <16 x i8> %i to <16 x i32>
478  ret <16 x i32> %x
479}
480
; Unmasked sign-extend of a register operand: <16 x i8> %i to <16 x i32>.
; Both runs share one expectation (the common check prefix): a single
; vpmovsxbd from xmm0 to zmm0.
481define <16 x i32> @sext_16x8_to_16x32(<16 x i8> %i) nounwind readnone {
482; ALL-LABEL: sext_16x8_to_16x32:
483; ALL:       ## BB#0:
484; ALL-NEXT:    vpmovsxbd %xmm0, %zmm0
485; ALL-NEXT:    retq
486  %x = sext <16 x i8> %i to <16 x i32>
487  ret <16 x i32> %x
488}
489
; Masked zero-extend from memory: loads <2 x i8> through %i (align 1), widens
; it to <2 x i64>, and returns zero in every lane whose %mask bit is clear.
; The KNL run expects the mask to be built with vpsllq/vpsrad/vpshufd and
; applied with vpand after vpmovzxbq; the SKX run expects vptestmq into %k1
; and one zero-masked ({%k1} {z}) vpmovzxbq.
; NOTE(review): declared readnone yet the body loads through %i - confirm
; whether readonly was meant.
490define <2 x i64> @zext_2x8mem_to_2x64(<2 x i8> *%i , <2 x i1> %mask) nounwind readnone {
491; KNL-LABEL: zext_2x8mem_to_2x64:
492; KNL:       ## BB#0:
493; KNL-NEXT:    vpsllq $63, %xmm0, %xmm0
494; KNL-NEXT:    vpsrad $31, %xmm0, %xmm0
495; KNL-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
496; KNL-NEXT:    vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
497; KNL-NEXT:    vpand %xmm1, %xmm0, %xmm0
498; KNL-NEXT:    retq
499;
500; SKX-LABEL: zext_2x8mem_to_2x64:
501; SKX:       ## BB#0:
502; SKX-NEXT:    vpsllq $63, %xmm0, %xmm0
503; SKX-NEXT:    vptestmq %xmm0, %xmm0, %k1
504; SKX-NEXT:    vpmovzxbq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
505; SKX-NEXT:    retq
506  %a   = load <2 x i8>,<2 x i8> *%i,align 1
507  %x   = zext <2 x i8> %a to <2 x i64>
508  %ret = select <2 x  i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
509  ret <2 x i64> %ret
510}
; Masked sign-extend from memory: loads <2 x i8> through %i (align 1), widens
; it to <2 x i64>, and returns zero in every lane whose %mask bit is clear.
; The KNL run expects the mask to be built with vpsllq/vpsrad/vpshufd and
; applied with vpand after vpmovsxbq; the SKX run expects vptestmq into %k1
; and one zero-masked ({%k1} {z}) vpmovsxbq.
; NOTE(review): declared readnone yet the body loads through %i - confirm
; whether readonly was meant.
511define <2 x i64> @sext_2x8mem_to_2x64mask(<2 x i8> *%i , <2 x i1> %mask) nounwind readnone {
512; KNL-LABEL: sext_2x8mem_to_2x64mask:
513; KNL:       ## BB#0:
514; KNL-NEXT:    vpsllq $63, %xmm0, %xmm0
515; KNL-NEXT:    vpsrad $31, %xmm0, %xmm0
516; KNL-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
517; KNL-NEXT:    vpmovsxbq (%rdi), %xmm1
518; KNL-NEXT:    vpand %xmm1, %xmm0, %xmm0
519; KNL-NEXT:    retq
520;
521; SKX-LABEL: sext_2x8mem_to_2x64mask:
522; SKX:       ## BB#0:
523; SKX-NEXT:    vpsllq $63, %xmm0, %xmm0
524; SKX-NEXT:    vptestmq %xmm0, %xmm0, %k1
525; SKX-NEXT:    vpmovsxbq (%rdi), %xmm0 {%k1} {z}
526; SKX-NEXT:    retq
527  %a   = load <2 x i8>,<2 x i8> *%i,align 1
528  %x   = sext <2 x i8> %a to <2 x i64>
529  %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
530  ret <2 x i64> %ret
531}
; Unmasked sign-extend from memory: loads <2 x i8> through %i (align 1) and
; widens it to <2 x i64>. Both runs share one expectation (the common check
; prefix): a single vpmovsxbq from (%rdi) into xmm0.
; NOTE(review): declared readnone yet the body loads through %i - confirm
; whether readonly was meant.
532define <2 x i64> @sext_2x8mem_to_2x64(<2 x i8> *%i) nounwind readnone {
533; ALL-LABEL: sext_2x8mem_to_2x64:
534; ALL:       ## BB#0:
535; ALL-NEXT:    vpmovsxbq (%rdi), %xmm0
536; ALL-NEXT:    retq
537  %a   = load <2 x i8>,<2 x i8> *%i,align 1
538  %x   = sext <2 x i8> %a to <2 x i64>
539  ret <2 x i64> %x
540}
541
; Masked zero-extend from memory: loads <4 x i8> through %i (align 1), widens
; it to <4 x i64>, and returns zero in every lane whose %mask bit is clear.
; The KNL run expects the mask to be built with vpslld/vpsrad, widened with
; vpmovsxdq, and applied with vpand after vpmovzxbq; the SKX run expects
; vptestmd into %k1 and one zero-masked ({%k1} {z}) vpmovzxbq.
; NOTE(review): declared readnone yet the body loads through %i - confirm
; whether readonly was meant.
542define <4 x i64> @zext_4x8mem_to_4x64(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone {
543; KNL-LABEL: zext_4x8mem_to_4x64:
544; KNL:       ## BB#0:
545; KNL-NEXT:    vpslld $31, %xmm0, %xmm0
546; KNL-NEXT:    vpsrad $31, %xmm0, %xmm0
547; KNL-NEXT:    vpmovsxdq %xmm0, %ymm0
548; KNL-NEXT:    vpmovzxbq {{.*#+}} ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero
549; KNL-NEXT:    vpand %ymm1, %ymm0, %ymm0
550; KNL-NEXT:    retq
551;
552; SKX-LABEL: zext_4x8mem_to_4x64:
553; SKX:       ## BB#0:
554; SKX-NEXT:    vpslld $31, %xmm0, %xmm0
555; SKX-NEXT:    vptestmd %xmm0, %xmm0, %k1
556; SKX-NEXT:    vpmovzxbq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero
557; SKX-NEXT:    retq
558  %a   = load <4 x i8>,<4 x i8> *%i,align 1
559  %x   = zext <4 x i8> %a to <4 x i64>
560  %ret = select <4 x  i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
561  ret <4 x i64> %ret
562}
563
; Masked sign-extend from memory: loads <4 x i8> through %i (align 1), widens
; it to <4 x i64>, and returns zero in every lane whose %mask bit is clear.
; The KNL run expects the mask to be built with vpslld/vpsrad, widened with
; vpmovsxdq, and applied with vpand after vpmovsxbq; the SKX run expects
; vptestmd into %k1 and one zero-masked ({%k1} {z}) vpmovsxbq.
; NOTE(review): declared readnone yet the body loads through %i - confirm
; whether readonly was meant.
564define <4 x i64> @sext_4x8mem_to_4x64mask(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone {
565; KNL-LABEL: sext_4x8mem_to_4x64mask:
566; KNL:       ## BB#0:
567; KNL-NEXT:    vpslld $31, %xmm0, %xmm0
568; KNL-NEXT:    vpsrad $31, %xmm0, %xmm0
569; KNL-NEXT:    vpmovsxdq %xmm0, %ymm0
570; KNL-NEXT:    vpmovsxbq (%rdi), %ymm1
571; KNL-NEXT:    vpand %ymm1, %ymm0, %ymm0
572; KNL-NEXT:    retq
573;
574; SKX-LABEL: sext_4x8mem_to_4x64mask:
575; SKX:       ## BB#0:
576; SKX-NEXT:    vpslld $31, %xmm0, %xmm0
577; SKX-NEXT:    vptestmd %xmm0, %xmm0, %k1
578; SKX-NEXT:    vpmovsxbq (%rdi), %ymm0 {%k1} {z}
579; SKX-NEXT:    retq
580  %a   = load <4 x i8>,<4 x i8> *%i,align 1
581  %x   = sext <4 x i8> %a to <4 x i64>
582  %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
583  ret <4 x i64> %ret
584}
585
; Unmasked sign-extend from memory: loads <4 x i8> through %i (align 1) and
; widens it to <4 x i64>. Both runs share one expectation (the common check
; prefix): a single vpmovsxbq from (%rdi) into ymm0.
; NOTE(review): declared readnone yet the body loads through %i - confirm
; whether readonly was meant.
586define <4 x i64> @sext_4x8mem_to_4x64(<4 x i8> *%i) nounwind readnone {
587; ALL-LABEL: sext_4x8mem_to_4x64:
588; ALL:       ## BB#0:
589; ALL-NEXT:    vpmovsxbq (%rdi), %ymm0
590; ALL-NEXT:    retq
591  %a   = load <4 x i8>,<4 x i8> *%i,align 1
592  %x   = sext <4 x i8> %a to <4 x i64>
593  ret <4 x i64> %x
594}
595
; Masked zero-extend from memory: loads <8 x i8> through %i (align 1), widens
; it to <8 x i64>, and returns zero in every lane whose %mask bit is clear.
; Both runs expect one zero-masked ({%k1} {z}) vpmovzxbq into zmm0; they differ
; only in how %k1 is built (vpmovsxwq/vpsllq/vptestmq in the KNL run,
; vpsllw/vpmovw2m in the SKX run).
; NOTE(review): declared readnone yet the body loads through %i - confirm
; whether readonly was meant.
596define <8 x i64> @zext_8x8mem_to_8x64(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
597; KNL-LABEL: zext_8x8mem_to_8x64:
598; KNL:       ## BB#0:
599; KNL-NEXT:    vpmovsxwq %xmm0, %zmm0
600; KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
601; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k1
602; KNL-NEXT:    vpmovzxbq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero
603; KNL-NEXT:    retq
604;
605; SKX-LABEL: zext_8x8mem_to_8x64:
606; SKX:       ## BB#0:
607; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
608; SKX-NEXT:    vpmovw2m %xmm0, %k1
609; SKX-NEXT:    vpmovzxbq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero
610; SKX-NEXT:    retq
611  %a   = load <8 x i8>,<8 x i8> *%i,align 1
612  %x   = zext <8 x i8> %a to <8 x i64>
613  %ret = select <8 x  i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
614  ret <8 x i64> %ret
615}
616
; Masked sign extension from memory: loads <8 x i8> through %i, sign-extends to
; <8 x i64>, and zeroes every lane whose %mask bit is clear.
; Both targets are expected to emit a zero-masked zmm vpmovsxbq; they differ
; only in how %mask is converted into %k1.
; Marked readonly (not readnone) because the body loads through %i.
define <8 x i64> @sext_8x8mem_to_8x64mask(<8 x i8>* %i, <8 x i1> %mask) nounwind readonly {
; KNL-LABEL: sext_8x8mem_to_8x64mask:
; KNL:       ## BB#0:
; KNL-NEXT:    vpmovsxwq %xmm0, %zmm0
; KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k1
; KNL-NEXT:    vpmovsxbq (%rdi), %zmm0 {%k1} {z}
; KNL-NEXT:    retq
;
; SKX-LABEL: sext_8x8mem_to_8x64mask:
; SKX:       ## BB#0:
; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
; SKX-NEXT:    vpmovw2m %xmm0, %k1
; SKX-NEXT:    vpmovsxbq (%rdi), %zmm0 {%k1} {z}
; SKX-NEXT:    retq
  %a   = load <8 x i8>, <8 x i8>* %i, align 1
  %x   = sext <8 x i8> %a to <8 x i64>
  %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
  ret <8 x i64> %ret
}
637
; Unmasked sign extension from memory: loads <8 x i8> through %i and widens it
; to <8 x i64>. Both run lines expect a single zmm vpmovsxbq from memory.
; Marked readonly (not readnone) because the body loads through %i.
define <8 x i64> @sext_8x8mem_to_8x64(<8 x i8>* %i) nounwind readonly {
; ALL-LABEL: sext_8x8mem_to_8x64:
; ALL:       ## BB#0:
; ALL-NEXT:    vpmovsxbq (%rdi), %zmm0
; ALL-NEXT:    retq
  %a   = load <8 x i8>, <8 x i8>* %i, align 1
  %x   = sext <8 x i8> %a to <8 x i64>
  ret <8 x i64> %x
}
647
; Masked zero extension from memory: loads <4 x i16> through %i, zero-extends
; to <4 x i32>, and zeroes every lane whose %mask bit is clear.
; Expected lowering: SKX folds the select into a zero-masked vpmovzxwd, while
; KNL builds an all-ones/all-zeros xmm mask and applies it with vpand.
; Marked readonly (not readnone) because the body loads through %i.
define <4 x i32> @zext_4x16mem_to_4x32(<4 x i16>* %i, <4 x i1> %mask) nounwind readonly {
; KNL-LABEL: zext_4x16mem_to_4x32:
; KNL:       ## BB#0:
; KNL-NEXT:    vpslld $31, %xmm0, %xmm0
; KNL-NEXT:    vpsrad $31, %xmm0, %xmm0
; KNL-NEXT:    vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; KNL-NEXT:    vpand %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: zext_4x16mem_to_4x32:
; SKX:       ## BB#0:
; SKX-NEXT:    vpslld $31, %xmm0, %xmm0
; SKX-NEXT:    vptestmd %xmm0, %xmm0, %k1
; SKX-NEXT:    vpmovzxwd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; SKX-NEXT:    retq
  %a   = load <4 x i16>, <4 x i16>* %i, align 1
  %x   = zext <4 x i16> %a to <4 x i32>
  %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
  ret <4 x i32> %ret
}
668
; Masked sign extension from memory: loads <4 x i16> through %i, sign-extends
; to <4 x i32>, and zeroes every lane whose %mask bit is clear.
; Expected lowering: SKX folds the select into a zero-masked vpmovsxwd, while
; KNL builds an all-ones/all-zeros xmm mask and applies it with vpand.
; Marked readonly (not readnone) because the body loads through %i.
define <4 x i32> @sext_4x16mem_to_4x32mask(<4 x i16>* %i, <4 x i1> %mask) nounwind readonly {
; KNL-LABEL: sext_4x16mem_to_4x32mask:
; KNL:       ## BB#0:
; KNL-NEXT:    vpslld $31, %xmm0, %xmm0
; KNL-NEXT:    vpsrad $31, %xmm0, %xmm0
; KNL-NEXT:    vpmovsxwd (%rdi), %xmm1
; KNL-NEXT:    vpand %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: sext_4x16mem_to_4x32mask:
; SKX:       ## BB#0:
; SKX-NEXT:    vpslld $31, %xmm0, %xmm0
; SKX-NEXT:    vptestmd %xmm0, %xmm0, %k1
; SKX-NEXT:    vpmovsxwd (%rdi), %xmm0 {%k1} {z}
; SKX-NEXT:    retq
  %a   = load <4 x i16>, <4 x i16>* %i, align 1
  %x   = sext <4 x i16> %a to <4 x i32>
  %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
  ret <4 x i32> %ret
}
689
; Unmasked sign extension from memory: loads <4 x i16> through %i and widens it
; to <4 x i32>. Both run lines expect a single xmm vpmovsxwd from memory.
; Marked readonly (not readnone) because the body loads through %i.
define <4 x i32> @sext_4x16mem_to_4x32(<4 x i16>* %i) nounwind readonly {
; ALL-LABEL: sext_4x16mem_to_4x32:
; ALL:       ## BB#0:
; ALL-NEXT:    vpmovsxwd (%rdi), %xmm0
; ALL-NEXT:    retq
  %a   = load <4 x i16>, <4 x i16>* %i, align 1
  %x   = sext <4 x i16> %a to <4 x i32>
  ret <4 x i32> %x
}
699
700
; Masked zero extension from memory: loads <8 x i16> through %i, zero-extends
; to <8 x i32>, and zeroes every lane whose %mask bit is clear.
; Expected lowering: SKX folds the select into a zero-masked ymm vpmovzxwd; KNL
; extends unmasked and then blends against a zeroed register with a zmm
; vpblendmd.
; Marked readonly (not readnone) because the body loads through %i.
define <8 x i32> @zext_8x16mem_to_8x32(<8 x i16>* %i, <8 x i1> %mask) nounwind readonly {
; KNL-LABEL: zext_8x16mem_to_8x32:
; KNL:       ## BB#0:
; KNL-NEXT:    vpmovsxwq %xmm0, %zmm0
; KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k1
; KNL-NEXT:    vpmovzxwd {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; KNL-NEXT:    vpxor %ymm1, %ymm1, %ymm1
; KNL-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
; KNL-NEXT:    ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; KNL-NEXT:    retq
;
; SKX-LABEL: zext_8x16mem_to_8x32:
; SKX:       ## BB#0:
; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
; SKX-NEXT:    vpmovw2m %xmm0, %k1
; SKX-NEXT:    vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; SKX-NEXT:    retq
  %a   = load <8 x i16>, <8 x i16>* %i, align 1
  %x   = zext <8 x i16> %a to <8 x i32>
  %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
  ret <8 x i32> %ret
}
724
; Masked sign extension from memory: loads <8 x i16> through %i, sign-extends
; to <8 x i32>, and zeroes every lane whose %mask bit is clear.
; Expected lowering: SKX folds the select into a zero-masked ymm vpmovsxwd; KNL
; extends unmasked and then blends against a zeroed register with a zmm
; vpblendmd.
; Marked readonly (not readnone) because the body loads through %i.
define <8 x i32> @sext_8x16mem_to_8x32mask(<8 x i16>* %i, <8 x i1> %mask) nounwind readonly {
; KNL-LABEL: sext_8x16mem_to_8x32mask:
; KNL:       ## BB#0:
; KNL-NEXT:    vpmovsxwq %xmm0, %zmm0
; KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k1
; KNL-NEXT:    vpmovsxwd (%rdi), %ymm0
; KNL-NEXT:    vpxor %ymm1, %ymm1, %ymm1
; KNL-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
; KNL-NEXT:    ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; KNL-NEXT:    retq
;
; SKX-LABEL: sext_8x16mem_to_8x32mask:
; SKX:       ## BB#0:
; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
; SKX-NEXT:    vpmovw2m %xmm0, %k1
; SKX-NEXT:    vpmovsxwd (%rdi), %ymm0 {%k1} {z}
; SKX-NEXT:    retq
  %a   = load <8 x i16>, <8 x i16>* %i, align 1
  %x   = sext <8 x i16> %a to <8 x i32>
  %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
  ret <8 x i32> %ret
}
748
; Unmasked sign extension from memory: loads <8 x i16> through %i and widens it
; to <8 x i32>. Both run lines expect a single ymm vpmovsxwd from memory.
; Marked readonly (not readnone) because the body loads through %i.
define <8 x i32> @sext_8x16mem_to_8x32(<8 x i16>* %i) nounwind readonly {
; ALL-LABEL: sext_8x16mem_to_8x32:
; ALL:       ## BB#0:
; ALL-NEXT:    vpmovsxwd (%rdi), %ymm0
; ALL-NEXT:    retq
  %a   = load <8 x i16>, <8 x i16>* %i, align 1
  %x   = sext <8 x i16> %a to <8 x i32>
  ret <8 x i32> %x
}
758
; Register-operand variant: zero-extends %a from <8 x i16> to <8 x i32> and
; clears every lane whose %mask bit is unset.
; SKX is expected to use a zero-masked ymm vpmovzxwd; KNL extends first and
; then blends with a zeroed register through a zmm vpblendmd.
define <8 x i32> @zext_8x16_to_8x32mask(<8 x i16> %a, <8 x i1> %mask) nounwind readnone {
; KNL-LABEL: zext_8x16_to_8x32mask:
; KNL:       ## BB#0:
; KNL-NEXT:    vpmovsxwq %xmm1, %zmm1
; KNL-NEXT:    vpsllq $63, %zmm1, %zmm1
; KNL-NEXT:    vptestmq %zmm1, %zmm1, %k1
; KNL-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; KNL-NEXT:    vpxor %ymm1, %ymm1, %ymm1
; KNL-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
; KNL-NEXT:    ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; KNL-NEXT:    retq
;
; SKX-LABEL: zext_8x16_to_8x32mask:
; SKX:       ## BB#0:
; SKX-NEXT:    vpsllw $15, %xmm1, %xmm1
; SKX-NEXT:    vpmovw2m %xmm1, %k1
; SKX-NEXT:    vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; SKX-NEXT:    retq
  %wide = zext <8 x i16> %a to <8 x i32>
  %masked = select <8 x i1> %mask, <8 x i32> %wide, <8 x i32> zeroinitializer
  ret <8 x i32> %masked
}
781
; Plain register-to-register zero extension of <8 x i16> to <8 x i32>; a single
; ymm vpmovzxwd is expected under both run lines.
define <8 x i32> @zext_8x16_to_8x32(<8 x i16> %a) nounwind readnone {
; ALL-LABEL: zext_8x16_to_8x32:
; ALL:       ## BB#0:
; ALL-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; ALL-NEXT:    retq
  %wide = zext <8 x i16> %a to <8 x i32>
  ret <8 x i32> %wide
}
790
; Masked zero extension from memory: loads <16 x i16> through %i, zero-extends
; to <16 x i32>, and zeroes every lane whose %mask bit is clear.
; Both targets are expected to emit a zero-masked zmm vpmovzxwd; they differ
; only in how %mask is converted into %k1.
; Marked readonly (not readnone) because the body loads through %i.
define <16 x i32> @zext_16x16mem_to_16x32(<16 x i16>* %i, <16 x i1> %mask) nounwind readonly {
; KNL-LABEL: zext_16x16mem_to_16x32:
; KNL:       ## BB#0:
; KNL-NEXT:    vpmovsxbd %xmm0, %zmm0
; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k1
; KNL-NEXT:    vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
; KNL-NEXT:    retq
;
; SKX-LABEL: zext_16x16mem_to_16x32:
; SKX:       ## BB#0:
; SKX-NEXT:    vpsllw $7, %xmm0, %xmm0
; SKX-NEXT:    vpmovb2m %xmm0, %k1
; SKX-NEXT:    vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
; SKX-NEXT:    retq
  %a   = load <16 x i16>, <16 x i16>* %i, align 1
  %x   = zext <16 x i16> %a to <16 x i32>
  %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
  ret <16 x i32> %ret
}
811
; Masked sign extension from memory: loads <16 x i16> through %i, sign-extends
; to <16 x i32>, and zeroes every lane whose %mask bit is clear.
; Both targets are expected to emit a zero-masked zmm vpmovsxwd; they differ
; only in how %mask is converted into %k1.
; Marked readonly (not readnone) because the body loads through %i.
define <16 x i32> @sext_16x16mem_to_16x32mask(<16 x i16>* %i, <16 x i1> %mask) nounwind readonly {
; KNL-LABEL: sext_16x16mem_to_16x32mask:
; KNL:       ## BB#0:
; KNL-NEXT:    vpmovsxbd %xmm0, %zmm0
; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k1
; KNL-NEXT:    vpmovsxwd (%rdi), %zmm0 {%k1} {z}
; KNL-NEXT:    retq
;
; SKX-LABEL: sext_16x16mem_to_16x32mask:
; SKX:       ## BB#0:
; SKX-NEXT:    vpsllw $7, %xmm0, %xmm0
; SKX-NEXT:    vpmovb2m %xmm0, %k1
; SKX-NEXT:    vpmovsxwd (%rdi), %zmm0 {%k1} {z}
; SKX-NEXT:    retq
  %a   = load <16 x i16>, <16 x i16>* %i, align 1
  %x   = sext <16 x i16> %a to <16 x i32>
  %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
  ret <16 x i32> %ret
}
832
; Unmasked sign extension from memory: loads <16 x i16> through %i and widens
; it to <16 x i32>. Both run lines expect a single zmm vpmovsxwd from memory.
; Marked readonly (not readnone) because the body loads through %i.
define <16 x i32> @sext_16x16mem_to_16x32(<16 x i16>* %i) nounwind readonly {
; ALL-LABEL: sext_16x16mem_to_16x32:
; ALL:       ## BB#0:
; ALL-NEXT:    vpmovsxwd (%rdi), %zmm0
; ALL-NEXT:    retq
  %a   = load <16 x i16>, <16 x i16>* %i, align 1
  %x   = sext <16 x i16> %a to <16 x i32>
  ret <16 x i32> %x
}
; Register-operand variant: zero-extends %a from <16 x i16> to <16 x i32> and
; clears every lane whose %mask bit is unset.
; A zero-masked zmm vpmovzxwd is expected on both targets; only the mask
; conversion into %k1 differs.
define <16 x i32> @zext_16x16_to_16x32mask(<16 x i16> %a, <16 x i1> %mask) nounwind readnone {
; KNL-LABEL: zext_16x16_to_16x32mask:
; KNL:       ## BB#0:
; KNL-NEXT:    vpmovsxbd %xmm1, %zmm1
; KNL-NEXT:    vpslld $31, %zmm1, %zmm1
; KNL-NEXT:    vptestmd %zmm1, %zmm1, %k1
; KNL-NEXT:    vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; KNL-NEXT:    retq
;
; SKX-LABEL: zext_16x16_to_16x32mask:
; SKX:       ## BB#0:
; SKX-NEXT:    vpsllw $7, %xmm1, %xmm1
; SKX-NEXT:    vpmovb2m %xmm1, %k1
; SKX-NEXT:    vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; SKX-NEXT:    retq
  %wide = zext <16 x i16> %a to <16 x i32>
  %masked = select <16 x i1> %mask, <16 x i32> %wide, <16 x i32> zeroinitializer
  ret <16 x i32> %masked
}
861
; Plain register-to-register zero extension of <16 x i16> to <16 x i32>; a
; single zmm vpmovzxwd is expected under both run lines.
define <16 x i32> @zext_16x16_to_16x32(<16 x i16> %a) nounwind readnone {
; ALL-LABEL: zext_16x16_to_16x32:
; ALL:       ## BB#0:
; ALL-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; ALL-NEXT:    retq
  %wide = zext <16 x i16> %a to <16 x i32>
  ret <16 x i32> %wide
}
870
; Masked zero extension from memory: loads <2 x i16> through %i, zero-extends
; to <2 x i64>, and zeroes every lane whose %mask bit is clear.
; Expected lowering: SKX folds the select into a zero-masked vpmovzxwq, while
; KNL builds a 64-bit lane mask with shift/shuffle and applies it with vpand.
; Marked readonly (not readnone) because the body loads through %i.
define <2 x i64> @zext_2x16mem_to_2x64(<2 x i16>* %i, <2 x i1> %mask) nounwind readonly {
; KNL-LABEL: zext_2x16mem_to_2x64:
; KNL:       ## BB#0:
; KNL-NEXT:    vpsllq $63, %xmm0, %xmm0
; KNL-NEXT:    vpsrad $31, %xmm0, %xmm0
; KNL-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; KNL-NEXT:    vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
; KNL-NEXT:    vpand %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: zext_2x16mem_to_2x64:
; SKX:       ## BB#0:
; SKX-NEXT:    vpsllq $63, %xmm0, %xmm0
; SKX-NEXT:    vptestmq %xmm0, %xmm0, %k1
; SKX-NEXT:    vpmovzxwq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero
; SKX-NEXT:    retq
  %a   = load <2 x i16>, <2 x i16>* %i, align 1
  %x   = zext <2 x i16> %a to <2 x i64>
  %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
  ret <2 x i64> %ret
}
892
; Masked sign extension from memory: loads <2 x i16> through %i, sign-extends
; to <2 x i64>, and zeroes every lane whose %mask bit is clear.
; Expected lowering: SKX folds the select into a zero-masked vpmovsxwq, while
; KNL builds a 64-bit lane mask with shift/shuffle and applies it with vpand.
; Marked readonly (not readnone) because the body loads through %i.
define <2 x i64> @sext_2x16mem_to_2x64mask(<2 x i16>* %i, <2 x i1> %mask) nounwind readonly {
; KNL-LABEL: sext_2x16mem_to_2x64mask:
; KNL:       ## BB#0:
; KNL-NEXT:    vpsllq $63, %xmm0, %xmm0
; KNL-NEXT:    vpsrad $31, %xmm0, %xmm0
; KNL-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; KNL-NEXT:    vpmovsxwq (%rdi), %xmm1
; KNL-NEXT:    vpand %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: sext_2x16mem_to_2x64mask:
; SKX:       ## BB#0:
; SKX-NEXT:    vpsllq $63, %xmm0, %xmm0
; SKX-NEXT:    vptestmq %xmm0, %xmm0, %k1
; SKX-NEXT:    vpmovsxwq (%rdi), %xmm0 {%k1} {z}
; SKX-NEXT:    retq
  %a   = load <2 x i16>, <2 x i16>* %i, align 1
  %x   = sext <2 x i16> %a to <2 x i64>
  %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
  ret <2 x i64> %ret
}
914
; Unmasked sign extension from memory: loads <2 x i16> through %i and widens it
; to <2 x i64>. Both run lines expect a single xmm vpmovsxwq from memory.
; Marked readonly (not readnone) because the body loads through %i.
define <2 x i64> @sext_2x16mem_to_2x64(<2 x i16>* %i) nounwind readonly {
; ALL-LABEL: sext_2x16mem_to_2x64:
; ALL:       ## BB#0:
; ALL-NEXT:    vpmovsxwq (%rdi), %xmm0
; ALL-NEXT:    retq
  %a   = load <2 x i16>, <2 x i16>* %i, align 1
  %x   = sext <2 x i16> %a to <2 x i64>
  ret <2 x i64> %x
}
924
; Masked zero extension from memory: loads <4 x i16> through %i, zero-extends
; to <4 x i64>, and zeroes every lane whose %mask bit is clear.
; Expected lowering: SKX folds the select into a zero-masked ymm vpmovzxwq,
; while KNL widens the mask to a ymm vector and applies it with vpand.
; Marked readonly (not readnone) because the body loads through %i.
define <4 x i64> @zext_4x16mem_to_4x64(<4 x i16>* %i, <4 x i1> %mask) nounwind readonly {
; KNL-LABEL: zext_4x16mem_to_4x64:
; KNL:       ## BB#0:
; KNL-NEXT:    vpslld $31, %xmm0, %xmm0
; KNL-NEXT:    vpsrad $31, %xmm0, %xmm0
; KNL-NEXT:    vpmovsxdq %xmm0, %ymm0
; KNL-NEXT:    vpmovzxwq {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; KNL-NEXT:    vpand %ymm1, %ymm0, %ymm0
; KNL-NEXT:    retq
;
; SKX-LABEL: zext_4x16mem_to_4x64:
; SKX:       ## BB#0:
; SKX-NEXT:    vpslld $31, %xmm0, %xmm0
; SKX-NEXT:    vptestmd %xmm0, %xmm0, %k1
; SKX-NEXT:    vpmovzxwq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; SKX-NEXT:    retq
  %a   = load <4 x i16>, <4 x i16>* %i, align 1
  %x   = zext <4 x i16> %a to <4 x i64>
  %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
  ret <4 x i64> %ret
}
946
; Masked sign extension from memory: loads <4 x i16> through %i, sign-extends
; to <4 x i64>, and zeroes every lane whose %mask bit is clear.
; Expected lowering: SKX folds the select into a zero-masked ymm vpmovsxwq,
; while KNL widens the mask to a ymm vector and applies it with vpand.
; Marked readonly (not readnone) because the body loads through %i.
define <4 x i64> @sext_4x16mem_to_4x64mask(<4 x i16>* %i, <4 x i1> %mask) nounwind readonly {
; KNL-LABEL: sext_4x16mem_to_4x64mask:
; KNL:       ## BB#0:
; KNL-NEXT:    vpslld $31, %xmm0, %xmm0
; KNL-NEXT:    vpsrad $31, %xmm0, %xmm0
; KNL-NEXT:    vpmovsxdq %xmm0, %ymm0
; KNL-NEXT:    vpmovsxwq (%rdi), %ymm1
; KNL-NEXT:    vpand %ymm1, %ymm0, %ymm0
; KNL-NEXT:    retq
;
; SKX-LABEL: sext_4x16mem_to_4x64mask:
; SKX:       ## BB#0:
; SKX-NEXT:    vpslld $31, %xmm0, %xmm0
; SKX-NEXT:    vptestmd %xmm0, %xmm0, %k1
; SKX-NEXT:    vpmovsxwq (%rdi), %ymm0 {%k1} {z}
; SKX-NEXT:    retq
  %a   = load <4 x i16>, <4 x i16>* %i, align 1
  %x   = sext <4 x i16> %a to <4 x i64>
  %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
  ret <4 x i64> %ret
}
968
; Unmasked sign extension from memory: loads <4 x i16> through %i and widens it
; to <4 x i64>. Both run lines expect a single ymm vpmovsxwq from memory.
; Marked readonly (not readnone) because the body loads through %i.
define <4 x i64> @sext_4x16mem_to_4x64(<4 x i16>* %i) nounwind readonly {
; ALL-LABEL: sext_4x16mem_to_4x64:
; ALL:       ## BB#0:
; ALL-NEXT:    vpmovsxwq (%rdi), %ymm0
; ALL-NEXT:    retq
  %a   = load <4 x i16>, <4 x i16>* %i, align 1
  %x   = sext <4 x i16> %a to <4 x i64>
  ret <4 x i64> %x
}
978
; Masked zero extension from memory: loads <8 x i16> through %i, zero-extends
; to <8 x i64>, and zeroes every lane whose %mask bit is clear.
; Both targets are expected to emit a zero-masked zmm vpmovzxwq; they differ
; only in how %mask is converted into %k1.
; Marked readonly (not readnone) because the body loads through %i.
define <8 x i64> @zext_8x16mem_to_8x64(<8 x i16>* %i, <8 x i1> %mask) nounwind readonly {
; KNL-LABEL: zext_8x16mem_to_8x64:
; KNL:       ## BB#0:
; KNL-NEXT:    vpmovsxwq %xmm0, %zmm0
; KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k1
; KNL-NEXT:    vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
; KNL-NEXT:    retq
;
; SKX-LABEL: zext_8x16mem_to_8x64:
; SKX:       ## BB#0:
; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
; SKX-NEXT:    vpmovw2m %xmm0, %k1
; SKX-NEXT:    vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
; SKX-NEXT:    retq
  %a   = load <8 x i16>, <8 x i16>* %i, align 1
  %x   = zext <8 x i16> %a to <8 x i64>
  %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
  ret <8 x i64> %ret
}
999
; Masked sign extension from memory: loads <8 x i16> through %i, sign-extends
; to <8 x i64>, and zeroes every lane whose %mask bit is clear.
; Both targets are expected to emit a zero-masked zmm vpmovsxwq; they differ
; only in how %mask is converted into %k1.
; Marked readonly (not readnone) because the body loads through %i.
define <8 x i64> @sext_8x16mem_to_8x64mask(<8 x i16>* %i, <8 x i1> %mask) nounwind readonly {
; KNL-LABEL: sext_8x16mem_to_8x64mask:
; KNL:       ## BB#0:
; KNL-NEXT:    vpmovsxwq %xmm0, %zmm0
; KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k1
; KNL-NEXT:    vpmovsxwq (%rdi), %zmm0 {%k1} {z}
; KNL-NEXT:    retq
;
; SKX-LABEL: sext_8x16mem_to_8x64mask:
; SKX:       ## BB#0:
; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
; SKX-NEXT:    vpmovw2m %xmm0, %k1
; SKX-NEXT:    vpmovsxwq (%rdi), %zmm0 {%k1} {z}
; SKX-NEXT:    retq
  %a   = load <8 x i16>, <8 x i16>* %i, align 1
  %x   = sext <8 x i16> %a to <8 x i64>
  %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
  ret <8 x i64> %ret
}
1020
; Unmasked sign extension from memory: loads <8 x i16> through %i and widens it
; to <8 x i64>. Both run lines expect a single zmm vpmovsxwq from memory.
; Marked readonly (not readnone) because the body loads through %i.
define <8 x i64> @sext_8x16mem_to_8x64(<8 x i16>* %i) nounwind readonly {
; ALL-LABEL: sext_8x16mem_to_8x64:
; ALL:       ## BB#0:
; ALL-NEXT:    vpmovsxwq (%rdi), %zmm0
; ALL-NEXT:    retq
  %a   = load <8 x i16>, <8 x i16>* %i, align 1
  %x   = sext <8 x i16> %a to <8 x i64>
  ret <8 x i64> %x
}
1030
; Register-operand variant: zero-extends %a from <8 x i16> to <8 x i64> and
; clears every lane whose %mask bit is unset.
; A zero-masked zmm vpmovzxwq is expected on both targets; only the mask
; conversion into %k1 differs.
define <8 x i64> @zext_8x16_to_8x64mask(<8 x i16> %a, <8 x i1> %mask) nounwind readnone {
; KNL-LABEL: zext_8x16_to_8x64mask:
; KNL:       ## BB#0:
; KNL-NEXT:    vpmovsxwq %xmm1, %zmm1
; KNL-NEXT:    vpsllq $63, %zmm1, %zmm1
; KNL-NEXT:    vptestmq %zmm1, %zmm1, %k1
; KNL-NEXT:    vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; KNL-NEXT:    retq
;
; SKX-LABEL: zext_8x16_to_8x64mask:
; SKX:       ## BB#0:
; SKX-NEXT:    vpsllw $15, %xmm1, %xmm1
; SKX-NEXT:    vpmovw2m %xmm1, %k1
; SKX-NEXT:    vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; SKX-NEXT:    retq
  %wide = zext <8 x i16> %a to <8 x i64>
  %masked = select <8 x i1> %mask, <8 x i64> %wide, <8 x i64> zeroinitializer
  ret <8 x i64> %masked
}
1050
; Plain register-to-register zero extension of <8 x i16> to <8 x i64>; a single
; zmm vpmovzxwq is expected under both run lines.
define <8 x i64> @zext_8x16_to_8x64(<8 x i16> %a) nounwind readnone {
; ALL-LABEL: zext_8x16_to_8x64:
; ALL:       ## BB#0:
; ALL-NEXT:    vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; ALL-NEXT:    retq
  %wide = zext <8 x i16> %a to <8 x i64>
  ret <8 x i64> %wide
}
1059
; Masked zero extension from memory: loads <2 x i32> through %i, zero-extends
; to <2 x i64>, and zeroes every lane whose %mask bit is clear.
; Expected lowering: SKX folds the select into a zero-masked vpmovzxdq, while
; KNL builds a 64-bit lane mask with shift/shuffle and applies it with vpand.
; Marked readonly (not readnone) because the body loads through %i.
define <2 x i64> @zext_2x32mem_to_2x64(<2 x i32>* %i, <2 x i1> %mask) nounwind readonly {
; KNL-LABEL: zext_2x32mem_to_2x64:
; KNL:       ## BB#0:
; KNL-NEXT:    vpsllq $63, %xmm0, %xmm0
; KNL-NEXT:    vpsrad $31, %xmm0, %xmm0
; KNL-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; KNL-NEXT:    vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero
; KNL-NEXT:    vpand %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: zext_2x32mem_to_2x64:
; SKX:       ## BB#0:
; SKX-NEXT:    vpsllq $63, %xmm0, %xmm0
; SKX-NEXT:    vptestmq %xmm0, %xmm0, %k1
; SKX-NEXT:    vpmovzxdq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero
; SKX-NEXT:    retq
  %a   = load <2 x i32>, <2 x i32>* %i, align 1
  %x   = zext <2 x i32> %a to <2 x i64>
  %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
  ret <2 x i64> %ret
}
1081
; Masked sign extension from memory: loads <2 x i32> through %i, sign-extends
; to <2 x i64>, and zeroes every lane whose %mask bit is clear.
; Expected lowering: SKX folds the select into a zero-masked vpmovsxdq, while
; KNL builds a 64-bit lane mask with shift/shuffle and applies it with vpand.
; Marked readonly (not readnone) because the body loads through %i.
define <2 x i64> @sext_2x32mem_to_2x64mask(<2 x i32>* %i, <2 x i1> %mask) nounwind readonly {
; KNL-LABEL: sext_2x32mem_to_2x64mask:
; KNL:       ## BB#0:
; KNL-NEXT:    vpsllq $63, %xmm0, %xmm0
; KNL-NEXT:    vpsrad $31, %xmm0, %xmm0
; KNL-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; KNL-NEXT:    vpmovsxdq (%rdi), %xmm1
; KNL-NEXT:    vpand %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: sext_2x32mem_to_2x64mask:
; SKX:       ## BB#0:
; SKX-NEXT:    vpsllq $63, %xmm0, %xmm0
; SKX-NEXT:    vptestmq %xmm0, %xmm0, %k1
; SKX-NEXT:    vpmovsxdq (%rdi), %xmm0 {%k1} {z}
; SKX-NEXT:    retq
  %a   = load <2 x i32>, <2 x i32>* %i, align 1
  %x   = sext <2 x i32> %a to <2 x i64>
  %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
  ret <2 x i64> %ret
}
1103
; Unmasked sign extension from memory: loads <2 x i32> through %i and widens it
; to <2 x i64>. Both run lines expect a single xmm vpmovsxdq from memory.
; Marked readonly (not readnone) because the body loads through %i.
define <2 x i64> @sext_2x32mem_to_2x64(<2 x i32>* %i) nounwind readonly {
; ALL-LABEL: sext_2x32mem_to_2x64:
; ALL:       ## BB#0:
; ALL-NEXT:    vpmovsxdq (%rdi), %xmm0
; ALL-NEXT:    retq
  %a   = load <2 x i32>, <2 x i32>* %i, align 1
  %x   = sext <2 x i32> %a to <2 x i64>
  ret <2 x i64> %x
}
1113
; Masked zero extension from memory: loads <4 x i32> through %i, zero-extends
; to <4 x i64>, and zeroes every lane whose %mask bit is clear.
; Expected lowering: SKX folds the select into a zero-masked ymm vpmovzxdq,
; while KNL widens the mask to a ymm vector and applies it with vpand.
; Marked readonly (not readnone) because the body loads through %i.
define <4 x i64> @zext_4x32mem_to_4x64(<4 x i32>* %i, <4 x i1> %mask) nounwind readonly {
; KNL-LABEL: zext_4x32mem_to_4x64:
; KNL:       ## BB#0:
; KNL-NEXT:    vpslld $31, %xmm0, %xmm0
; KNL-NEXT:    vpsrad $31, %xmm0, %xmm0
; KNL-NEXT:    vpmovsxdq %xmm0, %ymm0
; KNL-NEXT:    vpmovzxdq {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; KNL-NEXT:    vpand %ymm1, %ymm0, %ymm0
; KNL-NEXT:    retq
;
; SKX-LABEL: zext_4x32mem_to_4x64:
; SKX:       ## BB#0:
; SKX-NEXT:    vpslld $31, %xmm0, %xmm0
; SKX-NEXT:    vptestmd %xmm0, %xmm0, %k1
; SKX-NEXT:    vpmovzxdq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; SKX-NEXT:    retq
  %a   = load <4 x i32>, <4 x i32>* %i, align 1
  %x   = zext <4 x i32> %a to <4 x i64>
  %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
  ret <4 x i64> %ret
}
1135
; Masked sign extension from memory: loads <4 x i32> through %i, sign-extends
; to <4 x i64>, and zeroes every lane whose %mask bit is clear.
; Expected lowering: SKX folds the select into a zero-masked ymm vpmovsxdq,
; while KNL widens the mask to a ymm vector and applies it with vpand.
; Marked readonly (not readnone) because the body loads through %i.
define <4 x i64> @sext_4x32mem_to_4x64mask(<4 x i32>* %i, <4 x i1> %mask) nounwind readonly {
; KNL-LABEL: sext_4x32mem_to_4x64mask:
; KNL:       ## BB#0:
; KNL-NEXT:    vpslld $31, %xmm0, %xmm0
; KNL-NEXT:    vpsrad $31, %xmm0, %xmm0
; KNL-NEXT:    vpmovsxdq %xmm0, %ymm0
; KNL-NEXT:    vpmovsxdq (%rdi), %ymm1
; KNL-NEXT:    vpand %ymm1, %ymm0, %ymm0
; KNL-NEXT:    retq
;
; SKX-LABEL: sext_4x32mem_to_4x64mask:
; SKX:       ## BB#0:
; SKX-NEXT:    vpslld $31, %xmm0, %xmm0
; SKX-NEXT:    vptestmd %xmm0, %xmm0, %k1
; SKX-NEXT:    vpmovsxdq (%rdi), %ymm0 {%k1} {z}
; SKX-NEXT:    retq
  %a   = load <4 x i32>, <4 x i32>* %i, align 1
  %x   = sext <4 x i32> %a to <4 x i64>
  %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
  ret <4 x i64> %ret
}
1157
; Unmasked sign extension from memory: loads <4 x i32> through %i and widens it
; to <4 x i64>. Both run lines expect a single ymm vpmovsxdq from memory.
; Marked readonly (not readnone) because the body loads through %i.
define <4 x i64> @sext_4x32mem_to_4x64(<4 x i32>* %i) nounwind readonly {
; ALL-LABEL: sext_4x32mem_to_4x64:
; ALL:       ## BB#0:
; ALL-NEXT:    vpmovsxdq (%rdi), %ymm0
; ALL-NEXT:    retq
  %a   = load <4 x i32>, <4 x i32>* %i, align 1
  %x   = sext <4 x i32> %a to <4 x i64>
  ret <4 x i64> %x
}
1167
; Plain register-to-register sign extension of <4 x i32> to <4 x i64>; a single
; xmm-to-ymm vpmovsxdq is expected under both run lines.
define <4 x i64> @sext_4x32_to_4x64(<4 x i32> %a) nounwind readnone {
; ALL-LABEL: sext_4x32_to_4x64:
; ALL:       ## BB#0:
; ALL-NEXT:    vpmovsxdq %xmm0, %ymm0
; ALL-NEXT:    retq
  %wide = sext <4 x i32> %a to <4 x i64>
  ret <4 x i64> %wide
}
1176
; Register-operand variant: zero-extends %a from <4 x i32> to <4 x i64> and
; clears every lane whose %mask bit is unset.
; SKX is expected to use a zero-masked ymm vpmovzxdq; KNL widens the mask to a
; ymm vector and combines it with the extended value through vpand.
define <4 x i64> @zext_4x32_to_4x64mask(<4 x i32> %a, <4 x i1> %mask) nounwind readnone {
; KNL-LABEL: zext_4x32_to_4x64mask:
; KNL:       ## BB#0:
; KNL-NEXT:    vpslld $31, %xmm1, %xmm1
; KNL-NEXT:    vpsrad $31, %xmm1, %xmm1
; KNL-NEXT:    vpmovsxdq %xmm1, %ymm1
; KNL-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; KNL-NEXT:    vpand %ymm0, %ymm1, %ymm0
; KNL-NEXT:    retq
;
; SKX-LABEL: zext_4x32_to_4x64mask:
; SKX:       ## BB#0:
; SKX-NEXT:    vpslld $31, %xmm1, %xmm1
; SKX-NEXT:    vptestmd %xmm1, %xmm1, %k1
; SKX-NEXT:    vpmovzxdq {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; SKX-NEXT:    retq
  %wide = zext <4 x i32> %a to <4 x i64>
  %masked = select <4 x i1> %mask, <4 x i64> %wide, <4 x i64> zeroinitializer
  ret <4 x i64> %masked
}
1197
; Masked zero extension from memory: loads <8 x i32> through %i, zero-extends
; to <8 x i64>, and zeroes every lane whose %mask bit is clear.
; Both targets are expected to emit a zero-masked zmm vpmovzxdq; they differ
; only in how %mask is converted into %k1.
; Marked readonly (not readnone) because the body loads through %i.
define <8 x i64> @zext_8x32mem_to_8x64(<8 x i32>* %i, <8 x i1> %mask) nounwind readonly {
; KNL-LABEL: zext_8x32mem_to_8x64:
; KNL:       ## BB#0:
; KNL-NEXT:    vpmovsxwq %xmm0, %zmm0
; KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k1
; KNL-NEXT:    vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; KNL-NEXT:    retq
;
; SKX-LABEL: zext_8x32mem_to_8x64:
; SKX:       ## BB#0:
; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
; SKX-NEXT:    vpmovw2m %xmm0, %k1
; SKX-NEXT:    vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; SKX-NEXT:    retq
  %a   = load <8 x i32>, <8 x i32>* %i, align 1
  %x   = zext <8 x i32> %a to <8 x i64>
  %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
  ret <8 x i64> %ret
}
1218
; Masked sign extension from memory: loads <8 x i32> through %i, sign-extends
; to <8 x i64>, and zeroes every lane whose %mask bit is clear.
; Both targets are expected to emit a zero-masked zmm vpmovsxdq; they differ
; only in how %mask is converted into %k1.
; Marked readonly (not readnone) because the body loads through %i.
define <8 x i64> @sext_8x32mem_to_8x64mask(<8 x i32>* %i, <8 x i1> %mask) nounwind readonly {
; KNL-LABEL: sext_8x32mem_to_8x64mask:
; KNL:       ## BB#0:
; KNL-NEXT:    vpmovsxwq %xmm0, %zmm0
; KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k1
; KNL-NEXT:    vpmovsxdq (%rdi), %zmm0 {%k1} {z}
; KNL-NEXT:    retq
;
; SKX-LABEL: sext_8x32mem_to_8x64mask:
; SKX:       ## BB#0:
; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
; SKX-NEXT:    vpmovw2m %xmm0, %k1
; SKX-NEXT:    vpmovsxdq (%rdi), %zmm0 {%k1} {z}
; SKX-NEXT:    retq
  %a   = load <8 x i32>, <8 x i32>* %i, align 1
  %x   = sext <8 x i32> %a to <8 x i64>
  %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
  ret <8 x i64> %ret
}
1239
; Unmasked sign extension from memory: loads <8 x i32> through %i and widens it
; to <8 x i64>. Both run lines expect a single zmm vpmovsxdq from memory.
; Marked readonly (not readnone) because the body loads through %i.
define <8 x i64> @sext_8x32mem_to_8x64(<8 x i32>* %i) nounwind readonly {
; ALL-LABEL: sext_8x32mem_to_8x64:
; ALL:       ## BB#0:
; ALL-NEXT:    vpmovsxdq (%rdi), %zmm0
; ALL-NEXT:    retq
  %a   = load <8 x i32>, <8 x i32>* %i, align 1
  %x   = sext <8 x i32> %a to <8 x i64>
  ret <8 x i64> %x
}
1249
; Plain register-to-register sign extension of <8 x i32> to <8 x i64>; a single
; ymm-to-zmm vpmovsxdq is expected under both run lines.
define <8 x i64> @sext_8x32_to_8x64(<8 x i32> %a) nounwind readnone {
; ALL-LABEL: sext_8x32_to_8x64:
; ALL:       ## BB#0:
; ALL-NEXT:    vpmovsxdq %ymm0, %zmm0
; ALL-NEXT:    retq
  %wide = sext <8 x i32> %a to <8 x i64>
  ret <8 x i64> %wide
}
1258
; Register-operand variant: zero-extends %a from <8 x i32> to <8 x i64> and
; clears every lane whose %mask bit is unset.
; A zero-masked zmm vpmovzxdq is expected on both targets; only the mask
; conversion into %k1 differs.
define <8 x i64> @zext_8x32_to_8x64mask(<8 x i32> %a, <8 x i1> %mask) nounwind readnone {
; KNL-LABEL: zext_8x32_to_8x64mask:
; KNL:       ## BB#0:
; KNL-NEXT:    vpmovsxwq %xmm1, %zmm1
; KNL-NEXT:    vpsllq $63, %zmm1, %zmm1
; KNL-NEXT:    vptestmq %zmm1, %zmm1, %k1
; KNL-NEXT:    vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
; KNL-NEXT:    retq
;
; SKX-LABEL: zext_8x32_to_8x64mask:
; SKX:       ## BB#0:
; SKX-NEXT:    vpsllw $15, %xmm1, %xmm1
; SKX-NEXT:    vpmovw2m %xmm1, %k1
; SKX-NEXT:    vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
; SKX-NEXT:    retq
  %wide = zext <8 x i32> %a to <8 x i64>
  %masked = select <8 x i1> %mask, <8 x i64> %wide, <8 x i64> zeroinitializer
  ret <8 x i64> %masked
}
; Floating-point narrowing of <8 x double> to <8 x float>; a single zmm-to-ymm
; vcvtpd2ps is expected under both run lines.
define <8 x float> @fptrunc_test(<8 x double> %a) nounwind readnone {
; ALL-LABEL: fptrunc_test:
; ALL:       ## BB#0:
; ALL-NEXT:    vcvtpd2ps %zmm0, %ymm0
; ALL-NEXT:    retq
  %narrow = fptrunc <8 x double> %a to <8 x float>
  ret <8 x float> %narrow
}
1286
; Floating-point widening of <8 x float> to <8 x double>; a single ymm-to-zmm
; vcvtps2pd is expected under both run lines.
define <8 x double> @fpext_test(<8 x float> %a) nounwind readnone {
; ALL-LABEL: fpext_test:
; ALL:       ## BB#0:
; ALL-NEXT:    vcvtps2pd %ymm0, %zmm0
; ALL-NEXT:    retq
  %wide = fpext <8 x float> %a to <8 x double>
  ret <8 x double> %wide
}
1295
; Reinterprets the i16 argument as sixteen i1 lanes and zero-extends each lane
; to i32. The expected code moves the integer into %k1 and performs a
; zero-masked broadcast of a RIP-relative constant.
define <16 x i32> @zext_16i1_to_16xi32(i16 %b) {
; ALL-LABEL: zext_16i1_to_16xi32:
; ALL:       ## BB#0:
; ALL-NEXT:    kmovw %edi, %k1
; ALL-NEXT:    vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
; ALL-NEXT:    retq
  %bits = bitcast i16 %b to <16 x i1>
  %lanes = zext <16 x i1> %bits to <16 x i32>
  ret <16 x i32> %lanes
}
1306
; Reinterprets the i8 argument as eight i1 lanes and zero-extends each lane to
; i64. Both targets expect a zero-masked vpbroadcastq of a RIP-relative
; constant; the mask move is kmovw on KNL and kmovb on SKX.
define <8 x i64> @zext_8i1_to_8xi64(i8 %b) {
; KNL-LABEL: zext_8i1_to_8xi64:
; KNL:       ## BB#0:
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
; KNL-NEXT:    retq
;
; SKX-LABEL: zext_8i1_to_8xi64:
; SKX:       ## BB#0:
; SKX-NEXT:    kmovb %edi, %k1
; SKX-NEXT:    vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
; SKX-NEXT:    retq
  %bits = bitcast i8 %b to <8 x i1>
  %lanes = zext <8 x i1> %bits to <8 x i64>
  ret <8 x i64> %lanes
}
1323
; Truncates each of sixteen i8 lanes to its low bit and packs the bits into an
; i16. KNL is expected to widen to zmm dwords and use vptestmd; SKX is expected
; to shift the bit into the sign position and use vpmovb2m.
define i16 @trunc_16i8_to_16i1(<16 x i8> %a) {
; KNL-LABEL: trunc_16i8_to_16i1:
; KNL:       ## BB#0:
; KNL-NEXT:    vpmovsxbd %xmm0, %zmm0
; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    ## kill: %AX<def> %AX<kill> %EAX<kill>
; KNL-NEXT:    retq
;
; SKX-LABEL: trunc_16i8_to_16i1:
; SKX:       ## BB#0:
; SKX-NEXT:    vpsllw $7, %xmm0, %xmm0
; SKX-NEXT:    vpmovb2m %xmm0, %k0
; SKX-NEXT:    kmovw %k0, %eax
; SKX-NEXT:    ## kill: %AX<def> %AX<kill> %EAX<kill>
; SKX-NEXT:    retq
  %low_bits = trunc <16 x i8> %a to <16 x i1>
  %packed = bitcast <16 x i1> %low_bits to i16
  ret i16 %packed
}
1345
; Truncates each of sixteen i32 lanes to its low bit and packs the bits into an
; i16. Both run lines expect a shift of the low bit into the sign position, a
; vptestmd into %k0, and a kmovw to the return register.
define i16 @trunc_16i32_to_16i1(<16 x i32> %a) {
; ALL-LABEL: trunc_16i32_to_16i1:
; ALL:       ## BB#0:
; ALL-NEXT:    vpslld $31, %zmm0, %zmm0
; ALL-NEXT:    vptestmd %zmm0, %zmm0, %k0
; ALL-NEXT:    kmovw %k0, %eax
; ALL-NEXT:    ## kill: %AX<def> %AX<kill> %EAX<kill>
; ALL-NEXT:    retq
  %low_bits = trunc <16 x i32> %a to <16 x i1>
  %packed = bitcast <16 x i1> %low_bits to i16
  ret i16 %packed
}
1358
; Truncates %a and %b to <4 x i1>, ANDs the two masks, and sign-extends the
; result back to <4 x i32>. KNL is expected to stay in vector registers
; (vpand, then shift left/arithmetic shift right); SKX is expected to combine
; the masks in k-registers and expand with vpmovm2d.
define <4 x i32> @trunc_4i32_to_4i1(<4 x i32> %a, <4 x i32> %b) {
; KNL-LABEL: trunc_4i32_to_4i1:
; KNL:       ## BB#0:
; KNL-NEXT:    vpand %xmm1, %xmm0, %xmm0
; KNL-NEXT:    vpslld $31, %xmm0, %xmm0
; KNL-NEXT:    vpsrad $31, %xmm0, %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: trunc_4i32_to_4i1:
; SKX:       ## BB#0:
; SKX-NEXT:    vpslld $31, %xmm0, %xmm0
; SKX-NEXT:    vptestmd %xmm0, %xmm0, %k1
; SKX-NEXT:    vpslld $31, %xmm1, %xmm0
; SKX-NEXT:    vptestmd %xmm0, %xmm0, %k0 {%k1}
; SKX-NEXT:    vpmovm2d %k0, %xmm0
; SKX-NEXT:    retq
  %bits_a = trunc <4 x i32> %a to <4 x i1>
  %bits_b = trunc <4 x i32> %b to <4 x i1>
  %both = and <4 x i1> %bits_a, %bits_b
  %lanes = sext <4 x i1> %both to <4 x i32>
  ret <4 x i32> %lanes
}
1381
1382
; Truncate every i16 lane of %a to i1 and return the eight bits as an i8.
; The -mcpu=knl run is checked for widening to qwords (vpmovsxwq) followed by
; vpsllq $63 + vptestmq; the -mcpu=skx run is checked for vpsllw $15 +
; vpmovw2m and a byte-sized mask move (kmovb).
1383define i8 @trunc_8i16_to_8i1(<8 x i16> %a) {
1384; KNL-LABEL: trunc_8i16_to_8i1:
1385; KNL:       ## BB#0:
1386; KNL-NEXT:    vpmovsxwq %xmm0, %zmm0
1387; KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
1388; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k0
1389; KNL-NEXT:    kmovw %k0, %eax
1390; KNL-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
1391; KNL-NEXT:    retq
1392;
1393; SKX-LABEL: trunc_8i16_to_8i1:
1394; SKX:       ## BB#0:
1395; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
1396; SKX-NEXT:    vpmovw2m %xmm0, %k0
1397; SKX-NEXT:    kmovb %k0, %eax
1398; SKX-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
1399; SKX-NEXT:    retq
1400  %mask_b = trunc <8 x i16>%a to <8 x i1>
1401  %mask = bitcast <8 x i1> %mask_b to i8
1402  ret i8 %mask
1403}
1404
; Compute (%a1 < %a2) per lane (signed), invert the <8 x i1> result by xor
; with all-true, and sign-extend it to <8 x i32>.
; The -mcpu=knl run is checked for a zmm compare, knotw, an all-ones vector
; (vpternlogd $255) moved under a zeroing mask, and a vpmovqd narrowing; the
; -mcpu=skx run is checked for a ymm compare, knotb and vpmovm2d.
1405define <8 x i32> @sext_8i1_8i32(<8 x i32> %a1, <8 x i32> %a2) nounwind {
1406; KNL-LABEL: sext_8i1_8i32:
1407; KNL:       ## BB#0:
1408; KNL-NEXT:    ## kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
1409; KNL-NEXT:    ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
1410; KNL-NEXT:    vpcmpgtd %zmm0, %zmm1, %k0
1411; KNL-NEXT:    knotw %k0, %k1
1412; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0
1413; KNL-NEXT:    vmovdqa64 %zmm0, %zmm0 {%k1} {z}
1414; KNL-NEXT:    vpmovqd %zmm0, %ymm0
1415; KNL-NEXT:    retq
1416;
1417; SKX-LABEL: sext_8i1_8i32:
1418; SKX:       ## BB#0:
1419; SKX-NEXT:    vpcmpgtd %ymm0, %ymm1, %k0
1420; SKX-NEXT:    knotb %k0, %k0
1421; SKX-NEXT:    vpmovm2d %k0, %ymm0
1422; SKX-NEXT:    retq
1423  %x = icmp slt <8 x i32> %a1, %a2
1424  %x1 = xor <8 x i1>%x, <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
1425  %y = sext <8 x i1> %x1 to <8 x i32>
1426  ret <8 x i32> %y
1427}
1428
1429
; Truncate scalar %a to i1 and insert it as element 0 of a constant <16 x i1>
; whose element 1 is false and whose remaining elements are true; the vector
; is returned as an i16.
; Both runs share one expected sequence: isolate bit 0 (andl $1), move it to a
; mask register, and OR it (korw) with the constant -4 loaded via movw/kmovw.
1430define i16 @trunc_i32_to_i1(i32 %a) {
1431; ALL-LABEL: trunc_i32_to_i1:
1432; ALL:       ## BB#0:
1433; ALL-NEXT:    andl $1, %edi
1434; ALL-NEXT:    kmovw %edi, %k0
1435; ALL-NEXT:    movw $-4, %ax
1436; ALL-NEXT:    kmovw %eax, %k1
1437; ALL-NEXT:    korw %k0, %k1, %k0
1438; ALL-NEXT:    kmovw %k0, %eax
1439; ALL-NEXT:    ## kill: %AX<def> %AX<kill> %EAX<kill>
1440; ALL-NEXT:    retq
1441  %a_i = trunc i32 %a to i1
1442  %maskv = insertelement <16 x i1> <i1 true, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i1 %a_i, i32 0
1443  %res = bitcast <16 x i1> %maskv to i16
1444  ret i16 %res
1445}
1446
; Compute (%a1 < %a2) per i32 lane (signed) and sign-extend the <8 x i1>
; result to <8 x i16>.
; The -mcpu=knl run is checked for a ymm vpcmpgtd narrowed with vpmovdw; the
; -mcpu=skx run is checked for a compare into %k0 followed by vpmovm2w.
1447define <8 x i16> @sext_8i1_8i16(<8 x i32> %a1, <8 x i32> %a2) nounwind {
1448; KNL-LABEL: sext_8i1_8i16:
1449; KNL:       ## BB#0:
1450; KNL-NEXT:    vpcmpgtd %ymm0, %ymm1, %ymm0
1451; KNL-NEXT:    vpmovdw %zmm0, %ymm0
1452; KNL-NEXT:    ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
1453; KNL-NEXT:    retq
1454;
1455; SKX-LABEL: sext_8i1_8i16:
1456; SKX:       ## BB#0:
1457; SKX-NEXT:    vpcmpgtd %ymm0, %ymm1, %k0
1458; SKX-NEXT:    vpmovm2w %k0, %xmm0
1459; SKX-NEXT:    retq
1460  %x = icmp slt <8 x i32> %a1, %a2
1461  %y = sext <8 x i1> %x to <8 x i16>
1462  ret <8 x i16> %y
1463}
1464
; Compute (%a1 < %a2) per i32 lane (signed) and sign-extend the <16 x i1>
; result to <16 x i32>.
; The -mcpu=knl run is checked for a compare into %k1, an all-ones vector
; (vpternlogd $255) and a zero-masked vmovdqa32; the -mcpu=skx run is checked
; for a compare into %k0 followed by vpmovm2d.
1465define <16 x i32> @sext_16i1_16i32(<16 x i32> %a1, <16 x i32> %a2) nounwind {
1466; KNL-LABEL: sext_16i1_16i32:
1467; KNL:       ## BB#0:
1468; KNL-NEXT:    vpcmpgtd %zmm0, %zmm1, %k1
1469; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0
1470; KNL-NEXT:    vmovdqa32 %zmm0, %zmm0 {%k1} {z}
1471; KNL-NEXT:    retq
1472;
1473; SKX-LABEL: sext_16i1_16i32:
1474; SKX:       ## BB#0:
1475; SKX-NEXT:    vpcmpgtd %zmm0, %zmm1, %k0
1476; SKX-NEXT:    vpmovm2d %k0, %zmm0
1477; SKX-NEXT:    retq
1478  %x = icmp slt <16 x i32> %a1, %a2
1479  %y = sext <16 x i1> %x to <16 x i32>
1480  ret <16 x i32> %y
1481}
1482
; Compute (%a1 < %a2) per i32 lane (signed) and sign-extend the <8 x i1>
; result to <8 x i64>.
; The -mcpu=knl run is checked for a ymm vpcmpgtd widened with vpmovsxdq; the
; -mcpu=skx run is checked for a compare into %k0 followed by vpmovm2q.
1483define <8 x i64> @sext_8i1_8i64(<8 x i32> %a1, <8 x i32> %a2) nounwind {
1484; KNL-LABEL: sext_8i1_8i64:
1485; KNL:       ## BB#0:
1486; KNL-NEXT:    vpcmpgtd %ymm0, %ymm1, %ymm0
1487; KNL-NEXT:    vpmovsxdq %ymm0, %zmm0
1488; KNL-NEXT:    retq
1489;
1490; SKX-LABEL: sext_8i1_8i64:
1491; SKX:       ## BB#0:
1492; SKX-NEXT:    vpcmpgtd %ymm0, %ymm1, %k0
1493; SKX-NEXT:    vpmovm2q %k0, %zmm0
1494; SKX-NEXT:    retq
1495  %x = icmp slt <8 x i32> %a1, %a2
1496  %y = sext <8 x i1> %x to <8 x i64>
1497  ret <8 x i64> %y
1498}
1499
; Load <8 x i8> from %a, sign-extend it to <8 x i64>, and store it to %res.
; Both runs share one expected sequence: the load is folded into vpmovsxbq
; (memory operand (%rdi)) and the result is stored with vmovdqa64.
1500define void @extload_v8i64(<8 x i8>* %a, <8 x i64>* %res) {
1501; ALL-LABEL: extload_v8i64:
1502; ALL:       ## BB#0:
1503; ALL-NEXT:    vpmovsxbq (%rdi), %zmm0
1504; ALL-NEXT:    vmovdqa64 %zmm0, (%rsi)
1505; ALL-NEXT:    retq
1506  %sign_load = load <8 x i8>, <8 x i8>* %a
1507  %c = sext <8 x i8> %sign_load to <8 x i64>
1508  store <8 x i64> %c, <8 x i64>* %res
1509  ret void
1510}
1511
; Select between %x and zero per lane under a <64 x i1> mask, i.e. zero every
; i16 lane of %x whose mask bit is clear.
; The -mcpu=knl run is checked for a long scalarized sequence: the four mask
; quarters are tested into mask registers (vptestmd), each bit is extracted
; with a kshiftlw/kshiftrw/kmovw triple and re-inserted into an xmm register
; with vmovd/vpinsrb, and each rebuilt quarter is then widened (vpmovzxbw),
; turned into an all-ones/all-zero word mask (vpsllw/vpsraw $15) and ANDed
; with the matching ymm of %x. Six general-purpose registers are pushed on
; entry and popped before return.
; The -mcpu=skx run is checked for vpsllw $7 + vpmovb2m and two zero-masked
; vmovdqu16 moves, with kshiftrq $32 selecting the upper half of the mask.
1512define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
1513; KNL-LABEL: test21:
1514; KNL:       ## BB#0:
1515; KNL-NEXT:    pushq %rbp
1516; KNL-NEXT:    pushq %r15
1517; KNL-NEXT:    pushq %r14
1518; KNL-NEXT:    pushq %r13
1519; KNL-NEXT:    pushq %r12
1520; KNL-NEXT:    pushq %rbx
1521; KNL-NEXT:    vpmovsxbd %xmm7, %zmm7
1522; KNL-NEXT:    vpslld $31, %zmm7, %zmm7
1523; KNL-NEXT:    vpmovsxbd %xmm6, %zmm6
1524; KNL-NEXT:    vpslld $31, %zmm6, %zmm6
1525; KNL-NEXT:    vpmovsxbd %xmm5, %zmm5
1526; KNL-NEXT:    vpslld $31, %zmm5, %zmm5
1527; KNL-NEXT:    vpmovsxbd %xmm4, %zmm4
1528; KNL-NEXT:    vpslld $31, %zmm4, %zmm4
1529; KNL-NEXT:    vptestmd %zmm4, %zmm4, %k0
1530; KNL-NEXT:    kshiftlw $14, %k0, %k1
1531; KNL-NEXT:    kshiftrw $15, %k1, %k1
1532; KNL-NEXT:    kmovw %k1, %ecx
1533; KNL-NEXT:    kshiftlw $15, %k0, %k1
1534; KNL-NEXT:    kshiftrw $15, %k1, %k1
1535; KNL-NEXT:    kmovw %k1, %r15d
1536; KNL-NEXT:    kshiftlw $13, %k0, %k1
1537; KNL-NEXT:    kshiftrw $15, %k1, %k1
1538; KNL-NEXT:    kmovw %k1, %r12d
1539; KNL-NEXT:    kshiftlw $12, %k0, %k1
1540; KNL-NEXT:    kshiftrw $15, %k1, %k1
1541; KNL-NEXT:    kmovw %k1, %edx
1542; KNL-NEXT:    kshiftlw $11, %k0, %k1
1543; KNL-NEXT:    kshiftrw $15, %k1, %k1
1544; KNL-NEXT:    kmovw %k1, %r13d
1545; KNL-NEXT:    kshiftlw $10, %k0, %k1
1546; KNL-NEXT:    kshiftrw $15, %k1, %k1
1547; KNL-NEXT:    kmovw %k1, %eax
1548; KNL-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
1549; KNL-NEXT:    kshiftlw $9, %k0, %k1
1550; KNL-NEXT:    kshiftrw $15, %k1, %k1
1551; KNL-NEXT:    kmovw %k1, %esi
1552; KNL-NEXT:    kshiftlw $8, %k0, %k1
1553; KNL-NEXT:    kshiftrw $15, %k1, %k1
1554; KNL-NEXT:    kmovw %k1, %edi
1555; KNL-NEXT:    kshiftlw $7, %k0, %k1
1556; KNL-NEXT:    kshiftrw $15, %k1, %k1
1557; KNL-NEXT:    kmovw %k1, %r8d
1558; KNL-NEXT:    kshiftlw $6, %k0, %k1
1559; KNL-NEXT:    kshiftrw $15, %k1, %k1
1560; KNL-NEXT:    kmovw %k1, %r9d
1561; KNL-NEXT:    kshiftlw $5, %k0, %k1
1562; KNL-NEXT:    kshiftrw $15, %k1, %k1
1563; KNL-NEXT:    kmovw %k1, %r10d
1564; KNL-NEXT:    kshiftlw $4, %k0, %k1
1565; KNL-NEXT:    kshiftrw $15, %k1, %k1
1566; KNL-NEXT:    kmovw %k1, %r11d
1567; KNL-NEXT:    kshiftlw $3, %k0, %k1
1568; KNL-NEXT:    kshiftrw $15, %k1, %k1
1569; KNL-NEXT:    kmovw %k1, %ebx
1570; KNL-NEXT:    kshiftlw $2, %k0, %k1
1571; KNL-NEXT:    kshiftrw $15, %k1, %k1
1572; KNL-NEXT:    kmovw %k1, %ebp
1573; KNL-NEXT:    kshiftlw $1, %k0, %k1
1574; KNL-NEXT:    kshiftrw $15, %k1, %k1
1575; KNL-NEXT:    kmovw %k1, %r14d
1576; KNL-NEXT:    vptestmd %zmm5, %zmm5, %k2
1577; KNL-NEXT:    kshiftlw $0, %k0, %k0
1578; KNL-NEXT:    kshiftrw $15, %k0, %k0
1579; KNL-NEXT:    vmovd %r15d, %xmm4
1580; KNL-NEXT:    kmovw %k0, %r15d
1581; KNL-NEXT:    kshiftlw $14, %k2, %k0
1582; KNL-NEXT:    kshiftrw $15, %k0, %k0
1583; KNL-NEXT:    vpinsrb $1, %ecx, %xmm4, %xmm4
1584; KNL-NEXT:    kmovw %k0, %ecx
1585; KNL-NEXT:    kshiftlw $15, %k2, %k0
1586; KNL-NEXT:    kshiftrw $15, %k0, %k0
1587; KNL-NEXT:    vpinsrb $2, %r12d, %xmm4, %xmm4
1588; KNL-NEXT:    kmovw %k0, %eax
1589; KNL-NEXT:    kshiftlw $13, %k2, %k0
1590; KNL-NEXT:    kshiftrw $15, %k0, %k0
1591; KNL-NEXT:    vpinsrb $3, %edx, %xmm4, %xmm4
1592; KNL-NEXT:    kmovw %k0, %r12d
1593; KNL-NEXT:    kshiftlw $12, %k2, %k0
1594; KNL-NEXT:    kshiftrw $15, %k0, %k0
1595; KNL-NEXT:    vpinsrb $4, %r13d, %xmm4, %xmm4
1596; KNL-NEXT:    kmovw %k0, %edx
1597; KNL-NEXT:    kshiftlw $11, %k2, %k0
1598; KNL-NEXT:    kshiftrw $15, %k0, %k0
1599; KNL-NEXT:    vpinsrb $5, -{{[0-9]+}}(%rsp), %xmm4, %xmm4 ## 4-byte Folded Reload
1600; KNL-NEXT:    kmovw %k0, %r13d
1601; KNL-NEXT:    kshiftlw $10, %k2, %k0
1602; KNL-NEXT:    kshiftrw $15, %k0, %k0
1603; KNL-NEXT:    vpinsrb $6, %esi, %xmm4, %xmm4
1604; KNL-NEXT:    kmovw %k0, %esi
1605; KNL-NEXT:    movl %esi, -{{[0-9]+}}(%rsp) ## 4-byte Spill
1606; KNL-NEXT:    kshiftlw $9, %k2, %k0
1607; KNL-NEXT:    kshiftrw $15, %k0, %k0
1608; KNL-NEXT:    vpinsrb $7, %edi, %xmm4, %xmm4
1609; KNL-NEXT:    kmovw %k0, %esi
1610; KNL-NEXT:    kshiftlw $8, %k2, %k0
1611; KNL-NEXT:    kshiftrw $15, %k0, %k0
1612; KNL-NEXT:    vpinsrb $8, %r8d, %xmm4, %xmm4
1613; KNL-NEXT:    kmovw %k0, %edi
1614; KNL-NEXT:    kshiftlw $7, %k2, %k0
1615; KNL-NEXT:    kshiftrw $15, %k0, %k0
1616; KNL-NEXT:    vpinsrb $9, %r9d, %xmm4, %xmm4
1617; KNL-NEXT:    kmovw %k0, %r8d
1618; KNL-NEXT:    kshiftlw $6, %k2, %k0
1619; KNL-NEXT:    kshiftrw $15, %k0, %k0
1620; KNL-NEXT:    vpinsrb $10, %r10d, %xmm4, %xmm4
1621; KNL-NEXT:    kmovw %k0, %r9d
1622; KNL-NEXT:    kshiftlw $5, %k2, %k0
1623; KNL-NEXT:    kshiftrw $15, %k0, %k0
1624; KNL-NEXT:    vpinsrb $11, %r11d, %xmm4, %xmm4
1625; KNL-NEXT:    kmovw %k0, %r10d
1626; KNL-NEXT:    kshiftlw $4, %k2, %k0
1627; KNL-NEXT:    kshiftrw $15, %k0, %k0
1628; KNL-NEXT:    vpinsrb $12, %ebx, %xmm4, %xmm4
1629; KNL-NEXT:    kmovw %k0, %ebx
1630; KNL-NEXT:    kshiftlw $3, %k2, %k0
1631; KNL-NEXT:    kshiftrw $15, %k0, %k0
1632; KNL-NEXT:    vpinsrb $13, %ebp, %xmm4, %xmm4
1633; KNL-NEXT:    kmovw %k0, %ebp
1634; KNL-NEXT:    kshiftlw $2, %k2, %k0
1635; KNL-NEXT:    kshiftrw $15, %k0, %k0
1636; KNL-NEXT:    vpinsrb $14, %r14d, %xmm4, %xmm4
1637; KNL-NEXT:    kmovw %k0, %r11d
1638; KNL-NEXT:    kshiftlw $1, %k2, %k0
1639; KNL-NEXT:    kshiftrw $15, %k0, %k0
1640; KNL-NEXT:    vpinsrb $15, %r15d, %xmm4, %xmm4
1641; KNL-NEXT:    kmovw %k0, %r14d
1642; KNL-NEXT:    vptestmd %zmm6, %zmm6, %k1
1643; KNL-NEXT:    kshiftlw $0, %k2, %k0
1644; KNL-NEXT:    kshiftrw $15, %k0, %k0
1645; KNL-NEXT:    vmovd %eax, %xmm5
1646; KNL-NEXT:    kmovw %k0, %r15d
1647; KNL-NEXT:    kshiftlw $14, %k1, %k0
1648; KNL-NEXT:    kshiftrw $15, %k0, %k0
1649; KNL-NEXT:    vpinsrb $1, %ecx, %xmm5, %xmm5
1650; KNL-NEXT:    kmovw %k0, %ecx
1651; KNL-NEXT:    kshiftlw $15, %k1, %k0
1652; KNL-NEXT:    kshiftrw $15, %k0, %k0
1653; KNL-NEXT:    vpinsrb $2, %r12d, %xmm5, %xmm5
1654; KNL-NEXT:    kmovw %k0, %eax
1655; KNL-NEXT:    kshiftlw $13, %k1, %k0
1656; KNL-NEXT:    kshiftrw $15, %k0, %k0
1657; KNL-NEXT:    vpinsrb $3, %edx, %xmm5, %xmm5
1658; KNL-NEXT:    kmovw %k0, %r12d
1659; KNL-NEXT:    kshiftlw $12, %k1, %k0
1660; KNL-NEXT:    kshiftrw $15, %k0, %k0
1661; KNL-NEXT:    vpinsrb $4, %r13d, %xmm5, %xmm5
1662; KNL-NEXT:    kmovw %k0, %edx
1663; KNL-NEXT:    kshiftlw $11, %k1, %k0
1664; KNL-NEXT:    kshiftrw $15, %k0, %k0
1665; KNL-NEXT:    vpinsrb $5, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
1666; KNL-NEXT:    kmovw %k0, %r13d
1667; KNL-NEXT:    kshiftlw $10, %k1, %k0
1668; KNL-NEXT:    kshiftrw $15, %k0, %k0
1669; KNL-NEXT:    vpinsrb $6, %esi, %xmm5, %xmm5
1670; KNL-NEXT:    kmovw %k0, %esi
1671; KNL-NEXT:    movl %esi, -{{[0-9]+}}(%rsp) ## 4-byte Spill
1672; KNL-NEXT:    kshiftlw $9, %k1, %k0
1673; KNL-NEXT:    kshiftrw $15, %k0, %k0
1674; KNL-NEXT:    vpinsrb $7, %edi, %xmm5, %xmm5
1675; KNL-NEXT:    kmovw %k0, %esi
1676; KNL-NEXT:    kshiftlw $8, %k1, %k0
1677; KNL-NEXT:    kshiftrw $15, %k0, %k0
1678; KNL-NEXT:    vpinsrb $8, %r8d, %xmm5, %xmm5
1679; KNL-NEXT:    kmovw %k0, %edi
1680; KNL-NEXT:    kshiftlw $7, %k1, %k0
1681; KNL-NEXT:    kshiftrw $15, %k0, %k0
1682; KNL-NEXT:    vpinsrb $9, %r9d, %xmm5, %xmm5
1683; KNL-NEXT:    kmovw %k0, %r8d
1684; KNL-NEXT:    kshiftlw $6, %k1, %k0
1685; KNL-NEXT:    kshiftrw $15, %k0, %k0
1686; KNL-NEXT:    vpinsrb $10, %r10d, %xmm5, %xmm5
1687; KNL-NEXT:    kmovw %k0, %r9d
1688; KNL-NEXT:    kshiftlw $5, %k1, %k0
1689; KNL-NEXT:    kshiftrw $15, %k0, %k0
1690; KNL-NEXT:    vpinsrb $11, %ebx, %xmm5, %xmm5
1691; KNL-NEXT:    kmovw %k0, %ebx
1692; KNL-NEXT:    kshiftlw $4, %k1, %k0
1693; KNL-NEXT:    kshiftrw $15, %k0, %k0
1694; KNL-NEXT:    vpinsrb $12, %ebp, %xmm5, %xmm5
1695; KNL-NEXT:    kmovw %k0, %ebp
1696; KNL-NEXT:    kshiftlw $3, %k1, %k0
1697; KNL-NEXT:    kshiftrw $15, %k0, %k0
1698; KNL-NEXT:    vpinsrb $13, %r11d, %xmm5, %xmm5
1699; KNL-NEXT:    kmovw %k0, %r10d
1700; KNL-NEXT:    kshiftlw $2, %k1, %k0
1701; KNL-NEXT:    kshiftrw $15, %k0, %k0
1702; KNL-NEXT:    vpinsrb $14, %r14d, %xmm5, %xmm5
1703; KNL-NEXT:    kmovw %k0, %r11d
1704; KNL-NEXT:    kshiftlw $1, %k1, %k0
1705; KNL-NEXT:    kshiftrw $15, %k0, %k0
1706; KNL-NEXT:    vpinsrb $15, %r15d, %xmm5, %xmm5
1707; KNL-NEXT:    kmovw %k0, %r14d
1708; KNL-NEXT:    vptestmd %zmm7, %zmm7, %k0
1709; KNL-NEXT:    kshiftlw $0, %k1, %k1
1710; KNL-NEXT:    kshiftrw $15, %k1, %k1
1711; KNL-NEXT:    vmovd %eax, %xmm6
1712; KNL-NEXT:    kmovw %k1, %r15d
1713; KNL-NEXT:    kshiftlw $14, %k0, %k1
1714; KNL-NEXT:    kshiftrw $15, %k1, %k1
1715; KNL-NEXT:    vpinsrb $1, %ecx, %xmm6, %xmm6
1716; KNL-NEXT:    kmovw %k1, %ecx
1717; KNL-NEXT:    kshiftlw $15, %k0, %k1
1718; KNL-NEXT:    kshiftrw $15, %k1, %k1
1719; KNL-NEXT:    vpinsrb $2, %r12d, %xmm6, %xmm6
1720; KNL-NEXT:    kmovw %k1, %r12d
1721; KNL-NEXT:    kshiftlw $13, %k0, %k1
1722; KNL-NEXT:    kshiftrw $15, %k1, %k1
1723; KNL-NEXT:    vpinsrb $3, %edx, %xmm6, %xmm6
1724; KNL-NEXT:    kmovw %k1, %edx
1725; KNL-NEXT:    kshiftlw $12, %k0, %k1
1726; KNL-NEXT:    kshiftrw $15, %k1, %k1
1727; KNL-NEXT:    vpinsrb $4, %r13d, %xmm6, %xmm6
1728; KNL-NEXT:    kmovw %k1, %r13d
1729; KNL-NEXT:    kshiftlw $11, %k0, %k1
1730; KNL-NEXT:    kshiftrw $15, %k1, %k1
1731; KNL-NEXT:    vpinsrb $5, -{{[0-9]+}}(%rsp), %xmm6, %xmm6 ## 4-byte Folded Reload
1732; KNL-NEXT:    kmovw %k1, %eax
1733; KNL-NEXT:    kshiftlw $10, %k0, %k1
1734; KNL-NEXT:    kshiftrw $15, %k1, %k1
1735; KNL-NEXT:    vpinsrb $6, %esi, %xmm6, %xmm6
1736; KNL-NEXT:    kmovw %k1, %esi
1737; KNL-NEXT:    kshiftlw $9, %k0, %k1
1738; KNL-NEXT:    kshiftrw $15, %k1, %k1
1739; KNL-NEXT:    vpinsrb $7, %edi, %xmm6, %xmm6
1740; KNL-NEXT:    kmovw %k1, %edi
1741; KNL-NEXT:    kshiftlw $8, %k0, %k1
1742; KNL-NEXT:    kshiftrw $15, %k1, %k1
1743; KNL-NEXT:    vpinsrb $8, %r8d, %xmm6, %xmm6
1744; KNL-NEXT:    kmovw %k1, %r8d
1745; KNL-NEXT:    kshiftlw $7, %k0, %k1
1746; KNL-NEXT:    kshiftrw $15, %k1, %k1
1747; KNL-NEXT:    vpinsrb $9, %r9d, %xmm6, %xmm6
1748; KNL-NEXT:    kmovw %k1, %r9d
1749; KNL-NEXT:    kshiftlw $6, %k0, %k1
1750; KNL-NEXT:    kshiftrw $15, %k1, %k1
1751; KNL-NEXT:    vpinsrb $10, %ebx, %xmm6, %xmm6
1752; KNL-NEXT:    kmovw %k1, %ebx
1753; KNL-NEXT:    kshiftlw $5, %k0, %k1
1754; KNL-NEXT:    kshiftrw $15, %k1, %k1
1755; KNL-NEXT:    vpinsrb $11, %ebp, %xmm6, %xmm6
1756; KNL-NEXT:    kmovw %k1, %ebp
1757; KNL-NEXT:    kshiftlw $4, %k0, %k1
1758; KNL-NEXT:    kshiftrw $15, %k1, %k1
1759; KNL-NEXT:    vpinsrb $12, %r10d, %xmm6, %xmm6
1760; KNL-NEXT:    kmovw %k1, %r10d
1761; KNL-NEXT:    kshiftlw $3, %k0, %k1
1762; KNL-NEXT:    kshiftrw $15, %k1, %k1
1763; KNL-NEXT:    vpinsrb $13, %r11d, %xmm6, %xmm6
1764; KNL-NEXT:    kmovw %k1, %r11d
1765; KNL-NEXT:    kshiftlw $2, %k0, %k1
1766; KNL-NEXT:    kshiftrw $15, %k1, %k1
1767; KNL-NEXT:    vpinsrb $14, %r14d, %xmm6, %xmm6
1768; KNL-NEXT:    kmovw %k1, %r14d
1769; KNL-NEXT:    kshiftlw $1, %k0, %k1
1770; KNL-NEXT:    kshiftrw $15, %k1, %k1
1771; KNL-NEXT:    vpinsrb $15, %r15d, %xmm6, %xmm6
1772; KNL-NEXT:    kmovw %k1, %r15d
1773; KNL-NEXT:    kshiftlw $0, %k0, %k0
1774; KNL-NEXT:    kshiftrw $15, %k0, %k0
1775; KNL-NEXT:    vmovd %r12d, %xmm7
1776; KNL-NEXT:    kmovw %k0, %r12d
1777; KNL-NEXT:    vpinsrb $1, %ecx, %xmm7, %xmm7
1778; KNL-NEXT:    vpinsrb $2, %edx, %xmm7, %xmm7
1779; KNL-NEXT:    vpinsrb $3, %r13d, %xmm7, %xmm7
1780; KNL-NEXT:    vpinsrb $4, %eax, %xmm7, %xmm7
1781; KNL-NEXT:    vpinsrb $5, %esi, %xmm7, %xmm7
1782; KNL-NEXT:    vpinsrb $6, %edi, %xmm7, %xmm7
1783; KNL-NEXT:    vpinsrb $7, %r8d, %xmm7, %xmm7
1784; KNL-NEXT:    vpinsrb $8, %r9d, %xmm7, %xmm7
1785; KNL-NEXT:    vpinsrb $9, %ebx, %xmm7, %xmm7
1786; KNL-NEXT:    vpinsrb $10, %ebp, %xmm7, %xmm7
1787; KNL-NEXT:    vpinsrb $11, %r10d, %xmm7, %xmm7
1788; KNL-NEXT:    vpinsrb $12, %r11d, %xmm7, %xmm7
1789; KNL-NEXT:    vpinsrb $13, %r14d, %xmm7, %xmm7
1790; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm4 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero,xmm4[8],zero,xmm4[9],zero,xmm4[10],zero,xmm4[11],zero,xmm4[12],zero,xmm4[13],zero,xmm4[14],zero,xmm4[15],zero
1791; KNL-NEXT:    vpsllw $15, %ymm4, %ymm4
1792; KNL-NEXT:    vpsraw $15, %ymm4, %ymm4
1793; KNL-NEXT:    vpand %ymm0, %ymm4, %ymm0
1794; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm4 = xmm5[0],zero,xmm5[1],zero,xmm5[2],zero,xmm5[3],zero,xmm5[4],zero,xmm5[5],zero,xmm5[6],zero,xmm5[7],zero,xmm5[8],zero,xmm5[9],zero,xmm5[10],zero,xmm5[11],zero,xmm5[12],zero,xmm5[13],zero,xmm5[14],zero,xmm5[15],zero
1795; KNL-NEXT:    vpsllw $15, %ymm4, %ymm4
1796; KNL-NEXT:    vpsraw $15, %ymm4, %ymm4
1797; KNL-NEXT:    vpand %ymm1, %ymm4, %ymm1
1798; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm4 = xmm6[0],zero,xmm6[1],zero,xmm6[2],zero,xmm6[3],zero,xmm6[4],zero,xmm6[5],zero,xmm6[6],zero,xmm6[7],zero,xmm6[8],zero,xmm6[9],zero,xmm6[10],zero,xmm6[11],zero,xmm6[12],zero,xmm6[13],zero,xmm6[14],zero,xmm6[15],zero
1799; KNL-NEXT:    vpsllw $15, %ymm4, %ymm4
1800; KNL-NEXT:    vpsraw $15, %ymm4, %ymm4
1801; KNL-NEXT:    vpand %ymm2, %ymm4, %ymm2
1802; KNL-NEXT:    vpinsrb $14, %r15d, %xmm7, %xmm4
1803; KNL-NEXT:    vpinsrb $15, %r12d, %xmm4, %xmm4
1804; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm4 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero,xmm4[8],zero,xmm4[9],zero,xmm4[10],zero,xmm4[11],zero,xmm4[12],zero,xmm4[13],zero,xmm4[14],zero,xmm4[15],zero
1805; KNL-NEXT:    vpsllw $15, %ymm4, %ymm4
1806; KNL-NEXT:    vpsraw $15, %ymm4, %ymm4
1807; KNL-NEXT:    vpand %ymm3, %ymm4, %ymm3
1808; KNL-NEXT:    popq %rbx
1809; KNL-NEXT:    popq %r12
1810; KNL-NEXT:    popq %r13
1811; KNL-NEXT:    popq %r14
1812; KNL-NEXT:    popq %r15
1813; KNL-NEXT:    popq %rbp
1814; KNL-NEXT:    retq
1815;
1816; SKX-LABEL: test21:
1817; SKX:       ## BB#0:
1818; SKX-NEXT:    vpsllw $7, %zmm2, %zmm2
1819; SKX-NEXT:    vpmovb2m %zmm2, %k1
1820; SKX-NEXT:    vmovdqu16 %zmm0, %zmm0 {%k1} {z}
1821; SKX-NEXT:    kshiftrq $32, %k1, %k1
1822; SKX-NEXT:    vmovdqu16 %zmm1, %zmm1 {%k1} {z}
1823; SKX-NEXT:    retq
1824  %ret = select <64 x i1> %mask, <64 x i16> %x, <64 x i16> zeroinitializer
1825  ret <64 x i16> %ret
1826}
1827
; Zero-extension written as a shuffle: every byte of %a is interleaved with a
; byte taken from the zero vector (shuffle index 16), and the <32 x i8> result
; is bitcast to <16 x i16>.
; Both runs expect this to be matched as a single vpmovzxbw.
1828define <16 x i16> @shuffle_zext_16x8_to_16x16(<16 x i8> %a) nounwind readnone {
1829; ALL-LABEL: shuffle_zext_16x8_to_16x16:
1830; ALL:       ## BB#0:
1831; ALL-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
1832; ALL-NEXT:    retq
1833  %1 = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <32 x i32> <i32 0, i32 16, i32 1, i32 16, i32 2, i32 16, i32 3, i32 16, i32 4, i32 16, i32 5, i32 16, i32 6, i32 16, i32 7, i32 16, i32 8, i32 16, i32 9, i32 16, i32 10, i32 16, i32 11, i32 16, i32 12, i32 16, i32 13, i32 16, i32 14, i32 16, i32 15, i32 16>
1834  %2 = bitcast <32 x i8> %1 to <16 x i16>
1835  ret <16 x i16> %2
1836}
1837
; Zero-extension of the low 16 bytes of a <32 x i8> input, written as a
; shuffle: bytes 0..15 of %a are interleaved with a zero byte (shuffle index
; 32) and the result is bitcast to <16 x i16>.
; Both runs expect a single vpmovzxbw reading only the low xmm half.
1838define <16 x i16> @zext_32x8_to_16x16(<32 x i8> %a) {
1839; ALL-LABEL: zext_32x8_to_16x16:
1840; ALL:       ## BB#0:
1841; ALL-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
1842; ALL-NEXT:    retq
1843  %1 = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 0, i32 32, i32 1, i32 32, i32 2, i32 32, i32 3, i32 32, i32 4, i32 32, i32 5, i32 32, i32 6, i32 32, i32 7, i32 32, i32 8, i32 32, i32 9, i32 32, i32 10, i32 32, i32 11, i32 32, i32 12, i32 32, i32 13, i32 32, i32 14, i32 32, i32 15, i32 32>
1844  %2 = bitcast <32 x i8> %1 to <16 x i16>
1845  ret <16 x i16> %2
1846}
1847
; Zero-extension of bytes 0..7 of %a to 32 bits, written as a shuffle: each
; selected byte is followed by three zero bytes (shuffle index 32) and the
; result is bitcast to <8 x i32>.
; Both runs expect a single vpmovzxbd.
1848define <8 x i32> @zext_32x8_to_8x32(<32 x i8> %a) {
1849; ALL-LABEL: zext_32x8_to_8x32:
1850; ALL:       ## BB#0:
1851; ALL-NEXT:    vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
1852; ALL-NEXT:    retq
1853  %1 = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 0, i32 32, i32 32, i32 32, i32 1, i32 32, i32 32, i32 32, i32 2, i32 32, i32 32, i32 32, i32 3, i32 32, i32 32, i32 32, i32 4, i32 32, i32 32, i32 32, i32 5, i32 32, i32 32, i32 32, i32 6, i32 32, i32 32, i32 32, i32 7, i32 32, i32 32, i32 32>
1854  %2 = bitcast <32 x i8> %1 to <8 x i32>
1855  ret <8 x i32> %2
1856}
1857
; Zero-extension of bytes 0..3 of %a to 64 bits, written as a shuffle: each
; selected byte is followed by seven zero bytes (shuffle index 32) and the
; result is bitcast to <4 x i64>.
; Both runs expect a single vpmovzxbq.
1858define <4 x i64> @zext_32x8_to_4x64(<32 x i8> %a) {
1859; ALL-LABEL: zext_32x8_to_4x64:
1860; ALL:       ## BB#0:
1861; ALL-NEXT:    vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
1862; ALL-NEXT:    retq
1863  %1 = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 0, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 1, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 2, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 3, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32>
1864  %2 = bitcast <32 x i8> %1 to <4 x i64>
1865  ret <4 x i64> %2
1866}
1867
; Zero-extension of words 0..7 of %a to 32 bits, written as a shuffle: each
; selected word is followed by one zero word (shuffle index 16) and the result
; is bitcast to <8 x i32>.
; Both runs expect a single vpmovzxwd.
1868define <8 x i32> @zext_16x16_to_8x32(<16 x i16> %a) {
1869; ALL-LABEL: zext_16x16_to_8x32:
1870; ALL:       ## BB#0:
1871; ALL-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1872; ALL-NEXT:    retq
1873  %1 = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 0, i32 16, i32 1, i32 16, i32 2, i32 16, i32 3, i32 16, i32 4, i32 16, i32 5, i32 16, i32 6, i32 16, i32 7, i32 16>
1874  %2 = bitcast <16 x i16> %1 to <8 x i32>
1875  ret <8 x i32> %2
1876}
1877
; Zero-extension of words 0..3 of %a to 64 bits, written as a shuffle: each
; selected word is followed by three zero words (shuffle index 16) and the
; result is bitcast to <4 x i64>.
; Both runs expect a single vpmovzxwq.
1878define <4 x i64> @zext_16x16_to_4x64(<16 x i16> %a) {
1879; ALL-LABEL: zext_16x16_to_4x64:
1880; ALL:       ## BB#0:
1881; ALL-NEXT:    vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1882; ALL-NEXT:    retq
1883  %1 = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 0, i32 16, i32 16, i32 16, i32 1, i32 16, i32 16, i32 16, i32 2, i32 16, i32 16, i32 16, i32 3, i32 16, i32 16, i32 16>
1884  %2 = bitcast <16 x i16> %1 to <4 x i64>
1885  ret <4 x i64> %2
1886}
1887
; Zero-extension of dwords 0..3 of %a to 64 bits, written as a shuffle: each
; selected dword is followed by one zero dword (shuffle index 8) and the
; result is bitcast to <4 x i64>.
; Both runs expect a single vpmovzxdq.
1888define <4 x i64> @zext_8x32_to_4x64(<8 x i32> %a) {
1889; ALL-LABEL: zext_8x32_to_4x64:
1890; ALL:       ## BB#0:
1891; ALL-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1892; ALL-NEXT:    retq
1893  %1 = shufflevector <8 x i32> %a, <8 x i32> zeroinitializer, <8 x i32> <i32 0, i32 8, i32 1, i32 8, i32 2, i32 8, i32 3, i32 8>
1894  %2 = bitcast <8 x i32> %1 to <4 x i64>
1895  ret <4 x i64> %2
1896}
1897
; Compare %x and %y for equality per byte and zero-extend the <64 x i1>
; result to <64 x i8> (1 where equal, 0 otherwise).
; The -mcpu=knl run is checked for two ymm vpcmpeqb, each ANDed with a vector
; of byte ones; the -mcpu=skx run is checked for one zmm compare into %k1
; followed by a zero-masked vmovdqu8 load from a RIP-relative address.
1898define <64 x i8> @zext_64xi1_to_64xi8(<64 x i8> %x, <64 x i8> %y) #0 {
1899; KNL-LABEL: zext_64xi1_to_64xi8:
1900; KNL:       ## BB#0:
1901; KNL-NEXT:    vpcmpeqb %ymm2, %ymm0, %ymm0
1902; KNL-NEXT:    vmovdqa {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
1903; KNL-NEXT:    vpand %ymm2, %ymm0, %ymm0
1904; KNL-NEXT:    vpcmpeqb %ymm3, %ymm1, %ymm1
1905; KNL-NEXT:    vpand %ymm2, %ymm1, %ymm1
1906; KNL-NEXT:    retq
1907;
1908; SKX-LABEL: zext_64xi1_to_64xi8:
1909; SKX:       ## BB#0:
1910; SKX-NEXT:    vpcmpeqb %zmm1, %zmm0, %k1
1911; SKX-NEXT:    vmovdqu8 {{.*}}(%rip), %zmm0 {%k1} {z}
1912; SKX-NEXT:    retq
1913  %mask = icmp eq <64 x i8> %x, %y
1914  %1 = zext <64 x i1> %mask to <64 x i8>
1915  ret <64 x i8> %1
1916}
1917
; Compare %x and %y for equality per word and zero-extend the <32 x i1>
; result to <32 x i16>.
; The -mcpu=knl run is checked for two ymm vpcmpeqw, each followed by a
; logical right shift by 15 (vpsrlw); the -mcpu=skx run is checked for one zmm
; compare into %k1 followed by a zero-masked vmovdqu16 load from a
; RIP-relative address.
1918define <32 x i16> @zext_32xi1_to_32xi16(<32 x i16> %x, <32 x i16> %y) #0 {
1919; KNL-LABEL: zext_32xi1_to_32xi16:
1920; KNL:       ## BB#0:
1921; KNL-NEXT:    vpcmpeqw %ymm2, %ymm0, %ymm0
1922; KNL-NEXT:    vpsrlw $15, %ymm0, %ymm0
1923; KNL-NEXT:    vpcmpeqw %ymm3, %ymm1, %ymm1
1924; KNL-NEXT:    vpsrlw $15, %ymm1, %ymm1
1925; KNL-NEXT:    retq
1926;
1927; SKX-LABEL: zext_32xi1_to_32xi16:
1928; SKX:       ## BB#0:
1929; SKX-NEXT:    vpcmpeqw %zmm1, %zmm0, %k1
1930; SKX-NEXT:    vmovdqu16 {{.*}}(%rip), %zmm0 {%k1} {z}
1931; SKX-NEXT:    retq
1932  %mask = icmp eq <32 x i16> %x, %y
1933  %1 = zext <32 x i1> %mask to <32 x i16>
1934  ret <32 x i16> %1
1935}
1936
; Compare %x and %y for equality per word and zero-extend the <16 x i1>
; result to <16 x i16>.
; The -mcpu=knl run is checked for a ymm vpcmpeqw followed by vpsrlw $15; the
; -mcpu=skx run is checked for a compare into %k1 followed by a zero-masked
; vmovdqu16 load from a RIP-relative address.
1937define <16 x i16> @zext_16xi1_to_16xi16(<16 x i16> %x, <16 x i16> %y) #0 {
1938; KNL-LABEL: zext_16xi1_to_16xi16:
1939; KNL:       ## BB#0:
1940; KNL-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
1941; KNL-NEXT:    vpsrlw $15, %ymm0, %ymm0
1942; KNL-NEXT:    retq
1943;
1944; SKX-LABEL: zext_16xi1_to_16xi16:
1945; SKX:       ## BB#0:
1946; SKX-NEXT:    vpcmpeqw %ymm1, %ymm0, %k1
1947; SKX-NEXT:    vmovdqu16 {{.*}}(%rip), %ymm0 {%k1} {z}
1948; SKX-NEXT:    retq
1949  %mask = icmp eq <16 x i16> %x, %y
1950  %1 = zext <16 x i1> %mask to <16 x i16>
1951  ret <16 x i16> %1
1952}
1953
1954
; Compare two <32 x i16> vectors for equality and zero-extend the <32 x i1>
; result to <32 x i8> (the result elements are narrower than the compared
; elements).
; The -mcpu=knl run is checked for two ymm vpcmpeqw, each widened with
; vpmovsxwd and narrowed to bytes with vpmovdb, then joined with vinsertf128
; and masked with vandps against a RIP-relative constant; the -mcpu=skx run is
; checked for one zmm compare into %k1 followed by a zero-masked ymm vmovdqu8
; load.
1955define <32 x i8> @zext_32xi1_to_32xi8(<32 x i16> %x, <32 x i16> %y) #0 {
1956; KNL-LABEL: zext_32xi1_to_32xi8:
1957; KNL:       ## BB#0:
1958; KNL-NEXT:    vpcmpeqw %ymm2, %ymm0, %ymm0
1959; KNL-NEXT:    vpmovsxwd %ymm0, %zmm0
1960; KNL-NEXT:    vpmovdb %zmm0, %xmm0
1961; KNL-NEXT:    vpcmpeqw %ymm3, %ymm1, %ymm1
1962; KNL-NEXT:    vpmovsxwd %ymm1, %zmm1
1963; KNL-NEXT:    vpmovdb %zmm1, %xmm1
1964; KNL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
1965; KNL-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
1966; KNL-NEXT:    retq
1967;
1968; SKX-LABEL: zext_32xi1_to_32xi8:
1969; SKX:       ## BB#0:
1970; SKX-NEXT:    vpcmpeqw %zmm1, %zmm0, %k1
1971; SKX-NEXT:    vmovdqu8 {{.*}}(%rip), %ymm0 {%k1} {z}
1972; SKX-NEXT:    retq
1973  %mask = icmp eq <32 x i16> %x, %y
1974  %1 = zext <32 x i1> %mask to <32 x i8>
1975  ret <32 x i8> %1
1976}
1977
; Compare two <4 x i8> vectors for equality and zero-extend the <4 x i1>
; result to <4 x i32>.
; In both expected sequences the inputs are first ANDed with a constant that
; keeps only the low byte of each dword, then compared with vpcmpeqd. The
; -mcpu=knl run finishes with vpsrld $31; the -mcpu=skx run compares into %k1
; and finishes with a zero-masked vpbroadcastd from a RIP-relative address.
1978define <4 x i32> @zext_4xi1_to_4x32(<4 x i8> %x, <4 x i8> %y) #0 {
1979; KNL-LABEL: zext_4xi1_to_4x32:
1980; KNL:       ## BB#0:
1981; KNL-NEXT:    vmovdqa {{.*#+}} xmm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
1982; KNL-NEXT:    vpand %xmm2, %xmm1, %xmm1
1983; KNL-NEXT:    vpand %xmm2, %xmm0, %xmm0
1984; KNL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1985; KNL-NEXT:    vpsrld $31, %xmm0, %xmm0
1986; KNL-NEXT:    retq
1987;
1988; SKX-LABEL: zext_4xi1_to_4x32:
1989; SKX:       ## BB#0:
1990; SKX-NEXT:    vmovdqa64 {{.*#+}} xmm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
1991; SKX-NEXT:    vpandq %xmm2, %xmm1, %xmm1
1992; SKX-NEXT:    vpandq %xmm2, %xmm0, %xmm0
1993; SKX-NEXT:    vpcmpeqd %xmm1, %xmm0, %k1
1994; SKX-NEXT:    vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z}
1995; SKX-NEXT:    retq
1996  %mask = icmp eq <4 x i8> %x, %y
1997  %1 = zext <4 x i1> %mask to <4 x i32>
1998  ret <4 x i32> %1
1999}
2000
; Compare two <2 x i8> vectors for equality and zero-extend the <2 x i1>
; result to <2 x i64>.
; In both expected sequences the inputs are first ANDed with a constant that
; keeps only the low byte of each qword, then compared with vpcmpeqq. The
; -mcpu=knl run finishes with vpsrlq $63; the -mcpu=skx run compares into %k1
; and finishes with a zero-masked vmovdqa64 load from a RIP-relative address.
2001define <2 x i64> @zext_2xi1_to_2xi64(<2 x i8> %x, <2 x i8> %y) #0 {
2002; KNL-LABEL: zext_2xi1_to_2xi64:
2003; KNL:       ## BB#0:
2004; KNL-NEXT:    vmovdqa {{.*#+}} xmm2 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
2005; KNL-NEXT:    vpand %xmm2, %xmm1, %xmm1
2006; KNL-NEXT:    vpand %xmm2, %xmm0, %xmm0
2007; KNL-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
2008; KNL-NEXT:    vpsrlq $63, %xmm0, %xmm0
2009; KNL-NEXT:    retq
2010;
2011; SKX-LABEL: zext_2xi1_to_2xi64:
2012; SKX:       ## BB#0:
2013; SKX-NEXT:    vmovdqa64 {{.*#+}} xmm2 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
2014; SKX-NEXT:    vpandq %xmm2, %xmm1, %xmm1
2015; SKX-NEXT:    vpandq %xmm2, %xmm0, %xmm0
2016; SKX-NEXT:    vpcmpeqq %xmm1, %xmm0, %k1
2017; SKX-NEXT:    vmovdqa64 {{.*}}(%rip), %xmm0 {%k1} {z}
2018; SKX-NEXT:    retq
2019  %mask = icmp eq <2 x i8> %x, %y
2020  %1 = zext <2 x i1> %mask to <2 x i64>
2021  ret <2 x i64> %1
2022}
2023