• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=KNL
3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512dq,+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=SKX
4
; Masked zero-extending load, i8 -> i16, 8 lanes.
; Loads <8 x i8> from %i (align 1), zero-extends every lane to i16, then keeps a
; lane only where %mask is true; lanes with a false mask bit become zero.
; The avx512f-only run expects the mask to be applied with vpsllw/vpsraw/vpand,
; the avx512bw/vl/dq run expects a zero-masked vpmovzxbw fed from a k-register.
; NOTE(review): declared readnone although the body loads through %i - confirm
; the attribute is intended.
5define <8 x i16> @zext_8x8mem_to_8x16(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
6; KNL-LABEL: zext_8x8mem_to_8x16:
7; KNL:       # %bb.0:
8; KNL-NEXT:    vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
9; KNL-NEXT:    vpsllw $15, %xmm0, %xmm0
10; KNL-NEXT:    vpsraw $15, %xmm0, %xmm0
11; KNL-NEXT:    vpand %xmm1, %xmm0, %xmm0
12; KNL-NEXT:    retq
13;
14; SKX-LABEL: zext_8x8mem_to_8x16:
15; SKX:       # %bb.0:
16; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
17; SKX-NEXT:    vpmovw2m %xmm0, %k1
18; SKX-NEXT:    vpmovzxbw {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
19; SKX-NEXT:    retq
20  %a   = load <8 x i8>,<8 x i8> *%i,align 1
21  %x   = zext <8 x i8> %a to <8 x i16>
22  %ret = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> zeroinitializer
23  ret <8 x i16> %ret
24}
25
; Masked sign-extending load, i8 -> i16, 8 lanes.
; Loads <8 x i8> from %i (align 1), sign-extends every lane to i16, then selects
; the extended lane where %mask is true and zero where it is false.
; The avx512f-only run expects vpmovsxbw plus a vpsllw/vpsraw/vpand mask, the
; avx512bw/vl/dq run expects a single zero-masked vpmovsxbw from memory.
; NOTE(review): declared readnone although the body loads through %i - confirm
; the attribute is intended.
26define <8 x i16> @sext_8x8mem_to_8x16(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
27; KNL-LABEL: sext_8x8mem_to_8x16:
28; KNL:       # %bb.0:
29; KNL-NEXT:    vpmovsxbw (%rdi), %xmm1
30; KNL-NEXT:    vpsllw $15, %xmm0, %xmm0
31; KNL-NEXT:    vpsraw $15, %xmm0, %xmm0
32; KNL-NEXT:    vpand %xmm1, %xmm0, %xmm0
33; KNL-NEXT:    retq
34;
35; SKX-LABEL: sext_8x8mem_to_8x16:
36; SKX:       # %bb.0:
37; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
38; SKX-NEXT:    vpmovw2m %xmm0, %k1
39; SKX-NEXT:    vpmovsxbw (%rdi), %xmm0 {%k1} {z}
40; SKX-NEXT:    retq
41  %a   = load <8 x i8>,<8 x i8> *%i,align 1
42  %x   = sext <8 x i8> %a to <8 x i16>
43  %ret = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> zeroinitializer
44  ret <8 x i16> %ret
45}
46
47
; Masked zero-extending load, i8 -> i16, 16 lanes.
; Loads <16 x i8> from %i (align 1), zero-extends every lane to i16, then keeps a
; lane only where %mask is true; lanes with a false mask bit become zero.
; The avx512f-only run first widens the byte mask to words and masks with
; vpsllw/vpsraw/vpand; the avx512bw/vl/dq run uses vpmovb2m and a zero-masked
; vpmovzxbw.
; NOTE(review): declared readnone although the body loads through %i - confirm
; the attribute is intended.
48define <16 x i16> @zext_16x8mem_to_16x16(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone {
49; KNL-LABEL: zext_16x8mem_to_16x16:
50; KNL:       # %bb.0:
51; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
52; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
53; KNL-NEXT:    vpsllw $15, %ymm0, %ymm0
54; KNL-NEXT:    vpsraw $15, %ymm0, %ymm0
55; KNL-NEXT:    vpand %ymm1, %ymm0, %ymm0
56; KNL-NEXT:    retq
57;
58; SKX-LABEL: zext_16x8mem_to_16x16:
59; SKX:       # %bb.0:
60; SKX-NEXT:    vpsllw $7, %xmm0, %xmm0
61; SKX-NEXT:    vpmovb2m %xmm0, %k1
62; SKX-NEXT:    vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
63; SKX-NEXT:    retq
64  %a   = load <16 x i8>,<16 x i8> *%i,align 1
65  %x   = zext <16 x i8> %a to <16 x i16>
66  %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer
67  ret <16 x i16> %ret
68}
69
; Masked sign-extending load, i8 -> i16, 16 lanes.
; Loads <16 x i8> from %i (align 1), sign-extends every lane to i16, then selects
; the extended lane where %mask is true and zero where it is false.
; The avx512f-only run widens the byte mask to words and masks with
; vpsllw/vpsraw/vpand; the avx512bw/vl/dq run uses vpmovb2m and a zero-masked
; vpmovsxbw from memory.
; NOTE(review): declared readnone although the body loads through %i - confirm
; the attribute is intended.
70define <16 x i16> @sext_16x8mem_to_16x16(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone {
71; KNL-LABEL: sext_16x8mem_to_16x16:
72; KNL:       # %bb.0:
73; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
74; KNL-NEXT:    vpmovsxbw (%rdi), %ymm1
75; KNL-NEXT:    vpsllw $15, %ymm0, %ymm0
76; KNL-NEXT:    vpsraw $15, %ymm0, %ymm0
77; KNL-NEXT:    vpand %ymm1, %ymm0, %ymm0
78; KNL-NEXT:    retq
79;
80; SKX-LABEL: sext_16x8mem_to_16x16:
81; SKX:       # %bb.0:
82; SKX-NEXT:    vpsllw $7, %xmm0, %xmm0
83; SKX-NEXT:    vpmovb2m %xmm0, %k1
84; SKX-NEXT:    vpmovsxbw (%rdi), %ymm0 {%k1} {z}
85; SKX-NEXT:    retq
86  %a   = load <16 x i8>,<16 x i8> *%i,align 1
87  %x   = sext <16 x i8> %a to <16 x i16>
88  %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer
89  ret <16 x i16> %ret
90}
91
; Unmasked register zero-extension, i8 -> i16, 16 lanes.
; Returns zext of <16 x i8> %a as <16 x i16>. Both RUN lines share the same
; expected output, a single vpmovzxbw from xmm0 into ymm0.
92define <16 x i16> @zext_16x8_to_16x16(<16 x i8> %a ) nounwind readnone {
93; ALL-LABEL: zext_16x8_to_16x16:
94; ALL:       # %bb.0:
95; ALL-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
96; ALL-NEXT:    retq
97  %x   = zext <16 x i8> %a to <16 x i16>
98  ret <16 x i16> %x
99}
100
; Masked register zero-extension, i8 -> i16, 16 lanes.
; Zero-extends <16 x i8> %a to <16 x i16> and selects the extended lane where
; %mask is true, zero where it is false.
; The avx512f-only run widens both the mask (xmm1) and the data (xmm0) and masks
; with vpsllw/vpsraw/vpand; the avx512bw/vl/dq run uses vpmovb2m and a
; zero-masked vpmovzxbw.
101define <16 x i16> @zext_16x8_to_16x16_mask(<16 x i8> %a ,<16 x i1> %mask) nounwind readnone {
102; KNL-LABEL: zext_16x8_to_16x16_mask:
103; KNL:       # %bb.0:
104; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
105; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
106; KNL-NEXT:    vpsllw $15, %ymm1, %ymm1
107; KNL-NEXT:    vpsraw $15, %ymm1, %ymm1
108; KNL-NEXT:    vpand %ymm0, %ymm1, %ymm0
109; KNL-NEXT:    retq
110;
111; SKX-LABEL: zext_16x8_to_16x16_mask:
112; SKX:       # %bb.0:
113; SKX-NEXT:    vpsllw $7, %xmm1, %xmm1
114; SKX-NEXT:    vpmovb2m %xmm1, %k1
115; SKX-NEXT:    vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
116; SKX-NEXT:    retq
117  %x   = zext <16 x i8> %a to <16 x i16>
118  %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer
119  ret <16 x i16> %ret
120}
121
; Unmasked register sign-extension, i8 -> i16, 16 lanes.
; Returns sext of <16 x i8> %a as <16 x i16>. Both RUN lines share the same
; expected output, a single vpmovsxbw from xmm0 into ymm0.
122define <16 x i16> @sext_16x8_to_16x16(<16 x i8> %a ) nounwind readnone {
123; ALL-LABEL: sext_16x8_to_16x16:
124; ALL:       # %bb.0:
125; ALL-NEXT:    vpmovsxbw %xmm0, %ymm0
126; ALL-NEXT:    retq
127  %x   = sext <16 x i8> %a to <16 x i16>
128  ret <16 x i16> %x
129}
130
; Masked register sign-extension, i8 -> i16, 16 lanes.
; Sign-extends <16 x i8> %a to <16 x i16> and selects the extended lane where
; %mask is true, zero where it is false.
; The avx512f-only run widens the mask, sign-extends the data and masks with
; vpsllw/vpsraw/vpand; the avx512bw/vl/dq run uses vpmovb2m and a zero-masked
; vpmovsxbw.
131define <16 x i16> @sext_16x8_to_16x16_mask(<16 x i8> %a ,<16 x i1> %mask) nounwind readnone {
132; KNL-LABEL: sext_16x8_to_16x16_mask:
133; KNL:       # %bb.0:
134; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
135; KNL-NEXT:    vpmovsxbw %xmm0, %ymm0
136; KNL-NEXT:    vpsllw $15, %ymm1, %ymm1
137; KNL-NEXT:    vpsraw $15, %ymm1, %ymm1
138; KNL-NEXT:    vpand %ymm0, %ymm1, %ymm0
139; KNL-NEXT:    retq
140;
141; SKX-LABEL: sext_16x8_to_16x16_mask:
142; SKX:       # %bb.0:
143; SKX-NEXT:    vpsllw $7, %xmm1, %xmm1
144; SKX-NEXT:    vpmovb2m %xmm1, %k1
145; SKX-NEXT:    vpmovsxbw %xmm0, %ymm0 {%k1} {z}
146; SKX-NEXT:    retq
147  %x   = sext <16 x i8> %a to <16 x i16>
148  %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer
149  ret <16 x i16> %ret
150}
151
; Masked zero-extending load, i8 -> i16, 32 lanes.
; Loads <32 x i8> from %i (align 1), zero-extends every lane to i16, then keeps a
; lane only where %mask is true; lanes with a false mask bit become zero.
; The avx512f-only run is expected to split the work into two ymm halves
; (vextracti128 on the mask, two vpmovzxbw loads, two vpsllw/vpsraw/vpand
; groups); the avx512bw/vl/dq run expects one zero-masked zmm vpmovzxbw.
; NOTE(review): declared readnone although the body loads through %i - confirm
; the attribute is intended.
152define <32 x i16> @zext_32x8mem_to_32x16(<32 x i8> *%i , <32 x i1> %mask) nounwind readnone {
153; KNL-LABEL: zext_32x8mem_to_32x16:
154; KNL:       # %bb.0:
155; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm1
156; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
157; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
158; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
159; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm3 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
160; KNL-NEXT:    vpsllw $15, %ymm0, %ymm0
161; KNL-NEXT:    vpsraw $15, %ymm0, %ymm0
162; KNL-NEXT:    vpand %ymm3, %ymm0, %ymm0
163; KNL-NEXT:    vpsllw $15, %ymm1, %ymm1
164; KNL-NEXT:    vpsraw $15, %ymm1, %ymm1
165; KNL-NEXT:    vpand %ymm2, %ymm1, %ymm1
166; KNL-NEXT:    retq
167;
168; SKX-LABEL: zext_32x8mem_to_32x16:
169; SKX:       # %bb.0:
170; SKX-NEXT:    vpsllw $7, %ymm0, %ymm0
171; SKX-NEXT:    vpmovb2m %ymm0, %k1
172; SKX-NEXT:    vpmovzxbw {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero,mem[16],zero,mem[17],zero,mem[18],zero,mem[19],zero,mem[20],zero,mem[21],zero,mem[22],zero,mem[23],zero,mem[24],zero,mem[25],zero,mem[26],zero,mem[27],zero,mem[28],zero,mem[29],zero,mem[30],zero,mem[31],zero
173; SKX-NEXT:    retq
174  %a   = load <32 x i8>,<32 x i8> *%i,align 1
175  %x   = zext <32 x i8> %a to <32 x i16>
176  %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
177  ret <32 x i16> %ret
178}
179
; Masked sign-extending load, i8 -> i16, 32 lanes.
; Loads <32 x i8> from %i (align 1), sign-extends every lane to i16, then selects
; the extended lane where %mask is true and zero where it is false.
; The avx512f-only run is expected to split the work into two ymm halves
; (vpmovsxbw from (%rdi) and 16(%rdi), each masked with vpsllw/vpsraw/vpand);
; the avx512bw/vl/dq run expects one zero-masked zmm vpmovsxbw.
; NOTE(review): declared readnone although the body loads through %i - confirm
; the attribute is intended.
180define <32 x i16> @sext_32x8mem_to_32x16(<32 x i8> *%i , <32 x i1> %mask) nounwind readnone {
181; KNL-LABEL: sext_32x8mem_to_32x16:
182; KNL:       # %bb.0:
183; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm1
184; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
185; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
186; KNL-NEXT:    vpmovsxbw 16(%rdi), %ymm2
187; KNL-NEXT:    vpmovsxbw (%rdi), %ymm3
188; KNL-NEXT:    vpsllw $15, %ymm0, %ymm0
189; KNL-NEXT:    vpsraw $15, %ymm0, %ymm0
190; KNL-NEXT:    vpand %ymm3, %ymm0, %ymm0
191; KNL-NEXT:    vpsllw $15, %ymm1, %ymm1
192; KNL-NEXT:    vpsraw $15, %ymm1, %ymm1
193; KNL-NEXT:    vpand %ymm2, %ymm1, %ymm1
194; KNL-NEXT:    retq
195;
196; SKX-LABEL: sext_32x8mem_to_32x16:
197; SKX:       # %bb.0:
198; SKX-NEXT:    vpsllw $7, %ymm0, %ymm0
199; SKX-NEXT:    vpmovb2m %ymm0, %k1
200; SKX-NEXT:    vpmovsxbw (%rdi), %zmm0 {%k1} {z}
201; SKX-NEXT:    retq
202  %a   = load <32 x i8>,<32 x i8> *%i,align 1
203  %x   = sext <32 x i8> %a to <32 x i16>
204  %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
205  ret <32 x i16> %ret
206}
207
; Unmasked register zero-extension, i8 -> i16, 32 lanes.
; Returns zext of <32 x i8> %a as <32 x i16>.
; The avx512f-only run expects the result in two ymm halves (vextracti128 plus
; two vpmovzxbw); the avx512bw/vl/dq run expects one zmm vpmovzxbw.
208define <32 x i16> @zext_32x8_to_32x16(<32 x i8> %a ) nounwind readnone {
209; KNL-LABEL: zext_32x8_to_32x16:
210; KNL:       # %bb.0:
211; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
212; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm0
213; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
214; KNL-NEXT:    vmovdqa %ymm2, %ymm0
215; KNL-NEXT:    retq
216;
217; SKX-LABEL: zext_32x8_to_32x16:
218; SKX:       # %bb.0:
219; SKX-NEXT:    vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
220; SKX-NEXT:    retq
221  %x   = zext <32 x i8> %a to <32 x i16>
222  ret <32 x i16> %x
223}
224
; Masked register zero-extension, i8 -> i16, 32 lanes.
; Zero-extends <32 x i8> %a to <32 x i16> and selects the extended lane where
; %mask is true, zero where it is false.
; The avx512f-only run is expected to split both mask and data into two ymm
; halves and mask each with vpsllw/vpsraw/vpand; the avx512bw/vl/dq run expects
; vpmovb2m and one zero-masked zmm vpmovzxbw.
225define <32 x i16> @zext_32x8_to_32x16_mask(<32 x i8> %a ,<32 x i1> %mask) nounwind readnone {
226; KNL-LABEL: zext_32x8_to_32x16_mask:
227; KNL:       # %bb.0:
228; KNL-NEXT:    vextracti128 $1, %ymm1, %xmm2
229; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero
230; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
231; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm3
232; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero,xmm3[8],zero,xmm3[9],zero,xmm3[10],zero,xmm3[11],zero,xmm3[12],zero,xmm3[13],zero,xmm3[14],zero,xmm3[15],zero
233; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
234; KNL-NEXT:    vpsllw $15, %ymm1, %ymm1
235; KNL-NEXT:    vpsraw $15, %ymm1, %ymm1
236; KNL-NEXT:    vpand %ymm0, %ymm1, %ymm0
237; KNL-NEXT:    vpsllw $15, %ymm2, %ymm1
238; KNL-NEXT:    vpsraw $15, %ymm1, %ymm1
239; KNL-NEXT:    vpand %ymm3, %ymm1, %ymm1
240; KNL-NEXT:    retq
241;
242; SKX-LABEL: zext_32x8_to_32x16_mask:
243; SKX:       # %bb.0:
244; SKX-NEXT:    vpsllw $7, %ymm1, %ymm1
245; SKX-NEXT:    vpmovb2m %ymm1, %k1
246; SKX-NEXT:    vpmovzxbw {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
247; SKX-NEXT:    retq
248  %x   = zext <32 x i8> %a to <32 x i16>
249  %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
250  ret <32 x i16> %ret
251}
252
; Unmasked register sign-extension, i8 -> i16, 32 lanes.
; Returns sext of <32 x i8> %a as <32 x i16>.
; The avx512f-only run expects the result in two ymm halves (vextracti128 plus
; two vpmovsxbw); the avx512bw/vl/dq run expects one zmm vpmovsxbw.
253define <32 x i16> @sext_32x8_to_32x16(<32 x i8> %a ) nounwind readnone {
254; KNL-LABEL: sext_32x8_to_32x16:
255; KNL:       # %bb.0:
256; KNL-NEXT:    vpmovsxbw %xmm0, %ymm2
257; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm0
258; KNL-NEXT:    vpmovsxbw %xmm0, %ymm1
259; KNL-NEXT:    vmovdqa %ymm2, %ymm0
260; KNL-NEXT:    retq
261;
262; SKX-LABEL: sext_32x8_to_32x16:
263; SKX:       # %bb.0:
264; SKX-NEXT:    vpmovsxbw %ymm0, %zmm0
265; SKX-NEXT:    retq
266  %x   = sext <32 x i8> %a to <32 x i16>
267  ret <32 x i16> %x
268}
269
; Masked register sign-extension, i8 -> i16, 32 lanes.
; Sign-extends <32 x i8> %a to <32 x i16> and selects the extended lane where
; %mask is true, zero where it is false.
; The avx512f-only run is expected to split both mask and data into two ymm
; halves and mask each with vpsllw/vpsraw/vpand; the avx512bw/vl/dq run expects
; vpmovb2m and one zero-masked zmm vpmovsxbw.
270define <32 x i16> @sext_32x8_to_32x16_mask(<32 x i8> %a ,<32 x i1> %mask) nounwind readnone {
271; KNL-LABEL: sext_32x8_to_32x16_mask:
272; KNL:       # %bb.0:
273; KNL-NEXT:    vextracti128 $1, %ymm1, %xmm2
274; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero
275; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
276; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm3
277; KNL-NEXT:    vpmovsxbw %xmm3, %ymm3
278; KNL-NEXT:    vpmovsxbw %xmm0, %ymm0
279; KNL-NEXT:    vpsllw $15, %ymm1, %ymm1
280; KNL-NEXT:    vpsraw $15, %ymm1, %ymm1
281; KNL-NEXT:    vpand %ymm0, %ymm1, %ymm0
282; KNL-NEXT:    vpsllw $15, %ymm2, %ymm1
283; KNL-NEXT:    vpsraw $15, %ymm1, %ymm1
284; KNL-NEXT:    vpand %ymm3, %ymm1, %ymm1
285; KNL-NEXT:    retq
286;
287; SKX-LABEL: sext_32x8_to_32x16_mask:
288; SKX:       # %bb.0:
289; SKX-NEXT:    vpsllw $7, %ymm1, %ymm1
290; SKX-NEXT:    vpmovb2m %ymm1, %k1
291; SKX-NEXT:    vpmovsxbw %ymm0, %zmm0 {%k1} {z}
292; SKX-NEXT:    retq
293  %x   = sext <32 x i8> %a to <32 x i16>
294  %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
295  ret <32 x i16> %ret
296}
297
; Masked zero-extending load, i8 -> i32, 4 lanes.
; Loads <4 x i8> from %i (align 1), zero-extends every lane to i32, then keeps a
; lane only where %mask is true; lanes with a false mask bit become zero.
; The avx512f-only run expects an unmasked vpmovzxbd followed by a zero-masked
; zmm vmovdqa32 (and a vzeroupper); the avx512bw/vl/dq run expects a zero-masked
; xmm vpmovzxbd directly.
; NOTE(review): declared readnone although the body loads through %i - confirm
; the attribute is intended.
298define <4 x i32> @zext_4x8mem_to_4x32(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone {
299; KNL-LABEL: zext_4x8mem_to_4x32:
300; KNL:       # %bb.0:
301; KNL-NEXT:    vpslld $31, %xmm0, %xmm0
302; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k1
303; KNL-NEXT:    vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
304; KNL-NEXT:    vmovdqa32 %zmm0, %zmm0 {%k1} {z}
305; KNL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
306; KNL-NEXT:    vzeroupper
307; KNL-NEXT:    retq
308;
309; SKX-LABEL: zext_4x8mem_to_4x32:
310; SKX:       # %bb.0:
311; SKX-NEXT:    vpslld $31, %xmm0, %xmm0
312; SKX-NEXT:    vpmovd2m %xmm0, %k1
313; SKX-NEXT:    vpmovzxbd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
314; SKX-NEXT:    retq
315  %a   = load <4 x i8>,<4 x i8> *%i,align 1
316  %x   = zext <4 x i8> %a to <4 x i32>
317  %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
318  ret <4 x i32> %ret
319}
320
; Masked sign-extending load, i8 -> i32, 4 lanes.
; Loads <4 x i8> from %i (align 1), sign-extends every lane to i32, then selects
; the extended lane where %mask is true and zero where it is false.
; The avx512f-only run expects an unmasked vpmovsxbd followed by a zero-masked
; zmm vmovdqa32 (and a vzeroupper); the avx512bw/vl/dq run expects a zero-masked
; xmm vpmovsxbd directly.
; NOTE(review): declared readnone although the body loads through %i - confirm
; the attribute is intended.
321define <4 x i32> @sext_4x8mem_to_4x32(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone {
322; KNL-LABEL: sext_4x8mem_to_4x32:
323; KNL:       # %bb.0:
324; KNL-NEXT:    vpslld $31, %xmm0, %xmm0
325; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k1
326; KNL-NEXT:    vpmovsxbd (%rdi), %xmm0
327; KNL-NEXT:    vmovdqa32 %zmm0, %zmm0 {%k1} {z}
328; KNL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
329; KNL-NEXT:    vzeroupper
330; KNL-NEXT:    retq
331;
332; SKX-LABEL: sext_4x8mem_to_4x32:
333; SKX:       # %bb.0:
334; SKX-NEXT:    vpslld $31, %xmm0, %xmm0
335; SKX-NEXT:    vpmovd2m %xmm0, %k1
336; SKX-NEXT:    vpmovsxbd (%rdi), %xmm0 {%k1} {z}
337; SKX-NEXT:    retq
338  %a   = load <4 x i8>,<4 x i8> *%i,align 1
339  %x   = sext <4 x i8> %a to <4 x i32>
340  %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
341  ret <4 x i32> %ret
342}
343
; Masked zero-extending load, i8 -> i32, 8 lanes.
; Loads <8 x i8> from %i (align 1), zero-extends every lane to i32, then keeps a
; lane only where %mask is true; lanes with a false mask bit become zero.
; The avx512f-only run builds k1 via vpmovsxwq/vpsllq/vptestmq and applies it
; with a zero-masked zmm vmovdqa32 after an unmasked vpmovzxbd; the
; avx512bw/vl/dq run uses vpmovw2m and a zero-masked ymm vpmovzxbd.
; NOTE(review): declared readnone although the body loads through %i - confirm
; the attribute is intended.
344define <8 x i32> @zext_8x8mem_to_8x32(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
345; KNL-LABEL: zext_8x8mem_to_8x32:
346; KNL:       # %bb.0:
347; KNL-NEXT:    vpmovsxwq %xmm0, %zmm0
348; KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
349; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k1
350; KNL-NEXT:    vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
351; KNL-NEXT:    vmovdqa32 %zmm0, %zmm0 {%k1} {z}
352; KNL-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
353; KNL-NEXT:    retq
354;
355; SKX-LABEL: zext_8x8mem_to_8x32:
356; SKX:       # %bb.0:
357; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
358; SKX-NEXT:    vpmovw2m %xmm0, %k1
359; SKX-NEXT:    vpmovzxbd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
360; SKX-NEXT:    retq
361  %a   = load <8 x i8>,<8 x i8> *%i,align 1
362  %x   = zext <8 x i8> %a to <8 x i32>
363  %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
364  ret <8 x i32> %ret
365}
366
; Masked sign-extending load, i8 -> i32, 8 lanes.
; Loads <8 x i8> from %i (align 1), sign-extends every lane to i32, then selects
; the extended lane where %mask is true and zero where it is false.
; The avx512f-only run builds k1 via vpmovsxwq/vpsllq/vptestmq and applies it
; with a zero-masked zmm vmovdqa32 after an unmasked vpmovsxbd; the
; avx512bw/vl/dq run uses vpmovw2m and a zero-masked ymm vpmovsxbd.
; NOTE(review): declared readnone although the body loads through %i - confirm
; the attribute is intended.
367define <8 x i32> @sext_8x8mem_to_8x32(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
368; KNL-LABEL: sext_8x8mem_to_8x32:
369; KNL:       # %bb.0:
370; KNL-NEXT:    vpmovsxwq %xmm0, %zmm0
371; KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
372; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k1
373; KNL-NEXT:    vpmovsxbd (%rdi), %ymm0
374; KNL-NEXT:    vmovdqa32 %zmm0, %zmm0 {%k1} {z}
375; KNL-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
376; KNL-NEXT:    retq
377;
378; SKX-LABEL: sext_8x8mem_to_8x32:
379; SKX:       # %bb.0:
380; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
381; SKX-NEXT:    vpmovw2m %xmm0, %k1
382; SKX-NEXT:    vpmovsxbd (%rdi), %ymm0 {%k1} {z}
383; SKX-NEXT:    retq
384  %a   = load <8 x i8>,<8 x i8> *%i,align 1
385  %x   = sext <8 x i8> %a to <8 x i32>
386  %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
387  ret <8 x i32> %ret
388}
389
; Masked zero-extending load, i8 -> i32, 16 lanes.
; Loads <16 x i8> from %i (align 1), zero-extends every lane to i32, then keeps a
; lane only where %mask is true; lanes with a false mask bit become zero.
; Both runs expect a zero-masked zmm vpmovzxbd from memory; they differ only in
; how k1 is produced (vpmovsxbd/vpslld/vptestmd versus vpsllw/vpmovb2m).
; NOTE(review): declared readnone although the body loads through %i - confirm
; the attribute is intended.
390define <16 x i32> @zext_16x8mem_to_16x32(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone {
391; KNL-LABEL: zext_16x8mem_to_16x32:
392; KNL:       # %bb.0:
393; KNL-NEXT:    vpmovsxbd %xmm0, %zmm0
394; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
395; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k1
396; KNL-NEXT:    vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero
397; KNL-NEXT:    retq
398;
399; SKX-LABEL: zext_16x8mem_to_16x32:
400; SKX:       # %bb.0:
401; SKX-NEXT:    vpsllw $7, %xmm0, %xmm0
402; SKX-NEXT:    vpmovb2m %xmm0, %k1
403; SKX-NEXT:    vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero
404; SKX-NEXT:    retq
405  %a   = load <16 x i8>,<16 x i8> *%i,align 1
406  %x   = zext <16 x i8> %a to <16 x i32>
407  %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
408  ret <16 x i32> %ret
409}
410
; Masked sign-extending load, i8 -> i32, 16 lanes.
; Loads <16 x i8> from %i (align 1), sign-extends every lane to i32, then selects
; the extended lane where %mask is true and zero where it is false.
; Both runs expect a zero-masked zmm vpmovsxbd from memory; they differ only in
; how k1 is produced (vpmovsxbd/vpslld/vptestmd versus vpsllw/vpmovb2m).
; NOTE(review): declared readnone although the body loads through %i - confirm
; the attribute is intended.
411define <16 x i32> @sext_16x8mem_to_16x32(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone {
412; KNL-LABEL: sext_16x8mem_to_16x32:
413; KNL:       # %bb.0:
414; KNL-NEXT:    vpmovsxbd %xmm0, %zmm0
415; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
416; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k1
417; KNL-NEXT:    vpmovsxbd (%rdi), %zmm0 {%k1} {z}
418; KNL-NEXT:    retq
419;
420; SKX-LABEL: sext_16x8mem_to_16x32:
421; SKX:       # %bb.0:
422; SKX-NEXT:    vpsllw $7, %xmm0, %xmm0
423; SKX-NEXT:    vpmovb2m %xmm0, %k1
424; SKX-NEXT:    vpmovsxbd (%rdi), %zmm0 {%k1} {z}
425; SKX-NEXT:    retq
426  %a   = load <16 x i8>,<16 x i8> *%i,align 1
427  %x   = sext <16 x i8> %a to <16 x i32>
428  %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
429  ret <16 x i32> %ret
430}
431
; Masked register zero-extension, i8 -> i32, 16 lanes.
; Zero-extends <16 x i8> %a to <16 x i32> and selects the extended lane where
; %mask is true, zero where it is false.
; Both runs expect a zero-masked zmm vpmovzxbd from xmm0; they differ only in
; how k1 is produced from the mask in xmm1.
432define <16 x i32> @zext_16x8_to_16x32_mask(<16 x i8> %a , <16 x i1> %mask) nounwind readnone {
433; KNL-LABEL: zext_16x8_to_16x32_mask:
434; KNL:       # %bb.0:
435; KNL-NEXT:    vpmovsxbd %xmm1, %zmm1
436; KNL-NEXT:    vpslld $31, %zmm1, %zmm1
437; KNL-NEXT:    vptestmd %zmm1, %zmm1, %k1
438; KNL-NEXT:    vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
439; KNL-NEXT:    retq
440;
441; SKX-LABEL: zext_16x8_to_16x32_mask:
442; SKX:       # %bb.0:
443; SKX-NEXT:    vpsllw $7, %xmm1, %xmm1
444; SKX-NEXT:    vpmovb2m %xmm1, %k1
445; SKX-NEXT:    vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
446; SKX-NEXT:    retq
447  %x   = zext <16 x i8> %a to <16 x i32>
448  %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
449  ret <16 x i32> %ret
450}
451
; Masked register sign-extension, i8 -> i32, 16 lanes.
; Sign-extends <16 x i8> %a to <16 x i32> and selects the extended lane where
; %mask is true, zero where it is false.
; Both runs expect a zero-masked zmm vpmovsxbd from xmm0; they differ only in
; how k1 is produced from the mask in xmm1.
452define <16 x i32> @sext_16x8_to_16x32_mask(<16 x i8> %a , <16 x i1> %mask) nounwind readnone {
453; KNL-LABEL: sext_16x8_to_16x32_mask:
454; KNL:       # %bb.0:
455; KNL-NEXT:    vpmovsxbd %xmm1, %zmm1
456; KNL-NEXT:    vpslld $31, %zmm1, %zmm1
457; KNL-NEXT:    vptestmd %zmm1, %zmm1, %k1
458; KNL-NEXT:    vpmovsxbd %xmm0, %zmm0 {%k1} {z}
459; KNL-NEXT:    retq
460;
461; SKX-LABEL: sext_16x8_to_16x32_mask:
462; SKX:       # %bb.0:
463; SKX-NEXT:    vpsllw $7, %xmm1, %xmm1
464; SKX-NEXT:    vpmovb2m %xmm1, %k1
465; SKX-NEXT:    vpmovsxbd %xmm0, %zmm0 {%k1} {z}
466; SKX-NEXT:    retq
467  %x   = sext <16 x i8> %a to <16 x i32>
468  %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
469  ret <16 x i32> %ret
470}
471
; Unmasked register zero-extension, i8 -> i32, 16 lanes.
; Returns zext of <16 x i8> %i as <16 x i32>. Both RUN lines share the same
; expected output, a single vpmovzxbd from xmm0 into zmm0.
472define <16 x i32> @zext_16x8_to_16x32(<16 x i8> %i) nounwind readnone {
473; ALL-LABEL: zext_16x8_to_16x32:
474; ALL:       # %bb.0:
475; ALL-NEXT:    vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
476; ALL-NEXT:    retq
477  %x = zext <16 x i8> %i to <16 x i32>
478  ret <16 x i32> %x
479}
480
; Unmasked register sign-extension, i8 -> i32, 16 lanes.
; Returns sext of <16 x i8> %i as <16 x i32>. Both RUN lines share the same
; expected output, a single vpmovsxbd from xmm0 into zmm0.
481define <16 x i32> @sext_16x8_to_16x32(<16 x i8> %i) nounwind readnone {
482; ALL-LABEL: sext_16x8_to_16x32:
483; ALL:       # %bb.0:
484; ALL-NEXT:    vpmovsxbd %xmm0, %zmm0
485; ALL-NEXT:    retq
486  %x = sext <16 x i8> %i to <16 x i32>
487  ret <16 x i32> %x
488}
489
; Masked zero-extending load, i8 -> i64, 2 lanes.
; Loads <2 x i8> from %i (align 1), zero-extends every lane to i64, then keeps a
; lane only where %mask is true; lanes with a false mask bit become zero.
; The avx512f-only run expects an unmasked vpmovzxbq followed by a zero-masked
; zmm vmovdqa64 (and a vzeroupper); the avx512bw/vl/dq run expects vpmovq2m and
; a zero-masked xmm vpmovzxbq.
; NOTE(review): declared readnone although the body loads through %i - confirm
; the attribute is intended.
490define <2 x i64> @zext_2x8mem_to_2x64(<2 x i8> *%i , <2 x i1> %mask) nounwind readnone {
491; KNL-LABEL: zext_2x8mem_to_2x64:
492; KNL:       # %bb.0:
493; KNL-NEXT:    vpsllq $63, %xmm0, %xmm0
494; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k1
495; KNL-NEXT:    vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
496; KNL-NEXT:    vmovdqa64 %zmm0, %zmm0 {%k1} {z}
497; KNL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
498; KNL-NEXT:    vzeroupper
499; KNL-NEXT:    retq
500;
501; SKX-LABEL: zext_2x8mem_to_2x64:
502; SKX:       # %bb.0:
503; SKX-NEXT:    vpsllq $63, %xmm0, %xmm0
504; SKX-NEXT:    vpmovq2m %xmm0, %k1
505; SKX-NEXT:    vpmovzxbq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
506; SKX-NEXT:    retq
507  %a   = load <2 x i8>,<2 x i8> *%i,align 1
508  %x   = zext <2 x i8> %a to <2 x i64>
509  %ret = select <2 x  i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
510  ret <2 x i64> %ret
511}
; Masked sign-extending load, i8 -> i64, 2 lanes.
; Loads <2 x i8> from %i (align 1), sign-extends every lane to i64, then selects
; the extended lane where %mask is true and zero where it is false.
; The avx512f-only run expects an unmasked vpmovsxbq followed by a zero-masked
; zmm vmovdqa64 (and a vzeroupper); the avx512bw/vl/dq run expects vpmovq2m and
; a zero-masked xmm vpmovsxbq.
; NOTE(review): declared readnone although the body loads through %i - confirm
; the attribute is intended.
512define <2 x i64> @sext_2x8mem_to_2x64mask(<2 x i8> *%i , <2 x i1> %mask) nounwind readnone {
513; KNL-LABEL: sext_2x8mem_to_2x64mask:
514; KNL:       # %bb.0:
515; KNL-NEXT:    vpsllq $63, %xmm0, %xmm0
516; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k1
517; KNL-NEXT:    vpmovsxbq (%rdi), %xmm0
518; KNL-NEXT:    vmovdqa64 %zmm0, %zmm0 {%k1} {z}
519; KNL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
520; KNL-NEXT:    vzeroupper
521; KNL-NEXT:    retq
522;
523; SKX-LABEL: sext_2x8mem_to_2x64mask:
524; SKX:       # %bb.0:
525; SKX-NEXT:    vpsllq $63, %xmm0, %xmm0
526; SKX-NEXT:    vpmovq2m %xmm0, %k1
527; SKX-NEXT:    vpmovsxbq (%rdi), %xmm0 {%k1} {z}
528; SKX-NEXT:    retq
529  %a   = load <2 x i8>,<2 x i8> *%i,align 1
530  %x   = sext <2 x i8> %a to <2 x i64>
531  %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
532  ret <2 x i64> %ret
533}
; Unmasked sign-extending load, i8 -> i64, 2 lanes.
; Loads <2 x i8> from %i (align 1) and returns its sext to <2 x i64>. Both RUN
; lines share the same expected output, a single vpmovsxbq from (%rdi).
; NOTE(review): declared readnone although the body loads through %i - confirm
; the attribute is intended.
534define <2 x i64> @sext_2x8mem_to_2x64(<2 x i8> *%i) nounwind readnone {
535; ALL-LABEL: sext_2x8mem_to_2x64:
536; ALL:       # %bb.0:
537; ALL-NEXT:    vpmovsxbq (%rdi), %xmm0
538; ALL-NEXT:    retq
539  %a   = load <2 x i8>,<2 x i8> *%i,align 1
540  %x   = sext <2 x i8> %a to <2 x i64>
541  ret <2 x i64> %x
542}
543
; Masked zero-extending load, i8 -> i64, 4 lanes.
; Loads <4 x i8> from %i (align 1), zero-extends every lane to i64, then keeps a
; lane only where %mask is true; lanes with a false mask bit become zero.
; The avx512f-only run expects an unmasked ymm vpmovzxbq followed by a
; zero-masked zmm vmovdqa64; the avx512bw/vl/dq run expects vpmovd2m and a
; zero-masked ymm vpmovzxbq.
; NOTE(review): declared readnone although the body loads through %i - confirm
; the attribute is intended.
544define <4 x i64> @zext_4x8mem_to_4x64(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone {
545; KNL-LABEL: zext_4x8mem_to_4x64:
546; KNL:       # %bb.0:
547; KNL-NEXT:    vpslld $31, %xmm0, %xmm0
548; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k1
549; KNL-NEXT:    vpmovzxbq {{.*#+}} ymm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero
550; KNL-NEXT:    vmovdqa64 %zmm0, %zmm0 {%k1} {z}
551; KNL-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
552; KNL-NEXT:    retq
553;
554; SKX-LABEL: zext_4x8mem_to_4x64:
555; SKX:       # %bb.0:
556; SKX-NEXT:    vpslld $31, %xmm0, %xmm0
557; SKX-NEXT:    vpmovd2m %xmm0, %k1
558; SKX-NEXT:    vpmovzxbq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero
559; SKX-NEXT:    retq
560  %a   = load <4 x i8>,<4 x i8> *%i,align 1
561  %x   = zext <4 x i8> %a to <4 x i64>
562  %ret = select <4 x  i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
563  ret <4 x i64> %ret
564}
565
; Masked sign-extending load, i8 -> i64, 4 lanes.
; Loads <4 x i8> from %i (align 1), sign-extends every lane to i64, then selects
; the extended lane where %mask is true and zero where it is false.
; The avx512f-only run expects an unmasked ymm vpmovsxbq followed by a
; zero-masked zmm vmovdqa64; the avx512bw/vl/dq run expects vpmovd2m and a
; zero-masked ymm vpmovsxbq.
; NOTE(review): declared readnone although the body loads through %i - confirm
; the attribute is intended.
566define <4 x i64> @sext_4x8mem_to_4x64mask(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone {
567; KNL-LABEL: sext_4x8mem_to_4x64mask:
568; KNL:       # %bb.0:
569; KNL-NEXT:    vpslld $31, %xmm0, %xmm0
570; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k1
571; KNL-NEXT:    vpmovsxbq (%rdi), %ymm0
572; KNL-NEXT:    vmovdqa64 %zmm0, %zmm0 {%k1} {z}
573; KNL-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
574; KNL-NEXT:    retq
575;
576; SKX-LABEL: sext_4x8mem_to_4x64mask:
577; SKX:       # %bb.0:
578; SKX-NEXT:    vpslld $31, %xmm0, %xmm0
579; SKX-NEXT:    vpmovd2m %xmm0, %k1
580; SKX-NEXT:    vpmovsxbq (%rdi), %ymm0 {%k1} {z}
581; SKX-NEXT:    retq
582  %a   = load <4 x i8>,<4 x i8> *%i,align 1
583  %x   = sext <4 x i8> %a to <4 x i64>
584  %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
585  ret <4 x i64> %ret
586}
587
; Unmasked sign-extending load, i8 -> i64, 4 lanes.
; Loads <4 x i8> from %i (align 1) and returns its sext to <4 x i64>. Both RUN
; lines share the same expected output, a single ymm vpmovsxbq from (%rdi).
; NOTE(review): declared readnone although the body loads through %i - confirm
; the attribute is intended.
588define <4 x i64> @sext_4x8mem_to_4x64(<4 x i8> *%i) nounwind readnone {
589; ALL-LABEL: sext_4x8mem_to_4x64:
590; ALL:       # %bb.0:
591; ALL-NEXT:    vpmovsxbq (%rdi), %ymm0
592; ALL-NEXT:    retq
593  %a   = load <4 x i8>,<4 x i8> *%i,align 1
594  %x   = sext <4 x i8> %a to <4 x i64>
595  ret <4 x i64> %x
596}
597
; Masked zero-extending load, i8 -> i64, 8 lanes.
; Loads <8 x i8> from %i (align 1), zero-extends every lane to i64, then keeps a
; lane only where %mask is true; lanes with a false mask bit become zero.
; Both runs expect a zero-masked zmm vpmovzxbq from memory; they differ only in
; how k1 is produced (vpmovsxwq/vpsllq/vptestmq versus vpsllw/vpmovw2m).
; NOTE(review): declared readnone although the body loads through %i - confirm
; the attribute is intended.
598define <8 x i64> @zext_8x8mem_to_8x64(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
599; KNL-LABEL: zext_8x8mem_to_8x64:
600; KNL:       # %bb.0:
601; KNL-NEXT:    vpmovsxwq %xmm0, %zmm0
602; KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
603; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k1
604; KNL-NEXT:    vpmovzxbq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero
605; KNL-NEXT:    retq
606;
607; SKX-LABEL: zext_8x8mem_to_8x64:
608; SKX:       # %bb.0:
609; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
610; SKX-NEXT:    vpmovw2m %xmm0, %k1
611; SKX-NEXT:    vpmovzxbq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero
612; SKX-NEXT:    retq
613  %a   = load <8 x i8>,<8 x i8> *%i,align 1
614  %x   = zext <8 x i8> %a to <8 x i64>
615  %ret = select <8 x  i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
616  ret <8 x i64> %ret
617}
618
; Masked sign-extending load: reads <8 x i8> from %i (align 1), sign-extends it to
; <8 x i64>, and zeroes every lane whose %mask element is false.
; Both configurations expect a zero-masked vpmovsxbq from (%rdi) into zmm0; only the
; sequence that turns %mask into %k1 differs between the KNL and SKX checks.
; NOTE(review): marked readnone but the body loads through %i - confirm the attribute is intended.
619define <8 x i64> @sext_8x8mem_to_8x64mask(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
620; KNL-LABEL: sext_8x8mem_to_8x64mask:
621; KNL:       # %bb.0:
622; KNL-NEXT:    vpmovsxwq %xmm0, %zmm0
623; KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
624; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k1
625; KNL-NEXT:    vpmovsxbq (%rdi), %zmm0 {%k1} {z}
626; KNL-NEXT:    retq
627;
628; SKX-LABEL: sext_8x8mem_to_8x64mask:
629; SKX:       # %bb.0:
630; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
631; SKX-NEXT:    vpmovw2m %xmm0, %k1
632; SKX-NEXT:    vpmovsxbq (%rdi), %zmm0 {%k1} {z}
633; SKX-NEXT:    retq
634  %a   = load <8 x i8>,<8 x i8> *%i,align 1
635  %x   = sext <8 x i8> %a to <8 x i64>
636  %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
637  ret <8 x i64> %ret
638}
639
; Unmasked sign-extending load: reads <8 x i8> from %i (align 1) and widens it to <8 x i64>.
; Both configurations share one set of checks: a single vpmovsxbq from (%rdi) into zmm0.
; NOTE(review): marked readnone but the body loads through %i - confirm the attribute is intended.
640define <8 x i64> @sext_8x8mem_to_8x64(<8 x i8> *%i) nounwind readnone {
641; ALL-LABEL: sext_8x8mem_to_8x64:
642; ALL:       # %bb.0:
643; ALL-NEXT:    vpmovsxbq (%rdi), %zmm0
644; ALL-NEXT:    retq
645  %a   = load <8 x i8>,<8 x i8> *%i,align 1
646  %x   = sext <8 x i8> %a to <8 x i64>
647  ret <8 x i64> %x
648}
649
; Masked zero-extending load: reads <4 x i16> from %i (align 1), zero-extends it to
; <4 x i32>, and zeroes every lane whose %mask element is false.
; The SKX checks expect a zero-masked vpmovzxwd from memory into xmm0; the KNL checks expect
; an unmasked vpmovzxwd, a zmm-wide zero-masked vmovdqa32, and a vzeroupper before returning.
; NOTE(review): marked readnone but the body loads through %i - confirm the attribute is intended.
650define <4 x i32> @zext_4x16mem_to_4x32(<4 x i16> *%i , <4 x i1> %mask) nounwind readnone {
651; KNL-LABEL: zext_4x16mem_to_4x32:
652; KNL:       # %bb.0:
653; KNL-NEXT:    vpslld $31, %xmm0, %xmm0
654; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k1
655; KNL-NEXT:    vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
656; KNL-NEXT:    vmovdqa32 %zmm0, %zmm0 {%k1} {z}
657; KNL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
658; KNL-NEXT:    vzeroupper
659; KNL-NEXT:    retq
660;
661; SKX-LABEL: zext_4x16mem_to_4x32:
662; SKX:       # %bb.0:
663; SKX-NEXT:    vpslld $31, %xmm0, %xmm0
664; SKX-NEXT:    vpmovd2m %xmm0, %k1
665; SKX-NEXT:    vpmovzxwd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
666; SKX-NEXT:    retq
667  %a   = load <4 x i16>,<4 x i16> *%i,align 1
668  %x   = zext <4 x i16> %a to <4 x i32>
669  %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
670  ret <4 x i32> %ret
671}
672
; Masked sign-extending load: reads <4 x i16> from %i (align 1), sign-extends it to
; <4 x i32>, and zeroes every lane whose %mask element is false.
; The SKX checks expect a zero-masked vpmovsxwd from (%rdi) into xmm0; the KNL checks expect
; an unmasked vpmovsxwd, a zmm-wide zero-masked vmovdqa32, and a vzeroupper before returning.
; NOTE(review): marked readnone but the body loads through %i - confirm the attribute is intended.
673define <4 x i32> @sext_4x16mem_to_4x32mask(<4 x i16> *%i , <4 x i1> %mask) nounwind readnone {
674; KNL-LABEL: sext_4x16mem_to_4x32mask:
675; KNL:       # %bb.0:
676; KNL-NEXT:    vpslld $31, %xmm0, %xmm0
677; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k1
678; KNL-NEXT:    vpmovsxwd (%rdi), %xmm0
679; KNL-NEXT:    vmovdqa32 %zmm0, %zmm0 {%k1} {z}
680; KNL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
681; KNL-NEXT:    vzeroupper
682; KNL-NEXT:    retq
683;
684; SKX-LABEL: sext_4x16mem_to_4x32mask:
685; SKX:       # %bb.0:
686; SKX-NEXT:    vpslld $31, %xmm0, %xmm0
687; SKX-NEXT:    vpmovd2m %xmm0, %k1
688; SKX-NEXT:    vpmovsxwd (%rdi), %xmm0 {%k1} {z}
689; SKX-NEXT:    retq
690  %a   = load <4 x i16>,<4 x i16> *%i,align 1
691  %x   = sext <4 x i16> %a to <4 x i32>
692  %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
693  ret <4 x i32> %ret
694}
695
; Unmasked sign-extending load: reads <4 x i16> from %i (align 1) and widens it to <4 x i32>.
; Both configurations share one set of checks: a single vpmovsxwd from (%rdi) into xmm0.
; NOTE(review): marked readnone but the body loads through %i - confirm the attribute is intended.
696define <4 x i32> @sext_4x16mem_to_4x32(<4 x i16> *%i) nounwind readnone {
697; ALL-LABEL: sext_4x16mem_to_4x32:
698; ALL:       # %bb.0:
699; ALL-NEXT:    vpmovsxwd (%rdi), %xmm0
700; ALL-NEXT:    retq
701  %a   = load <4 x i16>,<4 x i16> *%i,align 1
702  %x   = sext <4 x i16> %a to <4 x i32>
703  ret <4 x i32> %x
704}
705
706
; Masked zero-extending load: reads <8 x i16> from %i (align 1), zero-extends it to
; <8 x i32>, and zeroes every lane whose %mask element is false.
; The SKX checks expect a zero-masked vpmovzxwd from memory into ymm0; the KNL checks expect
; an unmasked vpmovzxwd followed by a zmm-wide zero-masked vmovdqa32.
; NOTE(review): marked readnone but the body loads through %i - confirm the attribute is intended.
707define <8 x i32> @zext_8x16mem_to_8x32(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone {
708; KNL-LABEL: zext_8x16mem_to_8x32:
709; KNL:       # %bb.0:
710; KNL-NEXT:    vpmovsxwq %xmm0, %zmm0
711; KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
712; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k1
713; KNL-NEXT:    vpmovzxwd {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
714; KNL-NEXT:    vmovdqa32 %zmm0, %zmm0 {%k1} {z}
715; KNL-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
716; KNL-NEXT:    retq
717;
718; SKX-LABEL: zext_8x16mem_to_8x32:
719; SKX:       # %bb.0:
720; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
721; SKX-NEXT:    vpmovw2m %xmm0, %k1
722; SKX-NEXT:    vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
723; SKX-NEXT:    retq
724  %a   = load <8 x i16>,<8 x i16> *%i,align 1
725  %x   = zext <8 x i16> %a to <8 x i32>
726  %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
727  ret <8 x i32> %ret
728}
729
; Masked sign-extending load: reads <8 x i16> from %i (align 1), sign-extends it to
; <8 x i32>, and zeroes every lane whose %mask element is false.
; The SKX checks expect a zero-masked vpmovsxwd from (%rdi) into ymm0; the KNL checks expect
; an unmasked vpmovsxwd followed by a zmm-wide zero-masked vmovdqa32.
; NOTE(review): marked readnone but the body loads through %i - confirm the attribute is intended.
730define <8 x i32> @sext_8x16mem_to_8x32mask(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone {
731; KNL-LABEL: sext_8x16mem_to_8x32mask:
732; KNL:       # %bb.0:
733; KNL-NEXT:    vpmovsxwq %xmm0, %zmm0
734; KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
735; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k1
736; KNL-NEXT:    vpmovsxwd (%rdi), %ymm0
737; KNL-NEXT:    vmovdqa32 %zmm0, %zmm0 {%k1} {z}
738; KNL-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
739; KNL-NEXT:    retq
740;
741; SKX-LABEL: sext_8x16mem_to_8x32mask:
742; SKX:       # %bb.0:
743; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
744; SKX-NEXT:    vpmovw2m %xmm0, %k1
745; SKX-NEXT:    vpmovsxwd (%rdi), %ymm0 {%k1} {z}
746; SKX-NEXT:    retq
747  %a   = load <8 x i16>,<8 x i16> *%i,align 1
748  %x   = sext <8 x i16> %a to <8 x i32>
749  %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
750  ret <8 x i32> %ret
751}
752
; Unmasked sign-extending load: reads <8 x i16> from %i (align 1) and widens it to <8 x i32>.
; Both configurations share one set of checks: a single vpmovsxwd from (%rdi) into ymm0.
; NOTE(review): marked readnone but the body loads through %i - confirm the attribute is intended.
753define <8 x i32> @sext_8x16mem_to_8x32(<8 x i16> *%i) nounwind readnone {
754; ALL-LABEL: sext_8x16mem_to_8x32:
755; ALL:       # %bb.0:
756; ALL-NEXT:    vpmovsxwd (%rdi), %ymm0
757; ALL-NEXT:    retq
758  %a   = load <8 x i16>,<8 x i16> *%i,align 1
759  %x   = sext <8 x i16> %a to <8 x i32>
760  ret <8 x i32> %x
761}
762
; Masked zero-extend of a register operand: widens %a from <8 x i16> to <8 x i32> and zeroes
; every lane whose %mask element is false. The checks show %a in xmm0 and %mask in xmm1.
; The SKX checks expect a zero-masked vpmovzxwd into ymm0; the KNL checks expect an unmasked
; vpmovzxwd followed by a zmm-wide zero-masked vmovdqa32.
763define <8 x i32> @zext_8x16_to_8x32mask(<8 x i16> %a , <8 x i1> %mask) nounwind readnone {
764; KNL-LABEL: zext_8x16_to_8x32mask:
765; KNL:       # %bb.0:
766; KNL-NEXT:    vpmovsxwq %xmm1, %zmm1
767; KNL-NEXT:    vpsllq $63, %zmm1, %zmm1
768; KNL-NEXT:    vptestmq %zmm1, %zmm1, %k1
769; KNL-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
770; KNL-NEXT:    vmovdqa32 %zmm0, %zmm0 {%k1} {z}
771; KNL-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
772; KNL-NEXT:    retq
773;
774; SKX-LABEL: zext_8x16_to_8x32mask:
775; SKX:       # %bb.0:
776; SKX-NEXT:    vpsllw $15, %xmm1, %xmm1
777; SKX-NEXT:    vpmovw2m %xmm1, %k1
778; SKX-NEXT:    vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
779; SKX-NEXT:    retq
780  %x   = zext <8 x i16> %a to <8 x i32>
781  %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
782  ret <8 x i32> %ret
783}
784
; Unmasked zero-extend of a register operand: widens %a from <8 x i16> to <8 x i32>.
; Both configurations share one set of checks: a single vpmovzxwd from xmm0 into ymm0.
785define <8 x i32> @zext_8x16_to_8x32(<8 x i16> %a ) nounwind readnone {
786; ALL-LABEL: zext_8x16_to_8x32:
787; ALL:       # %bb.0:
788; ALL-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
789; ALL-NEXT:    retq
790  %x   = zext <8 x i16> %a to <8 x i32>
791  ret <8 x i32> %x
792}
793
; Masked zero-extending load: reads <16 x i16> from %i (align 1), zero-extends it to
; <16 x i32>, and zeroes every lane whose %mask element is false.
; Both configurations expect a zero-masked vpmovzxwd from memory into zmm0; they differ only
; in how %mask becomes %k1 (vpmovsxbd/vpslld/vptestmd on KNL, vpsllw/vpmovb2m on SKX).
; NOTE(review): marked readnone but the body loads through %i - confirm the attribute is intended.
794define <16 x i32> @zext_16x16mem_to_16x32(<16 x i16> *%i , <16 x i1> %mask) nounwind readnone {
795; KNL-LABEL: zext_16x16mem_to_16x32:
796; KNL:       # %bb.0:
797; KNL-NEXT:    vpmovsxbd %xmm0, %zmm0
798; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
799; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k1
800; KNL-NEXT:    vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
801; KNL-NEXT:    retq
802;
803; SKX-LABEL: zext_16x16mem_to_16x32:
804; SKX:       # %bb.0:
805; SKX-NEXT:    vpsllw $7, %xmm0, %xmm0
806; SKX-NEXT:    vpmovb2m %xmm0, %k1
807; SKX-NEXT:    vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
808; SKX-NEXT:    retq
809  %a   = load <16 x i16>,<16 x i16> *%i,align 1
810  %x   = zext <16 x i16> %a to <16 x i32>
811  %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
812  ret <16 x i32> %ret
813}
814
; Masked sign-extending load: reads <16 x i16> from %i (align 1), sign-extends it to
; <16 x i32>, and zeroes every lane whose %mask element is false.
; Both configurations expect a zero-masked vpmovsxwd from (%rdi) into zmm0; only the
; sequence that turns %mask into %k1 differs between the KNL and SKX checks.
; NOTE(review): marked readnone but the body loads through %i - confirm the attribute is intended.
815define <16 x i32> @sext_16x16mem_to_16x32mask(<16 x i16> *%i , <16 x i1> %mask) nounwind readnone {
816; KNL-LABEL: sext_16x16mem_to_16x32mask:
817; KNL:       # %bb.0:
818; KNL-NEXT:    vpmovsxbd %xmm0, %zmm0
819; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
820; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k1
821; KNL-NEXT:    vpmovsxwd (%rdi), %zmm0 {%k1} {z}
822; KNL-NEXT:    retq
823;
824; SKX-LABEL: sext_16x16mem_to_16x32mask:
825; SKX:       # %bb.0:
826; SKX-NEXT:    vpsllw $7, %xmm0, %xmm0
827; SKX-NEXT:    vpmovb2m %xmm0, %k1
828; SKX-NEXT:    vpmovsxwd (%rdi), %zmm0 {%k1} {z}
829; SKX-NEXT:    retq
830  %a   = load <16 x i16>,<16 x i16> *%i,align 1
831  %x   = sext <16 x i16> %a to <16 x i32>
832  %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
833  ret <16 x i32> %ret
834}
835
; Unmasked sign-extending load: reads <16 x i16> from %i (align 1) and widens it to <16 x i32>.
; Both configurations share one set of checks: a single vpmovsxwd from (%rdi) into zmm0.
; NOTE(review): marked readnone but the body loads through %i - confirm the attribute is intended.
836define <16 x i32> @sext_16x16mem_to_16x32(<16 x i16> *%i) nounwind readnone {
837; ALL-LABEL: sext_16x16mem_to_16x32:
838; ALL:       # %bb.0:
839; ALL-NEXT:    vpmovsxwd (%rdi), %zmm0
840; ALL-NEXT:    retq
841  %a   = load <16 x i16>,<16 x i16> *%i,align 1
842  %x   = sext <16 x i16> %a to <16 x i32>
843  ret <16 x i32> %x
844}
; Masked zero-extend of a register operand: widens %a from <16 x i16> to <16 x i32> and zeroes
; every lane whose %mask element is false. The checks show %a in ymm0 and %mask in xmm1.
; Both configurations expect a zero-masked vpmovzxwd into zmm0; only the sequence that turns
; %mask into %k1 differs between the KNL and SKX checks.
845define <16 x i32> @zext_16x16_to_16x32mask(<16 x i16> %a , <16 x i1> %mask) nounwind readnone {
846; KNL-LABEL: zext_16x16_to_16x32mask:
847; KNL:       # %bb.0:
848; KNL-NEXT:    vpmovsxbd %xmm1, %zmm1
849; KNL-NEXT:    vpslld $31, %zmm1, %zmm1
850; KNL-NEXT:    vptestmd %zmm1, %zmm1, %k1
851; KNL-NEXT:    vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
852; KNL-NEXT:    retq
853;
854; SKX-LABEL: zext_16x16_to_16x32mask:
855; SKX:       # %bb.0:
856; SKX-NEXT:    vpsllw $7, %xmm1, %xmm1
857; SKX-NEXT:    vpmovb2m %xmm1, %k1
858; SKX-NEXT:    vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
859; SKX-NEXT:    retq
860  %x   = zext <16 x i16> %a to <16 x i32>
861  %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
862  ret <16 x i32> %ret
863}
864
; Unmasked zero-extend of a register operand: widens %a from <16 x i16> to <16 x i32>.
; Both configurations share one set of checks: a single vpmovzxwd from ymm0 into zmm0.
865define <16 x i32> @zext_16x16_to_16x32(<16 x i16> %a ) nounwind readnone {
866; ALL-LABEL: zext_16x16_to_16x32:
867; ALL:       # %bb.0:
868; ALL-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
869; ALL-NEXT:    retq
870  %x   = zext <16 x i16> %a to <16 x i32>
871  ret <16 x i32> %x
872}
873
; Masked zero-extending load: reads <2 x i16> from %i (align 1), zero-extends it to
; <2 x i64>, and zeroes every lane whose %mask element is false.
; The SKX checks expect a zero-masked vpmovzxwq from memory into xmm0; the KNL checks expect
; an unmasked vpmovzxwq, a zmm-wide zero-masked vmovdqa64, and a vzeroupper before returning.
; NOTE(review): marked readnone but the body loads through %i - confirm the attribute is intended.
874define <2 x i64> @zext_2x16mem_to_2x64(<2 x i16> *%i , <2 x i1> %mask) nounwind readnone {
875; KNL-LABEL: zext_2x16mem_to_2x64:
876; KNL:       # %bb.0:
877; KNL-NEXT:    vpsllq $63, %xmm0, %xmm0
878; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k1
879; KNL-NEXT:    vpmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
880; KNL-NEXT:    vmovdqa64 %zmm0, %zmm0 {%k1} {z}
881; KNL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
882; KNL-NEXT:    vzeroupper
883; KNL-NEXT:    retq
884;
885; SKX-LABEL: zext_2x16mem_to_2x64:
886; SKX:       # %bb.0:
887; SKX-NEXT:    vpsllq $63, %xmm0, %xmm0
888; SKX-NEXT:    vpmovq2m %xmm0, %k1
889; SKX-NEXT:    vpmovzxwq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero
890; SKX-NEXT:    retq
891  %a   = load <2 x i16>,<2 x i16> *%i,align 1
892  %x   = zext <2 x i16> %a to <2 x i64>
893  %ret = select <2 x  i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
894  ret <2 x i64> %ret
895}
896
; Masked sign-extending load: reads <2 x i16> from %i (align 1), sign-extends it to
; <2 x i64>, and zeroes every lane whose %mask element is false.
; The SKX checks expect a zero-masked vpmovsxwq from (%rdi) into xmm0; the KNL checks expect
; an unmasked vpmovsxwq, a zmm-wide zero-masked vmovdqa64, and a vzeroupper before returning.
; NOTE(review): marked readnone but the body loads through %i - confirm the attribute is intended.
897define <2 x i64> @sext_2x16mem_to_2x64mask(<2 x i16> *%i , <2 x i1> %mask) nounwind readnone {
898; KNL-LABEL: sext_2x16mem_to_2x64mask:
899; KNL:       # %bb.0:
900; KNL-NEXT:    vpsllq $63, %xmm0, %xmm0
901; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k1
902; KNL-NEXT:    vpmovsxwq (%rdi), %xmm0
903; KNL-NEXT:    vmovdqa64 %zmm0, %zmm0 {%k1} {z}
904; KNL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
905; KNL-NEXT:    vzeroupper
906; KNL-NEXT:    retq
907;
908; SKX-LABEL: sext_2x16mem_to_2x64mask:
909; SKX:       # %bb.0:
910; SKX-NEXT:    vpsllq $63, %xmm0, %xmm0
911; SKX-NEXT:    vpmovq2m %xmm0, %k1
912; SKX-NEXT:    vpmovsxwq (%rdi), %xmm0 {%k1} {z}
913; SKX-NEXT:    retq
914  %a   = load <2 x i16>,<2 x i16> *%i,align 1
915  %x   = sext <2 x i16> %a to <2 x i64>
916  %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
917  ret <2 x i64> %ret
918}
919
; Unmasked sign-extending load: reads <2 x i16> from %i (align 1) and widens it to <2 x i64>.
; Both configurations share one set of checks: a single vpmovsxwq from (%rdi) into xmm0.
; NOTE(review): marked readnone but the body loads through %i - confirm the attribute is intended.
920define <2 x i64> @sext_2x16mem_to_2x64(<2 x i16> *%i) nounwind readnone {
921; ALL-LABEL: sext_2x16mem_to_2x64:
922; ALL:       # %bb.0:
923; ALL-NEXT:    vpmovsxwq (%rdi), %xmm0
924; ALL-NEXT:    retq
925  %a   = load <2 x i16>,<2 x i16> *%i,align 1
926  %x   = sext <2 x i16> %a to <2 x i64>
927  ret <2 x i64> %x
928}
929
; Masked zero-extending load: reads <4 x i16> from %i (align 1), zero-extends it to
; <4 x i64>, and zeroes every lane whose %mask element is false.
; The SKX checks expect a zero-masked vpmovzxwq from memory into ymm0; the KNL checks expect
; an unmasked vpmovzxwq followed by a zmm-wide zero-masked vmovdqa64.
; NOTE(review): marked readnone but the body loads through %i - confirm the attribute is intended.
930define <4 x i64> @zext_4x16mem_to_4x64(<4 x i16> *%i , <4 x i1> %mask) nounwind readnone {
931; KNL-LABEL: zext_4x16mem_to_4x64:
932; KNL:       # %bb.0:
933; KNL-NEXT:    vpslld $31, %xmm0, %xmm0
934; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k1
935; KNL-NEXT:    vpmovzxwq {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
936; KNL-NEXT:    vmovdqa64 %zmm0, %zmm0 {%k1} {z}
937; KNL-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
938; KNL-NEXT:    retq
939;
940; SKX-LABEL: zext_4x16mem_to_4x64:
941; SKX:       # %bb.0:
942; SKX-NEXT:    vpslld $31, %xmm0, %xmm0
943; SKX-NEXT:    vpmovd2m %xmm0, %k1
944; SKX-NEXT:    vpmovzxwq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
945; SKX-NEXT:    retq
946  %a   = load <4 x i16>,<4 x i16> *%i,align 1
947  %x   = zext <4 x i16> %a to <4 x i64>
948  %ret = select <4 x  i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
949  ret <4 x i64> %ret
950}
951
; Masked sign-extending load: reads <4 x i16> from %i (align 1), sign-extends it to
; <4 x i64>, and zeroes every lane whose %mask element is false.
; The SKX checks expect a zero-masked vpmovsxwq from (%rdi) into ymm0; the KNL checks expect
; an unmasked vpmovsxwq followed by a zmm-wide zero-masked vmovdqa64.
; NOTE(review): marked readnone but the body loads through %i - confirm the attribute is intended.
952define <4 x i64> @sext_4x16mem_to_4x64mask(<4 x i16> *%i , <4 x i1> %mask) nounwind readnone {
953; KNL-LABEL: sext_4x16mem_to_4x64mask:
954; KNL:       # %bb.0:
955; KNL-NEXT:    vpslld $31, %xmm0, %xmm0
956; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k1
957; KNL-NEXT:    vpmovsxwq (%rdi), %ymm0
958; KNL-NEXT:    vmovdqa64 %zmm0, %zmm0 {%k1} {z}
959; KNL-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
960; KNL-NEXT:    retq
961;
962; SKX-LABEL: sext_4x16mem_to_4x64mask:
963; SKX:       # %bb.0:
964; SKX-NEXT:    vpslld $31, %xmm0, %xmm0
965; SKX-NEXT:    vpmovd2m %xmm0, %k1
966; SKX-NEXT:    vpmovsxwq (%rdi), %ymm0 {%k1} {z}
967; SKX-NEXT:    retq
968  %a   = load <4 x i16>,<4 x i16> *%i,align 1
969  %x   = sext <4 x i16> %a to <4 x i64>
970  %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
971  ret <4 x i64> %ret
972}
973
; Unmasked sign-extending load: reads <4 x i16> from %i (align 1) and widens it to <4 x i64>.
; Both configurations share one set of checks: a single vpmovsxwq from (%rdi) into ymm0.
; NOTE(review): marked readnone but the body loads through %i - confirm the attribute is intended.
974define <4 x i64> @sext_4x16mem_to_4x64(<4 x i16> *%i) nounwind readnone {
975; ALL-LABEL: sext_4x16mem_to_4x64:
976; ALL:       # %bb.0:
977; ALL-NEXT:    vpmovsxwq (%rdi), %ymm0
978; ALL-NEXT:    retq
979  %a   = load <4 x i16>,<4 x i16> *%i,align 1
980  %x   = sext <4 x i16> %a to <4 x i64>
981  ret <4 x i64> %x
982}
983
; Masked zero-extending load: reads <8 x i16> from %i (align 1), zero-extends it to
; <8 x i64>, and zeroes every lane whose %mask element is false.
; Both configurations expect a zero-masked vpmovzxwq from memory into zmm0; they differ only
; in how %mask becomes %k1 (vpmovsxwq/vpsllq/vptestmq on KNL, vpsllw/vpmovw2m on SKX).
; NOTE(review): marked readnone but the body loads through %i - confirm the attribute is intended.
984define <8 x i64> @zext_8x16mem_to_8x64(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone {
985; KNL-LABEL: zext_8x16mem_to_8x64:
986; KNL:       # %bb.0:
987; KNL-NEXT:    vpmovsxwq %xmm0, %zmm0
988; KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
989; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k1
990; KNL-NEXT:    vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
991; KNL-NEXT:    retq
992;
993; SKX-LABEL: zext_8x16mem_to_8x64:
994; SKX:       # %bb.0:
995; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
996; SKX-NEXT:    vpmovw2m %xmm0, %k1
997; SKX-NEXT:    vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
998; SKX-NEXT:    retq
999  %a   = load <8 x i16>,<8 x i16> *%i,align 1
1000  %x   = zext <8 x i16> %a to <8 x i64>
1001  %ret = select <8 x  i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
1002  ret <8 x i64> %ret
1003}
1004
; Masked sign-extending load: reads <8 x i16> from %i (align 1), sign-extends it to
; <8 x i64>, and zeroes every lane whose %mask element is false.
; Both configurations expect a zero-masked vpmovsxwq from (%rdi) into zmm0; only the
; sequence that turns %mask into %k1 differs between the KNL and SKX checks.
; NOTE(review): marked readnone but the body loads through %i - confirm the attribute is intended.
1005define <8 x i64> @sext_8x16mem_to_8x64mask(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone {
1006; KNL-LABEL: sext_8x16mem_to_8x64mask:
1007; KNL:       # %bb.0:
1008; KNL-NEXT:    vpmovsxwq %xmm0, %zmm0
1009; KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
1010; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k1
1011; KNL-NEXT:    vpmovsxwq (%rdi), %zmm0 {%k1} {z}
1012; KNL-NEXT:    retq
1013;
1014; SKX-LABEL: sext_8x16mem_to_8x64mask:
1015; SKX:       # %bb.0:
1016; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
1017; SKX-NEXT:    vpmovw2m %xmm0, %k1
1018; SKX-NEXT:    vpmovsxwq (%rdi), %zmm0 {%k1} {z}
1019; SKX-NEXT:    retq
1020  %a   = load <8 x i16>,<8 x i16> *%i,align 1
1021  %x   = sext <8 x i16> %a to <8 x i64>
1022  %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
1023  ret <8 x i64> %ret
1024}
1025
; Unmasked sign-extending load: reads <8 x i16> from %i (align 1) and widens it to <8 x i64>.
; Both configurations share one set of checks: a single vpmovsxwq from (%rdi) into zmm0.
; NOTE(review): marked readnone but the body loads through %i - confirm the attribute is intended.
1026define <8 x i64> @sext_8x16mem_to_8x64(<8 x i16> *%i) nounwind readnone {
1027; ALL-LABEL: sext_8x16mem_to_8x64:
1028; ALL:       # %bb.0:
1029; ALL-NEXT:    vpmovsxwq (%rdi), %zmm0
1030; ALL-NEXT:    retq
1031  %a   = load <8 x i16>,<8 x i16> *%i,align 1
1032  %x   = sext <8 x i16> %a to <8 x i64>
1033  ret <8 x i64> %x
1034}
1035
; Masked zero-extend of a register operand: widens %a from <8 x i16> to <8 x i64> and zeroes
; every lane whose %mask element is false. The checks show %a in xmm0 and %mask in xmm1.
; Both configurations expect a zero-masked vpmovzxwq into zmm0; only the sequence that turns
; %mask into %k1 differs between the KNL and SKX checks.
1036define <8 x i64> @zext_8x16_to_8x64mask(<8 x i16> %a , <8 x i1> %mask) nounwind readnone {
1037; KNL-LABEL: zext_8x16_to_8x64mask:
1038; KNL:       # %bb.0:
1039; KNL-NEXT:    vpmovsxwq %xmm1, %zmm1
1040; KNL-NEXT:    vpsllq $63, %zmm1, %zmm1
1041; KNL-NEXT:    vptestmq %zmm1, %zmm1, %k1
1042; KNL-NEXT:    vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
1043; KNL-NEXT:    retq
1044;
1045; SKX-LABEL: zext_8x16_to_8x64mask:
1046; SKX:       # %bb.0:
1047; SKX-NEXT:    vpsllw $15, %xmm1, %xmm1
1048; SKX-NEXT:    vpmovw2m %xmm1, %k1
1049; SKX-NEXT:    vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
1050; SKX-NEXT:    retq
1051  %x   = zext <8 x i16> %a to <8 x i64>
1052  %ret = select <8 x  i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
1053  ret <8 x i64> %ret
1054}
1055
; Unmasked zero-extend of a register operand: widens %a from <8 x i16> to <8 x i64>.
; Both configurations share one set of checks: a single vpmovzxwq from xmm0 into zmm0.
1056define <8 x i64> @zext_8x16_to_8x64(<8 x i16> %a) nounwind readnone {
1057; ALL-LABEL: zext_8x16_to_8x64:
1058; ALL:       # %bb.0:
1059; ALL-NEXT:    vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
1060; ALL-NEXT:    retq
1061  %ret   = zext <8 x i16> %a to <8 x i64>
1062  ret <8 x i64> %ret
1063}
1064
; Masked zero-extending load: reads <2 x i32> from %i (align 1), zero-extends it to
; <2 x i64>, and zeroes every lane whose %mask element is false.
; The SKX checks expect a zero-masked vpmovzxdq from memory into xmm0; the KNL checks expect
; an unmasked vpmovzxdq, a zmm-wide zero-masked vmovdqa64, and a vzeroupper before returning.
; NOTE(review): marked readnone but the body loads through %i - confirm the attribute is intended.
1065define <2 x i64> @zext_2x32mem_to_2x64(<2 x i32> *%i , <2 x i1> %mask) nounwind readnone {
1066; KNL-LABEL: zext_2x32mem_to_2x64:
1067; KNL:       # %bb.0:
1068; KNL-NEXT:    vpsllq $63, %xmm0, %xmm0
1069; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k1
1070; KNL-NEXT:    vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
1071; KNL-NEXT:    vmovdqa64 %zmm0, %zmm0 {%k1} {z}
1072; KNL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
1073; KNL-NEXT:    vzeroupper
1074; KNL-NEXT:    retq
1075;
1076; SKX-LABEL: zext_2x32mem_to_2x64:
1077; SKX:       # %bb.0:
1078; SKX-NEXT:    vpsllq $63, %xmm0, %xmm0
1079; SKX-NEXT:    vpmovq2m %xmm0, %k1
1080; SKX-NEXT:    vpmovzxdq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero
1081; SKX-NEXT:    retq
1082  %a   = load <2 x i32>,<2 x i32> *%i,align 1
1083  %x   = zext <2 x i32> %a to <2 x i64>
1084  %ret = select <2 x  i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
1085  ret <2 x i64> %ret
1086}
1087
; Masked sign-extending load: reads <2 x i32> from %i (align 1), sign-extends it to
; <2 x i64>, and zeroes every lane whose %mask element is false.
; The SKX checks expect a zero-masked vpmovsxdq from (%rdi) into xmm0; the KNL checks expect
; an unmasked vpmovsxdq, a zmm-wide zero-masked vmovdqa64, and a vzeroupper before returning.
; NOTE(review): marked readnone but the body loads through %i - confirm the attribute is intended.
1088define <2 x i64> @sext_2x32mem_to_2x64mask(<2 x i32> *%i , <2 x i1> %mask) nounwind readnone {
1089; KNL-LABEL: sext_2x32mem_to_2x64mask:
1090; KNL:       # %bb.0:
1091; KNL-NEXT:    vpsllq $63, %xmm0, %xmm0
1092; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k1
1093; KNL-NEXT:    vpmovsxdq (%rdi), %xmm0
1094; KNL-NEXT:    vmovdqa64 %zmm0, %zmm0 {%k1} {z}
1095; KNL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
1096; KNL-NEXT:    vzeroupper
1097; KNL-NEXT:    retq
1098;
1099; SKX-LABEL: sext_2x32mem_to_2x64mask:
1100; SKX:       # %bb.0:
1101; SKX-NEXT:    vpsllq $63, %xmm0, %xmm0
1102; SKX-NEXT:    vpmovq2m %xmm0, %k1
1103; SKX-NEXT:    vpmovsxdq (%rdi), %xmm0 {%k1} {z}
1104; SKX-NEXT:    retq
1105  %a   = load <2 x i32>,<2 x i32> *%i,align 1
1106  %x   = sext <2 x i32> %a to <2 x i64>
1107  %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
1108  ret <2 x i64> %ret
1109}
1110
; Unmasked sign-extending load: reads <2 x i32> from %i (align 1) and widens it to <2 x i64>.
; Both configurations share one set of checks: a single vpmovsxdq from (%rdi) into xmm0.
; NOTE(review): marked readnone but the body loads through %i - confirm the attribute is intended.
1111define <2 x i64> @sext_2x32mem_to_2x64(<2 x i32> *%i) nounwind readnone {
1112; ALL-LABEL: sext_2x32mem_to_2x64:
1113; ALL:       # %bb.0:
1114; ALL-NEXT:    vpmovsxdq (%rdi), %xmm0
1115; ALL-NEXT:    retq
1116  %a   = load <2 x i32>,<2 x i32> *%i,align 1
1117  %x   = sext <2 x i32> %a to <2 x i64>
1118  ret <2 x i64> %x
1119}
1120
; Masked zero-extending load: reads <4 x i32> from %i (align 1), zero-extends it to
; <4 x i64>, and zeroes every lane whose %mask element is false.
; The SKX checks expect a zero-masked vpmovzxdq from memory into ymm0; the KNL checks expect
; an unmasked vpmovzxdq followed by a zmm-wide zero-masked vmovdqa64.
; NOTE(review): marked readnone but the body loads through %i - confirm the attribute is intended.
1121define <4 x i64> @zext_4x32mem_to_4x64(<4 x i32> *%i , <4 x i1> %mask) nounwind readnone {
1122; KNL-LABEL: zext_4x32mem_to_4x64:
1123; KNL:       # %bb.0:
1124; KNL-NEXT:    vpslld $31, %xmm0, %xmm0
1125; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k1
1126; KNL-NEXT:    vpmovzxdq {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
1127; KNL-NEXT:    vmovdqa64 %zmm0, %zmm0 {%k1} {z}
1128; KNL-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
1129; KNL-NEXT:    retq
1130;
1131; SKX-LABEL: zext_4x32mem_to_4x64:
1132; SKX:       # %bb.0:
1133; SKX-NEXT:    vpslld $31, %xmm0, %xmm0
1134; SKX-NEXT:    vpmovd2m %xmm0, %k1
1135; SKX-NEXT:    vpmovzxdq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
1136; SKX-NEXT:    retq
1137  %a   = load <4 x i32>,<4 x i32> *%i,align 1
1138  %x   = zext <4 x i32> %a to <4 x i64>
1139  %ret = select <4 x  i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
1140  ret <4 x i64> %ret
1141}
1142
; Masked sign-extending load: reads <4 x i32> from %i (align 1), sign-extends it to
; <4 x i64>, and zeroes every lane whose %mask element is false.
; The SKX checks expect a zero-masked vpmovsxdq from (%rdi) into ymm0; the KNL checks expect
; an unmasked vpmovsxdq followed by a zmm-wide zero-masked vmovdqa64.
; NOTE(review): marked readnone but the body loads through %i - confirm the attribute is intended.
1143define <4 x i64> @sext_4x32mem_to_4x64mask(<4 x i32> *%i , <4 x i1> %mask) nounwind readnone {
1144; KNL-LABEL: sext_4x32mem_to_4x64mask:
1145; KNL:       # %bb.0:
1146; KNL-NEXT:    vpslld $31, %xmm0, %xmm0
1147; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k1
1148; KNL-NEXT:    vpmovsxdq (%rdi), %ymm0
1149; KNL-NEXT:    vmovdqa64 %zmm0, %zmm0 {%k1} {z}
1150; KNL-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
1151; KNL-NEXT:    retq
1152;
1153; SKX-LABEL: sext_4x32mem_to_4x64mask:
1154; SKX:       # %bb.0:
1155; SKX-NEXT:    vpslld $31, %xmm0, %xmm0
1156; SKX-NEXT:    vpmovd2m %xmm0, %k1
1157; SKX-NEXT:    vpmovsxdq (%rdi), %ymm0 {%k1} {z}
1158; SKX-NEXT:    retq
1159  %a   = load <4 x i32>,<4 x i32> *%i,align 1
1160  %x   = sext <4 x i32> %a to <4 x i64>
1161  %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
1162  ret <4 x i64> %ret
1163}
1164
; Unmasked sign-extending load: reads <4 x i32> from %i (align 1) and widens it to <4 x i64>.
; Both configurations share one set of checks: a single vpmovsxdq from (%rdi) into ymm0.
; NOTE(review): marked readnone but the body loads through %i - confirm the attribute is intended.
1165define <4 x i64> @sext_4x32mem_to_4x64(<4 x i32> *%i) nounwind readnone {
1166; ALL-LABEL: sext_4x32mem_to_4x64:
1167; ALL:       # %bb.0:
1168; ALL-NEXT:    vpmovsxdq (%rdi), %ymm0
1169; ALL-NEXT:    retq
1170  %a   = load <4 x i32>,<4 x i32> *%i,align 1
1171  %x   = sext <4 x i32> %a to <4 x i64>
1172  ret <4 x i64> %x
1173}
1174
; Unmasked sign-extend of a register operand: widens %a from <4 x i32> to <4 x i64>.
; Both configurations share one set of checks: a single vpmovsxdq from xmm0 into ymm0.
1175define <4 x i64> @sext_4x32_to_4x64(<4 x i32> %a) nounwind readnone {
1176; ALL-LABEL: sext_4x32_to_4x64:
1177; ALL:       # %bb.0:
1178; ALL-NEXT:    vpmovsxdq %xmm0, %ymm0
1179; ALL-NEXT:    retq
1180  %x   = sext <4 x i32> %a to <4 x i64>
1181  ret <4 x i64> %x
1182}
1183
; Masked zero-extend of a register operand: widens %a from <4 x i32> to <4 x i64> and zeroes
; every lane whose %mask element is false. The checks show %a in xmm0 and %mask in xmm1.
; The SKX checks expect a zero-masked vpmovzxdq into ymm0; the KNL checks expect an unmasked
; vpmovzxdq followed by a zmm-wide zero-masked vmovdqa64.
1184define <4 x i64> @zext_4x32_to_4x64mask(<4 x i32> %a , <4 x i1> %mask) nounwind readnone {
1185; KNL-LABEL: zext_4x32_to_4x64mask:
1186; KNL:       # %bb.0:
1187; KNL-NEXT:    vpslld $31, %xmm1, %xmm1
1188; KNL-NEXT:    vptestmd %zmm1, %zmm1, %k1
1189; KNL-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1190; KNL-NEXT:    vmovdqa64 %zmm0, %zmm0 {%k1} {z}
1191; KNL-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
1192; KNL-NEXT:    retq
1193;
1194; SKX-LABEL: zext_4x32_to_4x64mask:
1195; SKX:       # %bb.0:
1196; SKX-NEXT:    vpslld $31, %xmm1, %xmm1
1197; SKX-NEXT:    vpmovd2m %xmm1, %k1
1198; SKX-NEXT:    vpmovzxdq {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1199; SKX-NEXT:    retq
1200  %x   = zext <4 x i32> %a to <4 x i64>
1201  %ret = select <4 x  i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
1202  ret <4 x i64> %ret
1203}
1204
; Masked zero-extending load: reads <8 x i32> from %i (align 1), zero-extends it to
; <8 x i64>, and zeroes every lane whose %mask element is false.
; Both configurations expect a zero-masked vpmovzxdq from memory into zmm0; they differ only
; in how %mask becomes %k1 (vpmovsxwq/vpsllq/vptestmq on KNL, vpsllw/vpmovw2m on SKX).
; NOTE(review): marked readnone but the body loads through %i - confirm the attribute is intended.
1205define <8 x i64> @zext_8x32mem_to_8x64(<8 x i32> *%i , <8 x i1> %mask) nounwind readnone {
1206; KNL-LABEL: zext_8x32mem_to_8x64:
1207; KNL:       # %bb.0:
1208; KNL-NEXT:    vpmovsxwq %xmm0, %zmm0
1209; KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
1210; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k1
1211; KNL-NEXT:    vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
1212; KNL-NEXT:    retq
1213;
1214; SKX-LABEL: zext_8x32mem_to_8x64:
1215; SKX:       # %bb.0:
1216; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
1217; SKX-NEXT:    vpmovw2m %xmm0, %k1
1218; SKX-NEXT:    vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
1219; SKX-NEXT:    retq
1220  %a   = load <8 x i32>,<8 x i32> *%i,align 1
1221  %x   = zext <8 x i32> %a to <8 x i64>
1222  %ret = select <8 x  i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
1223  ret <8 x i64> %ret
1224}
1225
; Masked sign-extending load: reads <8 x i32> from %i (align 1), sign-extends it to
; <8 x i64>, and zeroes every lane whose %mask element is false.
; Both configurations expect a zero-masked vpmovsxdq from (%rdi) into zmm0; only the
; sequence that turns %mask into %k1 differs between the KNL and SKX checks.
; NOTE(review): marked readnone but the body loads through %i - confirm the attribute is intended.
1226define <8 x i64> @sext_8x32mem_to_8x64mask(<8 x i32> *%i , <8 x i1> %mask) nounwind readnone {
1227; KNL-LABEL: sext_8x32mem_to_8x64mask:
1228; KNL:       # %bb.0:
1229; KNL-NEXT:    vpmovsxwq %xmm0, %zmm0
1230; KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
1231; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k1
1232; KNL-NEXT:    vpmovsxdq (%rdi), %zmm0 {%k1} {z}
1233; KNL-NEXT:    retq
1234;
1235; SKX-LABEL: sext_8x32mem_to_8x64mask:
1236; SKX:       # %bb.0:
1237; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
1238; SKX-NEXT:    vpmovw2m %xmm0, %k1
1239; SKX-NEXT:    vpmovsxdq (%rdi), %zmm0 {%k1} {z}
1240; SKX-NEXT:    retq
1241  %a   = load <8 x i32>,<8 x i32> *%i,align 1
1242  %x   = sext <8 x i32> %a to <8 x i64>
1243  %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
1244  ret <8 x i64> %ret
1245}
1246
; Unmasked sign-extending load: reads <8 x i32> from %i (align 1) and widens it to <8 x i64>.
; Both configurations share one set of checks: a single vpmovsxdq from (%rdi) into zmm0.
; NOTE(review): marked readnone but the body loads through %i - confirm the attribute is intended.
1247define <8 x i64> @sext_8x32mem_to_8x64(<8 x i32> *%i) nounwind readnone {
1248; ALL-LABEL: sext_8x32mem_to_8x64:
1249; ALL:       # %bb.0:
1250; ALL-NEXT:    vpmovsxdq (%rdi), %zmm0
1251; ALL-NEXT:    retq
1252  %a   = load <8 x i32>,<8 x i32> *%i,align 1
1253  %x   = sext <8 x i32> %a to <8 x i64>
1254  ret <8 x i64> %x
1255}
1256
; Unmasked sign-extend of a register operand: widens %a from <8 x i32> to <8 x i64>.
; Both configurations share one set of checks: a single vpmovsxdq from ymm0 into zmm0.
1257define <8 x i64> @sext_8x32_to_8x64(<8 x i32> %a) nounwind readnone {
1258; ALL-LABEL: sext_8x32_to_8x64:
1259; ALL:       # %bb.0:
1260; ALL-NEXT:    vpmovsxdq %ymm0, %zmm0
1261; ALL-NEXT:    retq
1262  %x   = sext <8 x i32> %a to <8 x i64>
1263  ret <8 x i64> %x
1264}
1265
; Masked zero-extend of a register operand: widens %a from <8 x i32> to <8 x i64> and zeroes
; every lane whose %mask element is false. The checks show %a in ymm0 and %mask in xmm1.
; Both configurations expect a zero-masked vpmovzxdq into zmm0; only the sequence that turns
; %mask into %k1 differs between the KNL and SKX checks.
1266define <8 x i64> @zext_8x32_to_8x64mask(<8 x i32> %a , <8 x i1> %mask) nounwind readnone {
1267; KNL-LABEL: zext_8x32_to_8x64mask:
1268; KNL:       # %bb.0:
1269; KNL-NEXT:    vpmovsxwq %xmm1, %zmm1
1270; KNL-NEXT:    vpsllq $63, %zmm1, %zmm1
1271; KNL-NEXT:    vptestmq %zmm1, %zmm1, %k1
1272; KNL-NEXT:    vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
1273; KNL-NEXT:    retq
1274;
1275; SKX-LABEL: zext_8x32_to_8x64mask:
1276; SKX:       # %bb.0:
1277; SKX-NEXT:    vpsllw $15, %xmm1, %xmm1
1278; SKX-NEXT:    vpmovw2m %xmm1, %k1
1279; SKX-NEXT:    vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
1280; SKX-NEXT:    retq
1281  %x   = zext <8 x i32> %a to <8 x i64>
1282  %ret = select <8 x  i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
1283  ret <8 x i64> %ret
1284}
; Floating-point narrowing: fptrunc of %a from <8 x double> to <8 x float>.
; Both configurations share one set of checks: a single vcvtpd2ps from zmm0 into ymm0.
1285define <8 x float> @fptrunc_test(<8 x double> %a) nounwind readnone {
1286; ALL-LABEL: fptrunc_test:
1287; ALL:       # %bb.0:
1288; ALL-NEXT:    vcvtpd2ps %zmm0, %ymm0
1289; ALL-NEXT:    retq
1290  %b = fptrunc <8 x double> %a to <8 x float>
1291  ret <8 x float> %b
1292}
1293
; Floating-point widening: fpext of %a from <8 x float> to <8 x double>.
; Both configurations share one set of checks: a single vcvtps2pd from ymm0 into zmm0.
1294define <8 x double> @fpext_test(<8 x float> %a) nounwind readnone {
1295; ALL-LABEL: fpext_test:
1296; ALL:       # %bb.0:
1297; ALL-NEXT:    vcvtps2pd %ymm0, %zmm0
1298; ALL-NEXT:    retq
1299  %b = fpext <8 x float> %a to <8 x double>
1300  ret <8 x double> %b
1301}
1302
; Scalar bitmask to vector: bitcasts the i16 argument to <16 x i1> and zero-extends each
; element to i32.
; Both configurations move %edi into a mask register, expand it to a zmm vector (zero-masked
; vpternlogd $255 on KNL, vpmovm2d on SKX), then shift every lane right by 31.
1303define   <16 x i32> @zext_16i1_to_16xi32(i16 %b) {
1304; KNL-LABEL: zext_16i1_to_16xi32:
1305; KNL:       # %bb.0:
1306; KNL-NEXT:    kmovw %edi, %k1
1307; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
1308; KNL-NEXT:    vpsrld $31, %zmm0, %zmm0
1309; KNL-NEXT:    retq
1310;
1311; SKX-LABEL: zext_16i1_to_16xi32:
1312; SKX:       # %bb.0:
1313; SKX-NEXT:    kmovd %edi, %k0
1314; SKX-NEXT:    vpmovm2d %k0, %zmm0
1315; SKX-NEXT:    vpsrld $31, %zmm0, %zmm0
1316; SKX-NEXT:    retq
1317  %a = bitcast i16 %b to <16 x i1>
1318  %c = zext <16 x i1> %a to <16 x i32>
1319  ret <16 x i32> %c
1320}
1321
; Scalar bitmask to vector: bitcasts the i8 argument to <8 x i1> and zero-extends each
; element to i64.
; Both configurations move %edi into a mask register, expand it to a zmm vector (zero-masked
; vpternlogq $255 on KNL, vpmovm2q on SKX), then shift every lane right by 63.
1322define   <8 x i64> @zext_8i1_to_8xi64(i8 %b) {
1323; KNL-LABEL: zext_8i1_to_8xi64:
1324; KNL:       # %bb.0:
1325; KNL-NEXT:    kmovw %edi, %k1
1326; KNL-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
1327; KNL-NEXT:    vpsrlq $63, %zmm0, %zmm0
1328; KNL-NEXT:    retq
1329;
1330; SKX-LABEL: zext_8i1_to_8xi64:
1331; SKX:       # %bb.0:
1332; SKX-NEXT:    kmovd %edi, %k0
1333; SKX-NEXT:    vpmovm2q %k0, %zmm0
1334; SKX-NEXT:    vpsrlq $63, %zmm0, %zmm0
1335; SKX-NEXT:    retq
1336  %a = bitcast i8 %b to <8 x i1>
1337  %c = zext <8 x i1> %a to <8 x i64>
1338  ret <8 x i64> %c
1339}
1340
; Vector to scalar bitmask: truncates %a from <16 x i8> to <16 x i1> and bitcasts the result
; to i16.
; The KNL checks widen to zmm dwords (vpmovsxbd), shift by 31 and use vptestmd; the SKX
; checks shift the bytes by 7 and use vpmovb2m. Both then copy %k0 to %eax and return %ax.
1341define i16 @trunc_16i8_to_16i1(<16 x i8> %a) {
1342; KNL-LABEL: trunc_16i8_to_16i1:
1343; KNL:       # %bb.0:
1344; KNL-NEXT:    vpmovsxbd %xmm0, %zmm0
1345; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
1346; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k0
1347; KNL-NEXT:    kmovw %k0, %eax
1348; KNL-NEXT:    # kill: def $ax killed $ax killed $eax
1349; KNL-NEXT:    vzeroupper
1350; KNL-NEXT:    retq
1351;
1352; SKX-LABEL: trunc_16i8_to_16i1:
1353; SKX:       # %bb.0:
1354; SKX-NEXT:    vpsllw $7, %xmm0, %xmm0
1355; SKX-NEXT:    vpmovb2m %xmm0, %k0
1356; SKX-NEXT:    kmovd %k0, %eax
1357; SKX-NEXT:    # kill: def $ax killed $ax killed $eax
1358; SKX-NEXT:    retq
1359  %mask_b = trunc <16 x i8>%a to <16 x i1>
1360  %mask = bitcast <16 x i1> %mask_b to i16
1361  ret i16 %mask
1362}
1363
; Vector to scalar bitmask: truncates %a from <16 x i32> to <16 x i1> and bitcasts the result
; to i16.
; Both configurations shift each dword left by 31, then form %k0 (vptestmd on KNL, vpmovd2m
; on SKX), copy it to %eax, and issue vzeroupper before returning %ax.
1364define i16 @trunc_16i32_to_16i1(<16 x i32> %a) {
1365; KNL-LABEL: trunc_16i32_to_16i1:
1366; KNL:       # %bb.0:
1367; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
1368; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k0
1369; KNL-NEXT:    kmovw %k0, %eax
1370; KNL-NEXT:    # kill: def $ax killed $ax killed $eax
1371; KNL-NEXT:    vzeroupper
1372; KNL-NEXT:    retq
1373;
1374; SKX-LABEL: trunc_16i32_to_16i1:
1375; SKX:       # %bb.0:
1376; SKX-NEXT:    vpslld $31, %zmm0, %zmm0
1377; SKX-NEXT:    vpmovd2m %zmm0, %k0
1378; SKX-NEXT:    kmovd %k0, %eax
1379; SKX-NEXT:    # kill: def $ax killed $ax killed $eax
1380; SKX-NEXT:    vzeroupper
1381; SKX-NEXT:    retq
1382  %mask_b = trunc <16 x i32>%a to <16 x i1>
1383  %mask = bitcast <16 x i1> %mask_b to i16
1384  ret i16 %mask
1385}
1386
; Truncates %a and %b to <4 x i1>, ANDs the two masks and sign-extends the
; result back to <4 x i32>.
; Both llc configurations share the ALL checks, which expect a plain vpand
; followed by a vpslld $31 / vpsrad $31 pair on xmm0.
1387define <4 x i32> @trunc_4i32_to_4i1(<4 x i32> %a, <4 x i32> %b) {
1388; ALL-LABEL: trunc_4i32_to_4i1:
1389; ALL:       # %bb.0:
1390; ALL-NEXT:    vpand %xmm1, %xmm0, %xmm0
1391; ALL-NEXT:    vpslld $31, %xmm0, %xmm0
1392; ALL-NEXT:    vpsrad $31, %xmm0, %xmm0
1393; ALL-NEXT:    retq
1394  %mask_a = trunc <4 x i32>%a to <4 x i1>
1395  %mask_b = trunc <4 x i32>%b to <4 x i1>
1396  %a_and_b = and <4 x i1>%mask_a, %mask_b
1397  %res = sext <4 x i1>%a_and_b to <4 x i32>
1398  ret <4 x i32>%res
1399}
1400
1401
; Truncates every word of %a to i1 and returns the <8 x i1> result bitcast
; to an i8.
; The KNL checks expect the words widened to qwords (vpmovsxwq) before a
; vpsllq $63 / vptestmq pair; the SKX checks expect vpsllw $15 + vpmovw2m.
1402define i8 @trunc_8i16_to_8i1(<8 x i16> %a) {
1403; KNL-LABEL: trunc_8i16_to_8i1:
1404; KNL:       # %bb.0:
1405; KNL-NEXT:    vpmovsxwq %xmm0, %zmm0
1406; KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
1407; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k0
1408; KNL-NEXT:    kmovw %k0, %eax
1409; KNL-NEXT:    # kill: def $al killed $al killed $eax
1410; KNL-NEXT:    vzeroupper
1411; KNL-NEXT:    retq
1412;
1413; SKX-LABEL: trunc_8i16_to_8i1:
1414; SKX:       # %bb.0:
1415; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
1416; SKX-NEXT:    vpmovw2m %xmm0, %k0
1417; SKX-NEXT:    kmovd %k0, %eax
1418; SKX-NEXT:    # kill: def $al killed $al killed $eax
1419; SKX-NEXT:    retq
1420  %mask_b = trunc <8 x i16>%a to <8 x i1>
1421  %mask = bitcast <8 x i1> %mask_b to i8
1422  ret i8 %mask
1423}
1424
; Computes the lane-wise signed compare %a1 < %a2, inverts the <8 x i1> result
; by xor-ing it with an all-true vector, and sign-extends it to <8 x i32>.
; Both check sets expect vpcmpgtd followed by a vpternlogq $15 on the compare
; result (on zmm0 for KNL, on ymm0 for SKX).
1425define <8 x i32> @sext_8i1_8i32(<8 x i32> %a1, <8 x i32> %a2) nounwind {
1426; KNL-LABEL: sext_8i1_8i32:
1427; KNL:       # %bb.0:
1428; KNL-NEXT:    vpcmpgtd %ymm0, %ymm1, %ymm0
1429; KNL-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
1430; KNL-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
1431; KNL-NEXT:    retq
1432;
1433; SKX-LABEL: sext_8i1_8i32:
1434; SKX:       # %bb.0:
1435; SKX-NEXT:    vpcmpgtd %ymm0, %ymm1, %ymm0
1436; SKX-NEXT:    vpternlogq $15, %ymm0, %ymm0, %ymm0
1437; SKX-NEXT:    retq
1438  %x = icmp slt <8 x i32> %a1, %a2
1439  %x1 = xor <8 x i1>%x, <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
1440  %y = sext <8 x i1> %x1 to <8 x i32>
1441  ret <8 x i32> %y
1442}
1443
1444
; Truncates %a to i1, inserts that bit at index 0 of a constant <16 x i1>
; (element 1 is false, every other element is true) and returns the vector
; bitcast to an i16.
; Both check sets expect the constant materialised as $-4, a kshiftrw /
; kshiftlw $1 pair on it, and the input bit (andl $1) merged in with korw.
1445define i16 @trunc_i32_to_i1(i32 %a) {
1446; KNL-LABEL: trunc_i32_to_i1:
1447; KNL:       # %bb.0:
1448; KNL-NEXT:    movw $-4, %ax
1449; KNL-NEXT:    kmovw %eax, %k0
1450; KNL-NEXT:    kshiftrw $1, %k0, %k0
1451; KNL-NEXT:    kshiftlw $1, %k0, %k0
1452; KNL-NEXT:    andl $1, %edi
1453; KNL-NEXT:    kmovw %edi, %k1
1454; KNL-NEXT:    korw %k1, %k0, %k0
1455; KNL-NEXT:    kmovw %k0, %eax
1456; KNL-NEXT:    # kill: def $ax killed $ax killed $eax
1457; KNL-NEXT:    retq
1458;
1459; SKX-LABEL: trunc_i32_to_i1:
1460; SKX:       # %bb.0:
1461; SKX-NEXT:    movw $-4, %ax
1462; SKX-NEXT:    kmovd %eax, %k0
1463; SKX-NEXT:    kshiftrw $1, %k0, %k0
1464; SKX-NEXT:    kshiftlw $1, %k0, %k0
1465; SKX-NEXT:    andl $1, %edi
1466; SKX-NEXT:    kmovw %edi, %k1
1467; SKX-NEXT:    korw %k1, %k0, %k0
1468; SKX-NEXT:    kmovd %k0, %eax
1469; SKX-NEXT:    # kill: def $ax killed $ax killed $eax
1470; SKX-NEXT:    retq
1471  %a_i = trunc i32 %a to i1
1472  %maskv = insertelement <16 x i1> <i1 true, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i1 %a_i, i32 0
1473  %res = bitcast <16 x i1> %maskv to i16
1474  ret i16 %res
1475}
1476
; Computes the lane-wise signed compare %a1 < %a2 on <8 x i32> and
; sign-extends the <8 x i1> result to <8 x i16>.
; The KNL checks expect a vector-register compare narrowed with vpmovdw; the
; SKX checks expect a compare into %k0 expanded with vpmovm2w.
1477define <8 x i16> @sext_8i1_8i16(<8 x i32> %a1, <8 x i32> %a2) nounwind {
1478; KNL-LABEL: sext_8i1_8i16:
1479; KNL:       # %bb.0:
1480; KNL-NEXT:    vpcmpgtd %ymm0, %ymm1, %ymm0
1481; KNL-NEXT:    vpmovdw %zmm0, %ymm0
1482; KNL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
1483; KNL-NEXT:    vzeroupper
1484; KNL-NEXT:    retq
1485;
1486; SKX-LABEL: sext_8i1_8i16:
1487; SKX:       # %bb.0:
1488; SKX-NEXT:    vpcmpgtd %ymm0, %ymm1, %k0
1489; SKX-NEXT:    vpmovm2w %k0, %xmm0
1490; SKX-NEXT:    vzeroupper
1491; SKX-NEXT:    retq
1492  %x = icmp slt <8 x i32> %a1, %a2
1493  %y = sext <8 x i1> %x to <8 x i16>
1494  ret <8 x i16> %y
1495}
1496
; Computes the lane-wise signed compare %a1 < %a2 on <16 x i32> and
; sign-extends the <16 x i1> result to <16 x i32>.
; The KNL checks expect a compare into %k1 plus a zero-masked vpternlogd $255;
; the SKX checks expect a compare into %k0 plus vpmovm2d.
1497define <16 x i32> @sext_16i1_16i32(<16 x i32> %a1, <16 x i32> %a2) nounwind {
1498; KNL-LABEL: sext_16i1_16i32:
1499; KNL:       # %bb.0:
1500; KNL-NEXT:    vpcmpgtd %zmm0, %zmm1, %k1
1501; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
1502; KNL-NEXT:    retq
1503;
1504; SKX-LABEL: sext_16i1_16i32:
1505; SKX:       # %bb.0:
1506; SKX-NEXT:    vpcmpgtd %zmm0, %zmm1, %k0
1507; SKX-NEXT:    vpmovm2d %k0, %zmm0
1508; SKX-NEXT:    retq
1509  %x = icmp slt <16 x i32> %a1, %a2
1510  %y = sext <16 x i1> %x to <16 x i32>
1511  ret <16 x i32> %y
1512}
1513
; Computes the lane-wise signed compare %a1 < %a2 on <8 x i32> and
; sign-extends the <8 x i1> result to <8 x i64>.
; The KNL checks expect a vector-register compare widened with vpmovsxdq; the
; SKX checks expect a compare into %k0 expanded with vpmovm2q.
1514define <8 x i64> @sext_8i1_8i64(<8 x i32> %a1, <8 x i32> %a2) nounwind {
1515; KNL-LABEL: sext_8i1_8i64:
1516; KNL:       # %bb.0:
1517; KNL-NEXT:    vpcmpgtd %ymm0, %ymm1, %ymm0
1518; KNL-NEXT:    vpmovsxdq %ymm0, %zmm0
1519; KNL-NEXT:    retq
1520;
1521; SKX-LABEL: sext_8i1_8i64:
1522; SKX:       # %bb.0:
1523; SKX-NEXT:    vpcmpgtd %ymm0, %ymm1, %k0
1524; SKX-NEXT:    vpmovm2q %k0, %zmm0
1525; SKX-NEXT:    retq
1526  %x = icmp slt <8 x i32> %a1, %a2
1527  %y = sext <8 x i1> %x to <8 x i64>
1528  ret <8 x i64> %y
1529}
1530
; Loads <8 x i8> from %a, sign-extends it to <8 x i64> and stores the result
; to %res.
; Both llc configurations share the ALL checks, which expect the load to
; appear as the memory operand of vpmovsxbq, followed by a vmovdqa64 store.
1531define void @extload_v8i64(<8 x i8>* %a, <8 x i64>* %res) {
1532; ALL-LABEL: extload_v8i64:
1533; ALL:       # %bb.0:
1534; ALL-NEXT:    vpmovsxbq (%rdi), %zmm0
1535; ALL-NEXT:    vmovdqa64 %zmm0, (%rsi)
1536; ALL-NEXT:    vzeroupper
1537; ALL-NEXT:    retq
1538  %sign_load = load <8 x i8>, <8 x i8>* %a
1539  %c = sext <8 x i8> %sign_load to <8 x i64>
1540  store <8 x i64> %c, <8 x i64>* %res
1541  ret void
1542}
1543
; Masked zeroing of a <64 x i16> vector: keeps the lanes of %x where %mask is
; set and returns zero in the others.
; The KNL checks expect four ymm pieces; each mask piece (xmm4..xmm7) is
; widened with vpmovzxbw, run through vpsllw/vpsraw $15 and ANDed with the
; matching data register (ymm0..ymm3).
; The SKX checks expect vpsllw $7 + vpmovb2m into %k1, a zero-masked vmovdqu16
; on zmm0, then kshiftrq $32 and a second zero-masked vmovdqu16 on zmm1.
1544define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
1545; KNL-LABEL: test21:
1546; KNL:       # %bb.0:
1547; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm7 = xmm7[0],zero,xmm7[1],zero,xmm7[2],zero,xmm7[3],zero,xmm7[4],zero,xmm7[5],zero,xmm7[6],zero,xmm7[7],zero,xmm7[8],zero,xmm7[9],zero,xmm7[10],zero,xmm7[11],zero,xmm7[12],zero,xmm7[13],zero,xmm7[14],zero,xmm7[15],zero
1548; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm6 = xmm6[0],zero,xmm6[1],zero,xmm6[2],zero,xmm6[3],zero,xmm6[4],zero,xmm6[5],zero,xmm6[6],zero,xmm6[7],zero,xmm6[8],zero,xmm6[9],zero,xmm6[10],zero,xmm6[11],zero,xmm6[12],zero,xmm6[13],zero,xmm6[14],zero,xmm6[15],zero
1549; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm5 = xmm5[0],zero,xmm5[1],zero,xmm5[2],zero,xmm5[3],zero,xmm5[4],zero,xmm5[5],zero,xmm5[6],zero,xmm5[7],zero,xmm5[8],zero,xmm5[9],zero,xmm5[10],zero,xmm5[11],zero,xmm5[12],zero,xmm5[13],zero,xmm5[14],zero,xmm5[15],zero
1550; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm4 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero,xmm4[8],zero,xmm4[9],zero,xmm4[10],zero,xmm4[11],zero,xmm4[12],zero,xmm4[13],zero,xmm4[14],zero,xmm4[15],zero
1551; KNL-NEXT:    vpsllw $15, %ymm4, %ymm4
1552; KNL-NEXT:    vpsraw $15, %ymm4, %ymm4
1553; KNL-NEXT:    vpand %ymm0, %ymm4, %ymm0
1554; KNL-NEXT:    vpsllw $15, %ymm5, %ymm4
1555; KNL-NEXT:    vpsraw $15, %ymm4, %ymm4
1556; KNL-NEXT:    vpand %ymm1, %ymm4, %ymm1
1557; KNL-NEXT:    vpsllw $15, %ymm6, %ymm4
1558; KNL-NEXT:    vpsraw $15, %ymm4, %ymm4
1559; KNL-NEXT:    vpand %ymm2, %ymm4, %ymm2
1560; KNL-NEXT:    vpsllw $15, %ymm7, %ymm4
1561; KNL-NEXT:    vpsraw $15, %ymm4, %ymm4
1562; KNL-NEXT:    vpand %ymm3, %ymm4, %ymm3
1563; KNL-NEXT:    retq
1564;
1565; SKX-LABEL: test21:
1566; SKX:       # %bb.0:
1567; SKX-NEXT:    vpsllw $7, %zmm2, %zmm2
1568; SKX-NEXT:    vpmovb2m %zmm2, %k1
1569; SKX-NEXT:    vmovdqu16 %zmm0, %zmm0 {%k1} {z}
1570; SKX-NEXT:    kshiftrq $32, %k1, %k1
1571; SKX-NEXT:    vmovdqu16 %zmm1, %zmm1 {%k1} {z}
1572; SKX-NEXT:    retq
1573  %ret = select <64 x i1> %mask, <64 x i16> %x, <64 x i16> zeroinitializer
1574  ret <64 x i16> %ret
1575}
1576
; Zero-extension written as a shuffle: interleaves the 16 bytes of %a with
; shuffle index 16 (the first byte of the zeroinitializer operand) and
; bitcasts the <32 x i8> result to <16 x i16>.
; Both llc configurations share the ALL checks, which expect one vpmovzxbw.
1577define <16 x i16> @shuffle_zext_16x8_to_16x16(<16 x i8> %a) nounwind readnone {
1578; ALL-LABEL: shuffle_zext_16x8_to_16x16:
1579; ALL:       # %bb.0:
1580; ALL-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
1581; ALL-NEXT:    retq
1582  %1 = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <32 x i32> <i32 0, i32 16, i32 1, i32 16, i32 2, i32 16, i32 3, i32 16, i32 4, i32 16, i32 5, i32 16, i32 6, i32 16, i32 7, i32 16, i32 8, i32 16, i32 9, i32 16, i32 10, i32 16, i32 11, i32 16, i32 12, i32 16, i32 13, i32 16, i32 14, i32 16, i32 15, i32 16>
1583  %2 = bitcast <32 x i8> %1 to <16 x i16>
1584  ret <16 x i16> %2
1585}
1586
; Same byte/zero interleaving shuffle as the unmasked variant, but the
; <16 x i16> result is then selected against zero under %mask.
; The KNL checks expect the mask widened with vpmovzxbw, run through
; vpsllw/vpsraw $15 and ANDed with the extended data; the SKX checks expect
; vpsllw $7 + vpmovb2m into %k1 and a zero-masked vpmovzxbw.
1587define <16 x i16> @shuffle_zext_16x8_to_16x16_mask(<16 x i8> %a, <16 x i1> %mask) nounwind readnone {
1588; KNL-LABEL: shuffle_zext_16x8_to_16x16_mask:
1589; KNL:       # %bb.0:
1590; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
1591; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
1592; KNL-NEXT:    vpsllw $15, %ymm1, %ymm1
1593; KNL-NEXT:    vpsraw $15, %ymm1, %ymm1
1594; KNL-NEXT:    vpand %ymm0, %ymm1, %ymm0
1595; KNL-NEXT:    retq
1596;
1597; SKX-LABEL: shuffle_zext_16x8_to_16x16_mask:
1598; SKX:       # %bb.0:
1599; SKX-NEXT:    vpsllw $7, %xmm1, %xmm1
1600; SKX-NEXT:    vpmovb2m %xmm1, %k1
1601; SKX-NEXT:    vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
1602; SKX-NEXT:    retq
1603  %x   = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <32 x i32> <i32 0, i32 16, i32 1, i32 16, i32 2, i32 16, i32 3, i32 16, i32 4, i32 16, i32 5, i32 16, i32 6, i32 16, i32 7, i32 16, i32 8, i32 16, i32 9, i32 16, i32 10, i32 16, i32 11, i32 16, i32 12, i32 16, i32 13, i32 16, i32 14, i32 16, i32 15, i32 16>
1604  %bc  = bitcast <32 x i8> %x to <16 x i16>
1605  %ret = select <16 x i1> %mask, <16 x i16> %bc, <16 x i16> zeroinitializer
1606  ret <16 x i16> %ret
1607}
1608
; Interleaves bytes 0..15 of the <32 x i8> input with shuffle index 32 (the
; first byte of the zeroinitializer operand) and bitcasts to <16 x i16>; the
; upper 16 bytes of %a are not referenced by the mask.
; Both llc configurations share the ALL checks, which expect one vpmovzxbw
; reading xmm0.
1609define <16 x i16> @zext_32x8_to_16x16(<32 x i8> %a) {
1610; ALL-LABEL: zext_32x8_to_16x16:
1611; ALL:       # %bb.0:
1612; ALL-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
1613; ALL-NEXT:    retq
1614  %1 = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 0, i32 32, i32 1, i32 32, i32 2, i32 32, i32 3, i32 32, i32 4, i32 32, i32 5, i32 32, i32 6, i32 32, i32 7, i32 32, i32 8, i32 32, i32 9, i32 32, i32 10, i32 32, i32 11, i32 32, i32 12, i32 32, i32 13, i32 32, i32 14, i32 32, i32 15, i32 32>
1615  %2 = bitcast <32 x i8> %1 to <16 x i16>
1616  ret <16 x i16> %2
1617}
1618
; Places each of bytes 0..7 of %a in front of three zero bytes (shuffle index
; 32 selects the zeroinitializer operand) and bitcasts the result to
; <8 x i32>.
; Both llc configurations share the ALL checks, which expect one vpmovzxbd
; reading xmm0.
1619define <8 x i32> @zext_32x8_to_8x32(<32 x i8> %a) {
1620; ALL-LABEL: zext_32x8_to_8x32:
1621; ALL:       # %bb.0:
1622; ALL-NEXT:    vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
1623; ALL-NEXT:    retq
1624  %1 = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 0, i32 32, i32 32, i32 32, i32 1, i32 32, i32 32, i32 32, i32 2, i32 32, i32 32, i32 32, i32 3, i32 32, i32 32, i32 32, i32 4, i32 32, i32 32, i32 32, i32 5, i32 32, i32 32, i32 32, i32 6, i32 32, i32 32, i32 32, i32 7, i32 32, i32 32, i32 32>
1625  %2 = bitcast <32 x i8> %1 to <8 x i32>
1626  ret <8 x i32> %2
1627}
1628
; Places each of bytes 0..3 of %a in front of seven zero bytes (shuffle index
; 32 selects the zeroinitializer operand) and bitcasts the result to
; <4 x i64>.
; Both llc configurations share the ALL checks, which expect one vpmovzxbq
; reading xmm0.
1629define <4 x i64> @zext_32x8_to_4x64(<32 x i8> %a) {
1630; ALL-LABEL: zext_32x8_to_4x64:
1631; ALL:       # %bb.0:
1632; ALL-NEXT:    vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
1633; ALL-NEXT:    retq
1634  %1 = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 0, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 1, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 2, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 3, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32>
1635  %2 = bitcast <32 x i8> %1 to <4 x i64>
1636  ret <4 x i64> %2
1637}
1638
; Interleaves words 0..7 of %a with shuffle index 16 (the first word of the
; zeroinitializer operand) and bitcasts the result to <8 x i32>.
; Both llc configurations share the ALL checks, which expect one vpmovzxwd
; reading xmm0.
1639define <8 x i32> @zext_16x16_to_8x32(<16 x i16> %a) {
1640; ALL-LABEL: zext_16x16_to_8x32:
1641; ALL:       # %bb.0:
1642; ALL-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1643; ALL-NEXT:    retq
1644  %1 = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 0, i32 16, i32 1, i32 16, i32 2, i32 16, i32 3, i32 16, i32 4, i32 16, i32 5, i32 16, i32 6, i32 16, i32 7, i32 16>
1645  %2 = bitcast <16 x i16> %1 to <8 x i32>
1646  ret <8 x i32> %2
1647}
1648
; Places each of words 0..3 of %a in front of three zero words (shuffle index
; 16 selects the zeroinitializer operand) and bitcasts the result to
; <4 x i64>.
; Both llc configurations share the ALL checks, which expect one vpmovzxwq
; reading xmm0.
1649define <4 x i64> @zext_16x16_to_4x64(<16 x i16> %a) {
1650; ALL-LABEL: zext_16x16_to_4x64:
1651; ALL:       # %bb.0:
1652; ALL-NEXT:    vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1653; ALL-NEXT:    retq
1654  %1 = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 0, i32 16, i32 16, i32 16, i32 1, i32 16, i32 16, i32 16, i32 2, i32 16, i32 16, i32 16, i32 3, i32 16, i32 16, i32 16>
1655  %2 = bitcast <16 x i16> %1 to <4 x i64>
1656  ret <4 x i64> %2
1657}
1658
; Interleaves dwords 0..3 of %a with shuffle index 8 (the first dword of the
; zeroinitializer operand) and bitcasts the result to <4 x i64>.
; Both llc configurations share the ALL checks, which expect one vpmovzxdq
; reading xmm0.
1659define <4 x i64> @zext_8x32_to_4x64(<8 x i32> %a) {
1660; ALL-LABEL: zext_8x32_to_4x64:
1661; ALL:       # %bb.0:
1662; ALL-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1663; ALL-NEXT:    retq
1664  %1 = shufflevector <8 x i32> %a, <8 x i32> zeroinitializer, <8 x i32> <i32 0, i32 8, i32 1, i32 8, i32 2, i32 8, i32 3, i32 8>
1665  %2 = bitcast <8 x i32> %1 to <4 x i64>
1666  ret <4 x i64> %2
1667}
1668
; Compares %x and %y for equality per byte and zero-extends the <64 x i1>
; result to <64 x i8>, giving 1 or 0 in each lane.
; The KNL checks expect two ymm halves, each a vpcmpeqb ANDed with a splat of
; 1; the SKX checks expect a compare into %k1 and a zero-masked vmovdqu8 load
; from a RIP-relative constant.
; Attribute group #0 is defined outside this excerpt.
1669define <64 x i8> @zext_64xi1_to_64xi8(<64 x i8> %x, <64 x i8> %y) #0 {
1670; KNL-LABEL: zext_64xi1_to_64xi8:
1671; KNL:       # %bb.0:
1672; KNL-NEXT:    vpcmpeqb %ymm2, %ymm0, %ymm0
1673; KNL-NEXT:    vmovdqa {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
1674; KNL-NEXT:    vpand %ymm2, %ymm0, %ymm0
1675; KNL-NEXT:    vpcmpeqb %ymm3, %ymm1, %ymm1
1676; KNL-NEXT:    vpand %ymm2, %ymm1, %ymm1
1677; KNL-NEXT:    retq
1678;
1679; SKX-LABEL: zext_64xi1_to_64xi8:
1680; SKX:       # %bb.0:
1681; SKX-NEXT:    vpcmpeqb %zmm1, %zmm0, %k1
1682; SKX-NEXT:    vmovdqu8 {{.*}}(%rip), %zmm0 {%k1} {z}
1683; SKX-NEXT:    retq
1684  %mask = icmp eq <64 x i8> %x, %y
1685  %1 = zext <64 x i1> %mask to <64 x i8>
1686  ret <64 x i8> %1
1687}
1688
; Compares %x and %y for equality per word and zero-extends the <32 x i1>
; result to <32 x i16>.
; The KNL checks expect two ymm halves, each a vpcmpeqw followed by
; vpsrlw $15; the SKX checks expect a compare into %k0, vpmovm2w and a single
; vpsrlw $15 on zmm0.
; Attribute group #0 is defined outside this excerpt.
1689define <32 x i16> @zext_32xi1_to_32xi16(<32 x i16> %x, <32 x i16> %y) #0 {
1690; KNL-LABEL: zext_32xi1_to_32xi16:
1691; KNL:       # %bb.0:
1692; KNL-NEXT:    vpcmpeqw %ymm2, %ymm0, %ymm0
1693; KNL-NEXT:    vpsrlw $15, %ymm0, %ymm0
1694; KNL-NEXT:    vpcmpeqw %ymm3, %ymm1, %ymm1
1695; KNL-NEXT:    vpsrlw $15, %ymm1, %ymm1
1696; KNL-NEXT:    retq
1697;
1698; SKX-LABEL: zext_32xi1_to_32xi16:
1699; SKX:       # %bb.0:
1700; SKX-NEXT:    vpcmpeqw %zmm1, %zmm0, %k0
1701; SKX-NEXT:    vpmovm2w %k0, %zmm0
1702; SKX-NEXT:    vpsrlw $15, %zmm0, %zmm0
1703; SKX-NEXT:    retq
1704  %mask = icmp eq <32 x i16> %x, %y
1705  %1 = zext <32 x i1> %mask to <32 x i16>
1706  ret <32 x i16> %1
1707}
1708
; Compares %x and %y for equality per word and zero-extends the <16 x i1>
; result to <16 x i16>.
; Both llc configurations share the ALL checks, which expect a ymm vpcmpeqw
; followed by vpsrlw $15.
; Attribute group #0 is defined outside this excerpt.
1709define <16 x i16> @zext_16xi1_to_16xi16(<16 x i16> %x, <16 x i16> %y) #0 {
1710; ALL-LABEL: zext_16xi1_to_16xi16:
1711; ALL:       # %bb.0:
1712; ALL-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
1713; ALL-NEXT:    vpsrlw $15, %ymm0, %ymm0
1714; ALL-NEXT:    retq
1715  %mask = icmp eq <16 x i16> %x, %y
1716  %1 = zext <16 x i1> %mask to <16 x i16>
1717  ret <16 x i16> %1
1718}
1719
1720
; Compares %x and %y for equality per word and zero-extends the <32 x i1>
; result to <32 x i8>, so the result elements are narrower than the inputs.
; The KNL checks expect each ymm half compared with vpcmpeqw, widened with
; vpmovsxwd and narrowed with vpmovdb, then the halves joined with
; vinserti128 and ANDed with a RIP-relative constant.
; The SKX checks expect a compare into %k1 and a zero-masked vmovdqu8 load
; from a RIP-relative constant.
; Attribute group #0 is defined outside this excerpt.
1721define <32 x i8> @zext_32xi1_to_32xi8(<32 x i16> %x, <32 x i16> %y) #0 {
1722; KNL-LABEL: zext_32xi1_to_32xi8:
1723; KNL:       # %bb.0:
1724; KNL-NEXT:    vpcmpeqw %ymm2, %ymm0, %ymm0
1725; KNL-NEXT:    vpmovsxwd %ymm0, %zmm0
1726; KNL-NEXT:    vpmovdb %zmm0, %xmm0
1727; KNL-NEXT:    vpcmpeqw %ymm3, %ymm1, %ymm1
1728; KNL-NEXT:    vpmovsxwd %ymm1, %zmm1
1729; KNL-NEXT:    vpmovdb %zmm1, %xmm1
1730; KNL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
1731; KNL-NEXT:    vpand {{.*}}(%rip), %ymm0, %ymm0
1732; KNL-NEXT:    retq
1733;
1734; SKX-LABEL: zext_32xi1_to_32xi8:
1735; SKX:       # %bb.0:
1736; SKX-NEXT:    vpcmpeqw %zmm1, %zmm0, %k1
1737; SKX-NEXT:    vmovdqu8 {{.*}}(%rip), %ymm0 {%k1} {z}
1738; SKX-NEXT:    retq
1739  %mask = icmp eq <32 x i16> %x, %y
1740  %1 = zext <32 x i1> %mask to <32 x i8>
1741  ret <32 x i8> %1
1742}
1743
; Compares two <4 x i8> vectors for equality and zero-extends the <4 x i1>
; result to <4 x i32>.
; Both llc configurations share the ALL checks, which expect each operand
; ANDed with a [255,0,0,0,...] byte constant (keeping the low byte of every
; dword) before vpcmpeqd and a final vpsrld $31.
; Attribute group #0 is defined outside this excerpt.
1744define <4 x i32> @zext_4xi1_to_4x32(<4 x i8> %x, <4 x i8> %y) #0 {
1745; ALL-LABEL: zext_4xi1_to_4x32:
1746; ALL:       # %bb.0:
1747; ALL-NEXT:    vmovdqa {{.*#+}} xmm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
1748; ALL-NEXT:    vpand %xmm2, %xmm1, %xmm1
1749; ALL-NEXT:    vpand %xmm2, %xmm0, %xmm0
1750; ALL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1751; ALL-NEXT:    vpsrld $31, %xmm0, %xmm0
1752; ALL-NEXT:    retq
1753  %mask = icmp eq <4 x i8> %x, %y
1754  %1 = zext <4 x i1> %mask to <4 x i32>
1755  ret <4 x i32> %1
1756}
1757
; Compares two <2 x i8> vectors for equality and zero-extends the <2 x i1>
; result to <2 x i64>.
; Both llc configurations share the ALL checks, which expect each operand
; ANDed with a [255,0,0,0,0,0,0,0,...] byte constant (keeping the low byte of
; every qword) before vpcmpeqq and a final vpsrlq $63.
; Attribute group #0 is defined outside this excerpt.
1758define <2 x i64> @zext_2xi1_to_2xi64(<2 x i8> %x, <2 x i8> %y) #0 {
1759; ALL-LABEL: zext_2xi1_to_2xi64:
1760; ALL:       # %bb.0:
1761; ALL-NEXT:    vmovdqa {{.*#+}} xmm2 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
1762; ALL-NEXT:    vpand %xmm2, %xmm1, %xmm1
1763; ALL-NEXT:    vpand %xmm2, %xmm0, %xmm0
1764; ALL-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
1765; ALL-NEXT:    vpsrlq $63, %xmm0, %xmm0
1766; ALL-NEXT:    retq
1767  %mask = icmp eq <2 x i8> %x, %y
1768  %1 = zext <2 x i1> %mask to <2 x i64>
1769  ret <2 x i64> %1
1770}
1771