; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX12,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX12,AVX2,AVX2-SLOW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX12,AVX2,AVX2-FAST
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512bw,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512VLBW

;
; 128-bit vectors
;

; Bitcast an i2 mask to <2 x i1>, then zero-extend to <2 x i64>.
define <2 x i64> @ext_i2_2i64(i2 %a0) {
; SSE2-SSSE3-LABEL: ext_i2_2i64:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    # kill: def $edi killed $edi def $rdi
; SSE2-SSSE3-NEXT:    movq %rdi, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,1,0,1]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [1,2]
; SSE2-SSSE3-NEXT:    pand %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    psrlq $63, %xmm0
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i2_2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    # kill: def $edi killed $edi def $rdi
; AVX1-NEXT:    vmovq %rdi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpsrlq $63, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i2_2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    # kill: def $edi killed $edi def $rdi
; AVX2-NEXT:    vmovq %rdi, %xmm0
; AVX2-NEXT:    vpbroadcastq %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpsrlq $63, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: ext_i2_2i64:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    kmovw %edi, %k1
; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT:    vpsrlq $63, %xmm0, %xmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VLBW-LABEL: ext_i2_2i64:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    kmovd %edi, %k1
; AVX512VLBW-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VLBW-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VLBW-NEXT:    vpsrlq $63, %xmm0, %xmm0
; AVX512VLBW-NEXT:    retq
  %1 = bitcast i2 %a0 to <2 x i1>
  %2 = zext <2 x i1> %1 to <2 x i64>
  ret <2 x i64> %2
}

; Bitcast an i4 mask to <4 x i1>, then zero-extend to <4 x i32>.
define <4 x i32> @ext_i4_4i32(i4 %a0) {
; SSE2-SSSE3-LABEL: ext_i4_4i32:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    psrld $31, %xmm0
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i4_4i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2,4,8]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i4_4i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastd %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2,4,8]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: ext_i4_4i32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    kmovw %edi, %k1
; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT:    vpsrld $31, %xmm0, %xmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VLBW-LABEL: ext_i4_4i32:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    kmovd %edi, %k1
; AVX512VLBW-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VLBW-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512VLBW-NEXT:    vpsrld $31, %xmm0, %xmm0
; AVX512VLBW-NEXT:    retq
  %1 = bitcast i4 %a0 to <4 x i1>
  %2 = zext <4 x i1> %1 to <4 x i32>
  ret <4 x i32> %2
}

; Bitcast an i8 mask to <8 x i1>, then zero-extend to <8 x i16>.
define <8 x i16> @ext_i8_8i16(i8 %a0) {
; SSE2-SSSE3-LABEL: ext_i8_8i16:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm0
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    psrlw $15, %xmm0
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i8_8i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpsrlw $15, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i8_8i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpsrlw $15, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: ext_i8_8i16:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    kmovw %edi, %k1
; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512F-NEXT:    vpsrlw $15, %xmm0, %xmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VLBW-LABEL: ext_i8_8i16:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    kmovd %edi, %k0
; AVX512VLBW-NEXT:    vpmovm2w %k0, %xmm0
; AVX512VLBW-NEXT:    vpsrlw $15, %xmm0, %xmm0
; AVX512VLBW-NEXT:    retq
  %1 = bitcast i8 %a0 to <8 x i1>
  %2 = zext <8 x i1> %1 to <8 x i16>
  ret <8 x i16> %2
}

; Bitcast an i16 mask to <16 x i1>, then zero-extend to <16 x i8>.
define <16 x i8> @ext_i16_16i8(i16 %a0) {
; SSE2-LABEL: ext_i16_16i8:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movd %edi, %xmm0
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,1,1,4,5,6,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    pcmpeqb %xmm1, %xmm0
; SSE2-NEXT:    psrlw $7, %xmm0
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: ext_i16_16i8:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movd %edi, %xmm0
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pcmpeqb %xmm1, %xmm0
; SSSE3-NEXT:    psrlw $7, %xmm0
; SSSE3-NEXT:    pand {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i16_16i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
; AVX1-NEXT:    vmovddup {{.*#+}} xmm1 = mem[0,0]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpsrlw $7, %xmm0, %xmm0
; AVX1-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i16_16i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [9241421688590303745,9241421688590303745]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpsrlw $7, %xmm0, %xmm0
; AVX2-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: ext_i16_16i8:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    kmovw %edi, %k1
; AVX512F-NEXT:    vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
; AVX512F-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VLBW-LABEL: ext_i16_16i8:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    kmovd %edi, %k1
; AVX512VLBW-NEXT:    vmovdqu8 {{.*}}(%rip), %xmm0 {%k1} {z}
; AVX512VLBW-NEXT:    retq
  %1 = bitcast i16 %a0 to <16 x i1>
  %2 = zext <16 x i1> %1 to <16 x i8>
  ret <16 x i8> %2
}

;
; 256-bit vectors
;

; Bitcast an i4 mask to <4 x i1>, then zero-extend to <4 x i64>.
define <4 x i64> @ext_i4_4i64(i4 %a0) {
; SSE2-SSSE3-LABEL: ext_i4_4i64:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    # kill: def $edi killed $edi def $rdi
; SSE2-SSSE3-NEXT:    movq %rdi, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[0,1,0,1]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [1,2]
; SSE2-SSSE3-NEXT:    movdqa %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    pand %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    psrlq $63, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [4,8]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm2
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm1, %xmm2
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    psrlq $63, %xmm1
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i4_4i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    # kill: def $edi killed $edi def $rdi
; AVX1-NEXT:    vmovq %rdi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpxor %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpsrlq $63, %xmm2, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vpsrlq $63, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i4_4i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    # kill: def $edi killed $edi def $rdi
; AVX2-NEXT:    vmovq %rdi, %xmm0
; AVX2-NEXT:    vpbroadcastq %xmm0, %ymm0
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,2,4,8]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpeqq %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsrlq $63, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: ext_i4_4i64:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    kmovw %edi, %k1
; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT:    vpsrlq $63, %ymm0, %ymm0
; AVX512F-NEXT:    retq
;
; AVX512VLBW-LABEL: ext_i4_4i64:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    kmovd %edi, %k1
; AVX512VLBW-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512VLBW-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
; AVX512VLBW-NEXT:    vpsrlq $63, %ymm0, %ymm0
; AVX512VLBW-NEXT:    retq
  %1 = bitcast i4 %a0 to <4 x i1>
  %2 = zext <4 x i1> %1 to <4 x i64>
  ret <4 x i64> %2
}

; Bitcast an i8 mask to <8 x i1>, then zero-extend to <8 x i32>.
define <8 x i32> @ext_i8_8i32(i8 %a0) {
; SSE2-SSSE3-LABEL: ext_i8_8i32:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [1,2,4,8]
; SSE2-SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    psrld $31, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [16,32,64,128]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    psrld $31, %xmm1
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i8_8i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpxor %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpsrld $31, %xmm2, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i8_8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastd %xmm0, %ymm0
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsrld $31, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: ext_i8_8i32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    kmovw %edi, %k1
; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT:    vpsrld $31, %ymm0, %ymm0
; AVX512F-NEXT:    retq
;
; AVX512VLBW-LABEL: ext_i8_8i32:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    kmovd %edi, %k1
; AVX512VLBW-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512VLBW-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX512VLBW-NEXT:    vpsrld $31, %ymm0, %ymm0
; AVX512VLBW-NEXT:    retq
  %1 = bitcast i8 %a0 to <8 x i1>
  %2 = zext <8 x i1> %1 to <8 x i32>
  ret <8 x i32> %2
}

; Bitcast an i16 mask to <16 x i1>, then zero-extend to <16 x i16>.
define <16 x i16> @ext_i16_16i16(i16 %a0) {
; SSE2-SSSE3-LABEL: ext_i16_16i16:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm0
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    psrlw $15, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [256,512,1024,2048,4096,8192,16384,32768]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    psrlw $15, %xmm1
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i16_16i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpxor %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpsrlw $15, %xmm2, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vpsrlw $15, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i16_16i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastw %xmm0, %ymm0
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsrlw $15, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: ext_i16_16i16:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    kmovw %edi, %k1
; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512F-NEXT:    vpsrlw $15, %ymm0, %ymm0
; AVX512F-NEXT:    retq
;
; AVX512VLBW-LABEL: ext_i16_16i16:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    kmovd %edi, %k0
; AVX512VLBW-NEXT:    vpmovm2w %k0, %ymm0
; AVX512VLBW-NEXT:    vpsrlw $15, %ymm0, %ymm0
; AVX512VLBW-NEXT:    retq
  %1 = bitcast i16 %a0 to <16 x i1>
  %2 = zext <16 x i1> %1 to <16 x i16>
  ret <16 x i16> %2
}

; Bitcast an i32 mask to <32 x i1>, then zero-extend to <32 x i8>.
define <32 x i8> @ext_i32_32i8(i32 %a0) {
; SSE2-SSSE3-LABEL: ext_i32_32i8:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm1
; SSE2-SSSE3-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm1[0,0,1,1,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqb %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    psrlw $7, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; SSE2-SSSE3-NEXT:    pand %xmm3, %xmm0
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[2,2,3,3,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqb %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    psrlw $7, %xmm1
; SSE2-SSSE3-NEXT:    pand %xmm3, %xmm1
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i32_32i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[0,0,1,1,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[2,2,3,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpxor %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpsrlw $7, %xmm1, %xmm1
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; AVX1-NEXT:    vpand %xmm4, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vpsrlw $7, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-SLOW-LABEL: ext_i32_32i8:
; AVX2-SLOW:       # %bb.0:
; AVX2-SLOW-NEXT:    vmovd %edi, %xmm0
; AVX2-SLOW-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[0,0,1,1,4,5,6,7]
; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[2,2,3,3,4,5,6,7]
; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX2-SLOW-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-SLOW-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
; AVX2-SLOW-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-SLOW-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
; AVX2-SLOW-NEXT:    vpsrlw $7, %ymm0, %ymm0
; AVX2-SLOW-NEXT:    vpand {{.*}}(%rip), %ymm0, %ymm0
; AVX2-SLOW-NEXT:    retq
;
; AVX2-FAST-LABEL: ext_i32_32i8:
; AVX2-FAST:       # %bb.0:
; AVX2-FAST-NEXT:    vmovd %edi, %xmm0
; AVX2-FAST-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,0,1,2,3,2,3,2,3,2,3]
; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[4,5,4,5,4,5,4,5,6,7,6,7,6,7,6,7]
; AVX2-FAST-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-FAST-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
; AVX2-FAST-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-FAST-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
; AVX2-FAST-NEXT:    vpsrlw $7, %ymm0, %ymm0
; AVX2-FAST-NEXT:    vpand {{.*}}(%rip), %ymm0, %ymm0
; AVX2-FAST-NEXT:    retq
;
; AVX512F-LABEL: ext_i32_32i8:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    kmovw %edi, %k1
; AVX512F-NEXT:    shrl $16, %edi
; AVX512F-NEXT:    kmovw %edi, %k2
; AVX512F-NEXT:    movl {{.*}}(%rip), %eax
; AVX512F-NEXT:    vpbroadcastd %eax, %zmm0 {%k1} {z}
; AVX512F-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512F-NEXT:    vpbroadcastd %eax, %zmm1 {%k2} {z}
; AVX512F-NEXT:    vpmovdb %zmm1, %xmm1
; AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT:    retq
;
; AVX512VLBW-LABEL: ext_i32_32i8:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    kmovd %edi, %k1
; AVX512VLBW-NEXT:    vmovdqu8 {{.*}}(%rip), %ymm0 {%k1} {z}
; AVX512VLBW-NEXT:    retq
  %1 = bitcast i32 %a0 to <32 x i1>
  %2 = zext <32 x i1> %1 to <32 x i8>
  ret <32 x i8> %2
}

;
; 512-bit vectors
;

; Bitcast an i8 mask to <8 x i1>, then zero-extend to <8 x i64>.
define <8 x i64> @ext_i8_8i64(i8 %a0) {
; SSE2-SSSE3-LABEL: ext_i8_8i64:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    # kill: def $edi killed $edi def $rdi
; SSE2-SSSE3-NEXT:    movq %rdi, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm0[0,1,0,1]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [1,2]
; SSE2-SSSE3-NEXT:    movdqa %xmm4, %xmm1
; SSE2-SSSE3-NEXT:    pand %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    psrlq $63, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [4,8]
; SSE2-SSSE3-NEXT:    movdqa %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm2
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm1, %xmm2
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    psrlq $63, %xmm1
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [16,32]
; SSE2-SSSE3-NEXT:    movdqa %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm3
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm2, %xmm3
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm3, %xmm2
; SSE2-SSSE3-NEXT:    psrlq $63, %xmm2
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [64,128]
; SSE2-SSSE3-NEXT:    pand %xmm3, %xmm4
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm3, %xmm4
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm4[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    psrlq $63, %xmm3
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i8_8i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    # kill: def $edi killed $edi def $rdi
; AVX1-NEXT:    vmovq %rdi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm1
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm1, %ymm0
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqq %xmm2, %xmm0, %xmm3
; AVX1-NEXT:    vpcmpeqd %xmm4, %xmm4, %xmm4
; AVX1-NEXT:    vpxor %xmm4, %xmm3, %xmm3
; AVX1-NEXT:    vpsrlq $63, %xmm3, %xmm3
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpcmpeqq %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vpsrlq $63, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm3, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm1, %ymm1
; AVX1-NEXT:    vpcmpeqq %xmm2, %xmm1, %xmm3
; AVX1-NEXT:    vpxor %xmm4, %xmm3, %xmm3
; AVX1-NEXT:    vpsrlq $63, %xmm3, %xmm3
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT:    vpcmpeqq %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpxor %xmm4, %xmm1, %xmm1
; AVX1-NEXT:    vpsrlq $63, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm3, %ymm1
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i8_8i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    # kill: def $edi killed $edi def $rdi
; AVX2-NEXT:    vmovq %rdi, %xmm0
; AVX2-NEXT:    vpbroadcastq %xmm0, %ymm1
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm0 = [1,2,4,8]
; AVX2-NEXT:    vpand %ymm0, %ymm1, %ymm2
; AVX2-NEXT:    vpcmpeqq %ymm0, %ymm2, %ymm0
; AVX2-NEXT:    vpsrlq $63, %ymm0, %ymm0
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [16,32,64,128]
; AVX2-NEXT:    vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    vpcmpeqq %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    vpsrlq $63, %ymm1, %ymm1
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: ext_i8_8i64:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    kmovw %edi, %k1
; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT:    vpsrlq $63, %zmm0, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512VLBW-LABEL: ext_i8_8i64:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    kmovd %edi, %k1
; AVX512VLBW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512VLBW-NEXT:    vpsrlq $63, %zmm0, %zmm0
; AVX512VLBW-NEXT:    retq
  %1 = bitcast i8 %a0 to <8 x i1>
  %2 = zext <8 x i1> %1 to <8 x i64>
  ret <8 x i64> %2
}

; Bitcast an i16 mask to <16 x i1>, then zero-extend to <16 x i32>.
define <16 x i32> @ext_i16_16i32(i16 %a0) {
; SSE2-SSSE3-LABEL: ext_i16_16i32:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8]
; SSE2-SSSE3-NEXT:    movdqa %xmm3, %xmm0
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    psrld $31, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [16,32,64,128]
; SSE2-SSSE3-NEXT:    movdqa %xmm3, %xmm1
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    psrld $31, %xmm1
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [256,512,1024,2048]
; SSE2-SSSE3-NEXT:    movdqa %xmm3, %xmm2
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    psrld $31, %xmm2
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [4096,8192,16384,32768]
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    psrld $31, %xmm3
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i16_16i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm1
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm1, %ymm0
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm3
; AVX1-NEXT:    vpcmpeqd %xmm4, %xmm4, %xmm4
; AVX1-NEXT:    vpxor %xmm4, %xmm3, %xmm3
; AVX1-NEXT:    vpsrld $31, %xmm3, %xmm3
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm3, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm1, %ymm1
; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm1, %xmm3
; AVX1-NEXT:    vpxor %xmm4, %xmm3, %xmm3
; AVX1-NEXT:    vpsrld $31, %xmm3, %xmm3
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpxor %xmm4, %xmm1, %xmm1
; AVX1-NEXT:    vpsrld $31, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm3, %ymm1
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i16_16i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastd %xmm0, %ymm1
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm0 = [1,2,4,8,16,32,64,128]
; AVX2-NEXT:    vpand %ymm0, %ymm1, %ymm2
; AVX2-NEXT:    vpcmpeqd %ymm0, %ymm2, %ymm0
; AVX2-NEXT:    vpsrld $31, %ymm0, %ymm0
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [256,512,1024,2048,4096,8192,16384,32768]
; AVX2-NEXT:    vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    vpcmpeqd %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    vpsrld $31, %ymm1, %ymm1
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: ext_i16_16i32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    kmovw %edi, %k1
; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT:    vpsrld $31, %zmm0, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512VLBW-LABEL: ext_i16_16i32:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    kmovd %edi, %k1
; AVX512VLBW-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512VLBW-NEXT:    vpsrld $31, %zmm0, %zmm0
; AVX512VLBW-NEXT:    retq
  %1 = bitcast i16 %a0 to <16 x i1>
  %2 = zext <16 x i1> %1 to <16 x i32>
  ret <16 x i32> %2
}

; Lower a 32-bit mask to a <32 x i16> vector of 0/1 lanes: bitcast i32 to
; <32 x i1>, then zext each bit to an i16 element. CHECK lines below are
; autogenerated expectations for each target feature set; do not hand-edit.
define <32 x i16> @ext_i32_32i16(i32 %a0) {
; SSE2-SSSE3-LABEL: ext_i32_32i16:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm2
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm2[0,0,2,3,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm4, %xmm0
; SSE2-SSSE3-NEXT:    psrlw $15, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm5 = [256,512,1024,2048,4096,8192,16384,32768]
; SSE2-SSSE3-NEXT:    pand %xmm5, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm5, %xmm1
; SSE2-SSSE3-NEXT:    psrlw $15, %xmm1
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm2 = xmm2[1,1,2,3,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,0,0]
; SSE2-SSSE3-NEXT:    movdqa %xmm3, %xmm2
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    psrlw $15, %xmm2
; SSE2-SSSE3-NEXT:    pand %xmm5, %xmm3
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm5, %xmm3
; SSE2-SSSE3-NEXT:    psrlw $15, %xmm3
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i32_32i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm1
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm1[0,0,2,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    vmovaps {{.*#+}} ymm2 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpeqw %xmm3, %xmm0, %xmm4
; AVX1-NEXT:    vpcmpeqd %xmm5, %xmm5, %xmm5
; AVX1-NEXT:    vpxor %xmm5, %xmm4, %xmm4
; AVX1-NEXT:    vpsrlw $15, %xmm4, %xmm4
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpcmpeqw %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm5, %xmm0, %xmm0
; AVX1-NEXT:    vpsrlw $15, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm4, %ymm0
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[1,1,2,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm1
; AVX1-NEXT:    vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT:    vpcmpeqw %xmm3, %xmm1, %xmm2
; AVX1-NEXT:    vpxor %xmm5, %xmm2, %xmm2
; AVX1-NEXT:    vpsrlw $15, %xmm2, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT:    vpcmpeqw %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpxor %xmm5, %xmm1, %xmm1
; AVX1-NEXT:    vpsrlw $15, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm2, %ymm1
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i32_32i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastw %xmm0, %ymm0
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsrlw $15, %ymm0, %ymm0
; AVX2-NEXT:    shrl $16, %edi
; AVX2-NEXT:    vmovd %edi, %xmm2
; AVX2-NEXT:    vpbroadcastw %xmm2, %ymm2
; AVX2-NEXT:    vpand %ymm1, %ymm2, %ymm2
; AVX2-NEXT:    vpcmpeqw %ymm1, %ymm2, %ymm1
; AVX2-NEXT:    vpsrlw $15, %ymm1, %ymm1
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: ext_i32_32i16:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    kmovw %edi, %k1
; AVX512F-NEXT:    shrl $16, %edi
; AVX512F-NEXT:    kmovw %edi, %k2
; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512F-NEXT:    vpsrlw $15, %ymm0, %ymm0
; AVX512F-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
; AVX512F-NEXT:    vpmovdw %zmm1, %ymm1
; AVX512F-NEXT:    vpsrlw $15, %ymm1, %ymm1
; AVX512F-NEXT:    retq
;
; AVX512VLBW-LABEL: ext_i32_32i16:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    kmovd %edi, %k0
; AVX512VLBW-NEXT:    vpmovm2w %k0, %zmm0
; AVX512VLBW-NEXT:    vpsrlw $15, %zmm0, %zmm0
; AVX512VLBW-NEXT:    retq
  %1 = bitcast i32 %a0 to <32 x i1>
  %2 = zext <32 x i1> %1 to <32 x i16>
  ret <32 x i16> %2
}
822
; Lower a 64-bit mask to a <64 x i8> vector of 0/1 lanes: bitcast i64 to
; <64 x i1>, then zext each bit to an i8 element. CHECK lines below are
; autogenerated expectations for each target feature set; do not hand-edit.
define <64 x i8> @ext_i64_64i8(i64 %a0) {
; SSE2-SSSE3-LABEL: ext_i64_64i8:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movq %rdi, %xmm3
; SSE2-SSSE3-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm3[0,0,1,1,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqb %xmm4, %xmm0
; SSE2-SSSE3-NEXT:    psrlw $7, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm5 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; SSE2-SSSE3-NEXT:    pand %xmm5, %xmm0
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm1 = xmm3[2,2,3,3,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqb %xmm4, %xmm1
; SSE2-SSSE3-NEXT:    psrlw $7, %xmm1
; SSE2-SSSE3-NEXT:    pand %xmm5, %xmm1
; SSE2-SSSE3-NEXT:    pshufhw {{.*#+}} xmm2 = xmm3[0,1,2,3,4,4,5,5]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[2,2,3,3]
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    pcmpeqb %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    psrlw $7, %xmm2
; SSE2-SSSE3-NEXT:    pand %xmm5, %xmm2
; SSE2-SSSE3-NEXT:    pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,6,6,7,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[2,2,3,3]
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    pcmpeqb %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    psrlw $7, %xmm3
; SSE2-SSSE3-NEXT:    pand %xmm5, %xmm3
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i64_64i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovq %rdi, %xmm0
; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm1 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm1[0,0,1,1,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm2 = xmm1[2,2,3,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vmovaps {{.*#+}} ymm2 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpxor %xmm4, %xmm4, %xmm4
; AVX1-NEXT:    vpcmpeqb %xmm4, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpeqd %xmm5, %xmm5, %xmm5
; AVX1-NEXT:    vpxor %xmm5, %xmm3, %xmm3
; AVX1-NEXT:    vpsrlw $7, %xmm3, %xmm3
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm6 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; AVX1-NEXT:    vpand %xmm6, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpeqb %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm5, %xmm0, %xmm0
; AVX1-NEXT:    vpsrlw $7, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm6, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX1-NEXT:    vpshufhw {{.*#+}} xmm3 = xmm1[0,1,2,3,4,4,5,5]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm3[2,2,3,3]
; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,6,6,7,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm3, %ymm1
; AVX1-NEXT:    vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpcmpeqb %xmm4, %xmm2, %xmm2
; AVX1-NEXT:    vpxor %xmm5, %xmm2, %xmm2
; AVX1-NEXT:    vpsrlw $7, %xmm2, %xmm2
; AVX1-NEXT:    vpand %xmm6, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqb %xmm4, %xmm1, %xmm1
; AVX1-NEXT:    vpxor %xmm5, %xmm1, %xmm1
; AVX1-NEXT:    vpsrlw $7, %xmm1, %xmm1
; AVX1-NEXT:    vpand %xmm6, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT:    retq
;
; AVX2-SLOW-LABEL: ext_i64_64i8:
; AVX2-SLOW:       # %bb.0:
; AVX2-SLOW-NEXT:    vmovq %rdi, %xmm0
; AVX2-SLOW-NEXT:    vpunpcklbw {{.*#+}} xmm1 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm1[0,0,1,1,4,5,6,7]
; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} xmm2 = xmm1[2,2,3,3,4,5,6,7]
; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
; AVX2-SLOW-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
; AVX2-SLOW-NEXT:    vpbroadcastq {{.*#+}} ymm2 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
; AVX2-SLOW-NEXT:    vpand %ymm2, %ymm0, %ymm0
; AVX2-SLOW-NEXT:    vpcmpeqb %ymm2, %ymm0, %ymm0
; AVX2-SLOW-NEXT:    vpsrlw $7, %ymm0, %ymm0
; AVX2-SLOW-NEXT:    vmovdqa {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; AVX2-SLOW-NEXT:    vpand %ymm3, %ymm0, %ymm0
; AVX2-SLOW-NEXT:    vpshufhw {{.*#+}} xmm4 = xmm1[0,1,2,3,4,4,5,5]
; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} xmm4 = xmm4[2,2,3,3]
; AVX2-SLOW-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,6,6,7,7]
; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
; AVX2-SLOW-NEXT:    vinserti128 $1, %xmm1, %ymm4, %ymm1
; AVX2-SLOW-NEXT:    vpand %ymm2, %ymm1, %ymm1
; AVX2-SLOW-NEXT:    vpcmpeqb %ymm2, %ymm1, %ymm1
; AVX2-SLOW-NEXT:    vpsrlw $7, %ymm1, %ymm1
; AVX2-SLOW-NEXT:    vpand %ymm3, %ymm1, %ymm1
; AVX2-SLOW-NEXT:    retq
;
; AVX2-FAST-LABEL: ext_i64_64i8:
; AVX2-FAST:       # %bb.0:
; AVX2-FAST-NEXT:    vmovq %rdi, %xmm0
; AVX2-FAST-NEXT:    vpunpcklbw {{.*#+}} xmm1 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm1[0,1,0,1,0,1,0,1,2,3,2,3,2,3,2,3]
; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm2 = xmm1[4,5,4,5,4,5,4,5,6,7,6,7,6,7,6,7]
; AVX2-FAST-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
; AVX2-FAST-NEXT:    vpbroadcastq {{.*#+}} ymm2 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
; AVX2-FAST-NEXT:    vpand %ymm2, %ymm0, %ymm0
; AVX2-FAST-NEXT:    vpcmpeqb %ymm2, %ymm0, %ymm0
; AVX2-FAST-NEXT:    vpsrlw $7, %ymm0, %ymm0
; AVX2-FAST-NEXT:    vmovdqa {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; AVX2-FAST-NEXT:    vpand %ymm3, %ymm0, %ymm0
; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm4 = xmm1[8,9,8,9,8,9,8,9,10,11,10,11,10,11,10,11]
; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[12,13,12,13,12,13,12,13,14,15,14,15,14,15,14,15]
; AVX2-FAST-NEXT:    vinserti128 $1, %xmm1, %ymm4, %ymm1
; AVX2-FAST-NEXT:    vpand %ymm2, %ymm1, %ymm1
; AVX2-FAST-NEXT:    vpcmpeqb %ymm2, %ymm1, %ymm1
; AVX2-FAST-NEXT:    vpsrlw $7, %ymm1, %ymm1
; AVX2-FAST-NEXT:    vpand %ymm3, %ymm1, %ymm1
; AVX2-FAST-NEXT:    retq
;
; AVX512F-LABEL: ext_i64_64i8:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    movq %rdi, %rax
; AVX512F-NEXT:    movq %rdi, %rcx
; AVX512F-NEXT:    kmovw %edi, %k1
; AVX512F-NEXT:    movl %edi, %edx
; AVX512F-NEXT:    shrl $16, %edx
; AVX512F-NEXT:    shrq $32, %rax
; AVX512F-NEXT:    shrq $48, %rcx
; AVX512F-NEXT:    kmovw %ecx, %k2
; AVX512F-NEXT:    kmovw %eax, %k3
; AVX512F-NEXT:    kmovw %edx, %k4
; AVX512F-NEXT:    movl {{.*}}(%rip), %eax
; AVX512F-NEXT:    vpbroadcastd %eax, %zmm0 {%k1} {z}
; AVX512F-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512F-NEXT:    vpbroadcastd %eax, %zmm1 {%k4} {z}
; AVX512F-NEXT:    vpmovdb %zmm1, %xmm1
; AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT:    vpbroadcastd %eax, %zmm1 {%k3} {z}
; AVX512F-NEXT:    vpmovdb %zmm1, %xmm1
; AVX512F-NEXT:    vpbroadcastd %eax, %zmm2 {%k2} {z}
; AVX512F-NEXT:    vpmovdb %zmm2, %xmm2
; AVX512F-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX512F-NEXT:    retq
;
; AVX512VLBW-LABEL: ext_i64_64i8:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    kmovq %rdi, %k1
; AVX512VLBW-NEXT:    vmovdqu8 {{.*}}(%rip), %zmm0 {%k1} {z}
; AVX512VLBW-NEXT:    retq
  %1 = bitcast i64 %a0 to <64 x i1>
  %2 = zext <64 x i1> %1 to <64 x i8>
  ret <64 x i8> %2
}
980