; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX12,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX12,AVX2,AVX2-SLOW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX12,AVX2,AVX2-FAST
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512bw,+fast-variable-shuffle | FileCheck %s --check-prefix=AVX512

;
; 128-bit vectors
;

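; Sign-extend an i2 mask to <2 x i64>. Pre-AVX512 targets broadcast the
; scalar and test each lane against its bit with pand+pcmpeq; SSE2 has no
; 64-bit pcmpeqq, so it compares 32-bit halves and combines them with
; pshufd+pand. AVX512 moves the bits into %k1 and zero-masks an all-ones
; vector.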
define <2 x i64> @ext_i2_2i64(i2 %a0) {
; SSE2-SSSE3-LABEL: ext_i2_2i64:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    # kill: def $edi killed $edi def $rdi
; SSE2-SSSE3-NEXT:    movq %rdi, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,1,0,1]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [1,2]
; SSE2-SSSE3-NEXT:    pand %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i2_2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    # kill: def $edi killed $edi def $rdi
; AVX1-NEXT:    vmovq %rdi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i2_2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    # kill: def $edi killed $edi def $rdi
; AVX2-NEXT:    vmovq %rdi, %xmm0
; AVX2-NEXT:    vpbroadcastq %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i2_2i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovd %edi, %k1
; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT:    retq
  %1 = bitcast i2 %a0 to <2 x i1>
  %2 = sext <2 x i1> %1 to <2 x i64>
  ret <2 x i64> %2
}

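; Sign-extend an i4 mask to <4 x i32>: a single broadcast-and-compare against
; the power-of-two constant [1,2,4,8]; pcmpeqd is available everywhere, so no
; lane-splitting tricks are needed.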
define <4 x i32> @ext_i4_4i32(i4 %a0) {
; SSE2-SSSE3-LABEL: ext_i4_4i32:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i4_4i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2,4,8]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i4_4i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastd %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2,4,8]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i4_4i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovd %edi, %k1
; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT:    retq
  %1 = bitcast i4 %a0 to <4 x i1>
  %2 = sext <4 x i1> %1 to <4 x i32>
  ret <4 x i32> %2
}

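; Sign-extend an i8 mask to <8 x i16>. The same and/compare pattern with word
; masks [1..128]; with AVX512BW the mask register is expanded directly via
; vpmovm2w.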
define <8 x i16> @ext_i8_8i16(i8 %a0) {
; SSE2-SSSE3-LABEL: ext_i8_8i16:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm0
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i8_8i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i8_8i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i8_8i16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovd %edi, %k0
; AVX512-NEXT:    vpmovm2w %k0, %xmm0
; AVX512-NEXT:    retq
  %1 = bitcast i8 %a0 to <8 x i1>
  %2 = sext <8 x i1> %1 to <8 x i16>
  ret <8 x i16> %2
}

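; Sign-extend an i16 mask to <16 x i8>. The mask bits span two bytes, so byte
; 0 is splatted into the low eight lanes and byte 1 into the high eight before
; comparing; 9241421688590303745 is 0x8040201008040201, the byte mask
; [1,2,4,...,128] broadcast as a qword.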
define <16 x i8> @ext_i16_16i8(i16 %a0) {
; SSE2-LABEL: ext_i16_16i8:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movd %edi, %xmm0
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,1,1,4,5,6,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    pcmpeqb %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: ext_i16_16i8:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movd %edi, %xmm0
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pcmpeqb %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i16_16i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
; AVX1-NEXT:    vmovddup {{.*#+}} xmm1 = mem[0,0]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i16_16i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [9241421688590303745,9241421688590303745]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i16_16i8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovd %edi, %k0
; AVX512-NEXT:    vpmovm2b %k0, %xmm0
; AVX512-NEXT:    retq
  %1 = bitcast i16 %a0 to <16 x i1>
  %2 = sext <16 x i1> %1 to <16 x i8>
  ret <16 x i8> %2
}

;
; 256-bit vectors
;

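; First 256-bit case: i4 to <4 x i64>. AVX2 does one ymm and/compare; AVX1 has
; no 256-bit integer compares, so it compares each xmm half against zero and
; inverts with an all-ones xor; SSE splits into two 128-bit halves with the
; mask constants [1,2] and [4,8].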
define <4 x i64> @ext_i4_4i64(i4 %a0) {
; SSE2-SSSE3-LABEL: ext_i4_4i64:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    # kill: def $edi killed $edi def $rdi
; SSE2-SSSE3-NEXT:    movq %rdi, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[0,1,0,1]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [1,2]
; SSE2-SSSE3-NEXT:    movdqa %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    pand %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [4,8]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm2
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm1, %xmm2
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i4_4i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    # kill: def $edi killed $edi def $rdi
; AVX1-NEXT:    vmovq %rdi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqq %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpxor %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqq %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i4_4i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    # kill: def $edi killed $edi def $rdi
; AVX2-NEXT:    vmovq %rdi, %xmm0
; AVX2-NEXT:    vpbroadcastq %xmm0, %ymm0
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,2,4,8]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpeqq %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i4_4i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovd %edi, %k1
; AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
; AVX512-NEXT:    retq
  %1 = bitcast i4 %a0 to <4 x i1>
  %2 = sext <4 x i1> %1 to <4 x i64>
  ret <4 x i64> %2
}

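; i8 to <8 x i32>: SSE needs two pcmpeqd blocks ([1,2,4,8] and
; [16,32,64,128]); AVX1 reuses the compare-against-zero-and-invert trick on
; the split halves; AVX2 and AVX512 stay within a single 256-bit operation.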
define <8 x i32> @ext_i8_8i32(i8 %a0) {
; SSE2-SSSE3-LABEL: ext_i8_8i32:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [1,2,4,8]
; SSE2-SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [16,32,64,128]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i8_8i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpxor %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i8_8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastd %xmm0, %ymm0
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i8_8i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovd %edi, %k1
; AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX512-NEXT:    retq
  %1 = bitcast i8 %a0 to <8 x i1>
  %2 = sext <8 x i1> %1 to <8 x i32>
  ret <8 x i32> %2
}

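; i16 to <16 x i16>: the word masks run up to 32768, so SSE splits the
; constant into [1..128] and [256..32768]; AVX2 compares all sixteen words at
; once and AVX512BW expands the mask with vpmovm2w.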
define <16 x i16> @ext_i16_16i16(i16 %a0) {
; SSE2-SSSE3-LABEL: ext_i16_16i16:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm0
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [256,512,1024,2048,4096,8192,16384,32768]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i16_16i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqw %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpxor %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqw %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i16_16i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastw %xmm0, %ymm0
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i16_16i16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovd %edi, %k0
; AVX512-NEXT:    vpmovm2w %k0, %ymm0
; AVX512-NEXT:    retq
  %1 = bitcast i16 %a0 to <16 x i1>
  %2 = sext <16 x i1> %1 to <16 x i16>
  ret <16 x i16> %2
}

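; i32 to <32 x i8>: the first point where AVX2-SLOW and AVX2-FAST diverge.
; With +fast-variable-shuffle the byte splats are built with vpshufb instead
; of the fixed pshuflw/pshufd chain.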
define <32 x i8> @ext_i32_32i8(i32 %a0) {
; SSE2-SSSE3-LABEL: ext_i32_32i8:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm1
; SSE2-SSSE3-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm1[0,0,1,1,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqb %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[2,2,3,3,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqb %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i32_32i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[0,0,1,1,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[2,2,3,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpxor %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-SLOW-LABEL: ext_i32_32i8:
; AVX2-SLOW:       # %bb.0:
; AVX2-SLOW-NEXT:    vmovd %edi, %xmm0
; AVX2-SLOW-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[0,0,1,1,4,5,6,7]
; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[2,2,3,3,4,5,6,7]
; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX2-SLOW-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-SLOW-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
; AVX2-SLOW-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-SLOW-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
; AVX2-SLOW-NEXT:    retq
;
; AVX2-FAST-LABEL: ext_i32_32i8:
; AVX2-FAST:       # %bb.0:
; AVX2-FAST-NEXT:    vmovd %edi, %xmm0
; AVX2-FAST-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,0,1,2,3,2,3,2,3,2,3]
; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[4,5,4,5,4,5,4,5,6,7,6,7,6,7,6,7]
; AVX2-FAST-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-FAST-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
; AVX2-FAST-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-FAST-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
; AVX2-FAST-NEXT:    retq
;
; AVX512-LABEL: ext_i32_32i8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovd %edi, %k0
; AVX512-NEXT:    vpmovm2b %k0, %ymm0
; AVX512-NEXT:    retq
  %1 = bitcast i32 %a0 to <32 x i1>
  %2 = sext <32 x i1> %1 to <32 x i8>
  ret <32 x i8> %2
}

;
; 512-bit vectors
;

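; First 512-bit case: i8 to <8 x i64>. SSE fans one broadcast out into four
; xmm results and AVX1/AVX2 into two ymm results; AVX512 materializes
; all-ones lanes under the mask with a single zero-masked vpternlogq
; ($255 = all-ones truth table).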
define <8 x i64> @ext_i8_8i64(i8 %a0) {
; SSE2-SSSE3-LABEL: ext_i8_8i64:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    # kill: def $edi killed $edi def $rdi
; SSE2-SSSE3-NEXT:    movq %rdi, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm0[0,1,0,1]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [1,2]
; SSE2-SSSE3-NEXT:    movdqa %xmm4, %xmm1
; SSE2-SSSE3-NEXT:    pand %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [4,8]
; SSE2-SSSE3-NEXT:    movdqa %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm2
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm1, %xmm2
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [16,32]
; SSE2-SSSE3-NEXT:    movdqa %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm3
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm2, %xmm3
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm3, %xmm2
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [64,128]
; SSE2-SSSE3-NEXT:    pand %xmm3, %xmm4
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm3, %xmm4
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm4[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i8_8i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    # kill: def $edi killed $edi def $rdi
; AVX1-NEXT:    vmovq %rdi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm1
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm1, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpeqq %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm4, %xmm4, %xmm4
; AVX1-NEXT:    vpxor %xmm4, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqq %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm1, %ymm1
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpcmpeqq %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpxor %xmm4, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqq %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpxor %xmm4, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i8_8i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    # kill: def $edi killed $edi def $rdi
; AVX2-NEXT:    vmovq %rdi, %xmm0
; AVX2-NEXT:    vpbroadcastq %xmm0, %ymm1
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm0 = [1,2,4,8]
; AVX2-NEXT:    vpand %ymm0, %ymm1, %ymm2
; AVX2-NEXT:    vpcmpeqq %ymm0, %ymm2, %ymm0
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [16,32,64,128]
; AVX2-NEXT:    vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    vpcmpeqq %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i8_8i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovd %edi, %k1
; AVX512-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512-NEXT:    retq
  %1 = bitcast i8 %a0 to <8 x i1>
  %2 = sext <8 x i1> %1 to <8 x i64>
  ret <8 x i64> %2
}

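; i16 to <16 x i32>: same shape with dword lanes; AVX512 uses the vpternlogd
; form of the masked all-ones idiom.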
define <16 x i32> @ext_i16_16i32(i16 %a0) {
; SSE2-SSSE3-LABEL: ext_i16_16i32:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8]
; SSE2-SSSE3-NEXT:    movdqa %xmm3, %xmm0
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [16,32,64,128]
; SSE2-SSSE3-NEXT:    movdqa %xmm3, %xmm1
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [256,512,1024,2048]
; SSE2-SSSE3-NEXT:    movdqa %xmm3, %xmm2
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [4096,8192,16384,32768]
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i16_16i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm1
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm1, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm4, %xmm4, %xmm4
; AVX1-NEXT:    vpxor %xmm4, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm1, %ymm1
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpxor %xmm4, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpxor %xmm4, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i16_16i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastd %xmm0, %ymm1
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm0 = [1,2,4,8,16,32,64,128]
; AVX2-NEXT:    vpand %ymm0, %ymm1, %ymm2
; AVX2-NEXT:    vpcmpeqd %ymm0, %ymm2, %ymm0
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [256,512,1024,2048,4096,8192,16384,32768]
; AVX2-NEXT:    vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    vpcmpeqd %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i16_16i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovd %edi, %k1
; AVX512-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512-NEXT:    retq
  %1 = bitcast i16 %a0 to <16 x i1>
  %2 = sext <16 x i1> %1 to <16 x i32>
  ret <16 x i32> %2
}

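; i32 to <32 x i16>: one 16-bit broadcast only covers half the mask bits, so
; AVX2 handles the low half, then shifts the scalar right by 16 and repeats
; the broadcast-and-compare for the high half; AVX512BW still needs only
; kmovd + vpmovm2w.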
define <32 x i16> @ext_i32_32i16(i32 %a0) {
; SSE2-SSSE3-LABEL: ext_i32_32i16:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm2
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm2[0,0,2,3,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm4, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm5 = [256,512,1024,2048,4096,8192,16384,32768]
; SSE2-SSSE3-NEXT:    pand %xmm5, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm5, %xmm1
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm2 = xmm2[1,1,2,3,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,0,0]
; SSE2-SSSE3-NEXT:    movdqa %xmm3, %xmm2
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    pand %xmm5, %xmm3
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm5, %xmm3
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i32_32i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm1
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm1[0,0,2,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    vmovaps {{.*#+}} ymm2 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpxor %xmm4, %xmm4, %xmm4
; AVX1-NEXT:    vpcmpeqw %xmm4, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpeqd %xmm5, %xmm5, %xmm5
; AVX1-NEXT:    vpxor %xmm5, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpeqw %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm5, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[1,1,2,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm1
; AVX1-NEXT:    vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpcmpeqw %xmm4, %xmm2, %xmm2
; AVX1-NEXT:    vpxor %xmm5, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqw %xmm4, %xmm1, %xmm1
; AVX1-NEXT:    vpxor %xmm5, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i32_32i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastw %xmm0, %ymm0
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    shrl $16, %edi
; AVX2-NEXT:    vmovd %edi, %xmm2
; AVX2-NEXT:    vpbroadcastw %xmm2, %ymm2
; AVX2-NEXT:    vpand %ymm1, %ymm2, %ymm2
; AVX2-NEXT:    vpcmpeqw %ymm1, %ymm2, %ymm1
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i32_32i16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovd %edi, %k0
; AVX512-NEXT:    vpmovm2w %k0, %zmm0
; AVX512-NEXT:    retq
  %1 = bitcast i32 %a0 to <32 x i1>
  %2 = sext <32 x i1> %1 to <32 x i16>
  ret <32 x i16> %2
}

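; The widest case: i64 to <64 x i8>. Every pre-AVX512 target unpacks the
; eight mask bytes and splats each across its byte lanes before the
; and/compare; AVX512BW consumes the full 64-bit mask directly with
; kmovq + vpmovm2b.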
define <64 x i8> @ext_i64_64i8(i64 %a0) {
; SSE2-SSSE3-LABEL: ext_i64_64i8:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movq %rdi, %xmm3
; SSE2-SSSE3-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm3[0,0,1,1,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqb %xmm4, %xmm0
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm1 = xmm3[2,2,3,3,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqb %xmm4, %xmm1
; SSE2-SSSE3-NEXT:    pshufhw {{.*#+}} xmm2 = xmm3[0,1,2,3,4,4,5,5]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[2,2,3,3]
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    pcmpeqb %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,6,6,7,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[2,2,3,3]
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    pcmpeqb %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i64_64i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovq %rdi, %xmm0
; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm1 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm1[0,0,1,1,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm2 = xmm1[2,2,3,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vmovaps {{.*#+}} ymm2 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpxor %xmm4, %xmm4, %xmm4
; AVX1-NEXT:    vpcmpeqb %xmm4, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpeqd %xmm5, %xmm5, %xmm5
; AVX1-NEXT:    vpxor %xmm5, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpeqb %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm5, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX1-NEXT:    vpshufhw {{.*#+}} xmm3 = xmm1[0,1,2,3,4,4,5,5]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm3[2,2,3,3]
; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,6,6,7,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm3, %ymm1
; AVX1-NEXT:    vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpcmpeqb %xmm4, %xmm2, %xmm2
; AVX1-NEXT:    vpxor %xmm5, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqb %xmm4, %xmm1, %xmm1
; AVX1-NEXT:    vpxor %xmm5, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT:    retq
;
; AVX2-SLOW-LABEL: ext_i64_64i8:
; AVX2-SLOW:       # %bb.0:
; AVX2-SLOW-NEXT:    vmovq %rdi, %xmm0
; AVX2-SLOW-NEXT:    vpunpcklbw {{.*#+}} xmm1 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm1[0,0,1,1,4,5,6,7]
; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} xmm2 = xmm1[2,2,3,3,4,5,6,7]
; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
; AVX2-SLOW-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
; AVX2-SLOW-NEXT:    vpbroadcastq {{.*#+}} ymm2 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
; AVX2-SLOW-NEXT:    vpand %ymm2, %ymm0, %ymm0
; AVX2-SLOW-NEXT:    vpcmpeqb %ymm2, %ymm0, %ymm0
; AVX2-SLOW-NEXT:    vpshufhw {{.*#+}} xmm3 = xmm1[0,1,2,3,4,4,5,5]
; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} xmm3 = xmm3[2,2,3,3]
; AVX2-SLOW-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,6,6,7,7]
; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
; AVX2-SLOW-NEXT:    vinserti128 $1, %xmm1, %ymm3, %ymm1
; AVX2-SLOW-NEXT:    vpand %ymm2, %ymm1, %ymm1
; AVX2-SLOW-NEXT:    vpcmpeqb %ymm2, %ymm1, %ymm1
; AVX2-SLOW-NEXT:    retq
;
; AVX2-FAST-LABEL: ext_i64_64i8:
; AVX2-FAST:       # %bb.0:
; AVX2-FAST-NEXT:    vmovq %rdi, %xmm0
; AVX2-FAST-NEXT:    vpunpcklbw {{.*#+}} xmm1 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm1[0,1,0,1,0,1,0,1,2,3,2,3,2,3,2,3]
; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm2 = xmm1[4,5,4,5,4,5,4,5,6,7,6,7,6,7,6,7]
; AVX2-FAST-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
; AVX2-FAST-NEXT:    vpbroadcastq {{.*#+}} ymm2 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
; AVX2-FAST-NEXT:    vpand %ymm2, %ymm0, %ymm0
; AVX2-FAST-NEXT:    vpcmpeqb %ymm2, %ymm0, %ymm0
; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm3 = xmm1[8,9,8,9,8,9,8,9,10,11,10,11,10,11,10,11]
; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[12,13,12,13,12,13,12,13,14,15,14,15,14,15,14,15]
; AVX2-FAST-NEXT:    vinserti128 $1, %xmm1, %ymm3, %ymm1
; AVX2-FAST-NEXT:    vpand %ymm2, %ymm1, %ymm1
; AVX2-FAST-NEXT:    vpcmpeqb %ymm2, %ymm1, %ymm1
; AVX2-FAST-NEXT:    retq
;
; AVX512-LABEL: ext_i64_64i8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovq %rdi, %k0
; AVX512-NEXT:    vpmovm2b %k0, %zmm0
; AVX512-NEXT:    retq
  %1 = bitcast i64 %a0 to <64 x i1>
  %2 = sext <64 x i1> %1 to <64 x i8>
  ret <64 x i8> %2
}