; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX12,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX12,AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=AVX512 --check-prefixes=AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512bw | FileCheck %s --check-prefixes=AVX512 --check-prefixes=AVX512BW

; Signed greater-than compare of two <8 x i16> vectors. The <8 x i1> result is
; bitcast to i8, giving one mask bit per vector element.
define i8 @v8i16(<8 x i16> %a, <8 x i16> %b) {
; SSE2-SSSE3-LABEL: v8i16:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    pcmpgtw %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    packsswb %xmm0, %xmm0
; SSE2-SSSE3-NEXT:    pmovmskb %xmm0, %eax
; SSE2-SSSE3-NEXT:    # kill: def $al killed $al killed $eax
; SSE2-SSSE3-NEXT:    retq
;
; AVX12-LABEL: v8i16:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm0
; AVX12-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
; AVX12-NEXT:    vpmovmskb %xmm0, %eax
; AVX12-NEXT:    # kill: def $al killed $al killed $eax
; AVX12-NEXT:    retq
;
; AVX512F-LABEL: v8i16:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm0
; AVX512F-NEXT:    vpmovsxwd %xmm0, %ymm0
; AVX512F-NEXT:    vptestmd %ymm0, %ymm0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: v8i16:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpcmpgtw %xmm1, %xmm0, %k0
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    # kill: def $al killed $al killed $eax
; AVX512BW-NEXT:    retq
  %x = icmp sgt <8 x i16> %a, %b
  %res = bitcast <8 x i1> %x to i8
  ret i8 %res
}

; Signed greater-than compare of two <4 x i32> vectors. The <4 x i1> result is
; bitcast to i4, giving one mask bit per vector element.
define i4 @v4i32(<4 x i32> %a, <4 x i32> %b) {
; SSE2-SSSE3-LABEL: v4i32:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    pcmpgtd %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    movmskps %xmm0, %eax
; SSE2-SSSE3-NEXT:    # kill: def $al killed $al killed $eax
; SSE2-SSSE3-NEXT:    retq
;
; AVX12-LABEL: v4i32:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
; AVX12-NEXT:    vmovmskps %xmm0, %eax
; AVX12-NEXT:    # kill: def $al killed $al killed $eax
; AVX12-NEXT:    retq
;
; AVX512F-LABEL: v4i32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpcmpgtd %xmm1, %xmm0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: v4i32:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpcmpgtd %xmm1, %xmm0, %k0
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    # kill: def $al killed $al killed $eax
; AVX512BW-NEXT:    retq
  %x = icmp sgt <4 x i32> %a, %b
  %res = bitcast <4 x i1> %x to i4
  ret i4 %res
}

; Ordered greater-than (fcmp ogt) compare of two <4 x float> vectors. The
; <4 x i1> result is bitcast to i4, giving one mask bit per vector element.
; The expected code uses a less-than compare with the operands swapped.
define i4 @v4f32(<4 x float> %a, <4 x float> %b) {
; SSE2-SSSE3-LABEL: v4f32:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    cmpltps %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    movmskps %xmm1, %eax
; SSE2-SSSE3-NEXT:    # kill: def $al killed $al killed $eax
; SSE2-SSSE3-NEXT:    retq
;
; AVX12-LABEL: v4f32:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vcmpltps %xmm0, %xmm1, %xmm0
; AVX12-NEXT:    vmovmskps %xmm0, %eax
; AVX12-NEXT:    # kill: def $al killed $al killed $eax
; AVX12-NEXT:    retq
;
; AVX512F-LABEL: v4f32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vcmpltps %xmm0, %xmm1, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: v4f32:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vcmpltps %xmm0, %xmm1, %k0
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    # kill: def $al killed $al killed $eax
; AVX512BW-NEXT:    retq
  %x = fcmp ogt <4 x float> %a, %b
  %res = bitcast <4 x i1> %x to i4
  ret i4 %res
}

; Signed greater-than compare of two <16 x i8> vectors. The <16 x i1> result is
; bitcast to i16, giving one mask bit per vector element.
define i16 @v16i8(<16 x i8> %a, <16 x i8> %b) {
; SSE2-SSSE3-LABEL: v16i8:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    pcmpgtb %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pmovmskb %xmm0, %eax
; SSE2-SSSE3-NEXT:    # kill: def $ax killed $ax killed $eax
; SSE2-SSSE3-NEXT:    retq
;
; AVX12-LABEL: v16i8:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm0
; AVX12-NEXT:    vpmovmskb %xmm0, %eax
; AVX12-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX12-NEXT:    retq
;
; AVX512F-LABEL: v16i8:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm0
; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm0
; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: v16i8:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpcmpgtb %xmm1, %xmm0, %k0
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX512BW-NEXT:    retq
  %x = icmp sgt <16 x i8> %a, %b
  %res = bitcast <16 x i1> %x to i16
  ret i16 %res
}

; Signed greater-than compare of two <2 x i8> vectors. The <2 x i1> result is
; bitcast to i2. In the expected code each i8 element is first sign-extended
; inside a 64-bit lane (shift left by 56, then arithmetic shift right) before
; the lanes are compared.
define i2 @v2i8(<2 x i8> %a, <2 x i8> %b) {
; SSE2-SSSE3-LABEL: v2i8:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    psllq $56, %xmm0
; SSE2-SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE2-SSSE3-NEXT:    psrad $31, %xmm2
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
; SSE2-SSSE3-NEXT:    psrad $24, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
; SSE2-SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE2-SSSE3-NEXT:    psllq $56, %xmm1
; SSE2-SSSE3-NEXT:    movdqa %xmm1, %xmm2
; SSE2-SSSE3-NEXT:    psrad $31, %xmm2
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
; SSE2-SSSE3-NEXT:    psrad $24, %xmm1
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
; SSE2-SSSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0]
; SSE2-SSSE3-NEXT:    pxor %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    pxor %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE2-SSSE3-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-SSSE3-NEXT:    pand %xmm3, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
; SSE2-SSSE3-NEXT:    por %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    movmskpd %xmm1, %eax
; SSE2-SSSE3-NEXT:    # kill: def $al killed $al killed $eax
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: v2i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsllq $56, %xmm1, %xmm1
; AVX1-NEXT:    vpsrad $31, %xmm1, %xmm2
; AVX1-NEXT:    vpsrad $24, %xmm1, %xmm1
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
; AVX1-NEXT:    vpsllq $56, %xmm0, %xmm0
; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm2
; AVX1-NEXT:    vpsrad $24, %xmm0, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovmskpd %xmm0, %eax
; AVX1-NEXT:    # kill: def $al killed $al killed $eax
; AVX1-NEXT:    retq
;
; AVX2-LABEL: v2i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsllq $56, %xmm1, %xmm1
; AVX2-NEXT:    vpsrad $31, %xmm1, %xmm2
; AVX2-NEXT:    vpsrad $24, %xmm1, %xmm1
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; AVX2-NEXT:    vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
; AVX2-NEXT:    vpsllq $56, %xmm0, %xmm0
; AVX2-NEXT:    vpsrad $31, %xmm0, %xmm2
; AVX2-NEXT:    vpsrad $24, %xmm0, %xmm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovmskpd %xmm0, %eax
; AVX2-NEXT:    # kill: def $al killed $al killed $eax
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: v2i8:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpsllq $56, %xmm1, %xmm1
; AVX512F-NEXT:    vpsraq $56, %xmm1, %xmm1
; AVX512F-NEXT:    vpsllq $56, %xmm0, %xmm0
; AVX512F-NEXT:    vpsraq $56, %xmm0, %xmm0
; AVX512F-NEXT:    vpcmpgtq %xmm1, %xmm0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: v2i8:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpsllq $56, %xmm1, %xmm1
; AVX512BW-NEXT:    vpsraq $56, %xmm1, %xmm1
; AVX512BW-NEXT:    vpsllq $56, %xmm0, %xmm0
; AVX512BW-NEXT:    vpsraq $56, %xmm0, %xmm0
; AVX512BW-NEXT:    vpcmpgtq %xmm1, %xmm0, %k0
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    # kill: def $al killed $al killed $eax
; AVX512BW-NEXT:    retq
  %x = icmp sgt <2 x i8> %a, %b
  %res = bitcast <2 x i1> %x to i2
  ret i2 %res
}

; Signed greater-than compare of two <2 x i16> vectors. The <2 x i1> result is
; bitcast to i2. In the expected code each i16 element is first sign-extended
; inside a 64-bit lane (shift left by 48, then arithmetic shift right) before
; the lanes are compared.
define i2 @v2i16(<2 x i16> %a, <2 x i16> %b) {
; SSE2-SSSE3-LABEL: v2i16:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    psllq $48, %xmm0
; SSE2-SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE2-SSSE3-NEXT:    psrad $31, %xmm2
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
; SSE2-SSSE3-NEXT:    psrad $16, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
; SSE2-SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE2-SSSE3-NEXT:    psllq $48, %xmm1
; SSE2-SSSE3-NEXT:    movdqa %xmm1, %xmm2
; SSE2-SSSE3-NEXT:    psrad $31, %xmm2
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
; SSE2-SSSE3-NEXT:    psrad $16, %xmm1
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
; SSE2-SSSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0]
; SSE2-SSSE3-NEXT:    pxor %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    pxor %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE2-SSSE3-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-SSSE3-NEXT:    pand %xmm3, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
; SSE2-SSSE3-NEXT:    por %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    movmskpd %xmm1, %eax
; SSE2-SSSE3-NEXT:    # kill: def $al killed $al killed $eax
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: v2i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsllq $48, %xmm1, %xmm1
; AVX1-NEXT:    vpsrad $31, %xmm1, %xmm2
; AVX1-NEXT:    vpsrad $16, %xmm1, %xmm1
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
; AVX1-NEXT:    vpsllq $48, %xmm0, %xmm0
; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm2
; AVX1-NEXT:    vpsrad $16, %xmm0, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovmskpd %xmm0, %eax
; AVX1-NEXT:    # kill: def $al killed $al killed $eax
; AVX1-NEXT:    retq
;
; AVX2-LABEL: v2i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsllq $48, %xmm1, %xmm1
; AVX2-NEXT:    vpsrad $31, %xmm1, %xmm2
; AVX2-NEXT:    vpsrad $16, %xmm1, %xmm1
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; AVX2-NEXT:    vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
; AVX2-NEXT:    vpsllq $48, %xmm0, %xmm0
; AVX2-NEXT:    vpsrad $31, %xmm0, %xmm2
; AVX2-NEXT:    vpsrad $16, %xmm0, %xmm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovmskpd %xmm0, %eax
; AVX2-NEXT:    # kill: def $al killed $al killed $eax
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: v2i16:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpsllq $48, %xmm1, %xmm1
; AVX512F-NEXT:    vpsraq $48, %xmm1, %xmm1
; AVX512F-NEXT:    vpsllq $48, %xmm0, %xmm0
; AVX512F-NEXT:    vpsraq $48, %xmm0, %xmm0
; AVX512F-NEXT:    vpcmpgtq %xmm1, %xmm0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: v2i16:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpsllq $48, %xmm1, %xmm1
; AVX512BW-NEXT:    vpsraq $48, %xmm1, %xmm1
; AVX512BW-NEXT:    vpsllq $48, %xmm0, %xmm0
; AVX512BW-NEXT:    vpsraq $48, %xmm0, %xmm0
; AVX512BW-NEXT:    vpcmpgtq %xmm1, %xmm0, %k0
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    # kill: def $al killed $al killed $eax
; AVX512BW-NEXT:    retq
  %x = icmp sgt <2 x i16> %a, %b
  %res = bitcast <2 x i1> %x to i2
  ret i2 %res
}

; Signed greater-than compare of two <2 x i32> vectors. The <2 x i1> result is
; bitcast to i2. In the expected code each i32 element is first sign-extended
; inside a 64-bit lane (shift left by 32, then sign fill) before the lanes are
; compared.
define i2 @v2i32(<2 x i32> %a, <2 x i32> %b) {
; SSE2-SSSE3-LABEL: v2i32:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    psllq $32, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,3,2,3]
; SSE2-SSSE3-NEXT:    psrad $31, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
; SSE2-SSSE3-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
; SSE2-SSSE3-NEXT:    psllq $32, %xmm1
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,3,2,3]
; SSE2-SSSE3-NEXT:    psrad $31, %xmm1
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
; SSE2-SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483648,0,2147483648,0]
; SSE2-SSSE3-NEXT:    pxor %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pxor %xmm1, %xmm2
; SSE2-SSSE3-NEXT:    movdqa %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    pcmpgtd %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm0, %xmm2
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-SSSE3-NEXT:    pand %xmm3, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-SSSE3-NEXT:    por %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    movmskpd %xmm1, %eax
; SSE2-SSSE3-NEXT:    # kill: def $al killed $al killed $eax
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: v2i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsllq $32, %xmm1, %xmm1
; AVX1-NEXT:    vpsrad $31, %xmm1, %xmm2
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
; AVX1-NEXT:    vpsllq $32, %xmm0, %xmm0
; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm2
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovmskpd %xmm0, %eax
; AVX1-NEXT:    # kill: def $al killed $al killed $eax
; AVX1-NEXT:    retq
;
; AVX2-LABEL: v2i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsllq $32, %xmm1, %xmm1
; AVX2-NEXT:    vpsrad $31, %xmm1, %xmm2
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; AVX2-NEXT:    vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
; AVX2-NEXT:    vpsllq $32, %xmm0, %xmm0
; AVX2-NEXT:    vpsrad $31, %xmm0, %xmm2
; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovmskpd %xmm0, %eax
; AVX2-NEXT:    # kill: def $al killed $al killed $eax
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: v2i32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpsllq $32, %xmm1, %xmm1
; AVX512F-NEXT:    vpsraq $32, %xmm1, %xmm1
; AVX512F-NEXT:    vpsllq $32, %xmm0, %xmm0
; AVX512F-NEXT:    vpsraq $32, %xmm0, %xmm0
; AVX512F-NEXT:    vpcmpgtq %xmm1, %xmm0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: v2i32:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpsllq $32, %xmm1, %xmm1
; AVX512BW-NEXT:    vpsraq $32, %xmm1, %xmm1
; AVX512BW-NEXT:    vpsllq $32, %xmm0, %xmm0
; AVX512BW-NEXT:    vpsraq $32, %xmm0, %xmm0
; AVX512BW-NEXT:    vpcmpgtq %xmm1, %xmm0, %k0
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    # kill: def $al killed $al killed $eax
; AVX512BW-NEXT:    retq
  %x = icmp sgt <2 x i32> %a, %b
  %res = bitcast <2 x i1> %x to i2
  ret i2 %res
}

; Signed greater-than compare of two <2 x i64> vectors. The <2 x i1> result is
; bitcast to i2. The SSE2/SSSE3 checks build the 64-bit compare out of 32-bit
; pcmpgtd/pcmpeqd pieces; the AVX and AVX-512 checks use vpcmpgtq directly.
define i2 @v2i64(<2 x i64> %a, <2 x i64> %b) {
; SSE2-SSSE3-LABEL: v2i64:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0]
; SSE2-SSSE3-NEXT:    pxor %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    pxor %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE2-SSSE3-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-SSSE3-NEXT:    pand %xmm3, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
; SSE2-SSSE3-NEXT:    por %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    movmskpd %xmm1, %eax
; SSE2-SSSE3-NEXT:    # kill: def $al killed $al killed $eax
; SSE2-SSSE3-NEXT:    retq
;
; AVX12-LABEL: v2i64:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
; AVX12-NEXT:    vmovmskpd %xmm0, %eax
; AVX12-NEXT:    # kill: def $al killed $al killed $eax
; AVX12-NEXT:    retq
;
; AVX512F-LABEL: v2i64:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpcmpgtq %xmm1, %xmm0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: v2i64:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpcmpgtq %xmm1, %xmm0, %k0
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    # kill: def $al killed $al killed $eax
; AVX512BW-NEXT:    retq
  %x = icmp sgt <2 x i64> %a, %b
  %res = bitcast <2 x i1> %x to i2
  ret i2 %res
}

; Ordered greater-than (fcmp ogt) compare of two <2 x double> vectors. The
; <2 x i1> result is bitcast to i2, giving one mask bit per vector element.
; The expected code uses a less-than compare with the operands swapped.
define i2 @v2f64(<2 x double> %a, <2 x double> %b) {
; SSE2-SSSE3-LABEL: v2f64:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    cmpltpd %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    movmskpd %xmm1, %eax
; SSE2-SSSE3-NEXT:    # kill: def $al killed $al killed $eax
; SSE2-SSSE3-NEXT:    retq
;
; AVX12-LABEL: v2f64:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vcmpltpd %xmm0, %xmm1, %xmm0
; AVX12-NEXT:    vmovmskpd %xmm0, %eax
; AVX12-NEXT:    # kill: def $al killed $al killed $eax
; AVX12-NEXT:    retq
;
; AVX512F-LABEL: v2f64:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vcmpltpd %xmm0, %xmm1, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: v2f64:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vcmpltpd %xmm0, %xmm1, %k0
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    # kill: def $al killed $al killed $eax
; AVX512BW-NEXT:    retq
  %x = fcmp ogt <2 x double> %a, %b
  %res = bitcast <2 x i1> %x to i2
  ret i2 %res
}

; Signed greater-than compare of two <4 x i8> vectors. The <4 x i1> result is
; bitcast to i4. In the expected code each i8 element is first sign-extended
; inside a 32-bit lane (shift left by 24, then arithmetic shift right by 24)
; before a 32-bit compare.
define i4 @v4i8(<4 x i8> %a, <4 x i8> %b) {
; SSE2-SSSE3-LABEL: v4i8:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    pslld $24, %xmm1
; SSE2-SSSE3-NEXT:    psrad $24, %xmm1
; SSE2-SSSE3-NEXT:    pslld $24, %xmm0
; SSE2-SSSE3-NEXT:    psrad $24, %xmm0
; SSE2-SSSE3-NEXT:    pcmpgtd %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    movmskps %xmm0, %eax
; SSE2-SSSE3-NEXT:    # kill: def $al killed $al killed $eax
; SSE2-SSSE3-NEXT:    retq
;
; AVX12-LABEL: v4i8:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vpslld $24, %xmm1, %xmm1
; AVX12-NEXT:    vpsrad $24, %xmm1, %xmm1
; AVX12-NEXT:    vpslld $24, %xmm0, %xmm0
; AVX12-NEXT:    vpsrad $24, %xmm0, %xmm0
; AVX12-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
; AVX12-NEXT:    vmovmskps %xmm0, %eax
; AVX12-NEXT:    # kill: def $al killed $al killed $eax
; AVX12-NEXT:    retq
;
; AVX512F-LABEL: v4i8:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpslld $24, %xmm1, %xmm1
; AVX512F-NEXT:    vpsrad $24, %xmm1, %xmm1
; AVX512F-NEXT:    vpslld $24, %xmm0, %xmm0
; AVX512F-NEXT:    vpsrad $24, %xmm0, %xmm0
; AVX512F-NEXT:    vpcmpgtd %xmm1, %xmm0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: v4i8:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpslld $24, %xmm1, %xmm1
; AVX512BW-NEXT:    vpsrad $24, %xmm1, %xmm1
; AVX512BW-NEXT:    vpslld $24, %xmm0, %xmm0
; AVX512BW-NEXT:    vpsrad $24, %xmm0, %xmm0
; AVX512BW-NEXT:    vpcmpgtd %xmm1, %xmm0, %k0
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    # kill: def $al killed $al killed $eax
; AVX512BW-NEXT:    retq
  %x = icmp sgt <4 x i8> %a, %b
  %res = bitcast <4 x i1> %x to i4
  ret i4 %res
}

; Signed greater-than compare of two <4 x i16> vectors. The <4 x i1> result is
; bitcast to i4. In the expected code each i16 element is first sign-extended
; inside a 32-bit lane (shift left by 16, then arithmetic shift right by 16)
; before a 32-bit compare.
define i4 @v4i16(<4 x i16> %a, <4 x i16> %b) {
; SSE2-SSSE3-LABEL: v4i16:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    pslld $16, %xmm1
; SSE2-SSSE3-NEXT:    psrad $16, %xmm1
; SSE2-SSSE3-NEXT:    pslld $16, %xmm0
; SSE2-SSSE3-NEXT:    psrad $16, %xmm0
; SSE2-SSSE3-NEXT:    pcmpgtd %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    movmskps %xmm0, %eax
; SSE2-SSSE3-NEXT:    # kill: def $al killed $al killed $eax
; SSE2-SSSE3-NEXT:    retq
;
; AVX12-LABEL: v4i16:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vpslld $16, %xmm1, %xmm1
; AVX12-NEXT:    vpsrad $16, %xmm1, %xmm1
; AVX12-NEXT:    vpslld $16, %xmm0, %xmm0
; AVX12-NEXT:    vpsrad $16, %xmm0, %xmm0
; AVX12-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
; AVX12-NEXT:    vmovmskps %xmm0, %eax
; AVX12-NEXT:    # kill: def $al killed $al killed $eax
; AVX12-NEXT:    retq
;
; AVX512F-LABEL: v4i16:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpslld $16, %xmm1, %xmm1
; AVX512F-NEXT:    vpsrad $16, %xmm1, %xmm1
; AVX512F-NEXT:    vpslld $16, %xmm0, %xmm0
; AVX512F-NEXT:    vpsrad $16, %xmm0, %xmm0
; AVX512F-NEXT:    vpcmpgtd %xmm1, %xmm0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: v4i16:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpslld $16, %xmm1, %xmm1
; AVX512BW-NEXT:    vpsrad $16, %xmm1, %xmm1
; AVX512BW-NEXT:    vpslld $16, %xmm0, %xmm0
; AVX512BW-NEXT:    vpsrad $16, %xmm0, %xmm0
; AVX512BW-NEXT:    vpcmpgtd %xmm1, %xmm0, %k0
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    # kill: def $al killed $al killed $eax
; AVX512BW-NEXT:    retq
  %x = icmp sgt <4 x i16> %a, %b
  %res = bitcast <4 x i1> %x to i4
  ret i4 %res
}

; Signed greater-than compare of two <8 x i8> vectors. The <8 x i1> result is
; bitcast to i8. In the expected code each i8 element is first sign-extended
; inside a 16-bit lane (shift left by 8, then arithmetic shift right by 8)
; before a 16-bit compare.
define i8 @v8i8(<8 x i8> %a, <8 x i8> %b) {
; SSE2-SSSE3-LABEL: v8i8:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    psllw $8, %xmm1
; SSE2-SSSE3-NEXT:    psraw $8, %xmm1
; SSE2-SSSE3-NEXT:    psllw $8, %xmm0
; SSE2-SSSE3-NEXT:    psraw $8, %xmm0
; SSE2-SSSE3-NEXT:    pcmpgtw %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    packsswb %xmm0, %xmm0
; SSE2-SSSE3-NEXT:    pmovmskb %xmm0, %eax
; SSE2-SSSE3-NEXT:    # kill: def $al killed $al killed $eax
; SSE2-SSSE3-NEXT:    retq
;
; AVX12-LABEL: v8i8:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vpsllw $8, %xmm1, %xmm1
; AVX12-NEXT:    vpsraw $8, %xmm1, %xmm1
; AVX12-NEXT:    vpsllw $8, %xmm0, %xmm0
; AVX12-NEXT:    vpsraw $8, %xmm0, %xmm0
; AVX12-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm0
; AVX12-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
; AVX12-NEXT:    vpmovmskb %xmm0, %eax
; AVX12-NEXT:    # kill: def $al killed $al killed $eax
; AVX12-NEXT:    retq
;
; AVX512F-LABEL: v8i8:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpsllw $8, %xmm1, %xmm1
; AVX512F-NEXT:    vpsraw $8, %xmm1, %xmm1
; AVX512F-NEXT:    vpsllw $8, %xmm0, %xmm0
; AVX512F-NEXT:    vpsraw $8, %xmm0, %xmm0
; AVX512F-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm0
; AVX512F-NEXT:    vpmovsxwd %xmm0, %ymm0
; AVX512F-NEXT:    vptestmd %ymm0, %ymm0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: v8i8:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpsllw $8, %xmm1, %xmm1
; AVX512BW-NEXT:    vpsraw $8, %xmm1, %xmm1
; AVX512BW-NEXT:    vpsllw $8, %xmm0, %xmm0
; AVX512BW-NEXT:    vpsraw $8, %xmm0, %xmm0
; AVX512BW-NEXT:    vpcmpgtw %xmm1, %xmm0, %k0
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    # kill: def $al killed $al killed $eax
; AVX512BW-NEXT:    retq
  %x = icmp sgt <8 x i8> %a, %b
  %res = bitcast <8 x i1> %x to i8
  ret i8 %res
}

; Equality compare of two <16 x i8> vectors. The <16 x i1> result is widened to
; <64 x i1> by a shufflevector: lanes 0-15 come from the compare, and lanes
; 16-63 all select (mask indices 16-31) from the zeroinitializer operand. The
; widened vector is bitcast to i64, so only the low 16 bits can be set.
define i64 @v16i8_widened_with_zeroes(<16 x i8> %a, <16 x i8> %b) {
; SSE2-SSSE3-LABEL: v16i8_widened_with_zeroes:
; SSE2-SSSE3:       # %bb.0: # %entry
; SSE2-SSSE3-NEXT:    pcmpeqb %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pmovmskb %xmm0, %eax
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: v16i8_widened_with_zeroes:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpmovmskb %xmm0, %eax
; AVX1-NEXT:    retq
;
; AVX2-LABEL: v16i8_widened_with_zeroes:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpmovmskb %ymm0, %eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: v16i8_widened_with_zeroes:
; AVX512F:       # %bb.0: # %entry
; AVX512F-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm0
; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    movzwl %ax, %eax
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: v16i8_widened_with_zeroes:
; AVX512BW:       # %bb.0: # %entry
; AVX512BW-NEXT:    vpcmpeqb %xmm1, %xmm0, %k0
; AVX512BW-NEXT:    kmovq %k0, %rax
; AVX512BW-NEXT:    retq
entry:
  %c = icmp eq <16 x i8> %a, %b
  %d = shufflevector <16 x i1> %c, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %e = bitcast <64 x i1> %d to i64
  ret i64 %e
}

; Equality compare of two <16 x i8> vectors. The <16 x i1> result is widened to
; <64 x i1> by a shufflevector: lanes 0-15 come from the compare, and lanes
; 16-63 all select (mask indices 16-31) from an all-ones <16 x i1> operand. The
; widened vector is bitcast to i64, so bits 16-63 of the result are always set.
define i64 @v16i8_widened_with_ones(<16 x i8> %a, <16 x i8> %b) {
; SSE2-SSSE3-LABEL: v16i8_widened_with_ones:
; SSE2-SSSE3:       # %bb.0: # %entry
; SSE2-SSSE3-NEXT:    pcmpeqb %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pmovmskb %xmm0, %ecx
; SSE2-SSSE3-NEXT:    orl $-65536, %ecx # imm = 0xFFFF0000
; SSE2-SSSE3-NEXT:    movabsq $-4294967296, %rax # imm = 0xFFFFFFFF00000000
; SSE2-SSSE3-NEXT:    orq %rcx, %rax
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: v16i8_widened_with_ones:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpsllw $7, %xmm0, %xmm0
; AVX1-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpgtb %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    vpmovmskb %xmm0, %ecx
; AVX1-NEXT:    orl $-65536, %ecx # imm = 0xFFFF0000
; AVX1-NEXT:    movabsq $-4294967296, %rax # imm = 0xFFFFFFFF00000000
; AVX1-NEXT:    orq %rcx, %rax
; AVX1-NEXT:    retq
;
; AVX2-LABEL: v16i8_widened_with_ones:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vinserti128 $1, {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT:    vpsllw $7, %ymm0, %ymm0
; AVX2-NEXT:    vpand {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT:    vpmovmskb %ymm0, %ecx
; AVX2-NEXT:    movabsq $-4294967296, %rax # imm = 0xFFFFFFFF00000000
; AVX2-NEXT:    orq %rcx, %rax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: v16i8_widened_with_ones:
; AVX512F:       # %bb.0: # %entry
; AVX512F-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm0
; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
; AVX512F-NEXT:    kmovw %k0, %ecx
; AVX512F-NEXT:    orl $-65536, %ecx # imm = 0xFFFF0000
; AVX512F-NEXT:    movabsq $-4294967296, %rax # imm = 0xFFFFFFFF00000000
; AVX512F-NEXT:    orq %rcx, %rax
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: v16i8_widened_with_ones:
; AVX512BW:       # %bb.0: # %entry
; AVX512BW-NEXT:    vpcmpeqb %xmm1, %xmm0, %k0
; AVX512BW-NEXT:    kxnorw %k0, %k0, %k1
; AVX512BW-NEXT:    kunpckwd %k0, %k1, %k0
; AVX512BW-NEXT:    kxnord %k0, %k0, %k1
; AVX512BW-NEXT:    kunpckdq %k0, %k1, %k0
; AVX512BW-NEXT:    kmovq %k0, %rax
; AVX512BW-NEXT:    retq
entry:
  %c = icmp eq <16 x i8> %a, %b
  %d = shufflevector <16 x i1> %c, <16 x i1> <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %e = bitcast <64 x i1> %d to i64
  ret i64 %e
}
