• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSE2
3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSSE3
4; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX12,AVX1
5; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX12,AVX2
6; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefix=AVX512F
7; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512bw | FileCheck %s --check-prefix=AVX512BW
8
; v8i16 - signed greater-than compare of two <8 x i16> vectors whose
; <8 x i1> result is bitcast to an i8 (one bit per lane).
; Expected lowering per the assertions below: the SSE2/SSSE3 and AVX1/AVX2
; runs pack the word-sized compare result down to bytes and read it with
; (v)pmovmskb; the AVX512F run sign-extends the result to dwords and tests
; it into a mask register; the AVX512BW run compares straight into %k0.
9define i8 @v8i16(<8 x i16> %a, <8 x i16> %b) {
10; SSE2-SSSE3-LABEL: v8i16:
11; SSE2-SSSE3:       # %bb.0:
12; SSE2-SSSE3-NEXT:    pcmpgtw %xmm1, %xmm0
13; SSE2-SSSE3-NEXT:    packsswb %xmm0, %xmm0
14; SSE2-SSSE3-NEXT:    pmovmskb %xmm0, %eax
15; SSE2-SSSE3-NEXT:    # kill: def $al killed $al killed $eax
16; SSE2-SSSE3-NEXT:    retq
17;
18; AVX12-LABEL: v8i16:
19; AVX12:       # %bb.0:
20; AVX12-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm0
21; AVX12-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
22; AVX12-NEXT:    vpmovmskb %xmm0, %eax
23; AVX12-NEXT:    # kill: def $al killed $al killed $eax
24; AVX12-NEXT:    retq
25;
26; AVX512F-LABEL: v8i16:
27; AVX512F:       # %bb.0:
28; AVX512F-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm0
29; AVX512F-NEXT:    vpmovsxwd %xmm0, %ymm0
30; AVX512F-NEXT:    vptestmd %ymm0, %ymm0, %k0
31; AVX512F-NEXT:    kmovw %k0, %eax
32; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
33; AVX512F-NEXT:    vzeroupper
34; AVX512F-NEXT:    retq
35;
36; AVX512BW-LABEL: v8i16:
37; AVX512BW:       # %bb.0:
38; AVX512BW-NEXT:    vpcmpgtw %xmm1, %xmm0, %k0
39; AVX512BW-NEXT:    kmovd %k0, %eax
40; AVX512BW-NEXT:    # kill: def $al killed $al killed $eax
41; AVX512BW-NEXT:    retq
; IR under test: vector compare followed by an <8 x i1> -> i8 bitcast.
42  %x = icmp sgt <8 x i16> %a, %b
43  %res = bitcast <8 x i1> %x to i8
44  ret i8 %res
45}
46
; v4i32 - signed greater-than compare of two <4 x i32> vectors whose
; <4 x i1> result is bitcast to an i4.
; Expected lowering per the assertions below: the SSE2/SSSE3 and AVX1/AVX2
; runs use a dword compare followed by (v)movmskps; both AVX512 runs compare
; directly into %k0 and move the mask to %eax (kmovw without BW, kmovd with).
47define i4 @v4i32(<4 x i32> %a, <4 x i32> %b) {
48; SSE2-SSSE3-LABEL: v4i32:
49; SSE2-SSSE3:       # %bb.0:
50; SSE2-SSSE3-NEXT:    pcmpgtd %xmm1, %xmm0
51; SSE2-SSSE3-NEXT:    movmskps %xmm0, %eax
52; SSE2-SSSE3-NEXT:    # kill: def $al killed $al killed $eax
53; SSE2-SSSE3-NEXT:    retq
54;
55; AVX12-LABEL: v4i32:
56; AVX12:       # %bb.0:
57; AVX12-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
58; AVX12-NEXT:    vmovmskps %xmm0, %eax
59; AVX12-NEXT:    # kill: def $al killed $al killed $eax
60; AVX12-NEXT:    retq
61;
62; AVX512F-LABEL: v4i32:
63; AVX512F:       # %bb.0:
64; AVX512F-NEXT:    vpcmpgtd %xmm1, %xmm0, %k0
65; AVX512F-NEXT:    kmovw %k0, %eax
66; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
67; AVX512F-NEXT:    retq
68;
69; AVX512BW-LABEL: v4i32:
70; AVX512BW:       # %bb.0:
71; AVX512BW-NEXT:    vpcmpgtd %xmm1, %xmm0, %k0
72; AVX512BW-NEXT:    kmovd %k0, %eax
73; AVX512BW-NEXT:    # kill: def $al killed $al killed $eax
74; AVX512BW-NEXT:    retq
; IR under test: vector compare followed by a <4 x i1> -> i4 bitcast.
75  %x = icmp sgt <4 x i32> %a, %b
76  %res = bitcast <4 x i1> %x to i4
77  ret i4 %res
78}
79
; v4f32 - ordered greater-than compare (fcmp ogt) of two <4 x float> vectors
; whose <4 x i1> result is bitcast to an i4.
; In every run the assertions expect a less-than compare with the operands
; swapped (%xmm0 and %xmm1 exchanged) rather than a greater-than compare.
; The non-AVX512 runs then read the result with (v)movmskps; the AVX512 runs
; compare into %k0 and move the mask to %eax.
80define i4 @v4f32(<4 x float> %a, <4 x float> %b) {
81; SSE2-SSSE3-LABEL: v4f32:
82; SSE2-SSSE3:       # %bb.0:
83; SSE2-SSSE3-NEXT:    cmpltps %xmm0, %xmm1
84; SSE2-SSSE3-NEXT:    movmskps %xmm1, %eax
85; SSE2-SSSE3-NEXT:    # kill: def $al killed $al killed $eax
86; SSE2-SSSE3-NEXT:    retq
87;
88; AVX12-LABEL: v4f32:
89; AVX12:       # %bb.0:
90; AVX12-NEXT:    vcmpltps %xmm0, %xmm1, %xmm0
91; AVX12-NEXT:    vmovmskps %xmm0, %eax
92; AVX12-NEXT:    # kill: def $al killed $al killed $eax
93; AVX12-NEXT:    retq
94;
95; AVX512F-LABEL: v4f32:
96; AVX512F:       # %bb.0:
97; AVX512F-NEXT:    vcmpltps %xmm0, %xmm1, %k0
98; AVX512F-NEXT:    kmovw %k0, %eax
99; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
100; AVX512F-NEXT:    retq
101;
102; AVX512BW-LABEL: v4f32:
103; AVX512BW:       # %bb.0:
104; AVX512BW-NEXT:    vcmpltps %xmm0, %xmm1, %k0
105; AVX512BW-NEXT:    kmovd %k0, %eax
106; AVX512BW-NEXT:    # kill: def $al killed $al killed $eax
107; AVX512BW-NEXT:    retq
; IR under test: floating-point compare followed by a <4 x i1> -> i4 bitcast.
108  %x = fcmp ogt <4 x float> %a, %b
109  %res = bitcast <4 x i1> %x to i4
110  ret i4 %res
111}
112
; v16i8 - signed greater-than compare of two <16 x i8> vectors whose
; <16 x i1> result is bitcast to an i16.
; Expected lowering per the assertions below: the SSE2/SSSE3, AVX1/AVX2 and
; AVX512F runs all use a byte compare followed by (v)pmovmskb; only the
; AVX512BW run compares into %k0 and moves the mask with kmovd.
113define i16 @v16i8(<16 x i8> %a, <16 x i8> %b) {
114; SSE2-SSSE3-LABEL: v16i8:
115; SSE2-SSSE3:       # %bb.0:
116; SSE2-SSSE3-NEXT:    pcmpgtb %xmm1, %xmm0
117; SSE2-SSSE3-NEXT:    pmovmskb %xmm0, %eax
118; SSE2-SSSE3-NEXT:    # kill: def $ax killed $ax killed $eax
119; SSE2-SSSE3-NEXT:    retq
120;
121; AVX12-LABEL: v16i8:
122; AVX12:       # %bb.0:
123; AVX12-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm0
124; AVX12-NEXT:    vpmovmskb %xmm0, %eax
125; AVX12-NEXT:    # kill: def $ax killed $ax killed $eax
126; AVX12-NEXT:    retq
127;
128; AVX512F-LABEL: v16i8:
129; AVX512F:       # %bb.0:
130; AVX512F-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm0
131; AVX512F-NEXT:    vpmovmskb %xmm0, %eax
132; AVX512F-NEXT:    # kill: def $ax killed $ax killed $eax
133; AVX512F-NEXT:    retq
134;
135; AVX512BW-LABEL: v16i8:
136; AVX512BW:       # %bb.0:
137; AVX512BW-NEXT:    vpcmpgtb %xmm1, %xmm0, %k0
138; AVX512BW-NEXT:    kmovd %k0, %eax
139; AVX512BW-NEXT:    # kill: def $ax killed $ax killed $eax
140; AVX512BW-NEXT:    retq
; IR under test: vector compare followed by a <16 x i1> -> i16 bitcast.
141  %x = icmp sgt <16 x i8> %a, %b
142  %res = bitcast <16 x i1> %x to i16
143  ret i16 %res
144}
145
; v2i8 - signed greater-than compare of two <2 x i8> vectors whose
; <2 x i1> result is bitcast to an i2.
; This is one of the functions where the SSE2 and SSSE3 runs are checked
; separately: the SSE2 run widens the byte compare result with a
; punpcklbw/pshuflw/pshufd sequence, while the SSSE3 run uses one pshufb.
; Both then read the result with movmskpd. The AVX1/AVX2 runs sign-extend
; bytes to qwords before vmovmskpd, the AVX512F run sign-extends to dwords
; in a zmm register and tests into a mask register, and the AVX512BW run
; compares straight into %k0.
146define i2 @v2i8(<2 x i8> %a, <2 x i8> %b) {
147; SSE2-LABEL: v2i8:
148; SSE2:       # %bb.0:
149; SSE2-NEXT:    pcmpgtb %xmm1, %xmm0
150; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
151; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,1,4,5,6,7]
152; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
153; SSE2-NEXT:    movmskpd %xmm0, %eax
154; SSE2-NEXT:    # kill: def $al killed $al killed $eax
155; SSE2-NEXT:    retq
156;
157; SSSE3-LABEL: v2i8:
158; SSSE3:       # %bb.0:
159; SSSE3-NEXT:    pcmpgtb %xmm1, %xmm0
160; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[u,u,u,u,u,u,u,0,u,u,u,u,u,u,u,1]
161; SSSE3-NEXT:    movmskpd %xmm0, %eax
162; SSSE3-NEXT:    # kill: def $al killed $al killed $eax
163; SSSE3-NEXT:    retq
164;
165; AVX12-LABEL: v2i8:
166; AVX12:       # %bb.0:
167; AVX12-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm0
168; AVX12-NEXT:    vpmovsxbq %xmm0, %xmm0
169; AVX12-NEXT:    vmovmskpd %xmm0, %eax
170; AVX12-NEXT:    # kill: def $al killed $al killed $eax
171; AVX12-NEXT:    retq
172;
173; AVX512F-LABEL: v2i8:
174; AVX512F:       # %bb.0:
175; AVX512F-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm0
176; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm0
177; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
178; AVX512F-NEXT:    kmovw %k0, %eax
179; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
180; AVX512F-NEXT:    vzeroupper
181; AVX512F-NEXT:    retq
182;
183; AVX512BW-LABEL: v2i8:
184; AVX512BW:       # %bb.0:
185; AVX512BW-NEXT:    vpcmpgtb %xmm1, %xmm0, %k0
186; AVX512BW-NEXT:    kmovd %k0, %eax
187; AVX512BW-NEXT:    # kill: def $al killed $al killed $eax
188; AVX512BW-NEXT:    retq
; IR under test: vector compare followed by a <2 x i1> -> i2 bitcast.
189  %x = icmp sgt <2 x i8> %a, %b
190  %res = bitcast <2 x i1> %x to i2
191  ret i2 %res
192}
193
; v2i16 - signed greater-than compare of two <2 x i16> vectors whose
; <2 x i1> result is bitcast to an i2.
; Expected lowering per the assertions below: the SSE2/SSSE3 runs widen the
; word compare result with pshuflw/pshufd before movmskpd; the AVX1/AVX2 runs
; sign-extend words to qwords before vmovmskpd; the AVX512F run sign-extends
; to dwords and tests into a mask register; the AVX512BW run compares
; straight into %k0.
194define i2 @v2i16(<2 x i16> %a, <2 x i16> %b) {
195; SSE2-SSSE3-LABEL: v2i16:
196; SSE2-SSSE3:       # %bb.0:
197; SSE2-SSSE3-NEXT:    pcmpgtw %xmm1, %xmm0
198; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,1,4,5,6,7]
199; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
200; SSE2-SSSE3-NEXT:    movmskpd %xmm0, %eax
201; SSE2-SSSE3-NEXT:    # kill: def $al killed $al killed $eax
202; SSE2-SSSE3-NEXT:    retq
203;
204; AVX12-LABEL: v2i16:
205; AVX12:       # %bb.0:
206; AVX12-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm0
207; AVX12-NEXT:    vpmovsxwq %xmm0, %xmm0
208; AVX12-NEXT:    vmovmskpd %xmm0, %eax
209; AVX12-NEXT:    # kill: def $al killed $al killed $eax
210; AVX12-NEXT:    retq
211;
212; AVX512F-LABEL: v2i16:
213; AVX512F:       # %bb.0:
214; AVX512F-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm0
215; AVX512F-NEXT:    vpmovsxwd %xmm0, %ymm0
216; AVX512F-NEXT:    vptestmd %ymm0, %ymm0, %k0
217; AVX512F-NEXT:    kmovw %k0, %eax
218; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
219; AVX512F-NEXT:    vzeroupper
220; AVX512F-NEXT:    retq
221;
222; AVX512BW-LABEL: v2i16:
223; AVX512BW:       # %bb.0:
224; AVX512BW-NEXT:    vpcmpgtw %xmm1, %xmm0, %k0
225; AVX512BW-NEXT:    kmovd %k0, %eax
226; AVX512BW-NEXT:    # kill: def $al killed $al killed $eax
227; AVX512BW-NEXT:    retq
; IR under test: vector compare followed by a <2 x i1> -> i2 bitcast.
228  %x = icmp sgt <2 x i16> %a, %b
229  %res = bitcast <2 x i1> %x to i2
230  ret i2 %res
231}
232
; v2i32 - signed greater-than compare of two <2 x i32> vectors whose
; <2 x i1> result is bitcast to an i2.
; Expected lowering per the assertions below: the SSE2/SSSE3 runs duplicate
; each dword compare result with pshufd before movmskpd; the AVX1/AVX2 runs
; sign-extend dwords to qwords before vmovmskpd; both AVX512 runs compare
; directly into %k0 with no widening step.
233define i2 @v2i32(<2 x i32> %a, <2 x i32> %b) {
234; SSE2-SSSE3-LABEL: v2i32:
235; SSE2-SSSE3:       # %bb.0:
236; SSE2-SSSE3-NEXT:    pcmpgtd %xmm1, %xmm0
237; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
238; SSE2-SSSE3-NEXT:    movmskpd %xmm0, %eax
239; SSE2-SSSE3-NEXT:    # kill: def $al killed $al killed $eax
240; SSE2-SSSE3-NEXT:    retq
241;
242; AVX12-LABEL: v2i32:
243; AVX12:       # %bb.0:
244; AVX12-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
245; AVX12-NEXT:    vpmovsxdq %xmm0, %xmm0
246; AVX12-NEXT:    vmovmskpd %xmm0, %eax
247; AVX12-NEXT:    # kill: def $al killed $al killed $eax
248; AVX12-NEXT:    retq
249;
250; AVX512F-LABEL: v2i32:
251; AVX512F:       # %bb.0:
252; AVX512F-NEXT:    vpcmpgtd %xmm1, %xmm0, %k0
253; AVX512F-NEXT:    kmovw %k0, %eax
254; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
255; AVX512F-NEXT:    retq
256;
257; AVX512BW-LABEL: v2i32:
258; AVX512BW:       # %bb.0:
259; AVX512BW-NEXT:    vpcmpgtd %xmm1, %xmm0, %k0
260; AVX512BW-NEXT:    kmovd %k0, %eax
261; AVX512BW-NEXT:    # kill: def $al killed $al killed $eax
262; AVX512BW-NEXT:    retq
; IR under test: vector compare followed by a <2 x i1> -> i2 bitcast.
263  %x = icmp sgt <2 x i32> %a, %b
264  %res = bitcast <2 x i1> %x to i2
265  ret i2 %res
266}
267
; v2i64 - signed greater-than compare of two <2 x i64> vectors whose
; <2 x i1> result is bitcast to an i2.
; Expected lowering per the assertions below: the SSE2/SSSE3 runs do not use
; a 64-bit compare instruction; they build the result from 32-bit operations
; (pxor of both inputs with a constant, pcmpeqd, pcmpgtd, pshufd, pand, por)
; and then read it with movmskpd. The AVX1/AVX2 runs use vpcmpgtq followed by
; vmovmskpd, and both AVX512 runs use vpcmpgtq directly into %k0.
268define i2 @v2i64(<2 x i64> %a, <2 x i64> %b) {
269; SSE2-SSSE3-LABEL: v2i64:
270; SSE2-SSSE3:       # %bb.0:
271; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
272; SSE2-SSSE3-NEXT:    pxor %xmm2, %xmm1
273; SSE2-SSSE3-NEXT:    pxor %xmm2, %xmm0
274; SSE2-SSSE3-NEXT:    movdqa %xmm0, %xmm2
275; SSE2-SSSE3-NEXT:    pcmpeqd %xmm1, %xmm2
276; SSE2-SSSE3-NEXT:    pcmpgtd %xmm1, %xmm0
277; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
278; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
279; SSE2-SSSE3-NEXT:    por %xmm0, %xmm1
280; SSE2-SSSE3-NEXT:    movmskpd %xmm1, %eax
281; SSE2-SSSE3-NEXT:    # kill: def $al killed $al killed $eax
282; SSE2-SSSE3-NEXT:    retq
283;
284; AVX12-LABEL: v2i64:
285; AVX12:       # %bb.0:
286; AVX12-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
287; AVX12-NEXT:    vmovmskpd %xmm0, %eax
288; AVX12-NEXT:    # kill: def $al killed $al killed $eax
289; AVX12-NEXT:    retq
290;
291; AVX512F-LABEL: v2i64:
292; AVX512F:       # %bb.0:
293; AVX512F-NEXT:    vpcmpgtq %xmm1, %xmm0, %k0
294; AVX512F-NEXT:    kmovw %k0, %eax
295; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
296; AVX512F-NEXT:    retq
297;
298; AVX512BW-LABEL: v2i64:
299; AVX512BW:       # %bb.0:
300; AVX512BW-NEXT:    vpcmpgtq %xmm1, %xmm0, %k0
301; AVX512BW-NEXT:    kmovd %k0, %eax
302; AVX512BW-NEXT:    # kill: def $al killed $al killed $eax
303; AVX512BW-NEXT:    retq
; IR under test: vector compare followed by a <2 x i1> -> i2 bitcast.
304  %x = icmp sgt <2 x i64> %a, %b
305  %res = bitcast <2 x i1> %x to i2
306  ret i2 %res
307}
308
; v2f64 - ordered greater-than compare (fcmp ogt) of two <2 x double>
; vectors whose <2 x i1> result is bitcast to an i2.
; In every run the assertions expect a less-than compare with the operands
; swapped (%xmm0 and %xmm1 exchanged) rather than a greater-than compare.
; The non-AVX512 runs then read the result with (v)movmskpd; the AVX512 runs
; compare into %k0 and move the mask to %eax.
309define i2 @v2f64(<2 x double> %a, <2 x double> %b) {
310; SSE2-SSSE3-LABEL: v2f64:
311; SSE2-SSSE3:       # %bb.0:
312; SSE2-SSSE3-NEXT:    cmpltpd %xmm0, %xmm1
313; SSE2-SSSE3-NEXT:    movmskpd %xmm1, %eax
314; SSE2-SSSE3-NEXT:    # kill: def $al killed $al killed $eax
315; SSE2-SSSE3-NEXT:    retq
316;
317; AVX12-LABEL: v2f64:
318; AVX12:       # %bb.0:
319; AVX12-NEXT:    vcmpltpd %xmm0, %xmm1, %xmm0
320; AVX12-NEXT:    vmovmskpd %xmm0, %eax
321; AVX12-NEXT:    # kill: def $al killed $al killed $eax
322; AVX12-NEXT:    retq
323;
324; AVX512F-LABEL: v2f64:
325; AVX512F:       # %bb.0:
326; AVX512F-NEXT:    vcmpltpd %xmm0, %xmm1, %k0
327; AVX512F-NEXT:    kmovw %k0, %eax
328; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
329; AVX512F-NEXT:    retq
330;
331; AVX512BW-LABEL: v2f64:
332; AVX512BW:       # %bb.0:
333; AVX512BW-NEXT:    vcmpltpd %xmm0, %xmm1, %k0
334; AVX512BW-NEXT:    kmovd %k0, %eax
335; AVX512BW-NEXT:    # kill: def $al killed $al killed $eax
336; AVX512BW-NEXT:    retq
; IR under test: floating-point compare followed by a <2 x i1> -> i2 bitcast.
337  %x = fcmp ogt <2 x double> %a, %b
338  %res = bitcast <2 x i1> %x to i2
339  ret i2 %res
340}
341
; v4i8 - signed greater-than compare of two <4 x i8> vectors whose
; <4 x i1> result is bitcast to an i4.
; Expected lowering per the assertions below: the SSE2/SSSE3 runs widen the
; byte compare result with punpcklbw/punpcklwd before movmskps; the AVX1/AVX2
; runs sign-extend bytes to dwords before vmovmskps; the AVX512F run
; sign-extends into a zmm register and tests into a mask register; the
; AVX512BW run compares straight into %k0.
342define i4 @v4i8(<4 x i8> %a, <4 x i8> %b) {
343; SSE2-SSSE3-LABEL: v4i8:
344; SSE2-SSSE3:       # %bb.0:
345; SSE2-SSSE3-NEXT:    pcmpgtb %xmm1, %xmm0
346; SSE2-SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
347; SSE2-SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
348; SSE2-SSSE3-NEXT:    movmskps %xmm0, %eax
349; SSE2-SSSE3-NEXT:    # kill: def $al killed $al killed $eax
350; SSE2-SSSE3-NEXT:    retq
351;
352; AVX12-LABEL: v4i8:
353; AVX12:       # %bb.0:
354; AVX12-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm0
355; AVX12-NEXT:    vpmovsxbd %xmm0, %xmm0
356; AVX12-NEXT:    vmovmskps %xmm0, %eax
357; AVX12-NEXT:    # kill: def $al killed $al killed $eax
358; AVX12-NEXT:    retq
359;
360; AVX512F-LABEL: v4i8:
361; AVX512F:       # %bb.0:
362; AVX512F-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm0
363; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm0
364; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
365; AVX512F-NEXT:    kmovw %k0, %eax
366; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
367; AVX512F-NEXT:    vzeroupper
368; AVX512F-NEXT:    retq
369;
370; AVX512BW-LABEL: v4i8:
371; AVX512BW:       # %bb.0:
372; AVX512BW-NEXT:    vpcmpgtb %xmm1, %xmm0, %k0
373; AVX512BW-NEXT:    kmovd %k0, %eax
374; AVX512BW-NEXT:    # kill: def $al killed $al killed $eax
375; AVX512BW-NEXT:    retq
; IR under test: vector compare followed by a <4 x i1> -> i4 bitcast.
376  %x = icmp sgt <4 x i8> %a, %b
377  %res = bitcast <4 x i1> %x to i4
378  ret i4 %res
379}
380
; v4i16 - signed greater-than compare of two <4 x i16> vectors whose
; <4 x i1> result is bitcast to an i4.
; Expected lowering per the assertions below: the SSE2/SSSE3 runs widen the
; word compare result with punpcklwd before movmskps; the AVX1/AVX2 runs
; sign-extend words to dwords before vmovmskps; the AVX512F run sign-extends
; into a ymm register and tests into a mask register; the AVX512BW run
; compares straight into %k0.
381define i4 @v4i16(<4 x i16> %a, <4 x i16> %b) {
382; SSE2-SSSE3-LABEL: v4i16:
383; SSE2-SSSE3:       # %bb.0:
384; SSE2-SSSE3-NEXT:    pcmpgtw %xmm1, %xmm0
385; SSE2-SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
386; SSE2-SSSE3-NEXT:    movmskps %xmm0, %eax
387; SSE2-SSSE3-NEXT:    # kill: def $al killed $al killed $eax
388; SSE2-SSSE3-NEXT:    retq
389;
390; AVX12-LABEL: v4i16:
391; AVX12:       # %bb.0:
392; AVX12-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm0
393; AVX12-NEXT:    vpmovsxwd %xmm0, %xmm0
394; AVX12-NEXT:    vmovmskps %xmm0, %eax
395; AVX12-NEXT:    # kill: def $al killed $al killed $eax
396; AVX12-NEXT:    retq
397;
398; AVX512F-LABEL: v4i16:
399; AVX512F:       # %bb.0:
400; AVX512F-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm0
401; AVX512F-NEXT:    vpmovsxwd %xmm0, %ymm0
402; AVX512F-NEXT:    vptestmd %ymm0, %ymm0, %k0
403; AVX512F-NEXT:    kmovw %k0, %eax
404; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
405; AVX512F-NEXT:    vzeroupper
406; AVX512F-NEXT:    retq
407;
408; AVX512BW-LABEL: v4i16:
409; AVX512BW:       # %bb.0:
410; AVX512BW-NEXT:    vpcmpgtw %xmm1, %xmm0, %k0
411; AVX512BW-NEXT:    kmovd %k0, %eax
412; AVX512BW-NEXT:    # kill: def $al killed $al killed $eax
413; AVX512BW-NEXT:    retq
; IR under test: vector compare followed by a <4 x i1> -> i4 bitcast.
414  %x = icmp sgt <4 x i16> %a, %b
415  %res = bitcast <4 x i1> %x to i4
416  ret i4 %res
417}
418
; v8i8 - signed greater-than compare of two <8 x i8> vectors whose
; <8 x i1> result is bitcast to an i8.
; Expected lowering per the assertions below: the SSE2/SSSE3 and AVX1/AVX2
; runs use a byte compare followed directly by (v)pmovmskb, returning %al;
; the AVX512F run sign-extends bytes to dwords in a zmm register and tests
; into a mask register; the AVX512BW run compares straight into %k0.
419define i8 @v8i8(<8 x i8> %a, <8 x i8> %b) {
420; SSE2-SSSE3-LABEL: v8i8:
421; SSE2-SSSE3:       # %bb.0:
422; SSE2-SSSE3-NEXT:    pcmpgtb %xmm1, %xmm0
423; SSE2-SSSE3-NEXT:    pmovmskb %xmm0, %eax
424; SSE2-SSSE3-NEXT:    # kill: def $al killed $al killed $eax
425; SSE2-SSSE3-NEXT:    retq
426;
427; AVX12-LABEL: v8i8:
428; AVX12:       # %bb.0:
429; AVX12-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm0
430; AVX12-NEXT:    vpmovmskb %xmm0, %eax
431; AVX12-NEXT:    # kill: def $al killed $al killed $eax
432; AVX12-NEXT:    retq
433;
434; AVX512F-LABEL: v8i8:
435; AVX512F:       # %bb.0:
436; AVX512F-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm0
437; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm0
438; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
439; AVX512F-NEXT:    kmovw %k0, %eax
440; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
441; AVX512F-NEXT:    vzeroupper
442; AVX512F-NEXT:    retq
443;
444; AVX512BW-LABEL: v8i8:
445; AVX512BW:       # %bb.0:
446; AVX512BW-NEXT:    vpcmpgtb %xmm1, %xmm0, %k0
447; AVX512BW-NEXT:    kmovd %k0, %eax
448; AVX512BW-NEXT:    # kill: def $al killed $al killed $eax
449; AVX512BW-NEXT:    retq
; IR under test: vector compare followed by an <8 x i1> -> i8 bitcast.
450  %x = icmp sgt <8 x i8> %a, %b
451  %res = bitcast <8 x i1> %x to i8
452  ret i8 %res
453}
454
; v16i8_widened_with_zeroes - equality compare of two <16 x i8> vectors,
; widened to <64 x i1> by a shufflevector whose second operand is
; zeroinitializer, then bitcast to an i64.
; Shuffle indices 0-15 select the compare result; every remaining index is
; in the 16-31 range, i.e. it selects a lane of the all-zero second operand.
; Here the AVX1 and AVX2 runs are checked separately: the AVX2 run reads the
; mask from %ymm0 and ends with vzeroupper. None of the runs is expected to
; emit extra instructions to insert the zero lanes.
455define i64 @v16i8_widened_with_zeroes(<16 x i8> %a, <16 x i8> %b) {
456; SSE2-SSSE3-LABEL: v16i8_widened_with_zeroes:
457; SSE2-SSSE3:       # %bb.0: # %entry
458; SSE2-SSSE3-NEXT:    pcmpeqb %xmm1, %xmm0
459; SSE2-SSSE3-NEXT:    pmovmskb %xmm0, %eax
460; SSE2-SSSE3-NEXT:    retq
461;
462; AVX1-LABEL: v16i8_widened_with_zeroes:
463; AVX1:       # %bb.0: # %entry
464; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
465; AVX1-NEXT:    vpmovmskb %xmm0, %eax
466; AVX1-NEXT:    retq
467;
468; AVX2-LABEL: v16i8_widened_with_zeroes:
469; AVX2:       # %bb.0: # %entry
470; AVX2-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
471; AVX2-NEXT:    vpmovmskb %ymm0, %eax
472; AVX2-NEXT:    vzeroupper
473; AVX2-NEXT:    retq
474;
475; AVX512F-LABEL: v16i8_widened_with_zeroes:
476; AVX512F:       # %bb.0: # %entry
477; AVX512F-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
478; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm0
479; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
480; AVX512F-NEXT:    kmovw %k0, %eax
481; AVX512F-NEXT:    vzeroupper
482; AVX512F-NEXT:    retq
483;
484; AVX512BW-LABEL: v16i8_widened_with_zeroes:
485; AVX512BW:       # %bb.0: # %entry
486; AVX512BW-NEXT:    vpcmpeqb %xmm1, %xmm0, %k0
487; AVX512BW-NEXT:    kmovq %k0, %rax
488; AVX512BW-NEXT:    retq
489entry:
; IR under test: compare, widen 16 -> 64 lanes with zero lanes, bitcast to i64.
490  %c = icmp eq <16 x i8> %a, %b
491  %d = shufflevector <16 x i1> %c, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
492  %e = bitcast <64 x i1> %d to i64
493  ret i64 %e
494}
495
; v16i8_widened_with_ones - equality compare of two <16 x i8> vectors,
; widened to <64 x i1> by a shufflevector whose second operand is a vector
; of sixteen `i1 -1` elements, then bitcast to an i64.
; Shuffle indices 0-15 select the compare result; every remaining index is
; in the 16-31 range, i.e. it selects a lane of the all-ones second operand.
; Expected lowering per the assertions below: the SSE2/SSSE3, AVX1 and
; AVX512F runs OR the 16-bit mask with 0xFFFF0000 and then with
; 0xFFFFFFFF00000000; the AVX2 run inserts a constant loaded from memory
; into the upper half of %ymm0 before vpmovmskb and only ORs in the upper
; 32 bits; the AVX512BW run builds the value in mask registers with
; kxnor/kunpck and moves it out with kmovq.
496define i64 @v16i8_widened_with_ones(<16 x i8> %a, <16 x i8> %b) {
497; SSE2-SSSE3-LABEL: v16i8_widened_with_ones:
498; SSE2-SSSE3:       # %bb.0: # %entry
499; SSE2-SSSE3-NEXT:    pcmpeqb %xmm1, %xmm0
500; SSE2-SSSE3-NEXT:    pmovmskb %xmm0, %ecx
501; SSE2-SSSE3-NEXT:    orl $-65536, %ecx # imm = 0xFFFF0000
502; SSE2-SSSE3-NEXT:    movabsq $-4294967296, %rax # imm = 0xFFFFFFFF00000000
503; SSE2-SSSE3-NEXT:    orq %rcx, %rax
504; SSE2-SSSE3-NEXT:    retq
505;
506; AVX1-LABEL: v16i8_widened_with_ones:
507; AVX1:       # %bb.0: # %entry
508; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
509; AVX1-NEXT:    vpmovmskb %xmm0, %ecx
510; AVX1-NEXT:    orl $-65536, %ecx # imm = 0xFFFF0000
511; AVX1-NEXT:    movabsq $-4294967296, %rax # imm = 0xFFFFFFFF00000000
512; AVX1-NEXT:    orq %rcx, %rax
513; AVX1-NEXT:    retq
514;
515; AVX2-LABEL: v16i8_widened_with_ones:
516; AVX2:       # %bb.0: # %entry
517; AVX2-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
518; AVX2-NEXT:    vinserti128 $1, {{.*}}(%rip), %ymm0, %ymm0
519; AVX2-NEXT:    vpsllw $7, %ymm0, %ymm0
520; AVX2-NEXT:    vpmovmskb %ymm0, %ecx
521; AVX2-NEXT:    movabsq $-4294967296, %rax # imm = 0xFFFFFFFF00000000
522; AVX2-NEXT:    orq %rcx, %rax
523; AVX2-NEXT:    vzeroupper
524; AVX2-NEXT:    retq
525;
526; AVX512F-LABEL: v16i8_widened_with_ones:
527; AVX512F:       # %bb.0: # %entry
528; AVX512F-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
529; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm0
530; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
531; AVX512F-NEXT:    kmovw %k0, %ecx
532; AVX512F-NEXT:    orl $-65536, %ecx # imm = 0xFFFF0000
533; AVX512F-NEXT:    movabsq $-4294967296, %rax # imm = 0xFFFFFFFF00000000
534; AVX512F-NEXT:    orq %rcx, %rax
535; AVX512F-NEXT:    vzeroupper
536; AVX512F-NEXT:    retq
537;
538; AVX512BW-LABEL: v16i8_widened_with_ones:
539; AVX512BW:       # %bb.0: # %entry
540; AVX512BW-NEXT:    vpcmpeqb %xmm1, %xmm0, %k0
541; AVX512BW-NEXT:    kxnorw %k0, %k0, %k1
542; AVX512BW-NEXT:    kunpckwd %k0, %k1, %k0
543; AVX512BW-NEXT:    kxnord %k0, %k0, %k1
544; AVX512BW-NEXT:    kunpckdq %k0, %k1, %k0
545; AVX512BW-NEXT:    kmovq %k0, %rax
546; AVX512BW-NEXT:    retq
547entry:
; IR under test: compare, widen 16 -> 64 lanes with all-ones lanes, bitcast to i64.
548  %c = icmp eq <16 x i8> %a, %b
549  %d = shufflevector <16 x i1> %c, <16 x i1> <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
550  %e = bitcast <64 x i1> %d to i64
551  ret i64 %e
552}
553
; bitcast_16i8_store - signed less-than-zero compare of a <16 x i8> vector;
; the <16 x i1> result is bitcast to an i16 and stored through %p.
; Expected lowering per the assertions below: the SSE2/SSSE3 and AVX1/AVX2
; runs emit no compare at all and apply (v)pmovmskb to the input directly;
; the AVX512F run materializes the compare against a zeroed register and
; stores the mask register with kmovw; the AVX512BW run uses vpmovb2m.
554define void @bitcast_16i8_store(i16* %p, <16 x i8> %a0) {
555; SSE2-SSSE3-LABEL: bitcast_16i8_store:
556; SSE2-SSSE3:       # %bb.0:
557; SSE2-SSSE3-NEXT:    pmovmskb %xmm0, %eax
558; SSE2-SSSE3-NEXT:    movw %ax, (%rdi)
559; SSE2-SSSE3-NEXT:    retq
560;
561; AVX12-LABEL: bitcast_16i8_store:
562; AVX12:       # %bb.0:
563; AVX12-NEXT:    vpmovmskb %xmm0, %eax
564; AVX12-NEXT:    movw %ax, (%rdi)
565; AVX12-NEXT:    retq
566;
567; AVX512F-LABEL: bitcast_16i8_store:
568; AVX512F:       # %bb.0:
569; AVX512F-NEXT:    vpxor %xmm1, %xmm1, %xmm1
570; AVX512F-NEXT:    vpcmpgtb %xmm0, %xmm1, %xmm0
571; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm0
572; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
573; AVX512F-NEXT:    kmovw %k0, (%rdi)
574; AVX512F-NEXT:    vzeroupper
575; AVX512F-NEXT:    retq
576;
577; AVX512BW-LABEL: bitcast_16i8_store:
578; AVX512BW:       # %bb.0:
579; AVX512BW-NEXT:    vpmovb2m %xmm0, %k0
580; AVX512BW-NEXT:    kmovw %k0, (%rdi)
581; AVX512BW-NEXT:    retq
; IR under test: compare against zero, <16 x i1> -> i16 bitcast, store to %p.
582  %a1 = icmp slt <16 x i8> %a0, zeroinitializer
583  %a2 = bitcast <16 x i1> %a1 to i16
584  store i16 %a2, i16* %p
585  ret void
586}
587
; bitcast_8i16_store - signed less-than-zero compare of an <8 x i16> vector;
; the <8 x i1> result is bitcast to an i8 and stored through %p.
; Expected lowering per the assertions below: the SSE2/SSSE3 and AVX1/AVX2
; runs emit no compare and instead pack the input words to bytes and apply
; (v)pmovmskb; the AVX512F run materializes the compare against a zeroed
; register; the AVX512BW run uses vpmovw2m. Every run stores the result
; with a byte-sized movb.
588define void @bitcast_8i16_store(i8* %p, <8 x i16> %a0) {
589; SSE2-SSSE3-LABEL: bitcast_8i16_store:
590; SSE2-SSSE3:       # %bb.0:
591; SSE2-SSSE3-NEXT:    packsswb %xmm0, %xmm0
592; SSE2-SSSE3-NEXT:    pmovmskb %xmm0, %eax
593; SSE2-SSSE3-NEXT:    movb %al, (%rdi)
594; SSE2-SSSE3-NEXT:    retq
595;
596; AVX12-LABEL: bitcast_8i16_store:
597; AVX12:       # %bb.0:
598; AVX12-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
599; AVX12-NEXT:    vpmovmskb %xmm0, %eax
600; AVX12-NEXT:    movb %al, (%rdi)
601; AVX12-NEXT:    retq
602;
603; AVX512F-LABEL: bitcast_8i16_store:
604; AVX512F:       # %bb.0:
605; AVX512F-NEXT:    vpxor %xmm1, %xmm1, %xmm1
606; AVX512F-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
607; AVX512F-NEXT:    vpmovsxwd %xmm0, %ymm0
608; AVX512F-NEXT:    vptestmd %ymm0, %ymm0, %k0
609; AVX512F-NEXT:    kmovw %k0, %eax
610; AVX512F-NEXT:    movb %al, (%rdi)
611; AVX512F-NEXT:    vzeroupper
612; AVX512F-NEXT:    retq
613;
614; AVX512BW-LABEL: bitcast_8i16_store:
615; AVX512BW:       # %bb.0:
616; AVX512BW-NEXT:    vpmovw2m %xmm0, %k0
617; AVX512BW-NEXT:    kmovd %k0, %eax
618; AVX512BW-NEXT:    movb %al, (%rdi)
619; AVX512BW-NEXT:    retq
; IR under test: compare against zero, <8 x i1> -> i8 bitcast, store to %p.
620  %a1 = icmp slt <8 x i16> %a0, zeroinitializer
621  %a2 = bitcast <8 x i1> %a1 to i8
622  store i8 %a2, i8* %p
623  ret void
624}
625
; bitcast_4i32_store - signed less-than-zero compare of a <4 x i32> vector;
; the <4 x i1> result is bitcast to an i4 and stored through %p.
; Expected lowering per the assertions below: the SSE2/SSSE3 and AVX1/AVX2
; runs emit no compare and apply (v)movmskps to the input directly; both
; AVX512 runs zero a register with vpxor and compare against it into %k0.
; Every run stores the i4 result with a byte-sized movb.
626define void @bitcast_4i32_store(i4* %p, <4 x i32> %a0) {
627; SSE2-SSSE3-LABEL: bitcast_4i32_store:
628; SSE2-SSSE3:       # %bb.0:
629; SSE2-SSSE3-NEXT:    movmskps %xmm0, %eax
630; SSE2-SSSE3-NEXT:    movb %al, (%rdi)
631; SSE2-SSSE3-NEXT:    retq
632;
633; AVX12-LABEL: bitcast_4i32_store:
634; AVX12:       # %bb.0:
635; AVX12-NEXT:    vmovmskps %xmm0, %eax
636; AVX12-NEXT:    movb %al, (%rdi)
637; AVX12-NEXT:    retq
638;
639; AVX512F-LABEL: bitcast_4i32_store:
640; AVX512F:       # %bb.0:
641; AVX512F-NEXT:    vpxor %xmm1, %xmm1, %xmm1
642; AVX512F-NEXT:    vpcmpgtd %xmm0, %xmm1, %k0
643; AVX512F-NEXT:    kmovw %k0, %eax
644; AVX512F-NEXT:    movb %al, (%rdi)
645; AVX512F-NEXT:    retq
646;
647; AVX512BW-LABEL: bitcast_4i32_store:
648; AVX512BW:       # %bb.0:
649; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
650; AVX512BW-NEXT:    vpcmpgtd %xmm0, %xmm1, %k0
651; AVX512BW-NEXT:    kmovd %k0, %eax
652; AVX512BW-NEXT:    movb %al, (%rdi)
653; AVX512BW-NEXT:    retq
; IR under test: compare against zero, <4 x i1> -> i4 bitcast, store to %p.
654  %a1 = icmp slt <4 x i32> %a0, zeroinitializer
655  %a2 = bitcast <4 x i1> %a1 to i4
656  store i4 %a2, i4* %p
657  ret void
658}
659
; bitcast_2i64_store - signed less-than-zero compare of a <2 x i64> vector;
; the <2 x i1> result is bitcast to an i2 and stored through %p.
; Expected lowering per the assertions below: the SSE2/SSSE3 and AVX1/AVX2
; runs emit no compare and apply (v)movmskpd to the input directly; both
; AVX512 runs zero a register with vpxor and compare against it into %k0.
; Every run stores the i2 result with a byte-sized movb.
660define void @bitcast_2i64_store(i2* %p, <2 x i64> %a0) {
661; SSE2-SSSE3-LABEL: bitcast_2i64_store:
662; SSE2-SSSE3:       # %bb.0:
663; SSE2-SSSE3-NEXT:    movmskpd %xmm0, %eax
664; SSE2-SSSE3-NEXT:    movb %al, (%rdi)
665; SSE2-SSSE3-NEXT:    retq
666;
667; AVX12-LABEL: bitcast_2i64_store:
668; AVX12:       # %bb.0:
669; AVX12-NEXT:    vmovmskpd %xmm0, %eax
670; AVX12-NEXT:    movb %al, (%rdi)
671; AVX12-NEXT:    retq
672;
673; AVX512F-LABEL: bitcast_2i64_store:
674; AVX512F:       # %bb.0:
675; AVX512F-NEXT:    vpxor %xmm1, %xmm1, %xmm1
676; AVX512F-NEXT:    vpcmpgtq %xmm0, %xmm1, %k0
677; AVX512F-NEXT:    kmovw %k0, %eax
678; AVX512F-NEXT:    movb %al, (%rdi)
679; AVX512F-NEXT:    retq
680;
681; AVX512BW-LABEL: bitcast_2i64_store:
682; AVX512BW:       # %bb.0:
683; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
684; AVX512BW-NEXT:    vpcmpgtq %xmm0, %xmm1, %k0
685; AVX512BW-NEXT:    kmovd %k0, %eax
686; AVX512BW-NEXT:    movb %al, (%rdi)
687; AVX512BW-NEXT:    retq
; IR under test: compare against zero, <2 x i1> -> i2 bitcast, store to %p.
688  %a1 = icmp slt <2 x i64> %a0, zeroinitializer
689  %a2 = bitcast <2 x i1> %a1 to i2
690  store i2 %a2, i2* %p
691  ret void
692}
693