; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE42
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2

; Lower common integer comparisons such as 'isPositive' efficiently:
; https://llvm.org/bugs/show_bug.cgi?id=26701
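;
; The IR pattern in each test below is "not (ashr x, bitwidth-1)", which is a
; sign-bit test for "x > -1". As a rough C sketch (illustration only, not part
; of the original test): res[i] = (x[i] > -1) ? -1 : 0. The expected lowering
; is a single pcmpgt against an all-ones vector instead of an arithmetic shift
; followed by a vector not.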

define <16 x i8> @test_pcmpgtb(<16 x i8> %x) {
; SSE-LABEL: test_pcmpgtb:
; SSE:       # BB#0:
; SSE-NEXT:    pcmpeqd %xmm1, %xmm1
; SSE-NEXT:    pcmpgtb %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_pcmpgtb:
; AVX:       # BB#0:
; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
;
  %sign = ashr <16 x i8> %x, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
  %not = xor <16 x i8> %sign, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  ret <16 x i8> %not
}

define <8 x i16> @test_pcmpgtw(<8 x i16> %x) {
; SSE-LABEL: test_pcmpgtw:
; SSE:       # BB#0:
; SSE-NEXT:    pcmpeqd %xmm1, %xmm1
; SSE-NEXT:    pcmpgtw %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_pcmpgtw:
; AVX:       # BB#0:
; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
;
  %sign = ashr <8 x i16> %x, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
  %not = xor <8 x i16> %sign, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  ret <8 x i16> %not
}

define <4 x i32> @test_pcmpgtd(<4 x i32> %x) {
; SSE-LABEL: test_pcmpgtd:
; SSE:       # BB#0:
; SSE-NEXT:    pcmpeqd %xmm1, %xmm1
; SSE-NEXT:    pcmpgtd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_pcmpgtd:
; AVX:       # BB#0:
; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
;
  %sign = ashr <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31>
  %not = xor <4 x i32> %sign, <i32 -1, i32 -1, i32 -1, i32 -1>
  ret <4 x i32> %not
}

define <2 x i64> @test_pcmpgtq(<2 x i64> %x) {
; SSE2-LABEL: test_pcmpgtq:
; SSE2:       # BB#0:
; SSE2-NEXT:    psrad $31, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pcmpeqd %xmm0, %xmm0
; SSE2-NEXT:    pxor %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE42-LABEL: test_pcmpgtq:
; SSE42:       # BB#0:
; SSE42-NEXT:    pcmpeqd %xmm1, %xmm1
; SSE42-NEXT:    pcmpgtq %xmm1, %xmm0
; SSE42-NEXT:    retq
;
; AVX-LABEL: test_pcmpgtq:
; AVX:       # BB#0:
; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
;
  %sign = ashr <2 x i64> %x, <i64 63, i64 63>
  %not = xor <2 x i64> %sign, <i64 -1, i64 -1>
  ret <2 x i64> %not
}

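; <1 x i128> is not a legal vector type on x86, so the single-instruction
; compare does not apply; the checks below record the lowering that is
; currently produced for this case.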
define <1 x i128> @test_strange_type(<1 x i128> %x) {
; SSE2-LABEL: test_strange_type:
; SSE2:       # BB#0:
; SSE2-NEXT:    sarq $63, %rsi
; SSE2-NEXT:    movd %rsi, %xmm0
; SSE2-NEXT:    notq %rsi
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm1
; SSE2-NEXT:    movd %xmm1, %rax
; SSE2-NEXT:    movq %rsi, %rdx
; SSE2-NEXT:    retq
;
; SSE42-LABEL: test_strange_type:
; SSE42:       # BB#0:
; SSE42-NEXT:    sarq $63, %rsi
; SSE42-NEXT:    movd %rsi, %xmm0
; SSE42-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; SSE42-NEXT:    pcmpeqd %xmm1, %xmm1
; SSE42-NEXT:    pxor %xmm0, %xmm1
; SSE42-NEXT:    movd %xmm1, %rax
; SSE42-NEXT:    pextrq $1, %xmm1, %rdx
; SSE42-NEXT:    retq
;
; AVX1-LABEL: test_strange_type:
; AVX1:       # BB#0:
; AVX1-NEXT:    sarq $63, %rsi
; AVX1-NEXT:    vmovq %rsi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovq %xmm0, %rax
; AVX1-NEXT:    vpextrq $1, %xmm0, %rdx
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_strange_type:
; AVX2:       # BB#0:
; AVX2-NEXT:    sarq $63, %rsi
; AVX2-NEXT:    vmovq %rsi, %xmm0
; AVX2-NEXT:    vpbroadcastq %xmm0, %xmm0
; AVX2-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovq %xmm0, %rax
; AVX2-NEXT:    vpextrq $1, %xmm0, %rdx
; AVX2-NEXT:    retq
;
  %sign = ashr <1 x i128> %x, <i128 127>
  %not = xor <1 x i128> %sign, <i128 -1>
  ret <1 x i128> %not
}

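; The same 'isPositive' fold on 256-bit vectors. AVX2 can use a single ymm
; pcmpgt, while SSE and AVX1 operate on 128-bit halves.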
define <32 x i8> @test_pcmpgtb_256(<32 x i8> %x) {
; SSE-LABEL: test_pcmpgtb_256:
; SSE:       # BB#0:
; SSE-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE-NEXT:    pcmpgtb %xmm2, %xmm0
; SSE-NEXT:    pcmpgtb %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: test_pcmpgtb_256:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpgtb %xmm1, %xmm2, %xmm1
; AVX1-NEXT:    vpcmpgtb %xmm0, %xmm2, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm1
; AVX1-NEXT:    vxorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_pcmpgtb_256:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX2-NEXT:    vpcmpgtb %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
  %sign = ashr <32 x i8> %x, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
  %not = xor <32 x i8> %sign, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  ret <32 x i8> %not
}

define <16 x i16> @test_pcmpgtw_256(<16 x i16> %x) {
; SSE-LABEL: test_pcmpgtw_256:
; SSE:       # BB#0:
; SSE-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE-NEXT:    pcmpgtw %xmm2, %xmm0
; SSE-NEXT:    pcmpgtw %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: test_pcmpgtw_256:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpsraw $15, %xmm0, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpsraw $15, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm1
; AVX1-NEXT:    vxorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_pcmpgtw_256:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX2-NEXT:    vpcmpgtw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
  %sign = ashr <16 x i16> %x, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
  %not = xor <16 x i16> %sign, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  ret <16 x i16> %not
}

define <8 x i32> @test_pcmpgtd_256(<8 x i32> %x) {
; SSE-LABEL: test_pcmpgtd_256:
; SSE:       # BB#0:
; SSE-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE-NEXT:    pcmpgtd %xmm2, %xmm0
; SSE-NEXT:    pcmpgtd %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: test_pcmpgtd_256:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm1
; AVX1-NEXT:    vxorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_pcmpgtd_256:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX2-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
  %sign = ashr <8 x i32> %x, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
  %not = xor <8 x i32> %sign, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
  ret <8 x i32> %not
}

define <4 x i64> @test_pcmpgtq_256(<4 x i64> %x) {
; SSE2-LABEL: test_pcmpgtq_256:
; SSE2:       # BB#0:
; SSE2-NEXT:    psrad $31, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-NEXT:    psrad $31, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE2-NEXT:    pxor %xmm2, %xmm0
; SSE2-NEXT:    pxor %xmm2, %xmm1
; SSE2-NEXT:    retq
;
; SSE42-LABEL: test_pcmpgtq_256:
; SSE42:       # BB#0:
; SSE42-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE42-NEXT:    pcmpgtq %xmm2, %xmm0
; SSE42-NEXT:    pcmpgtq %xmm2, %xmm1
; SSE42-NEXT:    retq
;
; AVX1-LABEL: test_pcmpgtq_256:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm1
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm1
; AVX1-NEXT:    vxorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_pcmpgtq_256:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX2-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
  %sign = ashr <4 x i64> %x, <i64 63, i64 63, i64 63, i64 63>
  %not = xor <4 x i64> %sign, <i64 -1, i64 -1, i64 -1, i64 -1>
  ret <4 x i64> %not
}

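; The remaining tests zero-extend a vector icmp result. The expected lowering
; keeps the compare and converts the all-ones mask to 0/1 values with a
; logical shift right (or an 'and' with a splat of 1).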
define <16 x i8> @cmpeq_zext_v16i8(<16 x i8> %a, <16 x i8> %b) {
; SSE-LABEL: cmpeq_zext_v16i8:
; SSE:       # BB#0:
; SSE-NEXT:    pcmpeqb %xmm1, %xmm0
; SSE-NEXT:    pand {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: cmpeq_zext_v16i8:
; AVX:       # BB#0:
; AVX-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
;
  %cmp = icmp eq <16 x i8> %a, %b
  %zext = zext <16 x i1> %cmp to <16 x i8>
  ret <16 x i8> %zext
}

define <16 x i16> @cmpeq_zext_v16i16(<16 x i16> %a, <16 x i16> %b) {
; SSE-LABEL: cmpeq_zext_v16i16:
; SSE:       # BB#0:
; SSE-NEXT:    pcmpeqw %xmm2, %xmm0
; SSE-NEXT:    psrlw $15, %xmm0
; SSE-NEXT:    pcmpeqw %xmm3, %xmm1
; SSE-NEXT:    psrlw $15, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: cmpeq_zext_v16i16:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpcmpeqw %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: cmpeq_zext_v16i16:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsrlw $15, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
  %cmp = icmp eq <16 x i16> %a, %b
  %zext = zext <16 x i1> %cmp to <16 x i16>
  ret <16 x i16> %zext
}

define <4 x i32> @cmpeq_zext_v4i32(<4 x i32> %a, <4 x i32> %b) {
; SSE-LABEL: cmpeq_zext_v4i32:
; SSE:       # BB#0:
; SSE-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE-NEXT:    psrld $31, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: cmpeq_zext_v4i32:
; AVX:       # BB#0:
; AVX-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsrld $31, %xmm0, %xmm0
; AVX-NEXT:    retq
;
  %cmp = icmp eq <4 x i32> %a, %b
  %zext = zext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %zext
}

define <4 x i64> @cmpeq_zext_v4i64(<4 x i64> %a, <4 x i64> %b) {
; SSE2-LABEL: cmpeq_zext_v4i64:
; SSE2:       # BB#0:
; SSE2-NEXT:    pcmpeqd %xmm2, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,0,3,2]
; SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [1,1]
; SSE2-NEXT:    pand %xmm4, %xmm2
; SSE2-NEXT:    pand %xmm2, %xmm0
; SSE2-NEXT:    pcmpeqd %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,0,3,2]
; SSE2-NEXT:    pand %xmm4, %xmm2
; SSE2-NEXT:    pand %xmm2, %xmm1
; SSE2-NEXT:    retq
;
; SSE42-LABEL: cmpeq_zext_v4i64:
; SSE42:       # BB#0:
; SSE42-NEXT:    pcmpeqq %xmm2, %xmm0
; SSE42-NEXT:    psrlq $63, %xmm0
; SSE42-NEXT:    pcmpeqq %xmm3, %xmm1
; SSE42-NEXT:    psrlq $63, %xmm1
; SSE42-NEXT:    retq
;
; AVX1-LABEL: cmpeq_zext_v4i64:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpcmpeqq %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: cmpeq_zext_v4i64:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpcmpeqq %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsrlq $63, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
  %cmp = icmp eq <4 x i64> %a, %b
  %zext = zext <4 x i1> %cmp to <4 x i64>
  ret <4 x i64> %zext
}

define <32 x i8> @cmpgt_zext_v32i8(<32 x i8> %a, <32 x i8> %b) {
; SSE-LABEL: cmpgt_zext_v32i8:
; SSE:       # BB#0:
; SSE-NEXT:    pcmpgtb %xmm2, %xmm0
; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; SSE-NEXT:    pand %xmm2, %xmm0
; SSE-NEXT:    pcmpgtb %xmm3, %xmm1
; SSE-NEXT:    pand %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: cmpgt_zext_v32i8:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpcmpgtb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: cmpgt_zext_v32i8:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpcmpgtb %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpand {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT:    retq
;
  %cmp = icmp sgt <32 x i8> %a, %b
  %zext = zext <32 x i1> %cmp to <32 x i8>
  ret <32 x i8> %zext
}

define <8 x i16> @cmpgt_zext_v8i16(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: cmpgt_zext_v8i16:
; SSE:       # BB#0:
; SSE-NEXT:    pcmpgtw %xmm1, %xmm0
; SSE-NEXT:    psrlw $15, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: cmpgt_zext_v8i16:
; AVX:       # BB#0:
; AVX-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsrlw $15, %xmm0, %xmm0
; AVX-NEXT:    retq
;
  %cmp = icmp sgt <8 x i16> %a, %b
  %zext = zext <8 x i1> %cmp to <8 x i16>
  ret <8 x i16> %zext
}

define <8 x i32> @cmpgt_zext_v8i32(<8 x i32> %a, <8 x i32> %b) {
; SSE-LABEL: cmpgt_zext_v8i32:
; SSE:       # BB#0:
; SSE-NEXT:    pcmpgtd %xmm2, %xmm0
; SSE-NEXT:    psrld $31, %xmm0
; SSE-NEXT:    pcmpgtd %xmm3, %xmm1
; SSE-NEXT:    psrld $31, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: cmpgt_zext_v8i32:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpcmpgtd %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: cmpgt_zext_v8i32:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsrld $31, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
  %cmp = icmp sgt <8 x i32> %a, %b
  %zext = zext <8 x i1> %cmp to <8 x i32>
  ret <8 x i32> %zext
}

define <2 x i64> @cmpgt_zext_v2i64(<2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: cmpgt_zext_v2i64:
; SSE2:       # BB#0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0]
; SSE2-NEXT:    pxor %xmm2, %xmm1
; SSE2-NEXT:    pxor %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; SSE2-NEXT:    retq
;
; SSE42-LABEL: cmpgt_zext_v2i64:
; SSE42:       # BB#0:
; SSE42-NEXT:    pcmpgtq %xmm1, %xmm0
; SSE42-NEXT:    psrlq $63, %xmm0
; SSE42-NEXT:    retq
;
; AVX-LABEL: cmpgt_zext_v2i64:
; AVX:       # BB#0:
; AVX-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsrlq $63, %xmm0, %xmm0
; AVX-NEXT:    retq
;
  %cmp = icmp sgt <2 x i64> %a, %b
  %zext = zext <2 x i1> %cmp to <2 x i64>
  ret <2 x i64> %zext
}