; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=SSE,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse3 | FileCheck %s --check-prefixes=SSE,SSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE,SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vpopcntdq | FileCheck %s --check-prefixes=AVX,AVX512VPOPCNTDQ
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vpopcntdq,+avx512vl | FileCheck %s --check-prefixes=AVX,AVX512VPOPCNTDQVL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bitalg | FileCheck %s --check-prefix=BITALG_NOVLX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bitalg,+avx512vl | FileCheck %s --check-prefix=BITALG


; ctpop(x) > 1 per i8 lane, sign-extended to a mask (lowered via x & (x-1) != 0 on non-BITALG targets).
define <16 x i8> @ugt_1_v16i8(<16 x i8> %0) {
; SSE-LABEL: ugt_1_v16i8:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    paddb %xmm2, %xmm1
; SSE-NEXT:    pand %xmm0, %xmm1
; SSE-NEXT:    pxor %xmm0, %xmm0
; SSE-NEXT:    pcmpeqb %xmm0, %xmm1
; SSE-NEXT:    pxor %xmm2, %xmm1
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: ugt_1_v16i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpaddb %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ugt_1_v16i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpaddb %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vpand %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpcmpeqb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ugt_1_v16i8:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX512VPOPCNTDQ-NEXT:    vpaddb %xmm1, %xmm0, %xmm1
; AVX512VPOPCNTDQ-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX512VPOPCNTDQ-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ugt_1_v16i8:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX512VPOPCNTDQVL-NEXT:    vpaddb %xmm1, %xmm0, %xmm1
; AVX512VPOPCNTDQVL-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpternlogq $15, %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ugt_1_v16i8:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpcmpgtb {{.*}}(%rip), %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ugt_1_v16i8:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpcmpnleub {{.*}}(%rip), %xmm0, %k0
; BITALG-NEXT:    vpmovm2b %k0, %xmm0
; BITALG-NEXT:    retq
  %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0)
  %3 = icmp ugt <16 x i8> %2, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %4 = sext <16 x i1> %3 to <16 x i8>
  ret <16 x i8> %4
}

; ctpop(x) < 2 per i8 lane, sign-extended to a mask (lowered via x & (x-1) == 0 on non-BITALG targets).
define <16 x i8> @ult_2_v16i8(<16 x i8> %0) {
; SSE-LABEL: ult_2_v16i8:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqd %xmm1, %xmm1
; SSE-NEXT:    paddb %xmm0, %xmm1
; SSE-NEXT:    pand %xmm1, %xmm0
; SSE-NEXT:    pxor %xmm1, %xmm1
; SSE-NEXT:    pcmpeqb %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: ult_2_v16i8:
; AVX:       # %bb.0:
; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpaddb %xmm1, %xmm0, %xmm1
; AVX-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ult_2_v16i8:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vmovdqa {{.*#+}} xmm1 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
; BITALG_NOVLX-NEXT:    vpcmpgtb %xmm0, %xmm1, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ult_2_v16i8:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpcmpltub {{.*}}(%rip), %xmm0, %k0
; BITALG-NEXT:    vpmovm2b %k0, %xmm0
; BITALG-NEXT:    retq
  %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0)
  %3 = icmp ult <16 x i8> %2, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
  %4 = sext <16 x i1> %3 to <16 x i8>
  ret <16 x i8> %4
}

; ctpop(x) > 2 per i8 lane, sign-extended to a mask.
define <16 x i8> @ugt_2_v16i8(<16 x i8> %0) {
; SSE2-LABEL: ugt_2_v16i8:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pcmpgtb {{.*}}(%rip), %xmm1
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ugt_2_v16i8:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pcmpgtb {{.*}}(%rip), %xmm1
; SSE3-NEXT:    movdqa %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ugt_2_v16i8:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3]
; SSSE3-NEXT:    pmaxub %xmm3, %xmm0
; SSSE3-NEXT:    pcmpeqb %xmm3, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ugt_2_v16i8:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3]
; SSE41-NEXT:    pmaxub %xmm3, %xmm0
; SSE41-NEXT:    pcmpeqb %xmm3, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ugt_2_v16i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpmaxub {{.*}}(%rip), %xmm0, %xmm1
; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ugt_2_v16i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpmaxub {{.*}}(%rip), %xmm0, %xmm1
; AVX2-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ugt_2_v16i8:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtb {{.*}}(%rip), %xmm0, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ugt_2_v16i8:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %zmm0, %zmm0
; AVX512VPOPCNTDQVL-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtb {{.*}}(%rip), %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vzeroupper
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ugt_2_v16i8:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpcmpgtb {{.*}}(%rip), %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ugt_2_v16i8:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpcmpnleub {{.*}}(%rip), %xmm0, %k0
; BITALG-NEXT:    vpmovm2b %k0, %xmm0
; BITALG-NEXT:    retq
  %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0)
  %3 = icmp ugt <16 x i8> %2, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
  %4 = sext <16 x i1> %3 to <16 x i8>
  ret <16 x i8> %4
}

; ctpop(x) < 3 per i8 lane, sign-extended to a mask.
define <16 x i8> @ult_3_v16i8(<16 x i8> %0) {
; SSE2-LABEL: ult_3_v16i8:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3]
; SSE2-NEXT:    pcmpgtb %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ult_3_v16i8:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    movdqa {{.*#+}} xmm0 = [3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3]
; SSE3-NEXT:    pcmpgtb %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ult_3_v16i8:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
; SSSE3-NEXT:    pminub %xmm3, %xmm0
; SSSE3-NEXT:    pcmpeqb %xmm3, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ult_3_v16i8:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
; SSE41-NEXT:    pminub %xmm3, %xmm0
; SSE41-NEXT:    pcmpeqb %xmm3, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ult_3_v16i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpminub {{.*}}(%rip), %xmm0, %xmm1
; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ult_3_v16i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpminub {{.*}}(%rip), %xmm0, %xmm1
; AVX2-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ult_3_v16i8:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} xmm1 = [3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3]
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtb %xmm0, %xmm1, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ult_3_v16i8:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %zmm0, %zmm0
; AVX512VPOPCNTDQVL-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa {{.*#+}} xmm1 = [3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3]
; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtb %xmm0, %xmm1, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vzeroupper
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ult_3_v16i8:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vmovdqa {{.*#+}} xmm1 = [3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3]
; BITALG_NOVLX-NEXT:    vpcmpgtb %xmm0, %xmm1, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ult_3_v16i8:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpcmpltub {{.*}}(%rip), %xmm0, %k0
; BITALG-NEXT:    vpmovm2b %k0, %xmm0
; BITALG-NEXT:    retq
  %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0)
  %3 = icmp ult <16 x i8> %2, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  %4 = sext <16 x i1> %3 to <16 x i8>
  ret <16 x i8> %4
}

; ctpop(x) > 3 per i8 lane, sign-extended to a mask.
define <16 x i8> @ugt_3_v16i8(<16 x i8> %0) {
; SSE2-LABEL: ugt_3_v16i8:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pcmpgtb {{.*}}(%rip), %xmm1
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ugt_3_v16i8:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pcmpgtb {{.*}}(%rip), %xmm1
; SSE3-NEXT:    movdqa %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ugt_3_v16i8:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
; SSSE3-NEXT:    pmaxub %xmm3, %xmm0
; SSSE3-NEXT:    pcmpeqb %xmm3, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ugt_3_v16i8:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
; SSE41-NEXT:    pmaxub %xmm3, %xmm0
; SSE41-NEXT:    pcmpeqb %xmm3, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ugt_3_v16i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpmaxub {{.*}}(%rip), %xmm0, %xmm1
; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ugt_3_v16i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpmaxub {{.*}}(%rip), %xmm0, %xmm1
; AVX2-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ugt_3_v16i8:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtb {{.*}}(%rip), %xmm0, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ugt_3_v16i8:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %zmm0, %zmm0
; AVX512VPOPCNTDQVL-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtb {{.*}}(%rip), %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vzeroupper
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ugt_3_v16i8:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpcmpgtb {{.*}}(%rip), %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ugt_3_v16i8:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpcmpnleub {{.*}}(%rip), %xmm0, %k0
; BITALG-NEXT:    vpmovm2b %k0, %xmm0
; BITALG-NEXT:    retq
  %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0)
  %3 = icmp ugt <16 x i8> %2, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  %4 = sext <16 x i1> %3 to <16 x i8>
  ret <16 x i8> %4
}

; ctpop(x) < 4 per i8 lane, sign-extended to a mask.
define <16 x i8> @ult_4_v16i8(<16 x i8> %0) {
; SSE2-LABEL: ult_4_v16i8:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
; SSE2-NEXT:    pcmpgtb %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ult_4_v16i8:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    movdqa {{.*#+}} xmm0 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
; SSE3-NEXT:    pcmpgtb %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ult_4_v16i8:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3]
; SSSE3-NEXT:    pminub %xmm3, %xmm0
; SSSE3-NEXT:    pcmpeqb %xmm3, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ult_4_v16i8:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3]
; SSE41-NEXT:    pminub %xmm3, %xmm0
; SSE41-NEXT:    pcmpeqb %xmm3, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ult_4_v16i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpminub {{.*}}(%rip), %xmm0, %xmm1
; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ult_4_v16i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpminub {{.*}}(%rip), %xmm0, %xmm1
; AVX2-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ult_4_v16i8:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} xmm1 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtb %xmm0, %xmm1, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ult_4_v16i8:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %zmm0, %zmm0
; AVX512VPOPCNTDQVL-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa {{.*#+}} xmm1 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtb %xmm0, %xmm1, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vzeroupper
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ult_4_v16i8:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vmovdqa {{.*#+}} xmm1 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
; BITALG_NOVLX-NEXT:    vpcmpgtb %xmm0, %xmm1, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ult_4_v16i8:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpcmpltub {{.*}}(%rip), %xmm0, %k0
; BITALG-NEXT:    vpmovm2b %k0, %xmm0
; BITALG-NEXT:    retq
  %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0)
  %3 = icmp ult <16 x i8> %2, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
  %4 = sext <16 x i1> %3 to <16 x i8>
  ret <16 x i8> %4
}

; Checks lowering of: sext(icmp ugt (ctpop <16 x i8> %0), splat(4)) to a byte mask.
; NOTE: all CHECK lines were autogenerated by utils/update_llc_test_checks.py —
; regenerate with that script rather than editing them by hand.
699define <16 x i8> @ugt_4_v16i8(<16 x i8> %0) {
700; SSE2-LABEL: ugt_4_v16i8:
701; SSE2:       # %bb.0:
702; SSE2-NEXT:    movdqa %xmm0, %xmm1
703; SSE2-NEXT:    psrlw $1, %xmm1
704; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
705; SSE2-NEXT:    psubb %xmm1, %xmm0
706; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
707; SSE2-NEXT:    movdqa %xmm0, %xmm2
708; SSE2-NEXT:    pand %xmm1, %xmm2
709; SSE2-NEXT:    psrlw $2, %xmm0
710; SSE2-NEXT:    pand %xmm1, %xmm0
711; SSE2-NEXT:    paddb %xmm2, %xmm0
712; SSE2-NEXT:    movdqa %xmm0, %xmm1
713; SSE2-NEXT:    psrlw $4, %xmm1
714; SSE2-NEXT:    paddb %xmm0, %xmm1
715; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
716; SSE2-NEXT:    pcmpgtb {{.*}}(%rip), %xmm1
717; SSE2-NEXT:    movdqa %xmm1, %xmm0
718; SSE2-NEXT:    retq
719;
720; SSE3-LABEL: ugt_4_v16i8:
721; SSE3:       # %bb.0:
722; SSE3-NEXT:    movdqa %xmm0, %xmm1
723; SSE3-NEXT:    psrlw $1, %xmm1
724; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
725; SSE3-NEXT:    psubb %xmm1, %xmm0
726; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
727; SSE3-NEXT:    movdqa %xmm0, %xmm2
728; SSE3-NEXT:    pand %xmm1, %xmm2
729; SSE3-NEXT:    psrlw $2, %xmm0
730; SSE3-NEXT:    pand %xmm1, %xmm0
731; SSE3-NEXT:    paddb %xmm2, %xmm0
732; SSE3-NEXT:    movdqa %xmm0, %xmm1
733; SSE3-NEXT:    psrlw $4, %xmm1
734; SSE3-NEXT:    paddb %xmm0, %xmm1
735; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
736; SSE3-NEXT:    pcmpgtb {{.*}}(%rip), %xmm1
737; SSE3-NEXT:    movdqa %xmm1, %xmm0
738; SSE3-NEXT:    retq
739;
740; SSSE3-LABEL: ugt_4_v16i8:
741; SSSE3:       # %bb.0:
742; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
743; SSSE3-NEXT:    movdqa %xmm0, %xmm2
744; SSSE3-NEXT:    pand %xmm1, %xmm2
745; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
746; SSSE3-NEXT:    movdqa %xmm3, %xmm4
747; SSSE3-NEXT:    pshufb %xmm2, %xmm4
748; SSSE3-NEXT:    psrlw $4, %xmm0
749; SSSE3-NEXT:    pand %xmm1, %xmm0
750; SSSE3-NEXT:    pshufb %xmm0, %xmm3
751; SSSE3-NEXT:    paddb %xmm4, %xmm3
752; SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5]
753; SSSE3-NEXT:    pmaxub %xmm3, %xmm0
754; SSSE3-NEXT:    pcmpeqb %xmm3, %xmm0
755; SSSE3-NEXT:    retq
756;
757; SSE41-LABEL: ugt_4_v16i8:
758; SSE41:       # %bb.0:
759; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
760; SSE41-NEXT:    movdqa %xmm0, %xmm2
761; SSE41-NEXT:    pand %xmm1, %xmm2
762; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
763; SSE41-NEXT:    movdqa %xmm3, %xmm4
764; SSE41-NEXT:    pshufb %xmm2, %xmm4
765; SSE41-NEXT:    psrlw $4, %xmm0
766; SSE41-NEXT:    pand %xmm1, %xmm0
767; SSE41-NEXT:    pshufb %xmm0, %xmm3
768; SSE41-NEXT:    paddb %xmm4, %xmm3
769; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5]
770; SSE41-NEXT:    pmaxub %xmm3, %xmm0
771; SSE41-NEXT:    pcmpeqb %xmm3, %xmm0
772; SSE41-NEXT:    retq
773;
774; AVX1-LABEL: ugt_4_v16i8:
775; AVX1:       # %bb.0:
776; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
777; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
778; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
779; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
780; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
781; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
782; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
783; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
784; AVX1-NEXT:    vpmaxub {{.*}}(%rip), %xmm0, %xmm1
785; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
786; AVX1-NEXT:    retq
787;
788; AVX2-LABEL: ugt_4_v16i8:
789; AVX2:       # %bb.0:
790; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
791; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
792; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
793; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
794; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
795; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
796; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
797; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
798; AVX2-NEXT:    vpmaxub {{.*}}(%rip), %xmm0, %xmm1
799; AVX2-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
800; AVX2-NEXT:    retq
801;
802; AVX512VPOPCNTDQ-LABEL: ugt_4_v16i8:
803; AVX512VPOPCNTDQ:       # %bb.0:
804; AVX512VPOPCNTDQ-NEXT:    vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
805; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
806; AVX512VPOPCNTDQ-NEXT:    vpmovdb %zmm0, %xmm0
807; AVX512VPOPCNTDQ-NEXT:    vpcmpgtb {{.*}}(%rip), %xmm0, %xmm0
808; AVX512VPOPCNTDQ-NEXT:    vzeroupper
809; AVX512VPOPCNTDQ-NEXT:    retq
810;
811; AVX512VPOPCNTDQVL-LABEL: ugt_4_v16i8:
812; AVX512VPOPCNTDQVL:       # %bb.0:
813; AVX512VPOPCNTDQVL-NEXT:    vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
814; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %zmm0, %zmm0
815; AVX512VPOPCNTDQVL-NEXT:    vpmovdb %zmm0, %xmm0
816; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtb {{.*}}(%rip), %xmm0, %xmm0
817; AVX512VPOPCNTDQVL-NEXT:    vzeroupper
818; AVX512VPOPCNTDQVL-NEXT:    retq
819;
820; BITALG_NOVLX-LABEL: ugt_4_v16i8:
821; BITALG_NOVLX:       # %bb.0:
822; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
823; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
824; BITALG_NOVLX-NEXT:    vpcmpgtb {{.*}}(%rip), %xmm0, %xmm0
825; BITALG_NOVLX-NEXT:    vzeroupper
826; BITALG_NOVLX-NEXT:    retq
827;
828; BITALG-LABEL: ugt_4_v16i8:
829; BITALG:       # %bb.0:
830; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
831; BITALG-NEXT:    vpcmpnleub {{.*}}(%rip), %xmm0, %k0
832; BITALG-NEXT:    vpmovm2b %k0, %xmm0
833; BITALG-NEXT:    retq
834  %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0)
835  %3 = icmp ugt <16 x i8> %2, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
836  %4 = sext <16 x i1> %3 to <16 x i8>
837  ret <16 x i8> %4
838}
839
; Checks lowering of: sext(icmp ult (ctpop <16 x i8> %0), splat(5)) to a byte mask.
; NOTE: all CHECK lines were autogenerated by utils/update_llc_test_checks.py —
; regenerate with that script rather than editing them by hand.
840define <16 x i8> @ult_5_v16i8(<16 x i8> %0) {
841; SSE2-LABEL: ult_5_v16i8:
842; SSE2:       # %bb.0:
843; SSE2-NEXT:    movdqa %xmm0, %xmm1
844; SSE2-NEXT:    psrlw $1, %xmm1
845; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
846; SSE2-NEXT:    psubb %xmm1, %xmm0
847; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
848; SSE2-NEXT:    movdqa %xmm0, %xmm2
849; SSE2-NEXT:    pand %xmm1, %xmm2
850; SSE2-NEXT:    psrlw $2, %xmm0
851; SSE2-NEXT:    pand %xmm1, %xmm0
852; SSE2-NEXT:    paddb %xmm2, %xmm0
853; SSE2-NEXT:    movdqa %xmm0, %xmm1
854; SSE2-NEXT:    psrlw $4, %xmm1
855; SSE2-NEXT:    paddb %xmm0, %xmm1
856; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
857; SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5]
858; SSE2-NEXT:    pcmpgtb %xmm1, %xmm0
859; SSE2-NEXT:    retq
860;
861; SSE3-LABEL: ult_5_v16i8:
862; SSE3:       # %bb.0:
863; SSE3-NEXT:    movdqa %xmm0, %xmm1
864; SSE3-NEXT:    psrlw $1, %xmm1
865; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
866; SSE3-NEXT:    psubb %xmm1, %xmm0
867; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
868; SSE3-NEXT:    movdqa %xmm0, %xmm2
869; SSE3-NEXT:    pand %xmm1, %xmm2
870; SSE3-NEXT:    psrlw $2, %xmm0
871; SSE3-NEXT:    pand %xmm1, %xmm0
872; SSE3-NEXT:    paddb %xmm2, %xmm0
873; SSE3-NEXT:    movdqa %xmm0, %xmm1
874; SSE3-NEXT:    psrlw $4, %xmm1
875; SSE3-NEXT:    paddb %xmm0, %xmm1
876; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
877; SSE3-NEXT:    movdqa {{.*#+}} xmm0 = [5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5]
878; SSE3-NEXT:    pcmpgtb %xmm1, %xmm0
879; SSE3-NEXT:    retq
880;
881; SSSE3-LABEL: ult_5_v16i8:
882; SSSE3:       # %bb.0:
883; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
884; SSSE3-NEXT:    movdqa %xmm0, %xmm2
885; SSSE3-NEXT:    pand %xmm1, %xmm2
886; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
887; SSSE3-NEXT:    movdqa %xmm3, %xmm4
888; SSSE3-NEXT:    pshufb %xmm2, %xmm4
889; SSSE3-NEXT:    psrlw $4, %xmm0
890; SSSE3-NEXT:    pand %xmm1, %xmm0
891; SSSE3-NEXT:    pshufb %xmm0, %xmm3
892; SSSE3-NEXT:    paddb %xmm4, %xmm3
893; SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
894; SSSE3-NEXT:    pminub %xmm3, %xmm0
895; SSSE3-NEXT:    pcmpeqb %xmm3, %xmm0
896; SSSE3-NEXT:    retq
897;
898; SSE41-LABEL: ult_5_v16i8:
899; SSE41:       # %bb.0:
900; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
901; SSE41-NEXT:    movdqa %xmm0, %xmm2
902; SSE41-NEXT:    pand %xmm1, %xmm2
903; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
904; SSE41-NEXT:    movdqa %xmm3, %xmm4
905; SSE41-NEXT:    pshufb %xmm2, %xmm4
906; SSE41-NEXT:    psrlw $4, %xmm0
907; SSE41-NEXT:    pand %xmm1, %xmm0
908; SSE41-NEXT:    pshufb %xmm0, %xmm3
909; SSE41-NEXT:    paddb %xmm4, %xmm3
910; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
911; SSE41-NEXT:    pminub %xmm3, %xmm0
912; SSE41-NEXT:    pcmpeqb %xmm3, %xmm0
913; SSE41-NEXT:    retq
914;
915; AVX1-LABEL: ult_5_v16i8:
916; AVX1:       # %bb.0:
917; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
918; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
919; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
920; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
921; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
922; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
923; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
924; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
925; AVX1-NEXT:    vpminub {{.*}}(%rip), %xmm0, %xmm1
926; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
927; AVX1-NEXT:    retq
928;
929; AVX2-LABEL: ult_5_v16i8:
930; AVX2:       # %bb.0:
931; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
932; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
933; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
934; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
935; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
936; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
937; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
938; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
939; AVX2-NEXT:    vpminub {{.*}}(%rip), %xmm0, %xmm1
940; AVX2-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
941; AVX2-NEXT:    retq
942;
943; AVX512VPOPCNTDQ-LABEL: ult_5_v16i8:
944; AVX512VPOPCNTDQ:       # %bb.0:
945; AVX512VPOPCNTDQ-NEXT:    vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
946; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
947; AVX512VPOPCNTDQ-NEXT:    vpmovdb %zmm0, %xmm0
948; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} xmm1 = [5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5]
949; AVX512VPOPCNTDQ-NEXT:    vpcmpgtb %xmm0, %xmm1, %xmm0
950; AVX512VPOPCNTDQ-NEXT:    vzeroupper
951; AVX512VPOPCNTDQ-NEXT:    retq
952;
953; AVX512VPOPCNTDQVL-LABEL: ult_5_v16i8:
954; AVX512VPOPCNTDQVL:       # %bb.0:
955; AVX512VPOPCNTDQVL-NEXT:    vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
956; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %zmm0, %zmm0
957; AVX512VPOPCNTDQVL-NEXT:    vpmovdb %zmm0, %xmm0
958; AVX512VPOPCNTDQVL-NEXT:    vmovdqa {{.*#+}} xmm1 = [5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5]
959; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtb %xmm0, %xmm1, %xmm0
960; AVX512VPOPCNTDQVL-NEXT:    vzeroupper
961; AVX512VPOPCNTDQVL-NEXT:    retq
962;
963; BITALG_NOVLX-LABEL: ult_5_v16i8:
964; BITALG_NOVLX:       # %bb.0:
965; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
966; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
967; BITALG_NOVLX-NEXT:    vmovdqa {{.*#+}} xmm1 = [5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5]
968; BITALG_NOVLX-NEXT:    vpcmpgtb %xmm0, %xmm1, %xmm0
969; BITALG_NOVLX-NEXT:    vzeroupper
970; BITALG_NOVLX-NEXT:    retq
971;
972; BITALG-LABEL: ult_5_v16i8:
973; BITALG:       # %bb.0:
974; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
975; BITALG-NEXT:    vpcmpltub {{.*}}(%rip), %xmm0, %k0
976; BITALG-NEXT:    vpmovm2b %k0, %xmm0
977; BITALG-NEXT:    retq
978  %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0)
979  %3 = icmp ult <16 x i8> %2, <i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5>
980  %4 = sext <16 x i1> %3 to <16 x i8>
981  ret <16 x i8> %4
982}
983
; Checks lowering of: sext(icmp ugt (ctpop <16 x i8> %0), splat(5)) to a byte mask.
; NOTE: all CHECK lines were autogenerated by utils/update_llc_test_checks.py —
; regenerate with that script rather than editing them by hand.
984define <16 x i8> @ugt_5_v16i8(<16 x i8> %0) {
985; SSE2-LABEL: ugt_5_v16i8:
986; SSE2:       # %bb.0:
987; SSE2-NEXT:    movdqa %xmm0, %xmm1
988; SSE2-NEXT:    psrlw $1, %xmm1
989; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
990; SSE2-NEXT:    psubb %xmm1, %xmm0
991; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
992; SSE2-NEXT:    movdqa %xmm0, %xmm2
993; SSE2-NEXT:    pand %xmm1, %xmm2
994; SSE2-NEXT:    psrlw $2, %xmm0
995; SSE2-NEXT:    pand %xmm1, %xmm0
996; SSE2-NEXT:    paddb %xmm2, %xmm0
997; SSE2-NEXT:    movdqa %xmm0, %xmm1
998; SSE2-NEXT:    psrlw $4, %xmm1
999; SSE2-NEXT:    paddb %xmm0, %xmm1
1000; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
1001; SSE2-NEXT:    pcmpgtb {{.*}}(%rip), %xmm1
1002; SSE2-NEXT:    movdqa %xmm1, %xmm0
1003; SSE2-NEXT:    retq
1004;
1005; SSE3-LABEL: ugt_5_v16i8:
1006; SSE3:       # %bb.0:
1007; SSE3-NEXT:    movdqa %xmm0, %xmm1
1008; SSE3-NEXT:    psrlw $1, %xmm1
1009; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
1010; SSE3-NEXT:    psubb %xmm1, %xmm0
1011; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
1012; SSE3-NEXT:    movdqa %xmm0, %xmm2
1013; SSE3-NEXT:    pand %xmm1, %xmm2
1014; SSE3-NEXT:    psrlw $2, %xmm0
1015; SSE3-NEXT:    pand %xmm1, %xmm0
1016; SSE3-NEXT:    paddb %xmm2, %xmm0
1017; SSE3-NEXT:    movdqa %xmm0, %xmm1
1018; SSE3-NEXT:    psrlw $4, %xmm1
1019; SSE3-NEXT:    paddb %xmm0, %xmm1
1020; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
1021; SSE3-NEXT:    pcmpgtb {{.*}}(%rip), %xmm1
1022; SSE3-NEXT:    movdqa %xmm1, %xmm0
1023; SSE3-NEXT:    retq
1024;
1025; SSSE3-LABEL: ugt_5_v16i8:
1026; SSSE3:       # %bb.0:
1027; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
1028; SSSE3-NEXT:    movdqa %xmm0, %xmm2
1029; SSSE3-NEXT:    pand %xmm1, %xmm2
1030; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
1031; SSSE3-NEXT:    movdqa %xmm3, %xmm4
1032; SSSE3-NEXT:    pshufb %xmm2, %xmm4
1033; SSSE3-NEXT:    psrlw $4, %xmm0
1034; SSSE3-NEXT:    pand %xmm1, %xmm0
1035; SSSE3-NEXT:    pshufb %xmm0, %xmm3
1036; SSSE3-NEXT:    paddb %xmm4, %xmm3
1037; SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6]
1038; SSSE3-NEXT:    pmaxub %xmm3, %xmm0
1039; SSSE3-NEXT:    pcmpeqb %xmm3, %xmm0
1040; SSSE3-NEXT:    retq
1041;
1042; SSE41-LABEL: ugt_5_v16i8:
1043; SSE41:       # %bb.0:
1044; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
1045; SSE41-NEXT:    movdqa %xmm0, %xmm2
1046; SSE41-NEXT:    pand %xmm1, %xmm2
1047; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
1048; SSE41-NEXT:    movdqa %xmm3, %xmm4
1049; SSE41-NEXT:    pshufb %xmm2, %xmm4
1050; SSE41-NEXT:    psrlw $4, %xmm0
1051; SSE41-NEXT:    pand %xmm1, %xmm0
1052; SSE41-NEXT:    pshufb %xmm0, %xmm3
1053; SSE41-NEXT:    paddb %xmm4, %xmm3
1054; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6]
1055; SSE41-NEXT:    pmaxub %xmm3, %xmm0
1056; SSE41-NEXT:    pcmpeqb %xmm3, %xmm0
1057; SSE41-NEXT:    retq
1058;
1059; AVX1-LABEL: ugt_5_v16i8:
1060; AVX1:       # %bb.0:
1061; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
1062; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
1063; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
1064; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
1065; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
1066; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
1067; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
1068; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
1069; AVX1-NEXT:    vpmaxub {{.*}}(%rip), %xmm0, %xmm1
1070; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
1071; AVX1-NEXT:    retq
1072;
1073; AVX2-LABEL: ugt_5_v16i8:
1074; AVX2:       # %bb.0:
1075; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
1076; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
1077; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
1078; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
1079; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
1080; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
1081; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
1082; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
1083; AVX2-NEXT:    vpmaxub {{.*}}(%rip), %xmm0, %xmm1
1084; AVX2-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
1085; AVX2-NEXT:    retq
1086;
1087; AVX512VPOPCNTDQ-LABEL: ugt_5_v16i8:
1088; AVX512VPOPCNTDQ:       # %bb.0:
1089; AVX512VPOPCNTDQ-NEXT:    vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
1090; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
1091; AVX512VPOPCNTDQ-NEXT:    vpmovdb %zmm0, %xmm0
1092; AVX512VPOPCNTDQ-NEXT:    vpcmpgtb {{.*}}(%rip), %xmm0, %xmm0
1093; AVX512VPOPCNTDQ-NEXT:    vzeroupper
1094; AVX512VPOPCNTDQ-NEXT:    retq
1095;
1096; AVX512VPOPCNTDQVL-LABEL: ugt_5_v16i8:
1097; AVX512VPOPCNTDQVL:       # %bb.0:
1098; AVX512VPOPCNTDQVL-NEXT:    vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
1099; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %zmm0, %zmm0
1100; AVX512VPOPCNTDQVL-NEXT:    vpmovdb %zmm0, %xmm0
1101; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtb {{.*}}(%rip), %xmm0, %xmm0
1102; AVX512VPOPCNTDQVL-NEXT:    vzeroupper
1103; AVX512VPOPCNTDQVL-NEXT:    retq
1104;
1105; BITALG_NOVLX-LABEL: ugt_5_v16i8:
1106; BITALG_NOVLX:       # %bb.0:
1107; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
1108; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
1109; BITALG_NOVLX-NEXT:    vpcmpgtb {{.*}}(%rip), %xmm0, %xmm0
1110; BITALG_NOVLX-NEXT:    vzeroupper
1111; BITALG_NOVLX-NEXT:    retq
1112;
1113; BITALG-LABEL: ugt_5_v16i8:
1114; BITALG:       # %bb.0:
1115; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
1116; BITALG-NEXT:    vpcmpnleub {{.*}}(%rip), %xmm0, %k0
1117; BITALG-NEXT:    vpmovm2b %k0, %xmm0
1118; BITALG-NEXT:    retq
1119  %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0)
1120  %3 = icmp ugt <16 x i8> %2, <i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5>
1121  %4 = sext <16 x i1> %3 to <16 x i8>
1122  ret <16 x i8> %4
1123}
1124
; Checks lowering of: sext(icmp ult (ctpop <16 x i8> %0), splat(6)) to a byte mask.
; NOTE: all CHECK lines were autogenerated by utils/update_llc_test_checks.py —
; regenerate with that script rather than editing them by hand.
1125define <16 x i8> @ult_6_v16i8(<16 x i8> %0) {
1126; SSE2-LABEL: ult_6_v16i8:
1127; SSE2:       # %bb.0:
1128; SSE2-NEXT:    movdqa %xmm0, %xmm1
1129; SSE2-NEXT:    psrlw $1, %xmm1
1130; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
1131; SSE2-NEXT:    psubb %xmm1, %xmm0
1132; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
1133; SSE2-NEXT:    movdqa %xmm0, %xmm2
1134; SSE2-NEXT:    pand %xmm1, %xmm2
1135; SSE2-NEXT:    psrlw $2, %xmm0
1136; SSE2-NEXT:    pand %xmm1, %xmm0
1137; SSE2-NEXT:    paddb %xmm2, %xmm0
1138; SSE2-NEXT:    movdqa %xmm0, %xmm1
1139; SSE2-NEXT:    psrlw $4, %xmm1
1140; SSE2-NEXT:    paddb %xmm0, %xmm1
1141; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
1142; SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6]
1143; SSE2-NEXT:    pcmpgtb %xmm1, %xmm0
1144; SSE2-NEXT:    retq
1145;
1146; SSE3-LABEL: ult_6_v16i8:
1147; SSE3:       # %bb.0:
1148; SSE3-NEXT:    movdqa %xmm0, %xmm1
1149; SSE3-NEXT:    psrlw $1, %xmm1
1150; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
1151; SSE3-NEXT:    psubb %xmm1, %xmm0
1152; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
1153; SSE3-NEXT:    movdqa %xmm0, %xmm2
1154; SSE3-NEXT:    pand %xmm1, %xmm2
1155; SSE3-NEXT:    psrlw $2, %xmm0
1156; SSE3-NEXT:    pand %xmm1, %xmm0
1157; SSE3-NEXT:    paddb %xmm2, %xmm0
1158; SSE3-NEXT:    movdqa %xmm0, %xmm1
1159; SSE3-NEXT:    psrlw $4, %xmm1
1160; SSE3-NEXT:    paddb %xmm0, %xmm1
1161; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
1162; SSE3-NEXT:    movdqa {{.*#+}} xmm0 = [6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6]
1163; SSE3-NEXT:    pcmpgtb %xmm1, %xmm0
1164; SSE3-NEXT:    retq
1165;
1166; SSSE3-LABEL: ult_6_v16i8:
1167; SSSE3:       # %bb.0:
1168; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
1169; SSSE3-NEXT:    movdqa %xmm0, %xmm2
1170; SSSE3-NEXT:    pand %xmm1, %xmm2
1171; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
1172; SSSE3-NEXT:    movdqa %xmm3, %xmm4
1173; SSSE3-NEXT:    pshufb %xmm2, %xmm4
1174; SSSE3-NEXT:    psrlw $4, %xmm0
1175; SSSE3-NEXT:    pand %xmm1, %xmm0
1176; SSSE3-NEXT:    pshufb %xmm0, %xmm3
1177; SSSE3-NEXT:    paddb %xmm4, %xmm3
1178; SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5]
1179; SSSE3-NEXT:    pminub %xmm3, %xmm0
1180; SSSE3-NEXT:    pcmpeqb %xmm3, %xmm0
1181; SSSE3-NEXT:    retq
1182;
1183; SSE41-LABEL: ult_6_v16i8:
1184; SSE41:       # %bb.0:
1185; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
1186; SSE41-NEXT:    movdqa %xmm0, %xmm2
1187; SSE41-NEXT:    pand %xmm1, %xmm2
1188; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
1189; SSE41-NEXT:    movdqa %xmm3, %xmm4
1190; SSE41-NEXT:    pshufb %xmm2, %xmm4
1191; SSE41-NEXT:    psrlw $4, %xmm0
1192; SSE41-NEXT:    pand %xmm1, %xmm0
1193; SSE41-NEXT:    pshufb %xmm0, %xmm3
1194; SSE41-NEXT:    paddb %xmm4, %xmm3
1195; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5]
1196; SSE41-NEXT:    pminub %xmm3, %xmm0
1197; SSE41-NEXT:    pcmpeqb %xmm3, %xmm0
1198; SSE41-NEXT:    retq
1199;
1200; AVX1-LABEL: ult_6_v16i8:
1201; AVX1:       # %bb.0:
1202; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
1203; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
1204; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
1205; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
1206; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
1207; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
1208; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
1209; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
1210; AVX1-NEXT:    vpminub {{.*}}(%rip), %xmm0, %xmm1
1211; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
1212; AVX1-NEXT:    retq
1213;
1214; AVX2-LABEL: ult_6_v16i8:
1215; AVX2:       # %bb.0:
1216; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
1217; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
1218; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
1219; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
1220; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
1221; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
1222; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
1223; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
1224; AVX2-NEXT:    vpminub {{.*}}(%rip), %xmm0, %xmm1
1225; AVX2-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
1226; AVX2-NEXT:    retq
1227;
1228; AVX512VPOPCNTDQ-LABEL: ult_6_v16i8:
1229; AVX512VPOPCNTDQ:       # %bb.0:
1230; AVX512VPOPCNTDQ-NEXT:    vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
1231; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
1232; AVX512VPOPCNTDQ-NEXT:    vpmovdb %zmm0, %xmm0
1233; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} xmm1 = [6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6]
1234; AVX512VPOPCNTDQ-NEXT:    vpcmpgtb %xmm0, %xmm1, %xmm0
1235; AVX512VPOPCNTDQ-NEXT:    vzeroupper
1236; AVX512VPOPCNTDQ-NEXT:    retq
1237;
1238; AVX512VPOPCNTDQVL-LABEL: ult_6_v16i8:
1239; AVX512VPOPCNTDQVL:       # %bb.0:
1240; AVX512VPOPCNTDQVL-NEXT:    vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
1241; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %zmm0, %zmm0
1242; AVX512VPOPCNTDQVL-NEXT:    vpmovdb %zmm0, %xmm0
1243; AVX512VPOPCNTDQVL-NEXT:    vmovdqa {{.*#+}} xmm1 = [6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6]
1244; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtb %xmm0, %xmm1, %xmm0
1245; AVX512VPOPCNTDQVL-NEXT:    vzeroupper
1246; AVX512VPOPCNTDQVL-NEXT:    retq
1247;
1248; BITALG_NOVLX-LABEL: ult_6_v16i8:
1249; BITALG_NOVLX:       # %bb.0:
1250; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
1251; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
1252; BITALG_NOVLX-NEXT:    vmovdqa {{.*#+}} xmm1 = [6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6]
1253; BITALG_NOVLX-NEXT:    vpcmpgtb %xmm0, %xmm1, %xmm0
1254; BITALG_NOVLX-NEXT:    vzeroupper
1255; BITALG_NOVLX-NEXT:    retq
1256;
1257; BITALG-LABEL: ult_6_v16i8:
1258; BITALG:       # %bb.0:
1259; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
1260; BITALG-NEXT:    vpcmpltub {{.*}}(%rip), %xmm0, %k0
1261; BITALG-NEXT:    vpmovm2b %k0, %xmm0
1262; BITALG-NEXT:    retq
1263  %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0)
1264  %3 = icmp ult <16 x i8> %2, <i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6>
1265  %4 = sext <16 x i1> %3 to <16 x i8>
1266  ret <16 x i8> %4
1267}
1268
1269define <16 x i8> @ugt_6_v16i8(<16 x i8> %0) {
1270; SSE2-LABEL: ugt_6_v16i8:
1271; SSE2:       # %bb.0:
1272; SSE2-NEXT:    movdqa %xmm0, %xmm1
1273; SSE2-NEXT:    psrlw $1, %xmm1
1274; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
1275; SSE2-NEXT:    psubb %xmm1, %xmm0
1276; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
1277; SSE2-NEXT:    movdqa %xmm0, %xmm2
1278; SSE2-NEXT:    pand %xmm1, %xmm2
1279; SSE2-NEXT:    psrlw $2, %xmm0
1280; SSE2-NEXT:    pand %xmm1, %xmm0
1281; SSE2-NEXT:    paddb %xmm2, %xmm0
1282; SSE2-NEXT:    movdqa %xmm0, %xmm1
1283; SSE2-NEXT:    psrlw $4, %xmm1
1284; SSE2-NEXT:    paddb %xmm0, %xmm1
1285; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
1286; SSE2-NEXT:    pcmpgtb {{.*}}(%rip), %xmm1
1287; SSE2-NEXT:    movdqa %xmm1, %xmm0
1288; SSE2-NEXT:    retq
1289;
1290; SSE3-LABEL: ugt_6_v16i8:
1291; SSE3:       # %bb.0:
1292; SSE3-NEXT:    movdqa %xmm0, %xmm1
1293; SSE3-NEXT:    psrlw $1, %xmm1
1294; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
1295; SSE3-NEXT:    psubb %xmm1, %xmm0
1296; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
1297; SSE3-NEXT:    movdqa %xmm0, %xmm2
1298; SSE3-NEXT:    pand %xmm1, %xmm2
1299; SSE3-NEXT:    psrlw $2, %xmm0
1300; SSE3-NEXT:    pand %xmm1, %xmm0
1301; SSE3-NEXT:    paddb %xmm2, %xmm0
1302; SSE3-NEXT:    movdqa %xmm0, %xmm1
1303; SSE3-NEXT:    psrlw $4, %xmm1
1304; SSE3-NEXT:    paddb %xmm0, %xmm1
1305; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
1306; SSE3-NEXT:    pcmpgtb {{.*}}(%rip), %xmm1
1307; SSE3-NEXT:    movdqa %xmm1, %xmm0
1308; SSE3-NEXT:    retq
1309;
1310; SSSE3-LABEL: ugt_6_v16i8:
1311; SSSE3:       # %bb.0:
1312; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
1313; SSSE3-NEXT:    movdqa %xmm0, %xmm2
1314; SSSE3-NEXT:    pand %xmm1, %xmm2
1315; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
1316; SSSE3-NEXT:    movdqa %xmm3, %xmm4
1317; SSSE3-NEXT:    pshufb %xmm2, %xmm4
1318; SSSE3-NEXT:    psrlw $4, %xmm0
1319; SSSE3-NEXT:    pand %xmm1, %xmm0
1320; SSSE3-NEXT:    pshufb %xmm0, %xmm3
1321; SSSE3-NEXT:    paddb %xmm4, %xmm3
1322; SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
1323; SSSE3-NEXT:    pmaxub %xmm3, %xmm0
1324; SSSE3-NEXT:    pcmpeqb %xmm3, %xmm0
1325; SSSE3-NEXT:    retq
1326;
1327; SSE41-LABEL: ugt_6_v16i8:
1328; SSE41:       # %bb.0:
1329; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
1330; SSE41-NEXT:    movdqa %xmm0, %xmm2
1331; SSE41-NEXT:    pand %xmm1, %xmm2
1332; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
1333; SSE41-NEXT:    movdqa %xmm3, %xmm4
1334; SSE41-NEXT:    pshufb %xmm2, %xmm4
1335; SSE41-NEXT:    psrlw $4, %xmm0
1336; SSE41-NEXT:    pand %xmm1, %xmm0
1337; SSE41-NEXT:    pshufb %xmm0, %xmm3
1338; SSE41-NEXT:    paddb %xmm4, %xmm3
1339; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
1340; SSE41-NEXT:    pmaxub %xmm3, %xmm0
1341; SSE41-NEXT:    pcmpeqb %xmm3, %xmm0
1342; SSE41-NEXT:    retq
1343;
1344; AVX1-LABEL: ugt_6_v16i8:
1345; AVX1:       # %bb.0:
1346; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
1347; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
1348; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
1349; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
1350; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
1351; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
1352; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
1353; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
1354; AVX1-NEXT:    vpmaxub {{.*}}(%rip), %xmm0, %xmm1
1355; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
1356; AVX1-NEXT:    retq
1357;
1358; AVX2-LABEL: ugt_6_v16i8:
1359; AVX2:       # %bb.0:
1360; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
1361; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
1362; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
1363; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
1364; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
1365; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
1366; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
1367; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
1368; AVX2-NEXT:    vpmaxub {{.*}}(%rip), %xmm0, %xmm1
1369; AVX2-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
1370; AVX2-NEXT:    retq
1371;
1372; AVX512VPOPCNTDQ-LABEL: ugt_6_v16i8:
1373; AVX512VPOPCNTDQ:       # %bb.0:
1374; AVX512VPOPCNTDQ-NEXT:    vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
1375; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
1376; AVX512VPOPCNTDQ-NEXT:    vpmovdb %zmm0, %xmm0
1377; AVX512VPOPCNTDQ-NEXT:    vpcmpgtb {{.*}}(%rip), %xmm0, %xmm0
1378; AVX512VPOPCNTDQ-NEXT:    vzeroupper
1379; AVX512VPOPCNTDQ-NEXT:    retq
1380;
1381; AVX512VPOPCNTDQVL-LABEL: ugt_6_v16i8:
1382; AVX512VPOPCNTDQVL:       # %bb.0:
1383; AVX512VPOPCNTDQVL-NEXT:    vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
1384; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %zmm0, %zmm0
1385; AVX512VPOPCNTDQVL-NEXT:    vpmovdb %zmm0, %xmm0
1386; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtb {{.*}}(%rip), %xmm0, %xmm0
1387; AVX512VPOPCNTDQVL-NEXT:    vzeroupper
1388; AVX512VPOPCNTDQVL-NEXT:    retq
1389;
1390; BITALG_NOVLX-LABEL: ugt_6_v16i8:
1391; BITALG_NOVLX:       # %bb.0:
1392; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
1393; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
1394; BITALG_NOVLX-NEXT:    vpcmpgtb {{.*}}(%rip), %xmm0, %xmm0
1395; BITALG_NOVLX-NEXT:    vzeroupper
1396; BITALG_NOVLX-NEXT:    retq
1397;
1398; BITALG-LABEL: ugt_6_v16i8:
1399; BITALG:       # %bb.0:
1400; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
1401; BITALG-NEXT:    vpcmpnleub {{.*}}(%rip), %xmm0, %k0
1402; BITALG-NEXT:    vpmovm2b %k0, %xmm0
1403; BITALG-NEXT:    retq
1404  %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0)
1405  %3 = icmp ugt <16 x i8> %2, <i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6>
1406  %4 = sext <16 x i1> %3 to <16 x i8>
1407  ret <16 x i8> %4
1408}
1409
; popcount(x) < 7 in each i8 lane. SSE2/SSE3 compute the bit-parallel byte
; popcount and compare with pcmpgtb against a splat of 7; SSSE3/SSE41/AVX use
; the pshufb nibble-LUT popcount and lower the unsigned compare as
; pminub-with-6 + pcmpeqb; VPOPCNTDQ zero-extends to dwords for vpopcntd;
; BITALG uses vpopcntb + vpcmpltub (or a signed vpcmpgtw-style compare when
; no VL masking is available).
define <16 x i8> @ult_7_v16i8(<16 x i8> %0) {
; SSE2-LABEL: ult_7_v16i8:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
; SSE2-NEXT:    pcmpgtb %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ult_7_v16i8:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    movdqa {{.*#+}} xmm0 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
; SSE3-NEXT:    pcmpgtb %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ult_7_v16i8:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6]
; SSSE3-NEXT:    pminub %xmm3, %xmm0
; SSSE3-NEXT:    pcmpeqb %xmm3, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ult_7_v16i8:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6]
; SSE41-NEXT:    pminub %xmm3, %xmm0
; SSE41-NEXT:    pcmpeqb %xmm3, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ult_7_v16i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpminub {{.*}}(%rip), %xmm0, %xmm1
; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ult_7_v16i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpminub {{.*}}(%rip), %xmm0, %xmm1
; AVX2-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ult_7_v16i8:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} xmm1 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtb %xmm0, %xmm1, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ult_7_v16i8:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %zmm0, %zmm0
; AVX512VPOPCNTDQVL-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa {{.*#+}} xmm1 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtb %xmm0, %xmm1, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vzeroupper
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ult_7_v16i8:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vmovdqa {{.*#+}} xmm1 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
; BITALG_NOVLX-NEXT:    vpcmpgtb %xmm0, %xmm1, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ult_7_v16i8:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpcmpltub {{.*}}(%rip), %xmm0, %k0
; BITALG-NEXT:    vpmovm2b %k0, %xmm0
; BITALG-NEXT:    retq
  %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0)
  %3 = icmp ult <16 x i8> %2, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
  %4 = sext <16 x i1> %3 to <16 x i8>
  ret <16 x i8> %4
}
1553
; popcount(x) > 1 in each i16 lane. No popcount is materialized: the compare
; is lowered as (x & (x - 1)) != 0 — paddw with all-ones, pand, pcmpeqw
; against zero, then an invert (pxor with all-ones on SSE/AVX, vpternlogq $15
; on the VPOPCNTDQ targets). Only BITALG keeps an actual vpopcntw + compare.
define <8 x i16> @ugt_1_v8i16(<8 x i16> %0) {
; SSE-LABEL: ugt_1_v8i16:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    paddw %xmm2, %xmm1
; SSE-NEXT:    pand %xmm0, %xmm1
; SSE-NEXT:    pxor %xmm0, %xmm0
; SSE-NEXT:    pcmpeqw %xmm0, %xmm1
; SSE-NEXT:    pxor %xmm2, %xmm1
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: ugt_1_v8i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpaddw %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqw %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ugt_1_v8i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpaddw %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vpand %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpcmpeqw %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ugt_1_v8i16:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX512VPOPCNTDQ-NEXT:    vpaddw %xmm1, %xmm0, %xmm1
; AVX512VPOPCNTDQ-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX512VPOPCNTDQ-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ugt_1_v8i16:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX512VPOPCNTDQVL-NEXT:    vpaddw %xmm1, %xmm0, %xmm1
; AVX512VPOPCNTDQVL-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpternlogq $15, %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ugt_1_v8i16:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntw %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ugt_1_v8i16:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntw %xmm0, %xmm0
; BITALG-NEXT:    vpcmpnleuw {{.*}}(%rip), %xmm0, %k0
; BITALG-NEXT:    vpmovm2w %k0, %xmm0
; BITALG-NEXT:    retq
  %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
  %3 = icmp ugt <8 x i16> %2, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %4 = sext <8 x i1> %3 to <8 x i16>
  ret <8 x i16> %4
}
1628
; popcount(x) < 2 in each i16 lane (x is zero or a power of two). Lowered as
; (x & (x - 1)) == 0 — the same paddw/pand/pcmpeqw idiom as ugt_1_v8i16 but
; without the final invert. BITALG keeps vpopcntw + vpcmpltuw.
define <8 x i16> @ult_2_v8i16(<8 x i16> %0) {
; SSE-LABEL: ult_2_v8i16:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqd %xmm1, %xmm1
; SSE-NEXT:    paddw %xmm0, %xmm1
; SSE-NEXT:    pand %xmm1, %xmm0
; SSE-NEXT:    pxor %xmm1, %xmm1
; SSE-NEXT:    pcmpeqw %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: ult_2_v8i16:
; AVX:       # %bb.0:
; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpaddw %xmm1, %xmm0, %xmm1
; AVX-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ult_2_v8i16:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntw %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vmovdqa {{.*#+}} xmm1 = [2,2,2,2,2,2,2,2]
; BITALG_NOVLX-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ult_2_v8i16:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntw %xmm0, %xmm0
; BITALG-NEXT:    vpcmpltuw {{.*}}(%rip), %xmm0, %k0
; BITALG-NEXT:    vpmovm2w %k0, %xmm0
; BITALG-NEXT:    retq
  %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
  %3 = icmp ult <8 x i16> %2, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  %4 = sext <8 x i1> %3 to <8 x i16>
  ret <8 x i16> %4
}
1668
; popcount(x) > 2 in each i16 lane. A full popcount is required here: the
; byte popcount (bit-parallel on SSE2/SSE3, pshufb nibble LUT on SSSE3+) is
; widened to word counts via psllw $8 / paddb / psrlw $8, then compared with
; pcmpgtw against a splat-of-2 constant from memory. VPOPCNTDQ targets
; zero-extend to dwords for vpopcntd; BITALG uses vpopcntw directly.
define <8 x i16> @ugt_2_v8i16(<8 x i16> %0) {
; SSE2-LABEL: ugt_2_v8i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    psllw $8, %xmm0
; SSE2-NEXT:    paddb %xmm1, %xmm0
; SSE2-NEXT:    psrlw $8, %xmm0
; SSE2-NEXT:    pcmpgtw {{.*}}(%rip), %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ugt_2_v8i16:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    movdqa %xmm1, %xmm0
; SSE3-NEXT:    psllw $8, %xmm0
; SSE3-NEXT:    paddb %xmm1, %xmm0
; SSE3-NEXT:    psrlw $8, %xmm0
; SSE3-NEXT:    pcmpgtw {{.*}}(%rip), %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ugt_2_v8i16:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    movdqa %xmm3, %xmm0
; SSSE3-NEXT:    psllw $8, %xmm0
; SSSE3-NEXT:    paddb %xmm3, %xmm0
; SSSE3-NEXT:    psrlw $8, %xmm0
; SSSE3-NEXT:    pcmpgtw {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ugt_2_v8i16:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    movdqa %xmm3, %xmm0
; SSE41-NEXT:    psllw $8, %xmm0
; SSE41-NEXT:    paddb %xmm3, %xmm0
; SSE41-NEXT:    psrlw $8, %xmm0
; SSE41-NEXT:    pcmpgtw {{.*}}(%rip), %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ugt_2_v8i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpsllw $8, %xmm0, %xmm1
; AVX1-NEXT:    vpaddb %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ugt_2_v8i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpsllw $8, %xmm0, %xmm1
; AVX2-NEXT:    vpaddb %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    vpsrlw $8, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ugt_2_v8i16:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ugt_2_v8i16:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %ymm0, %ymm0
; AVX512VPOPCNTDQVL-NEXT:    vpmovdw %ymm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vzeroupper
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ugt_2_v8i16:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntw %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ugt_2_v8i16:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntw %xmm0, %xmm0
; BITALG-NEXT:    vpcmpnleuw {{.*}}(%rip), %xmm0, %k0
; BITALG-NEXT:    vpmovm2w %k0, %xmm0
; BITALG-NEXT:    retq
  %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
  %3 = icmp ugt <8 x i16> %2, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  %4 = sext <8 x i1> %3 to <8 x i16>
  ret <8 x i16> %4
}
1823
; popcount(x) < 3 in each i16 lane. Word popcount is formed the same way as
; in ugt_2_v8i16 (byte popcount then psllw $8 / paddb / psrlw $8); the
; unsigned less-than is lowered with operands reversed — pcmpgtw with a
; register splat of 3 as the first operand. BITALG uses vpopcntw + vpcmpltuw.
define <8 x i16> @ult_3_v8i16(<8 x i16> %0) {
; SSE2-LABEL: ult_3_v8i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    psllw $8, %xmm2
; SSE2-NEXT:    paddb %xmm1, %xmm2
; SSE2-NEXT:    psrlw $8, %xmm2
; SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [3,3,3,3,3,3,3,3]
; SSE2-NEXT:    pcmpgtw %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ult_3_v8i16:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    movdqa %xmm1, %xmm2
; SSE3-NEXT:    psllw $8, %xmm2
; SSE3-NEXT:    paddb %xmm1, %xmm2
; SSE3-NEXT:    psrlw $8, %xmm2
; SSE3-NEXT:    movdqa {{.*#+}} xmm0 = [3,3,3,3,3,3,3,3]
; SSE3-NEXT:    pcmpgtw %xmm2, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ult_3_v8i16:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    movdqa %xmm3, %xmm1
; SSSE3-NEXT:    psllw $8, %xmm1
; SSSE3-NEXT:    paddb %xmm3, %xmm1
; SSSE3-NEXT:    psrlw $8, %xmm1
; SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [3,3,3,3,3,3,3,3]
; SSSE3-NEXT:    pcmpgtw %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ult_3_v8i16:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    movdqa %xmm3, %xmm1
; SSE41-NEXT:    psllw $8, %xmm1
; SSE41-NEXT:    paddb %xmm3, %xmm1
; SSE41-NEXT:    psrlw $8, %xmm1
; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [3,3,3,3,3,3,3,3]
; SSE41-NEXT:    pcmpgtw %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ult_3_v8i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpsllw $8, %xmm0, %xmm1
; AVX1-NEXT:    vpaddb %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [3,3,3,3,3,3,3,3]
; AVX1-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ult_3_v8i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpsllw $8, %xmm0, %xmm1
; AVX2-NEXT:    vpaddb %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    vpsrlw $8, %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [3,3,3,3,3,3,3,3]
; AVX2-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ult_3_v8i16:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} xmm1 = [3,3,3,3,3,3,3,3]
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ult_3_v8i16:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %ymm0, %ymm0
; AVX512VPOPCNTDQVL-NEXT:    vpmovdw %ymm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa {{.*#+}} xmm1 = [3,3,3,3,3,3,3,3]
; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vzeroupper
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ult_3_v8i16:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntw %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vmovdqa {{.*#+}} xmm1 = [3,3,3,3,3,3,3,3]
; BITALG_NOVLX-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ult_3_v8i16:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntw %xmm0, %xmm0
; BITALG-NEXT:    vpcmpltuw {{.*}}(%rip), %xmm0, %k0
; BITALG-NEXT:    vpmovm2w %k0, %xmm0
; BITALG-NEXT:    retq
  %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
  %3 = icmp ult <8 x i16> %2, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  %4 = sext <8 x i1> %3 to <8 x i16>
  ret <8 x i16> %4
}
1987
1988define <8 x i16> @ugt_3_v8i16(<8 x i16> %0) {
1989; SSE2-LABEL: ugt_3_v8i16:
1990; SSE2:       # %bb.0:
1991; SSE2-NEXT:    movdqa %xmm0, %xmm1
1992; SSE2-NEXT:    psrlw $1, %xmm1
1993; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
1994; SSE2-NEXT:    psubb %xmm1, %xmm0
1995; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
1996; SSE2-NEXT:    movdqa %xmm0, %xmm2
1997; SSE2-NEXT:    pand %xmm1, %xmm2
1998; SSE2-NEXT:    psrlw $2, %xmm0
1999; SSE2-NEXT:    pand %xmm1, %xmm0
2000; SSE2-NEXT:    paddb %xmm2, %xmm0
2001; SSE2-NEXT:    movdqa %xmm0, %xmm1
2002; SSE2-NEXT:    psrlw $4, %xmm1
2003; SSE2-NEXT:    paddb %xmm0, %xmm1
2004; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
2005; SSE2-NEXT:    movdqa %xmm1, %xmm0
2006; SSE2-NEXT:    psllw $8, %xmm0
2007; SSE2-NEXT:    paddb %xmm1, %xmm0
2008; SSE2-NEXT:    psrlw $8, %xmm0
2009; SSE2-NEXT:    pcmpgtw {{.*}}(%rip), %xmm0
2010; SSE2-NEXT:    retq
2011;
2012; SSE3-LABEL: ugt_3_v8i16:
2013; SSE3:       # %bb.0:
2014; SSE3-NEXT:    movdqa %xmm0, %xmm1
2015; SSE3-NEXT:    psrlw $1, %xmm1
2016; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
2017; SSE3-NEXT:    psubb %xmm1, %xmm0
2018; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
2019; SSE3-NEXT:    movdqa %xmm0, %xmm2
2020; SSE3-NEXT:    pand %xmm1, %xmm2
2021; SSE3-NEXT:    psrlw $2, %xmm0
2022; SSE3-NEXT:    pand %xmm1, %xmm0
2023; SSE3-NEXT:    paddb %xmm2, %xmm0
2024; SSE3-NEXT:    movdqa %xmm0, %xmm1
2025; SSE3-NEXT:    psrlw $4, %xmm1
2026; SSE3-NEXT:    paddb %xmm0, %xmm1
2027; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
2028; SSE3-NEXT:    movdqa %xmm1, %xmm0
2029; SSE3-NEXT:    psllw $8, %xmm0
2030; SSE3-NEXT:    paddb %xmm1, %xmm0
2031; SSE3-NEXT:    psrlw $8, %xmm0
2032; SSE3-NEXT:    pcmpgtw {{.*}}(%rip), %xmm0
2033; SSE3-NEXT:    retq
2034;
2035; SSSE3-LABEL: ugt_3_v8i16:
2036; SSSE3:       # %bb.0:
2037; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
2038; SSSE3-NEXT:    movdqa %xmm0, %xmm2
2039; SSSE3-NEXT:    pand %xmm1, %xmm2
2040; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
2041; SSSE3-NEXT:    movdqa %xmm3, %xmm4
2042; SSSE3-NEXT:    pshufb %xmm2, %xmm4
2043; SSSE3-NEXT:    psrlw $4, %xmm0
2044; SSSE3-NEXT:    pand %xmm1, %xmm0
2045; SSSE3-NEXT:    pshufb %xmm0, %xmm3
2046; SSSE3-NEXT:    paddb %xmm4, %xmm3
2047; SSSE3-NEXT:    movdqa %xmm3, %xmm0
2048; SSSE3-NEXT:    psllw $8, %xmm0
2049; SSSE3-NEXT:    paddb %xmm3, %xmm0
2050; SSSE3-NEXT:    psrlw $8, %xmm0
2051; SSSE3-NEXT:    pcmpgtw {{.*}}(%rip), %xmm0
2052; SSSE3-NEXT:    retq
2053;
2054; SSE41-LABEL: ugt_3_v8i16:
2055; SSE41:       # %bb.0:
2056; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
2057; SSE41-NEXT:    movdqa %xmm0, %xmm2
2058; SSE41-NEXT:    pand %xmm1, %xmm2
2059; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
2060; SSE41-NEXT:    movdqa %xmm3, %xmm4
2061; SSE41-NEXT:    pshufb %xmm2, %xmm4
2062; SSE41-NEXT:    psrlw $4, %xmm0
2063; SSE41-NEXT:    pand %xmm1, %xmm0
2064; SSE41-NEXT:    pshufb %xmm0, %xmm3
2065; SSE41-NEXT:    paddb %xmm4, %xmm3
2066; SSE41-NEXT:    movdqa %xmm3, %xmm0
2067; SSE41-NEXT:    psllw $8, %xmm0
2068; SSE41-NEXT:    paddb %xmm3, %xmm0
2069; SSE41-NEXT:    psrlw $8, %xmm0
2070; SSE41-NEXT:    pcmpgtw {{.*}}(%rip), %xmm0
2071; SSE41-NEXT:    retq
2072;
2073; AVX1-LABEL: ugt_3_v8i16:
2074; AVX1:       # %bb.0:
2075; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
2076; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
2077; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
2078; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
2079; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
2080; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
2081; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
2082; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
2083; AVX1-NEXT:    vpsllw $8, %xmm0, %xmm1
2084; AVX1-NEXT:    vpaddb %xmm0, %xmm1, %xmm0
2085; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm0
2086; AVX1-NEXT:    vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0
2087; AVX1-NEXT:    retq
2088;
2089; AVX2-LABEL: ugt_3_v8i16:
2090; AVX2:       # %bb.0:
2091; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
2092; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
2093; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
2094; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
2095; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
2096; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
2097; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
2098; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
2099; AVX2-NEXT:    vpsllw $8, %xmm0, %xmm1
2100; AVX2-NEXT:    vpaddb %xmm0, %xmm1, %xmm0
2101; AVX2-NEXT:    vpsrlw $8, %xmm0, %xmm0
2102; AVX2-NEXT:    vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0
2103; AVX2-NEXT:    retq
2104;
2105; AVX512VPOPCNTDQ-LABEL: ugt_3_v8i16:
2106; AVX512VPOPCNTDQ:       # %bb.0:
2107; AVX512VPOPCNTDQ-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
2108; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
2109; AVX512VPOPCNTDQ-NEXT:    vpmovdw %zmm0, %ymm0
2110; AVX512VPOPCNTDQ-NEXT:    vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0
2111; AVX512VPOPCNTDQ-NEXT:    vzeroupper
2112; AVX512VPOPCNTDQ-NEXT:    retq
2113;
2114; AVX512VPOPCNTDQVL-LABEL: ugt_3_v8i16:
2115; AVX512VPOPCNTDQVL:       # %bb.0:
2116; AVX512VPOPCNTDQVL-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
2117; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %ymm0, %ymm0
2118; AVX512VPOPCNTDQVL-NEXT:    vpmovdw %ymm0, %xmm0
2119; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0
2120; AVX512VPOPCNTDQVL-NEXT:    vzeroupper
2121; AVX512VPOPCNTDQVL-NEXT:    retq
2122;
2123; BITALG_NOVLX-LABEL: ugt_3_v8i16:
2124; BITALG_NOVLX:       # %bb.0:
2125; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
2126; BITALG_NOVLX-NEXT:    vpopcntw %zmm0, %zmm0
2127; BITALG_NOVLX-NEXT:    vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0
2128; BITALG_NOVLX-NEXT:    vzeroupper
2129; BITALG_NOVLX-NEXT:    retq
2130;
2131; BITALG-LABEL: ugt_3_v8i16:
2132; BITALG:       # %bb.0:
2133; BITALG-NEXT:    vpopcntw %xmm0, %xmm0
2134; BITALG-NEXT:    vpcmpnleuw {{.*}}(%rip), %xmm0, %k0
2135; BITALG-NEXT:    vpmovm2w %k0, %xmm0
2136; BITALG-NEXT:    retq
2137  %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
2138  %3 = icmp ugt <8 x i16> %2, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
2139  %4 = sext <8 x i1> %3 to <8 x i16>
2140  ret <8 x i16> %4
2141}
2142
; ult_4_v8i16: ctpop(<8 x i16>) compared unsigned-less-than 4; result is the
; <8 x i1> mask sign-extended to <8 x i16>. Checks per-subtarget lowering:
; SSE2/SSE3 use the shift/mask bit-twiddling popcount, SSSE3/SSE41/AVX use the
; pshufb nibble-LUT, AVX512VPOPCNTDQ widens to i32 vpopcntd, BITALG uses
; vpopcntw directly. NOTE(review): assertions are autogenerated — regenerate
; with utils/update_llc_test_checks.py rather than editing CHECK lines by hand.
2143define <8 x i16> @ult_4_v8i16(<8 x i16> %0) {
2144; SSE2-LABEL: ult_4_v8i16:
2145; SSE2:       # %bb.0:
2146; SSE2-NEXT:    movdqa %xmm0, %xmm1
2147; SSE2-NEXT:    psrlw $1, %xmm1
2148; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
2149; SSE2-NEXT:    psubb %xmm1, %xmm0
2150; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
2151; SSE2-NEXT:    movdqa %xmm0, %xmm2
2152; SSE2-NEXT:    pand %xmm1, %xmm2
2153; SSE2-NEXT:    psrlw $2, %xmm0
2154; SSE2-NEXT:    pand %xmm1, %xmm0
2155; SSE2-NEXT:    paddb %xmm2, %xmm0
2156; SSE2-NEXT:    movdqa %xmm0, %xmm1
2157; SSE2-NEXT:    psrlw $4, %xmm1
2158; SSE2-NEXT:    paddb %xmm0, %xmm1
2159; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
2160; SSE2-NEXT:    movdqa %xmm1, %xmm2
2161; SSE2-NEXT:    psllw $8, %xmm2
2162; SSE2-NEXT:    paddb %xmm1, %xmm2
2163; SSE2-NEXT:    psrlw $8, %xmm2
2164; SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [4,4,4,4,4,4,4,4]
2165; SSE2-NEXT:    pcmpgtw %xmm2, %xmm0
2166; SSE2-NEXT:    retq
2167;
2168; SSE3-LABEL: ult_4_v8i16:
2169; SSE3:       # %bb.0:
2170; SSE3-NEXT:    movdqa %xmm0, %xmm1
2171; SSE3-NEXT:    psrlw $1, %xmm1
2172; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
2173; SSE3-NEXT:    psubb %xmm1, %xmm0
2174; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
2175; SSE3-NEXT:    movdqa %xmm0, %xmm2
2176; SSE3-NEXT:    pand %xmm1, %xmm2
2177; SSE3-NEXT:    psrlw $2, %xmm0
2178; SSE3-NEXT:    pand %xmm1, %xmm0
2179; SSE3-NEXT:    paddb %xmm2, %xmm0
2180; SSE3-NEXT:    movdqa %xmm0, %xmm1
2181; SSE3-NEXT:    psrlw $4, %xmm1
2182; SSE3-NEXT:    paddb %xmm0, %xmm1
2183; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
2184; SSE3-NEXT:    movdqa %xmm1, %xmm2
2185; SSE3-NEXT:    psllw $8, %xmm2
2186; SSE3-NEXT:    paddb %xmm1, %xmm2
2187; SSE3-NEXT:    psrlw $8, %xmm2
2188; SSE3-NEXT:    movdqa {{.*#+}} xmm0 = [4,4,4,4,4,4,4,4]
2189; SSE3-NEXT:    pcmpgtw %xmm2, %xmm0
2190; SSE3-NEXT:    retq
2191;
2192; SSSE3-LABEL: ult_4_v8i16:
2193; SSSE3:       # %bb.0:
2194; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
2195; SSSE3-NEXT:    movdqa %xmm0, %xmm2
2196; SSSE3-NEXT:    pand %xmm1, %xmm2
2197; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
2198; SSSE3-NEXT:    movdqa %xmm3, %xmm4
2199; SSSE3-NEXT:    pshufb %xmm2, %xmm4
2200; SSSE3-NEXT:    psrlw $4, %xmm0
2201; SSSE3-NEXT:    pand %xmm1, %xmm0
2202; SSSE3-NEXT:    pshufb %xmm0, %xmm3
2203; SSSE3-NEXT:    paddb %xmm4, %xmm3
2204; SSSE3-NEXT:    movdqa %xmm3, %xmm1
2205; SSSE3-NEXT:    psllw $8, %xmm1
2206; SSSE3-NEXT:    paddb %xmm3, %xmm1
2207; SSSE3-NEXT:    psrlw $8, %xmm1
2208; SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [4,4,4,4,4,4,4,4]
2209; SSSE3-NEXT:    pcmpgtw %xmm1, %xmm0
2210; SSSE3-NEXT:    retq
2211;
2212; SSE41-LABEL: ult_4_v8i16:
2213; SSE41:       # %bb.0:
2214; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
2215; SSE41-NEXT:    movdqa %xmm0, %xmm2
2216; SSE41-NEXT:    pand %xmm1, %xmm2
2217; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
2218; SSE41-NEXT:    movdqa %xmm3, %xmm4
2219; SSE41-NEXT:    pshufb %xmm2, %xmm4
2220; SSE41-NEXT:    psrlw $4, %xmm0
2221; SSE41-NEXT:    pand %xmm1, %xmm0
2222; SSE41-NEXT:    pshufb %xmm0, %xmm3
2223; SSE41-NEXT:    paddb %xmm4, %xmm3
2224; SSE41-NEXT:    movdqa %xmm3, %xmm1
2225; SSE41-NEXT:    psllw $8, %xmm1
2226; SSE41-NEXT:    paddb %xmm3, %xmm1
2227; SSE41-NEXT:    psrlw $8, %xmm1
2228; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [4,4,4,4,4,4,4,4]
2229; SSE41-NEXT:    pcmpgtw %xmm1, %xmm0
2230; SSE41-NEXT:    retq
2231;
2232; AVX1-LABEL: ult_4_v8i16:
2233; AVX1:       # %bb.0:
2234; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
2235; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
2236; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
2237; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
2238; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
2239; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
2240; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
2241; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
2242; AVX1-NEXT:    vpsllw $8, %xmm0, %xmm1
2243; AVX1-NEXT:    vpaddb %xmm0, %xmm1, %xmm0
2244; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm0
2245; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [4,4,4,4,4,4,4,4]
2246; AVX1-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
2247; AVX1-NEXT:    retq
2248;
2249; AVX2-LABEL: ult_4_v8i16:
2250; AVX2:       # %bb.0:
2251; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
2252; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
2253; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
2254; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
2255; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
2256; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
2257; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
2258; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
2259; AVX2-NEXT:    vpsllw $8, %xmm0, %xmm1
2260; AVX2-NEXT:    vpaddb %xmm0, %xmm1, %xmm0
2261; AVX2-NEXT:    vpsrlw $8, %xmm0, %xmm0
2262; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [4,4,4,4,4,4,4,4]
2263; AVX2-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
2264; AVX2-NEXT:    retq
2265;
2266; AVX512VPOPCNTDQ-LABEL: ult_4_v8i16:
2267; AVX512VPOPCNTDQ:       # %bb.0:
2268; AVX512VPOPCNTDQ-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
2269; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
2270; AVX512VPOPCNTDQ-NEXT:    vpmovdw %zmm0, %ymm0
2271; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} xmm1 = [4,4,4,4,4,4,4,4]
2272; AVX512VPOPCNTDQ-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
2273; AVX512VPOPCNTDQ-NEXT:    vzeroupper
2274; AVX512VPOPCNTDQ-NEXT:    retq
2275;
2276; AVX512VPOPCNTDQVL-LABEL: ult_4_v8i16:
2277; AVX512VPOPCNTDQVL:       # %bb.0:
2278; AVX512VPOPCNTDQVL-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
2279; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %ymm0, %ymm0
2280; AVX512VPOPCNTDQVL-NEXT:    vpmovdw %ymm0, %xmm0
2281; AVX512VPOPCNTDQVL-NEXT:    vmovdqa {{.*#+}} xmm1 = [4,4,4,4,4,4,4,4]
2282; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
2283; AVX512VPOPCNTDQVL-NEXT:    vzeroupper
2284; AVX512VPOPCNTDQVL-NEXT:    retq
2285;
2286; BITALG_NOVLX-LABEL: ult_4_v8i16:
2287; BITALG_NOVLX:       # %bb.0:
2288; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
2289; BITALG_NOVLX-NEXT:    vpopcntw %zmm0, %zmm0
2290; BITALG_NOVLX-NEXT:    vmovdqa {{.*#+}} xmm1 = [4,4,4,4,4,4,4,4]
2291; BITALG_NOVLX-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
2292; BITALG_NOVLX-NEXT:    vzeroupper
2293; BITALG_NOVLX-NEXT:    retq
2294;
2295; BITALG-LABEL: ult_4_v8i16:
2296; BITALG:       # %bb.0:
2297; BITALG-NEXT:    vpopcntw %xmm0, %xmm0
2298; BITALG-NEXT:    vpcmpltuw {{.*}}(%rip), %xmm0, %k0
2299; BITALG-NEXT:    vpmovm2w %k0, %xmm0
2300; BITALG-NEXT:    retq
; IR under test: per-lane popcount, unsigned compare against splat 4, sext mask.
2301  %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
2302  %3 = icmp ult <8 x i16> %2, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
2303  %4 = sext <8 x i1> %3 to <8 x i16>
2304  ret <8 x i16> %4
2305}
2306
; ugt_4_v8i16: ctpop(<8 x i16>) compared unsigned-greater-than 4; result is
; the <8 x i1> mask sign-extended to <8 x i16>. Unlike the ult variants, the
; ugt form keeps the constant on the RHS, so pcmpgtw/vpcmpgtw compares against
; a constant-pool operand ({{.*}}(%rip)) instead of a pre-loaded splat.
; NOTE(review): assertions are autogenerated — regenerate with
; utils/update_llc_test_checks.py rather than editing CHECK lines by hand.
2307define <8 x i16> @ugt_4_v8i16(<8 x i16> %0) {
2308; SSE2-LABEL: ugt_4_v8i16:
2309; SSE2:       # %bb.0:
2310; SSE2-NEXT:    movdqa %xmm0, %xmm1
2311; SSE2-NEXT:    psrlw $1, %xmm1
2312; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
2313; SSE2-NEXT:    psubb %xmm1, %xmm0
2314; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
2315; SSE2-NEXT:    movdqa %xmm0, %xmm2
2316; SSE2-NEXT:    pand %xmm1, %xmm2
2317; SSE2-NEXT:    psrlw $2, %xmm0
2318; SSE2-NEXT:    pand %xmm1, %xmm0
2319; SSE2-NEXT:    paddb %xmm2, %xmm0
2320; SSE2-NEXT:    movdqa %xmm0, %xmm1
2321; SSE2-NEXT:    psrlw $4, %xmm1
2322; SSE2-NEXT:    paddb %xmm0, %xmm1
2323; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
2324; SSE2-NEXT:    movdqa %xmm1, %xmm0
2325; SSE2-NEXT:    psllw $8, %xmm0
2326; SSE2-NEXT:    paddb %xmm1, %xmm0
2327; SSE2-NEXT:    psrlw $8, %xmm0
2328; SSE2-NEXT:    pcmpgtw {{.*}}(%rip), %xmm0
2329; SSE2-NEXT:    retq
2330;
2331; SSE3-LABEL: ugt_4_v8i16:
2332; SSE3:       # %bb.0:
2333; SSE3-NEXT:    movdqa %xmm0, %xmm1
2334; SSE3-NEXT:    psrlw $1, %xmm1
2335; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
2336; SSE3-NEXT:    psubb %xmm1, %xmm0
2337; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
2338; SSE3-NEXT:    movdqa %xmm0, %xmm2
2339; SSE3-NEXT:    pand %xmm1, %xmm2
2340; SSE3-NEXT:    psrlw $2, %xmm0
2341; SSE3-NEXT:    pand %xmm1, %xmm0
2342; SSE3-NEXT:    paddb %xmm2, %xmm0
2343; SSE3-NEXT:    movdqa %xmm0, %xmm1
2344; SSE3-NEXT:    psrlw $4, %xmm1
2345; SSE3-NEXT:    paddb %xmm0, %xmm1
2346; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
2347; SSE3-NEXT:    movdqa %xmm1, %xmm0
2348; SSE3-NEXT:    psllw $8, %xmm0
2349; SSE3-NEXT:    paddb %xmm1, %xmm0
2350; SSE3-NEXT:    psrlw $8, %xmm0
2351; SSE3-NEXT:    pcmpgtw {{.*}}(%rip), %xmm0
2352; SSE3-NEXT:    retq
2353;
2354; SSSE3-LABEL: ugt_4_v8i16:
2355; SSSE3:       # %bb.0:
2356; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
2357; SSSE3-NEXT:    movdqa %xmm0, %xmm2
2358; SSSE3-NEXT:    pand %xmm1, %xmm2
2359; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
2360; SSSE3-NEXT:    movdqa %xmm3, %xmm4
2361; SSSE3-NEXT:    pshufb %xmm2, %xmm4
2362; SSSE3-NEXT:    psrlw $4, %xmm0
2363; SSSE3-NEXT:    pand %xmm1, %xmm0
2364; SSSE3-NEXT:    pshufb %xmm0, %xmm3
2365; SSSE3-NEXT:    paddb %xmm4, %xmm3
2366; SSSE3-NEXT:    movdqa %xmm3, %xmm0
2367; SSSE3-NEXT:    psllw $8, %xmm0
2368; SSSE3-NEXT:    paddb %xmm3, %xmm0
2369; SSSE3-NEXT:    psrlw $8, %xmm0
2370; SSSE3-NEXT:    pcmpgtw {{.*}}(%rip), %xmm0
2371; SSSE3-NEXT:    retq
2372;
2373; SSE41-LABEL: ugt_4_v8i16:
2374; SSE41:       # %bb.0:
2375; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
2376; SSE41-NEXT:    movdqa %xmm0, %xmm2
2377; SSE41-NEXT:    pand %xmm1, %xmm2
2378; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
2379; SSE41-NEXT:    movdqa %xmm3, %xmm4
2380; SSE41-NEXT:    pshufb %xmm2, %xmm4
2381; SSE41-NEXT:    psrlw $4, %xmm0
2382; SSE41-NEXT:    pand %xmm1, %xmm0
2383; SSE41-NEXT:    pshufb %xmm0, %xmm3
2384; SSE41-NEXT:    paddb %xmm4, %xmm3
2385; SSE41-NEXT:    movdqa %xmm3, %xmm0
2386; SSE41-NEXT:    psllw $8, %xmm0
2387; SSE41-NEXT:    paddb %xmm3, %xmm0
2388; SSE41-NEXT:    psrlw $8, %xmm0
2389; SSE41-NEXT:    pcmpgtw {{.*}}(%rip), %xmm0
2390; SSE41-NEXT:    retq
2391;
2392; AVX1-LABEL: ugt_4_v8i16:
2393; AVX1:       # %bb.0:
2394; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
2395; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
2396; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
2397; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
2398; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
2399; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
2400; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
2401; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
2402; AVX1-NEXT:    vpsllw $8, %xmm0, %xmm1
2403; AVX1-NEXT:    vpaddb %xmm0, %xmm1, %xmm0
2404; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm0
2405; AVX1-NEXT:    vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0
2406; AVX1-NEXT:    retq
2407;
2408; AVX2-LABEL: ugt_4_v8i16:
2409; AVX2:       # %bb.0:
2410; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
2411; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
2412; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
2413; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
2414; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
2415; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
2416; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
2417; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
2418; AVX2-NEXT:    vpsllw $8, %xmm0, %xmm1
2419; AVX2-NEXT:    vpaddb %xmm0, %xmm1, %xmm0
2420; AVX2-NEXT:    vpsrlw $8, %xmm0, %xmm0
2421; AVX2-NEXT:    vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0
2422; AVX2-NEXT:    retq
2423;
2424; AVX512VPOPCNTDQ-LABEL: ugt_4_v8i16:
2425; AVX512VPOPCNTDQ:       # %bb.0:
2426; AVX512VPOPCNTDQ-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
2427; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
2428; AVX512VPOPCNTDQ-NEXT:    vpmovdw %zmm0, %ymm0
2429; AVX512VPOPCNTDQ-NEXT:    vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0
2430; AVX512VPOPCNTDQ-NEXT:    vzeroupper
2431; AVX512VPOPCNTDQ-NEXT:    retq
2432;
2433; AVX512VPOPCNTDQVL-LABEL: ugt_4_v8i16:
2434; AVX512VPOPCNTDQVL:       # %bb.0:
2435; AVX512VPOPCNTDQVL-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
2436; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %ymm0, %ymm0
2437; AVX512VPOPCNTDQVL-NEXT:    vpmovdw %ymm0, %xmm0
2438; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0
2439; AVX512VPOPCNTDQVL-NEXT:    vzeroupper
2440; AVX512VPOPCNTDQVL-NEXT:    retq
2441;
2442; BITALG_NOVLX-LABEL: ugt_4_v8i16:
2443; BITALG_NOVLX:       # %bb.0:
2444; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
2445; BITALG_NOVLX-NEXT:    vpopcntw %zmm0, %zmm0
2446; BITALG_NOVLX-NEXT:    vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0
2447; BITALG_NOVLX-NEXT:    vzeroupper
2448; BITALG_NOVLX-NEXT:    retq
2449;
2450; BITALG-LABEL: ugt_4_v8i16:
2451; BITALG:       # %bb.0:
2452; BITALG-NEXT:    vpopcntw %xmm0, %xmm0
2453; BITALG-NEXT:    vpcmpnleuw {{.*}}(%rip), %xmm0, %k0
2454; BITALG-NEXT:    vpmovm2w %k0, %xmm0
2455; BITALG-NEXT:    retq
; IR under test: per-lane popcount, unsigned compare against splat 4, sext mask.
2456  %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
2457  %3 = icmp ugt <8 x i16> %2, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
2458  %4 = sext <8 x i1> %3 to <8 x i16>
2459  ret <8 x i16> %4
2460}
2461
; ult_5_v8i16: ctpop(<8 x i16>) compared unsigned-less-than 5; result is the
; <8 x i1> mask sign-extended to <8 x i16>. Same lowering shapes as
; ult_4_v8i16 — only the splat comparison constant changes (5 instead of 4);
; BITALG lowers the ult compare directly to vpcmpltuw + vpmovm2w.
; NOTE(review): assertions are autogenerated — regenerate with
; utils/update_llc_test_checks.py rather than editing CHECK lines by hand.
2462define <8 x i16> @ult_5_v8i16(<8 x i16> %0) {
2463; SSE2-LABEL: ult_5_v8i16:
2464; SSE2:       # %bb.0:
2465; SSE2-NEXT:    movdqa %xmm0, %xmm1
2466; SSE2-NEXT:    psrlw $1, %xmm1
2467; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
2468; SSE2-NEXT:    psubb %xmm1, %xmm0
2469; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
2470; SSE2-NEXT:    movdqa %xmm0, %xmm2
2471; SSE2-NEXT:    pand %xmm1, %xmm2
2472; SSE2-NEXT:    psrlw $2, %xmm0
2473; SSE2-NEXT:    pand %xmm1, %xmm0
2474; SSE2-NEXT:    paddb %xmm2, %xmm0
2475; SSE2-NEXT:    movdqa %xmm0, %xmm1
2476; SSE2-NEXT:    psrlw $4, %xmm1
2477; SSE2-NEXT:    paddb %xmm0, %xmm1
2478; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
2479; SSE2-NEXT:    movdqa %xmm1, %xmm2
2480; SSE2-NEXT:    psllw $8, %xmm2
2481; SSE2-NEXT:    paddb %xmm1, %xmm2
2482; SSE2-NEXT:    psrlw $8, %xmm2
2483; SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [5,5,5,5,5,5,5,5]
2484; SSE2-NEXT:    pcmpgtw %xmm2, %xmm0
2485; SSE2-NEXT:    retq
2486;
2487; SSE3-LABEL: ult_5_v8i16:
2488; SSE3:       # %bb.0:
2489; SSE3-NEXT:    movdqa %xmm0, %xmm1
2490; SSE3-NEXT:    psrlw $1, %xmm1
2491; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
2492; SSE3-NEXT:    psubb %xmm1, %xmm0
2493; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
2494; SSE3-NEXT:    movdqa %xmm0, %xmm2
2495; SSE3-NEXT:    pand %xmm1, %xmm2
2496; SSE3-NEXT:    psrlw $2, %xmm0
2497; SSE3-NEXT:    pand %xmm1, %xmm0
2498; SSE3-NEXT:    paddb %xmm2, %xmm0
2499; SSE3-NEXT:    movdqa %xmm0, %xmm1
2500; SSE3-NEXT:    psrlw $4, %xmm1
2501; SSE3-NEXT:    paddb %xmm0, %xmm1
2502; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
2503; SSE3-NEXT:    movdqa %xmm1, %xmm2
2504; SSE3-NEXT:    psllw $8, %xmm2
2505; SSE3-NEXT:    paddb %xmm1, %xmm2
2506; SSE3-NEXT:    psrlw $8, %xmm2
2507; SSE3-NEXT:    movdqa {{.*#+}} xmm0 = [5,5,5,5,5,5,5,5]
2508; SSE3-NEXT:    pcmpgtw %xmm2, %xmm0
2509; SSE3-NEXT:    retq
2510;
2511; SSSE3-LABEL: ult_5_v8i16:
2512; SSSE3:       # %bb.0:
2513; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
2514; SSSE3-NEXT:    movdqa %xmm0, %xmm2
2515; SSSE3-NEXT:    pand %xmm1, %xmm2
2516; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
2517; SSSE3-NEXT:    movdqa %xmm3, %xmm4
2518; SSSE3-NEXT:    pshufb %xmm2, %xmm4
2519; SSSE3-NEXT:    psrlw $4, %xmm0
2520; SSSE3-NEXT:    pand %xmm1, %xmm0
2521; SSSE3-NEXT:    pshufb %xmm0, %xmm3
2522; SSSE3-NEXT:    paddb %xmm4, %xmm3
2523; SSSE3-NEXT:    movdqa %xmm3, %xmm1
2524; SSSE3-NEXT:    psllw $8, %xmm1
2525; SSSE3-NEXT:    paddb %xmm3, %xmm1
2526; SSSE3-NEXT:    psrlw $8, %xmm1
2527; SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [5,5,5,5,5,5,5,5]
2528; SSSE3-NEXT:    pcmpgtw %xmm1, %xmm0
2529; SSSE3-NEXT:    retq
2530;
2531; SSE41-LABEL: ult_5_v8i16:
2532; SSE41:       # %bb.0:
2533; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
2534; SSE41-NEXT:    movdqa %xmm0, %xmm2
2535; SSE41-NEXT:    pand %xmm1, %xmm2
2536; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
2537; SSE41-NEXT:    movdqa %xmm3, %xmm4
2538; SSE41-NEXT:    pshufb %xmm2, %xmm4
2539; SSE41-NEXT:    psrlw $4, %xmm0
2540; SSE41-NEXT:    pand %xmm1, %xmm0
2541; SSE41-NEXT:    pshufb %xmm0, %xmm3
2542; SSE41-NEXT:    paddb %xmm4, %xmm3
2543; SSE41-NEXT:    movdqa %xmm3, %xmm1
2544; SSE41-NEXT:    psllw $8, %xmm1
2545; SSE41-NEXT:    paddb %xmm3, %xmm1
2546; SSE41-NEXT:    psrlw $8, %xmm1
2547; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [5,5,5,5,5,5,5,5]
2548; SSE41-NEXT:    pcmpgtw %xmm1, %xmm0
2549; SSE41-NEXT:    retq
2550;
2551; AVX1-LABEL: ult_5_v8i16:
2552; AVX1:       # %bb.0:
2553; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
2554; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
2555; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
2556; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
2557; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
2558; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
2559; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
2560; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
2561; AVX1-NEXT:    vpsllw $8, %xmm0, %xmm1
2562; AVX1-NEXT:    vpaddb %xmm0, %xmm1, %xmm0
2563; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm0
2564; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [5,5,5,5,5,5,5,5]
2565; AVX1-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
2566; AVX1-NEXT:    retq
2567;
2568; AVX2-LABEL: ult_5_v8i16:
2569; AVX2:       # %bb.0:
2570; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
2571; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
2572; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
2573; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
2574; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
2575; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
2576; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
2577; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
2578; AVX2-NEXT:    vpsllw $8, %xmm0, %xmm1
2579; AVX2-NEXT:    vpaddb %xmm0, %xmm1, %xmm0
2580; AVX2-NEXT:    vpsrlw $8, %xmm0, %xmm0
2581; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [5,5,5,5,5,5,5,5]
2582; AVX2-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
2583; AVX2-NEXT:    retq
2584;
2585; AVX512VPOPCNTDQ-LABEL: ult_5_v8i16:
2586; AVX512VPOPCNTDQ:       # %bb.0:
2587; AVX512VPOPCNTDQ-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
2588; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
2589; AVX512VPOPCNTDQ-NEXT:    vpmovdw %zmm0, %ymm0
2590; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} xmm1 = [5,5,5,5,5,5,5,5]
2591; AVX512VPOPCNTDQ-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
2592; AVX512VPOPCNTDQ-NEXT:    vzeroupper
2593; AVX512VPOPCNTDQ-NEXT:    retq
2594;
2595; AVX512VPOPCNTDQVL-LABEL: ult_5_v8i16:
2596; AVX512VPOPCNTDQVL:       # %bb.0:
2597; AVX512VPOPCNTDQVL-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
2598; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %ymm0, %ymm0
2599; AVX512VPOPCNTDQVL-NEXT:    vpmovdw %ymm0, %xmm0
2600; AVX512VPOPCNTDQVL-NEXT:    vmovdqa {{.*#+}} xmm1 = [5,5,5,5,5,5,5,5]
2601; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
2602; AVX512VPOPCNTDQVL-NEXT:    vzeroupper
2603; AVX512VPOPCNTDQVL-NEXT:    retq
2604;
2605; BITALG_NOVLX-LABEL: ult_5_v8i16:
2606; BITALG_NOVLX:       # %bb.0:
2607; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
2608; BITALG_NOVLX-NEXT:    vpopcntw %zmm0, %zmm0
2609; BITALG_NOVLX-NEXT:    vmovdqa {{.*#+}} xmm1 = [5,5,5,5,5,5,5,5]
2610; BITALG_NOVLX-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
2611; BITALG_NOVLX-NEXT:    vzeroupper
2612; BITALG_NOVLX-NEXT:    retq
2613;
2614; BITALG-LABEL: ult_5_v8i16:
2615; BITALG:       # %bb.0:
2616; BITALG-NEXT:    vpopcntw %xmm0, %xmm0
2617; BITALG-NEXT:    vpcmpltuw {{.*}}(%rip), %xmm0, %k0
2618; BITALG-NEXT:    vpmovm2w %k0, %xmm0
2619; BITALG-NEXT:    retq
; IR under test: per-lane popcount, unsigned compare against splat 5, sext mask.
2620  %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
2621  %3 = icmp ult <8 x i16> %2, <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
2622  %4 = sext <8 x i1> %3 to <8 x i16>
2623  ret <8 x i16> %4
2624}
2625
; ugt_5_v8i16: ctpop(<8 x i16>) compared unsigned-greater-than 5; result is
; the <8 x i16> sign-extension of the compare mask. Mirrors ugt_4_v8i16 with
; the constant-pool splat changed to 5; BITALG lowers the ugt compare to
; vpcmpnleuw + vpmovm2w.
; NOTE(review): assertions are autogenerated — regenerate with
; utils/update_llc_test_checks.py rather than editing CHECK lines by hand.
2626define <8 x i16> @ugt_5_v8i16(<8 x i16> %0) {
2627; SSE2-LABEL: ugt_5_v8i16:
2628; SSE2:       # %bb.0:
2629; SSE2-NEXT:    movdqa %xmm0, %xmm1
2630; SSE2-NEXT:    psrlw $1, %xmm1
2631; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
2632; SSE2-NEXT:    psubb %xmm1, %xmm0
2633; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
2634; SSE2-NEXT:    movdqa %xmm0, %xmm2
2635; SSE2-NEXT:    pand %xmm1, %xmm2
2636; SSE2-NEXT:    psrlw $2, %xmm0
2637; SSE2-NEXT:    pand %xmm1, %xmm0
2638; SSE2-NEXT:    paddb %xmm2, %xmm0
2639; SSE2-NEXT:    movdqa %xmm0, %xmm1
2640; SSE2-NEXT:    psrlw $4, %xmm1
2641; SSE2-NEXT:    paddb %xmm0, %xmm1
2642; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
2643; SSE2-NEXT:    movdqa %xmm1, %xmm0
2644; SSE2-NEXT:    psllw $8, %xmm0
2645; SSE2-NEXT:    paddb %xmm1, %xmm0
2646; SSE2-NEXT:    psrlw $8, %xmm0
2647; SSE2-NEXT:    pcmpgtw {{.*}}(%rip), %xmm0
2648; SSE2-NEXT:    retq
2649;
2650; SSE3-LABEL: ugt_5_v8i16:
2651; SSE3:       # %bb.0:
2652; SSE3-NEXT:    movdqa %xmm0, %xmm1
2653; SSE3-NEXT:    psrlw $1, %xmm1
2654; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
2655; SSE3-NEXT:    psubb %xmm1, %xmm0
2656; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
2657; SSE3-NEXT:    movdqa %xmm0, %xmm2
2658; SSE3-NEXT:    pand %xmm1, %xmm2
2659; SSE3-NEXT:    psrlw $2, %xmm0
2660; SSE3-NEXT:    pand %xmm1, %xmm0
2661; SSE3-NEXT:    paddb %xmm2, %xmm0
2662; SSE3-NEXT:    movdqa %xmm0, %xmm1
2663; SSE3-NEXT:    psrlw $4, %xmm1
2664; SSE3-NEXT:    paddb %xmm0, %xmm1
2665; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
2666; SSE3-NEXT:    movdqa %xmm1, %xmm0
2667; SSE3-NEXT:    psllw $8, %xmm0
2668; SSE3-NEXT:    paddb %xmm1, %xmm0
2669; SSE3-NEXT:    psrlw $8, %xmm0
2670; SSE3-NEXT:    pcmpgtw {{.*}}(%rip), %xmm0
2671; SSE3-NEXT:    retq
2672;
2673; SSSE3-LABEL: ugt_5_v8i16:
2674; SSSE3:       # %bb.0:
2675; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
2676; SSSE3-NEXT:    movdqa %xmm0, %xmm2
2677; SSSE3-NEXT:    pand %xmm1, %xmm2
2678; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
2679; SSSE3-NEXT:    movdqa %xmm3, %xmm4
2680; SSSE3-NEXT:    pshufb %xmm2, %xmm4
2681; SSSE3-NEXT:    psrlw $4, %xmm0
2682; SSSE3-NEXT:    pand %xmm1, %xmm0
2683; SSSE3-NEXT:    pshufb %xmm0, %xmm3
2684; SSSE3-NEXT:    paddb %xmm4, %xmm3
2685; SSSE3-NEXT:    movdqa %xmm3, %xmm0
2686; SSSE3-NEXT:    psllw $8, %xmm0
2687; SSSE3-NEXT:    paddb %xmm3, %xmm0
2688; SSSE3-NEXT:    psrlw $8, %xmm0
2689; SSSE3-NEXT:    pcmpgtw {{.*}}(%rip), %xmm0
2690; SSSE3-NEXT:    retq
2691;
2692; SSE41-LABEL: ugt_5_v8i16:
2693; SSE41:       # %bb.0:
2694; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
2695; SSE41-NEXT:    movdqa %xmm0, %xmm2
2696; SSE41-NEXT:    pand %xmm1, %xmm2
2697; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
2698; SSE41-NEXT:    movdqa %xmm3, %xmm4
2699; SSE41-NEXT:    pshufb %xmm2, %xmm4
2700; SSE41-NEXT:    psrlw $4, %xmm0
2701; SSE41-NEXT:    pand %xmm1, %xmm0
2702; SSE41-NEXT:    pshufb %xmm0, %xmm3
2703; SSE41-NEXT:    paddb %xmm4, %xmm3
2704; SSE41-NEXT:    movdqa %xmm3, %xmm0
2705; SSE41-NEXT:    psllw $8, %xmm0
2706; SSE41-NEXT:    paddb %xmm3, %xmm0
2707; SSE41-NEXT:    psrlw $8, %xmm0
2708; SSE41-NEXT:    pcmpgtw {{.*}}(%rip), %xmm0
2709; SSE41-NEXT:    retq
2710;
2711; AVX1-LABEL: ugt_5_v8i16:
2712; AVX1:       # %bb.0:
2713; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
2714; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
2715; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
2716; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
2717; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
2718; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
2719; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
2720; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
2721; AVX1-NEXT:    vpsllw $8, %xmm0, %xmm1
2722; AVX1-NEXT:    vpaddb %xmm0, %xmm1, %xmm0
2723; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm0
2724; AVX1-NEXT:    vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0
2725; AVX1-NEXT:    retq
2726;
2727; AVX2-LABEL: ugt_5_v8i16:
2728; AVX2:       # %bb.0:
2729; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
2730; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
2731; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
2732; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
2733; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
2734; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
2735; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
2736; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
2737; AVX2-NEXT:    vpsllw $8, %xmm0, %xmm1
2738; AVX2-NEXT:    vpaddb %xmm0, %xmm1, %xmm0
2739; AVX2-NEXT:    vpsrlw $8, %xmm0, %xmm0
2740; AVX2-NEXT:    vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0
2741; AVX2-NEXT:    retq
2742;
2743; AVX512VPOPCNTDQ-LABEL: ugt_5_v8i16:
2744; AVX512VPOPCNTDQ:       # %bb.0:
2745; AVX512VPOPCNTDQ-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
2746; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
2747; AVX512VPOPCNTDQ-NEXT:    vpmovdw %zmm0, %ymm0
2748; AVX512VPOPCNTDQ-NEXT:    vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0
2749; AVX512VPOPCNTDQ-NEXT:    vzeroupper
2750; AVX512VPOPCNTDQ-NEXT:    retq
2751;
2752; AVX512VPOPCNTDQVL-LABEL: ugt_5_v8i16:
2753; AVX512VPOPCNTDQVL:       # %bb.0:
2754; AVX512VPOPCNTDQVL-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
2755; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %ymm0, %ymm0
2756; AVX512VPOPCNTDQVL-NEXT:    vpmovdw %ymm0, %xmm0
2757; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0
2758; AVX512VPOPCNTDQVL-NEXT:    vzeroupper
2759; AVX512VPOPCNTDQVL-NEXT:    retq
2760;
2761; BITALG_NOVLX-LABEL: ugt_5_v8i16:
2762; BITALG_NOVLX:       # %bb.0:
2763; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
2764; BITALG_NOVLX-NEXT:    vpopcntw %zmm0, %zmm0
2765; BITALG_NOVLX-NEXT:    vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0
2766; BITALG_NOVLX-NEXT:    vzeroupper
2767; BITALG_NOVLX-NEXT:    retq
2768;
2769; BITALG-LABEL: ugt_5_v8i16:
2770; BITALG:       # %bb.0:
2771; BITALG-NEXT:    vpopcntw %xmm0, %xmm0
2772; BITALG-NEXT:    vpcmpnleuw {{.*}}(%rip), %xmm0, %k0
2773; BITALG-NEXT:    vpmovm2w %k0, %xmm0
2774; BITALG-NEXT:    retq
; IR under test: per-lane popcount, unsigned compare against splat 5, sext mask.
2775  %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
2776  %3 = icmp ugt <8 x i16> %2, <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
2777  %4 = sext <8 x i1> %3 to <8 x i16>
2778  ret <8 x i16> %4
2779}
2780
; NOTE(review): the CHECK lines in this function were autogenerated by
; utils/update_llc_test_checks.py -- regenerate with that script instead of
; editing them by hand.
; Tests lowering of `icmp ult (ctpop <8 x i16>), 6`, sign-extended to a
; <8 x i16> lane mask. SSE2/SSE3 expand the popcount with the shift/mask/add
; bit-counting sequence; SSSE3/SSE41/AVX use the pshufb nibble-LUT; the
; `ult 6` becomes pcmpgtw/vpcmpgtw with a splat-6 constant on the left.
; BITALG targets emit vpopcntw + vpcmpltuw directly.
2781define <8 x i16> @ult_6_v8i16(<8 x i16> %0) {
2782; SSE2-LABEL: ult_6_v8i16:
2783; SSE2:       # %bb.0:
2784; SSE2-NEXT:    movdqa %xmm0, %xmm1
2785; SSE2-NEXT:    psrlw $1, %xmm1
2786; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
2787; SSE2-NEXT:    psubb %xmm1, %xmm0
2788; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
2789; SSE2-NEXT:    movdqa %xmm0, %xmm2
2790; SSE2-NEXT:    pand %xmm1, %xmm2
2791; SSE2-NEXT:    psrlw $2, %xmm0
2792; SSE2-NEXT:    pand %xmm1, %xmm0
2793; SSE2-NEXT:    paddb %xmm2, %xmm0
2794; SSE2-NEXT:    movdqa %xmm0, %xmm1
2795; SSE2-NEXT:    psrlw $4, %xmm1
2796; SSE2-NEXT:    paddb %xmm0, %xmm1
2797; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
2798; SSE2-NEXT:    movdqa %xmm1, %xmm2
2799; SSE2-NEXT:    psllw $8, %xmm2
2800; SSE2-NEXT:    paddb %xmm1, %xmm2
2801; SSE2-NEXT:    psrlw $8, %xmm2
2802; SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [6,6,6,6,6,6,6,6]
2803; SSE2-NEXT:    pcmpgtw %xmm2, %xmm0
2804; SSE2-NEXT:    retq
2805;
2806; SSE3-LABEL: ult_6_v8i16:
2807; SSE3:       # %bb.0:
2808; SSE3-NEXT:    movdqa %xmm0, %xmm1
2809; SSE3-NEXT:    psrlw $1, %xmm1
2810; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
2811; SSE3-NEXT:    psubb %xmm1, %xmm0
2812; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
2813; SSE3-NEXT:    movdqa %xmm0, %xmm2
2814; SSE3-NEXT:    pand %xmm1, %xmm2
2815; SSE3-NEXT:    psrlw $2, %xmm0
2816; SSE3-NEXT:    pand %xmm1, %xmm0
2817; SSE3-NEXT:    paddb %xmm2, %xmm0
2818; SSE3-NEXT:    movdqa %xmm0, %xmm1
2819; SSE3-NEXT:    psrlw $4, %xmm1
2820; SSE3-NEXT:    paddb %xmm0, %xmm1
2821; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
2822; SSE3-NEXT:    movdqa %xmm1, %xmm2
2823; SSE3-NEXT:    psllw $8, %xmm2
2824; SSE3-NEXT:    paddb %xmm1, %xmm2
2825; SSE3-NEXT:    psrlw $8, %xmm2
2826; SSE3-NEXT:    movdqa {{.*#+}} xmm0 = [6,6,6,6,6,6,6,6]
2827; SSE3-NEXT:    pcmpgtw %xmm2, %xmm0
2828; SSE3-NEXT:    retq
2829;
2830; SSSE3-LABEL: ult_6_v8i16:
2831; SSSE3:       # %bb.0:
2832; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
2833; SSSE3-NEXT:    movdqa %xmm0, %xmm2
2834; SSSE3-NEXT:    pand %xmm1, %xmm2
2835; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
2836; SSSE3-NEXT:    movdqa %xmm3, %xmm4
2837; SSSE3-NEXT:    pshufb %xmm2, %xmm4
2838; SSSE3-NEXT:    psrlw $4, %xmm0
2839; SSSE3-NEXT:    pand %xmm1, %xmm0
2840; SSSE3-NEXT:    pshufb %xmm0, %xmm3
2841; SSSE3-NEXT:    paddb %xmm4, %xmm3
2842; SSSE3-NEXT:    movdqa %xmm3, %xmm1
2843; SSSE3-NEXT:    psllw $8, %xmm1
2844; SSSE3-NEXT:    paddb %xmm3, %xmm1
2845; SSSE3-NEXT:    psrlw $8, %xmm1
2846; SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [6,6,6,6,6,6,6,6]
2847; SSSE3-NEXT:    pcmpgtw %xmm1, %xmm0
2848; SSSE3-NEXT:    retq
2849;
2850; SSE41-LABEL: ult_6_v8i16:
2851; SSE41:       # %bb.0:
2852; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
2853; SSE41-NEXT:    movdqa %xmm0, %xmm2
2854; SSE41-NEXT:    pand %xmm1, %xmm2
2855; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
2856; SSE41-NEXT:    movdqa %xmm3, %xmm4
2857; SSE41-NEXT:    pshufb %xmm2, %xmm4
2858; SSE41-NEXT:    psrlw $4, %xmm0
2859; SSE41-NEXT:    pand %xmm1, %xmm0
2860; SSE41-NEXT:    pshufb %xmm0, %xmm3
2861; SSE41-NEXT:    paddb %xmm4, %xmm3
2862; SSE41-NEXT:    movdqa %xmm3, %xmm1
2863; SSE41-NEXT:    psllw $8, %xmm1
2864; SSE41-NEXT:    paddb %xmm3, %xmm1
2865; SSE41-NEXT:    psrlw $8, %xmm1
2866; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [6,6,6,6,6,6,6,6]
2867; SSE41-NEXT:    pcmpgtw %xmm1, %xmm0
2868; SSE41-NEXT:    retq
2869;
2870; AVX1-LABEL: ult_6_v8i16:
2871; AVX1:       # %bb.0:
2872; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
2873; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
2874; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
2875; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
2876; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
2877; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
2878; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
2879; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
2880; AVX1-NEXT:    vpsllw $8, %xmm0, %xmm1
2881; AVX1-NEXT:    vpaddb %xmm0, %xmm1, %xmm0
2882; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm0
2883; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [6,6,6,6,6,6,6,6]
2884; AVX1-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
2885; AVX1-NEXT:    retq
2886;
2887; AVX2-LABEL: ult_6_v8i16:
2888; AVX2:       # %bb.0:
2889; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
2890; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
2891; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
2892; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
2893; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
2894; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
2895; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
2896; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
2897; AVX2-NEXT:    vpsllw $8, %xmm0, %xmm1
2898; AVX2-NEXT:    vpaddb %xmm0, %xmm1, %xmm0
2899; AVX2-NEXT:    vpsrlw $8, %xmm0, %xmm0
2900; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [6,6,6,6,6,6,6,6]
2901; AVX2-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
2902; AVX2-NEXT:    retq
2903;
2904; AVX512VPOPCNTDQ-LABEL: ult_6_v8i16:
2905; AVX512VPOPCNTDQ:       # %bb.0:
2906; AVX512VPOPCNTDQ-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
2907; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
2908; AVX512VPOPCNTDQ-NEXT:    vpmovdw %zmm0, %ymm0
2909; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} xmm1 = [6,6,6,6,6,6,6,6]
2910; AVX512VPOPCNTDQ-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
2911; AVX512VPOPCNTDQ-NEXT:    vzeroupper
2912; AVX512VPOPCNTDQ-NEXT:    retq
2913;
2914; AVX512VPOPCNTDQVL-LABEL: ult_6_v8i16:
2915; AVX512VPOPCNTDQVL:       # %bb.0:
2916; AVX512VPOPCNTDQVL-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
2917; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %ymm0, %ymm0
2918; AVX512VPOPCNTDQVL-NEXT:    vpmovdw %ymm0, %xmm0
2919; AVX512VPOPCNTDQVL-NEXT:    vmovdqa {{.*#+}} xmm1 = [6,6,6,6,6,6,6,6]
2920; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
2921; AVX512VPOPCNTDQVL-NEXT:    vzeroupper
2922; AVX512VPOPCNTDQVL-NEXT:    retq
2923;
2924; BITALG_NOVLX-LABEL: ult_6_v8i16:
2925; BITALG_NOVLX:       # %bb.0:
2926; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
2927; BITALG_NOVLX-NEXT:    vpopcntw %zmm0, %zmm0
2928; BITALG_NOVLX-NEXT:    vmovdqa {{.*#+}} xmm1 = [6,6,6,6,6,6,6,6]
2929; BITALG_NOVLX-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
2930; BITALG_NOVLX-NEXT:    vzeroupper
2931; BITALG_NOVLX-NEXT:    retq
2932;
2933; BITALG-LABEL: ult_6_v8i16:
2934; BITALG:       # %bb.0:
2935; BITALG-NEXT:    vpopcntw %xmm0, %xmm0
2936; BITALG-NEXT:    vpcmpltuw {{.*}}(%rip), %xmm0, %k0
2937; BITALG-NEXT:    vpmovm2w %k0, %xmm0
2938; BITALG-NEXT:    retq
; IR under test: per-lane popcount, unsigned-less-than 6, sext i1 -> i16.
2939  %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
2940  %3 = icmp ult <8 x i16> %2, <i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6>
2941  %4 = sext <8 x i1> %3 to <8 x i16>
2942  ret <8 x i16> %4
2943}
2944
; NOTE(review): the CHECK lines in this function were autogenerated by
; utils/update_llc_test_checks.py -- regenerate with that script instead of
; editing them by hand.
; Tests lowering of `icmp ugt (ctpop <8 x i16>), 6`, sign-extended to a
; <8 x i16> lane mask. The popcount expansions match the ult_6 variant; the
; `ugt 6` becomes pcmpgtw/vpcmpgtw against a constant-pool splat on the
; right-hand side. BITALG targets emit vpopcntw + vpcmpnleuw.
2945define <8 x i16> @ugt_6_v8i16(<8 x i16> %0) {
2946; SSE2-LABEL: ugt_6_v8i16:
2947; SSE2:       # %bb.0:
2948; SSE2-NEXT:    movdqa %xmm0, %xmm1
2949; SSE2-NEXT:    psrlw $1, %xmm1
2950; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
2951; SSE2-NEXT:    psubb %xmm1, %xmm0
2952; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
2953; SSE2-NEXT:    movdqa %xmm0, %xmm2
2954; SSE2-NEXT:    pand %xmm1, %xmm2
2955; SSE2-NEXT:    psrlw $2, %xmm0
2956; SSE2-NEXT:    pand %xmm1, %xmm0
2957; SSE2-NEXT:    paddb %xmm2, %xmm0
2958; SSE2-NEXT:    movdqa %xmm0, %xmm1
2959; SSE2-NEXT:    psrlw $4, %xmm1
2960; SSE2-NEXT:    paddb %xmm0, %xmm1
2961; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
2962; SSE2-NEXT:    movdqa %xmm1, %xmm0
2963; SSE2-NEXT:    psllw $8, %xmm0
2964; SSE2-NEXT:    paddb %xmm1, %xmm0
2965; SSE2-NEXT:    psrlw $8, %xmm0
2966; SSE2-NEXT:    pcmpgtw {{.*}}(%rip), %xmm0
2967; SSE2-NEXT:    retq
2968;
2969; SSE3-LABEL: ugt_6_v8i16:
2970; SSE3:       # %bb.0:
2971; SSE3-NEXT:    movdqa %xmm0, %xmm1
2972; SSE3-NEXT:    psrlw $1, %xmm1
2973; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
2974; SSE3-NEXT:    psubb %xmm1, %xmm0
2975; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
2976; SSE3-NEXT:    movdqa %xmm0, %xmm2
2977; SSE3-NEXT:    pand %xmm1, %xmm2
2978; SSE3-NEXT:    psrlw $2, %xmm0
2979; SSE3-NEXT:    pand %xmm1, %xmm0
2980; SSE3-NEXT:    paddb %xmm2, %xmm0
2981; SSE3-NEXT:    movdqa %xmm0, %xmm1
2982; SSE3-NEXT:    psrlw $4, %xmm1
2983; SSE3-NEXT:    paddb %xmm0, %xmm1
2984; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
2985; SSE3-NEXT:    movdqa %xmm1, %xmm0
2986; SSE3-NEXT:    psllw $8, %xmm0
2987; SSE3-NEXT:    paddb %xmm1, %xmm0
2988; SSE3-NEXT:    psrlw $8, %xmm0
2989; SSE3-NEXT:    pcmpgtw {{.*}}(%rip), %xmm0
2990; SSE3-NEXT:    retq
2991;
2992; SSSE3-LABEL: ugt_6_v8i16:
2993; SSSE3:       # %bb.0:
2994; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
2995; SSSE3-NEXT:    movdqa %xmm0, %xmm2
2996; SSSE3-NEXT:    pand %xmm1, %xmm2
2997; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
2998; SSSE3-NEXT:    movdqa %xmm3, %xmm4
2999; SSSE3-NEXT:    pshufb %xmm2, %xmm4
3000; SSSE3-NEXT:    psrlw $4, %xmm0
3001; SSSE3-NEXT:    pand %xmm1, %xmm0
3002; SSSE3-NEXT:    pshufb %xmm0, %xmm3
3003; SSSE3-NEXT:    paddb %xmm4, %xmm3
3004; SSSE3-NEXT:    movdqa %xmm3, %xmm0
3005; SSSE3-NEXT:    psllw $8, %xmm0
3006; SSSE3-NEXT:    paddb %xmm3, %xmm0
3007; SSSE3-NEXT:    psrlw $8, %xmm0
3008; SSSE3-NEXT:    pcmpgtw {{.*}}(%rip), %xmm0
3009; SSSE3-NEXT:    retq
3010;
3011; SSE41-LABEL: ugt_6_v8i16:
3012; SSE41:       # %bb.0:
3013; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
3014; SSE41-NEXT:    movdqa %xmm0, %xmm2
3015; SSE41-NEXT:    pand %xmm1, %xmm2
3016; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
3017; SSE41-NEXT:    movdqa %xmm3, %xmm4
3018; SSE41-NEXT:    pshufb %xmm2, %xmm4
3019; SSE41-NEXT:    psrlw $4, %xmm0
3020; SSE41-NEXT:    pand %xmm1, %xmm0
3021; SSE41-NEXT:    pshufb %xmm0, %xmm3
3022; SSE41-NEXT:    paddb %xmm4, %xmm3
3023; SSE41-NEXT:    movdqa %xmm3, %xmm0
3024; SSE41-NEXT:    psllw $8, %xmm0
3025; SSE41-NEXT:    paddb %xmm3, %xmm0
3026; SSE41-NEXT:    psrlw $8, %xmm0
3027; SSE41-NEXT:    pcmpgtw {{.*}}(%rip), %xmm0
3028; SSE41-NEXT:    retq
3029;
3030; AVX1-LABEL: ugt_6_v8i16:
3031; AVX1:       # %bb.0:
3032; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
3033; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
3034; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
3035; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
3036; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
3037; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
3038; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
3039; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
3040; AVX1-NEXT:    vpsllw $8, %xmm0, %xmm1
3041; AVX1-NEXT:    vpaddb %xmm0, %xmm1, %xmm0
3042; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm0
3043; AVX1-NEXT:    vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0
3044; AVX1-NEXT:    retq
3045;
3046; AVX2-LABEL: ugt_6_v8i16:
3047; AVX2:       # %bb.0:
3048; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
3049; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
3050; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
3051; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
3052; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
3053; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
3054; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
3055; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
3056; AVX2-NEXT:    vpsllw $8, %xmm0, %xmm1
3057; AVX2-NEXT:    vpaddb %xmm0, %xmm1, %xmm0
3058; AVX2-NEXT:    vpsrlw $8, %xmm0, %xmm0
3059; AVX2-NEXT:    vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0
3060; AVX2-NEXT:    retq
3061;
3062; AVX512VPOPCNTDQ-LABEL: ugt_6_v8i16:
3063; AVX512VPOPCNTDQ:       # %bb.0:
3064; AVX512VPOPCNTDQ-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
3065; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
3066; AVX512VPOPCNTDQ-NEXT:    vpmovdw %zmm0, %ymm0
3067; AVX512VPOPCNTDQ-NEXT:    vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0
3068; AVX512VPOPCNTDQ-NEXT:    vzeroupper
3069; AVX512VPOPCNTDQ-NEXT:    retq
3070;
3071; AVX512VPOPCNTDQVL-LABEL: ugt_6_v8i16:
3072; AVX512VPOPCNTDQVL:       # %bb.0:
3073; AVX512VPOPCNTDQVL-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
3074; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %ymm0, %ymm0
3075; AVX512VPOPCNTDQVL-NEXT:    vpmovdw %ymm0, %xmm0
3076; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0
3077; AVX512VPOPCNTDQVL-NEXT:    vzeroupper
3078; AVX512VPOPCNTDQVL-NEXT:    retq
3079;
3080; BITALG_NOVLX-LABEL: ugt_6_v8i16:
3081; BITALG_NOVLX:       # %bb.0:
3082; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
3083; BITALG_NOVLX-NEXT:    vpopcntw %zmm0, %zmm0
3084; BITALG_NOVLX-NEXT:    vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0
3085; BITALG_NOVLX-NEXT:    vzeroupper
3086; BITALG_NOVLX-NEXT:    retq
3087;
3088; BITALG-LABEL: ugt_6_v8i16:
3089; BITALG:       # %bb.0:
3090; BITALG-NEXT:    vpopcntw %xmm0, %xmm0
3091; BITALG-NEXT:    vpcmpnleuw {{.*}}(%rip), %xmm0, %k0
3092; BITALG-NEXT:    vpmovm2w %k0, %xmm0
3093; BITALG-NEXT:    retq
; IR under test: per-lane popcount, unsigned-greater-than 6, sext i1 -> i16.
3094  %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
3095  %3 = icmp ugt <8 x i16> %2, <i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6>
3096  %4 = sext <8 x i1> %3 to <8 x i16>
3097  ret <8 x i16> %4
3098}
3099
; NOTE(review): the CHECK lines in this function were autogenerated by
; utils/update_llc_test_checks.py -- regenerate with that script instead of
; editing them by hand.
; Tests lowering of `icmp ult (ctpop <8 x i16>), 7`, sign-extended to a
; <8 x i16> lane mask. Same popcount expansions as the ult_6 variant, with
; the comparison splat constant 7; BITALG emits vpopcntw + vpcmpltuw.
3100define <8 x i16> @ult_7_v8i16(<8 x i16> %0) {
3101; SSE2-LABEL: ult_7_v8i16:
3102; SSE2:       # %bb.0:
3103; SSE2-NEXT:    movdqa %xmm0, %xmm1
3104; SSE2-NEXT:    psrlw $1, %xmm1
3105; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
3106; SSE2-NEXT:    psubb %xmm1, %xmm0
3107; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
3108; SSE2-NEXT:    movdqa %xmm0, %xmm2
3109; SSE2-NEXT:    pand %xmm1, %xmm2
3110; SSE2-NEXT:    psrlw $2, %xmm0
3111; SSE2-NEXT:    pand %xmm1, %xmm0
3112; SSE2-NEXT:    paddb %xmm2, %xmm0
3113; SSE2-NEXT:    movdqa %xmm0, %xmm1
3114; SSE2-NEXT:    psrlw $4, %xmm1
3115; SSE2-NEXT:    paddb %xmm0, %xmm1
3116; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
3117; SSE2-NEXT:    movdqa %xmm1, %xmm2
3118; SSE2-NEXT:    psllw $8, %xmm2
3119; SSE2-NEXT:    paddb %xmm1, %xmm2
3120; SSE2-NEXT:    psrlw $8, %xmm2
3121; SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [7,7,7,7,7,7,7,7]
3122; SSE2-NEXT:    pcmpgtw %xmm2, %xmm0
3123; SSE2-NEXT:    retq
3124;
3125; SSE3-LABEL: ult_7_v8i16:
3126; SSE3:       # %bb.0:
3127; SSE3-NEXT:    movdqa %xmm0, %xmm1
3128; SSE3-NEXT:    psrlw $1, %xmm1
3129; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
3130; SSE3-NEXT:    psubb %xmm1, %xmm0
3131; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
3132; SSE3-NEXT:    movdqa %xmm0, %xmm2
3133; SSE3-NEXT:    pand %xmm1, %xmm2
3134; SSE3-NEXT:    psrlw $2, %xmm0
3135; SSE3-NEXT:    pand %xmm1, %xmm0
3136; SSE3-NEXT:    paddb %xmm2, %xmm0
3137; SSE3-NEXT:    movdqa %xmm0, %xmm1
3138; SSE3-NEXT:    psrlw $4, %xmm1
3139; SSE3-NEXT:    paddb %xmm0, %xmm1
3140; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
3141; SSE3-NEXT:    movdqa %xmm1, %xmm2
3142; SSE3-NEXT:    psllw $8, %xmm2
3143; SSE3-NEXT:    paddb %xmm1, %xmm2
3144; SSE3-NEXT:    psrlw $8, %xmm2
3145; SSE3-NEXT:    movdqa {{.*#+}} xmm0 = [7,7,7,7,7,7,7,7]
3146; SSE3-NEXT:    pcmpgtw %xmm2, %xmm0
3147; SSE3-NEXT:    retq
3148;
3149; SSSE3-LABEL: ult_7_v8i16:
3150; SSSE3:       # %bb.0:
3151; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
3152; SSSE3-NEXT:    movdqa %xmm0, %xmm2
3153; SSSE3-NEXT:    pand %xmm1, %xmm2
3154; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
3155; SSSE3-NEXT:    movdqa %xmm3, %xmm4
3156; SSSE3-NEXT:    pshufb %xmm2, %xmm4
3157; SSSE3-NEXT:    psrlw $4, %xmm0
3158; SSSE3-NEXT:    pand %xmm1, %xmm0
3159; SSSE3-NEXT:    pshufb %xmm0, %xmm3
3160; SSSE3-NEXT:    paddb %xmm4, %xmm3
3161; SSSE3-NEXT:    movdqa %xmm3, %xmm1
3162; SSSE3-NEXT:    psllw $8, %xmm1
3163; SSSE3-NEXT:    paddb %xmm3, %xmm1
3164; SSSE3-NEXT:    psrlw $8, %xmm1
3165; SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [7,7,7,7,7,7,7,7]
3166; SSSE3-NEXT:    pcmpgtw %xmm1, %xmm0
3167; SSSE3-NEXT:    retq
3168;
3169; SSE41-LABEL: ult_7_v8i16:
3170; SSE41:       # %bb.0:
3171; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
3172; SSE41-NEXT:    movdqa %xmm0, %xmm2
3173; SSE41-NEXT:    pand %xmm1, %xmm2
3174; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
3175; SSE41-NEXT:    movdqa %xmm3, %xmm4
3176; SSE41-NEXT:    pshufb %xmm2, %xmm4
3177; SSE41-NEXT:    psrlw $4, %xmm0
3178; SSE41-NEXT:    pand %xmm1, %xmm0
3179; SSE41-NEXT:    pshufb %xmm0, %xmm3
3180; SSE41-NEXT:    paddb %xmm4, %xmm3
3181; SSE41-NEXT:    movdqa %xmm3, %xmm1
3182; SSE41-NEXT:    psllw $8, %xmm1
3183; SSE41-NEXT:    paddb %xmm3, %xmm1
3184; SSE41-NEXT:    psrlw $8, %xmm1
3185; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [7,7,7,7,7,7,7,7]
3186; SSE41-NEXT:    pcmpgtw %xmm1, %xmm0
3187; SSE41-NEXT:    retq
3188;
3189; AVX1-LABEL: ult_7_v8i16:
3190; AVX1:       # %bb.0:
3191; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
3192; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
3193; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
3194; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
3195; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
3196; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
3197; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
3198; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
3199; AVX1-NEXT:    vpsllw $8, %xmm0, %xmm1
3200; AVX1-NEXT:    vpaddb %xmm0, %xmm1, %xmm0
3201; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm0
3202; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [7,7,7,7,7,7,7,7]
3203; AVX1-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
3204; AVX1-NEXT:    retq
3205;
3206; AVX2-LABEL: ult_7_v8i16:
3207; AVX2:       # %bb.0:
3208; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
3209; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
3210; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
3211; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
3212; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
3213; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
3214; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
3215; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
3216; AVX2-NEXT:    vpsllw $8, %xmm0, %xmm1
3217; AVX2-NEXT:    vpaddb %xmm0, %xmm1, %xmm0
3218; AVX2-NEXT:    vpsrlw $8, %xmm0, %xmm0
3219; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [7,7,7,7,7,7,7,7]
3220; AVX2-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
3221; AVX2-NEXT:    retq
3222;
3223; AVX512VPOPCNTDQ-LABEL: ult_7_v8i16:
3224; AVX512VPOPCNTDQ:       # %bb.0:
3225; AVX512VPOPCNTDQ-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
3226; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
3227; AVX512VPOPCNTDQ-NEXT:    vpmovdw %zmm0, %ymm0
3228; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} xmm1 = [7,7,7,7,7,7,7,7]
3229; AVX512VPOPCNTDQ-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
3230; AVX512VPOPCNTDQ-NEXT:    vzeroupper
3231; AVX512VPOPCNTDQ-NEXT:    retq
3232;
3233; AVX512VPOPCNTDQVL-LABEL: ult_7_v8i16:
3234; AVX512VPOPCNTDQVL:       # %bb.0:
3235; AVX512VPOPCNTDQVL-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
3236; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %ymm0, %ymm0
3237; AVX512VPOPCNTDQVL-NEXT:    vpmovdw %ymm0, %xmm0
3238; AVX512VPOPCNTDQVL-NEXT:    vmovdqa {{.*#+}} xmm1 = [7,7,7,7,7,7,7,7]
3239; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
3240; AVX512VPOPCNTDQVL-NEXT:    vzeroupper
3241; AVX512VPOPCNTDQVL-NEXT:    retq
3242;
3243; BITALG_NOVLX-LABEL: ult_7_v8i16:
3244; BITALG_NOVLX:       # %bb.0:
3245; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
3246; BITALG_NOVLX-NEXT:    vpopcntw %zmm0, %zmm0
3247; BITALG_NOVLX-NEXT:    vmovdqa {{.*#+}} xmm1 = [7,7,7,7,7,7,7,7]
3248; BITALG_NOVLX-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
3249; BITALG_NOVLX-NEXT:    vzeroupper
3250; BITALG_NOVLX-NEXT:    retq
3251;
3252; BITALG-LABEL: ult_7_v8i16:
3253; BITALG:       # %bb.0:
3254; BITALG-NEXT:    vpopcntw %xmm0, %xmm0
3255; BITALG-NEXT:    vpcmpltuw {{.*}}(%rip), %xmm0, %k0
3256; BITALG-NEXT:    vpmovm2w %k0, %xmm0
3257; BITALG-NEXT:    retq
; IR under test: per-lane popcount, unsigned-less-than 7, sext i1 -> i16.
3258  %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
3259  %3 = icmp ult <8 x i16> %2, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
3260  %4 = sext <8 x i1> %3 to <8 x i16>
3261  ret <8 x i16> %4
3262}
3263
; NOTE(review): the CHECK lines in this function were autogenerated by
; utils/update_llc_test_checks.py -- regenerate with that script instead of
; editing them by hand.
; Tests lowering of `icmp ugt (ctpop <8 x i16>), 7`, sign-extended to a
; <8 x i16> lane mask. Same popcount expansions as the ugt_6 variant;
; the comparison constant lives in the constant pool and is compared via
; pcmpgtw/vpcmpgtw. BITALG targets emit vpopcntw + vpcmpnleuw.
3264define <8 x i16> @ugt_7_v8i16(<8 x i16> %0) {
3265; SSE2-LABEL: ugt_7_v8i16:
3266; SSE2:       # %bb.0:
3267; SSE2-NEXT:    movdqa %xmm0, %xmm1
3268; SSE2-NEXT:    psrlw $1, %xmm1
3269; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
3270; SSE2-NEXT:    psubb %xmm1, %xmm0
3271; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
3272; SSE2-NEXT:    movdqa %xmm0, %xmm2
3273; SSE2-NEXT:    pand %xmm1, %xmm2
3274; SSE2-NEXT:    psrlw $2, %xmm0
3275; SSE2-NEXT:    pand %xmm1, %xmm0
3276; SSE2-NEXT:    paddb %xmm2, %xmm0
3277; SSE2-NEXT:    movdqa %xmm0, %xmm1
3278; SSE2-NEXT:    psrlw $4, %xmm1
3279; SSE2-NEXT:    paddb %xmm0, %xmm1
3280; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
3281; SSE2-NEXT:    movdqa %xmm1, %xmm0
3282; SSE2-NEXT:    psllw $8, %xmm0
3283; SSE2-NEXT:    paddb %xmm1, %xmm0
3284; SSE2-NEXT:    psrlw $8, %xmm0
3285; SSE2-NEXT:    pcmpgtw {{.*}}(%rip), %xmm0
3286; SSE2-NEXT:    retq
3287;
3288; SSE3-LABEL: ugt_7_v8i16:
3289; SSE3:       # %bb.0:
3290; SSE3-NEXT:    movdqa %xmm0, %xmm1
3291; SSE3-NEXT:    psrlw $1, %xmm1
3292; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
3293; SSE3-NEXT:    psubb %xmm1, %xmm0
3294; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
3295; SSE3-NEXT:    movdqa %xmm0, %xmm2
3296; SSE3-NEXT:    pand %xmm1, %xmm2
3297; SSE3-NEXT:    psrlw $2, %xmm0
3298; SSE3-NEXT:    pand %xmm1, %xmm0
3299; SSE3-NEXT:    paddb %xmm2, %xmm0
3300; SSE3-NEXT:    movdqa %xmm0, %xmm1
3301; SSE3-NEXT:    psrlw $4, %xmm1
3302; SSE3-NEXT:    paddb %xmm0, %xmm1
3303; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
3304; SSE3-NEXT:    movdqa %xmm1, %xmm0
3305; SSE3-NEXT:    psllw $8, %xmm0
3306; SSE3-NEXT:    paddb %xmm1, %xmm0
3307; SSE3-NEXT:    psrlw $8, %xmm0
3308; SSE3-NEXT:    pcmpgtw {{.*}}(%rip), %xmm0
3309; SSE3-NEXT:    retq
3310;
3311; SSSE3-LABEL: ugt_7_v8i16:
3312; SSSE3:       # %bb.0:
3313; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
3314; SSSE3-NEXT:    movdqa %xmm0, %xmm2
3315; SSSE3-NEXT:    pand %xmm1, %xmm2
3316; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
3317; SSSE3-NEXT:    movdqa %xmm3, %xmm4
3318; SSSE3-NEXT:    pshufb %xmm2, %xmm4
3319; SSSE3-NEXT:    psrlw $4, %xmm0
3320; SSSE3-NEXT:    pand %xmm1, %xmm0
3321; SSSE3-NEXT:    pshufb %xmm0, %xmm3
3322; SSSE3-NEXT:    paddb %xmm4, %xmm3
3323; SSSE3-NEXT:    movdqa %xmm3, %xmm0
3324; SSSE3-NEXT:    psllw $8, %xmm0
3325; SSSE3-NEXT:    paddb %xmm3, %xmm0
3326; SSSE3-NEXT:    psrlw $8, %xmm0
3327; SSSE3-NEXT:    pcmpgtw {{.*}}(%rip), %xmm0
3328; SSSE3-NEXT:    retq
3329;
3330; SSE41-LABEL: ugt_7_v8i16:
3331; SSE41:       # %bb.0:
3332; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
3333; SSE41-NEXT:    movdqa %xmm0, %xmm2
3334; SSE41-NEXT:    pand %xmm1, %xmm2
3335; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
3336; SSE41-NEXT:    movdqa %xmm3, %xmm4
3337; SSE41-NEXT:    pshufb %xmm2, %xmm4
3338; SSE41-NEXT:    psrlw $4, %xmm0
3339; SSE41-NEXT:    pand %xmm1, %xmm0
3340; SSE41-NEXT:    pshufb %xmm0, %xmm3
3341; SSE41-NEXT:    paddb %xmm4, %xmm3
3342; SSE41-NEXT:    movdqa %xmm3, %xmm0
3343; SSE41-NEXT:    psllw $8, %xmm0
3344; SSE41-NEXT:    paddb %xmm3, %xmm0
3345; SSE41-NEXT:    psrlw $8, %xmm0
3346; SSE41-NEXT:    pcmpgtw {{.*}}(%rip), %xmm0
3347; SSE41-NEXT:    retq
3348;
3349; AVX1-LABEL: ugt_7_v8i16:
3350; AVX1:       # %bb.0:
3351; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
3352; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
3353; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
3354; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
3355; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
3356; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
3357; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
3358; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
3359; AVX1-NEXT:    vpsllw $8, %xmm0, %xmm1
3360; AVX1-NEXT:    vpaddb %xmm0, %xmm1, %xmm0
3361; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm0
3362; AVX1-NEXT:    vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0
3363; AVX1-NEXT:    retq
3364;
3365; AVX2-LABEL: ugt_7_v8i16:
3366; AVX2:       # %bb.0:
3367; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
3368; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
3369; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
3370; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
3371; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
3372; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
3373; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
3374; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
3375; AVX2-NEXT:    vpsllw $8, %xmm0, %xmm1
3376; AVX2-NEXT:    vpaddb %xmm0, %xmm1, %xmm0
3377; AVX2-NEXT:    vpsrlw $8, %xmm0, %xmm0
3378; AVX2-NEXT:    vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0
3379; AVX2-NEXT:    retq
3380;
3381; AVX512VPOPCNTDQ-LABEL: ugt_7_v8i16:
3382; AVX512VPOPCNTDQ:       # %bb.0:
3383; AVX512VPOPCNTDQ-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
3384; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
3385; AVX512VPOPCNTDQ-NEXT:    vpmovdw %zmm0, %ymm0
3386; AVX512VPOPCNTDQ-NEXT:    vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0
3387; AVX512VPOPCNTDQ-NEXT:    vzeroupper
3388; AVX512VPOPCNTDQ-NEXT:    retq
3389;
3390; AVX512VPOPCNTDQVL-LABEL: ugt_7_v8i16:
3391; AVX512VPOPCNTDQVL:       # %bb.0:
3392; AVX512VPOPCNTDQVL-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
3393; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %ymm0, %ymm0
3394; AVX512VPOPCNTDQVL-NEXT:    vpmovdw %ymm0, %xmm0
3395; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0
3396; AVX512VPOPCNTDQVL-NEXT:    vzeroupper
3397; AVX512VPOPCNTDQVL-NEXT:    retq
3398;
3399; BITALG_NOVLX-LABEL: ugt_7_v8i16:
3400; BITALG_NOVLX:       # %bb.0:
3401; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
3402; BITALG_NOVLX-NEXT:    vpopcntw %zmm0, %zmm0
3403; BITALG_NOVLX-NEXT:    vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0
3404; BITALG_NOVLX-NEXT:    vzeroupper
3405; BITALG_NOVLX-NEXT:    retq
3406;
3407; BITALG-LABEL: ugt_7_v8i16:
3408; BITALG:       # %bb.0:
3409; BITALG-NEXT:    vpopcntw %xmm0, %xmm0
3410; BITALG-NEXT:    vpcmpnleuw {{.*}}(%rip), %xmm0, %k0
3411; BITALG-NEXT:    vpmovm2w %k0, %xmm0
3412; BITALG-NEXT:    retq
; IR under test: per-lane popcount, unsigned-greater-than 7, sext i1 -> i16.
3413  %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
3414  %3 = icmp ugt <8 x i16> %2, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
3415  %4 = sext <8 x i1> %3 to <8 x i16>
3416  ret <8 x i16> %4
3417}
3418
; NOTE(review): the CHECK lines in this function were autogenerated by
; utils/update_llc_test_checks.py -- regenerate with that script instead of
; editing them by hand.
; Tests lowering of `icmp ult (ctpop <8 x i16>), 8`, sign-extended to a
; <8 x i16> lane mask. Same popcount expansions as the other ult variants,
; with splat constant 8 for the comparison; BITALG emits
; vpopcntw + vpcmpltuw.
3419define <8 x i16> @ult_8_v8i16(<8 x i16> %0) {
3420; SSE2-LABEL: ult_8_v8i16:
3421; SSE2:       # %bb.0:
3422; SSE2-NEXT:    movdqa %xmm0, %xmm1
3423; SSE2-NEXT:    psrlw $1, %xmm1
3424; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
3425; SSE2-NEXT:    psubb %xmm1, %xmm0
3426; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
3427; SSE2-NEXT:    movdqa %xmm0, %xmm2
3428; SSE2-NEXT:    pand %xmm1, %xmm2
3429; SSE2-NEXT:    psrlw $2, %xmm0
3430; SSE2-NEXT:    pand %xmm1, %xmm0
3431; SSE2-NEXT:    paddb %xmm2, %xmm0
3432; SSE2-NEXT:    movdqa %xmm0, %xmm1
3433; SSE2-NEXT:    psrlw $4, %xmm1
3434; SSE2-NEXT:    paddb %xmm0, %xmm1
3435; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
3436; SSE2-NEXT:    movdqa %xmm1, %xmm2
3437; SSE2-NEXT:    psllw $8, %xmm2
3438; SSE2-NEXT:    paddb %xmm1, %xmm2
3439; SSE2-NEXT:    psrlw $8, %xmm2
3440; SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [8,8,8,8,8,8,8,8]
3441; SSE2-NEXT:    pcmpgtw %xmm2, %xmm0
3442; SSE2-NEXT:    retq
3443;
3444; SSE3-LABEL: ult_8_v8i16:
3445; SSE3:       # %bb.0:
3446; SSE3-NEXT:    movdqa %xmm0, %xmm1
3447; SSE3-NEXT:    psrlw $1, %xmm1
3448; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
3449; SSE3-NEXT:    psubb %xmm1, %xmm0
3450; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
3451; SSE3-NEXT:    movdqa %xmm0, %xmm2
3452; SSE3-NEXT:    pand %xmm1, %xmm2
3453; SSE3-NEXT:    psrlw $2, %xmm0
3454; SSE3-NEXT:    pand %xmm1, %xmm0
3455; SSE3-NEXT:    paddb %xmm2, %xmm0
3456; SSE3-NEXT:    movdqa %xmm0, %xmm1
3457; SSE3-NEXT:    psrlw $4, %xmm1
3458; SSE3-NEXT:    paddb %xmm0, %xmm1
3459; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
3460; SSE3-NEXT:    movdqa %xmm1, %xmm2
3461; SSE3-NEXT:    psllw $8, %xmm2
3462; SSE3-NEXT:    paddb %xmm1, %xmm2
3463; SSE3-NEXT:    psrlw $8, %xmm2
3464; SSE3-NEXT:    movdqa {{.*#+}} xmm0 = [8,8,8,8,8,8,8,8]
3465; SSE3-NEXT:    pcmpgtw %xmm2, %xmm0
3466; SSE3-NEXT:    retq
3467;
3468; SSSE3-LABEL: ult_8_v8i16:
3469; SSSE3:       # %bb.0:
3470; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
3471; SSSE3-NEXT:    movdqa %xmm0, %xmm2
3472; SSSE3-NEXT:    pand %xmm1, %xmm2
3473; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
3474; SSSE3-NEXT:    movdqa %xmm3, %xmm4
3475; SSSE3-NEXT:    pshufb %xmm2, %xmm4
3476; SSSE3-NEXT:    psrlw $4, %xmm0
3477; SSSE3-NEXT:    pand %xmm1, %xmm0
3478; SSSE3-NEXT:    pshufb %xmm0, %xmm3
3479; SSSE3-NEXT:    paddb %xmm4, %xmm3
3480; SSSE3-NEXT:    movdqa %xmm3, %xmm1
3481; SSSE3-NEXT:    psllw $8, %xmm1
3482; SSSE3-NEXT:    paddb %xmm3, %xmm1
3483; SSSE3-NEXT:    psrlw $8, %xmm1
3484; SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [8,8,8,8,8,8,8,8]
3485; SSSE3-NEXT:    pcmpgtw %xmm1, %xmm0
3486; SSSE3-NEXT:    retq
3487;
3488; SSE41-LABEL: ult_8_v8i16:
3489; SSE41:       # %bb.0:
3490; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
3491; SSE41-NEXT:    movdqa %xmm0, %xmm2
3492; SSE41-NEXT:    pand %xmm1, %xmm2
3493; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
3494; SSE41-NEXT:    movdqa %xmm3, %xmm4
3495; SSE41-NEXT:    pshufb %xmm2, %xmm4
3496; SSE41-NEXT:    psrlw $4, %xmm0
3497; SSE41-NEXT:    pand %xmm1, %xmm0
3498; SSE41-NEXT:    pshufb %xmm0, %xmm3
3499; SSE41-NEXT:    paddb %xmm4, %xmm3
3500; SSE41-NEXT:    movdqa %xmm3, %xmm1
3501; SSE41-NEXT:    psllw $8, %xmm1
3502; SSE41-NEXT:    paddb %xmm3, %xmm1
3503; SSE41-NEXT:    psrlw $8, %xmm1
3504; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [8,8,8,8,8,8,8,8]
3505; SSE41-NEXT:    pcmpgtw %xmm1, %xmm0
3506; SSE41-NEXT:    retq
3507;
3508; AVX1-LABEL: ult_8_v8i16:
3509; AVX1:       # %bb.0:
3510; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
3511; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
3512; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
3513; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
3514; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
3515; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
3516; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
3517; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
3518; AVX1-NEXT:    vpsllw $8, %xmm0, %xmm1
3519; AVX1-NEXT:    vpaddb %xmm0, %xmm1, %xmm0
3520; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm0
3521; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [8,8,8,8,8,8,8,8]
3522; AVX1-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
3523; AVX1-NEXT:    retq
3524;
3525; AVX2-LABEL: ult_8_v8i16:
3526; AVX2:       # %bb.0:
3527; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
3528; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
3529; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
3530; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
3531; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
3532; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
3533; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
3534; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
3535; AVX2-NEXT:    vpsllw $8, %xmm0, %xmm1
3536; AVX2-NEXT:    vpaddb %xmm0, %xmm1, %xmm0
3537; AVX2-NEXT:    vpsrlw $8, %xmm0, %xmm0
3538; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [8,8,8,8,8,8,8,8]
3539; AVX2-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
3540; AVX2-NEXT:    retq
3541;
3542; AVX512VPOPCNTDQ-LABEL: ult_8_v8i16:
3543; AVX512VPOPCNTDQ:       # %bb.0:
3544; AVX512VPOPCNTDQ-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
3545; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
3546; AVX512VPOPCNTDQ-NEXT:    vpmovdw %zmm0, %ymm0
3547; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} xmm1 = [8,8,8,8,8,8,8,8]
3548; AVX512VPOPCNTDQ-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
3549; AVX512VPOPCNTDQ-NEXT:    vzeroupper
3550; AVX512VPOPCNTDQ-NEXT:    retq
3551;
3552; AVX512VPOPCNTDQVL-LABEL: ult_8_v8i16:
3553; AVX512VPOPCNTDQVL:       # %bb.0:
3554; AVX512VPOPCNTDQVL-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
3555; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %ymm0, %ymm0
3556; AVX512VPOPCNTDQVL-NEXT:    vpmovdw %ymm0, %xmm0
3557; AVX512VPOPCNTDQVL-NEXT:    vmovdqa {{.*#+}} xmm1 = [8,8,8,8,8,8,8,8]
3558; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
3559; AVX512VPOPCNTDQVL-NEXT:    vzeroupper
3560; AVX512VPOPCNTDQVL-NEXT:    retq
3561;
3562; BITALG_NOVLX-LABEL: ult_8_v8i16:
3563; BITALG_NOVLX:       # %bb.0:
3564; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
3565; BITALG_NOVLX-NEXT:    vpopcntw %zmm0, %zmm0
3566; BITALG_NOVLX-NEXT:    vmovdqa {{.*#+}} xmm1 = [8,8,8,8,8,8,8,8]
3567; BITALG_NOVLX-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
3568; BITALG_NOVLX-NEXT:    vzeroupper
3569; BITALG_NOVLX-NEXT:    retq
3570;
3571; BITALG-LABEL: ult_8_v8i16:
3572; BITALG:       # %bb.0:
3573; BITALG-NEXT:    vpopcntw %xmm0, %xmm0
3574; BITALG-NEXT:    vpcmpltuw {{.*}}(%rip), %xmm0, %k0
3575; BITALG-NEXT:    vpmovm2w %k0, %xmm0
3576; BITALG-NEXT:    retq
; IR under test: per-lane popcount, unsigned-less-than 8, sext i1 -> i16.
3577  %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
3578  %3 = icmp ult <8 x i16> %2, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
3579  %4 = sext <8 x i1> %3 to <8 x i16>
3580  ret <8 x i16> %4
3581}
3582
; Checks codegen of sext(icmp ugt (ctpop x), splat(8)) for <8 x i16> across
; all RUN configurations.  The CHECK lines below were autogenerated by
; utils/update_llc_test_checks.py (see the NOTE at the top of the file);
; regenerate them with that script rather than editing by hand.
define <8 x i16> @ugt_8_v8i16(<8 x i16> %0) {
; SSE2-LABEL: ugt_8_v8i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    psllw $8, %xmm0
; SSE2-NEXT:    paddb %xmm1, %xmm0
; SSE2-NEXT:    psrlw $8, %xmm0
; SSE2-NEXT:    pcmpgtw {{.*}}(%rip), %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ugt_8_v8i16:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    movdqa %xmm1, %xmm0
; SSE3-NEXT:    psllw $8, %xmm0
; SSE3-NEXT:    paddb %xmm1, %xmm0
; SSE3-NEXT:    psrlw $8, %xmm0
; SSE3-NEXT:    pcmpgtw {{.*}}(%rip), %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ugt_8_v8i16:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    movdqa %xmm3, %xmm0
; SSSE3-NEXT:    psllw $8, %xmm0
; SSSE3-NEXT:    paddb %xmm3, %xmm0
; SSSE3-NEXT:    psrlw $8, %xmm0
; SSSE3-NEXT:    pcmpgtw {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ugt_8_v8i16:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    movdqa %xmm3, %xmm0
; SSE41-NEXT:    psllw $8, %xmm0
; SSE41-NEXT:    paddb %xmm3, %xmm0
; SSE41-NEXT:    psrlw $8, %xmm0
; SSE41-NEXT:    pcmpgtw {{.*}}(%rip), %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ugt_8_v8i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpsllw $8, %xmm0, %xmm1
; AVX1-NEXT:    vpaddb %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ugt_8_v8i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpsllw $8, %xmm0, %xmm1
; AVX2-NEXT:    vpaddb %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    vpsrlw $8, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ugt_8_v8i16:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ugt_8_v8i16:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %ymm0, %ymm0
; AVX512VPOPCNTDQVL-NEXT:    vpmovdw %ymm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vzeroupper
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ugt_8_v8i16:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntw %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ugt_8_v8i16:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntw %xmm0, %xmm0
; BITALG-NEXT:    vpcmpnleuw {{.*}}(%rip), %xmm0, %k0
; BITALG-NEXT:    vpmovm2w %k0, %xmm0
; BITALG-NEXT:    retq
  ; Reference IR under test: per-lane popcount, unsigned-greater-than compare
  ; against splat(8), i1 result sign-extended to an all-ones/all-zeros mask.
  %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
  %3 = icmp ugt <8 x i16> %2, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
  %4 = sext <8 x i1> %3 to <8 x i16>
  ret <8 x i16> %4
}
3737
; Checks codegen of sext(icmp ult (ctpop x), splat(9)) for <8 x i16> across
; all RUN configurations.  The CHECK lines below were autogenerated by
; utils/update_llc_test_checks.py (see the NOTE at the top of the file);
; regenerate them with that script rather than editing by hand.
define <8 x i16> @ult_9_v8i16(<8 x i16> %0) {
; SSE2-LABEL: ult_9_v8i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    psllw $8, %xmm2
; SSE2-NEXT:    paddb %xmm1, %xmm2
; SSE2-NEXT:    psrlw $8, %xmm2
; SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [9,9,9,9,9,9,9,9]
; SSE2-NEXT:    pcmpgtw %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ult_9_v8i16:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    movdqa %xmm1, %xmm2
; SSE3-NEXT:    psllw $8, %xmm2
; SSE3-NEXT:    paddb %xmm1, %xmm2
; SSE3-NEXT:    psrlw $8, %xmm2
; SSE3-NEXT:    movdqa {{.*#+}} xmm0 = [9,9,9,9,9,9,9,9]
; SSE3-NEXT:    pcmpgtw %xmm2, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ult_9_v8i16:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    movdqa %xmm3, %xmm1
; SSSE3-NEXT:    psllw $8, %xmm1
; SSSE3-NEXT:    paddb %xmm3, %xmm1
; SSSE3-NEXT:    psrlw $8, %xmm1
; SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [9,9,9,9,9,9,9,9]
; SSSE3-NEXT:    pcmpgtw %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ult_9_v8i16:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    movdqa %xmm3, %xmm1
; SSE41-NEXT:    psllw $8, %xmm1
; SSE41-NEXT:    paddb %xmm3, %xmm1
; SSE41-NEXT:    psrlw $8, %xmm1
; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [9,9,9,9,9,9,9,9]
; SSE41-NEXT:    pcmpgtw %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ult_9_v8i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpsllw $8, %xmm0, %xmm1
; AVX1-NEXT:    vpaddb %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [9,9,9,9,9,9,9,9]
; AVX1-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ult_9_v8i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpsllw $8, %xmm0, %xmm1
; AVX2-NEXT:    vpaddb %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    vpsrlw $8, %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [9,9,9,9,9,9,9,9]
; AVX2-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ult_9_v8i16:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} xmm1 = [9,9,9,9,9,9,9,9]
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ult_9_v8i16:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %ymm0, %ymm0
; AVX512VPOPCNTDQVL-NEXT:    vpmovdw %ymm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa {{.*#+}} xmm1 = [9,9,9,9,9,9,9,9]
; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vzeroupper
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ult_9_v8i16:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntw %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vmovdqa {{.*#+}} xmm1 = [9,9,9,9,9,9,9,9]
; BITALG_NOVLX-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ult_9_v8i16:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntw %xmm0, %xmm0
; BITALG-NEXT:    vpcmpltuw {{.*}}(%rip), %xmm0, %k0
; BITALG-NEXT:    vpmovm2w %k0, %xmm0
; BITALG-NEXT:    retq
  ; Reference IR under test: per-lane popcount, unsigned-less-than compare
  ; against splat(9), i1 result sign-extended to an all-ones/all-zeros mask.
  %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
  %3 = icmp ult <8 x i16> %2, <i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9>
  %4 = sext <8 x i1> %3 to <8 x i16>
  ret <8 x i16> %4
}
3901
; Checks codegen of sext(icmp ugt (ctpop x), splat(9)) for <8 x i16> across
; all RUN configurations.  The CHECK lines below were autogenerated by
; utils/update_llc_test_checks.py (see the NOTE at the top of the file);
; regenerate them with that script rather than editing by hand.
define <8 x i16> @ugt_9_v8i16(<8 x i16> %0) {
; SSE2-LABEL: ugt_9_v8i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    psllw $8, %xmm0
; SSE2-NEXT:    paddb %xmm1, %xmm0
; SSE2-NEXT:    psrlw $8, %xmm0
; SSE2-NEXT:    pcmpgtw {{.*}}(%rip), %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ugt_9_v8i16:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    movdqa %xmm1, %xmm0
; SSE3-NEXT:    psllw $8, %xmm0
; SSE3-NEXT:    paddb %xmm1, %xmm0
; SSE3-NEXT:    psrlw $8, %xmm0
; SSE3-NEXT:    pcmpgtw {{.*}}(%rip), %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ugt_9_v8i16:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    movdqa %xmm3, %xmm0
; SSSE3-NEXT:    psllw $8, %xmm0
; SSSE3-NEXT:    paddb %xmm3, %xmm0
; SSSE3-NEXT:    psrlw $8, %xmm0
; SSSE3-NEXT:    pcmpgtw {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ugt_9_v8i16:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    movdqa %xmm3, %xmm0
; SSE41-NEXT:    psllw $8, %xmm0
; SSE41-NEXT:    paddb %xmm3, %xmm0
; SSE41-NEXT:    psrlw $8, %xmm0
; SSE41-NEXT:    pcmpgtw {{.*}}(%rip), %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ugt_9_v8i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpsllw $8, %xmm0, %xmm1
; AVX1-NEXT:    vpaddb %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ugt_9_v8i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpsllw $8, %xmm0, %xmm1
; AVX2-NEXT:    vpaddb %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    vpsrlw $8, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ugt_9_v8i16:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ugt_9_v8i16:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %ymm0, %ymm0
; AVX512VPOPCNTDQVL-NEXT:    vpmovdw %ymm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vzeroupper
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ugt_9_v8i16:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntw %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ugt_9_v8i16:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntw %xmm0, %xmm0
; BITALG-NEXT:    vpcmpnleuw {{.*}}(%rip), %xmm0, %k0
; BITALG-NEXT:    vpmovm2w %k0, %xmm0
; BITALG-NEXT:    retq
  ; Reference IR under test: per-lane popcount, unsigned-greater-than compare
  ; against splat(9), i1 result sign-extended to an all-ones/all-zeros mask.
  %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
  %3 = icmp ugt <8 x i16> %2, <i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9>
  %4 = sext <8 x i1> %3 to <8 x i16>
  ret <8 x i16> %4
}
4056
; Checks codegen of sext(icmp ult (ctpop x), splat(10)) for <8 x i16> across
; all RUN configurations.  The CHECK lines below were autogenerated by
; utils/update_llc_test_checks.py (see the NOTE at the top of the file);
; regenerate them with that script rather than editing by hand.
define <8 x i16> @ult_10_v8i16(<8 x i16> %0) {
; SSE2-LABEL: ult_10_v8i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    psllw $8, %xmm2
; SSE2-NEXT:    paddb %xmm1, %xmm2
; SSE2-NEXT:    psrlw $8, %xmm2
; SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [10,10,10,10,10,10,10,10]
; SSE2-NEXT:    pcmpgtw %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ult_10_v8i16:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    movdqa %xmm1, %xmm2
; SSE3-NEXT:    psllw $8, %xmm2
; SSE3-NEXT:    paddb %xmm1, %xmm2
; SSE3-NEXT:    psrlw $8, %xmm2
; SSE3-NEXT:    movdqa {{.*#+}} xmm0 = [10,10,10,10,10,10,10,10]
; SSE3-NEXT:    pcmpgtw %xmm2, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ult_10_v8i16:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    movdqa %xmm3, %xmm1
; SSSE3-NEXT:    psllw $8, %xmm1
; SSSE3-NEXT:    paddb %xmm3, %xmm1
; SSSE3-NEXT:    psrlw $8, %xmm1
; SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [10,10,10,10,10,10,10,10]
; SSSE3-NEXT:    pcmpgtw %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ult_10_v8i16:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    movdqa %xmm3, %xmm1
; SSE41-NEXT:    psllw $8, %xmm1
; SSE41-NEXT:    paddb %xmm3, %xmm1
; SSE41-NEXT:    psrlw $8, %xmm1
; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [10,10,10,10,10,10,10,10]
; SSE41-NEXT:    pcmpgtw %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ult_10_v8i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpsllw $8, %xmm0, %xmm1
; AVX1-NEXT:    vpaddb %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [10,10,10,10,10,10,10,10]
; AVX1-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ult_10_v8i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpsllw $8, %xmm0, %xmm1
; AVX2-NEXT:    vpaddb %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    vpsrlw $8, %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [10,10,10,10,10,10,10,10]
; AVX2-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ult_10_v8i16:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} xmm1 = [10,10,10,10,10,10,10,10]
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ult_10_v8i16:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %ymm0, %ymm0
; AVX512VPOPCNTDQVL-NEXT:    vpmovdw %ymm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa {{.*#+}} xmm1 = [10,10,10,10,10,10,10,10]
; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vzeroupper
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ult_10_v8i16:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntw %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vmovdqa {{.*#+}} xmm1 = [10,10,10,10,10,10,10,10]
; BITALG_NOVLX-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ult_10_v8i16:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntw %xmm0, %xmm0
; BITALG-NEXT:    vpcmpltuw {{.*}}(%rip), %xmm0, %k0
; BITALG-NEXT:    vpmovm2w %k0, %xmm0
; BITALG-NEXT:    retq
  ; Reference IR under test: per-lane popcount, unsigned-less-than compare
  ; against splat(10), i1 result sign-extended to an all-ones/all-zeros mask.
  %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
  %3 = icmp ult <8 x i16> %2, <i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10>
  %4 = sext <8 x i1> %3 to <8 x i16>
  ret <8 x i16> %4
}
4220
4221define <8 x i16> @ugt_10_v8i16(<8 x i16> %0) {
4222; SSE2-LABEL: ugt_10_v8i16:
4223; SSE2:       # %bb.0:
4224; SSE2-NEXT:    movdqa %xmm0, %xmm1
4225; SSE2-NEXT:    psrlw $1, %xmm1
4226; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
4227; SSE2-NEXT:    psubb %xmm1, %xmm0
4228; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
4229; SSE2-NEXT:    movdqa %xmm0, %xmm2
4230; SSE2-NEXT:    pand %xmm1, %xmm2
4231; SSE2-NEXT:    psrlw $2, %xmm0
4232; SSE2-NEXT:    pand %xmm1, %xmm0
4233; SSE2-NEXT:    paddb %xmm2, %xmm0
4234; SSE2-NEXT:    movdqa %xmm0, %xmm1
4235; SSE2-NEXT:    psrlw $4, %xmm1
4236; SSE2-NEXT:    paddb %xmm0, %xmm1
4237; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
4238; SSE2-NEXT:    movdqa %xmm1, %xmm0
4239; SSE2-NEXT:    psllw $8, %xmm0
4240; SSE2-NEXT:    paddb %xmm1, %xmm0
4241; SSE2-NEXT:    psrlw $8, %xmm0
4242; SSE2-NEXT:    pcmpgtw {{.*}}(%rip), %xmm0
4243; SSE2-NEXT:    retq
4244;
4245; SSE3-LABEL: ugt_10_v8i16:
4246; SSE3:       # %bb.0:
4247; SSE3-NEXT:    movdqa %xmm0, %xmm1
4248; SSE3-NEXT:    psrlw $1, %xmm1
4249; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
4250; SSE3-NEXT:    psubb %xmm1, %xmm0
4251; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
4252; SSE3-NEXT:    movdqa %xmm0, %xmm2
4253; SSE3-NEXT:    pand %xmm1, %xmm2
4254; SSE3-NEXT:    psrlw $2, %xmm0
4255; SSE3-NEXT:    pand %xmm1, %xmm0
4256; SSE3-NEXT:    paddb %xmm2, %xmm0
4257; SSE3-NEXT:    movdqa %xmm0, %xmm1
4258; SSE3-NEXT:    psrlw $4, %xmm1
4259; SSE3-NEXT:    paddb %xmm0, %xmm1
4260; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
4261; SSE3-NEXT:    movdqa %xmm1, %xmm0
4262; SSE3-NEXT:    psllw $8, %xmm0
4263; SSE3-NEXT:    paddb %xmm1, %xmm0
4264; SSE3-NEXT:    psrlw $8, %xmm0
4265; SSE3-NEXT:    pcmpgtw {{.*}}(%rip), %xmm0
4266; SSE3-NEXT:    retq
4267;
4268; SSSE3-LABEL: ugt_10_v8i16:
4269; SSSE3:       # %bb.0:
4270; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
4271; SSSE3-NEXT:    movdqa %xmm0, %xmm2
4272; SSSE3-NEXT:    pand %xmm1, %xmm2
4273; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
4274; SSSE3-NEXT:    movdqa %xmm3, %xmm4
4275; SSSE3-NEXT:    pshufb %xmm2, %xmm4
4276; SSSE3-NEXT:    psrlw $4, %xmm0
4277; SSSE3-NEXT:    pand %xmm1, %xmm0
4278; SSSE3-NEXT:    pshufb %xmm0, %xmm3
4279; SSSE3-NEXT:    paddb %xmm4, %xmm3
4280; SSSE3-NEXT:    movdqa %xmm3, %xmm0
4281; SSSE3-NEXT:    psllw $8, %xmm0
4282; SSSE3-NEXT:    paddb %xmm3, %xmm0
4283; SSSE3-NEXT:    psrlw $8, %xmm0
4284; SSSE3-NEXT:    pcmpgtw {{.*}}(%rip), %xmm0
4285; SSSE3-NEXT:    retq
4286;
4287; SSE41-LABEL: ugt_10_v8i16:
4288; SSE41:       # %bb.0:
4289; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
4290; SSE41-NEXT:    movdqa %xmm0, %xmm2
4291; SSE41-NEXT:    pand %xmm1, %xmm2
4292; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
4293; SSE41-NEXT:    movdqa %xmm3, %xmm4
4294; SSE41-NEXT:    pshufb %xmm2, %xmm4
4295; SSE41-NEXT:    psrlw $4, %xmm0
4296; SSE41-NEXT:    pand %xmm1, %xmm0
4297; SSE41-NEXT:    pshufb %xmm0, %xmm3
4298; SSE41-NEXT:    paddb %xmm4, %xmm3
4299; SSE41-NEXT:    movdqa %xmm3, %xmm0
4300; SSE41-NEXT:    psllw $8, %xmm0
4301; SSE41-NEXT:    paddb %xmm3, %xmm0
4302; SSE41-NEXT:    psrlw $8, %xmm0
4303; SSE41-NEXT:    pcmpgtw {{.*}}(%rip), %xmm0
4304; SSE41-NEXT:    retq
4305;
4306; AVX1-LABEL: ugt_10_v8i16:
4307; AVX1:       # %bb.0:
4308; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
4309; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
4310; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
4311; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
4312; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
4313; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
4314; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
4315; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
4316; AVX1-NEXT:    vpsllw $8, %xmm0, %xmm1
4317; AVX1-NEXT:    vpaddb %xmm0, %xmm1, %xmm0
4318; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm0
4319; AVX1-NEXT:    vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0
4320; AVX1-NEXT:    retq
4321;
4322; AVX2-LABEL: ugt_10_v8i16:
4323; AVX2:       # %bb.0:
4324; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
4325; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
4326; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
4327; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
4328; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
4329; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
4330; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
4331; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
4332; AVX2-NEXT:    vpsllw $8, %xmm0, %xmm1
4333; AVX2-NEXT:    vpaddb %xmm0, %xmm1, %xmm0
4334; AVX2-NEXT:    vpsrlw $8, %xmm0, %xmm0
4335; AVX2-NEXT:    vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0
4336; AVX2-NEXT:    retq
4337;
4338; AVX512VPOPCNTDQ-LABEL: ugt_10_v8i16:
4339; AVX512VPOPCNTDQ:       # %bb.0:
4340; AVX512VPOPCNTDQ-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
4341; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
4342; AVX512VPOPCNTDQ-NEXT:    vpmovdw %zmm0, %ymm0
4343; AVX512VPOPCNTDQ-NEXT:    vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0
4344; AVX512VPOPCNTDQ-NEXT:    vzeroupper
4345; AVX512VPOPCNTDQ-NEXT:    retq
4346;
4347; AVX512VPOPCNTDQVL-LABEL: ugt_10_v8i16:
4348; AVX512VPOPCNTDQVL:       # %bb.0:
4349; AVX512VPOPCNTDQVL-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
4350; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %ymm0, %ymm0
4351; AVX512VPOPCNTDQVL-NEXT:    vpmovdw %ymm0, %xmm0
4352; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0
4353; AVX512VPOPCNTDQVL-NEXT:    vzeroupper
4354; AVX512VPOPCNTDQVL-NEXT:    retq
4355;
4356; BITALG_NOVLX-LABEL: ugt_10_v8i16:
4357; BITALG_NOVLX:       # %bb.0:
4358; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
4359; BITALG_NOVLX-NEXT:    vpopcntw %zmm0, %zmm0
4360; BITALG_NOVLX-NEXT:    vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0
4361; BITALG_NOVLX-NEXT:    vzeroupper
4362; BITALG_NOVLX-NEXT:    retq
4363;
4364; BITALG-LABEL: ugt_10_v8i16:
4365; BITALG:       # %bb.0:
4366; BITALG-NEXT:    vpopcntw %xmm0, %xmm0
4367; BITALG-NEXT:    vpcmpnleuw {{.*}}(%rip), %xmm0, %k0
4368; BITALG-NEXT:    vpmovm2w %k0, %xmm0
4369; BITALG-NEXT:    retq
4370  %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
4371  %3 = icmp ugt <8 x i16> %2, <i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10>
4372  %4 = sext <8 x i1> %3 to <8 x i16>
4373  ret <8 x i16> %4
4374}
4375
4376define <8 x i16> @ult_11_v8i16(<8 x i16> %0) {
4377; SSE2-LABEL: ult_11_v8i16:
4378; SSE2:       # %bb.0:
4379; SSE2-NEXT:    movdqa %xmm0, %xmm1
4380; SSE2-NEXT:    psrlw $1, %xmm1
4381; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
4382; SSE2-NEXT:    psubb %xmm1, %xmm0
4383; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
4384; SSE2-NEXT:    movdqa %xmm0, %xmm2
4385; SSE2-NEXT:    pand %xmm1, %xmm2
4386; SSE2-NEXT:    psrlw $2, %xmm0
4387; SSE2-NEXT:    pand %xmm1, %xmm0
4388; SSE2-NEXT:    paddb %xmm2, %xmm0
4389; SSE2-NEXT:    movdqa %xmm0, %xmm1
4390; SSE2-NEXT:    psrlw $4, %xmm1
4391; SSE2-NEXT:    paddb %xmm0, %xmm1
4392; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
4393; SSE2-NEXT:    movdqa %xmm1, %xmm2
4394; SSE2-NEXT:    psllw $8, %xmm2
4395; SSE2-NEXT:    paddb %xmm1, %xmm2
4396; SSE2-NEXT:    psrlw $8, %xmm2
4397; SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [11,11,11,11,11,11,11,11]
4398; SSE2-NEXT:    pcmpgtw %xmm2, %xmm0
4399; SSE2-NEXT:    retq
4400;
4401; SSE3-LABEL: ult_11_v8i16:
4402; SSE3:       # %bb.0:
4403; SSE3-NEXT:    movdqa %xmm0, %xmm1
4404; SSE3-NEXT:    psrlw $1, %xmm1
4405; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
4406; SSE3-NEXT:    psubb %xmm1, %xmm0
4407; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
4408; SSE3-NEXT:    movdqa %xmm0, %xmm2
4409; SSE3-NEXT:    pand %xmm1, %xmm2
4410; SSE3-NEXT:    psrlw $2, %xmm0
4411; SSE3-NEXT:    pand %xmm1, %xmm0
4412; SSE3-NEXT:    paddb %xmm2, %xmm0
4413; SSE3-NEXT:    movdqa %xmm0, %xmm1
4414; SSE3-NEXT:    psrlw $4, %xmm1
4415; SSE3-NEXT:    paddb %xmm0, %xmm1
4416; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
4417; SSE3-NEXT:    movdqa %xmm1, %xmm2
4418; SSE3-NEXT:    psllw $8, %xmm2
4419; SSE3-NEXT:    paddb %xmm1, %xmm2
4420; SSE3-NEXT:    psrlw $8, %xmm2
4421; SSE3-NEXT:    movdqa {{.*#+}} xmm0 = [11,11,11,11,11,11,11,11]
4422; SSE3-NEXT:    pcmpgtw %xmm2, %xmm0
4423; SSE3-NEXT:    retq
4424;
4425; SSSE3-LABEL: ult_11_v8i16:
4426; SSSE3:       # %bb.0:
4427; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
4428; SSSE3-NEXT:    movdqa %xmm0, %xmm2
4429; SSSE3-NEXT:    pand %xmm1, %xmm2
4430; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
4431; SSSE3-NEXT:    movdqa %xmm3, %xmm4
4432; SSSE3-NEXT:    pshufb %xmm2, %xmm4
4433; SSSE3-NEXT:    psrlw $4, %xmm0
4434; SSSE3-NEXT:    pand %xmm1, %xmm0
4435; SSSE3-NEXT:    pshufb %xmm0, %xmm3
4436; SSSE3-NEXT:    paddb %xmm4, %xmm3
4437; SSSE3-NEXT:    movdqa %xmm3, %xmm1
4438; SSSE3-NEXT:    psllw $8, %xmm1
4439; SSSE3-NEXT:    paddb %xmm3, %xmm1
4440; SSSE3-NEXT:    psrlw $8, %xmm1
4441; SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [11,11,11,11,11,11,11,11]
4442; SSSE3-NEXT:    pcmpgtw %xmm1, %xmm0
4443; SSSE3-NEXT:    retq
4444;
4445; SSE41-LABEL: ult_11_v8i16:
4446; SSE41:       # %bb.0:
4447; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
4448; SSE41-NEXT:    movdqa %xmm0, %xmm2
4449; SSE41-NEXT:    pand %xmm1, %xmm2
4450; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
4451; SSE41-NEXT:    movdqa %xmm3, %xmm4
4452; SSE41-NEXT:    pshufb %xmm2, %xmm4
4453; SSE41-NEXT:    psrlw $4, %xmm0
4454; SSE41-NEXT:    pand %xmm1, %xmm0
4455; SSE41-NEXT:    pshufb %xmm0, %xmm3
4456; SSE41-NEXT:    paddb %xmm4, %xmm3
4457; SSE41-NEXT:    movdqa %xmm3, %xmm1
4458; SSE41-NEXT:    psllw $8, %xmm1
4459; SSE41-NEXT:    paddb %xmm3, %xmm1
4460; SSE41-NEXT:    psrlw $8, %xmm1
4461; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [11,11,11,11,11,11,11,11]
4462; SSE41-NEXT:    pcmpgtw %xmm1, %xmm0
4463; SSE41-NEXT:    retq
4464;
4465; AVX1-LABEL: ult_11_v8i16:
4466; AVX1:       # %bb.0:
4467; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
4468; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
4469; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
4470; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
4471; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
4472; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
4473; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
4474; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
4475; AVX1-NEXT:    vpsllw $8, %xmm0, %xmm1
4476; AVX1-NEXT:    vpaddb %xmm0, %xmm1, %xmm0
4477; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm0
4478; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [11,11,11,11,11,11,11,11]
4479; AVX1-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
4480; AVX1-NEXT:    retq
4481;
4482; AVX2-LABEL: ult_11_v8i16:
4483; AVX2:       # %bb.0:
4484; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
4485; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
4486; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
4487; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
4488; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
4489; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
4490; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
4491; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
4492; AVX2-NEXT:    vpsllw $8, %xmm0, %xmm1
4493; AVX2-NEXT:    vpaddb %xmm0, %xmm1, %xmm0
4494; AVX2-NEXT:    vpsrlw $8, %xmm0, %xmm0
4495; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [11,11,11,11,11,11,11,11]
4496; AVX2-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
4497; AVX2-NEXT:    retq
4498;
4499; AVX512VPOPCNTDQ-LABEL: ult_11_v8i16:
4500; AVX512VPOPCNTDQ:       # %bb.0:
4501; AVX512VPOPCNTDQ-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
4502; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
4503; AVX512VPOPCNTDQ-NEXT:    vpmovdw %zmm0, %ymm0
4504; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} xmm1 = [11,11,11,11,11,11,11,11]
4505; AVX512VPOPCNTDQ-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
4506; AVX512VPOPCNTDQ-NEXT:    vzeroupper
4507; AVX512VPOPCNTDQ-NEXT:    retq
4508;
4509; AVX512VPOPCNTDQVL-LABEL: ult_11_v8i16:
4510; AVX512VPOPCNTDQVL:       # %bb.0:
4511; AVX512VPOPCNTDQVL-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
4512; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %ymm0, %ymm0
4513; AVX512VPOPCNTDQVL-NEXT:    vpmovdw %ymm0, %xmm0
4514; AVX512VPOPCNTDQVL-NEXT:    vmovdqa {{.*#+}} xmm1 = [11,11,11,11,11,11,11,11]
4515; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
4516; AVX512VPOPCNTDQVL-NEXT:    vzeroupper
4517; AVX512VPOPCNTDQVL-NEXT:    retq
4518;
4519; BITALG_NOVLX-LABEL: ult_11_v8i16:
4520; BITALG_NOVLX:       # %bb.0:
4521; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
4522; BITALG_NOVLX-NEXT:    vpopcntw %zmm0, %zmm0
4523; BITALG_NOVLX-NEXT:    vmovdqa {{.*#+}} xmm1 = [11,11,11,11,11,11,11,11]
4524; BITALG_NOVLX-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
4525; BITALG_NOVLX-NEXT:    vzeroupper
4526; BITALG_NOVLX-NEXT:    retq
4527;
4528; BITALG-LABEL: ult_11_v8i16:
4529; BITALG:       # %bb.0:
4530; BITALG-NEXT:    vpopcntw %xmm0, %xmm0
4531; BITALG-NEXT:    vpcmpltuw {{.*}}(%rip), %xmm0, %k0
4532; BITALG-NEXT:    vpmovm2w %k0, %xmm0
4533; BITALG-NEXT:    retq
4534  %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
4535  %3 = icmp ult <8 x i16> %2, <i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11>
4536  %4 = sext <8 x i1> %3 to <8 x i16>
4537  ret <8 x i16> %4
4538}
4539
; Checks lowering of: popcount each i16 lane, compare ugt 11, sext the i1 mask.
; The CHECK lines below are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script instead of editing by hand.
define <8 x i16> @ugt_11_v8i16(<8 x i16> %0) {
; SSE2-LABEL: ugt_11_v8i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    psllw $8, %xmm0
; SSE2-NEXT:    paddb %xmm1, %xmm0
; SSE2-NEXT:    psrlw $8, %xmm0
; SSE2-NEXT:    pcmpgtw {{.*}}(%rip), %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ugt_11_v8i16:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    movdqa %xmm1, %xmm0
; SSE3-NEXT:    psllw $8, %xmm0
; SSE3-NEXT:    paddb %xmm1, %xmm0
; SSE3-NEXT:    psrlw $8, %xmm0
; SSE3-NEXT:    pcmpgtw {{.*}}(%rip), %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ugt_11_v8i16:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    movdqa %xmm3, %xmm0
; SSSE3-NEXT:    psllw $8, %xmm0
; SSSE3-NEXT:    paddb %xmm3, %xmm0
; SSSE3-NEXT:    psrlw $8, %xmm0
; SSSE3-NEXT:    pcmpgtw {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ugt_11_v8i16:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    movdqa %xmm3, %xmm0
; SSE41-NEXT:    psllw $8, %xmm0
; SSE41-NEXT:    paddb %xmm3, %xmm0
; SSE41-NEXT:    psrlw $8, %xmm0
; SSE41-NEXT:    pcmpgtw {{.*}}(%rip), %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ugt_11_v8i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpsllw $8, %xmm0, %xmm1
; AVX1-NEXT:    vpaddb %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ugt_11_v8i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpsllw $8, %xmm0, %xmm1
; AVX2-NEXT:    vpaddb %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    vpsrlw $8, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ugt_11_v8i16:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ugt_11_v8i16:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %ymm0, %ymm0
; AVX512VPOPCNTDQVL-NEXT:    vpmovdw %ymm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vzeroupper
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ugt_11_v8i16:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntw %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ugt_11_v8i16:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntw %xmm0, %xmm0
; BITALG-NEXT:    vpcmpnleuw {{.*}}(%rip), %xmm0, %k0
; BITALG-NEXT:    vpmovm2w %k0, %xmm0
; BITALG-NEXT:    retq
  %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
  %3 = icmp ugt <8 x i16> %2, <i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11>
  %4 = sext <8 x i1> %3 to <8 x i16>
  ret <8 x i16> %4
}
4694
; Checks lowering of: popcount each i16 lane, compare ult 12, sext the i1 mask.
; The CHECK lines below are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script instead of editing by hand.
define <8 x i16> @ult_12_v8i16(<8 x i16> %0) {
; SSE2-LABEL: ult_12_v8i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    psllw $8, %xmm2
; SSE2-NEXT:    paddb %xmm1, %xmm2
; SSE2-NEXT:    psrlw $8, %xmm2
; SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [12,12,12,12,12,12,12,12]
; SSE2-NEXT:    pcmpgtw %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ult_12_v8i16:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    movdqa %xmm1, %xmm2
; SSE3-NEXT:    psllw $8, %xmm2
; SSE3-NEXT:    paddb %xmm1, %xmm2
; SSE3-NEXT:    psrlw $8, %xmm2
; SSE3-NEXT:    movdqa {{.*#+}} xmm0 = [12,12,12,12,12,12,12,12]
; SSE3-NEXT:    pcmpgtw %xmm2, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ult_12_v8i16:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    movdqa %xmm3, %xmm1
; SSSE3-NEXT:    psllw $8, %xmm1
; SSSE3-NEXT:    paddb %xmm3, %xmm1
; SSSE3-NEXT:    psrlw $8, %xmm1
; SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [12,12,12,12,12,12,12,12]
; SSSE3-NEXT:    pcmpgtw %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ult_12_v8i16:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    movdqa %xmm3, %xmm1
; SSE41-NEXT:    psllw $8, %xmm1
; SSE41-NEXT:    paddb %xmm3, %xmm1
; SSE41-NEXT:    psrlw $8, %xmm1
; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [12,12,12,12,12,12,12,12]
; SSE41-NEXT:    pcmpgtw %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ult_12_v8i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpsllw $8, %xmm0, %xmm1
; AVX1-NEXT:    vpaddb %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [12,12,12,12,12,12,12,12]
; AVX1-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ult_12_v8i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpsllw $8, %xmm0, %xmm1
; AVX2-NEXT:    vpaddb %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    vpsrlw $8, %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [12,12,12,12,12,12,12,12]
; AVX2-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ult_12_v8i16:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} xmm1 = [12,12,12,12,12,12,12,12]
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ult_12_v8i16:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %ymm0, %ymm0
; AVX512VPOPCNTDQVL-NEXT:    vpmovdw %ymm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa {{.*#+}} xmm1 = [12,12,12,12,12,12,12,12]
; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vzeroupper
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ult_12_v8i16:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntw %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vmovdqa {{.*#+}} xmm1 = [12,12,12,12,12,12,12,12]
; BITALG_NOVLX-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ult_12_v8i16:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntw %xmm0, %xmm0
; BITALG-NEXT:    vpcmpltuw {{.*}}(%rip), %xmm0, %k0
; BITALG-NEXT:    vpmovm2w %k0, %xmm0
; BITALG-NEXT:    retq
  %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
  %3 = icmp ult <8 x i16> %2, <i16 12, i16 12, i16 12, i16 12, i16 12, i16 12, i16 12, i16 12>
  %4 = sext <8 x i1> %3 to <8 x i16>
  ret <8 x i16> %4
}
4858
; Checks lowering of: popcount each i16 lane, compare ugt 12, sext the i1 mask.
; The CHECK lines below are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script instead of editing by hand.
define <8 x i16> @ugt_12_v8i16(<8 x i16> %0) {
; SSE2-LABEL: ugt_12_v8i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    psllw $8, %xmm0
; SSE2-NEXT:    paddb %xmm1, %xmm0
; SSE2-NEXT:    psrlw $8, %xmm0
; SSE2-NEXT:    pcmpgtw {{.*}}(%rip), %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ugt_12_v8i16:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    movdqa %xmm1, %xmm0
; SSE3-NEXT:    psllw $8, %xmm0
; SSE3-NEXT:    paddb %xmm1, %xmm0
; SSE3-NEXT:    psrlw $8, %xmm0
; SSE3-NEXT:    pcmpgtw {{.*}}(%rip), %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ugt_12_v8i16:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    movdqa %xmm3, %xmm0
; SSSE3-NEXT:    psllw $8, %xmm0
; SSSE3-NEXT:    paddb %xmm3, %xmm0
; SSSE3-NEXT:    psrlw $8, %xmm0
; SSSE3-NEXT:    pcmpgtw {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ugt_12_v8i16:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    movdqa %xmm3, %xmm0
; SSE41-NEXT:    psllw $8, %xmm0
; SSE41-NEXT:    paddb %xmm3, %xmm0
; SSE41-NEXT:    psrlw $8, %xmm0
; SSE41-NEXT:    pcmpgtw {{.*}}(%rip), %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ugt_12_v8i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpsllw $8, %xmm0, %xmm1
; AVX1-NEXT:    vpaddb %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ugt_12_v8i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpsllw $8, %xmm0, %xmm1
; AVX2-NEXT:    vpaddb %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    vpsrlw $8, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ugt_12_v8i16:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ugt_12_v8i16:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %ymm0, %ymm0
; AVX512VPOPCNTDQVL-NEXT:    vpmovdw %ymm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vzeroupper
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ugt_12_v8i16:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntw %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ugt_12_v8i16:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntw %xmm0, %xmm0
; BITALG-NEXT:    vpcmpnleuw {{.*}}(%rip), %xmm0, %k0
; BITALG-NEXT:    vpmovm2w %k0, %xmm0
; BITALG-NEXT:    retq
  %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
  %3 = icmp ugt <8 x i16> %2, <i16 12, i16 12, i16 12, i16 12, i16 12, i16 12, i16 12, i16 12>
  %4 = sext <8 x i1> %3 to <8 x i16>
  ret <8 x i16> %4
}
5013
define <8 x i16> @ult_13_v8i16(<8 x i16> %0) {
; Pins the lowering of sext(icmp ult (ctpop <8 x i16> %0), splat 13) per RUN
; configuration: SSE2/SSE3 use the shift-and-mask bit-twiddling popcount,
; SSSE3/SSE41/AVX use the pshufb nibble-LUT popcount, AVX512VPOPCNTDQ widens
; to i32 vpopcntd, and BITALG uses vpopcntw with an unsigned mask compare.
; NOTE: CHECK lines are autogenerated -- regenerate with
; utils/update_llc_test_checks.py rather than editing by hand.
; SSE2-LABEL: ult_13_v8i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    psllw $8, %xmm2
; SSE2-NEXT:    paddb %xmm1, %xmm2
; SSE2-NEXT:    psrlw $8, %xmm2
; SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [13,13,13,13,13,13,13,13]
; SSE2-NEXT:    pcmpgtw %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ult_13_v8i16:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    movdqa %xmm1, %xmm2
; SSE3-NEXT:    psllw $8, %xmm2
; SSE3-NEXT:    paddb %xmm1, %xmm2
; SSE3-NEXT:    psrlw $8, %xmm2
; SSE3-NEXT:    movdqa {{.*#+}} xmm0 = [13,13,13,13,13,13,13,13]
; SSE3-NEXT:    pcmpgtw %xmm2, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ult_13_v8i16:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    movdqa %xmm3, %xmm1
; SSSE3-NEXT:    psllw $8, %xmm1
; SSSE3-NEXT:    paddb %xmm3, %xmm1
; SSSE3-NEXT:    psrlw $8, %xmm1
; SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [13,13,13,13,13,13,13,13]
; SSSE3-NEXT:    pcmpgtw %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ult_13_v8i16:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    movdqa %xmm3, %xmm1
; SSE41-NEXT:    psllw $8, %xmm1
; SSE41-NEXT:    paddb %xmm3, %xmm1
; SSE41-NEXT:    psrlw $8, %xmm1
; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [13,13,13,13,13,13,13,13]
; SSE41-NEXT:    pcmpgtw %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ult_13_v8i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpsllw $8, %xmm0, %xmm1
; AVX1-NEXT:    vpaddb %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [13,13,13,13,13,13,13,13]
; AVX1-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ult_13_v8i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpsllw $8, %xmm0, %xmm1
; AVX2-NEXT:    vpaddb %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    vpsrlw $8, %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [13,13,13,13,13,13,13,13]
; AVX2-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ult_13_v8i16:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} xmm1 = [13,13,13,13,13,13,13,13]
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ult_13_v8i16:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %ymm0, %ymm0
; AVX512VPOPCNTDQVL-NEXT:    vpmovdw %ymm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa {{.*#+}} xmm1 = [13,13,13,13,13,13,13,13]
; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vzeroupper
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ult_13_v8i16:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntw %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vmovdqa {{.*#+}} xmm1 = [13,13,13,13,13,13,13,13]
; BITALG_NOVLX-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ult_13_v8i16:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntw %xmm0, %xmm0
; BITALG-NEXT:    vpcmpltuw {{.*}}(%rip), %xmm0, %k0
; BITALG-NEXT:    vpmovm2w %k0, %xmm0
; BITALG-NEXT:    retq
  %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
  %3 = icmp ult <8 x i16> %2, <i16 13, i16 13, i16 13, i16 13, i16 13, i16 13, i16 13, i16 13>
  %4 = sext <8 x i1> %3 to <8 x i16>
  ret <8 x i16> %4
}
5177
define <8 x i16> @ugt_13_v8i16(<8 x i16> %0) {
; Pins the lowering of sext(icmp ugt (ctpop <8 x i16> %0), splat 13).
; Unlike the ult variants, the splat-13 comparison constant here is folded
; into a memory operand of pcmpgtw/vpcmpgtw ({{.*}}(%rip)) rather than
; materialized in a register; BITALG checks vpcmpnleuw (unsigned !<=, i.e.
; ugt) into a mask register.
; NOTE: CHECK lines are autogenerated -- regenerate with
; utils/update_llc_test_checks.py rather than editing by hand.
; SSE2-LABEL: ugt_13_v8i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    psllw $8, %xmm0
; SSE2-NEXT:    paddb %xmm1, %xmm0
; SSE2-NEXT:    psrlw $8, %xmm0
; SSE2-NEXT:    pcmpgtw {{.*}}(%rip), %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ugt_13_v8i16:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    movdqa %xmm1, %xmm0
; SSE3-NEXT:    psllw $8, %xmm0
; SSE3-NEXT:    paddb %xmm1, %xmm0
; SSE3-NEXT:    psrlw $8, %xmm0
; SSE3-NEXT:    pcmpgtw {{.*}}(%rip), %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ugt_13_v8i16:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    movdqa %xmm3, %xmm0
; SSSE3-NEXT:    psllw $8, %xmm0
; SSSE3-NEXT:    paddb %xmm3, %xmm0
; SSSE3-NEXT:    psrlw $8, %xmm0
; SSSE3-NEXT:    pcmpgtw {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ugt_13_v8i16:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    movdqa %xmm3, %xmm0
; SSE41-NEXT:    psllw $8, %xmm0
; SSE41-NEXT:    paddb %xmm3, %xmm0
; SSE41-NEXT:    psrlw $8, %xmm0
; SSE41-NEXT:    pcmpgtw {{.*}}(%rip), %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ugt_13_v8i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpsllw $8, %xmm0, %xmm1
; AVX1-NEXT:    vpaddb %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ugt_13_v8i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpsllw $8, %xmm0, %xmm1
; AVX2-NEXT:    vpaddb %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    vpsrlw $8, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ugt_13_v8i16:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ugt_13_v8i16:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %ymm0, %ymm0
; AVX512VPOPCNTDQVL-NEXT:    vpmovdw %ymm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vzeroupper
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ugt_13_v8i16:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntw %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ugt_13_v8i16:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntw %xmm0, %xmm0
; BITALG-NEXT:    vpcmpnleuw {{.*}}(%rip), %xmm0, %k0
; BITALG-NEXT:    vpmovm2w %k0, %xmm0
; BITALG-NEXT:    retq
  %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
  %3 = icmp ugt <8 x i16> %2, <i16 13, i16 13, i16 13, i16 13, i16 13, i16 13, i16 13, i16 13>
  %4 = sext <8 x i1> %3 to <8 x i16>
  ret <8 x i16> %4
}
5332
define <8 x i16> @ult_14_v8i16(<8 x i16> %0) {
; Pins the lowering of sext(icmp ult (ctpop <8 x i16> %0), splat 14) --
; identical codegen shape to ult_13_v8i16 with the splat constant 14.
; ult against a register constant lowers to a swapped-operand signed
; pcmpgtw/vpcmpgtw (constant > popcount), which is valid because popcounts
; are small non-negative values; BITALG checks vpcmpltuw into a mask.
; NOTE: CHECK lines are autogenerated -- regenerate with
; utils/update_llc_test_checks.py rather than editing by hand.
; SSE2-LABEL: ult_14_v8i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    psllw $8, %xmm2
; SSE2-NEXT:    paddb %xmm1, %xmm2
; SSE2-NEXT:    psrlw $8, %xmm2
; SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [14,14,14,14,14,14,14,14]
; SSE2-NEXT:    pcmpgtw %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ult_14_v8i16:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    movdqa %xmm1, %xmm2
; SSE3-NEXT:    psllw $8, %xmm2
; SSE3-NEXT:    paddb %xmm1, %xmm2
; SSE3-NEXT:    psrlw $8, %xmm2
; SSE3-NEXT:    movdqa {{.*#+}} xmm0 = [14,14,14,14,14,14,14,14]
; SSE3-NEXT:    pcmpgtw %xmm2, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ult_14_v8i16:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    movdqa %xmm3, %xmm1
; SSSE3-NEXT:    psllw $8, %xmm1
; SSSE3-NEXT:    paddb %xmm3, %xmm1
; SSSE3-NEXT:    psrlw $8, %xmm1
; SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [14,14,14,14,14,14,14,14]
; SSSE3-NEXT:    pcmpgtw %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ult_14_v8i16:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    movdqa %xmm3, %xmm1
; SSE41-NEXT:    psllw $8, %xmm1
; SSE41-NEXT:    paddb %xmm3, %xmm1
; SSE41-NEXT:    psrlw $8, %xmm1
; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [14,14,14,14,14,14,14,14]
; SSE41-NEXT:    pcmpgtw %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ult_14_v8i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpsllw $8, %xmm0, %xmm1
; AVX1-NEXT:    vpaddb %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [14,14,14,14,14,14,14,14]
; AVX1-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ult_14_v8i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpsllw $8, %xmm0, %xmm1
; AVX2-NEXT:    vpaddb %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    vpsrlw $8, %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [14,14,14,14,14,14,14,14]
; AVX2-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ult_14_v8i16:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} xmm1 = [14,14,14,14,14,14,14,14]
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ult_14_v8i16:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %ymm0, %ymm0
; AVX512VPOPCNTDQVL-NEXT:    vpmovdw %ymm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa {{.*#+}} xmm1 = [14,14,14,14,14,14,14,14]
; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vzeroupper
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ult_14_v8i16:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntw %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vmovdqa {{.*#+}} xmm1 = [14,14,14,14,14,14,14,14]
; BITALG_NOVLX-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ult_14_v8i16:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntw %xmm0, %xmm0
; BITALG-NEXT:    vpcmpltuw {{.*}}(%rip), %xmm0, %k0
; BITALG-NEXT:    vpmovm2w %k0, %xmm0
; BITALG-NEXT:    retq
  %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
  %3 = icmp ult <8 x i16> %2, <i16 14, i16 14, i16 14, i16 14, i16 14, i16 14, i16 14, i16 14>
  %4 = sext <8 x i1> %3 to <8 x i16>
  ret <8 x i16> %4
}
5496
define <8 x i16> @ugt_14_v8i16(<8 x i16> %0) {
; Pins the lowering of sext(icmp ugt (ctpop <8 x i16> %0), splat 14) --
; identical codegen shape to ugt_13_v8i16, with the splat-14 comparison
; constant folded into a (%rip)-relative memory operand of
; pcmpgtw/vpcmpgtw; BITALG checks vpcmpnleuw into a mask register.
; NOTE: CHECK lines are autogenerated -- regenerate with
; utils/update_llc_test_checks.py rather than editing by hand.
; SSE2-LABEL: ugt_14_v8i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    psllw $8, %xmm0
; SSE2-NEXT:    paddb %xmm1, %xmm0
; SSE2-NEXT:    psrlw $8, %xmm0
; SSE2-NEXT:    pcmpgtw {{.*}}(%rip), %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ugt_14_v8i16:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    movdqa %xmm1, %xmm0
; SSE3-NEXT:    psllw $8, %xmm0
; SSE3-NEXT:    paddb %xmm1, %xmm0
; SSE3-NEXT:    psrlw $8, %xmm0
; SSE3-NEXT:    pcmpgtw {{.*}}(%rip), %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ugt_14_v8i16:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    movdqa %xmm3, %xmm0
; SSSE3-NEXT:    psllw $8, %xmm0
; SSSE3-NEXT:    paddb %xmm3, %xmm0
; SSSE3-NEXT:    psrlw $8, %xmm0
; SSSE3-NEXT:    pcmpgtw {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ugt_14_v8i16:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    movdqa %xmm3, %xmm0
; SSE41-NEXT:    psllw $8, %xmm0
; SSE41-NEXT:    paddb %xmm3, %xmm0
; SSE41-NEXT:    psrlw $8, %xmm0
; SSE41-NEXT:    pcmpgtw {{.*}}(%rip), %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ugt_14_v8i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpsllw $8, %xmm0, %xmm1
; AVX1-NEXT:    vpaddb %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ugt_14_v8i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpsllw $8, %xmm0, %xmm1
; AVX2-NEXT:    vpaddb %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    vpsrlw $8, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ugt_14_v8i16:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ugt_14_v8i16:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %ymm0, %ymm0
; AVX512VPOPCNTDQVL-NEXT:    vpmovdw %ymm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vzeroupper
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ugt_14_v8i16:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntw %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ugt_14_v8i16:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntw %xmm0, %xmm0
; BITALG-NEXT:    vpcmpnleuw {{.*}}(%rip), %xmm0, %k0
; BITALG-NEXT:    vpmovm2w %k0, %xmm0
; BITALG-NEXT:    retq
  %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
  %3 = icmp ugt <8 x i16> %2, <i16 14, i16 14, i16 14, i16 14, i16 14, i16 14, i16 14, i16 14>
  %4 = sext <8 x i1> %3 to <8 x i16>
  ret <8 x i16> %4
}
5651
5652define <8 x i16> @ult_15_v8i16(<8 x i16> %0) {
5653; SSE2-LABEL: ult_15_v8i16:
5654; SSE2:       # %bb.0:
5655; SSE2-NEXT:    movdqa %xmm0, %xmm1
5656; SSE2-NEXT:    psrlw $1, %xmm1
5657; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
5658; SSE2-NEXT:    psubb %xmm1, %xmm0
5659; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
5660; SSE2-NEXT:    movdqa %xmm0, %xmm2
5661; SSE2-NEXT:    pand %xmm1, %xmm2
5662; SSE2-NEXT:    psrlw $2, %xmm0
5663; SSE2-NEXT:    pand %xmm1, %xmm0
5664; SSE2-NEXT:    paddb %xmm2, %xmm0
5665; SSE2-NEXT:    movdqa %xmm0, %xmm1
5666; SSE2-NEXT:    psrlw $4, %xmm1
5667; SSE2-NEXT:    paddb %xmm0, %xmm1
5668; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
5669; SSE2-NEXT:    movdqa %xmm1, %xmm2
5670; SSE2-NEXT:    psllw $8, %xmm2
5671; SSE2-NEXT:    paddb %xmm1, %xmm2
5672; SSE2-NEXT:    psrlw $8, %xmm2
5673; SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [15,15,15,15,15,15,15,15]
5674; SSE2-NEXT:    pcmpgtw %xmm2, %xmm0
5675; SSE2-NEXT:    retq
5676;
5677; SSE3-LABEL: ult_15_v8i16:
5678; SSE3:       # %bb.0:
5679; SSE3-NEXT:    movdqa %xmm0, %xmm1
5680; SSE3-NEXT:    psrlw $1, %xmm1
5681; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
5682; SSE3-NEXT:    psubb %xmm1, %xmm0
5683; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
5684; SSE3-NEXT:    movdqa %xmm0, %xmm2
5685; SSE3-NEXT:    pand %xmm1, %xmm2
5686; SSE3-NEXT:    psrlw $2, %xmm0
5687; SSE3-NEXT:    pand %xmm1, %xmm0
5688; SSE3-NEXT:    paddb %xmm2, %xmm0
5689; SSE3-NEXT:    movdqa %xmm0, %xmm1
5690; SSE3-NEXT:    psrlw $4, %xmm1
5691; SSE3-NEXT:    paddb %xmm0, %xmm1
5692; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
5693; SSE3-NEXT:    movdqa %xmm1, %xmm2
5694; SSE3-NEXT:    psllw $8, %xmm2
5695; SSE3-NEXT:    paddb %xmm1, %xmm2
5696; SSE3-NEXT:    psrlw $8, %xmm2
5697; SSE3-NEXT:    movdqa {{.*#+}} xmm0 = [15,15,15,15,15,15,15,15]
5698; SSE3-NEXT:    pcmpgtw %xmm2, %xmm0
5699; SSE3-NEXT:    retq
5700;
5701; SSSE3-LABEL: ult_15_v8i16:
5702; SSSE3:       # %bb.0:
5703; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
5704; SSSE3-NEXT:    movdqa %xmm0, %xmm2
5705; SSSE3-NEXT:    pand %xmm1, %xmm2
5706; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
5707; SSSE3-NEXT:    movdqa %xmm3, %xmm4
5708; SSSE3-NEXT:    pshufb %xmm2, %xmm4
5709; SSSE3-NEXT:    psrlw $4, %xmm0
5710; SSSE3-NEXT:    pand %xmm1, %xmm0
5711; SSSE3-NEXT:    pshufb %xmm0, %xmm3
5712; SSSE3-NEXT:    paddb %xmm4, %xmm3
5713; SSSE3-NEXT:    movdqa %xmm3, %xmm1
5714; SSSE3-NEXT:    psllw $8, %xmm1
5715; SSSE3-NEXT:    paddb %xmm3, %xmm1
5716; SSSE3-NEXT:    psrlw $8, %xmm1
5717; SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [15,15,15,15,15,15,15,15]
5718; SSSE3-NEXT:    pcmpgtw %xmm1, %xmm0
5719; SSSE3-NEXT:    retq
5720;
5721; SSE41-LABEL: ult_15_v8i16:
5722; SSE41:       # %bb.0:
5723; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
5724; SSE41-NEXT:    movdqa %xmm0, %xmm2
5725; SSE41-NEXT:    pand %xmm1, %xmm2
5726; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
5727; SSE41-NEXT:    movdqa %xmm3, %xmm4
5728; SSE41-NEXT:    pshufb %xmm2, %xmm4
5729; SSE41-NEXT:    psrlw $4, %xmm0
5730; SSE41-NEXT:    pand %xmm1, %xmm0
5731; SSE41-NEXT:    pshufb %xmm0, %xmm3
5732; SSE41-NEXT:    paddb %xmm4, %xmm3
5733; SSE41-NEXT:    movdqa %xmm3, %xmm1
5734; SSE41-NEXT:    psllw $8, %xmm1
5735; SSE41-NEXT:    paddb %xmm3, %xmm1
5736; SSE41-NEXT:    psrlw $8, %xmm1
5737; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [15,15,15,15,15,15,15,15]
5738; SSE41-NEXT:    pcmpgtw %xmm1, %xmm0
5739; SSE41-NEXT:    retq
5740;
5741; AVX1-LABEL: ult_15_v8i16:
5742; AVX1:       # %bb.0:
5743; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
5744; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
5745; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
5746; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
5747; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
5748; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
5749; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
5750; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
5751; AVX1-NEXT:    vpsllw $8, %xmm0, %xmm1
5752; AVX1-NEXT:    vpaddb %xmm0, %xmm1, %xmm0
5753; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm0
5754; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15]
5755; AVX1-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
5756; AVX1-NEXT:    retq
5757;
5758; AVX2-LABEL: ult_15_v8i16:
5759; AVX2:       # %bb.0:
5760; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
5761; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
5762; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
5763; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
5764; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
5765; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
5766; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
5767; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
5768; AVX2-NEXT:    vpsllw $8, %xmm0, %xmm1
5769; AVX2-NEXT:    vpaddb %xmm0, %xmm1, %xmm0
5770; AVX2-NEXT:    vpsrlw $8, %xmm0, %xmm0
5771; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15]
5772; AVX2-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
5773; AVX2-NEXT:    retq
5774;
5775; AVX512VPOPCNTDQ-LABEL: ult_15_v8i16:
5776; AVX512VPOPCNTDQ:       # %bb.0:
5777; AVX512VPOPCNTDQ-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
5778; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
5779; AVX512VPOPCNTDQ-NEXT:    vpmovdw %zmm0, %ymm0
5780; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15]
5781; AVX512VPOPCNTDQ-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
5782; AVX512VPOPCNTDQ-NEXT:    vzeroupper
5783; AVX512VPOPCNTDQ-NEXT:    retq
5784;
5785; AVX512VPOPCNTDQVL-LABEL: ult_15_v8i16:
5786; AVX512VPOPCNTDQVL:       # %bb.0:
5787; AVX512VPOPCNTDQVL-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
5788; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %ymm0, %ymm0
5789; AVX512VPOPCNTDQVL-NEXT:    vpmovdw %ymm0, %xmm0
5790; AVX512VPOPCNTDQVL-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15]
5791; AVX512VPOPCNTDQVL-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
5792; AVX512VPOPCNTDQVL-NEXT:    vzeroupper
5793; AVX512VPOPCNTDQVL-NEXT:    retq
5794;
5795; BITALG_NOVLX-LABEL: ult_15_v8i16:
5796; BITALG_NOVLX:       # %bb.0:
5797; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
5798; BITALG_NOVLX-NEXT:    vpopcntw %zmm0, %zmm0
5799; BITALG_NOVLX-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15]
5800; BITALG_NOVLX-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
5801; BITALG_NOVLX-NEXT:    vzeroupper
5802; BITALG_NOVLX-NEXT:    retq
5803;
5804; BITALG-LABEL: ult_15_v8i16:
5805; BITALG:       # %bb.0:
5806; BITALG-NEXT:    vpopcntw %xmm0, %xmm0
5807; BITALG-NEXT:    vpcmpltuw {{.*}}(%rip), %xmm0, %k0
5808; BITALG-NEXT:    vpmovm2w %k0, %xmm0
5809; BITALG-NEXT:    retq
5810  %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
5811  %3 = icmp ult <8 x i16> %2, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
5812  %4 = sext <8 x i1> %3 to <8 x i16>
5813  ret <8 x i16> %4
5814}
5815
; ctpop(x) ugt 1 means "more than one bit set", i.e. (x & (x - 1)) != 0, so
; prefixes without a native vector popcount avoid the expansion entirely:
; pcmpeqd materializes all-ones (-1), paddd forms x-1, pand clears the lowest
; set bit, then compare-eq-zero is inverted (pxor with -1 / vpternlogq $15).
; The AVX512VPOPCNTDQ prefixes instead use vpopcntd followed by a compare.
5816define <4 x i32> @ugt_1_v4i32(<4 x i32> %0) {
5817; SSE-LABEL: ugt_1_v4i32:
5818; SSE:       # %bb.0:
5819; SSE-NEXT:    pcmpeqd %xmm2, %xmm2
5820; SSE-NEXT:    movdqa %xmm0, %xmm1
5821; SSE-NEXT:    paddd %xmm2, %xmm1
5822; SSE-NEXT:    pand %xmm0, %xmm1
5823; SSE-NEXT:    pxor %xmm0, %xmm0
5824; SSE-NEXT:    pcmpeqd %xmm0, %xmm1
5825; SSE-NEXT:    pxor %xmm2, %xmm1
5826; SSE-NEXT:    movdqa %xmm1, %xmm0
5827; SSE-NEXT:    retq
5828;
5829; AVX1-LABEL: ugt_1_v4i32:
5830; AVX1:       # %bb.0:
5831; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
5832; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm2
5833; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm0
5834; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
5835; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm0
5836; AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
5837; AVX1-NEXT:    retq
5838;
5839; AVX2-LABEL: ugt_1_v4i32:
5840; AVX2:       # %bb.0:
5841; AVX2-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
5842; AVX2-NEXT:    vpaddd %xmm1, %xmm0, %xmm2
5843; AVX2-NEXT:    vpand %xmm2, %xmm0, %xmm0
5844; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
5845; AVX2-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm0
5846; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
5847; AVX2-NEXT:    retq
5848;
5849; AVX512VPOPCNTDQ-LABEL: ugt_1_v4i32:
5850; AVX512VPOPCNTDQ:       # %bb.0:
5851; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
5852; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
5853; AVX512VPOPCNTDQ-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1]
5854; AVX512VPOPCNTDQ-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
5855; AVX512VPOPCNTDQ-NEXT:    vzeroupper
5856; AVX512VPOPCNTDQ-NEXT:    retq
5857;
5858; AVX512VPOPCNTDQVL-LABEL: ugt_1_v4i32:
5859; AVX512VPOPCNTDQVL:       # %bb.0:
5860; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
5861; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1
5862; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
5863; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
5864; AVX512VPOPCNTDQVL-NEXT:    retq
5865;
5866; BITALG_NOVLX-LABEL: ugt_1_v4i32:
5867; BITALG_NOVLX:       # %bb.0:
5868; BITALG_NOVLX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
5869; BITALG_NOVLX-NEXT:    vpaddd %xmm1, %xmm0, %xmm1
5870; BITALG_NOVLX-NEXT:    vpand %xmm1, %xmm0, %xmm0
5871; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
5872; BITALG_NOVLX-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
5873; BITALG_NOVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
5874; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
5875; BITALG_NOVLX-NEXT:    vzeroupper
5876; BITALG_NOVLX-NEXT:    retq
5877;
5878; BITALG-LABEL: ugt_1_v4i32:
5879; BITALG:       # %bb.0:
5880; BITALG-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
5881; BITALG-NEXT:    vpaddd %xmm1, %xmm0, %xmm1
5882; BITALG-NEXT:    vpand %xmm1, %xmm0, %xmm0
5883; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
5884; BITALG-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
5885; BITALG-NEXT:    vpternlogq $15, %xmm0, %xmm0, %xmm0
5886; BITALG-NEXT:    retq
5887  %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
5888  %3 = icmp ugt <4 x i32> %2, <i32 1, i32 1, i32 1, i32 1>
5889  %4 = sext <4 x i1> %3 to <4 x i32>
5890  ret <4 x i32> %4
5891}
5892
; ctpop(x) ult 2 means "at most one bit set", i.e. (x & (x - 1)) == 0 —
; the same clear-lowest-set-bit trick as ugt_1 above but with no final
; inversion, so the pcmpeqd-against-zero result is the answer directly.
; The AVX512VPOPCNTDQ prefixes still go through vpopcntd + compare.
5893define <4 x i32> @ult_2_v4i32(<4 x i32> %0) {
5894; SSE-LABEL: ult_2_v4i32:
5895; SSE:       # %bb.0:
5896; SSE-NEXT:    pcmpeqd %xmm1, %xmm1
5897; SSE-NEXT:    paddd %xmm0, %xmm1
5898; SSE-NEXT:    pand %xmm1, %xmm0
5899; SSE-NEXT:    pxor %xmm1, %xmm1
5900; SSE-NEXT:    pcmpeqd %xmm1, %xmm0
5901; SSE-NEXT:    retq
5902;
5903; AVX1-LABEL: ult_2_v4i32:
5904; AVX1:       # %bb.0:
5905; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
5906; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm1
5907; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
5908; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
5909; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
5910; AVX1-NEXT:    retq
5911;
5912; AVX2-LABEL: ult_2_v4i32:
5913; AVX2:       # %bb.0:
5914; AVX2-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
5915; AVX2-NEXT:    vpaddd %xmm1, %xmm0, %xmm1
5916; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
5917; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
5918; AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
5919; AVX2-NEXT:    retq
5920;
5921; AVX512VPOPCNTDQ-LABEL: ult_2_v4i32:
5922; AVX512VPOPCNTDQ:       # %bb.0:
5923; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
5924; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
5925; AVX512VPOPCNTDQ-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [2,2,2,2]
5926; AVX512VPOPCNTDQ-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
5927; AVX512VPOPCNTDQ-NEXT:    vzeroupper
5928; AVX512VPOPCNTDQ-NEXT:    retq
5929;
5930; AVX512VPOPCNTDQVL-LABEL: ult_2_v4i32:
5931; AVX512VPOPCNTDQVL:       # %bb.0:
5932; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
5933; AVX512VPOPCNTDQVL-NEXT:    vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1
5934; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
5935; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
5936; AVX512VPOPCNTDQVL-NEXT:    retq
5937;
5938; BITALG_NOVLX-LABEL: ult_2_v4i32:
5939; BITALG_NOVLX:       # %bb.0:
5940; BITALG_NOVLX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
5941; BITALG_NOVLX-NEXT:    vpaddd %xmm1, %xmm0, %xmm1
5942; BITALG_NOVLX-NEXT:    vpand %xmm1, %xmm0, %xmm0
5943; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
5944; BITALG_NOVLX-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
5945; BITALG_NOVLX-NEXT:    retq
5946;
5947; BITALG-LABEL: ult_2_v4i32:
5948; BITALG:       # %bb.0:
5949; BITALG-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
5950; BITALG-NEXT:    vpaddd %xmm1, %xmm0, %xmm1
5951; BITALG-NEXT:    vpand %xmm1, %xmm0, %xmm0
5952; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
5953; BITALG-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
5954; BITALG-NEXT:    retq
5955  %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
5956  %3 = icmp ult <4 x i32> %2, <i32 2, i32 2, i32 2, i32 2>
5957  %4 = sext <4 x i1> %3 to <4 x i32>
5958  ret <4 x i32> %4
5959}
5960
; ctpop(x) ugt 2 needs an actual population count. SSE2/SSE3 use the
; shift-and-mask bit-parallel byte popcount (psrlw/pand/psubb, then the
; 0x33 pair-sum and nibble fold); SSSE3/SSE41/AVX use the pshufb nibble
; lookup table [0,1,1,2,...]; BITALG uses vpopcntb. In every case the
; per-byte counts are widened to 32-bit lanes via psadbw against zero on
; the unpacked halves, then compared signed-greater-than against splat 2.
5961define <4 x i32> @ugt_2_v4i32(<4 x i32> %0) {
5962; SSE2-LABEL: ugt_2_v4i32:
5963; SSE2:       # %bb.0:
5964; SSE2-NEXT:    movdqa %xmm0, %xmm1
5965; SSE2-NEXT:    psrlw $1, %xmm1
5966; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
5967; SSE2-NEXT:    psubb %xmm1, %xmm0
5968; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
5969; SSE2-NEXT:    movdqa %xmm0, %xmm2
5970; SSE2-NEXT:    pand %xmm1, %xmm2
5971; SSE2-NEXT:    psrlw $2, %xmm0
5972; SSE2-NEXT:    pand %xmm1, %xmm0
5973; SSE2-NEXT:    paddb %xmm2, %xmm0
5974; SSE2-NEXT:    movdqa %xmm0, %xmm1
5975; SSE2-NEXT:    psrlw $4, %xmm1
5976; SSE2-NEXT:    paddb %xmm0, %xmm1
5977; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
5978; SSE2-NEXT:    pxor %xmm0, %xmm0
5979; SSE2-NEXT:    movdqa %xmm1, %xmm2
5980; SSE2-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
5981; SSE2-NEXT:    psadbw %xmm0, %xmm2
5982; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
5983; SSE2-NEXT:    psadbw %xmm0, %xmm1
5984; SSE2-NEXT:    packuswb %xmm2, %xmm1
5985; SSE2-NEXT:    pcmpgtd {{.*}}(%rip), %xmm1
5986; SSE2-NEXT:    movdqa %xmm1, %xmm0
5987; SSE2-NEXT:    retq
5988;
5989; SSE3-LABEL: ugt_2_v4i32:
5990; SSE3:       # %bb.0:
5991; SSE3-NEXT:    movdqa %xmm0, %xmm1
5992; SSE3-NEXT:    psrlw $1, %xmm1
5993; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
5994; SSE3-NEXT:    psubb %xmm1, %xmm0
5995; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
5996; SSE3-NEXT:    movdqa %xmm0, %xmm2
5997; SSE3-NEXT:    pand %xmm1, %xmm2
5998; SSE3-NEXT:    psrlw $2, %xmm0
5999; SSE3-NEXT:    pand %xmm1, %xmm0
6000; SSE3-NEXT:    paddb %xmm2, %xmm0
6001; SSE3-NEXT:    movdqa %xmm0, %xmm1
6002; SSE3-NEXT:    psrlw $4, %xmm1
6003; SSE3-NEXT:    paddb %xmm0, %xmm1
6004; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
6005; SSE3-NEXT:    pxor %xmm0, %xmm0
6006; SSE3-NEXT:    movdqa %xmm1, %xmm2
6007; SSE3-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
6008; SSE3-NEXT:    psadbw %xmm0, %xmm2
6009; SSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
6010; SSE3-NEXT:    psadbw %xmm0, %xmm1
6011; SSE3-NEXT:    packuswb %xmm2, %xmm1
6012; SSE3-NEXT:    pcmpgtd {{.*}}(%rip), %xmm1
6013; SSE3-NEXT:    movdqa %xmm1, %xmm0
6014; SSE3-NEXT:    retq
6015;
6016; SSSE3-LABEL: ugt_2_v4i32:
6017; SSSE3:       # %bb.0:
6018; SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
6019; SSSE3-NEXT:    movdqa %xmm0, %xmm3
6020; SSSE3-NEXT:    pand %xmm2, %xmm3
6021; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
6022; SSSE3-NEXT:    movdqa %xmm1, %xmm4
6023; SSSE3-NEXT:    pshufb %xmm3, %xmm4
6024; SSSE3-NEXT:    psrlw $4, %xmm0
6025; SSSE3-NEXT:    pand %xmm2, %xmm0
6026; SSSE3-NEXT:    pshufb %xmm0, %xmm1
6027; SSSE3-NEXT:    paddb %xmm4, %xmm1
6028; SSSE3-NEXT:    pxor %xmm0, %xmm0
6029; SSSE3-NEXT:    movdqa %xmm1, %xmm2
6030; SSSE3-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
6031; SSSE3-NEXT:    psadbw %xmm0, %xmm2
6032; SSSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
6033; SSSE3-NEXT:    psadbw %xmm0, %xmm1
6034; SSSE3-NEXT:    packuswb %xmm2, %xmm1
6035; SSSE3-NEXT:    pcmpgtd {{.*}}(%rip), %xmm1
6036; SSSE3-NEXT:    movdqa %xmm1, %xmm0
6037; SSSE3-NEXT:    retq
6038;
6039; SSE41-LABEL: ugt_2_v4i32:
6040; SSE41:       # %bb.0:
6041; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
6042; SSE41-NEXT:    movdqa %xmm0, %xmm2
6043; SSE41-NEXT:    pand %xmm1, %xmm2
6044; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
6045; SSE41-NEXT:    movdqa %xmm3, %xmm4
6046; SSE41-NEXT:    pshufb %xmm2, %xmm4
6047; SSE41-NEXT:    psrlw $4, %xmm0
6048; SSE41-NEXT:    pand %xmm1, %xmm0
6049; SSE41-NEXT:    pshufb %xmm0, %xmm3
6050; SSE41-NEXT:    paddb %xmm4, %xmm3
6051; SSE41-NEXT:    pxor %xmm1, %xmm1
6052; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero
6053; SSE41-NEXT:    punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3]
6054; SSE41-NEXT:    psadbw %xmm1, %xmm3
6055; SSE41-NEXT:    psadbw %xmm1, %xmm0
6056; SSE41-NEXT:    packuswb %xmm3, %xmm0
6057; SSE41-NEXT:    pcmpgtd {{.*}}(%rip), %xmm0
6058; SSE41-NEXT:    retq
6059;
6060; AVX1-LABEL: ugt_2_v4i32:
6061; AVX1:       # %bb.0:
6062; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
6063; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
6064; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
6065; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
6066; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
6067; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
6068; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
6069; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
6070; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
6071; AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
6072; AVX1-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
6073; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
6074; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
6075; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
6076; AVX1-NEXT:    vpcmpgtd {{.*}}(%rip), %xmm0, %xmm0
6077; AVX1-NEXT:    retq
6078;
6079; AVX2-LABEL: ugt_2_v4i32:
6080; AVX2:       # %bb.0:
6081; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
6082; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
6083; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
6084; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
6085; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
6086; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
6087; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
6088; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
6089; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
6090; AVX2-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
6091; AVX2-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
6092; AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
6093; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
6094; AVX2-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
6095; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [2,2,2,2]
6096; AVX2-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
6097; AVX2-NEXT:    retq
6098;
6099; AVX512VPOPCNTDQ-LABEL: ugt_2_v4i32:
6100; AVX512VPOPCNTDQ:       # %bb.0:
6101; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
6102; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
6103; AVX512VPOPCNTDQ-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [2,2,2,2]
6104; AVX512VPOPCNTDQ-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
6105; AVX512VPOPCNTDQ-NEXT:    vzeroupper
6106; AVX512VPOPCNTDQ-NEXT:    retq
6107;
6108; AVX512VPOPCNTDQVL-LABEL: ugt_2_v4i32:
6109; AVX512VPOPCNTDQVL:       # %bb.0:
6110; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
6111; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1
6112; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
6113; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
6114; AVX512VPOPCNTDQVL-NEXT:    retq
6115;
6116; BITALG_NOVLX-LABEL: ugt_2_v4i32:
6117; BITALG_NOVLX:       # %bb.0:
6118; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
6119; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
6120; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
6121; BITALG_NOVLX-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
6122; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
6123; BITALG_NOVLX-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
6124; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
6125; BITALG_NOVLX-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
6126; BITALG_NOVLX-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [2,2,2,2]
6127; BITALG_NOVLX-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
6128; BITALG_NOVLX-NEXT:    vzeroupper
6129; BITALG_NOVLX-NEXT:    retq
6130;
6131; BITALG-LABEL: ugt_2_v4i32:
6132; BITALG:       # %bb.0:
6133; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
6134; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
6135; BITALG-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
6136; BITALG-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
6137; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
6138; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
6139; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
6140; BITALG-NEXT:    vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1
6141; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
6142; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
6143; BITALG-NEXT:    retq
6144  %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
6145  %3 = icmp ugt <4 x i32> %2, <i32 2, i32 2, i32 2, i32 2>
6146  %4 = sext <4 x i1> %3 to <4 x i32>
6147  ret <4 x i32> %4
6148}
6149
; ctpop(x) ult 3: same per-byte popcount expansions as ugt_2 above
; (shift/mask fold for SSE2/SSE3, pshufb nibble LUT for SSSE3+, vpopcntb
; for BITALG, vpopcntd for VPOPCNTDQ) followed by psadbw widening; the
; unsigned "less than 3" is emitted as pcmpgtd with swapped operands,
; i.e. splat-3 compared greater-than the popcount result.
6150define <4 x i32> @ult_3_v4i32(<4 x i32> %0) {
6151; SSE2-LABEL: ult_3_v4i32:
6152; SSE2:       # %bb.0:
6153; SSE2-NEXT:    movdqa %xmm0, %xmm1
6154; SSE2-NEXT:    psrlw $1, %xmm1
6155; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
6156; SSE2-NEXT:    psubb %xmm1, %xmm0
6157; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
6158; SSE2-NEXT:    movdqa %xmm0, %xmm2
6159; SSE2-NEXT:    pand %xmm1, %xmm2
6160; SSE2-NEXT:    psrlw $2, %xmm0
6161; SSE2-NEXT:    pand %xmm1, %xmm0
6162; SSE2-NEXT:    paddb %xmm2, %xmm0
6163; SSE2-NEXT:    movdqa %xmm0, %xmm1
6164; SSE2-NEXT:    psrlw $4, %xmm1
6165; SSE2-NEXT:    paddb %xmm0, %xmm1
6166; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
6167; SSE2-NEXT:    pxor %xmm0, %xmm0
6168; SSE2-NEXT:    movdqa %xmm1, %xmm2
6169; SSE2-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
6170; SSE2-NEXT:    psadbw %xmm0, %xmm2
6171; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
6172; SSE2-NEXT:    psadbw %xmm0, %xmm1
6173; SSE2-NEXT:    packuswb %xmm2, %xmm1
6174; SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [3,3,3,3]
6175; SSE2-NEXT:    pcmpgtd %xmm1, %xmm0
6176; SSE2-NEXT:    retq
6177;
6178; SSE3-LABEL: ult_3_v4i32:
6179; SSE3:       # %bb.0:
6180; SSE3-NEXT:    movdqa %xmm0, %xmm1
6181; SSE3-NEXT:    psrlw $1, %xmm1
6182; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
6183; SSE3-NEXT:    psubb %xmm1, %xmm0
6184; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
6185; SSE3-NEXT:    movdqa %xmm0, %xmm2
6186; SSE3-NEXT:    pand %xmm1, %xmm2
6187; SSE3-NEXT:    psrlw $2, %xmm0
6188; SSE3-NEXT:    pand %xmm1, %xmm0
6189; SSE3-NEXT:    paddb %xmm2, %xmm0
6190; SSE3-NEXT:    movdqa %xmm0, %xmm1
6191; SSE3-NEXT:    psrlw $4, %xmm1
6192; SSE3-NEXT:    paddb %xmm0, %xmm1
6193; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
6194; SSE3-NEXT:    pxor %xmm0, %xmm0
6195; SSE3-NEXT:    movdqa %xmm1, %xmm2
6196; SSE3-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
6197; SSE3-NEXT:    psadbw %xmm0, %xmm2
6198; SSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
6199; SSE3-NEXT:    psadbw %xmm0, %xmm1
6200; SSE3-NEXT:    packuswb %xmm2, %xmm1
6201; SSE3-NEXT:    movdqa {{.*#+}} xmm0 = [3,3,3,3]
6202; SSE3-NEXT:    pcmpgtd %xmm1, %xmm0
6203; SSE3-NEXT:    retq
6204;
6205; SSSE3-LABEL: ult_3_v4i32:
6206; SSSE3:       # %bb.0:
6207; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
6208; SSSE3-NEXT:    movdqa %xmm0, %xmm2
6209; SSSE3-NEXT:    pand %xmm1, %xmm2
6210; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
6211; SSSE3-NEXT:    movdqa %xmm3, %xmm4
6212; SSSE3-NEXT:    pshufb %xmm2, %xmm4
6213; SSSE3-NEXT:    psrlw $4, %xmm0
6214; SSSE3-NEXT:    pand %xmm1, %xmm0
6215; SSSE3-NEXT:    pshufb %xmm0, %xmm3
6216; SSSE3-NEXT:    paddb %xmm4, %xmm3
6217; SSSE3-NEXT:    pxor %xmm0, %xmm0
6218; SSSE3-NEXT:    movdqa %xmm3, %xmm1
6219; SSSE3-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
6220; SSSE3-NEXT:    psadbw %xmm0, %xmm1
6221; SSSE3-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
6222; SSSE3-NEXT:    psadbw %xmm0, %xmm3
6223; SSSE3-NEXT:    packuswb %xmm1, %xmm3
6224; SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [3,3,3,3]
6225; SSSE3-NEXT:    pcmpgtd %xmm3, %xmm0
6226; SSSE3-NEXT:    retq
6227;
6228; SSE41-LABEL: ult_3_v4i32:
6229; SSE41:       # %bb.0:
6230; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
6231; SSE41-NEXT:    movdqa %xmm0, %xmm2
6232; SSE41-NEXT:    pand %xmm1, %xmm2
6233; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
6234; SSE41-NEXT:    movdqa %xmm3, %xmm4
6235; SSE41-NEXT:    pshufb %xmm2, %xmm4
6236; SSE41-NEXT:    psrlw $4, %xmm0
6237; SSE41-NEXT:    pand %xmm1, %xmm0
6238; SSE41-NEXT:    pshufb %xmm0, %xmm3
6239; SSE41-NEXT:    paddb %xmm4, %xmm3
6240; SSE41-NEXT:    pxor %xmm0, %xmm0
6241; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero
6242; SSE41-NEXT:    punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3]
6243; SSE41-NEXT:    psadbw %xmm0, %xmm3
6244; SSE41-NEXT:    psadbw %xmm0, %xmm1
6245; SSE41-NEXT:    packuswb %xmm3, %xmm1
6246; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [3,3,3,3]
6247; SSE41-NEXT:    pcmpgtd %xmm1, %xmm0
6248; SSE41-NEXT:    retq
6249;
6250; AVX1-LABEL: ult_3_v4i32:
6251; AVX1:       # %bb.0:
6252; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
6253; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
6254; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
6255; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
6256; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
6257; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
6258; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
6259; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
6260; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
6261; AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
6262; AVX1-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
6263; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
6264; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
6265; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
6266; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [3,3,3,3]
6267; AVX1-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
6268; AVX1-NEXT:    retq
6269;
6270; AVX2-LABEL: ult_3_v4i32:
6271; AVX2:       # %bb.0:
6272; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
6273; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
6274; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
6275; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
6276; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
6277; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
6278; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
6279; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
6280; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
6281; AVX2-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
6282; AVX2-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
6283; AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
6284; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
6285; AVX2-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
6286; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [3,3,3,3]
6287; AVX2-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
6288; AVX2-NEXT:    retq
6289;
6290; AVX512VPOPCNTDQ-LABEL: ult_3_v4i32:
6291; AVX512VPOPCNTDQ:       # %bb.0:
6292; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
6293; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
6294; AVX512VPOPCNTDQ-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [3,3,3,3]
6295; AVX512VPOPCNTDQ-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
6296; AVX512VPOPCNTDQ-NEXT:    vzeroupper
6297; AVX512VPOPCNTDQ-NEXT:    retq
6298;
6299; AVX512VPOPCNTDQVL-LABEL: ult_3_v4i32:
6300; AVX512VPOPCNTDQVL:       # %bb.0:
6301; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
6302; AVX512VPOPCNTDQVL-NEXT:    vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1
6303; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
6304; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
6305; AVX512VPOPCNTDQVL-NEXT:    retq
6306;
6307; BITALG_NOVLX-LABEL: ult_3_v4i32:
6308; BITALG_NOVLX:       # %bb.0:
6309; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
6310; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
6311; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
6312; BITALG_NOVLX-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
6313; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
6314; BITALG_NOVLX-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
6315; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
6316; BITALG_NOVLX-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
6317; BITALG_NOVLX-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [3,3,3,3]
6318; BITALG_NOVLX-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
6319; BITALG_NOVLX-NEXT:    vzeroupper
6320; BITALG_NOVLX-NEXT:    retq
6321;
6322; BITALG-LABEL: ult_3_v4i32:
6323; BITALG:       # %bb.0:
6324; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
6325; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
6326; BITALG-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
6327; BITALG-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
6328; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
6329; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
6330; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
6331; BITALG-NEXT:    vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1
6332; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
6333; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
6334; BITALG-NEXT:    retq
6335  %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
6336  %3 = icmp ult <4 x i32> %2, <i32 3, i32 3, i32 3, i32 3>
6337  %4 = sext <4 x i1> %3 to <4 x i32>
6338  ret <4 x i32> %4
6339}
6340
6341define <4 x i32> @ugt_3_v4i32(<4 x i32> %0) {
6342; SSE2-LABEL: ugt_3_v4i32:
6343; SSE2:       # %bb.0:
6344; SSE2-NEXT:    movdqa %xmm0, %xmm1
6345; SSE2-NEXT:    psrlw $1, %xmm1
6346; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
6347; SSE2-NEXT:    psubb %xmm1, %xmm0
6348; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
6349; SSE2-NEXT:    movdqa %xmm0, %xmm2
6350; SSE2-NEXT:    pand %xmm1, %xmm2
6351; SSE2-NEXT:    psrlw $2, %xmm0
6352; SSE2-NEXT:    pand %xmm1, %xmm0
6353; SSE2-NEXT:    paddb %xmm2, %xmm0
6354; SSE2-NEXT:    movdqa %xmm0, %xmm1
6355; SSE2-NEXT:    psrlw $4, %xmm1
6356; SSE2-NEXT:    paddb %xmm0, %xmm1
6357; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
6358; SSE2-NEXT:    pxor %xmm0, %xmm0
6359; SSE2-NEXT:    movdqa %xmm1, %xmm2
6360; SSE2-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
6361; SSE2-NEXT:    psadbw %xmm0, %xmm2
6362; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
6363; SSE2-NEXT:    psadbw %xmm0, %xmm1
6364; SSE2-NEXT:    packuswb %xmm2, %xmm1
6365; SSE2-NEXT:    pcmpgtd {{.*}}(%rip), %xmm1
6366; SSE2-NEXT:    movdqa %xmm1, %xmm0
6367; SSE2-NEXT:    retq
6368;
6369; SSE3-LABEL: ugt_3_v4i32:
6370; SSE3:       # %bb.0:
6371; SSE3-NEXT:    movdqa %xmm0, %xmm1
6372; SSE3-NEXT:    psrlw $1, %xmm1
6373; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
6374; SSE3-NEXT:    psubb %xmm1, %xmm0
6375; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
6376; SSE3-NEXT:    movdqa %xmm0, %xmm2
6377; SSE3-NEXT:    pand %xmm1, %xmm2
6378; SSE3-NEXT:    psrlw $2, %xmm0
6379; SSE3-NEXT:    pand %xmm1, %xmm0
6380; SSE3-NEXT:    paddb %xmm2, %xmm0
6381; SSE3-NEXT:    movdqa %xmm0, %xmm1
6382; SSE3-NEXT:    psrlw $4, %xmm1
6383; SSE3-NEXT:    paddb %xmm0, %xmm1
6384; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
6385; SSE3-NEXT:    pxor %xmm0, %xmm0
6386; SSE3-NEXT:    movdqa %xmm1, %xmm2
6387; SSE3-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
6388; SSE3-NEXT:    psadbw %xmm0, %xmm2
6389; SSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
6390; SSE3-NEXT:    psadbw %xmm0, %xmm1
6391; SSE3-NEXT:    packuswb %xmm2, %xmm1
6392; SSE3-NEXT:    pcmpgtd {{.*}}(%rip), %xmm1
6393; SSE3-NEXT:    movdqa %xmm1, %xmm0
6394; SSE3-NEXT:    retq
6395;
6396; SSSE3-LABEL: ugt_3_v4i32:
6397; SSSE3:       # %bb.0:
6398; SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
6399; SSSE3-NEXT:    movdqa %xmm0, %xmm3
6400; SSSE3-NEXT:    pand %xmm2, %xmm3
6401; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
6402; SSSE3-NEXT:    movdqa %xmm1, %xmm4
6403; SSSE3-NEXT:    pshufb %xmm3, %xmm4
6404; SSSE3-NEXT:    psrlw $4, %xmm0
6405; SSSE3-NEXT:    pand %xmm2, %xmm0
6406; SSSE3-NEXT:    pshufb %xmm0, %xmm1
6407; SSSE3-NEXT:    paddb %xmm4, %xmm1
6408; SSSE3-NEXT:    pxor %xmm0, %xmm0
6409; SSSE3-NEXT:    movdqa %xmm1, %xmm2
6410; SSSE3-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
6411; SSSE3-NEXT:    psadbw %xmm0, %xmm2
6412; SSSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
6413; SSSE3-NEXT:    psadbw %xmm0, %xmm1
6414; SSSE3-NEXT:    packuswb %xmm2, %xmm1
6415; SSSE3-NEXT:    pcmpgtd {{.*}}(%rip), %xmm1
6416; SSSE3-NEXT:    movdqa %xmm1, %xmm0
6417; SSSE3-NEXT:    retq
6418;
6419; SSE41-LABEL: ugt_3_v4i32:
6420; SSE41:       # %bb.0:
6421; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
6422; SSE41-NEXT:    movdqa %xmm0, %xmm2
6423; SSE41-NEXT:    pand %xmm1, %xmm2
6424; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
6425; SSE41-NEXT:    movdqa %xmm3, %xmm4
6426; SSE41-NEXT:    pshufb %xmm2, %xmm4
6427; SSE41-NEXT:    psrlw $4, %xmm0
6428; SSE41-NEXT:    pand %xmm1, %xmm0
6429; SSE41-NEXT:    pshufb %xmm0, %xmm3
6430; SSE41-NEXT:    paddb %xmm4, %xmm3
6431; SSE41-NEXT:    pxor %xmm1, %xmm1
6432; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero
6433; SSE41-NEXT:    punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3]
6434; SSE41-NEXT:    psadbw %xmm1, %xmm3
6435; SSE41-NEXT:    psadbw %xmm1, %xmm0
6436; SSE41-NEXT:    packuswb %xmm3, %xmm0
6437; SSE41-NEXT:    pcmpgtd {{.*}}(%rip), %xmm0
6438; SSE41-NEXT:    retq
6439;
6440; AVX1-LABEL: ugt_3_v4i32:
6441; AVX1:       # %bb.0:
6442; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
6443; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
6444; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
6445; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
6446; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
6447; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
6448; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
6449; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
6450; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
6451; AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
6452; AVX1-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
6453; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
6454; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
6455; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
6456; AVX1-NEXT:    vpcmpgtd {{.*}}(%rip), %xmm0, %xmm0
6457; AVX1-NEXT:    retq
6458;
6459; AVX2-LABEL: ugt_3_v4i32:
6460; AVX2:       # %bb.0:
6461; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
6462; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
6463; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
6464; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
6465; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
6466; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
6467; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
6468; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
6469; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
6470; AVX2-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
6471; AVX2-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
6472; AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
6473; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
6474; AVX2-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
6475; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [3,3,3,3]
6476; AVX2-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
6477; AVX2-NEXT:    retq
6478;
6479; AVX512VPOPCNTDQ-LABEL: ugt_3_v4i32:
6480; AVX512VPOPCNTDQ:       # %bb.0:
6481; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
6482; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
6483; AVX512VPOPCNTDQ-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [3,3,3,3]
6484; AVX512VPOPCNTDQ-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
6485; AVX512VPOPCNTDQ-NEXT:    vzeroupper
6486; AVX512VPOPCNTDQ-NEXT:    retq
6487;
6488; AVX512VPOPCNTDQVL-LABEL: ugt_3_v4i32:
6489; AVX512VPOPCNTDQVL:       # %bb.0:
6490; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
6491; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1
6492; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
6493; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
6494; AVX512VPOPCNTDQVL-NEXT:    retq
6495;
6496; BITALG_NOVLX-LABEL: ugt_3_v4i32:
6497; BITALG_NOVLX:       # %bb.0:
6498; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
6499; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
6500; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
6501; BITALG_NOVLX-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
6502; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
6503; BITALG_NOVLX-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
6504; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
6505; BITALG_NOVLX-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
6506; BITALG_NOVLX-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [3,3,3,3]
6507; BITALG_NOVLX-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
6508; BITALG_NOVLX-NEXT:    vzeroupper
6509; BITALG_NOVLX-NEXT:    retq
6510;
6511; BITALG-LABEL: ugt_3_v4i32:
6512; BITALG:       # %bb.0:
6513; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
6514; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
6515; BITALG-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
6516; BITALG-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
6517; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
6518; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
6519; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
6520; BITALG-NEXT:    vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1
6521; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
6522; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
6523; BITALG-NEXT:    retq
6524  %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
6525  %3 = icmp ugt <4 x i32> %2, <i32 3, i32 3, i32 3, i32 3>
6526  %4 = sext <4 x i1> %3 to <4 x i32>
6527  ret <4 x i32> %4
6528}
6529
; Checks lowering of ctpop(x) u< 4 on <4 x i32>, sign-extended to a lane mask.
; Pre-AVX512 targets have no vector popcount: SSE2/SSE3 use the bit-twiddling
; byte popcount, SSSE3/SSE41/AVX use the pshufb nibble-LUT, both followed by
; psadbw horizontal sums; the unsigned compare against the splat constant
; [4,4,4,4] is done with signed pcmpgtd/vpcmpgtd (constant on the left for ult).
; AVX512VPOPCNTDQ uses vpopcntd; BITALG uses vpopcntb + psadbw; the VL/BITALG
; variants compare via vpcmpltud into a mask register instead.
define <4 x i32> @ult_4_v4i32(<4 x i32> %0) {
; SSE2-LABEL: ult_4_v4i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE2-NEXT:    psadbw %xmm0, %xmm2
; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE2-NEXT:    psadbw %xmm0, %xmm1
; SSE2-NEXT:    packuswb %xmm2, %xmm1
; SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [4,4,4,4]
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ult_4_v4i32:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    movdqa %xmm1, %xmm2
; SSE3-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE3-NEXT:    psadbw %xmm0, %xmm2
; SSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE3-NEXT:    psadbw %xmm0, %xmm1
; SSE3-NEXT:    packuswb %xmm2, %xmm1
; SSE3-NEXT:    movdqa {{.*#+}} xmm0 = [4,4,4,4]
; SSE3-NEXT:    pcmpgtd %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ult_4_v4i32:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    movdqa %xmm3, %xmm1
; SSSE3-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; SSSE3-NEXT:    psadbw %xmm0, %xmm1
; SSSE3-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
; SSSE3-NEXT:    psadbw %xmm0, %xmm3
; SSSE3-NEXT:    packuswb %xmm1, %xmm3
; SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [4,4,4,4]
; SSSE3-NEXT:    pcmpgtd %xmm3, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ult_4_v4i32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero
; SSE41-NEXT:    punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3]
; SSE41-NEXT:    psadbw %xmm0, %xmm3
; SSE41-NEXT:    psadbw %xmm0, %xmm1
; SSE41-NEXT:    packuswb %xmm3, %xmm1
; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [4,4,4,4]
; SSE41-NEXT:    pcmpgtd %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ult_4_v4i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [4,4,4,4]
; AVX1-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ult_4_v4i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX2-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [4,4,4,4]
; AVX2-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ult_4_v4i32:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [4,4,4,4]
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ult_4_v4i32:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ult_4_v4i32:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; BITALG_NOVLX-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [4,4,4,4]
; BITALG_NOVLX-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ult_4_v4i32:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; BITALG-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
  %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
  %3 = icmp ult <4 x i32> %2, <i32 4, i32 4, i32 4, i32 4>
  %4 = sext <4 x i1> %3 to <4 x i32>
  ret <4 x i32> %4
}
6720
; Checks lowering of ctpop(x) u> 4 on <4 x i32>, sign-extended to a lane mask.
; Same popcount sequences as the ult_4 test above; for ugt the splat constant
; comes from memory on SSE/AVX1 (pcmpgtd mem operand, popcount on the left),
; AVX2/AVX512VPOPCNTDQ/BITALG_NOVLX broadcast [4,4,4,4] and use vpcmpgtd, and
; the VL/BITALG variants use vpcmpnleud into a mask register.
define <4 x i32> @ugt_4_v4i32(<4 x i32> %0) {
; SSE2-LABEL: ugt_4_v4i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE2-NEXT:    psadbw %xmm0, %xmm2
; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE2-NEXT:    psadbw %xmm0, %xmm1
; SSE2-NEXT:    packuswb %xmm2, %xmm1
; SSE2-NEXT:    pcmpgtd {{.*}}(%rip), %xmm1
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ugt_4_v4i32:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    movdqa %xmm1, %xmm2
; SSE3-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE3-NEXT:    psadbw %xmm0, %xmm2
; SSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE3-NEXT:    psadbw %xmm0, %xmm1
; SSE3-NEXT:    packuswb %xmm2, %xmm1
; SSE3-NEXT:    pcmpgtd {{.*}}(%rip), %xmm1
; SSE3-NEXT:    movdqa %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ugt_4_v4i32:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm3
; SSSE3-NEXT:    pand %xmm2, %xmm3
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm1, %xmm4
; SSSE3-NEXT:    pshufb %xmm3, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm2, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm1
; SSSE3-NEXT:    paddb %xmm4, %xmm1
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    movdqa %xmm1, %xmm2
; SSSE3-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSSE3-NEXT:    psadbw %xmm0, %xmm2
; SSSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSSE3-NEXT:    psadbw %xmm0, %xmm1
; SSSE3-NEXT:    packuswb %xmm2, %xmm1
; SSSE3-NEXT:    pcmpgtd {{.*}}(%rip), %xmm1
; SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ugt_4_v4i32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm1, %xmm1
; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero
; SSE41-NEXT:    punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3]
; SSE41-NEXT:    psadbw %xmm1, %xmm3
; SSE41-NEXT:    psadbw %xmm1, %xmm0
; SSE41-NEXT:    packuswb %xmm3, %xmm0
; SSE41-NEXT:    pcmpgtd {{.*}}(%rip), %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ugt_4_v4i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpgtd {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ugt_4_v4i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX2-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [4,4,4,4]
; AVX2-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ugt_4_v4i32:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [4,4,4,4]
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ugt_4_v4i32:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ugt_4_v4i32:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; BITALG_NOVLX-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [4,4,4,4]
; BITALG_NOVLX-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ugt_4_v4i32:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; BITALG-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
  %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
  %3 = icmp ugt <4 x i32> %2, <i32 4, i32 4, i32 4, i32 4>
  %4 = sext <4 x i1> %3 to <4 x i32>
  ret <4 x i32> %4
}
6909
; Checks lowering of ctpop(x) u< 5 on <4 x i32>, sign-extended to a lane mask.
; Identical structure to ult_4 above, but the compare threshold is the splat
; constant [5,5,5,5]; SSE/AVX targets use signed pcmpgtd/vpcmpgtd with the
; constant on the left, VL/BITALG use vpcmpltud into a mask register.
define <4 x i32> @ult_5_v4i32(<4 x i32> %0) {
; SSE2-LABEL: ult_5_v4i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE2-NEXT:    psadbw %xmm0, %xmm2
; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE2-NEXT:    psadbw %xmm0, %xmm1
; SSE2-NEXT:    packuswb %xmm2, %xmm1
; SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [5,5,5,5]
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ult_5_v4i32:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    movdqa %xmm1, %xmm2
; SSE3-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE3-NEXT:    psadbw %xmm0, %xmm2
; SSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE3-NEXT:    psadbw %xmm0, %xmm1
; SSE3-NEXT:    packuswb %xmm2, %xmm1
; SSE3-NEXT:    movdqa {{.*#+}} xmm0 = [5,5,5,5]
; SSE3-NEXT:    pcmpgtd %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ult_5_v4i32:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    movdqa %xmm3, %xmm1
; SSSE3-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; SSSE3-NEXT:    psadbw %xmm0, %xmm1
; SSSE3-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
; SSSE3-NEXT:    psadbw %xmm0, %xmm3
; SSSE3-NEXT:    packuswb %xmm1, %xmm3
; SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [5,5,5,5]
; SSSE3-NEXT:    pcmpgtd %xmm3, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ult_5_v4i32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero
; SSE41-NEXT:    punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3]
; SSE41-NEXT:    psadbw %xmm0, %xmm3
; SSE41-NEXT:    psadbw %xmm0, %xmm1
; SSE41-NEXT:    packuswb %xmm3, %xmm1
; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [5,5,5,5]
; SSE41-NEXT:    pcmpgtd %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ult_5_v4i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [5,5,5,5]
; AVX1-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ult_5_v4i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX2-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [5,5,5,5]
; AVX2-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ult_5_v4i32:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [5,5,5,5]
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ult_5_v4i32:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ult_5_v4i32:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; BITALG_NOVLX-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [5,5,5,5]
; BITALG_NOVLX-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ult_5_v4i32:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; BITALG-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
  %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
  %3 = icmp ult <4 x i32> %2, <i32 5, i32 5, i32 5, i32 5>
  %4 = sext <4 x i1> %3 to <4 x i32>
  ret <4 x i32> %4
}
7100
; Lanewise test: popcount of each i32 element compared `ugt 5`, with the i1
; result sign-extended to a full-width element mask.  Exercises the ctpop
; lowering strategies visible in the RUN lines: SSE2/SSE3 bit-parallel
; counting, SSSE3/SSE4.1/AVX nibble-LUT pshufb + psadbw horizontal add,
; AVX512 native vpopcntd, and BITALG vpopcntb + psadbw.  The CHECK lines
; below are autogenerated; regenerate rather than hand-edit.
define <4 x i32> @ugt_5_v4i32(<4 x i32> %0) {
; SSE2-LABEL: ugt_5_v4i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE2-NEXT:    psadbw %xmm0, %xmm2
; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE2-NEXT:    psadbw %xmm0, %xmm1
; SSE2-NEXT:    packuswb %xmm2, %xmm1
; SSE2-NEXT:    pcmpgtd {{.*}}(%rip), %xmm1
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ugt_5_v4i32:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    movdqa %xmm1, %xmm2
; SSE3-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE3-NEXT:    psadbw %xmm0, %xmm2
; SSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE3-NEXT:    psadbw %xmm0, %xmm1
; SSE3-NEXT:    packuswb %xmm2, %xmm1
; SSE3-NEXT:    pcmpgtd {{.*}}(%rip), %xmm1
; SSE3-NEXT:    movdqa %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ugt_5_v4i32:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm3
; SSSE3-NEXT:    pand %xmm2, %xmm3
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm1, %xmm4
; SSSE3-NEXT:    pshufb %xmm3, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm2, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm1
; SSSE3-NEXT:    paddb %xmm4, %xmm1
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    movdqa %xmm1, %xmm2
; SSSE3-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSSE3-NEXT:    psadbw %xmm0, %xmm2
; SSSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSSE3-NEXT:    psadbw %xmm0, %xmm1
; SSSE3-NEXT:    packuswb %xmm2, %xmm1
; SSSE3-NEXT:    pcmpgtd {{.*}}(%rip), %xmm1
; SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ugt_5_v4i32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm1, %xmm1
; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero
; SSE41-NEXT:    punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3]
; SSE41-NEXT:    psadbw %xmm1, %xmm3
; SSE41-NEXT:    psadbw %xmm1, %xmm0
; SSE41-NEXT:    packuswb %xmm3, %xmm0
; SSE41-NEXT:    pcmpgtd {{.*}}(%rip), %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ugt_5_v4i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpgtd {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ugt_5_v4i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX2-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [5,5,5,5]
; AVX2-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ugt_5_v4i32:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [5,5,5,5]
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ugt_5_v4i32:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ugt_5_v4i32:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; BITALG_NOVLX-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [5,5,5,5]
; BITALG_NOVLX-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ugt_5_v4i32:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; BITALG-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
  %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
  %3 = icmp ugt <4 x i32> %2, <i32 5, i32 5, i32 5, i32 5>
  %4 = sext <4 x i1> %3 to <4 x i32>
  ret <4 x i32> %4
}
7289
; Lanewise test: popcount of each i32 element compared `ult 6` (the inverse
; boundary of ugt 5), sign-extended to a full-width element mask.  With the
; constant on the left of the compare, non-AVX512 targets materialize the
; splat [6,6,6,6] and emit pcmpgtd/vpcmpgtd with operands swapped, while
; AVX512VL/BITALG use the masked vpcmpltud form.  CHECK lines are
; autogenerated; regenerate rather than hand-edit.
define <4 x i32> @ult_6_v4i32(<4 x i32> %0) {
; SSE2-LABEL: ult_6_v4i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE2-NEXT:    psadbw %xmm0, %xmm2
; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE2-NEXT:    psadbw %xmm0, %xmm1
; SSE2-NEXT:    packuswb %xmm2, %xmm1
; SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [6,6,6,6]
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ult_6_v4i32:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    movdqa %xmm1, %xmm2
; SSE3-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE3-NEXT:    psadbw %xmm0, %xmm2
; SSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE3-NEXT:    psadbw %xmm0, %xmm1
; SSE3-NEXT:    packuswb %xmm2, %xmm1
; SSE3-NEXT:    movdqa {{.*#+}} xmm0 = [6,6,6,6]
; SSE3-NEXT:    pcmpgtd %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ult_6_v4i32:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    movdqa %xmm3, %xmm1
; SSSE3-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; SSSE3-NEXT:    psadbw %xmm0, %xmm1
; SSSE3-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
; SSSE3-NEXT:    psadbw %xmm0, %xmm3
; SSSE3-NEXT:    packuswb %xmm1, %xmm3
; SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [6,6,6,6]
; SSSE3-NEXT:    pcmpgtd %xmm3, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ult_6_v4i32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero
; SSE41-NEXT:    punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3]
; SSE41-NEXT:    psadbw %xmm0, %xmm3
; SSE41-NEXT:    psadbw %xmm0, %xmm1
; SSE41-NEXT:    packuswb %xmm3, %xmm1
; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [6,6,6,6]
; SSE41-NEXT:    pcmpgtd %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ult_6_v4i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [6,6,6,6]
; AVX1-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ult_6_v4i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX2-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [6,6,6,6]
; AVX2-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ult_6_v4i32:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [6,6,6,6]
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ult_6_v4i32:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ult_6_v4i32:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; BITALG_NOVLX-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [6,6,6,6]
; BITALG_NOVLX-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ult_6_v4i32:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; BITALG-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
  %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
  %3 = icmp ult <4 x i32> %2, <i32 6, i32 6, i32 6, i32 6>
  %4 = sext <4 x i1> %3 to <4 x i32>
  ret <4 x i32> %4
}
7480
; Lanewise test: popcount of each i32 element compared `ugt 6`, sign-extended
; to a full-width element mask.  Covers the same lowering matrix as the
; neighboring threshold tests: SSE2/SSE3 bit-parallel counting, SSSE3/SSE4.1/
; AVX nibble-LUT pshufb + psadbw horizontal add, AVX512 vpopcntd, and BITALG
; vpopcntb; AVX512VL/BITALG compare via masked vpcmpnleud.  CHECK lines are
; autogenerated; regenerate rather than hand-edit.
define <4 x i32> @ugt_6_v4i32(<4 x i32> %0) {
; SSE2-LABEL: ugt_6_v4i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE2-NEXT:    psadbw %xmm0, %xmm2
; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE2-NEXT:    psadbw %xmm0, %xmm1
; SSE2-NEXT:    packuswb %xmm2, %xmm1
; SSE2-NEXT:    pcmpgtd {{.*}}(%rip), %xmm1
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ugt_6_v4i32:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    movdqa %xmm1, %xmm2
; SSE3-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE3-NEXT:    psadbw %xmm0, %xmm2
; SSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE3-NEXT:    psadbw %xmm0, %xmm1
; SSE3-NEXT:    packuswb %xmm2, %xmm1
; SSE3-NEXT:    pcmpgtd {{.*}}(%rip), %xmm1
; SSE3-NEXT:    movdqa %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ugt_6_v4i32:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm3
; SSSE3-NEXT:    pand %xmm2, %xmm3
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm1, %xmm4
; SSSE3-NEXT:    pshufb %xmm3, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm2, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm1
; SSSE3-NEXT:    paddb %xmm4, %xmm1
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    movdqa %xmm1, %xmm2
; SSSE3-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSSE3-NEXT:    psadbw %xmm0, %xmm2
; SSSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSSE3-NEXT:    psadbw %xmm0, %xmm1
; SSSE3-NEXT:    packuswb %xmm2, %xmm1
; SSSE3-NEXT:    pcmpgtd {{.*}}(%rip), %xmm1
; SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ugt_6_v4i32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm1, %xmm1
; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero
; SSE41-NEXT:    punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3]
; SSE41-NEXT:    psadbw %xmm1, %xmm3
; SSE41-NEXT:    psadbw %xmm1, %xmm0
; SSE41-NEXT:    packuswb %xmm3, %xmm0
; SSE41-NEXT:    pcmpgtd {{.*}}(%rip), %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ugt_6_v4i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpgtd {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ugt_6_v4i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX2-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [6,6,6,6]
; AVX2-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ugt_6_v4i32:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [6,6,6,6]
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ugt_6_v4i32:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ugt_6_v4i32:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; BITALG_NOVLX-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [6,6,6,6]
; BITALG_NOVLX-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ugt_6_v4i32:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; BITALG-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
  %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
  %3 = icmp ugt <4 x i32> %2, <i32 6, i32 6, i32 6, i32 6>
  %4 = sext <4 x i1> %3 to <4 x i32>
  ret <4 x i32> %4
}
7669
; Lanewise test: popcount of each i32 element compared `ult 7` (the inverse
; boundary of ugt 6), sign-extended to a full-width element mask.  Non-AVX512
; targets splat [7,7,7,7] and emit pcmpgtd/vpcmpgtd with swapped operands;
; AVX512VL/BITALG use the masked vpcmpltud form.  CHECK lines are
; autogenerated; regenerate rather than hand-edit.
define <4 x i32> @ult_7_v4i32(<4 x i32> %0) {
; SSE2-LABEL: ult_7_v4i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE2-NEXT:    psadbw %xmm0, %xmm2
; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE2-NEXT:    psadbw %xmm0, %xmm1
; SSE2-NEXT:    packuswb %xmm2, %xmm1
; SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [7,7,7,7]
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ult_7_v4i32:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    movdqa %xmm1, %xmm2
; SSE3-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE3-NEXT:    psadbw %xmm0, %xmm2
; SSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE3-NEXT:    psadbw %xmm0, %xmm1
; SSE3-NEXT:    packuswb %xmm2, %xmm1
; SSE3-NEXT:    movdqa {{.*#+}} xmm0 = [7,7,7,7]
; SSE3-NEXT:    pcmpgtd %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ult_7_v4i32:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    movdqa %xmm3, %xmm1
; SSSE3-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; SSSE3-NEXT:    psadbw %xmm0, %xmm1
; SSSE3-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
; SSSE3-NEXT:    psadbw %xmm0, %xmm3
; SSSE3-NEXT:    packuswb %xmm1, %xmm3
; SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [7,7,7,7]
; SSSE3-NEXT:    pcmpgtd %xmm3, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ult_7_v4i32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero
; SSE41-NEXT:    punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3]
; SSE41-NEXT:    psadbw %xmm0, %xmm3
; SSE41-NEXT:    psadbw %xmm0, %xmm1
; SSE41-NEXT:    packuswb %xmm3, %xmm1
; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [7,7,7,7]
; SSE41-NEXT:    pcmpgtd %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ult_7_v4i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [7,7,7,7]
; AVX1-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ult_7_v4i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX2-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [7,7,7,7]
; AVX2-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ult_7_v4i32:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [7,7,7,7]
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ult_7_v4i32:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ult_7_v4i32:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; BITALG_NOVLX-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [7,7,7,7]
; BITALG_NOVLX-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ult_7_v4i32:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; BITALG-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
  %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
  %3 = icmp ult <4 x i32> %2, <i32 7, i32 7, i32 7, i32 7>
  %4 = sext <4 x i1> %3 to <4 x i32>
  ret <4 x i32> %4
}
7860
; ugt_7_v4i32: popcount each i32 lane of %0, compare 'icmp ugt 7', and
; sign-extend the <4 x i1> result to an all-ones/all-zeros <4 x i32> mask.
; NOTE(review): the assertion comment lines below are autogenerated by
; utils/update_llc_test_checks.py -- regenerate them rather than hand-editing.
define <4 x i32> @ugt_7_v4i32(<4 x i32> %0) {
; SSE2-LABEL: ugt_7_v4i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE2-NEXT:    psadbw %xmm0, %xmm2
; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE2-NEXT:    psadbw %xmm0, %xmm1
; SSE2-NEXT:    packuswb %xmm2, %xmm1
; SSE2-NEXT:    pcmpgtd {{.*}}(%rip), %xmm1
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ugt_7_v4i32:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    movdqa %xmm1, %xmm2
; SSE3-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE3-NEXT:    psadbw %xmm0, %xmm2
; SSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE3-NEXT:    psadbw %xmm0, %xmm1
; SSE3-NEXT:    packuswb %xmm2, %xmm1
; SSE3-NEXT:    pcmpgtd {{.*}}(%rip), %xmm1
; SSE3-NEXT:    movdqa %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ugt_7_v4i32:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm3
; SSSE3-NEXT:    pand %xmm2, %xmm3
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm1, %xmm4
; SSSE3-NEXT:    pshufb %xmm3, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm2, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm1
; SSSE3-NEXT:    paddb %xmm4, %xmm1
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    movdqa %xmm1, %xmm2
; SSSE3-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSSE3-NEXT:    psadbw %xmm0, %xmm2
; SSSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSSE3-NEXT:    psadbw %xmm0, %xmm1
; SSSE3-NEXT:    packuswb %xmm2, %xmm1
; SSSE3-NEXT:    pcmpgtd {{.*}}(%rip), %xmm1
; SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ugt_7_v4i32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm1, %xmm1
; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero
; SSE41-NEXT:    punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3]
; SSE41-NEXT:    psadbw %xmm1, %xmm3
; SSE41-NEXT:    psadbw %xmm1, %xmm0
; SSE41-NEXT:    packuswb %xmm3, %xmm0
; SSE41-NEXT:    pcmpgtd {{.*}}(%rip), %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ugt_7_v4i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpgtd {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ugt_7_v4i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX2-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [7,7,7,7]
; AVX2-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ugt_7_v4i32:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [7,7,7,7]
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ugt_7_v4i32:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ugt_7_v4i32:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; BITALG_NOVLX-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [7,7,7,7]
; BITALG_NOVLX-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ugt_7_v4i32:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; BITALG-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
; IR under test: per-lane ctpop, unsigned > 7, sext i1 -> i32 mask.
  %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
  %3 = icmp ugt <4 x i32> %2, <i32 7, i32 7, i32 7, i32 7>
  %4 = sext <4 x i1> %3 to <4 x i32>
  ret <4 x i32> %4
}
8049
; ult_8_v4i32: popcount each i32 lane of %0, compare 'icmp ult 8', and
; sign-extend the <4 x i1> result to an all-ones/all-zeros <4 x i32> mask.
; NOTE(review): the assertion comment lines below are autogenerated by
; utils/update_llc_test_checks.py -- regenerate them rather than hand-editing.
define <4 x i32> @ult_8_v4i32(<4 x i32> %0) {
; SSE2-LABEL: ult_8_v4i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE2-NEXT:    psadbw %xmm0, %xmm2
; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE2-NEXT:    psadbw %xmm0, %xmm1
; SSE2-NEXT:    packuswb %xmm2, %xmm1
; SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [8,8,8,8]
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ult_8_v4i32:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    movdqa %xmm1, %xmm2
; SSE3-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE3-NEXT:    psadbw %xmm0, %xmm2
; SSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE3-NEXT:    psadbw %xmm0, %xmm1
; SSE3-NEXT:    packuswb %xmm2, %xmm1
; SSE3-NEXT:    movdqa {{.*#+}} xmm0 = [8,8,8,8]
; SSE3-NEXT:    pcmpgtd %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ult_8_v4i32:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    movdqa %xmm3, %xmm1
; SSSE3-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; SSSE3-NEXT:    psadbw %xmm0, %xmm1
; SSSE3-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
; SSSE3-NEXT:    psadbw %xmm0, %xmm3
; SSSE3-NEXT:    packuswb %xmm1, %xmm3
; SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [8,8,8,8]
; SSSE3-NEXT:    pcmpgtd %xmm3, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ult_8_v4i32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero
; SSE41-NEXT:    punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3]
; SSE41-NEXT:    psadbw %xmm0, %xmm3
; SSE41-NEXT:    psadbw %xmm0, %xmm1
; SSE41-NEXT:    packuswb %xmm3, %xmm1
; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [8,8,8,8]
; SSE41-NEXT:    pcmpgtd %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ult_8_v4i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [8,8,8,8]
; AVX1-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ult_8_v4i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX2-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [8,8,8,8]
; AVX2-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ult_8_v4i32:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [8,8,8,8]
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ult_8_v4i32:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ult_8_v4i32:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; BITALG_NOVLX-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [8,8,8,8]
; BITALG_NOVLX-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ult_8_v4i32:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; BITALG-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
; IR under test: per-lane ctpop, unsigned < 8, sext i1 -> i32 mask.
  %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
  %3 = icmp ult <4 x i32> %2, <i32 8, i32 8, i32 8, i32 8>
  %4 = sext <4 x i1> %3 to <4 x i32>
  ret <4 x i32> %4
}
8240
; ugt_8_v4i32: popcount each i32 lane of %0, compare 'icmp ugt 8', and
; sign-extend the <4 x i1> result to an all-ones/all-zeros <4 x i32> mask.
; NOTE(review): the assertion comment lines below are autogenerated by
; utils/update_llc_test_checks.py -- regenerate them rather than hand-editing.
define <4 x i32> @ugt_8_v4i32(<4 x i32> %0) {
; SSE2-LABEL: ugt_8_v4i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE2-NEXT:    psadbw %xmm0, %xmm2
; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE2-NEXT:    psadbw %xmm0, %xmm1
; SSE2-NEXT:    packuswb %xmm2, %xmm1
; SSE2-NEXT:    pcmpgtd {{.*}}(%rip), %xmm1
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ugt_8_v4i32:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    movdqa %xmm1, %xmm2
; SSE3-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE3-NEXT:    psadbw %xmm0, %xmm2
; SSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE3-NEXT:    psadbw %xmm0, %xmm1
; SSE3-NEXT:    packuswb %xmm2, %xmm1
; SSE3-NEXT:    pcmpgtd {{.*}}(%rip), %xmm1
; SSE3-NEXT:    movdqa %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ugt_8_v4i32:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm3
; SSSE3-NEXT:    pand %xmm2, %xmm3
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm1, %xmm4
; SSSE3-NEXT:    pshufb %xmm3, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm2, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm1
; SSSE3-NEXT:    paddb %xmm4, %xmm1
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    movdqa %xmm1, %xmm2
; SSSE3-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSSE3-NEXT:    psadbw %xmm0, %xmm2
; SSSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSSE3-NEXT:    psadbw %xmm0, %xmm1
; SSSE3-NEXT:    packuswb %xmm2, %xmm1
; SSSE3-NEXT:    pcmpgtd {{.*}}(%rip), %xmm1
; SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ugt_8_v4i32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm1, %xmm1
; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero
; SSE41-NEXT:    punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3]
; SSE41-NEXT:    psadbw %xmm1, %xmm3
; SSE41-NEXT:    psadbw %xmm1, %xmm0
; SSE41-NEXT:    packuswb %xmm3, %xmm0
; SSE41-NEXT:    pcmpgtd {{.*}}(%rip), %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ugt_8_v4i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpgtd {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ugt_8_v4i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX2-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [8,8,8,8]
; AVX2-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ugt_8_v4i32:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [8,8,8,8]
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ugt_8_v4i32:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ugt_8_v4i32:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; BITALG_NOVLX-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [8,8,8,8]
; BITALG_NOVLX-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ugt_8_v4i32:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; BITALG-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
; IR under test: per-lane ctpop, unsigned > 8, sext i1 -> i32 mask.
  %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
  %3 = icmp ugt <4 x i32> %2, <i32 8, i32 8, i32 8, i32 8>
  %4 = sext <4 x i1> %3 to <4 x i32>
  ret <4 x i32> %4
}
8429
8430define <4 x i32> @ult_9_v4i32(<4 x i32> %0) {
8431; SSE2-LABEL: ult_9_v4i32:
8432; SSE2:       # %bb.0:
8433; SSE2-NEXT:    movdqa %xmm0, %xmm1
8434; SSE2-NEXT:    psrlw $1, %xmm1
8435; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
8436; SSE2-NEXT:    psubb %xmm1, %xmm0
8437; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
8438; SSE2-NEXT:    movdqa %xmm0, %xmm2
8439; SSE2-NEXT:    pand %xmm1, %xmm2
8440; SSE2-NEXT:    psrlw $2, %xmm0
8441; SSE2-NEXT:    pand %xmm1, %xmm0
8442; SSE2-NEXT:    paddb %xmm2, %xmm0
8443; SSE2-NEXT:    movdqa %xmm0, %xmm1
8444; SSE2-NEXT:    psrlw $4, %xmm1
8445; SSE2-NEXT:    paddb %xmm0, %xmm1
8446; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
8447; SSE2-NEXT:    pxor %xmm0, %xmm0
8448; SSE2-NEXT:    movdqa %xmm1, %xmm2
8449; SSE2-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
8450; SSE2-NEXT:    psadbw %xmm0, %xmm2
8451; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
8452; SSE2-NEXT:    psadbw %xmm0, %xmm1
8453; SSE2-NEXT:    packuswb %xmm2, %xmm1
8454; SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [9,9,9,9]
8455; SSE2-NEXT:    pcmpgtd %xmm1, %xmm0
8456; SSE2-NEXT:    retq
8457;
8458; SSE3-LABEL: ult_9_v4i32:
8459; SSE3:       # %bb.0:
8460; SSE3-NEXT:    movdqa %xmm0, %xmm1
8461; SSE3-NEXT:    psrlw $1, %xmm1
8462; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
8463; SSE3-NEXT:    psubb %xmm1, %xmm0
8464; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
8465; SSE3-NEXT:    movdqa %xmm0, %xmm2
8466; SSE3-NEXT:    pand %xmm1, %xmm2
8467; SSE3-NEXT:    psrlw $2, %xmm0
8468; SSE3-NEXT:    pand %xmm1, %xmm0
8469; SSE3-NEXT:    paddb %xmm2, %xmm0
8470; SSE3-NEXT:    movdqa %xmm0, %xmm1
8471; SSE3-NEXT:    psrlw $4, %xmm1
8472; SSE3-NEXT:    paddb %xmm0, %xmm1
8473; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
8474; SSE3-NEXT:    pxor %xmm0, %xmm0
8475; SSE3-NEXT:    movdqa %xmm1, %xmm2
8476; SSE3-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
8477; SSE3-NEXT:    psadbw %xmm0, %xmm2
8478; SSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
8479; SSE3-NEXT:    psadbw %xmm0, %xmm1
8480; SSE3-NEXT:    packuswb %xmm2, %xmm1
8481; SSE3-NEXT:    movdqa {{.*#+}} xmm0 = [9,9,9,9]
8482; SSE3-NEXT:    pcmpgtd %xmm1, %xmm0
8483; SSE3-NEXT:    retq
8484;
8485; SSSE3-LABEL: ult_9_v4i32:
8486; SSSE3:       # %bb.0:
8487; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
8488; SSSE3-NEXT:    movdqa %xmm0, %xmm2
8489; SSSE3-NEXT:    pand %xmm1, %xmm2
8490; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
8491; SSSE3-NEXT:    movdqa %xmm3, %xmm4
8492; SSSE3-NEXT:    pshufb %xmm2, %xmm4
8493; SSSE3-NEXT:    psrlw $4, %xmm0
8494; SSSE3-NEXT:    pand %xmm1, %xmm0
8495; SSSE3-NEXT:    pshufb %xmm0, %xmm3
8496; SSSE3-NEXT:    paddb %xmm4, %xmm3
8497; SSSE3-NEXT:    pxor %xmm0, %xmm0
8498; SSSE3-NEXT:    movdqa %xmm3, %xmm1
8499; SSSE3-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
8500; SSSE3-NEXT:    psadbw %xmm0, %xmm1
8501; SSSE3-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
8502; SSSE3-NEXT:    psadbw %xmm0, %xmm3
8503; SSSE3-NEXT:    packuswb %xmm1, %xmm3
8504; SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [9,9,9,9]
8505; SSSE3-NEXT:    pcmpgtd %xmm3, %xmm0
8506; SSSE3-NEXT:    retq
8507;
8508; SSE41-LABEL: ult_9_v4i32:
8509; SSE41:       # %bb.0:
8510; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
8511; SSE41-NEXT:    movdqa %xmm0, %xmm2
8512; SSE41-NEXT:    pand %xmm1, %xmm2
8513; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
8514; SSE41-NEXT:    movdqa %xmm3, %xmm4
8515; SSE41-NEXT:    pshufb %xmm2, %xmm4
8516; SSE41-NEXT:    psrlw $4, %xmm0
8517; SSE41-NEXT:    pand %xmm1, %xmm0
8518; SSE41-NEXT:    pshufb %xmm0, %xmm3
8519; SSE41-NEXT:    paddb %xmm4, %xmm3
8520; SSE41-NEXT:    pxor %xmm0, %xmm0
8521; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero
8522; SSE41-NEXT:    punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3]
8523; SSE41-NEXT:    psadbw %xmm0, %xmm3
8524; SSE41-NEXT:    psadbw %xmm0, %xmm1
8525; SSE41-NEXT:    packuswb %xmm3, %xmm1
8526; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [9,9,9,9]
8527; SSE41-NEXT:    pcmpgtd %xmm1, %xmm0
8528; SSE41-NEXT:    retq
8529;
8530; AVX1-LABEL: ult_9_v4i32:
8531; AVX1:       # %bb.0:
8532; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
8533; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
8534; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
8535; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
8536; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
8537; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
8538; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
8539; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
8540; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
8541; AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
8542; AVX1-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
8543; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
8544; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
8545; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
8546; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [9,9,9,9]
8547; AVX1-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
8548; AVX1-NEXT:    retq
8549;
8550; AVX2-LABEL: ult_9_v4i32:
8551; AVX2:       # %bb.0:
8552; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
8553; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
8554; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
8555; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
8556; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
8557; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
8558; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
8559; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
8560; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
8561; AVX2-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
8562; AVX2-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
8563; AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
8564; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
8565; AVX2-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
8566; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [9,9,9,9]
8567; AVX2-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
8568; AVX2-NEXT:    retq
8569;
8570; AVX512VPOPCNTDQ-LABEL: ult_9_v4i32:
8571; AVX512VPOPCNTDQ:       # %bb.0:
8572; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
8573; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
8574; AVX512VPOPCNTDQ-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [9,9,9,9]
8575; AVX512VPOPCNTDQ-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
8576; AVX512VPOPCNTDQ-NEXT:    vzeroupper
8577; AVX512VPOPCNTDQ-NEXT:    retq
8578;
8579; AVX512VPOPCNTDQVL-LABEL: ult_9_v4i32:
8580; AVX512VPOPCNTDQVL:       # %bb.0:
8581; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
8582; AVX512VPOPCNTDQVL-NEXT:    vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1
8583; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
8584; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
8585; AVX512VPOPCNTDQVL-NEXT:    retq
8586;
8587; BITALG_NOVLX-LABEL: ult_9_v4i32:
8588; BITALG_NOVLX:       # %bb.0:
8589; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
8590; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
8591; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
8592; BITALG_NOVLX-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
8593; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
8594; BITALG_NOVLX-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
8595; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
8596; BITALG_NOVLX-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
8597; BITALG_NOVLX-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [9,9,9,9]
8598; BITALG_NOVLX-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
8599; BITALG_NOVLX-NEXT:    vzeroupper
8600; BITALG_NOVLX-NEXT:    retq
8601;
8602; BITALG-LABEL: ult_9_v4i32:
8603; BITALG:       # %bb.0:
8604; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
8605; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
8606; BITALG-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
8607; BITALG-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
8608; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
8609; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
8610; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
8611; BITALG-NEXT:    vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1
8612; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
8613; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
8614; BITALG-NEXT:    retq
8615  %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
8616  %3 = icmp ult <4 x i32> %2, <i32 9, i32 9, i32 9, i32 9>
8617  %4 = sext <4 x i1> %3 to <4 x i32>
8618  ret <4 x i32> %4
8619}
8620
; Per-lane v4i32 popcount compared unsigned-greater-than 9; result is the
; sext of the i1 compare (all-ones mask per passing lane). CHECK lines are
; autogenerated by utils/update_llc_test_checks.py -- do not edit by hand.
8621define <4 x i32> @ugt_9_v4i32(<4 x i32> %0) {
8622; SSE2-LABEL: ugt_9_v4i32:
8623; SSE2:       # %bb.0:
8624; SSE2-NEXT:    movdqa %xmm0, %xmm1
8625; SSE2-NEXT:    psrlw $1, %xmm1
8626; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
8627; SSE2-NEXT:    psubb %xmm1, %xmm0
8628; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
8629; SSE2-NEXT:    movdqa %xmm0, %xmm2
8630; SSE2-NEXT:    pand %xmm1, %xmm2
8631; SSE2-NEXT:    psrlw $2, %xmm0
8632; SSE2-NEXT:    pand %xmm1, %xmm0
8633; SSE2-NEXT:    paddb %xmm2, %xmm0
8634; SSE2-NEXT:    movdqa %xmm0, %xmm1
8635; SSE2-NEXT:    psrlw $4, %xmm1
8636; SSE2-NEXT:    paddb %xmm0, %xmm1
8637; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
8638; SSE2-NEXT:    pxor %xmm0, %xmm0
8639; SSE2-NEXT:    movdqa %xmm1, %xmm2
8640; SSE2-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
8641; SSE2-NEXT:    psadbw %xmm0, %xmm2
8642; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
8643; SSE2-NEXT:    psadbw %xmm0, %xmm1
8644; SSE2-NEXT:    packuswb %xmm2, %xmm1
8645; SSE2-NEXT:    pcmpgtd {{.*}}(%rip), %xmm1
8646; SSE2-NEXT:    movdqa %xmm1, %xmm0
8647; SSE2-NEXT:    retq
8648;
8649; SSE3-LABEL: ugt_9_v4i32:
8650; SSE3:       # %bb.0:
8651; SSE3-NEXT:    movdqa %xmm0, %xmm1
8652; SSE3-NEXT:    psrlw $1, %xmm1
8653; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
8654; SSE3-NEXT:    psubb %xmm1, %xmm0
8655; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
8656; SSE3-NEXT:    movdqa %xmm0, %xmm2
8657; SSE3-NEXT:    pand %xmm1, %xmm2
8658; SSE3-NEXT:    psrlw $2, %xmm0
8659; SSE3-NEXT:    pand %xmm1, %xmm0
8660; SSE3-NEXT:    paddb %xmm2, %xmm0
8661; SSE3-NEXT:    movdqa %xmm0, %xmm1
8662; SSE3-NEXT:    psrlw $4, %xmm1
8663; SSE3-NEXT:    paddb %xmm0, %xmm1
8664; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
8665; SSE3-NEXT:    pxor %xmm0, %xmm0
8666; SSE3-NEXT:    movdqa %xmm1, %xmm2
8667; SSE3-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
8668; SSE3-NEXT:    psadbw %xmm0, %xmm2
8669; SSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
8670; SSE3-NEXT:    psadbw %xmm0, %xmm1
8671; SSE3-NEXT:    packuswb %xmm2, %xmm1
8672; SSE3-NEXT:    pcmpgtd {{.*}}(%rip), %xmm1
8673; SSE3-NEXT:    movdqa %xmm1, %xmm0
8674; SSE3-NEXT:    retq
8675;
8676; SSSE3-LABEL: ugt_9_v4i32:
8677; SSSE3:       # %bb.0:
8678; SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
8679; SSSE3-NEXT:    movdqa %xmm0, %xmm3
8680; SSSE3-NEXT:    pand %xmm2, %xmm3
8681; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
8682; SSSE3-NEXT:    movdqa %xmm1, %xmm4
8683; SSSE3-NEXT:    pshufb %xmm3, %xmm4
8684; SSSE3-NEXT:    psrlw $4, %xmm0
8685; SSSE3-NEXT:    pand %xmm2, %xmm0
8686; SSSE3-NEXT:    pshufb %xmm0, %xmm1
8687; SSSE3-NEXT:    paddb %xmm4, %xmm1
8688; SSSE3-NEXT:    pxor %xmm0, %xmm0
8689; SSSE3-NEXT:    movdqa %xmm1, %xmm2
8690; SSSE3-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
8691; SSSE3-NEXT:    psadbw %xmm0, %xmm2
8692; SSSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
8693; SSSE3-NEXT:    psadbw %xmm0, %xmm1
8694; SSSE3-NEXT:    packuswb %xmm2, %xmm1
8695; SSSE3-NEXT:    pcmpgtd {{.*}}(%rip), %xmm1
8696; SSSE3-NEXT:    movdqa %xmm1, %xmm0
8697; SSSE3-NEXT:    retq
8698;
8699; SSE41-LABEL: ugt_9_v4i32:
8700; SSE41:       # %bb.0:
8701; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
8702; SSE41-NEXT:    movdqa %xmm0, %xmm2
8703; SSE41-NEXT:    pand %xmm1, %xmm2
8704; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
8705; SSE41-NEXT:    movdqa %xmm3, %xmm4
8706; SSE41-NEXT:    pshufb %xmm2, %xmm4
8707; SSE41-NEXT:    psrlw $4, %xmm0
8708; SSE41-NEXT:    pand %xmm1, %xmm0
8709; SSE41-NEXT:    pshufb %xmm0, %xmm3
8710; SSE41-NEXT:    paddb %xmm4, %xmm3
8711; SSE41-NEXT:    pxor %xmm1, %xmm1
8712; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero
8713; SSE41-NEXT:    punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3]
8714; SSE41-NEXT:    psadbw %xmm1, %xmm3
8715; SSE41-NEXT:    psadbw %xmm1, %xmm0
8716; SSE41-NEXT:    packuswb %xmm3, %xmm0
8717; SSE41-NEXT:    pcmpgtd {{.*}}(%rip), %xmm0
8718; SSE41-NEXT:    retq
8719;
8720; AVX1-LABEL: ugt_9_v4i32:
8721; AVX1:       # %bb.0:
8722; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
8723; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
8724; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
8725; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
8726; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
8727; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
8728; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
8729; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
8730; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
8731; AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
8732; AVX1-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
8733; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
8734; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
8735; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
8736; AVX1-NEXT:    vpcmpgtd {{.*}}(%rip), %xmm0, %xmm0
8737; AVX1-NEXT:    retq
8738;
8739; AVX2-LABEL: ugt_9_v4i32:
8740; AVX2:       # %bb.0:
8741; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
8742; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
8743; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
8744; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
8745; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
8746; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
8747; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
8748; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
8749; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
8750; AVX2-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
8751; AVX2-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
8752; AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
8753; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
8754; AVX2-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
8755; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [9,9,9,9]
8756; AVX2-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
8757; AVX2-NEXT:    retq
8758;
8759; AVX512VPOPCNTDQ-LABEL: ugt_9_v4i32:
8760; AVX512VPOPCNTDQ:       # %bb.0:
8761; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
8762; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
8763; AVX512VPOPCNTDQ-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [9,9,9,9]
8764; AVX512VPOPCNTDQ-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
8765; AVX512VPOPCNTDQ-NEXT:    vzeroupper
8766; AVX512VPOPCNTDQ-NEXT:    retq
8767;
8768; AVX512VPOPCNTDQVL-LABEL: ugt_9_v4i32:
8769; AVX512VPOPCNTDQVL:       # %bb.0:
8770; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
8771; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1
8772; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
8773; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
8774; AVX512VPOPCNTDQVL-NEXT:    retq
8775;
8776; BITALG_NOVLX-LABEL: ugt_9_v4i32:
8777; BITALG_NOVLX:       # %bb.0:
8778; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
8779; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
8780; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
8781; BITALG_NOVLX-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
8782; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
8783; BITALG_NOVLX-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
8784; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
8785; BITALG_NOVLX-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
8786; BITALG_NOVLX-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [9,9,9,9]
8787; BITALG_NOVLX-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
8788; BITALG_NOVLX-NEXT:    vzeroupper
8789; BITALG_NOVLX-NEXT:    retq
8790;
8791; BITALG-LABEL: ugt_9_v4i32:
8792; BITALG:       # %bb.0:
8793; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
8794; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
8795; BITALG-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
8796; BITALG-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
8797; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
8798; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
8799; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
8800; BITALG-NEXT:    vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1
8801; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
8802; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
8803; BITALG-NEXT:    retq
; IR under test: ctpop of each lane, unsigned compare against splat(9), sext mask.
8804  %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
8805  %3 = icmp ugt <4 x i32> %2, <i32 9, i32 9, i32 9, i32 9>
8806  %4 = sext <4 x i1> %3 to <4 x i32>
8807  ret <4 x i32> %4
8808}
8809
; Per-lane v4i32 popcount compared unsigned-less-than 10; result is the
; sext of the i1 compare (all-ones mask per passing lane). CHECK lines are
; autogenerated by utils/update_llc_test_checks.py -- do not edit by hand.
8810define <4 x i32> @ult_10_v4i32(<4 x i32> %0) {
8811; SSE2-LABEL: ult_10_v4i32:
8812; SSE2:       # %bb.0:
8813; SSE2-NEXT:    movdqa %xmm0, %xmm1
8814; SSE2-NEXT:    psrlw $1, %xmm1
8815; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
8816; SSE2-NEXT:    psubb %xmm1, %xmm0
8817; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
8818; SSE2-NEXT:    movdqa %xmm0, %xmm2
8819; SSE2-NEXT:    pand %xmm1, %xmm2
8820; SSE2-NEXT:    psrlw $2, %xmm0
8821; SSE2-NEXT:    pand %xmm1, %xmm0
8822; SSE2-NEXT:    paddb %xmm2, %xmm0
8823; SSE2-NEXT:    movdqa %xmm0, %xmm1
8824; SSE2-NEXT:    psrlw $4, %xmm1
8825; SSE2-NEXT:    paddb %xmm0, %xmm1
8826; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
8827; SSE2-NEXT:    pxor %xmm0, %xmm0
8828; SSE2-NEXT:    movdqa %xmm1, %xmm2
8829; SSE2-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
8830; SSE2-NEXT:    psadbw %xmm0, %xmm2
8831; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
8832; SSE2-NEXT:    psadbw %xmm0, %xmm1
8833; SSE2-NEXT:    packuswb %xmm2, %xmm1
8834; SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [10,10,10,10]
8835; SSE2-NEXT:    pcmpgtd %xmm1, %xmm0
8836; SSE2-NEXT:    retq
8837;
8838; SSE3-LABEL: ult_10_v4i32:
8839; SSE3:       # %bb.0:
8840; SSE3-NEXT:    movdqa %xmm0, %xmm1
8841; SSE3-NEXT:    psrlw $1, %xmm1
8842; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
8843; SSE3-NEXT:    psubb %xmm1, %xmm0
8844; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
8845; SSE3-NEXT:    movdqa %xmm0, %xmm2
8846; SSE3-NEXT:    pand %xmm1, %xmm2
8847; SSE3-NEXT:    psrlw $2, %xmm0
8848; SSE3-NEXT:    pand %xmm1, %xmm0
8849; SSE3-NEXT:    paddb %xmm2, %xmm0
8850; SSE3-NEXT:    movdqa %xmm0, %xmm1
8851; SSE3-NEXT:    psrlw $4, %xmm1
8852; SSE3-NEXT:    paddb %xmm0, %xmm1
8853; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
8854; SSE3-NEXT:    pxor %xmm0, %xmm0
8855; SSE3-NEXT:    movdqa %xmm1, %xmm2
8856; SSE3-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
8857; SSE3-NEXT:    psadbw %xmm0, %xmm2
8858; SSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
8859; SSE3-NEXT:    psadbw %xmm0, %xmm1
8860; SSE3-NEXT:    packuswb %xmm2, %xmm1
8861; SSE3-NEXT:    movdqa {{.*#+}} xmm0 = [10,10,10,10]
8862; SSE3-NEXT:    pcmpgtd %xmm1, %xmm0
8863; SSE3-NEXT:    retq
8864;
8865; SSSE3-LABEL: ult_10_v4i32:
8866; SSSE3:       # %bb.0:
8867; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
8868; SSSE3-NEXT:    movdqa %xmm0, %xmm2
8869; SSSE3-NEXT:    pand %xmm1, %xmm2
8870; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
8871; SSSE3-NEXT:    movdqa %xmm3, %xmm4
8872; SSSE3-NEXT:    pshufb %xmm2, %xmm4
8873; SSSE3-NEXT:    psrlw $4, %xmm0
8874; SSSE3-NEXT:    pand %xmm1, %xmm0
8875; SSSE3-NEXT:    pshufb %xmm0, %xmm3
8876; SSSE3-NEXT:    paddb %xmm4, %xmm3
8877; SSSE3-NEXT:    pxor %xmm0, %xmm0
8878; SSSE3-NEXT:    movdqa %xmm3, %xmm1
8879; SSSE3-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
8880; SSSE3-NEXT:    psadbw %xmm0, %xmm1
8881; SSSE3-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
8882; SSSE3-NEXT:    psadbw %xmm0, %xmm3
8883; SSSE3-NEXT:    packuswb %xmm1, %xmm3
8884; SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [10,10,10,10]
8885; SSSE3-NEXT:    pcmpgtd %xmm3, %xmm0
8886; SSSE3-NEXT:    retq
8887;
8888; SSE41-LABEL: ult_10_v4i32:
8889; SSE41:       # %bb.0:
8890; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
8891; SSE41-NEXT:    movdqa %xmm0, %xmm2
8892; SSE41-NEXT:    pand %xmm1, %xmm2
8893; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
8894; SSE41-NEXT:    movdqa %xmm3, %xmm4
8895; SSE41-NEXT:    pshufb %xmm2, %xmm4
8896; SSE41-NEXT:    psrlw $4, %xmm0
8897; SSE41-NEXT:    pand %xmm1, %xmm0
8898; SSE41-NEXT:    pshufb %xmm0, %xmm3
8899; SSE41-NEXT:    paddb %xmm4, %xmm3
8900; SSE41-NEXT:    pxor %xmm0, %xmm0
8901; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero
8902; SSE41-NEXT:    punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3]
8903; SSE41-NEXT:    psadbw %xmm0, %xmm3
8904; SSE41-NEXT:    psadbw %xmm0, %xmm1
8905; SSE41-NEXT:    packuswb %xmm3, %xmm1
8906; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [10,10,10,10]
8907; SSE41-NEXT:    pcmpgtd %xmm1, %xmm0
8908; SSE41-NEXT:    retq
8909;
8910; AVX1-LABEL: ult_10_v4i32:
8911; AVX1:       # %bb.0:
8912; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
8913; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
8914; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
8915; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
8916; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
8917; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
8918; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
8919; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
8920; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
8921; AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
8922; AVX1-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
8923; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
8924; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
8925; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
8926; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [10,10,10,10]
8927; AVX1-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
8928; AVX1-NEXT:    retq
8929;
8930; AVX2-LABEL: ult_10_v4i32:
8931; AVX2:       # %bb.0:
8932; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
8933; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
8934; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
8935; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
8936; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
8937; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
8938; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
8939; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
8940; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
8941; AVX2-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
8942; AVX2-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
8943; AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
8944; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
8945; AVX2-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
8946; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [10,10,10,10]
8947; AVX2-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
8948; AVX2-NEXT:    retq
8949;
8950; AVX512VPOPCNTDQ-LABEL: ult_10_v4i32:
8951; AVX512VPOPCNTDQ:       # %bb.0:
8952; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
8953; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
8954; AVX512VPOPCNTDQ-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [10,10,10,10]
8955; AVX512VPOPCNTDQ-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
8956; AVX512VPOPCNTDQ-NEXT:    vzeroupper
8957; AVX512VPOPCNTDQ-NEXT:    retq
8958;
8959; AVX512VPOPCNTDQVL-LABEL: ult_10_v4i32:
8960; AVX512VPOPCNTDQVL:       # %bb.0:
8961; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
8962; AVX512VPOPCNTDQVL-NEXT:    vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1
8963; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
8964; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
8965; AVX512VPOPCNTDQVL-NEXT:    retq
8966;
8967; BITALG_NOVLX-LABEL: ult_10_v4i32:
8968; BITALG_NOVLX:       # %bb.0:
8969; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
8970; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
8971; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
8972; BITALG_NOVLX-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
8973; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
8974; BITALG_NOVLX-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
8975; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
8976; BITALG_NOVLX-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
8977; BITALG_NOVLX-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [10,10,10,10]
8978; BITALG_NOVLX-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
8979; BITALG_NOVLX-NEXT:    vzeroupper
8980; BITALG_NOVLX-NEXT:    retq
8981;
8982; BITALG-LABEL: ult_10_v4i32:
8983; BITALG:       # %bb.0:
8984; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
8985; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
8986; BITALG-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
8987; BITALG-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
8988; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
8989; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
8990; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
8991; BITALG-NEXT:    vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1
8992; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
8993; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
8994; BITALG-NEXT:    retq
; IR under test: ctpop of each lane, unsigned compare against splat(10), sext mask.
8995  %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
8996  %3 = icmp ult <4 x i32> %2, <i32 10, i32 10, i32 10, i32 10>
8997  %4 = sext <4 x i1> %3 to <4 x i32>
8998  ret <4 x i32> %4
8999}
9000
; Per-lane v4i32 popcount compared unsigned-greater-than 10; result is the
; sext of the i1 compare (all-ones mask per passing lane). CHECK lines are
; autogenerated by utils/update_llc_test_checks.py -- do not edit by hand.
9001define <4 x i32> @ugt_10_v4i32(<4 x i32> %0) {
9002; SSE2-LABEL: ugt_10_v4i32:
9003; SSE2:       # %bb.0:
9004; SSE2-NEXT:    movdqa %xmm0, %xmm1
9005; SSE2-NEXT:    psrlw $1, %xmm1
9006; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
9007; SSE2-NEXT:    psubb %xmm1, %xmm0
9008; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
9009; SSE2-NEXT:    movdqa %xmm0, %xmm2
9010; SSE2-NEXT:    pand %xmm1, %xmm2
9011; SSE2-NEXT:    psrlw $2, %xmm0
9012; SSE2-NEXT:    pand %xmm1, %xmm0
9013; SSE2-NEXT:    paddb %xmm2, %xmm0
9014; SSE2-NEXT:    movdqa %xmm0, %xmm1
9015; SSE2-NEXT:    psrlw $4, %xmm1
9016; SSE2-NEXT:    paddb %xmm0, %xmm1
9017; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
9018; SSE2-NEXT:    pxor %xmm0, %xmm0
9019; SSE2-NEXT:    movdqa %xmm1, %xmm2
9020; SSE2-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
9021; SSE2-NEXT:    psadbw %xmm0, %xmm2
9022; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
9023; SSE2-NEXT:    psadbw %xmm0, %xmm1
9024; SSE2-NEXT:    packuswb %xmm2, %xmm1
9025; SSE2-NEXT:    pcmpgtd {{.*}}(%rip), %xmm1
9026; SSE2-NEXT:    movdqa %xmm1, %xmm0
9027; SSE2-NEXT:    retq
9028;
9029; SSE3-LABEL: ugt_10_v4i32:
9030; SSE3:       # %bb.0:
9031; SSE3-NEXT:    movdqa %xmm0, %xmm1
9032; SSE3-NEXT:    psrlw $1, %xmm1
9033; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
9034; SSE3-NEXT:    psubb %xmm1, %xmm0
9035; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
9036; SSE3-NEXT:    movdqa %xmm0, %xmm2
9037; SSE3-NEXT:    pand %xmm1, %xmm2
9038; SSE3-NEXT:    psrlw $2, %xmm0
9039; SSE3-NEXT:    pand %xmm1, %xmm0
9040; SSE3-NEXT:    paddb %xmm2, %xmm0
9041; SSE3-NEXT:    movdqa %xmm0, %xmm1
9042; SSE3-NEXT:    psrlw $4, %xmm1
9043; SSE3-NEXT:    paddb %xmm0, %xmm1
9044; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
9045; SSE3-NEXT:    pxor %xmm0, %xmm0
9046; SSE3-NEXT:    movdqa %xmm1, %xmm2
9047; SSE3-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
9048; SSE3-NEXT:    psadbw %xmm0, %xmm2
9049; SSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
9050; SSE3-NEXT:    psadbw %xmm0, %xmm1
9051; SSE3-NEXT:    packuswb %xmm2, %xmm1
9052; SSE3-NEXT:    pcmpgtd {{.*}}(%rip), %xmm1
9053; SSE3-NEXT:    movdqa %xmm1, %xmm0
9054; SSE3-NEXT:    retq
9055;
9056; SSSE3-LABEL: ugt_10_v4i32:
9057; SSSE3:       # %bb.0:
9058; SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
9059; SSSE3-NEXT:    movdqa %xmm0, %xmm3
9060; SSSE3-NEXT:    pand %xmm2, %xmm3
9061; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
9062; SSSE3-NEXT:    movdqa %xmm1, %xmm4
9063; SSSE3-NEXT:    pshufb %xmm3, %xmm4
9064; SSSE3-NEXT:    psrlw $4, %xmm0
9065; SSSE3-NEXT:    pand %xmm2, %xmm0
9066; SSSE3-NEXT:    pshufb %xmm0, %xmm1
9067; SSSE3-NEXT:    paddb %xmm4, %xmm1
9068; SSSE3-NEXT:    pxor %xmm0, %xmm0
9069; SSSE3-NEXT:    movdqa %xmm1, %xmm2
9070; SSSE3-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
9071; SSSE3-NEXT:    psadbw %xmm0, %xmm2
9072; SSSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
9073; SSSE3-NEXT:    psadbw %xmm0, %xmm1
9074; SSSE3-NEXT:    packuswb %xmm2, %xmm1
9075; SSSE3-NEXT:    pcmpgtd {{.*}}(%rip), %xmm1
9076; SSSE3-NEXT:    movdqa %xmm1, %xmm0
9077; SSSE3-NEXT:    retq
9078;
9079; SSE41-LABEL: ugt_10_v4i32:
9080; SSE41:       # %bb.0:
9081; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
9082; SSE41-NEXT:    movdqa %xmm0, %xmm2
9083; SSE41-NEXT:    pand %xmm1, %xmm2
9084; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
9085; SSE41-NEXT:    movdqa %xmm3, %xmm4
9086; SSE41-NEXT:    pshufb %xmm2, %xmm4
9087; SSE41-NEXT:    psrlw $4, %xmm0
9088; SSE41-NEXT:    pand %xmm1, %xmm0
9089; SSE41-NEXT:    pshufb %xmm0, %xmm3
9090; SSE41-NEXT:    paddb %xmm4, %xmm3
9091; SSE41-NEXT:    pxor %xmm1, %xmm1
9092; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero
9093; SSE41-NEXT:    punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3]
9094; SSE41-NEXT:    psadbw %xmm1, %xmm3
9095; SSE41-NEXT:    psadbw %xmm1, %xmm0
9096; SSE41-NEXT:    packuswb %xmm3, %xmm0
9097; SSE41-NEXT:    pcmpgtd {{.*}}(%rip), %xmm0
9098; SSE41-NEXT:    retq
9099;
9100; AVX1-LABEL: ugt_10_v4i32:
9101; AVX1:       # %bb.0:
9102; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
9103; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
9104; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
9105; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
9106; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
9107; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
9108; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
9109; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
9110; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
9111; AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
9112; AVX1-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
9113; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
9114; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
9115; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
9116; AVX1-NEXT:    vpcmpgtd {{.*}}(%rip), %xmm0, %xmm0
9117; AVX1-NEXT:    retq
9118;
9119; AVX2-LABEL: ugt_10_v4i32:
9120; AVX2:       # %bb.0:
9121; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
9122; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
9123; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
9124; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
9125; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
9126; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
9127; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
9128; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
9129; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
9130; AVX2-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
9131; AVX2-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
9132; AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
9133; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
9134; AVX2-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
9135; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [10,10,10,10]
9136; AVX2-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
9137; AVX2-NEXT:    retq
9138;
9139; AVX512VPOPCNTDQ-LABEL: ugt_10_v4i32:
9140; AVX512VPOPCNTDQ:       # %bb.0:
9141; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
9142; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
9143; AVX512VPOPCNTDQ-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [10,10,10,10]
9144; AVX512VPOPCNTDQ-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
9145; AVX512VPOPCNTDQ-NEXT:    vzeroupper
9146; AVX512VPOPCNTDQ-NEXT:    retq
9147;
9148; AVX512VPOPCNTDQVL-LABEL: ugt_10_v4i32:
9149; AVX512VPOPCNTDQVL:       # %bb.0:
9150; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
9151; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1
9152; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
9153; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
9154; AVX512VPOPCNTDQVL-NEXT:    retq
9155;
9156; BITALG_NOVLX-LABEL: ugt_10_v4i32:
9157; BITALG_NOVLX:       # %bb.0:
9158; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
9159; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
9160; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
9161; BITALG_NOVLX-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
9162; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
9163; BITALG_NOVLX-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
9164; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
9165; BITALG_NOVLX-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
9166; BITALG_NOVLX-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [10,10,10,10]
9167; BITALG_NOVLX-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
9168; BITALG_NOVLX-NEXT:    vzeroupper
9169; BITALG_NOVLX-NEXT:    retq
9170;
9171; BITALG-LABEL: ugt_10_v4i32:
9172; BITALG:       # %bb.0:
9173; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
9174; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
9175; BITALG-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
9176; BITALG-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
9177; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
9178; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
9179; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
9180; BITALG-NEXT:    vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1
9181; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
9182; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
9183; BITALG-NEXT:    retq
; IR under test: ctpop of each lane, unsigned compare against splat(10), sext mask.
9184  %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
9185  %3 = icmp ugt <4 x i32> %2, <i32 10, i32 10, i32 10, i32 10>
9186  %4 = sext <4 x i1> %3 to <4 x i32>
9187  ret <4 x i32> %4
9188}
9189
9190define <4 x i32> @ult_11_v4i32(<4 x i32> %0) {
9191; SSE2-LABEL: ult_11_v4i32:
9192; SSE2:       # %bb.0:
9193; SSE2-NEXT:    movdqa %xmm0, %xmm1
9194; SSE2-NEXT:    psrlw $1, %xmm1
9195; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
9196; SSE2-NEXT:    psubb %xmm1, %xmm0
9197; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
9198; SSE2-NEXT:    movdqa %xmm0, %xmm2
9199; SSE2-NEXT:    pand %xmm1, %xmm2
9200; SSE2-NEXT:    psrlw $2, %xmm0
9201; SSE2-NEXT:    pand %xmm1, %xmm0
9202; SSE2-NEXT:    paddb %xmm2, %xmm0
9203; SSE2-NEXT:    movdqa %xmm0, %xmm1
9204; SSE2-NEXT:    psrlw $4, %xmm1
9205; SSE2-NEXT:    paddb %xmm0, %xmm1
9206; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
9207; SSE2-NEXT:    pxor %xmm0, %xmm0
9208; SSE2-NEXT:    movdqa %xmm1, %xmm2
9209; SSE2-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
9210; SSE2-NEXT:    psadbw %xmm0, %xmm2
9211; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
9212; SSE2-NEXT:    psadbw %xmm0, %xmm1
9213; SSE2-NEXT:    packuswb %xmm2, %xmm1
9214; SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [11,11,11,11]
9215; SSE2-NEXT:    pcmpgtd %xmm1, %xmm0
9216; SSE2-NEXT:    retq
9217;
9218; SSE3-LABEL: ult_11_v4i32:
9219; SSE3:       # %bb.0:
9220; SSE3-NEXT:    movdqa %xmm0, %xmm1
9221; SSE3-NEXT:    psrlw $1, %xmm1
9222; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
9223; SSE3-NEXT:    psubb %xmm1, %xmm0
9224; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
9225; SSE3-NEXT:    movdqa %xmm0, %xmm2
9226; SSE3-NEXT:    pand %xmm1, %xmm2
9227; SSE3-NEXT:    psrlw $2, %xmm0
9228; SSE3-NEXT:    pand %xmm1, %xmm0
9229; SSE3-NEXT:    paddb %xmm2, %xmm0
9230; SSE3-NEXT:    movdqa %xmm0, %xmm1
9231; SSE3-NEXT:    psrlw $4, %xmm1
9232; SSE3-NEXT:    paddb %xmm0, %xmm1
9233; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
9234; SSE3-NEXT:    pxor %xmm0, %xmm0
9235; SSE3-NEXT:    movdqa %xmm1, %xmm2
9236; SSE3-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
9237; SSE3-NEXT:    psadbw %xmm0, %xmm2
9238; SSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
9239; SSE3-NEXT:    psadbw %xmm0, %xmm1
9240; SSE3-NEXT:    packuswb %xmm2, %xmm1
9241; SSE3-NEXT:    movdqa {{.*#+}} xmm0 = [11,11,11,11]
9242; SSE3-NEXT:    pcmpgtd %xmm1, %xmm0
9243; SSE3-NEXT:    retq
9244;
9245; SSSE3-LABEL: ult_11_v4i32:
9246; SSSE3:       # %bb.0:
9247; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
9248; SSSE3-NEXT:    movdqa %xmm0, %xmm2
9249; SSSE3-NEXT:    pand %xmm1, %xmm2
9250; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
9251; SSSE3-NEXT:    movdqa %xmm3, %xmm4
9252; SSSE3-NEXT:    pshufb %xmm2, %xmm4
9253; SSSE3-NEXT:    psrlw $4, %xmm0
9254; SSSE3-NEXT:    pand %xmm1, %xmm0
9255; SSSE3-NEXT:    pshufb %xmm0, %xmm3
9256; SSSE3-NEXT:    paddb %xmm4, %xmm3
9257; SSSE3-NEXT:    pxor %xmm0, %xmm0
9258; SSSE3-NEXT:    movdqa %xmm3, %xmm1
9259; SSSE3-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
9260; SSSE3-NEXT:    psadbw %xmm0, %xmm1
9261; SSSE3-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
9262; SSSE3-NEXT:    psadbw %xmm0, %xmm3
9263; SSSE3-NEXT:    packuswb %xmm1, %xmm3
9264; SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [11,11,11,11]
9265; SSSE3-NEXT:    pcmpgtd %xmm3, %xmm0
9266; SSSE3-NEXT:    retq
9267;
9268; SSE41-LABEL: ult_11_v4i32:
9269; SSE41:       # %bb.0:
9270; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
9271; SSE41-NEXT:    movdqa %xmm0, %xmm2
9272; SSE41-NEXT:    pand %xmm1, %xmm2
9273; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
9274; SSE41-NEXT:    movdqa %xmm3, %xmm4
9275; SSE41-NEXT:    pshufb %xmm2, %xmm4
9276; SSE41-NEXT:    psrlw $4, %xmm0
9277; SSE41-NEXT:    pand %xmm1, %xmm0
9278; SSE41-NEXT:    pshufb %xmm0, %xmm3
9279; SSE41-NEXT:    paddb %xmm4, %xmm3
9280; SSE41-NEXT:    pxor %xmm0, %xmm0
9281; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero
9282; SSE41-NEXT:    punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3]
9283; SSE41-NEXT:    psadbw %xmm0, %xmm3
9284; SSE41-NEXT:    psadbw %xmm0, %xmm1
9285; SSE41-NEXT:    packuswb %xmm3, %xmm1
9286; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [11,11,11,11]
9287; SSE41-NEXT:    pcmpgtd %xmm1, %xmm0
9288; SSE41-NEXT:    retq
9289;
9290; AVX1-LABEL: ult_11_v4i32:
9291; AVX1:       # %bb.0:
9292; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
9293; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
9294; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
9295; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
9296; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
9297; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
9298; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
9299; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
9300; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
9301; AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
9302; AVX1-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
9303; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
9304; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
9305; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
9306; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [11,11,11,11]
9307; AVX1-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
9308; AVX1-NEXT:    retq
9309;
9310; AVX2-LABEL: ult_11_v4i32:
9311; AVX2:       # %bb.0:
9312; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
9313; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
9314; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
9315; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
9316; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
9317; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
9318; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
9319; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
9320; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
9321; AVX2-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
9322; AVX2-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
9323; AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
9324; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
9325; AVX2-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
9326; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [11,11,11,11]
9327; AVX2-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
9328; AVX2-NEXT:    retq
9329;
9330; AVX512VPOPCNTDQ-LABEL: ult_11_v4i32:
9331; AVX512VPOPCNTDQ:       # %bb.0:
9332; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
9333; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
9334; AVX512VPOPCNTDQ-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [11,11,11,11]
9335; AVX512VPOPCNTDQ-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
9336; AVX512VPOPCNTDQ-NEXT:    vzeroupper
9337; AVX512VPOPCNTDQ-NEXT:    retq
9338;
9339; AVX512VPOPCNTDQVL-LABEL: ult_11_v4i32:
9340; AVX512VPOPCNTDQVL:       # %bb.0:
9341; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
9342; AVX512VPOPCNTDQVL-NEXT:    vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1
9343; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
9344; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
9345; AVX512VPOPCNTDQVL-NEXT:    retq
9346;
9347; BITALG_NOVLX-LABEL: ult_11_v4i32:
9348; BITALG_NOVLX:       # %bb.0:
9349; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
9350; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
9351; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
9352; BITALG_NOVLX-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
9353; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
9354; BITALG_NOVLX-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
9355; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
9356; BITALG_NOVLX-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
9357; BITALG_NOVLX-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [11,11,11,11]
9358; BITALG_NOVLX-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
9359; BITALG_NOVLX-NEXT:    vzeroupper
9360; BITALG_NOVLX-NEXT:    retq
9361;
9362; BITALG-LABEL: ult_11_v4i32:
9363; BITALG:       # %bb.0:
9364; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
9365; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
9366; BITALG-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
9367; BITALG-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
9368; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
9369; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
9370; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
9371; BITALG-NEXT:    vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1
9372; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
9373; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
9374; BITALG-NEXT:    retq
9375  %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
9376  %3 = icmp ult <4 x i32> %2, <i32 11, i32 11, i32 11, i32 11>
9377  %4 = sext <4 x i1> %3 to <4 x i32>
9378  ret <4 x i32> %4
9379}
9380
; Codegen test: popcount each i32 lane, compare "ugt 11", sext the i1 mask to
; <4 x i32>. Same popcount lowerings as the ult_11 case; the "ugt" direction
; lets SSE targets fold the constant operand into pcmpgtd's memory operand
; and AVX2+ swap vpcmpgtd's operand order instead of materializing a
; reversed compare.
; NOTE(review): CHECK lines below are autogenerated by
; utils/update_llc_test_checks.py — regenerate rather than hand-edit.
define <4 x i32> @ugt_11_v4i32(<4 x i32> %0) {
; SSE2-LABEL: ugt_11_v4i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE2-NEXT:    psadbw %xmm0, %xmm2
; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE2-NEXT:    psadbw %xmm0, %xmm1
; SSE2-NEXT:    packuswb %xmm2, %xmm1
; SSE2-NEXT:    pcmpgtd {{.*}}(%rip), %xmm1
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ugt_11_v4i32:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    movdqa %xmm1, %xmm2
; SSE3-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE3-NEXT:    psadbw %xmm0, %xmm2
; SSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE3-NEXT:    psadbw %xmm0, %xmm1
; SSE3-NEXT:    packuswb %xmm2, %xmm1
; SSE3-NEXT:    pcmpgtd {{.*}}(%rip), %xmm1
; SSE3-NEXT:    movdqa %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ugt_11_v4i32:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm3
; SSSE3-NEXT:    pand %xmm2, %xmm3
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm1, %xmm4
; SSSE3-NEXT:    pshufb %xmm3, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm2, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm1
; SSSE3-NEXT:    paddb %xmm4, %xmm1
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    movdqa %xmm1, %xmm2
; SSSE3-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSSE3-NEXT:    psadbw %xmm0, %xmm2
; SSSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSSE3-NEXT:    psadbw %xmm0, %xmm1
; SSSE3-NEXT:    packuswb %xmm2, %xmm1
; SSSE3-NEXT:    pcmpgtd {{.*}}(%rip), %xmm1
; SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ugt_11_v4i32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm1, %xmm1
; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero
; SSE41-NEXT:    punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3]
; SSE41-NEXT:    psadbw %xmm1, %xmm3
; SSE41-NEXT:    psadbw %xmm1, %xmm0
; SSE41-NEXT:    packuswb %xmm3, %xmm0
; SSE41-NEXT:    pcmpgtd {{.*}}(%rip), %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ugt_11_v4i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpgtd {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ugt_11_v4i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX2-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [11,11,11,11]
; AVX2-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ugt_11_v4i32:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [11,11,11,11]
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ugt_11_v4i32:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ugt_11_v4i32:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; BITALG_NOVLX-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [11,11,11,11]
; BITALG_NOVLX-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ugt_11_v4i32:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; BITALG-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
  %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
  %3 = icmp ugt <4 x i32> %2, <i32 11, i32 11, i32 11, i32 11>
  %4 = sext <4 x i1> %3 to <4 x i32>
  ret <4 x i32> %4
}
9569
; Codegen test: popcount each i32 lane, compare "ult 12", sext the i1 mask to
; <4 x i32>. Identical lowering structure to ult_11_v4i32; only the splat
; compare constant (12) differs.
; NOTE(review): CHECK lines below are autogenerated by
; utils/update_llc_test_checks.py — regenerate rather than hand-edit.
define <4 x i32> @ult_12_v4i32(<4 x i32> %0) {
; SSE2-LABEL: ult_12_v4i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE2-NEXT:    psadbw %xmm0, %xmm2
; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE2-NEXT:    psadbw %xmm0, %xmm1
; SSE2-NEXT:    packuswb %xmm2, %xmm1
; SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [12,12,12,12]
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ult_12_v4i32:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    movdqa %xmm1, %xmm2
; SSE3-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE3-NEXT:    psadbw %xmm0, %xmm2
; SSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE3-NEXT:    psadbw %xmm0, %xmm1
; SSE3-NEXT:    packuswb %xmm2, %xmm1
; SSE3-NEXT:    movdqa {{.*#+}} xmm0 = [12,12,12,12]
; SSE3-NEXT:    pcmpgtd %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ult_12_v4i32:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    movdqa %xmm3, %xmm1
; SSSE3-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; SSSE3-NEXT:    psadbw %xmm0, %xmm1
; SSSE3-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
; SSSE3-NEXT:    psadbw %xmm0, %xmm3
; SSSE3-NEXT:    packuswb %xmm1, %xmm3
; SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [12,12,12,12]
; SSSE3-NEXT:    pcmpgtd %xmm3, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ult_12_v4i32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero
; SSE41-NEXT:    punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3]
; SSE41-NEXT:    psadbw %xmm0, %xmm3
; SSE41-NEXT:    psadbw %xmm0, %xmm1
; SSE41-NEXT:    packuswb %xmm3, %xmm1
; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [12,12,12,12]
; SSE41-NEXT:    pcmpgtd %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ult_12_v4i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [12,12,12,12]
; AVX1-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ult_12_v4i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX2-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [12,12,12,12]
; AVX2-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ult_12_v4i32:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [12,12,12,12]
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ult_12_v4i32:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ult_12_v4i32:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; BITALG_NOVLX-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [12,12,12,12]
; BITALG_NOVLX-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ult_12_v4i32:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; BITALG-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
  %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
  %3 = icmp ult <4 x i32> %2, <i32 12, i32 12, i32 12, i32 12>
  %4 = sext <4 x i1> %3 to <4 x i32>
  ret <4 x i32> %4
}
9760
; Codegen test: popcount each i32 lane, compare "ugt 12", sext the i1 mask to
; <4 x i32>. Identical lowering structure to ugt_11_v4i32; only the splat
; compare constant (12) differs.
; NOTE(review): CHECK lines below are autogenerated by
; utils/update_llc_test_checks.py — regenerate rather than hand-edit.
define <4 x i32> @ugt_12_v4i32(<4 x i32> %0) {
; SSE2-LABEL: ugt_12_v4i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE2-NEXT:    psadbw %xmm0, %xmm2
; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE2-NEXT:    psadbw %xmm0, %xmm1
; SSE2-NEXT:    packuswb %xmm2, %xmm1
; SSE2-NEXT:    pcmpgtd {{.*}}(%rip), %xmm1
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ugt_12_v4i32:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    movdqa %xmm1, %xmm2
; SSE3-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE3-NEXT:    psadbw %xmm0, %xmm2
; SSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE3-NEXT:    psadbw %xmm0, %xmm1
; SSE3-NEXT:    packuswb %xmm2, %xmm1
; SSE3-NEXT:    pcmpgtd {{.*}}(%rip), %xmm1
; SSE3-NEXT:    movdqa %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ugt_12_v4i32:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm3
; SSSE3-NEXT:    pand %xmm2, %xmm3
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm1, %xmm4
; SSSE3-NEXT:    pshufb %xmm3, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm2, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm1
; SSSE3-NEXT:    paddb %xmm4, %xmm1
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    movdqa %xmm1, %xmm2
; SSSE3-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSSE3-NEXT:    psadbw %xmm0, %xmm2
; SSSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSSE3-NEXT:    psadbw %xmm0, %xmm1
; SSSE3-NEXT:    packuswb %xmm2, %xmm1
; SSSE3-NEXT:    pcmpgtd {{.*}}(%rip), %xmm1
; SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ugt_12_v4i32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm1, %xmm1
; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero
; SSE41-NEXT:    punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3]
; SSE41-NEXT:    psadbw %xmm1, %xmm3
; SSE41-NEXT:    psadbw %xmm1, %xmm0
; SSE41-NEXT:    packuswb %xmm3, %xmm0
; SSE41-NEXT:    pcmpgtd {{.*}}(%rip), %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ugt_12_v4i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpgtd {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ugt_12_v4i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX2-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [12,12,12,12]
; AVX2-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ugt_12_v4i32:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [12,12,12,12]
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ugt_12_v4i32:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ugt_12_v4i32:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; BITALG_NOVLX-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [12,12,12,12]
; BITALG_NOVLX-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ugt_12_v4i32:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; BITALG-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
  %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
  %3 = icmp ugt <4 x i32> %2, <i32 12, i32 12, i32 12, i32 12>
  %4 = sext <4 x i1> %3 to <4 x i32>
  ret <4 x i32> %4
}
9949
define <4 x i32> @ult_13_v4i32(<4 x i32> %0) {
; Test: per-lane popcount of a <4 x i32>, unsigned-less-than compare against
; splat(13), and sign-extension of the resulting i1 mask back to i32 lanes.
; The CHECK lines below are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script rather than editing by hand.
; SSE2-LABEL: ult_13_v4i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE2-NEXT:    psadbw %xmm0, %xmm2
; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE2-NEXT:    psadbw %xmm0, %xmm1
; SSE2-NEXT:    packuswb %xmm2, %xmm1
; SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [13,13,13,13]
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ult_13_v4i32:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    movdqa %xmm1, %xmm2
; SSE3-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE3-NEXT:    psadbw %xmm0, %xmm2
; SSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE3-NEXT:    psadbw %xmm0, %xmm1
; SSE3-NEXT:    packuswb %xmm2, %xmm1
; SSE3-NEXT:    movdqa {{.*#+}} xmm0 = [13,13,13,13]
; SSE3-NEXT:    pcmpgtd %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ult_13_v4i32:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    movdqa %xmm3, %xmm1
; SSSE3-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; SSSE3-NEXT:    psadbw %xmm0, %xmm1
; SSSE3-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
; SSSE3-NEXT:    psadbw %xmm0, %xmm3
; SSSE3-NEXT:    packuswb %xmm1, %xmm3
; SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [13,13,13,13]
; SSSE3-NEXT:    pcmpgtd %xmm3, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ult_13_v4i32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero
; SSE41-NEXT:    punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3]
; SSE41-NEXT:    psadbw %xmm0, %xmm3
; SSE41-NEXT:    psadbw %xmm0, %xmm1
; SSE41-NEXT:    packuswb %xmm3, %xmm1
; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [13,13,13,13]
; SSE41-NEXT:    pcmpgtd %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ult_13_v4i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [13,13,13,13]
; AVX1-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ult_13_v4i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX2-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [13,13,13,13]
; AVX2-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ult_13_v4i32:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [13,13,13,13]
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ult_13_v4i32:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ult_13_v4i32:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; BITALG_NOVLX-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [13,13,13,13]
; BITALG_NOVLX-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ult_13_v4i32:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; BITALG-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
; popcount each lane, test (count < 13) unsigned, widen the mask to i32.
  %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
  %3 = icmp ult <4 x i32> %2, <i32 13, i32 13, i32 13, i32 13>
  %4 = sext <4 x i1> %3 to <4 x i32>
  ret <4 x i32> %4
}
10140
define <4 x i32> @ugt_13_v4i32(<4 x i32> %0) {
; Test: per-lane popcount of a <4 x i32>, unsigned-greater-than compare
; against splat(13), and sign-extension of the resulting i1 mask to i32 lanes.
; The CHECK lines below are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script rather than editing by hand.
; SSE2-LABEL: ugt_13_v4i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE2-NEXT:    psadbw %xmm0, %xmm2
; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE2-NEXT:    psadbw %xmm0, %xmm1
; SSE2-NEXT:    packuswb %xmm2, %xmm1
; SSE2-NEXT:    pcmpgtd {{.*}}(%rip), %xmm1
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ugt_13_v4i32:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    movdqa %xmm1, %xmm2
; SSE3-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE3-NEXT:    psadbw %xmm0, %xmm2
; SSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE3-NEXT:    psadbw %xmm0, %xmm1
; SSE3-NEXT:    packuswb %xmm2, %xmm1
; SSE3-NEXT:    pcmpgtd {{.*}}(%rip), %xmm1
; SSE3-NEXT:    movdqa %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ugt_13_v4i32:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm3
; SSSE3-NEXT:    pand %xmm2, %xmm3
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm1, %xmm4
; SSSE3-NEXT:    pshufb %xmm3, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm2, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm1
; SSSE3-NEXT:    paddb %xmm4, %xmm1
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    movdqa %xmm1, %xmm2
; SSSE3-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSSE3-NEXT:    psadbw %xmm0, %xmm2
; SSSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSSE3-NEXT:    psadbw %xmm0, %xmm1
; SSSE3-NEXT:    packuswb %xmm2, %xmm1
; SSSE3-NEXT:    pcmpgtd {{.*}}(%rip), %xmm1
; SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ugt_13_v4i32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm1, %xmm1
; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero
; SSE41-NEXT:    punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3]
; SSE41-NEXT:    psadbw %xmm1, %xmm3
; SSE41-NEXT:    psadbw %xmm1, %xmm0
; SSE41-NEXT:    packuswb %xmm3, %xmm0
; SSE41-NEXT:    pcmpgtd {{.*}}(%rip), %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ugt_13_v4i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpgtd {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ugt_13_v4i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX2-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [13,13,13,13]
; AVX2-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ugt_13_v4i32:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [13,13,13,13]
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ugt_13_v4i32:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ugt_13_v4i32:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; BITALG_NOVLX-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [13,13,13,13]
; BITALG_NOVLX-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ugt_13_v4i32:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; BITALG-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
; popcount each lane, test (count > 13) unsigned, widen the mask to i32.
  %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
  %3 = icmp ugt <4 x i32> %2, <i32 13, i32 13, i32 13, i32 13>
  %4 = sext <4 x i1> %3 to <4 x i32>
  ret <4 x i32> %4
}
10329
define <4 x i32> @ult_14_v4i32(<4 x i32> %0) {
; Test: per-lane popcount of a <4 x i32>, unsigned-less-than compare against
; splat(14), and sign-extension of the resulting i1 mask back to i32 lanes.
; The CHECK lines below are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script rather than editing by hand.
; SSE2-LABEL: ult_14_v4i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE2-NEXT:    psadbw %xmm0, %xmm2
; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE2-NEXT:    psadbw %xmm0, %xmm1
; SSE2-NEXT:    packuswb %xmm2, %xmm1
; SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [14,14,14,14]
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ult_14_v4i32:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    movdqa %xmm1, %xmm2
; SSE3-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE3-NEXT:    psadbw %xmm0, %xmm2
; SSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE3-NEXT:    psadbw %xmm0, %xmm1
; SSE3-NEXT:    packuswb %xmm2, %xmm1
; SSE3-NEXT:    movdqa {{.*#+}} xmm0 = [14,14,14,14]
; SSE3-NEXT:    pcmpgtd %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ult_14_v4i32:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    movdqa %xmm3, %xmm1
; SSSE3-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; SSSE3-NEXT:    psadbw %xmm0, %xmm1
; SSSE3-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
; SSSE3-NEXT:    psadbw %xmm0, %xmm3
; SSSE3-NEXT:    packuswb %xmm1, %xmm3
; SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [14,14,14,14]
; SSSE3-NEXT:    pcmpgtd %xmm3, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ult_14_v4i32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero
; SSE41-NEXT:    punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3]
; SSE41-NEXT:    psadbw %xmm0, %xmm3
; SSE41-NEXT:    psadbw %xmm0, %xmm1
; SSE41-NEXT:    packuswb %xmm3, %xmm1
; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [14,14,14,14]
; SSE41-NEXT:    pcmpgtd %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ult_14_v4i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [14,14,14,14]
; AVX1-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ult_14_v4i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX2-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [14,14,14,14]
; AVX2-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ult_14_v4i32:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [14,14,14,14]
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ult_14_v4i32:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ult_14_v4i32:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; BITALG_NOVLX-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [14,14,14,14]
; BITALG_NOVLX-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ult_14_v4i32:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; BITALG-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
; popcount each lane, test (count < 14) unsigned, widen the mask to i32.
  %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
  %3 = icmp ult <4 x i32> %2, <i32 14, i32 14, i32 14, i32 14>
  %4 = sext <4 x i1> %3 to <4 x i32>
  ret <4 x i32> %4
}
10520
10521define <4 x i32> @ugt_14_v4i32(<4 x i32> %0) {
10522; SSE2-LABEL: ugt_14_v4i32:
10523; SSE2:       # %bb.0:
10524; SSE2-NEXT:    movdqa %xmm0, %xmm1
10525; SSE2-NEXT:    psrlw $1, %xmm1
10526; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
10527; SSE2-NEXT:    psubb %xmm1, %xmm0
10528; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
10529; SSE2-NEXT:    movdqa %xmm0, %xmm2
10530; SSE2-NEXT:    pand %xmm1, %xmm2
10531; SSE2-NEXT:    psrlw $2, %xmm0
10532; SSE2-NEXT:    pand %xmm1, %xmm0
10533; SSE2-NEXT:    paddb %xmm2, %xmm0
10534; SSE2-NEXT:    movdqa %xmm0, %xmm1
10535; SSE2-NEXT:    psrlw $4, %xmm1
10536; SSE2-NEXT:    paddb %xmm0, %xmm1
10537; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
10538; SSE2-NEXT:    pxor %xmm0, %xmm0
10539; SSE2-NEXT:    movdqa %xmm1, %xmm2
10540; SSE2-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
10541; SSE2-NEXT:    psadbw %xmm0, %xmm2
10542; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
10543; SSE2-NEXT:    psadbw %xmm0, %xmm1
10544; SSE2-NEXT:    packuswb %xmm2, %xmm1
10545; SSE2-NEXT:    pcmpgtd {{.*}}(%rip), %xmm1
10546; SSE2-NEXT:    movdqa %xmm1, %xmm0
10547; SSE2-NEXT:    retq
10548;
10549; SSE3-LABEL: ugt_14_v4i32:
10550; SSE3:       # %bb.0:
10551; SSE3-NEXT:    movdqa %xmm0, %xmm1
10552; SSE3-NEXT:    psrlw $1, %xmm1
10553; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
10554; SSE3-NEXT:    psubb %xmm1, %xmm0
10555; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
10556; SSE3-NEXT:    movdqa %xmm0, %xmm2
10557; SSE3-NEXT:    pand %xmm1, %xmm2
10558; SSE3-NEXT:    psrlw $2, %xmm0
10559; SSE3-NEXT:    pand %xmm1, %xmm0
10560; SSE3-NEXT:    paddb %xmm2, %xmm0
10561; SSE3-NEXT:    movdqa %xmm0, %xmm1
10562; SSE3-NEXT:    psrlw $4, %xmm1
10563; SSE3-NEXT:    paddb %xmm0, %xmm1
10564; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
10565; SSE3-NEXT:    pxor %xmm0, %xmm0
10566; SSE3-NEXT:    movdqa %xmm1, %xmm2
10567; SSE3-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
10568; SSE3-NEXT:    psadbw %xmm0, %xmm2
10569; SSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
10570; SSE3-NEXT:    psadbw %xmm0, %xmm1
10571; SSE3-NEXT:    packuswb %xmm2, %xmm1
10572; SSE3-NEXT:    pcmpgtd {{.*}}(%rip), %xmm1
10573; SSE3-NEXT:    movdqa %xmm1, %xmm0
10574; SSE3-NEXT:    retq
10575;
10576; SSSE3-LABEL: ugt_14_v4i32:
10577; SSSE3:       # %bb.0:
10578; SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
10579; SSSE3-NEXT:    movdqa %xmm0, %xmm3
10580; SSSE3-NEXT:    pand %xmm2, %xmm3
10581; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
10582; SSSE3-NEXT:    movdqa %xmm1, %xmm4
10583; SSSE3-NEXT:    pshufb %xmm3, %xmm4
10584; SSSE3-NEXT:    psrlw $4, %xmm0
10585; SSSE3-NEXT:    pand %xmm2, %xmm0
10586; SSSE3-NEXT:    pshufb %xmm0, %xmm1
10587; SSSE3-NEXT:    paddb %xmm4, %xmm1
10588; SSSE3-NEXT:    pxor %xmm0, %xmm0
10589; SSSE3-NEXT:    movdqa %xmm1, %xmm2
10590; SSSE3-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
10591; SSSE3-NEXT:    psadbw %xmm0, %xmm2
10592; SSSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
10593; SSSE3-NEXT:    psadbw %xmm0, %xmm1
10594; SSSE3-NEXT:    packuswb %xmm2, %xmm1
10595; SSSE3-NEXT:    pcmpgtd {{.*}}(%rip), %xmm1
10596; SSSE3-NEXT:    movdqa %xmm1, %xmm0
10597; SSSE3-NEXT:    retq
10598;
10599; SSE41-LABEL: ugt_14_v4i32:
10600; SSE41:       # %bb.0:
10601; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
10602; SSE41-NEXT:    movdqa %xmm0, %xmm2
10603; SSE41-NEXT:    pand %xmm1, %xmm2
10604; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
10605; SSE41-NEXT:    movdqa %xmm3, %xmm4
10606; SSE41-NEXT:    pshufb %xmm2, %xmm4
10607; SSE41-NEXT:    psrlw $4, %xmm0
10608; SSE41-NEXT:    pand %xmm1, %xmm0
10609; SSE41-NEXT:    pshufb %xmm0, %xmm3
10610; SSE41-NEXT:    paddb %xmm4, %xmm3
10611; SSE41-NEXT:    pxor %xmm1, %xmm1
10612; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero
10613; SSE41-NEXT:    punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3]
10614; SSE41-NEXT:    psadbw %xmm1, %xmm3
10615; SSE41-NEXT:    psadbw %xmm1, %xmm0
10616; SSE41-NEXT:    packuswb %xmm3, %xmm0
10617; SSE41-NEXT:    pcmpgtd {{.*}}(%rip), %xmm0
10618; SSE41-NEXT:    retq
10619;
10620; AVX1-LABEL: ugt_14_v4i32:
10621; AVX1:       # %bb.0:
10622; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
10623; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
10624; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
10625; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
10626; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
10627; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
10628; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
10629; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
10630; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
10631; AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
10632; AVX1-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
10633; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
10634; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
10635; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
10636; AVX1-NEXT:    vpcmpgtd {{.*}}(%rip), %xmm0, %xmm0
10637; AVX1-NEXT:    retq
10638;
10639; AVX2-LABEL: ugt_14_v4i32:
10640; AVX2:       # %bb.0:
10641; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
10642; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
10643; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
10644; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
10645; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
10646; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
10647; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
10648; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
10649; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
10650; AVX2-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
10651; AVX2-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
10652; AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
10653; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
10654; AVX2-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
10655; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [14,14,14,14]
10656; AVX2-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
10657; AVX2-NEXT:    retq
10658;
10659; AVX512VPOPCNTDQ-LABEL: ugt_14_v4i32:
10660; AVX512VPOPCNTDQ:       # %bb.0:
10661; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
10662; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
10663; AVX512VPOPCNTDQ-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [14,14,14,14]
10664; AVX512VPOPCNTDQ-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
10665; AVX512VPOPCNTDQ-NEXT:    vzeroupper
10666; AVX512VPOPCNTDQ-NEXT:    retq
10667;
10668; AVX512VPOPCNTDQVL-LABEL: ugt_14_v4i32:
10669; AVX512VPOPCNTDQVL:       # %bb.0:
10670; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
10671; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1
10672; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
10673; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
10674; AVX512VPOPCNTDQVL-NEXT:    retq
10675;
10676; BITALG_NOVLX-LABEL: ugt_14_v4i32:
10677; BITALG_NOVLX:       # %bb.0:
10678; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
10679; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
10680; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
10681; BITALG_NOVLX-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
10682; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
10683; BITALG_NOVLX-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
10684; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
10685; BITALG_NOVLX-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
10686; BITALG_NOVLX-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [14,14,14,14]
10687; BITALG_NOVLX-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
10688; BITALG_NOVLX-NEXT:    vzeroupper
10689; BITALG_NOVLX-NEXT:    retq
10690;
10691; BITALG-LABEL: ugt_14_v4i32:
10692; BITALG:       # %bb.0:
10693; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
10694; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
10695; BITALG-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
10696; BITALG-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
10697; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
10698; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
10699; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
10700; BITALG-NEXT:    vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1
10701; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
10702; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
10703; BITALG-NEXT:    retq
10704  %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
10705  %3 = icmp ugt <4 x i32> %2, <i32 14, i32 14, i32 14, i32 14>
10706  %4 = sext <4 x i1> %3 to <4 x i32>
10707  ret <4 x i32> %4
10708}
10709
; NOTE(review): the CHECK lines in this function are autogenerated by
; utils/update_llc_test_checks.py (see file header) -- regenerate them with
; that script rather than hand-editing.
; Verifies lowering of: ctpop(<4 x i32> %0) ult 15, sign-extended to <4 x i32>,
; across SSE2/SSE3/SSSE3/SSE4.1/AVX1/AVX2/AVX512VPOPCNTDQ[+VL]/BITALG[_NOVLX].
10710define <4 x i32> @ult_15_v4i32(<4 x i32> %0) {
10711; SSE2-LABEL: ult_15_v4i32:
10712; SSE2:       # %bb.0:
10713; SSE2-NEXT:    movdqa %xmm0, %xmm1
10714; SSE2-NEXT:    psrlw $1, %xmm1
10715; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
10716; SSE2-NEXT:    psubb %xmm1, %xmm0
10717; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
10718; SSE2-NEXT:    movdqa %xmm0, %xmm2
10719; SSE2-NEXT:    pand %xmm1, %xmm2
10720; SSE2-NEXT:    psrlw $2, %xmm0
10721; SSE2-NEXT:    pand %xmm1, %xmm0
10722; SSE2-NEXT:    paddb %xmm2, %xmm0
10723; SSE2-NEXT:    movdqa %xmm0, %xmm1
10724; SSE2-NEXT:    psrlw $4, %xmm1
10725; SSE2-NEXT:    paddb %xmm0, %xmm1
10726; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
10727; SSE2-NEXT:    pxor %xmm0, %xmm0
10728; SSE2-NEXT:    movdqa %xmm1, %xmm2
10729; SSE2-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
10730; SSE2-NEXT:    psadbw %xmm0, %xmm2
10731; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
10732; SSE2-NEXT:    psadbw %xmm0, %xmm1
10733; SSE2-NEXT:    packuswb %xmm2, %xmm1
10734; SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [15,15,15,15]
10735; SSE2-NEXT:    pcmpgtd %xmm1, %xmm0
10736; SSE2-NEXT:    retq
10737;
10738; SSE3-LABEL: ult_15_v4i32:
10739; SSE3:       # %bb.0:
10740; SSE3-NEXT:    movdqa %xmm0, %xmm1
10741; SSE3-NEXT:    psrlw $1, %xmm1
10742; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
10743; SSE3-NEXT:    psubb %xmm1, %xmm0
10744; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
10745; SSE3-NEXT:    movdqa %xmm0, %xmm2
10746; SSE3-NEXT:    pand %xmm1, %xmm2
10747; SSE3-NEXT:    psrlw $2, %xmm0
10748; SSE3-NEXT:    pand %xmm1, %xmm0
10749; SSE3-NEXT:    paddb %xmm2, %xmm0
10750; SSE3-NEXT:    movdqa %xmm0, %xmm1
10751; SSE3-NEXT:    psrlw $4, %xmm1
10752; SSE3-NEXT:    paddb %xmm0, %xmm1
10753; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
10754; SSE3-NEXT:    pxor %xmm0, %xmm0
10755; SSE3-NEXT:    movdqa %xmm1, %xmm2
10756; SSE3-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
10757; SSE3-NEXT:    psadbw %xmm0, %xmm2
10758; SSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
10759; SSE3-NEXT:    psadbw %xmm0, %xmm1
10760; SSE3-NEXT:    packuswb %xmm2, %xmm1
10761; SSE3-NEXT:    movdqa {{.*#+}} xmm0 = [15,15,15,15]
10762; SSE3-NEXT:    pcmpgtd %xmm1, %xmm0
10763; SSE3-NEXT:    retq
10764;
10765; SSSE3-LABEL: ult_15_v4i32:
10766; SSSE3:       # %bb.0:
10767; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
10768; SSSE3-NEXT:    movdqa %xmm0, %xmm2
10769; SSSE3-NEXT:    pand %xmm1, %xmm2
10770; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
10771; SSSE3-NEXT:    movdqa %xmm3, %xmm4
10772; SSSE3-NEXT:    pshufb %xmm2, %xmm4
10773; SSSE3-NEXT:    psrlw $4, %xmm0
10774; SSSE3-NEXT:    pand %xmm1, %xmm0
10775; SSSE3-NEXT:    pshufb %xmm0, %xmm3
10776; SSSE3-NEXT:    paddb %xmm4, %xmm3
10777; SSSE3-NEXT:    pxor %xmm0, %xmm0
10778; SSSE3-NEXT:    movdqa %xmm3, %xmm1
10779; SSSE3-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
10780; SSSE3-NEXT:    psadbw %xmm0, %xmm1
10781; SSSE3-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
10782; SSSE3-NEXT:    psadbw %xmm0, %xmm3
10783; SSSE3-NEXT:    packuswb %xmm1, %xmm3
10784; SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [15,15,15,15]
10785; SSSE3-NEXT:    pcmpgtd %xmm3, %xmm0
10786; SSSE3-NEXT:    retq
10787;
10788; SSE41-LABEL: ult_15_v4i32:
10789; SSE41:       # %bb.0:
10790; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
10791; SSE41-NEXT:    movdqa %xmm0, %xmm2
10792; SSE41-NEXT:    pand %xmm1, %xmm2
10793; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
10794; SSE41-NEXT:    movdqa %xmm3, %xmm4
10795; SSE41-NEXT:    pshufb %xmm2, %xmm4
10796; SSE41-NEXT:    psrlw $4, %xmm0
10797; SSE41-NEXT:    pand %xmm1, %xmm0
10798; SSE41-NEXT:    pshufb %xmm0, %xmm3
10799; SSE41-NEXT:    paddb %xmm4, %xmm3
10800; SSE41-NEXT:    pxor %xmm0, %xmm0
10801; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero
10802; SSE41-NEXT:    punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3]
10803; SSE41-NEXT:    psadbw %xmm0, %xmm3
10804; SSE41-NEXT:    psadbw %xmm0, %xmm1
10805; SSE41-NEXT:    packuswb %xmm3, %xmm1
10806; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [15,15,15,15]
10807; SSE41-NEXT:    pcmpgtd %xmm1, %xmm0
10808; SSE41-NEXT:    retq
10809;
10810; AVX1-LABEL: ult_15_v4i32:
10811; AVX1:       # %bb.0:
10812; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
10813; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
10814; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
10815; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
10816; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
10817; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
10818; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
10819; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
10820; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
10821; AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
10822; AVX1-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
10823; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
10824; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
10825; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
10826; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15]
10827; AVX1-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
10828; AVX1-NEXT:    retq
10829;
10830; AVX2-LABEL: ult_15_v4i32:
10831; AVX2:       # %bb.0:
10832; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
10833; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
10834; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
10835; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
10836; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
10837; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
10838; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
10839; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
10840; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
10841; AVX2-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
10842; AVX2-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
10843; AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
10844; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
10845; AVX2-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
10846; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [15,15,15,15]
10847; AVX2-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
10848; AVX2-NEXT:    retq
10849;
10850; AVX512VPOPCNTDQ-LABEL: ult_15_v4i32:
10851; AVX512VPOPCNTDQ:       # %bb.0:
10852; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
10853; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
10854; AVX512VPOPCNTDQ-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [15,15,15,15]
10855; AVX512VPOPCNTDQ-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
10856; AVX512VPOPCNTDQ-NEXT:    vzeroupper
10857; AVX512VPOPCNTDQ-NEXT:    retq
10858;
10859; AVX512VPOPCNTDQVL-LABEL: ult_15_v4i32:
10860; AVX512VPOPCNTDQVL:       # %bb.0:
10861; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
10862; AVX512VPOPCNTDQVL-NEXT:    vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1
10863; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
10864; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
10865; AVX512VPOPCNTDQVL-NEXT:    retq
10866;
10867; BITALG_NOVLX-LABEL: ult_15_v4i32:
10868; BITALG_NOVLX:       # %bb.0:
10869; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
10870; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
10871; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
10872; BITALG_NOVLX-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
10873; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
10874; BITALG_NOVLX-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
10875; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
10876; BITALG_NOVLX-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
10877; BITALG_NOVLX-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [15,15,15,15]
10878; BITALG_NOVLX-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
10879; BITALG_NOVLX-NEXT:    vzeroupper
10880; BITALG_NOVLX-NEXT:    retq
10881;
10882; BITALG-LABEL: ult_15_v4i32:
10883; BITALG:       # %bb.0:
10884; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
10885; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
10886; BITALG-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
10887; BITALG-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
10888; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
10889; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
10890; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
10891; BITALG-NEXT:    vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1
10892; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
10893; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
10894; BITALG-NEXT:    retq
10895  %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
10896  %3 = icmp ult <4 x i32> %2, <i32 15, i32 15, i32 15, i32 15>
10897  %4 = sext <4 x i1> %3 to <4 x i32>
10898  ret <4 x i32> %4
10899}
10900
; NOTE(review): the CHECK lines in this function are autogenerated by
; utils/update_llc_test_checks.py (see file header) -- regenerate them with
; that script rather than hand-editing.
; Verifies lowering of: ctpop(<4 x i32> %0) ugt 15, sign-extended to <4 x i32>,
; across SSE2/SSE3/SSSE3/SSE4.1/AVX1/AVX2/AVX512VPOPCNTDQ[+VL]/BITALG[_NOVLX].
10901define <4 x i32> @ugt_15_v4i32(<4 x i32> %0) {
10902; SSE2-LABEL: ugt_15_v4i32:
10903; SSE2:       # %bb.0:
10904; SSE2-NEXT:    movdqa %xmm0, %xmm1
10905; SSE2-NEXT:    psrlw $1, %xmm1
10906; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
10907; SSE2-NEXT:    psubb %xmm1, %xmm0
10908; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
10909; SSE2-NEXT:    movdqa %xmm0, %xmm2
10910; SSE2-NEXT:    pand %xmm1, %xmm2
10911; SSE2-NEXT:    psrlw $2, %xmm0
10912; SSE2-NEXT:    pand %xmm1, %xmm0
10913; SSE2-NEXT:    paddb %xmm2, %xmm0
10914; SSE2-NEXT:    movdqa %xmm0, %xmm1
10915; SSE2-NEXT:    psrlw $4, %xmm1
10916; SSE2-NEXT:    paddb %xmm0, %xmm1
10917; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
10918; SSE2-NEXT:    pxor %xmm0, %xmm0
10919; SSE2-NEXT:    movdqa %xmm1, %xmm2
10920; SSE2-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
10921; SSE2-NEXT:    psadbw %xmm0, %xmm2
10922; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
10923; SSE2-NEXT:    psadbw %xmm0, %xmm1
10924; SSE2-NEXT:    packuswb %xmm2, %xmm1
10925; SSE2-NEXT:    pcmpgtd {{.*}}(%rip), %xmm1
10926; SSE2-NEXT:    movdqa %xmm1, %xmm0
10927; SSE2-NEXT:    retq
10928;
10929; SSE3-LABEL: ugt_15_v4i32:
10930; SSE3:       # %bb.0:
10931; SSE3-NEXT:    movdqa %xmm0, %xmm1
10932; SSE3-NEXT:    psrlw $1, %xmm1
10933; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
10934; SSE3-NEXT:    psubb %xmm1, %xmm0
10935; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
10936; SSE3-NEXT:    movdqa %xmm0, %xmm2
10937; SSE3-NEXT:    pand %xmm1, %xmm2
10938; SSE3-NEXT:    psrlw $2, %xmm0
10939; SSE3-NEXT:    pand %xmm1, %xmm0
10940; SSE3-NEXT:    paddb %xmm2, %xmm0
10941; SSE3-NEXT:    movdqa %xmm0, %xmm1
10942; SSE3-NEXT:    psrlw $4, %xmm1
10943; SSE3-NEXT:    paddb %xmm0, %xmm1
10944; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
10945; SSE3-NEXT:    pxor %xmm0, %xmm0
10946; SSE3-NEXT:    movdqa %xmm1, %xmm2
10947; SSE3-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
10948; SSE3-NEXT:    psadbw %xmm0, %xmm2
10949; SSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
10950; SSE3-NEXT:    psadbw %xmm0, %xmm1
10951; SSE3-NEXT:    packuswb %xmm2, %xmm1
10952; SSE3-NEXT:    pcmpgtd {{.*}}(%rip), %xmm1
10953; SSE3-NEXT:    movdqa %xmm1, %xmm0
10954; SSE3-NEXT:    retq
10955;
10956; SSSE3-LABEL: ugt_15_v4i32:
10957; SSSE3:       # %bb.0:
10958; SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
10959; SSSE3-NEXT:    movdqa %xmm0, %xmm3
10960; SSSE3-NEXT:    pand %xmm2, %xmm3
10961; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
10962; SSSE3-NEXT:    movdqa %xmm1, %xmm4
10963; SSSE3-NEXT:    pshufb %xmm3, %xmm4
10964; SSSE3-NEXT:    psrlw $4, %xmm0
10965; SSSE3-NEXT:    pand %xmm2, %xmm0
10966; SSSE3-NEXT:    pshufb %xmm0, %xmm1
10967; SSSE3-NEXT:    paddb %xmm4, %xmm1
10968; SSSE3-NEXT:    pxor %xmm0, %xmm0
10969; SSSE3-NEXT:    movdqa %xmm1, %xmm2
10970; SSSE3-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
10971; SSSE3-NEXT:    psadbw %xmm0, %xmm2
10972; SSSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
10973; SSSE3-NEXT:    psadbw %xmm0, %xmm1
10974; SSSE3-NEXT:    packuswb %xmm2, %xmm1
10975; SSSE3-NEXT:    pcmpgtd {{.*}}(%rip), %xmm1
10976; SSSE3-NEXT:    movdqa %xmm1, %xmm0
10977; SSSE3-NEXT:    retq
10978;
10979; SSE41-LABEL: ugt_15_v4i32:
10980; SSE41:       # %bb.0:
10981; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
10982; SSE41-NEXT:    movdqa %xmm0, %xmm2
10983; SSE41-NEXT:    pand %xmm1, %xmm2
10984; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
10985; SSE41-NEXT:    movdqa %xmm3, %xmm4
10986; SSE41-NEXT:    pshufb %xmm2, %xmm4
10987; SSE41-NEXT:    psrlw $4, %xmm0
10988; SSE41-NEXT:    pand %xmm1, %xmm0
10989; SSE41-NEXT:    pshufb %xmm0, %xmm3
10990; SSE41-NEXT:    paddb %xmm4, %xmm3
10991; SSE41-NEXT:    pxor %xmm1, %xmm1
10992; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero
10993; SSE41-NEXT:    punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3]
10994; SSE41-NEXT:    psadbw %xmm1, %xmm3
10995; SSE41-NEXT:    psadbw %xmm1, %xmm0
10996; SSE41-NEXT:    packuswb %xmm3, %xmm0
10997; SSE41-NEXT:    pcmpgtd {{.*}}(%rip), %xmm0
10998; SSE41-NEXT:    retq
10999;
11000; AVX1-LABEL: ugt_15_v4i32:
11001; AVX1:       # %bb.0:
11002; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
11003; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
11004; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
11005; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
11006; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
11007; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
11008; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
11009; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
11010; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
11011; AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
11012; AVX1-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
11013; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
11014; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
11015; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
11016; AVX1-NEXT:    vpcmpgtd {{.*}}(%rip), %xmm0, %xmm0
11017; AVX1-NEXT:    retq
11018;
11019; AVX2-LABEL: ugt_15_v4i32:
11020; AVX2:       # %bb.0:
11021; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
11022; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
11023; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
11024; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
11025; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
11026; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
11027; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
11028; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
11029; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
11030; AVX2-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
11031; AVX2-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
11032; AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
11033; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
11034; AVX2-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
11035; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [15,15,15,15]
11036; AVX2-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
11037; AVX2-NEXT:    retq
11038;
11039; AVX512VPOPCNTDQ-LABEL: ugt_15_v4i32:
11040; AVX512VPOPCNTDQ:       # %bb.0:
11041; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
11042; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
11043; AVX512VPOPCNTDQ-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [15,15,15,15]
11044; AVX512VPOPCNTDQ-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
11045; AVX512VPOPCNTDQ-NEXT:    vzeroupper
11046; AVX512VPOPCNTDQ-NEXT:    retq
11047;
11048; AVX512VPOPCNTDQVL-LABEL: ugt_15_v4i32:
11049; AVX512VPOPCNTDQVL:       # %bb.0:
11050; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
11051; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1
11052; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
11053; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
11054; AVX512VPOPCNTDQVL-NEXT:    retq
11055;
11056; BITALG_NOVLX-LABEL: ugt_15_v4i32:
11057; BITALG_NOVLX:       # %bb.0:
11058; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
11059; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
11060; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
11061; BITALG_NOVLX-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
11062; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
11063; BITALG_NOVLX-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
11064; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
11065; BITALG_NOVLX-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
11066; BITALG_NOVLX-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [15,15,15,15]
11067; BITALG_NOVLX-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
11068; BITALG_NOVLX-NEXT:    vzeroupper
11069; BITALG_NOVLX-NEXT:    retq
11070;
11071; BITALG-LABEL: ugt_15_v4i32:
11072; BITALG:       # %bb.0:
11073; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
11074; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
11075; BITALG-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
11076; BITALG-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
11077; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
11078; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
11079; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
11080; BITALG-NEXT:    vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1
11081; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
11082; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
11083; BITALG-NEXT:    retq
11084  %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
11085  %3 = icmp ugt <4 x i32> %2, <i32 15, i32 15, i32 15, i32 15>
11086  %4 = sext <4 x i1> %3 to <4 x i32>
11087  ret <4 x i32> %4
11088}
11089
; NOTE(review): the CHECK lines in this function are autogenerated by
; utils/update_llc_test_checks.py (see file header) -- regenerate them with
; that script rather than hand-editing.
; Verifies lowering of: ctpop(<4 x i32> %0) ult 16, sign-extended to <4 x i32>,
; across SSE2/SSE3/SSSE3/SSE4.1/AVX1/AVX2/AVX512VPOPCNTDQ[+VL]/BITALG[_NOVLX].
11090define <4 x i32> @ult_16_v4i32(<4 x i32> %0) {
11091; SSE2-LABEL: ult_16_v4i32:
11092; SSE2:       # %bb.0:
11093; SSE2-NEXT:    movdqa %xmm0, %xmm1
11094; SSE2-NEXT:    psrlw $1, %xmm1
11095; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
11096; SSE2-NEXT:    psubb %xmm1, %xmm0
11097; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
11098; SSE2-NEXT:    movdqa %xmm0, %xmm2
11099; SSE2-NEXT:    pand %xmm1, %xmm2
11100; SSE2-NEXT:    psrlw $2, %xmm0
11101; SSE2-NEXT:    pand %xmm1, %xmm0
11102; SSE2-NEXT:    paddb %xmm2, %xmm0
11103; SSE2-NEXT:    movdqa %xmm0, %xmm1
11104; SSE2-NEXT:    psrlw $4, %xmm1
11105; SSE2-NEXT:    paddb %xmm0, %xmm1
11106; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
11107; SSE2-NEXT:    pxor %xmm0, %xmm0
11108; SSE2-NEXT:    movdqa %xmm1, %xmm2
11109; SSE2-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
11110; SSE2-NEXT:    psadbw %xmm0, %xmm2
11111; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
11112; SSE2-NEXT:    psadbw %xmm0, %xmm1
11113; SSE2-NEXT:    packuswb %xmm2, %xmm1
11114; SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [16,16,16,16]
11115; SSE2-NEXT:    pcmpgtd %xmm1, %xmm0
11116; SSE2-NEXT:    retq
11117;
11118; SSE3-LABEL: ult_16_v4i32:
11119; SSE3:       # %bb.0:
11120; SSE3-NEXT:    movdqa %xmm0, %xmm1
11121; SSE3-NEXT:    psrlw $1, %xmm1
11122; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
11123; SSE3-NEXT:    psubb %xmm1, %xmm0
11124; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
11125; SSE3-NEXT:    movdqa %xmm0, %xmm2
11126; SSE3-NEXT:    pand %xmm1, %xmm2
11127; SSE3-NEXT:    psrlw $2, %xmm0
11128; SSE3-NEXT:    pand %xmm1, %xmm0
11129; SSE3-NEXT:    paddb %xmm2, %xmm0
11130; SSE3-NEXT:    movdqa %xmm0, %xmm1
11131; SSE3-NEXT:    psrlw $4, %xmm1
11132; SSE3-NEXT:    paddb %xmm0, %xmm1
11133; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
11134; SSE3-NEXT:    pxor %xmm0, %xmm0
11135; SSE3-NEXT:    movdqa %xmm1, %xmm2
11136; SSE3-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
11137; SSE3-NEXT:    psadbw %xmm0, %xmm2
11138; SSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
11139; SSE3-NEXT:    psadbw %xmm0, %xmm1
11140; SSE3-NEXT:    packuswb %xmm2, %xmm1
11141; SSE3-NEXT:    movdqa {{.*#+}} xmm0 = [16,16,16,16]
11142; SSE3-NEXT:    pcmpgtd %xmm1, %xmm0
11143; SSE3-NEXT:    retq
11144;
11145; SSSE3-LABEL: ult_16_v4i32:
11146; SSSE3:       # %bb.0:
11147; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
11148; SSSE3-NEXT:    movdqa %xmm0, %xmm2
11149; SSSE3-NEXT:    pand %xmm1, %xmm2
11150; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
11151; SSSE3-NEXT:    movdqa %xmm3, %xmm4
11152; SSSE3-NEXT:    pshufb %xmm2, %xmm4
11153; SSSE3-NEXT:    psrlw $4, %xmm0
11154; SSSE3-NEXT:    pand %xmm1, %xmm0
11155; SSSE3-NEXT:    pshufb %xmm0, %xmm3
11156; SSSE3-NEXT:    paddb %xmm4, %xmm3
11157; SSSE3-NEXT:    pxor %xmm0, %xmm0
11158; SSSE3-NEXT:    movdqa %xmm3, %xmm1
11159; SSSE3-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
11160; SSSE3-NEXT:    psadbw %xmm0, %xmm1
11161; SSSE3-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
11162; SSSE3-NEXT:    psadbw %xmm0, %xmm3
11163; SSSE3-NEXT:    packuswb %xmm1, %xmm3
11164; SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [16,16,16,16]
11165; SSSE3-NEXT:    pcmpgtd %xmm3, %xmm0
11166; SSSE3-NEXT:    retq
11167;
11168; SSE41-LABEL: ult_16_v4i32:
11169; SSE41:       # %bb.0:
11170; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
11171; SSE41-NEXT:    movdqa %xmm0, %xmm2
11172; SSE41-NEXT:    pand %xmm1, %xmm2
11173; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
11174; SSE41-NEXT:    movdqa %xmm3, %xmm4
11175; SSE41-NEXT:    pshufb %xmm2, %xmm4
11176; SSE41-NEXT:    psrlw $4, %xmm0
11177; SSE41-NEXT:    pand %xmm1, %xmm0
11178; SSE41-NEXT:    pshufb %xmm0, %xmm3
11179; SSE41-NEXT:    paddb %xmm4, %xmm3
11180; SSE41-NEXT:    pxor %xmm0, %xmm0
11181; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero
11182; SSE41-NEXT:    punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3]
11183; SSE41-NEXT:    psadbw %xmm0, %xmm3
11184; SSE41-NEXT:    psadbw %xmm0, %xmm1
11185; SSE41-NEXT:    packuswb %xmm3, %xmm1
11186; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [16,16,16,16]
11187; SSE41-NEXT:    pcmpgtd %xmm1, %xmm0
11188; SSE41-NEXT:    retq
11189;
11190; AVX1-LABEL: ult_16_v4i32:
11191; AVX1:       # %bb.0:
11192; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
11193; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
11194; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
11195; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
11196; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
11197; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
11198; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
11199; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
11200; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
11201; AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
11202; AVX1-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
11203; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
11204; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
11205; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
11206; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [16,16,16,16]
11207; AVX1-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
11208; AVX1-NEXT:    retq
11209;
11210; AVX2-LABEL: ult_16_v4i32:
11211; AVX2:       # %bb.0:
11212; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
11213; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
11214; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
11215; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
11216; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
11217; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
11218; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
11219; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
11220; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
11221; AVX2-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
11222; AVX2-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
11223; AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
11224; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
11225; AVX2-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
11226; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [16,16,16,16]
11227; AVX2-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
11228; AVX2-NEXT:    retq
11229;
11230; AVX512VPOPCNTDQ-LABEL: ult_16_v4i32:
11231; AVX512VPOPCNTDQ:       # %bb.0:
11232; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
11233; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
11234; AVX512VPOPCNTDQ-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [16,16,16,16]
11235; AVX512VPOPCNTDQ-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
11236; AVX512VPOPCNTDQ-NEXT:    vzeroupper
11237; AVX512VPOPCNTDQ-NEXT:    retq
11238;
11239; AVX512VPOPCNTDQVL-LABEL: ult_16_v4i32:
11240; AVX512VPOPCNTDQVL:       # %bb.0:
11241; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
11242; AVX512VPOPCNTDQVL-NEXT:    vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1
11243; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
11244; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
11245; AVX512VPOPCNTDQVL-NEXT:    retq
11246;
11247; BITALG_NOVLX-LABEL: ult_16_v4i32:
11248; BITALG_NOVLX:       # %bb.0:
11249; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
11250; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
11251; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
11252; BITALG_NOVLX-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
11253; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
11254; BITALG_NOVLX-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
11255; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
11256; BITALG_NOVLX-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
11257; BITALG_NOVLX-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [16,16,16,16]
11258; BITALG_NOVLX-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
11259; BITALG_NOVLX-NEXT:    vzeroupper
11260; BITALG_NOVLX-NEXT:    retq
11261;
11262; BITALG-LABEL: ult_16_v4i32:
11263; BITALG:       # %bb.0:
11264; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
11265; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
11266; BITALG-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
11267; BITALG-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
11268; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
11269; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
11270; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
11271; BITALG-NEXT:    vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1
11272; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
11273; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
11274; BITALG-NEXT:    retq
11275  %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
11276  %3 = icmp ult <4 x i32> %2, <i32 16, i32 16, i32 16, i32 16>
11277  %4 = sext <4 x i1> %3 to <4 x i32>
11278  ret <4 x i32> %4
11279}
11280
11281define <4 x i32> @ugt_16_v4i32(<4 x i32> %0) {
11282; SSE2-LABEL: ugt_16_v4i32:
11283; SSE2:       # %bb.0:
11284; SSE2-NEXT:    movdqa %xmm0, %xmm1
11285; SSE2-NEXT:    psrlw $1, %xmm1
11286; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
11287; SSE2-NEXT:    psubb %xmm1, %xmm0
11288; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
11289; SSE2-NEXT:    movdqa %xmm0, %xmm2
11290; SSE2-NEXT:    pand %xmm1, %xmm2
11291; SSE2-NEXT:    psrlw $2, %xmm0
11292; SSE2-NEXT:    pand %xmm1, %xmm0
11293; SSE2-NEXT:    paddb %xmm2, %xmm0
11294; SSE2-NEXT:    movdqa %xmm0, %xmm1
11295; SSE2-NEXT:    psrlw $4, %xmm1
11296; SSE2-NEXT:    paddb %xmm0, %xmm1
11297; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
11298; SSE2-NEXT:    pxor %xmm0, %xmm0
11299; SSE2-NEXT:    movdqa %xmm1, %xmm2
11300; SSE2-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
11301; SSE2-NEXT:    psadbw %xmm0, %xmm2
11302; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
11303; SSE2-NEXT:    psadbw %xmm0, %xmm1
11304; SSE2-NEXT:    packuswb %xmm2, %xmm1
11305; SSE2-NEXT:    pcmpgtd {{.*}}(%rip), %xmm1
11306; SSE2-NEXT:    movdqa %xmm1, %xmm0
11307; SSE2-NEXT:    retq
11308;
11309; SSE3-LABEL: ugt_16_v4i32:
11310; SSE3:       # %bb.0:
11311; SSE3-NEXT:    movdqa %xmm0, %xmm1
11312; SSE3-NEXT:    psrlw $1, %xmm1
11313; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
11314; SSE3-NEXT:    psubb %xmm1, %xmm0
11315; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
11316; SSE3-NEXT:    movdqa %xmm0, %xmm2
11317; SSE3-NEXT:    pand %xmm1, %xmm2
11318; SSE3-NEXT:    psrlw $2, %xmm0
11319; SSE3-NEXT:    pand %xmm1, %xmm0
11320; SSE3-NEXT:    paddb %xmm2, %xmm0
11321; SSE3-NEXT:    movdqa %xmm0, %xmm1
11322; SSE3-NEXT:    psrlw $4, %xmm1
11323; SSE3-NEXT:    paddb %xmm0, %xmm1
11324; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
11325; SSE3-NEXT:    pxor %xmm0, %xmm0
11326; SSE3-NEXT:    movdqa %xmm1, %xmm2
11327; SSE3-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
11328; SSE3-NEXT:    psadbw %xmm0, %xmm2
11329; SSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
11330; SSE3-NEXT:    psadbw %xmm0, %xmm1
11331; SSE3-NEXT:    packuswb %xmm2, %xmm1
11332; SSE3-NEXT:    pcmpgtd {{.*}}(%rip), %xmm1
11333; SSE3-NEXT:    movdqa %xmm1, %xmm0
11334; SSE3-NEXT:    retq
11335;
11336; SSSE3-LABEL: ugt_16_v4i32:
11337; SSSE3:       # %bb.0:
11338; SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
11339; SSSE3-NEXT:    movdqa %xmm0, %xmm3
11340; SSSE3-NEXT:    pand %xmm2, %xmm3
11341; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
11342; SSSE3-NEXT:    movdqa %xmm1, %xmm4
11343; SSSE3-NEXT:    pshufb %xmm3, %xmm4
11344; SSSE3-NEXT:    psrlw $4, %xmm0
11345; SSSE3-NEXT:    pand %xmm2, %xmm0
11346; SSSE3-NEXT:    pshufb %xmm0, %xmm1
11347; SSSE3-NEXT:    paddb %xmm4, %xmm1
11348; SSSE3-NEXT:    pxor %xmm0, %xmm0
11349; SSSE3-NEXT:    movdqa %xmm1, %xmm2
11350; SSSE3-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
11351; SSSE3-NEXT:    psadbw %xmm0, %xmm2
11352; SSSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
11353; SSSE3-NEXT:    psadbw %xmm0, %xmm1
11354; SSSE3-NEXT:    packuswb %xmm2, %xmm1
11355; SSSE3-NEXT:    pcmpgtd {{.*}}(%rip), %xmm1
11356; SSSE3-NEXT:    movdqa %xmm1, %xmm0
11357; SSSE3-NEXT:    retq
11358;
11359; SSE41-LABEL: ugt_16_v4i32:
11360; SSE41:       # %bb.0:
11361; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
11362; SSE41-NEXT:    movdqa %xmm0, %xmm2
11363; SSE41-NEXT:    pand %xmm1, %xmm2
11364; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
11365; SSE41-NEXT:    movdqa %xmm3, %xmm4
11366; SSE41-NEXT:    pshufb %xmm2, %xmm4
11367; SSE41-NEXT:    psrlw $4, %xmm0
11368; SSE41-NEXT:    pand %xmm1, %xmm0
11369; SSE41-NEXT:    pshufb %xmm0, %xmm3
11370; SSE41-NEXT:    paddb %xmm4, %xmm3
11371; SSE41-NEXT:    pxor %xmm1, %xmm1
11372; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero
11373; SSE41-NEXT:    punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3]
11374; SSE41-NEXT:    psadbw %xmm1, %xmm3
11375; SSE41-NEXT:    psadbw %xmm1, %xmm0
11376; SSE41-NEXT:    packuswb %xmm3, %xmm0
11377; SSE41-NEXT:    pcmpgtd {{.*}}(%rip), %xmm0
11378; SSE41-NEXT:    retq
11379;
11380; AVX1-LABEL: ugt_16_v4i32:
11381; AVX1:       # %bb.0:
11382; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
11383; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
11384; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
11385; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
11386; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
11387; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
11388; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
11389; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
11390; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
11391; AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
11392; AVX1-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
11393; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
11394; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
11395; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
11396; AVX1-NEXT:    vpcmpgtd {{.*}}(%rip), %xmm0, %xmm0
11397; AVX1-NEXT:    retq
11398;
11399; AVX2-LABEL: ugt_16_v4i32:
11400; AVX2:       # %bb.0:
11401; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
11402; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
11403; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
11404; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
11405; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
11406; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
11407; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
11408; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
11409; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
11410; AVX2-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
11411; AVX2-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
11412; AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
11413; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
11414; AVX2-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
11415; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [16,16,16,16]
11416; AVX2-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
11417; AVX2-NEXT:    retq
11418;
11419; AVX512VPOPCNTDQ-LABEL: ugt_16_v4i32:
11420; AVX512VPOPCNTDQ:       # %bb.0:
11421; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
11422; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
11423; AVX512VPOPCNTDQ-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [16,16,16,16]
11424; AVX512VPOPCNTDQ-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
11425; AVX512VPOPCNTDQ-NEXT:    vzeroupper
11426; AVX512VPOPCNTDQ-NEXT:    retq
11427;
11428; AVX512VPOPCNTDQVL-LABEL: ugt_16_v4i32:
11429; AVX512VPOPCNTDQVL:       # %bb.0:
11430; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
11431; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1
11432; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
11433; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
11434; AVX512VPOPCNTDQVL-NEXT:    retq
11435;
11436; BITALG_NOVLX-LABEL: ugt_16_v4i32:
11437; BITALG_NOVLX:       # %bb.0:
11438; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
11439; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
11440; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
11441; BITALG_NOVLX-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
11442; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
11443; BITALG_NOVLX-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
11444; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
11445; BITALG_NOVLX-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
11446; BITALG_NOVLX-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [16,16,16,16]
11447; BITALG_NOVLX-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
11448; BITALG_NOVLX-NEXT:    vzeroupper
11449; BITALG_NOVLX-NEXT:    retq
11450;
11451; BITALG-LABEL: ugt_16_v4i32:
11452; BITALG:       # %bb.0:
11453; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
11454; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
11455; BITALG-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
11456; BITALG-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
11457; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
11458; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
11459; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
11460; BITALG-NEXT:    vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1
11461; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
11462; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
11463; BITALG-NEXT:    retq
11464  %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
11465  %3 = icmp ugt <4 x i32> %2, <i32 16, i32 16, i32 16, i32 16>
11466  %4 = sext <4 x i1> %3 to <4 x i32>
11467  ret <4 x i32> %4
11468}
11469
; Checks codegen for: per-lane popcount of a <4 x i32>, unsigned compare
; "< 17", result sign-extended to a <4 x i32> lane mask.  Semantically this is
; the complement of ugt_16_v4i32; the CHECK bodies differ mainly in the
; direction of the final compare (constant-on-the-left pcmpgtd / vpcmpltud).
; Assertions are autogenerated by utils/update_llc_test_checks.py --
; regenerate rather than hand-edit.
define <4 x i32> @ult_17_v4i32(<4 x i32> %0) {
; SSE2-LABEL: ult_17_v4i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE2-NEXT:    psadbw %xmm0, %xmm2
; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE2-NEXT:    psadbw %xmm0, %xmm1
; SSE2-NEXT:    packuswb %xmm2, %xmm1
; SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [17,17,17,17]
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ult_17_v4i32:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    movdqa %xmm1, %xmm2
; SSE3-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE3-NEXT:    psadbw %xmm0, %xmm2
; SSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE3-NEXT:    psadbw %xmm0, %xmm1
; SSE3-NEXT:    packuswb %xmm2, %xmm1
; SSE3-NEXT:    movdqa {{.*#+}} xmm0 = [17,17,17,17]
; SSE3-NEXT:    pcmpgtd %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ult_17_v4i32:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    movdqa %xmm3, %xmm1
; SSSE3-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; SSSE3-NEXT:    psadbw %xmm0, %xmm1
; SSSE3-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
; SSSE3-NEXT:    psadbw %xmm0, %xmm3
; SSSE3-NEXT:    packuswb %xmm1, %xmm3
; SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [17,17,17,17]
; SSSE3-NEXT:    pcmpgtd %xmm3, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ult_17_v4i32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero
; SSE41-NEXT:    punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3]
; SSE41-NEXT:    psadbw %xmm0, %xmm3
; SSE41-NEXT:    psadbw %xmm0, %xmm1
; SSE41-NEXT:    packuswb %xmm3, %xmm1
; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [17,17,17,17]
; SSE41-NEXT:    pcmpgtd %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ult_17_v4i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [17,17,17,17]
; AVX1-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ult_17_v4i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX2-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [17,17,17,17]
; AVX2-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ult_17_v4i32:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [17,17,17,17]
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ult_17_v4i32:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ult_17_v4i32:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; BITALG_NOVLX-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [17,17,17,17]
; BITALG_NOVLX-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ult_17_v4i32:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; BITALG-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
  ; Per-lane population count (0..32 for an i32 lane).
  %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
  ; Unsigned compare: true for lanes with fewer than 17 set bits.
  %3 = icmp ult <4 x i32> %2, <i32 17, i32 17, i32 17, i32 17>
  ; Sign-extend the i1 mask to all-ones / all-zeros i32 lanes.
  %4 = sext <4 x i1> %3 to <4 x i32>
  ret <4 x i32> %4
}
11660
; Checks codegen for: per-lane popcount of a <4 x i32>, unsigned compare
; "> 17", result sign-extended to a <4 x i32> lane mask.  Same lowering
; strategies as ugt_16_v4i32, with the splat compare constant bumped to 17.
; Assertions are autogenerated by utils/update_llc_test_checks.py --
; regenerate rather than hand-edit.
define <4 x i32> @ugt_17_v4i32(<4 x i32> %0) {
; SSE2-LABEL: ugt_17_v4i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE2-NEXT:    psadbw %xmm0, %xmm2
; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE2-NEXT:    psadbw %xmm0, %xmm1
; SSE2-NEXT:    packuswb %xmm2, %xmm1
; SSE2-NEXT:    pcmpgtd {{.*}}(%rip), %xmm1
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ugt_17_v4i32:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    movdqa %xmm1, %xmm2
; SSE3-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE3-NEXT:    psadbw %xmm0, %xmm2
; SSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE3-NEXT:    psadbw %xmm0, %xmm1
; SSE3-NEXT:    packuswb %xmm2, %xmm1
; SSE3-NEXT:    pcmpgtd {{.*}}(%rip), %xmm1
; SSE3-NEXT:    movdqa %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ugt_17_v4i32:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm3
; SSSE3-NEXT:    pand %xmm2, %xmm3
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm1, %xmm4
; SSSE3-NEXT:    pshufb %xmm3, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm2, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm1
; SSSE3-NEXT:    paddb %xmm4, %xmm1
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    movdqa %xmm1, %xmm2
; SSSE3-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSSE3-NEXT:    psadbw %xmm0, %xmm2
; SSSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSSE3-NEXT:    psadbw %xmm0, %xmm1
; SSSE3-NEXT:    packuswb %xmm2, %xmm1
; SSSE3-NEXT:    pcmpgtd {{.*}}(%rip), %xmm1
; SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ugt_17_v4i32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm1, %xmm1
; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero
; SSE41-NEXT:    punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3]
; SSE41-NEXT:    psadbw %xmm1, %xmm3
; SSE41-NEXT:    psadbw %xmm1, %xmm0
; SSE41-NEXT:    packuswb %xmm3, %xmm0
; SSE41-NEXT:    pcmpgtd {{.*}}(%rip), %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ugt_17_v4i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpgtd {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ugt_17_v4i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX2-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [17,17,17,17]
; AVX2-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ugt_17_v4i32:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [17,17,17,17]
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ugt_17_v4i32:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ugt_17_v4i32:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; BITALG_NOVLX-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [17,17,17,17]
; BITALG_NOVLX-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ugt_17_v4i32:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; BITALG-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
  ; Per-lane population count (0..32 for an i32 lane).
  %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
  ; Unsigned compare: true for lanes with more than 17 set bits.
  %3 = icmp ugt <4 x i32> %2, <i32 17, i32 17, i32 17, i32 17>
  ; Sign-extend the i1 mask to all-ones / all-zeros i32 lanes.
  %4 = sext <4 x i1> %3 to <4 x i32>
  ret <4 x i32> %4
}
11849
11850define <4 x i32> @ult_18_v4i32(<4 x i32> %0) {
11851; SSE2-LABEL: ult_18_v4i32:
11852; SSE2:       # %bb.0:
11853; SSE2-NEXT:    movdqa %xmm0, %xmm1
11854; SSE2-NEXT:    psrlw $1, %xmm1
11855; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
11856; SSE2-NEXT:    psubb %xmm1, %xmm0
11857; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
11858; SSE2-NEXT:    movdqa %xmm0, %xmm2
11859; SSE2-NEXT:    pand %xmm1, %xmm2
11860; SSE2-NEXT:    psrlw $2, %xmm0
11861; SSE2-NEXT:    pand %xmm1, %xmm0
11862; SSE2-NEXT:    paddb %xmm2, %xmm0
11863; SSE2-NEXT:    movdqa %xmm0, %xmm1
11864; SSE2-NEXT:    psrlw $4, %xmm1
11865; SSE2-NEXT:    paddb %xmm0, %xmm1
11866; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
11867; SSE2-NEXT:    pxor %xmm0, %xmm0
11868; SSE2-NEXT:    movdqa %xmm1, %xmm2
11869; SSE2-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
11870; SSE2-NEXT:    psadbw %xmm0, %xmm2
11871; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
11872; SSE2-NEXT:    psadbw %xmm0, %xmm1
11873; SSE2-NEXT:    packuswb %xmm2, %xmm1
11874; SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [18,18,18,18]
11875; SSE2-NEXT:    pcmpgtd %xmm1, %xmm0
11876; SSE2-NEXT:    retq
11877;
11878; SSE3-LABEL: ult_18_v4i32:
11879; SSE3:       # %bb.0:
11880; SSE3-NEXT:    movdqa %xmm0, %xmm1
11881; SSE3-NEXT:    psrlw $1, %xmm1
11882; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
11883; SSE3-NEXT:    psubb %xmm1, %xmm0
11884; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
11885; SSE3-NEXT:    movdqa %xmm0, %xmm2
11886; SSE3-NEXT:    pand %xmm1, %xmm2
11887; SSE3-NEXT:    psrlw $2, %xmm0
11888; SSE3-NEXT:    pand %xmm1, %xmm0
11889; SSE3-NEXT:    paddb %xmm2, %xmm0
11890; SSE3-NEXT:    movdqa %xmm0, %xmm1
11891; SSE3-NEXT:    psrlw $4, %xmm1
11892; SSE3-NEXT:    paddb %xmm0, %xmm1
11893; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
11894; SSE3-NEXT:    pxor %xmm0, %xmm0
11895; SSE3-NEXT:    movdqa %xmm1, %xmm2
11896; SSE3-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
11897; SSE3-NEXT:    psadbw %xmm0, %xmm2
11898; SSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
11899; SSE3-NEXT:    psadbw %xmm0, %xmm1
11900; SSE3-NEXT:    packuswb %xmm2, %xmm1
11901; SSE3-NEXT:    movdqa {{.*#+}} xmm0 = [18,18,18,18]
11902; SSE3-NEXT:    pcmpgtd %xmm1, %xmm0
11903; SSE3-NEXT:    retq
11904;
11905; SSSE3-LABEL: ult_18_v4i32:
11906; SSSE3:       # %bb.0:
11907; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
11908; SSSE3-NEXT:    movdqa %xmm0, %xmm2
11909; SSSE3-NEXT:    pand %xmm1, %xmm2
11910; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
11911; SSSE3-NEXT:    movdqa %xmm3, %xmm4
11912; SSSE3-NEXT:    pshufb %xmm2, %xmm4
11913; SSSE3-NEXT:    psrlw $4, %xmm0
11914; SSSE3-NEXT:    pand %xmm1, %xmm0
11915; SSSE3-NEXT:    pshufb %xmm0, %xmm3
11916; SSSE3-NEXT:    paddb %xmm4, %xmm3
11917; SSSE3-NEXT:    pxor %xmm0, %xmm0
11918; SSSE3-NEXT:    movdqa %xmm3, %xmm1
11919; SSSE3-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
11920; SSSE3-NEXT:    psadbw %xmm0, %xmm1
11921; SSSE3-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
11922; SSSE3-NEXT:    psadbw %xmm0, %xmm3
11923; SSSE3-NEXT:    packuswb %xmm1, %xmm3
11924; SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [18,18,18,18]
11925; SSSE3-NEXT:    pcmpgtd %xmm3, %xmm0
11926; SSSE3-NEXT:    retq
11927;
11928; SSE41-LABEL: ult_18_v4i32:
11929; SSE41:       # %bb.0:
11930; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
11931; SSE41-NEXT:    movdqa %xmm0, %xmm2
11932; SSE41-NEXT:    pand %xmm1, %xmm2
11933; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
11934; SSE41-NEXT:    movdqa %xmm3, %xmm4
11935; SSE41-NEXT:    pshufb %xmm2, %xmm4
11936; SSE41-NEXT:    psrlw $4, %xmm0
11937; SSE41-NEXT:    pand %xmm1, %xmm0
11938; SSE41-NEXT:    pshufb %xmm0, %xmm3
11939; SSE41-NEXT:    paddb %xmm4, %xmm3
11940; SSE41-NEXT:    pxor %xmm0, %xmm0
11941; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero
11942; SSE41-NEXT:    punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3]
11943; SSE41-NEXT:    psadbw %xmm0, %xmm3
11944; SSE41-NEXT:    psadbw %xmm0, %xmm1
11945; SSE41-NEXT:    packuswb %xmm3, %xmm1
11946; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [18,18,18,18]
11947; SSE41-NEXT:    pcmpgtd %xmm1, %xmm0
11948; SSE41-NEXT:    retq
11949;
11950; AVX1-LABEL: ult_18_v4i32:
11951; AVX1:       # %bb.0:
11952; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
11953; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
11954; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
11955; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
11956; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
11957; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
11958; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
11959; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
11960; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
11961; AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
11962; AVX1-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
11963; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
11964; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
11965; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
11966; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [18,18,18,18]
11967; AVX1-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
11968; AVX1-NEXT:    retq
11969;
11970; AVX2-LABEL: ult_18_v4i32:
11971; AVX2:       # %bb.0:
11972; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
11973; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
11974; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
11975; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
11976; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
11977; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
11978; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
11979; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
11980; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
11981; AVX2-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
11982; AVX2-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
11983; AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
11984; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
11985; AVX2-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
11986; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [18,18,18,18]
11987; AVX2-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
11988; AVX2-NEXT:    retq
11989;
11990; AVX512VPOPCNTDQ-LABEL: ult_18_v4i32:
11991; AVX512VPOPCNTDQ:       # %bb.0:
11992; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
11993; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
11994; AVX512VPOPCNTDQ-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [18,18,18,18]
11995; AVX512VPOPCNTDQ-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
11996; AVX512VPOPCNTDQ-NEXT:    vzeroupper
11997; AVX512VPOPCNTDQ-NEXT:    retq
11998;
11999; AVX512VPOPCNTDQVL-LABEL: ult_18_v4i32:
12000; AVX512VPOPCNTDQVL:       # %bb.0:
12001; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
12002; AVX512VPOPCNTDQVL-NEXT:    vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1
12003; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
12004; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
12005; AVX512VPOPCNTDQVL-NEXT:    retq
12006;
12007; BITALG_NOVLX-LABEL: ult_18_v4i32:
12008; BITALG_NOVLX:       # %bb.0:
12009; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
12010; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
12011; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
12012; BITALG_NOVLX-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
12013; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
12014; BITALG_NOVLX-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
12015; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
12016; BITALG_NOVLX-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
12017; BITALG_NOVLX-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [18,18,18,18]
12018; BITALG_NOVLX-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
12019; BITALG_NOVLX-NEXT:    vzeroupper
12020; BITALG_NOVLX-NEXT:    retq
12021;
12022; BITALG-LABEL: ult_18_v4i32:
12023; BITALG:       # %bb.0:
12024; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
12025; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
12026; BITALG-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
12027; BITALG-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
12028; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
12029; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
12030; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
12031; BITALG-NEXT:    vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1
12032; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
12033; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
12034; BITALG-NEXT:    retq
12035  %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
12036  %3 = icmp ult <4 x i32> %2, <i32 18, i32 18, i32 18, i32 18>
12037  %4 = sext <4 x i1> %3 to <4 x i32>
12038  ret <4 x i32> %4
12039}
12040
; Checks lowering of: popcount of each i32 lane, compared "ugt 18", with the
; i1 result sign-extended to a full-width lane mask (see the scalar IR at the
; bottom of this function).  One assertion block per RUN configuration, from
; baseline SSE2 bit-twiddling popcount up to the AVX512 VPOPCNTDQ / BITALG
; native-popcount forms.  These assertion comments are autogenerated by
; utils/update_llc_test_checks.py -- regenerate them instead of hand-editing.
define <4 x i32> @ugt_18_v4i32(<4 x i32> %0) {
; SSE2-LABEL: ugt_18_v4i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE2-NEXT:    psadbw %xmm0, %xmm2
; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE2-NEXT:    psadbw %xmm0, %xmm1
; SSE2-NEXT:    packuswb %xmm2, %xmm1
; SSE2-NEXT:    pcmpgtd {{.*}}(%rip), %xmm1
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ugt_18_v4i32:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    movdqa %xmm1, %xmm2
; SSE3-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE3-NEXT:    psadbw %xmm0, %xmm2
; SSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE3-NEXT:    psadbw %xmm0, %xmm1
; SSE3-NEXT:    packuswb %xmm2, %xmm1
; SSE3-NEXT:    pcmpgtd {{.*}}(%rip), %xmm1
; SSE3-NEXT:    movdqa %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ugt_18_v4i32:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm3
; SSSE3-NEXT:    pand %xmm2, %xmm3
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm1, %xmm4
; SSSE3-NEXT:    pshufb %xmm3, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm2, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm1
; SSSE3-NEXT:    paddb %xmm4, %xmm1
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    movdqa %xmm1, %xmm2
; SSSE3-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSSE3-NEXT:    psadbw %xmm0, %xmm2
; SSSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSSE3-NEXT:    psadbw %xmm0, %xmm1
; SSSE3-NEXT:    packuswb %xmm2, %xmm1
; SSSE3-NEXT:    pcmpgtd {{.*}}(%rip), %xmm1
; SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ugt_18_v4i32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm1, %xmm1
; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero
; SSE41-NEXT:    punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3]
; SSE41-NEXT:    psadbw %xmm1, %xmm3
; SSE41-NEXT:    psadbw %xmm1, %xmm0
; SSE41-NEXT:    packuswb %xmm3, %xmm0
; SSE41-NEXT:    pcmpgtd {{.*}}(%rip), %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ugt_18_v4i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpgtd {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ugt_18_v4i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX2-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [18,18,18,18]
; AVX2-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ugt_18_v4i32:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [18,18,18,18]
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ugt_18_v4i32:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ugt_18_v4i32:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; BITALG_NOVLX-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [18,18,18,18]
; BITALG_NOVLX-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ugt_18_v4i32:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; BITALG-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
  %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
  %3 = icmp ugt <4 x i32> %2, <i32 18, i32 18, i32 18, i32 18>
  %4 = sext <4 x i1> %3 to <4 x i32>
  ret <4 x i32> %4
}
12229
; Checks lowering of: popcount of each i32 lane, compared "ult 19", with the
; i1 result sign-extended to a full-width lane mask (scalar IR at the bottom of
; this function).  Note the unsigned less-than is lowered as a signed pcmpgtd
; against the constant splat in the non-AVX512 blocks, since popcount results
; fit comfortably in the signed range.  These assertion comments are
; autogenerated by utils/update_llc_test_checks.py -- regenerate instead of
; hand-editing.
define <4 x i32> @ult_19_v4i32(<4 x i32> %0) {
; SSE2-LABEL: ult_19_v4i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE2-NEXT:    psadbw %xmm0, %xmm2
; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE2-NEXT:    psadbw %xmm0, %xmm1
; SSE2-NEXT:    packuswb %xmm2, %xmm1
; SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [19,19,19,19]
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ult_19_v4i32:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    movdqa %xmm1, %xmm2
; SSE3-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE3-NEXT:    psadbw %xmm0, %xmm2
; SSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE3-NEXT:    psadbw %xmm0, %xmm1
; SSE3-NEXT:    packuswb %xmm2, %xmm1
; SSE3-NEXT:    movdqa {{.*#+}} xmm0 = [19,19,19,19]
; SSE3-NEXT:    pcmpgtd %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ult_19_v4i32:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    movdqa %xmm3, %xmm1
; SSSE3-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; SSSE3-NEXT:    psadbw %xmm0, %xmm1
; SSSE3-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
; SSSE3-NEXT:    psadbw %xmm0, %xmm3
; SSSE3-NEXT:    packuswb %xmm1, %xmm3
; SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [19,19,19,19]
; SSSE3-NEXT:    pcmpgtd %xmm3, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ult_19_v4i32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero
; SSE41-NEXT:    punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3]
; SSE41-NEXT:    psadbw %xmm0, %xmm3
; SSE41-NEXT:    psadbw %xmm0, %xmm1
; SSE41-NEXT:    packuswb %xmm3, %xmm1
; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [19,19,19,19]
; SSE41-NEXT:    pcmpgtd %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ult_19_v4i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [19,19,19,19]
; AVX1-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ult_19_v4i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX2-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [19,19,19,19]
; AVX2-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ult_19_v4i32:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [19,19,19,19]
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ult_19_v4i32:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ult_19_v4i32:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; BITALG_NOVLX-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [19,19,19,19]
; BITALG_NOVLX-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ult_19_v4i32:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; BITALG-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
  %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
  %3 = icmp ult <4 x i32> %2, <i32 19, i32 19, i32 19, i32 19>
  %4 = sext <4 x i1> %3 to <4 x i32>
  ret <4 x i32> %4
}
12420
; Checks lowering of: popcount of each i32 lane, compared "ugt 19", with the
; i1 result sign-extended to a full-width lane mask (scalar IR at the bottom of
; this function).  Assertion blocks cover the bit-parallel SSE2/SSE3 popcount,
; the pshufb nibble-LUT SSSE3/SSE4.1/AVX forms, and the native vpopcntd /
; vpopcntb AVX512 forms.  These assertion comments are autogenerated by
; utils/update_llc_test_checks.py -- regenerate instead of hand-editing.
define <4 x i32> @ugt_19_v4i32(<4 x i32> %0) {
; SSE2-LABEL: ugt_19_v4i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE2-NEXT:    psadbw %xmm0, %xmm2
; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE2-NEXT:    psadbw %xmm0, %xmm1
; SSE2-NEXT:    packuswb %xmm2, %xmm1
; SSE2-NEXT:    pcmpgtd {{.*}}(%rip), %xmm1
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ugt_19_v4i32:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    movdqa %xmm1, %xmm2
; SSE3-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE3-NEXT:    psadbw %xmm0, %xmm2
; SSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE3-NEXT:    psadbw %xmm0, %xmm1
; SSE3-NEXT:    packuswb %xmm2, %xmm1
; SSE3-NEXT:    pcmpgtd {{.*}}(%rip), %xmm1
; SSE3-NEXT:    movdqa %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ugt_19_v4i32:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm3
; SSSE3-NEXT:    pand %xmm2, %xmm3
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm1, %xmm4
; SSSE3-NEXT:    pshufb %xmm3, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm2, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm1
; SSSE3-NEXT:    paddb %xmm4, %xmm1
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    movdqa %xmm1, %xmm2
; SSSE3-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSSE3-NEXT:    psadbw %xmm0, %xmm2
; SSSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSSE3-NEXT:    psadbw %xmm0, %xmm1
; SSSE3-NEXT:    packuswb %xmm2, %xmm1
; SSSE3-NEXT:    pcmpgtd {{.*}}(%rip), %xmm1
; SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ugt_19_v4i32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm1, %xmm1
; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero
; SSE41-NEXT:    punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3]
; SSE41-NEXT:    psadbw %xmm1, %xmm3
; SSE41-NEXT:    psadbw %xmm1, %xmm0
; SSE41-NEXT:    packuswb %xmm3, %xmm0
; SSE41-NEXT:    pcmpgtd {{.*}}(%rip), %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ugt_19_v4i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpgtd {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ugt_19_v4i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX2-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [19,19,19,19]
; AVX2-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ugt_19_v4i32:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [19,19,19,19]
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ugt_19_v4i32:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ugt_19_v4i32:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; BITALG_NOVLX-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [19,19,19,19]
; BITALG_NOVLX-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ugt_19_v4i32:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; BITALG-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
  %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
  %3 = icmp ugt <4 x i32> %2, <i32 19, i32 19, i32 19, i32 19>
  %4 = sext <4 x i1> %3 to <4 x i32>
  ret <4 x i32> %4
}
12609
; Checks lowering of "popcount(<4 x i32>) unsigned-less-than 20" for every RUN
; configuration (SSE2/SSE3/SSSE3/SSE4.1 use the bit-twiddling or pshufb-LUT
; popcount plus psadbw; AVX512VPOPCNTDQ/BITALG use native popcount insns).
; NOTE(review): CHECK lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate with that script rather than hand-editing them.
12610define <4 x i32> @ult_20_v4i32(<4 x i32> %0) {
12611; SSE2-LABEL: ult_20_v4i32:
12612; SSE2:       # %bb.0:
12613; SSE2-NEXT:    movdqa %xmm0, %xmm1
12614; SSE2-NEXT:    psrlw $1, %xmm1
12615; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
12616; SSE2-NEXT:    psubb %xmm1, %xmm0
12617; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
12618; SSE2-NEXT:    movdqa %xmm0, %xmm2
12619; SSE2-NEXT:    pand %xmm1, %xmm2
12620; SSE2-NEXT:    psrlw $2, %xmm0
12621; SSE2-NEXT:    pand %xmm1, %xmm0
12622; SSE2-NEXT:    paddb %xmm2, %xmm0
12623; SSE2-NEXT:    movdqa %xmm0, %xmm1
12624; SSE2-NEXT:    psrlw $4, %xmm1
12625; SSE2-NEXT:    paddb %xmm0, %xmm1
12626; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
12627; SSE2-NEXT:    pxor %xmm0, %xmm0
12628; SSE2-NEXT:    movdqa %xmm1, %xmm2
12629; SSE2-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
12630; SSE2-NEXT:    psadbw %xmm0, %xmm2
12631; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
12632; SSE2-NEXT:    psadbw %xmm0, %xmm1
12633; SSE2-NEXT:    packuswb %xmm2, %xmm1
12634; SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [20,20,20,20]
12635; SSE2-NEXT:    pcmpgtd %xmm1, %xmm0
12636; SSE2-NEXT:    retq
12637;
12638; SSE3-LABEL: ult_20_v4i32:
12639; SSE3:       # %bb.0:
12640; SSE3-NEXT:    movdqa %xmm0, %xmm1
12641; SSE3-NEXT:    psrlw $1, %xmm1
12642; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
12643; SSE3-NEXT:    psubb %xmm1, %xmm0
12644; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
12645; SSE3-NEXT:    movdqa %xmm0, %xmm2
12646; SSE3-NEXT:    pand %xmm1, %xmm2
12647; SSE3-NEXT:    psrlw $2, %xmm0
12648; SSE3-NEXT:    pand %xmm1, %xmm0
12649; SSE3-NEXT:    paddb %xmm2, %xmm0
12650; SSE3-NEXT:    movdqa %xmm0, %xmm1
12651; SSE3-NEXT:    psrlw $4, %xmm1
12652; SSE3-NEXT:    paddb %xmm0, %xmm1
12653; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
12654; SSE3-NEXT:    pxor %xmm0, %xmm0
12655; SSE3-NEXT:    movdqa %xmm1, %xmm2
12656; SSE3-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
12657; SSE3-NEXT:    psadbw %xmm0, %xmm2
12658; SSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
12659; SSE3-NEXT:    psadbw %xmm0, %xmm1
12660; SSE3-NEXT:    packuswb %xmm2, %xmm1
12661; SSE3-NEXT:    movdqa {{.*#+}} xmm0 = [20,20,20,20]
12662; SSE3-NEXT:    pcmpgtd %xmm1, %xmm0
12663; SSE3-NEXT:    retq
12664;
12665; SSSE3-LABEL: ult_20_v4i32:
12666; SSSE3:       # %bb.0:
12667; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
12668; SSSE3-NEXT:    movdqa %xmm0, %xmm2
12669; SSSE3-NEXT:    pand %xmm1, %xmm2
12670; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
12671; SSSE3-NEXT:    movdqa %xmm3, %xmm4
12672; SSSE3-NEXT:    pshufb %xmm2, %xmm4
12673; SSSE3-NEXT:    psrlw $4, %xmm0
12674; SSSE3-NEXT:    pand %xmm1, %xmm0
12675; SSSE3-NEXT:    pshufb %xmm0, %xmm3
12676; SSSE3-NEXT:    paddb %xmm4, %xmm3
12677; SSSE3-NEXT:    pxor %xmm0, %xmm0
12678; SSSE3-NEXT:    movdqa %xmm3, %xmm1
12679; SSSE3-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
12680; SSSE3-NEXT:    psadbw %xmm0, %xmm1
12681; SSSE3-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
12682; SSSE3-NEXT:    psadbw %xmm0, %xmm3
12683; SSSE3-NEXT:    packuswb %xmm1, %xmm3
12684; SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [20,20,20,20]
12685; SSSE3-NEXT:    pcmpgtd %xmm3, %xmm0
12686; SSSE3-NEXT:    retq
12687;
12688; SSE41-LABEL: ult_20_v4i32:
12689; SSE41:       # %bb.0:
12690; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
12691; SSE41-NEXT:    movdqa %xmm0, %xmm2
12692; SSE41-NEXT:    pand %xmm1, %xmm2
12693; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
12694; SSE41-NEXT:    movdqa %xmm3, %xmm4
12695; SSE41-NEXT:    pshufb %xmm2, %xmm4
12696; SSE41-NEXT:    psrlw $4, %xmm0
12697; SSE41-NEXT:    pand %xmm1, %xmm0
12698; SSE41-NEXT:    pshufb %xmm0, %xmm3
12699; SSE41-NEXT:    paddb %xmm4, %xmm3
12700; SSE41-NEXT:    pxor %xmm0, %xmm0
12701; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero
12702; SSE41-NEXT:    punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3]
12703; SSE41-NEXT:    psadbw %xmm0, %xmm3
12704; SSE41-NEXT:    psadbw %xmm0, %xmm1
12705; SSE41-NEXT:    packuswb %xmm3, %xmm1
12706; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [20,20,20,20]
12707; SSE41-NEXT:    pcmpgtd %xmm1, %xmm0
12708; SSE41-NEXT:    retq
12709;
12710; AVX1-LABEL: ult_20_v4i32:
12711; AVX1:       # %bb.0:
12712; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
12713; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
12714; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
12715; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
12716; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
12717; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
12718; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
12719; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
12720; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
12721; AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
12722; AVX1-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
12723; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
12724; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
12725; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
12726; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [20,20,20,20]
12727; AVX1-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
12728; AVX1-NEXT:    retq
12729;
12730; AVX2-LABEL: ult_20_v4i32:
12731; AVX2:       # %bb.0:
12732; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
12733; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
12734; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
12735; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
12736; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
12737; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
12738; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
12739; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
12740; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
12741; AVX2-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
12742; AVX2-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
12743; AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
12744; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
12745; AVX2-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
12746; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [20,20,20,20]
12747; AVX2-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
12748; AVX2-NEXT:    retq
12749;
12750; AVX512VPOPCNTDQ-LABEL: ult_20_v4i32:
12751; AVX512VPOPCNTDQ:       # %bb.0:
12752; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
12753; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
12754; AVX512VPOPCNTDQ-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [20,20,20,20]
12755; AVX512VPOPCNTDQ-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
12756; AVX512VPOPCNTDQ-NEXT:    vzeroupper
12757; AVX512VPOPCNTDQ-NEXT:    retq
12758;
12759; AVX512VPOPCNTDQVL-LABEL: ult_20_v4i32:
12760; AVX512VPOPCNTDQVL:       # %bb.0:
12761; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
12762; AVX512VPOPCNTDQVL-NEXT:    vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1
12763; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
12764; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
12765; AVX512VPOPCNTDQVL-NEXT:    retq
12766;
12767; BITALG_NOVLX-LABEL: ult_20_v4i32:
12768; BITALG_NOVLX:       # %bb.0:
12769; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
12770; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
12771; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
12772; BITALG_NOVLX-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
12773; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
12774; BITALG_NOVLX-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
12775; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
12776; BITALG_NOVLX-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
12777; BITALG_NOVLX-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [20,20,20,20]
12778; BITALG_NOVLX-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
12779; BITALG_NOVLX-NEXT:    vzeroupper
12780; BITALG_NOVLX-NEXT:    retq
12781;
12782; BITALG-LABEL: ult_20_v4i32:
12783; BITALG:       # %bb.0:
12784; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
12785; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
12786; BITALG-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
12787; BITALG-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
12788; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
12789; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
12790; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
12791; BITALG-NEXT:    vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1
12792; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
12793; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
12794; BITALG-NEXT:    retq
; Reference IR: per-lane popcount, unsigned compare < 20, sext i1 mask to i32.
12795  %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
12796  %3 = icmp ult <4 x i32> %2, <i32 20, i32 20, i32 20, i32 20>
12797  %4 = sext <4 x i1> %3 to <4 x i32>
12798  ret <4 x i32> %4
12799}
12800
; Checks lowering of "popcount(<4 x i32>) unsigned-greater-than 20". Note the
; ugt form lets llc fold the constant into pcmpgtd's memory operand on SSE,
; unlike the ult twin which materializes the splat first.
; NOTE(review): CHECK lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate with that script rather than hand-editing them.
12801define <4 x i32> @ugt_20_v4i32(<4 x i32> %0) {
12802; SSE2-LABEL: ugt_20_v4i32:
12803; SSE2:       # %bb.0:
12804; SSE2-NEXT:    movdqa %xmm0, %xmm1
12805; SSE2-NEXT:    psrlw $1, %xmm1
12806; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
12807; SSE2-NEXT:    psubb %xmm1, %xmm0
12808; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
12809; SSE2-NEXT:    movdqa %xmm0, %xmm2
12810; SSE2-NEXT:    pand %xmm1, %xmm2
12811; SSE2-NEXT:    psrlw $2, %xmm0
12812; SSE2-NEXT:    pand %xmm1, %xmm0
12813; SSE2-NEXT:    paddb %xmm2, %xmm0
12814; SSE2-NEXT:    movdqa %xmm0, %xmm1
12815; SSE2-NEXT:    psrlw $4, %xmm1
12816; SSE2-NEXT:    paddb %xmm0, %xmm1
12817; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
12818; SSE2-NEXT:    pxor %xmm0, %xmm0
12819; SSE2-NEXT:    movdqa %xmm1, %xmm2
12820; SSE2-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
12821; SSE2-NEXT:    psadbw %xmm0, %xmm2
12822; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
12823; SSE2-NEXT:    psadbw %xmm0, %xmm1
12824; SSE2-NEXT:    packuswb %xmm2, %xmm1
12825; SSE2-NEXT:    pcmpgtd {{.*}}(%rip), %xmm1
12826; SSE2-NEXT:    movdqa %xmm1, %xmm0
12827; SSE2-NEXT:    retq
12828;
12829; SSE3-LABEL: ugt_20_v4i32:
12830; SSE3:       # %bb.0:
12831; SSE3-NEXT:    movdqa %xmm0, %xmm1
12832; SSE3-NEXT:    psrlw $1, %xmm1
12833; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
12834; SSE3-NEXT:    psubb %xmm1, %xmm0
12835; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
12836; SSE3-NEXT:    movdqa %xmm0, %xmm2
12837; SSE3-NEXT:    pand %xmm1, %xmm2
12838; SSE3-NEXT:    psrlw $2, %xmm0
12839; SSE3-NEXT:    pand %xmm1, %xmm0
12840; SSE3-NEXT:    paddb %xmm2, %xmm0
12841; SSE3-NEXT:    movdqa %xmm0, %xmm1
12842; SSE3-NEXT:    psrlw $4, %xmm1
12843; SSE3-NEXT:    paddb %xmm0, %xmm1
12844; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
12845; SSE3-NEXT:    pxor %xmm0, %xmm0
12846; SSE3-NEXT:    movdqa %xmm1, %xmm2
12847; SSE3-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
12848; SSE3-NEXT:    psadbw %xmm0, %xmm2
12849; SSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
12850; SSE3-NEXT:    psadbw %xmm0, %xmm1
12851; SSE3-NEXT:    packuswb %xmm2, %xmm1
12852; SSE3-NEXT:    pcmpgtd {{.*}}(%rip), %xmm1
12853; SSE3-NEXT:    movdqa %xmm1, %xmm0
12854; SSE3-NEXT:    retq
12855;
12856; SSSE3-LABEL: ugt_20_v4i32:
12857; SSSE3:       # %bb.0:
12858; SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
12859; SSSE3-NEXT:    movdqa %xmm0, %xmm3
12860; SSSE3-NEXT:    pand %xmm2, %xmm3
12861; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
12862; SSSE3-NEXT:    movdqa %xmm1, %xmm4
12863; SSSE3-NEXT:    pshufb %xmm3, %xmm4
12864; SSSE3-NEXT:    psrlw $4, %xmm0
12865; SSSE3-NEXT:    pand %xmm2, %xmm0
12866; SSSE3-NEXT:    pshufb %xmm0, %xmm1
12867; SSSE3-NEXT:    paddb %xmm4, %xmm1
12868; SSSE3-NEXT:    pxor %xmm0, %xmm0
12869; SSSE3-NEXT:    movdqa %xmm1, %xmm2
12870; SSSE3-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
12871; SSSE3-NEXT:    psadbw %xmm0, %xmm2
12872; SSSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
12873; SSSE3-NEXT:    psadbw %xmm0, %xmm1
12874; SSSE3-NEXT:    packuswb %xmm2, %xmm1
12875; SSSE3-NEXT:    pcmpgtd {{.*}}(%rip), %xmm1
12876; SSSE3-NEXT:    movdqa %xmm1, %xmm0
12877; SSSE3-NEXT:    retq
12878;
12879; SSE41-LABEL: ugt_20_v4i32:
12880; SSE41:       # %bb.0:
12881; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
12882; SSE41-NEXT:    movdqa %xmm0, %xmm2
12883; SSE41-NEXT:    pand %xmm1, %xmm2
12884; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
12885; SSE41-NEXT:    movdqa %xmm3, %xmm4
12886; SSE41-NEXT:    pshufb %xmm2, %xmm4
12887; SSE41-NEXT:    psrlw $4, %xmm0
12888; SSE41-NEXT:    pand %xmm1, %xmm0
12889; SSE41-NEXT:    pshufb %xmm0, %xmm3
12890; SSE41-NEXT:    paddb %xmm4, %xmm3
12891; SSE41-NEXT:    pxor %xmm1, %xmm1
12892; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero
12893; SSE41-NEXT:    punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3]
12894; SSE41-NEXT:    psadbw %xmm1, %xmm3
12895; SSE41-NEXT:    psadbw %xmm1, %xmm0
12896; SSE41-NEXT:    packuswb %xmm3, %xmm0
12897; SSE41-NEXT:    pcmpgtd {{.*}}(%rip), %xmm0
12898; SSE41-NEXT:    retq
12899;
12900; AVX1-LABEL: ugt_20_v4i32:
12901; AVX1:       # %bb.0:
12902; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
12903; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
12904; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
12905; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
12906; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
12907; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
12908; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
12909; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
12910; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
12911; AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
12912; AVX1-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
12913; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
12914; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
12915; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
12916; AVX1-NEXT:    vpcmpgtd {{.*}}(%rip), %xmm0, %xmm0
12917; AVX1-NEXT:    retq
12918;
12919; AVX2-LABEL: ugt_20_v4i32:
12920; AVX2:       # %bb.0:
12921; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
12922; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
12923; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
12924; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
12925; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
12926; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
12927; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
12928; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
12929; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
12930; AVX2-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
12931; AVX2-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
12932; AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
12933; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
12934; AVX2-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
12935; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [20,20,20,20]
12936; AVX2-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
12937; AVX2-NEXT:    retq
12938;
12939; AVX512VPOPCNTDQ-LABEL: ugt_20_v4i32:
12940; AVX512VPOPCNTDQ:       # %bb.0:
12941; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
12942; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
12943; AVX512VPOPCNTDQ-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [20,20,20,20]
12944; AVX512VPOPCNTDQ-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
12945; AVX512VPOPCNTDQ-NEXT:    vzeroupper
12946; AVX512VPOPCNTDQ-NEXT:    retq
12947;
12948; AVX512VPOPCNTDQVL-LABEL: ugt_20_v4i32:
12949; AVX512VPOPCNTDQVL:       # %bb.0:
12950; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
12951; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1
12952; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
12953; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
12954; AVX512VPOPCNTDQVL-NEXT:    retq
12955;
12956; BITALG_NOVLX-LABEL: ugt_20_v4i32:
12957; BITALG_NOVLX:       # %bb.0:
12958; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
12959; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
12960; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
12961; BITALG_NOVLX-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
12962; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
12963; BITALG_NOVLX-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
12964; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
12965; BITALG_NOVLX-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
12966; BITALG_NOVLX-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [20,20,20,20]
12967; BITALG_NOVLX-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
12968; BITALG_NOVLX-NEXT:    vzeroupper
12969; BITALG_NOVLX-NEXT:    retq
12970;
12971; BITALG-LABEL: ugt_20_v4i32:
12972; BITALG:       # %bb.0:
12973; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
12974; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
12975; BITALG-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
12976; BITALG-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
12977; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
12978; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
12979; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
12980; BITALG-NEXT:    vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1
12981; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
12982; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
12983; BITALG-NEXT:    retq
; Reference IR: per-lane popcount, unsigned compare > 20, sext i1 mask to i32.
12984  %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
12985  %3 = icmp ugt <4 x i32> %2, <i32 20, i32 20, i32 20, i32 20>
12986  %4 = sext <4 x i1> %3 to <4 x i32>
12987  ret <4 x i32> %4
12988}
12989
; Checks lowering of "popcount(<4 x i32>) unsigned-less-than 21" — same shape
; as ult_20_v4i32 but with the splat constant 21, confirming the threshold
; feeds straight into the materialized compare constant.
; NOTE(review): CHECK lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate with that script rather than hand-editing them.
12990define <4 x i32> @ult_21_v4i32(<4 x i32> %0) {
12991; SSE2-LABEL: ult_21_v4i32:
12992; SSE2:       # %bb.0:
12993; SSE2-NEXT:    movdqa %xmm0, %xmm1
12994; SSE2-NEXT:    psrlw $1, %xmm1
12995; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
12996; SSE2-NEXT:    psubb %xmm1, %xmm0
12997; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
12998; SSE2-NEXT:    movdqa %xmm0, %xmm2
12999; SSE2-NEXT:    pand %xmm1, %xmm2
13000; SSE2-NEXT:    psrlw $2, %xmm0
13001; SSE2-NEXT:    pand %xmm1, %xmm0
13002; SSE2-NEXT:    paddb %xmm2, %xmm0
13003; SSE2-NEXT:    movdqa %xmm0, %xmm1
13004; SSE2-NEXT:    psrlw $4, %xmm1
13005; SSE2-NEXT:    paddb %xmm0, %xmm1
13006; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
13007; SSE2-NEXT:    pxor %xmm0, %xmm0
13008; SSE2-NEXT:    movdqa %xmm1, %xmm2
13009; SSE2-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
13010; SSE2-NEXT:    psadbw %xmm0, %xmm2
13011; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
13012; SSE2-NEXT:    psadbw %xmm0, %xmm1
13013; SSE2-NEXT:    packuswb %xmm2, %xmm1
13014; SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [21,21,21,21]
13015; SSE2-NEXT:    pcmpgtd %xmm1, %xmm0
13016; SSE2-NEXT:    retq
13017;
13018; SSE3-LABEL: ult_21_v4i32:
13019; SSE3:       # %bb.0:
13020; SSE3-NEXT:    movdqa %xmm0, %xmm1
13021; SSE3-NEXT:    psrlw $1, %xmm1
13022; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
13023; SSE3-NEXT:    psubb %xmm1, %xmm0
13024; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
13025; SSE3-NEXT:    movdqa %xmm0, %xmm2
13026; SSE3-NEXT:    pand %xmm1, %xmm2
13027; SSE3-NEXT:    psrlw $2, %xmm0
13028; SSE3-NEXT:    pand %xmm1, %xmm0
13029; SSE3-NEXT:    paddb %xmm2, %xmm0
13030; SSE3-NEXT:    movdqa %xmm0, %xmm1
13031; SSE3-NEXT:    psrlw $4, %xmm1
13032; SSE3-NEXT:    paddb %xmm0, %xmm1
13033; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
13034; SSE3-NEXT:    pxor %xmm0, %xmm0
13035; SSE3-NEXT:    movdqa %xmm1, %xmm2
13036; SSE3-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
13037; SSE3-NEXT:    psadbw %xmm0, %xmm2
13038; SSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
13039; SSE3-NEXT:    psadbw %xmm0, %xmm1
13040; SSE3-NEXT:    packuswb %xmm2, %xmm1
13041; SSE3-NEXT:    movdqa {{.*#+}} xmm0 = [21,21,21,21]
13042; SSE3-NEXT:    pcmpgtd %xmm1, %xmm0
13043; SSE3-NEXT:    retq
13044;
13045; SSSE3-LABEL: ult_21_v4i32:
13046; SSSE3:       # %bb.0:
13047; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
13048; SSSE3-NEXT:    movdqa %xmm0, %xmm2
13049; SSSE3-NEXT:    pand %xmm1, %xmm2
13050; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
13051; SSSE3-NEXT:    movdqa %xmm3, %xmm4
13052; SSSE3-NEXT:    pshufb %xmm2, %xmm4
13053; SSSE3-NEXT:    psrlw $4, %xmm0
13054; SSSE3-NEXT:    pand %xmm1, %xmm0
13055; SSSE3-NEXT:    pshufb %xmm0, %xmm3
13056; SSSE3-NEXT:    paddb %xmm4, %xmm3
13057; SSSE3-NEXT:    pxor %xmm0, %xmm0
13058; SSSE3-NEXT:    movdqa %xmm3, %xmm1
13059; SSSE3-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
13060; SSSE3-NEXT:    psadbw %xmm0, %xmm1
13061; SSSE3-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
13062; SSSE3-NEXT:    psadbw %xmm0, %xmm3
13063; SSSE3-NEXT:    packuswb %xmm1, %xmm3
13064; SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [21,21,21,21]
13065; SSSE3-NEXT:    pcmpgtd %xmm3, %xmm0
13066; SSSE3-NEXT:    retq
13067;
13068; SSE41-LABEL: ult_21_v4i32:
13069; SSE41:       # %bb.0:
13070; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
13071; SSE41-NEXT:    movdqa %xmm0, %xmm2
13072; SSE41-NEXT:    pand %xmm1, %xmm2
13073; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
13074; SSE41-NEXT:    movdqa %xmm3, %xmm4
13075; SSE41-NEXT:    pshufb %xmm2, %xmm4
13076; SSE41-NEXT:    psrlw $4, %xmm0
13077; SSE41-NEXT:    pand %xmm1, %xmm0
13078; SSE41-NEXT:    pshufb %xmm0, %xmm3
13079; SSE41-NEXT:    paddb %xmm4, %xmm3
13080; SSE41-NEXT:    pxor %xmm0, %xmm0
13081; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero
13082; SSE41-NEXT:    punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3]
13083; SSE41-NEXT:    psadbw %xmm0, %xmm3
13084; SSE41-NEXT:    psadbw %xmm0, %xmm1
13085; SSE41-NEXT:    packuswb %xmm3, %xmm1
13086; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [21,21,21,21]
13087; SSE41-NEXT:    pcmpgtd %xmm1, %xmm0
13088; SSE41-NEXT:    retq
13089;
13090; AVX1-LABEL: ult_21_v4i32:
13091; AVX1:       # %bb.0:
13092; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
13093; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
13094; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
13095; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
13096; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
13097; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
13098; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
13099; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
13100; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
13101; AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
13102; AVX1-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
13103; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
13104; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
13105; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
13106; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [21,21,21,21]
13107; AVX1-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
13108; AVX1-NEXT:    retq
13109;
13110; AVX2-LABEL: ult_21_v4i32:
13111; AVX2:       # %bb.0:
13112; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
13113; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
13114; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
13115; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
13116; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
13117; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
13118; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
13119; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
13120; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
13121; AVX2-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
13122; AVX2-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
13123; AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
13124; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
13125; AVX2-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
13126; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [21,21,21,21]
13127; AVX2-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
13128; AVX2-NEXT:    retq
13129;
13130; AVX512VPOPCNTDQ-LABEL: ult_21_v4i32:
13131; AVX512VPOPCNTDQ:       # %bb.0:
13132; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
13133; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
13134; AVX512VPOPCNTDQ-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [21,21,21,21]
13135; AVX512VPOPCNTDQ-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
13136; AVX512VPOPCNTDQ-NEXT:    vzeroupper
13137; AVX512VPOPCNTDQ-NEXT:    retq
13138;
13139; AVX512VPOPCNTDQVL-LABEL: ult_21_v4i32:
13140; AVX512VPOPCNTDQVL:       # %bb.0:
13141; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
13142; AVX512VPOPCNTDQVL-NEXT:    vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1
13143; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
13144; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
13145; AVX512VPOPCNTDQVL-NEXT:    retq
13146;
13147; BITALG_NOVLX-LABEL: ult_21_v4i32:
13148; BITALG_NOVLX:       # %bb.0:
13149; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
13150; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
13151; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
13152; BITALG_NOVLX-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
13153; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
13154; BITALG_NOVLX-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
13155; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
13156; BITALG_NOVLX-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
13157; BITALG_NOVLX-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [21,21,21,21]
13158; BITALG_NOVLX-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
13159; BITALG_NOVLX-NEXT:    vzeroupper
13160; BITALG_NOVLX-NEXT:    retq
13161;
13162; BITALG-LABEL: ult_21_v4i32:
13163; BITALG:       # %bb.0:
13164; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
13165; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
13166; BITALG-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
13167; BITALG-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
13168; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
13169; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
13170; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
13171; BITALG-NEXT:    vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1
13172; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
13173; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
13174; BITALG-NEXT:    retq
; Reference IR: per-lane popcount, unsigned compare < 21, sext i1 mask to i32.
13175  %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
13176  %3 = icmp ult <4 x i32> %2, <i32 21, i32 21, i32 21, i32 21>
13177  %4 = sext <4 x i1> %3 to <4 x i32>
13178  ret <4 x i32> %4
13179}
13180
13181define <4 x i32> @ugt_21_v4i32(<4 x i32> %0) {
13182; SSE2-LABEL: ugt_21_v4i32:
13183; SSE2:       # %bb.0:
13184; SSE2-NEXT:    movdqa %xmm0, %xmm1
13185; SSE2-NEXT:    psrlw $1, %xmm1
13186; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
13187; SSE2-NEXT:    psubb %xmm1, %xmm0
13188; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
13189; SSE2-NEXT:    movdqa %xmm0, %xmm2
13190; SSE2-NEXT:    pand %xmm1, %xmm2
13191; SSE2-NEXT:    psrlw $2, %xmm0
13192; SSE2-NEXT:    pand %xmm1, %xmm0
13193; SSE2-NEXT:    paddb %xmm2, %xmm0
13194; SSE2-NEXT:    movdqa %xmm0, %xmm1
13195; SSE2-NEXT:    psrlw $4, %xmm1
13196; SSE2-NEXT:    paddb %xmm0, %xmm1
13197; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
13198; SSE2-NEXT:    pxor %xmm0, %xmm0
13199; SSE2-NEXT:    movdqa %xmm1, %xmm2
13200; SSE2-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
13201; SSE2-NEXT:    psadbw %xmm0, %xmm2
13202; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
13203; SSE2-NEXT:    psadbw %xmm0, %xmm1
13204; SSE2-NEXT:    packuswb %xmm2, %xmm1
13205; SSE2-NEXT:    pcmpgtd {{.*}}(%rip), %xmm1
13206; SSE2-NEXT:    movdqa %xmm1, %xmm0
13207; SSE2-NEXT:    retq
13208;
13209; SSE3-LABEL: ugt_21_v4i32:
13210; SSE3:       # %bb.0:
13211; SSE3-NEXT:    movdqa %xmm0, %xmm1
13212; SSE3-NEXT:    psrlw $1, %xmm1
13213; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
13214; SSE3-NEXT:    psubb %xmm1, %xmm0
13215; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
13216; SSE3-NEXT:    movdqa %xmm0, %xmm2
13217; SSE3-NEXT:    pand %xmm1, %xmm2
13218; SSE3-NEXT:    psrlw $2, %xmm0
13219; SSE3-NEXT:    pand %xmm1, %xmm0
13220; SSE3-NEXT:    paddb %xmm2, %xmm0
13221; SSE3-NEXT:    movdqa %xmm0, %xmm1
13222; SSE3-NEXT:    psrlw $4, %xmm1
13223; SSE3-NEXT:    paddb %xmm0, %xmm1
13224; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
13225; SSE3-NEXT:    pxor %xmm0, %xmm0
13226; SSE3-NEXT:    movdqa %xmm1, %xmm2
13227; SSE3-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
13228; SSE3-NEXT:    psadbw %xmm0, %xmm2
13229; SSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
13230; SSE3-NEXT:    psadbw %xmm0, %xmm1
13231; SSE3-NEXT:    packuswb %xmm2, %xmm1
13232; SSE3-NEXT:    pcmpgtd {{.*}}(%rip), %xmm1
13233; SSE3-NEXT:    movdqa %xmm1, %xmm0
13234; SSE3-NEXT:    retq
13235;
13236; SSSE3-LABEL: ugt_21_v4i32:
13237; SSSE3:       # %bb.0:
13238; SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
13239; SSSE3-NEXT:    movdqa %xmm0, %xmm3
13240; SSSE3-NEXT:    pand %xmm2, %xmm3
13241; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
13242; SSSE3-NEXT:    movdqa %xmm1, %xmm4
13243; SSSE3-NEXT:    pshufb %xmm3, %xmm4
13244; SSSE3-NEXT:    psrlw $4, %xmm0
13245; SSSE3-NEXT:    pand %xmm2, %xmm0
13246; SSSE3-NEXT:    pshufb %xmm0, %xmm1
13247; SSSE3-NEXT:    paddb %xmm4, %xmm1
13248; SSSE3-NEXT:    pxor %xmm0, %xmm0
13249; SSSE3-NEXT:    movdqa %xmm1, %xmm2
13250; SSSE3-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
13251; SSSE3-NEXT:    psadbw %xmm0, %xmm2
13252; SSSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
13253; SSSE3-NEXT:    psadbw %xmm0, %xmm1
13254; SSSE3-NEXT:    packuswb %xmm2, %xmm1
13255; SSSE3-NEXT:    pcmpgtd {{.*}}(%rip), %xmm1
13256; SSSE3-NEXT:    movdqa %xmm1, %xmm0
13257; SSSE3-NEXT:    retq
13258;
13259; SSE41-LABEL: ugt_21_v4i32:
13260; SSE41:       # %bb.0:
13261; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
13262; SSE41-NEXT:    movdqa %xmm0, %xmm2
13263; SSE41-NEXT:    pand %xmm1, %xmm2
13264; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
13265; SSE41-NEXT:    movdqa %xmm3, %xmm4
13266; SSE41-NEXT:    pshufb %xmm2, %xmm4
13267; SSE41-NEXT:    psrlw $4, %xmm0
13268; SSE41-NEXT:    pand %xmm1, %xmm0
13269; SSE41-NEXT:    pshufb %xmm0, %xmm3
13270; SSE41-NEXT:    paddb %xmm4, %xmm3
13271; SSE41-NEXT:    pxor %xmm1, %xmm1
13272; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero
13273; SSE41-NEXT:    punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3]
13274; SSE41-NEXT:    psadbw %xmm1, %xmm3
13275; SSE41-NEXT:    psadbw %xmm1, %xmm0
13276; SSE41-NEXT:    packuswb %xmm3, %xmm0
13277; SSE41-NEXT:    pcmpgtd {{.*}}(%rip), %xmm0
13278; SSE41-NEXT:    retq
13279;
13280; AVX1-LABEL: ugt_21_v4i32:
13281; AVX1:       # %bb.0:
13282; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
13283; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
13284; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
13285; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
13286; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
13287; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
13288; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
13289; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
13290; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
13291; AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
13292; AVX1-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
13293; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
13294; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
13295; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
13296; AVX1-NEXT:    vpcmpgtd {{.*}}(%rip), %xmm0, %xmm0
13297; AVX1-NEXT:    retq
13298;
13299; AVX2-LABEL: ugt_21_v4i32:
13300; AVX2:       # %bb.0:
13301; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
13302; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
13303; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
13304; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
13305; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
13306; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
13307; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
13308; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
13309; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
13310; AVX2-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
13311; AVX2-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
13312; AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
13313; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
13314; AVX2-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
13315; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [21,21,21,21]
13316; AVX2-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
13317; AVX2-NEXT:    retq
13318;
13319; AVX512VPOPCNTDQ-LABEL: ugt_21_v4i32:
13320; AVX512VPOPCNTDQ:       # %bb.0:
13321; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
13322; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
13323; AVX512VPOPCNTDQ-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [21,21,21,21]
13324; AVX512VPOPCNTDQ-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
13325; AVX512VPOPCNTDQ-NEXT:    vzeroupper
13326; AVX512VPOPCNTDQ-NEXT:    retq
13327;
13328; AVX512VPOPCNTDQVL-LABEL: ugt_21_v4i32:
13329; AVX512VPOPCNTDQVL:       # %bb.0:
13330; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
13331; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1
13332; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
13333; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
13334; AVX512VPOPCNTDQVL-NEXT:    retq
13335;
13336; BITALG_NOVLX-LABEL: ugt_21_v4i32:
13337; BITALG_NOVLX:       # %bb.0:
13338; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
13339; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
13340; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
13341; BITALG_NOVLX-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
13342; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
13343; BITALG_NOVLX-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
13344; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
13345; BITALG_NOVLX-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
13346; BITALG_NOVLX-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [21,21,21,21]
13347; BITALG_NOVLX-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
13348; BITALG_NOVLX-NEXT:    vzeroupper
13349; BITALG_NOVLX-NEXT:    retq
13350;
13351; BITALG-LABEL: ugt_21_v4i32:
13352; BITALG:       # %bb.0:
13353; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
13354; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
13355; BITALG-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
13356; BITALG-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
13357; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
13358; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
13359; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
13360; BITALG-NEXT:    vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1
13361; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
13362; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
13363; BITALG-NEXT:    retq
13364  %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
13365  %3 = icmp ugt <4 x i32> %2, <i32 21, i32 21, i32 21, i32 21>
13366  %4 = sext <4 x i1> %3 to <4 x i32>
13367  ret <4 x i32> %4
13368}
13369
; ult_22_v4i32: per-i32-lane test of "ctpop(x) u< 22"; each result lane is the
; sign-extended i1 (-1 if the lane's population count is below 22, else 0).
; NOTE(review): the CHECK lines below are autogenerated by
; utils/update_llc_test_checks.py (see file header) -- regenerate them with
; that script rather than editing by hand.
define <4 x i32> @ult_22_v4i32(<4 x i32> %0) {
; SSE2-LABEL: ult_22_v4i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE2-NEXT:    psadbw %xmm0, %xmm2
; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE2-NEXT:    psadbw %xmm0, %xmm1
; SSE2-NEXT:    packuswb %xmm2, %xmm1
; SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [22,22,22,22]
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ult_22_v4i32:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    movdqa %xmm1, %xmm2
; SSE3-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE3-NEXT:    psadbw %xmm0, %xmm2
; SSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE3-NEXT:    psadbw %xmm0, %xmm1
; SSE3-NEXT:    packuswb %xmm2, %xmm1
; SSE3-NEXT:    movdqa {{.*#+}} xmm0 = [22,22,22,22]
; SSE3-NEXT:    pcmpgtd %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ult_22_v4i32:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    movdqa %xmm3, %xmm1
; SSSE3-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; SSSE3-NEXT:    psadbw %xmm0, %xmm1
; SSSE3-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
; SSSE3-NEXT:    psadbw %xmm0, %xmm3
; SSSE3-NEXT:    packuswb %xmm1, %xmm3
; SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [22,22,22,22]
; SSSE3-NEXT:    pcmpgtd %xmm3, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ult_22_v4i32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero
; SSE41-NEXT:    punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3]
; SSE41-NEXT:    psadbw %xmm0, %xmm3
; SSE41-NEXT:    psadbw %xmm0, %xmm1
; SSE41-NEXT:    packuswb %xmm3, %xmm1
; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [22,22,22,22]
; SSE41-NEXT:    pcmpgtd %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ult_22_v4i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [22,22,22,22]
; AVX1-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ult_22_v4i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX2-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [22,22,22,22]
; AVX2-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ult_22_v4i32:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [22,22,22,22]
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ult_22_v4i32:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ult_22_v4i32:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; BITALG_NOVLX-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [22,22,22,22]
; BITALG_NOVLX-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ult_22_v4i32:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; BITALG-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
  ; ctpop each lane, compare unsigned-less-than 22, widen the i1 mask to i32.
  %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
  %3 = icmp ult <4 x i32> %2, <i32 22, i32 22, i32 22, i32 22>
  %4 = sext <4 x i1> %3 to <4 x i32>
  ret <4 x i32> %4
}
13560
; ugt_22_v4i32: per-i32-lane test of "ctpop(x) u> 22"; each result lane is the
; sign-extended i1 (-1 if the lane's population count exceeds 22, else 0).
; NOTE(review): the CHECK lines below are autogenerated by
; utils/update_llc_test_checks.py (see file header) -- regenerate them with
; that script rather than editing by hand.
define <4 x i32> @ugt_22_v4i32(<4 x i32> %0) {
; SSE2-LABEL: ugt_22_v4i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE2-NEXT:    psadbw %xmm0, %xmm2
; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE2-NEXT:    psadbw %xmm0, %xmm1
; SSE2-NEXT:    packuswb %xmm2, %xmm1
; SSE2-NEXT:    pcmpgtd {{.*}}(%rip), %xmm1
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ugt_22_v4i32:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    movdqa %xmm1, %xmm2
; SSE3-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE3-NEXT:    psadbw %xmm0, %xmm2
; SSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE3-NEXT:    psadbw %xmm0, %xmm1
; SSE3-NEXT:    packuswb %xmm2, %xmm1
; SSE3-NEXT:    pcmpgtd {{.*}}(%rip), %xmm1
; SSE3-NEXT:    movdqa %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ugt_22_v4i32:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm3
; SSSE3-NEXT:    pand %xmm2, %xmm3
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm1, %xmm4
; SSSE3-NEXT:    pshufb %xmm3, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm2, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm1
; SSSE3-NEXT:    paddb %xmm4, %xmm1
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    movdqa %xmm1, %xmm2
; SSSE3-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSSE3-NEXT:    psadbw %xmm0, %xmm2
; SSSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSSE3-NEXT:    psadbw %xmm0, %xmm1
; SSSE3-NEXT:    packuswb %xmm2, %xmm1
; SSSE3-NEXT:    pcmpgtd {{.*}}(%rip), %xmm1
; SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ugt_22_v4i32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm1, %xmm1
; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero
; SSE41-NEXT:    punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3]
; SSE41-NEXT:    psadbw %xmm1, %xmm3
; SSE41-NEXT:    psadbw %xmm1, %xmm0
; SSE41-NEXT:    packuswb %xmm3, %xmm0
; SSE41-NEXT:    pcmpgtd {{.*}}(%rip), %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ugt_22_v4i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpgtd {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ugt_22_v4i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX2-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [22,22,22,22]
; AVX2-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ugt_22_v4i32:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [22,22,22,22]
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ugt_22_v4i32:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ugt_22_v4i32:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; BITALG_NOVLX-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [22,22,22,22]
; BITALG_NOVLX-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ugt_22_v4i32:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; BITALG-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
  ; ctpop each lane, compare unsigned-greater-than 22, widen the i1 mask to i32.
  %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
  %3 = icmp ugt <4 x i32> %2, <i32 22, i32 22, i32 22, i32 22>
  %4 = sext <4 x i1> %3 to <4 x i32>
  ret <4 x i32> %4
}
13749
; ult_23_v4i32: per-i32-lane test of "ctpop(x) u< 23"; each result lane is the
; sign-extended i1 (-1 if the lane's population count is below 23, else 0).
; NOTE(review): the CHECK lines below are autogenerated by
; utils/update_llc_test_checks.py (see file header) -- regenerate them with
; that script rather than editing by hand.
define <4 x i32> @ult_23_v4i32(<4 x i32> %0) {
; SSE2-LABEL: ult_23_v4i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE2-NEXT:    psadbw %xmm0, %xmm2
; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE2-NEXT:    psadbw %xmm0, %xmm1
; SSE2-NEXT:    packuswb %xmm2, %xmm1
; SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [23,23,23,23]
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ult_23_v4i32:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    movdqa %xmm1, %xmm2
; SSE3-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE3-NEXT:    psadbw %xmm0, %xmm2
; SSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE3-NEXT:    psadbw %xmm0, %xmm1
; SSE3-NEXT:    packuswb %xmm2, %xmm1
; SSE3-NEXT:    movdqa {{.*#+}} xmm0 = [23,23,23,23]
; SSE3-NEXT:    pcmpgtd %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ult_23_v4i32:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    movdqa %xmm3, %xmm1
; SSSE3-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; SSSE3-NEXT:    psadbw %xmm0, %xmm1
; SSSE3-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
; SSSE3-NEXT:    psadbw %xmm0, %xmm3
; SSSE3-NEXT:    packuswb %xmm1, %xmm3
; SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [23,23,23,23]
; SSSE3-NEXT:    pcmpgtd %xmm3, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ult_23_v4i32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero
; SSE41-NEXT:    punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3]
; SSE41-NEXT:    psadbw %xmm0, %xmm3
; SSE41-NEXT:    psadbw %xmm0, %xmm1
; SSE41-NEXT:    packuswb %xmm3, %xmm1
; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [23,23,23,23]
; SSE41-NEXT:    pcmpgtd %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ult_23_v4i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [23,23,23,23]
; AVX1-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ult_23_v4i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX2-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [23,23,23,23]
; AVX2-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ult_23_v4i32:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [23,23,23,23]
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ult_23_v4i32:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ult_23_v4i32:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; BITALG_NOVLX-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [23,23,23,23]
; BITALG_NOVLX-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ult_23_v4i32:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; BITALG-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
  ; ctpop each lane, compare unsigned-less-than 23, widen the i1 mask to i32.
  %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
  %3 = icmp ult <4 x i32> %2, <i32 23, i32 23, i32 23, i32 23>
  %4 = sext <4 x i1> %3 to <4 x i32>
  ret <4 x i32> %4
}
13940
13941define <4 x i32> @ugt_23_v4i32(<4 x i32> %0) {
13942; SSE2-LABEL: ugt_23_v4i32:
13943; SSE2:       # %bb.0:
13944; SSE2-NEXT:    movdqa %xmm0, %xmm1
13945; SSE2-NEXT:    psrlw $1, %xmm1
13946; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
13947; SSE2-NEXT:    psubb %xmm1, %xmm0
13948; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
13949; SSE2-NEXT:    movdqa %xmm0, %xmm2
13950; SSE2-NEXT:    pand %xmm1, %xmm2
13951; SSE2-NEXT:    psrlw $2, %xmm0
13952; SSE2-NEXT:    pand %xmm1, %xmm0
13953; SSE2-NEXT:    paddb %xmm2, %xmm0
13954; SSE2-NEXT:    movdqa %xmm0, %xmm1
13955; SSE2-NEXT:    psrlw $4, %xmm1
13956; SSE2-NEXT:    paddb %xmm0, %xmm1
13957; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
13958; SSE2-NEXT:    pxor %xmm0, %xmm0
13959; SSE2-NEXT:    movdqa %xmm1, %xmm2
13960; SSE2-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
13961; SSE2-NEXT:    psadbw %xmm0, %xmm2
13962; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
13963; SSE2-NEXT:    psadbw %xmm0, %xmm1
13964; SSE2-NEXT:    packuswb %xmm2, %xmm1
13965; SSE2-NEXT:    pcmpgtd {{.*}}(%rip), %xmm1
13966; SSE2-NEXT:    movdqa %xmm1, %xmm0
13967; SSE2-NEXT:    retq
13968;
13969; SSE3-LABEL: ugt_23_v4i32:
13970; SSE3:       # %bb.0:
13971; SSE3-NEXT:    movdqa %xmm0, %xmm1
13972; SSE3-NEXT:    psrlw $1, %xmm1
13973; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
13974; SSE3-NEXT:    psubb %xmm1, %xmm0
13975; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
13976; SSE3-NEXT:    movdqa %xmm0, %xmm2
13977; SSE3-NEXT:    pand %xmm1, %xmm2
13978; SSE3-NEXT:    psrlw $2, %xmm0
13979; SSE3-NEXT:    pand %xmm1, %xmm0
13980; SSE3-NEXT:    paddb %xmm2, %xmm0
13981; SSE3-NEXT:    movdqa %xmm0, %xmm1
13982; SSE3-NEXT:    psrlw $4, %xmm1
13983; SSE3-NEXT:    paddb %xmm0, %xmm1
13984; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
13985; SSE3-NEXT:    pxor %xmm0, %xmm0
13986; SSE3-NEXT:    movdqa %xmm1, %xmm2
13987; SSE3-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
13988; SSE3-NEXT:    psadbw %xmm0, %xmm2
13989; SSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
13990; SSE3-NEXT:    psadbw %xmm0, %xmm1
13991; SSE3-NEXT:    packuswb %xmm2, %xmm1
13992; SSE3-NEXT:    pcmpgtd {{.*}}(%rip), %xmm1
13993; SSE3-NEXT:    movdqa %xmm1, %xmm0
13994; SSE3-NEXT:    retq
13995;
13996; SSSE3-LABEL: ugt_23_v4i32:
13997; SSSE3:       # %bb.0:
13998; SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
13999; SSSE3-NEXT:    movdqa %xmm0, %xmm3
14000; SSSE3-NEXT:    pand %xmm2, %xmm3
14001; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
14002; SSSE3-NEXT:    movdqa %xmm1, %xmm4
14003; SSSE3-NEXT:    pshufb %xmm3, %xmm4
14004; SSSE3-NEXT:    psrlw $4, %xmm0
14005; SSSE3-NEXT:    pand %xmm2, %xmm0
14006; SSSE3-NEXT:    pshufb %xmm0, %xmm1
14007; SSSE3-NEXT:    paddb %xmm4, %xmm1
14008; SSSE3-NEXT:    pxor %xmm0, %xmm0
14009; SSSE3-NEXT:    movdqa %xmm1, %xmm2
14010; SSSE3-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
14011; SSSE3-NEXT:    psadbw %xmm0, %xmm2
14012; SSSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
14013; SSSE3-NEXT:    psadbw %xmm0, %xmm1
14014; SSSE3-NEXT:    packuswb %xmm2, %xmm1
14015; SSSE3-NEXT:    pcmpgtd {{.*}}(%rip), %xmm1
14016; SSSE3-NEXT:    movdqa %xmm1, %xmm0
14017; SSSE3-NEXT:    retq
14018;
14019; SSE41-LABEL: ugt_23_v4i32:
14020; SSE41:       # %bb.0:
14021; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
14022; SSE41-NEXT:    movdqa %xmm0, %xmm2
14023; SSE41-NEXT:    pand %xmm1, %xmm2
14024; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
14025; SSE41-NEXT:    movdqa %xmm3, %xmm4
14026; SSE41-NEXT:    pshufb %xmm2, %xmm4
14027; SSE41-NEXT:    psrlw $4, %xmm0
14028; SSE41-NEXT:    pand %xmm1, %xmm0
14029; SSE41-NEXT:    pshufb %xmm0, %xmm3
14030; SSE41-NEXT:    paddb %xmm4, %xmm3
14031; SSE41-NEXT:    pxor %xmm1, %xmm1
14032; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero
14033; SSE41-NEXT:    punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3]
14034; SSE41-NEXT:    psadbw %xmm1, %xmm3
14035; SSE41-NEXT:    psadbw %xmm1, %xmm0
14036; SSE41-NEXT:    packuswb %xmm3, %xmm0
14037; SSE41-NEXT:    pcmpgtd {{.*}}(%rip), %xmm0
14038; SSE41-NEXT:    retq
14039;
14040; AVX1-LABEL: ugt_23_v4i32:
14041; AVX1:       # %bb.0:
14042; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
14043; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
14044; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
14045; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
14046; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
14047; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
14048; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
14049; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
14050; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
14051; AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
14052; AVX1-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
14053; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
14054; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
14055; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
14056; AVX1-NEXT:    vpcmpgtd {{.*}}(%rip), %xmm0, %xmm0
14057; AVX1-NEXT:    retq
14058;
14059; AVX2-LABEL: ugt_23_v4i32:
14060; AVX2:       # %bb.0:
14061; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
14062; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
14063; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
14064; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
14065; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
14066; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
14067; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
14068; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
14069; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
14070; AVX2-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
14071; AVX2-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
14072; AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
14073; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
14074; AVX2-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
14075; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [23,23,23,23]
14076; AVX2-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
14077; AVX2-NEXT:    retq
14078;
14079; AVX512VPOPCNTDQ-LABEL: ugt_23_v4i32:
14080; AVX512VPOPCNTDQ:       # %bb.0:
14081; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
14082; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
14083; AVX512VPOPCNTDQ-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [23,23,23,23]
14084; AVX512VPOPCNTDQ-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
14085; AVX512VPOPCNTDQ-NEXT:    vzeroupper
14086; AVX512VPOPCNTDQ-NEXT:    retq
14087;
14088; AVX512VPOPCNTDQVL-LABEL: ugt_23_v4i32:
14089; AVX512VPOPCNTDQVL:       # %bb.0:
14090; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
14091; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1
14092; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
14093; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
14094; AVX512VPOPCNTDQVL-NEXT:    retq
14095;
14096; BITALG_NOVLX-LABEL: ugt_23_v4i32:
14097; BITALG_NOVLX:       # %bb.0:
14098; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
14099; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
14100; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
14101; BITALG_NOVLX-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
14102; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
14103; BITALG_NOVLX-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
14104; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
14105; BITALG_NOVLX-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
14106; BITALG_NOVLX-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [23,23,23,23]
14107; BITALG_NOVLX-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
14108; BITALG_NOVLX-NEXT:    vzeroupper
14109; BITALG_NOVLX-NEXT:    retq
14110;
14111; BITALG-LABEL: ugt_23_v4i32:
14112; BITALG:       # %bb.0:
14113; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
14114; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
14115; BITALG-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
14116; BITALG-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
14117; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
14118; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
14119; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
14120; BITALG-NEXT:    vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1
14121; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
14122; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
14123; BITALG-NEXT:    retq
14124  %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
14125  %3 = icmp ugt <4 x i32> %2, <i32 23, i32 23, i32 23, i32 23>
14126  %4 = sext <4 x i1> %3 to <4 x i32>
14127  ret <4 x i32> %4
14128}
14129
; Per-lane popcount of <4 x i32>, compared icmp ult 24; the <4 x i1> result is
; sign-extended to a <4 x i32> all-ones/all-zeros mask. CHECK lines are
; autogenerated by utils/update_llc_test_checks.py — regenerate, don't hand-edit.
define <4 x i32> @ult_24_v4i32(<4 x i32> %0) {
; SSE2-LABEL: ult_24_v4i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE2-NEXT:    psadbw %xmm0, %xmm2
; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE2-NEXT:    psadbw %xmm0, %xmm1
; SSE2-NEXT:    packuswb %xmm2, %xmm1
; SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [24,24,24,24]
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ult_24_v4i32:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    movdqa %xmm1, %xmm2
; SSE3-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE3-NEXT:    psadbw %xmm0, %xmm2
; SSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE3-NEXT:    psadbw %xmm0, %xmm1
; SSE3-NEXT:    packuswb %xmm2, %xmm1
; SSE3-NEXT:    movdqa {{.*#+}} xmm0 = [24,24,24,24]
; SSE3-NEXT:    pcmpgtd %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ult_24_v4i32:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    movdqa %xmm3, %xmm1
; SSSE3-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; SSSE3-NEXT:    psadbw %xmm0, %xmm1
; SSSE3-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
; SSSE3-NEXT:    psadbw %xmm0, %xmm3
; SSSE3-NEXT:    packuswb %xmm1, %xmm3
; SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [24,24,24,24]
; SSSE3-NEXT:    pcmpgtd %xmm3, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ult_24_v4i32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero
; SSE41-NEXT:    punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3]
; SSE41-NEXT:    psadbw %xmm0, %xmm3
; SSE41-NEXT:    psadbw %xmm0, %xmm1
; SSE41-NEXT:    packuswb %xmm3, %xmm1
; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [24,24,24,24]
; SSE41-NEXT:    pcmpgtd %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ult_24_v4i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [24,24,24,24]
; AVX1-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ult_24_v4i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX2-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [24,24,24,24]
; AVX2-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ult_24_v4i32:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [24,24,24,24]
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ult_24_v4i32:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ult_24_v4i32:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; BITALG_NOVLX-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [24,24,24,24]
; BITALG_NOVLX-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ult_24_v4i32:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; BITALG-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
  %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
  %3 = icmp ult <4 x i32> %2, <i32 24, i32 24, i32 24, i32 24>
  %4 = sext <4 x i1> %3 to <4 x i32>
  ret <4 x i32> %4
}
14320
; Per-lane popcount of <4 x i32>, compared icmp ugt 24; the <4 x i1> result is
; sign-extended to a <4 x i32> all-ones/all-zeros mask. CHECK lines are
; autogenerated by utils/update_llc_test_checks.py — regenerate, don't hand-edit.
define <4 x i32> @ugt_24_v4i32(<4 x i32> %0) {
; SSE2-LABEL: ugt_24_v4i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE2-NEXT:    psadbw %xmm0, %xmm2
; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE2-NEXT:    psadbw %xmm0, %xmm1
; SSE2-NEXT:    packuswb %xmm2, %xmm1
; SSE2-NEXT:    pcmpgtd {{.*}}(%rip), %xmm1
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ugt_24_v4i32:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    movdqa %xmm1, %xmm2
; SSE3-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE3-NEXT:    psadbw %xmm0, %xmm2
; SSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE3-NEXT:    psadbw %xmm0, %xmm1
; SSE3-NEXT:    packuswb %xmm2, %xmm1
; SSE3-NEXT:    pcmpgtd {{.*}}(%rip), %xmm1
; SSE3-NEXT:    movdqa %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ugt_24_v4i32:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm3
; SSSE3-NEXT:    pand %xmm2, %xmm3
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm1, %xmm4
; SSSE3-NEXT:    pshufb %xmm3, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm2, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm1
; SSSE3-NEXT:    paddb %xmm4, %xmm1
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    movdqa %xmm1, %xmm2
; SSSE3-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSSE3-NEXT:    psadbw %xmm0, %xmm2
; SSSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSSE3-NEXT:    psadbw %xmm0, %xmm1
; SSSE3-NEXT:    packuswb %xmm2, %xmm1
; SSSE3-NEXT:    pcmpgtd {{.*}}(%rip), %xmm1
; SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ugt_24_v4i32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm1, %xmm1
; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero
; SSE41-NEXT:    punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3]
; SSE41-NEXT:    psadbw %xmm1, %xmm3
; SSE41-NEXT:    psadbw %xmm1, %xmm0
; SSE41-NEXT:    packuswb %xmm3, %xmm0
; SSE41-NEXT:    pcmpgtd {{.*}}(%rip), %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ugt_24_v4i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpgtd {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ugt_24_v4i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX2-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [24,24,24,24]
; AVX2-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ugt_24_v4i32:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [24,24,24,24]
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ugt_24_v4i32:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ugt_24_v4i32:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; BITALG_NOVLX-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [24,24,24,24]
; BITALG_NOVLX-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ugt_24_v4i32:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; BITALG-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
  %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
  %3 = icmp ugt <4 x i32> %2, <i32 24, i32 24, i32 24, i32 24>
  %4 = sext <4 x i1> %3 to <4 x i32>
  ret <4 x i32> %4
}
14509
; Per-lane popcount of <4 x i32>, compared icmp ult 25; the <4 x i1> result is
; sign-extended to a <4 x i32> all-ones/all-zeros mask. CHECK lines are
; autogenerated by utils/update_llc_test_checks.py — regenerate, don't hand-edit.
define <4 x i32> @ult_25_v4i32(<4 x i32> %0) {
; SSE2-LABEL: ult_25_v4i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE2-NEXT:    psadbw %xmm0, %xmm2
; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE2-NEXT:    psadbw %xmm0, %xmm1
; SSE2-NEXT:    packuswb %xmm2, %xmm1
; SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [25,25,25,25]
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ult_25_v4i32:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    movdqa %xmm1, %xmm2
; SSE3-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE3-NEXT:    psadbw %xmm0, %xmm2
; SSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE3-NEXT:    psadbw %xmm0, %xmm1
; SSE3-NEXT:    packuswb %xmm2, %xmm1
; SSE3-NEXT:    movdqa {{.*#+}} xmm0 = [25,25,25,25]
; SSE3-NEXT:    pcmpgtd %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ult_25_v4i32:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    movdqa %xmm3, %xmm1
; SSSE3-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; SSSE3-NEXT:    psadbw %xmm0, %xmm1
; SSSE3-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
; SSSE3-NEXT:    psadbw %xmm0, %xmm3
; SSSE3-NEXT:    packuswb %xmm1, %xmm3
; SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [25,25,25,25]
; SSSE3-NEXT:    pcmpgtd %xmm3, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ult_25_v4i32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero
; SSE41-NEXT:    punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3]
; SSE41-NEXT:    psadbw %xmm0, %xmm3
; SSE41-NEXT:    psadbw %xmm0, %xmm1
; SSE41-NEXT:    packuswb %xmm3, %xmm1
; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [25,25,25,25]
; SSE41-NEXT:    pcmpgtd %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ult_25_v4i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [25,25,25,25]
; AVX1-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ult_25_v4i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX2-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [25,25,25,25]
; AVX2-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ult_25_v4i32:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [25,25,25,25]
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ult_25_v4i32:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ult_25_v4i32:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; BITALG_NOVLX-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [25,25,25,25]
; BITALG_NOVLX-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ult_25_v4i32:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; BITALG-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
  %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
  %3 = icmp ult <4 x i32> %2, <i32 25, i32 25, i32 25, i32 25>
  %4 = sext <4 x i1> %3 to <4 x i32>
  ret <4 x i32> %4
}
14700
define <4 x i32> @ugt_25_v4i32(<4 x i32> %0) {
; Per-lane popcount of a <4 x i32>, then unsigned-greater-than 25, sign-extended
; to an all-ones/all-zeros lane mask. Check lines below are autogenerated by
; utils/update_llc_test_checks.py -- do not edit them by hand.
; SSE2-LABEL: ugt_25_v4i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE2-NEXT:    psadbw %xmm0, %xmm2
; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE2-NEXT:    psadbw %xmm0, %xmm1
; SSE2-NEXT:    packuswb %xmm2, %xmm1
; SSE2-NEXT:    pcmpgtd {{.*}}(%rip), %xmm1
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ugt_25_v4i32:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    movdqa %xmm1, %xmm2
; SSE3-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE3-NEXT:    psadbw %xmm0, %xmm2
; SSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE3-NEXT:    psadbw %xmm0, %xmm1
; SSE3-NEXT:    packuswb %xmm2, %xmm1
; SSE3-NEXT:    pcmpgtd {{.*}}(%rip), %xmm1
; SSE3-NEXT:    movdqa %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ugt_25_v4i32:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm3
; SSSE3-NEXT:    pand %xmm2, %xmm3
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm1, %xmm4
; SSSE3-NEXT:    pshufb %xmm3, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm2, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm1
; SSSE3-NEXT:    paddb %xmm4, %xmm1
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    movdqa %xmm1, %xmm2
; SSSE3-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSSE3-NEXT:    psadbw %xmm0, %xmm2
; SSSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSSE3-NEXT:    psadbw %xmm0, %xmm1
; SSSE3-NEXT:    packuswb %xmm2, %xmm1
; SSSE3-NEXT:    pcmpgtd {{.*}}(%rip), %xmm1
; SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ugt_25_v4i32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm1, %xmm1
; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero
; SSE41-NEXT:    punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3]
; SSE41-NEXT:    psadbw %xmm1, %xmm3
; SSE41-NEXT:    psadbw %xmm1, %xmm0
; SSE41-NEXT:    packuswb %xmm3, %xmm0
; SSE41-NEXT:    pcmpgtd {{.*}}(%rip), %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ugt_25_v4i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpgtd {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ugt_25_v4i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX2-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [25,25,25,25]
; AVX2-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ugt_25_v4i32:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [25,25,25,25]
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ugt_25_v4i32:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ugt_25_v4i32:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; BITALG_NOVLX-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [25,25,25,25]
; BITALG_NOVLX-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ugt_25_v4i32:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; BITALG-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
  %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) ; per-lane population count
  %3 = icmp ugt <4 x i32> %2, <i32 25, i32 25, i32 25, i32 25> ; lanes with more than 25 set bits
  %4 = sext <4 x i1> %3 to <4 x i32> ; i1 -> all-ones/all-zeros lane mask
  ret <4 x i32> %4
}
14889
define <4 x i32> @ult_26_v4i32(<4 x i32> %0) {
; Per-lane popcount of a <4 x i32>, then unsigned-less-than 26, sign-extended
; to an all-ones/all-zeros lane mask. Check lines below are autogenerated by
; utils/update_llc_test_checks.py -- do not edit them by hand.
; SSE2-LABEL: ult_26_v4i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE2-NEXT:    psadbw %xmm0, %xmm2
; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE2-NEXT:    psadbw %xmm0, %xmm1
; SSE2-NEXT:    packuswb %xmm2, %xmm1
; SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [26,26,26,26]
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ult_26_v4i32:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    movdqa %xmm1, %xmm2
; SSE3-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE3-NEXT:    psadbw %xmm0, %xmm2
; SSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE3-NEXT:    psadbw %xmm0, %xmm1
; SSE3-NEXT:    packuswb %xmm2, %xmm1
; SSE3-NEXT:    movdqa {{.*#+}} xmm0 = [26,26,26,26]
; SSE3-NEXT:    pcmpgtd %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ult_26_v4i32:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    movdqa %xmm3, %xmm1
; SSSE3-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; SSSE3-NEXT:    psadbw %xmm0, %xmm1
; SSSE3-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
; SSSE3-NEXT:    psadbw %xmm0, %xmm3
; SSSE3-NEXT:    packuswb %xmm1, %xmm3
; SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [26,26,26,26]
; SSSE3-NEXT:    pcmpgtd %xmm3, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ult_26_v4i32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero
; SSE41-NEXT:    punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3]
; SSE41-NEXT:    psadbw %xmm0, %xmm3
; SSE41-NEXT:    psadbw %xmm0, %xmm1
; SSE41-NEXT:    packuswb %xmm3, %xmm1
; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [26,26,26,26]
; SSE41-NEXT:    pcmpgtd %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ult_26_v4i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [26,26,26,26]
; AVX1-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ult_26_v4i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX2-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [26,26,26,26]
; AVX2-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ult_26_v4i32:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [26,26,26,26]
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ult_26_v4i32:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ult_26_v4i32:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; BITALG_NOVLX-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [26,26,26,26]
; BITALG_NOVLX-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ult_26_v4i32:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; BITALG-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
  %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) ; per-lane population count
  %3 = icmp ult <4 x i32> %2, <i32 26, i32 26, i32 26, i32 26> ; lanes with fewer than 26 set bits
  %4 = sext <4 x i1> %3 to <4 x i32> ; i1 -> all-ones/all-zeros lane mask
  ret <4 x i32> %4
}
15080
define <4 x i32> @ugt_26_v4i32(<4 x i32> %0) {
; Per-lane popcount of a <4 x i32>, then unsigned-greater-than 26, sign-extended
; to an all-ones/all-zeros lane mask. Check lines below are autogenerated by
; utils/update_llc_test_checks.py -- do not edit them by hand.
; SSE2-LABEL: ugt_26_v4i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE2-NEXT:    psadbw %xmm0, %xmm2
; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE2-NEXT:    psadbw %xmm0, %xmm1
; SSE2-NEXT:    packuswb %xmm2, %xmm1
; SSE2-NEXT:    pcmpgtd {{.*}}(%rip), %xmm1
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ugt_26_v4i32:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    movdqa %xmm1, %xmm2
; SSE3-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE3-NEXT:    psadbw %xmm0, %xmm2
; SSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE3-NEXT:    psadbw %xmm0, %xmm1
; SSE3-NEXT:    packuswb %xmm2, %xmm1
; SSE3-NEXT:    pcmpgtd {{.*}}(%rip), %xmm1
; SSE3-NEXT:    movdqa %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ugt_26_v4i32:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm3
; SSSE3-NEXT:    pand %xmm2, %xmm3
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm1, %xmm4
; SSSE3-NEXT:    pshufb %xmm3, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm2, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm1
; SSSE3-NEXT:    paddb %xmm4, %xmm1
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    movdqa %xmm1, %xmm2
; SSSE3-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSSE3-NEXT:    psadbw %xmm0, %xmm2
; SSSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSSE3-NEXT:    psadbw %xmm0, %xmm1
; SSSE3-NEXT:    packuswb %xmm2, %xmm1
; SSSE3-NEXT:    pcmpgtd {{.*}}(%rip), %xmm1
; SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ugt_26_v4i32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm1, %xmm1
; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero
; SSE41-NEXT:    punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3]
; SSE41-NEXT:    psadbw %xmm1, %xmm3
; SSE41-NEXT:    psadbw %xmm1, %xmm0
; SSE41-NEXT:    packuswb %xmm3, %xmm0
; SSE41-NEXT:    pcmpgtd {{.*}}(%rip), %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ugt_26_v4i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpgtd {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ugt_26_v4i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX2-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [26,26,26,26]
; AVX2-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ugt_26_v4i32:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [26,26,26,26]
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ugt_26_v4i32:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ugt_26_v4i32:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; BITALG_NOVLX-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [26,26,26,26]
; BITALG_NOVLX-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ugt_26_v4i32:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; BITALG-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
  %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) ; per-lane population count
  %3 = icmp ugt <4 x i32> %2, <i32 26, i32 26, i32 26, i32 26> ; lanes with more than 26 set bits
  %4 = sext <4 x i1> %3 to <4 x i32> ; i1 -> all-ones/all-zeros lane mask
  ret <4 x i32> %4
}
15269
15270define <4 x i32> @ult_27_v4i32(<4 x i32> %0) {
15271; SSE2-LABEL: ult_27_v4i32:
15272; SSE2:       # %bb.0:
15273; SSE2-NEXT:    movdqa %xmm0, %xmm1
15274; SSE2-NEXT:    psrlw $1, %xmm1
15275; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
15276; SSE2-NEXT:    psubb %xmm1, %xmm0
15277; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
15278; SSE2-NEXT:    movdqa %xmm0, %xmm2
15279; SSE2-NEXT:    pand %xmm1, %xmm2
15280; SSE2-NEXT:    psrlw $2, %xmm0
15281; SSE2-NEXT:    pand %xmm1, %xmm0
15282; SSE2-NEXT:    paddb %xmm2, %xmm0
15283; SSE2-NEXT:    movdqa %xmm0, %xmm1
15284; SSE2-NEXT:    psrlw $4, %xmm1
15285; SSE2-NEXT:    paddb %xmm0, %xmm1
15286; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
15287; SSE2-NEXT:    pxor %xmm0, %xmm0
15288; SSE2-NEXT:    movdqa %xmm1, %xmm2
15289; SSE2-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
15290; SSE2-NEXT:    psadbw %xmm0, %xmm2
15291; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
15292; SSE2-NEXT:    psadbw %xmm0, %xmm1
15293; SSE2-NEXT:    packuswb %xmm2, %xmm1
15294; SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [27,27,27,27]
15295; SSE2-NEXT:    pcmpgtd %xmm1, %xmm0
15296; SSE2-NEXT:    retq
15297;
15298; SSE3-LABEL: ult_27_v4i32:
15299; SSE3:       # %bb.0:
15300; SSE3-NEXT:    movdqa %xmm0, %xmm1
15301; SSE3-NEXT:    psrlw $1, %xmm1
15302; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
15303; SSE3-NEXT:    psubb %xmm1, %xmm0
15304; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
15305; SSE3-NEXT:    movdqa %xmm0, %xmm2
15306; SSE3-NEXT:    pand %xmm1, %xmm2
15307; SSE3-NEXT:    psrlw $2, %xmm0
15308; SSE3-NEXT:    pand %xmm1, %xmm0
15309; SSE3-NEXT:    paddb %xmm2, %xmm0
15310; SSE3-NEXT:    movdqa %xmm0, %xmm1
15311; SSE3-NEXT:    psrlw $4, %xmm1
15312; SSE3-NEXT:    paddb %xmm0, %xmm1
15313; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
15314; SSE3-NEXT:    pxor %xmm0, %xmm0
15315; SSE3-NEXT:    movdqa %xmm1, %xmm2
15316; SSE3-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
15317; SSE3-NEXT:    psadbw %xmm0, %xmm2
15318; SSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
15319; SSE3-NEXT:    psadbw %xmm0, %xmm1
15320; SSE3-NEXT:    packuswb %xmm2, %xmm1
15321; SSE3-NEXT:    movdqa {{.*#+}} xmm0 = [27,27,27,27]
15322; SSE3-NEXT:    pcmpgtd %xmm1, %xmm0
15323; SSE3-NEXT:    retq
15324;
15325; SSSE3-LABEL: ult_27_v4i32:
15326; SSSE3:       # %bb.0:
15327; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
15328; SSSE3-NEXT:    movdqa %xmm0, %xmm2
15329; SSSE3-NEXT:    pand %xmm1, %xmm2
15330; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
15331; SSSE3-NEXT:    movdqa %xmm3, %xmm4
15332; SSSE3-NEXT:    pshufb %xmm2, %xmm4
15333; SSSE3-NEXT:    psrlw $4, %xmm0
15334; SSSE3-NEXT:    pand %xmm1, %xmm0
15335; SSSE3-NEXT:    pshufb %xmm0, %xmm3
15336; SSSE3-NEXT:    paddb %xmm4, %xmm3
15337; SSSE3-NEXT:    pxor %xmm0, %xmm0
15338; SSSE3-NEXT:    movdqa %xmm3, %xmm1
15339; SSSE3-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
15340; SSSE3-NEXT:    psadbw %xmm0, %xmm1
15341; SSSE3-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
15342; SSSE3-NEXT:    psadbw %xmm0, %xmm3
15343; SSSE3-NEXT:    packuswb %xmm1, %xmm3
15344; SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [27,27,27,27]
15345; SSSE3-NEXT:    pcmpgtd %xmm3, %xmm0
15346; SSSE3-NEXT:    retq
15347;
15348; SSE41-LABEL: ult_27_v4i32:
15349; SSE41:       # %bb.0:
15350; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
15351; SSE41-NEXT:    movdqa %xmm0, %xmm2
15352; SSE41-NEXT:    pand %xmm1, %xmm2
15353; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
15354; SSE41-NEXT:    movdqa %xmm3, %xmm4
15355; SSE41-NEXT:    pshufb %xmm2, %xmm4
15356; SSE41-NEXT:    psrlw $4, %xmm0
15357; SSE41-NEXT:    pand %xmm1, %xmm0
15358; SSE41-NEXT:    pshufb %xmm0, %xmm3
15359; SSE41-NEXT:    paddb %xmm4, %xmm3
15360; SSE41-NEXT:    pxor %xmm0, %xmm0
15361; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero
15362; SSE41-NEXT:    punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3]
15363; SSE41-NEXT:    psadbw %xmm0, %xmm3
15364; SSE41-NEXT:    psadbw %xmm0, %xmm1
15365; SSE41-NEXT:    packuswb %xmm3, %xmm1
15366; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [27,27,27,27]
15367; SSE41-NEXT:    pcmpgtd %xmm1, %xmm0
15368; SSE41-NEXT:    retq
15369;
15370; AVX1-LABEL: ult_27_v4i32:
15371; AVX1:       # %bb.0:
15372; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
15373; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
15374; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
15375; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
15376; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
15377; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
15378; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
15379; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
15380; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
15381; AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
15382; AVX1-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
15383; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
15384; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
15385; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
15386; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [27,27,27,27]
15387; AVX1-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
15388; AVX1-NEXT:    retq
15389;
15390; AVX2-LABEL: ult_27_v4i32:
15391; AVX2:       # %bb.0:
15392; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
15393; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
15394; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
15395; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
15396; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
15397; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
15398; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
15399; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
15400; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
15401; AVX2-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
15402; AVX2-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
15403; AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
15404; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
15405; AVX2-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
15406; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [27,27,27,27]
15407; AVX2-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
15408; AVX2-NEXT:    retq
15409;
15410; AVX512VPOPCNTDQ-LABEL: ult_27_v4i32:
15411; AVX512VPOPCNTDQ:       # %bb.0:
15412; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
15413; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
15414; AVX512VPOPCNTDQ-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [27,27,27,27]
15415; AVX512VPOPCNTDQ-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
15416; AVX512VPOPCNTDQ-NEXT:    vzeroupper
15417; AVX512VPOPCNTDQ-NEXT:    retq
15418;
15419; AVX512VPOPCNTDQVL-LABEL: ult_27_v4i32:
15420; AVX512VPOPCNTDQVL:       # %bb.0:
15421; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
15422; AVX512VPOPCNTDQVL-NEXT:    vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1
15423; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
15424; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
15425; AVX512VPOPCNTDQVL-NEXT:    retq
15426;
15427; BITALG_NOVLX-LABEL: ult_27_v4i32:
15428; BITALG_NOVLX:       # %bb.0:
15429; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
15430; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
15431; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
15432; BITALG_NOVLX-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
15433; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
15434; BITALG_NOVLX-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
15435; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
15436; BITALG_NOVLX-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
15437; BITALG_NOVLX-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [27,27,27,27]
15438; BITALG_NOVLX-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
15439; BITALG_NOVLX-NEXT:    vzeroupper
15440; BITALG_NOVLX-NEXT:    retq
15441;
15442; BITALG-LABEL: ult_27_v4i32:
15443; BITALG:       # %bb.0:
15444; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
15445; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
15446; BITALG-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
15447; BITALG-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
15448; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
15449; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
15450; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
15451; BITALG-NEXT:    vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1
15452; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
15453; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
15454; BITALG-NEXT:    retq
15455  %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
15456  %3 = icmp ult <4 x i32> %2, <i32 27, i32 27, i32 27, i32 27>
15457  %4 = sext <4 x i1> %3 to <4 x i32>
15458  ret <4 x i32> %4
15459}
15460
15461define <4 x i32> @ugt_27_v4i32(<4 x i32> %0) {
; Review note: the CHECK lines below were autogenerated by
; utils/update_llc_test_checks.py (see file header) — do not hand-edit them;
; rerun the script after any codegen change. The IR under test computes a
; per-lane popcount via llvm.ctpop.v4i32, compares it unsigned-greater-than
; against the splat constant 27, and sign-extends the <4 x i1> mask to
; <4 x i32>, exercising every RUN configuration (SSE2..SSE41 bit-hack/pshufb
; popcount, AVX1/AVX2, AVX512VPOPCNTDQ[VL], BITALG[_NOVLX]).
15462; SSE2-LABEL: ugt_27_v4i32:
15463; SSE2:       # %bb.0:
15464; SSE2-NEXT:    movdqa %xmm0, %xmm1
15465; SSE2-NEXT:    psrlw $1, %xmm1
15466; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
15467; SSE2-NEXT:    psubb %xmm1, %xmm0
15468; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
15469; SSE2-NEXT:    movdqa %xmm0, %xmm2
15470; SSE2-NEXT:    pand %xmm1, %xmm2
15471; SSE2-NEXT:    psrlw $2, %xmm0
15472; SSE2-NEXT:    pand %xmm1, %xmm0
15473; SSE2-NEXT:    paddb %xmm2, %xmm0
15474; SSE2-NEXT:    movdqa %xmm0, %xmm1
15475; SSE2-NEXT:    psrlw $4, %xmm1
15476; SSE2-NEXT:    paddb %xmm0, %xmm1
15477; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
15478; SSE2-NEXT:    pxor %xmm0, %xmm0
15479; SSE2-NEXT:    movdqa %xmm1, %xmm2
15480; SSE2-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
15481; SSE2-NEXT:    psadbw %xmm0, %xmm2
15482; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
15483; SSE2-NEXT:    psadbw %xmm0, %xmm1
15484; SSE2-NEXT:    packuswb %xmm2, %xmm1
15485; SSE2-NEXT:    pcmpgtd {{.*}}(%rip), %xmm1
15486; SSE2-NEXT:    movdqa %xmm1, %xmm0
15487; SSE2-NEXT:    retq
15488;
15489; SSE3-LABEL: ugt_27_v4i32:
15490; SSE3:       # %bb.0:
15491; SSE3-NEXT:    movdqa %xmm0, %xmm1
15492; SSE3-NEXT:    psrlw $1, %xmm1
15493; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
15494; SSE3-NEXT:    psubb %xmm1, %xmm0
15495; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
15496; SSE3-NEXT:    movdqa %xmm0, %xmm2
15497; SSE3-NEXT:    pand %xmm1, %xmm2
15498; SSE3-NEXT:    psrlw $2, %xmm0
15499; SSE3-NEXT:    pand %xmm1, %xmm0
15500; SSE3-NEXT:    paddb %xmm2, %xmm0
15501; SSE3-NEXT:    movdqa %xmm0, %xmm1
15502; SSE3-NEXT:    psrlw $4, %xmm1
15503; SSE3-NEXT:    paddb %xmm0, %xmm1
15504; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
15505; SSE3-NEXT:    pxor %xmm0, %xmm0
15506; SSE3-NEXT:    movdqa %xmm1, %xmm2
15507; SSE3-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
15508; SSE3-NEXT:    psadbw %xmm0, %xmm2
15509; SSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
15510; SSE3-NEXT:    psadbw %xmm0, %xmm1
15511; SSE3-NEXT:    packuswb %xmm2, %xmm1
15512; SSE3-NEXT:    pcmpgtd {{.*}}(%rip), %xmm1
15513; SSE3-NEXT:    movdqa %xmm1, %xmm0
15514; SSE3-NEXT:    retq
15515;
15516; SSSE3-LABEL: ugt_27_v4i32:
15517; SSSE3:       # %bb.0:
15518; SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
15519; SSSE3-NEXT:    movdqa %xmm0, %xmm3
15520; SSSE3-NEXT:    pand %xmm2, %xmm3
15521; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
15522; SSSE3-NEXT:    movdqa %xmm1, %xmm4
15523; SSSE3-NEXT:    pshufb %xmm3, %xmm4
15524; SSSE3-NEXT:    psrlw $4, %xmm0
15525; SSSE3-NEXT:    pand %xmm2, %xmm0
15526; SSSE3-NEXT:    pshufb %xmm0, %xmm1
15527; SSSE3-NEXT:    paddb %xmm4, %xmm1
15528; SSSE3-NEXT:    pxor %xmm0, %xmm0
15529; SSSE3-NEXT:    movdqa %xmm1, %xmm2
15530; SSSE3-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
15531; SSSE3-NEXT:    psadbw %xmm0, %xmm2
15532; SSSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
15533; SSSE3-NEXT:    psadbw %xmm0, %xmm1
15534; SSSE3-NEXT:    packuswb %xmm2, %xmm1
15535; SSSE3-NEXT:    pcmpgtd {{.*}}(%rip), %xmm1
15536; SSSE3-NEXT:    movdqa %xmm1, %xmm0
15537; SSSE3-NEXT:    retq
15538;
15539; SSE41-LABEL: ugt_27_v4i32:
15540; SSE41:       # %bb.0:
15541; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
15542; SSE41-NEXT:    movdqa %xmm0, %xmm2
15543; SSE41-NEXT:    pand %xmm1, %xmm2
15544; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
15545; SSE41-NEXT:    movdqa %xmm3, %xmm4
15546; SSE41-NEXT:    pshufb %xmm2, %xmm4
15547; SSE41-NEXT:    psrlw $4, %xmm0
15548; SSE41-NEXT:    pand %xmm1, %xmm0
15549; SSE41-NEXT:    pshufb %xmm0, %xmm3
15550; SSE41-NEXT:    paddb %xmm4, %xmm3
15551; SSE41-NEXT:    pxor %xmm1, %xmm1
15552; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero
15553; SSE41-NEXT:    punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3]
15554; SSE41-NEXT:    psadbw %xmm1, %xmm3
15555; SSE41-NEXT:    psadbw %xmm1, %xmm0
15556; SSE41-NEXT:    packuswb %xmm3, %xmm0
15557; SSE41-NEXT:    pcmpgtd {{.*}}(%rip), %xmm0
15558; SSE41-NEXT:    retq
15559;
15560; AVX1-LABEL: ugt_27_v4i32:
15561; AVX1:       # %bb.0:
15562; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
15563; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
15564; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
15565; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
15566; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
15567; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
15568; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
15569; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
15570; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
15571; AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
15572; AVX1-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
15573; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
15574; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
15575; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
15576; AVX1-NEXT:    vpcmpgtd {{.*}}(%rip), %xmm0, %xmm0
15577; AVX1-NEXT:    retq
15578;
15579; AVX2-LABEL: ugt_27_v4i32:
15580; AVX2:       # %bb.0:
15581; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
15582; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
15583; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
15584; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
15585; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
15586; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
15587; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
15588; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
15589; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
15590; AVX2-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
15591; AVX2-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
15592; AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
15593; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
15594; AVX2-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
15595; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [27,27,27,27]
15596; AVX2-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
15597; AVX2-NEXT:    retq
15598;
15599; AVX512VPOPCNTDQ-LABEL: ugt_27_v4i32:
15600; AVX512VPOPCNTDQ:       # %bb.0:
15601; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
15602; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
15603; AVX512VPOPCNTDQ-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [27,27,27,27]
15604; AVX512VPOPCNTDQ-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
15605; AVX512VPOPCNTDQ-NEXT:    vzeroupper
15606; AVX512VPOPCNTDQ-NEXT:    retq
15607;
15608; AVX512VPOPCNTDQVL-LABEL: ugt_27_v4i32:
15609; AVX512VPOPCNTDQVL:       # %bb.0:
15610; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
15611; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1
15612; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
15613; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
15614; AVX512VPOPCNTDQVL-NEXT:    retq
15615;
15616; BITALG_NOVLX-LABEL: ugt_27_v4i32:
15617; BITALG_NOVLX:       # %bb.0:
15618; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
15619; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
15620; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
15621; BITALG_NOVLX-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
15622; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
15623; BITALG_NOVLX-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
15624; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
15625; BITALG_NOVLX-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
15626; BITALG_NOVLX-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [27,27,27,27]
15627; BITALG_NOVLX-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
15628; BITALG_NOVLX-NEXT:    vzeroupper
15629; BITALG_NOVLX-NEXT:    retq
15630;
15631; BITALG-LABEL: ugt_27_v4i32:
15632; BITALG:       # %bb.0:
15633; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
15634; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
15635; BITALG-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
15636; BITALG-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
15637; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
15638; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
15639; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
15640; BITALG-NEXT:    vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1
15641; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
15642; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
15643; BITALG-NEXT:    retq
; IR under test: popcount each lane, compare ugt 27 (splat), sext mask.
15644  %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
15645  %3 = icmp ugt <4 x i32> %2, <i32 27, i32 27, i32 27, i32 27>
15646  %4 = sext <4 x i1> %3 to <4 x i32>
15647  ret <4 x i32> %4
15648}
15649
15650define <4 x i32> @ult_28_v4i32(<4 x i32> %0) {
; Review note: the CHECK lines below were autogenerated by
; utils/update_llc_test_checks.py (see file header) — do not hand-edit them;
; rerun the script after any codegen change. The IR under test computes a
; per-lane popcount via llvm.ctpop.v4i32, compares it unsigned-less-than
; against the splat constant 28, and sign-extends the <4 x i1> mask to
; <4 x i32>, exercising every RUN configuration in this file.
15651; SSE2-LABEL: ult_28_v4i32:
15652; SSE2:       # %bb.0:
15653; SSE2-NEXT:    movdqa %xmm0, %xmm1
15654; SSE2-NEXT:    psrlw $1, %xmm1
15655; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
15656; SSE2-NEXT:    psubb %xmm1, %xmm0
15657; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
15658; SSE2-NEXT:    movdqa %xmm0, %xmm2
15659; SSE2-NEXT:    pand %xmm1, %xmm2
15660; SSE2-NEXT:    psrlw $2, %xmm0
15661; SSE2-NEXT:    pand %xmm1, %xmm0
15662; SSE2-NEXT:    paddb %xmm2, %xmm0
15663; SSE2-NEXT:    movdqa %xmm0, %xmm1
15664; SSE2-NEXT:    psrlw $4, %xmm1
15665; SSE2-NEXT:    paddb %xmm0, %xmm1
15666; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
15667; SSE2-NEXT:    pxor %xmm0, %xmm0
15668; SSE2-NEXT:    movdqa %xmm1, %xmm2
15669; SSE2-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
15670; SSE2-NEXT:    psadbw %xmm0, %xmm2
15671; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
15672; SSE2-NEXT:    psadbw %xmm0, %xmm1
15673; SSE2-NEXT:    packuswb %xmm2, %xmm1
15674; SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [28,28,28,28]
15675; SSE2-NEXT:    pcmpgtd %xmm1, %xmm0
15676; SSE2-NEXT:    retq
15677;
15678; SSE3-LABEL: ult_28_v4i32:
15679; SSE3:       # %bb.0:
15680; SSE3-NEXT:    movdqa %xmm0, %xmm1
15681; SSE3-NEXT:    psrlw $1, %xmm1
15682; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
15683; SSE3-NEXT:    psubb %xmm1, %xmm0
15684; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
15685; SSE3-NEXT:    movdqa %xmm0, %xmm2
15686; SSE3-NEXT:    pand %xmm1, %xmm2
15687; SSE3-NEXT:    psrlw $2, %xmm0
15688; SSE3-NEXT:    pand %xmm1, %xmm0
15689; SSE3-NEXT:    paddb %xmm2, %xmm0
15690; SSE3-NEXT:    movdqa %xmm0, %xmm1
15691; SSE3-NEXT:    psrlw $4, %xmm1
15692; SSE3-NEXT:    paddb %xmm0, %xmm1
15693; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
15694; SSE3-NEXT:    pxor %xmm0, %xmm0
15695; SSE3-NEXT:    movdqa %xmm1, %xmm2
15696; SSE3-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
15697; SSE3-NEXT:    psadbw %xmm0, %xmm2
15698; SSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
15699; SSE3-NEXT:    psadbw %xmm0, %xmm1
15700; SSE3-NEXT:    packuswb %xmm2, %xmm1
15701; SSE3-NEXT:    movdqa {{.*#+}} xmm0 = [28,28,28,28]
15702; SSE3-NEXT:    pcmpgtd %xmm1, %xmm0
15703; SSE3-NEXT:    retq
15704;
15705; SSSE3-LABEL: ult_28_v4i32:
15706; SSSE3:       # %bb.0:
15707; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
15708; SSSE3-NEXT:    movdqa %xmm0, %xmm2
15709; SSSE3-NEXT:    pand %xmm1, %xmm2
15710; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
15711; SSSE3-NEXT:    movdqa %xmm3, %xmm4
15712; SSSE3-NEXT:    pshufb %xmm2, %xmm4
15713; SSSE3-NEXT:    psrlw $4, %xmm0
15714; SSSE3-NEXT:    pand %xmm1, %xmm0
15715; SSSE3-NEXT:    pshufb %xmm0, %xmm3
15716; SSSE3-NEXT:    paddb %xmm4, %xmm3
15717; SSSE3-NEXT:    pxor %xmm0, %xmm0
15718; SSSE3-NEXT:    movdqa %xmm3, %xmm1
15719; SSSE3-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
15720; SSSE3-NEXT:    psadbw %xmm0, %xmm1
15721; SSSE3-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
15722; SSSE3-NEXT:    psadbw %xmm0, %xmm3
15723; SSSE3-NEXT:    packuswb %xmm1, %xmm3
15724; SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [28,28,28,28]
15725; SSSE3-NEXT:    pcmpgtd %xmm3, %xmm0
15726; SSSE3-NEXT:    retq
15727;
15728; SSE41-LABEL: ult_28_v4i32:
15729; SSE41:       # %bb.0:
15730; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
15731; SSE41-NEXT:    movdqa %xmm0, %xmm2
15732; SSE41-NEXT:    pand %xmm1, %xmm2
15733; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
15734; SSE41-NEXT:    movdqa %xmm3, %xmm4
15735; SSE41-NEXT:    pshufb %xmm2, %xmm4
15736; SSE41-NEXT:    psrlw $4, %xmm0
15737; SSE41-NEXT:    pand %xmm1, %xmm0
15738; SSE41-NEXT:    pshufb %xmm0, %xmm3
15739; SSE41-NEXT:    paddb %xmm4, %xmm3
15740; SSE41-NEXT:    pxor %xmm0, %xmm0
15741; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero
15742; SSE41-NEXT:    punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3]
15743; SSE41-NEXT:    psadbw %xmm0, %xmm3
15744; SSE41-NEXT:    psadbw %xmm0, %xmm1
15745; SSE41-NEXT:    packuswb %xmm3, %xmm1
15746; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [28,28,28,28]
15747; SSE41-NEXT:    pcmpgtd %xmm1, %xmm0
15748; SSE41-NEXT:    retq
15749;
15750; AVX1-LABEL: ult_28_v4i32:
15751; AVX1:       # %bb.0:
15752; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
15753; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
15754; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
15755; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
15756; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
15757; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
15758; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
15759; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
15760; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
15761; AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
15762; AVX1-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
15763; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
15764; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
15765; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
15766; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [28,28,28,28]
15767; AVX1-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
15768; AVX1-NEXT:    retq
15769;
15770; AVX2-LABEL: ult_28_v4i32:
15771; AVX2:       # %bb.0:
15772; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
15773; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
15774; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
15775; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
15776; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
15777; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
15778; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
15779; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
15780; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
15781; AVX2-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
15782; AVX2-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
15783; AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
15784; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
15785; AVX2-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
15786; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [28,28,28,28]
15787; AVX2-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
15788; AVX2-NEXT:    retq
15789;
15790; AVX512VPOPCNTDQ-LABEL: ult_28_v4i32:
15791; AVX512VPOPCNTDQ:       # %bb.0:
15792; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
15793; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
15794; AVX512VPOPCNTDQ-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [28,28,28,28]
15795; AVX512VPOPCNTDQ-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
15796; AVX512VPOPCNTDQ-NEXT:    vzeroupper
15797; AVX512VPOPCNTDQ-NEXT:    retq
15798;
15799; AVX512VPOPCNTDQVL-LABEL: ult_28_v4i32:
15800; AVX512VPOPCNTDQVL:       # %bb.0:
15801; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
15802; AVX512VPOPCNTDQVL-NEXT:    vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1
15803; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
15804; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
15805; AVX512VPOPCNTDQVL-NEXT:    retq
15806;
15807; BITALG_NOVLX-LABEL: ult_28_v4i32:
15808; BITALG_NOVLX:       # %bb.0:
15809; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
15810; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
15811; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
15812; BITALG_NOVLX-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
15813; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
15814; BITALG_NOVLX-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
15815; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
15816; BITALG_NOVLX-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
15817; BITALG_NOVLX-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [28,28,28,28]
15818; BITALG_NOVLX-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
15819; BITALG_NOVLX-NEXT:    vzeroupper
15820; BITALG_NOVLX-NEXT:    retq
15821;
15822; BITALG-LABEL: ult_28_v4i32:
15823; BITALG:       # %bb.0:
15824; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
15825; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
15826; BITALG-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
15827; BITALG-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
15828; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
15829; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
15830; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
15831; BITALG-NEXT:    vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1
15832; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
15833; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
15834; BITALG-NEXT:    retq
; IR under test: popcount each lane, compare ult 28 (splat), sext mask.
15835  %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
15836  %3 = icmp ult <4 x i32> %2, <i32 28, i32 28, i32 28, i32 28>
15837  %4 = sext <4 x i1> %3 to <4 x i32>
15838  ret <4 x i32> %4
15839}
15840
15841define <4 x i32> @ugt_28_v4i32(<4 x i32> %0) {
; Review note: the CHECK lines below were autogenerated by
; utils/update_llc_test_checks.py (see file header) — do not hand-edit them;
; rerun the script after any codegen change. The IR under test computes a
; per-lane popcount via llvm.ctpop.v4i32, compares it unsigned-greater-than
; against the splat constant 28, and sign-extends the <4 x i1> mask to
; <4 x i32>, exercising every RUN configuration in this file.
15842; SSE2-LABEL: ugt_28_v4i32:
15843; SSE2:       # %bb.0:
15844; SSE2-NEXT:    movdqa %xmm0, %xmm1
15845; SSE2-NEXT:    psrlw $1, %xmm1
15846; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
15847; SSE2-NEXT:    psubb %xmm1, %xmm0
15848; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
15849; SSE2-NEXT:    movdqa %xmm0, %xmm2
15850; SSE2-NEXT:    pand %xmm1, %xmm2
15851; SSE2-NEXT:    psrlw $2, %xmm0
15852; SSE2-NEXT:    pand %xmm1, %xmm0
15853; SSE2-NEXT:    paddb %xmm2, %xmm0
15854; SSE2-NEXT:    movdqa %xmm0, %xmm1
15855; SSE2-NEXT:    psrlw $4, %xmm1
15856; SSE2-NEXT:    paddb %xmm0, %xmm1
15857; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
15858; SSE2-NEXT:    pxor %xmm0, %xmm0
15859; SSE2-NEXT:    movdqa %xmm1, %xmm2
15860; SSE2-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
15861; SSE2-NEXT:    psadbw %xmm0, %xmm2
15862; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
15863; SSE2-NEXT:    psadbw %xmm0, %xmm1
15864; SSE2-NEXT:    packuswb %xmm2, %xmm1
15865; SSE2-NEXT:    pcmpgtd {{.*}}(%rip), %xmm1
15866; SSE2-NEXT:    movdqa %xmm1, %xmm0
15867; SSE2-NEXT:    retq
15868;
15869; SSE3-LABEL: ugt_28_v4i32:
15870; SSE3:       # %bb.0:
15871; SSE3-NEXT:    movdqa %xmm0, %xmm1
15872; SSE3-NEXT:    psrlw $1, %xmm1
15873; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
15874; SSE3-NEXT:    psubb %xmm1, %xmm0
15875; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
15876; SSE3-NEXT:    movdqa %xmm0, %xmm2
15877; SSE3-NEXT:    pand %xmm1, %xmm2
15878; SSE3-NEXT:    psrlw $2, %xmm0
15879; SSE3-NEXT:    pand %xmm1, %xmm0
15880; SSE3-NEXT:    paddb %xmm2, %xmm0
15881; SSE3-NEXT:    movdqa %xmm0, %xmm1
15882; SSE3-NEXT:    psrlw $4, %xmm1
15883; SSE3-NEXT:    paddb %xmm0, %xmm1
15884; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
15885; SSE3-NEXT:    pxor %xmm0, %xmm0
15886; SSE3-NEXT:    movdqa %xmm1, %xmm2
15887; SSE3-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
15888; SSE3-NEXT:    psadbw %xmm0, %xmm2
15889; SSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
15890; SSE3-NEXT:    psadbw %xmm0, %xmm1
15891; SSE3-NEXT:    packuswb %xmm2, %xmm1
15892; SSE3-NEXT:    pcmpgtd {{.*}}(%rip), %xmm1
15893; SSE3-NEXT:    movdqa %xmm1, %xmm0
15894; SSE3-NEXT:    retq
15895;
15896; SSSE3-LABEL: ugt_28_v4i32:
15897; SSSE3:       # %bb.0:
15898; SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
15899; SSSE3-NEXT:    movdqa %xmm0, %xmm3
15900; SSSE3-NEXT:    pand %xmm2, %xmm3
15901; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
15902; SSSE3-NEXT:    movdqa %xmm1, %xmm4
15903; SSSE3-NEXT:    pshufb %xmm3, %xmm4
15904; SSSE3-NEXT:    psrlw $4, %xmm0
15905; SSSE3-NEXT:    pand %xmm2, %xmm0
15906; SSSE3-NEXT:    pshufb %xmm0, %xmm1
15907; SSSE3-NEXT:    paddb %xmm4, %xmm1
15908; SSSE3-NEXT:    pxor %xmm0, %xmm0
15909; SSSE3-NEXT:    movdqa %xmm1, %xmm2
15910; SSSE3-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
15911; SSSE3-NEXT:    psadbw %xmm0, %xmm2
15912; SSSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
15913; SSSE3-NEXT:    psadbw %xmm0, %xmm1
15914; SSSE3-NEXT:    packuswb %xmm2, %xmm1
15915; SSSE3-NEXT:    pcmpgtd {{.*}}(%rip), %xmm1
15916; SSSE3-NEXT:    movdqa %xmm1, %xmm0
15917; SSSE3-NEXT:    retq
15918;
15919; SSE41-LABEL: ugt_28_v4i32:
15920; SSE41:       # %bb.0:
15921; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
15922; SSE41-NEXT:    movdqa %xmm0, %xmm2
15923; SSE41-NEXT:    pand %xmm1, %xmm2
15924; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
15925; SSE41-NEXT:    movdqa %xmm3, %xmm4
15926; SSE41-NEXT:    pshufb %xmm2, %xmm4
15927; SSE41-NEXT:    psrlw $4, %xmm0
15928; SSE41-NEXT:    pand %xmm1, %xmm0
15929; SSE41-NEXT:    pshufb %xmm0, %xmm3
15930; SSE41-NEXT:    paddb %xmm4, %xmm3
15931; SSE41-NEXT:    pxor %xmm1, %xmm1
15932; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero
15933; SSE41-NEXT:    punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3]
15934; SSE41-NEXT:    psadbw %xmm1, %xmm3
15935; SSE41-NEXT:    psadbw %xmm1, %xmm0
15936; SSE41-NEXT:    packuswb %xmm3, %xmm0
15937; SSE41-NEXT:    pcmpgtd {{.*}}(%rip), %xmm0
15938; SSE41-NEXT:    retq
15939;
15940; AVX1-LABEL: ugt_28_v4i32:
15941; AVX1:       # %bb.0:
15942; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
15943; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
15944; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
15945; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
15946; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
15947; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
15948; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
15949; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
15950; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
15951; AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
15952; AVX1-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
15953; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
15954; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
15955; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
15956; AVX1-NEXT:    vpcmpgtd {{.*}}(%rip), %xmm0, %xmm0
15957; AVX1-NEXT:    retq
15958;
15959; AVX2-LABEL: ugt_28_v4i32:
15960; AVX2:       # %bb.0:
15961; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
15962; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
15963; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
15964; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
15965; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
15966; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
15967; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
15968; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
15969; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
15970; AVX2-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
15971; AVX2-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
15972; AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
15973; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
15974; AVX2-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
15975; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [28,28,28,28]
15976; AVX2-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
15977; AVX2-NEXT:    retq
15978;
15979; AVX512VPOPCNTDQ-LABEL: ugt_28_v4i32:
15980; AVX512VPOPCNTDQ:       # %bb.0:
15981; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
15982; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
15983; AVX512VPOPCNTDQ-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [28,28,28,28]
15984; AVX512VPOPCNTDQ-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
15985; AVX512VPOPCNTDQ-NEXT:    vzeroupper
15986; AVX512VPOPCNTDQ-NEXT:    retq
15987;
15988; AVX512VPOPCNTDQVL-LABEL: ugt_28_v4i32:
15989; AVX512VPOPCNTDQVL:       # %bb.0:
15990; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
15991; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1
15992; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
15993; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
15994; AVX512VPOPCNTDQVL-NEXT:    retq
15995;
15996; BITALG_NOVLX-LABEL: ugt_28_v4i32:
15997; BITALG_NOVLX:       # %bb.0:
15998; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
15999; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
16000; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
16001; BITALG_NOVLX-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
16002; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
16003; BITALG_NOVLX-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
16004; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
16005; BITALG_NOVLX-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
16006; BITALG_NOVLX-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [28,28,28,28]
16007; BITALG_NOVLX-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
16008; BITALG_NOVLX-NEXT:    vzeroupper
16009; BITALG_NOVLX-NEXT:    retq
16010;
16011; BITALG-LABEL: ugt_28_v4i32:
16012; BITALG:       # %bb.0:
16013; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
16014; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
16015; BITALG-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
16016; BITALG-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
16017; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
16018; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
16019; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
16020; BITALG-NEXT:    vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1
16021; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
16022; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
16023; BITALG-NEXT:    retq
; IR under test: popcount each lane, compare ugt 28 (splat), sext mask.
16024  %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
16025  %3 = icmp ugt <4 x i32> %2, <i32 28, i32 28, i32 28, i32 28>
16026  %4 = sext <4 x i1> %3 to <4 x i32>
16027  ret <4 x i32> %4
16028}
16029
define <4 x i32> @ult_29_v4i32(<4 x i32> %0) {
; Verifies codegen of per-lane popcount compared unsigned-less-than 29, sign-extended
; to a <4 x i32> mask. CHECK lines below are autogenerated by update_llc_test_checks.py;
; do not hand-edit them — regenerate instead.
; SSE2-LABEL: ult_29_v4i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE2-NEXT:    psadbw %xmm0, %xmm2
; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE2-NEXT:    psadbw %xmm0, %xmm1
; SSE2-NEXT:    packuswb %xmm2, %xmm1
; SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [29,29,29,29]
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ult_29_v4i32:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    movdqa %xmm1, %xmm2
; SSE3-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE3-NEXT:    psadbw %xmm0, %xmm2
; SSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE3-NEXT:    psadbw %xmm0, %xmm1
; SSE3-NEXT:    packuswb %xmm2, %xmm1
; SSE3-NEXT:    movdqa {{.*#+}} xmm0 = [29,29,29,29]
; SSE3-NEXT:    pcmpgtd %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ult_29_v4i32:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    movdqa %xmm3, %xmm1
; SSSE3-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; SSSE3-NEXT:    psadbw %xmm0, %xmm1
; SSSE3-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
; SSSE3-NEXT:    psadbw %xmm0, %xmm3
; SSSE3-NEXT:    packuswb %xmm1, %xmm3
; SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [29,29,29,29]
; SSSE3-NEXT:    pcmpgtd %xmm3, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ult_29_v4i32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero
; SSE41-NEXT:    punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3]
; SSE41-NEXT:    psadbw %xmm0, %xmm3
; SSE41-NEXT:    psadbw %xmm0, %xmm1
; SSE41-NEXT:    packuswb %xmm3, %xmm1
; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [29,29,29,29]
; SSE41-NEXT:    pcmpgtd %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ult_29_v4i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [29,29,29,29]
; AVX1-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ult_29_v4i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX2-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [29,29,29,29]
; AVX2-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ult_29_v4i32:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [29,29,29,29]
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ult_29_v4i32:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ult_29_v4i32:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; BITALG_NOVLX-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [29,29,29,29]
; BITALG_NOVLX-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ult_29_v4i32:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; BITALG-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
  %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
  %3 = icmp ult <4 x i32> %2, <i32 29, i32 29, i32 29, i32 29>
  %4 = sext <4 x i1> %3 to <4 x i32>
  ret <4 x i32> %4
}
16220
define <4 x i32> @ugt_29_v4i32(<4 x i32> %0) {
; Verifies codegen of per-lane popcount compared unsigned-greater-than 29, sign-extended
; to a <4 x i32> mask. CHECK lines below are autogenerated by update_llc_test_checks.py;
; do not hand-edit them — regenerate instead.
; SSE2-LABEL: ugt_29_v4i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE2-NEXT:    psadbw %xmm0, %xmm2
; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE2-NEXT:    psadbw %xmm0, %xmm1
; SSE2-NEXT:    packuswb %xmm2, %xmm1
; SSE2-NEXT:    pcmpgtd {{.*}}(%rip), %xmm1
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ugt_29_v4i32:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    movdqa %xmm1, %xmm2
; SSE3-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE3-NEXT:    psadbw %xmm0, %xmm2
; SSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE3-NEXT:    psadbw %xmm0, %xmm1
; SSE3-NEXT:    packuswb %xmm2, %xmm1
; SSE3-NEXT:    pcmpgtd {{.*}}(%rip), %xmm1
; SSE3-NEXT:    movdqa %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ugt_29_v4i32:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm3
; SSSE3-NEXT:    pand %xmm2, %xmm3
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm1, %xmm4
; SSSE3-NEXT:    pshufb %xmm3, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm2, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm1
; SSSE3-NEXT:    paddb %xmm4, %xmm1
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    movdqa %xmm1, %xmm2
; SSSE3-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSSE3-NEXT:    psadbw %xmm0, %xmm2
; SSSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSSE3-NEXT:    psadbw %xmm0, %xmm1
; SSSE3-NEXT:    packuswb %xmm2, %xmm1
; SSSE3-NEXT:    pcmpgtd {{.*}}(%rip), %xmm1
; SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ugt_29_v4i32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm1, %xmm1
; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero
; SSE41-NEXT:    punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3]
; SSE41-NEXT:    psadbw %xmm1, %xmm3
; SSE41-NEXT:    psadbw %xmm1, %xmm0
; SSE41-NEXT:    packuswb %xmm3, %xmm0
; SSE41-NEXT:    pcmpgtd {{.*}}(%rip), %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ugt_29_v4i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpgtd {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ugt_29_v4i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX2-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [29,29,29,29]
; AVX2-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ugt_29_v4i32:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [29,29,29,29]
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ugt_29_v4i32:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ugt_29_v4i32:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; BITALG_NOVLX-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [29,29,29,29]
; BITALG_NOVLX-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ugt_29_v4i32:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; BITALG-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
  %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
  %3 = icmp ugt <4 x i32> %2, <i32 29, i32 29, i32 29, i32 29>
  %4 = sext <4 x i1> %3 to <4 x i32>
  ret <4 x i32> %4
}
16409
define <4 x i32> @ult_30_v4i32(<4 x i32> %0) {
; Verifies codegen of per-lane popcount compared unsigned-less-than 30, sign-extended
; to a <4 x i32> mask. CHECK lines below are autogenerated by update_llc_test_checks.py;
; do not hand-edit them — regenerate instead.
; SSE2-LABEL: ult_30_v4i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE2-NEXT:    psadbw %xmm0, %xmm2
; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE2-NEXT:    psadbw %xmm0, %xmm1
; SSE2-NEXT:    packuswb %xmm2, %xmm1
; SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [30,30,30,30]
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ult_30_v4i32:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    movdqa %xmm1, %xmm2
; SSE3-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE3-NEXT:    psadbw %xmm0, %xmm2
; SSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE3-NEXT:    psadbw %xmm0, %xmm1
; SSE3-NEXT:    packuswb %xmm2, %xmm1
; SSE3-NEXT:    movdqa {{.*#+}} xmm0 = [30,30,30,30]
; SSE3-NEXT:    pcmpgtd %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ult_30_v4i32:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    movdqa %xmm3, %xmm1
; SSSE3-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; SSSE3-NEXT:    psadbw %xmm0, %xmm1
; SSSE3-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
; SSSE3-NEXT:    psadbw %xmm0, %xmm3
; SSSE3-NEXT:    packuswb %xmm1, %xmm3
; SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [30,30,30,30]
; SSSE3-NEXT:    pcmpgtd %xmm3, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ult_30_v4i32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero
; SSE41-NEXT:    punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3]
; SSE41-NEXT:    psadbw %xmm0, %xmm3
; SSE41-NEXT:    psadbw %xmm0, %xmm1
; SSE41-NEXT:    packuswb %xmm3, %xmm1
; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [30,30,30,30]
; SSE41-NEXT:    pcmpgtd %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ult_30_v4i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [30,30,30,30]
; AVX1-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ult_30_v4i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX2-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [30,30,30,30]
; AVX2-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ult_30_v4i32:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [30,30,30,30]
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ult_30_v4i32:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ult_30_v4i32:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; BITALG_NOVLX-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [30,30,30,30]
; BITALG_NOVLX-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ult_30_v4i32:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; BITALG-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
  %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
  %3 = icmp ult <4 x i32> %2, <i32 30, i32 30, i32 30, i32 30>
  %4 = sext <4 x i1> %3 to <4 x i32>
  ret <4 x i32> %4
}
16600
16601define <4 x i32> @ugt_30_v4i32(<4 x i32> %0) {
16602; SSE2-LABEL: ugt_30_v4i32:
16603; SSE2:       # %bb.0:
16604; SSE2-NEXT:    movdqa %xmm0, %xmm1
16605; SSE2-NEXT:    psrlw $1, %xmm1
16606; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
16607; SSE2-NEXT:    psubb %xmm1, %xmm0
16608; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
16609; SSE2-NEXT:    movdqa %xmm0, %xmm2
16610; SSE2-NEXT:    pand %xmm1, %xmm2
16611; SSE2-NEXT:    psrlw $2, %xmm0
16612; SSE2-NEXT:    pand %xmm1, %xmm0
16613; SSE2-NEXT:    paddb %xmm2, %xmm0
16614; SSE2-NEXT:    movdqa %xmm0, %xmm1
16615; SSE2-NEXT:    psrlw $4, %xmm1
16616; SSE2-NEXT:    paddb %xmm0, %xmm1
16617; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
16618; SSE2-NEXT:    pxor %xmm0, %xmm0
16619; SSE2-NEXT:    movdqa %xmm1, %xmm2
16620; SSE2-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
16621; SSE2-NEXT:    psadbw %xmm0, %xmm2
16622; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
16623; SSE2-NEXT:    psadbw %xmm0, %xmm1
16624; SSE2-NEXT:    packuswb %xmm2, %xmm1
16625; SSE2-NEXT:    pcmpgtd {{.*}}(%rip), %xmm1
16626; SSE2-NEXT:    movdqa %xmm1, %xmm0
16627; SSE2-NEXT:    retq
16628;
16629; SSE3-LABEL: ugt_30_v4i32:
16630; SSE3:       # %bb.0:
16631; SSE3-NEXT:    movdqa %xmm0, %xmm1
16632; SSE3-NEXT:    psrlw $1, %xmm1
16633; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
16634; SSE3-NEXT:    psubb %xmm1, %xmm0
16635; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
16636; SSE3-NEXT:    movdqa %xmm0, %xmm2
16637; SSE3-NEXT:    pand %xmm1, %xmm2
16638; SSE3-NEXT:    psrlw $2, %xmm0
16639; SSE3-NEXT:    pand %xmm1, %xmm0
16640; SSE3-NEXT:    paddb %xmm2, %xmm0
16641; SSE3-NEXT:    movdqa %xmm0, %xmm1
16642; SSE3-NEXT:    psrlw $4, %xmm1
16643; SSE3-NEXT:    paddb %xmm0, %xmm1
16644; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
16645; SSE3-NEXT:    pxor %xmm0, %xmm0
16646; SSE3-NEXT:    movdqa %xmm1, %xmm2
16647; SSE3-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
16648; SSE3-NEXT:    psadbw %xmm0, %xmm2
16649; SSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
16650; SSE3-NEXT:    psadbw %xmm0, %xmm1
16651; SSE3-NEXT:    packuswb %xmm2, %xmm1
16652; SSE3-NEXT:    pcmpgtd {{.*}}(%rip), %xmm1
16653; SSE3-NEXT:    movdqa %xmm1, %xmm0
16654; SSE3-NEXT:    retq
16655;
16656; SSSE3-LABEL: ugt_30_v4i32:
16657; SSSE3:       # %bb.0:
16658; SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
16659; SSSE3-NEXT:    movdqa %xmm0, %xmm3
16660; SSSE3-NEXT:    pand %xmm2, %xmm3
16661; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
16662; SSSE3-NEXT:    movdqa %xmm1, %xmm4
16663; SSSE3-NEXT:    pshufb %xmm3, %xmm4
16664; SSSE3-NEXT:    psrlw $4, %xmm0
16665; SSSE3-NEXT:    pand %xmm2, %xmm0
16666; SSSE3-NEXT:    pshufb %xmm0, %xmm1
16667; SSSE3-NEXT:    paddb %xmm4, %xmm1
16668; SSSE3-NEXT:    pxor %xmm0, %xmm0
16669; SSSE3-NEXT:    movdqa %xmm1, %xmm2
16670; SSSE3-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
16671; SSSE3-NEXT:    psadbw %xmm0, %xmm2
16672; SSSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
16673; SSSE3-NEXT:    psadbw %xmm0, %xmm1
16674; SSSE3-NEXT:    packuswb %xmm2, %xmm1
16675; SSSE3-NEXT:    pcmpgtd {{.*}}(%rip), %xmm1
16676; SSSE3-NEXT:    movdqa %xmm1, %xmm0
16677; SSSE3-NEXT:    retq
16678;
16679; SSE41-LABEL: ugt_30_v4i32:
16680; SSE41:       # %bb.0:
16681; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
16682; SSE41-NEXT:    movdqa %xmm0, %xmm2
16683; SSE41-NEXT:    pand %xmm1, %xmm2
16684; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
16685; SSE41-NEXT:    movdqa %xmm3, %xmm4
16686; SSE41-NEXT:    pshufb %xmm2, %xmm4
16687; SSE41-NEXT:    psrlw $4, %xmm0
16688; SSE41-NEXT:    pand %xmm1, %xmm0
16689; SSE41-NEXT:    pshufb %xmm0, %xmm3
16690; SSE41-NEXT:    paddb %xmm4, %xmm3
16691; SSE41-NEXT:    pxor %xmm1, %xmm1
16692; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero
16693; SSE41-NEXT:    punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3]
16694; SSE41-NEXT:    psadbw %xmm1, %xmm3
16695; SSE41-NEXT:    psadbw %xmm1, %xmm0
16696; SSE41-NEXT:    packuswb %xmm3, %xmm0
16697; SSE41-NEXT:    pcmpgtd {{.*}}(%rip), %xmm0
16698; SSE41-NEXT:    retq
16699;
16700; AVX1-LABEL: ugt_30_v4i32:
16701; AVX1:       # %bb.0:
16702; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
16703; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
16704; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
16705; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
16706; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
16707; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
16708; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
16709; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
16710; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
16711; AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
16712; AVX1-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
16713; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
16714; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
16715; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
16716; AVX1-NEXT:    vpcmpgtd {{.*}}(%rip), %xmm0, %xmm0
16717; AVX1-NEXT:    retq
16718;
16719; AVX2-LABEL: ugt_30_v4i32:
16720; AVX2:       # %bb.0:
16721; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
16722; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
16723; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
16724; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
16725; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
16726; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
16727; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
16728; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
16729; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
16730; AVX2-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
16731; AVX2-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
16732; AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
16733; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
16734; AVX2-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
16735; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [30,30,30,30]
16736; AVX2-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
16737; AVX2-NEXT:    retq
16738;
16739; AVX512VPOPCNTDQ-LABEL: ugt_30_v4i32:
16740; AVX512VPOPCNTDQ:       # %bb.0:
16741; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
16742; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
16743; AVX512VPOPCNTDQ-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [30,30,30,30]
16744; AVX512VPOPCNTDQ-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
16745; AVX512VPOPCNTDQ-NEXT:    vzeroupper
16746; AVX512VPOPCNTDQ-NEXT:    retq
16747;
16748; AVX512VPOPCNTDQVL-LABEL: ugt_30_v4i32:
16749; AVX512VPOPCNTDQVL:       # %bb.0:
16750; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
16751; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1
16752; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
16753; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
16754; AVX512VPOPCNTDQVL-NEXT:    retq
16755;
16756; BITALG_NOVLX-LABEL: ugt_30_v4i32:
16757; BITALG_NOVLX:       # %bb.0:
16758; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
16759; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
16760; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
16761; BITALG_NOVLX-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
16762; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
16763; BITALG_NOVLX-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
16764; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
16765; BITALG_NOVLX-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
16766; BITALG_NOVLX-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [30,30,30,30]
16767; BITALG_NOVLX-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
16768; BITALG_NOVLX-NEXT:    vzeroupper
16769; BITALG_NOVLX-NEXT:    retq
16770;
16771; BITALG-LABEL: ugt_30_v4i32:
16772; BITALG:       # %bb.0:
16773; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
16774; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
16775; BITALG-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
16776; BITALG-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
16777; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
16778; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
16779; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
16780; BITALG-NEXT:    vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1
16781; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
16782; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
16783; BITALG-NEXT:    retq
16784  %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
16785  %3 = icmp ugt <4 x i32> %2, <i32 30, i32 30, i32 30, i32 30>
16786  %4 = sext <4 x i1> %3 to <4 x i32>
16787  ret <4 x i32> %4
16788}
16789
define <4 x i32> @ult_31_v4i32(<4 x i32> %0) {
; NOTE(review): the CHECK lines below were autogenerated by
; utils/update_llc_test_checks.py (see file header) -- regenerate them with
; that script rather than editing by hand.
; SSE2-LABEL: ult_31_v4i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE2-NEXT:    psadbw %xmm0, %xmm2
; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE2-NEXT:    psadbw %xmm0, %xmm1
; SSE2-NEXT:    packuswb %xmm2, %xmm1
; SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [31,31,31,31]
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ult_31_v4i32:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    movdqa %xmm1, %xmm2
; SSE3-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE3-NEXT:    psadbw %xmm0, %xmm2
; SSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE3-NEXT:    psadbw %xmm0, %xmm1
; SSE3-NEXT:    packuswb %xmm2, %xmm1
; SSE3-NEXT:    movdqa {{.*#+}} xmm0 = [31,31,31,31]
; SSE3-NEXT:    pcmpgtd %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ult_31_v4i32:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    movdqa %xmm3, %xmm1
; SSSE3-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; SSSE3-NEXT:    psadbw %xmm0, %xmm1
; SSSE3-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
; SSSE3-NEXT:    psadbw %xmm0, %xmm3
; SSSE3-NEXT:    packuswb %xmm1, %xmm3
; SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [31,31,31,31]
; SSSE3-NEXT:    pcmpgtd %xmm3, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ult_31_v4i32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero
; SSE41-NEXT:    punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3]
; SSE41-NEXT:    psadbw %xmm0, %xmm3
; SSE41-NEXT:    psadbw %xmm0, %xmm1
; SSE41-NEXT:    packuswb %xmm3, %xmm1
; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [31,31,31,31]
; SSE41-NEXT:    pcmpgtd %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ult_31_v4i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [31,31,31,31]
; AVX1-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ult_31_v4i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX2-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [31,31,31,31]
; AVX2-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ult_31_v4i32:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [31,31,31,31]
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ult_31_v4i32:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ult_31_v4i32:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; BITALG_NOVLX-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [31,31,31,31]
; BITALG_NOVLX-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ult_31_v4i32:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; BITALG-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
; Test intent: per-lane popcount of a <4 x i32>, compared unsigned-less-than
; the splat constant 31, with the <4 x i1> result sign-extended to an
; all-ones/all-zeros <4 x i32> mask.
  %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
  %3 = icmp ult <4 x i32> %2, <i32 31, i32 31, i32 31, i32 31>
  %4 = sext <4 x i1> %3 to <4 x i32>
  ret <4 x i32> %4
}
16980
define <2 x i64> @ugt_1_v2i64(<2 x i64> %0) {
; NOTE(review): the CHECK lines below were autogenerated by
; utils/update_llc_test_checks.py (see file header) -- regenerate them with
; that script rather than editing by hand.
; SSE2-LABEL: ugt_1_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm1
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    paddq %xmm1, %xmm2
; SSE2-NEXT:    pand %xmm0, %xmm2
; SSE2-NEXT:    pxor %xmm3, %xmm3
; SSE2-NEXT:    pcmpeqd %xmm2, %xmm3
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[1,0,3,2]
; SSE2-NEXT:    pand %xmm3, %xmm0
; SSE2-NEXT:    pxor %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ugt_1_v2i64:
; SSE3:       # %bb.0:
; SSE3-NEXT:    pcmpeqd %xmm1, %xmm1
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    paddq %xmm1, %xmm2
; SSE3-NEXT:    pand %xmm0, %xmm2
; SSE3-NEXT:    pxor %xmm3, %xmm3
; SSE3-NEXT:    pcmpeqd %xmm2, %xmm3
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[1,0,3,2]
; SSE3-NEXT:    pand %xmm3, %xmm0
; SSE3-NEXT:    pxor %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ugt_1_v2i64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm1
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    paddq %xmm1, %xmm2
; SSSE3-NEXT:    pand %xmm0, %xmm2
; SSSE3-NEXT:    pxor %xmm3, %xmm3
; SSSE3-NEXT:    pcmpeqd %xmm2, %xmm3
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[1,0,3,2]
; SSSE3-NEXT:    pand %xmm3, %xmm0
; SSSE3-NEXT:    pxor %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ugt_1_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE41-NEXT:    movdqa %xmm0, %xmm1
; SSE41-NEXT:    paddq %xmm2, %xmm1
; SSE41-NEXT:    pand %xmm0, %xmm1
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    pcmpeqq %xmm0, %xmm1
; SSE41-NEXT:    pxor %xmm2, %xmm1
; SSE41-NEXT:    movdqa %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ugt_1_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpaddq %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqq %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ugt_1_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpaddq %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vpand %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpcmpeqq %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ugt_1_v2i64:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ugt_1_v2i64:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ugt_1_v2i64:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpaddq %xmm1, %xmm0, %xmm1
; BITALG_NOVLX-NEXT:    vpand %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ugt_1_v2i64:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpaddq %xmm1, %xmm0, %xmm1
; BITALG-NEXT:    vpand %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpternlogq $15, %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    retq
; Test intent: ctpop(x) > 1 per i64 lane.  The assertions above show most
; targets lower this without a real popcount, via the x & (x - 1) != 0
; "more than one bit set" trick.
  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
  %3 = icmp ugt <2 x i64> %2, <i64 1, i64 1>
  %4 = sext <2 x i1> %3 to <2 x i64>
  ret <2 x i64> %4
}
17095
define <2 x i64> @ult_2_v2i64(<2 x i64> %0) {
; NOTE(review): the CHECK lines below were autogenerated by
; utils/update_llc_test_checks.py (see file header) -- regenerate them with
; that script rather than editing by hand.
; SSE2-LABEL: ult_2_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm1
; SSE2-NEXT:    paddq %xmm0, %xmm1
; SSE2-NEXT:    pand %xmm0, %xmm1
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,0,3,2]
; SSE2-NEXT:    pand %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ult_2_v2i64:
; SSE3:       # %bb.0:
; SSE3-NEXT:    pcmpeqd %xmm1, %xmm1
; SSE3-NEXT:    paddq %xmm0, %xmm1
; SSE3-NEXT:    pand %xmm0, %xmm1
; SSE3-NEXT:    pxor %xmm2, %xmm2
; SSE3-NEXT:    pcmpeqd %xmm1, %xmm2
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,0,3,2]
; SSE3-NEXT:    pand %xmm2, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ult_2_v2i64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm1
; SSSE3-NEXT:    paddq %xmm0, %xmm1
; SSSE3-NEXT:    pand %xmm0, %xmm1
; SSSE3-NEXT:    pxor %xmm2, %xmm2
; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,0,3,2]
; SSSE3-NEXT:    pand %xmm2, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ult_2_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm1
; SSE41-NEXT:    paddq %xmm0, %xmm1
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pxor %xmm1, %xmm1
; SSE41-NEXT:    pcmpeqq %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ult_2_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpaddq %xmm1, %xmm0, %xmm1
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ult_2_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpaddq %xmm1, %xmm0, %xmm1
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ult_2_v2i64:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} xmm1 = [2,2]
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ult_2_v2i64:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ult_2_v2i64:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpaddq %xmm1, %xmm0, %xmm1
; BITALG_NOVLX-NEXT:    vpand %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ult_2_v2i64:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpaddq %xmm1, %xmm0, %xmm1
; BITALG-NEXT:    vpand %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    retq
; Test intent: ctpop(x) < 2 per i64 lane (i.e. power-of-two-or-zero).  The
; assertions above show this lowering to x & (x - 1) == 0 on most targets
; rather than a real popcount.
  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
  %3 = icmp ult <2 x i64> %2, <i64 2, i64 2>
  %4 = sext <2 x i1> %3 to <2 x i64>
  ret <2 x i64> %4
}
17196
define <2 x i64> @ugt_2_v2i64(<2 x i64> %0) {
; NOTE(review): the CHECK lines below were autogenerated by
; utils/update_llc_test_checks.py (see file header) -- regenerate them with
; that script rather than editing by hand.
; SSE2-LABEL: ugt_2_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    psadbw %xmm1, %xmm0
; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483650,2147483650]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ugt_2_v2i64:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    psadbw %xmm1, %xmm0
; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483650,2147483650]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE3-NEXT:    pand %xmm3, %xmm1
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE3-NEXT:    por %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ugt_2_v2i64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    psadbw %xmm3, %xmm0
; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483650,2147483650]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pcmpgtd %xmm1, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSSE3-NEXT:    pand %xmm3, %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ugt_2_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    psadbw %xmm3, %xmm0
; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483650,2147483650]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm3, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ugt_2_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ugt_2_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ugt_2_v2i64:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ugt_2_v2i64:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ugt_2_v2i64:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ugt_2_v2i64:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
; Test intent: per-lane popcount of a <2 x i64>, compared unsigned-greater-than
; the splat constant 2, with the <2 x i1> result sign-extended to an
; all-ones/all-zeros <2 x i64> mask.
  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
  %3 = icmp ugt <2 x i64> %2, <i64 2, i64 2>
  %4 = sext <2 x i1> %3 to <2 x i64>
  ret <2 x i64> %4
}
17380
17381define <2 x i64> @ult_3_v2i64(<2 x i64> %0) {
17382; SSE2-LABEL: ult_3_v2i64:
17383; SSE2:       # %bb.0:
17384; SSE2-NEXT:    movdqa %xmm0, %xmm1
17385; SSE2-NEXT:    psrlw $1, %xmm1
17386; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
17387; SSE2-NEXT:    psubb %xmm1, %xmm0
17388; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
17389; SSE2-NEXT:    movdqa %xmm0, %xmm2
17390; SSE2-NEXT:    pand %xmm1, %xmm2
17391; SSE2-NEXT:    psrlw $2, %xmm0
17392; SSE2-NEXT:    pand %xmm1, %xmm0
17393; SSE2-NEXT:    paddb %xmm2, %xmm0
17394; SSE2-NEXT:    movdqa %xmm0, %xmm1
17395; SSE2-NEXT:    psrlw $4, %xmm1
17396; SSE2-NEXT:    paddb %xmm0, %xmm1
17397; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
17398; SSE2-NEXT:    pxor %xmm0, %xmm0
17399; SSE2-NEXT:    psadbw %xmm1, %xmm0
17400; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
17401; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483651,2147483651]
17402; SSE2-NEXT:    movdqa %xmm1, %xmm2
17403; SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
17404; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
17405; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
17406; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
17407; SSE2-NEXT:    pand %xmm3, %xmm1
17408; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
17409; SSE2-NEXT:    por %xmm1, %xmm0
17410; SSE2-NEXT:    retq
17411;
17412; SSE3-LABEL: ult_3_v2i64:
17413; SSE3:       # %bb.0:
17414; SSE3-NEXT:    movdqa %xmm0, %xmm1
17415; SSE3-NEXT:    psrlw $1, %xmm1
17416; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
17417; SSE3-NEXT:    psubb %xmm1, %xmm0
17418; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
17419; SSE3-NEXT:    movdqa %xmm0, %xmm2
17420; SSE3-NEXT:    pand %xmm1, %xmm2
17421; SSE3-NEXT:    psrlw $2, %xmm0
17422; SSE3-NEXT:    pand %xmm1, %xmm0
17423; SSE3-NEXT:    paddb %xmm2, %xmm0
17424; SSE3-NEXT:    movdqa %xmm0, %xmm1
17425; SSE3-NEXT:    psrlw $4, %xmm1
17426; SSE3-NEXT:    paddb %xmm0, %xmm1
17427; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
17428; SSE3-NEXT:    pxor %xmm0, %xmm0
17429; SSE3-NEXT:    psadbw %xmm1, %xmm0
17430; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
17431; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483651,2147483651]
17432; SSE3-NEXT:    movdqa %xmm1, %xmm2
17433; SSE3-NEXT:    pcmpgtd %xmm0, %xmm2
17434; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
17435; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
17436; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
17437; SSE3-NEXT:    pand %xmm3, %xmm1
17438; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
17439; SSE3-NEXT:    por %xmm1, %xmm0
17440; SSE3-NEXT:    retq
17441;
17442; SSSE3-LABEL: ult_3_v2i64:
17443; SSSE3:       # %bb.0:
17444; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
17445; SSSE3-NEXT:    movdqa %xmm0, %xmm2
17446; SSSE3-NEXT:    pand %xmm1, %xmm2
17447; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
17448; SSSE3-NEXT:    movdqa %xmm3, %xmm4
17449; SSSE3-NEXT:    pshufb %xmm2, %xmm4
17450; SSSE3-NEXT:    psrlw $4, %xmm0
17451; SSSE3-NEXT:    pand %xmm1, %xmm0
17452; SSSE3-NEXT:    pshufb %xmm0, %xmm3
17453; SSSE3-NEXT:    paddb %xmm4, %xmm3
17454; SSSE3-NEXT:    pxor %xmm0, %xmm0
17455; SSSE3-NEXT:    psadbw %xmm3, %xmm0
17456; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
17457; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483651,2147483651]
17458; SSSE3-NEXT:    movdqa %xmm1, %xmm2
17459; SSSE3-NEXT:    pcmpgtd %xmm0, %xmm2
17460; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
17461; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
17462; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
17463; SSSE3-NEXT:    pand %xmm3, %xmm1
17464; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
17465; SSSE3-NEXT:    por %xmm1, %xmm0
17466; SSSE3-NEXT:    retq
17467;
17468; SSE41-LABEL: ult_3_v2i64:
17469; SSE41:       # %bb.0:
17470; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
17471; SSE41-NEXT:    movdqa %xmm0, %xmm2
17472; SSE41-NEXT:    pand %xmm1, %xmm2
17473; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
17474; SSE41-NEXT:    movdqa %xmm3, %xmm4
17475; SSE41-NEXT:    pshufb %xmm2, %xmm4
17476; SSE41-NEXT:    psrlw $4, %xmm0
17477; SSE41-NEXT:    pand %xmm1, %xmm0
17478; SSE41-NEXT:    pshufb %xmm0, %xmm3
17479; SSE41-NEXT:    paddb %xmm4, %xmm3
17480; SSE41-NEXT:    pxor %xmm0, %xmm0
17481; SSE41-NEXT:    psadbw %xmm3, %xmm0
17482; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
17483; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483651,2147483651]
17484; SSE41-NEXT:    movdqa %xmm1, %xmm2
17485; SSE41-NEXT:    pcmpgtd %xmm0, %xmm2
17486; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
17487; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
17488; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
17489; SSE41-NEXT:    pand %xmm3, %xmm1
17490; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
17491; SSE41-NEXT:    por %xmm1, %xmm0
17492; SSE41-NEXT:    retq
17493;
17494; AVX1-LABEL: ult_3_v2i64:
17495; AVX1:       # %bb.0:
17496; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
17497; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
17498; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
17499; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
17500; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
17501; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
17502; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
17503; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
17504; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
17505; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
17506; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [3,3]
17507; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
17508; AVX1-NEXT:    retq
17509;
17510; AVX2-LABEL: ult_3_v2i64:
17511; AVX2:       # %bb.0:
17512; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
17513; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
17514; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
17515; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
17516; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
17517; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
17518; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
17519; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
17520; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
17521; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
17522; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [3,3]
17523; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
17524; AVX2-NEXT:    retq
17525;
17526; AVX512VPOPCNTDQ-LABEL: ult_3_v2i64:
17527; AVX512VPOPCNTDQ:       # %bb.0:
17528; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
17529; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
17530; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} xmm1 = [3,3]
17531; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
17532; AVX512VPOPCNTDQ-NEXT:    vzeroupper
17533; AVX512VPOPCNTDQ-NEXT:    retq
17534;
17535; AVX512VPOPCNTDQVL-LABEL: ult_3_v2i64:
17536; AVX512VPOPCNTDQVL:       # %bb.0:
17537; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
17538; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
17539; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
17540; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
17541; AVX512VPOPCNTDQVL-NEXT:    retq
17542;
17543; BITALG_NOVLX-LABEL: ult_3_v2i64:
17544; BITALG_NOVLX:       # %bb.0:
17545; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
17546; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
17547; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
17548; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
17549; BITALG_NOVLX-NEXT:    vmovdqa {{.*#+}} xmm1 = [3,3]
17550; BITALG_NOVLX-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
17551; BITALG_NOVLX-NEXT:    vzeroupper
17552; BITALG_NOVLX-NEXT:    retq
17553;
17554; BITALG-LABEL: ult_3_v2i64:
17555; BITALG:       # %bb.0:
17556; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
17557; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
17558; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
17559; BITALG-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
17560; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
17561; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
17562; BITALG-NEXT:    retq
17563  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
17564  %3 = icmp ult <2 x i64> %2, <i64 3, i64 3>
17565  %4 = sext <2 x i1> %3 to <2 x i64>
17566  ret <2 x i64> %4
17567}
17568
; Lowering test: per-lane popcount of a <2 x i64>, compared unsigned-greater-than
; a splat of 3, sign-extended back to <2 x i64> (all-ones / all-zeros lanes).
; The assertion comments below were autogenerated by
; utils/update_llc_test_checks.py (see NOTE at the top of the file) --
; regenerate them with that script rather than editing by hand.
define <2 x i64> @ugt_3_v2i64(<2 x i64> %0) {
; SSE2-LABEL: ugt_3_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    psadbw %xmm1, %xmm0
; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483651,2147483651]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ugt_3_v2i64:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    psadbw %xmm1, %xmm0
; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483651,2147483651]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE3-NEXT:    pand %xmm3, %xmm1
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE3-NEXT:    por %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ugt_3_v2i64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    psadbw %xmm3, %xmm0
; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483651,2147483651]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pcmpgtd %xmm1, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSSE3-NEXT:    pand %xmm3, %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ugt_3_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    psadbw %xmm3, %xmm0
; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483651,2147483651]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm3, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ugt_3_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ugt_3_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ugt_3_v2i64:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ugt_3_v2i64:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ugt_3_v2i64:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ugt_3_v2i64:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
  ; Population count of each 64-bit lane.
  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
  ; Unsigned compare against splat(3); sext widens the i1 mask to 0/-1 lanes.
  %3 = icmp ugt <2 x i64> %2, <i64 3, i64 3>
  %4 = sext <2 x i1> %3 to <2 x i64>
  ret <2 x i64> %4
}
17752
; Lowering test: per-lane popcount of a <2 x i64>, compared unsigned-less-than
; a splat of 4, sign-extended back to <2 x i64> (all-ones / all-zeros lanes).
; The assertion comments below were autogenerated by
; utils/update_llc_test_checks.py (see NOTE at the top of the file) --
; regenerate them with that script rather than editing by hand.
define <2 x i64> @ult_4_v2i64(<2 x i64> %0) {
; SSE2-LABEL: ult_4_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    psadbw %xmm1, %xmm0
; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483652,2147483652]
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ult_4_v2i64:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    psadbw %xmm1, %xmm0
; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483652,2147483652]
; SSE3-NEXT:    movdqa %xmm1, %xmm2
; SSE3-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE3-NEXT:    pand %xmm3, %xmm1
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE3-NEXT:    por %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ult_4_v2i64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    psadbw %xmm3, %xmm0
; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483652,2147483652]
; SSSE3-NEXT:    movdqa %xmm1, %xmm2
; SSSE3-NEXT:    pcmpgtd %xmm0, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSSE3-NEXT:    pand %xmm3, %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ult_4_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    psadbw %xmm3, %xmm0
; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483652,2147483652]
; SSE41-NEXT:    movdqa %xmm1, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm3, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ult_4_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [4,4]
; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ult_4_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [4,4]
; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ult_4_v2i64:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} xmm1 = [4,4]
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ult_4_v2i64:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ult_4_v2i64:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vmovdqa {{.*#+}} xmm1 = [4,4]
; BITALG_NOVLX-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ult_4_v2i64:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
  ; Population count of each 64-bit lane.
  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
  ; Unsigned compare against splat(4); sext widens the i1 mask to 0/-1 lanes.
  %3 = icmp ult <2 x i64> %2, <i64 4, i64 4>
  %4 = sext <2 x i1> %3 to <2 x i64>
  ret <2 x i64> %4
}
17940
; Lowering test: per-lane popcount of a <2 x i64>, compared unsigned-greater-than
; a splat of 4, sign-extended back to <2 x i64> (all-ones / all-zeros lanes).
; The assertion comments below were autogenerated by
; utils/update_llc_test_checks.py (see NOTE at the top of the file) --
; regenerate them with that script rather than editing by hand.
define <2 x i64> @ugt_4_v2i64(<2 x i64> %0) {
; SSE2-LABEL: ugt_4_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    psadbw %xmm1, %xmm0
; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483652,2147483652]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ugt_4_v2i64:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    psadbw %xmm1, %xmm0
; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483652,2147483652]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE3-NEXT:    pand %xmm3, %xmm1
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE3-NEXT:    por %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ugt_4_v2i64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    psadbw %xmm3, %xmm0
; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483652,2147483652]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pcmpgtd %xmm1, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSSE3-NEXT:    pand %xmm3, %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ugt_4_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    psadbw %xmm3, %xmm0
; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483652,2147483652]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm3, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ugt_4_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ugt_4_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ugt_4_v2i64:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ugt_4_v2i64:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ugt_4_v2i64:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ugt_4_v2i64:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
  ; Population count of each 64-bit lane.
  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
  ; Unsigned compare against splat(4); sext widens the i1 mask to 0/-1 lanes.
  %3 = icmp ugt <2 x i64> %2, <i64 4, i64 4>
  %4 = sext <2 x i1> %3 to <2 x i64>
  ret <2 x i64> %4
}
18124
; Codegen check: per-lane popcount of a <2 x i64>, unsigned compare `ult 5`,
; and sign-extension of the <2 x i1> mask back to <2 x i64> (all-ones per
; lane where popcount < 5, else zero). Assertions were autogenerated by
; utils/update_llc_test_checks.py -- regenerate rather than hand-editing.
define <2 x i64> @ult_5_v2i64(<2 x i64> %0) {
; SSE2-LABEL: ult_5_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    psadbw %xmm1, %xmm0
; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
; 2147483653 = 0x80000005: the bound 5 with the i32 sign bit set. The por
; above presumably ORs in the matching sign bias so the unsigned 64-bit
; compare can be emulated with signed pcmpgtd/pcmpeqd plus shuffles
; (NOTE(review): the por's constant-pool operand is elided by the regex --
; confirm against the generated constant pool).
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483653,2147483653]
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ult_5_v2i64:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    psadbw %xmm1, %xmm0
; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483653,2147483653]
; SSE3-NEXT:    movdqa %xmm1, %xmm2
; SSE3-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE3-NEXT:    pand %xmm3, %xmm1
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE3-NEXT:    por %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ult_5_v2i64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    psadbw %xmm3, %xmm0
; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483653,2147483653]
; SSSE3-NEXT:    movdqa %xmm1, %xmm2
; SSSE3-NEXT:    pcmpgtd %xmm0, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSSE3-NEXT:    pand %xmm3, %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ult_5_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    psadbw %xmm3, %xmm0
; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483653,2147483653]
; SSE41-NEXT:    movdqa %xmm1, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm3, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ult_5_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [5,5]
; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ult_5_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [5,5]
; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ult_5_v2i64:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} xmm1 = [5,5]
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ult_5_v2i64:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ult_5_v2i64:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vmovdqa {{.*#+}} xmm1 = [5,5]
; BITALG_NOVLX-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ult_5_v2i64:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
  ; Reference IR the assertions above were generated from:
  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
  %3 = icmp ult <2 x i64> %2, <i64 5, i64 5>
  %4 = sext <2 x i1> %3 to <2 x i64>
  ret <2 x i64> %4
}
18312
; Codegen check: per-lane popcount of a <2 x i64>, unsigned compare `ugt 5`,
; and sign-extension of the <2 x i1> mask back to <2 x i64> (all-ones per
; lane where popcount > 5, else zero). Assertions were autogenerated by
; utils/update_llc_test_checks.py -- regenerate rather than hand-editing.
define <2 x i64> @ugt_5_v2i64(<2 x i64> %0) {
; SSE2-LABEL: ugt_5_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    psadbw %xmm1, %xmm0
; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
; 2147483653 = 0x80000005: the bound 5 with the i32 sign bit set; pcmpgtd
; operands are swapped relative to the ult tests because this checks x > 5.
; The por above presumably ORs in the matching sign bias for the signed
; emulation of the unsigned 64-bit compare (NOTE(review): the por's
; constant-pool operand is elided by the regex -- confirm).
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483653,2147483653]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ugt_5_v2i64:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    psadbw %xmm1, %xmm0
; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483653,2147483653]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE3-NEXT:    pand %xmm3, %xmm1
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE3-NEXT:    por %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ugt_5_v2i64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    psadbw %xmm3, %xmm0
; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483653,2147483653]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pcmpgtd %xmm1, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSSE3-NEXT:    pand %xmm3, %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ugt_5_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    psadbw %xmm3, %xmm0
; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483653,2147483653]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm3, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ugt_5_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ugt_5_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ugt_5_v2i64:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ugt_5_v2i64:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ugt_5_v2i64:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ugt_5_v2i64:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
  ; Reference IR the assertions above were generated from:
  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
  %3 = icmp ugt <2 x i64> %2, <i64 5, i64 5>
  %4 = sext <2 x i1> %3 to <2 x i64>
  ret <2 x i64> %4
}
18496
; Codegen check: per-lane popcount of a <2 x i64>, unsigned compare `ult 6`,
; and sign-extension of the <2 x i1> mask back to <2 x i64> (all-ones per
; lane where popcount < 6, else zero). Assertions were autogenerated by
; utils/update_llc_test_checks.py -- regenerate rather than hand-editing.
define <2 x i64> @ult_6_v2i64(<2 x i64> %0) {
; SSE2-LABEL: ult_6_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    psadbw %xmm1, %xmm0
; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
; 2147483654 = 0x80000006: the bound 6 with the i32 sign bit set. The por
; above presumably ORs in the matching sign bias so the unsigned 64-bit
; compare can be emulated with signed pcmpgtd/pcmpeqd plus shuffles
; (NOTE(review): the por's constant-pool operand is elided by the regex --
; confirm against the generated constant pool).
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483654,2147483654]
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ult_6_v2i64:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    psadbw %xmm1, %xmm0
; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483654,2147483654]
; SSE3-NEXT:    movdqa %xmm1, %xmm2
; SSE3-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE3-NEXT:    pand %xmm3, %xmm1
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE3-NEXT:    por %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ult_6_v2i64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    psadbw %xmm3, %xmm0
; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483654,2147483654]
; SSSE3-NEXT:    movdqa %xmm1, %xmm2
; SSSE3-NEXT:    pcmpgtd %xmm0, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSSE3-NEXT:    pand %xmm3, %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ult_6_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    psadbw %xmm3, %xmm0
; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483654,2147483654]
; SSE41-NEXT:    movdqa %xmm1, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm3, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ult_6_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [6,6]
; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ult_6_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [6,6]
; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ult_6_v2i64:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} xmm1 = [6,6]
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ult_6_v2i64:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ult_6_v2i64:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vmovdqa {{.*#+}} xmm1 = [6,6]
; BITALG_NOVLX-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ult_6_v2i64:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
  ; Reference IR the assertions above were generated from:
  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
  %3 = icmp ult <2 x i64> %2, <i64 6, i64 6>
  %4 = sext <2 x i1> %3 to <2 x i64>
  ret <2 x i64> %4
}
18684
; Codegen check: per-lane popcount of a <2 x i64>, unsigned compare `ugt 6`,
; and sign-extension of the <2 x i1> mask back to <2 x i64> (all-ones per
; lane where popcount > 6, else zero). Assertions were autogenerated by
; utils/update_llc_test_checks.py -- regenerate rather than hand-editing.
define <2 x i64> @ugt_6_v2i64(<2 x i64> %0) {
; SSE2-LABEL: ugt_6_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    psadbw %xmm1, %xmm0
; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
; 2147483654 = 0x80000006: the bound 6 with the i32 sign bit set; pcmpgtd
; operands are swapped relative to the ult tests because this checks x > 6.
; The por above presumably ORs in the matching sign bias for the signed
; emulation of the unsigned 64-bit compare (NOTE(review): the por's
; constant-pool operand is elided by the regex -- confirm).
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483654,2147483654]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ugt_6_v2i64:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    psadbw %xmm1, %xmm0
; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483654,2147483654]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE3-NEXT:    pand %xmm3, %xmm1
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE3-NEXT:    por %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ugt_6_v2i64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    psadbw %xmm3, %xmm0
; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483654,2147483654]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pcmpgtd %xmm1, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSSE3-NEXT:    pand %xmm3, %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ugt_6_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    psadbw %xmm3, %xmm0
; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483654,2147483654]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm3, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ugt_6_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ugt_6_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ugt_6_v2i64:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ugt_6_v2i64:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ugt_6_v2i64:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ugt_6_v2i64:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
  ; Reference IR the assertions above were generated from:
  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
  %3 = icmp ugt <2 x i64> %2, <i64 6, i64 6>
  %4 = sext <2 x i1> %3 to <2 x i64>
  ret <2 x i64> %4
}
18868
define <2 x i64> @ult_7_v2i64(<2 x i64> %0) {
; SSE2-LABEL: ult_7_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    psadbw %xmm1, %xmm0
; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483655,2147483655]
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ult_7_v2i64:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    psadbw %xmm1, %xmm0
; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483655,2147483655]
; SSE3-NEXT:    movdqa %xmm1, %xmm2
; SSE3-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE3-NEXT:    pand %xmm3, %xmm1
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE3-NEXT:    por %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ult_7_v2i64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    psadbw %xmm3, %xmm0
; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483655,2147483655]
; SSSE3-NEXT:    movdqa %xmm1, %xmm2
; SSSE3-NEXT:    pcmpgtd %xmm0, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSSE3-NEXT:    pand %xmm3, %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ult_7_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    psadbw %xmm3, %xmm0
; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483655,2147483655]
; SSE41-NEXT:    movdqa %xmm1, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm3, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ult_7_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [7,7]
; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ult_7_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [7,7]
; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ult_7_v2i64:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} xmm1 = [7,7]
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ult_7_v2i64:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ult_7_v2i64:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vmovdqa {{.*#+}} xmm1 = [7,7]
; BITALG_NOVLX-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ult_7_v2i64:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
  ; IR under test: per-lane popcount, unsigned compare (< 7), sext mask result.
  ; CHECK lines above are autogenerated by utils/update_llc_test_checks.py;
  ; regenerate with that script rather than editing by hand.
  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
  %3 = icmp ult <2 x i64> %2, <i64 7, i64 7>
  %4 = sext <2 x i1> %3 to <2 x i64>
  ret <2 x i64> %4
}
19056
define <2 x i64> @ugt_7_v2i64(<2 x i64> %0) {
; SSE2-LABEL: ugt_7_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    psadbw %xmm1, %xmm0
; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483655,2147483655]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ugt_7_v2i64:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    psadbw %xmm1, %xmm0
; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483655,2147483655]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE3-NEXT:    pand %xmm3, %xmm1
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE3-NEXT:    por %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ugt_7_v2i64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    psadbw %xmm3, %xmm0
; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483655,2147483655]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pcmpgtd %xmm1, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSSE3-NEXT:    pand %xmm3, %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ugt_7_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    psadbw %xmm3, %xmm0
; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483655,2147483655]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm3, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ugt_7_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ugt_7_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ugt_7_v2i64:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ugt_7_v2i64:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ugt_7_v2i64:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ugt_7_v2i64:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
  ; IR under test: per-lane popcount, unsigned compare (> 7), sext mask result.
  ; CHECK lines above are autogenerated by utils/update_llc_test_checks.py;
  ; regenerate with that script rather than editing by hand.
  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
  %3 = icmp ugt <2 x i64> %2, <i64 7, i64 7>
  %4 = sext <2 x i1> %3 to <2 x i64>
  ret <2 x i64> %4
}
19240
define <2 x i64> @ult_8_v2i64(<2 x i64> %0) {
; SSE2-LABEL: ult_8_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    psadbw %xmm1, %xmm0
; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483656,2147483656]
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ult_8_v2i64:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    psadbw %xmm1, %xmm0
; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483656,2147483656]
; SSE3-NEXT:    movdqa %xmm1, %xmm2
; SSE3-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE3-NEXT:    pand %xmm3, %xmm1
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE3-NEXT:    por %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ult_8_v2i64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    psadbw %xmm3, %xmm0
; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483656,2147483656]
; SSSE3-NEXT:    movdqa %xmm1, %xmm2
; SSSE3-NEXT:    pcmpgtd %xmm0, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSSE3-NEXT:    pand %xmm3, %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ult_8_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    psadbw %xmm3, %xmm0
; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483656,2147483656]
; SSE41-NEXT:    movdqa %xmm1, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm3, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ult_8_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [8,8]
; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ult_8_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [8,8]
; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ult_8_v2i64:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} xmm1 = [8,8]
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ult_8_v2i64:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ult_8_v2i64:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vmovdqa {{.*#+}} xmm1 = [8,8]
; BITALG_NOVLX-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ult_8_v2i64:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
  ; IR under test: per-lane popcount, unsigned compare (< 8), sext mask result.
  ; CHECK lines above are autogenerated by utils/update_llc_test_checks.py;
  ; regenerate with that script rather than editing by hand.
  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
  %3 = icmp ult <2 x i64> %2, <i64 8, i64 8>
  %4 = sext <2 x i1> %3 to <2 x i64>
  ret <2 x i64> %4
}
19428
define <2 x i64> @ugt_8_v2i64(<2 x i64> %0) {
; SSE2-LABEL: ugt_8_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    psadbw %xmm1, %xmm0
; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483656,2147483656]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ugt_8_v2i64:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    psadbw %xmm1, %xmm0
; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483656,2147483656]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE3-NEXT:    pand %xmm3, %xmm1
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE3-NEXT:    por %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ugt_8_v2i64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    psadbw %xmm3, %xmm0
; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483656,2147483656]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pcmpgtd %xmm1, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSSE3-NEXT:    pand %xmm3, %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ugt_8_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    psadbw %xmm3, %xmm0
; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483656,2147483656]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm3, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ugt_8_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ugt_8_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ugt_8_v2i64:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ugt_8_v2i64:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ugt_8_v2i64:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ugt_8_v2i64:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
  ; IR under test: per-lane popcount, unsigned compare (> 8), sext mask result.
  ; CHECK lines above are autogenerated by utils/update_llc_test_checks.py;
  ; regenerate with that script rather than editing by hand.
  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
  %3 = icmp ugt <2 x i64> %2, <i64 8, i64 8>
  %4 = sext <2 x i1> %3 to <2 x i64>
  ret <2 x i64> %4
}
19612
; popcount(<2 x i64>) < 9 (unsigned): ctpop, icmp ult 9, sext <2 x i1> to <2 x i64>.
define <2 x i64> @ult_9_v2i64(<2 x i64> %0) {
; SSE2-LABEL: ult_9_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    psadbw %xmm1, %xmm0
; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483657,2147483657]
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ult_9_v2i64:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    psadbw %xmm1, %xmm0
; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483657,2147483657]
; SSE3-NEXT:    movdqa %xmm1, %xmm2
; SSE3-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE3-NEXT:    pand %xmm3, %xmm1
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE3-NEXT:    por %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ult_9_v2i64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    psadbw %xmm3, %xmm0
; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483657,2147483657]
; SSSE3-NEXT:    movdqa %xmm1, %xmm2
; SSSE3-NEXT:    pcmpgtd %xmm0, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSSE3-NEXT:    pand %xmm3, %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ult_9_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    psadbw %xmm3, %xmm0
; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483657,2147483657]
; SSE41-NEXT:    movdqa %xmm1, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm3, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ult_9_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [9,9]
; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ult_9_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [9,9]
; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ult_9_v2i64:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} xmm1 = [9,9]
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ult_9_v2i64:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ult_9_v2i64:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vmovdqa {{.*#+}} xmm1 = [9,9]
; BITALG_NOVLX-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ult_9_v2i64:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
  %3 = icmp ult <2 x i64> %2, <i64 9, i64 9>
  %4 = sext <2 x i1> %3 to <2 x i64>
  ret <2 x i64> %4
}
19800
; popcount(<2 x i64>) > 9 (unsigned): ctpop, icmp ugt 9, sext <2 x i1> to <2 x i64>.
define <2 x i64> @ugt_9_v2i64(<2 x i64> %0) {
; SSE2-LABEL: ugt_9_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    psadbw %xmm1, %xmm0
; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483657,2147483657]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ugt_9_v2i64:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    psadbw %xmm1, %xmm0
; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483657,2147483657]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE3-NEXT:    pand %xmm3, %xmm1
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE3-NEXT:    por %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ugt_9_v2i64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    psadbw %xmm3, %xmm0
; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483657,2147483657]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pcmpgtd %xmm1, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSSE3-NEXT:    pand %xmm3, %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ugt_9_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    psadbw %xmm3, %xmm0
; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483657,2147483657]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm3, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ugt_9_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ugt_9_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ugt_9_v2i64:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ugt_9_v2i64:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ugt_9_v2i64:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ugt_9_v2i64:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
  %3 = icmp ugt <2 x i64> %2, <i64 9, i64 9>
  %4 = sext <2 x i1> %3 to <2 x i64>
  ret <2 x i64> %4
}
19984
; popcount(<2 x i64>) < 10 (unsigned): ctpop, icmp ult 10, sext <2 x i1> to <2 x i64>.
define <2 x i64> @ult_10_v2i64(<2 x i64> %0) {
; SSE2-LABEL: ult_10_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    psadbw %xmm1, %xmm0
; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483658,2147483658]
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ult_10_v2i64:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    psadbw %xmm1, %xmm0
; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483658,2147483658]
; SSE3-NEXT:    movdqa %xmm1, %xmm2
; SSE3-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE3-NEXT:    pand %xmm3, %xmm1
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE3-NEXT:    por %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ult_10_v2i64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    psadbw %xmm3, %xmm0
; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483658,2147483658]
; SSSE3-NEXT:    movdqa %xmm1, %xmm2
; SSSE3-NEXT:    pcmpgtd %xmm0, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSSE3-NEXT:    pand %xmm3, %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ult_10_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    psadbw %xmm3, %xmm0
; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483658,2147483658]
; SSE41-NEXT:    movdqa %xmm1, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm3, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ult_10_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [10,10]
; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ult_10_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [10,10]
; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ult_10_v2i64:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} xmm1 = [10,10]
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ult_10_v2i64:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ult_10_v2i64:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vmovdqa {{.*#+}} xmm1 = [10,10]
; BITALG_NOVLX-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ult_10_v2i64:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
  %3 = icmp ult <2 x i64> %2, <i64 10, i64 10>
  %4 = sext <2 x i1> %3 to <2 x i64>
  ret <2 x i64> %4
}
20172
; popcount(<2 x i64>) > 10 (unsigned): ctpop, icmp ugt 10, sext <2 x i1> to <2 x i64>.
define <2 x i64> @ugt_10_v2i64(<2 x i64> %0) {
; SSE2-LABEL: ugt_10_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    psadbw %xmm1, %xmm0
; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483658,2147483658]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ugt_10_v2i64:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    psadbw %xmm1, %xmm0
; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483658,2147483658]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE3-NEXT:    pand %xmm3, %xmm1
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE3-NEXT:    por %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ugt_10_v2i64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    psadbw %xmm3, %xmm0
; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483658,2147483658]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pcmpgtd %xmm1, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSSE3-NEXT:    pand %xmm3, %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ugt_10_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    psadbw %xmm3, %xmm0
; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483658,2147483658]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm3, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ugt_10_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ugt_10_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ugt_10_v2i64:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ugt_10_v2i64:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ugt_10_v2i64:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ugt_10_v2i64:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
  %3 = icmp ugt <2 x i64> %2, <i64 10, i64 10>
  %4 = sext <2 x i1> %3 to <2 x i64>
  ret <2 x i64> %4
}
20356
define <2 x i64> @ult_11_v2i64(<2 x i64> %0) {
; Popcount-compare test - the population count of each 64-bit lane is
; compared unsigned-less-than 11 and the i1 result is sign-extended back to
; i64. The assertions below were autogenerated by update_llc_test_checks.py
; and pin the lowering for every subtarget configured by the RUN lines at
; the top of the file; do not edit them by hand - regenerate instead.
; NOTE(review): the [2147483659,2147483659] constant appears to be the
; sign-biased form of 11 (0x80000000 + 11) used so the signed pcmpgtd /
; pcmpeqd + pshufd sequence can emulate an unsigned 64-bit compare on
; pre-SSE4.2 targets - confirm against the lowering if this changes.
; SSE2-LABEL: ult_11_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    psadbw %xmm1, %xmm0
; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483659,2147483659]
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ult_11_v2i64:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    psadbw %xmm1, %xmm0
; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483659,2147483659]
; SSE3-NEXT:    movdqa %xmm1, %xmm2
; SSE3-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE3-NEXT:    pand %xmm3, %xmm1
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE3-NEXT:    por %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ult_11_v2i64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    psadbw %xmm3, %xmm0
; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483659,2147483659]
; SSSE3-NEXT:    movdqa %xmm1, %xmm2
; SSSE3-NEXT:    pcmpgtd %xmm0, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSSE3-NEXT:    pand %xmm3, %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ult_11_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    psadbw %xmm3, %xmm0
; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483659,2147483659]
; SSE41-NEXT:    movdqa %xmm1, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm3, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ult_11_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [11,11]
; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ult_11_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [11,11]
; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ult_11_v2i64:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} xmm1 = [11,11]
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ult_11_v2i64:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ult_11_v2i64:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vmovdqa {{.*#+}} xmm1 = [11,11]
; BITALG_NOVLX-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ult_11_v2i64:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
; Tested IR - per-lane ctpop, unsigned compare against 11, sext of the mask.
  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
  %3 = icmp ult <2 x i64> %2, <i64 11, i64 11>
  %4 = sext <2 x i1> %3 to <2 x i64>
  ret <2 x i64> %4
}
20544
define <2 x i64> @ugt_11_v2i64(<2 x i64> %0) {
; Popcount-compare test - the population count of each 64-bit lane is
; compared unsigned-greater-than 11 and the i1 result is sign-extended back
; to i64. The assertions below were autogenerated by
; update_llc_test_checks.py and pin the lowering for every subtarget
; configured by the RUN lines; do not edit them by hand - regenerate instead.
; NOTE(review): compared with the ult_11 variant above, the operand order
; of the pcmpgtd / vpcmpgtq compares is swapped, which flips the predicate
; direction while reusing the same sign-biased constant.
; SSE2-LABEL: ugt_11_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    psadbw %xmm1, %xmm0
; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483659,2147483659]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ugt_11_v2i64:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    psadbw %xmm1, %xmm0
; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483659,2147483659]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE3-NEXT:    pand %xmm3, %xmm1
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE3-NEXT:    por %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ugt_11_v2i64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    psadbw %xmm3, %xmm0
; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483659,2147483659]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pcmpgtd %xmm1, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSSE3-NEXT:    pand %xmm3, %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ugt_11_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    psadbw %xmm3, %xmm0
; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483659,2147483659]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm3, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ugt_11_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ugt_11_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ugt_11_v2i64:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ugt_11_v2i64:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ugt_11_v2i64:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ugt_11_v2i64:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
; Tested IR - per-lane ctpop, unsigned compare against 11, sext of the mask.
  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
  %3 = icmp ugt <2 x i64> %2, <i64 11, i64 11>
  %4 = sext <2 x i1> %3 to <2 x i64>
  ret <2 x i64> %4
}
20728
define <2 x i64> @ult_12_v2i64(<2 x i64> %0) {
; Popcount-compare test - the population count of each 64-bit lane is
; compared unsigned-less-than 12 and the i1 result is sign-extended back to
; i64. The assertions below were autogenerated by update_llc_test_checks.py
; and pin the lowering for every subtarget configured by the RUN lines at
; the top of the file; do not edit them by hand - regenerate instead.
; NOTE(review): the [2147483660,2147483660] constant appears to be the
; sign-biased form of 12 (0x80000000 + 12) used so the signed pcmpgtd /
; pcmpeqd + pshufd sequence can emulate an unsigned 64-bit compare on
; pre-SSE4.2 targets - confirm against the lowering if this changes.
; SSE2-LABEL: ult_12_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    psadbw %xmm1, %xmm0
; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483660,2147483660]
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ult_12_v2i64:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    psadbw %xmm1, %xmm0
; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483660,2147483660]
; SSE3-NEXT:    movdqa %xmm1, %xmm2
; SSE3-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE3-NEXT:    pand %xmm3, %xmm1
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE3-NEXT:    por %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ult_12_v2i64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    psadbw %xmm3, %xmm0
; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483660,2147483660]
; SSSE3-NEXT:    movdqa %xmm1, %xmm2
; SSSE3-NEXT:    pcmpgtd %xmm0, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSSE3-NEXT:    pand %xmm3, %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ult_12_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    psadbw %xmm3, %xmm0
; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483660,2147483660]
; SSE41-NEXT:    movdqa %xmm1, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm3, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ult_12_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [12,12]
; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ult_12_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [12,12]
; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ult_12_v2i64:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} xmm1 = [12,12]
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ult_12_v2i64:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ult_12_v2i64:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vmovdqa {{.*#+}} xmm1 = [12,12]
; BITALG_NOVLX-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ult_12_v2i64:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
; Tested IR - per-lane ctpop, unsigned compare against 12, sext of the mask.
  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
  %3 = icmp ult <2 x i64> %2, <i64 12, i64 12>
  %4 = sext <2 x i1> %3 to <2 x i64>
  ret <2 x i64> %4
}
20916
20917define <2 x i64> @ugt_12_v2i64(<2 x i64> %0) {
20918; SSE2-LABEL: ugt_12_v2i64:
20919; SSE2:       # %bb.0:
20920; SSE2-NEXT:    movdqa %xmm0, %xmm1
20921; SSE2-NEXT:    psrlw $1, %xmm1
20922; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
20923; SSE2-NEXT:    psubb %xmm1, %xmm0
20924; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
20925; SSE2-NEXT:    movdqa %xmm0, %xmm2
20926; SSE2-NEXT:    pand %xmm1, %xmm2
20927; SSE2-NEXT:    psrlw $2, %xmm0
20928; SSE2-NEXT:    pand %xmm1, %xmm0
20929; SSE2-NEXT:    paddb %xmm2, %xmm0
20930; SSE2-NEXT:    movdqa %xmm0, %xmm1
20931; SSE2-NEXT:    psrlw $4, %xmm1
20932; SSE2-NEXT:    paddb %xmm0, %xmm1
20933; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
20934; SSE2-NEXT:    pxor %xmm0, %xmm0
20935; SSE2-NEXT:    psadbw %xmm1, %xmm0
20936; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
20937; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483660,2147483660]
20938; SSE2-NEXT:    movdqa %xmm0, %xmm2
20939; SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
20940; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
20941; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
20942; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
20943; SSE2-NEXT:    pand %xmm3, %xmm1
20944; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
20945; SSE2-NEXT:    por %xmm1, %xmm0
20946; SSE2-NEXT:    retq
20947;
20948; SSE3-LABEL: ugt_12_v2i64:
20949; SSE3:       # %bb.0:
20950; SSE3-NEXT:    movdqa %xmm0, %xmm1
20951; SSE3-NEXT:    psrlw $1, %xmm1
20952; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
20953; SSE3-NEXT:    psubb %xmm1, %xmm0
20954; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
20955; SSE3-NEXT:    movdqa %xmm0, %xmm2
20956; SSE3-NEXT:    pand %xmm1, %xmm2
20957; SSE3-NEXT:    psrlw $2, %xmm0
20958; SSE3-NEXT:    pand %xmm1, %xmm0
20959; SSE3-NEXT:    paddb %xmm2, %xmm0
20960; SSE3-NEXT:    movdqa %xmm0, %xmm1
20961; SSE3-NEXT:    psrlw $4, %xmm1
20962; SSE3-NEXT:    paddb %xmm0, %xmm1
20963; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
20964; SSE3-NEXT:    pxor %xmm0, %xmm0
20965; SSE3-NEXT:    psadbw %xmm1, %xmm0
20966; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
20967; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483660,2147483660]
20968; SSE3-NEXT:    movdqa %xmm0, %xmm2
20969; SSE3-NEXT:    pcmpgtd %xmm1, %xmm2
20970; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
20971; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
20972; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
20973; SSE3-NEXT:    pand %xmm3, %xmm1
20974; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
20975; SSE3-NEXT:    por %xmm1, %xmm0
20976; SSE3-NEXT:    retq
20977;
20978; SSSE3-LABEL: ugt_12_v2i64:
20979; SSSE3:       # %bb.0:
20980; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
20981; SSSE3-NEXT:    movdqa %xmm0, %xmm2
20982; SSSE3-NEXT:    pand %xmm1, %xmm2
20983; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
20984; SSSE3-NEXT:    movdqa %xmm3, %xmm4
20985; SSSE3-NEXT:    pshufb %xmm2, %xmm4
20986; SSSE3-NEXT:    psrlw $4, %xmm0
20987; SSSE3-NEXT:    pand %xmm1, %xmm0
20988; SSSE3-NEXT:    pshufb %xmm0, %xmm3
20989; SSSE3-NEXT:    paddb %xmm4, %xmm3
20990; SSSE3-NEXT:    pxor %xmm0, %xmm0
20991; SSSE3-NEXT:    psadbw %xmm3, %xmm0
20992; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
20993; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483660,2147483660]
20994; SSSE3-NEXT:    movdqa %xmm0, %xmm2
20995; SSSE3-NEXT:    pcmpgtd %xmm1, %xmm2
20996; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
20997; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
20998; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
20999; SSSE3-NEXT:    pand %xmm3, %xmm1
21000; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
21001; SSSE3-NEXT:    por %xmm1, %xmm0
21002; SSSE3-NEXT:    retq
21003;
21004; SSE41-LABEL: ugt_12_v2i64:
21005; SSE41:       # %bb.0:
21006; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
21007; SSE41-NEXT:    movdqa %xmm0, %xmm2
21008; SSE41-NEXT:    pand %xmm1, %xmm2
21009; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
21010; SSE41-NEXT:    movdqa %xmm3, %xmm4
21011; SSE41-NEXT:    pshufb %xmm2, %xmm4
21012; SSE41-NEXT:    psrlw $4, %xmm0
21013; SSE41-NEXT:    pand %xmm1, %xmm0
21014; SSE41-NEXT:    pshufb %xmm0, %xmm3
21015; SSE41-NEXT:    paddb %xmm4, %xmm3
21016; SSE41-NEXT:    pxor %xmm0, %xmm0
21017; SSE41-NEXT:    psadbw %xmm3, %xmm0
21018; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
21019; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483660,2147483660]
21020; SSE41-NEXT:    movdqa %xmm0, %xmm2
21021; SSE41-NEXT:    pcmpgtd %xmm1, %xmm2
21022; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
21023; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
21024; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
21025; SSE41-NEXT:    pand %xmm3, %xmm1
21026; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
21027; SSE41-NEXT:    por %xmm1, %xmm0
21028; SSE41-NEXT:    retq
21029;
21030; AVX1-LABEL: ugt_12_v2i64:
21031; AVX1:       # %bb.0:
21032; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
21033; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
21034; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
21035; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
21036; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
21037; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
21038; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
21039; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
21040; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
21041; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
21042; AVX1-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
21043; AVX1-NEXT:    retq
21044;
21045; AVX2-LABEL: ugt_12_v2i64:
21046; AVX2:       # %bb.0:
21047; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
21048; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
21049; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
21050; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
21051; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
21052; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
21053; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
21054; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
21055; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
21056; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
21057; AVX2-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
21058; AVX2-NEXT:    retq
21059;
21060; AVX512VPOPCNTDQ-LABEL: ugt_12_v2i64:
21061; AVX512VPOPCNTDQ:       # %bb.0:
21062; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
21063; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
21064; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
21065; AVX512VPOPCNTDQ-NEXT:    vzeroupper
21066; AVX512VPOPCNTDQ-NEXT:    retq
21067;
21068; AVX512VPOPCNTDQVL-LABEL: ugt_12_v2i64:
21069; AVX512VPOPCNTDQVL:       # %bb.0:
21070; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
21071; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
21072; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
21073; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
21074; AVX512VPOPCNTDQVL-NEXT:    retq
21075;
21076; BITALG_NOVLX-LABEL: ugt_12_v2i64:
21077; BITALG_NOVLX:       # %bb.0:
21078; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
21079; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
21080; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
21081; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
21082; BITALG_NOVLX-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
21083; BITALG_NOVLX-NEXT:    vzeroupper
21084; BITALG_NOVLX-NEXT:    retq
21085;
21086; BITALG-LABEL: ugt_12_v2i64:
21087; BITALG:       # %bb.0:
21088; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
21089; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
21090; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
21091; BITALG-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
21092; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
21093; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
21094; BITALG-NEXT:    retq
21095  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
21096  %3 = icmp ugt <2 x i64> %2, <i64 12, i64 12>
21097  %4 = sext <2 x i1> %3 to <2 x i64>
21098  ret <2 x i64> %4
21099}
21100
; Codegen for ctpop(<2 x i64>) followed by an unsigned less-than compare
; against splat 13, sign-extended back to <2 x i64>.  One CHECK block per
; RUN configuration above.  The pre-SSE41 lowerings emulate the unsigned
; 64-bit compare with 32-bit pcmpgtd/pcmpeqd shuffle sequences; the
; [2147483661,2147483661] constant is presumably the sign-bias of 13
; (0x8000000D) used for that emulation -- verify against the lowering.
; NOTE(review): assertions are autogenerated by
; utils/update_llc_test_checks.py -- regenerate rather than hand-edit.
define <2 x i64> @ult_13_v2i64(<2 x i64> %0) {
; SSE2-LABEL: ult_13_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    psadbw %xmm1, %xmm0
; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483661,2147483661]
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ult_13_v2i64:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    psadbw %xmm1, %xmm0
; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483661,2147483661]
; SSE3-NEXT:    movdqa %xmm1, %xmm2
; SSE3-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE3-NEXT:    pand %xmm3, %xmm1
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE3-NEXT:    por %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ult_13_v2i64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    psadbw %xmm3, %xmm0
; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483661,2147483661]
; SSSE3-NEXT:    movdqa %xmm1, %xmm2
; SSSE3-NEXT:    pcmpgtd %xmm0, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSSE3-NEXT:    pand %xmm3, %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ult_13_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    psadbw %xmm3, %xmm0
; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483661,2147483661]
; SSE41-NEXT:    movdqa %xmm1, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm3, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ult_13_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [13,13]
; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ult_13_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [13,13]
; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ult_13_v2i64:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} xmm1 = [13,13]
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ult_13_v2i64:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ult_13_v2i64:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vmovdqa {{.*#+}} xmm1 = [13,13]
; BITALG_NOVLX-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ult_13_v2i64:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
  %3 = icmp ult <2 x i64> %2, <i64 13, i64 13>
  %4 = sext <2 x i1> %3 to <2 x i64>
  ret <2 x i64> %4
}
21288
; Codegen for ctpop(<2 x i64>) followed by an unsigned greater-than compare
; against splat 13, sign-extended back to <2 x i64>.  One CHECK block per
; RUN configuration above.  Pre-SSE41 targets emulate the unsigned 64-bit
; compare via 32-bit pcmpgtd/pcmpeqd plus shuffles; AVX targets fold the
; constant into vpcmpgtq; the VL/BITALG targets use the masked
; vpcmpnleuq + vmovdqa64 {z} form.
; NOTE(review): assertions are autogenerated by
; utils/update_llc_test_checks.py -- regenerate rather than hand-edit.
define <2 x i64> @ugt_13_v2i64(<2 x i64> %0) {
; SSE2-LABEL: ugt_13_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    psadbw %xmm1, %xmm0
; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483661,2147483661]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ugt_13_v2i64:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    psadbw %xmm1, %xmm0
; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483661,2147483661]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE3-NEXT:    pand %xmm3, %xmm1
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE3-NEXT:    por %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ugt_13_v2i64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    psadbw %xmm3, %xmm0
; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483661,2147483661]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pcmpgtd %xmm1, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSSE3-NEXT:    pand %xmm3, %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ugt_13_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    psadbw %xmm3, %xmm0
; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483661,2147483661]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm3, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ugt_13_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ugt_13_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ugt_13_v2i64:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ugt_13_v2i64:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ugt_13_v2i64:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ugt_13_v2i64:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
  %3 = icmp ugt <2 x i64> %2, <i64 13, i64 13>
  %4 = sext <2 x i1> %3 to <2 x i64>
  ret <2 x i64> %4
}
21472
; Codegen for ctpop(<2 x i64>) followed by an unsigned less-than compare
; against splat 14, sign-extended back to <2 x i64>.  One CHECK block per
; RUN configuration above.  The [2147483662,2147483662] constant in the
; pre-SSE41 sequences is presumably the sign-bias of 14 (0x8000000E) used
; to emulate the unsigned 64-bit compare with signed 32-bit compares --
; verify against the lowering.
; NOTE(review): assertions are autogenerated by
; utils/update_llc_test_checks.py -- regenerate rather than hand-edit.
define <2 x i64> @ult_14_v2i64(<2 x i64> %0) {
; SSE2-LABEL: ult_14_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    psadbw %xmm1, %xmm0
; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483662,2147483662]
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ult_14_v2i64:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    psadbw %xmm1, %xmm0
; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483662,2147483662]
; SSE3-NEXT:    movdqa %xmm1, %xmm2
; SSE3-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE3-NEXT:    pand %xmm3, %xmm1
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE3-NEXT:    por %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ult_14_v2i64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    psadbw %xmm3, %xmm0
; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483662,2147483662]
; SSSE3-NEXT:    movdqa %xmm1, %xmm2
; SSSE3-NEXT:    pcmpgtd %xmm0, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSSE3-NEXT:    pand %xmm3, %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ult_14_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    psadbw %xmm3, %xmm0
; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483662,2147483662]
; SSE41-NEXT:    movdqa %xmm1, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm3, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ult_14_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [14,14]
; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ult_14_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [14,14]
; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ult_14_v2i64:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} xmm1 = [14,14]
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ult_14_v2i64:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ult_14_v2i64:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vmovdqa {{.*#+}} xmm1 = [14,14]
; BITALG_NOVLX-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ult_14_v2i64:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
  %3 = icmp ult <2 x i64> %2, <i64 14, i64 14>
  %4 = sext <2 x i1> %3 to <2 x i64>
  ret <2 x i64> %4
}
21660
21661define <2 x i64> @ugt_14_v2i64(<2 x i64> %0) {
21662; SSE2-LABEL: ugt_14_v2i64:
21663; SSE2:       # %bb.0:
21664; SSE2-NEXT:    movdqa %xmm0, %xmm1
21665; SSE2-NEXT:    psrlw $1, %xmm1
21666; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
21667; SSE2-NEXT:    psubb %xmm1, %xmm0
21668; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
21669; SSE2-NEXT:    movdqa %xmm0, %xmm2
21670; SSE2-NEXT:    pand %xmm1, %xmm2
21671; SSE2-NEXT:    psrlw $2, %xmm0
21672; SSE2-NEXT:    pand %xmm1, %xmm0
21673; SSE2-NEXT:    paddb %xmm2, %xmm0
21674; SSE2-NEXT:    movdqa %xmm0, %xmm1
21675; SSE2-NEXT:    psrlw $4, %xmm1
21676; SSE2-NEXT:    paddb %xmm0, %xmm1
21677; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
21678; SSE2-NEXT:    pxor %xmm0, %xmm0
21679; SSE2-NEXT:    psadbw %xmm1, %xmm0
21680; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
21681; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483662,2147483662]
21682; SSE2-NEXT:    movdqa %xmm0, %xmm2
21683; SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
21684; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
21685; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
21686; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
21687; SSE2-NEXT:    pand %xmm3, %xmm1
21688; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
21689; SSE2-NEXT:    por %xmm1, %xmm0
21690; SSE2-NEXT:    retq
21691;
21692; SSE3-LABEL: ugt_14_v2i64:
21693; SSE3:       # %bb.0:
21694; SSE3-NEXT:    movdqa %xmm0, %xmm1
21695; SSE3-NEXT:    psrlw $1, %xmm1
21696; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
21697; SSE3-NEXT:    psubb %xmm1, %xmm0
21698; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
21699; SSE3-NEXT:    movdqa %xmm0, %xmm2
21700; SSE3-NEXT:    pand %xmm1, %xmm2
21701; SSE3-NEXT:    psrlw $2, %xmm0
21702; SSE3-NEXT:    pand %xmm1, %xmm0
21703; SSE3-NEXT:    paddb %xmm2, %xmm0
21704; SSE3-NEXT:    movdqa %xmm0, %xmm1
21705; SSE3-NEXT:    psrlw $4, %xmm1
21706; SSE3-NEXT:    paddb %xmm0, %xmm1
21707; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
21708; SSE3-NEXT:    pxor %xmm0, %xmm0
21709; SSE3-NEXT:    psadbw %xmm1, %xmm0
21710; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
21711; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483662,2147483662]
21712; SSE3-NEXT:    movdqa %xmm0, %xmm2
21713; SSE3-NEXT:    pcmpgtd %xmm1, %xmm2
21714; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
21715; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
21716; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
21717; SSE3-NEXT:    pand %xmm3, %xmm1
21718; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
21719; SSE3-NEXT:    por %xmm1, %xmm0
21720; SSE3-NEXT:    retq
21721;
21722; SSSE3-LABEL: ugt_14_v2i64:
21723; SSSE3:       # %bb.0:
21724; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
21725; SSSE3-NEXT:    movdqa %xmm0, %xmm2
21726; SSSE3-NEXT:    pand %xmm1, %xmm2
21727; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
21728; SSSE3-NEXT:    movdqa %xmm3, %xmm4
21729; SSSE3-NEXT:    pshufb %xmm2, %xmm4
21730; SSSE3-NEXT:    psrlw $4, %xmm0
21731; SSSE3-NEXT:    pand %xmm1, %xmm0
21732; SSSE3-NEXT:    pshufb %xmm0, %xmm3
21733; SSSE3-NEXT:    paddb %xmm4, %xmm3
21734; SSSE3-NEXT:    pxor %xmm0, %xmm0
21735; SSSE3-NEXT:    psadbw %xmm3, %xmm0
21736; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
21737; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483662,2147483662]
21738; SSSE3-NEXT:    movdqa %xmm0, %xmm2
21739; SSSE3-NEXT:    pcmpgtd %xmm1, %xmm2
21740; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
21741; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
21742; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
21743; SSSE3-NEXT:    pand %xmm3, %xmm1
21744; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
21745; SSSE3-NEXT:    por %xmm1, %xmm0
21746; SSSE3-NEXT:    retq
21747;
21748; SSE41-LABEL: ugt_14_v2i64:
21749; SSE41:       # %bb.0:
21750; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
21751; SSE41-NEXT:    movdqa %xmm0, %xmm2
21752; SSE41-NEXT:    pand %xmm1, %xmm2
21753; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
21754; SSE41-NEXT:    movdqa %xmm3, %xmm4
21755; SSE41-NEXT:    pshufb %xmm2, %xmm4
21756; SSE41-NEXT:    psrlw $4, %xmm0
21757; SSE41-NEXT:    pand %xmm1, %xmm0
21758; SSE41-NEXT:    pshufb %xmm0, %xmm3
21759; SSE41-NEXT:    paddb %xmm4, %xmm3
21760; SSE41-NEXT:    pxor %xmm0, %xmm0
21761; SSE41-NEXT:    psadbw %xmm3, %xmm0
21762; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
21763; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483662,2147483662]
21764; SSE41-NEXT:    movdqa %xmm0, %xmm2
21765; SSE41-NEXT:    pcmpgtd %xmm1, %xmm2
21766; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
21767; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
21768; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
21769; SSE41-NEXT:    pand %xmm3, %xmm1
21770; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
21771; SSE41-NEXT:    por %xmm1, %xmm0
21772; SSE41-NEXT:    retq
21773;
21774; AVX1-LABEL: ugt_14_v2i64:
21775; AVX1:       # %bb.0:
21776; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
21777; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
21778; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
21779; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
21780; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
21781; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
21782; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
21783; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
21784; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
21785; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
21786; AVX1-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
21787; AVX1-NEXT:    retq
21788;
21789; AVX2-LABEL: ugt_14_v2i64:
21790; AVX2:       # %bb.0:
21791; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
21792; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
21793; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
21794; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
21795; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
21796; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
21797; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
21798; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
21799; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
21800; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
21801; AVX2-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
21802; AVX2-NEXT:    retq
21803;
21804; AVX512VPOPCNTDQ-LABEL: ugt_14_v2i64:
21805; AVX512VPOPCNTDQ:       # %bb.0:
21806; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
21807; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
21808; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
21809; AVX512VPOPCNTDQ-NEXT:    vzeroupper
21810; AVX512VPOPCNTDQ-NEXT:    retq
21811;
21812; AVX512VPOPCNTDQVL-LABEL: ugt_14_v2i64:
21813; AVX512VPOPCNTDQVL:       # %bb.0:
21814; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
21815; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
21816; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
21817; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
21818; AVX512VPOPCNTDQVL-NEXT:    retq
21819;
21820; BITALG_NOVLX-LABEL: ugt_14_v2i64:
21821; BITALG_NOVLX:       # %bb.0:
21822; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
21823; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
21824; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
21825; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
21826; BITALG_NOVLX-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
21827; BITALG_NOVLX-NEXT:    vzeroupper
21828; BITALG_NOVLX-NEXT:    retq
21829;
21830; BITALG-LABEL: ugt_14_v2i64:
21831; BITALG:       # %bb.0:
21832; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
21833; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
21834; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
21835; BITALG-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
21836; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
21837; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
21838; BITALG-NEXT:    retq
21839  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
21840  %3 = icmp ugt <2 x i64> %2, <i64 14, i64 14>
21841  %4 = sext <2 x i1> %3 to <2 x i64>
21842  ret <2 x i64> %4
21843}
21844
define <2 x i64> @ult_15_v2i64(<2 x i64> %0) {
; Tests lowering of "ctpop(v2i64) u< 15", sign-extended to <2 x i64>, on every
; RUN-line subtarget. NOTE: the CHECK lines below are autogenerated by
; utils/update_llc_test_checks.py -- regenerate rather than hand-edit them.
; SSE2-LABEL: ult_15_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    psadbw %xmm1, %xmm0
; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483663,2147483663]
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ult_15_v2i64:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    psadbw %xmm1, %xmm0
; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483663,2147483663]
; SSE3-NEXT:    movdqa %xmm1, %xmm2
; SSE3-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE3-NEXT:    pand %xmm3, %xmm1
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE3-NEXT:    por %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ult_15_v2i64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    psadbw %xmm3, %xmm0
; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483663,2147483663]
; SSSE3-NEXT:    movdqa %xmm1, %xmm2
; SSSE3-NEXT:    pcmpgtd %xmm0, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSSE3-NEXT:    pand %xmm3, %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ult_15_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    psadbw %xmm3, %xmm0
; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483663,2147483663]
; SSE41-NEXT:    movdqa %xmm1, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm3, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ult_15_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15]
; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ult_15_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15]
; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ult_15_v2i64:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15]
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ult_15_v2i64:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ult_15_v2i64:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15]
; BITALG_NOVLX-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ult_15_v2i64:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
  %3 = icmp ult <2 x i64> %2, <i64 15, i64 15>
  %4 = sext <2 x i1> %3 to <2 x i64>
  ret <2 x i64> %4
}
22032
define <2 x i64> @ugt_15_v2i64(<2 x i64> %0) {
; Tests lowering of "ctpop(v2i64) u> 15", sign-extended to <2 x i64>, on every
; RUN-line subtarget. NOTE: the CHECK lines below are autogenerated by
; utils/update_llc_test_checks.py -- regenerate rather than hand-edit them.
; SSE2-LABEL: ugt_15_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    psadbw %xmm1, %xmm0
; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483663,2147483663]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ugt_15_v2i64:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    psadbw %xmm1, %xmm0
; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483663,2147483663]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE3-NEXT:    pand %xmm3, %xmm1
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE3-NEXT:    por %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ugt_15_v2i64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    psadbw %xmm3, %xmm0
; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483663,2147483663]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pcmpgtd %xmm1, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSSE3-NEXT:    pand %xmm3, %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ugt_15_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    psadbw %xmm3, %xmm0
; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483663,2147483663]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm3, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ugt_15_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ugt_15_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ugt_15_v2i64:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ugt_15_v2i64:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ugt_15_v2i64:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ugt_15_v2i64:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
  %3 = icmp ugt <2 x i64> %2, <i64 15, i64 15>
  %4 = sext <2 x i1> %3 to <2 x i64>
  ret <2 x i64> %4
}
22216
define <2 x i64> @ult_16_v2i64(<2 x i64> %0) {
; Tests lowering of "ctpop(v2i64) u< 16", sign-extended to <2 x i64>, on every
; RUN-line subtarget. NOTE: the CHECK lines below are autogenerated by
; utils/update_llc_test_checks.py -- regenerate rather than hand-edit them.
; SSE2-LABEL: ult_16_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    psadbw %xmm1, %xmm0
; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483664,2147483664]
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ult_16_v2i64:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    psadbw %xmm1, %xmm0
; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483664,2147483664]
; SSE3-NEXT:    movdqa %xmm1, %xmm2
; SSE3-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE3-NEXT:    pand %xmm3, %xmm1
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE3-NEXT:    por %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ult_16_v2i64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    psadbw %xmm3, %xmm0
; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483664,2147483664]
; SSSE3-NEXT:    movdqa %xmm1, %xmm2
; SSSE3-NEXT:    pcmpgtd %xmm0, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSSE3-NEXT:    pand %xmm3, %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ult_16_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    psadbw %xmm3, %xmm0
; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483664,2147483664]
; SSE41-NEXT:    movdqa %xmm1, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm3, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ult_16_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [16,16]
; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ult_16_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [16,16]
; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ult_16_v2i64:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} xmm1 = [16,16]
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ult_16_v2i64:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ult_16_v2i64:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vmovdqa {{.*#+}} xmm1 = [16,16]
; BITALG_NOVLX-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ult_16_v2i64:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
  %3 = icmp ult <2 x i64> %2, <i64 16, i64 16>
  %4 = sext <2 x i1> %3 to <2 x i64>
  ret <2 x i64> %4
}
22404
22405define <2 x i64> @ugt_16_v2i64(<2 x i64> %0) {
22406; SSE2-LABEL: ugt_16_v2i64:
22407; SSE2:       # %bb.0:
22408; SSE2-NEXT:    movdqa %xmm0, %xmm1
22409; SSE2-NEXT:    psrlw $1, %xmm1
22410; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
22411; SSE2-NEXT:    psubb %xmm1, %xmm0
22412; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
22413; SSE2-NEXT:    movdqa %xmm0, %xmm2
22414; SSE2-NEXT:    pand %xmm1, %xmm2
22415; SSE2-NEXT:    psrlw $2, %xmm0
22416; SSE2-NEXT:    pand %xmm1, %xmm0
22417; SSE2-NEXT:    paddb %xmm2, %xmm0
22418; SSE2-NEXT:    movdqa %xmm0, %xmm1
22419; SSE2-NEXT:    psrlw $4, %xmm1
22420; SSE2-NEXT:    paddb %xmm0, %xmm1
22421; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
22422; SSE2-NEXT:    pxor %xmm0, %xmm0
22423; SSE2-NEXT:    psadbw %xmm1, %xmm0
22424; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
22425; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483664,2147483664]
22426; SSE2-NEXT:    movdqa %xmm0, %xmm2
22427; SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
22428; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
22429; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
22430; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
22431; SSE2-NEXT:    pand %xmm3, %xmm1
22432; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
22433; SSE2-NEXT:    por %xmm1, %xmm0
22434; SSE2-NEXT:    retq
22435;
22436; SSE3-LABEL: ugt_16_v2i64:
22437; SSE3:       # %bb.0:
22438; SSE3-NEXT:    movdqa %xmm0, %xmm1
22439; SSE3-NEXT:    psrlw $1, %xmm1
22440; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
22441; SSE3-NEXT:    psubb %xmm1, %xmm0
22442; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
22443; SSE3-NEXT:    movdqa %xmm0, %xmm2
22444; SSE3-NEXT:    pand %xmm1, %xmm2
22445; SSE3-NEXT:    psrlw $2, %xmm0
22446; SSE3-NEXT:    pand %xmm1, %xmm0
22447; SSE3-NEXT:    paddb %xmm2, %xmm0
22448; SSE3-NEXT:    movdqa %xmm0, %xmm1
22449; SSE3-NEXT:    psrlw $4, %xmm1
22450; SSE3-NEXT:    paddb %xmm0, %xmm1
22451; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
22452; SSE3-NEXT:    pxor %xmm0, %xmm0
22453; SSE3-NEXT:    psadbw %xmm1, %xmm0
22454; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
22455; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483664,2147483664]
22456; SSE3-NEXT:    movdqa %xmm0, %xmm2
22457; SSE3-NEXT:    pcmpgtd %xmm1, %xmm2
22458; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
22459; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
22460; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
22461; SSE3-NEXT:    pand %xmm3, %xmm1
22462; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
22463; SSE3-NEXT:    por %xmm1, %xmm0
22464; SSE3-NEXT:    retq
22465;
22466; SSSE3-LABEL: ugt_16_v2i64:
22467; SSSE3:       # %bb.0:
22468; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
22469; SSSE3-NEXT:    movdqa %xmm0, %xmm2
22470; SSSE3-NEXT:    pand %xmm1, %xmm2
22471; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
22472; SSSE3-NEXT:    movdqa %xmm3, %xmm4
22473; SSSE3-NEXT:    pshufb %xmm2, %xmm4
22474; SSSE3-NEXT:    psrlw $4, %xmm0
22475; SSSE3-NEXT:    pand %xmm1, %xmm0
22476; SSSE3-NEXT:    pshufb %xmm0, %xmm3
22477; SSSE3-NEXT:    paddb %xmm4, %xmm3
22478; SSSE3-NEXT:    pxor %xmm0, %xmm0
22479; SSSE3-NEXT:    psadbw %xmm3, %xmm0
22480; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
22481; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483664,2147483664]
22482; SSSE3-NEXT:    movdqa %xmm0, %xmm2
22483; SSSE3-NEXT:    pcmpgtd %xmm1, %xmm2
22484; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
22485; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
22486; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
22487; SSSE3-NEXT:    pand %xmm3, %xmm1
22488; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
22489; SSSE3-NEXT:    por %xmm1, %xmm0
22490; SSSE3-NEXT:    retq
22491;
22492; SSE41-LABEL: ugt_16_v2i64:
22493; SSE41:       # %bb.0:
22494; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
22495; SSE41-NEXT:    movdqa %xmm0, %xmm2
22496; SSE41-NEXT:    pand %xmm1, %xmm2
22497; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
22498; SSE41-NEXT:    movdqa %xmm3, %xmm4
22499; SSE41-NEXT:    pshufb %xmm2, %xmm4
22500; SSE41-NEXT:    psrlw $4, %xmm0
22501; SSE41-NEXT:    pand %xmm1, %xmm0
22502; SSE41-NEXT:    pshufb %xmm0, %xmm3
22503; SSE41-NEXT:    paddb %xmm4, %xmm3
22504; SSE41-NEXT:    pxor %xmm0, %xmm0
22505; SSE41-NEXT:    psadbw %xmm3, %xmm0
22506; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
22507; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483664,2147483664]
22508; SSE41-NEXT:    movdqa %xmm0, %xmm2
22509; SSE41-NEXT:    pcmpgtd %xmm1, %xmm2
22510; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
22511; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
22512; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
22513; SSE41-NEXT:    pand %xmm3, %xmm1
22514; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
22515; SSE41-NEXT:    por %xmm1, %xmm0
22516; SSE41-NEXT:    retq
22517;
22518; AVX1-LABEL: ugt_16_v2i64:
22519; AVX1:       # %bb.0:
22520; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
22521; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
22522; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
22523; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
22524; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
22525; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
22526; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
22527; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
22528; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
22529; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
22530; AVX1-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
22531; AVX1-NEXT:    retq
22532;
22533; AVX2-LABEL: ugt_16_v2i64:
22534; AVX2:       # %bb.0:
22535; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
22536; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
22537; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
22538; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
22539; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
22540; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
22541; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
22542; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
22543; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
22544; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
22545; AVX2-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
22546; AVX2-NEXT:    retq
22547;
22548; AVX512VPOPCNTDQ-LABEL: ugt_16_v2i64:
22549; AVX512VPOPCNTDQ:       # %bb.0:
22550; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
22551; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
22552; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
22553; AVX512VPOPCNTDQ-NEXT:    vzeroupper
22554; AVX512VPOPCNTDQ-NEXT:    retq
22555;
22556; AVX512VPOPCNTDQVL-LABEL: ugt_16_v2i64:
22557; AVX512VPOPCNTDQVL:       # %bb.0:
22558; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
22559; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
22560; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
22561; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
22562; AVX512VPOPCNTDQVL-NEXT:    retq
22563;
22564; BITALG_NOVLX-LABEL: ugt_16_v2i64:
22565; BITALG_NOVLX:       # %bb.0:
22566; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
22567; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
22568; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
22569; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
22570; BITALG_NOVLX-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
22571; BITALG_NOVLX-NEXT:    vzeroupper
22572; BITALG_NOVLX-NEXT:    retq
22573;
22574; BITALG-LABEL: ugt_16_v2i64:
22575; BITALG:       # %bb.0:
22576; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
22577; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
22578; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
22579; BITALG-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
22580; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
22581; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
22582; BITALG-NEXT:    retq
22583  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
22584  %3 = icmp ugt <2 x i64> %2, <i64 16, i64 16>
22585  %4 = sext <2 x i1> %3 to <2 x i64>
22586  ret <2 x i64> %4
22587}
22588
22589define <2 x i64> @ult_17_v2i64(<2 x i64> %0) {
22590; SSE2-LABEL: ult_17_v2i64:
22591; SSE2:       # %bb.0:
22592; SSE2-NEXT:    movdqa %xmm0, %xmm1
22593; SSE2-NEXT:    psrlw $1, %xmm1
22594; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
22595; SSE2-NEXT:    psubb %xmm1, %xmm0
22596; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
22597; SSE2-NEXT:    movdqa %xmm0, %xmm2
22598; SSE2-NEXT:    pand %xmm1, %xmm2
22599; SSE2-NEXT:    psrlw $2, %xmm0
22600; SSE2-NEXT:    pand %xmm1, %xmm0
22601; SSE2-NEXT:    paddb %xmm2, %xmm0
22602; SSE2-NEXT:    movdqa %xmm0, %xmm1
22603; SSE2-NEXT:    psrlw $4, %xmm1
22604; SSE2-NEXT:    paddb %xmm0, %xmm1
22605; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
22606; SSE2-NEXT:    pxor %xmm0, %xmm0
22607; SSE2-NEXT:    psadbw %xmm1, %xmm0
22608; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
22609; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483665,2147483665]
22610; SSE2-NEXT:    movdqa %xmm1, %xmm2
22611; SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
22612; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
22613; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
22614; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
22615; SSE2-NEXT:    pand %xmm3, %xmm1
22616; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
22617; SSE2-NEXT:    por %xmm1, %xmm0
22618; SSE2-NEXT:    retq
22619;
22620; SSE3-LABEL: ult_17_v2i64:
22621; SSE3:       # %bb.0:
22622; SSE3-NEXT:    movdqa %xmm0, %xmm1
22623; SSE3-NEXT:    psrlw $1, %xmm1
22624; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
22625; SSE3-NEXT:    psubb %xmm1, %xmm0
22626; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
22627; SSE3-NEXT:    movdqa %xmm0, %xmm2
22628; SSE3-NEXT:    pand %xmm1, %xmm2
22629; SSE3-NEXT:    psrlw $2, %xmm0
22630; SSE3-NEXT:    pand %xmm1, %xmm0
22631; SSE3-NEXT:    paddb %xmm2, %xmm0
22632; SSE3-NEXT:    movdqa %xmm0, %xmm1
22633; SSE3-NEXT:    psrlw $4, %xmm1
22634; SSE3-NEXT:    paddb %xmm0, %xmm1
22635; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
22636; SSE3-NEXT:    pxor %xmm0, %xmm0
22637; SSE3-NEXT:    psadbw %xmm1, %xmm0
22638; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
22639; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483665,2147483665]
22640; SSE3-NEXT:    movdqa %xmm1, %xmm2
22641; SSE3-NEXT:    pcmpgtd %xmm0, %xmm2
22642; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
22643; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
22644; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
22645; SSE3-NEXT:    pand %xmm3, %xmm1
22646; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
22647; SSE3-NEXT:    por %xmm1, %xmm0
22648; SSE3-NEXT:    retq
22649;
22650; SSSE3-LABEL: ult_17_v2i64:
22651; SSSE3:       # %bb.0:
22652; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
22653; SSSE3-NEXT:    movdqa %xmm0, %xmm2
22654; SSSE3-NEXT:    pand %xmm1, %xmm2
22655; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
22656; SSSE3-NEXT:    movdqa %xmm3, %xmm4
22657; SSSE3-NEXT:    pshufb %xmm2, %xmm4
22658; SSSE3-NEXT:    psrlw $4, %xmm0
22659; SSSE3-NEXT:    pand %xmm1, %xmm0
22660; SSSE3-NEXT:    pshufb %xmm0, %xmm3
22661; SSSE3-NEXT:    paddb %xmm4, %xmm3
22662; SSSE3-NEXT:    pxor %xmm0, %xmm0
22663; SSSE3-NEXT:    psadbw %xmm3, %xmm0
22664; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
22665; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483665,2147483665]
22666; SSSE3-NEXT:    movdqa %xmm1, %xmm2
22667; SSSE3-NEXT:    pcmpgtd %xmm0, %xmm2
22668; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
22669; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
22670; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
22671; SSSE3-NEXT:    pand %xmm3, %xmm1
22672; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
22673; SSSE3-NEXT:    por %xmm1, %xmm0
22674; SSSE3-NEXT:    retq
22675;
22676; SSE41-LABEL: ult_17_v2i64:
22677; SSE41:       # %bb.0:
22678; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
22679; SSE41-NEXT:    movdqa %xmm0, %xmm2
22680; SSE41-NEXT:    pand %xmm1, %xmm2
22681; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
22682; SSE41-NEXT:    movdqa %xmm3, %xmm4
22683; SSE41-NEXT:    pshufb %xmm2, %xmm4
22684; SSE41-NEXT:    psrlw $4, %xmm0
22685; SSE41-NEXT:    pand %xmm1, %xmm0
22686; SSE41-NEXT:    pshufb %xmm0, %xmm3
22687; SSE41-NEXT:    paddb %xmm4, %xmm3
22688; SSE41-NEXT:    pxor %xmm0, %xmm0
22689; SSE41-NEXT:    psadbw %xmm3, %xmm0
22690; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
22691; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483665,2147483665]
22692; SSE41-NEXT:    movdqa %xmm1, %xmm2
22693; SSE41-NEXT:    pcmpgtd %xmm0, %xmm2
22694; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
22695; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
22696; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
22697; SSE41-NEXT:    pand %xmm3, %xmm1
22698; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
22699; SSE41-NEXT:    por %xmm1, %xmm0
22700; SSE41-NEXT:    retq
22701;
22702; AVX1-LABEL: ult_17_v2i64:
22703; AVX1:       # %bb.0:
22704; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
22705; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
22706; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
22707; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
22708; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
22709; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
22710; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
22711; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
22712; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
22713; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
22714; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [17,17]
22715; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
22716; AVX1-NEXT:    retq
22717;
22718; AVX2-LABEL: ult_17_v2i64:
22719; AVX2:       # %bb.0:
22720; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
22721; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
22722; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
22723; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
22724; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
22725; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
22726; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
22727; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
22728; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
22729; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
22730; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [17,17]
22731; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
22732; AVX2-NEXT:    retq
22733;
22734; AVX512VPOPCNTDQ-LABEL: ult_17_v2i64:
22735; AVX512VPOPCNTDQ:       # %bb.0:
22736; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
22737; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
22738; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} xmm1 = [17,17]
22739; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
22740; AVX512VPOPCNTDQ-NEXT:    vzeroupper
22741; AVX512VPOPCNTDQ-NEXT:    retq
22742;
22743; AVX512VPOPCNTDQVL-LABEL: ult_17_v2i64:
22744; AVX512VPOPCNTDQVL:       # %bb.0:
22745; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
22746; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
22747; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
22748; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
22749; AVX512VPOPCNTDQVL-NEXT:    retq
22750;
22751; BITALG_NOVLX-LABEL: ult_17_v2i64:
22752; BITALG_NOVLX:       # %bb.0:
22753; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
22754; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
22755; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
22756; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
22757; BITALG_NOVLX-NEXT:    vmovdqa {{.*#+}} xmm1 = [17,17]
22758; BITALG_NOVLX-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
22759; BITALG_NOVLX-NEXT:    vzeroupper
22760; BITALG_NOVLX-NEXT:    retq
22761;
22762; BITALG-LABEL: ult_17_v2i64:
22763; BITALG:       # %bb.0:
22764; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
22765; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
22766; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
22767; BITALG-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
22768; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
22769; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
22770; BITALG-NEXT:    retq
22771  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
22772  %3 = icmp ult <2 x i64> %2, <i64 17, i64 17>
22773  %4 = sext <2 x i1> %3 to <2 x i64>
22774  ret <2 x i64> %4
22775}
22776
; Checks lowering of: ctpop(<2 x i64>) compared `ugt 17` per lane, result
; sign-extended to <2 x i64>. The SSE compare constant 2147483665 is
; 0x80000011 (17 biased by the sign bit) -- same bias trick as ult_17 above,
; but with the pcmpgtd operand order swapped for the greater-than direction.
; NOTE: the CHECK lines below are autogenerated by
; utils/update_llc_test_checks.py -- regenerate them instead of hand-editing.
define <2 x i64> @ugt_17_v2i64(<2 x i64> %0) {
; SSE2-LABEL: ugt_17_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    psadbw %xmm1, %xmm0
; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483665,2147483665]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ugt_17_v2i64:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    psadbw %xmm1, %xmm0
; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483665,2147483665]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE3-NEXT:    pand %xmm3, %xmm1
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE3-NEXT:    por %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ugt_17_v2i64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    psadbw %xmm3, %xmm0
; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483665,2147483665]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pcmpgtd %xmm1, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSSE3-NEXT:    pand %xmm3, %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ugt_17_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    psadbw %xmm3, %xmm0
; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483665,2147483665]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm3, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ugt_17_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ugt_17_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ugt_17_v2i64:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ugt_17_v2i64:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ugt_17_v2i64:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ugt_17_v2i64:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
  %3 = icmp ugt <2 x i64> %2, <i64 17, i64 17>
  %4 = sext <2 x i1> %3 to <2 x i64>
  ret <2 x i64> %4
}
22960
; Checks lowering of: ctpop(<2 x i64>) compared `ult 18` per lane, result
; sign-extended to <2 x i64>. The SSE compare constant 2147483666 is
; 0x80000012 (18 biased by the sign bit), mirroring the ult_17 pattern with
; the next threshold. NOTE: the CHECK lines below are autogenerated by
; utils/update_llc_test_checks.py -- regenerate them instead of hand-editing.
define <2 x i64> @ult_18_v2i64(<2 x i64> %0) {
; SSE2-LABEL: ult_18_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    psadbw %xmm1, %xmm0
; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483666,2147483666]
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ult_18_v2i64:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    psadbw %xmm1, %xmm0
; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483666,2147483666]
; SSE3-NEXT:    movdqa %xmm1, %xmm2
; SSE3-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE3-NEXT:    pand %xmm3, %xmm1
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE3-NEXT:    por %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ult_18_v2i64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    psadbw %xmm3, %xmm0
; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483666,2147483666]
; SSSE3-NEXT:    movdqa %xmm1, %xmm2
; SSSE3-NEXT:    pcmpgtd %xmm0, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSSE3-NEXT:    pand %xmm3, %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ult_18_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    psadbw %xmm3, %xmm0
; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483666,2147483666]
; SSE41-NEXT:    movdqa %xmm1, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm3, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ult_18_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [18,18]
; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ult_18_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [18,18]
; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ult_18_v2i64:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} xmm1 = [18,18]
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ult_18_v2i64:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ult_18_v2i64:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vmovdqa {{.*#+}} xmm1 = [18,18]
; BITALG_NOVLX-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ult_18_v2i64:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
  %3 = icmp ult <2 x i64> %2, <i64 18, i64 18>
  %4 = sext <2 x i1> %3 to <2 x i64>
  ret <2 x i64> %4
}
23148
; popcount(x) ugt 18 on <2 x i64>, lowered per target.  SSE2/SSE3 build the
; popcount with shift/mask bit-math plus psadbw; SSSE3/SSE41 use a pshufb
; nibble LUT plus psadbw.  Lacking a native 64-bit compare, the SSE paths
; emulate the unsigned compare with pcmpgtd/pcmpeqd/pshufd against
; 2147483666 (= 2^31 + 18; the preceding por presumably ORs in a sign bias --
; its constant-pool operand is elided by the {{.*}} regex).  AVX compares
; with vpcmpgtq; the AVX512VL and BITALG variants compare into a mask
; register and materialize the result with a zero-masked all-ones move.
define <2 x i64> @ugt_18_v2i64(<2 x i64> %0) {
; SSE2-LABEL: ugt_18_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    psadbw %xmm1, %xmm0
; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483666,2147483666]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ugt_18_v2i64:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    psadbw %xmm1, %xmm0
; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483666,2147483666]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE3-NEXT:    pand %xmm3, %xmm1
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE3-NEXT:    por %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ugt_18_v2i64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    psadbw %xmm3, %xmm0
; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483666,2147483666]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pcmpgtd %xmm1, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSSE3-NEXT:    pand %xmm3, %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ugt_18_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    psadbw %xmm3, %xmm0
; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483666,2147483666]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm3, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ugt_18_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ugt_18_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ugt_18_v2i64:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ugt_18_v2i64:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ugt_18_v2i64:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ugt_18_v2i64:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
; Reference IR under test: ctpop, unsigned greater-than 18, sext i1 -> i64 mask.
  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
  %3 = icmp ugt <2 x i64> %2, <i64 18, i64 18>
  %4 = sext <2 x i1> %3 to <2 x i64>
  ret <2 x i64> %4
}
23332
; popcount(x) ult 19 on <2 x i64>, lowered per target.  Same popcount
; strategies as the ugt cases (bit-math for SSE2/SSE3, pshufb nibble LUT for
; SSSE3/SSE41).  The SSE compare is inverted relative to ugt -- the constant
; 2147483667 (= 2^31 + 19) is placed on the left of pcmpgtd, testing
; "threshold greater-than value"; likewise AVX swaps operands of vpcmpgtq
; against the splat [19,19].  AVX512VL/BITALG use vpcmpltuq into a mask.
define <2 x i64> @ult_19_v2i64(<2 x i64> %0) {
; SSE2-LABEL: ult_19_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    psadbw %xmm1, %xmm0
; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483667,2147483667]
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ult_19_v2i64:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    psadbw %xmm1, %xmm0
; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483667,2147483667]
; SSE3-NEXT:    movdqa %xmm1, %xmm2
; SSE3-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE3-NEXT:    pand %xmm3, %xmm1
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE3-NEXT:    por %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ult_19_v2i64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    psadbw %xmm3, %xmm0
; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483667,2147483667]
; SSSE3-NEXT:    movdqa %xmm1, %xmm2
; SSSE3-NEXT:    pcmpgtd %xmm0, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSSE3-NEXT:    pand %xmm3, %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ult_19_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    psadbw %xmm3, %xmm0
; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483667,2147483667]
; SSE41-NEXT:    movdqa %xmm1, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm3, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ult_19_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [19,19]
; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ult_19_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [19,19]
; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ult_19_v2i64:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} xmm1 = [19,19]
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ult_19_v2i64:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ult_19_v2i64:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vmovdqa {{.*#+}} xmm1 = [19,19]
; BITALG_NOVLX-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ult_19_v2i64:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
; Reference IR under test: ctpop, unsigned less-than 19, sext i1 -> i64 mask.
  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
  %3 = icmp ult <2 x i64> %2, <i64 19, i64 19>
  %4 = sext <2 x i1> %3 to <2 x i64>
  ret <2 x i64> %4
}
23520
; popcount(x) ugt 19 on <2 x i64>, lowered per target.  Identical code shape
; to ugt_18 with the biased threshold bumped to 2147483667 (= 2^31 + 19) for
; the SSE pcmpgtd/pcmpeqd emulation; AVX compares with vpcmpgtq and the
; AVX512VL/BITALG variants use vpcmpnleuq into a mask register.
define <2 x i64> @ugt_19_v2i64(<2 x i64> %0) {
; SSE2-LABEL: ugt_19_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    psadbw %xmm1, %xmm0
; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483667,2147483667]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ugt_19_v2i64:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    psadbw %xmm1, %xmm0
; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483667,2147483667]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE3-NEXT:    pand %xmm3, %xmm1
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE3-NEXT:    por %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ugt_19_v2i64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    psadbw %xmm3, %xmm0
; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483667,2147483667]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pcmpgtd %xmm1, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSSE3-NEXT:    pand %xmm3, %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ugt_19_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    psadbw %xmm3, %xmm0
; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483667,2147483667]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm3, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ugt_19_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ugt_19_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ugt_19_v2i64:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ugt_19_v2i64:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ugt_19_v2i64:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ugt_19_v2i64:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
; Reference IR under test: ctpop, unsigned greater-than 19, sext i1 -> i64 mask.
  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
  %3 = icmp ugt <2 x i64> %2, <i64 19, i64 19>
  %4 = sext <2 x i1> %3 to <2 x i64>
  ret <2 x i64> %4
}
23704
; popcount(x) ult 20 on <2 x i64>, lowered per target.  Identical code shape
; to ult_19 with the biased threshold bumped to 2147483668 (= 2^31 + 20) for
; the constant-on-the-left SSE pcmpgtd emulation; AVX compares the splat
; [20,20] against the popcount with swapped vpcmpgtq operands, and the
; AVX512VL/BITALG variants use vpcmpltuq into a mask register.
define <2 x i64> @ult_20_v2i64(<2 x i64> %0) {
; SSE2-LABEL: ult_20_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    psadbw %xmm1, %xmm0
; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483668,2147483668]
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ult_20_v2i64:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    psadbw %xmm1, %xmm0
; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483668,2147483668]
; SSE3-NEXT:    movdqa %xmm1, %xmm2
; SSE3-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE3-NEXT:    pand %xmm3, %xmm1
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE3-NEXT:    por %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ult_20_v2i64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    psadbw %xmm3, %xmm0
; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483668,2147483668]
; SSSE3-NEXT:    movdqa %xmm1, %xmm2
; SSSE3-NEXT:    pcmpgtd %xmm0, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSSE3-NEXT:    pand %xmm3, %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ult_20_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    psadbw %xmm3, %xmm0
; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483668,2147483668]
; SSE41-NEXT:    movdqa %xmm1, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm3, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ult_20_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [20,20]
; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ult_20_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [20,20]
; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ult_20_v2i64:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} xmm1 = [20,20]
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ult_20_v2i64:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ult_20_v2i64:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vmovdqa {{.*#+}} xmm1 = [20,20]
; BITALG_NOVLX-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ult_20_v2i64:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
; Reference IR under test: ctpop, unsigned less-than 20, sext i1 -> i64 mask.
  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
  %3 = icmp ult <2 x i64> %2, <i64 20, i64 20>
  %4 = sext <2 x i1> %3 to <2 x i64>
  ret <2 x i64> %4
}
23892
; Checks lowering of ctpop(<2 x i64>) followed by an unsigned-greater-than
; compare against the constant 20, sign-extended to a <2 x i64> mask.
; NOTE(review): the CHECK lines below are autogenerated by
; utils/update_llc_test_checks.py (see RUN lines at the top of the file);
; regenerate them with that script rather than editing by hand.
; The SSE constant [2147483668,2147483668] is 0x80000014 per 32-bit lane,
; i.e. 20 with the sign bit set, used to emulate an unsigned compare with
; the signed pcmpgtd/pcmpeqd sequence.
define <2 x i64> @ugt_20_v2i64(<2 x i64> %0) {
; SSE2-LABEL: ugt_20_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    psadbw %xmm1, %xmm0
; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483668,2147483668]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ugt_20_v2i64:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    psadbw %xmm1, %xmm0
; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483668,2147483668]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE3-NEXT:    pand %xmm3, %xmm1
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE3-NEXT:    por %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ugt_20_v2i64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    psadbw %xmm3, %xmm0
; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483668,2147483668]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pcmpgtd %xmm1, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSSE3-NEXT:    pand %xmm3, %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ugt_20_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    psadbw %xmm3, %xmm0
; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483668,2147483668]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm3, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ugt_20_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ugt_20_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ugt_20_v2i64:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ugt_20_v2i64:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ugt_20_v2i64:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ugt_20_v2i64:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
  %3 = icmp ugt <2 x i64> %2, <i64 20, i64 20>
  %4 = sext <2 x i1> %3 to <2 x i64>
  ret <2 x i64> %4
}
24076
; Checks lowering of ctpop(<2 x i64>) followed by an unsigned-less-than
; compare against the constant 21, sign-extended to a <2 x i64> mask.
; NOTE(review): the CHECK lines below are autogenerated by
; utils/update_llc_test_checks.py (see RUN lines at the top of the file);
; regenerate them with that script rather than editing by hand.
; The SSE constant [2147483669,2147483669] is 0x80000015 per 32-bit lane,
; i.e. 21 with the sign bit set, used to emulate an unsigned compare with
; the signed pcmpgtd/pcmpeqd sequence (operands swapped vs. the ugt tests).
define <2 x i64> @ult_21_v2i64(<2 x i64> %0) {
; SSE2-LABEL: ult_21_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    psadbw %xmm1, %xmm0
; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483669,2147483669]
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ult_21_v2i64:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    psadbw %xmm1, %xmm0
; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483669,2147483669]
; SSE3-NEXT:    movdqa %xmm1, %xmm2
; SSE3-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE3-NEXT:    pand %xmm3, %xmm1
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE3-NEXT:    por %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ult_21_v2i64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    psadbw %xmm3, %xmm0
; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483669,2147483669]
; SSSE3-NEXT:    movdqa %xmm1, %xmm2
; SSSE3-NEXT:    pcmpgtd %xmm0, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSSE3-NEXT:    pand %xmm3, %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ult_21_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    psadbw %xmm3, %xmm0
; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483669,2147483669]
; SSE41-NEXT:    movdqa %xmm1, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm3, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ult_21_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [21,21]
; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ult_21_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [21,21]
; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ult_21_v2i64:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} xmm1 = [21,21]
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ult_21_v2i64:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ult_21_v2i64:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vmovdqa {{.*#+}} xmm1 = [21,21]
; BITALG_NOVLX-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ult_21_v2i64:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
  %3 = icmp ult <2 x i64> %2, <i64 21, i64 21>
  %4 = sext <2 x i1> %3 to <2 x i64>
  ret <2 x i64> %4
}
24264
; Checks lowering of ctpop(<2 x i64>) followed by an unsigned-greater-than
; compare against the constant 21, sign-extended to a <2 x i64> mask.
; NOTE(review): the CHECK lines below are autogenerated by
; utils/update_llc_test_checks.py (see RUN lines at the top of the file);
; regenerate them with that script rather than editing by hand.
; The SSE constant [2147483669,2147483669] is 0x80000015 per 32-bit lane,
; i.e. 21 with the sign bit set, used to emulate an unsigned compare with
; the signed pcmpgtd/pcmpeqd sequence.
define <2 x i64> @ugt_21_v2i64(<2 x i64> %0) {
; SSE2-LABEL: ugt_21_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    psadbw %xmm1, %xmm0
; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483669,2147483669]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ugt_21_v2i64:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    psadbw %xmm1, %xmm0
; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483669,2147483669]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE3-NEXT:    pand %xmm3, %xmm1
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE3-NEXT:    por %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ugt_21_v2i64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    psadbw %xmm3, %xmm0
; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483669,2147483669]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pcmpgtd %xmm1, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSSE3-NEXT:    pand %xmm3, %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ugt_21_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    psadbw %xmm3, %xmm0
; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483669,2147483669]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm3, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ugt_21_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ugt_21_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ugt_21_v2i64:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ugt_21_v2i64:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ugt_21_v2i64:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ugt_21_v2i64:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
  %3 = icmp ugt <2 x i64> %2, <i64 21, i64 21>
  %4 = sext <2 x i1> %3 to <2 x i64>
  ret <2 x i64> %4
}
24448
; Checks lowering of ctpop(<2 x i64>) followed by an unsigned-less-than
; compare against the constant 22, sign-extended to a <2 x i64> mask.
; NOTE(review): the CHECK lines below are autogenerated by
; utils/update_llc_test_checks.py (see RUN lines at the top of the file);
; regenerate them with that script rather than editing by hand.
; The SSE constant [2147483670,2147483670] is 0x80000016 per 32-bit lane,
; i.e. 22 with the sign bit set, used to emulate an unsigned compare with
; the signed pcmpgtd/pcmpeqd sequence (operands swapped vs. the ugt tests).
define <2 x i64> @ult_22_v2i64(<2 x i64> %0) {
; SSE2-LABEL: ult_22_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    psadbw %xmm1, %xmm0
; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483670,2147483670]
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ult_22_v2i64:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    psadbw %xmm1, %xmm0
; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483670,2147483670]
; SSE3-NEXT:    movdqa %xmm1, %xmm2
; SSE3-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE3-NEXT:    pand %xmm3, %xmm1
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE3-NEXT:    por %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ult_22_v2i64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    psadbw %xmm3, %xmm0
; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483670,2147483670]
; SSSE3-NEXT:    movdqa %xmm1, %xmm2
; SSSE3-NEXT:    pcmpgtd %xmm0, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSSE3-NEXT:    pand %xmm3, %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ult_22_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    psadbw %xmm3, %xmm0
; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483670,2147483670]
; SSE41-NEXT:    movdqa %xmm1, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm3, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ult_22_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [22,22]
; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ult_22_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [22,22]
; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ult_22_v2i64:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} xmm1 = [22,22]
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ult_22_v2i64:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ult_22_v2i64:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vmovdqa {{.*#+}} xmm1 = [22,22]
; BITALG_NOVLX-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ult_22_v2i64:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
  %3 = icmp ult <2 x i64> %2, <i64 22, i64 22>
  %4 = sext <2 x i1> %3 to <2 x i64>
  ret <2 x i64> %4
}
24636
define <2 x i64> @ugt_22_v2i64(<2 x i64> %0) {
; SSE2-LABEL: ugt_22_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    psadbw %xmm1, %xmm0
; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483670,2147483670]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ugt_22_v2i64:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    psadbw %xmm1, %xmm0
; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483670,2147483670]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE3-NEXT:    pand %xmm3, %xmm1
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE3-NEXT:    por %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ugt_22_v2i64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    psadbw %xmm3, %xmm0
; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483670,2147483670]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pcmpgtd %xmm1, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSSE3-NEXT:    pand %xmm3, %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ugt_22_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    psadbw %xmm3, %xmm0
; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483670,2147483670]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm3, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ugt_22_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ugt_22_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ugt_22_v2i64:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ugt_22_v2i64:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ugt_22_v2i64:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ugt_22_v2i64:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
; Per-lane popcount of the <2 x i64> input, compared unsigned-greater-than
; against splat(22); each i1 is sign-extended to an all-ones/all-zeros i64 mask.
; NOTE(review): the CHECK lines above are autogenerated by
; utils/update_llc_test_checks.py — do not hand-edit them; regenerate instead.
  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
  %3 = icmp ugt <2 x i64> %2, <i64 22, i64 22>
  %4 = sext <2 x i1> %3 to <2 x i64>
  ret <2 x i64> %4
}
24820
define <2 x i64> @ult_23_v2i64(<2 x i64> %0) {
; SSE2-LABEL: ult_23_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    psadbw %xmm1, %xmm0
; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483671,2147483671]
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ult_23_v2i64:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    psadbw %xmm1, %xmm0
; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483671,2147483671]
; SSE3-NEXT:    movdqa %xmm1, %xmm2
; SSE3-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE3-NEXT:    pand %xmm3, %xmm1
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE3-NEXT:    por %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ult_23_v2i64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    psadbw %xmm3, %xmm0
; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483671,2147483671]
; SSSE3-NEXT:    movdqa %xmm1, %xmm2
; SSSE3-NEXT:    pcmpgtd %xmm0, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSSE3-NEXT:    pand %xmm3, %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ult_23_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    psadbw %xmm3, %xmm0
; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483671,2147483671]
; SSE41-NEXT:    movdqa %xmm1, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm3, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ult_23_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [23,23]
; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ult_23_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [23,23]
; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ult_23_v2i64:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} xmm1 = [23,23]
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ult_23_v2i64:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ult_23_v2i64:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vmovdqa {{.*#+}} xmm1 = [23,23]
; BITALG_NOVLX-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ult_23_v2i64:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
; Per-lane popcount of the <2 x i64> input, compared unsigned-less-than
; against splat(23); each i1 is sign-extended to an all-ones/all-zeros i64 mask.
; NOTE(review): the CHECK lines above are autogenerated by
; utils/update_llc_test_checks.py — do not hand-edit them; regenerate instead.
  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
  %3 = icmp ult <2 x i64> %2, <i64 23, i64 23>
  %4 = sext <2 x i1> %3 to <2 x i64>
  ret <2 x i64> %4
}
25008
define <2 x i64> @ugt_23_v2i64(<2 x i64> %0) {
; SSE2-LABEL: ugt_23_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    psadbw %xmm1, %xmm0
; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483671,2147483671]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ugt_23_v2i64:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    psadbw %xmm1, %xmm0
; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483671,2147483671]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE3-NEXT:    pand %xmm3, %xmm1
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE3-NEXT:    por %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ugt_23_v2i64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    psadbw %xmm3, %xmm0
; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483671,2147483671]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pcmpgtd %xmm1, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSSE3-NEXT:    pand %xmm3, %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ugt_23_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    psadbw %xmm3, %xmm0
; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483671,2147483671]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm3, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ugt_23_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ugt_23_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ugt_23_v2i64:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ugt_23_v2i64:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ugt_23_v2i64:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ugt_23_v2i64:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
; Per-lane popcount of the <2 x i64> input, compared unsigned-greater-than
; against splat(23); each i1 is sign-extended to an all-ones/all-zeros i64 mask.
; NOTE(review): the CHECK lines above are autogenerated by
; utils/update_llc_test_checks.py — do not hand-edit them; regenerate instead.
  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
  %3 = icmp ugt <2 x i64> %2, <i64 23, i64 23>
  %4 = sext <2 x i1> %3 to <2 x i64>
  ret <2 x i64> %4
}
25192
define <2 x i64> @ult_24_v2i64(<2 x i64> %0) {
; SSE2-LABEL: ult_24_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    psadbw %xmm1, %xmm0
; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483672,2147483672]
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ult_24_v2i64:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    psadbw %xmm1, %xmm0
; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483672,2147483672]
; SSE3-NEXT:    movdqa %xmm1, %xmm2
; SSE3-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE3-NEXT:    pand %xmm3, %xmm1
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE3-NEXT:    por %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ult_24_v2i64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    psadbw %xmm3, %xmm0
; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483672,2147483672]
; SSSE3-NEXT:    movdqa %xmm1, %xmm2
; SSSE3-NEXT:    pcmpgtd %xmm0, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSSE3-NEXT:    pand %xmm3, %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ult_24_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    psadbw %xmm3, %xmm0
; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483672,2147483672]
; SSE41-NEXT:    movdqa %xmm1, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm3, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ult_24_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [24,24]
; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ult_24_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [24,24]
; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ult_24_v2i64:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} xmm1 = [24,24]
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ult_24_v2i64:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ult_24_v2i64:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vmovdqa {{.*#+}} xmm1 = [24,24]
; BITALG_NOVLX-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ult_24_v2i64:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
; Per-lane popcount of the <2 x i64> input, compared unsigned-less-than
; against splat(24); each i1 is sign-extended to an all-ones/all-zeros i64 mask.
; NOTE(review): the CHECK lines above are autogenerated by
; utils/update_llc_test_checks.py — do not hand-edit them; regenerate instead.
  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
  %3 = icmp ult <2 x i64> %2, <i64 24, i64 24>
  %4 = sext <2 x i1> %3 to <2 x i64>
  ret <2 x i64> %4
}
25380
; Checks lowering of: per-lane ctpop on <2 x i64>, compared 'icmp ugt 24',
; with the <2 x i1> result sign-extended to a <2 x i64> all-ones/all-zeros mask.
; (Gutter line numbers from the scraped page have been stripped so the file is
; valid IR and the FileCheck directives again begin with ';'.)
define <2 x i64> @ugt_24_v2i64(<2 x i64> %0) {
; SSE2-LABEL: ugt_24_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    psadbw %xmm1, %xmm0
; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483672,2147483672]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ugt_24_v2i64:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    psadbw %xmm1, %xmm0
; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483672,2147483672]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE3-NEXT:    pand %xmm3, %xmm1
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE3-NEXT:    por %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ugt_24_v2i64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    psadbw %xmm3, %xmm0
; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483672,2147483672]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pcmpgtd %xmm1, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSSE3-NEXT:    pand %xmm3, %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ugt_24_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    psadbw %xmm3, %xmm0
; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483672,2147483672]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm3, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ugt_24_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ugt_24_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ugt_24_v2i64:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ugt_24_v2i64:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ugt_24_v2i64:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ugt_24_v2i64:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
  %3 = icmp ugt <2 x i64> %2, <i64 24, i64 24>
  %4 = sext <2 x i1> %3 to <2 x i64>
  ret <2 x i64> %4
}
25564
; Checks lowering of: per-lane ctpop on <2 x i64>, compared 'icmp ult 25',
; with the <2 x i1> result sign-extended to a <2 x i64> all-ones/all-zeros mask.
; (Gutter line numbers from the scraped page have been stripped so the file is
; valid IR and the FileCheck directives again begin with ';'.)
define <2 x i64> @ult_25_v2i64(<2 x i64> %0) {
; SSE2-LABEL: ult_25_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    psadbw %xmm1, %xmm0
; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483673,2147483673]
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ult_25_v2i64:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    psadbw %xmm1, %xmm0
; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483673,2147483673]
; SSE3-NEXT:    movdqa %xmm1, %xmm2
; SSE3-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE3-NEXT:    pand %xmm3, %xmm1
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE3-NEXT:    por %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ult_25_v2i64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    psadbw %xmm3, %xmm0
; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483673,2147483673]
; SSSE3-NEXT:    movdqa %xmm1, %xmm2
; SSSE3-NEXT:    pcmpgtd %xmm0, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSSE3-NEXT:    pand %xmm3, %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ult_25_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    psadbw %xmm3, %xmm0
; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483673,2147483673]
; SSE41-NEXT:    movdqa %xmm1, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm3, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ult_25_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [25,25]
; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ult_25_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [25,25]
; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ult_25_v2i64:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} xmm1 = [25,25]
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ult_25_v2i64:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ult_25_v2i64:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vmovdqa {{.*#+}} xmm1 = [25,25]
; BITALG_NOVLX-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ult_25_v2i64:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
  %3 = icmp ult <2 x i64> %2, <i64 25, i64 25>
  %4 = sext <2 x i1> %3 to <2 x i64>
  ret <2 x i64> %4
}
25752
; Checks lowering of: per-lane ctpop on <2 x i64>, compared 'icmp ugt 25',
; with the <2 x i1> result sign-extended to a <2 x i64> all-ones/all-zeros mask.
; (Gutter line numbers from the scraped page have been stripped so the file is
; valid IR and the FileCheck directives again begin with ';'.)
define <2 x i64> @ugt_25_v2i64(<2 x i64> %0) {
; SSE2-LABEL: ugt_25_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    psadbw %xmm1, %xmm0
; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483673,2147483673]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ugt_25_v2i64:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    psadbw %xmm1, %xmm0
; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483673,2147483673]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE3-NEXT:    pand %xmm3, %xmm1
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE3-NEXT:    por %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ugt_25_v2i64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    psadbw %xmm3, %xmm0
; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483673,2147483673]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pcmpgtd %xmm1, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSSE3-NEXT:    pand %xmm3, %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ugt_25_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    psadbw %xmm3, %xmm0
; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483673,2147483673]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm3, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ugt_25_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ugt_25_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ugt_25_v2i64:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ugt_25_v2i64:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ugt_25_v2i64:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ugt_25_v2i64:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
  %3 = icmp ugt <2 x i64> %2, <i64 25, i64 25>
  %4 = sext <2 x i1> %3 to <2 x i64>
  ret <2 x i64> %4
}
25936
25937define <2 x i64> @ult_26_v2i64(<2 x i64> %0) {
25938; SSE2-LABEL: ult_26_v2i64:
25939; SSE2:       # %bb.0:
25940; SSE2-NEXT:    movdqa %xmm0, %xmm1
25941; SSE2-NEXT:    psrlw $1, %xmm1
25942; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
25943; SSE2-NEXT:    psubb %xmm1, %xmm0
25944; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
25945; SSE2-NEXT:    movdqa %xmm0, %xmm2
25946; SSE2-NEXT:    pand %xmm1, %xmm2
25947; SSE2-NEXT:    psrlw $2, %xmm0
25948; SSE2-NEXT:    pand %xmm1, %xmm0
25949; SSE2-NEXT:    paddb %xmm2, %xmm0
25950; SSE2-NEXT:    movdqa %xmm0, %xmm1
25951; SSE2-NEXT:    psrlw $4, %xmm1
25952; SSE2-NEXT:    paddb %xmm0, %xmm1
25953; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
25954; SSE2-NEXT:    pxor %xmm0, %xmm0
25955; SSE2-NEXT:    psadbw %xmm1, %xmm0
25956; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
25957; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483674,2147483674]
25958; SSE2-NEXT:    movdqa %xmm1, %xmm2
25959; SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
25960; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
25961; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
25962; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
25963; SSE2-NEXT:    pand %xmm3, %xmm1
25964; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
25965; SSE2-NEXT:    por %xmm1, %xmm0
25966; SSE2-NEXT:    retq
25967;
25968; SSE3-LABEL: ult_26_v2i64:
25969; SSE3:       # %bb.0:
25970; SSE3-NEXT:    movdqa %xmm0, %xmm1
25971; SSE3-NEXT:    psrlw $1, %xmm1
25972; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
25973; SSE3-NEXT:    psubb %xmm1, %xmm0
25974; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
25975; SSE3-NEXT:    movdqa %xmm0, %xmm2
25976; SSE3-NEXT:    pand %xmm1, %xmm2
25977; SSE3-NEXT:    psrlw $2, %xmm0
25978; SSE3-NEXT:    pand %xmm1, %xmm0
25979; SSE3-NEXT:    paddb %xmm2, %xmm0
25980; SSE3-NEXT:    movdqa %xmm0, %xmm1
25981; SSE3-NEXT:    psrlw $4, %xmm1
25982; SSE3-NEXT:    paddb %xmm0, %xmm1
25983; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
25984; SSE3-NEXT:    pxor %xmm0, %xmm0
25985; SSE3-NEXT:    psadbw %xmm1, %xmm0
25986; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
25987; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483674,2147483674]
25988; SSE3-NEXT:    movdqa %xmm1, %xmm2
25989; SSE3-NEXT:    pcmpgtd %xmm0, %xmm2
25990; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
25991; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
25992; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
25993; SSE3-NEXT:    pand %xmm3, %xmm1
25994; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
25995; SSE3-NEXT:    por %xmm1, %xmm0
25996; SSE3-NEXT:    retq
25997;
25998; SSSE3-LABEL: ult_26_v2i64:
25999; SSSE3:       # %bb.0:
26000; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
26001; SSSE3-NEXT:    movdqa %xmm0, %xmm2
26002; SSSE3-NEXT:    pand %xmm1, %xmm2
26003; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
26004; SSSE3-NEXT:    movdqa %xmm3, %xmm4
26005; SSSE3-NEXT:    pshufb %xmm2, %xmm4
26006; SSSE3-NEXT:    psrlw $4, %xmm0
26007; SSSE3-NEXT:    pand %xmm1, %xmm0
26008; SSSE3-NEXT:    pshufb %xmm0, %xmm3
26009; SSSE3-NEXT:    paddb %xmm4, %xmm3
26010; SSSE3-NEXT:    pxor %xmm0, %xmm0
26011; SSSE3-NEXT:    psadbw %xmm3, %xmm0
26012; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
26013; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483674,2147483674]
26014; SSSE3-NEXT:    movdqa %xmm1, %xmm2
26015; SSSE3-NEXT:    pcmpgtd %xmm0, %xmm2
26016; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
26017; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
26018; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
26019; SSSE3-NEXT:    pand %xmm3, %xmm1
26020; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
26021; SSSE3-NEXT:    por %xmm1, %xmm0
26022; SSSE3-NEXT:    retq
26023;
26024; SSE41-LABEL: ult_26_v2i64:
26025; SSE41:       # %bb.0:
26026; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
26027; SSE41-NEXT:    movdqa %xmm0, %xmm2
26028; SSE41-NEXT:    pand %xmm1, %xmm2
26029; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
26030; SSE41-NEXT:    movdqa %xmm3, %xmm4
26031; SSE41-NEXT:    pshufb %xmm2, %xmm4
26032; SSE41-NEXT:    psrlw $4, %xmm0
26033; SSE41-NEXT:    pand %xmm1, %xmm0
26034; SSE41-NEXT:    pshufb %xmm0, %xmm3
26035; SSE41-NEXT:    paddb %xmm4, %xmm3
26036; SSE41-NEXT:    pxor %xmm0, %xmm0
26037; SSE41-NEXT:    psadbw %xmm3, %xmm0
26038; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
26039; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483674,2147483674]
26040; SSE41-NEXT:    movdqa %xmm1, %xmm2
26041; SSE41-NEXT:    pcmpgtd %xmm0, %xmm2
26042; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
26043; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
26044; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
26045; SSE41-NEXT:    pand %xmm3, %xmm1
26046; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
26047; SSE41-NEXT:    por %xmm1, %xmm0
26048; SSE41-NEXT:    retq
26049;
26050; AVX1-LABEL: ult_26_v2i64:
26051; AVX1:       # %bb.0:
26052; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
26053; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
26054; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
26055; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
26056; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
26057; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
26058; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
26059; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
26060; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
26061; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
26062; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [26,26]
26063; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
26064; AVX1-NEXT:    retq
26065;
26066; AVX2-LABEL: ult_26_v2i64:
26067; AVX2:       # %bb.0:
26068; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
26069; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
26070; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
26071; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
26072; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
26073; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
26074; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
26075; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
26076; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
26077; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
26078; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [26,26]
26079; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
26080; AVX2-NEXT:    retq
26081;
26082; AVX512VPOPCNTDQ-LABEL: ult_26_v2i64:
26083; AVX512VPOPCNTDQ:       # %bb.0:
26084; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
26085; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
26086; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} xmm1 = [26,26]
26087; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
26088; AVX512VPOPCNTDQ-NEXT:    vzeroupper
26089; AVX512VPOPCNTDQ-NEXT:    retq
26090;
26091; AVX512VPOPCNTDQVL-LABEL: ult_26_v2i64:
26092; AVX512VPOPCNTDQVL:       # %bb.0:
26093; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
26094; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
26095; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
26096; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
26097; AVX512VPOPCNTDQVL-NEXT:    retq
26098;
26099; BITALG_NOVLX-LABEL: ult_26_v2i64:
26100; BITALG_NOVLX:       # %bb.0:
26101; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
26102; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
26103; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
26104; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
26105; BITALG_NOVLX-NEXT:    vmovdqa {{.*#+}} xmm1 = [26,26]
26106; BITALG_NOVLX-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
26107; BITALG_NOVLX-NEXT:    vzeroupper
26108; BITALG_NOVLX-NEXT:    retq
26109;
26110; BITALG-LABEL: ult_26_v2i64:
26111; BITALG:       # %bb.0:
26112; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
26113; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
26114; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
26115; BITALG-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
26116; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
26117; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
26118; BITALG-NEXT:    retq
26119  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
26120  %3 = icmp ult <2 x i64> %2, <i64 26, i64 26>
26121  %4 = sext <2 x i1> %3 to <2 x i64>
26122  ret <2 x i64> %4
26123}
26124
; Per-lane test: sign-extended mask of (popcount(lane) > 26) for <2 x i64>.
; Pre-SSE4.2 targets have no 64-bit compare, so the scalar comparison is done
; with pcmpgtd/pcmpeqd after biasing via `por` with a sign-bit constant
; (26 + 0x80000000 = 2147483674). The CHECK lines below are autogenerated by
; utils/update_llc_test_checks.py -- regenerate rather than hand-edit.
define <2 x i64> @ugt_26_v2i64(<2 x i64> %0) {
; SSE2-LABEL: ugt_26_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    psadbw %xmm1, %xmm0
; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483674,2147483674]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ugt_26_v2i64:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    psadbw %xmm1, %xmm0
; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483674,2147483674]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE3-NEXT:    pand %xmm3, %xmm1
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE3-NEXT:    por %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ugt_26_v2i64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    psadbw %xmm3, %xmm0
; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483674,2147483674]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pcmpgtd %xmm1, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSSE3-NEXT:    pand %xmm3, %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ugt_26_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    psadbw %xmm3, %xmm0
; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483674,2147483674]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm3, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ugt_26_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ugt_26_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ugt_26_v2i64:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ugt_26_v2i64:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ugt_26_v2i64:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ugt_26_v2i64:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) ; per-lane popcount
  %3 = icmp ugt <2 x i64> %2, <i64 26, i64 26>
  %4 = sext <2 x i1> %3 to <2 x i64> ; all-ones / all-zeros lane mask
  ret <2 x i64> %4
}
26308
; Per-lane test: sign-extended mask of (popcount(lane) < 27) for <2 x i64>.
; For ult the SSE lowering swaps the pcmpgtd operands (constant > value) and
; biases both sides with the sign bit (27 + 0x80000000 = 2147483675); AVX
; keeps the constant [27,27] in a register for vpcmpgtq. The CHECK lines are
; autogenerated by utils/update_llc_test_checks.py -- regenerate, don't edit.
define <2 x i64> @ult_27_v2i64(<2 x i64> %0) {
; SSE2-LABEL: ult_27_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    psadbw %xmm1, %xmm0
; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483675,2147483675]
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ult_27_v2i64:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    psadbw %xmm1, %xmm0
; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483675,2147483675]
; SSE3-NEXT:    movdqa %xmm1, %xmm2
; SSE3-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE3-NEXT:    pand %xmm3, %xmm1
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE3-NEXT:    por %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ult_27_v2i64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    psadbw %xmm3, %xmm0
; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483675,2147483675]
; SSSE3-NEXT:    movdqa %xmm1, %xmm2
; SSSE3-NEXT:    pcmpgtd %xmm0, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSSE3-NEXT:    pand %xmm3, %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ult_27_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    psadbw %xmm3, %xmm0
; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483675,2147483675]
; SSE41-NEXT:    movdqa %xmm1, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm3, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ult_27_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [27,27]
; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ult_27_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [27,27]
; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ult_27_v2i64:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} xmm1 = [27,27]
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ult_27_v2i64:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ult_27_v2i64:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vmovdqa {{.*#+}} xmm1 = [27,27]
; BITALG_NOVLX-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ult_27_v2i64:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) ; per-lane popcount
  %3 = icmp ult <2 x i64> %2, <i64 27, i64 27>
  %4 = sext <2 x i1> %3 to <2 x i64> ; all-ones / all-zeros lane mask
  ret <2 x i64> %4
}
26496
; Per-lane test: sign-extended mask of (popcount(lane) > 27) for <2 x i64>.
; Same lowering shape as ugt_26 above, with the biased threshold constant
; 27 + 0x80000000 = 2147483675 on the SSE paths. The CHECK lines below are
; autogenerated by utils/update_llc_test_checks.py -- regenerate rather than
; hand-edit them.
define <2 x i64> @ugt_27_v2i64(<2 x i64> %0) {
; SSE2-LABEL: ugt_27_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    psadbw %xmm1, %xmm0
; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483675,2147483675]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ugt_27_v2i64:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    psadbw %xmm1, %xmm0
; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483675,2147483675]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE3-NEXT:    pand %xmm3, %xmm1
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE3-NEXT:    por %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ugt_27_v2i64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    psadbw %xmm3, %xmm0
; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483675,2147483675]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pcmpgtd %xmm1, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSSE3-NEXT:    pand %xmm3, %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ugt_27_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    psadbw %xmm3, %xmm0
; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483675,2147483675]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm3, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ugt_27_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ugt_27_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ugt_27_v2i64:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ugt_27_v2i64:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ugt_27_v2i64:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ugt_27_v2i64:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) ; per-lane popcount
  %3 = icmp ugt <2 x i64> %2, <i64 27, i64 27>
  %4 = sext <2 x i1> %3 to <2 x i64> ; all-ones / all-zeros lane mask
  ret <2 x i64> %4
}
26680
26681define <2 x i64> @ult_28_v2i64(<2 x i64> %0) {
26682; SSE2-LABEL: ult_28_v2i64:
26683; SSE2:       # %bb.0:
26684; SSE2-NEXT:    movdqa %xmm0, %xmm1
26685; SSE2-NEXT:    psrlw $1, %xmm1
26686; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
26687; SSE2-NEXT:    psubb %xmm1, %xmm0
26688; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
26689; SSE2-NEXT:    movdqa %xmm0, %xmm2
26690; SSE2-NEXT:    pand %xmm1, %xmm2
26691; SSE2-NEXT:    psrlw $2, %xmm0
26692; SSE2-NEXT:    pand %xmm1, %xmm0
26693; SSE2-NEXT:    paddb %xmm2, %xmm0
26694; SSE2-NEXT:    movdqa %xmm0, %xmm1
26695; SSE2-NEXT:    psrlw $4, %xmm1
26696; SSE2-NEXT:    paddb %xmm0, %xmm1
26697; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
26698; SSE2-NEXT:    pxor %xmm0, %xmm0
26699; SSE2-NEXT:    psadbw %xmm1, %xmm0
26700; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
26701; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483676,2147483676]
26702; SSE2-NEXT:    movdqa %xmm1, %xmm2
26703; SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
26704; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
26705; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
26706; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
26707; SSE2-NEXT:    pand %xmm3, %xmm1
26708; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
26709; SSE2-NEXT:    por %xmm1, %xmm0
26710; SSE2-NEXT:    retq
26711;
26712; SSE3-LABEL: ult_28_v2i64:
26713; SSE3:       # %bb.0:
26714; SSE3-NEXT:    movdqa %xmm0, %xmm1
26715; SSE3-NEXT:    psrlw $1, %xmm1
26716; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
26717; SSE3-NEXT:    psubb %xmm1, %xmm0
26718; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
26719; SSE3-NEXT:    movdqa %xmm0, %xmm2
26720; SSE3-NEXT:    pand %xmm1, %xmm2
26721; SSE3-NEXT:    psrlw $2, %xmm0
26722; SSE3-NEXT:    pand %xmm1, %xmm0
26723; SSE3-NEXT:    paddb %xmm2, %xmm0
26724; SSE3-NEXT:    movdqa %xmm0, %xmm1
26725; SSE3-NEXT:    psrlw $4, %xmm1
26726; SSE3-NEXT:    paddb %xmm0, %xmm1
26727; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
26728; SSE3-NEXT:    pxor %xmm0, %xmm0
26729; SSE3-NEXT:    psadbw %xmm1, %xmm0
26730; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
26731; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483676,2147483676]
26732; SSE3-NEXT:    movdqa %xmm1, %xmm2
26733; SSE3-NEXT:    pcmpgtd %xmm0, %xmm2
26734; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
26735; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
26736; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
26737; SSE3-NEXT:    pand %xmm3, %xmm1
26738; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
26739; SSE3-NEXT:    por %xmm1, %xmm0
26740; SSE3-NEXT:    retq
26741;
26742; SSSE3-LABEL: ult_28_v2i64:
26743; SSSE3:       # %bb.0:
26744; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
26745; SSSE3-NEXT:    movdqa %xmm0, %xmm2
26746; SSSE3-NEXT:    pand %xmm1, %xmm2
26747; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
26748; SSSE3-NEXT:    movdqa %xmm3, %xmm4
26749; SSSE3-NEXT:    pshufb %xmm2, %xmm4
26750; SSSE3-NEXT:    psrlw $4, %xmm0
26751; SSSE3-NEXT:    pand %xmm1, %xmm0
26752; SSSE3-NEXT:    pshufb %xmm0, %xmm3
26753; SSSE3-NEXT:    paddb %xmm4, %xmm3
26754; SSSE3-NEXT:    pxor %xmm0, %xmm0
26755; SSSE3-NEXT:    psadbw %xmm3, %xmm0
26756; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
26757; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483676,2147483676]
26758; SSSE3-NEXT:    movdqa %xmm1, %xmm2
26759; SSSE3-NEXT:    pcmpgtd %xmm0, %xmm2
26760; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
26761; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
26762; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
26763; SSSE3-NEXT:    pand %xmm3, %xmm1
26764; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
26765; SSSE3-NEXT:    por %xmm1, %xmm0
26766; SSSE3-NEXT:    retq
26767;
26768; SSE41-LABEL: ult_28_v2i64:
26769; SSE41:       # %bb.0:
26770; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
26771; SSE41-NEXT:    movdqa %xmm0, %xmm2
26772; SSE41-NEXT:    pand %xmm1, %xmm2
26773; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
26774; SSE41-NEXT:    movdqa %xmm3, %xmm4
26775; SSE41-NEXT:    pshufb %xmm2, %xmm4
26776; SSE41-NEXT:    psrlw $4, %xmm0
26777; SSE41-NEXT:    pand %xmm1, %xmm0
26778; SSE41-NEXT:    pshufb %xmm0, %xmm3
26779; SSE41-NEXT:    paddb %xmm4, %xmm3
26780; SSE41-NEXT:    pxor %xmm0, %xmm0
26781; SSE41-NEXT:    psadbw %xmm3, %xmm0
26782; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
26783; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483676,2147483676]
26784; SSE41-NEXT:    movdqa %xmm1, %xmm2
26785; SSE41-NEXT:    pcmpgtd %xmm0, %xmm2
26786; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
26787; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
26788; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
26789; SSE41-NEXT:    pand %xmm3, %xmm1
26790; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
26791; SSE41-NEXT:    por %xmm1, %xmm0
26792; SSE41-NEXT:    retq
26793;
26794; AVX1-LABEL: ult_28_v2i64:
26795; AVX1:       # %bb.0:
26796; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
26797; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
26798; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
26799; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
26800; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
26801; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
26802; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
26803; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
26804; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
26805; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
26806; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [28,28]
26807; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
26808; AVX1-NEXT:    retq
26809;
26810; AVX2-LABEL: ult_28_v2i64:
26811; AVX2:       # %bb.0:
26812; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
26813; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
26814; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
26815; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
26816; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
26817; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
26818; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
26819; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
26820; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
26821; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
26822; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [28,28]
26823; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
26824; AVX2-NEXT:    retq
26825;
26826; AVX512VPOPCNTDQ-LABEL: ult_28_v2i64:
26827; AVX512VPOPCNTDQ:       # %bb.0:
26828; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
26829; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
26830; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} xmm1 = [28,28]
26831; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
26832; AVX512VPOPCNTDQ-NEXT:    vzeroupper
26833; AVX512VPOPCNTDQ-NEXT:    retq
26834;
26835; AVX512VPOPCNTDQVL-LABEL: ult_28_v2i64:
26836; AVX512VPOPCNTDQVL:       # %bb.0:
26837; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
26838; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
26839; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
26840; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
26841; AVX512VPOPCNTDQVL-NEXT:    retq
26842;
26843; BITALG_NOVLX-LABEL: ult_28_v2i64:
26844; BITALG_NOVLX:       # %bb.0:
26845; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
26846; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
26847; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
26848; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
26849; BITALG_NOVLX-NEXT:    vmovdqa {{.*#+}} xmm1 = [28,28]
26850; BITALG_NOVLX-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
26851; BITALG_NOVLX-NEXT:    vzeroupper
26852; BITALG_NOVLX-NEXT:    retq
26853;
26854; BITALG-LABEL: ult_28_v2i64:
26855; BITALG:       # %bb.0:
26856; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
26857; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
26858; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
26859; BITALG-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
26860; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
26861; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
26862; BITALG-NEXT:    retq
26863  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
26864  %3 = icmp ult <2 x i64> %2, <i64 28, i64 28>
26865  %4 = sext <2 x i1> %3 to <2 x i64>
26866  ret <2 x i64> %4
26867}
26868
define <2 x i64> @ugt_28_v2i64(<2 x i64> %0) {
; Checks codegen of an unsigned "popcount(x) > 28" compare on <2 x i64>,
; sign-extended to an all-ones/all-zeros mask (see the IR at the end of the
; function). CHECK lines were autogenerated by utils/update_llc_test_checks.py;
; regenerate with that script rather than editing them by hand.
; NOTE(review): 2147483676 = 0x8000001C = 2^31 + 28 — presumably the unsigned
; compare is biased into signed range (via the `por` of the sign bits) so
; pcmpgtd can be used; confirm against the constant-pool data if relied upon.
; SSE2-LABEL: ugt_28_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    psadbw %xmm1, %xmm0
; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483676,2147483676]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ugt_28_v2i64:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    psadbw %xmm1, %xmm0
; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483676,2147483676]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE3-NEXT:    pand %xmm3, %xmm1
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE3-NEXT:    por %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ugt_28_v2i64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    psadbw %xmm3, %xmm0
; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483676,2147483676]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pcmpgtd %xmm1, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSSE3-NEXT:    pand %xmm3, %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ugt_28_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    psadbw %xmm3, %xmm0
; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483676,2147483676]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm3, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ugt_28_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ugt_28_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ugt_28_v2i64:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ugt_28_v2i64:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ugt_28_v2i64:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ugt_28_v2i64:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
; IR under test: vector ctpop, unsigned > 28 compare, sext i1 -> i64 mask.
  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
  %3 = icmp ugt <2 x i64> %2, <i64 28, i64 28>
  %4 = sext <2 x i1> %3 to <2 x i64>
  ret <2 x i64> %4
}
27052
define <2 x i64> @ult_29_v2i64(<2 x i64> %0) {
; Checks codegen of an unsigned "popcount(x) < 29" compare on <2 x i64>,
; sign-extended to an all-ones/all-zeros mask (see the IR at the end of the
; function). CHECK lines were autogenerated by utils/update_llc_test_checks.py;
; regenerate with that script rather than editing them by hand.
; NOTE(review): 2147483677 = 0x8000001D = 2^31 + 29 — presumably the unsigned
; compare is biased into signed range (via the `por` of the sign bits) so
; pcmpgtd can be used; confirm against the constant-pool data if relied upon.
; SSE2-LABEL: ult_29_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    psadbw %xmm1, %xmm0
; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483677,2147483677]
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ult_29_v2i64:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    psadbw %xmm1, %xmm0
; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483677,2147483677]
; SSE3-NEXT:    movdqa %xmm1, %xmm2
; SSE3-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE3-NEXT:    pand %xmm3, %xmm1
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE3-NEXT:    por %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ult_29_v2i64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    psadbw %xmm3, %xmm0
; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483677,2147483677]
; SSSE3-NEXT:    movdqa %xmm1, %xmm2
; SSSE3-NEXT:    pcmpgtd %xmm0, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSSE3-NEXT:    pand %xmm3, %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ult_29_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    psadbw %xmm3, %xmm0
; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483677,2147483677]
; SSE41-NEXT:    movdqa %xmm1, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm3, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ult_29_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [29,29]
; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ult_29_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [29,29]
; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ult_29_v2i64:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} xmm1 = [29,29]
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ult_29_v2i64:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ult_29_v2i64:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vmovdqa {{.*#+}} xmm1 = [29,29]
; BITALG_NOVLX-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ult_29_v2i64:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
; IR under test: vector ctpop, unsigned < 29 compare, sext i1 -> i64 mask.
  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
  %3 = icmp ult <2 x i64> %2, <i64 29, i64 29>
  %4 = sext <2 x i1> %3 to <2 x i64>
  ret <2 x i64> %4
}
27240
define <2 x i64> @ugt_29_v2i64(<2 x i64> %0) {
; Checks codegen of an unsigned "popcount(x) > 29" compare on <2 x i64>,
; sign-extended to an all-ones/all-zeros mask (see the IR at the end of the
; function). CHECK lines were autogenerated by utils/update_llc_test_checks.py;
; regenerate with that script rather than editing them by hand.
; NOTE(review): 2147483677 = 0x8000001D = 2^31 + 29 — presumably the unsigned
; compare is biased into signed range (via the `por` of the sign bits) so
; pcmpgtd can be used; confirm against the constant-pool data if relied upon.
; SSE2-LABEL: ugt_29_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    psadbw %xmm1, %xmm0
; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483677,2147483677]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ugt_29_v2i64:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    psadbw %xmm1, %xmm0
; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483677,2147483677]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE3-NEXT:    pand %xmm3, %xmm1
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE3-NEXT:    por %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ugt_29_v2i64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    psadbw %xmm3, %xmm0
; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483677,2147483677]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pcmpgtd %xmm1, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSSE3-NEXT:    pand %xmm3, %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ugt_29_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    psadbw %xmm3, %xmm0
; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483677,2147483677]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm3, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ugt_29_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ugt_29_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ugt_29_v2i64:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ugt_29_v2i64:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ugt_29_v2i64:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ugt_29_v2i64:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
; IR under test: vector ctpop, unsigned > 29 compare, sext i1 -> i64 mask.
  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
  %3 = icmp ugt <2 x i64> %2, <i64 29, i64 29>
  %4 = sext <2 x i1> %3 to <2 x i64>
  ret <2 x i64> %4
}
27424
27425define <2 x i64> @ult_30_v2i64(<2 x i64> %0) {
27426; SSE2-LABEL: ult_30_v2i64:
27427; SSE2:       # %bb.0:
27428; SSE2-NEXT:    movdqa %xmm0, %xmm1
27429; SSE2-NEXT:    psrlw $1, %xmm1
27430; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
27431; SSE2-NEXT:    psubb %xmm1, %xmm0
27432; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
27433; SSE2-NEXT:    movdqa %xmm0, %xmm2
27434; SSE2-NEXT:    pand %xmm1, %xmm2
27435; SSE2-NEXT:    psrlw $2, %xmm0
27436; SSE2-NEXT:    pand %xmm1, %xmm0
27437; SSE2-NEXT:    paddb %xmm2, %xmm0
27438; SSE2-NEXT:    movdqa %xmm0, %xmm1
27439; SSE2-NEXT:    psrlw $4, %xmm1
27440; SSE2-NEXT:    paddb %xmm0, %xmm1
27441; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
27442; SSE2-NEXT:    pxor %xmm0, %xmm0
27443; SSE2-NEXT:    psadbw %xmm1, %xmm0
27444; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
27445; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483678,2147483678]
27446; SSE2-NEXT:    movdqa %xmm1, %xmm2
27447; SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
27448; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
27449; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
27450; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
27451; SSE2-NEXT:    pand %xmm3, %xmm1
27452; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
27453; SSE2-NEXT:    por %xmm1, %xmm0
27454; SSE2-NEXT:    retq
27455;
27456; SSE3-LABEL: ult_30_v2i64:
27457; SSE3:       # %bb.0:
27458; SSE3-NEXT:    movdqa %xmm0, %xmm1
27459; SSE3-NEXT:    psrlw $1, %xmm1
27460; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
27461; SSE3-NEXT:    psubb %xmm1, %xmm0
27462; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
27463; SSE3-NEXT:    movdqa %xmm0, %xmm2
27464; SSE3-NEXT:    pand %xmm1, %xmm2
27465; SSE3-NEXT:    psrlw $2, %xmm0
27466; SSE3-NEXT:    pand %xmm1, %xmm0
27467; SSE3-NEXT:    paddb %xmm2, %xmm0
27468; SSE3-NEXT:    movdqa %xmm0, %xmm1
27469; SSE3-NEXT:    psrlw $4, %xmm1
27470; SSE3-NEXT:    paddb %xmm0, %xmm1
27471; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
27472; SSE3-NEXT:    pxor %xmm0, %xmm0
27473; SSE3-NEXT:    psadbw %xmm1, %xmm0
27474; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
27475; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483678,2147483678]
27476; SSE3-NEXT:    movdqa %xmm1, %xmm2
27477; SSE3-NEXT:    pcmpgtd %xmm0, %xmm2
27478; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
27479; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
27480; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
27481; SSE3-NEXT:    pand %xmm3, %xmm1
27482; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
27483; SSE3-NEXT:    por %xmm1, %xmm0
27484; SSE3-NEXT:    retq
27485;
27486; SSSE3-LABEL: ult_30_v2i64:
27487; SSSE3:       # %bb.0:
27488; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
27489; SSSE3-NEXT:    movdqa %xmm0, %xmm2
27490; SSSE3-NEXT:    pand %xmm1, %xmm2
27491; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
27492; SSSE3-NEXT:    movdqa %xmm3, %xmm4
27493; SSSE3-NEXT:    pshufb %xmm2, %xmm4
27494; SSSE3-NEXT:    psrlw $4, %xmm0
27495; SSSE3-NEXT:    pand %xmm1, %xmm0
27496; SSSE3-NEXT:    pshufb %xmm0, %xmm3
27497; SSSE3-NEXT:    paddb %xmm4, %xmm3
27498; SSSE3-NEXT:    pxor %xmm0, %xmm0
27499; SSSE3-NEXT:    psadbw %xmm3, %xmm0
27500; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
27501; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483678,2147483678]
27502; SSSE3-NEXT:    movdqa %xmm1, %xmm2
27503; SSSE3-NEXT:    pcmpgtd %xmm0, %xmm2
27504; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
27505; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
27506; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
27507; SSSE3-NEXT:    pand %xmm3, %xmm1
27508; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
27509; SSSE3-NEXT:    por %xmm1, %xmm0
27510; SSSE3-NEXT:    retq
27511;
27512; SSE41-LABEL: ult_30_v2i64:
27513; SSE41:       # %bb.0:
27514; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
27515; SSE41-NEXT:    movdqa %xmm0, %xmm2
27516; SSE41-NEXT:    pand %xmm1, %xmm2
27517; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
27518; SSE41-NEXT:    movdqa %xmm3, %xmm4
27519; SSE41-NEXT:    pshufb %xmm2, %xmm4
27520; SSE41-NEXT:    psrlw $4, %xmm0
27521; SSE41-NEXT:    pand %xmm1, %xmm0
27522; SSE41-NEXT:    pshufb %xmm0, %xmm3
27523; SSE41-NEXT:    paddb %xmm4, %xmm3
27524; SSE41-NEXT:    pxor %xmm0, %xmm0
27525; SSE41-NEXT:    psadbw %xmm3, %xmm0
27526; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
27527; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483678,2147483678]
27528; SSE41-NEXT:    movdqa %xmm1, %xmm2
27529; SSE41-NEXT:    pcmpgtd %xmm0, %xmm2
27530; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
27531; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
27532; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
27533; SSE41-NEXT:    pand %xmm3, %xmm1
27534; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
27535; SSE41-NEXT:    por %xmm1, %xmm0
27536; SSE41-NEXT:    retq
27537;
27538; AVX1-LABEL: ult_30_v2i64:
27539; AVX1:       # %bb.0:
27540; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
27541; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
27542; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
27543; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
27544; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
27545; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
27546; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
27547; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
27548; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
27549; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
27550; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [30,30]
27551; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
27552; AVX1-NEXT:    retq
27553;
27554; AVX2-LABEL: ult_30_v2i64:
27555; AVX2:       # %bb.0:
27556; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
27557; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
27558; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
27559; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
27560; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
27561; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
27562; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
27563; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
27564; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
27565; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
27566; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [30,30]
27567; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
27568; AVX2-NEXT:    retq
27569;
27570; AVX512VPOPCNTDQ-LABEL: ult_30_v2i64:
27571; AVX512VPOPCNTDQ:       # %bb.0:
27572; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
27573; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
27574; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} xmm1 = [30,30]
27575; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
27576; AVX512VPOPCNTDQ-NEXT:    vzeroupper
27577; AVX512VPOPCNTDQ-NEXT:    retq
27578;
27579; AVX512VPOPCNTDQVL-LABEL: ult_30_v2i64:
27580; AVX512VPOPCNTDQVL:       # %bb.0:
27581; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
27582; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
27583; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
27584; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
27585; AVX512VPOPCNTDQVL-NEXT:    retq
27586;
27587; BITALG_NOVLX-LABEL: ult_30_v2i64:
27588; BITALG_NOVLX:       # %bb.0:
27589; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
27590; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
27591; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
27592; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
27593; BITALG_NOVLX-NEXT:    vmovdqa {{.*#+}} xmm1 = [30,30]
27594; BITALG_NOVLX-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
27595; BITALG_NOVLX-NEXT:    vzeroupper
27596; BITALG_NOVLX-NEXT:    retq
27597;
27598; BITALG-LABEL: ult_30_v2i64:
27599; BITALG:       # %bb.0:
27600; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
27601; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
27602; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
27603; BITALG-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
27604; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
27605; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
27606; BITALG-NEXT:    retq
27607  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
27608  %3 = icmp ult <2 x i64> %2, <i64 30, i64 30>
27609  %4 = sext <2 x i1> %3 to <2 x i64>
27610  ret <2 x i64> %4
27611}
27612
27613define <2 x i64> @ugt_30_v2i64(<2 x i64> %0) {
27614; SSE2-LABEL: ugt_30_v2i64:
27615; SSE2:       # %bb.0:
27616; SSE2-NEXT:    movdqa %xmm0, %xmm1
27617; SSE2-NEXT:    psrlw $1, %xmm1
27618; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
27619; SSE2-NEXT:    psubb %xmm1, %xmm0
27620; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
27621; SSE2-NEXT:    movdqa %xmm0, %xmm2
27622; SSE2-NEXT:    pand %xmm1, %xmm2
27623; SSE2-NEXT:    psrlw $2, %xmm0
27624; SSE2-NEXT:    pand %xmm1, %xmm0
27625; SSE2-NEXT:    paddb %xmm2, %xmm0
27626; SSE2-NEXT:    movdqa %xmm0, %xmm1
27627; SSE2-NEXT:    psrlw $4, %xmm1
27628; SSE2-NEXT:    paddb %xmm0, %xmm1
27629; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
27630; SSE2-NEXT:    pxor %xmm0, %xmm0
27631; SSE2-NEXT:    psadbw %xmm1, %xmm0
27632; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
27633; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483678,2147483678]
27634; SSE2-NEXT:    movdqa %xmm0, %xmm2
27635; SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
27636; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
27637; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
27638; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
27639; SSE2-NEXT:    pand %xmm3, %xmm1
27640; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
27641; SSE2-NEXT:    por %xmm1, %xmm0
27642; SSE2-NEXT:    retq
27643;
27644; SSE3-LABEL: ugt_30_v2i64:
27645; SSE3:       # %bb.0:
27646; SSE3-NEXT:    movdqa %xmm0, %xmm1
27647; SSE3-NEXT:    psrlw $1, %xmm1
27648; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
27649; SSE3-NEXT:    psubb %xmm1, %xmm0
27650; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
27651; SSE3-NEXT:    movdqa %xmm0, %xmm2
27652; SSE3-NEXT:    pand %xmm1, %xmm2
27653; SSE3-NEXT:    psrlw $2, %xmm0
27654; SSE3-NEXT:    pand %xmm1, %xmm0
27655; SSE3-NEXT:    paddb %xmm2, %xmm0
27656; SSE3-NEXT:    movdqa %xmm0, %xmm1
27657; SSE3-NEXT:    psrlw $4, %xmm1
27658; SSE3-NEXT:    paddb %xmm0, %xmm1
27659; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
27660; SSE3-NEXT:    pxor %xmm0, %xmm0
27661; SSE3-NEXT:    psadbw %xmm1, %xmm0
27662; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
27663; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483678,2147483678]
27664; SSE3-NEXT:    movdqa %xmm0, %xmm2
27665; SSE3-NEXT:    pcmpgtd %xmm1, %xmm2
27666; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
27667; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
27668; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
27669; SSE3-NEXT:    pand %xmm3, %xmm1
27670; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
27671; SSE3-NEXT:    por %xmm1, %xmm0
27672; SSE3-NEXT:    retq
27673;
27674; SSSE3-LABEL: ugt_30_v2i64:
27675; SSSE3:       # %bb.0:
27676; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
27677; SSSE3-NEXT:    movdqa %xmm0, %xmm2
27678; SSSE3-NEXT:    pand %xmm1, %xmm2
27679; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
27680; SSSE3-NEXT:    movdqa %xmm3, %xmm4
27681; SSSE3-NEXT:    pshufb %xmm2, %xmm4
27682; SSSE3-NEXT:    psrlw $4, %xmm0
27683; SSSE3-NEXT:    pand %xmm1, %xmm0
27684; SSSE3-NEXT:    pshufb %xmm0, %xmm3
27685; SSSE3-NEXT:    paddb %xmm4, %xmm3
27686; SSSE3-NEXT:    pxor %xmm0, %xmm0
27687; SSSE3-NEXT:    psadbw %xmm3, %xmm0
27688; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
27689; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483678,2147483678]
27690; SSSE3-NEXT:    movdqa %xmm0, %xmm2
27691; SSSE3-NEXT:    pcmpgtd %xmm1, %xmm2
27692; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
27693; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
27694; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
27695; SSSE3-NEXT:    pand %xmm3, %xmm1
27696; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
27697; SSSE3-NEXT:    por %xmm1, %xmm0
27698; SSSE3-NEXT:    retq
27699;
27700; SSE41-LABEL: ugt_30_v2i64:
27701; SSE41:       # %bb.0:
27702; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
27703; SSE41-NEXT:    movdqa %xmm0, %xmm2
27704; SSE41-NEXT:    pand %xmm1, %xmm2
27705; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
27706; SSE41-NEXT:    movdqa %xmm3, %xmm4
27707; SSE41-NEXT:    pshufb %xmm2, %xmm4
27708; SSE41-NEXT:    psrlw $4, %xmm0
27709; SSE41-NEXT:    pand %xmm1, %xmm0
27710; SSE41-NEXT:    pshufb %xmm0, %xmm3
27711; SSE41-NEXT:    paddb %xmm4, %xmm3
27712; SSE41-NEXT:    pxor %xmm0, %xmm0
27713; SSE41-NEXT:    psadbw %xmm3, %xmm0
27714; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
27715; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483678,2147483678]
27716; SSE41-NEXT:    movdqa %xmm0, %xmm2
27717; SSE41-NEXT:    pcmpgtd %xmm1, %xmm2
27718; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
27719; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
27720; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
27721; SSE41-NEXT:    pand %xmm3, %xmm1
27722; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
27723; SSE41-NEXT:    por %xmm1, %xmm0
27724; SSE41-NEXT:    retq
27725;
27726; AVX1-LABEL: ugt_30_v2i64:
27727; AVX1:       # %bb.0:
27728; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
27729; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
27730; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
27731; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
27732; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
27733; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
27734; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
27735; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
27736; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
27737; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
27738; AVX1-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
27739; AVX1-NEXT:    retq
27740;
27741; AVX2-LABEL: ugt_30_v2i64:
27742; AVX2:       # %bb.0:
27743; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
27744; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
27745; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
27746; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
27747; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
27748; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
27749; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
27750; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
27751; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
27752; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
27753; AVX2-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
27754; AVX2-NEXT:    retq
27755;
27756; AVX512VPOPCNTDQ-LABEL: ugt_30_v2i64:
27757; AVX512VPOPCNTDQ:       # %bb.0:
27758; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
27759; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
27760; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
27761; AVX512VPOPCNTDQ-NEXT:    vzeroupper
27762; AVX512VPOPCNTDQ-NEXT:    retq
27763;
27764; AVX512VPOPCNTDQVL-LABEL: ugt_30_v2i64:
27765; AVX512VPOPCNTDQVL:       # %bb.0:
27766; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
27767; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
27768; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
27769; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
27770; AVX512VPOPCNTDQVL-NEXT:    retq
27771;
27772; BITALG_NOVLX-LABEL: ugt_30_v2i64:
27773; BITALG_NOVLX:       # %bb.0:
27774; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
27775; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
27776; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
27777; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
27778; BITALG_NOVLX-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
27779; BITALG_NOVLX-NEXT:    vzeroupper
27780; BITALG_NOVLX-NEXT:    retq
27781;
27782; BITALG-LABEL: ugt_30_v2i64:
27783; BITALG:       # %bb.0:
27784; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
27785; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
27786; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
27787; BITALG-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
27788; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
27789; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
27790; BITALG-NEXT:    retq
27791  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
27792  %3 = icmp ugt <2 x i64> %2, <i64 30, i64 30>
27793  %4 = sext <2 x i1> %3 to <2 x i64>
27794  ret <2 x i64> %4
27795}
27796
; Checks codegen for: per-lane popcount of <2 x i64>, unsigned-compare < 31,
; sign-extended to a <2 x i64> all-ones/all-zeros mask (see the IR at the end
; of this function). One CHECK body per RUN configuration (SSE2/SSE3/SSSE3/
; SSE41/AVX1/AVX2/AVX512VPOPCNTDQ[VL]/BITALG[_NOVLX]).
; NOTE: the CHECK lines are autogenerated by utils/update_llc_test_checks.py
; (see file header) - regenerate them rather than editing by hand.
27797define <2 x i64> @ult_31_v2i64(<2 x i64> %0) {
27798; SSE2-LABEL: ult_31_v2i64:
27799; SSE2:       # %bb.0:
27800; SSE2-NEXT:    movdqa %xmm0, %xmm1
27801; SSE2-NEXT:    psrlw $1, %xmm1
27802; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
27803; SSE2-NEXT:    psubb %xmm1, %xmm0
27804; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
27805; SSE2-NEXT:    movdqa %xmm0, %xmm2
27806; SSE2-NEXT:    pand %xmm1, %xmm2
27807; SSE2-NEXT:    psrlw $2, %xmm0
27808; SSE2-NEXT:    pand %xmm1, %xmm0
27809; SSE2-NEXT:    paddb %xmm2, %xmm0
27810; SSE2-NEXT:    movdqa %xmm0, %xmm1
27811; SSE2-NEXT:    psrlw $4, %xmm1
27812; SSE2-NEXT:    paddb %xmm0, %xmm1
27813; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
27814; SSE2-NEXT:    pxor %xmm0, %xmm0
27815; SSE2-NEXT:    psadbw %xmm1, %xmm0
27816; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
27817; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483679,2147483679]
27818; SSE2-NEXT:    movdqa %xmm1, %xmm2
27819; SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
27820; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
27821; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
27822; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
27823; SSE2-NEXT:    pand %xmm3, %xmm1
27824; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
27825; SSE2-NEXT:    por %xmm1, %xmm0
27826; SSE2-NEXT:    retq
27827;
27828; SSE3-LABEL: ult_31_v2i64:
27829; SSE3:       # %bb.0:
27830; SSE3-NEXT:    movdqa %xmm0, %xmm1
27831; SSE3-NEXT:    psrlw $1, %xmm1
27832; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
27833; SSE3-NEXT:    psubb %xmm1, %xmm0
27834; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
27835; SSE3-NEXT:    movdqa %xmm0, %xmm2
27836; SSE3-NEXT:    pand %xmm1, %xmm2
27837; SSE3-NEXT:    psrlw $2, %xmm0
27838; SSE3-NEXT:    pand %xmm1, %xmm0
27839; SSE3-NEXT:    paddb %xmm2, %xmm0
27840; SSE3-NEXT:    movdqa %xmm0, %xmm1
27841; SSE3-NEXT:    psrlw $4, %xmm1
27842; SSE3-NEXT:    paddb %xmm0, %xmm1
27843; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
27844; SSE3-NEXT:    pxor %xmm0, %xmm0
27845; SSE3-NEXT:    psadbw %xmm1, %xmm0
27846; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
27847; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483679,2147483679]
27848; SSE3-NEXT:    movdqa %xmm1, %xmm2
27849; SSE3-NEXT:    pcmpgtd %xmm0, %xmm2
27850; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
27851; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
27852; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
27853; SSE3-NEXT:    pand %xmm3, %xmm1
27854; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
27855; SSE3-NEXT:    por %xmm1, %xmm0
27856; SSE3-NEXT:    retq
27857;
27858; SSSE3-LABEL: ult_31_v2i64:
27859; SSSE3:       # %bb.0:
27860; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
27861; SSSE3-NEXT:    movdqa %xmm0, %xmm2
27862; SSSE3-NEXT:    pand %xmm1, %xmm2
27863; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
27864; SSSE3-NEXT:    movdqa %xmm3, %xmm4
27865; SSSE3-NEXT:    pshufb %xmm2, %xmm4
27866; SSSE3-NEXT:    psrlw $4, %xmm0
27867; SSSE3-NEXT:    pand %xmm1, %xmm0
27868; SSSE3-NEXT:    pshufb %xmm0, %xmm3
27869; SSSE3-NEXT:    paddb %xmm4, %xmm3
27870; SSSE3-NEXT:    pxor %xmm0, %xmm0
27871; SSSE3-NEXT:    psadbw %xmm3, %xmm0
27872; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
27873; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483679,2147483679]
27874; SSSE3-NEXT:    movdqa %xmm1, %xmm2
27875; SSSE3-NEXT:    pcmpgtd %xmm0, %xmm2
27876; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
27877; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
27878; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
27879; SSSE3-NEXT:    pand %xmm3, %xmm1
27880; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
27881; SSSE3-NEXT:    por %xmm1, %xmm0
27882; SSSE3-NEXT:    retq
27883;
27884; SSE41-LABEL: ult_31_v2i64:
27885; SSE41:       # %bb.0:
27886; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
27887; SSE41-NEXT:    movdqa %xmm0, %xmm2
27888; SSE41-NEXT:    pand %xmm1, %xmm2
27889; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
27890; SSE41-NEXT:    movdqa %xmm3, %xmm4
27891; SSE41-NEXT:    pshufb %xmm2, %xmm4
27892; SSE41-NEXT:    psrlw $4, %xmm0
27893; SSE41-NEXT:    pand %xmm1, %xmm0
27894; SSE41-NEXT:    pshufb %xmm0, %xmm3
27895; SSE41-NEXT:    paddb %xmm4, %xmm3
27896; SSE41-NEXT:    pxor %xmm0, %xmm0
27897; SSE41-NEXT:    psadbw %xmm3, %xmm0
27898; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
27899; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483679,2147483679]
27900; SSE41-NEXT:    movdqa %xmm1, %xmm2
27901; SSE41-NEXT:    pcmpgtd %xmm0, %xmm2
27902; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
27903; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
27904; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
27905; SSE41-NEXT:    pand %xmm3, %xmm1
27906; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
27907; SSE41-NEXT:    por %xmm1, %xmm0
27908; SSE41-NEXT:    retq
27909;
27910; AVX1-LABEL: ult_31_v2i64:
27911; AVX1:       # %bb.0:
27912; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
27913; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
27914; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
27915; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
27916; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
27917; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
27918; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
27919; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
27920; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
27921; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
27922; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [31,31]
27923; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
27924; AVX1-NEXT:    retq
27925;
27926; AVX2-LABEL: ult_31_v2i64:
27927; AVX2:       # %bb.0:
27928; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
27929; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
27930; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
27931; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
27932; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
27933; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
27934; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
27935; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
27936; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
27937; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
27938; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [31,31]
27939; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
27940; AVX2-NEXT:    retq
27941;
27942; AVX512VPOPCNTDQ-LABEL: ult_31_v2i64:
27943; AVX512VPOPCNTDQ:       # %bb.0:
27944; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
27945; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
27946; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} xmm1 = [31,31]
27947; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
27948; AVX512VPOPCNTDQ-NEXT:    vzeroupper
27949; AVX512VPOPCNTDQ-NEXT:    retq
27950;
27951; AVX512VPOPCNTDQVL-LABEL: ult_31_v2i64:
27952; AVX512VPOPCNTDQVL:       # %bb.0:
27953; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
27954; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
27955; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
27956; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
27957; AVX512VPOPCNTDQVL-NEXT:    retq
27958;
27959; BITALG_NOVLX-LABEL: ult_31_v2i64:
27960; BITALG_NOVLX:       # %bb.0:
27961; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
27962; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
27963; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
27964; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
27965; BITALG_NOVLX-NEXT:    vmovdqa {{.*#+}} xmm1 = [31,31]
27966; BITALG_NOVLX-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
27967; BITALG_NOVLX-NEXT:    vzeroupper
27968; BITALG_NOVLX-NEXT:    retq
27969;
27970; BITALG-LABEL: ult_31_v2i64:
27971; BITALG:       # %bb.0:
27972; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
27973; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
27974; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
27975; BITALG-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
27976; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
27977; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
27978; BITALG-NEXT:    retq
; IR under test: ctpop per lane, icmp ult 31, sext i1 -> i64 lane mask.
27979  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
27980  %3 = icmp ult <2 x i64> %2, <i64 31, i64 31>
27981  %4 = sext <2 x i1> %3 to <2 x i64>
27982  ret <2 x i64> %4
27983}
27984
; Checks codegen for: per-lane popcount of <2 x i64>, unsigned-compare > 31,
; sign-extended to a <2 x i64> all-ones/all-zeros mask (see the IR at the end
; of this function). One CHECK body per RUN configuration (SSE2/SSE3/SSSE3/
; SSE41/AVX1/AVX2/AVX512VPOPCNTDQ[VL]/BITALG[_NOVLX]).
; NOTE: the CHECK lines are autogenerated by utils/update_llc_test_checks.py
; (see file header) - regenerate them rather than editing by hand.
27985define <2 x i64> @ugt_31_v2i64(<2 x i64> %0) {
27986; SSE2-LABEL: ugt_31_v2i64:
27987; SSE2:       # %bb.0:
27988; SSE2-NEXT:    movdqa %xmm0, %xmm1
27989; SSE2-NEXT:    psrlw $1, %xmm1
27990; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
27991; SSE2-NEXT:    psubb %xmm1, %xmm0
27992; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
27993; SSE2-NEXT:    movdqa %xmm0, %xmm2
27994; SSE2-NEXT:    pand %xmm1, %xmm2
27995; SSE2-NEXT:    psrlw $2, %xmm0
27996; SSE2-NEXT:    pand %xmm1, %xmm0
27997; SSE2-NEXT:    paddb %xmm2, %xmm0
27998; SSE2-NEXT:    movdqa %xmm0, %xmm1
27999; SSE2-NEXT:    psrlw $4, %xmm1
28000; SSE2-NEXT:    paddb %xmm0, %xmm1
28001; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
28002; SSE2-NEXT:    pxor %xmm0, %xmm0
28003; SSE2-NEXT:    psadbw %xmm1, %xmm0
28004; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
28005; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483679,2147483679]
28006; SSE2-NEXT:    movdqa %xmm0, %xmm2
28007; SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
28008; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
28009; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
28010; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
28011; SSE2-NEXT:    pand %xmm3, %xmm1
28012; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
28013; SSE2-NEXT:    por %xmm1, %xmm0
28014; SSE2-NEXT:    retq
28015;
28016; SSE3-LABEL: ugt_31_v2i64:
28017; SSE3:       # %bb.0:
28018; SSE3-NEXT:    movdqa %xmm0, %xmm1
28019; SSE3-NEXT:    psrlw $1, %xmm1
28020; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
28021; SSE3-NEXT:    psubb %xmm1, %xmm0
28022; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
28023; SSE3-NEXT:    movdqa %xmm0, %xmm2
28024; SSE3-NEXT:    pand %xmm1, %xmm2
28025; SSE3-NEXT:    psrlw $2, %xmm0
28026; SSE3-NEXT:    pand %xmm1, %xmm0
28027; SSE3-NEXT:    paddb %xmm2, %xmm0
28028; SSE3-NEXT:    movdqa %xmm0, %xmm1
28029; SSE3-NEXT:    psrlw $4, %xmm1
28030; SSE3-NEXT:    paddb %xmm0, %xmm1
28031; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
28032; SSE3-NEXT:    pxor %xmm0, %xmm0
28033; SSE3-NEXT:    psadbw %xmm1, %xmm0
28034; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
28035; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483679,2147483679]
28036; SSE3-NEXT:    movdqa %xmm0, %xmm2
28037; SSE3-NEXT:    pcmpgtd %xmm1, %xmm2
28038; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
28039; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
28040; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
28041; SSE3-NEXT:    pand %xmm3, %xmm1
28042; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
28043; SSE3-NEXT:    por %xmm1, %xmm0
28044; SSE3-NEXT:    retq
28045;
28046; SSSE3-LABEL: ugt_31_v2i64:
28047; SSSE3:       # %bb.0:
28048; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
28049; SSSE3-NEXT:    movdqa %xmm0, %xmm2
28050; SSSE3-NEXT:    pand %xmm1, %xmm2
28051; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
28052; SSSE3-NEXT:    movdqa %xmm3, %xmm4
28053; SSSE3-NEXT:    pshufb %xmm2, %xmm4
28054; SSSE3-NEXT:    psrlw $4, %xmm0
28055; SSSE3-NEXT:    pand %xmm1, %xmm0
28056; SSSE3-NEXT:    pshufb %xmm0, %xmm3
28057; SSSE3-NEXT:    paddb %xmm4, %xmm3
28058; SSSE3-NEXT:    pxor %xmm0, %xmm0
28059; SSSE3-NEXT:    psadbw %xmm3, %xmm0
28060; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
28061; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483679,2147483679]
28062; SSSE3-NEXT:    movdqa %xmm0, %xmm2
28063; SSSE3-NEXT:    pcmpgtd %xmm1, %xmm2
28064; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
28065; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
28066; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
28067; SSSE3-NEXT:    pand %xmm3, %xmm1
28068; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
28069; SSSE3-NEXT:    por %xmm1, %xmm0
28070; SSSE3-NEXT:    retq
28071;
28072; SSE41-LABEL: ugt_31_v2i64:
28073; SSE41:       # %bb.0:
28074; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
28075; SSE41-NEXT:    movdqa %xmm0, %xmm2
28076; SSE41-NEXT:    pand %xmm1, %xmm2
28077; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
28078; SSE41-NEXT:    movdqa %xmm3, %xmm4
28079; SSE41-NEXT:    pshufb %xmm2, %xmm4
28080; SSE41-NEXT:    psrlw $4, %xmm0
28081; SSE41-NEXT:    pand %xmm1, %xmm0
28082; SSE41-NEXT:    pshufb %xmm0, %xmm3
28083; SSE41-NEXT:    paddb %xmm4, %xmm3
28084; SSE41-NEXT:    pxor %xmm0, %xmm0
28085; SSE41-NEXT:    psadbw %xmm3, %xmm0
28086; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
28087; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483679,2147483679]
28088; SSE41-NEXT:    movdqa %xmm0, %xmm2
28089; SSE41-NEXT:    pcmpgtd %xmm1, %xmm2
28090; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
28091; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
28092; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
28093; SSE41-NEXT:    pand %xmm3, %xmm1
28094; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
28095; SSE41-NEXT:    por %xmm1, %xmm0
28096; SSE41-NEXT:    retq
28097;
28098; AVX1-LABEL: ugt_31_v2i64:
28099; AVX1:       # %bb.0:
28100; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
28101; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
28102; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
28103; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
28104; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
28105; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
28106; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
28107; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
28108; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
28109; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
28110; AVX1-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
28111; AVX1-NEXT:    retq
28112;
28113; AVX2-LABEL: ugt_31_v2i64:
28114; AVX2:       # %bb.0:
28115; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
28116; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
28117; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
28118; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
28119; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
28120; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
28121; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
28122; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
28123; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
28124; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
28125; AVX2-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
28126; AVX2-NEXT:    retq
28127;
28128; AVX512VPOPCNTDQ-LABEL: ugt_31_v2i64:
28129; AVX512VPOPCNTDQ:       # %bb.0:
28130; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
28131; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
28132; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
28133; AVX512VPOPCNTDQ-NEXT:    vzeroupper
28134; AVX512VPOPCNTDQ-NEXT:    retq
28135;
28136; AVX512VPOPCNTDQVL-LABEL: ugt_31_v2i64:
28137; AVX512VPOPCNTDQVL:       # %bb.0:
28138; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
28139; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
28140; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
28141; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
28142; AVX512VPOPCNTDQVL-NEXT:    retq
28143;
28144; BITALG_NOVLX-LABEL: ugt_31_v2i64:
28145; BITALG_NOVLX:       # %bb.0:
28146; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
28147; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
28148; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
28149; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
28150; BITALG_NOVLX-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
28151; BITALG_NOVLX-NEXT:    vzeroupper
28152; BITALG_NOVLX-NEXT:    retq
28153;
28154; BITALG-LABEL: ugt_31_v2i64:
28155; BITALG:       # %bb.0:
28156; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
28157; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
28158; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
28159; BITALG-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
28160; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
28161; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
28162; BITALG-NEXT:    retq
; IR under test: ctpop per lane, icmp ugt 31, sext i1 -> i64 lane mask.
28163  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
28164  %3 = icmp ugt <2 x i64> %2, <i64 31, i64 31>
28165  %4 = sext <2 x i1> %3 to <2 x i64>
28166  ret <2 x i64> %4
28167}
28168
28169define <2 x i64> @ult_32_v2i64(<2 x i64> %0) {
28170; SSE2-LABEL: ult_32_v2i64:
28171; SSE2:       # %bb.0:
28172; SSE2-NEXT:    movdqa %xmm0, %xmm1
28173; SSE2-NEXT:    psrlw $1, %xmm1
28174; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
28175; SSE2-NEXT:    psubb %xmm1, %xmm0
28176; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
28177; SSE2-NEXT:    movdqa %xmm0, %xmm2
28178; SSE2-NEXT:    pand %xmm1, %xmm2
28179; SSE2-NEXT:    psrlw $2, %xmm0
28180; SSE2-NEXT:    pand %xmm1, %xmm0
28181; SSE2-NEXT:    paddb %xmm2, %xmm0
28182; SSE2-NEXT:    movdqa %xmm0, %xmm1
28183; SSE2-NEXT:    psrlw $4, %xmm1
28184; SSE2-NEXT:    paddb %xmm0, %xmm1
28185; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
28186; SSE2-NEXT:    pxor %xmm0, %xmm0
28187; SSE2-NEXT:    psadbw %xmm1, %xmm0
28188; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
28189; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483680,2147483680]
28190; SSE2-NEXT:    movdqa %xmm1, %xmm2
28191; SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
28192; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
28193; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
28194; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
28195; SSE2-NEXT:    pand %xmm3, %xmm1
28196; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
28197; SSE2-NEXT:    por %xmm1, %xmm0
28198; SSE2-NEXT:    retq
28199;
28200; SSE3-LABEL: ult_32_v2i64:
28201; SSE3:       # %bb.0:
28202; SSE3-NEXT:    movdqa %xmm0, %xmm1
28203; SSE3-NEXT:    psrlw $1, %xmm1
28204; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
28205; SSE3-NEXT:    psubb %xmm1, %xmm0
28206; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
28207; SSE3-NEXT:    movdqa %xmm0, %xmm2
28208; SSE3-NEXT:    pand %xmm1, %xmm2
28209; SSE3-NEXT:    psrlw $2, %xmm0
28210; SSE3-NEXT:    pand %xmm1, %xmm0
28211; SSE3-NEXT:    paddb %xmm2, %xmm0
28212; SSE3-NEXT:    movdqa %xmm0, %xmm1
28213; SSE3-NEXT:    psrlw $4, %xmm1
28214; SSE3-NEXT:    paddb %xmm0, %xmm1
28215; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
28216; SSE3-NEXT:    pxor %xmm0, %xmm0
28217; SSE3-NEXT:    psadbw %xmm1, %xmm0
28218; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
28219; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483680,2147483680]
28220; SSE3-NEXT:    movdqa %xmm1, %xmm2
28221; SSE3-NEXT:    pcmpgtd %xmm0, %xmm2
28222; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
28223; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
28224; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
28225; SSE3-NEXT:    pand %xmm3, %xmm1
28226; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
28227; SSE3-NEXT:    por %xmm1, %xmm0
28228; SSE3-NEXT:    retq
28229;
28230; SSSE3-LABEL: ult_32_v2i64:
28231; SSSE3:       # %bb.0:
28232; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
28233; SSSE3-NEXT:    movdqa %xmm0, %xmm2
28234; SSSE3-NEXT:    pand %xmm1, %xmm2
28235; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
28236; SSSE3-NEXT:    movdqa %xmm3, %xmm4
28237; SSSE3-NEXT:    pshufb %xmm2, %xmm4
28238; SSSE3-NEXT:    psrlw $4, %xmm0
28239; SSSE3-NEXT:    pand %xmm1, %xmm0
28240; SSSE3-NEXT:    pshufb %xmm0, %xmm3
28241; SSSE3-NEXT:    paddb %xmm4, %xmm3
28242; SSSE3-NEXT:    pxor %xmm0, %xmm0
28243; SSSE3-NEXT:    psadbw %xmm3, %xmm0
28244; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
28245; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483680,2147483680]
28246; SSSE3-NEXT:    movdqa %xmm1, %xmm2
28247; SSSE3-NEXT:    pcmpgtd %xmm0, %xmm2
28248; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
28249; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
28250; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
28251; SSSE3-NEXT:    pand %xmm3, %xmm1
28252; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
28253; SSSE3-NEXT:    por %xmm1, %xmm0
28254; SSSE3-NEXT:    retq
28255;
28256; SSE41-LABEL: ult_32_v2i64:
28257; SSE41:       # %bb.0:
28258; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
28259; SSE41-NEXT:    movdqa %xmm0, %xmm2
28260; SSE41-NEXT:    pand %xmm1, %xmm2
28261; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
28262; SSE41-NEXT:    movdqa %xmm3, %xmm4
28263; SSE41-NEXT:    pshufb %xmm2, %xmm4
28264; SSE41-NEXT:    psrlw $4, %xmm0
28265; SSE41-NEXT:    pand %xmm1, %xmm0
28266; SSE41-NEXT:    pshufb %xmm0, %xmm3
28267; SSE41-NEXT:    paddb %xmm4, %xmm3
28268; SSE41-NEXT:    pxor %xmm0, %xmm0
28269; SSE41-NEXT:    psadbw %xmm3, %xmm0
28270; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
28271; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483680,2147483680]
28272; SSE41-NEXT:    movdqa %xmm1, %xmm2
28273; SSE41-NEXT:    pcmpgtd %xmm0, %xmm2
28274; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
28275; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
28276; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
28277; SSE41-NEXT:    pand %xmm3, %xmm1
28278; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
28279; SSE41-NEXT:    por %xmm1, %xmm0
28280; SSE41-NEXT:    retq
28281;
28282; AVX1-LABEL: ult_32_v2i64:
28283; AVX1:       # %bb.0:
28284; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
28285; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
28286; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
28287; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
28288; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
28289; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
28290; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
28291; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
28292; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
28293; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
28294; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [32,32]
28295; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
28296; AVX1-NEXT:    retq
28297;
28298; AVX2-LABEL: ult_32_v2i64:
28299; AVX2:       # %bb.0:
28300; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
28301; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
28302; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
28303; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
28304; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
28305; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
28306; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
28307; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
28308; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
28309; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
28310; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [32,32]
28311; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
28312; AVX2-NEXT:    retq
28313;
28314; AVX512VPOPCNTDQ-LABEL: ult_32_v2i64:
28315; AVX512VPOPCNTDQ:       # %bb.0:
28316; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
28317; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
28318; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} xmm1 = [32,32]
28319; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
28320; AVX512VPOPCNTDQ-NEXT:    vzeroupper
28321; AVX512VPOPCNTDQ-NEXT:    retq
28322;
28323; AVX512VPOPCNTDQVL-LABEL: ult_32_v2i64:
28324; AVX512VPOPCNTDQVL:       # %bb.0:
28325; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
28326; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
28327; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
28328; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
28329; AVX512VPOPCNTDQVL-NEXT:    retq
28330;
28331; BITALG_NOVLX-LABEL: ult_32_v2i64:
28332; BITALG_NOVLX:       # %bb.0:
28333; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
28334; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
28335; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
28336; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
28337; BITALG_NOVLX-NEXT:    vmovdqa {{.*#+}} xmm1 = [32,32]
28338; BITALG_NOVLX-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
28339; BITALG_NOVLX-NEXT:    vzeroupper
28340; BITALG_NOVLX-NEXT:    retq
28341;
28342; BITALG-LABEL: ult_32_v2i64:
28343; BITALG:       # %bb.0:
28344; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
28345; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
28346; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
28347; BITALG-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
28348; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
28349; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
28350; BITALG-NEXT:    retq
28351  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
28352  %3 = icmp ult <2 x i64> %2, <i64 32, i64 32>
28353  %4 = sext <2 x i1> %3 to <2 x i64>
28354  ret <2 x i64> %4
28355}
28356
; Tests sext(icmp ugt (ctpop <2 x i64> %0), 32): per-64-bit-lane popcount
; followed by an unsigned greater-than compare against 32 for every target
; configuration in the RUN lines (SSE2/SSE3/SSSE3/SSE4.1 lower ctpop via the
; bit-twiddling + psadbw sequence and emulate the unsigned i64 compare with
; signed pcmpgtd/pcmpeqd after biasing by 0x8000000000000000; AVX512VPOPCNTDQ
; and BITALG use native popcount instructions).
; NOTE(review): the CHECK lines below are autogenerated by
; utils/update_llc_test_checks.py — regenerate rather than hand-edit.
28357define <2 x i64> @ugt_32_v2i64(<2 x i64> %0) {
28358; SSE2-LABEL: ugt_32_v2i64:
28359; SSE2:       # %bb.0:
28360; SSE2-NEXT:    movdqa %xmm0, %xmm1
28361; SSE2-NEXT:    psrlw $1, %xmm1
28362; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
28363; SSE2-NEXT:    psubb %xmm1, %xmm0
28364; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
28365; SSE2-NEXT:    movdqa %xmm0, %xmm2
28366; SSE2-NEXT:    pand %xmm1, %xmm2
28367; SSE2-NEXT:    psrlw $2, %xmm0
28368; SSE2-NEXT:    pand %xmm1, %xmm0
28369; SSE2-NEXT:    paddb %xmm2, %xmm0
28370; SSE2-NEXT:    movdqa %xmm0, %xmm1
28371; SSE2-NEXT:    psrlw $4, %xmm1
28372; SSE2-NEXT:    paddb %xmm0, %xmm1
28373; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
28374; SSE2-NEXT:    pxor %xmm0, %xmm0
28375; SSE2-NEXT:    psadbw %xmm1, %xmm0
28376; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
28377; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483680,2147483680]
28378; SSE2-NEXT:    movdqa %xmm0, %xmm2
28379; SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
28380; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
28381; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
28382; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
28383; SSE2-NEXT:    pand %xmm3, %xmm1
28384; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
28385; SSE2-NEXT:    por %xmm1, %xmm0
28386; SSE2-NEXT:    retq
28387;
28388; SSE3-LABEL: ugt_32_v2i64:
28389; SSE3:       # %bb.0:
28390; SSE3-NEXT:    movdqa %xmm0, %xmm1
28391; SSE3-NEXT:    psrlw $1, %xmm1
28392; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
28393; SSE3-NEXT:    psubb %xmm1, %xmm0
28394; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
28395; SSE3-NEXT:    movdqa %xmm0, %xmm2
28396; SSE3-NEXT:    pand %xmm1, %xmm2
28397; SSE3-NEXT:    psrlw $2, %xmm0
28398; SSE3-NEXT:    pand %xmm1, %xmm0
28399; SSE3-NEXT:    paddb %xmm2, %xmm0
28400; SSE3-NEXT:    movdqa %xmm0, %xmm1
28401; SSE3-NEXT:    psrlw $4, %xmm1
28402; SSE3-NEXT:    paddb %xmm0, %xmm1
28403; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
28404; SSE3-NEXT:    pxor %xmm0, %xmm0
28405; SSE3-NEXT:    psadbw %xmm1, %xmm0
28406; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
28407; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483680,2147483680]
28408; SSE3-NEXT:    movdqa %xmm0, %xmm2
28409; SSE3-NEXT:    pcmpgtd %xmm1, %xmm2
28410; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
28411; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
28412; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
28413; SSE3-NEXT:    pand %xmm3, %xmm1
28414; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
28415; SSE3-NEXT:    por %xmm1, %xmm0
28416; SSE3-NEXT:    retq
28417;
28418; SSSE3-LABEL: ugt_32_v2i64:
28419; SSSE3:       # %bb.0:
28420; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
28421; SSSE3-NEXT:    movdqa %xmm0, %xmm2
28422; SSSE3-NEXT:    pand %xmm1, %xmm2
28423; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
28424; SSSE3-NEXT:    movdqa %xmm3, %xmm4
28425; SSSE3-NEXT:    pshufb %xmm2, %xmm4
28426; SSSE3-NEXT:    psrlw $4, %xmm0
28427; SSSE3-NEXT:    pand %xmm1, %xmm0
28428; SSSE3-NEXT:    pshufb %xmm0, %xmm3
28429; SSSE3-NEXT:    paddb %xmm4, %xmm3
28430; SSSE3-NEXT:    pxor %xmm0, %xmm0
28431; SSSE3-NEXT:    psadbw %xmm3, %xmm0
28432; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
28433; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483680,2147483680]
28434; SSSE3-NEXT:    movdqa %xmm0, %xmm2
28435; SSSE3-NEXT:    pcmpgtd %xmm1, %xmm2
28436; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
28437; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
28438; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
28439; SSSE3-NEXT:    pand %xmm3, %xmm1
28440; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
28441; SSSE3-NEXT:    por %xmm1, %xmm0
28442; SSSE3-NEXT:    retq
28443;
28444; SSE41-LABEL: ugt_32_v2i64:
28445; SSE41:       # %bb.0:
28446; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
28447; SSE41-NEXT:    movdqa %xmm0, %xmm2
28448; SSE41-NEXT:    pand %xmm1, %xmm2
28449; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
28450; SSE41-NEXT:    movdqa %xmm3, %xmm4
28451; SSE41-NEXT:    pshufb %xmm2, %xmm4
28452; SSE41-NEXT:    psrlw $4, %xmm0
28453; SSE41-NEXT:    pand %xmm1, %xmm0
28454; SSE41-NEXT:    pshufb %xmm0, %xmm3
28455; SSE41-NEXT:    paddb %xmm4, %xmm3
28456; SSE41-NEXT:    pxor %xmm0, %xmm0
28457; SSE41-NEXT:    psadbw %xmm3, %xmm0
28458; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
28459; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483680,2147483680]
28460; SSE41-NEXT:    movdqa %xmm0, %xmm2
28461; SSE41-NEXT:    pcmpgtd %xmm1, %xmm2
28462; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
28463; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
28464; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
28465; SSE41-NEXT:    pand %xmm3, %xmm1
28466; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
28467; SSE41-NEXT:    por %xmm1, %xmm0
28468; SSE41-NEXT:    retq
28469;
28470; AVX1-LABEL: ugt_32_v2i64:
28471; AVX1:       # %bb.0:
28472; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
28473; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
28474; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
28475; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
28476; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
28477; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
28478; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
28479; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
28480; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
28481; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
28482; AVX1-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
28483; AVX1-NEXT:    retq
28484;
28485; AVX2-LABEL: ugt_32_v2i64:
28486; AVX2:       # %bb.0:
28487; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
28488; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
28489; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
28490; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
28491; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
28492; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
28493; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
28494; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
28495; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
28496; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
28497; AVX2-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
28498; AVX2-NEXT:    retq
28499;
28500; AVX512VPOPCNTDQ-LABEL: ugt_32_v2i64:
28501; AVX512VPOPCNTDQ:       # %bb.0:
28502; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
28503; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
28504; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
28505; AVX512VPOPCNTDQ-NEXT:    vzeroupper
28506; AVX512VPOPCNTDQ-NEXT:    retq
28507;
28508; AVX512VPOPCNTDQVL-LABEL: ugt_32_v2i64:
28509; AVX512VPOPCNTDQVL:       # %bb.0:
28510; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
28511; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
28512; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
28513; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
28514; AVX512VPOPCNTDQVL-NEXT:    retq
28515;
28516; BITALG_NOVLX-LABEL: ugt_32_v2i64:
28517; BITALG_NOVLX:       # %bb.0:
28518; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
28519; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
28520; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
28521; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
28522; BITALG_NOVLX-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
28523; BITALG_NOVLX-NEXT:    vzeroupper
28524; BITALG_NOVLX-NEXT:    retq
28525;
28526; BITALG-LABEL: ugt_32_v2i64:
28527; BITALG:       # %bb.0:
28528; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
28529; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
28530; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
28531; BITALG-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
28532; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
28533; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
28534; BITALG-NEXT:    retq
28535  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
28536  %3 = icmp ugt <2 x i64> %2, <i64 32, i64 32>
28537  %4 = sext <2 x i1> %3 to <2 x i64>
28538  ret <2 x i64> %4
28539}
28540
; Tests sext(icmp ult (ctpop <2 x i64> %0), 33): per-64-bit-lane popcount
; followed by an unsigned less-than compare against 33. Pre-SSE4.2 targets
; emulate the unsigned i64 compare with signed pcmpgtd/pcmpeqd after biasing
; by 0x8000000000000000 (hence the [2147483681,2147483681] constant); AVX
; targets use vpcmpgtq with [33,33], and AVX512VL/BITALG use vpcmpltuq masks.
; NOTE(review): the CHECK lines below are autogenerated by
; utils/update_llc_test_checks.py — regenerate rather than hand-edit.
28541define <2 x i64> @ult_33_v2i64(<2 x i64> %0) {
28542; SSE2-LABEL: ult_33_v2i64:
28543; SSE2:       # %bb.0:
28544; SSE2-NEXT:    movdqa %xmm0, %xmm1
28545; SSE2-NEXT:    psrlw $1, %xmm1
28546; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
28547; SSE2-NEXT:    psubb %xmm1, %xmm0
28548; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
28549; SSE2-NEXT:    movdqa %xmm0, %xmm2
28550; SSE2-NEXT:    pand %xmm1, %xmm2
28551; SSE2-NEXT:    psrlw $2, %xmm0
28552; SSE2-NEXT:    pand %xmm1, %xmm0
28553; SSE2-NEXT:    paddb %xmm2, %xmm0
28554; SSE2-NEXT:    movdqa %xmm0, %xmm1
28555; SSE2-NEXT:    psrlw $4, %xmm1
28556; SSE2-NEXT:    paddb %xmm0, %xmm1
28557; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
28558; SSE2-NEXT:    pxor %xmm0, %xmm0
28559; SSE2-NEXT:    psadbw %xmm1, %xmm0
28560; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
28561; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483681,2147483681]
28562; SSE2-NEXT:    movdqa %xmm1, %xmm2
28563; SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
28564; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
28565; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
28566; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
28567; SSE2-NEXT:    pand %xmm3, %xmm1
28568; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
28569; SSE2-NEXT:    por %xmm1, %xmm0
28570; SSE2-NEXT:    retq
28571;
28572; SSE3-LABEL: ult_33_v2i64:
28573; SSE3:       # %bb.0:
28574; SSE3-NEXT:    movdqa %xmm0, %xmm1
28575; SSE3-NEXT:    psrlw $1, %xmm1
28576; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
28577; SSE3-NEXT:    psubb %xmm1, %xmm0
28578; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
28579; SSE3-NEXT:    movdqa %xmm0, %xmm2
28580; SSE3-NEXT:    pand %xmm1, %xmm2
28581; SSE3-NEXT:    psrlw $2, %xmm0
28582; SSE3-NEXT:    pand %xmm1, %xmm0
28583; SSE3-NEXT:    paddb %xmm2, %xmm0
28584; SSE3-NEXT:    movdqa %xmm0, %xmm1
28585; SSE3-NEXT:    psrlw $4, %xmm1
28586; SSE3-NEXT:    paddb %xmm0, %xmm1
28587; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
28588; SSE3-NEXT:    pxor %xmm0, %xmm0
28589; SSE3-NEXT:    psadbw %xmm1, %xmm0
28590; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
28591; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483681,2147483681]
28592; SSE3-NEXT:    movdqa %xmm1, %xmm2
28593; SSE3-NEXT:    pcmpgtd %xmm0, %xmm2
28594; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
28595; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
28596; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
28597; SSE3-NEXT:    pand %xmm3, %xmm1
28598; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
28599; SSE3-NEXT:    por %xmm1, %xmm0
28600; SSE3-NEXT:    retq
28601;
28602; SSSE3-LABEL: ult_33_v2i64:
28603; SSSE3:       # %bb.0:
28604; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
28605; SSSE3-NEXT:    movdqa %xmm0, %xmm2
28606; SSSE3-NEXT:    pand %xmm1, %xmm2
28607; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
28608; SSSE3-NEXT:    movdqa %xmm3, %xmm4
28609; SSSE3-NEXT:    pshufb %xmm2, %xmm4
28610; SSSE3-NEXT:    psrlw $4, %xmm0
28611; SSSE3-NEXT:    pand %xmm1, %xmm0
28612; SSSE3-NEXT:    pshufb %xmm0, %xmm3
28613; SSSE3-NEXT:    paddb %xmm4, %xmm3
28614; SSSE3-NEXT:    pxor %xmm0, %xmm0
28615; SSSE3-NEXT:    psadbw %xmm3, %xmm0
28616; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
28617; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483681,2147483681]
28618; SSSE3-NEXT:    movdqa %xmm1, %xmm2
28619; SSSE3-NEXT:    pcmpgtd %xmm0, %xmm2
28620; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
28621; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
28622; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
28623; SSSE3-NEXT:    pand %xmm3, %xmm1
28624; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
28625; SSSE3-NEXT:    por %xmm1, %xmm0
28626; SSSE3-NEXT:    retq
28627;
28628; SSE41-LABEL: ult_33_v2i64:
28629; SSE41:       # %bb.0:
28630; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
28631; SSE41-NEXT:    movdqa %xmm0, %xmm2
28632; SSE41-NEXT:    pand %xmm1, %xmm2
28633; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
28634; SSE41-NEXT:    movdqa %xmm3, %xmm4
28635; SSE41-NEXT:    pshufb %xmm2, %xmm4
28636; SSE41-NEXT:    psrlw $4, %xmm0
28637; SSE41-NEXT:    pand %xmm1, %xmm0
28638; SSE41-NEXT:    pshufb %xmm0, %xmm3
28639; SSE41-NEXT:    paddb %xmm4, %xmm3
28640; SSE41-NEXT:    pxor %xmm0, %xmm0
28641; SSE41-NEXT:    psadbw %xmm3, %xmm0
28642; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
28643; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483681,2147483681]
28644; SSE41-NEXT:    movdqa %xmm1, %xmm2
28645; SSE41-NEXT:    pcmpgtd %xmm0, %xmm2
28646; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
28647; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
28648; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
28649; SSE41-NEXT:    pand %xmm3, %xmm1
28650; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
28651; SSE41-NEXT:    por %xmm1, %xmm0
28652; SSE41-NEXT:    retq
28653;
28654; AVX1-LABEL: ult_33_v2i64:
28655; AVX1:       # %bb.0:
28656; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
28657; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
28658; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
28659; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
28660; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
28661; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
28662; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
28663; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
28664; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
28665; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
28666; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [33,33]
28667; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
28668; AVX1-NEXT:    retq
28669;
28670; AVX2-LABEL: ult_33_v2i64:
28671; AVX2:       # %bb.0:
28672; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
28673; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
28674; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
28675; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
28676; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
28677; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
28678; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
28679; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
28680; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
28681; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
28682; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [33,33]
28683; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
28684; AVX2-NEXT:    retq
28685;
28686; AVX512VPOPCNTDQ-LABEL: ult_33_v2i64:
28687; AVX512VPOPCNTDQ:       # %bb.0:
28688; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
28689; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
28690; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} xmm1 = [33,33]
28691; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
28692; AVX512VPOPCNTDQ-NEXT:    vzeroupper
28693; AVX512VPOPCNTDQ-NEXT:    retq
28694;
28695; AVX512VPOPCNTDQVL-LABEL: ult_33_v2i64:
28696; AVX512VPOPCNTDQVL:       # %bb.0:
28697; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
28698; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
28699; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
28700; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
28701; AVX512VPOPCNTDQVL-NEXT:    retq
28702;
28703; BITALG_NOVLX-LABEL: ult_33_v2i64:
28704; BITALG_NOVLX:       # %bb.0:
28705; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
28706; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
28707; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
28708; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
28709; BITALG_NOVLX-NEXT:    vmovdqa {{.*#+}} xmm1 = [33,33]
28710; BITALG_NOVLX-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
28711; BITALG_NOVLX-NEXT:    vzeroupper
28712; BITALG_NOVLX-NEXT:    retq
28713;
28714; BITALG-LABEL: ult_33_v2i64:
28715; BITALG:       # %bb.0:
28716; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
28717; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
28718; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
28719; BITALG-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
28720; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
28721; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
28722; BITALG-NEXT:    retq
28723  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
28724  %3 = icmp ult <2 x i64> %2, <i64 33, i64 33>
28725  %4 = sext <2 x i1> %3 to <2 x i64>
28726  ret <2 x i64> %4
28728
; Tests sext(icmp ugt (ctpop <2 x i64> %0), 33): per-64-bit-lane popcount
; followed by an unsigned greater-than compare against 33. Pre-SSE4.2 targets
; emulate the unsigned i64 compare with signed pcmpgtd/pcmpeqd after biasing
; by 0x8000000000000000 (hence the [2147483681,2147483681] constant); AVX
; targets use vpcmpgtq, and AVX512VL/BITALG use vpcmpnleuq mask compares.
; NOTE(review): the CHECK lines below are autogenerated by
; utils/update_llc_test_checks.py — regenerate rather than hand-edit.
28729define <2 x i64> @ugt_33_v2i64(<2 x i64> %0) {
28730; SSE2-LABEL: ugt_33_v2i64:
28731; SSE2:       # %bb.0:
28732; SSE2-NEXT:    movdqa %xmm0, %xmm1
28733; SSE2-NEXT:    psrlw $1, %xmm1
28734; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
28735; SSE2-NEXT:    psubb %xmm1, %xmm0
28736; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
28737; SSE2-NEXT:    movdqa %xmm0, %xmm2
28738; SSE2-NEXT:    pand %xmm1, %xmm2
28739; SSE2-NEXT:    psrlw $2, %xmm0
28740; SSE2-NEXT:    pand %xmm1, %xmm0
28741; SSE2-NEXT:    paddb %xmm2, %xmm0
28742; SSE2-NEXT:    movdqa %xmm0, %xmm1
28743; SSE2-NEXT:    psrlw $4, %xmm1
28744; SSE2-NEXT:    paddb %xmm0, %xmm1
28745; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
28746; SSE2-NEXT:    pxor %xmm0, %xmm0
28747; SSE2-NEXT:    psadbw %xmm1, %xmm0
28748; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
28749; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483681,2147483681]
28750; SSE2-NEXT:    movdqa %xmm0, %xmm2
28751; SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
28752; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
28753; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
28754; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
28755; SSE2-NEXT:    pand %xmm3, %xmm1
28756; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
28757; SSE2-NEXT:    por %xmm1, %xmm0
28758; SSE2-NEXT:    retq
28759;
28760; SSE3-LABEL: ugt_33_v2i64:
28761; SSE3:       # %bb.0:
28762; SSE3-NEXT:    movdqa %xmm0, %xmm1
28763; SSE3-NEXT:    psrlw $1, %xmm1
28764; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
28765; SSE3-NEXT:    psubb %xmm1, %xmm0
28766; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
28767; SSE3-NEXT:    movdqa %xmm0, %xmm2
28768; SSE3-NEXT:    pand %xmm1, %xmm2
28769; SSE3-NEXT:    psrlw $2, %xmm0
28770; SSE3-NEXT:    pand %xmm1, %xmm0
28771; SSE3-NEXT:    paddb %xmm2, %xmm0
28772; SSE3-NEXT:    movdqa %xmm0, %xmm1
28773; SSE3-NEXT:    psrlw $4, %xmm1
28774; SSE3-NEXT:    paddb %xmm0, %xmm1
28775; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
28776; SSE3-NEXT:    pxor %xmm0, %xmm0
28777; SSE3-NEXT:    psadbw %xmm1, %xmm0
28778; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
28779; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483681,2147483681]
28780; SSE3-NEXT:    movdqa %xmm0, %xmm2
28781; SSE3-NEXT:    pcmpgtd %xmm1, %xmm2
28782; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
28783; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
28784; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
28785; SSE3-NEXT:    pand %xmm3, %xmm1
28786; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
28787; SSE3-NEXT:    por %xmm1, %xmm0
28788; SSE3-NEXT:    retq
28789;
28790; SSSE3-LABEL: ugt_33_v2i64:
28791; SSSE3:       # %bb.0:
28792; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
28793; SSSE3-NEXT:    movdqa %xmm0, %xmm2
28794; SSSE3-NEXT:    pand %xmm1, %xmm2
28795; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
28796; SSSE3-NEXT:    movdqa %xmm3, %xmm4
28797; SSSE3-NEXT:    pshufb %xmm2, %xmm4
28798; SSSE3-NEXT:    psrlw $4, %xmm0
28799; SSSE3-NEXT:    pand %xmm1, %xmm0
28800; SSSE3-NEXT:    pshufb %xmm0, %xmm3
28801; SSSE3-NEXT:    paddb %xmm4, %xmm3
28802; SSSE3-NEXT:    pxor %xmm0, %xmm0
28803; SSSE3-NEXT:    psadbw %xmm3, %xmm0
28804; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
28805; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483681,2147483681]
28806; SSSE3-NEXT:    movdqa %xmm0, %xmm2
28807; SSSE3-NEXT:    pcmpgtd %xmm1, %xmm2
28808; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
28809; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
28810; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
28811; SSSE3-NEXT:    pand %xmm3, %xmm1
28812; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
28813; SSSE3-NEXT:    por %xmm1, %xmm0
28814; SSSE3-NEXT:    retq
28815;
28816; SSE41-LABEL: ugt_33_v2i64:
28817; SSE41:       # %bb.0:
28818; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
28819; SSE41-NEXT:    movdqa %xmm0, %xmm2
28820; SSE41-NEXT:    pand %xmm1, %xmm2
28821; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
28822; SSE41-NEXT:    movdqa %xmm3, %xmm4
28823; SSE41-NEXT:    pshufb %xmm2, %xmm4
28824; SSE41-NEXT:    psrlw $4, %xmm0
28825; SSE41-NEXT:    pand %xmm1, %xmm0
28826; SSE41-NEXT:    pshufb %xmm0, %xmm3
28827; SSE41-NEXT:    paddb %xmm4, %xmm3
28828; SSE41-NEXT:    pxor %xmm0, %xmm0
28829; SSE41-NEXT:    psadbw %xmm3, %xmm0
28830; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
28831; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483681,2147483681]
28832; SSE41-NEXT:    movdqa %xmm0, %xmm2
28833; SSE41-NEXT:    pcmpgtd %xmm1, %xmm2
28834; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
28835; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
28836; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
28837; SSE41-NEXT:    pand %xmm3, %xmm1
28838; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
28839; SSE41-NEXT:    por %xmm1, %xmm0
28840; SSE41-NEXT:    retq
28841;
28842; AVX1-LABEL: ugt_33_v2i64:
28843; AVX1:       # %bb.0:
28844; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
28845; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
28846; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
28847; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
28848; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
28849; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
28850; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
28851; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
28852; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
28853; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
28854; AVX1-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
28855; AVX1-NEXT:    retq
28856;
28857; AVX2-LABEL: ugt_33_v2i64:
28858; AVX2:       # %bb.0:
28859; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
28860; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
28861; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
28862; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
28863; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
28864; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
28865; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
28866; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
28867; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
28868; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
28869; AVX2-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
28870; AVX2-NEXT:    retq
28871;
28872; AVX512VPOPCNTDQ-LABEL: ugt_33_v2i64:
28873; AVX512VPOPCNTDQ:       # %bb.0:
28874; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
28875; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
28876; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
28877; AVX512VPOPCNTDQ-NEXT:    vzeroupper
28878; AVX512VPOPCNTDQ-NEXT:    retq
28879;
28880; AVX512VPOPCNTDQVL-LABEL: ugt_33_v2i64:
28881; AVX512VPOPCNTDQVL:       # %bb.0:
28882; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
28883; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
28884; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
28885; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
28886; AVX512VPOPCNTDQVL-NEXT:    retq
28887;
28888; BITALG_NOVLX-LABEL: ugt_33_v2i64:
28889; BITALG_NOVLX:       # %bb.0:
28890; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
28891; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
28892; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
28893; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
28894; BITALG_NOVLX-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
28895; BITALG_NOVLX-NEXT:    vzeroupper
28896; BITALG_NOVLX-NEXT:    retq
28897;
28898; BITALG-LABEL: ugt_33_v2i64:
28899; BITALG:       # %bb.0:
28900; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
28901; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
28902; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
28903; BITALG-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
28904; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
28905; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
28906; BITALG-NEXT:    retq
28907  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
28908  %3 = icmp ugt <2 x i64> %2, <i64 33, i64 33>
28909  %4 = sext <2 x i1> %3 to <2 x i64>
28910  ret <2 x i64> %4
28911}
28912
; Check lowering of sext(icmp ult (ctpop <2 x i64> %0), 34) for every RUN line's subtarget.
define <2 x i64> @ult_34_v2i64(<2 x i64> %0) {
; SSE2-LABEL: ult_34_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    psadbw %xmm1, %xmm0
; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483682,2147483682]
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ult_34_v2i64:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    psadbw %xmm1, %xmm0
; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483682,2147483682]
; SSE3-NEXT:    movdqa %xmm1, %xmm2
; SSE3-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE3-NEXT:    pand %xmm3, %xmm1
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE3-NEXT:    por %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ult_34_v2i64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    psadbw %xmm3, %xmm0
; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483682,2147483682]
; SSSE3-NEXT:    movdqa %xmm1, %xmm2
; SSSE3-NEXT:    pcmpgtd %xmm0, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSSE3-NEXT:    pand %xmm3, %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ult_34_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    psadbw %xmm3, %xmm0
; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483682,2147483682]
; SSE41-NEXT:    movdqa %xmm1, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm3, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ult_34_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [34,34]
; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ult_34_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [34,34]
; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ult_34_v2i64:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} xmm1 = [34,34]
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ult_34_v2i64:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ult_34_v2i64:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vmovdqa {{.*#+}} xmm1 = [34,34]
; BITALG_NOVLX-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ult_34_v2i64:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
  %3 = icmp ult <2 x i64> %2, <i64 34, i64 34>
  %4 = sext <2 x i1> %3 to <2 x i64>
  ret <2 x i64> %4
}
29100
; Check lowering of sext(icmp ugt (ctpop <2 x i64> %0), 34) for every RUN line's subtarget.
define <2 x i64> @ugt_34_v2i64(<2 x i64> %0) {
; SSE2-LABEL: ugt_34_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    psadbw %xmm1, %xmm0
; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483682,2147483682]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ugt_34_v2i64:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    psadbw %xmm1, %xmm0
; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483682,2147483682]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE3-NEXT:    pand %xmm3, %xmm1
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE3-NEXT:    por %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ugt_34_v2i64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    psadbw %xmm3, %xmm0
; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483682,2147483682]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pcmpgtd %xmm1, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSSE3-NEXT:    pand %xmm3, %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ugt_34_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    psadbw %xmm3, %xmm0
; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483682,2147483682]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm3, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ugt_34_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ugt_34_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ugt_34_v2i64:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ugt_34_v2i64:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ugt_34_v2i64:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ugt_34_v2i64:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
  %3 = icmp ugt <2 x i64> %2, <i64 34, i64 34>
  %4 = sext <2 x i1> %3 to <2 x i64>
  ret <2 x i64> %4
}
29284
; Check lowering of sext(icmp ult (ctpop <2 x i64> %0), 35) for every RUN line's subtarget.
define <2 x i64> @ult_35_v2i64(<2 x i64> %0) {
; SSE2-LABEL: ult_35_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    psadbw %xmm1, %xmm0
; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483683,2147483683]
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ult_35_v2i64:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    psadbw %xmm1, %xmm0
; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483683,2147483683]
; SSE3-NEXT:    movdqa %xmm1, %xmm2
; SSE3-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE3-NEXT:    pand %xmm3, %xmm1
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE3-NEXT:    por %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ult_35_v2i64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    psadbw %xmm3, %xmm0
; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483683,2147483683]
; SSSE3-NEXT:    movdqa %xmm1, %xmm2
; SSSE3-NEXT:    pcmpgtd %xmm0, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSSE3-NEXT:    pand %xmm3, %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ult_35_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    psadbw %xmm3, %xmm0
; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483683,2147483683]
; SSE41-NEXT:    movdqa %xmm1, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm3, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ult_35_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [35,35]
; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ult_35_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [35,35]
; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ult_35_v2i64:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} xmm1 = [35,35]
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ult_35_v2i64:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ult_35_v2i64:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vmovdqa {{.*#+}} xmm1 = [35,35]
; BITALG_NOVLX-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ult_35_v2i64:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
  %3 = icmp ult <2 x i64> %2, <i64 35, i64 35>
  %4 = sext <2 x i1> %3 to <2 x i64>
  ret <2 x i64> %4
}
29472
; Check lowering of sext(icmp ugt (ctpop <2 x i64> %0), 35) for every RUN line's subtarget.
define <2 x i64> @ugt_35_v2i64(<2 x i64> %0) {
; SSE2-LABEL: ugt_35_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    psadbw %xmm1, %xmm0
; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483683,2147483683]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ugt_35_v2i64:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    psadbw %xmm1, %xmm0
; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483683,2147483683]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE3-NEXT:    pand %xmm3, %xmm1
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE3-NEXT:    por %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ugt_35_v2i64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    psadbw %xmm3, %xmm0
; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483683,2147483683]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pcmpgtd %xmm1, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSSE3-NEXT:    pand %xmm3, %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ugt_35_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    psadbw %xmm3, %xmm0
; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483683,2147483683]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm3, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ugt_35_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ugt_35_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ugt_35_v2i64:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ugt_35_v2i64:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ugt_35_v2i64:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ugt_35_v2i64:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
  %3 = icmp ugt <2 x i64> %2, <i64 35, i64 35>
  %4 = sext <2 x i1> %3 to <2 x i64>
  ret <2 x i64> %4
}
29656
; ctpop(<2 x i64>) compared `icmp ult 36`, sign-extended to an all-ones/zero mask.
; Exercises the per-subtarget popcount lowering (bit-twiddling plus psadbw on
; plain SSE, a pshufb nibble LUT from SSSE3 onward, vpopcntq on
; AVX512VPOPCNTDQ, vpopcntb plus vpsadbw on BITALG) followed by the unsigned
; 64-bit compare-with-constant selection (pcmpgtd/pcmpeqd splitting without
; native 64-bit compares, vpcmpgtq with a swapped-operand [36,36] constant on
; AVX, and the mask-register vpcmpltuq form when AVX512VL is available).
define <2 x i64> @ult_36_v2i64(<2 x i64> %0) {
; SSE2-LABEL: ult_36_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    psadbw %xmm1, %xmm0
; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483684,2147483684]
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ult_36_v2i64:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    psadbw %xmm1, %xmm0
; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483684,2147483684]
; SSE3-NEXT:    movdqa %xmm1, %xmm2
; SSE3-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE3-NEXT:    pand %xmm3, %xmm1
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE3-NEXT:    por %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ult_36_v2i64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    psadbw %xmm3, %xmm0
; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483684,2147483684]
; SSSE3-NEXT:    movdqa %xmm1, %xmm2
; SSSE3-NEXT:    pcmpgtd %xmm0, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSSE3-NEXT:    pand %xmm3, %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ult_36_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    psadbw %xmm3, %xmm0
; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483684,2147483684]
; SSE41-NEXT:    movdqa %xmm1, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm3, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ult_36_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [36,36]
; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ult_36_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [36,36]
; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ult_36_v2i64:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} xmm1 = [36,36]
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ult_36_v2i64:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ult_36_v2i64:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vmovdqa {{.*#+}} xmm1 = [36,36]
; BITALG_NOVLX-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ult_36_v2i64:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
; IR under test: per-lane popcount, unsigned-less-than 36, sext of the i1 mask.
  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
  %3 = icmp ult <2 x i64> %2, <i64 36, i64 36>
  %4 = sext <2 x i1> %3 to <2 x i64>
  ret <2 x i64> %4
}
29844
; ctpop(<2 x i64>) compared `icmp ugt 36`, sign-extended to an all-ones/zero mask.
; Same popcount lowerings as the ult variant above, but the compare direction is
; reversed: pre-AVX targets swap the pcmpgtd operand order, AVX targets fold the
; [36,36] constant as a vpcmpgtq memory operand, and AVX512VL targets select the
; mask-register vpcmpnleuq form.
define <2 x i64> @ugt_36_v2i64(<2 x i64> %0) {
; SSE2-LABEL: ugt_36_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    psadbw %xmm1, %xmm0
; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483684,2147483684]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ugt_36_v2i64:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    psadbw %xmm1, %xmm0
; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483684,2147483684]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE3-NEXT:    pand %xmm3, %xmm1
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE3-NEXT:    por %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ugt_36_v2i64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    psadbw %xmm3, %xmm0
; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483684,2147483684]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pcmpgtd %xmm1, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSSE3-NEXT:    pand %xmm3, %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ugt_36_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    psadbw %xmm3, %xmm0
; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483684,2147483684]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm3, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ugt_36_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ugt_36_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ugt_36_v2i64:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ugt_36_v2i64:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ugt_36_v2i64:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ugt_36_v2i64:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
; IR under test: per-lane popcount, unsigned-greater-than 36, sext of the i1 mask.
  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
  %3 = icmp ugt <2 x i64> %2, <i64 36, i64 36>
  %4 = sext <2 x i1> %3 to <2 x i64>
  ret <2 x i64> %4
}
30028
; ctpop(<2 x i64>) compared `icmp ult 37`, sign-extended to an all-ones/zero mask.
; Identical lowering shapes to the ult-36 test above; only the compare constant
; changes ([37,37] splat on AVX, 2147483685 = 0x80000025 in the sign-flipped
; 32-bit-split compare used by the pre-AVX targets).
define <2 x i64> @ult_37_v2i64(<2 x i64> %0) {
; SSE2-LABEL: ult_37_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    psadbw %xmm1, %xmm0
; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483685,2147483685]
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ult_37_v2i64:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    psadbw %xmm1, %xmm0
; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483685,2147483685]
; SSE3-NEXT:    movdqa %xmm1, %xmm2
; SSE3-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE3-NEXT:    pand %xmm3, %xmm1
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE3-NEXT:    por %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ult_37_v2i64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    psadbw %xmm3, %xmm0
; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483685,2147483685]
; SSSE3-NEXT:    movdqa %xmm1, %xmm2
; SSSE3-NEXT:    pcmpgtd %xmm0, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSSE3-NEXT:    pand %xmm3, %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ult_37_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    psadbw %xmm3, %xmm0
; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483685,2147483685]
; SSE41-NEXT:    movdqa %xmm1, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm3, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ult_37_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [37,37]
; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ult_37_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [37,37]
; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ult_37_v2i64:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} xmm1 = [37,37]
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ult_37_v2i64:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ult_37_v2i64:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vmovdqa {{.*#+}} xmm1 = [37,37]
; BITALG_NOVLX-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ult_37_v2i64:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
; IR under test: per-lane popcount, unsigned-less-than 37, sext of the i1 mask.
  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
  %3 = icmp ult <2 x i64> %2, <i64 37, i64 37>
  %4 = sext <2 x i1> %3 to <2 x i64>
  ret <2 x i64> %4
}
30216
; ctpop(<2 x i64>) compared `icmp ugt 37`, sign-extended to an all-ones/zero mask.
; Same shapes as the ugt-36 test above with the constant bumped to 37
; (2147483685 = 0x80000025 in the sign-flipped 32-bit-split compare used by
; pre-AVX targets; a folded memory operand for vpcmpgtq on AVX; vpcmpnleuq
; into a mask register when AVX512VL is available).
define <2 x i64> @ugt_37_v2i64(<2 x i64> %0) {
; SSE2-LABEL: ugt_37_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    psadbw %xmm1, %xmm0
; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483685,2147483685]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ugt_37_v2i64:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    psadbw %xmm1, %xmm0
; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483685,2147483685]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE3-NEXT:    pand %xmm3, %xmm1
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE3-NEXT:    por %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ugt_37_v2i64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    psadbw %xmm3, %xmm0
; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483685,2147483685]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pcmpgtd %xmm1, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSSE3-NEXT:    pand %xmm3, %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ugt_37_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    psadbw %xmm3, %xmm0
; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483685,2147483685]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm3, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ugt_37_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ugt_37_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ugt_37_v2i64:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ugt_37_v2i64:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ugt_37_v2i64:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ugt_37_v2i64:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
; IR under test: per-lane popcount, unsigned-greater-than 37, sext of the i1 mask.
  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
  %3 = icmp ugt <2 x i64> %2, <i64 37, i64 37>
  %4 = sext <2 x i1> %3 to <2 x i64>
  ret <2 x i64> %4
}
30400
define <2 x i64> @ult_38_v2i64(<2 x i64> %0) {
; Checks lowering of ctpop(<2 x i64>) followed by an unsigned "< 38" compare,
; sign-extended to a mask, for each RUN configuration. CHECK lines below are
; autogenerated by utils/update_llc_test_checks.py -- do not edit by hand.
; SSE2-LABEL: ult_38_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    psadbw %xmm1, %xmm0
; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483686,2147483686]
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ult_38_v2i64:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    psadbw %xmm1, %xmm0
; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483686,2147483686]
; SSE3-NEXT:    movdqa %xmm1, %xmm2
; SSE3-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE3-NEXT:    pand %xmm3, %xmm1
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE3-NEXT:    por %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ult_38_v2i64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    psadbw %xmm3, %xmm0
; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483686,2147483686]
; SSSE3-NEXT:    movdqa %xmm1, %xmm2
; SSSE3-NEXT:    pcmpgtd %xmm0, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSSE3-NEXT:    pand %xmm3, %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ult_38_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    psadbw %xmm3, %xmm0
; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483686,2147483686]
; SSE41-NEXT:    movdqa %xmm1, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm3, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ult_38_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [38,38]
; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ult_38_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [38,38]
; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ult_38_v2i64:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} xmm1 = [38,38]
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ult_38_v2i64:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ult_38_v2i64:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vmovdqa {{.*#+}} xmm1 = [38,38]
; BITALG_NOVLX-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ult_38_v2i64:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
  %3 = icmp ult <2 x i64> %2, <i64 38, i64 38>
  %4 = sext <2 x i1> %3 to <2 x i64>
  ret <2 x i64> %4
}
30588
define <2 x i64> @ugt_38_v2i64(<2 x i64> %0) {
; Checks lowering of ctpop(<2 x i64>) followed by an unsigned "> 38" compare,
; sign-extended to a mask, for each RUN configuration. CHECK lines below are
; autogenerated by utils/update_llc_test_checks.py -- do not edit by hand.
; SSE2-LABEL: ugt_38_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    psadbw %xmm1, %xmm0
; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483686,2147483686]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ugt_38_v2i64:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    psadbw %xmm1, %xmm0
; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483686,2147483686]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE3-NEXT:    pand %xmm3, %xmm1
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE3-NEXT:    por %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ugt_38_v2i64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    psadbw %xmm3, %xmm0
; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483686,2147483686]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pcmpgtd %xmm1, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSSE3-NEXT:    pand %xmm3, %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ugt_38_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    psadbw %xmm3, %xmm0
; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483686,2147483686]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm3, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ugt_38_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ugt_38_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ugt_38_v2i64:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ugt_38_v2i64:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ugt_38_v2i64:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ugt_38_v2i64:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
  %3 = icmp ugt <2 x i64> %2, <i64 38, i64 38>
  %4 = sext <2 x i1> %3 to <2 x i64>
  ret <2 x i64> %4
}
30772
define <2 x i64> @ult_39_v2i64(<2 x i64> %0) {
; Checks lowering of ctpop(<2 x i64>) followed by an unsigned "< 39" compare,
; sign-extended to a mask, for each RUN configuration. CHECK lines below are
; autogenerated by utils/update_llc_test_checks.py -- do not edit by hand.
; SSE2-LABEL: ult_39_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    psadbw %xmm1, %xmm0
; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483687,2147483687]
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ult_39_v2i64:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    psadbw %xmm1, %xmm0
; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483687,2147483687]
; SSE3-NEXT:    movdqa %xmm1, %xmm2
; SSE3-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE3-NEXT:    pand %xmm3, %xmm1
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE3-NEXT:    por %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ult_39_v2i64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    psadbw %xmm3, %xmm0
; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483687,2147483687]
; SSSE3-NEXT:    movdqa %xmm1, %xmm2
; SSSE3-NEXT:    pcmpgtd %xmm0, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSSE3-NEXT:    pand %xmm3, %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ult_39_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    psadbw %xmm3, %xmm0
; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483687,2147483687]
; SSE41-NEXT:    movdqa %xmm1, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm3, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ult_39_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [39,39]
; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ult_39_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [39,39]
; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ult_39_v2i64:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} xmm1 = [39,39]
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ult_39_v2i64:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ult_39_v2i64:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vmovdqa {{.*#+}} xmm1 = [39,39]
; BITALG_NOVLX-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ult_39_v2i64:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
  %3 = icmp ult <2 x i64> %2, <i64 39, i64 39>
  %4 = sext <2 x i1> %3 to <2 x i64>
  ret <2 x i64> %4
}
30960
30961define <2 x i64> @ugt_39_v2i64(<2 x i64> %0) {
30962; SSE2-LABEL: ugt_39_v2i64:
30963; SSE2:       # %bb.0:
30964; SSE2-NEXT:    movdqa %xmm0, %xmm1
30965; SSE2-NEXT:    psrlw $1, %xmm1
30966; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
30967; SSE2-NEXT:    psubb %xmm1, %xmm0
30968; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
30969; SSE2-NEXT:    movdqa %xmm0, %xmm2
30970; SSE2-NEXT:    pand %xmm1, %xmm2
30971; SSE2-NEXT:    psrlw $2, %xmm0
30972; SSE2-NEXT:    pand %xmm1, %xmm0
30973; SSE2-NEXT:    paddb %xmm2, %xmm0
30974; SSE2-NEXT:    movdqa %xmm0, %xmm1
30975; SSE2-NEXT:    psrlw $4, %xmm1
30976; SSE2-NEXT:    paddb %xmm0, %xmm1
30977; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
30978; SSE2-NEXT:    pxor %xmm0, %xmm0
30979; SSE2-NEXT:    psadbw %xmm1, %xmm0
30980; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
30981; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483687,2147483687]
30982; SSE2-NEXT:    movdqa %xmm0, %xmm2
30983; SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
30984; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
30985; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
30986; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
30987; SSE2-NEXT:    pand %xmm3, %xmm1
30988; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
30989; SSE2-NEXT:    por %xmm1, %xmm0
30990; SSE2-NEXT:    retq
30991;
30992; SSE3-LABEL: ugt_39_v2i64:
30993; SSE3:       # %bb.0:
30994; SSE3-NEXT:    movdqa %xmm0, %xmm1
30995; SSE3-NEXT:    psrlw $1, %xmm1
30996; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
30997; SSE3-NEXT:    psubb %xmm1, %xmm0
30998; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
30999; SSE3-NEXT:    movdqa %xmm0, %xmm2
31000; SSE3-NEXT:    pand %xmm1, %xmm2
31001; SSE3-NEXT:    psrlw $2, %xmm0
31002; SSE3-NEXT:    pand %xmm1, %xmm0
31003; SSE3-NEXT:    paddb %xmm2, %xmm0
31004; SSE3-NEXT:    movdqa %xmm0, %xmm1
31005; SSE3-NEXT:    psrlw $4, %xmm1
31006; SSE3-NEXT:    paddb %xmm0, %xmm1
31007; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
31008; SSE3-NEXT:    pxor %xmm0, %xmm0
31009; SSE3-NEXT:    psadbw %xmm1, %xmm0
31010; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
31011; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483687,2147483687]
31012; SSE3-NEXT:    movdqa %xmm0, %xmm2
31013; SSE3-NEXT:    pcmpgtd %xmm1, %xmm2
31014; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
31015; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
31016; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
31017; SSE3-NEXT:    pand %xmm3, %xmm1
31018; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
31019; SSE3-NEXT:    por %xmm1, %xmm0
31020; SSE3-NEXT:    retq
31021;
31022; SSSE3-LABEL: ugt_39_v2i64:
31023; SSSE3:       # %bb.0:
31024; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
31025; SSSE3-NEXT:    movdqa %xmm0, %xmm2
31026; SSSE3-NEXT:    pand %xmm1, %xmm2
31027; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
31028; SSSE3-NEXT:    movdqa %xmm3, %xmm4
31029; SSSE3-NEXT:    pshufb %xmm2, %xmm4
31030; SSSE3-NEXT:    psrlw $4, %xmm0
31031; SSSE3-NEXT:    pand %xmm1, %xmm0
31032; SSSE3-NEXT:    pshufb %xmm0, %xmm3
31033; SSSE3-NEXT:    paddb %xmm4, %xmm3
31034; SSSE3-NEXT:    pxor %xmm0, %xmm0
31035; SSSE3-NEXT:    psadbw %xmm3, %xmm0
31036; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
31037; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483687,2147483687]
31038; SSSE3-NEXT:    movdqa %xmm0, %xmm2
31039; SSSE3-NEXT:    pcmpgtd %xmm1, %xmm2
31040; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
31041; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
31042; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
31043; SSSE3-NEXT:    pand %xmm3, %xmm1
31044; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
31045; SSSE3-NEXT:    por %xmm1, %xmm0
31046; SSSE3-NEXT:    retq
31047;
31048; SSE41-LABEL: ugt_39_v2i64:
31049; SSE41:       # %bb.0:
31050; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
31051; SSE41-NEXT:    movdqa %xmm0, %xmm2
31052; SSE41-NEXT:    pand %xmm1, %xmm2
31053; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
31054; SSE41-NEXT:    movdqa %xmm3, %xmm4
31055; SSE41-NEXT:    pshufb %xmm2, %xmm4
31056; SSE41-NEXT:    psrlw $4, %xmm0
31057; SSE41-NEXT:    pand %xmm1, %xmm0
31058; SSE41-NEXT:    pshufb %xmm0, %xmm3
31059; SSE41-NEXT:    paddb %xmm4, %xmm3
31060; SSE41-NEXT:    pxor %xmm0, %xmm0
31061; SSE41-NEXT:    psadbw %xmm3, %xmm0
31062; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
31063; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483687,2147483687]
31064; SSE41-NEXT:    movdqa %xmm0, %xmm2
31065; SSE41-NEXT:    pcmpgtd %xmm1, %xmm2
31066; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
31067; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
31068; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
31069; SSE41-NEXT:    pand %xmm3, %xmm1
31070; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
31071; SSE41-NEXT:    por %xmm1, %xmm0
31072; SSE41-NEXT:    retq
31073;
31074; AVX1-LABEL: ugt_39_v2i64:
31075; AVX1:       # %bb.0:
31076; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
31077; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
31078; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
31079; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
31080; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
31081; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
31082; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
31083; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
31084; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
31085; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
31086; AVX1-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
31087; AVX1-NEXT:    retq
31088;
31089; AVX2-LABEL: ugt_39_v2i64:
31090; AVX2:       # %bb.0:
31091; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
31092; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
31093; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
31094; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
31095; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
31096; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
31097; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
31098; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
31099; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
31100; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
31101; AVX2-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
31102; AVX2-NEXT:    retq
31103;
31104; AVX512VPOPCNTDQ-LABEL: ugt_39_v2i64:
31105; AVX512VPOPCNTDQ:       # %bb.0:
31106; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
31107; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
31108; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
31109; AVX512VPOPCNTDQ-NEXT:    vzeroupper
31110; AVX512VPOPCNTDQ-NEXT:    retq
31111;
31112; AVX512VPOPCNTDQVL-LABEL: ugt_39_v2i64:
31113; AVX512VPOPCNTDQVL:       # %bb.0:
31114; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
31115; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
31116; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
31117; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
31118; AVX512VPOPCNTDQVL-NEXT:    retq
31119;
31120; BITALG_NOVLX-LABEL: ugt_39_v2i64:
31121; BITALG_NOVLX:       # %bb.0:
31122; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
31123; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
31124; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
31125; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
31126; BITALG_NOVLX-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
31127; BITALG_NOVLX-NEXT:    vzeroupper
31128; BITALG_NOVLX-NEXT:    retq
31129;
31130; BITALG-LABEL: ugt_39_v2i64:
31131; BITALG:       # %bb.0:
31132; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
31133; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
31134; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
31135; BITALG-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
31136; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
31137; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
31138; BITALG-NEXT:    retq
31139  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
31140  %3 = icmp ugt <2 x i64> %2, <i64 39, i64 39>
31141  %4 = sext <2 x i1> %3 to <2 x i64>
31142  ret <2 x i64> %4
31143}
31144
; NOTE(review): the check lines below are autogenerated by
; utils/update_llc_test_checks.py -- edit the IR at the bottom of the function
; and regenerate; do not hand-edit the checks.
; IR under test: per-lane ctpop of <2 x i64>, icmp ult 40, sext i1 -> i64
; (each lane becomes all-ones when true, all-zeros when false).
define <2 x i64> @ult_40_v2i64(<2 x i64> %0) {
; SSE2-LABEL: ult_40_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    psadbw %xmm1, %xmm0
; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483688,2147483688]
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ult_40_v2i64:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    psadbw %xmm1, %xmm0
; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483688,2147483688]
; SSE3-NEXT:    movdqa %xmm1, %xmm2
; SSE3-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE3-NEXT:    pand %xmm3, %xmm1
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE3-NEXT:    por %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ult_40_v2i64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    psadbw %xmm3, %xmm0
; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483688,2147483688]
; SSSE3-NEXT:    movdqa %xmm1, %xmm2
; SSSE3-NEXT:    pcmpgtd %xmm0, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSSE3-NEXT:    pand %xmm3, %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ult_40_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    psadbw %xmm3, %xmm0
; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483688,2147483688]
; SSE41-NEXT:    movdqa %xmm1, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm3, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ult_40_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [40,40]
; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ult_40_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [40,40]
; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ult_40_v2i64:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} xmm1 = [40,40]
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ult_40_v2i64:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ult_40_v2i64:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vmovdqa {{.*#+}} xmm1 = [40,40]
; BITALG_NOVLX-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ult_40_v2i64:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
  %3 = icmp ult <2 x i64> %2, <i64 40, i64 40>
  %4 = sext <2 x i1> %3 to <2 x i64>
  ret <2 x i64> %4
}
31332
; NOTE(review): the check lines below are autogenerated by
; utils/update_llc_test_checks.py -- edit the IR at the bottom of the function
; and regenerate; do not hand-edit the checks.
; IR under test: per-lane ctpop of <2 x i64>, icmp ugt 40, sext i1 -> i64
; (each lane becomes all-ones when true, all-zeros when false).
define <2 x i64> @ugt_40_v2i64(<2 x i64> %0) {
; SSE2-LABEL: ugt_40_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    psadbw %xmm1, %xmm0
; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483688,2147483688]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ugt_40_v2i64:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    psadbw %xmm1, %xmm0
; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483688,2147483688]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE3-NEXT:    pand %xmm3, %xmm1
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE3-NEXT:    por %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ugt_40_v2i64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    psadbw %xmm3, %xmm0
; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483688,2147483688]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pcmpgtd %xmm1, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSSE3-NEXT:    pand %xmm3, %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ugt_40_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    psadbw %xmm3, %xmm0
; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483688,2147483688]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm3, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ugt_40_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ugt_40_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ugt_40_v2i64:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ugt_40_v2i64:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ugt_40_v2i64:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ugt_40_v2i64:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
  %3 = icmp ugt <2 x i64> %2, <i64 40, i64 40>
  %4 = sext <2 x i1> %3 to <2 x i64>
  ret <2 x i64> %4
}
31516
; NOTE(review): the check lines below are autogenerated by
; utils/update_llc_test_checks.py -- edit the IR at the bottom of the function
; and regenerate; do not hand-edit the checks.
; IR under test: per-lane ctpop of <2 x i64>, icmp ult 41, sext i1 -> i64
; (each lane becomes all-ones when true, all-zeros when false).
define <2 x i64> @ult_41_v2i64(<2 x i64> %0) {
; SSE2-LABEL: ult_41_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    psadbw %xmm1, %xmm0
; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483689,2147483689]
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ult_41_v2i64:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    psadbw %xmm1, %xmm0
; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483689,2147483689]
; SSE3-NEXT:    movdqa %xmm1, %xmm2
; SSE3-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE3-NEXT:    pand %xmm3, %xmm1
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE3-NEXT:    por %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ult_41_v2i64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    psadbw %xmm3, %xmm0
; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483689,2147483689]
; SSSE3-NEXT:    movdqa %xmm1, %xmm2
; SSSE3-NEXT:    pcmpgtd %xmm0, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSSE3-NEXT:    pand %xmm3, %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ult_41_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    psadbw %xmm3, %xmm0
; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483689,2147483689]
; SSE41-NEXT:    movdqa %xmm1, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm3, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ult_41_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [41,41]
; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ult_41_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [41,41]
; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ult_41_v2i64:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} xmm1 = [41,41]
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ult_41_v2i64:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ult_41_v2i64:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vmovdqa {{.*#+}} xmm1 = [41,41]
; BITALG_NOVLX-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ult_41_v2i64:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
  %3 = icmp ult <2 x i64> %2, <i64 41, i64 41>
  %4 = sext <2 x i1> %3 to <2 x i64>
  ret <2 x i64> %4
}
31704
31705define <2 x i64> @ugt_41_v2i64(<2 x i64> %0) {
31706; SSE2-LABEL: ugt_41_v2i64:
31707; SSE2:       # %bb.0:
31708; SSE2-NEXT:    movdqa %xmm0, %xmm1
31709; SSE2-NEXT:    psrlw $1, %xmm1
31710; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
31711; SSE2-NEXT:    psubb %xmm1, %xmm0
31712; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
31713; SSE2-NEXT:    movdqa %xmm0, %xmm2
31714; SSE2-NEXT:    pand %xmm1, %xmm2
31715; SSE2-NEXT:    psrlw $2, %xmm0
31716; SSE2-NEXT:    pand %xmm1, %xmm0
31717; SSE2-NEXT:    paddb %xmm2, %xmm0
31718; SSE2-NEXT:    movdqa %xmm0, %xmm1
31719; SSE2-NEXT:    psrlw $4, %xmm1
31720; SSE2-NEXT:    paddb %xmm0, %xmm1
31721; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
31722; SSE2-NEXT:    pxor %xmm0, %xmm0
31723; SSE2-NEXT:    psadbw %xmm1, %xmm0
31724; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
31725; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483689,2147483689]
31726; SSE2-NEXT:    movdqa %xmm0, %xmm2
31727; SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
31728; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
31729; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
31730; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
31731; SSE2-NEXT:    pand %xmm3, %xmm1
31732; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
31733; SSE2-NEXT:    por %xmm1, %xmm0
31734; SSE2-NEXT:    retq
31735;
31736; SSE3-LABEL: ugt_41_v2i64:
31737; SSE3:       # %bb.0:
31738; SSE3-NEXT:    movdqa %xmm0, %xmm1
31739; SSE3-NEXT:    psrlw $1, %xmm1
31740; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
31741; SSE3-NEXT:    psubb %xmm1, %xmm0
31742; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
31743; SSE3-NEXT:    movdqa %xmm0, %xmm2
31744; SSE3-NEXT:    pand %xmm1, %xmm2
31745; SSE3-NEXT:    psrlw $2, %xmm0
31746; SSE3-NEXT:    pand %xmm1, %xmm0
31747; SSE3-NEXT:    paddb %xmm2, %xmm0
31748; SSE3-NEXT:    movdqa %xmm0, %xmm1
31749; SSE3-NEXT:    psrlw $4, %xmm1
31750; SSE3-NEXT:    paddb %xmm0, %xmm1
31751; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
31752; SSE3-NEXT:    pxor %xmm0, %xmm0
31753; SSE3-NEXT:    psadbw %xmm1, %xmm0
31754; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
31755; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483689,2147483689]
31756; SSE3-NEXT:    movdqa %xmm0, %xmm2
31757; SSE3-NEXT:    pcmpgtd %xmm1, %xmm2
31758; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
31759; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
31760; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
31761; SSE3-NEXT:    pand %xmm3, %xmm1
31762; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
31763; SSE3-NEXT:    por %xmm1, %xmm0
31764; SSE3-NEXT:    retq
31765;
31766; SSSE3-LABEL: ugt_41_v2i64:
31767; SSSE3:       # %bb.0:
31768; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
31769; SSSE3-NEXT:    movdqa %xmm0, %xmm2
31770; SSSE3-NEXT:    pand %xmm1, %xmm2
31771; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
31772; SSSE3-NEXT:    movdqa %xmm3, %xmm4
31773; SSSE3-NEXT:    pshufb %xmm2, %xmm4
31774; SSSE3-NEXT:    psrlw $4, %xmm0
31775; SSSE3-NEXT:    pand %xmm1, %xmm0
31776; SSSE3-NEXT:    pshufb %xmm0, %xmm3
31777; SSSE3-NEXT:    paddb %xmm4, %xmm3
31778; SSSE3-NEXT:    pxor %xmm0, %xmm0
31779; SSSE3-NEXT:    psadbw %xmm3, %xmm0
31780; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
31781; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483689,2147483689]
31782; SSSE3-NEXT:    movdqa %xmm0, %xmm2
31783; SSSE3-NEXT:    pcmpgtd %xmm1, %xmm2
31784; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
31785; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
31786; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
31787; SSSE3-NEXT:    pand %xmm3, %xmm1
31788; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
31789; SSSE3-NEXT:    por %xmm1, %xmm0
31790; SSSE3-NEXT:    retq
31791;
31792; SSE41-LABEL: ugt_41_v2i64:
31793; SSE41:       # %bb.0:
31794; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
31795; SSE41-NEXT:    movdqa %xmm0, %xmm2
31796; SSE41-NEXT:    pand %xmm1, %xmm2
31797; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
31798; SSE41-NEXT:    movdqa %xmm3, %xmm4
31799; SSE41-NEXT:    pshufb %xmm2, %xmm4
31800; SSE41-NEXT:    psrlw $4, %xmm0
31801; SSE41-NEXT:    pand %xmm1, %xmm0
31802; SSE41-NEXT:    pshufb %xmm0, %xmm3
31803; SSE41-NEXT:    paddb %xmm4, %xmm3
31804; SSE41-NEXT:    pxor %xmm0, %xmm0
31805; SSE41-NEXT:    psadbw %xmm3, %xmm0
31806; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
31807; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483689,2147483689]
31808; SSE41-NEXT:    movdqa %xmm0, %xmm2
31809; SSE41-NEXT:    pcmpgtd %xmm1, %xmm2
31810; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
31811; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
31812; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
31813; SSE41-NEXT:    pand %xmm3, %xmm1
31814; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
31815; SSE41-NEXT:    por %xmm1, %xmm0
31816; SSE41-NEXT:    retq
31817;
31818; AVX1-LABEL: ugt_41_v2i64:
31819; AVX1:       # %bb.0:
31820; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
31821; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
31822; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
31823; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
31824; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
31825; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
31826; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
31827; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
31828; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
31829; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
31830; AVX1-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
31831; AVX1-NEXT:    retq
31832;
31833; AVX2-LABEL: ugt_41_v2i64:
31834; AVX2:       # %bb.0:
31835; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
31836; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
31837; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
31838; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
31839; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
31840; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
31841; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
31842; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
31843; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
31844; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
31845; AVX2-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
31846; AVX2-NEXT:    retq
31847;
31848; AVX512VPOPCNTDQ-LABEL: ugt_41_v2i64:
31849; AVX512VPOPCNTDQ:       # %bb.0:
31850; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
31851; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
31852; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
31853; AVX512VPOPCNTDQ-NEXT:    vzeroupper
31854; AVX512VPOPCNTDQ-NEXT:    retq
31855;
31856; AVX512VPOPCNTDQVL-LABEL: ugt_41_v2i64:
31857; AVX512VPOPCNTDQVL:       # %bb.0:
31858; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
31859; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
31860; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
31861; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
31862; AVX512VPOPCNTDQVL-NEXT:    retq
31863;
31864; BITALG_NOVLX-LABEL: ugt_41_v2i64:
31865; BITALG_NOVLX:       # %bb.0:
31866; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
31867; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
31868; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
31869; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
31870; BITALG_NOVLX-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
31871; BITALG_NOVLX-NEXT:    vzeroupper
31872; BITALG_NOVLX-NEXT:    retq
31873;
31874; BITALG-LABEL: ugt_41_v2i64:
31875; BITALG:       # %bb.0:
31876; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
31877; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
31878; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
31879; BITALG-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
31880; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
31881; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
31882; BITALG-NEXT:    retq
31883  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
31884  %3 = icmp ugt <2 x i64> %2, <i64 41, i64 41>
31885  %4 = sext <2 x i1> %3 to <2 x i64>
31886  ret <2 x i64> %4
31887}
; popcount of each i64 lane compared unsigned-less-than 42; the <2 x i1>
; result is sign-extended so each lane becomes an all-ones/all-zeros mask.
; NOTE: the CHECK lines below are FileCheck directives autogenerated by
; utils/update_llc_test_checks.py — do not hand-edit them; regenerate instead.
; The pre-SSE4.2 lowerings emulate the unsigned i64 compare with signed
; 32-bit compares after a sign-bit bias (2147483690 = 0x8000002A = 42 + 2^31,
; applied by the preceding `por` of a constant-pool value).
define <2 x i64> @ult_42_v2i64(<2 x i64> %0) {
; SSE2-LABEL: ult_42_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    psadbw %xmm1, %xmm0
; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483690,2147483690]
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ult_42_v2i64:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    psadbw %xmm1, %xmm0
; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483690,2147483690]
; SSE3-NEXT:    movdqa %xmm1, %xmm2
; SSE3-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE3-NEXT:    pand %xmm3, %xmm1
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE3-NEXT:    por %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ult_42_v2i64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    psadbw %xmm3, %xmm0
; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483690,2147483690]
; SSSE3-NEXT:    movdqa %xmm1, %xmm2
; SSSE3-NEXT:    pcmpgtd %xmm0, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSSE3-NEXT:    pand %xmm3, %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ult_42_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    psadbw %xmm3, %xmm0
; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483690,2147483690]
; SSE41-NEXT:    movdqa %xmm1, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm3, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ult_42_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [42,42]
; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ult_42_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [42,42]
; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ult_42_v2i64:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} xmm1 = [42,42]
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ult_42_v2i64:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ult_42_v2i64:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vmovdqa {{.*#+}} xmm1 = [42,42]
; BITALG_NOVLX-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ult_42_v2i64:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
  %3 = icmp ult <2 x i64> %2, <i64 42, i64 42>
  %4 = sext <2 x i1> %3 to <2 x i64>
  ret <2 x i64> %4
}
; popcount of each i64 lane compared unsigned-greater-than 42; the <2 x i1>
; result is sign-extended so each lane becomes an all-ones/all-zeros mask.
; NOTE: the CHECK lines below are FileCheck directives autogenerated by
; utils/update_llc_test_checks.py — do not hand-edit them; regenerate instead.
; The pre-SSE4.2 lowerings emulate the unsigned i64 compare with signed
; 32-bit compares after a sign-bit bias (2147483690 = 0x8000002A = 42 + 2^31,
; applied by the preceding `por` of a constant-pool value).
define <2 x i64> @ugt_42_v2i64(<2 x i64> %0) {
; SSE2-LABEL: ugt_42_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    psadbw %xmm1, %xmm0
; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483690,2147483690]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ugt_42_v2i64:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    psadbw %xmm1, %xmm0
; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483690,2147483690]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE3-NEXT:    pand %xmm3, %xmm1
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE3-NEXT:    por %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ugt_42_v2i64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    psadbw %xmm3, %xmm0
; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483690,2147483690]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pcmpgtd %xmm1, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSSE3-NEXT:    pand %xmm3, %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ugt_42_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    psadbw %xmm3, %xmm0
; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483690,2147483690]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm3, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ugt_42_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ugt_42_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ugt_42_v2i64:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ugt_42_v2i64:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ugt_42_v2i64:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ugt_42_v2i64:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
  %3 = icmp ugt <2 x i64> %2, <i64 42, i64 42>
  %4 = sext <2 x i1> %3 to <2 x i64>
  ret <2 x i64> %4
}
; popcount of each i64 lane compared unsigned-less-than 43; the <2 x i1>
; result is sign-extended so each lane becomes an all-ones/all-zeros mask.
; NOTE: the CHECK lines below are FileCheck directives autogenerated by
; utils/update_llc_test_checks.py — do not hand-edit them; regenerate instead.
; The pre-SSE4.2 lowerings emulate the unsigned i64 compare with signed
; 32-bit compares after a sign-bit bias (2147483691 = 0x8000002B = 43 + 2^31,
; applied by the preceding `por` of a constant-pool value).
define <2 x i64> @ult_43_v2i64(<2 x i64> %0) {
; SSE2-LABEL: ult_43_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    psadbw %xmm1, %xmm0
; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483691,2147483691]
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ult_43_v2i64:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    psadbw %xmm1, %xmm0
; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483691,2147483691]
; SSE3-NEXT:    movdqa %xmm1, %xmm2
; SSE3-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE3-NEXT:    pand %xmm3, %xmm1
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE3-NEXT:    por %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ult_43_v2i64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    psadbw %xmm3, %xmm0
; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483691,2147483691]
; SSSE3-NEXT:    movdqa %xmm1, %xmm2
; SSSE3-NEXT:    pcmpgtd %xmm0, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSSE3-NEXT:    pand %xmm3, %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ult_43_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    psadbw %xmm3, %xmm0
; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483691,2147483691]
; SSE41-NEXT:    movdqa %xmm1, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm3, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ult_43_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [43,43]
; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ult_43_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [43,43]
; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ult_43_v2i64:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} xmm1 = [43,43]
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ult_43_v2i64:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ult_43_v2i64:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vmovdqa {{.*#+}} xmm1 = [43,43]
; BITALG_NOVLX-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ult_43_v2i64:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
  %3 = icmp ult <2 x i64> %2, <i64 43, i64 43>
  %4 = sext <2 x i1> %3 to <2 x i64>
  ret <2 x i64> %4
}
32449define <2 x i64> @ugt_43_v2i64(<2 x i64> %0) {
32450; SSE2-LABEL: ugt_43_v2i64:
32451; SSE2:       # %bb.0:
32452; SSE2-NEXT:    movdqa %xmm0, %xmm1
32453; SSE2-NEXT:    psrlw $1, %xmm1
32454; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
32455; SSE2-NEXT:    psubb %xmm1, %xmm0
32456; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
32457; SSE2-NEXT:    movdqa %xmm0, %xmm2
32458; SSE2-NEXT:    pand %xmm1, %xmm2
32459; SSE2-NEXT:    psrlw $2, %xmm0
32460; SSE2-NEXT:    pand %xmm1, %xmm0
32461; SSE2-NEXT:    paddb %xmm2, %xmm0
32462; SSE2-NEXT:    movdqa %xmm0, %xmm1
32463; SSE2-NEXT:    psrlw $4, %xmm1
32464; SSE2-NEXT:    paddb %xmm0, %xmm1
32465; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
32466; SSE2-NEXT:    pxor %xmm0, %xmm0
32467; SSE2-NEXT:    psadbw %xmm1, %xmm0
32468; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
32469; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483691,2147483691]
32470; SSE2-NEXT:    movdqa %xmm0, %xmm2
32471; SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
32472; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
32473; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
32474; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
32475; SSE2-NEXT:    pand %xmm3, %xmm1
32476; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
32477; SSE2-NEXT:    por %xmm1, %xmm0
32478; SSE2-NEXT:    retq
32479;
32480; SSE3-LABEL: ugt_43_v2i64:
32481; SSE3:       # %bb.0:
32482; SSE3-NEXT:    movdqa %xmm0, %xmm1
32483; SSE3-NEXT:    psrlw $1, %xmm1
32484; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
32485; SSE3-NEXT:    psubb %xmm1, %xmm0
32486; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
32487; SSE3-NEXT:    movdqa %xmm0, %xmm2
32488; SSE3-NEXT:    pand %xmm1, %xmm2
32489; SSE3-NEXT:    psrlw $2, %xmm0
32490; SSE3-NEXT:    pand %xmm1, %xmm0
32491; SSE3-NEXT:    paddb %xmm2, %xmm0
32492; SSE3-NEXT:    movdqa %xmm0, %xmm1
32493; SSE3-NEXT:    psrlw $4, %xmm1
32494; SSE3-NEXT:    paddb %xmm0, %xmm1
32495; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
32496; SSE3-NEXT:    pxor %xmm0, %xmm0
32497; SSE3-NEXT:    psadbw %xmm1, %xmm0
32498; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
32499; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483691,2147483691]
32500; SSE3-NEXT:    movdqa %xmm0, %xmm2
32501; SSE3-NEXT:    pcmpgtd %xmm1, %xmm2
32502; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
32503; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
32504; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
32505; SSE3-NEXT:    pand %xmm3, %xmm1
32506; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
32507; SSE3-NEXT:    por %xmm1, %xmm0
32508; SSE3-NEXT:    retq
32509;
32510; SSSE3-LABEL: ugt_43_v2i64:
32511; SSSE3:       # %bb.0:
32512; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
32513; SSSE3-NEXT:    movdqa %xmm0, %xmm2
32514; SSSE3-NEXT:    pand %xmm1, %xmm2
32515; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
32516; SSSE3-NEXT:    movdqa %xmm3, %xmm4
32517; SSSE3-NEXT:    pshufb %xmm2, %xmm4
32518; SSSE3-NEXT:    psrlw $4, %xmm0
32519; SSSE3-NEXT:    pand %xmm1, %xmm0
32520; SSSE3-NEXT:    pshufb %xmm0, %xmm3
32521; SSSE3-NEXT:    paddb %xmm4, %xmm3
32522; SSSE3-NEXT:    pxor %xmm0, %xmm0
32523; SSSE3-NEXT:    psadbw %xmm3, %xmm0
32524; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
32525; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483691,2147483691]
32526; SSSE3-NEXT:    movdqa %xmm0, %xmm2
32527; SSSE3-NEXT:    pcmpgtd %xmm1, %xmm2
32528; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
32529; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
32530; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
32531; SSSE3-NEXT:    pand %xmm3, %xmm1
32532; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
32533; SSSE3-NEXT:    por %xmm1, %xmm0
32534; SSSE3-NEXT:    retq
32535;
32536; SSE41-LABEL: ugt_43_v2i64:
32537; SSE41:       # %bb.0:
32538; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
32539; SSE41-NEXT:    movdqa %xmm0, %xmm2
32540; SSE41-NEXT:    pand %xmm1, %xmm2
32541; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
32542; SSE41-NEXT:    movdqa %xmm3, %xmm4
32543; SSE41-NEXT:    pshufb %xmm2, %xmm4
32544; SSE41-NEXT:    psrlw $4, %xmm0
32545; SSE41-NEXT:    pand %xmm1, %xmm0
32546; SSE41-NEXT:    pshufb %xmm0, %xmm3
32547; SSE41-NEXT:    paddb %xmm4, %xmm3
32548; SSE41-NEXT:    pxor %xmm0, %xmm0
32549; SSE41-NEXT:    psadbw %xmm3, %xmm0
32550; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
32551; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483691,2147483691]
32552; SSE41-NEXT:    movdqa %xmm0, %xmm2
32553; SSE41-NEXT:    pcmpgtd %xmm1, %xmm2
32554; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
32555; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
32556; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
32557; SSE41-NEXT:    pand %xmm3, %xmm1
32558; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
32559; SSE41-NEXT:    por %xmm1, %xmm0
32560; SSE41-NEXT:    retq
32561;
32562; AVX1-LABEL: ugt_43_v2i64:
32563; AVX1:       # %bb.0:
32564; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
32565; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
32566; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
32567; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
32568; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
32569; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
32570; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
32571; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
32572; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
32573; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
32574; AVX1-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
32575; AVX1-NEXT:    retq
32576;
32577; AVX2-LABEL: ugt_43_v2i64:
32578; AVX2:       # %bb.0:
32579; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
32580; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
32581; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
32582; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
32583; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
32584; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
32585; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
32586; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
32587; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
32588; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
32589; AVX2-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
32590; AVX2-NEXT:    retq
32591;
32592; AVX512VPOPCNTDQ-LABEL: ugt_43_v2i64:
32593; AVX512VPOPCNTDQ:       # %bb.0:
32594; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
32595; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
32596; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
32597; AVX512VPOPCNTDQ-NEXT:    vzeroupper
32598; AVX512VPOPCNTDQ-NEXT:    retq
32599;
32600; AVX512VPOPCNTDQVL-LABEL: ugt_43_v2i64:
32601; AVX512VPOPCNTDQVL:       # %bb.0:
32602; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
32603; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
32604; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
32605; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
32606; AVX512VPOPCNTDQVL-NEXT:    retq
32607;
32608; BITALG_NOVLX-LABEL: ugt_43_v2i64:
32609; BITALG_NOVLX:       # %bb.0:
32610; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
32611; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
32612; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
32613; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
32614; BITALG_NOVLX-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
32615; BITALG_NOVLX-NEXT:    vzeroupper
32616; BITALG_NOVLX-NEXT:    retq
32617;
32618; BITALG-LABEL: ugt_43_v2i64:
32619; BITALG:       # %bb.0:
32620; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
32621; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
32622; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
32623; BITALG-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
32624; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
32625; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
32626; BITALG-NEXT:    retq
32627  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
32628  %3 = icmp ugt <2 x i64> %2, <i64 43, i64 43>
32629  %4 = sext <2 x i1> %3 to <2 x i64>
32630  ret <2 x i64> %4
32631}
32632
; Verifies codegen of: sext(ctpop(<2 x i64> %0) u< 44) at every SSE/AVX/AVX512
; feature level exercised by the RUN lines.
; NOTE(review): the CHECK lines below are autogenerated by
; utils/update_llc_test_checks.py — regenerate them, do not hand-edit.
; On targets without an unsigned 64-bit vector compare, the constant is biased
; by 2^31 (44 + 2147483648 = 2147483692): the `por` folds the sign bias into
; the popcount, then signed pcmpgtd/pcmpeqd on dword halves plus pshufd/pand/por
; reassemble a 64-bit unsigned-less-than mask.
define <2 x i64> @ult_44_v2i64(<2 x i64> %0) {
; SSE2-LABEL: ult_44_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    psadbw %xmm1, %xmm0
; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483692,2147483692]
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ult_44_v2i64:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    psadbw %xmm1, %xmm0
; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483692,2147483692]
; SSE3-NEXT:    movdqa %xmm1, %xmm2
; SSE3-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE3-NEXT:    pand %xmm3, %xmm1
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE3-NEXT:    por %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ult_44_v2i64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    psadbw %xmm3, %xmm0
; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483692,2147483692]
; SSSE3-NEXT:    movdqa %xmm1, %xmm2
; SSSE3-NEXT:    pcmpgtd %xmm0, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSSE3-NEXT:    pand %xmm3, %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ult_44_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    psadbw %xmm3, %xmm0
; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483692,2147483692]
; SSE41-NEXT:    movdqa %xmm1, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm3, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ult_44_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [44,44]
; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ult_44_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [44,44]
; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ult_44_v2i64:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} xmm1 = [44,44]
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ult_44_v2i64:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ult_44_v2i64:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vmovdqa {{.*#+}} xmm1 = [44,44]
; BITALG_NOVLX-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ult_44_v2i64:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
  %3 = icmp ult <2 x i64> %2, <i64 44, i64 44>
  %4 = sext <2 x i1> %3 to <2 x i64>
  ret <2 x i64> %4
}
32820
; Verifies codegen of: sext(ctpop(<2 x i64> %0) u> 44) at every SSE/AVX/AVX512
; feature level exercised by the RUN lines.
; NOTE(review): the CHECK lines below are autogenerated by
; utils/update_llc_test_checks.py — regenerate them, do not hand-edit.
; Pre-AVX512 lacks an unsigned 64-bit vector compare, so the threshold is
; biased by 2^31 (44 + 2147483648 = 2147483692), the `por` folds the same bias
; into the popcount, and signed pcmpgtd/pcmpeqd on dword halves plus
; pshufd/pand/por rebuild the 64-bit unsigned-greater-than mask.
define <2 x i64> @ugt_44_v2i64(<2 x i64> %0) {
; SSE2-LABEL: ugt_44_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    psadbw %xmm1, %xmm0
; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483692,2147483692]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ugt_44_v2i64:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    psadbw %xmm1, %xmm0
; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483692,2147483692]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE3-NEXT:    pand %xmm3, %xmm1
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE3-NEXT:    por %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ugt_44_v2i64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    psadbw %xmm3, %xmm0
; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483692,2147483692]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pcmpgtd %xmm1, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSSE3-NEXT:    pand %xmm3, %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ugt_44_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    psadbw %xmm3, %xmm0
; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483692,2147483692]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm3, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ugt_44_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ugt_44_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ugt_44_v2i64:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ugt_44_v2i64:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ugt_44_v2i64:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ugt_44_v2i64:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
  %3 = icmp ugt <2 x i64> %2, <i64 44, i64 44>
  %4 = sext <2 x i1> %3 to <2 x i64>
  ret <2 x i64> %4
}
33004
; Verifies codegen of: sext(ctpop(<2 x i64> %0) u< 45) at every SSE/AVX/AVX512
; feature level exercised by the RUN lines.
; NOTE(review): the CHECK lines below are autogenerated by
; utils/update_llc_test_checks.py — regenerate them, do not hand-edit.
; Without an unsigned 64-bit vector compare, the threshold is biased by 2^31
; (45 + 2147483648 = 2147483693): the `por` folds the bias into the popcount,
; then signed pcmpgtd/pcmpeqd on dword halves plus pshufd/pand/por reassemble
; the 64-bit unsigned-less-than mask.
define <2 x i64> @ult_45_v2i64(<2 x i64> %0) {
; SSE2-LABEL: ult_45_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    psadbw %xmm1, %xmm0
; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483693,2147483693]
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ult_45_v2i64:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    psadbw %xmm1, %xmm0
; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483693,2147483693]
; SSE3-NEXT:    movdqa %xmm1, %xmm2
; SSE3-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE3-NEXT:    pand %xmm3, %xmm1
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE3-NEXT:    por %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ult_45_v2i64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    psadbw %xmm3, %xmm0
; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483693,2147483693]
; SSSE3-NEXT:    movdqa %xmm1, %xmm2
; SSSE3-NEXT:    pcmpgtd %xmm0, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSSE3-NEXT:    pand %xmm3, %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ult_45_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    psadbw %xmm3, %xmm0
; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483693,2147483693]
; SSE41-NEXT:    movdqa %xmm1, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm3, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ult_45_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [45,45]
; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ult_45_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [45,45]
; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ult_45_v2i64:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} xmm1 = [45,45]
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ult_45_v2i64:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ult_45_v2i64:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vmovdqa {{.*#+}} xmm1 = [45,45]
; BITALG_NOVLX-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ult_45_v2i64:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
  %3 = icmp ult <2 x i64> %2, <i64 45, i64 45>
  %4 = sext <2 x i1> %3 to <2 x i64>
  ret <2 x i64> %4
}
33192
33193define <2 x i64> @ugt_45_v2i64(<2 x i64> %0) {
33194; SSE2-LABEL: ugt_45_v2i64:
33195; SSE2:       # %bb.0:
33196; SSE2-NEXT:    movdqa %xmm0, %xmm1
33197; SSE2-NEXT:    psrlw $1, %xmm1
33198; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
33199; SSE2-NEXT:    psubb %xmm1, %xmm0
33200; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
33201; SSE2-NEXT:    movdqa %xmm0, %xmm2
33202; SSE2-NEXT:    pand %xmm1, %xmm2
33203; SSE2-NEXT:    psrlw $2, %xmm0
33204; SSE2-NEXT:    pand %xmm1, %xmm0
33205; SSE2-NEXT:    paddb %xmm2, %xmm0
33206; SSE2-NEXT:    movdqa %xmm0, %xmm1
33207; SSE2-NEXT:    psrlw $4, %xmm1
33208; SSE2-NEXT:    paddb %xmm0, %xmm1
33209; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
33210; SSE2-NEXT:    pxor %xmm0, %xmm0
33211; SSE2-NEXT:    psadbw %xmm1, %xmm0
33212; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
33213; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483693,2147483693]
33214; SSE2-NEXT:    movdqa %xmm0, %xmm2
33215; SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
33216; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
33217; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
33218; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
33219; SSE2-NEXT:    pand %xmm3, %xmm1
33220; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
33221; SSE2-NEXT:    por %xmm1, %xmm0
33222; SSE2-NEXT:    retq
33223;
33224; SSE3-LABEL: ugt_45_v2i64:
33225; SSE3:       # %bb.0:
33226; SSE3-NEXT:    movdqa %xmm0, %xmm1
33227; SSE3-NEXT:    psrlw $1, %xmm1
33228; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
33229; SSE3-NEXT:    psubb %xmm1, %xmm0
33230; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
33231; SSE3-NEXT:    movdqa %xmm0, %xmm2
33232; SSE3-NEXT:    pand %xmm1, %xmm2
33233; SSE3-NEXT:    psrlw $2, %xmm0
33234; SSE3-NEXT:    pand %xmm1, %xmm0
33235; SSE3-NEXT:    paddb %xmm2, %xmm0
33236; SSE3-NEXT:    movdqa %xmm0, %xmm1
33237; SSE3-NEXT:    psrlw $4, %xmm1
33238; SSE3-NEXT:    paddb %xmm0, %xmm1
33239; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
33240; SSE3-NEXT:    pxor %xmm0, %xmm0
33241; SSE3-NEXT:    psadbw %xmm1, %xmm0
33242; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
33243; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483693,2147483693]
33244; SSE3-NEXT:    movdqa %xmm0, %xmm2
33245; SSE3-NEXT:    pcmpgtd %xmm1, %xmm2
33246; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
33247; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
33248; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
33249; SSE3-NEXT:    pand %xmm3, %xmm1
33250; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
33251; SSE3-NEXT:    por %xmm1, %xmm0
33252; SSE3-NEXT:    retq
33253;
33254; SSSE3-LABEL: ugt_45_v2i64:
33255; SSSE3:       # %bb.0:
33256; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
33257; SSSE3-NEXT:    movdqa %xmm0, %xmm2
33258; SSSE3-NEXT:    pand %xmm1, %xmm2
33259; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
33260; SSSE3-NEXT:    movdqa %xmm3, %xmm4
33261; SSSE3-NEXT:    pshufb %xmm2, %xmm4
33262; SSSE3-NEXT:    psrlw $4, %xmm0
33263; SSSE3-NEXT:    pand %xmm1, %xmm0
33264; SSSE3-NEXT:    pshufb %xmm0, %xmm3
33265; SSSE3-NEXT:    paddb %xmm4, %xmm3
33266; SSSE3-NEXT:    pxor %xmm0, %xmm0
33267; SSSE3-NEXT:    psadbw %xmm3, %xmm0
33268; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
33269; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483693,2147483693]
33270; SSSE3-NEXT:    movdqa %xmm0, %xmm2
33271; SSSE3-NEXT:    pcmpgtd %xmm1, %xmm2
33272; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
33273; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
33274; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
33275; SSSE3-NEXT:    pand %xmm3, %xmm1
33276; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
33277; SSSE3-NEXT:    por %xmm1, %xmm0
33278; SSSE3-NEXT:    retq
33279;
33280; SSE41-LABEL: ugt_45_v2i64:
33281; SSE41:       # %bb.0:
33282; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
33283; SSE41-NEXT:    movdqa %xmm0, %xmm2
33284; SSE41-NEXT:    pand %xmm1, %xmm2
33285; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
33286; SSE41-NEXT:    movdqa %xmm3, %xmm4
33287; SSE41-NEXT:    pshufb %xmm2, %xmm4
33288; SSE41-NEXT:    psrlw $4, %xmm0
33289; SSE41-NEXT:    pand %xmm1, %xmm0
33290; SSE41-NEXT:    pshufb %xmm0, %xmm3
33291; SSE41-NEXT:    paddb %xmm4, %xmm3
33292; SSE41-NEXT:    pxor %xmm0, %xmm0
33293; SSE41-NEXT:    psadbw %xmm3, %xmm0
33294; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
33295; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483693,2147483693]
33296; SSE41-NEXT:    movdqa %xmm0, %xmm2
33297; SSE41-NEXT:    pcmpgtd %xmm1, %xmm2
33298; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
33299; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
33300; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
33301; SSE41-NEXT:    pand %xmm3, %xmm1
33302; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
33303; SSE41-NEXT:    por %xmm1, %xmm0
33304; SSE41-NEXT:    retq
33305;
33306; AVX1-LABEL: ugt_45_v2i64:
33307; AVX1:       # %bb.0:
33308; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
33309; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
33310; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
33311; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
33312; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
33313; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
33314; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
33315; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
33316; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
33317; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
33318; AVX1-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
33319; AVX1-NEXT:    retq
33320;
33321; AVX2-LABEL: ugt_45_v2i64:
33322; AVX2:       # %bb.0:
33323; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
33324; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
33325; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
33326; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
33327; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
33328; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
33329; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
33330; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
33331; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
33332; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
33333; AVX2-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
33334; AVX2-NEXT:    retq
33335;
33336; AVX512VPOPCNTDQ-LABEL: ugt_45_v2i64:
33337; AVX512VPOPCNTDQ:       # %bb.0:
33338; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
33339; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
33340; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
33341; AVX512VPOPCNTDQ-NEXT:    vzeroupper
33342; AVX512VPOPCNTDQ-NEXT:    retq
33343;
33344; AVX512VPOPCNTDQVL-LABEL: ugt_45_v2i64:
33345; AVX512VPOPCNTDQVL:       # %bb.0:
33346; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
33347; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
33348; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
33349; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
33350; AVX512VPOPCNTDQVL-NEXT:    retq
33351;
33352; BITALG_NOVLX-LABEL: ugt_45_v2i64:
33353; BITALG_NOVLX:       # %bb.0:
33354; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
33355; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
33356; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
33357; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
33358; BITALG_NOVLX-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
33359; BITALG_NOVLX-NEXT:    vzeroupper
33360; BITALG_NOVLX-NEXT:    retq
33361;
33362; BITALG-LABEL: ugt_45_v2i64:
33363; BITALG:       # %bb.0:
33364; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
33365; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
33366; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
33367; BITALG-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
33368; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
33369; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
33370; BITALG-NEXT:    retq
33371  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
33372  %3 = icmp ugt <2 x i64> %2, <i64 45, i64 45>
33373  %4 = sext <2 x i1> %3 to <2 x i64>
33374  ret <2 x i64> %4
33375}
33376
; Checks that ctpop(x) ult 46 (sext'd to a <2 x i64> mask) lowers as expected
; per subtarget. Assertions are autogenerated; fused viewer line numbers were
; stripped to restore valid IR/CHECK lines.
define <2 x i64> @ult_46_v2i64(<2 x i64> %0) {
; SSE2-LABEL: ult_46_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    psadbw %xmm1, %xmm0
; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483694,2147483694]
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ult_46_v2i64:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    psadbw %xmm1, %xmm0
; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483694,2147483694]
; SSE3-NEXT:    movdqa %xmm1, %xmm2
; SSE3-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE3-NEXT:    pand %xmm3, %xmm1
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE3-NEXT:    por %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ult_46_v2i64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    psadbw %xmm3, %xmm0
; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483694,2147483694]
; SSSE3-NEXT:    movdqa %xmm1, %xmm2
; SSSE3-NEXT:    pcmpgtd %xmm0, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSSE3-NEXT:    pand %xmm3, %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ult_46_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    psadbw %xmm3, %xmm0
; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483694,2147483694]
; SSE41-NEXT:    movdqa %xmm1, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm3, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ult_46_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [46,46]
; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ult_46_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [46,46]
; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ult_46_v2i64:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} xmm1 = [46,46]
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ult_46_v2i64:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ult_46_v2i64:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vmovdqa {{.*#+}} xmm1 = [46,46]
; BITALG_NOVLX-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ult_46_v2i64:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
  %3 = icmp ult <2 x i64> %2, <i64 46, i64 46>
  %4 = sext <2 x i1> %3 to <2 x i64>
  ret <2 x i64> %4
}
33564
; Checks that ctpop(x) ugt 46 (sext'd to a <2 x i64> mask) lowers as expected
; per subtarget. Assertions are autogenerated; fused viewer line numbers were
; stripped to restore valid IR/CHECK lines.
define <2 x i64> @ugt_46_v2i64(<2 x i64> %0) {
; SSE2-LABEL: ugt_46_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    psadbw %xmm1, %xmm0
; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483694,2147483694]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ugt_46_v2i64:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    psadbw %xmm1, %xmm0
; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483694,2147483694]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE3-NEXT:    pand %xmm3, %xmm1
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE3-NEXT:    por %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ugt_46_v2i64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    psadbw %xmm3, %xmm0
; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483694,2147483694]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pcmpgtd %xmm1, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSSE3-NEXT:    pand %xmm3, %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ugt_46_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    psadbw %xmm3, %xmm0
; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483694,2147483694]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm3, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ugt_46_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ugt_46_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ugt_46_v2i64:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ugt_46_v2i64:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ugt_46_v2i64:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ugt_46_v2i64:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
  %3 = icmp ugt <2 x i64> %2, <i64 46, i64 46>
  %4 = sext <2 x i1> %3 to <2 x i64>
  ret <2 x i64> %4
}
33748
; Checks that ctpop(x) ult 47 (sext'd to a <2 x i64> mask) lowers as expected
; per subtarget. Assertions are autogenerated; fused viewer line numbers were
; stripped to restore valid IR/CHECK lines.
define <2 x i64> @ult_47_v2i64(<2 x i64> %0) {
; SSE2-LABEL: ult_47_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    psadbw %xmm1, %xmm0
; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483695,2147483695]
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ult_47_v2i64:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    psadbw %xmm1, %xmm0
; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483695,2147483695]
; SSE3-NEXT:    movdqa %xmm1, %xmm2
; SSE3-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE3-NEXT:    pand %xmm3, %xmm1
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE3-NEXT:    por %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ult_47_v2i64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    psadbw %xmm3, %xmm0
; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483695,2147483695]
; SSSE3-NEXT:    movdqa %xmm1, %xmm2
; SSSE3-NEXT:    pcmpgtd %xmm0, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSSE3-NEXT:    pand %xmm3, %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ult_47_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    psadbw %xmm3, %xmm0
; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483695,2147483695]
; SSE41-NEXT:    movdqa %xmm1, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm3, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ult_47_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [47,47]
; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ult_47_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [47,47]
; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ult_47_v2i64:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} xmm1 = [47,47]
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ult_47_v2i64:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ult_47_v2i64:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vmovdqa {{.*#+}} xmm1 = [47,47]
; BITALG_NOVLX-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ult_47_v2i64:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
  %3 = icmp ult <2 x i64> %2, <i64 47, i64 47>
  %4 = sext <2 x i1> %3 to <2 x i64>
  ret <2 x i64> %4
}
33936
; Checks codegen for: popcount each i64 lane (llvm.ctpop.v2i64), compare ugt 47,
; and sign-extend the i1 results to a <2 x i64> all-ones/all-zeros mask.
; NOTE: the CHECK lines below are autogenerated by update_llc_test_checks.py --
; edit the IR and regenerate instead of hand-editing them.
define <2 x i64> @ugt_47_v2i64(<2 x i64> %0) {
; SSE2-LABEL: ugt_47_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    psadbw %xmm1, %xmm0
; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483695,2147483695]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ugt_47_v2i64:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    psadbw %xmm1, %xmm0
; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483695,2147483695]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE3-NEXT:    pand %xmm3, %xmm1
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE3-NEXT:    por %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ugt_47_v2i64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    psadbw %xmm3, %xmm0
; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483695,2147483695]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pcmpgtd %xmm1, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSSE3-NEXT:    pand %xmm3, %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ugt_47_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    psadbw %xmm3, %xmm0
; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483695,2147483695]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm3, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ugt_47_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ugt_47_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ugt_47_v2i64:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ugt_47_v2i64:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ugt_47_v2i64:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ugt_47_v2i64:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
  %3 = icmp ugt <2 x i64> %2, <i64 47, i64 47>
  %4 = sext <2 x i1> %3 to <2 x i64>
  ret <2 x i64> %4
}
34120
; Checks codegen for: popcount each i64 lane (llvm.ctpop.v2i64), compare ult 48,
; and sign-extend the i1 results to a <2 x i64> all-ones/all-zeros mask.
; NOTE: the CHECK lines below are autogenerated by update_llc_test_checks.py --
; edit the IR and regenerate instead of hand-editing them.
define <2 x i64> @ult_48_v2i64(<2 x i64> %0) {
; SSE2-LABEL: ult_48_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    psadbw %xmm1, %xmm0
; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483696,2147483696]
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ult_48_v2i64:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    psadbw %xmm1, %xmm0
; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483696,2147483696]
; SSE3-NEXT:    movdqa %xmm1, %xmm2
; SSE3-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE3-NEXT:    pand %xmm3, %xmm1
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE3-NEXT:    por %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ult_48_v2i64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    psadbw %xmm3, %xmm0
; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483696,2147483696]
; SSSE3-NEXT:    movdqa %xmm1, %xmm2
; SSSE3-NEXT:    pcmpgtd %xmm0, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSSE3-NEXT:    pand %xmm3, %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ult_48_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    psadbw %xmm3, %xmm0
; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483696,2147483696]
; SSE41-NEXT:    movdqa %xmm1, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm3, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ult_48_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [48,48]
; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ult_48_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [48,48]
; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ult_48_v2i64:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} xmm1 = [48,48]
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ult_48_v2i64:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ult_48_v2i64:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vmovdqa {{.*#+}} xmm1 = [48,48]
; BITALG_NOVLX-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ult_48_v2i64:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
  %3 = icmp ult <2 x i64> %2, <i64 48, i64 48>
  %4 = sext <2 x i1> %3 to <2 x i64>
  ret <2 x i64> %4
}
34308
; Checks codegen for: popcount each i64 lane (llvm.ctpop.v2i64), compare ugt 48,
; and sign-extend the i1 results to a <2 x i64> all-ones/all-zeros mask.
; NOTE: the CHECK lines below are autogenerated by update_llc_test_checks.py --
; edit the IR and regenerate instead of hand-editing them.
define <2 x i64> @ugt_48_v2i64(<2 x i64> %0) {
; SSE2-LABEL: ugt_48_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    psadbw %xmm1, %xmm0
; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483696,2147483696]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ugt_48_v2i64:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    psadbw %xmm1, %xmm0
; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483696,2147483696]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE3-NEXT:    pand %xmm3, %xmm1
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE3-NEXT:    por %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ugt_48_v2i64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    psadbw %xmm3, %xmm0
; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483696,2147483696]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pcmpgtd %xmm1, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSSE3-NEXT:    pand %xmm3, %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ugt_48_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    psadbw %xmm3, %xmm0
; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483696,2147483696]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm3, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ugt_48_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ugt_48_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ugt_48_v2i64:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ugt_48_v2i64:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ugt_48_v2i64:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ugt_48_v2i64:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
  %3 = icmp ugt <2 x i64> %2, <i64 48, i64 48>
  %4 = sext <2 x i1> %3 to <2 x i64>
  ret <2 x i64> %4
}
34492
; Checks codegen for: popcount each i64 lane (llvm.ctpop.v2i64), compare ult 49,
; and sign-extend the i1 results to a <2 x i64> all-ones/all-zeros mask.
; NOTE: the CHECK lines below are autogenerated by update_llc_test_checks.py --
; edit the IR and regenerate instead of hand-editing them.
define <2 x i64> @ult_49_v2i64(<2 x i64> %0) {
; SSE2-LABEL: ult_49_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    psadbw %xmm1, %xmm0
; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483697,2147483697]
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ult_49_v2i64:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    psadbw %xmm1, %xmm0
; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483697,2147483697]
; SSE3-NEXT:    movdqa %xmm1, %xmm2
; SSE3-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE3-NEXT:    pand %xmm3, %xmm1
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE3-NEXT:    por %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ult_49_v2i64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    psadbw %xmm3, %xmm0
; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483697,2147483697]
; SSSE3-NEXT:    movdqa %xmm1, %xmm2
; SSSE3-NEXT:    pcmpgtd %xmm0, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSSE3-NEXT:    pand %xmm3, %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ult_49_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    psadbw %xmm3, %xmm0
; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483697,2147483697]
; SSE41-NEXT:    movdqa %xmm1, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm3, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ult_49_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [49,49]
; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ult_49_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [49,49]
; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ult_49_v2i64:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} xmm1 = [49,49]
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ult_49_v2i64:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ult_49_v2i64:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vmovdqa {{.*#+}} xmm1 = [49,49]
; BITALG_NOVLX-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ult_49_v2i64:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
  %3 = icmp ult <2 x i64> %2, <i64 49, i64 49>
  %4 = sext <2 x i1> %3 to <2 x i64>
  ret <2 x i64> %4
}
34680
; NOTE(review): the CHECK lines in this function are autogenerated by
; utils/update_llc_test_checks.py (see the file header). Do not hand-edit
; them; rerun the script after any codegen change.
; Pins the x86 lowering of sext(icmp ugt (ctpop <2 x i64> %0), 49):
; plain SSE builds the byte popcount with shift/mask arithmetic and sums it
; with psadbw against zero; SSSE3/SSE4.1/AVX use the pshufb nibble-LUT
; popcount; AVX512 VPOPCNTDQ / BITALG use native vpopcntq/vpopcntb.
; The SSE compare constant 2147483697 appears to be 0x80000000+49, i.e. the
; sign-bias trick for an unsigned compare via signed pcmpgtd -- TODO confirm.
34681define <2 x i64> @ugt_49_v2i64(<2 x i64> %0) {
34682; SSE2-LABEL: ugt_49_v2i64:
34683; SSE2:       # %bb.0:
34684; SSE2-NEXT:    movdqa %xmm0, %xmm1
34685; SSE2-NEXT:    psrlw $1, %xmm1
34686; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
34687; SSE2-NEXT:    psubb %xmm1, %xmm0
34688; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
34689; SSE2-NEXT:    movdqa %xmm0, %xmm2
34690; SSE2-NEXT:    pand %xmm1, %xmm2
34691; SSE2-NEXT:    psrlw $2, %xmm0
34692; SSE2-NEXT:    pand %xmm1, %xmm0
34693; SSE2-NEXT:    paddb %xmm2, %xmm0
34694; SSE2-NEXT:    movdqa %xmm0, %xmm1
34695; SSE2-NEXT:    psrlw $4, %xmm1
34696; SSE2-NEXT:    paddb %xmm0, %xmm1
34697; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
34698; SSE2-NEXT:    pxor %xmm0, %xmm0
34699; SSE2-NEXT:    psadbw %xmm1, %xmm0
34700; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
34701; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483697,2147483697]
34702; SSE2-NEXT:    movdqa %xmm0, %xmm2
34703; SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
34704; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
34705; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
34706; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
34707; SSE2-NEXT:    pand %xmm3, %xmm1
34708; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
34709; SSE2-NEXT:    por %xmm1, %xmm0
34710; SSE2-NEXT:    retq
34711;
34712; SSE3-LABEL: ugt_49_v2i64:
34713; SSE3:       # %bb.0:
34714; SSE3-NEXT:    movdqa %xmm0, %xmm1
34715; SSE3-NEXT:    psrlw $1, %xmm1
34716; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
34717; SSE3-NEXT:    psubb %xmm1, %xmm0
34718; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
34719; SSE3-NEXT:    movdqa %xmm0, %xmm2
34720; SSE3-NEXT:    pand %xmm1, %xmm2
34721; SSE3-NEXT:    psrlw $2, %xmm0
34722; SSE3-NEXT:    pand %xmm1, %xmm0
34723; SSE3-NEXT:    paddb %xmm2, %xmm0
34724; SSE3-NEXT:    movdqa %xmm0, %xmm1
34725; SSE3-NEXT:    psrlw $4, %xmm1
34726; SSE3-NEXT:    paddb %xmm0, %xmm1
34727; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
34728; SSE3-NEXT:    pxor %xmm0, %xmm0
34729; SSE3-NEXT:    psadbw %xmm1, %xmm0
34730; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
34731; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483697,2147483697]
34732; SSE3-NEXT:    movdqa %xmm0, %xmm2
34733; SSE3-NEXT:    pcmpgtd %xmm1, %xmm2
34734; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
34735; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
34736; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
34737; SSE3-NEXT:    pand %xmm3, %xmm1
34738; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
34739; SSE3-NEXT:    por %xmm1, %xmm0
34740; SSE3-NEXT:    retq
34741;
34742; SSSE3-LABEL: ugt_49_v2i64:
34743; SSSE3:       # %bb.0:
34744; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
34745; SSSE3-NEXT:    movdqa %xmm0, %xmm2
34746; SSSE3-NEXT:    pand %xmm1, %xmm2
34747; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
34748; SSSE3-NEXT:    movdqa %xmm3, %xmm4
34749; SSSE3-NEXT:    pshufb %xmm2, %xmm4
34750; SSSE3-NEXT:    psrlw $4, %xmm0
34751; SSSE3-NEXT:    pand %xmm1, %xmm0
34752; SSSE3-NEXT:    pshufb %xmm0, %xmm3
34753; SSSE3-NEXT:    paddb %xmm4, %xmm3
34754; SSSE3-NEXT:    pxor %xmm0, %xmm0
34755; SSSE3-NEXT:    psadbw %xmm3, %xmm0
34756; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
34757; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483697,2147483697]
34758; SSSE3-NEXT:    movdqa %xmm0, %xmm2
34759; SSSE3-NEXT:    pcmpgtd %xmm1, %xmm2
34760; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
34761; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
34762; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
34763; SSSE3-NEXT:    pand %xmm3, %xmm1
34764; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
34765; SSSE3-NEXT:    por %xmm1, %xmm0
34766; SSSE3-NEXT:    retq
34767;
34768; SSE41-LABEL: ugt_49_v2i64:
34769; SSE41:       # %bb.0:
34770; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
34771; SSE41-NEXT:    movdqa %xmm0, %xmm2
34772; SSE41-NEXT:    pand %xmm1, %xmm2
34773; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
34774; SSE41-NEXT:    movdqa %xmm3, %xmm4
34775; SSE41-NEXT:    pshufb %xmm2, %xmm4
34776; SSE41-NEXT:    psrlw $4, %xmm0
34777; SSE41-NEXT:    pand %xmm1, %xmm0
34778; SSE41-NEXT:    pshufb %xmm0, %xmm3
34779; SSE41-NEXT:    paddb %xmm4, %xmm3
34780; SSE41-NEXT:    pxor %xmm0, %xmm0
34781; SSE41-NEXT:    psadbw %xmm3, %xmm0
34782; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
34783; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483697,2147483697]
34784; SSE41-NEXT:    movdqa %xmm0, %xmm2
34785; SSE41-NEXT:    pcmpgtd %xmm1, %xmm2
34786; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
34787; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
34788; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
34789; SSE41-NEXT:    pand %xmm3, %xmm1
34790; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
34791; SSE41-NEXT:    por %xmm1, %xmm0
34792; SSE41-NEXT:    retq
34793;
34794; AVX1-LABEL: ugt_49_v2i64:
34795; AVX1:       # %bb.0:
34796; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
34797; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
34798; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
34799; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
34800; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
34801; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
34802; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
34803; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
34804; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
34805; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
34806; AVX1-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
34807; AVX1-NEXT:    retq
34808;
34809; AVX2-LABEL: ugt_49_v2i64:
34810; AVX2:       # %bb.0:
34811; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
34812; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
34813; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
34814; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
34815; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
34816; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
34817; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
34818; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
34819; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
34820; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
34821; AVX2-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
34822; AVX2-NEXT:    retq
34823;
34824; AVX512VPOPCNTDQ-LABEL: ugt_49_v2i64:
34825; AVX512VPOPCNTDQ:       # %bb.0:
34826; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
34827; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
34828; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
34829; AVX512VPOPCNTDQ-NEXT:    vzeroupper
34830; AVX512VPOPCNTDQ-NEXT:    retq
34831;
34832; AVX512VPOPCNTDQVL-LABEL: ugt_49_v2i64:
34833; AVX512VPOPCNTDQVL:       # %bb.0:
34834; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
34835; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
34836; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
34837; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
34838; AVX512VPOPCNTDQVL-NEXT:    retq
34839;
34840; BITALG_NOVLX-LABEL: ugt_49_v2i64:
34841; BITALG_NOVLX:       # %bb.0:
34842; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
34843; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
34844; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
34845; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
34846; BITALG_NOVLX-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
34847; BITALG_NOVLX-NEXT:    vzeroupper
34848; BITALG_NOVLX-NEXT:    retq
34849;
34850; BITALG-LABEL: ugt_49_v2i64:
34851; BITALG:       # %bb.0:
34852; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
34853; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
34854; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
34855; BITALG-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
34856; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
34857; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
34858; BITALG-NEXT:    retq
34859  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
34860  %3 = icmp ugt <2 x i64> %2, <i64 49, i64 49>
34861  %4 = sext <2 x i1> %3 to <2 x i64>
34862  ret <2 x i64> %4
34863}
34864
; NOTE(review): CHECK lines autogenerated by utils/update_llc_test_checks.py
; (see the file header); regenerate with the script, do not hand-edit.
; Pins the x86 lowering of sext(icmp ult (ctpop <2 x i64> %0), 50). Same
; popcount strategies as the surrounding tests (SSE shift/mask + psadbw,
; pshufb nibble LUT, native vpopcnt*); for ult the AVX paths materialize
; [50,50] and emit vpcmpgtq with operands swapped (limit > popcount), and
; the VL/BITALG paths use vpcmpltuq into a mask register.
; The SSE constant 2147483698 appears to be 0x80000000+50 (sign-bias for an
; unsigned compare built from pcmpgtd/pcmpeqd) -- TODO confirm.
34865define <2 x i64> @ult_50_v2i64(<2 x i64> %0) {
34866; SSE2-LABEL: ult_50_v2i64:
34867; SSE2:       # %bb.0:
34868; SSE2-NEXT:    movdqa %xmm0, %xmm1
34869; SSE2-NEXT:    psrlw $1, %xmm1
34870; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
34871; SSE2-NEXT:    psubb %xmm1, %xmm0
34872; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
34873; SSE2-NEXT:    movdqa %xmm0, %xmm2
34874; SSE2-NEXT:    pand %xmm1, %xmm2
34875; SSE2-NEXT:    psrlw $2, %xmm0
34876; SSE2-NEXT:    pand %xmm1, %xmm0
34877; SSE2-NEXT:    paddb %xmm2, %xmm0
34878; SSE2-NEXT:    movdqa %xmm0, %xmm1
34879; SSE2-NEXT:    psrlw $4, %xmm1
34880; SSE2-NEXT:    paddb %xmm0, %xmm1
34881; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
34882; SSE2-NEXT:    pxor %xmm0, %xmm0
34883; SSE2-NEXT:    psadbw %xmm1, %xmm0
34884; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
34885; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483698,2147483698]
34886; SSE2-NEXT:    movdqa %xmm1, %xmm2
34887; SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
34888; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
34889; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
34890; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
34891; SSE2-NEXT:    pand %xmm3, %xmm1
34892; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
34893; SSE2-NEXT:    por %xmm1, %xmm0
34894; SSE2-NEXT:    retq
34895;
34896; SSE3-LABEL: ult_50_v2i64:
34897; SSE3:       # %bb.0:
34898; SSE3-NEXT:    movdqa %xmm0, %xmm1
34899; SSE3-NEXT:    psrlw $1, %xmm1
34900; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
34901; SSE3-NEXT:    psubb %xmm1, %xmm0
34902; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
34903; SSE3-NEXT:    movdqa %xmm0, %xmm2
34904; SSE3-NEXT:    pand %xmm1, %xmm2
34905; SSE3-NEXT:    psrlw $2, %xmm0
34906; SSE3-NEXT:    pand %xmm1, %xmm0
34907; SSE3-NEXT:    paddb %xmm2, %xmm0
34908; SSE3-NEXT:    movdqa %xmm0, %xmm1
34909; SSE3-NEXT:    psrlw $4, %xmm1
34910; SSE3-NEXT:    paddb %xmm0, %xmm1
34911; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
34912; SSE3-NEXT:    pxor %xmm0, %xmm0
34913; SSE3-NEXT:    psadbw %xmm1, %xmm0
34914; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
34915; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483698,2147483698]
34916; SSE3-NEXT:    movdqa %xmm1, %xmm2
34917; SSE3-NEXT:    pcmpgtd %xmm0, %xmm2
34918; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
34919; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
34920; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
34921; SSE3-NEXT:    pand %xmm3, %xmm1
34922; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
34923; SSE3-NEXT:    por %xmm1, %xmm0
34924; SSE3-NEXT:    retq
34925;
34926; SSSE3-LABEL: ult_50_v2i64:
34927; SSSE3:       # %bb.0:
34928; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
34929; SSSE3-NEXT:    movdqa %xmm0, %xmm2
34930; SSSE3-NEXT:    pand %xmm1, %xmm2
34931; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
34932; SSSE3-NEXT:    movdqa %xmm3, %xmm4
34933; SSSE3-NEXT:    pshufb %xmm2, %xmm4
34934; SSSE3-NEXT:    psrlw $4, %xmm0
34935; SSSE3-NEXT:    pand %xmm1, %xmm0
34936; SSSE3-NEXT:    pshufb %xmm0, %xmm3
34937; SSSE3-NEXT:    paddb %xmm4, %xmm3
34938; SSSE3-NEXT:    pxor %xmm0, %xmm0
34939; SSSE3-NEXT:    psadbw %xmm3, %xmm0
34940; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
34941; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483698,2147483698]
34942; SSSE3-NEXT:    movdqa %xmm1, %xmm2
34943; SSSE3-NEXT:    pcmpgtd %xmm0, %xmm2
34944; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
34945; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
34946; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
34947; SSSE3-NEXT:    pand %xmm3, %xmm1
34948; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
34949; SSSE3-NEXT:    por %xmm1, %xmm0
34950; SSSE3-NEXT:    retq
34951;
34952; SSE41-LABEL: ult_50_v2i64:
34953; SSE41:       # %bb.0:
34954; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
34955; SSE41-NEXT:    movdqa %xmm0, %xmm2
34956; SSE41-NEXT:    pand %xmm1, %xmm2
34957; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
34958; SSE41-NEXT:    movdqa %xmm3, %xmm4
34959; SSE41-NEXT:    pshufb %xmm2, %xmm4
34960; SSE41-NEXT:    psrlw $4, %xmm0
34961; SSE41-NEXT:    pand %xmm1, %xmm0
34962; SSE41-NEXT:    pshufb %xmm0, %xmm3
34963; SSE41-NEXT:    paddb %xmm4, %xmm3
34964; SSE41-NEXT:    pxor %xmm0, %xmm0
34965; SSE41-NEXT:    psadbw %xmm3, %xmm0
34966; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
34967; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483698,2147483698]
34968; SSE41-NEXT:    movdqa %xmm1, %xmm2
34969; SSE41-NEXT:    pcmpgtd %xmm0, %xmm2
34970; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
34971; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
34972; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
34973; SSE41-NEXT:    pand %xmm3, %xmm1
34974; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
34975; SSE41-NEXT:    por %xmm1, %xmm0
34976; SSE41-NEXT:    retq
34977;
34978; AVX1-LABEL: ult_50_v2i64:
34979; AVX1:       # %bb.0:
34980; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
34981; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
34982; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
34983; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
34984; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
34985; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
34986; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
34987; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
34988; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
34989; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
34990; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [50,50]
34991; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
34992; AVX1-NEXT:    retq
34993;
34994; AVX2-LABEL: ult_50_v2i64:
34995; AVX2:       # %bb.0:
34996; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
34997; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
34998; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
34999; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
35000; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
35001; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
35002; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
35003; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
35004; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
35005; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
35006; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [50,50]
35007; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
35008; AVX2-NEXT:    retq
35009;
35010; AVX512VPOPCNTDQ-LABEL: ult_50_v2i64:
35011; AVX512VPOPCNTDQ:       # %bb.0:
35012; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
35013; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
35014; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} xmm1 = [50,50]
35015; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
35016; AVX512VPOPCNTDQ-NEXT:    vzeroupper
35017; AVX512VPOPCNTDQ-NEXT:    retq
35018;
35019; AVX512VPOPCNTDQVL-LABEL: ult_50_v2i64:
35020; AVX512VPOPCNTDQVL:       # %bb.0:
35021; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
35022; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
35023; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
35024; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
35025; AVX512VPOPCNTDQVL-NEXT:    retq
35026;
35027; BITALG_NOVLX-LABEL: ult_50_v2i64:
35028; BITALG_NOVLX:       # %bb.0:
35029; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
35030; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
35031; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
35032; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
35033; BITALG_NOVLX-NEXT:    vmovdqa {{.*#+}} xmm1 = [50,50]
35034; BITALG_NOVLX-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
35035; BITALG_NOVLX-NEXT:    vzeroupper
35036; BITALG_NOVLX-NEXT:    retq
35037;
35038; BITALG-LABEL: ult_50_v2i64:
35039; BITALG:       # %bb.0:
35040; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
35041; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
35042; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
35043; BITALG-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
35044; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
35045; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
35046; BITALG-NEXT:    retq
35047  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
35048  %3 = icmp ult <2 x i64> %2, <i64 50, i64 50>
35049  %4 = sext <2 x i1> %3 to <2 x i64>
35050  ret <2 x i64> %4
35051}
35052
; NOTE(review): CHECK lines autogenerated by utils/update_llc_test_checks.py
; (see the file header); regenerate with the script, do not hand-edit.
; Pins the x86 lowering of sext(icmp ugt (ctpop <2 x i64> %0), 50): SSE
; variants compute the popcount with shift/mask arithmetic (or the pshufb
; nibble LUT on SSSE3/SSE4.1) plus psadbw, AVX folds the [50,50] constant
; into a vpcmpgtq memory operand, and AVX512 VPOPCNTDQ / BITALG use native
; vpopcntq/vpopcntb (with vpcmpnleuq into %k1 on the VL targets).
; SSE compares against 2147483698, apparently 0x80000000+50 (sign-bias for
; the unsigned compare emulated with pcmpgtd/pcmpeqd) -- TODO confirm.
35053define <2 x i64> @ugt_50_v2i64(<2 x i64> %0) {
35054; SSE2-LABEL: ugt_50_v2i64:
35055; SSE2:       # %bb.0:
35056; SSE2-NEXT:    movdqa %xmm0, %xmm1
35057; SSE2-NEXT:    psrlw $1, %xmm1
35058; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
35059; SSE2-NEXT:    psubb %xmm1, %xmm0
35060; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
35061; SSE2-NEXT:    movdqa %xmm0, %xmm2
35062; SSE2-NEXT:    pand %xmm1, %xmm2
35063; SSE2-NEXT:    psrlw $2, %xmm0
35064; SSE2-NEXT:    pand %xmm1, %xmm0
35065; SSE2-NEXT:    paddb %xmm2, %xmm0
35066; SSE2-NEXT:    movdqa %xmm0, %xmm1
35067; SSE2-NEXT:    psrlw $4, %xmm1
35068; SSE2-NEXT:    paddb %xmm0, %xmm1
35069; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
35070; SSE2-NEXT:    pxor %xmm0, %xmm0
35071; SSE2-NEXT:    psadbw %xmm1, %xmm0
35072; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
35073; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483698,2147483698]
35074; SSE2-NEXT:    movdqa %xmm0, %xmm2
35075; SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
35076; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
35077; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
35078; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
35079; SSE2-NEXT:    pand %xmm3, %xmm1
35080; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
35081; SSE2-NEXT:    por %xmm1, %xmm0
35082; SSE2-NEXT:    retq
35083;
35084; SSE3-LABEL: ugt_50_v2i64:
35085; SSE3:       # %bb.0:
35086; SSE3-NEXT:    movdqa %xmm0, %xmm1
35087; SSE3-NEXT:    psrlw $1, %xmm1
35088; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
35089; SSE3-NEXT:    psubb %xmm1, %xmm0
35090; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
35091; SSE3-NEXT:    movdqa %xmm0, %xmm2
35092; SSE3-NEXT:    pand %xmm1, %xmm2
35093; SSE3-NEXT:    psrlw $2, %xmm0
35094; SSE3-NEXT:    pand %xmm1, %xmm0
35095; SSE3-NEXT:    paddb %xmm2, %xmm0
35096; SSE3-NEXT:    movdqa %xmm0, %xmm1
35097; SSE3-NEXT:    psrlw $4, %xmm1
35098; SSE3-NEXT:    paddb %xmm0, %xmm1
35099; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
35100; SSE3-NEXT:    pxor %xmm0, %xmm0
35101; SSE3-NEXT:    psadbw %xmm1, %xmm0
35102; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
35103; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483698,2147483698]
35104; SSE3-NEXT:    movdqa %xmm0, %xmm2
35105; SSE3-NEXT:    pcmpgtd %xmm1, %xmm2
35106; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
35107; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
35108; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
35109; SSE3-NEXT:    pand %xmm3, %xmm1
35110; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
35111; SSE3-NEXT:    por %xmm1, %xmm0
35112; SSE3-NEXT:    retq
35113;
35114; SSSE3-LABEL: ugt_50_v2i64:
35115; SSSE3:       # %bb.0:
35116; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
35117; SSSE3-NEXT:    movdqa %xmm0, %xmm2
35118; SSSE3-NEXT:    pand %xmm1, %xmm2
35119; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
35120; SSSE3-NEXT:    movdqa %xmm3, %xmm4
35121; SSSE3-NEXT:    pshufb %xmm2, %xmm4
35122; SSSE3-NEXT:    psrlw $4, %xmm0
35123; SSSE3-NEXT:    pand %xmm1, %xmm0
35124; SSSE3-NEXT:    pshufb %xmm0, %xmm3
35125; SSSE3-NEXT:    paddb %xmm4, %xmm3
35126; SSSE3-NEXT:    pxor %xmm0, %xmm0
35127; SSSE3-NEXT:    psadbw %xmm3, %xmm0
35128; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
35129; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483698,2147483698]
35130; SSSE3-NEXT:    movdqa %xmm0, %xmm2
35131; SSSE3-NEXT:    pcmpgtd %xmm1, %xmm2
35132; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
35133; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
35134; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
35135; SSSE3-NEXT:    pand %xmm3, %xmm1
35136; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
35137; SSSE3-NEXT:    por %xmm1, %xmm0
35138; SSSE3-NEXT:    retq
35139;
35140; SSE41-LABEL: ugt_50_v2i64:
35141; SSE41:       # %bb.0:
35142; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
35143; SSE41-NEXT:    movdqa %xmm0, %xmm2
35144; SSE41-NEXT:    pand %xmm1, %xmm2
35145; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
35146; SSE41-NEXT:    movdqa %xmm3, %xmm4
35147; SSE41-NEXT:    pshufb %xmm2, %xmm4
35148; SSE41-NEXT:    psrlw $4, %xmm0
35149; SSE41-NEXT:    pand %xmm1, %xmm0
35150; SSE41-NEXT:    pshufb %xmm0, %xmm3
35151; SSE41-NEXT:    paddb %xmm4, %xmm3
35152; SSE41-NEXT:    pxor %xmm0, %xmm0
35153; SSE41-NEXT:    psadbw %xmm3, %xmm0
35154; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
35155; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483698,2147483698]
35156; SSE41-NEXT:    movdqa %xmm0, %xmm2
35157; SSE41-NEXT:    pcmpgtd %xmm1, %xmm2
35158; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
35159; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
35160; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
35161; SSE41-NEXT:    pand %xmm3, %xmm1
35162; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
35163; SSE41-NEXT:    por %xmm1, %xmm0
35164; SSE41-NEXT:    retq
35165;
35166; AVX1-LABEL: ugt_50_v2i64:
35167; AVX1:       # %bb.0:
35168; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
35169; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
35170; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
35171; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
35172; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
35173; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
35174; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
35175; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
35176; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
35177; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
35178; AVX1-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
35179; AVX1-NEXT:    retq
35180;
35181; AVX2-LABEL: ugt_50_v2i64:
35182; AVX2:       # %bb.0:
35183; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
35184; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
35185; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
35186; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
35187; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
35188; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
35189; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
35190; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
35191; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
35192; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
35193; AVX2-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
35194; AVX2-NEXT:    retq
35195;
35196; AVX512VPOPCNTDQ-LABEL: ugt_50_v2i64:
35197; AVX512VPOPCNTDQ:       # %bb.0:
35198; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
35199; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
35200; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
35201; AVX512VPOPCNTDQ-NEXT:    vzeroupper
35202; AVX512VPOPCNTDQ-NEXT:    retq
35203;
35204; AVX512VPOPCNTDQVL-LABEL: ugt_50_v2i64:
35205; AVX512VPOPCNTDQVL:       # %bb.0:
35206; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
35207; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
35208; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
35209; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
35210; AVX512VPOPCNTDQVL-NEXT:    retq
35211;
35212; BITALG_NOVLX-LABEL: ugt_50_v2i64:
35213; BITALG_NOVLX:       # %bb.0:
35214; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
35215; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
35216; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
35217; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
35218; BITALG_NOVLX-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
35219; BITALG_NOVLX-NEXT:    vzeroupper
35220; BITALG_NOVLX-NEXT:    retq
35221;
35222; BITALG-LABEL: ugt_50_v2i64:
35223; BITALG:       # %bb.0:
35224; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
35225; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
35226; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
35227; BITALG-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
35228; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
35229; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
35230; BITALG-NEXT:    retq
35231  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
35232  %3 = icmp ugt <2 x i64> %2, <i64 50, i64 50>
35233  %4 = sext <2 x i1> %3 to <2 x i64>
35234  ret <2 x i64> %4
35235}
35236
; NOTE(review): CHECK lines autogenerated by utils/update_llc_test_checks.py
; (see the file header); regenerate with the script, do not hand-edit.
; Pins the x86 lowering of sext(icmp ult (ctpop <2 x i64> %0), 51). As in
; the neighbouring tests: SSE shift/mask popcount (or pshufb nibble LUT)
; summed with psadbw, AVX materializes [51,51] and emits a swapped vpcmpgtq
; (limit > popcount), AVX512 VPOPCNTDQ / BITALG use native vpopcntq /
; vpopcntb, and the VL variants compare with vpcmpltuq into %k1.
; SSE compares against 2147483699, apparently 0x80000000+51 (sign-bias for
; the unsigned compare emulated with pcmpgtd/pcmpeqd) -- TODO confirm.
35237define <2 x i64> @ult_51_v2i64(<2 x i64> %0) {
35238; SSE2-LABEL: ult_51_v2i64:
35239; SSE2:       # %bb.0:
35240; SSE2-NEXT:    movdqa %xmm0, %xmm1
35241; SSE2-NEXT:    psrlw $1, %xmm1
35242; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
35243; SSE2-NEXT:    psubb %xmm1, %xmm0
35244; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
35245; SSE2-NEXT:    movdqa %xmm0, %xmm2
35246; SSE2-NEXT:    pand %xmm1, %xmm2
35247; SSE2-NEXT:    psrlw $2, %xmm0
35248; SSE2-NEXT:    pand %xmm1, %xmm0
35249; SSE2-NEXT:    paddb %xmm2, %xmm0
35250; SSE2-NEXT:    movdqa %xmm0, %xmm1
35251; SSE2-NEXT:    psrlw $4, %xmm1
35252; SSE2-NEXT:    paddb %xmm0, %xmm1
35253; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
35254; SSE2-NEXT:    pxor %xmm0, %xmm0
35255; SSE2-NEXT:    psadbw %xmm1, %xmm0
35256; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
35257; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483699,2147483699]
35258; SSE2-NEXT:    movdqa %xmm1, %xmm2
35259; SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
35260; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
35261; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
35262; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
35263; SSE2-NEXT:    pand %xmm3, %xmm1
35264; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
35265; SSE2-NEXT:    por %xmm1, %xmm0
35266; SSE2-NEXT:    retq
35267;
35268; SSE3-LABEL: ult_51_v2i64:
35269; SSE3:       # %bb.0:
35270; SSE3-NEXT:    movdqa %xmm0, %xmm1
35271; SSE3-NEXT:    psrlw $1, %xmm1
35272; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
35273; SSE3-NEXT:    psubb %xmm1, %xmm0
35274; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
35275; SSE3-NEXT:    movdqa %xmm0, %xmm2
35276; SSE3-NEXT:    pand %xmm1, %xmm2
35277; SSE3-NEXT:    psrlw $2, %xmm0
35278; SSE3-NEXT:    pand %xmm1, %xmm0
35279; SSE3-NEXT:    paddb %xmm2, %xmm0
35280; SSE3-NEXT:    movdqa %xmm0, %xmm1
35281; SSE3-NEXT:    psrlw $4, %xmm1
35282; SSE3-NEXT:    paddb %xmm0, %xmm1
35283; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
35284; SSE3-NEXT:    pxor %xmm0, %xmm0
35285; SSE3-NEXT:    psadbw %xmm1, %xmm0
35286; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
35287; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483699,2147483699]
35288; SSE3-NEXT:    movdqa %xmm1, %xmm2
35289; SSE3-NEXT:    pcmpgtd %xmm0, %xmm2
35290; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
35291; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
35292; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
35293; SSE3-NEXT:    pand %xmm3, %xmm1
35294; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
35295; SSE3-NEXT:    por %xmm1, %xmm0
35296; SSE3-NEXT:    retq
35297;
35298; SSSE3-LABEL: ult_51_v2i64:
35299; SSSE3:       # %bb.0:
35300; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
35301; SSSE3-NEXT:    movdqa %xmm0, %xmm2
35302; SSSE3-NEXT:    pand %xmm1, %xmm2
35303; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
35304; SSSE3-NEXT:    movdqa %xmm3, %xmm4
35305; SSSE3-NEXT:    pshufb %xmm2, %xmm4
35306; SSSE3-NEXT:    psrlw $4, %xmm0
35307; SSSE3-NEXT:    pand %xmm1, %xmm0
35308; SSSE3-NEXT:    pshufb %xmm0, %xmm3
35309; SSSE3-NEXT:    paddb %xmm4, %xmm3
35310; SSSE3-NEXT:    pxor %xmm0, %xmm0
35311; SSSE3-NEXT:    psadbw %xmm3, %xmm0
35312; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
35313; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483699,2147483699]
35314; SSSE3-NEXT:    movdqa %xmm1, %xmm2
35315; SSSE3-NEXT:    pcmpgtd %xmm0, %xmm2
35316; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
35317; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
35318; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
35319; SSSE3-NEXT:    pand %xmm3, %xmm1
35320; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
35321; SSSE3-NEXT:    por %xmm1, %xmm0
35322; SSSE3-NEXT:    retq
35323;
35324; SSE41-LABEL: ult_51_v2i64:
35325; SSE41:       # %bb.0:
35326; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
35327; SSE41-NEXT:    movdqa %xmm0, %xmm2
35328; SSE41-NEXT:    pand %xmm1, %xmm2
35329; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
35330; SSE41-NEXT:    movdqa %xmm3, %xmm4
35331; SSE41-NEXT:    pshufb %xmm2, %xmm4
35332; SSE41-NEXT:    psrlw $4, %xmm0
35333; SSE41-NEXT:    pand %xmm1, %xmm0
35334; SSE41-NEXT:    pshufb %xmm0, %xmm3
35335; SSE41-NEXT:    paddb %xmm4, %xmm3
35336; SSE41-NEXT:    pxor %xmm0, %xmm0
35337; SSE41-NEXT:    psadbw %xmm3, %xmm0
35338; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
35339; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483699,2147483699]
35340; SSE41-NEXT:    movdqa %xmm1, %xmm2
35341; SSE41-NEXT:    pcmpgtd %xmm0, %xmm2
35342; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
35343; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
35344; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
35345; SSE41-NEXT:    pand %xmm3, %xmm1
35346; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
35347; SSE41-NEXT:    por %xmm1, %xmm0
35348; SSE41-NEXT:    retq
35349;
35350; AVX1-LABEL: ult_51_v2i64:
35351; AVX1:       # %bb.0:
35352; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
35353; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
35354; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
35355; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
35356; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
35357; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
35358; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
35359; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
35360; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
35361; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
35362; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [51,51]
35363; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
35364; AVX1-NEXT:    retq
35365;
35366; AVX2-LABEL: ult_51_v2i64:
35367; AVX2:       # %bb.0:
35368; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
35369; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
35370; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
35371; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
35372; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
35373; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
35374; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
35375; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
35376; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
35377; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
35378; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [51,51]
35379; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
35380; AVX2-NEXT:    retq
35381;
35382; AVX512VPOPCNTDQ-LABEL: ult_51_v2i64:
35383; AVX512VPOPCNTDQ:       # %bb.0:
35384; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
35385; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
35386; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} xmm1 = [51,51]
35387; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
35388; AVX512VPOPCNTDQ-NEXT:    vzeroupper
35389; AVX512VPOPCNTDQ-NEXT:    retq
35390;
35391; AVX512VPOPCNTDQVL-LABEL: ult_51_v2i64:
35392; AVX512VPOPCNTDQVL:       # %bb.0:
35393; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
35394; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
35395; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
35396; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
35397; AVX512VPOPCNTDQVL-NEXT:    retq
35398;
35399; BITALG_NOVLX-LABEL: ult_51_v2i64:
35400; BITALG_NOVLX:       # %bb.0:
35401; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
35402; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
35403; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
35404; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
35405; BITALG_NOVLX-NEXT:    vmovdqa {{.*#+}} xmm1 = [51,51]
35406; BITALG_NOVLX-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
35407; BITALG_NOVLX-NEXT:    vzeroupper
35408; BITALG_NOVLX-NEXT:    retq
35409;
35410; BITALG-LABEL: ult_51_v2i64:
35411; BITALG:       # %bb.0:
35412; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
35413; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
35414; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
35415; BITALG-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
35416; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
35417; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
35418; BITALG-NEXT:    retq
35419  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
35420  %3 = icmp ult <2 x i64> %2, <i64 51, i64 51>
35421  %4 = sext <2 x i1> %3 to <2 x i64>
35422  ret <2 x i64> %4
35423}
35424
; Checks lowering of ctpop(<2 x i64>) > 51: pre-AVX512 targets popcount via the
; bit-twiddling byte counts folded with psadbw; SSE targets emulate the unsigned
; 64-bit compare with sign-bit-biased dword compares (2147483699 = 0x80000000 | 51).
define <2 x i64> @ugt_51_v2i64(<2 x i64> %0) {
; SSE2-LABEL: ugt_51_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    psadbw %xmm1, %xmm0
; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483699,2147483699]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ugt_51_v2i64:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    psadbw %xmm1, %xmm0
; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483699,2147483699]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE3-NEXT:    pand %xmm3, %xmm1
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE3-NEXT:    por %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ugt_51_v2i64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    psadbw %xmm3, %xmm0
; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483699,2147483699]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pcmpgtd %xmm1, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSSE3-NEXT:    pand %xmm3, %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ugt_51_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    psadbw %xmm3, %xmm0
; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483699,2147483699]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm3, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ugt_51_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ugt_51_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ugt_51_v2i64:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ugt_51_v2i64:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ugt_51_v2i64:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ugt_51_v2i64:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
  %3 = icmp ugt <2 x i64> %2, <i64 51, i64 51>
  %4 = sext <2 x i1> %3 to <2 x i64>
  ret <2 x i64> %4
}
; Checks lowering of ctpop(<2 x i64>) < 52: same popcount expansions as above, but
; the ult compare keeps the constant in a register for vpcmpgtq (operands swapped),
; and SSE biases with 2147483700 = 0x80000000 | 52 for the dword-compare emulation.
define <2 x i64> @ult_52_v2i64(<2 x i64> %0) {
; SSE2-LABEL: ult_52_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    psadbw %xmm1, %xmm0
; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483700,2147483700]
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ult_52_v2i64:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    psadbw %xmm1, %xmm0
; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483700,2147483700]
; SSE3-NEXT:    movdqa %xmm1, %xmm2
; SSE3-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE3-NEXT:    pand %xmm3, %xmm1
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE3-NEXT:    por %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ult_52_v2i64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    psadbw %xmm3, %xmm0
; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483700,2147483700]
; SSSE3-NEXT:    movdqa %xmm1, %xmm2
; SSSE3-NEXT:    pcmpgtd %xmm0, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSSE3-NEXT:    pand %xmm3, %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ult_52_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    psadbw %xmm3, %xmm0
; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483700,2147483700]
; SSE41-NEXT:    movdqa %xmm1, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm3, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ult_52_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [52,52]
; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ult_52_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [52,52]
; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ult_52_v2i64:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} xmm1 = [52,52]
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ult_52_v2i64:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ult_52_v2i64:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vmovdqa {{.*#+}} xmm1 = [52,52]
; BITALG_NOVLX-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ult_52_v2i64:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
  %3 = icmp ult <2 x i64> %2, <i64 52, i64 52>
  %4 = sext <2 x i1> %3 to <2 x i64>
  ret <2 x i64> %4
}
; Checks lowering of ctpop(<2 x i64>) > 52: ugt folds the constant as a memory
; operand of vpcmpgtq; SSE biases with 2147483700 = 0x80000000 | 52 for the
; sign-bit dword-compare emulation of the unsigned 64-bit compare.
define <2 x i64> @ugt_52_v2i64(<2 x i64> %0) {
; SSE2-LABEL: ugt_52_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    psadbw %xmm1, %xmm0
; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483700,2147483700]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ugt_52_v2i64:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    psadbw %xmm1, %xmm0
; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483700,2147483700]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE3-NEXT:    pand %xmm3, %xmm1
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE3-NEXT:    por %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ugt_52_v2i64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    psadbw %xmm3, %xmm0
; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483700,2147483700]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pcmpgtd %xmm1, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSSE3-NEXT:    pand %xmm3, %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ugt_52_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    psadbw %xmm3, %xmm0
; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483700,2147483700]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm3, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ugt_52_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ugt_52_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ugt_52_v2i64:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ugt_52_v2i64:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ugt_52_v2i64:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ugt_52_v2i64:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
  %3 = icmp ugt <2 x i64> %2, <i64 52, i64 52>
  %4 = sext <2 x i1> %3 to <2 x i64>
  ret <2 x i64> %4
}
35981define <2 x i64> @ult_53_v2i64(<2 x i64> %0) {
35982; SSE2-LABEL: ult_53_v2i64:
35983; SSE2:       # %bb.0:
35984; SSE2-NEXT:    movdqa %xmm0, %xmm1
35985; SSE2-NEXT:    psrlw $1, %xmm1
35986; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
35987; SSE2-NEXT:    psubb %xmm1, %xmm0
35988; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
35989; SSE2-NEXT:    movdqa %xmm0, %xmm2
35990; SSE2-NEXT:    pand %xmm1, %xmm2
35991; SSE2-NEXT:    psrlw $2, %xmm0
35992; SSE2-NEXT:    pand %xmm1, %xmm0
35993; SSE2-NEXT:    paddb %xmm2, %xmm0
35994; SSE2-NEXT:    movdqa %xmm0, %xmm1
35995; SSE2-NEXT:    psrlw $4, %xmm1
35996; SSE2-NEXT:    paddb %xmm0, %xmm1
35997; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
35998; SSE2-NEXT:    pxor %xmm0, %xmm0
35999; SSE2-NEXT:    psadbw %xmm1, %xmm0
36000; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
36001; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483701,2147483701]
36002; SSE2-NEXT:    movdqa %xmm1, %xmm2
36003; SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
36004; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
36005; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
36006; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
36007; SSE2-NEXT:    pand %xmm3, %xmm1
36008; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
36009; SSE2-NEXT:    por %xmm1, %xmm0
36010; SSE2-NEXT:    retq
36011;
36012; SSE3-LABEL: ult_53_v2i64:
36013; SSE3:       # %bb.0:
36014; SSE3-NEXT:    movdqa %xmm0, %xmm1
36015; SSE3-NEXT:    psrlw $1, %xmm1
36016; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
36017; SSE3-NEXT:    psubb %xmm1, %xmm0
36018; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
36019; SSE3-NEXT:    movdqa %xmm0, %xmm2
36020; SSE3-NEXT:    pand %xmm1, %xmm2
36021; SSE3-NEXT:    psrlw $2, %xmm0
36022; SSE3-NEXT:    pand %xmm1, %xmm0
36023; SSE3-NEXT:    paddb %xmm2, %xmm0
36024; SSE3-NEXT:    movdqa %xmm0, %xmm1
36025; SSE3-NEXT:    psrlw $4, %xmm1
36026; SSE3-NEXT:    paddb %xmm0, %xmm1
36027; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
36028; SSE3-NEXT:    pxor %xmm0, %xmm0
36029; SSE3-NEXT:    psadbw %xmm1, %xmm0
36030; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
36031; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483701,2147483701]
36032; SSE3-NEXT:    movdqa %xmm1, %xmm2
36033; SSE3-NEXT:    pcmpgtd %xmm0, %xmm2
36034; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
36035; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
36036; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
36037; SSE3-NEXT:    pand %xmm3, %xmm1
36038; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
36039; SSE3-NEXT:    por %xmm1, %xmm0
36040; SSE3-NEXT:    retq
36041;
36042; SSSE3-LABEL: ult_53_v2i64:
36043; SSSE3:       # %bb.0:
36044; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
36045; SSSE3-NEXT:    movdqa %xmm0, %xmm2
36046; SSSE3-NEXT:    pand %xmm1, %xmm2
36047; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
36048; SSSE3-NEXT:    movdqa %xmm3, %xmm4
36049; SSSE3-NEXT:    pshufb %xmm2, %xmm4
36050; SSSE3-NEXT:    psrlw $4, %xmm0
36051; SSSE3-NEXT:    pand %xmm1, %xmm0
36052; SSSE3-NEXT:    pshufb %xmm0, %xmm3
36053; SSSE3-NEXT:    paddb %xmm4, %xmm3
36054; SSSE3-NEXT:    pxor %xmm0, %xmm0
36055; SSSE3-NEXT:    psadbw %xmm3, %xmm0
36056; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
36057; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483701,2147483701]
36058; SSSE3-NEXT:    movdqa %xmm1, %xmm2
36059; SSSE3-NEXT:    pcmpgtd %xmm0, %xmm2
36060; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
36061; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
36062; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
36063; SSSE3-NEXT:    pand %xmm3, %xmm1
36064; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
36065; SSSE3-NEXT:    por %xmm1, %xmm0
36066; SSSE3-NEXT:    retq
36067;
36068; SSE41-LABEL: ult_53_v2i64:
36069; SSE41:       # %bb.0:
36070; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
36071; SSE41-NEXT:    movdqa %xmm0, %xmm2
36072; SSE41-NEXT:    pand %xmm1, %xmm2
36073; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
36074; SSE41-NEXT:    movdqa %xmm3, %xmm4
36075; SSE41-NEXT:    pshufb %xmm2, %xmm4
36076; SSE41-NEXT:    psrlw $4, %xmm0
36077; SSE41-NEXT:    pand %xmm1, %xmm0
36078; SSE41-NEXT:    pshufb %xmm0, %xmm3
36079; SSE41-NEXT:    paddb %xmm4, %xmm3
36080; SSE41-NEXT:    pxor %xmm0, %xmm0
36081; SSE41-NEXT:    psadbw %xmm3, %xmm0
36082; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
36083; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483701,2147483701]
36084; SSE41-NEXT:    movdqa %xmm1, %xmm2
36085; SSE41-NEXT:    pcmpgtd %xmm0, %xmm2
36086; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
36087; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
36088; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
36089; SSE41-NEXT:    pand %xmm3, %xmm1
36090; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
36091; SSE41-NEXT:    por %xmm1, %xmm0
36092; SSE41-NEXT:    retq
36093;
36094; AVX1-LABEL: ult_53_v2i64:
36095; AVX1:       # %bb.0:
36096; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
36097; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
36098; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
36099; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
36100; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
36101; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
36102; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
36103; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
36104; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
36105; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
36106; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [53,53]
36107; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
36108; AVX1-NEXT:    retq
36109;
36110; AVX2-LABEL: ult_53_v2i64:
36111; AVX2:       # %bb.0:
36112; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
36113; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
36114; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
36115; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
36116; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
36117; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
36118; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
36119; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
36120; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
36121; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
36122; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [53,53]
36123; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
36124; AVX2-NEXT:    retq
36125;
36126; AVX512VPOPCNTDQ-LABEL: ult_53_v2i64:
36127; AVX512VPOPCNTDQ:       # %bb.0:
36128; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
36129; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
36130; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} xmm1 = [53,53]
36131; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
36132; AVX512VPOPCNTDQ-NEXT:    vzeroupper
36133; AVX512VPOPCNTDQ-NEXT:    retq
36134;
36135; AVX512VPOPCNTDQVL-LABEL: ult_53_v2i64:
36136; AVX512VPOPCNTDQVL:       # %bb.0:
36137; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
36138; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
36139; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
36140; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
36141; AVX512VPOPCNTDQVL-NEXT:    retq
36142;
36143; BITALG_NOVLX-LABEL: ult_53_v2i64:
36144; BITALG_NOVLX:       # %bb.0:
36145; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
36146; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
36147; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
36148; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
36149; BITALG_NOVLX-NEXT:    vmovdqa {{.*#+}} xmm1 = [53,53]
36150; BITALG_NOVLX-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
36151; BITALG_NOVLX-NEXT:    vzeroupper
36152; BITALG_NOVLX-NEXT:    retq
36153;
36154; BITALG-LABEL: ult_53_v2i64:
36155; BITALG:       # %bb.0:
36156; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
36157; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
36158; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
36159; BITALG-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
36160; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
36161; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
36162; BITALG-NEXT:    retq
36163  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
36164  %3 = icmp ult <2 x i64> %2, <i64 53, i64 53>
36165  %4 = sext <2 x i1> %3 to <2 x i64>
36166  ret <2 x i64> %4
36167}
36168
; Lowering test: per-lane popcount of <2 x i64> followed by 'icmp ugt ..., 53',
; sign-extended back to <2 x i64>. The CHECK lines below are autogenerated by
; utils/update_llc_test_checks.py -- regenerate them rather than editing by hand.
; NOTE(review): the 32-bit compare constant 2147483701 in the SSE paths looks
; like 53 biased by 0x80000000 (signed-compare trick for unsigned i64 compare)
; -- confirm against the DAG lowering if these constants are updated.
define <2 x i64> @ugt_53_v2i64(<2 x i64> %0) {
; SSE2-LABEL: ugt_53_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    psadbw %xmm1, %xmm0
; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483701,2147483701]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ugt_53_v2i64:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    psadbw %xmm1, %xmm0
; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483701,2147483701]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE3-NEXT:    pand %xmm3, %xmm1
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE3-NEXT:    por %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ugt_53_v2i64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    psadbw %xmm3, %xmm0
; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483701,2147483701]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pcmpgtd %xmm1, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSSE3-NEXT:    pand %xmm3, %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ugt_53_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    psadbw %xmm3, %xmm0
; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483701,2147483701]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm3, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ugt_53_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ugt_53_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ugt_53_v2i64:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ugt_53_v2i64:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ugt_53_v2i64:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ugt_53_v2i64:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) ; per-lane bit count
  %3 = icmp ugt <2 x i64> %2, <i64 53, i64 53>             ; lane-wise count > 53
  %4 = sext <2 x i1> %3 to <2 x i64>                       ; widen i1 mask to all-ones/all-zeros lanes
  ret <2 x i64> %4
}
36352
; Lowering test: per-lane popcount of <2 x i64> followed by 'icmp ult ..., 54',
; sign-extended back to <2 x i64>. The CHECK lines below are autogenerated by
; utils/update_llc_test_checks.py -- regenerate them rather than editing by hand.
; NOTE(review): unlike the ugt variant, the SSE paths here issue pcmpgtd with the
; constant vector as the left operand (constant > value), matching the ult sense;
; the AVX paths likewise use vpcmpgtq %xmm0, %xmm1 (i.e. [54,54] > popcount).
define <2 x i64> @ult_54_v2i64(<2 x i64> %0) {
; SSE2-LABEL: ult_54_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    psadbw %xmm1, %xmm0
; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483702,2147483702]
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ult_54_v2i64:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    psadbw %xmm1, %xmm0
; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483702,2147483702]
; SSE3-NEXT:    movdqa %xmm1, %xmm2
; SSE3-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE3-NEXT:    pand %xmm3, %xmm1
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE3-NEXT:    por %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ult_54_v2i64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    psadbw %xmm3, %xmm0
; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483702,2147483702]
; SSSE3-NEXT:    movdqa %xmm1, %xmm2
; SSSE3-NEXT:    pcmpgtd %xmm0, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSSE3-NEXT:    pand %xmm3, %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ult_54_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    psadbw %xmm3, %xmm0
; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483702,2147483702]
; SSE41-NEXT:    movdqa %xmm1, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm3, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ult_54_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [54,54]
; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ult_54_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [54,54]
; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ult_54_v2i64:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} xmm1 = [54,54]
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ult_54_v2i64:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ult_54_v2i64:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vmovdqa {{.*#+}} xmm1 = [54,54]
; BITALG_NOVLX-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ult_54_v2i64:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) ; per-lane bit count
  %3 = icmp ult <2 x i64> %2, <i64 54, i64 54>             ; lane-wise count < 54
  %4 = sext <2 x i1> %3 to <2 x i64>                       ; widen i1 mask to all-ones/all-zeros lanes
  ret <2 x i64> %4
}
36540
; Lowering test: per-lane popcount of <2 x i64> followed by 'icmp ugt ..., 54',
; sign-extended back to <2 x i64>. The CHECK lines below are autogenerated by
; utils/update_llc_test_checks.py -- regenerate them rather than editing by hand.
define <2 x i64> @ugt_54_v2i64(<2 x i64> %0) {
; SSE2-LABEL: ugt_54_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    psadbw %xmm1, %xmm0
; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483702,2147483702]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ugt_54_v2i64:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    psadbw %xmm1, %xmm0
; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483702,2147483702]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE3-NEXT:    pand %xmm3, %xmm1
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE3-NEXT:    por %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ugt_54_v2i64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    psadbw %xmm3, %xmm0
; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483702,2147483702]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pcmpgtd %xmm1, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSSE3-NEXT:    pand %xmm3, %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ugt_54_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    psadbw %xmm3, %xmm0
; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483702,2147483702]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm3, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ugt_54_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ugt_54_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ugt_54_v2i64:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ugt_54_v2i64:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ugt_54_v2i64:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ugt_54_v2i64:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) ; per-lane bit count
  %3 = icmp ugt <2 x i64> %2, <i64 54, i64 54>             ; lane-wise count > 54
  %4 = sext <2 x i1> %3 to <2 x i64>                       ; widen i1 mask to all-ones/all-zeros lanes
  ret <2 x i64> %4
}
36724
36725define <2 x i64> @ult_55_v2i64(<2 x i64> %0) {
36726; SSE2-LABEL: ult_55_v2i64:
36727; SSE2:       # %bb.0:
36728; SSE2-NEXT:    movdqa %xmm0, %xmm1
36729; SSE2-NEXT:    psrlw $1, %xmm1
36730; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
36731; SSE2-NEXT:    psubb %xmm1, %xmm0
36732; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
36733; SSE2-NEXT:    movdqa %xmm0, %xmm2
36734; SSE2-NEXT:    pand %xmm1, %xmm2
36735; SSE2-NEXT:    psrlw $2, %xmm0
36736; SSE2-NEXT:    pand %xmm1, %xmm0
36737; SSE2-NEXT:    paddb %xmm2, %xmm0
36738; SSE2-NEXT:    movdqa %xmm0, %xmm1
36739; SSE2-NEXT:    psrlw $4, %xmm1
36740; SSE2-NEXT:    paddb %xmm0, %xmm1
36741; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
36742; SSE2-NEXT:    pxor %xmm0, %xmm0
36743; SSE2-NEXT:    psadbw %xmm1, %xmm0
36744; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
36745; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483703,2147483703]
36746; SSE2-NEXT:    movdqa %xmm1, %xmm2
36747; SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
36748; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
36749; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
36750; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
36751; SSE2-NEXT:    pand %xmm3, %xmm1
36752; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
36753; SSE2-NEXT:    por %xmm1, %xmm0
36754; SSE2-NEXT:    retq
36755;
36756; SSE3-LABEL: ult_55_v2i64:
36757; SSE3:       # %bb.0:
36758; SSE3-NEXT:    movdqa %xmm0, %xmm1
36759; SSE3-NEXT:    psrlw $1, %xmm1
36760; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
36761; SSE3-NEXT:    psubb %xmm1, %xmm0
36762; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
36763; SSE3-NEXT:    movdqa %xmm0, %xmm2
36764; SSE3-NEXT:    pand %xmm1, %xmm2
36765; SSE3-NEXT:    psrlw $2, %xmm0
36766; SSE3-NEXT:    pand %xmm1, %xmm0
36767; SSE3-NEXT:    paddb %xmm2, %xmm0
36768; SSE3-NEXT:    movdqa %xmm0, %xmm1
36769; SSE3-NEXT:    psrlw $4, %xmm1
36770; SSE3-NEXT:    paddb %xmm0, %xmm1
36771; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
36772; SSE3-NEXT:    pxor %xmm0, %xmm0
36773; SSE3-NEXT:    psadbw %xmm1, %xmm0
36774; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
36775; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483703,2147483703]
36776; SSE3-NEXT:    movdqa %xmm1, %xmm2
36777; SSE3-NEXT:    pcmpgtd %xmm0, %xmm2
36778; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
36779; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
36780; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
36781; SSE3-NEXT:    pand %xmm3, %xmm1
36782; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
36783; SSE3-NEXT:    por %xmm1, %xmm0
36784; SSE3-NEXT:    retq
36785;
36786; SSSE3-LABEL: ult_55_v2i64:
36787; SSSE3:       # %bb.0:
36788; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
36789; SSSE3-NEXT:    movdqa %xmm0, %xmm2
36790; SSSE3-NEXT:    pand %xmm1, %xmm2
36791; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
36792; SSSE3-NEXT:    movdqa %xmm3, %xmm4
36793; SSSE3-NEXT:    pshufb %xmm2, %xmm4
36794; SSSE3-NEXT:    psrlw $4, %xmm0
36795; SSSE3-NEXT:    pand %xmm1, %xmm0
36796; SSSE3-NEXT:    pshufb %xmm0, %xmm3
36797; SSSE3-NEXT:    paddb %xmm4, %xmm3
36798; SSSE3-NEXT:    pxor %xmm0, %xmm0
36799; SSSE3-NEXT:    psadbw %xmm3, %xmm0
36800; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
36801; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483703,2147483703]
36802; SSSE3-NEXT:    movdqa %xmm1, %xmm2
36803; SSSE3-NEXT:    pcmpgtd %xmm0, %xmm2
36804; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
36805; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
36806; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
36807; SSSE3-NEXT:    pand %xmm3, %xmm1
36808; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
36809; SSSE3-NEXT:    por %xmm1, %xmm0
36810; SSSE3-NEXT:    retq
36811;
36812; SSE41-LABEL: ult_55_v2i64:
36813; SSE41:       # %bb.0:
36814; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
36815; SSE41-NEXT:    movdqa %xmm0, %xmm2
36816; SSE41-NEXT:    pand %xmm1, %xmm2
36817; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
36818; SSE41-NEXT:    movdqa %xmm3, %xmm4
36819; SSE41-NEXT:    pshufb %xmm2, %xmm4
36820; SSE41-NEXT:    psrlw $4, %xmm0
36821; SSE41-NEXT:    pand %xmm1, %xmm0
36822; SSE41-NEXT:    pshufb %xmm0, %xmm3
36823; SSE41-NEXT:    paddb %xmm4, %xmm3
36824; SSE41-NEXT:    pxor %xmm0, %xmm0
36825; SSE41-NEXT:    psadbw %xmm3, %xmm0
36826; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
36827; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483703,2147483703]
36828; SSE41-NEXT:    movdqa %xmm1, %xmm2
36829; SSE41-NEXT:    pcmpgtd %xmm0, %xmm2
36830; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
36831; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
36832; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
36833; SSE41-NEXT:    pand %xmm3, %xmm1
36834; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
36835; SSE41-NEXT:    por %xmm1, %xmm0
36836; SSE41-NEXT:    retq
36837;
36838; AVX1-LABEL: ult_55_v2i64:
36839; AVX1:       # %bb.0:
36840; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
36841; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
36842; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
36843; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
36844; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
36845; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
36846; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
36847; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
36848; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
36849; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
36850; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [55,55]
36851; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
36852; AVX1-NEXT:    retq
36853;
36854; AVX2-LABEL: ult_55_v2i64:
36855; AVX2:       # %bb.0:
36856; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
36857; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
36858; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
36859; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
36860; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
36861; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
36862; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
36863; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
36864; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
36865; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
36866; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [55,55]
36867; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
36868; AVX2-NEXT:    retq
36869;
36870; AVX512VPOPCNTDQ-LABEL: ult_55_v2i64:
36871; AVX512VPOPCNTDQ:       # %bb.0:
36872; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
36873; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
36874; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} xmm1 = [55,55]
36875; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
36876; AVX512VPOPCNTDQ-NEXT:    vzeroupper
36877; AVX512VPOPCNTDQ-NEXT:    retq
36878;
36879; AVX512VPOPCNTDQVL-LABEL: ult_55_v2i64:
36880; AVX512VPOPCNTDQVL:       # %bb.0:
36881; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
36882; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
36883; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
36884; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
36885; AVX512VPOPCNTDQVL-NEXT:    retq
36886;
36887; BITALG_NOVLX-LABEL: ult_55_v2i64:
36888; BITALG_NOVLX:       # %bb.0:
36889; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
36890; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
36891; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
36892; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
36893; BITALG_NOVLX-NEXT:    vmovdqa {{.*#+}} xmm1 = [55,55]
36894; BITALG_NOVLX-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
36895; BITALG_NOVLX-NEXT:    vzeroupper
36896; BITALG_NOVLX-NEXT:    retq
36897;
36898; BITALG-LABEL: ult_55_v2i64:
36899; BITALG:       # %bb.0:
36900; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
36901; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
36902; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
36903; BITALG-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
36904; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
36905; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
36906; BITALG-NEXT:    retq
36907  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
36908  %3 = icmp ult <2 x i64> %2, <i64 55, i64 55>
36909  %4 = sext <2 x i1> %3 to <2 x i64>
36910  ret <2 x i64> %4
36911}
36912
; Checks x86 codegen for: per-lane ctpop of <2 x i64>, icmp ugt 55, then
; sext <2 x i1> to a <2 x i64> all-ones/all-zeros mask.
define <2 x i64> @ugt_55_v2i64(<2 x i64> %0) {
; SSE2-LABEL: ugt_55_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    psadbw %xmm1, %xmm0
; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483703,2147483703]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ugt_55_v2i64:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    psadbw %xmm1, %xmm0
; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483703,2147483703]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE3-NEXT:    pand %xmm3, %xmm1
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE3-NEXT:    por %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ugt_55_v2i64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    psadbw %xmm3, %xmm0
; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483703,2147483703]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pcmpgtd %xmm1, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSSE3-NEXT:    pand %xmm3, %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ugt_55_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    psadbw %xmm3, %xmm0
; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483703,2147483703]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm3, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ugt_55_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ugt_55_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ugt_55_v2i64:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ugt_55_v2i64:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ugt_55_v2i64:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ugt_55_v2i64:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
  %3 = icmp ugt <2 x i64> %2, <i64 55, i64 55>
  %4 = sext <2 x i1> %3 to <2 x i64>
  ret <2 x i64> %4
}
37096
; Checks x86 codegen for: per-lane ctpop of <2 x i64>, icmp ult 56, then
; sext <2 x i1> to a <2 x i64> all-ones/all-zeros mask.
define <2 x i64> @ult_56_v2i64(<2 x i64> %0) {
; SSE2-LABEL: ult_56_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    psadbw %xmm1, %xmm0
; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483704,2147483704]
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ult_56_v2i64:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    psadbw %xmm1, %xmm0
; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483704,2147483704]
; SSE3-NEXT:    movdqa %xmm1, %xmm2
; SSE3-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE3-NEXT:    pand %xmm3, %xmm1
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE3-NEXT:    por %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ult_56_v2i64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    psadbw %xmm3, %xmm0
; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483704,2147483704]
; SSSE3-NEXT:    movdqa %xmm1, %xmm2
; SSSE3-NEXT:    pcmpgtd %xmm0, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSSE3-NEXT:    pand %xmm3, %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ult_56_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    psadbw %xmm3, %xmm0
; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483704,2147483704]
; SSE41-NEXT:    movdqa %xmm1, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm3, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ult_56_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [56,56]
; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ult_56_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [56,56]
; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ult_56_v2i64:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} xmm1 = [56,56]
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ult_56_v2i64:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ult_56_v2i64:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vmovdqa {{.*#+}} xmm1 = [56,56]
; BITALG_NOVLX-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ult_56_v2i64:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
  %3 = icmp ult <2 x i64> %2, <i64 56, i64 56>
  %4 = sext <2 x i1> %3 to <2 x i64>
  ret <2 x i64> %4
}
37284
; Checks x86 codegen for: per-lane ctpop of <2 x i64>, icmp ugt 56, then
; sext <2 x i1> to a <2 x i64> all-ones/all-zeros mask.
define <2 x i64> @ugt_56_v2i64(<2 x i64> %0) {
; SSE2-LABEL: ugt_56_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    psadbw %xmm1, %xmm0
; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483704,2147483704]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ugt_56_v2i64:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    psadbw %xmm1, %xmm0
; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483704,2147483704]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE3-NEXT:    pand %xmm3, %xmm1
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE3-NEXT:    por %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ugt_56_v2i64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    psadbw %xmm3, %xmm0
; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483704,2147483704]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pcmpgtd %xmm1, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSSE3-NEXT:    pand %xmm3, %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ugt_56_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    psadbw %xmm3, %xmm0
; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483704,2147483704]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm3, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ugt_56_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ugt_56_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ugt_56_v2i64:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ugt_56_v2i64:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ugt_56_v2i64:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ugt_56_v2i64:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
  %3 = icmp ugt <2 x i64> %2, <i64 56, i64 56>
  %4 = sext <2 x i1> %3 to <2 x i64>
  ret <2 x i64> %4
}
37468
37469define <2 x i64> @ult_57_v2i64(<2 x i64> %0) {
37470; SSE2-LABEL: ult_57_v2i64:
37471; SSE2:       # %bb.0:
37472; SSE2-NEXT:    movdqa %xmm0, %xmm1
37473; SSE2-NEXT:    psrlw $1, %xmm1
37474; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
37475; SSE2-NEXT:    psubb %xmm1, %xmm0
37476; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
37477; SSE2-NEXT:    movdqa %xmm0, %xmm2
37478; SSE2-NEXT:    pand %xmm1, %xmm2
37479; SSE2-NEXT:    psrlw $2, %xmm0
37480; SSE2-NEXT:    pand %xmm1, %xmm0
37481; SSE2-NEXT:    paddb %xmm2, %xmm0
37482; SSE2-NEXT:    movdqa %xmm0, %xmm1
37483; SSE2-NEXT:    psrlw $4, %xmm1
37484; SSE2-NEXT:    paddb %xmm0, %xmm1
37485; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
37486; SSE2-NEXT:    pxor %xmm0, %xmm0
37487; SSE2-NEXT:    psadbw %xmm1, %xmm0
37488; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
37489; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483705,2147483705]
37490; SSE2-NEXT:    movdqa %xmm1, %xmm2
37491; SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
37492; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
37493; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
37494; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
37495; SSE2-NEXT:    pand %xmm3, %xmm1
37496; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
37497; SSE2-NEXT:    por %xmm1, %xmm0
37498; SSE2-NEXT:    retq
37499;
37500; SSE3-LABEL: ult_57_v2i64:
37501; SSE3:       # %bb.0:
37502; SSE3-NEXT:    movdqa %xmm0, %xmm1
37503; SSE3-NEXT:    psrlw $1, %xmm1
37504; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
37505; SSE3-NEXT:    psubb %xmm1, %xmm0
37506; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
37507; SSE3-NEXT:    movdqa %xmm0, %xmm2
37508; SSE3-NEXT:    pand %xmm1, %xmm2
37509; SSE3-NEXT:    psrlw $2, %xmm0
37510; SSE3-NEXT:    pand %xmm1, %xmm0
37511; SSE3-NEXT:    paddb %xmm2, %xmm0
37512; SSE3-NEXT:    movdqa %xmm0, %xmm1
37513; SSE3-NEXT:    psrlw $4, %xmm1
37514; SSE3-NEXT:    paddb %xmm0, %xmm1
37515; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
37516; SSE3-NEXT:    pxor %xmm0, %xmm0
37517; SSE3-NEXT:    psadbw %xmm1, %xmm0
37518; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
37519; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483705,2147483705]
37520; SSE3-NEXT:    movdqa %xmm1, %xmm2
37521; SSE3-NEXT:    pcmpgtd %xmm0, %xmm2
37522; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
37523; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
37524; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
37525; SSE3-NEXT:    pand %xmm3, %xmm1
37526; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
37527; SSE3-NEXT:    por %xmm1, %xmm0
37528; SSE3-NEXT:    retq
37529;
37530; SSSE3-LABEL: ult_57_v2i64:
37531; SSSE3:       # %bb.0:
37532; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
37533; SSSE3-NEXT:    movdqa %xmm0, %xmm2
37534; SSSE3-NEXT:    pand %xmm1, %xmm2
37535; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
37536; SSSE3-NEXT:    movdqa %xmm3, %xmm4
37537; SSSE3-NEXT:    pshufb %xmm2, %xmm4
37538; SSSE3-NEXT:    psrlw $4, %xmm0
37539; SSSE3-NEXT:    pand %xmm1, %xmm0
37540; SSSE3-NEXT:    pshufb %xmm0, %xmm3
37541; SSSE3-NEXT:    paddb %xmm4, %xmm3
37542; SSSE3-NEXT:    pxor %xmm0, %xmm0
37543; SSSE3-NEXT:    psadbw %xmm3, %xmm0
37544; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
37545; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483705,2147483705]
37546; SSSE3-NEXT:    movdqa %xmm1, %xmm2
37547; SSSE3-NEXT:    pcmpgtd %xmm0, %xmm2
37548; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
37549; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
37550; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
37551; SSSE3-NEXT:    pand %xmm3, %xmm1
37552; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
37553; SSSE3-NEXT:    por %xmm1, %xmm0
37554; SSSE3-NEXT:    retq
37555;
37556; SSE41-LABEL: ult_57_v2i64:
37557; SSE41:       # %bb.0:
37558; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
37559; SSE41-NEXT:    movdqa %xmm0, %xmm2
37560; SSE41-NEXT:    pand %xmm1, %xmm2
37561; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
37562; SSE41-NEXT:    movdqa %xmm3, %xmm4
37563; SSE41-NEXT:    pshufb %xmm2, %xmm4
37564; SSE41-NEXT:    psrlw $4, %xmm0
37565; SSE41-NEXT:    pand %xmm1, %xmm0
37566; SSE41-NEXT:    pshufb %xmm0, %xmm3
37567; SSE41-NEXT:    paddb %xmm4, %xmm3
37568; SSE41-NEXT:    pxor %xmm0, %xmm0
37569; SSE41-NEXT:    psadbw %xmm3, %xmm0
37570; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
37571; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483705,2147483705]
37572; SSE41-NEXT:    movdqa %xmm1, %xmm2
37573; SSE41-NEXT:    pcmpgtd %xmm0, %xmm2
37574; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
37575; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
37576; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
37577; SSE41-NEXT:    pand %xmm3, %xmm1
37578; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
37579; SSE41-NEXT:    por %xmm1, %xmm0
37580; SSE41-NEXT:    retq
37581;
37582; AVX1-LABEL: ult_57_v2i64:
37583; AVX1:       # %bb.0:
37584; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
37585; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
37586; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
37587; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
37588; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
37589; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
37590; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
37591; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
37592; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
37593; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
37594; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [57,57]
37595; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
37596; AVX1-NEXT:    retq
37597;
37598; AVX2-LABEL: ult_57_v2i64:
37599; AVX2:       # %bb.0:
37600; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
37601; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
37602; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
37603; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
37604; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
37605; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
37606; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
37607; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
37608; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
37609; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
37610; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [57,57]
37611; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
37612; AVX2-NEXT:    retq
37613;
37614; AVX512VPOPCNTDQ-LABEL: ult_57_v2i64:
37615; AVX512VPOPCNTDQ:       # %bb.0:
37616; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
37617; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
37618; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} xmm1 = [57,57]
37619; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
37620; AVX512VPOPCNTDQ-NEXT:    vzeroupper
37621; AVX512VPOPCNTDQ-NEXT:    retq
37622;
37623; AVX512VPOPCNTDQVL-LABEL: ult_57_v2i64:
37624; AVX512VPOPCNTDQVL:       # %bb.0:
37625; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
37626; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
37627; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
37628; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
37629; AVX512VPOPCNTDQVL-NEXT:    retq
37630;
37631; BITALG_NOVLX-LABEL: ult_57_v2i64:
37632; BITALG_NOVLX:       # %bb.0:
37633; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
37634; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
37635; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
37636; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
37637; BITALG_NOVLX-NEXT:    vmovdqa {{.*#+}} xmm1 = [57,57]
37638; BITALG_NOVLX-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
37639; BITALG_NOVLX-NEXT:    vzeroupper
37640; BITALG_NOVLX-NEXT:    retq
37641;
37642; BITALG-LABEL: ult_57_v2i64:
37643; BITALG:       # %bb.0:
37644; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
37645; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
37646; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
37647; BITALG-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
37648; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
37649; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
37650; BITALG-NEXT:    retq
37651  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
37652  %3 = icmp ult <2 x i64> %2, <i64 57, i64 57>
37653  %4 = sext <2 x i1> %3 to <2 x i64>
37654  ret <2 x i64> %4
37655}
37656
37657define <2 x i64> @ugt_57_v2i64(<2 x i64> %0) {
37658; SSE2-LABEL: ugt_57_v2i64:
37659; SSE2:       # %bb.0:
37660; SSE2-NEXT:    movdqa %xmm0, %xmm1
37661; SSE2-NEXT:    psrlw $1, %xmm1
37662; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
37663; SSE2-NEXT:    psubb %xmm1, %xmm0
37664; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
37665; SSE2-NEXT:    movdqa %xmm0, %xmm2
37666; SSE2-NEXT:    pand %xmm1, %xmm2
37667; SSE2-NEXT:    psrlw $2, %xmm0
37668; SSE2-NEXT:    pand %xmm1, %xmm0
37669; SSE2-NEXT:    paddb %xmm2, %xmm0
37670; SSE2-NEXT:    movdqa %xmm0, %xmm1
37671; SSE2-NEXT:    psrlw $4, %xmm1
37672; SSE2-NEXT:    paddb %xmm0, %xmm1
37673; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
37674; SSE2-NEXT:    pxor %xmm0, %xmm0
37675; SSE2-NEXT:    psadbw %xmm1, %xmm0
37676; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
37677; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483705,2147483705]
37678; SSE2-NEXT:    movdqa %xmm0, %xmm2
37679; SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
37680; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
37681; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
37682; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
37683; SSE2-NEXT:    pand %xmm3, %xmm1
37684; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
37685; SSE2-NEXT:    por %xmm1, %xmm0
37686; SSE2-NEXT:    retq
37687;
37688; SSE3-LABEL: ugt_57_v2i64:
37689; SSE3:       # %bb.0:
37690; SSE3-NEXT:    movdqa %xmm0, %xmm1
37691; SSE3-NEXT:    psrlw $1, %xmm1
37692; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
37693; SSE3-NEXT:    psubb %xmm1, %xmm0
37694; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
37695; SSE3-NEXT:    movdqa %xmm0, %xmm2
37696; SSE3-NEXT:    pand %xmm1, %xmm2
37697; SSE3-NEXT:    psrlw $2, %xmm0
37698; SSE3-NEXT:    pand %xmm1, %xmm0
37699; SSE3-NEXT:    paddb %xmm2, %xmm0
37700; SSE3-NEXT:    movdqa %xmm0, %xmm1
37701; SSE3-NEXT:    psrlw $4, %xmm1
37702; SSE3-NEXT:    paddb %xmm0, %xmm1
37703; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
37704; SSE3-NEXT:    pxor %xmm0, %xmm0
37705; SSE3-NEXT:    psadbw %xmm1, %xmm0
37706; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
37707; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483705,2147483705]
37708; SSE3-NEXT:    movdqa %xmm0, %xmm2
37709; SSE3-NEXT:    pcmpgtd %xmm1, %xmm2
37710; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
37711; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
37712; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
37713; SSE3-NEXT:    pand %xmm3, %xmm1
37714; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
37715; SSE3-NEXT:    por %xmm1, %xmm0
37716; SSE3-NEXT:    retq
37717;
37718; SSSE3-LABEL: ugt_57_v2i64:
37719; SSSE3:       # %bb.0:
37720; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
37721; SSSE3-NEXT:    movdqa %xmm0, %xmm2
37722; SSSE3-NEXT:    pand %xmm1, %xmm2
37723; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
37724; SSSE3-NEXT:    movdqa %xmm3, %xmm4
37725; SSSE3-NEXT:    pshufb %xmm2, %xmm4
37726; SSSE3-NEXT:    psrlw $4, %xmm0
37727; SSSE3-NEXT:    pand %xmm1, %xmm0
37728; SSSE3-NEXT:    pshufb %xmm0, %xmm3
37729; SSSE3-NEXT:    paddb %xmm4, %xmm3
37730; SSSE3-NEXT:    pxor %xmm0, %xmm0
37731; SSSE3-NEXT:    psadbw %xmm3, %xmm0
37732; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
37733; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483705,2147483705]
37734; SSSE3-NEXT:    movdqa %xmm0, %xmm2
37735; SSSE3-NEXT:    pcmpgtd %xmm1, %xmm2
37736; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
37737; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
37738; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
37739; SSSE3-NEXT:    pand %xmm3, %xmm1
37740; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
37741; SSSE3-NEXT:    por %xmm1, %xmm0
37742; SSSE3-NEXT:    retq
37743;
37744; SSE41-LABEL: ugt_57_v2i64:
37745; SSE41:       # %bb.0:
37746; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
37747; SSE41-NEXT:    movdqa %xmm0, %xmm2
37748; SSE41-NEXT:    pand %xmm1, %xmm2
37749; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
37750; SSE41-NEXT:    movdqa %xmm3, %xmm4
37751; SSE41-NEXT:    pshufb %xmm2, %xmm4
37752; SSE41-NEXT:    psrlw $4, %xmm0
37753; SSE41-NEXT:    pand %xmm1, %xmm0
37754; SSE41-NEXT:    pshufb %xmm0, %xmm3
37755; SSE41-NEXT:    paddb %xmm4, %xmm3
37756; SSE41-NEXT:    pxor %xmm0, %xmm0
37757; SSE41-NEXT:    psadbw %xmm3, %xmm0
37758; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
37759; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483705,2147483705]
37760; SSE41-NEXT:    movdqa %xmm0, %xmm2
37761; SSE41-NEXT:    pcmpgtd %xmm1, %xmm2
37762; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
37763; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
37764; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
37765; SSE41-NEXT:    pand %xmm3, %xmm1
37766; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
37767; SSE41-NEXT:    por %xmm1, %xmm0
37768; SSE41-NEXT:    retq
37769;
37770; AVX1-LABEL: ugt_57_v2i64:
37771; AVX1:       # %bb.0:
37772; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
37773; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
37774; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
37775; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
37776; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
37777; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
37778; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
37779; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
37780; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
37781; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
37782; AVX1-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
37783; AVX1-NEXT:    retq
37784;
37785; AVX2-LABEL: ugt_57_v2i64:
37786; AVX2:       # %bb.0:
37787; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
37788; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
37789; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
37790; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
37791; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
37792; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
37793; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
37794; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
37795; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
37796; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
37797; AVX2-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
37798; AVX2-NEXT:    retq
37799;
37800; AVX512VPOPCNTDQ-LABEL: ugt_57_v2i64:
37801; AVX512VPOPCNTDQ:       # %bb.0:
37802; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
37803; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
37804; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
37805; AVX512VPOPCNTDQ-NEXT:    vzeroupper
37806; AVX512VPOPCNTDQ-NEXT:    retq
37807;
37808; AVX512VPOPCNTDQVL-LABEL: ugt_57_v2i64:
37809; AVX512VPOPCNTDQVL:       # %bb.0:
37810; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
37811; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
37812; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
37813; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
37814; AVX512VPOPCNTDQVL-NEXT:    retq
37815;
37816; BITALG_NOVLX-LABEL: ugt_57_v2i64:
37817; BITALG_NOVLX:       # %bb.0:
37818; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
37819; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
37820; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
37821; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
37822; BITALG_NOVLX-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
37823; BITALG_NOVLX-NEXT:    vzeroupper
37824; BITALG_NOVLX-NEXT:    retq
37825;
37826; BITALG-LABEL: ugt_57_v2i64:
37827; BITALG:       # %bb.0:
37828; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
37829; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
37830; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
37831; BITALG-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
37832; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
37833; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
37834; BITALG-NEXT:    retq
37835  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
37836  %3 = icmp ugt <2 x i64> %2, <i64 57, i64 57>
37837  %4 = sext <2 x i1> %3 to <2 x i64>
37838  ret <2 x i64> %4
37839}
37840
; ult_58_v2i64: popcount each 64-bit lane, compare unsigned-less-than 58,
; and sign-extend the i1 results to all-ones/all-zeros lane masks.
; Pre-AVX512 targets have no unsigned 64-bit compare, so the checks expect
; the sign-bit-flip trick (por of 0x8000000000000000, then signed pcmpgtd
; pieces with constant 2147483706 = 0x80000000+58); AVX uses vpcmpgtq with
; the constant splat [58,58]; AVX512VL/BITALG use vpcmpltuq into a mask.
; (Fused viewer line numbers stripped from every line; instruction text
; left byte-identical.)
define <2 x i64> @ult_58_v2i64(<2 x i64> %0) {
; SSE2-LABEL: ult_58_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    psadbw %xmm1, %xmm0
; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483706,2147483706]
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ult_58_v2i64:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    psadbw %xmm1, %xmm0
; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483706,2147483706]
; SSE3-NEXT:    movdqa %xmm1, %xmm2
; SSE3-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE3-NEXT:    pand %xmm3, %xmm1
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE3-NEXT:    por %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ult_58_v2i64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    psadbw %xmm3, %xmm0
; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483706,2147483706]
; SSSE3-NEXT:    movdqa %xmm1, %xmm2
; SSSE3-NEXT:    pcmpgtd %xmm0, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSSE3-NEXT:    pand %xmm3, %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ult_58_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    psadbw %xmm3, %xmm0
; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483706,2147483706]
; SSE41-NEXT:    movdqa %xmm1, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm3, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ult_58_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [58,58]
; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ult_58_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [58,58]
; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ult_58_v2i64:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} xmm1 = [58,58]
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ult_58_v2i64:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ult_58_v2i64:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vmovdqa {{.*#+}} xmm1 = [58,58]
; BITALG_NOVLX-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ult_58_v2i64:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
  %3 = icmp ult <2 x i64> %2, <i64 58, i64 58>
  %4 = sext <2 x i1> %3 to <2 x i64>
  ret <2 x i64> %4
}
38028
; ugt_58_v2i64: popcount each 64-bit lane, compare unsigned-greater-than 58,
; and sign-extend the i1 results to all-ones/all-zeros lane masks.
; Mirrors ugt_57_v2i64 with the threshold bumped to 58 (biased 32-bit
; compare constant 2147483706 = 0x80000000+58 on pre-AVX targets).
; (Fused viewer line numbers stripped from every line; instruction text
; left byte-identical.)
define <2 x i64> @ugt_58_v2i64(<2 x i64> %0) {
; SSE2-LABEL: ugt_58_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    psadbw %xmm1, %xmm0
; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483706,2147483706]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ugt_58_v2i64:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    psadbw %xmm1, %xmm0
; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483706,2147483706]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE3-NEXT:    pand %xmm3, %xmm1
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE3-NEXT:    por %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ugt_58_v2i64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    psadbw %xmm3, %xmm0
; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483706,2147483706]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pcmpgtd %xmm1, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSSE3-NEXT:    pand %xmm3, %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ugt_58_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    psadbw %xmm3, %xmm0
; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483706,2147483706]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm3, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ugt_58_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ugt_58_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ugt_58_v2i64:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ugt_58_v2i64:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ugt_58_v2i64:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ugt_58_v2i64:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
  %3 = icmp ugt <2 x i64> %2, <i64 58, i64 58>
  %4 = sext <2 x i1> %3 to <2 x i64>
  ret <2 x i64> %4
}
38212
38213define <2 x i64> @ult_59_v2i64(<2 x i64> %0) {
38214; SSE2-LABEL: ult_59_v2i64:
38215; SSE2:       # %bb.0:
38216; SSE2-NEXT:    movdqa %xmm0, %xmm1
38217; SSE2-NEXT:    psrlw $1, %xmm1
38218; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
38219; SSE2-NEXT:    psubb %xmm1, %xmm0
38220; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
38221; SSE2-NEXT:    movdqa %xmm0, %xmm2
38222; SSE2-NEXT:    pand %xmm1, %xmm2
38223; SSE2-NEXT:    psrlw $2, %xmm0
38224; SSE2-NEXT:    pand %xmm1, %xmm0
38225; SSE2-NEXT:    paddb %xmm2, %xmm0
38226; SSE2-NEXT:    movdqa %xmm0, %xmm1
38227; SSE2-NEXT:    psrlw $4, %xmm1
38228; SSE2-NEXT:    paddb %xmm0, %xmm1
38229; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
38230; SSE2-NEXT:    pxor %xmm0, %xmm0
38231; SSE2-NEXT:    psadbw %xmm1, %xmm0
38232; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
38233; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483707,2147483707]
38234; SSE2-NEXT:    movdqa %xmm1, %xmm2
38235; SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
38236; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
38237; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
38238; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
38239; SSE2-NEXT:    pand %xmm3, %xmm1
38240; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
38241; SSE2-NEXT:    por %xmm1, %xmm0
38242; SSE2-NEXT:    retq
38243;
38244; SSE3-LABEL: ult_59_v2i64:
38245; SSE3:       # %bb.0:
38246; SSE3-NEXT:    movdqa %xmm0, %xmm1
38247; SSE3-NEXT:    psrlw $1, %xmm1
38248; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
38249; SSE3-NEXT:    psubb %xmm1, %xmm0
38250; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
38251; SSE3-NEXT:    movdqa %xmm0, %xmm2
38252; SSE3-NEXT:    pand %xmm1, %xmm2
38253; SSE3-NEXT:    psrlw $2, %xmm0
38254; SSE3-NEXT:    pand %xmm1, %xmm0
38255; SSE3-NEXT:    paddb %xmm2, %xmm0
38256; SSE3-NEXT:    movdqa %xmm0, %xmm1
38257; SSE3-NEXT:    psrlw $4, %xmm1
38258; SSE3-NEXT:    paddb %xmm0, %xmm1
38259; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
38260; SSE3-NEXT:    pxor %xmm0, %xmm0
38261; SSE3-NEXT:    psadbw %xmm1, %xmm0
38262; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
38263; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483707,2147483707]
38264; SSE3-NEXT:    movdqa %xmm1, %xmm2
38265; SSE3-NEXT:    pcmpgtd %xmm0, %xmm2
38266; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
38267; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
38268; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
38269; SSE3-NEXT:    pand %xmm3, %xmm1
38270; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
38271; SSE3-NEXT:    por %xmm1, %xmm0
38272; SSE3-NEXT:    retq
38273;
38274; SSSE3-LABEL: ult_59_v2i64:
38275; SSSE3:       # %bb.0:
38276; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
38277; SSSE3-NEXT:    movdqa %xmm0, %xmm2
38278; SSSE3-NEXT:    pand %xmm1, %xmm2
38279; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
38280; SSSE3-NEXT:    movdqa %xmm3, %xmm4
38281; SSSE3-NEXT:    pshufb %xmm2, %xmm4
38282; SSSE3-NEXT:    psrlw $4, %xmm0
38283; SSSE3-NEXT:    pand %xmm1, %xmm0
38284; SSSE3-NEXT:    pshufb %xmm0, %xmm3
38285; SSSE3-NEXT:    paddb %xmm4, %xmm3
38286; SSSE3-NEXT:    pxor %xmm0, %xmm0
38287; SSSE3-NEXT:    psadbw %xmm3, %xmm0
38288; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
38289; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483707,2147483707]
38290; SSSE3-NEXT:    movdqa %xmm1, %xmm2
38291; SSSE3-NEXT:    pcmpgtd %xmm0, %xmm2
38292; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
38293; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
38294; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
38295; SSSE3-NEXT:    pand %xmm3, %xmm1
38296; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
38297; SSSE3-NEXT:    por %xmm1, %xmm0
38298; SSSE3-NEXT:    retq
38299;
38300; SSE41-LABEL: ult_59_v2i64:
38301; SSE41:       # %bb.0:
38302; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
38303; SSE41-NEXT:    movdqa %xmm0, %xmm2
38304; SSE41-NEXT:    pand %xmm1, %xmm2
38305; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
38306; SSE41-NEXT:    movdqa %xmm3, %xmm4
38307; SSE41-NEXT:    pshufb %xmm2, %xmm4
38308; SSE41-NEXT:    psrlw $4, %xmm0
38309; SSE41-NEXT:    pand %xmm1, %xmm0
38310; SSE41-NEXT:    pshufb %xmm0, %xmm3
38311; SSE41-NEXT:    paddb %xmm4, %xmm3
38312; SSE41-NEXT:    pxor %xmm0, %xmm0
38313; SSE41-NEXT:    psadbw %xmm3, %xmm0
38314; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
38315; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483707,2147483707]
38316; SSE41-NEXT:    movdqa %xmm1, %xmm2
38317; SSE41-NEXT:    pcmpgtd %xmm0, %xmm2
38318; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
38319; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
38320; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
38321; SSE41-NEXT:    pand %xmm3, %xmm1
38322; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
38323; SSE41-NEXT:    por %xmm1, %xmm0
38324; SSE41-NEXT:    retq
38325;
38326; AVX1-LABEL: ult_59_v2i64:
38327; AVX1:       # %bb.0:
38328; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
38329; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
38330; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
38331; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
38332; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
38333; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
38334; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
38335; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
38336; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
38337; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
38338; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [59,59]
38339; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
38340; AVX1-NEXT:    retq
38341;
38342; AVX2-LABEL: ult_59_v2i64:
38343; AVX2:       # %bb.0:
38344; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
38345; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
38346; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
38347; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
38348; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
38349; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
38350; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
38351; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
38352; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
38353; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
38354; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [59,59]
38355; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
38356; AVX2-NEXT:    retq
38357;
38358; AVX512VPOPCNTDQ-LABEL: ult_59_v2i64:
38359; AVX512VPOPCNTDQ:       # %bb.0:
38360; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
38361; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
38362; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} xmm1 = [59,59]
38363; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
38364; AVX512VPOPCNTDQ-NEXT:    vzeroupper
38365; AVX512VPOPCNTDQ-NEXT:    retq
38366;
38367; AVX512VPOPCNTDQVL-LABEL: ult_59_v2i64:
38368; AVX512VPOPCNTDQVL:       # %bb.0:
38369; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
38370; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
38371; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
38372; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
38373; AVX512VPOPCNTDQVL-NEXT:    retq
38374;
38375; BITALG_NOVLX-LABEL: ult_59_v2i64:
38376; BITALG_NOVLX:       # %bb.0:
38377; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
38378; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
38379; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
38380; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
38381; BITALG_NOVLX-NEXT:    vmovdqa {{.*#+}} xmm1 = [59,59]
38382; BITALG_NOVLX-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
38383; BITALG_NOVLX-NEXT:    vzeroupper
38384; BITALG_NOVLX-NEXT:    retq
38385;
38386; BITALG-LABEL: ult_59_v2i64:
38387; BITALG:       # %bb.0:
38388; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
38389; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
38390; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
38391; BITALG-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
38392; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
38393; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
38394; BITALG-NEXT:    retq
38395  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
38396  %3 = icmp ult <2 x i64> %2, <i64 59, i64 59>
38397  %4 = sext <2 x i1> %3 to <2 x i64>
38398  ret <2 x i64> %4
38399}
38400
define <2 x i64> @ugt_59_v2i64(<2 x i64> %0) {
; SSE2-LABEL: ugt_59_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    psadbw %xmm1, %xmm0
; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483707,2147483707]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ugt_59_v2i64:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    psadbw %xmm1, %xmm0
; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483707,2147483707]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE3-NEXT:    pand %xmm3, %xmm1
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE3-NEXT:    por %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ugt_59_v2i64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    psadbw %xmm3, %xmm0
; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483707,2147483707]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pcmpgtd %xmm1, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSSE3-NEXT:    pand %xmm3, %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ugt_59_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    psadbw %xmm3, %xmm0
; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483707,2147483707]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm3, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ugt_59_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ugt_59_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ugt_59_v2i64:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ugt_59_v2i64:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ugt_59_v2i64:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ugt_59_v2i64:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
; Test IR (check lines above pin the llc output per subtarget): per-i64-lane
; popcount, then unsigned-greater-than 59, sign-extended to an all-ones /
; all-zeros i64 mask. In the pre-AVX check lines, 2147483707 = 59 + 2^31;
; together with the preceding 'por' against a RIP-relative constant this
; presumably biases the counts so signed pcmpgtd can emulate the unsigned
; 64-bit compare -- regenerate with update_llc_test_checks.py to confirm.
  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
  %3 = icmp ugt <2 x i64> %2, <i64 59, i64 59>
  %4 = sext <2 x i1> %3 to <2 x i64>
  ret <2 x i64> %4
}
38584
define <2 x i64> @ult_60_v2i64(<2 x i64> %0) {
; SSE2-LABEL: ult_60_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    psadbw %xmm1, %xmm0
; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483708,2147483708]
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ult_60_v2i64:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    psadbw %xmm1, %xmm0
; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483708,2147483708]
; SSE3-NEXT:    movdqa %xmm1, %xmm2
; SSE3-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE3-NEXT:    pand %xmm3, %xmm1
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE3-NEXT:    por %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ult_60_v2i64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    psadbw %xmm3, %xmm0
; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483708,2147483708]
; SSSE3-NEXT:    movdqa %xmm1, %xmm2
; SSSE3-NEXT:    pcmpgtd %xmm0, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSSE3-NEXT:    pand %xmm3, %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ult_60_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    psadbw %xmm3, %xmm0
; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483708,2147483708]
; SSE41-NEXT:    movdqa %xmm1, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm3, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ult_60_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [60,60]
; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ult_60_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [60,60]
; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ult_60_v2i64:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} xmm1 = [60,60]
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ult_60_v2i64:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ult_60_v2i64:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vmovdqa {{.*#+}} xmm1 = [60,60]
; BITALG_NOVLX-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ult_60_v2i64:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
; Test IR (check lines above pin the llc output per subtarget): per-i64-lane
; popcount, then unsigned-less-than 60, sign-extended to an i64 mask. Note
; the reversed pcmpgtd operand order versus the ugt variants (constant
; compared greater-than count gives the less-than result), and the biased
; splat 2147483708 = 60 + 2^31 in the pre-AVX check lines -- regenerate
; with update_llc_test_checks.py to confirm.
  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
  %3 = icmp ult <2 x i64> %2, <i64 60, i64 60>
  %4 = sext <2 x i1> %3 to <2 x i64>
  ret <2 x i64> %4
}
38772
define <2 x i64> @ugt_60_v2i64(<2 x i64> %0) {
; SSE2-LABEL: ugt_60_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    psadbw %xmm1, %xmm0
; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483708,2147483708]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ugt_60_v2i64:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    psadbw %xmm1, %xmm0
; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483708,2147483708]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE3-NEXT:    pand %xmm3, %xmm1
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE3-NEXT:    por %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ugt_60_v2i64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    psadbw %xmm3, %xmm0
; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483708,2147483708]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pcmpgtd %xmm1, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSSE3-NEXT:    pand %xmm3, %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ugt_60_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    psadbw %xmm3, %xmm0
; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483708,2147483708]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm3, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ugt_60_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ugt_60_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ugt_60_v2i64:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ugt_60_v2i64:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ugt_60_v2i64:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ugt_60_v2i64:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
; Test IR (check lines above pin the llc output per subtarget): per-i64-lane
; popcount, then unsigned-greater-than 60, sign-extended to an i64 mask.
; In the pre-AVX check lines, 2147483708 = 60 + 2^31; together with the
; preceding 'por' against a RIP-relative constant this presumably biases
; the counts so signed pcmpgtd can emulate the unsigned 64-bit compare --
; regenerate with update_llc_test_checks.py to confirm.
  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
  %3 = icmp ugt <2 x i64> %2, <i64 60, i64 60>
  %4 = sext <2 x i1> %3 to <2 x i64>
  ret <2 x i64> %4
}
38956
; ctpop(<2 x i64>) compared unsigned-less-than 61, result sext'd to <2 x i64>.
; SSE2/SSE3 count bits with the shift-and-mask byte-popcount sequence,
; SSSE3/SSE4.1 with the nibble-LUT pshufb sequence; both sum per-64-bit-lane
; byte counts via psadbw against zero. With no 64-bit unsigned compare
; available, the SSE paths `por` a constant onto the count and compare
; against 2147483709 (= 61 | 0x80000000, i.e. the bound with the sign bit
; set) using signed pcmpgtd/pcmpeqd/pshufd — a sign-bias trick so signed
; 32-bit compares implement the unsigned 64-bit predicate. AVX1/AVX2 use
; vpcmpgtq [61,61], x (counts are small, so a signed compare suffices);
; AVX512VPOPCNTDQ(+VL) and BITALG use vpcmpltuq into a mask register.
define <2 x i64> @ult_61_v2i64(<2 x i64> %0) {
; SSE2-LABEL: ult_61_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    psadbw %xmm1, %xmm0
; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483709,2147483709]
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ult_61_v2i64:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    psadbw %xmm1, %xmm0
; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483709,2147483709]
; SSE3-NEXT:    movdqa %xmm1, %xmm2
; SSE3-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE3-NEXT:    pand %xmm3, %xmm1
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE3-NEXT:    por %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ult_61_v2i64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    psadbw %xmm3, %xmm0
; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483709,2147483709]
; SSSE3-NEXT:    movdqa %xmm1, %xmm2
; SSSE3-NEXT:    pcmpgtd %xmm0, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSSE3-NEXT:    pand %xmm3, %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ult_61_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    psadbw %xmm3, %xmm0
; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483709,2147483709]
; SSE41-NEXT:    movdqa %xmm1, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm3, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ult_61_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [61,61]
; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ult_61_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [61,61]
; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ult_61_v2i64:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} xmm1 = [61,61]
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ult_61_v2i64:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ult_61_v2i64:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vmovdqa {{.*#+}} xmm1 = [61,61]
; BITALG_NOVLX-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ult_61_v2i64:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
; IR under test: per-lane popcount, unsigned < 61, sext i1 -> i64.
  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
  %3 = icmp ult <2 x i64> %2, <i64 61, i64 61>
  %4 = sext <2 x i1> %3 to <2 x i64>
  ret <2 x i64> %4
}
39144
; ctpop(<2 x i64>) compared unsigned-greater-than 61, result sext'd to <2 x i64>.
; Same popcount lowerings as the ult variant; the compare direction flips the
; pcmpgtd operand order (count vs the sign-biased bound 2147483709 =
; 61 | 0x80000000, after the `por` bias) on the SSE paths, AVX1/AVX2 fold the
; [61,61] constant into vpcmpgtq's memory operand, and the AVX512/BITALG mask
; paths use vpcmpnleuq (unsigned "not <=", i.e. >) instead of vpcmpltuq.
define <2 x i64> @ugt_61_v2i64(<2 x i64> %0) {
; SSE2-LABEL: ugt_61_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    psadbw %xmm1, %xmm0
; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483709,2147483709]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ugt_61_v2i64:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    psadbw %xmm1, %xmm0
; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483709,2147483709]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE3-NEXT:    pand %xmm3, %xmm1
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE3-NEXT:    por %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ugt_61_v2i64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    psadbw %xmm3, %xmm0
; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483709,2147483709]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pcmpgtd %xmm1, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSSE3-NEXT:    pand %xmm3, %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ugt_61_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    psadbw %xmm3, %xmm0
; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483709,2147483709]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm3, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ugt_61_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ugt_61_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ugt_61_v2i64:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ugt_61_v2i64:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ugt_61_v2i64:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ugt_61_v2i64:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
; IR under test: per-lane popcount, unsigned > 61, sext i1 -> i64.
  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
  %3 = icmp ugt <2 x i64> %2, <i64 61, i64 61>
  %4 = sext <2 x i1> %3 to <2 x i64>
  ret <2 x i64> %4
}
39328
; ctpop(<2 x i64>) compared unsigned-less-than 62, result sext'd to <2 x i64>.
; Identical lowering shapes to the 61 bound; only the constants change:
; the SSE paths compare the `por`-biased count against 2147483710
; (= 62 | 0x80000000) with signed pcmpgtd/pcmpeqd, AVX1/AVX2 use
; vpcmpgtq [62,62], x, and the AVX512/BITALG mask paths use vpcmpltuq.
define <2 x i64> @ult_62_v2i64(<2 x i64> %0) {
; SSE2-LABEL: ult_62_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    psadbw %xmm1, %xmm0
; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483710,2147483710]
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ult_62_v2i64:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    psadbw %xmm1, %xmm0
; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483710,2147483710]
; SSE3-NEXT:    movdqa %xmm1, %xmm2
; SSE3-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE3-NEXT:    pand %xmm3, %xmm1
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE3-NEXT:    por %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ult_62_v2i64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    psadbw %xmm3, %xmm0
; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483710,2147483710]
; SSSE3-NEXT:    movdqa %xmm1, %xmm2
; SSSE3-NEXT:    pcmpgtd %xmm0, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSSE3-NEXT:    pand %xmm3, %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ult_62_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    psadbw %xmm3, %xmm0
; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483710,2147483710]
; SSE41-NEXT:    movdqa %xmm1, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm3, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ult_62_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [62,62]
; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ult_62_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [62,62]
; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ult_62_v2i64:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} xmm1 = [62,62]
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ult_62_v2i64:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ult_62_v2i64:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vmovdqa {{.*#+}} xmm1 = [62,62]
; BITALG_NOVLX-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ult_62_v2i64:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
; IR under test: per-lane popcount, unsigned < 62, sext i1 -> i64.
  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
  %3 = icmp ult <2 x i64> %2, <i64 62, i64 62>
  %4 = sext <2 x i1> %3 to <2 x i64>
  ret <2 x i64> %4
}
39516
; ctpop(<2 x i64>) compared unsigned-greater-than 62, result sext'd to <2 x i64>.
; Same popcount lowerings as the ult variant with the compare flipped: the SSE
; paths test the `por`-biased count against 2147483710 (= 62 | 0x80000000)
; with pcmpgtd operands swapped (count > bound), AVX1/AVX2 fold the constant
; into vpcmpgtq's memory operand, and the AVX512/BITALG mask paths use
; vpcmpnleuq (unsigned "not <=", i.e. >).
define <2 x i64> @ugt_62_v2i64(<2 x i64> %0) {
; SSE2-LABEL: ugt_62_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    psadbw %xmm1, %xmm0
; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483710,2147483710]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ugt_62_v2i64:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    psadbw %xmm1, %xmm0
; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483710,2147483710]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE3-NEXT:    pand %xmm3, %xmm1
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE3-NEXT:    por %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ugt_62_v2i64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    psadbw %xmm3, %xmm0
; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483710,2147483710]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pcmpgtd %xmm1, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSSE3-NEXT:    pand %xmm3, %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ugt_62_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    psadbw %xmm3, %xmm0
; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483710,2147483710]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm3, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ugt_62_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ugt_62_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ugt_62_v2i64:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ugt_62_v2i64:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ugt_62_v2i64:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ugt_62_v2i64:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpnleuq {{.*}}(%rip), %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
; IR under test: per-lane popcount, unsigned > 62, sext i1 -> i64.
  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
  %3 = icmp ugt <2 x i64> %2, <i64 62, i64 62>
  %4 = sext <2 x i1> %3 to <2 x i64>
  ret <2 x i64> %4
}
39700
; Tests lowering of a per-lane "popcount(x) u< 63" predicate on <2 x i64>,
; sign-extended to a lane mask. The assembly expectations below are
; autogenerated (see the NOTE at the top of the file) and cover each RUN
; line's subtarget, from scalar-free bit-twiddling popcount on baseline
; targets up to native vpopcntq/vpopcntb on the AVX-512 variants.
define <2 x i64> @ult_63_v2i64(<2 x i64> %0) {
; SSE2-LABEL: ult_63_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    psadbw %xmm1, %xmm0
; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483711,2147483711]
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: ult_63_v2i64:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $1, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    psubb %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE3-NEXT:    pand %xmm1, %xmm2
; SSE3-NEXT:    psrlw $2, %xmm0
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    paddb %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE3-NEXT:    psrlw $4, %xmm1
; SSE3-NEXT:    paddb %xmm0, %xmm1
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pxor %xmm0, %xmm0
; SSE3-NEXT:    psadbw %xmm1, %xmm0
; SSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483711,2147483711]
; SSE3-NEXT:    movdqa %xmm1, %xmm2
; SSE3-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE3-NEXT:    pand %xmm3, %xmm1
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE3-NEXT:    por %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: ult_63_v2i64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    pand %xmm1, %xmm2
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    pshufb %xmm2, %xmm4
; SSSE3-NEXT:    psrlw $4, %xmm0
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm0, %xmm3
; SSSE3-NEXT:    paddb %xmm4, %xmm3
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    psadbw %xmm3, %xmm0
; SSSE3-NEXT:    por {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483711,2147483711]
; SSSE3-NEXT:    movdqa %xmm1, %xmm2
; SSSE3-NEXT:    pcmpgtd %xmm0, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSSE3-NEXT:    pand %xmm3, %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: ult_63_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pshufb %xmm2, %xmm4
; SSE41-NEXT:    psrlw $4, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufb %xmm0, %xmm3
; SSE41-NEXT:    paddb %xmm4, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    psadbw %xmm3, %xmm0
; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2147483711,2147483711]
; SSE41-NEXT:    movdqa %xmm1, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm3, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: ult_63_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [63,63]
; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ult_63_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [63,63]
; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: ult_63_v2i64:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} xmm1 = [63,63]
; AVX512VPOPCNTDQ-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX512VPOPCNTDQ-NEXT:    vzeroupper
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: ult_63_v2i64:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: ult_63_v2i64:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT:    vmovdqa {{.*#+}} xmm1 = [63,63]
; BITALG_NOVLX-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; BITALG_NOVLX-NEXT:    vzeroupper
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: ult_63_v2i64:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %xmm0, %xmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT:    vpcmpltuq {{.*}}(%rip), %xmm0, %k1
; BITALG-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT:    retq
  ; Per-element population count of the two i64 lanes.
  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
  ; A lane's predicate is true iff its popcount is unsigned-less-than 63
  ; (i.e. popcount in 0..62; only 63 and 64 fail).
  %3 = icmp ult <2 x i64> %2, <i64 63, i64 63>
  ; Widen the <2 x i1> predicate to an all-ones / all-zeros i64 lane mask.
  %4 = sext <2 x i1> %3 to <2 x i64>
  ret <2 x i64> %4
}
39888
39889declare <16 x i8> @llvm.ctpop.v16i8(<16 x i8>)
39890declare <8 x i16> @llvm.ctpop.v8i16(<8 x i16>)
39891declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32>)
39892declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>)
39893