; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX12,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX12,AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512BW

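; Verify that an AND of two <8 x i64> signed-greater-than compares, bitcast
; from <8 x i1> to i8, stays in vector registers: SSE/AVX pack the compare
; results and extract the bits with pmovmskb, while AVX512 folds the AND into
; a masked vpcmpgtq and reads the k-register directly.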
define i8 @v8i64(<8 x i64> %a, <8 x i64> %b, <8 x i64> %c, <8 x i64> %d) {
; SSE-LABEL: v8i64:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT:    pcmpgtq %xmm7, %xmm3
; SSE-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
; SSE-NEXT:    pshuflw {{.*#+}} xmm3 = xmm3[0,1,0,2,4,5,6,7]
; SSE-NEXT:    pcmpgtq %xmm6, %xmm2
; SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; SSE-NEXT:    pshuflw {{.*#+}} xmm2 = xmm2[0,1,0,2,4,5,6,7]
; SSE-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; SSE-NEXT:    pcmpgtq %xmm5, %xmm1
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
; SSE-NEXT:    pcmpgtq %xmm4, %xmm0
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; SSE-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
; SSE-NEXT:    pcmpgtq {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm11[0,2,2,3]
; SSE-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,1,0,2,4,5,6,7]
; SSE-NEXT:    pcmpgtq {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm10[0,2,2,3]
; SSE-NEXT:    pshuflw {{.*#+}} xmm2 = xmm2[0,1,0,2,4,5,6,7]
; SSE-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; SSE-NEXT:    pcmpgtq {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm9[0,2,2,3]
; SSE-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
; SSE-NEXT:    pcmpgtq {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT:    pshufd {{.*#+}} xmm3 = xmm8[0,2,2,3]
; SSE-NEXT:    pshuflw {{.*#+}} xmm3 = xmm3[0,2,2,3,4,5,6,7]
; SSE-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
; SSE-NEXT:    pblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm2[4,5,6,7]
; SSE-NEXT:    pand %xmm0, %xmm3
; SSE-NEXT:    packsswb %xmm0, %xmm3
; SSE-NEXT:    pmovmskb %xmm3, %eax
; SSE-NEXT:    # kill: def $al killed $al killed $eax
; SSE-NEXT:    retq
;
; AVX1-LABEL: v8i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm8
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm9
; AVX1-NEXT:    vpcmpgtq %xmm8, %xmm9, %xmm8
; AVX1-NEXT:    vpcmpgtq %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpackssdw %xmm8, %xmm1, %xmm8
; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm3
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpcmpgtq %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpackssdw %xmm8, %xmm0, %xmm0
; AVX1-NEXT:    vextractf128 $1, %ymm7, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm5, %xmm2
; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm2, %xmm1
; AVX1-NEXT:    vpcmpgtq %xmm7, %xmm5, %xmm2
; AVX1-NEXT:    vpackssdw %xmm1, %xmm2, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm6, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm4, %xmm3
; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpcmpgtq %xmm6, %xmm4, %xmm3
; AVX1-NEXT:    vpackssdw %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpackssdw %xmm1, %xmm2, %xmm1
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpsllw $15, %xmm0, %xmm0
; AVX1-NEXT:    vpsraw $15, %xmm0, %xmm0
; AVX1-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
; AVX1-NEXT:    vpmovmskb %xmm0, %eax
; AVX1-NEXT:    # kill: def $al killed $al killed $eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: v8i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpgtq %ymm3, %ymm1, %ymm1
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm3
; AVX2-NEXT:    vpackssdw %xmm3, %xmm1, %xmm1
; AVX2-NEXT:    vpcmpgtq %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm2
; AVX2-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpgtq %ymm7, %ymm5, %ymm1
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT:    vpackssdw %xmm2, %xmm1, %xmm1
; AVX2-NEXT:    vpcmpgtq %ymm6, %ymm4, %ymm2
; AVX2-NEXT:    vextracti128 $1, %ymm2, %xmm3
; AVX2-NEXT:    vpackssdw %xmm3, %xmm2, %xmm2
; AVX2-NEXT:    vpackssdw %xmm1, %xmm2, %xmm1
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpsllw $15, %xmm0, %xmm0
; AVX2-NEXT:    vpsraw $15, %xmm0, %xmm0
; AVX2-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
; AVX2-NEXT:    vpmovmskb %xmm0, %eax
; AVX2-NEXT:    # kill: def $al killed $al killed $eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: v8i64:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpcmpgtq %zmm1, %zmm0, %k1
; AVX512F-NEXT:    vpcmpgtq %zmm3, %zmm2, %k0 {%k1}
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: v8i64:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpcmpgtq %zmm1, %zmm0, %k1
; AVX512BW-NEXT:    vpcmpgtq %zmm3, %zmm2, %k0 {%k1}
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    # kill: def $al killed $al killed $eax
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
  %x0 = icmp sgt <8 x i64> %a, %b
  %x1 = icmp sgt <8 x i64> %c, %d
  %y = and <8 x i1> %x0, %x1
  %res = bitcast <8 x i1> %y to i8
  ret i8 %res
}

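; Floating-point version of the test above: fcmp ogt on <8 x double> lowers
; to cmpltpd/vcmpltpd with the operands commuted, then follows the same
; pack-and-pmovmskb (or masked-compare) reduction.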
define i8 @v8f64(<8 x double> %a, <8 x double> %b, <8 x double> %c, <8 x double> %d) {
; SSE-LABEL: v8f64:
; SSE:       # %bb.0:
; SSE-NEXT:    movapd {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT:    movapd {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT:    movapd {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT:    movapd {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT:    cmpltpd %xmm3, %xmm7
; SSE-NEXT:    shufps {{.*#+}} xmm7 = xmm7[0,2,2,3]
; SSE-NEXT:    pshuflw {{.*#+}} xmm3 = xmm7[0,1,0,2,4,5,6,7]
; SSE-NEXT:    cmpltpd %xmm2, %xmm6
; SSE-NEXT:    shufps {{.*#+}} xmm6 = xmm6[0,2,2,3]
; SSE-NEXT:    pshuflw {{.*#+}} xmm2 = xmm6[0,1,0,2,4,5,6,7]
; SSE-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; SSE-NEXT:    cmpltpd %xmm1, %xmm5
; SSE-NEXT:    shufps {{.*#+}} xmm5 = xmm5[0,2,2,3]
; SSE-NEXT:    pshuflw {{.*#+}} xmm1 = xmm5[0,2,2,3,4,5,6,7]
; SSE-NEXT:    cmpltpd %xmm0, %xmm4
; SSE-NEXT:    shufps {{.*#+}} xmm4 = xmm4[0,2,2,3]
; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm4[0,2,2,3,4,5,6,7]
; SSE-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
; SSE-NEXT:    cmpltpd {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT:    shufps {{.*#+}} xmm11 = xmm11[0,2,2,3]
; SSE-NEXT:    pshuflw {{.*#+}} xmm1 = xmm11[0,1,0,2,4,5,6,7]
; SSE-NEXT:    cmpltpd {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT:    shufps {{.*#+}} xmm10 = xmm10[0,2,2,3]
; SSE-NEXT:    pshuflw {{.*#+}} xmm2 = xmm10[0,1,0,2,4,5,6,7]
; SSE-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; SSE-NEXT:    cmpltpd {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT:    shufps {{.*#+}} xmm9 = xmm9[0,2,2,3]
; SSE-NEXT:    pshuflw {{.*#+}} xmm1 = xmm9[0,2,2,3,4,5,6,7]
; SSE-NEXT:    cmpltpd {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT:    shufps {{.*#+}} xmm8 = xmm8[0,2,2,3]
; SSE-NEXT:    pshuflw {{.*#+}} xmm3 = xmm8[0,2,2,3,4,5,6,7]
; SSE-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
; SSE-NEXT:    pblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm2[4,5,6,7]
; SSE-NEXT:    pand %xmm0, %xmm3
; SSE-NEXT:    packsswb %xmm0, %xmm3
; SSE-NEXT:    pmovmskb %xmm3, %eax
; SSE-NEXT:    # kill: def $al killed $al killed $eax
; SSE-NEXT:    retq
;
; AVX12-LABEL: v8f64:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vcmpltpd %ymm1, %ymm3, %ymm1
; AVX12-NEXT:    vextractf128 $1, %ymm1, %xmm3
; AVX12-NEXT:    vpackssdw %xmm3, %xmm1, %xmm1
; AVX12-NEXT:    vcmpltpd %ymm0, %ymm2, %ymm0
; AVX12-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX12-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
; AVX12-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
; AVX12-NEXT:    vcmpltpd %ymm5, %ymm7, %ymm1
; AVX12-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX12-NEXT:    vpackssdw %xmm2, %xmm1, %xmm1
; AVX12-NEXT:    vcmpltpd %ymm4, %ymm6, %ymm2
; AVX12-NEXT:    vextractf128 $1, %ymm2, %xmm3
; AVX12-NEXT:    vpackssdw %xmm3, %xmm2, %xmm2
; AVX12-NEXT:    vpackssdw %xmm1, %xmm2, %xmm1
; AVX12-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX12-NEXT:    vpsllw $15, %xmm0, %xmm0
; AVX12-NEXT:    vpsraw $15, %xmm0, %xmm0
; AVX12-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
; AVX12-NEXT:    vpmovmskb %xmm0, %eax
; AVX12-NEXT:    # kill: def $al killed $al killed $eax
; AVX12-NEXT:    vzeroupper
; AVX12-NEXT:    retq
;
; AVX512F-LABEL: v8f64:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vcmpltpd %zmm0, %zmm1, %k1
; AVX512F-NEXT:    vcmpltpd %zmm2, %zmm3, %k0 {%k1}
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: v8f64:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vcmpltpd %zmm0, %zmm1, %k1
; AVX512BW-NEXT:    vcmpltpd %zmm2, %zmm3, %k0 {%k1}
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    # kill: def $al killed $al killed $eax
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
  %x0 = fcmp ogt <8 x double> %a, %b
  %x1 = fcmp ogt <8 x double> %c, %d
  %y = and <8 x i1> %x0, %x1
  %res = bitcast <8 x i1> %y to i8
  ret i8 %res
}

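; <32 x i16> compares produce an i32 mask. Targets without a 32-bit vector
; mask move assemble it from two 16-bit halves with shll $16/orl; AVX2 reads
; all 32 bits with one ymm vpmovmskb, and AVX512BW compares zmm registers
; straight into a k-mask.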
define i32 @v32i16(<32 x i16> %a, <32 x i16> %b, <32 x i16> %c, <32 x i16> %d) {
; SSE-LABEL: v32i16:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT:    pcmpgtw %xmm5, %xmm1
; SSE-NEXT:    pcmpgtw %xmm4, %xmm0
; SSE-NEXT:    packsswb %xmm1, %xmm0
; SSE-NEXT:    pcmpgtw %xmm7, %xmm3
; SSE-NEXT:    pcmpgtw %xmm6, %xmm2
; SSE-NEXT:    packsswb %xmm3, %xmm2
; SSE-NEXT:    pcmpgtw {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT:    pcmpgtw {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT:    packsswb %xmm11, %xmm10
; SSE-NEXT:    pand %xmm0, %xmm10
; SSE-NEXT:    pcmpgtw {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT:    pcmpgtw {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT:    packsswb %xmm9, %xmm8
; SSE-NEXT:    pand %xmm2, %xmm8
; SSE-NEXT:    pmovmskb %xmm10, %ecx
; SSE-NEXT:    pmovmskb %xmm8, %eax
; SSE-NEXT:    shll $16, %eax
; SSE-NEXT:    orl %ecx, %eax
; SSE-NEXT:    retq
;
; AVX1-LABEL: v32i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm8
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm9
; AVX1-NEXT:    vpcmpgtw %xmm8, %xmm9, %xmm8
; AVX1-NEXT:    vpcmpgtw %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpacksswb %xmm8, %xmm1, %xmm8
; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm3
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpcmpgtw %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpgtw %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vextractf128 $1, %ymm7, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm5, %xmm2
; AVX1-NEXT:    vpcmpgtw %xmm1, %xmm2, %xmm1
; AVX1-NEXT:    vpcmpgtw %xmm7, %xmm5, %xmm2
; AVX1-NEXT:    vpacksswb %xmm1, %xmm2, %xmm1
; AVX1-NEXT:    vpand %xmm1, %xmm8, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm6, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm4, %xmm3
; AVX1-NEXT:    vpcmpgtw %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpcmpgtw %xmm6, %xmm4, %xmm3
; AVX1-NEXT:    vpacksswb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpmovmskb %xmm0, %ecx
; AVX1-NEXT:    vpmovmskb %xmm1, %eax
; AVX1-NEXT:    shll $16, %eax
; AVX1-NEXT:    orl %ecx, %eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: v32i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpgtw %ymm3, %ymm1, %ymm1
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm3
; AVX2-NEXT:    vpacksswb %xmm3, %xmm1, %xmm1
; AVX2-NEXT:    vpcmpgtw %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm2
; AVX2-NEXT:    vpacksswb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpgtw %ymm7, %ymm5, %ymm1
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT:    vpacksswb %xmm2, %xmm1, %xmm1
; AVX2-NEXT:    vpcmpgtw %ymm6, %ymm4, %ymm2
; AVX2-NEXT:    vextracti128 $1, %ymm2, %xmm3
; AVX2-NEXT:    vpacksswb %xmm3, %xmm2, %xmm2
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm2, %ymm1
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpmovmskb %ymm0, %eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: v32i16:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpcmpgtw %ymm3, %ymm1, %ymm1
; AVX512F-NEXT:    vpmovsxwd %ymm1, %zmm1
; AVX512F-NEXT:    vptestmd %zmm1, %zmm1, %k1
; AVX512F-NEXT:    vpcmpgtw %ymm2, %ymm0, %ymm0
; AVX512F-NEXT:    vpmovsxwd %ymm0, %zmm0
; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k2
; AVX512F-NEXT:    vpcmpgtw %ymm7, %ymm5, %ymm0
; AVX512F-NEXT:    vpmovsxwd %ymm0, %zmm0
; AVX512F-NEXT:    vpcmpgtw %ymm6, %ymm4, %ymm1
; AVX512F-NEXT:    vpmovsxwd %ymm1, %zmm1
; AVX512F-NEXT:    vptestmd %zmm1, %zmm1, %k0 {%k2}
; AVX512F-NEXT:    kmovw %k0, %ecx
; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0 {%k1}
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    shll $16, %eax
; AVX512F-NEXT:    orl %ecx, %eax
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: v32i16:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpcmpgtw %zmm1, %zmm0, %k1
; AVX512BW-NEXT:    vpcmpgtw %zmm3, %zmm2, %k0 {%k1}
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
  %x0 = icmp sgt <32 x i16> %a, %b
  %x1 = icmp sgt <32 x i16> %c, %d
  %y = and <32 x i1> %x0, %x1
  %res = bitcast <32 x i1> %y to i32
  ret i32 %res
}

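; <16 x i32> compares truncate their results to bytes before the mask
; extraction: pshufb on SSE, vpackssdw/vpacksswb on AVX. AVX512 gets the
; <16 x i1> mask directly from vpcmpgtd.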
define i16 @v16i32(<16 x i32> %a, <16 x i32> %b, <16 x i32> %c, <16 x i32> %d) {
; SSE-LABEL: v16i32:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT:    pcmpgtd %xmm7, %xmm3
; SSE-NEXT:    movdqa {{.*#+}} xmm7 = <u,u,u,u,0,4,8,12,u,u,u,u,u,u,u,u>
; SSE-NEXT:    pshufb %xmm7, %xmm3
; SSE-NEXT:    pcmpgtd %xmm6, %xmm2
; SSE-NEXT:    pshufb %xmm7, %xmm2
; SSE-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; SSE-NEXT:    pcmpgtd %xmm5, %xmm1
; SSE-NEXT:    movdqa {{.*#+}} xmm3 = <0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u>
; SSE-NEXT:    pshufb %xmm3, %xmm1
; SSE-NEXT:    pcmpgtd %xmm4, %xmm0
; SSE-NEXT:    pshufb %xmm3, %xmm0
; SSE-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
; SSE-NEXT:    pcmpgtd {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT:    pshufb %xmm7, %xmm11
; SSE-NEXT:    pcmpgtd {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT:    pshufb %xmm7, %xmm9
; SSE-NEXT:    punpckldq {{.*#+}} xmm9 = xmm9[0],xmm11[0],xmm9[1],xmm11[1]
; SSE-NEXT:    pcmpgtd {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT:    pshufb %xmm3, %xmm10
; SSE-NEXT:    pcmpgtd {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT:    pshufb %xmm3, %xmm8
; SSE-NEXT:    punpckldq {{.*#+}} xmm8 = xmm8[0],xmm10[0],xmm8[1],xmm10[1]
; SSE-NEXT:    pblendw {{.*#+}} xmm8 = xmm8[0,1,2,3],xmm9[4,5,6,7]
; SSE-NEXT:    pand %xmm0, %xmm8
; SSE-NEXT:    pmovmskb %xmm8, %eax
; SSE-NEXT:    # kill: def $ax killed $ax killed $eax
; SSE-NEXT:    retq
;
; AVX1-LABEL: v16i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm8
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm9
; AVX1-NEXT:    vpcmpgtd %xmm8, %xmm9, %xmm8
; AVX1-NEXT:    vpcmpgtd %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpackssdw %xmm8, %xmm1, %xmm8
; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm3
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpcmpgtd %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpgtd %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpacksswb %xmm8, %xmm0, %xmm0
; AVX1-NEXT:    vextractf128 $1, %ymm7, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm5, %xmm2
; AVX1-NEXT:    vpcmpgtd %xmm1, %xmm2, %xmm1
; AVX1-NEXT:    vpcmpgtd %xmm7, %xmm5, %xmm2
; AVX1-NEXT:    vpackssdw %xmm1, %xmm2, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm6, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm4, %xmm3
; AVX1-NEXT:    vpcmpgtd %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpcmpgtd %xmm6, %xmm4, %xmm3
; AVX1-NEXT:    vpackssdw %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpacksswb %xmm1, %xmm2, %xmm1
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpmovmskb %xmm0, %eax
; AVX1-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: v16i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpgtd %ymm3, %ymm1, %ymm1
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm3
; AVX2-NEXT:    vpackssdw %xmm3, %xmm1, %xmm1
; AVX2-NEXT:    vpcmpgtd %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm2
; AVX2-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpgtd %ymm7, %ymm5, %ymm1
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT:    vpackssdw %xmm2, %xmm1, %xmm1
; AVX2-NEXT:    vpcmpgtd %ymm6, %ymm4, %ymm2
; AVX2-NEXT:    vextracti128 $1, %ymm2, %xmm3
; AVX2-NEXT:    vpackssdw %xmm3, %xmm2, %xmm2
; AVX2-NEXT:    vpacksswb %xmm1, %xmm2, %xmm1
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpmovmskb %xmm0, %eax
; AVX2-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: v16i32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpcmpgtd %zmm1, %zmm0, %k1
; AVX512F-NEXT:    vpcmpgtd %zmm3, %zmm2, %k0 {%k1}
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: v16i32:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpcmpgtd %zmm1, %zmm0, %k1
; AVX512BW-NEXT:    vpcmpgtd %zmm3, %zmm2, %k0 {%k1}
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
  %x0 = icmp sgt <16 x i32> %a, %b
  %x1 = icmp sgt <16 x i32> %c, %d
  %y = and <16 x i1> %x0, %x1
  %res = bitcast <16 x i1> %y to i16
  ret i16 %res
}

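; <16 x float> counterpart of v16i32: vcmpltps with commuted operands feeds
; the same narrowing and pmovmskb reduction; AVX512 uses masked vcmpltps.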
define i16 @v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %c, <16 x float> %d) {
; SSE-LABEL: v16f32:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT:    cmpltps %xmm3, %xmm7
; SSE-NEXT:    movdqa {{.*#+}} xmm3 = <u,u,u,u,0,4,8,12,u,u,u,u,u,u,u,u>
; SSE-NEXT:    pshufb %xmm3, %xmm7
; SSE-NEXT:    cmpltps %xmm2, %xmm6
; SSE-NEXT:    pshufb %xmm3, %xmm6
; SSE-NEXT:    punpckldq {{.*#+}} xmm6 = xmm6[0],xmm7[0],xmm6[1],xmm7[1]
; SSE-NEXT:    cmpltps %xmm1, %xmm5
; SSE-NEXT:    movdqa {{.*#+}} xmm1 = <0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u>
; SSE-NEXT:    pshufb %xmm1, %xmm5
; SSE-NEXT:    cmpltps %xmm0, %xmm4
; SSE-NEXT:    pshufb %xmm1, %xmm4
; SSE-NEXT:    punpckldq {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1]
; SSE-NEXT:    pblendw {{.*#+}} xmm4 = xmm4[0,1,2,3],xmm6[4,5,6,7]
; SSE-NEXT:    cmpltps {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT:    pshufb %xmm3, %xmm11
; SSE-NEXT:    cmpltps {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT:    pshufb %xmm3, %xmm9
; SSE-NEXT:    punpckldq {{.*#+}} xmm9 = xmm9[0],xmm11[0],xmm9[1],xmm11[1]
; SSE-NEXT:    cmpltps {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT:    pshufb %xmm1, %xmm10
; SSE-NEXT:    cmpltps {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT:    pshufb %xmm1, %xmm8
; SSE-NEXT:    punpckldq {{.*#+}} xmm8 = xmm8[0],xmm10[0],xmm8[1],xmm10[1]
; SSE-NEXT:    pblendw {{.*#+}} xmm8 = xmm8[0,1,2,3],xmm9[4,5,6,7]
; SSE-NEXT:    pand %xmm4, %xmm8
; SSE-NEXT:    pmovmskb %xmm8, %eax
; SSE-NEXT:    # kill: def $ax killed $ax killed $eax
; SSE-NEXT:    retq
;
; AVX12-LABEL: v16f32:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vcmpltps %ymm1, %ymm3, %ymm1
; AVX12-NEXT:    vextractf128 $1, %ymm1, %xmm3
; AVX12-NEXT:    vpackssdw %xmm3, %xmm1, %xmm1
; AVX12-NEXT:    vcmpltps %ymm0, %ymm2, %ymm0
; AVX12-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX12-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
; AVX12-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX12-NEXT:    vcmpltps %ymm5, %ymm7, %ymm1
; AVX12-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX12-NEXT:    vpackssdw %xmm2, %xmm1, %xmm1
; AVX12-NEXT:    vcmpltps %ymm4, %ymm6, %ymm2
; AVX12-NEXT:    vextractf128 $1, %ymm2, %xmm3
; AVX12-NEXT:    vpackssdw %xmm3, %xmm2, %xmm2
; AVX12-NEXT:    vpacksswb %xmm1, %xmm2, %xmm1
; AVX12-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX12-NEXT:    vpmovmskb %xmm0, %eax
; AVX12-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX12-NEXT:    vzeroupper
; AVX12-NEXT:    retq
;
; AVX512F-LABEL: v16f32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vcmpltps %zmm0, %zmm1, %k1
; AVX512F-NEXT:    vcmpltps %zmm2, %zmm3, %k0 {%k1}
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: v16f32:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vcmpltps %zmm0, %zmm1, %k1
; AVX512BW-NEXT:    vcmpltps %zmm2, %zmm3, %k0 {%k1}
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
  %x0 = fcmp ogt <16 x float> %a, %b
  %x1 = fcmp ogt <16 x float> %c, %d
  %y = and <16 x i1> %x0, %x1
  %res = bitcast <16 x i1> %y to i16
  ret i16 %res
}

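; <64 x i8> compares need no narrowing, so the AND is done on full vectors
; and the i64 mask is assembled from pmovmskb pieces with shll/shlq and or.
; AVX512F has to sign-extend each 16-byte chunk to <16 x i32> for vptestmd;
; only AVX512BW gets the single vpcmpgtb/kmovq sequence.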
define i64 @v64i8(<64 x i8> %a, <64 x i8> %b, <64 x i8> %c, <64 x i8> %d) {
; SSE-LABEL: v64i8:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT:    pcmpgtb %xmm7, %xmm3
; SSE-NEXT:    pcmpgtb %xmm6, %xmm2
; SSE-NEXT:    pcmpgtb %xmm5, %xmm1
; SSE-NEXT:    pcmpgtb %xmm4, %xmm0
; SSE-NEXT:    pcmpgtb {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT:    pand %xmm3, %xmm9
; SSE-NEXT:    pcmpgtb {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT:    pand %xmm2, %xmm8
; SSE-NEXT:    pcmpgtb {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT:    pand %xmm1, %xmm11
; SSE-NEXT:    pcmpgtb {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT:    pand %xmm0, %xmm10
; SSE-NEXT:    pmovmskb %xmm10, %eax
; SSE-NEXT:    pmovmskb %xmm11, %ecx
; SSE-NEXT:    shll $16, %ecx
; SSE-NEXT:    orl %eax, %ecx
; SSE-NEXT:    pmovmskb %xmm8, %edx
; SSE-NEXT:    pmovmskb %xmm9, %eax
; SSE-NEXT:    shll $16, %eax
; SSE-NEXT:    orl %edx, %eax
; SSE-NEXT:    shlq $32, %rax
; SSE-NEXT:    orq %rcx, %rax
; SSE-NEXT:    retq
;
; AVX1-LABEL: v64i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm8
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm9
; AVX1-NEXT:    vpcmpgtb %xmm8, %xmm9, %xmm8
; AVX1-NEXT:    vpcmpgtb %xmm3, %xmm1, %xmm9
; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm3
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpcmpgtb %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpgtb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vextractf128 $1, %ymm7, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm5, %xmm3
; AVX1-NEXT:    vpcmpgtb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpand %xmm2, %xmm8, %xmm2
; AVX1-NEXT:    vpcmpgtb %xmm7, %xmm5, %xmm3
; AVX1-NEXT:    vpand %xmm3, %xmm9, %xmm3
; AVX1-NEXT:    vextractf128 $1, %ymm6, %xmm5
; AVX1-NEXT:    vextractf128 $1, %ymm4, %xmm7
; AVX1-NEXT:    vpcmpgtb %xmm5, %xmm7, %xmm5
; AVX1-NEXT:    vpand %xmm5, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpgtb %xmm6, %xmm4, %xmm4
; AVX1-NEXT:    vpand %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vpmovmskb %xmm0, %eax
; AVX1-NEXT:    vpmovmskb %xmm1, %ecx
; AVX1-NEXT:    shll $16, %ecx
; AVX1-NEXT:    orl %eax, %ecx
; AVX1-NEXT:    vpmovmskb %xmm3, %edx
; AVX1-NEXT:    vpmovmskb %xmm2, %eax
; AVX1-NEXT:    shll $16, %eax
; AVX1-NEXT:    orl %edx, %eax
; AVX1-NEXT:    shlq $32, %rax
; AVX1-NEXT:    orq %rcx, %rax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: v64i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpgtb %ymm3, %ymm1, %ymm1
; AVX2-NEXT:    vpcmpgtb %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpgtb %ymm7, %ymm5, %ymm2
; AVX2-NEXT:    vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    vpcmpgtb %ymm6, %ymm4, %ymm2
; AVX2-NEXT:    vpand %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vpmovmskb %ymm0, %ecx
; AVX2-NEXT:    vpmovmskb %ymm1, %eax
; AVX2-NEXT:    shlq $32, %rax
; AVX2-NEXT:    orq %rcx, %rax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: v64i8:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpcmpgtb %ymm3, %ymm1, %ymm1
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm3
; AVX512F-NEXT:    vpmovsxbd %xmm3, %zmm3
; AVX512F-NEXT:    vptestmd %zmm3, %zmm3, %k1
; AVX512F-NEXT:    vpmovsxbd %xmm1, %zmm1
; AVX512F-NEXT:    vptestmd %zmm1, %zmm1, %k2
; AVX512F-NEXT:    vpcmpgtb %ymm2, %ymm0, %ymm0
; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512F-NEXT:    vpmovsxbd %xmm1, %zmm1
; AVX512F-NEXT:    vptestmd %zmm1, %zmm1, %k3
; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm0
; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k4
; AVX512F-NEXT:    vpcmpgtb %ymm7, %ymm5, %ymm0
; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512F-NEXT:    vpmovsxbd %xmm1, %zmm1
; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm0
; AVX512F-NEXT:    vpcmpgtb %ymm6, %ymm4, %ymm2
; AVX512F-NEXT:    vextracti128 $1, %ymm2, %xmm3
; AVX512F-NEXT:    vpmovsxbd %xmm3, %zmm3
; AVX512F-NEXT:    vpmovsxbd %xmm2, %zmm2
; AVX512F-NEXT:    vptestmd %zmm2, %zmm2, %k0 {%k4}
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    vptestmd %zmm3, %zmm3, %k0 {%k3}
; AVX512F-NEXT:    kmovw %k0, %ecx
; AVX512F-NEXT:    shll $16, %ecx
; AVX512F-NEXT:    orl %eax, %ecx
; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0 {%k2}
; AVX512F-NEXT:    kmovw %k0, %edx
; AVX512F-NEXT:    vptestmd %zmm1, %zmm1, %k0 {%k1}
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    shll $16, %eax
; AVX512F-NEXT:    orl %edx, %eax
; AVX512F-NEXT:    shlq $32, %rax
; AVX512F-NEXT:    orq %rcx, %rax
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: v64i8:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpcmpgtb %zmm1, %zmm0, %k1
; AVX512BW-NEXT:    vpcmpgtb %zmm3, %zmm2, %k0 {%k1}
; AVX512BW-NEXT:    kmovq %k0, %rax
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
  %x0 = icmp sgt <64 x i8> %a, %b
  %x1 = icmp sgt <64 x i8> %c, %d
  %y = and <64 x i1> %x0, %x1
  %res = bitcast <64 x i1> %y to i64
  ret i64 %res
}