; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -disable-peephole -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq | FileCheck %s --check-prefix=CHECK --check-prefix=VLX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -disable-peephole -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=NoVLX

; Byte-wise equality compare of %__a and %__b, each viewed as <16 x i8>.
; The <16 x i1> result is widened to <32 x i1> by a shuffle whose lanes 16-31
; select from the all-zero second operand, then returned as an i32 bitmask.
define zeroext i32 @test_vpcmpeqb_v16i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqb_v16i1_v32i1_mask:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vpcmpeqb %xmm1, %xmm0, %k0
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpeqb_v16i1_v32i1_mask:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <16 x i8>
  %1 = bitcast <2 x i64> %__b to <16 x i8>
  %2 = icmp eq <16 x i8> %0, %1
  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}

; Byte-wise equality compare of %__a against a <2 x i64> loaded from %__b,
; both viewed as <16 x i8>. The <16 x i1> result is widened to <32 x i1> with
; lanes 16-31 taken from the all-zero shuffle operand and returned as i32.
define zeroext i32 @test_vpcmpeqb_v16i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqb_v16i1_v32i1_mask_mem:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vpcmpeqb (%rdi), %xmm0, %k0
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpeqb_v16i1_v32i1_mask_mem:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vpcmpeqb (%rdi), %xmm0, %xmm0
; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <16 x i8>
  %load = load <2 x i64>, <2 x i64>* %__b
  %1 = bitcast <2 x i64> %load to <16 x i8>
  %2 = icmp eq <16 x i8> %0, %1
  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}

; Masked byte-wise equality compare: the <16 x i1> result of comparing %__a
; and %__b (as <16 x i8>) is ANDed with %__u bitcast to <16 x i1>, widened to
; <32 x i1> with lanes 16-31 taken from the all-zero shuffle operand, and
; returned as an i32 bitmask.
define zeroext i32 @test_masked_vpcmpeqb_v16i1_v32i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqb_v16i1_v32i1_mask:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpeqb %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpeqb_v16i1_v32i1_mask:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    andl %edi, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <16 x i8>
  %1 = bitcast <2 x i64> %__b to <16 x i8>
  %2 = icmp eq <16 x i8> %0, %1
  %3 = bitcast i16 %__u to <16 x i1>
  %4 = and <16 x i1> %2, %3
  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}

; Masked byte-wise equality compare with a memory operand: %__a is compared
; (as <16 x i8>) against a <2 x i64> loaded from %__b, the result is ANDed
; with %__u bitcast to <16 x i1>, widened to <32 x i1> with lanes 16-31 taken
; from the all-zero shuffle operand, and returned as an i32 bitmask.
define zeroext i32 @test_masked_vpcmpeqb_v16i1_v32i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqb_v16i1_v32i1_mask_mem:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpeqb (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpeqb_v16i1_v32i1_mask_mem:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vpcmpeqb (%rsi), %xmm0, %xmm0
; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    andl %edi, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <16 x i8>
  %load = load <2 x i64>, <2 x i64>* %__b
  %1 = bitcast <2 x i64> %load to <16 x i8>
  %2 = icmp eq <16 x i8> %0, %1
  %3 = bitcast i16 %__u to <16 x i1>
  %4 = and <16 x i1> %2, %3
  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}


; Byte-wise equality compare of %__a and %__b, each viewed as <16 x i8>.
; The <16 x i1> result is widened to <64 x i1>; every shuffle index for lanes
; 16-63 is in the range 16-31 and so selects from the all-zero second operand.
; The widened mask is returned as an i64.
define zeroext i64 @test_vpcmpeqb_v16i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqb_v16i1_v64i1_mask:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vpcmpeqb %xmm1, %xmm0, %k0
; VLX-NEXT:    kmovq %k0, %rax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpeqb_v16i1_v64i1_mask:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    movzwl %ax, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <16 x i8>
  %1 = bitcast <2 x i64> %__b to <16 x i8>
  %2 = icmp eq <16 x i8> %0, %1
  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}

; Byte-wise equality compare of %__a against a <2 x i64> loaded from %__b,
; both viewed as <16 x i8>. The <16 x i1> result is widened to <64 x i1>;
; every shuffle index for lanes 16-63 is in the range 16-31 and so selects
; from the all-zero second operand. The widened mask is returned as an i64.
define zeroext i64 @test_vpcmpeqb_v16i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqb_v16i1_v64i1_mask_mem:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vpcmpeqb (%rdi), %xmm0, %k0
; VLX-NEXT:    kmovq %k0, %rax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpeqb_v16i1_v64i1_mask_mem:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vpcmpeqb (%rdi), %xmm0, %xmm0
; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    movzwl %ax, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <16 x i8>
  %load = load <2 x i64>, <2 x i64>* %__b
  %1 = bitcast <2 x i64> %load to <16 x i8>
  %2 = icmp eq <16 x i8> %0, %1
  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}

; Masked byte-wise equality compare: the <16 x i1> result of comparing %__a
; and %__b (as <16 x i8>) is ANDed with %__u bitcast to <16 x i1>, then
; widened to <64 x i1>. Every shuffle index for lanes 16-63 is in the range
; 16-31 and so selects from the all-zero second operand. Returned as an i64.
define zeroext i64 @test_masked_vpcmpeqb_v16i1_v64i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqb_v16i1_v64i1_mask:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpeqb %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT:    kmovq %k0, %rax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpeqb_v16i1_v64i1_mask:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    andl %edi, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <16 x i8>
  %1 = bitcast <2 x i64> %__b to <16 x i8>
  %2 = icmp eq <16 x i8> %0, %1
  %3 = bitcast i16 %__u to <16 x i1>
  %4 = and <16 x i1> %2, %3
  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}

; Masked byte-wise equality compare with a memory operand: %__a is compared
; (as <16 x i8>) against a <2 x i64> loaded from %__b, the result is ANDed
; with %__u bitcast to <16 x i1>, then widened to <64 x i1>. Every shuffle
; index for lanes 16-63 is in the range 16-31 and so selects from the
; all-zero second operand. Returned as an i64.
define zeroext i64 @test_masked_vpcmpeqb_v16i1_v64i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqb_v16i1_v64i1_mask_mem:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpeqb (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT:    kmovq %k0, %rax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpeqb_v16i1_v64i1_mask_mem:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vpcmpeqb (%rsi), %xmm0, %xmm0
; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    andl %edi, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <16 x i8>
  %load = load <2 x i64>, <2 x i64>* %__b
  %1 = bitcast <2 x i64> %load to <16 x i8>
  %2 = icmp eq <16 x i8> %0, %1
  %3 = bitcast i16 %__u to <16 x i1>
  %4 = and <16 x i1> %2, %3
  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}


; Byte-wise equality compare of %__a and %__b, each viewed as <32 x i8>.
; The <32 x i1> result is widened to <64 x i1> by a shuffle whose lanes 32-63
; select from the all-zero second operand, then returned as an i64 bitmask.
define zeroext i64 @test_vpcmpeqb_v32i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqb_v32i1_v64i1_mask:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vpcmpeqb %ymm1, %ymm0, %k0
; VLX-NEXT:    kmovq %k0, %rax
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpeqb_v32i1_v64i1_mask:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm1
; NoVLX-NEXT:    vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT:    kmovw %k0, %ecx
; NoVLX-NEXT:    vextracti128 $1, %ymm0, %xmm0
; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    shll $16, %eax
; NoVLX-NEXT:    orl %ecx, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__a to <32 x i8>
  %1 = bitcast <4 x i64> %__b to <32 x i8>
  %2 = icmp eq <32 x i8> %0, %1
  %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}

; Byte-wise equality compare of %__a against a <4 x i64> loaded from %__b,
; both viewed as <32 x i8>. The <32 x i1> result is widened to <64 x i1> with
; lanes 32-63 taken from the all-zero shuffle operand and returned as i64.
define zeroext i64 @test_vpcmpeqb_v32i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqb_v32i1_v64i1_mask_mem:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vpcmpeqb (%rdi), %ymm0, %k0
; VLX-NEXT:    kmovq %k0, %rax
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpeqb_v32i1_v64i1_mask_mem:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vpcmpeqb (%rdi), %ymm0, %ymm0
; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm1
; NoVLX-NEXT:    vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT:    kmovw %k0, %ecx
; NoVLX-NEXT:    vextracti128 $1, %ymm0, %xmm0
; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    shll $16, %eax
; NoVLX-NEXT:    orl %ecx, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__a to <32 x i8>
  %load = load <4 x i64>, <4 x i64>* %__b
  %1 = bitcast <4 x i64> %load to <32 x i8>
  %2 = icmp eq <32 x i8> %0, %1
  %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}

; Masked byte-wise equality compare: the <32 x i1> result of comparing %__a
; and %__b (as <32 x i8>) is ANDed with %__u bitcast to <32 x i1>, widened to
; <64 x i1> with lanes 32-63 taken from the all-zero shuffle operand, and
; returned as an i64 bitmask.
define zeroext i64 @test_masked_vpcmpeqb_v32i1_v64i1_mask(i32 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqb_v32i1_v64i1_mask:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpeqb %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT:    kmovq %k0, %rax
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpeqb_v32i1_v64i1_mask:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm1
; NoVLX-NEXT:    vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    andl %edi, %eax
; NoVLX-NEXT:    shrl $16, %edi
; NoVLX-NEXT:    vextracti128 $1, %ymm0, %xmm0
; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %ecx
; NoVLX-NEXT:    andl %edi, %ecx
; NoVLX-NEXT:    shll $16, %ecx
; NoVLX-NEXT:    movzwl %ax, %eax
; NoVLX-NEXT:    orl %ecx, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__a to <32 x i8>
  %1 = bitcast <4 x i64> %__b to <32 x i8>
  %2 = icmp eq <32 x i8> %0, %1
  %3 = bitcast i32 %__u to <32 x i1>
  %4 = and <32 x i1> %2, %3
  %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}

; Masked byte-wise equality compare with a memory operand: %__a is compared
; (as <32 x i8>) against a <4 x i64> loaded from %__b, the result is ANDed
; with %__u bitcast to <32 x i1>, widened to <64 x i1> with lanes 32-63 taken
; from the all-zero shuffle operand, and returned as an i64 bitmask.
define zeroext i64 @test_masked_vpcmpeqb_v32i1_v64i1_mask_mem(i32 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqb_v32i1_v64i1_mask_mem:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpeqb (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT:    kmovq %k0, %rax
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpeqb_v32i1_v64i1_mask_mem:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vpcmpeqb (%rsi), %ymm0, %ymm0
; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm1
; NoVLX-NEXT:    vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    andl %edi, %eax
; NoVLX-NEXT:    shrl $16, %edi
; NoVLX-NEXT:    vextracti128 $1, %ymm0, %xmm0
; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %ecx
; NoVLX-NEXT:    andl %edi, %ecx
; NoVLX-NEXT:    shll $16, %ecx
; NoVLX-NEXT:    movzwl %ax, %eax
; NoVLX-NEXT:    orl %ecx, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__a to <32 x i8>
  %load = load <4 x i64>, <4 x i64>* %__b
  %1 = bitcast <4 x i64> %load to <32 x i8>
  %2 = icmp eq <32 x i8> %0, %1
  %3 = bitcast i32 %__u to <32 x i1>
  %4 = and <32 x i1> %2, %3
  %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}


; Word-wise equality compare of %__a and %__b, each viewed as <8 x i16>.
; The <8 x i1> result is widened to <16 x i1> by a shuffle whose lanes 8-15
; select from the all-zero second operand, then returned as an i16 bitmask.
define zeroext i16 @test_vpcmpeqw_v8i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqw_v8i1_v16i1_mask:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vpcmpeqw %xmm1, %xmm0, %k0
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpeqw_v8i1_v16i1_mask:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <8 x i16>
  %1 = bitcast <2 x i64> %__b to <8 x i16>
  %2 = icmp eq <8 x i16> %0, %1
  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = bitcast <16 x i1> %3 to i16
  ret i16 %4
}

; Word-wise equality compare of %__a against a <2 x i64> loaded from %__b,
; both viewed as <8 x i16>. The <8 x i1> result is widened to <16 x i1> with
; lanes 8-15 taken from the all-zero shuffle operand and returned as i16.
define zeroext i16 @test_vpcmpeqw_v8i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqw_v8i1_v16i1_mask_mem:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vpcmpeqw (%rdi), %xmm0, %k0
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpeqw_v8i1_v16i1_mask_mem:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vpcmpeqw (%rdi), %xmm0, %xmm0
; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <8 x i16>
  %load = load <2 x i64>, <2 x i64>* %__b
  %1 = bitcast <2 x i64> %load to <8 x i16>
  %2 = icmp eq <8 x i16> %0, %1
  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = bitcast <16 x i1> %3 to i16
  ret i16 %4
}

; Masked word-wise equality compare: the <8 x i1> result of comparing %__a
; and %__b (as <8 x i16>) is ANDed with %__u bitcast to <8 x i1>, widened to
; <16 x i1> with lanes 8-15 taken from the all-zero shuffle operand, and
; returned as an i16 bitmask.
define zeroext i16 @test_masked_vpcmpeqw_v8i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqw_v8i1_v16i1_mask:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpeqw %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpeqw_v8i1_v16i1_mask:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT:    kmovw %edi, %k1
; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <8 x i16>
  %1 = bitcast <2 x i64> %__b to <8 x i16>
  %2 = icmp eq <8 x i16> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %4 = and <8 x i1> %2, %3
  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %6 = bitcast <16 x i1> %5 to i16
  ret i16 %6
}

; Masked word-wise equality compare with a memory operand: %__a is compared
; (as <8 x i16>) against a <2 x i64> loaded from %__b, the result is ANDed
; with %__u bitcast to <8 x i1>, widened to <16 x i1> with lanes 8-15 taken
; from the all-zero shuffle operand, and returned as an i16 bitmask.
define zeroext i16 @test_masked_vpcmpeqw_v8i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqw_v8i1_v16i1_mask_mem:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpeqw (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpeqw_v8i1_v16i1_mask_mem:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vpcmpeqw (%rsi), %xmm0, %xmm0
; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT:    kmovw %edi, %k1
; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <8 x i16>
  %load = load <2 x i64>, <2 x i64>* %__b
  %1 = bitcast <2 x i64> %load to <8 x i16>
  %2 = icmp eq <8 x i16> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %4 = and <8 x i1> %2, %3
  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %6 = bitcast <16 x i1> %5 to i16
  ret i16 %6
}


; Word-wise equality compare of %__a and %__b, each viewed as <8 x i16>.
; The <8 x i1> result is widened to <32 x i1>; every shuffle index for lanes
; 8-31 is in the range 8-15 and so selects from the all-zero second operand.
; The widened mask is returned as an i32.
define zeroext i32 @test_vpcmpeqw_v8i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqw_v8i1_v32i1_mask:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vpcmpeqw %xmm1, %xmm0, %k0
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpeqw_v8i1_v32i1_mask:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <8 x i16>
  %1 = bitcast <2 x i64> %__b to <8 x i16>
  %2 = icmp eq <8 x i16> %0, %1
  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}

; Word-wise equality compare of %__a against a <2 x i64> loaded from %__b,
; both viewed as <8 x i16>. The <8 x i1> result is widened to <32 x i1>;
; every shuffle index for lanes 8-31 is in the range 8-15 and so selects from
; the all-zero second operand. The widened mask is returned as an i32.
define zeroext i32 @test_vpcmpeqw_v8i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqw_v8i1_v32i1_mask_mem:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vpcmpeqw (%rdi), %xmm0, %k0
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpeqw_v8i1_v32i1_mask_mem:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vpcmpeqw (%rdi), %xmm0, %xmm0
; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <8 x i16>
  %load = load <2 x i64>, <2 x i64>* %__b
  %1 = bitcast <2 x i64> %load to <8 x i16>
  %2 = icmp eq <8 x i16> %0, %1
  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}

; Masked word-wise equality compare: the <8 x i1> result of comparing %__a
; and %__b (as <8 x i16>) is ANDed with %__u bitcast to <8 x i1>, then widened
; to <32 x i1>. Every shuffle index for lanes 8-31 is in the range 8-15 and so
; selects from the all-zero second operand. Returned as an i32.
define zeroext i32 @test_masked_vpcmpeqw_v8i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqw_v8i1_v32i1_mask:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpeqw %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpeqw_v8i1_v32i1_mask:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT:    kmovw %edi, %k1
; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <8 x i16>
  %1 = bitcast <2 x i64> %__b to <8 x i16>
  %2 = icmp eq <8 x i16> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %4 = and <8 x i1> %2, %3
  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}

; Masked word-wise equality compare with a memory operand: %__a is compared
; (as <8 x i16>) against a <2 x i64> loaded from %__b, the result is ANDed
; with %__u bitcast to <8 x i1>, then widened to <32 x i1>. Every shuffle
; index for lanes 8-31 is in the range 8-15 and so selects from the all-zero
; second operand. Returned as an i32.
define zeroext i32 @test_masked_vpcmpeqw_v8i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqw_v8i1_v32i1_mask_mem:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpeqw (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpeqw_v8i1_v32i1_mask_mem:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vpcmpeqw (%rsi), %xmm0, %xmm0
; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT:    kmovw %edi, %k1
; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <8 x i16>
  %load = load <2 x i64>, <2 x i64>* %__b
  %1 = bitcast <2 x i64> %load to <8 x i16>
  %2 = icmp eq <8 x i16> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %4 = and <8 x i1> %2, %3
  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}


; Word-wise equality compare of %__a and %__b, each viewed as <8 x i16>.
; The <8 x i1> result is widened to <64 x i1>; every shuffle index for lanes
; 8-63 is in the range 8-15 and so selects from the all-zero second operand.
; The widened mask is returned as an i64.
define zeroext i64 @test_vpcmpeqw_v8i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqw_v8i1_v64i1_mask:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vpcmpeqw %xmm1, %xmm0, %k0
; VLX-NEXT:    kmovq %k0, %rax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpeqw_v8i1_v64i1_mask:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    movzwl %ax, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <8 x i16>
  %1 = bitcast <2 x i64> %__b to <8 x i16>
  %2 = icmp eq <8 x i16> %0, %1
  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}

; Word-wise equality compare of %__a against a <2 x i64> loaded from %__b,
; both viewed as <8 x i16>. The <8 x i1> result is widened to <64 x i1>;
; every shuffle index for lanes 8-63 is in the range 8-15 and so selects from
; the all-zero second operand. The widened mask is returned as an i64.
define zeroext i64 @test_vpcmpeqw_v8i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqw_v8i1_v64i1_mask_mem:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vpcmpeqw (%rdi), %xmm0, %k0
; VLX-NEXT:    kmovq %k0, %rax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpeqw_v8i1_v64i1_mask_mem:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vpcmpeqw (%rdi), %xmm0, %xmm0
; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    movzwl %ax, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <8 x i16>
  %load = load <2 x i64>, <2 x i64>* %__b
  %1 = bitcast <2 x i64> %load to <8 x i16>
  %2 = icmp eq <8 x i16> %0, %1
  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}

; Masked word-wise equality compare: the <8 x i1> result of comparing %__a
; and %__b (as <8 x i16>) is ANDed with %__u bitcast to <8 x i1>, then widened
; to <64 x i1>. Every shuffle index for lanes 8-63 is in the range 8-15 and so
; selects from the all-zero second operand. Returned as an i64.
define zeroext i64 @test_masked_vpcmpeqw_v8i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqw_v8i1_v64i1_mask:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpeqw %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT:    kmovq %k0, %rax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpeqw_v8i1_v64i1_mask:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT:    kmovw %edi, %k1
; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    movzwl %ax, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <8 x i16>
  %1 = bitcast <2 x i64> %__b to <8 x i16>
  %2 = icmp eq <8 x i16> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %4 = and <8 x i1> %2, %3
  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}

; Memory-operand variant of the masked 8 x i16 equality compare: the second
; vector is loaded from %__b, the result is ANDed with the mask bits of %__u,
; widened to <64 x i1> with zero lanes and returned as an i64.
664define zeroext i64 @test_masked_vpcmpeqw_v8i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
665; VLX-LABEL: test_masked_vpcmpeqw_v8i1_v64i1_mask_mem:
666; VLX:       # %bb.0: # %entry
667; VLX-NEXT:    kmovd %edi, %k1
668; VLX-NEXT:    vpcmpeqw (%rsi), %xmm0, %k0 {%k1}
669; VLX-NEXT:    kmovq %k0, %rax
670; VLX-NEXT:    retq
671;
672; NoVLX-LABEL: test_masked_vpcmpeqw_v8i1_v64i1_mask_mem:
673; NoVLX:       # %bb.0: # %entry
674; NoVLX-NEXT:    vpcmpeqw (%rsi), %xmm0, %xmm0
675; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
676; NoVLX-NEXT:    kmovw %edi, %k1
677; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0 {%k1}
678; NoVLX-NEXT:    kmovw %k0, %eax
679; NoVLX-NEXT:    movzwl %ax, %eax
680; NoVLX-NEXT:    vzeroupper
681; NoVLX-NEXT:    retq
682entry:
683  %0 = bitcast <2 x i64> %__a to <8 x i16>
684  %load = load <2 x i64>, <2 x i64>* %__b
685  %1 = bitcast <2 x i64> %load to <8 x i16>
686  %2 = icmp eq <8 x i16> %0, %1
; Reinterpret the i8 mask argument as one i1 per compared lane.
687  %3 = bitcast i8 %__u to <8 x i1>
688  %4 = and <8 x i1> %2, %3
; Indices 8-15 address the zeroinitializer operand, so result lanes 8-63 are zero.
689  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
690  %6 = bitcast <64 x i1> %5 to i64
691  ret i64 %6
692}
693
694
; Compares the sixteen i16 lanes of two 256-bit vectors for equality, widens
; the <16 x i1> result to <32 x i1> with zero lanes and returns it as an i32.
695define zeroext i32 @test_vpcmpeqw_v16i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
696; VLX-LABEL: test_vpcmpeqw_v16i1_v32i1_mask:
697; VLX:       # %bb.0: # %entry
698; VLX-NEXT:    vpcmpeqw %ymm1, %ymm0, %k0
699; VLX-NEXT:    kmovd %k0, %eax
700; VLX-NEXT:    vzeroupper
701; VLX-NEXT:    retq
702;
703; NoVLX-LABEL: test_vpcmpeqw_v16i1_v32i1_mask:
704; NoVLX:       # %bb.0: # %entry
705; NoVLX-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
706; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
707; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
708; NoVLX-NEXT:    kmovw %k0, %eax
709; NoVLX-NEXT:    vzeroupper
710; NoVLX-NEXT:    retq
711entry:
712  %0 = bitcast <4 x i64> %__a to <16 x i16>
713  %1 = bitcast <4 x i64> %__b to <16 x i16>
714  %2 = icmp eq <16 x i16> %0, %1
; Indices 16-31 address the zeroinitializer operand, so result lanes 16-31 are zero.
715  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
716  %4 = bitcast <32 x i1> %3 to i32
717  ret i32 %4
718}
719
; Memory-operand variant: compares the sixteen i16 lanes of %__a against a
; 256-bit vector loaded from %__b, widens the <16 x i1> result to <32 x i1>
; with zero lanes and returns it as an i32.
720define zeroext i32 @test_vpcmpeqw_v16i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
721; VLX-LABEL: test_vpcmpeqw_v16i1_v32i1_mask_mem:
722; VLX:       # %bb.0: # %entry
723; VLX-NEXT:    vpcmpeqw (%rdi), %ymm0, %k0
724; VLX-NEXT:    kmovd %k0, %eax
725; VLX-NEXT:    vzeroupper
726; VLX-NEXT:    retq
727;
728; NoVLX-LABEL: test_vpcmpeqw_v16i1_v32i1_mask_mem:
729; NoVLX:       # %bb.0: # %entry
730; NoVLX-NEXT:    vpcmpeqw (%rdi), %ymm0, %ymm0
731; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
732; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
733; NoVLX-NEXT:    kmovw %k0, %eax
734; NoVLX-NEXT:    vzeroupper
735; NoVLX-NEXT:    retq
736entry:
737  %0 = bitcast <4 x i64> %__a to <16 x i16>
738  %load = load <4 x i64>, <4 x i64>* %__b
739  %1 = bitcast <4 x i64> %load to <16 x i16>
740  %2 = icmp eq <16 x i16> %0, %1
; Indices 16-31 address the zeroinitializer operand, so result lanes 16-31 are zero.
741  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
742  %4 = bitcast <32 x i1> %3 to i32
743  ret i32 %4
744}
745
; Compares the sixteen i16 lanes of %__a and %__b for equality, ANDs the
; result with the sixteen mask bits of %__u, widens it to <32 x i1> with zero
; lanes and returns it as an i32.
; In the AVX512F-only expectations the mask is applied with a scalar andl on
; the extracted mask value rather than with a {%k1} write-mask.
746define zeroext i32 @test_masked_vpcmpeqw_v16i1_v32i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
747; VLX-LABEL: test_masked_vpcmpeqw_v16i1_v32i1_mask:
748; VLX:       # %bb.0: # %entry
749; VLX-NEXT:    kmovd %edi, %k1
750; VLX-NEXT:    vpcmpeqw %ymm1, %ymm0, %k0 {%k1}
751; VLX-NEXT:    kmovd %k0, %eax
752; VLX-NEXT:    vzeroupper
753; VLX-NEXT:    retq
754;
755; NoVLX-LABEL: test_masked_vpcmpeqw_v16i1_v32i1_mask:
756; NoVLX:       # %bb.0: # %entry
757; NoVLX-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
758; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
759; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
760; NoVLX-NEXT:    kmovw %k0, %eax
761; NoVLX-NEXT:    andl %edi, %eax
762; NoVLX-NEXT:    vzeroupper
763; NoVLX-NEXT:    retq
764entry:
765  %0 = bitcast <4 x i64> %__a to <16 x i16>
766  %1 = bitcast <4 x i64> %__b to <16 x i16>
767  %2 = icmp eq <16 x i16> %0, %1
; Reinterpret the i16 mask argument as one i1 per compared lane.
768  %3 = bitcast i16 %__u to <16 x i1>
769  %4 = and <16 x i1> %2, %3
; Indices 16-31 address the zeroinitializer operand, so result lanes 16-31 are zero.
770  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
771  %6 = bitcast <32 x i1> %5 to i32
772  ret i32 %6
773}
774
; Memory-operand variant of the masked 16 x i16 equality compare: the second
; vector is loaded from %__b, the result is ANDed with the mask bits of %__u,
; widened to <32 x i1> with zero lanes and returned as an i32.
775define zeroext i32 @test_masked_vpcmpeqw_v16i1_v32i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
776; VLX-LABEL: test_masked_vpcmpeqw_v16i1_v32i1_mask_mem:
777; VLX:       # %bb.0: # %entry
778; VLX-NEXT:    kmovd %edi, %k1
779; VLX-NEXT:    vpcmpeqw (%rsi), %ymm0, %k0 {%k1}
780; VLX-NEXT:    kmovd %k0, %eax
781; VLX-NEXT:    vzeroupper
782; VLX-NEXT:    retq
783;
784; NoVLX-LABEL: test_masked_vpcmpeqw_v16i1_v32i1_mask_mem:
785; NoVLX:       # %bb.0: # %entry
786; NoVLX-NEXT:    vpcmpeqw (%rsi), %ymm0, %ymm0
787; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
788; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
789; NoVLX-NEXT:    kmovw %k0, %eax
790; NoVLX-NEXT:    andl %edi, %eax
791; NoVLX-NEXT:    vzeroupper
792; NoVLX-NEXT:    retq
793entry:
794  %0 = bitcast <4 x i64> %__a to <16 x i16>
795  %load = load <4 x i64>, <4 x i64>* %__b
796  %1 = bitcast <4 x i64> %load to <16 x i16>
797  %2 = icmp eq <16 x i16> %0, %1
; Reinterpret the i16 mask argument as one i1 per compared lane.
798  %3 = bitcast i16 %__u to <16 x i1>
799  %4 = and <16 x i1> %2, %3
; Indices 16-31 address the zeroinitializer operand, so result lanes 16-31 are zero.
800  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
801  %6 = bitcast <32 x i1> %5 to i32
802  ret i32 %6
803}
804
805
; Compares the sixteen i16 lanes of two 256-bit vectors for equality, widens
; the <16 x i1> result to <64 x i1> with zero lanes and returns it as an i64.
806define zeroext i64 @test_vpcmpeqw_v16i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
807; VLX-LABEL: test_vpcmpeqw_v16i1_v64i1_mask:
808; VLX:       # %bb.0: # %entry
809; VLX-NEXT:    vpcmpeqw %ymm1, %ymm0, %k0
810; VLX-NEXT:    kmovq %k0, %rax
811; VLX-NEXT:    vzeroupper
812; VLX-NEXT:    retq
813;
814; NoVLX-LABEL: test_vpcmpeqw_v16i1_v64i1_mask:
815; NoVLX:       # %bb.0: # %entry
816; NoVLX-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
817; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
818; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
819; NoVLX-NEXT:    kmovw %k0, %eax
820; NoVLX-NEXT:    movzwl %ax, %eax
821; NoVLX-NEXT:    vzeroupper
822; NoVLX-NEXT:    retq
823entry:
824  %0 = bitcast <4 x i64> %__a to <16 x i16>
825  %1 = bitcast <4 x i64> %__b to <16 x i16>
826  %2 = icmp eq <16 x i16> %0, %1
; Indices 16-31 address the zeroinitializer operand, so result lanes 16-63 are zero.
827  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
828  %4 = bitcast <64 x i1> %3 to i64
829  ret i64 %4
830}
831
; Memory-operand variant: compares the sixteen i16 lanes of %__a against a
; 256-bit vector loaded from %__b, widens the <16 x i1> result to <64 x i1>
; with zero lanes and returns it as an i64.
832define zeroext i64 @test_vpcmpeqw_v16i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
833; VLX-LABEL: test_vpcmpeqw_v16i1_v64i1_mask_mem:
834; VLX:       # %bb.0: # %entry
835; VLX-NEXT:    vpcmpeqw (%rdi), %ymm0, %k0
836; VLX-NEXT:    kmovq %k0, %rax
837; VLX-NEXT:    vzeroupper
838; VLX-NEXT:    retq
839;
840; NoVLX-LABEL: test_vpcmpeqw_v16i1_v64i1_mask_mem:
841; NoVLX:       # %bb.0: # %entry
842; NoVLX-NEXT:    vpcmpeqw (%rdi), %ymm0, %ymm0
843; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
844; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
845; NoVLX-NEXT:    kmovw %k0, %eax
846; NoVLX-NEXT:    movzwl %ax, %eax
847; NoVLX-NEXT:    vzeroupper
848; NoVLX-NEXT:    retq
849entry:
850  %0 = bitcast <4 x i64> %__a to <16 x i16>
851  %load = load <4 x i64>, <4 x i64>* %__b
852  %1 = bitcast <4 x i64> %load to <16 x i16>
853  %2 = icmp eq <16 x i16> %0, %1
; Indices 16-31 address the zeroinitializer operand, so result lanes 16-63 are zero.
854  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
855  %4 = bitcast <64 x i1> %3 to i64
856  ret i64 %4
857}
858
; Compares the sixteen i16 lanes of %__a and %__b for equality, ANDs the
; result with the sixteen mask bits of %__u, widens it to <64 x i1> with zero
; lanes and returns it as an i64.
859define zeroext i64 @test_masked_vpcmpeqw_v16i1_v64i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
860; VLX-LABEL: test_masked_vpcmpeqw_v16i1_v64i1_mask:
861; VLX:       # %bb.0: # %entry
862; VLX-NEXT:    kmovd %edi, %k1
863; VLX-NEXT:    vpcmpeqw %ymm1, %ymm0, %k0 {%k1}
864; VLX-NEXT:    kmovq %k0, %rax
865; VLX-NEXT:    vzeroupper
866; VLX-NEXT:    retq
867;
868; NoVLX-LABEL: test_masked_vpcmpeqw_v16i1_v64i1_mask:
869; NoVLX:       # %bb.0: # %entry
870; NoVLX-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
871; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
872; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
873; NoVLX-NEXT:    kmovw %k0, %eax
874; NoVLX-NEXT:    andl %edi, %eax
875; NoVLX-NEXT:    vzeroupper
876; NoVLX-NEXT:    retq
877entry:
878  %0 = bitcast <4 x i64> %__a to <16 x i16>
879  %1 = bitcast <4 x i64> %__b to <16 x i16>
880  %2 = icmp eq <16 x i16> %0, %1
; Reinterpret the i16 mask argument as one i1 per compared lane.
881  %3 = bitcast i16 %__u to <16 x i1>
882  %4 = and <16 x i1> %2, %3
; Indices 16-31 address the zeroinitializer operand, so result lanes 16-63 are zero.
883  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
884  %6 = bitcast <64 x i1> %5 to i64
885  ret i64 %6
886}
887
; Memory-operand variant of the masked 16 x i16 equality compare: the second
; vector is loaded from %__b, the result is ANDed with the mask bits of %__u,
; widened to <64 x i1> with zero lanes and returned as an i64.
888define zeroext i64 @test_masked_vpcmpeqw_v16i1_v64i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
889; VLX-LABEL: test_masked_vpcmpeqw_v16i1_v64i1_mask_mem:
890; VLX:       # %bb.0: # %entry
891; VLX-NEXT:    kmovd %edi, %k1
892; VLX-NEXT:    vpcmpeqw (%rsi), %ymm0, %k0 {%k1}
893; VLX-NEXT:    kmovq %k0, %rax
894; VLX-NEXT:    vzeroupper
895; VLX-NEXT:    retq
896;
897; NoVLX-LABEL: test_masked_vpcmpeqw_v16i1_v64i1_mask_mem:
898; NoVLX:       # %bb.0: # %entry
899; NoVLX-NEXT:    vpcmpeqw (%rsi), %ymm0, %ymm0
900; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
901; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
902; NoVLX-NEXT:    kmovw %k0, %eax
903; NoVLX-NEXT:    andl %edi, %eax
904; NoVLX-NEXT:    vzeroupper
905; NoVLX-NEXT:    retq
906entry:
907  %0 = bitcast <4 x i64> %__a to <16 x i16>
908  %load = load <4 x i64>, <4 x i64>* %__b
909  %1 = bitcast <4 x i64> %load to <16 x i16>
910  %2 = icmp eq <16 x i16> %0, %1
; Reinterpret the i16 mask argument as one i1 per compared lane.
911  %3 = bitcast i16 %__u to <16 x i1>
912  %4 = and <16 x i1> %2, %3
; Indices 16-31 address the zeroinitializer operand, so result lanes 16-63 are zero.
913  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
914  %6 = bitcast <64 x i1> %5 to i64
915  ret i64 %6
916}
917
918
; Compares the thirty-two i16 lanes of two 512-bit vectors for equality,
; widens the <32 x i1> result to <64 x i1> with zero lanes and returns it as
; an i64.
; The AVX512F-only expectations compare the two 256-bit halves separately and
; merge the two 16-bit masks with shll $16 / orl.
919define zeroext i64 @test_vpcmpeqw_v32i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
920; VLX-LABEL: test_vpcmpeqw_v32i1_v64i1_mask:
921; VLX:       # %bb.0: # %entry
922; VLX-NEXT:    vpcmpeqw %zmm1, %zmm0, %k0
923; VLX-NEXT:    kmovq %k0, %rax
924; VLX-NEXT:    vzeroupper
925; VLX-NEXT:    retq
926;
927; NoVLX-LABEL: test_vpcmpeqw_v32i1_v64i1_mask:
928; NoVLX:       # %bb.0: # %entry
929; NoVLX-NEXT:    vextracti64x4 $1, %zmm0, %ymm2
930; NoVLX-NEXT:    vextracti64x4 $1, %zmm1, %ymm3
931; NoVLX-NEXT:    vpcmpeqw %ymm3, %ymm2, %ymm2
932; NoVLX-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
933; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
934; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
935; NoVLX-NEXT:    kmovw %k0, %ecx
936; NoVLX-NEXT:    vpmovsxwd %ymm2, %zmm0
937; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
938; NoVLX-NEXT:    kmovw %k0, %eax
939; NoVLX-NEXT:    shll $16, %eax
940; NoVLX-NEXT:    orl %ecx, %eax
941; NoVLX-NEXT:    vzeroupper
942; NoVLX-NEXT:    retq
943entry:
944  %0 = bitcast <8 x i64> %__a to <32 x i16>
945  %1 = bitcast <8 x i64> %__b to <32 x i16>
946  %2 = icmp eq <32 x i16> %0, %1
; Indices 32-63 address the zeroinitializer operand, so result lanes 32-63 are zero.
947  %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
948  %4 = bitcast <64 x i1> %3 to i64
949  ret i64 %4
950}
951
; Memory-operand variant: compares the thirty-two i16 lanes of %__a against a
; 512-bit vector loaded from %__b, widens the <32 x i1> result to <64 x i1>
; with zero lanes and returns it as an i64.
; The AVX512F-only expectations compare each 256-bit half against (%rdi) and
; 32(%rdi) respectively and merge the two 16-bit masks.
952define zeroext i64 @test_vpcmpeqw_v32i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
953; VLX-LABEL: test_vpcmpeqw_v32i1_v64i1_mask_mem:
954; VLX:       # %bb.0: # %entry
955; VLX-NEXT:    vpcmpeqw (%rdi), %zmm0, %k0
956; VLX-NEXT:    kmovq %k0, %rax
957; VLX-NEXT:    vzeroupper
958; VLX-NEXT:    retq
959;
960; NoVLX-LABEL: test_vpcmpeqw_v32i1_v64i1_mask_mem:
961; NoVLX:       # %bb.0: # %entry
962; NoVLX-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
963; NoVLX-NEXT:    vpcmpeqw (%rdi), %ymm0, %ymm0
964; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
965; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
966; NoVLX-NEXT:    kmovw %k0, %ecx
967; NoVLX-NEXT:    vpcmpeqw 32(%rdi), %ymm1, %ymm0
968; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
969; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
970; NoVLX-NEXT:    kmovw %k0, %eax
971; NoVLX-NEXT:    shll $16, %eax
972; NoVLX-NEXT:    orl %ecx, %eax
973; NoVLX-NEXT:    vzeroupper
974; NoVLX-NEXT:    retq
975entry:
976  %0 = bitcast <8 x i64> %__a to <32 x i16>
977  %load = load <8 x i64>, <8 x i64>* %__b
978  %1 = bitcast <8 x i64> %load to <32 x i16>
979  %2 = icmp eq <32 x i16> %0, %1
; Indices 32-63 address the zeroinitializer operand, so result lanes 32-63 are zero.
980  %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
981  %4 = bitcast <64 x i1> %3 to i64
982  ret i64 %4
983}
984
; Compares the thirty-two i16 lanes of %__a and %__b for equality, ANDs the
; result with the thirty-two mask bits of %__u, widens it to <64 x i1> with
; zero lanes and returns it as an i64.
; The AVX512F-only expectations process the two 256-bit halves separately:
; each 16-bit mask is ANDed with the matching half of %edi (the upper half
; after shrl $16) and the halves are merged with shll $16 / orl.
985define zeroext i64 @test_masked_vpcmpeqw_v32i1_v64i1_mask(i32 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
986; VLX-LABEL: test_masked_vpcmpeqw_v32i1_v64i1_mask:
987; VLX:       # %bb.0: # %entry
988; VLX-NEXT:    kmovd %edi, %k1
989; VLX-NEXT:    vpcmpeqw %zmm1, %zmm0, %k0 {%k1}
990; VLX-NEXT:    kmovq %k0, %rax
991; VLX-NEXT:    vzeroupper
992; VLX-NEXT:    retq
993;
994; NoVLX-LABEL: test_masked_vpcmpeqw_v32i1_v64i1_mask:
995; NoVLX:       # %bb.0: # %entry
996; NoVLX-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm2
997; NoVLX-NEXT:    vpmovsxwd %ymm2, %zmm2
998; NoVLX-NEXT:    vptestmd %zmm2, %zmm2, %k0
999; NoVLX-NEXT:    kmovw %k0, %eax
1000; NoVLX-NEXT:    andl %edi, %eax
1001; NoVLX-NEXT:    shrl $16, %edi
1002; NoVLX-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
1003; NoVLX-NEXT:    vextracti64x4 $1, %zmm1, %ymm1
1004; NoVLX-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
1005; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
1006; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
1007; NoVLX-NEXT:    kmovw %k0, %ecx
1008; NoVLX-NEXT:    andl %edi, %ecx
1009; NoVLX-NEXT:    shll $16, %ecx
1010; NoVLX-NEXT:    movzwl %ax, %eax
1011; NoVLX-NEXT:    orl %ecx, %eax
1012; NoVLX-NEXT:    vzeroupper
1013; NoVLX-NEXT:    retq
1014entry:
1015  %0 = bitcast <8 x i64> %__a to <32 x i16>
1016  %1 = bitcast <8 x i64> %__b to <32 x i16>
1017  %2 = icmp eq <32 x i16> %0, %1
; Reinterpret the i32 mask argument as one i1 per compared lane.
1018  %3 = bitcast i32 %__u to <32 x i1>
1019  %4 = and <32 x i1> %2, %3
; Indices 32-63 address the zeroinitializer operand, so result lanes 32-63 are zero.
1020  %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
1021  %6 = bitcast <64 x i1> %5 to i64
1022  ret i64 %6
1023}
1024
; Memory-operand variant of the masked 32 x i16 equality compare: the second
; vector is loaded from %__b, the result is ANDed with the mask bits of %__u,
; widened to <64 x i1> with zero lanes and returned as an i64.
; The AVX512F-only expectations compare each 256-bit half against (%rsi) and
; 32(%rsi), AND each 16-bit mask with the matching half of %edi and merge them.
1025define zeroext i64 @test_masked_vpcmpeqw_v32i1_v64i1_mask_mem(i32 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
1026; VLX-LABEL: test_masked_vpcmpeqw_v32i1_v64i1_mask_mem:
1027; VLX:       # %bb.0: # %entry
1028; VLX-NEXT:    kmovd %edi, %k1
1029; VLX-NEXT:    vpcmpeqw (%rsi), %zmm0, %k0 {%k1}
1030; VLX-NEXT:    kmovq %k0, %rax
1031; VLX-NEXT:    vzeroupper
1032; VLX-NEXT:    retq
1033;
1034; NoVLX-LABEL: test_masked_vpcmpeqw_v32i1_v64i1_mask_mem:
1035; NoVLX:       # %bb.0: # %entry
1036; NoVLX-NEXT:    vpcmpeqw (%rsi), %ymm0, %ymm1
1037; NoVLX-NEXT:    vpmovsxwd %ymm1, %zmm1
1038; NoVLX-NEXT:    vptestmd %zmm1, %zmm1, %k0
1039; NoVLX-NEXT:    kmovw %k0, %eax
1040; NoVLX-NEXT:    andl %edi, %eax
1041; NoVLX-NEXT:    shrl $16, %edi
1042; NoVLX-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
1043; NoVLX-NEXT:    vpcmpeqw 32(%rsi), %ymm0, %ymm0
1044; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
1045; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
1046; NoVLX-NEXT:    kmovw %k0, %ecx
1047; NoVLX-NEXT:    andl %edi, %ecx
1048; NoVLX-NEXT:    shll $16, %ecx
1049; NoVLX-NEXT:    movzwl %ax, %eax
1050; NoVLX-NEXT:    orl %ecx, %eax
1051; NoVLX-NEXT:    vzeroupper
1052; NoVLX-NEXT:    retq
1053entry:
1054  %0 = bitcast <8 x i64> %__a to <32 x i16>
1055  %load = load <8 x i64>, <8 x i64>* %__b
1056  %1 = bitcast <8 x i64> %load to <32 x i16>
1057  %2 = icmp eq <32 x i16> %0, %1
; Reinterpret the i32 mask argument as one i1 per compared lane.
1058  %3 = bitcast i32 %__u to <32 x i1>
1059  %4 = and <32 x i1> %2, %3
; Indices 32-63 address the zeroinitializer operand, so result lanes 32-63 are zero.
1060  %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
1061  %6 = bitcast <64 x i1> %5 to i64
1062  ret i64 %6
1063}
1064
1065
; Compares the four i32 lanes of two 128-bit vectors for equality, widens the
; <4 x i1> result to <8 x i1> with zero lanes and returns it as an i8.
; The AVX512F-only expectations run the compare on zmm registers and then keep
; only mask bits 0-3 via the kshiftlw $12 / kshiftrw $12 pair.
1066define zeroext i8 @test_vpcmpeqd_v4i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
1067; VLX-LABEL: test_vpcmpeqd_v4i1_v8i1_mask:
1068; VLX:       # %bb.0: # %entry
1069; VLX-NEXT:    vpcmpeqd %xmm1, %xmm0, %k0
1070; VLX-NEXT:    kmovd %k0, %eax
1071; VLX-NEXT:    # kill: def $al killed $al killed $eax
1072; VLX-NEXT:    retq
1073;
1074; NoVLX-LABEL: test_vpcmpeqd_v4i1_v8i1_mask:
1075; NoVLX:       # %bb.0: # %entry
1076; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
1077; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
1078; NoVLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
1079; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
1080; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
1081; NoVLX-NEXT:    kmovw %k0, %eax
1082; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
1083; NoVLX-NEXT:    vzeroupper
1084; NoVLX-NEXT:    retq
1085entry:
1086  %0 = bitcast <2 x i64> %__a to <4 x i32>
1087  %1 = bitcast <2 x i64> %__b to <4 x i32>
1088  %2 = icmp eq <4 x i32> %0, %1
; Indices 4-7 address the zeroinitializer operand, so result lanes 4-7 are zero.
1089  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1090  %4 = bitcast <8 x i1> %3 to i8
1091  ret i8 %4
1092}
1093
; Memory-operand variant: compares the four i32 lanes of %__a against a
; 128-bit vector loaded from %__b, widens the <4 x i1> result to <8 x i1> with
; zero lanes and returns it as an i8.
1094define zeroext i8 @test_vpcmpeqd_v4i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
1095; VLX-LABEL: test_vpcmpeqd_v4i1_v8i1_mask_mem:
1096; VLX:       # %bb.0: # %entry
1097; VLX-NEXT:    vpcmpeqd (%rdi), %xmm0, %k0
1098; VLX-NEXT:    kmovd %k0, %eax
1099; VLX-NEXT:    # kill: def $al killed $al killed $eax
1100; VLX-NEXT:    retq
1101;
1102; NoVLX-LABEL: test_vpcmpeqd_v4i1_v8i1_mask_mem:
1103; NoVLX:       # %bb.0: # %entry
1104; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
1105; NoVLX-NEXT:    vmovdqa (%rdi), %xmm1
1106; NoVLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
1107; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
1108; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
1109; NoVLX-NEXT:    kmovw %k0, %eax
1110; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
1111; NoVLX-NEXT:    vzeroupper
1112; NoVLX-NEXT:    retq
1113entry:
1114  %0 = bitcast <2 x i64> %__a to <4 x i32>
1115  %load = load <2 x i64>, <2 x i64>* %__b
1116  %1 = bitcast <2 x i64> %load to <4 x i32>
1117  %2 = icmp eq <4 x i32> %0, %1
; Indices 4-7 address the zeroinitializer operand, so result lanes 4-7 are zero.
1118  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1119  %4 = bitcast <8 x i1> %3 to i8
1120  ret i8 %4
1121}
1122
; Compares the four i32 lanes of %__a and %__b for equality, ANDs the result
; with the low four mask bits of %__u, widens it to <8 x i1> with zero lanes
; and returns it as an i8.
1123define zeroext i8 @test_masked_vpcmpeqd_v4i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
1124; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask:
1125; VLX:       # %bb.0: # %entry
1126; VLX-NEXT:    kmovd %edi, %k1
1127; VLX-NEXT:    vpcmpeqd %xmm1, %xmm0, %k0 {%k1}
1128; VLX-NEXT:    kmovd %k0, %eax
1129; VLX-NEXT:    # kill: def $al killed $al killed $eax
1130; VLX-NEXT:    retq
1131;
1132; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask:
1133; NoVLX:       # %bb.0: # %entry
1134; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
1135; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
1136; NoVLX-NEXT:    kmovw %edi, %k1
1137; NoVLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
1138; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
1139; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
1140; NoVLX-NEXT:    kmovw %k0, %eax
1141; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
1142; NoVLX-NEXT:    vzeroupper
1143; NoVLX-NEXT:    retq
1144entry:
1145  %0 = bitcast <2 x i64> %__a to <4 x i32>
1146  %1 = bitcast <2 x i64> %__b to <4 x i32>
1147  %2 = icmp eq <4 x i32> %0, %1
1148  %3 = bitcast i8 %__u to <8 x i1>
; Only the low four mask bits correspond to compared lanes; drop the rest.
1149  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1150  %4 = and <4 x i1> %2, %extract.i
; Indices 4-7 address the zeroinitializer operand, so result lanes 4-7 are zero.
1151  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1152  %6 = bitcast <8 x i1> %5 to i8
1153  ret i8 %6
1154}
1155
; Memory-operand variant of the masked 4 x i32 equality compare: the second
; vector is loaded from %__b, the result is ANDed with the low four mask bits
; of %__u, widened to <8 x i1> with zero lanes and returned as an i8.
1156define zeroext i8 @test_masked_vpcmpeqd_v4i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
1157; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask_mem:
1158; VLX:       # %bb.0: # %entry
1159; VLX-NEXT:    kmovd %edi, %k1
1160; VLX-NEXT:    vpcmpeqd (%rsi), %xmm0, %k0 {%k1}
1161; VLX-NEXT:    kmovd %k0, %eax
1162; VLX-NEXT:    # kill: def $al killed $al killed $eax
1163; VLX-NEXT:    retq
1164;
1165; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask_mem:
1166; NoVLX:       # %bb.0: # %entry
1167; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
1168; NoVLX-NEXT:    vmovdqa (%rsi), %xmm1
1169; NoVLX-NEXT:    kmovw %edi, %k1
1170; NoVLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
1171; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
1172; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
1173; NoVLX-NEXT:    kmovw %k0, %eax
1174; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
1175; NoVLX-NEXT:    vzeroupper
1176; NoVLX-NEXT:    retq
1177entry:
1178  %0 = bitcast <2 x i64> %__a to <4 x i32>
1179  %load = load <2 x i64>, <2 x i64>* %__b
1180  %1 = bitcast <2 x i64> %load to <4 x i32>
1181  %2 = icmp eq <4 x i32> %0, %1
1182  %3 = bitcast i8 %__u to <8 x i1>
; Only the low four mask bits correspond to compared lanes; drop the rest.
1183  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1184  %4 = and <4 x i1> %2, %extract.i
; Indices 4-7 address the zeroinitializer operand, so result lanes 4-7 are zero.
1185  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1186  %6 = bitcast <8 x i1> %5 to i8
1187  ret i8 %6
1188}
1189
1190
; Broadcast variant: loads a single i32 from %__b, splats it across four
; lanes, compares it for equality with the four i32 lanes of %__a, widens the
; <4 x i1> result to <8 x i1> with zero lanes and returns it as an i8.
; With VL available the expected compare folds the load as a {1to4} broadcast
; memory operand; the AVX512F-only expectations use a separate vpbroadcastd.
1191define zeroext i8 @test_vpcmpeqd_v4i1_v8i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
1192; VLX-LABEL: test_vpcmpeqd_v4i1_v8i1_mask_mem_b:
1193; VLX:       # %bb.0: # %entry
1194; VLX-NEXT:    vpcmpeqd (%rdi){1to4}, %xmm0, %k0
1195; VLX-NEXT:    kmovd %k0, %eax
1196; VLX-NEXT:    # kill: def $al killed $al killed $eax
1197; VLX-NEXT:    retq
1198;
1199; NoVLX-LABEL: test_vpcmpeqd_v4i1_v8i1_mask_mem_b:
1200; NoVLX:       # %bb.0: # %entry
1201; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
1202; NoVLX-NEXT:    vpbroadcastd (%rdi), %xmm1
1203; NoVLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
1204; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
1205; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
1206; NoVLX-NEXT:    kmovw %k0, %eax
1207; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
1208; NoVLX-NEXT:    vzeroupper
1209; NoVLX-NEXT:    retq
1210entry:
1211  %0 = bitcast <2 x i64> %__a to <4 x i32>
1212  %load = load i32, i32* %__b
; Splat: insert the scalar into lane 0, then replicate lane 0 into all four lanes.
1213  %vec = insertelement <4 x i32> undef, i32 %load, i32 0
1214  %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
1215  %2 = icmp eq <4 x i32> %0, %1
; Indices 4-7 address the zeroinitializer operand, so result lanes 4-7 are zero.
1216  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1217  %4 = bitcast <8 x i1> %3 to i8
1218  ret i8 %4
1219}
1220
; Masked broadcast variant: loads a single i32 from %__b, splats it across
; four lanes, compares it for equality with the four i32 lanes of %__a, ANDs
; the result with the low four mask bits of %__u, widens it to <8 x i1> with
; zero lanes and returns it as an i8.
1221define zeroext i8 @test_masked_vpcmpeqd_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
1222; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask_mem_b:
1223; VLX:       # %bb.0: # %entry
1224; VLX-NEXT:    kmovd %edi, %k1
1225; VLX-NEXT:    vpcmpeqd (%rsi){1to4}, %xmm0, %k0 {%k1}
1226; VLX-NEXT:    kmovd %k0, %eax
1227; VLX-NEXT:    # kill: def $al killed $al killed $eax
1228; VLX-NEXT:    retq
1229;
1230; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask_mem_b:
1231; NoVLX:       # %bb.0: # %entry
1232; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
1233; NoVLX-NEXT:    vpbroadcastd (%rsi), %xmm1
1234; NoVLX-NEXT:    kmovw %edi, %k1
1235; NoVLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
1236; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
1237; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
1238; NoVLX-NEXT:    kmovw %k0, %eax
1239; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
1240; NoVLX-NEXT:    vzeroupper
1241; NoVLX-NEXT:    retq
1242entry:
1243  %0 = bitcast <2 x i64> %__a to <4 x i32>
1244  %load = load i32, i32* %__b
; Splat: insert the scalar into lane 0, then replicate lane 0 into all four lanes.
1245  %vec = insertelement <4 x i32> undef, i32 %load, i32 0
1246  %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
1247  %2 = icmp eq <4 x i32> %0, %1
1248  %3 = bitcast i8 %__u to <8 x i1>
; Only the low four mask bits correspond to compared lanes; drop the rest.
1249  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1250  %4 = and <4 x i1> %extract.i, %2
; Indices 4-7 address the zeroinitializer operand, so result lanes 4-7 are zero.
1251  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1252  %6 = bitcast <8 x i1> %5 to i8
1253  ret i8 %6
1254}
1255
1256
; Compares the four i32 lanes of two 128-bit vectors for equality, widens the
; <4 x i1> result to <16 x i1> with zero lanes and returns it as an i16.
1257define zeroext i16 @test_vpcmpeqd_v4i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
1258; VLX-LABEL: test_vpcmpeqd_v4i1_v16i1_mask:
1259; VLX:       # %bb.0: # %entry
1260; VLX-NEXT:    vpcmpeqd %xmm1, %xmm0, %k0
1261; VLX-NEXT:    kmovd %k0, %eax
1262; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
1263; VLX-NEXT:    retq
1264;
1265; NoVLX-LABEL: test_vpcmpeqd_v4i1_v16i1_mask:
1266; NoVLX:       # %bb.0: # %entry
1267; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
1268; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
1269; NoVLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
1270; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
1271; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
1272; NoVLX-NEXT:    kmovw %k0, %eax
1273; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
1274; NoVLX-NEXT:    vzeroupper
1275; NoVLX-NEXT:    retq
1276entry:
1277  %0 = bitcast <2 x i64> %__a to <4 x i32>
1278  %1 = bitcast <2 x i64> %__b to <4 x i32>
1279  %2 = icmp eq <4 x i32> %0, %1
; Indices 4-7 address the zeroinitializer operand, so result lanes 4-15 are zero.
1280  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
1281  %4 = bitcast <16 x i1> %3 to i16
1282  ret i16 %4
1283}
1284
; Memory-operand variant: compares the four i32 lanes of %__a against a
; 128-bit vector loaded from %__b, widens the <4 x i1> result to <16 x i1>
; with zero lanes and returns it as an i16.
1285define zeroext i16 @test_vpcmpeqd_v4i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
1286; VLX-LABEL: test_vpcmpeqd_v4i1_v16i1_mask_mem:
1287; VLX:       # %bb.0: # %entry
1288; VLX-NEXT:    vpcmpeqd (%rdi), %xmm0, %k0
1289; VLX-NEXT:    kmovd %k0, %eax
1290; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
1291; VLX-NEXT:    retq
1292;
1293; NoVLX-LABEL: test_vpcmpeqd_v4i1_v16i1_mask_mem:
1294; NoVLX:       # %bb.0: # %entry
1295; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
1296; NoVLX-NEXT:    vmovdqa (%rdi), %xmm1
1297; NoVLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
1298; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
1299; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
1300; NoVLX-NEXT:    kmovw %k0, %eax
1301; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
1302; NoVLX-NEXT:    vzeroupper
1303; NoVLX-NEXT:    retq
1304entry:
1305  %0 = bitcast <2 x i64> %__a to <4 x i32>
1306  %load = load <2 x i64>, <2 x i64>* %__b
1307  %1 = bitcast <2 x i64> %load to <4 x i32>
1308  %2 = icmp eq <4 x i32> %0, %1
; Indices 4-7 address the zeroinitializer operand, so result lanes 4-15 are zero.
1309  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
1310  %4 = bitcast <16 x i1> %3 to i16
1311  ret i16 %4
1312}
1313
; Masked register/register variant: the <4 x i32> equality result is ANDed with
; the low four bits of %__u, zero-padded to <16 x i1>, and returned as i16.
; Both runs expect %edi moved into k1 and used as the write-mask of vpcmpeqd;
; the run without AVX512VL additionally expects the 512-bit compare and the
; kshiftlw/kshiftrw $12 pair that clears mask bits 4-15.
1314define zeroext i16 @test_masked_vpcmpeqd_v4i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
1315; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask:
1316; VLX:       # %bb.0: # %entry
1317; VLX-NEXT:    kmovd %edi, %k1
1318; VLX-NEXT:    vpcmpeqd %xmm1, %xmm0, %k0 {%k1}
1319; VLX-NEXT:    kmovd %k0, %eax
1320; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
1321; VLX-NEXT:    retq
1322;
1323; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask:
1324; NoVLX:       # %bb.0: # %entry
1325; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
1326; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
1327; NoVLX-NEXT:    kmovw %edi, %k1
1328; NoVLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
1329; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
1330; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
1331; NoVLX-NEXT:    kmovw %k0, %eax
1332; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
1333; NoVLX-NEXT:    vzeroupper
1334; NoVLX-NEXT:    retq
1335entry:
1336  %0 = bitcast <2 x i64> %__a to <4 x i32>
1337  %1 = bitcast <2 x i64> %__b to <4 x i32>
1338  %2 = icmp eq <4 x i32> %0, %1
  ; Reinterpret the i8 mask as eight i1 lanes and keep only lanes 0-3.
1339  %3 = bitcast i8 %__u to <8 x i1>
1340  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ; The compare result is the first operand of the 'and' in this variant.
1341  %4 = and <4 x i1> %2, %extract.i
  ; Shuffle indices 4-7 select lanes of the zeroinitializer operand, so only
  ; result lanes 0-3 can be true.
1342  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
1343  %6 = bitcast <16 x i1> %5 to i16
1344  ret i16 %6
1345}
1346
; Masked memory-operand variant: the right-hand side is loaded from %__b as
; <2 x i64>, compared as <4 x i32>, ANDed with the low four bits of %__u,
; zero-padded to <16 x i1>, and returned as i16.
; The run with AVX512VL expects the load folded into a write-masked vpcmpeqd;
; the run without it expects vmovdqa into xmm1, a write-masked 512-bit compare,
; and the kshiftlw/kshiftrw $12 pair that clears mask bits 4-15.
1347define zeroext i16 @test_masked_vpcmpeqd_v4i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
1348; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask_mem:
1349; VLX:       # %bb.0: # %entry
1350; VLX-NEXT:    kmovd %edi, %k1
1351; VLX-NEXT:    vpcmpeqd (%rsi), %xmm0, %k0 {%k1}
1352; VLX-NEXT:    kmovd %k0, %eax
1353; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
1354; VLX-NEXT:    retq
1355;
1356; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask_mem:
1357; NoVLX:       # %bb.0: # %entry
1358; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
1359; NoVLX-NEXT:    vmovdqa (%rsi), %xmm1
1360; NoVLX-NEXT:    kmovw %edi, %k1
1361; NoVLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
1362; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
1363; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
1364; NoVLX-NEXT:    kmovw %k0, %eax
1365; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
1366; NoVLX-NEXT:    vzeroupper
1367; NoVLX-NEXT:    retq
1368entry:
1369  %0 = bitcast <2 x i64> %__a to <4 x i32>
1370  %load = load <2 x i64>, <2 x i64>* %__b
1371  %1 = bitcast <2 x i64> %load to <4 x i32>
1372  %2 = icmp eq <4 x i32> %0, %1
  ; Reinterpret the i8 mask as eight i1 lanes and keep only lanes 0-3.
1373  %3 = bitcast i8 %__u to <8 x i1>
1374  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ; The compare result is the first operand of the 'and' in this variant.
1375  %4 = and <4 x i1> %2, %extract.i
  ; Shuffle indices 4-7 select lanes of the zeroinitializer operand, so only
  ; result lanes 0-3 can be true.
1376  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
1377  %6 = bitcast <16 x i1> %5 to i16
1378  ret i16 %6
1379}
1380
1381
; Broadcast-memory variant: a single i32 loaded from %__b is splatted to all
; four lanes and compared for equality with %__a viewed as <4 x i32>; the
; <4 x i1> result is zero-padded to <16 x i1> and returned as i16.
; The run with AVX512VL expects the {1to4} embedded-broadcast operand on
; vpcmpeqd; the run without it expects vpbroadcastd into xmm1, a 512-bit
; compare, and the kshiftlw/kshiftrw $12 pair that clears mask bits 4-15.
1382define zeroext i16 @test_vpcmpeqd_v4i1_v16i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
1383; VLX-LABEL: test_vpcmpeqd_v4i1_v16i1_mask_mem_b:
1384; VLX:       # %bb.0: # %entry
1385; VLX-NEXT:    vpcmpeqd (%rdi){1to4}, %xmm0, %k0
1386; VLX-NEXT:    kmovd %k0, %eax
1387; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
1388; VLX-NEXT:    retq
1389;
1390; NoVLX-LABEL: test_vpcmpeqd_v4i1_v16i1_mask_mem_b:
1391; NoVLX:       # %bb.0: # %entry
1392; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
1393; NoVLX-NEXT:    vpbroadcastd (%rdi), %xmm1
1394; NoVLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
1395; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
1396; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
1397; NoVLX-NEXT:    kmovw %k0, %eax
1398; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
1399; NoVLX-NEXT:    vzeroupper
1400; NoVLX-NEXT:    retq
1401entry:
1402  %0 = bitcast <2 x i64> %__a to <4 x i32>
  ; Splat idiom: insert the scalar into lane 0, then shuffle lane 0 to every lane.
1403  %load = load i32, i32* %__b
1404  %vec = insertelement <4 x i32> undef, i32 %load, i32 0
1405  %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
1406  %2 = icmp eq <4 x i32> %0, %1
  ; Shuffle indices 4-7 select lanes of the zeroinitializer operand, so only
  ; result lanes 0-3 can be true.
1407  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
1408  %4 = bitcast <16 x i1> %3 to i16
1409  ret i16 %4
1410}
1411
; Masked broadcast-memory variant: a single i32 loaded from %__b is splatted to
; four lanes, compared with %__a viewed as <4 x i32>, ANDed with the low four
; bits of %__u, zero-padded to <16 x i1>, and returned as i16.
; The run with AVX512VL expects a write-masked vpcmpeqd with the {1to4}
; embedded-broadcast operand; the run without it expects vpbroadcastd into xmm1,
; a write-masked 512-bit compare, and the kshiftlw/kshiftrw $12 pair.
1412define zeroext i16 @test_masked_vpcmpeqd_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
1413; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask_mem_b:
1414; VLX:       # %bb.0: # %entry
1415; VLX-NEXT:    kmovd %edi, %k1
1416; VLX-NEXT:    vpcmpeqd (%rsi){1to4}, %xmm0, %k0 {%k1}
1417; VLX-NEXT:    kmovd %k0, %eax
1418; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
1419; VLX-NEXT:    retq
1420;
1421; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask_mem_b:
1422; NoVLX:       # %bb.0: # %entry
1423; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
1424; NoVLX-NEXT:    vpbroadcastd (%rsi), %xmm1
1425; NoVLX-NEXT:    kmovw %edi, %k1
1426; NoVLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
1427; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
1428; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
1429; NoVLX-NEXT:    kmovw %k0, %eax
1430; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
1431; NoVLX-NEXT:    vzeroupper
1432; NoVLX-NEXT:    retq
1433entry:
1434  %0 = bitcast <2 x i64> %__a to <4 x i32>
  ; Splat idiom: insert the scalar into lane 0, then shuffle lane 0 to every lane.
1435  %load = load i32, i32* %__b
1436  %vec = insertelement <4 x i32> undef, i32 %load, i32 0
1437  %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
1438  %2 = icmp eq <4 x i32> %0, %1
  ; Reinterpret the i8 mask as eight i1 lanes and keep only lanes 0-3.
1439  %3 = bitcast i8 %__u to <8 x i1>
1440  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ; The extracted mask is the first operand of the 'and' in this variant.
1441  %4 = and <4 x i1> %extract.i, %2
  ; Shuffle indices 4-7 select lanes of the zeroinitializer operand, so only
  ; result lanes 0-3 can be true.
1442  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
1443  %6 = bitcast <16 x i1> %5 to i16
1444  ret i16 %6
1445}
1446
1447
; Register/register variant with a 32-bit result: compares the two <4 x i32>
; inputs for equality and returns the <4 x i1> result zero-padded to <32 x i1>,
; bitcast to i32.
; The run with AVX512VL expects a 128-bit vpcmpeqd plus kmovd; the run without
; it expects a 512-bit compare, the kshiftlw/kshiftrw $12 pair that clears mask
; bits 4-15, and kmovw.
1448define zeroext i32 @test_vpcmpeqd_v4i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
1449; VLX-LABEL: test_vpcmpeqd_v4i1_v32i1_mask:
1450; VLX:       # %bb.0: # %entry
1451; VLX-NEXT:    vpcmpeqd %xmm1, %xmm0, %k0
1452; VLX-NEXT:    kmovd %k0, %eax
1453; VLX-NEXT:    retq
1454;
1455; NoVLX-LABEL: test_vpcmpeqd_v4i1_v32i1_mask:
1456; NoVLX:       # %bb.0: # %entry
1457; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
1458; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
1459; NoVLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
1460; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
1461; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
1462; NoVLX-NEXT:    kmovw %k0, %eax
1463; NoVLX-NEXT:    vzeroupper
1464; NoVLX-NEXT:    retq
1465entry:
1466  %0 = bitcast <2 x i64> %__a to <4 x i32>
1467  %1 = bitcast <2 x i64> %__b to <4 x i32>
1468  %2 = icmp eq <4 x i32> %0, %1
  ; Shuffle indices 4-7 select lanes of the zeroinitializer operand, so only
  ; result lanes 0-3 can be true.
1469  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
1470  %4 = bitcast <32 x i1> %3 to i32
1471  ret i32 %4
1472}
1473
; Memory-operand variant with a 32-bit result: the right-hand side is loaded
; from %__b as <2 x i64>, compared as <4 x i32>, and the <4 x i1> result is
; zero-padded to <32 x i1> and returned as i32.
; The run with AVX512VL expects the load folded into vpcmpeqd; the run without
; it expects vmovdqa into xmm1, a 512-bit compare, and the kshiftlw/kshiftrw
; $12 pair that clears mask bits 4-15.
1474define zeroext i32 @test_vpcmpeqd_v4i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
1475; VLX-LABEL: test_vpcmpeqd_v4i1_v32i1_mask_mem:
1476; VLX:       # %bb.0: # %entry
1477; VLX-NEXT:    vpcmpeqd (%rdi), %xmm0, %k0
1478; VLX-NEXT:    kmovd %k0, %eax
1479; VLX-NEXT:    retq
1480;
1481; NoVLX-LABEL: test_vpcmpeqd_v4i1_v32i1_mask_mem:
1482; NoVLX:       # %bb.0: # %entry
1483; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
1484; NoVLX-NEXT:    vmovdqa (%rdi), %xmm1
1485; NoVLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
1486; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
1487; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
1488; NoVLX-NEXT:    kmovw %k0, %eax
1489; NoVLX-NEXT:    vzeroupper
1490; NoVLX-NEXT:    retq
1491entry:
1492  %0 = bitcast <2 x i64> %__a to <4 x i32>
1493  %load = load <2 x i64>, <2 x i64>* %__b
1494  %1 = bitcast <2 x i64> %load to <4 x i32>
1495  %2 = icmp eq <4 x i32> %0, %1
  ; Shuffle indices 4-7 select lanes of the zeroinitializer operand, so only
  ; result lanes 0-3 can be true.
1496  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
1497  %4 = bitcast <32 x i1> %3 to i32
1498  ret i32 %4
1499}
1500
; Masked register/register variant with a 32-bit result: the <4 x i32> equality
; result is ANDed with the low four bits of %__u, zero-padded to <32 x i1>, and
; returned as i32.
; Both runs expect %edi moved into k1 and used as the write-mask of vpcmpeqd;
; the run without AVX512VL additionally expects the 512-bit compare and the
; kshiftlw/kshiftrw $12 pair that clears mask bits 4-15.
1501define zeroext i32 @test_masked_vpcmpeqd_v4i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
1502; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask:
1503; VLX:       # %bb.0: # %entry
1504; VLX-NEXT:    kmovd %edi, %k1
1505; VLX-NEXT:    vpcmpeqd %xmm1, %xmm0, %k0 {%k1}
1506; VLX-NEXT:    kmovd %k0, %eax
1507; VLX-NEXT:    retq
1508;
1509; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask:
1510; NoVLX:       # %bb.0: # %entry
1511; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
1512; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
1513; NoVLX-NEXT:    kmovw %edi, %k1
1514; NoVLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
1515; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
1516; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
1517; NoVLX-NEXT:    kmovw %k0, %eax
1518; NoVLX-NEXT:    vzeroupper
1519; NoVLX-NEXT:    retq
1520entry:
1521  %0 = bitcast <2 x i64> %__a to <4 x i32>
1522  %1 = bitcast <2 x i64> %__b to <4 x i32>
1523  %2 = icmp eq <4 x i32> %0, %1
  ; Reinterpret the i8 mask as eight i1 lanes and keep only lanes 0-3.
1524  %3 = bitcast i8 %__u to <8 x i1>
1525  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ; The compare result is the first operand of the 'and' in this variant.
1526  %4 = and <4 x i1> %2, %extract.i
  ; Shuffle indices 4-7 select lanes of the zeroinitializer operand, so only
  ; result lanes 0-3 can be true.
1527  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
1528  %6 = bitcast <32 x i1> %5 to i32
1529  ret i32 %6
1530}
1531
; Masked memory-operand variant with a 32-bit result: the right-hand side is
; loaded from %__b as <2 x i64>, compared as <4 x i32>, ANDed with the low four
; bits of %__u, zero-padded to <32 x i1>, and returned as i32.
; The run with AVX512VL expects the load folded into a write-masked vpcmpeqd;
; the run without it expects vmovdqa into xmm1, a write-masked 512-bit compare,
; and the kshiftlw/kshiftrw $12 pair that clears mask bits 4-15.
1532define zeroext i32 @test_masked_vpcmpeqd_v4i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
1533; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask_mem:
1534; VLX:       # %bb.0: # %entry
1535; VLX-NEXT:    kmovd %edi, %k1
1536; VLX-NEXT:    vpcmpeqd (%rsi), %xmm0, %k0 {%k1}
1537; VLX-NEXT:    kmovd %k0, %eax
1538; VLX-NEXT:    retq
1539;
1540; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask_mem:
1541; NoVLX:       # %bb.0: # %entry
1542; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
1543; NoVLX-NEXT:    vmovdqa (%rsi), %xmm1
1544; NoVLX-NEXT:    kmovw %edi, %k1
1545; NoVLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
1546; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
1547; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
1548; NoVLX-NEXT:    kmovw %k0, %eax
1549; NoVLX-NEXT:    vzeroupper
1550; NoVLX-NEXT:    retq
1551entry:
1552  %0 = bitcast <2 x i64> %__a to <4 x i32>
1553  %load = load <2 x i64>, <2 x i64>* %__b
1554  %1 = bitcast <2 x i64> %load to <4 x i32>
1555  %2 = icmp eq <4 x i32> %0, %1
  ; Reinterpret the i8 mask as eight i1 lanes and keep only lanes 0-3.
1556  %3 = bitcast i8 %__u to <8 x i1>
1557  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ; The compare result is the first operand of the 'and' in this variant.
1558  %4 = and <4 x i1> %2, %extract.i
  ; Shuffle indices 4-7 select lanes of the zeroinitializer operand, so only
  ; result lanes 0-3 can be true.
1559  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
1560  %6 = bitcast <32 x i1> %5 to i32
1561  ret i32 %6
1562}
1563
1564
; Broadcast-memory variant with a 32-bit result: a single i32 loaded from %__b
; is splatted to four lanes and compared with %__a viewed as <4 x i32>; the
; <4 x i1> result is zero-padded to <32 x i1> and returned as i32.
; The run with AVX512VL expects the {1to4} embedded-broadcast operand on
; vpcmpeqd; the run without it expects vpbroadcastd into xmm1, a 512-bit
; compare, and the kshiftlw/kshiftrw $12 pair that clears mask bits 4-15.
1565define zeroext i32 @test_vpcmpeqd_v4i1_v32i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
1566; VLX-LABEL: test_vpcmpeqd_v4i1_v32i1_mask_mem_b:
1567; VLX:       # %bb.0: # %entry
1568; VLX-NEXT:    vpcmpeqd (%rdi){1to4}, %xmm0, %k0
1569; VLX-NEXT:    kmovd %k0, %eax
1570; VLX-NEXT:    retq
1571;
1572; NoVLX-LABEL: test_vpcmpeqd_v4i1_v32i1_mask_mem_b:
1573; NoVLX:       # %bb.0: # %entry
1574; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
1575; NoVLX-NEXT:    vpbroadcastd (%rdi), %xmm1
1576; NoVLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
1577; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
1578; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
1579; NoVLX-NEXT:    kmovw %k0, %eax
1580; NoVLX-NEXT:    vzeroupper
1581; NoVLX-NEXT:    retq
1582entry:
1583  %0 = bitcast <2 x i64> %__a to <4 x i32>
  ; Splat idiom: insert the scalar into lane 0, then shuffle lane 0 to every lane.
1584  %load = load i32, i32* %__b
1585  %vec = insertelement <4 x i32> undef, i32 %load, i32 0
1586  %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
1587  %2 = icmp eq <4 x i32> %0, %1
  ; Shuffle indices 4-7 select lanes of the zeroinitializer operand, so only
  ; result lanes 0-3 can be true.
1588  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
1589  %4 = bitcast <32 x i1> %3 to i32
1590  ret i32 %4
1591}
1592
; Masked broadcast-memory variant with a 32-bit result: a single i32 loaded from
; %__b is splatted to four lanes, compared with %__a viewed as <4 x i32>, ANDed
; with the low four bits of %__u, zero-padded to <32 x i1>, and returned as i32.
; The run with AVX512VL expects a write-masked vpcmpeqd with the {1to4}
; embedded-broadcast operand; the run without it expects vpbroadcastd into xmm1,
; a write-masked 512-bit compare, and the kshiftlw/kshiftrw $12 pair.
1593define zeroext i32 @test_masked_vpcmpeqd_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
1594; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask_mem_b:
1595; VLX:       # %bb.0: # %entry
1596; VLX-NEXT:    kmovd %edi, %k1
1597; VLX-NEXT:    vpcmpeqd (%rsi){1to4}, %xmm0, %k0 {%k1}
1598; VLX-NEXT:    kmovd %k0, %eax
1599; VLX-NEXT:    retq
1600;
1601; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask_mem_b:
1602; NoVLX:       # %bb.0: # %entry
1603; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
1604; NoVLX-NEXT:    vpbroadcastd (%rsi), %xmm1
1605; NoVLX-NEXT:    kmovw %edi, %k1
1606; NoVLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
1607; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
1608; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
1609; NoVLX-NEXT:    kmovw %k0, %eax
1610; NoVLX-NEXT:    vzeroupper
1611; NoVLX-NEXT:    retq
1612entry:
1613  %0 = bitcast <2 x i64> %__a to <4 x i32>
  ; Splat idiom: insert the scalar into lane 0, then shuffle lane 0 to every lane.
1614  %load = load i32, i32* %__b
1615  %vec = insertelement <4 x i32> undef, i32 %load, i32 0
1616  %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
1617  %2 = icmp eq <4 x i32> %0, %1
  ; Reinterpret the i8 mask as eight i1 lanes and keep only lanes 0-3.
1618  %3 = bitcast i8 %__u to <8 x i1>
1619  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ; The extracted mask is the first operand of the 'and' in this variant.
1620  %4 = and <4 x i1> %extract.i, %2
  ; Shuffle indices 4-7 select lanes of the zeroinitializer operand, so only
  ; result lanes 0-3 can be true.
1621  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
1622  %6 = bitcast <32 x i1> %5 to i32
1623  ret i32 %6
1624}
1625
1626
; Register/register variant with a 64-bit result: compares the two <4 x i32>
; inputs for equality and returns the <4 x i1> result zero-padded to <64 x i1>,
; bitcast to i64.
; The run with AVX512VL expects a 128-bit vpcmpeqd plus kmovq into rax; the run
; without it expects a 512-bit compare, the kshiftlw/kshiftrw $12 pair that
; clears mask bits 4-15, then kmovw and movzwl to produce the result.
1627define zeroext i64 @test_vpcmpeqd_v4i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
1628; VLX-LABEL: test_vpcmpeqd_v4i1_v64i1_mask:
1629; VLX:       # %bb.0: # %entry
1630; VLX-NEXT:    vpcmpeqd %xmm1, %xmm0, %k0
1631; VLX-NEXT:    kmovq %k0, %rax
1632; VLX-NEXT:    retq
1633;
1634; NoVLX-LABEL: test_vpcmpeqd_v4i1_v64i1_mask:
1635; NoVLX:       # %bb.0: # %entry
1636; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
1637; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
1638; NoVLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
1639; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
1640; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
1641; NoVLX-NEXT:    kmovw %k0, %eax
1642; NoVLX-NEXT:    movzwl %ax, %eax
1643; NoVLX-NEXT:    vzeroupper
1644; NoVLX-NEXT:    retq
1645entry:
1646  %0 = bitcast <2 x i64> %__a to <4 x i32>
1647  %1 = bitcast <2 x i64> %__b to <4 x i32>
1648  %2 = icmp eq <4 x i32> %0, %1
  ; Shuffle indices 4-7 select lanes of the zeroinitializer operand, so only
  ; result lanes 0-3 can be true.
1649  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
1650  %4 = bitcast <64 x i1> %3 to i64
1651  ret i64 %4
1652}
1653
; Memory-operand variant with a 64-bit result: the right-hand side is loaded
; from %__b as <2 x i64>, compared as <4 x i32>, and the <4 x i1> result is
; zero-padded to <64 x i1> and returned as i64.
; The run with AVX512VL expects the load folded into vpcmpeqd plus kmovq; the
; run without it expects vmovdqa into xmm1, a 512-bit compare, the
; kshiftlw/kshiftrw $12 pair, then kmovw and movzwl.
1654define zeroext i64 @test_vpcmpeqd_v4i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
1655; VLX-LABEL: test_vpcmpeqd_v4i1_v64i1_mask_mem:
1656; VLX:       # %bb.0: # %entry
1657; VLX-NEXT:    vpcmpeqd (%rdi), %xmm0, %k0
1658; VLX-NEXT:    kmovq %k0, %rax
1659; VLX-NEXT:    retq
1660;
1661; NoVLX-LABEL: test_vpcmpeqd_v4i1_v64i1_mask_mem:
1662; NoVLX:       # %bb.0: # %entry
1663; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
1664; NoVLX-NEXT:    vmovdqa (%rdi), %xmm1
1665; NoVLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
1666; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
1667; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
1668; NoVLX-NEXT:    kmovw %k0, %eax
1669; NoVLX-NEXT:    movzwl %ax, %eax
1670; NoVLX-NEXT:    vzeroupper
1671; NoVLX-NEXT:    retq
1672entry:
1673  %0 = bitcast <2 x i64> %__a to <4 x i32>
1674  %load = load <2 x i64>, <2 x i64>* %__b
1675  %1 = bitcast <2 x i64> %load to <4 x i32>
1676  %2 = icmp eq <4 x i32> %0, %1
  ; Shuffle indices 4-7 select lanes of the zeroinitializer operand, so only
  ; result lanes 0-3 can be true.
1677  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
1678  %4 = bitcast <64 x i1> %3 to i64
1679  ret i64 %4
1680}
1681
; Masked register/register variant with a 64-bit result: the <4 x i32> equality
; result is ANDed with the low four bits of %__u, zero-padded to <64 x i1>, and
; returned as i64.
; Both runs expect %edi moved into k1 and used as the write-mask of vpcmpeqd;
; the run without AVX512VL additionally expects the 512-bit compare, the
; kshiftlw/kshiftrw $12 pair, then kmovw and movzwl.
1682define zeroext i64 @test_masked_vpcmpeqd_v4i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
1683; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v64i1_mask:
1684; VLX:       # %bb.0: # %entry
1685; VLX-NEXT:    kmovd %edi, %k1
1686; VLX-NEXT:    vpcmpeqd %xmm1, %xmm0, %k0 {%k1}
1687; VLX-NEXT:    kmovq %k0, %rax
1688; VLX-NEXT:    retq
1689;
1690; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v64i1_mask:
1691; NoVLX:       # %bb.0: # %entry
1692; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
1693; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
1694; NoVLX-NEXT:    kmovw %edi, %k1
1695; NoVLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
1696; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
1697; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
1698; NoVLX-NEXT:    kmovw %k0, %eax
1699; NoVLX-NEXT:    movzwl %ax, %eax
1700; NoVLX-NEXT:    vzeroupper
1701; NoVLX-NEXT:    retq
1702entry:
1703  %0 = bitcast <2 x i64> %__a to <4 x i32>
1704  %1 = bitcast <2 x i64> %__b to <4 x i32>
1705  %2 = icmp eq <4 x i32> %0, %1
  ; Reinterpret the i8 mask as eight i1 lanes and keep only lanes 0-3.
1706  %3 = bitcast i8 %__u to <8 x i1>
1707  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ; The compare result is the first operand of the 'and' in this variant.
1708  %4 = and <4 x i1> %2, %extract.i
  ; Shuffle indices 4-7 select lanes of the zeroinitializer operand, so only
  ; result lanes 0-3 can be true.
1709  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
1710  %6 = bitcast <64 x i1> %5 to i64
1711  ret i64 %6
1712}
1713
; Masked memory-operand variant with a 64-bit result: the right-hand side is
; loaded from %__b as <2 x i64>, compared as <4 x i32>, ANDed with the low four
; bits of %__u, zero-padded to <64 x i1>, and returned as i64.
; The run with AVX512VL expects the load folded into a write-masked vpcmpeqd
; plus kmovq; the run without it expects vmovdqa into xmm1, a write-masked
; 512-bit compare, the kshiftlw/kshiftrw $12 pair, then kmovw and movzwl.
1714define zeroext i64 @test_masked_vpcmpeqd_v4i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
1715; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v64i1_mask_mem:
1716; VLX:       # %bb.0: # %entry
1717; VLX-NEXT:    kmovd %edi, %k1
1718; VLX-NEXT:    vpcmpeqd (%rsi), %xmm0, %k0 {%k1}
1719; VLX-NEXT:    kmovq %k0, %rax
1720; VLX-NEXT:    retq
1721;
1722; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v64i1_mask_mem:
1723; NoVLX:       # %bb.0: # %entry
1724; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
1725; NoVLX-NEXT:    vmovdqa (%rsi), %xmm1
1726; NoVLX-NEXT:    kmovw %edi, %k1
1727; NoVLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
1728; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
1729; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
1730; NoVLX-NEXT:    kmovw %k0, %eax
1731; NoVLX-NEXT:    movzwl %ax, %eax
1732; NoVLX-NEXT:    vzeroupper
1733; NoVLX-NEXT:    retq
1734entry:
1735  %0 = bitcast <2 x i64> %__a to <4 x i32>
1736  %load = load <2 x i64>, <2 x i64>* %__b
1737  %1 = bitcast <2 x i64> %load to <4 x i32>
1738  %2 = icmp eq <4 x i32> %0, %1
  ; Reinterpret the i8 mask as eight i1 lanes and keep only lanes 0-3.
1739  %3 = bitcast i8 %__u to <8 x i1>
1740  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ; The compare result is the first operand of the 'and' in this variant.
1741  %4 = and <4 x i1> %2, %extract.i
  ; Shuffle indices 4-7 select lanes of the zeroinitializer operand, so only
  ; result lanes 0-3 can be true.
1742  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
1743  %6 = bitcast <64 x i1> %5 to i64
1744  ret i64 %6
1745}
1746
1747
; Broadcast-memory variant with a 64-bit result: a single i32 loaded from %__b
; is splatted to four lanes and compared with %__a viewed as <4 x i32>; the
; <4 x i1> result is zero-padded to <64 x i1> and returned as i64.
; The run with AVX512VL expects the {1to4} embedded-broadcast operand on
; vpcmpeqd plus kmovq; the run without it expects vpbroadcastd into xmm1, a
; 512-bit compare, the kshiftlw/kshiftrw $12 pair, then kmovw and movzwl.
1748define zeroext i64 @test_vpcmpeqd_v4i1_v64i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
1749; VLX-LABEL: test_vpcmpeqd_v4i1_v64i1_mask_mem_b:
1750; VLX:       # %bb.0: # %entry
1751; VLX-NEXT:    vpcmpeqd (%rdi){1to4}, %xmm0, %k0
1752; VLX-NEXT:    kmovq %k0, %rax
1753; VLX-NEXT:    retq
1754;
1755; NoVLX-LABEL: test_vpcmpeqd_v4i1_v64i1_mask_mem_b:
1756; NoVLX:       # %bb.0: # %entry
1757; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
1758; NoVLX-NEXT:    vpbroadcastd (%rdi), %xmm1
1759; NoVLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
1760; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
1761; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
1762; NoVLX-NEXT:    kmovw %k0, %eax
1763; NoVLX-NEXT:    movzwl %ax, %eax
1764; NoVLX-NEXT:    vzeroupper
1765; NoVLX-NEXT:    retq
1766entry:
1767  %0 = bitcast <2 x i64> %__a to <4 x i32>
  ; Splat idiom: insert the scalar into lane 0, then shuffle lane 0 to every lane.
1768  %load = load i32, i32* %__b
1769  %vec = insertelement <4 x i32> undef, i32 %load, i32 0
1770  %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
1771  %2 = icmp eq <4 x i32> %0, %1
  ; Shuffle indices 4-7 select lanes of the zeroinitializer operand, so only
  ; result lanes 0-3 can be true.
1772  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
1773  %4 = bitcast <64 x i1> %3 to i64
1774  ret i64 %4
1775}
1776
; Masked broadcast-memory variant with a 64-bit result: a single i32 loaded from
; %__b is splatted to four lanes, compared with %__a viewed as <4 x i32>, ANDed
; with the low four bits of %__u, zero-padded to <64 x i1>, and returned as i64.
; The run with AVX512VL expects a write-masked vpcmpeqd with the {1to4}
; embedded-broadcast operand plus kmovq; the run without it expects vpbroadcastd
; into xmm1, a write-masked 512-bit compare, the kshiftlw/kshiftrw $12 pair,
; then kmovw and movzwl.
1777define zeroext i64 @test_masked_vpcmpeqd_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
1778; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v64i1_mask_mem_b:
1779; VLX:       # %bb.0: # %entry
1780; VLX-NEXT:    kmovd %edi, %k1
1781; VLX-NEXT:    vpcmpeqd (%rsi){1to4}, %xmm0, %k0 {%k1}
1782; VLX-NEXT:    kmovq %k0, %rax
1783; VLX-NEXT:    retq
1784;
1785; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v64i1_mask_mem_b:
1786; NoVLX:       # %bb.0: # %entry
1787; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
1788; NoVLX-NEXT:    vpbroadcastd (%rsi), %xmm1
1789; NoVLX-NEXT:    kmovw %edi, %k1
1790; NoVLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
1791; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
1792; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
1793; NoVLX-NEXT:    kmovw %k0, %eax
1794; NoVLX-NEXT:    movzwl %ax, %eax
1795; NoVLX-NEXT:    vzeroupper
1796; NoVLX-NEXT:    retq
1797entry:
1798  %0 = bitcast <2 x i64> %__a to <4 x i32>
  ; Splat idiom: insert the scalar into lane 0, then shuffle lane 0 to every lane.
1799  %load = load i32, i32* %__b
1800  %vec = insertelement <4 x i32> undef, i32 %load, i32 0
1801  %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
1802  %2 = icmp eq <4 x i32> %0, %1
  ; Reinterpret the i8 mask as eight i1 lanes and keep only lanes 0-3.
1803  %3 = bitcast i8 %__u to <8 x i1>
1804  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ; The extracted mask is the first operand of the 'and' in this variant.
1805  %4 = and <4 x i1> %extract.i, %2
  ; Shuffle indices 4-7 select lanes of the zeroinitializer operand, so only
  ; result lanes 0-3 can be true.
1806  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
1807  %6 = bitcast <64 x i1> %5 to i64
1808  ret i64 %6
1809}
1810
1811
; 256-bit register/register variant: compares the two <8 x i32> inputs (bitcast
; from <4 x i64>) for equality and returns the <8 x i1> result zero-padded to
; <16 x i1>, bitcast to i16.
; The run with AVX512VL expects a ymm vpcmpeqd writing a mask register directly;
; the run without it expects the compare on zmm registers followed by a
; kshiftlw/kshiftrw $8 pair that clears mask bits 8-15.
1812define zeroext i16 @test_vpcmpeqd_v8i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
1813; VLX-LABEL: test_vpcmpeqd_v8i1_v16i1_mask:
1814; VLX:       # %bb.0: # %entry
1815; VLX-NEXT:    vpcmpeqd %ymm1, %ymm0, %k0
1816; VLX-NEXT:    kmovd %k0, %eax
1817; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
1818; VLX-NEXT:    vzeroupper
1819; VLX-NEXT:    retq
1820;
1821; NoVLX-LABEL: test_vpcmpeqd_v8i1_v16i1_mask:
1822; NoVLX:       # %bb.0: # %entry
1823; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
1824; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
1825; NoVLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
1826; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
1827; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
1828; NoVLX-NEXT:    kmovw %k0, %eax
1829; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
1830; NoVLX-NEXT:    vzeroupper
1831; NoVLX-NEXT:    retq
1832entry:
1833  %0 = bitcast <4 x i64> %__a to <8 x i32>
1834  %1 = bitcast <4 x i64> %__b to <8 x i32>
1835  %2 = icmp eq <8 x i32> %0, %1
  ; Shuffle indices 8-15 select lanes of the zeroinitializer operand, so only
  ; result lanes 0-7 can be true.
1836  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1837  %4 = bitcast <16 x i1> %3 to i16
1838  ret i16 %4
1839}
1840
; 256-bit memory-operand variant: the right-hand side is loaded from %__b as
; <4 x i64>, compared as <8 x i32>, and the <8 x i1> result is zero-padded to
; <16 x i1> and returned as i16.
; The run with AVX512VL expects the load folded into a ymm vpcmpeqd; the run
; without it expects vmovdqa into ymm1, a 512-bit compare, and the
; kshiftlw/kshiftrw $8 pair that clears mask bits 8-15.
1841define zeroext i16 @test_vpcmpeqd_v8i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
1842; VLX-LABEL: test_vpcmpeqd_v8i1_v16i1_mask_mem:
1843; VLX:       # %bb.0: # %entry
1844; VLX-NEXT:    vpcmpeqd (%rdi), %ymm0, %k0
1845; VLX-NEXT:    kmovd %k0, %eax
1846; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
1847; VLX-NEXT:    vzeroupper
1848; VLX-NEXT:    retq
1849;
1850; NoVLX-LABEL: test_vpcmpeqd_v8i1_v16i1_mask_mem:
1851; NoVLX:       # %bb.0: # %entry
1852; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
1853; NoVLX-NEXT:    vmovdqa (%rdi), %ymm1
1854; NoVLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
1855; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
1856; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
1857; NoVLX-NEXT:    kmovw %k0, %eax
1858; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
1859; NoVLX-NEXT:    vzeroupper
1860; NoVLX-NEXT:    retq
1861entry:
1862  %0 = bitcast <4 x i64> %__a to <8 x i32>
1863  %load = load <4 x i64>, <4 x i64>* %__b
1864  %1 = bitcast <4 x i64> %load to <8 x i32>
1865  %2 = icmp eq <8 x i32> %0, %1
  ; Shuffle indices 8-15 select lanes of the zeroinitializer operand, so only
  ; result lanes 0-7 can be true.
1866  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1867  %4 = bitcast <16 x i1> %3 to i16
1868  ret i16 %4
1869}
1870
; 256-bit masked register/register variant: the <8 x i32> equality result is
; ANDed with all eight bits of %__u, zero-padded to <16 x i1>, and returned as
; i16.
; Both runs expect %edi moved into k1 and used as the write-mask of vpcmpeqd;
; the run without AVX512VL additionally expects the 512-bit compare and the
; kshiftlw/kshiftrw $8 pair that clears mask bits 8-15.
1871define zeroext i16 @test_masked_vpcmpeqd_v8i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
1872; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v16i1_mask:
1873; VLX:       # %bb.0: # %entry
1874; VLX-NEXT:    kmovd %edi, %k1
1875; VLX-NEXT:    vpcmpeqd %ymm1, %ymm0, %k0 {%k1}
1876; VLX-NEXT:    kmovd %k0, %eax
1877; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
1878; VLX-NEXT:    vzeroupper
1879; VLX-NEXT:    retq
1880;
1881; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v16i1_mask:
1882; NoVLX:       # %bb.0: # %entry
1883; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
1884; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
1885; NoVLX-NEXT:    kmovw %edi, %k1
1886; NoVLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
1887; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
1888; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
1889; NoVLX-NEXT:    kmovw %k0, %eax
1890; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
1891; NoVLX-NEXT:    vzeroupper
1892; NoVLX-NEXT:    retq
1893entry:
1894  %0 = bitcast <4 x i64> %__a to <8 x i32>
1895  %1 = bitcast <4 x i64> %__b to <8 x i32>
1896  %2 = icmp eq <8 x i32> %0, %1
  ; The i8 mask maps one bit per compare lane, so no lane extraction is needed.
1897  %3 = bitcast i8 %__u to <8 x i1>
1898  %4 = and <8 x i1> %2, %3
  ; Shuffle indices 8-15 select lanes of the zeroinitializer operand, so only
  ; result lanes 0-7 can be true.
1899  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1900  %6 = bitcast <16 x i1> %5 to i16
1901  ret i16 %6
1902}
1903
; 256-bit masked memory-operand variant: the right-hand side is loaded from
; %__b as <4 x i64>, compared as <8 x i32>, ANDed with all eight bits of %__u,
; zero-padded to <16 x i1>, and returned as i16.
; The run with AVX512VL expects the load folded into a write-masked ymm
; vpcmpeqd; the run without it expects vmovdqa into ymm1, a write-masked 512-bit
; compare, and the kshiftlw/kshiftrw $8 pair that clears mask bits 8-15.
1904define zeroext i16 @test_masked_vpcmpeqd_v8i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
1905; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v16i1_mask_mem:
1906; VLX:       # %bb.0: # %entry
1907; VLX-NEXT:    kmovd %edi, %k1
1908; VLX-NEXT:    vpcmpeqd (%rsi), %ymm0, %k0 {%k1}
1909; VLX-NEXT:    kmovd %k0, %eax
1910; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
1911; VLX-NEXT:    vzeroupper
1912; VLX-NEXT:    retq
1913;
1914; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v16i1_mask_mem:
1915; NoVLX:       # %bb.0: # %entry
1916; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
1917; NoVLX-NEXT:    vmovdqa (%rsi), %ymm1
1918; NoVLX-NEXT:    kmovw %edi, %k1
1919; NoVLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
1920; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
1921; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
1922; NoVLX-NEXT:    kmovw %k0, %eax
1923; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
1924; NoVLX-NEXT:    vzeroupper
1925; NoVLX-NEXT:    retq
1926entry:
1927  %0 = bitcast <4 x i64> %__a to <8 x i32>
1928  %load = load <4 x i64>, <4 x i64>* %__b
1929  %1 = bitcast <4 x i64> %load to <8 x i32>
1930  %2 = icmp eq <8 x i32> %0, %1
  ; The i8 mask maps one bit per compare lane, so no lane extraction is needed.
1931  %3 = bitcast i8 %__u to <8 x i1>
1932  %4 = and <8 x i1> %2, %3
  ; Shuffle indices 8-15 select lanes of the zeroinitializer operand, so only
  ; result lanes 0-7 can be true.
1933  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1934  %6 = bitcast <16 x i1> %5 to i16
1935  ret i16 %6
1936}
1937
1938
; Unmasked 8 x i32 equality compare of %__a against a single i32 loaded from
; %__b and splatted to all 8 lanes. The 8-lane result is padded with zero
; lanes to 16 lanes and returned as an i16.
1939define zeroext i16 @test_vpcmpeqd_v8i1_v16i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
1940; VLX-LABEL: test_vpcmpeqd_v8i1_v16i1_mask_mem_b:
1941; VLX:       # %bb.0: # %entry
1942; VLX-NEXT:    vpcmpeqd (%rdi){1to8}, %ymm0, %k0
1943; VLX-NEXT:    kmovd %k0, %eax
1944; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
1945; VLX-NEXT:    vzeroupper
1946; VLX-NEXT:    retq
1947;
1948; NoVLX-LABEL: test_vpcmpeqd_v8i1_v16i1_mask_mem_b:
1949; NoVLX:       # %bb.0: # %entry
1950; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
1951; NoVLX-NEXT:    vpbroadcastd (%rdi), %ymm1
1952; NoVLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
1953; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
1954; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
1955; NoVLX-NEXT:    kmovw %k0, %eax
1956; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
1957; NoVLX-NEXT:    vzeroupper
1958; NoVLX-NEXT:    retq
1959entry:
1960  %0 = bitcast <4 x i64> %__a to <8 x i32>
1961  %load = load i32, i32* %__b
; Splat the loaded scalar: insert into lane 0, then shuffle lane 0 everywhere.
1962  %vec = insertelement <8 x i32> undef, i32 %load, i32 0
1963  %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
1964  %2 = icmp eq <8 x i32> %0, %1
; Shuffle indices 8-15 select lanes of the zeroinitializer operand, so the
; upper half of the widened mask is all zero.
1965  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1966  %4 = bitcast <16 x i1> %3 to i16
1967  ret i16 %4
1968}
1969
; Masked 8 x i32 equality compare of %__a against a single i32 loaded from
; %__b and splatted to all 8 lanes. The result is ANDed with the bits of %__u,
; padded with zero lanes to 16 lanes, and returned as an i16.
1970define zeroext i16 @test_masked_vpcmpeqd_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
1971; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v16i1_mask_mem_b:
1972; VLX:       # %bb.0: # %entry
1973; VLX-NEXT:    kmovd %edi, %k1
1974; VLX-NEXT:    vpcmpeqd (%rsi){1to8}, %ymm0, %k0 {%k1}
1975; VLX-NEXT:    kmovd %k0, %eax
1976; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
1977; VLX-NEXT:    vzeroupper
1978; VLX-NEXT:    retq
1979;
1980; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v16i1_mask_mem_b:
1981; NoVLX:       # %bb.0: # %entry
1982; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
1983; NoVLX-NEXT:    vpbroadcastd (%rsi), %ymm1
1984; NoVLX-NEXT:    kmovw %edi, %k1
1985; NoVLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
1986; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
1987; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
1988; NoVLX-NEXT:    kmovw %k0, %eax
1989; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
1990; NoVLX-NEXT:    vzeroupper
1991; NoVLX-NEXT:    retq
1992entry:
1993  %0 = bitcast <4 x i64> %__a to <8 x i32>
1994  %load = load i32, i32* %__b
; Splat the loaded scalar: insert into lane 0, then shuffle lane 0 everywhere.
1995  %vec = insertelement <8 x i32> undef, i32 %load, i32 0
1996  %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
1997  %2 = icmp eq <8 x i32> %0, %1
1998  %3 = bitcast i8 %__u to <8 x i1>
; Note the operand order here is (mask, compare), unlike the non-broadcast
; masked variants, which use (compare, mask).
1999  %4 = and <8 x i1> %3, %2
; Shuffle indices 8-15 select lanes of the zeroinitializer operand, so the
; upper half of the widened mask is all zero.
2000  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2001  %6 = bitcast <16 x i1> %5 to i16
2002  ret i16 %6
2003}
2004
2005
; Unmasked 8 x i32 equality compare of two register operands. The 8-lane
; result is padded with zero lanes to 32 lanes and returned as an i32.
2006define zeroext i32 @test_vpcmpeqd_v8i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
2007; VLX-LABEL: test_vpcmpeqd_v8i1_v32i1_mask:
2008; VLX:       # %bb.0: # %entry
2009; VLX-NEXT:    vpcmpeqd %ymm1, %ymm0, %k0
2010; VLX-NEXT:    kmovd %k0, %eax
2011; VLX-NEXT:    vzeroupper
2012; VLX-NEXT:    retq
2013;
2014; NoVLX-LABEL: test_vpcmpeqd_v8i1_v32i1_mask:
2015; NoVLX:       # %bb.0: # %entry
2016; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
2017; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
2018; NoVLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
2019; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
2020; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
2021; NoVLX-NEXT:    kmovw %k0, %eax
2022; NoVLX-NEXT:    vzeroupper
2023; NoVLX-NEXT:    retq
2024entry:
2025  %0 = bitcast <4 x i64> %__a to <8 x i32>
2026  %1 = bitcast <4 x i64> %__b to <8 x i32>
2027  %2 = icmp eq <8 x i32> %0, %1
; Every shuffle index >= 8 selects a lane of the zeroinitializer operand
; (indices 8-15 are reused), so result lanes 8-31 are all zero.
2028  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2029  %4 = bitcast <32 x i1> %3 to i32
2030  ret i32 %4
2031}
2032
; Unmasked 8 x i32 equality compare of %__a against a <4 x i64> loaded from
; %__b. The 8-lane result is padded with zero lanes to 32 lanes and returned
; as an i32.
2033define zeroext i32 @test_vpcmpeqd_v8i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
2034; VLX-LABEL: test_vpcmpeqd_v8i1_v32i1_mask_mem:
2035; VLX:       # %bb.0: # %entry
2036; VLX-NEXT:    vpcmpeqd (%rdi), %ymm0, %k0
2037; VLX-NEXT:    kmovd %k0, %eax
2038; VLX-NEXT:    vzeroupper
2039; VLX-NEXT:    retq
2040;
2041; NoVLX-LABEL: test_vpcmpeqd_v8i1_v32i1_mask_mem:
2042; NoVLX:       # %bb.0: # %entry
2043; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
2044; NoVLX-NEXT:    vmovdqa (%rdi), %ymm1
2045; NoVLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
2046; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
2047; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
2048; NoVLX-NEXT:    kmovw %k0, %eax
2049; NoVLX-NEXT:    vzeroupper
2050; NoVLX-NEXT:    retq
2051entry:
2052  %0 = bitcast <4 x i64> %__a to <8 x i32>
2053  %load = load <4 x i64>, <4 x i64>* %__b
2054  %1 = bitcast <4 x i64> %load to <8 x i32>
2055  %2 = icmp eq <8 x i32> %0, %1
; Every shuffle index >= 8 selects a lane of the zeroinitializer operand
; (indices 8-15 are reused), so result lanes 8-31 are all zero.
2056  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2057  %4 = bitcast <32 x i1> %3 to i32
2058  ret i32 %4
2059}
2060
; Masked 8 x i32 equality compare of two register operands. The result is
; ANDed with the bits of %__u, padded with zero lanes to 32 lanes, and
; returned as an i32.
2061define zeroext i32 @test_masked_vpcmpeqd_v8i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
2062; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask:
2063; VLX:       # %bb.0: # %entry
2064; VLX-NEXT:    kmovd %edi, %k1
2065; VLX-NEXT:    vpcmpeqd %ymm1, %ymm0, %k0 {%k1}
2066; VLX-NEXT:    kmovd %k0, %eax
2067; VLX-NEXT:    vzeroupper
2068; VLX-NEXT:    retq
2069;
2070; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask:
2071; NoVLX:       # %bb.0: # %entry
2072; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
2073; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
2074; NoVLX-NEXT:    kmovw %edi, %k1
2075; NoVLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
2076; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
2077; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
2078; NoVLX-NEXT:    kmovw %k0, %eax
2079; NoVLX-NEXT:    vzeroupper
2080; NoVLX-NEXT:    retq
2081entry:
2082  %0 = bitcast <4 x i64> %__a to <8 x i32>
2083  %1 = bitcast <4 x i64> %__b to <8 x i32>
2084  %2 = icmp eq <8 x i32> %0, %1
2085  %3 = bitcast i8 %__u to <8 x i1>
2086  %4 = and <8 x i1> %2, %3
; Every shuffle index >= 8 selects a lane of the zeroinitializer operand
; (indices 8-15 are reused), so result lanes 8-31 are all zero.
2087  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2088  %6 = bitcast <32 x i1> %5 to i32
2089  ret i32 %6
2090}
2091
; Masked 8 x i32 equality compare of %__a against a <4 x i64> loaded from %__b.
; The result is ANDed with the bits of %__u, padded with zero lanes to 32
; lanes, and returned as an i32.
2092define zeroext i32 @test_masked_vpcmpeqd_v8i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
2093; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask_mem:
2094; VLX:       # %bb.0: # %entry
2095; VLX-NEXT:    kmovd %edi, %k1
2096; VLX-NEXT:    vpcmpeqd (%rsi), %ymm0, %k0 {%k1}
2097; VLX-NEXT:    kmovd %k0, %eax
2098; VLX-NEXT:    vzeroupper
2099; VLX-NEXT:    retq
2100;
2101; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask_mem:
2102; NoVLX:       # %bb.0: # %entry
2103; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
2104; NoVLX-NEXT:    vmovdqa (%rsi), %ymm1
2105; NoVLX-NEXT:    kmovw %edi, %k1
2106; NoVLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
2107; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
2108; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
2109; NoVLX-NEXT:    kmovw %k0, %eax
2110; NoVLX-NEXT:    vzeroupper
2111; NoVLX-NEXT:    retq
2112entry:
2113  %0 = bitcast <4 x i64> %__a to <8 x i32>
2114  %load = load <4 x i64>, <4 x i64>* %__b
2115  %1 = bitcast <4 x i64> %load to <8 x i32>
2116  %2 = icmp eq <8 x i32> %0, %1
2117  %3 = bitcast i8 %__u to <8 x i1>
2118  %4 = and <8 x i1> %2, %3
; Every shuffle index >= 8 selects a lane of the zeroinitializer operand
; (indices 8-15 are reused), so result lanes 8-31 are all zero.
2119  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2120  %6 = bitcast <32 x i1> %5 to i32
2121  ret i32 %6
2122}
2123
2124
; Unmasked 8 x i32 equality compare of %__a against a single i32 loaded from
; %__b and splatted to all 8 lanes. The 8-lane result is padded with zero
; lanes to 32 lanes and returned as an i32.
2125define zeroext i32 @test_vpcmpeqd_v8i1_v32i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
2126; VLX-LABEL: test_vpcmpeqd_v8i1_v32i1_mask_mem_b:
2127; VLX:       # %bb.0: # %entry
2128; VLX-NEXT:    vpcmpeqd (%rdi){1to8}, %ymm0, %k0
2129; VLX-NEXT:    kmovd %k0, %eax
2130; VLX-NEXT:    vzeroupper
2131; VLX-NEXT:    retq
2132;
2133; NoVLX-LABEL: test_vpcmpeqd_v8i1_v32i1_mask_mem_b:
2134; NoVLX:       # %bb.0: # %entry
2135; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
2136; NoVLX-NEXT:    vpbroadcastd (%rdi), %ymm1
2137; NoVLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
2138; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
2139; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
2140; NoVLX-NEXT:    kmovw %k0, %eax
2141; NoVLX-NEXT:    vzeroupper
2142; NoVLX-NEXT:    retq
2143entry:
2144  %0 = bitcast <4 x i64> %__a to <8 x i32>
2145  %load = load i32, i32* %__b
; Splat the loaded scalar: insert into lane 0, then shuffle lane 0 everywhere.
2146  %vec = insertelement <8 x i32> undef, i32 %load, i32 0
2147  %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
2148  %2 = icmp eq <8 x i32> %0, %1
; Every shuffle index >= 8 selects a lane of the zeroinitializer operand
; (indices 8-15 are reused), so result lanes 8-31 are all zero.
2149  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2150  %4 = bitcast <32 x i1> %3 to i32
2151  ret i32 %4
2152}
2153
; Masked 8 x i32 equality compare of %__a against a single i32 loaded from
; %__b and splatted to all 8 lanes. The result is ANDed with the bits of %__u,
; padded with zero lanes to 32 lanes, and returned as an i32.
2154define zeroext i32 @test_masked_vpcmpeqd_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
2155; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask_mem_b:
2156; VLX:       # %bb.0: # %entry
2157; VLX-NEXT:    kmovd %edi, %k1
2158; VLX-NEXT:    vpcmpeqd (%rsi){1to8}, %ymm0, %k0 {%k1}
2159; VLX-NEXT:    kmovd %k0, %eax
2160; VLX-NEXT:    vzeroupper
2161; VLX-NEXT:    retq
2162;
2163; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask_mem_b:
2164; NoVLX:       # %bb.0: # %entry
2165; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
2166; NoVLX-NEXT:    vpbroadcastd (%rsi), %ymm1
2167; NoVLX-NEXT:    kmovw %edi, %k1
2168; NoVLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
2169; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
2170; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
2171; NoVLX-NEXT:    kmovw %k0, %eax
2172; NoVLX-NEXT:    vzeroupper
2173; NoVLX-NEXT:    retq
2174entry:
2175  %0 = bitcast <4 x i64> %__a to <8 x i32>
2176  %load = load i32, i32* %__b
; Splat the loaded scalar: insert into lane 0, then shuffle lane 0 everywhere.
2177  %vec = insertelement <8 x i32> undef, i32 %load, i32 0
2178  %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
2179  %2 = icmp eq <8 x i32> %0, %1
2180  %3 = bitcast i8 %__u to <8 x i1>
; Note the operand order here is (mask, compare), unlike the non-broadcast
; masked variants, which use (compare, mask).
2181  %4 = and <8 x i1> %3, %2
; Every shuffle index >= 8 selects a lane of the zeroinitializer operand
; (indices 8-15 are reused), so result lanes 8-31 are all zero.
2182  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2183  %6 = bitcast <32 x i1> %5 to i32
2184  ret i32 %6
2185}
2186
2187
; Unmasked 8 x i32 equality compare of two register operands. The 8-lane
; result is padded with zero lanes to 64 lanes and returned as an i64.
2188define zeroext i64 @test_vpcmpeqd_v8i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
2189; VLX-LABEL: test_vpcmpeqd_v8i1_v64i1_mask:
2190; VLX:       # %bb.0: # %entry
2191; VLX-NEXT:    vpcmpeqd %ymm1, %ymm0, %k0
2192; VLX-NEXT:    kmovq %k0, %rax
2193; VLX-NEXT:    vzeroupper
2194; VLX-NEXT:    retq
2195;
2196; NoVLX-LABEL: test_vpcmpeqd_v8i1_v64i1_mask:
2197; NoVLX:       # %bb.0: # %entry
2198; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
2199; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
2200; NoVLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
2201; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
2202; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
2203; NoVLX-NEXT:    kmovw %k0, %eax
2204; NoVLX-NEXT:    movzwl %ax, %eax
2205; NoVLX-NEXT:    vzeroupper
2206; NoVLX-NEXT:    retq
2207entry:
2208  %0 = bitcast <4 x i64> %__a to <8 x i32>
2209  %1 = bitcast <4 x i64> %__b to <8 x i32>
2210  %2 = icmp eq <8 x i32> %0, %1
; Every shuffle index >= 8 selects a lane of the zeroinitializer operand
; (indices 8-15 are reused), so result lanes 8-63 are all zero.
2211  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2212  %4 = bitcast <64 x i1> %3 to i64
2213  ret i64 %4
2214}
2215
; Unmasked 8 x i32 equality compare of %__a against a <4 x i64> loaded from
; %__b. The 8-lane result is padded with zero lanes to 64 lanes and returned
; as an i64.
2216define zeroext i64 @test_vpcmpeqd_v8i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
2217; VLX-LABEL: test_vpcmpeqd_v8i1_v64i1_mask_mem:
2218; VLX:       # %bb.0: # %entry
2219; VLX-NEXT:    vpcmpeqd (%rdi), %ymm0, %k0
2220; VLX-NEXT:    kmovq %k0, %rax
2221; VLX-NEXT:    vzeroupper
2222; VLX-NEXT:    retq
2223;
2224; NoVLX-LABEL: test_vpcmpeqd_v8i1_v64i1_mask_mem:
2225; NoVLX:       # %bb.0: # %entry
2226; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
2227; NoVLX-NEXT:    vmovdqa (%rdi), %ymm1
2228; NoVLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
2229; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
2230; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
2231; NoVLX-NEXT:    kmovw %k0, %eax
2232; NoVLX-NEXT:    movzwl %ax, %eax
2233; NoVLX-NEXT:    vzeroupper
2234; NoVLX-NEXT:    retq
2235entry:
2236  %0 = bitcast <4 x i64> %__a to <8 x i32>
2237  %load = load <4 x i64>, <4 x i64>* %__b
2238  %1 = bitcast <4 x i64> %load to <8 x i32>
2239  %2 = icmp eq <8 x i32> %0, %1
; Every shuffle index >= 8 selects a lane of the zeroinitializer operand
; (indices 8-15 are reused), so result lanes 8-63 are all zero.
2240  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2241  %4 = bitcast <64 x i1> %3 to i64
2242  ret i64 %4
2243}
2244
; Masked 8 x i32 equality compare of two register operands. The result is
; ANDed with the bits of %__u, padded with zero lanes to 64 lanes, and
; returned as an i64.
2245define zeroext i64 @test_masked_vpcmpeqd_v8i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
2246; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v64i1_mask:
2247; VLX:       # %bb.0: # %entry
2248; VLX-NEXT:    kmovd %edi, %k1
2249; VLX-NEXT:    vpcmpeqd %ymm1, %ymm0, %k0 {%k1}
2250; VLX-NEXT:    kmovq %k0, %rax
2251; VLX-NEXT:    vzeroupper
2252; VLX-NEXT:    retq
2253;
2254; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v64i1_mask:
2255; NoVLX:       # %bb.0: # %entry
2256; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
2257; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
2258; NoVLX-NEXT:    kmovw %edi, %k1
2259; NoVLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
2260; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
2261; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
2262; NoVLX-NEXT:    kmovw %k0, %eax
2263; NoVLX-NEXT:    movzwl %ax, %eax
2264; NoVLX-NEXT:    vzeroupper
2265; NoVLX-NEXT:    retq
2266entry:
2267  %0 = bitcast <4 x i64> %__a to <8 x i32>
2268  %1 = bitcast <4 x i64> %__b to <8 x i32>
2269  %2 = icmp eq <8 x i32> %0, %1
2270  %3 = bitcast i8 %__u to <8 x i1>
2271  %4 = and <8 x i1> %2, %3
; Every shuffle index >= 8 selects a lane of the zeroinitializer operand
; (indices 8-15 are reused), so result lanes 8-63 are all zero.
2272  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2273  %6 = bitcast <64 x i1> %5 to i64
2274  ret i64 %6
2275}
2276
; Masked 8 x i32 equality compare of %__a against a <4 x i64> loaded from %__b.
; The result is ANDed with the bits of %__u, padded with zero lanes to 64
; lanes, and returned as an i64.
2277define zeroext i64 @test_masked_vpcmpeqd_v8i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
2278; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v64i1_mask_mem:
2279; VLX:       # %bb.0: # %entry
2280; VLX-NEXT:    kmovd %edi, %k1
2281; VLX-NEXT:    vpcmpeqd (%rsi), %ymm0, %k0 {%k1}
2282; VLX-NEXT:    kmovq %k0, %rax
2283; VLX-NEXT:    vzeroupper
2284; VLX-NEXT:    retq
2285;
2286; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v64i1_mask_mem:
2287; NoVLX:       # %bb.0: # %entry
2288; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
2289; NoVLX-NEXT:    vmovdqa (%rsi), %ymm1
2290; NoVLX-NEXT:    kmovw %edi, %k1
2291; NoVLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
2292; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
2293; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
2294; NoVLX-NEXT:    kmovw %k0, %eax
2295; NoVLX-NEXT:    movzwl %ax, %eax
2296; NoVLX-NEXT:    vzeroupper
2297; NoVLX-NEXT:    retq
2298entry:
2299  %0 = bitcast <4 x i64> %__a to <8 x i32>
2300  %load = load <4 x i64>, <4 x i64>* %__b
2301  %1 = bitcast <4 x i64> %load to <8 x i32>
2302  %2 = icmp eq <8 x i32> %0, %1
2303  %3 = bitcast i8 %__u to <8 x i1>
2304  %4 = and <8 x i1> %2, %3
; Every shuffle index >= 8 selects a lane of the zeroinitializer operand
; (indices 8-15 are reused), so result lanes 8-63 are all zero.
2305  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2306  %6 = bitcast <64 x i1> %5 to i64
2307  ret i64 %6
2308}
2309
2310
; Unmasked 8 x i32 equality compare of %__a against a single i32 loaded from
; %__b and splatted to all 8 lanes. The 8-lane result is padded with zero
; lanes to 64 lanes and returned as an i64.
2311define zeroext i64 @test_vpcmpeqd_v8i1_v64i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
2312; VLX-LABEL: test_vpcmpeqd_v8i1_v64i1_mask_mem_b:
2313; VLX:       # %bb.0: # %entry
2314; VLX-NEXT:    vpcmpeqd (%rdi){1to8}, %ymm0, %k0
2315; VLX-NEXT:    kmovq %k0, %rax
2316; VLX-NEXT:    vzeroupper
2317; VLX-NEXT:    retq
2318;
2319; NoVLX-LABEL: test_vpcmpeqd_v8i1_v64i1_mask_mem_b:
2320; NoVLX:       # %bb.0: # %entry
2321; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
2322; NoVLX-NEXT:    vpbroadcastd (%rdi), %ymm1
2323; NoVLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
2324; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
2325; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
2326; NoVLX-NEXT:    kmovw %k0, %eax
2327; NoVLX-NEXT:    movzwl %ax, %eax
2328; NoVLX-NEXT:    vzeroupper
2329; NoVLX-NEXT:    retq
2330entry:
2331  %0 = bitcast <4 x i64> %__a to <8 x i32>
2332  %load = load i32, i32* %__b
; Splat the loaded scalar: insert into lane 0, then shuffle lane 0 everywhere.
2333  %vec = insertelement <8 x i32> undef, i32 %load, i32 0
2334  %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
2335  %2 = icmp eq <8 x i32> %0, %1
; Every shuffle index >= 8 selects a lane of the zeroinitializer operand
; (indices 8-15 are reused), so result lanes 8-63 are all zero.
2336  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2337  %4 = bitcast <64 x i1> %3 to i64
2338  ret i64 %4
2339}
2340
; Masked 8 x i32 equality compare of %__a against a single i32 loaded from
; %__b and splatted to all 8 lanes. The result is ANDed with the bits of %__u,
; padded with zero lanes to 64 lanes, and returned as an i64.
2341define zeroext i64 @test_masked_vpcmpeqd_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
2342; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v64i1_mask_mem_b:
2343; VLX:       # %bb.0: # %entry
2344; VLX-NEXT:    kmovd %edi, %k1
2345; VLX-NEXT:    vpcmpeqd (%rsi){1to8}, %ymm0, %k0 {%k1}
2346; VLX-NEXT:    kmovq %k0, %rax
2347; VLX-NEXT:    vzeroupper
2348; VLX-NEXT:    retq
2349;
2350; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v64i1_mask_mem_b:
2351; NoVLX:       # %bb.0: # %entry
2352; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
2353; NoVLX-NEXT:    vpbroadcastd (%rsi), %ymm1
2354; NoVLX-NEXT:    kmovw %edi, %k1
2355; NoVLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
2356; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
2357; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
2358; NoVLX-NEXT:    kmovw %k0, %eax
2359; NoVLX-NEXT:    movzwl %ax, %eax
2360; NoVLX-NEXT:    vzeroupper
2361; NoVLX-NEXT:    retq
2362entry:
2363  %0 = bitcast <4 x i64> %__a to <8 x i32>
2364  %load = load i32, i32* %__b
; Splat the loaded scalar: insert into lane 0, then shuffle lane 0 everywhere.
2365  %vec = insertelement <8 x i32> undef, i32 %load, i32 0
2366  %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
2367  %2 = icmp eq <8 x i32> %0, %1
2368  %3 = bitcast i8 %__u to <8 x i1>
; Note the operand order here is (mask, compare), unlike the non-broadcast
; masked variants, which use (compare, mask).
2369  %4 = and <8 x i1> %3, %2
; Every shuffle index >= 8 selects a lane of the zeroinitializer operand
; (indices 8-15 are reused), so result lanes 8-63 are all zero.
2370  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2371  %6 = bitcast <64 x i1> %5 to i64
2372  ret i64 %6
2373}
2374
2375
; Unmasked 16 x i32 equality compare of two register operands. The 16-lane
; result is padded with zero lanes to 32 lanes and returned as an i32.
2376define zeroext i32 @test_vpcmpeqd_v16i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
2377; VLX-LABEL: test_vpcmpeqd_v16i1_v32i1_mask:
2378; VLX:       # %bb.0: # %entry
2379; VLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
2380; VLX-NEXT:    kmovd %k0, %eax
2381; VLX-NEXT:    vzeroupper
2382; VLX-NEXT:    retq
2383;
2384; NoVLX-LABEL: test_vpcmpeqd_v16i1_v32i1_mask:
2385; NoVLX:       # %bb.0: # %entry
2386; NoVLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
2387; NoVLX-NEXT:    kmovw %k0, %eax
2388; NoVLX-NEXT:    vzeroupper
2389; NoVLX-NEXT:    retq
2390entry:
2391  %0 = bitcast <8 x i64> %__a to <16 x i32>
2392  %1 = bitcast <8 x i64> %__b to <16 x i32>
2393  %2 = icmp eq <16 x i32> %0, %1
; Shuffle indices 16-31 select lanes of the zeroinitializer operand, so the
; upper half of the widened mask is all zero.
2394  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
2395  %4 = bitcast <32 x i1> %3 to i32
2396  ret i32 %4
2397}
2398
; Unmasked 16 x i32 equality compare of %__a against an <8 x i64> loaded from
; %__b. The 16-lane result is padded with zero lanes to 32 lanes and returned
; as an i32.
2399define zeroext i32 @test_vpcmpeqd_v16i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
2400; VLX-LABEL: test_vpcmpeqd_v16i1_v32i1_mask_mem:
2401; VLX:       # %bb.0: # %entry
2402; VLX-NEXT:    vpcmpeqd (%rdi), %zmm0, %k0
2403; VLX-NEXT:    kmovd %k0, %eax
2404; VLX-NEXT:    vzeroupper
2405; VLX-NEXT:    retq
2406;
2407; NoVLX-LABEL: test_vpcmpeqd_v16i1_v32i1_mask_mem:
2408; NoVLX:       # %bb.0: # %entry
2409; NoVLX-NEXT:    vpcmpeqd (%rdi), %zmm0, %k0
2410; NoVLX-NEXT:    kmovw %k0, %eax
2411; NoVLX-NEXT:    vzeroupper
2412; NoVLX-NEXT:    retq
2413entry:
2414  %0 = bitcast <8 x i64> %__a to <16 x i32>
2415  %load = load <8 x i64>, <8 x i64>* %__b
2416  %1 = bitcast <8 x i64> %load to <16 x i32>
2417  %2 = icmp eq <16 x i32> %0, %1
; Shuffle indices 16-31 select lanes of the zeroinitializer operand, so the
; upper half of the widened mask is all zero.
2418  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
2419  %4 = bitcast <32 x i1> %3 to i32
2420  ret i32 %4
2421}
2422
; Masked 16 x i32 equality compare of two register operands. The result is
; ANDed with the bits of %__u, padded with zero lanes to 32 lanes, and
; returned as an i32. The AVX512F-only expectations below apply the mask with
; a scalar andl on the extracted bits rather than a {%k1} write-mask.
2423define zeroext i32 @test_masked_vpcmpeqd_v16i1_v32i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
2424; VLX-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask:
2425; VLX:       # %bb.0: # %entry
2426; VLX-NEXT:    kmovd %edi, %k1
2427; VLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
2428; VLX-NEXT:    kmovd %k0, %eax
2429; VLX-NEXT:    vzeroupper
2430; VLX-NEXT:    retq
2431;
2432; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask:
2433; NoVLX:       # %bb.0: # %entry
2434; NoVLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
2435; NoVLX-NEXT:    kmovw %k0, %eax
2436; NoVLX-NEXT:    andl %edi, %eax
2437; NoVLX-NEXT:    vzeroupper
2438; NoVLX-NEXT:    retq
2439entry:
2440  %0 = bitcast <8 x i64> %__a to <16 x i32>
2441  %1 = bitcast <8 x i64> %__b to <16 x i32>
2442  %2 = icmp eq <16 x i32> %0, %1
2443  %3 = bitcast i16 %__u to <16 x i1>
2444  %4 = and <16 x i1> %2, %3
; Shuffle indices 16-31 select lanes of the zeroinitializer operand, so the
; upper half of the widened mask is all zero.
2445  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
2446  %6 = bitcast <32 x i1> %5 to i32
2447  ret i32 %6
2448}
2449
; Masked 16 x i32 equality compare of %__a against an <8 x i64> loaded from
; %__b. The result is ANDed with the bits of %__u, padded with zero lanes to
; 32 lanes, and returned as an i32. The AVX512F-only expectations below apply
; the mask with a scalar andl on the extracted bits rather than a {%k1}
; write-mask.
2450define zeroext i32 @test_masked_vpcmpeqd_v16i1_v32i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
2451; VLX-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask_mem:
2452; VLX:       # %bb.0: # %entry
2453; VLX-NEXT:    kmovd %edi, %k1
2454; VLX-NEXT:    vpcmpeqd (%rsi), %zmm0, %k0 {%k1}
2455; VLX-NEXT:    kmovd %k0, %eax
2456; VLX-NEXT:    vzeroupper
2457; VLX-NEXT:    retq
2458;
2459; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask_mem:
2460; NoVLX:       # %bb.0: # %entry
2461; NoVLX-NEXT:    vpcmpeqd (%rsi), %zmm0, %k0
2462; NoVLX-NEXT:    kmovw %k0, %eax
2463; NoVLX-NEXT:    andl %edi, %eax
2464; NoVLX-NEXT:    vzeroupper
2465; NoVLX-NEXT:    retq
2466entry:
2467  %0 = bitcast <8 x i64> %__a to <16 x i32>
2468  %load = load <8 x i64>, <8 x i64>* %__b
2469  %1 = bitcast <8 x i64> %load to <16 x i32>
2470  %2 = icmp eq <16 x i32> %0, %1
2471  %3 = bitcast i16 %__u to <16 x i1>
2472  %4 = and <16 x i1> %2, %3
; Shuffle indices 16-31 select lanes of the zeroinitializer operand, so the
; upper half of the widened mask is all zero.
2473  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
2474  %6 = bitcast <32 x i1> %5 to i32
2475  ret i32 %6
2476}
2477
2478
; Unmasked 16 x i32 equality compare of %__a against a single i32 loaded from
; %__b and splatted to all 16 lanes. The 16-lane result is padded with zero
; lanes to 32 lanes and returned as an i32.
2479define zeroext i32 @test_vpcmpeqd_v16i1_v32i1_mask_mem_b(<8 x i64> %__a, i32* %__b) local_unnamed_addr {
2480; VLX-LABEL: test_vpcmpeqd_v16i1_v32i1_mask_mem_b:
2481; VLX:       # %bb.0: # %entry
2482; VLX-NEXT:    vpcmpeqd (%rdi){1to16}, %zmm0, %k0
2483; VLX-NEXT:    kmovd %k0, %eax
2484; VLX-NEXT:    vzeroupper
2485; VLX-NEXT:    retq
2486;
2487; NoVLX-LABEL: test_vpcmpeqd_v16i1_v32i1_mask_mem_b:
2488; NoVLX:       # %bb.0: # %entry
2489; NoVLX-NEXT:    vpcmpeqd (%rdi){1to16}, %zmm0, %k0
2490; NoVLX-NEXT:    kmovw %k0, %eax
2491; NoVLX-NEXT:    vzeroupper
2492; NoVLX-NEXT:    retq
2493entry:
2494  %0 = bitcast <8 x i64> %__a to <16 x i32>
2495  %load = load i32, i32* %__b
; Splat the loaded scalar: insert into lane 0, then shuffle lane 0 everywhere.
2496  %vec = insertelement <16 x i32> undef, i32 %load, i32 0
2497  %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
2498  %2 = icmp eq <16 x i32> %0, %1
; Shuffle indices 16-31 select lanes of the zeroinitializer operand, so the
; upper half of the widened mask is all zero.
2499  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
2500  %4 = bitcast <32 x i1> %3 to i32
2501  ret i32 %4
2502}
2503
; Masked 16 x i32 equality compare of %__a against a single i32 loaded from
; %__b and splatted to all 16 lanes. The result is ANDed with the bits of
; %__u, padded with zero lanes to 32 lanes, and returned as an i32. The
; AVX512F-only expectations below apply the mask with a scalar andl on the
; extracted bits rather than a {%k1} write-mask.
2504define zeroext i32 @test_masked_vpcmpeqd_v16i1_v32i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, i32* %__b) local_unnamed_addr {
2505; VLX-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask_mem_b:
2506; VLX:       # %bb.0: # %entry
2507; VLX-NEXT:    kmovd %edi, %k1
2508; VLX-NEXT:    vpcmpeqd (%rsi){1to16}, %zmm0, %k0 {%k1}
2509; VLX-NEXT:    kmovd %k0, %eax
2510; VLX-NEXT:    vzeroupper
2511; VLX-NEXT:    retq
2512;
2513; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask_mem_b:
2514; NoVLX:       # %bb.0: # %entry
2515; NoVLX-NEXT:    vpcmpeqd (%rsi){1to16}, %zmm0, %k0
2516; NoVLX-NEXT:    kmovw %k0, %eax
2517; NoVLX-NEXT:    andl %edi, %eax
2518; NoVLX-NEXT:    vzeroupper
2519; NoVLX-NEXT:    retq
2520entry:
2521  %0 = bitcast <8 x i64> %__a to <16 x i32>
2522  %load = load i32, i32* %__b
; Splat the loaded scalar: insert into lane 0, then shuffle lane 0 everywhere.
2523  %vec = insertelement <16 x i32> undef, i32 %load, i32 0
2524  %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
2525  %2 = icmp eq <16 x i32> %0, %1
2526  %3 = bitcast i16 %__u to <16 x i1>
; Note the operand order here is (mask, compare), unlike the non-broadcast
; masked variants, which use (compare, mask).
2527  %4 = and <16 x i1> %3, %2
; Shuffle indices 16-31 select lanes of the zeroinitializer operand, so the
; upper half of the widened mask is all zero.
2528  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
2529  %6 = bitcast <32 x i1> %5 to i32
2530  ret i32 %6
2531}
2532
2533
; Unmasked 16 x i32 equality compare of two register operands. The 16-lane
; result is padded with zero lanes to 64 lanes and returned as an i64.
2534define zeroext i64 @test_vpcmpeqd_v16i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
2535; VLX-LABEL: test_vpcmpeqd_v16i1_v64i1_mask:
2536; VLX:       # %bb.0: # %entry
2537; VLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
2538; VLX-NEXT:    kmovq %k0, %rax
2539; VLX-NEXT:    vzeroupper
2540; VLX-NEXT:    retq
2541;
2542; NoVLX-LABEL: test_vpcmpeqd_v16i1_v64i1_mask:
2543; NoVLX:       # %bb.0: # %entry
2544; NoVLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
2545; NoVLX-NEXT:    kmovw %k0, %eax
2546; NoVLX-NEXT:    movzwl %ax, %eax
2547; NoVLX-NEXT:    vzeroupper
2548; NoVLX-NEXT:    retq
2549entry:
2550  %0 = bitcast <8 x i64> %__a to <16 x i32>
2551  %1 = bitcast <8 x i64> %__b to <16 x i32>
2552  %2 = icmp eq <16 x i32> %0, %1
; Every shuffle index >= 16 selects a lane of the zeroinitializer operand
; (indices 16-31 are reused), so result lanes 16-63 are all zero.
2553  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
2554  %4 = bitcast <64 x i1> %3 to i64
2555  ret i64 %4
2556}
2557
; Unmasked 16 x i32 equality compare of %__a against an <8 x i64> loaded from
; %__b. The 16-lane result is padded with zero lanes to 64 lanes and returned
; as an i64.
2558define zeroext i64 @test_vpcmpeqd_v16i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
2559; VLX-LABEL: test_vpcmpeqd_v16i1_v64i1_mask_mem:
2560; VLX:       # %bb.0: # %entry
2561; VLX-NEXT:    vpcmpeqd (%rdi), %zmm0, %k0
2562; VLX-NEXT:    kmovq %k0, %rax
2563; VLX-NEXT:    vzeroupper
2564; VLX-NEXT:    retq
2565;
2566; NoVLX-LABEL: test_vpcmpeqd_v16i1_v64i1_mask_mem:
2567; NoVLX:       # %bb.0: # %entry
2568; NoVLX-NEXT:    vpcmpeqd (%rdi), %zmm0, %k0
2569; NoVLX-NEXT:    kmovw %k0, %eax
2570; NoVLX-NEXT:    movzwl %ax, %eax
2571; NoVLX-NEXT:    vzeroupper
2572; NoVLX-NEXT:    retq
2573entry:
2574  %0 = bitcast <8 x i64> %__a to <16 x i32>
2575  %load = load <8 x i64>, <8 x i64>* %__b
2576  %1 = bitcast <8 x i64> %load to <16 x i32>
2577  %2 = icmp eq <16 x i32> %0, %1
; Every shuffle index >= 16 selects a lane of the zeroinitializer operand
; (indices 16-31 are reused), so result lanes 16-63 are all zero.
2578  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
2579  %4 = bitcast <64 x i1> %3 to i64
2580  ret i64 %4
2581}
2582
; Masked dword equality compare of two 512-bit register operands. %__u is
; bitcast to <16 x i1> and ANDed with the compare result; the masked <16 x i1>
; value is widened to <64 x i1> with zero upper lanes and returned as an i64.
2583define zeroext i64 @test_masked_vpcmpeqd_v16i1_v64i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
2584; VLX-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask:
2585; VLX:       # %bb.0: # %entry
2586; VLX-NEXT:    kmovd %edi, %k1
2587; VLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
2588; VLX-NEXT:    kmovq %k0, %rax
2589; VLX-NEXT:    vzeroupper
2590; VLX-NEXT:    retq
2591;
2592; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask:
2593; NoVLX:       # %bb.0: # %entry
2594; NoVLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
2595; NoVLX-NEXT:    kmovw %k0, %eax
2596; NoVLX-NEXT:    andl %edi, %eax
2597; NoVLX-NEXT:    vzeroupper
2598; NoVLX-NEXT:    retq
2599entry:
2600  %0 = bitcast <8 x i64> %__a to <16 x i32>
2601  %1 = bitcast <8 x i64> %__b to <16 x i32>
2602  %2 = icmp eq <16 x i32> %0, %1
2603  %3 = bitcast i16 %__u to <16 x i1>
2604  %4 = and <16 x i1> %2, %3
  ; Mask indices 16-31 select lanes of the zeroinitializer operand, so result
  ; bits 16-63 are always zero.
2605  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
2606  %6 = bitcast <64 x i1> %5 to i64
2607  ret i64 %6
2608}
2609
; Masked dword equality compare where the second <16 x i32> operand is loaded
; from memory through %__b. %__u is bitcast to <16 x i1> and ANDed with the
; compare result before the value is widened to <64 x i1> and returned as i64.
2610define zeroext i64 @test_masked_vpcmpeqd_v16i1_v64i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
2611; VLX-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask_mem:
2612; VLX:       # %bb.0: # %entry
2613; VLX-NEXT:    kmovd %edi, %k1
2614; VLX-NEXT:    vpcmpeqd (%rsi), %zmm0, %k0 {%k1}
2615; VLX-NEXT:    kmovq %k0, %rax
2616; VLX-NEXT:    vzeroupper
2617; VLX-NEXT:    retq
2618;
2619; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask_mem:
2620; NoVLX:       # %bb.0: # %entry
2621; NoVLX-NEXT:    vpcmpeqd (%rsi), %zmm0, %k0
2622; NoVLX-NEXT:    kmovw %k0, %eax
2623; NoVLX-NEXT:    andl %edi, %eax
2624; NoVLX-NEXT:    vzeroupper
2625; NoVLX-NEXT:    retq
2626entry:
2627  %0 = bitcast <8 x i64> %__a to <16 x i32>
2628  %load = load <8 x i64>, <8 x i64>* %__b
2629  %1 = bitcast <8 x i64> %load to <16 x i32>
2630  %2 = icmp eq <16 x i32> %0, %1
2631  %3 = bitcast i16 %__u to <16 x i1>
2632  %4 = and <16 x i1> %2, %3
  ; Mask indices 16-31 select lanes of the zeroinitializer operand, so result
  ; bits 16-63 are always zero.
2633  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
2634  %6 = bitcast <64 x i1> %5 to i64
2635  ret i64 %6
2636}
2637
2638
; Unmasked dword equality compare against a broadcast scalar: one i32 is loaded
; through %__b and splatted to all 16 lanes (insertelement + all-zero shuffle
; mask). The <16 x i1> result is widened to <64 x i1> and returned as an i64.
2639define zeroext i64 @test_vpcmpeqd_v16i1_v64i1_mask_mem_b(<8 x i64> %__a, i32* %__b) local_unnamed_addr {
2640; VLX-LABEL: test_vpcmpeqd_v16i1_v64i1_mask_mem_b:
2641; VLX:       # %bb.0: # %entry
2642; VLX-NEXT:    vpcmpeqd (%rdi){1to16}, %zmm0, %k0
2643; VLX-NEXT:    kmovq %k0, %rax
2644; VLX-NEXT:    vzeroupper
2645; VLX-NEXT:    retq
2646;
2647; NoVLX-LABEL: test_vpcmpeqd_v16i1_v64i1_mask_mem_b:
2648; NoVLX:       # %bb.0: # %entry
2649; NoVLX-NEXT:    vpcmpeqd (%rdi){1to16}, %zmm0, %k0
2650; NoVLX-NEXT:    kmovw %k0, %eax
2651; NoVLX-NEXT:    movzwl %ax, %eax
2652; NoVLX-NEXT:    vzeroupper
2653; NoVLX-NEXT:    retq
2654entry:
2655  %0 = bitcast <8 x i64> %__a to <16 x i32>
2656  %load = load i32, i32* %__b
2657  %vec = insertelement <16 x i32> undef, i32 %load, i32 0
  ; Splat lane 0 (the loaded scalar) across all 16 lanes.
2658  %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
2659  %2 = icmp eq <16 x i32> %0, %1
  ; Mask indices 16-31 select lanes of the zeroinitializer operand, so result
  ; bits 16-63 are always zero.
2660  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
2661  %4 = bitcast <64 x i1> %3 to i64
2662  ret i64 %4
2663}
2664
; Masked dword equality compare against a broadcast scalar: one i32 is loaded
; through %__b and splatted to all 16 lanes. %__u is bitcast to <16 x i1> and
; ANDed with the compare result, which is then widened to <64 x i1> and
; returned as an i64.
2665define zeroext i64 @test_masked_vpcmpeqd_v16i1_v64i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, i32* %__b) local_unnamed_addr {
2666; VLX-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask_mem_b:
2667; VLX:       # %bb.0: # %entry
2668; VLX-NEXT:    kmovd %edi, %k1
2669; VLX-NEXT:    vpcmpeqd (%rsi){1to16}, %zmm0, %k0 {%k1}
2670; VLX-NEXT:    kmovq %k0, %rax
2671; VLX-NEXT:    vzeroupper
2672; VLX-NEXT:    retq
2673;
2674; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask_mem_b:
2675; NoVLX:       # %bb.0: # %entry
2676; NoVLX-NEXT:    vpcmpeqd (%rsi){1to16}, %zmm0, %k0
2677; NoVLX-NEXT:    kmovw %k0, %eax
2678; NoVLX-NEXT:    andl %edi, %eax
2679; NoVLX-NEXT:    vzeroupper
2680; NoVLX-NEXT:    retq
2681entry:
2682  %0 = bitcast <8 x i64> %__a to <16 x i32>
2683  %load = load i32, i32* %__b
2684  %vec = insertelement <16 x i32> undef, i32 %load, i32 0
  ; Splat lane 0 (the loaded scalar) across all 16 lanes.
2685  %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
2686  %2 = icmp eq <16 x i32> %0, %1
2687  %3 = bitcast i16 %__u to <16 x i1>
2688  %4 = and <16 x i1> %3, %2
  ; Mask indices 16-31 select lanes of the zeroinitializer operand, so result
  ; bits 16-63 are always zero.
2689  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
2690  %6 = bitcast <64 x i1> %5 to i64
2691  ret i64 %6
2692}
2693
2694
; Unmasked qword equality compare of two 128-bit register operands
; (<2 x i64>). The <2 x i1> result is widened to <4 x i1> with zero upper lanes
; and returned as an i4 bit mask.
2695define zeroext i4 @test_vpcmpeqq_v2i1_v4i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
2696; VLX-LABEL: test_vpcmpeqq_v2i1_v4i1_mask:
2697; VLX:       # %bb.0: # %entry
2698; VLX-NEXT:    vpcmpeqq %xmm1, %xmm0, %k0
2699; VLX-NEXT:    kmovb %k0, %eax
2700; VLX-NEXT:    retq
2701;
2702; NoVLX-LABEL: test_vpcmpeqq_v2i1_v4i1_mask:
2703; NoVLX:       # %bb.0: # %entry
2704; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
2705; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
2706; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0
2707; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
2708; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
2709; NoVLX-NEXT:    kmovw %k0, %eax
2710; NoVLX-NEXT:    andl $3, %eax
2711; NoVLX-NEXT:    vzeroupper
2712; NoVLX-NEXT:    retq
2713entry:
  ; These bitcasts keep the same type on both sides, i.e. they are no-ops.
2714  %0 = bitcast <2 x i64> %__a to <2 x i64>
2715  %1 = bitcast <2 x i64> %__b to <2 x i64>
2716  %2 = icmp eq <2 x i64> %0, %1
  ; Mask indices 2 and 3 select lanes of the zeroinitializer operand, so result
  ; bits 2-3 are always zero.
2717  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2718  %4 = bitcast <4 x i1> %3 to i4
2719  ret i4 %4
2720}
2721
; Unmasked qword equality compare where the second <2 x i64> operand is loaded
; from memory through %__b. The <2 x i1> result is widened to <4 x i1> with
; zero upper lanes and returned as an i4 bit mask.
2722define zeroext i4 @test_vpcmpeqq_v2i1_v4i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
2723; VLX-LABEL: test_vpcmpeqq_v2i1_v4i1_mask_mem:
2724; VLX:       # %bb.0: # %entry
2725; VLX-NEXT:    vpcmpeqq (%rdi), %xmm0, %k0
2726; VLX-NEXT:    kmovb %k0, %eax
2727; VLX-NEXT:    retq
2728;
2729; NoVLX-LABEL: test_vpcmpeqq_v2i1_v4i1_mask_mem:
2730; NoVLX:       # %bb.0: # %entry
2731; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
2732; NoVLX-NEXT:    vmovdqa (%rdi), %xmm1
2733; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0
2734; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
2735; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
2736; NoVLX-NEXT:    kmovw %k0, %eax
2737; NoVLX-NEXT:    andl $3, %eax
2738; NoVLX-NEXT:    vzeroupper
2739; NoVLX-NEXT:    retq
2740entry:
2741  %0 = bitcast <2 x i64> %__a to <2 x i64>
2742  %load = load <2 x i64>, <2 x i64>* %__b
2743  %1 = bitcast <2 x i64> %load to <2 x i64>
2744  %2 = icmp eq <2 x i64> %0, %1
  ; Mask indices 2 and 3 select lanes of the zeroinitializer operand, so result
  ; bits 2-3 are always zero.
2745  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2746  %4 = bitcast <4 x i1> %3 to i4
2747  ret i4 %4
2748}
2749
; Masked qword equality compare of two 128-bit register operands. %__u is
; bitcast to <8 x i1>, its low two lanes are extracted and ANDed with the
; <2 x i1> compare result, which is then widened to <4 x i1> and returned as i4.
2750define zeroext i4 @test_masked_vpcmpeqq_v2i1_v4i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
2751; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask:
2752; VLX:       # %bb.0: # %entry
2753; VLX-NEXT:    kmovd %edi, %k1
2754; VLX-NEXT:    vpcmpeqq %xmm1, %xmm0, %k0 {%k1}
2755; VLX-NEXT:    kmovb %k0, %eax
2756; VLX-NEXT:    retq
2757;
2758; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask:
2759; NoVLX:       # %bb.0: # %entry
2760; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
2761; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
2762; NoVLX-NEXT:    kmovw %edi, %k1
2763; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
2764; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
2765; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
2766; NoVLX-NEXT:    kmovw %k0, %eax
2767; NoVLX-NEXT:    andl $3, %eax
2768; NoVLX-NEXT:    vzeroupper
2769; NoVLX-NEXT:    retq
2770entry:
2771  %0 = bitcast <2 x i64> %__a to <2 x i64>
2772  %1 = bitcast <2 x i64> %__b to <2 x i64>
2773  %2 = icmp eq <2 x i64> %0, %1
2774  %3 = bitcast i8 %__u to <8 x i1>
  ; Only bits 0 and 1 of %__u take part in the mask.
2775  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
2776  %4 = and <2 x i1> %2, %extract.i
  ; Mask indices 2 and 3 select lanes of the zeroinitializer operand, so result
  ; bits 2-3 are always zero.
2777  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2778  %6 = bitcast <4 x i1> %5 to i4
2779  ret i4 %6
2780}
2781
; Masked qword equality compare where the second <2 x i64> operand is loaded
; from memory through %__b. The low two lanes of %__u (bitcast to <8 x i1>) are
; ANDed with the compare result, which is widened to <4 x i1> and returned as i4.
2782define zeroext i4 @test_masked_vpcmpeqq_v2i1_v4i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
2783; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask_mem:
2784; VLX:       # %bb.0: # %entry
2785; VLX-NEXT:    kmovd %edi, %k1
2786; VLX-NEXT:    vpcmpeqq (%rsi), %xmm0, %k0 {%k1}
2787; VLX-NEXT:    kmovb %k0, %eax
2788; VLX-NEXT:    retq
2789;
2790; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask_mem:
2791; NoVLX:       # %bb.0: # %entry
2792; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
2793; NoVLX-NEXT:    vmovdqa (%rsi), %xmm1
2794; NoVLX-NEXT:    kmovw %edi, %k1
2795; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
2796; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
2797; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
2798; NoVLX-NEXT:    kmovw %k0, %eax
2799; NoVLX-NEXT:    andl $3, %eax
2800; NoVLX-NEXT:    vzeroupper
2801; NoVLX-NEXT:    retq
2802entry:
2803  %0 = bitcast <2 x i64> %__a to <2 x i64>
2804  %load = load <2 x i64>, <2 x i64>* %__b
2805  %1 = bitcast <2 x i64> %load to <2 x i64>
2806  %2 = icmp eq <2 x i64> %0, %1
2807  %3 = bitcast i8 %__u to <8 x i1>
  ; Only bits 0 and 1 of %__u take part in the mask.
2808  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
2809  %4 = and <2 x i1> %2, %extract.i
  ; Mask indices 2 and 3 select lanes of the zeroinitializer operand, so result
  ; bits 2-3 are always zero.
2810  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2811  %6 = bitcast <4 x i1> %5 to i4
2812  ret i4 %6
2813}
2814
2815
; Unmasked qword equality compare against a broadcast scalar: one i64 is loaded
; through %__b and splatted to both lanes. The <2 x i1> result is widened to
; <4 x i1> with zero upper lanes and returned as an i4 bit mask.
2816define zeroext i4 @test_vpcmpeqq_v2i1_v4i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
2817; VLX-LABEL: test_vpcmpeqq_v2i1_v4i1_mask_mem_b:
2818; VLX:       # %bb.0: # %entry
2819; VLX-NEXT:    vpcmpeqq (%rdi){1to2}, %xmm0, %k0
2820; VLX-NEXT:    kmovb %k0, %eax
2821; VLX-NEXT:    retq
2822;
2823; NoVLX-LABEL: test_vpcmpeqq_v2i1_v4i1_mask_mem_b:
2824; NoVLX:       # %bb.0: # %entry
2825; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
2826; NoVLX-NEXT:    vpbroadcastq (%rdi), %xmm1
2827; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0
2828; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
2829; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
2830; NoVLX-NEXT:    kmovw %k0, %eax
2831; NoVLX-NEXT:    andl $3, %eax
2832; NoVLX-NEXT:    vzeroupper
2833; NoVLX-NEXT:    retq
2834entry:
2835  %0 = bitcast <2 x i64> %__a to <2 x i64>
2836  %load = load i64, i64* %__b
2837  %vec = insertelement <2 x i64> undef, i64 %load, i32 0
  ; Splat lane 0 (the loaded scalar) across both lanes.
2838  %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
2839  %2 = icmp eq <2 x i64> %0, %1
  ; Mask indices 2 and 3 select lanes of the zeroinitializer operand, so result
  ; bits 2-3 are always zero.
2840  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2841  %4 = bitcast <4 x i1> %3 to i4
2842  ret i4 %4
2843}
2844
; Masked qword equality compare against a broadcast scalar: one i64 is loaded
; through %__b and splatted to both lanes. The low two lanes of %__u (bitcast to
; <8 x i1>) are ANDed with the compare result, which is widened to <4 x i1> and
; returned as i4.
2845define zeroext i4 @test_masked_vpcmpeqq_v2i1_v4i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
2846; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask_mem_b:
2847; VLX:       # %bb.0: # %entry
2848; VLX-NEXT:    kmovd %edi, %k1
2849; VLX-NEXT:    vpcmpeqq (%rsi){1to2}, %xmm0, %k0 {%k1}
2850; VLX-NEXT:    kmovb %k0, %eax
2851; VLX-NEXT:    retq
2852;
2853; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask_mem_b:
2854; NoVLX:       # %bb.0: # %entry
2855; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
2856; NoVLX-NEXT:    vpbroadcastq (%rsi), %xmm1
2857; NoVLX-NEXT:    kmovw %edi, %k1
2858; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
2859; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
2860; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
2861; NoVLX-NEXT:    kmovw %k0, %eax
2862; NoVLX-NEXT:    andl $3, %eax
2863; NoVLX-NEXT:    vzeroupper
2864; NoVLX-NEXT:    retq
2865entry:
2866  %0 = bitcast <2 x i64> %__a to <2 x i64>
2867  %load = load i64, i64* %__b
2868  %vec = insertelement <2 x i64> undef, i64 %load, i32 0
  ; Splat lane 0 (the loaded scalar) across both lanes.
2869  %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
2870  %2 = icmp eq <2 x i64> %0, %1
2871  %3 = bitcast i8 %__u to <8 x i1>
  ; Only bits 0 and 1 of %__u take part in the mask.
2872  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
2873  %4 = and <2 x i1> %extract.i, %2
  ; Mask indices 2 and 3 select lanes of the zeroinitializer operand, so result
  ; bits 2-3 are always zero.
2874  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2875  %6 = bitcast <4 x i1> %5 to i4
2876  ret i4 %6
2877}
2878
2879
; Unmasked qword equality compare of two 128-bit register operands
; (<2 x i64>). The <2 x i1> result is widened to <8 x i1> with zero upper lanes
; and returned as an i8 bit mask.
2880define zeroext i8 @test_vpcmpeqq_v2i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
2881; VLX-LABEL: test_vpcmpeqq_v2i1_v8i1_mask:
2882; VLX:       # %bb.0: # %entry
2883; VLX-NEXT:    vpcmpeqq %xmm1, %xmm0, %k0
2884; VLX-NEXT:    kmovd %k0, %eax
2885; VLX-NEXT:    # kill: def $al killed $al killed $eax
2886; VLX-NEXT:    retq
2887;
2888; NoVLX-LABEL: test_vpcmpeqq_v2i1_v8i1_mask:
2889; NoVLX:       # %bb.0: # %entry
2890; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
2891; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
2892; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0
2893; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
2894; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
2895; NoVLX-NEXT:    kmovw %k0, %eax
2896; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
2897; NoVLX-NEXT:    vzeroupper
2898; NoVLX-NEXT:    retq
2899entry:
  ; These bitcasts keep the same type on both sides, i.e. they are no-ops.
2900  %0 = bitcast <2 x i64> %__a to <2 x i64>
2901  %1 = bitcast <2 x i64> %__b to <2 x i64>
2902  %2 = icmp eq <2 x i64> %0, %1
  ; Mask indices 2 and 3 select lanes of the zeroinitializer operand, so result
  ; bits 2-7 are always zero.
2903  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
2904  %4 = bitcast <8 x i1> %3 to i8
2905  ret i8 %4
2906}
2907
; Unmasked qword equality compare where the second <2 x i64> operand is loaded
; from memory through %__b. The <2 x i1> result is widened to <8 x i1> with
; zero upper lanes and returned as an i8 bit mask.
2908define zeroext i8 @test_vpcmpeqq_v2i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
2909; VLX-LABEL: test_vpcmpeqq_v2i1_v8i1_mask_mem:
2910; VLX:       # %bb.0: # %entry
2911; VLX-NEXT:    vpcmpeqq (%rdi), %xmm0, %k0
2912; VLX-NEXT:    kmovd %k0, %eax
2913; VLX-NEXT:    # kill: def $al killed $al killed $eax
2914; VLX-NEXT:    retq
2915;
2916; NoVLX-LABEL: test_vpcmpeqq_v2i1_v8i1_mask_mem:
2917; NoVLX:       # %bb.0: # %entry
2918; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
2919; NoVLX-NEXT:    vmovdqa (%rdi), %xmm1
2920; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0
2921; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
2922; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
2923; NoVLX-NEXT:    kmovw %k0, %eax
2924; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
2925; NoVLX-NEXT:    vzeroupper
2926; NoVLX-NEXT:    retq
2927entry:
2928  %0 = bitcast <2 x i64> %__a to <2 x i64>
2929  %load = load <2 x i64>, <2 x i64>* %__b
2930  %1 = bitcast <2 x i64> %load to <2 x i64>
2931  %2 = icmp eq <2 x i64> %0, %1
  ; Mask indices 2 and 3 select lanes of the zeroinitializer operand, so result
  ; bits 2-7 are always zero.
2932  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
2933  %4 = bitcast <8 x i1> %3 to i8
2934  ret i8 %4
2935}
2936
; Masked qword equality compare of two 128-bit register operands. %__u is
; bitcast to <8 x i1>, its low two lanes are extracted and ANDed with the
; <2 x i1> compare result, which is then widened to <8 x i1> and returned as i8.
2937define zeroext i8 @test_masked_vpcmpeqq_v2i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
2938; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v8i1_mask:
2939; VLX:       # %bb.0: # %entry
2940; VLX-NEXT:    kmovd %edi, %k1
2941; VLX-NEXT:    vpcmpeqq %xmm1, %xmm0, %k0 {%k1}
2942; VLX-NEXT:    kmovd %k0, %eax
2943; VLX-NEXT:    # kill: def $al killed $al killed $eax
2944; VLX-NEXT:    retq
2945;
2946; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v8i1_mask:
2947; NoVLX:       # %bb.0: # %entry
2948; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
2949; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
2950; NoVLX-NEXT:    kmovw %edi, %k1
2951; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
2952; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
2953; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
2954; NoVLX-NEXT:    kmovw %k0, %eax
2955; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
2956; NoVLX-NEXT:    vzeroupper
2957; NoVLX-NEXT:    retq
2958entry:
2959  %0 = bitcast <2 x i64> %__a to <2 x i64>
2960  %1 = bitcast <2 x i64> %__b to <2 x i64>
2961  %2 = icmp eq <2 x i64> %0, %1
2962  %3 = bitcast i8 %__u to <8 x i1>
  ; Only bits 0 and 1 of %__u take part in the mask.
2963  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
2964  %4 = and <2 x i1> %2, %extract.i
  ; Mask indices 2 and 3 select lanes of the zeroinitializer operand, so result
  ; bits 2-7 are always zero.
2965  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
2966  %6 = bitcast <8 x i1> %5 to i8
2967  ret i8 %6
2968}
2969
; Masked qword equality compare where the second <2 x i64> operand is loaded
; from memory through %__b. The low two lanes of %__u (bitcast to <8 x i1>) are
; ANDed with the compare result, which is widened to <8 x i1> and returned as i8.
2970define zeroext i8 @test_masked_vpcmpeqq_v2i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
2971; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v8i1_mask_mem:
2972; VLX:       # %bb.0: # %entry
2973; VLX-NEXT:    kmovd %edi, %k1
2974; VLX-NEXT:    vpcmpeqq (%rsi), %xmm0, %k0 {%k1}
2975; VLX-NEXT:    kmovd %k0, %eax
2976; VLX-NEXT:    # kill: def $al killed $al killed $eax
2977; VLX-NEXT:    retq
2978;
2979; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v8i1_mask_mem:
2980; NoVLX:       # %bb.0: # %entry
2981; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
2982; NoVLX-NEXT:    vmovdqa (%rsi), %xmm1
2983; NoVLX-NEXT:    kmovw %edi, %k1
2984; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
2985; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
2986; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
2987; NoVLX-NEXT:    kmovw %k0, %eax
2988; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
2989; NoVLX-NEXT:    vzeroupper
2990; NoVLX-NEXT:    retq
2991entry:
2992  %0 = bitcast <2 x i64> %__a to <2 x i64>
2993  %load = load <2 x i64>, <2 x i64>* %__b
2994  %1 = bitcast <2 x i64> %load to <2 x i64>
2995  %2 = icmp eq <2 x i64> %0, %1
2996  %3 = bitcast i8 %__u to <8 x i1>
  ; Only bits 0 and 1 of %__u take part in the mask.
2997  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
2998  %4 = and <2 x i1> %2, %extract.i
  ; Mask indices 2 and 3 select lanes of the zeroinitializer operand, so result
  ; bits 2-7 are always zero.
2999  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3000  %6 = bitcast <8 x i1> %5 to i8
3001  ret i8 %6
3002}
3003
3004
; Unmasked qword equality compare against a broadcast scalar: one i64 is loaded
; through %__b and splatted to both lanes. The <2 x i1> result is widened to
; <8 x i1> with zero upper lanes and returned as an i8 bit mask.
3005define zeroext i8 @test_vpcmpeqq_v2i1_v8i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
3006; VLX-LABEL: test_vpcmpeqq_v2i1_v8i1_mask_mem_b:
3007; VLX:       # %bb.0: # %entry
3008; VLX-NEXT:    vpcmpeqq (%rdi){1to2}, %xmm0, %k0
3009; VLX-NEXT:    kmovd %k0, %eax
3010; VLX-NEXT:    # kill: def $al killed $al killed $eax
3011; VLX-NEXT:    retq
3012;
3013; NoVLX-LABEL: test_vpcmpeqq_v2i1_v8i1_mask_mem_b:
3014; NoVLX:       # %bb.0: # %entry
3015; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
3016; NoVLX-NEXT:    vpbroadcastq (%rdi), %xmm1
3017; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0
3018; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
3019; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
3020; NoVLX-NEXT:    kmovw %k0, %eax
3021; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
3022; NoVLX-NEXT:    vzeroupper
3023; NoVLX-NEXT:    retq
3024entry:
3025  %0 = bitcast <2 x i64> %__a to <2 x i64>
3026  %load = load i64, i64* %__b
3027  %vec = insertelement <2 x i64> undef, i64 %load, i32 0
  ; Splat lane 0 (the loaded scalar) across both lanes.
3028  %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
3029  %2 = icmp eq <2 x i64> %0, %1
  ; Mask indices 2 and 3 select lanes of the zeroinitializer operand, so result
  ; bits 2-7 are always zero.
3030  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3031  %4 = bitcast <8 x i1> %3 to i8
3032  ret i8 %4
3033}
3034
; Masked qword equality compare against a broadcast scalar: one i64 is loaded
; through %__b and splatted to both lanes. The low two lanes of %__u (bitcast to
; <8 x i1>) are ANDed with the compare result, which is widened to <8 x i1> and
; returned as i8.
3035define zeroext i8 @test_masked_vpcmpeqq_v2i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
3036; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v8i1_mask_mem_b:
3037; VLX:       # %bb.0: # %entry
3038; VLX-NEXT:    kmovd %edi, %k1
3039; VLX-NEXT:    vpcmpeqq (%rsi){1to2}, %xmm0, %k0 {%k1}
3040; VLX-NEXT:    kmovd %k0, %eax
3041; VLX-NEXT:    # kill: def $al killed $al killed $eax
3042; VLX-NEXT:    retq
3043;
3044; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v8i1_mask_mem_b:
3045; NoVLX:       # %bb.0: # %entry
3046; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
3047; NoVLX-NEXT:    vpbroadcastq (%rsi), %xmm1
3048; NoVLX-NEXT:    kmovw %edi, %k1
3049; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
3050; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
3051; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
3052; NoVLX-NEXT:    kmovw %k0, %eax
3053; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
3054; NoVLX-NEXT:    vzeroupper
3055; NoVLX-NEXT:    retq
3056entry:
3057  %0 = bitcast <2 x i64> %__a to <2 x i64>
3058  %load = load i64, i64* %__b
3059  %vec = insertelement <2 x i64> undef, i64 %load, i32 0
  ; Splat lane 0 (the loaded scalar) across both lanes.
3060  %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
3061  %2 = icmp eq <2 x i64> %0, %1
3062  %3 = bitcast i8 %__u to <8 x i1>
  ; Only bits 0 and 1 of %__u take part in the mask.
3063  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
3064  %4 = and <2 x i1> %extract.i, %2
  ; Mask indices 2 and 3 select lanes of the zeroinitializer operand, so result
  ; bits 2-7 are always zero.
3065  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3066  %6 = bitcast <8 x i1> %5 to i8
3067  ret i8 %6
3068}
3069
3070
; Unmasked qword equality compare of two 128-bit register operands
; (<2 x i64>). The <2 x i1> result is widened to <16 x i1> with zero upper
; lanes and returned as an i16 bit mask.
3071define zeroext i16 @test_vpcmpeqq_v2i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
3072; VLX-LABEL: test_vpcmpeqq_v2i1_v16i1_mask:
3073; VLX:       # %bb.0: # %entry
3074; VLX-NEXT:    vpcmpeqq %xmm1, %xmm0, %k0
3075; VLX-NEXT:    kmovd %k0, %eax
3076; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
3077; VLX-NEXT:    retq
3078;
3079; NoVLX-LABEL: test_vpcmpeqq_v2i1_v16i1_mask:
3080; NoVLX:       # %bb.0: # %entry
3081; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
3082; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
3083; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0
3084; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
3085; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
3086; NoVLX-NEXT:    kmovw %k0, %eax
3087; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
3088; NoVLX-NEXT:    vzeroupper
3089; NoVLX-NEXT:    retq
3090entry:
  ; These bitcasts keep the same type on both sides, i.e. they are no-ops.
3091  %0 = bitcast <2 x i64> %__a to <2 x i64>
3092  %1 = bitcast <2 x i64> %__b to <2 x i64>
3093  %2 = icmp eq <2 x i64> %0, %1
  ; Mask indices 2 and 3 select lanes of the zeroinitializer operand, so result
  ; bits 2-15 are always zero.
3094  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3095  %4 = bitcast <16 x i1> %3 to i16
3096  ret i16 %4
3097}
3098
; Unmasked qword equality compare where the second <2 x i64> operand is loaded
; from memory through %__b. The <2 x i1> result is widened to <16 x i1> with
; zero upper lanes and returned as an i16 bit mask.
3099define zeroext i16 @test_vpcmpeqq_v2i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
3100; VLX-LABEL: test_vpcmpeqq_v2i1_v16i1_mask_mem:
3101; VLX:       # %bb.0: # %entry
3102; VLX-NEXT:    vpcmpeqq (%rdi), %xmm0, %k0
3103; VLX-NEXT:    kmovd %k0, %eax
3104; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
3105; VLX-NEXT:    retq
3106;
3107; NoVLX-LABEL: test_vpcmpeqq_v2i1_v16i1_mask_mem:
3108; NoVLX:       # %bb.0: # %entry
3109; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
3110; NoVLX-NEXT:    vmovdqa (%rdi), %xmm1
3111; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0
3112; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
3113; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
3114; NoVLX-NEXT:    kmovw %k0, %eax
3115; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
3116; NoVLX-NEXT:    vzeroupper
3117; NoVLX-NEXT:    retq
3118entry:
3119  %0 = bitcast <2 x i64> %__a to <2 x i64>
3120  %load = load <2 x i64>, <2 x i64>* %__b
3121  %1 = bitcast <2 x i64> %load to <2 x i64>
3122  %2 = icmp eq <2 x i64> %0, %1
  ; Mask indices 2 and 3 select lanes of the zeroinitializer operand, so result
  ; bits 2-15 are always zero.
3123  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3124  %4 = bitcast <16 x i1> %3 to i16
3125  ret i16 %4
3126}
3127
; Masked qword equality compare of two 128-bit register operands. %__u is
; bitcast to <8 x i1>, its low two lanes are extracted and ANDed with the
; <2 x i1> compare result, which is then widened to <16 x i1> and returned as
; i16.
3128define zeroext i16 @test_masked_vpcmpeqq_v2i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
3129; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v16i1_mask:
3130; VLX:       # %bb.0: # %entry
3131; VLX-NEXT:    kmovd %edi, %k1
3132; VLX-NEXT:    vpcmpeqq %xmm1, %xmm0, %k0 {%k1}
3133; VLX-NEXT:    kmovd %k0, %eax
3134; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
3135; VLX-NEXT:    retq
3136;
3137; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v16i1_mask:
3138; NoVLX:       # %bb.0: # %entry
3139; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
3140; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
3141; NoVLX-NEXT:    kmovw %edi, %k1
3142; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
3143; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
3144; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
3145; NoVLX-NEXT:    kmovw %k0, %eax
3146; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
3147; NoVLX-NEXT:    vzeroupper
3148; NoVLX-NEXT:    retq
3149entry:
3150  %0 = bitcast <2 x i64> %__a to <2 x i64>
3151  %1 = bitcast <2 x i64> %__b to <2 x i64>
3152  %2 = icmp eq <2 x i64> %0, %1
3153  %3 = bitcast i8 %__u to <8 x i1>
  ; Only bits 0 and 1 of %__u take part in the mask.
3154  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
3155  %4 = and <2 x i1> %2, %extract.i
  ; Mask indices 2 and 3 select lanes of the zeroinitializer operand, so result
  ; bits 2-15 are always zero.
3156  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3157  %6 = bitcast <16 x i1> %5 to i16
3158  ret i16 %6
3159}
3160
; Masked qword equality compare where the second <2 x i64> operand is loaded
; from memory through %__b. The low two lanes of %__u (bitcast to <8 x i1>) are
; ANDed with the compare result, which is widened to <16 x i1> and returned as
; i16.
3161define zeroext i16 @test_masked_vpcmpeqq_v2i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
3162; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v16i1_mask_mem:
3163; VLX:       # %bb.0: # %entry
3164; VLX-NEXT:    kmovd %edi, %k1
3165; VLX-NEXT:    vpcmpeqq (%rsi), %xmm0, %k0 {%k1}
3166; VLX-NEXT:    kmovd %k0, %eax
3167; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
3168; VLX-NEXT:    retq
3169;
3170; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v16i1_mask_mem:
3171; NoVLX:       # %bb.0: # %entry
3172; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
3173; NoVLX-NEXT:    vmovdqa (%rsi), %xmm1
3174; NoVLX-NEXT:    kmovw %edi, %k1
3175; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
3176; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
3177; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
3178; NoVLX-NEXT:    kmovw %k0, %eax
3179; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
3180; NoVLX-NEXT:    vzeroupper
3181; NoVLX-NEXT:    retq
3182entry:
3183  %0 = bitcast <2 x i64> %__a to <2 x i64>
3184  %load = load <2 x i64>, <2 x i64>* %__b
3185  %1 = bitcast <2 x i64> %load to <2 x i64>
3186  %2 = icmp eq <2 x i64> %0, %1
3187  %3 = bitcast i8 %__u to <8 x i1>
  ; Only bits 0 and 1 of %__u take part in the mask.
3188  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
3189  %4 = and <2 x i1> %2, %extract.i
  ; Mask indices 2 and 3 select lanes of the zeroinitializer operand, so result
  ; bits 2-15 are always zero.
3190  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3191  %6 = bitcast <16 x i1> %5 to i16
3192  ret i16 %6
3193}
3194
3195
; Unmasked qword equality compare against a broadcast scalar: one i64 is loaded
; through %__b and splatted to both lanes. The <2 x i1> result is widened to
; <16 x i1> with zero upper lanes and returned as an i16 bit mask.
3196define zeroext i16 @test_vpcmpeqq_v2i1_v16i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
3197; VLX-LABEL: test_vpcmpeqq_v2i1_v16i1_mask_mem_b:
3198; VLX:       # %bb.0: # %entry
3199; VLX-NEXT:    vpcmpeqq (%rdi){1to2}, %xmm0, %k0
3200; VLX-NEXT:    kmovd %k0, %eax
3201; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
3202; VLX-NEXT:    retq
3203;
3204; NoVLX-LABEL: test_vpcmpeqq_v2i1_v16i1_mask_mem_b:
3205; NoVLX:       # %bb.0: # %entry
3206; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
3207; NoVLX-NEXT:    vpbroadcastq (%rdi), %xmm1
3208; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0
3209; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
3210; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
3211; NoVLX-NEXT:    kmovw %k0, %eax
3212; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
3213; NoVLX-NEXT:    vzeroupper
3214; NoVLX-NEXT:    retq
3215entry:
3216  %0 = bitcast <2 x i64> %__a to <2 x i64>
3217  %load = load i64, i64* %__b
3218  %vec = insertelement <2 x i64> undef, i64 %load, i32 0
  ; Splat lane 0 (the loaded scalar) across both lanes.
3219  %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
3220  %2 = icmp eq <2 x i64> %0, %1
  ; Mask indices 2 and 3 select lanes of the zeroinitializer operand, so result
  ; bits 2-15 are always zero.
3221  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3222  %4 = bitcast <16 x i1> %3 to i16
3223  ret i16 %4
3224}
3225
; Masked qword equality compare against a broadcast scalar: one i64 is loaded
; through %__b and splatted to both lanes. The low two lanes of %__u (bitcast to
; <8 x i1>) are ANDed with the compare result, which is widened to <16 x i1>
; and returned as i16.
3226define zeroext i16 @test_masked_vpcmpeqq_v2i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
3227; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v16i1_mask_mem_b:
3228; VLX:       # %bb.0: # %entry
3229; VLX-NEXT:    kmovd %edi, %k1
3230; VLX-NEXT:    vpcmpeqq (%rsi){1to2}, %xmm0, %k0 {%k1}
3231; VLX-NEXT:    kmovd %k0, %eax
3232; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
3233; VLX-NEXT:    retq
3234;
3235; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v16i1_mask_mem_b:
3236; NoVLX:       # %bb.0: # %entry
3237; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
3238; NoVLX-NEXT:    vpbroadcastq (%rsi), %xmm1
3239; NoVLX-NEXT:    kmovw %edi, %k1
3240; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
3241; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
3242; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
3243; NoVLX-NEXT:    kmovw %k0, %eax
3244; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
3245; NoVLX-NEXT:    vzeroupper
3246; NoVLX-NEXT:    retq
3247entry:
3248  %0 = bitcast <2 x i64> %__a to <2 x i64>
3249  %load = load i64, i64* %__b
3250  %vec = insertelement <2 x i64> undef, i64 %load, i32 0
  ; Splat lane 0 (the loaded scalar) across both lanes.
3251  %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
3252  %2 = icmp eq <2 x i64> %0, %1
3253  %3 = bitcast i8 %__u to <8 x i1>
  ; Only bits 0 and 1 of %__u take part in the mask.
3254  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
3255  %4 = and <2 x i1> %extract.i, %2
  ; Mask indices 2 and 3 select lanes of the zeroinitializer operand, so result
  ; bits 2-15 are always zero.
3256  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3257  %6 = bitcast <16 x i1> %5 to i16
3258  ret i16 %6
3259}
3260
3261
; Unmasked <2 x i64> equality compare of two register operands, returned as i32.
; The <2 x i1> result is widened to <32 x i1>: shuffle indices 0-1 take the
; compare bits, while indices 2 and 3 select from zeroinitializer, so lanes
; 2-31 are zero. The AVX512VL run expects an xmm compare straight into %k0;
; the AVX512F-only run expects a zmm compare plus a kshiftlw/kshiftrw pair by 14.
3262define zeroext i32 @test_vpcmpeqq_v2i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
3263; VLX-LABEL: test_vpcmpeqq_v2i1_v32i1_mask:
3264; VLX:       # %bb.0: # %entry
3265; VLX-NEXT:    vpcmpeqq %xmm1, %xmm0, %k0
3266; VLX-NEXT:    kmovd %k0, %eax
3267; VLX-NEXT:    retq
3268;
3269; NoVLX-LABEL: test_vpcmpeqq_v2i1_v32i1_mask:
3270; NoVLX:       # %bb.0: # %entry
3271; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
3272; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
3273; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0
3274; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
3275; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
3276; NoVLX-NEXT:    kmovw %k0, %eax
3277; NoVLX-NEXT:    vzeroupper
3278; NoVLX-NEXT:    retq
3279entry:
3280  %0 = bitcast <2 x i64> %__a to <2 x i64>
3281  %1 = bitcast <2 x i64> %__b to <2 x i64>
3282  %2 = icmp eq <2 x i64> %0, %1
3283  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3284  %4 = bitcast <32 x i1> %3 to i32
3285  ret i32 %4
3286}
3287
; Unmasked <2 x i64> equality compare where the second operand is a full
; vector loaded from %__b; the result is returned as i32. Lanes 0-1 of the
; <32 x i1> carry the compare bits and the rest come from zeroinitializer
; (shuffle indices 2 and 3). The AVX512VL run expects the load folded into the
; compare; the AVX512F-only run expects a separate vmovdqa before a zmm compare.
3288define zeroext i32 @test_vpcmpeqq_v2i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
3289; VLX-LABEL: test_vpcmpeqq_v2i1_v32i1_mask_mem:
3290; VLX:       # %bb.0: # %entry
3291; VLX-NEXT:    vpcmpeqq (%rdi), %xmm0, %k0
3292; VLX-NEXT:    kmovd %k0, %eax
3293; VLX-NEXT:    retq
3294;
3295; NoVLX-LABEL: test_vpcmpeqq_v2i1_v32i1_mask_mem:
3296; NoVLX:       # %bb.0: # %entry
3297; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
3298; NoVLX-NEXT:    vmovdqa (%rdi), %xmm1
3299; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0
3300; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
3301; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
3302; NoVLX-NEXT:    kmovw %k0, %eax
3303; NoVLX-NEXT:    vzeroupper
3304; NoVLX-NEXT:    retq
3305entry:
3306  %0 = bitcast <2 x i64> %__a to <2 x i64>
3307  %load = load <2 x i64>, <2 x i64>* %__b
3308  %1 = bitcast <2 x i64> %load to <2 x i64>
3309  %2 = icmp eq <2 x i64> %0, %1
3310  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3311  %4 = bitcast <32 x i1> %3 to i32
3312  ret i32 %4
3313}
3314
; Masked <2 x i64> equality compare of two register operands, returned as i32.
; The compare result is ANDed with elements 0-1 of %__u (viewed as <8 x i1>),
; then widened to <32 x i1> with lanes 2-31 taken from zeroinitializer.
; Both runs expect %__u to be moved into %k1 and used as the write mask of the
; compare; the AVX512F-only run additionally widens to zmm and shifts by 14.
3315define zeroext i32 @test_masked_vpcmpeqq_v2i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
3316; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v32i1_mask:
3317; VLX:       # %bb.0: # %entry
3318; VLX-NEXT:    kmovd %edi, %k1
3319; VLX-NEXT:    vpcmpeqq %xmm1, %xmm0, %k0 {%k1}
3320; VLX-NEXT:    kmovd %k0, %eax
3321; VLX-NEXT:    retq
3322;
3323; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v32i1_mask:
3324; NoVLX:       # %bb.0: # %entry
3325; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
3326; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
3327; NoVLX-NEXT:    kmovw %edi, %k1
3328; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
3329; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
3330; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
3331; NoVLX-NEXT:    kmovw %k0, %eax
3332; NoVLX-NEXT:    vzeroupper
3333; NoVLX-NEXT:    retq
3334entry:
3335  %0 = bitcast <2 x i64> %__a to <2 x i64>
3336  %1 = bitcast <2 x i64> %__b to <2 x i64>
3337  %2 = icmp eq <2 x i64> %0, %1
3338  %3 = bitcast i8 %__u to <8 x i1>
3339  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
3340  %4 = and <2 x i1> %2, %extract.i
3341  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3342  %6 = bitcast <32 x i1> %5 to i32
3343  ret i32 %6
3344}
3345
; Masked <2 x i64> equality compare with the second operand loaded as a full
; vector from %__b; the result is returned as i32. The compare bits are ANDed
; with elements 0-1 of %__u and placed in lanes 0-1 of a <32 x i1> whose other
; lanes come from zeroinitializer. The AVX512VL run expects the load folded
; into a masked compare; the AVX512F-only run expects a separate vmovdqa.
3346define zeroext i32 @test_masked_vpcmpeqq_v2i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
3347; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v32i1_mask_mem:
3348; VLX:       # %bb.0: # %entry
3349; VLX-NEXT:    kmovd %edi, %k1
3350; VLX-NEXT:    vpcmpeqq (%rsi), %xmm0, %k0 {%k1}
3351; VLX-NEXT:    kmovd %k0, %eax
3352; VLX-NEXT:    retq
3353;
3354; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v32i1_mask_mem:
3355; NoVLX:       # %bb.0: # %entry
3356; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
3357; NoVLX-NEXT:    vmovdqa (%rsi), %xmm1
3358; NoVLX-NEXT:    kmovw %edi, %k1
3359; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
3360; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
3361; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
3362; NoVLX-NEXT:    kmovw %k0, %eax
3363; NoVLX-NEXT:    vzeroupper
3364; NoVLX-NEXT:    retq
3365entry:
3366  %0 = bitcast <2 x i64> %__a to <2 x i64>
3367  %load = load <2 x i64>, <2 x i64>* %__b
3368  %1 = bitcast <2 x i64> %load to <2 x i64>
3369  %2 = icmp eq <2 x i64> %0, %1
3370  %3 = bitcast i8 %__u to <8 x i1>
3371  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
3372  %4 = and <2 x i1> %2, %extract.i
3373  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3374  %6 = bitcast <32 x i1> %5 to i32
3375  ret i32 %6
3376}
3377
3378
; Unmasked <2 x i64> equality compare against a broadcast scalar, returned as
; i32. A single i64 is loaded from %__b and splat to both lanes (insertelement
; followed by a shuffle with an all-zero index mask) before the compare. The
; <2 x i1> result fills lanes 0-1 of a <32 x i1>; the other lanes come from
; zeroinitializer. The AVX512VL run expects a folded {1to2} broadcast operand;
; the AVX512F-only run expects an explicit vpbroadcastq.
3379define zeroext i32 @test_vpcmpeqq_v2i1_v32i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
3380; VLX-LABEL: test_vpcmpeqq_v2i1_v32i1_mask_mem_b:
3381; VLX:       # %bb.0: # %entry
3382; VLX-NEXT:    vpcmpeqq (%rdi){1to2}, %xmm0, %k0
3383; VLX-NEXT:    kmovd %k0, %eax
3384; VLX-NEXT:    retq
3385;
3386; NoVLX-LABEL: test_vpcmpeqq_v2i1_v32i1_mask_mem_b:
3387; NoVLX:       # %bb.0: # %entry
3388; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
3389; NoVLX-NEXT:    vpbroadcastq (%rdi), %xmm1
3390; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0
3391; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
3392; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
3393; NoVLX-NEXT:    kmovw %k0, %eax
3394; NoVLX-NEXT:    vzeroupper
3395; NoVLX-NEXT:    retq
3396entry:
3397  %0 = bitcast <2 x i64> %__a to <2 x i64>
3398  %load = load i64, i64* %__b
3399  %vec = insertelement <2 x i64> undef, i64 %load, i32 0
3400  %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
3401  %2 = icmp eq <2 x i64> %0, %1
3402  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3403  %4 = bitcast <32 x i1> %3 to i32
3404  ret i32 %4
3405}
3406
; Masked <2 x i64> equality compare against a broadcast scalar, returned as i32.
; The i64 loaded from %__b is splat to both lanes, compared with %__a, and the
; result ANDed with elements 0-1 of %__u (note the mask is the first operand
; of the `and` here). Lanes 2-31 of the <32 x i1> come from zeroinitializer.
; The AVX512VL run expects a masked compare with a folded {1to2} broadcast;
; the AVX512F-only run expects vpbroadcastq plus a masked zmm compare.
3407define zeroext i32 @test_masked_vpcmpeqq_v2i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
3408; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v32i1_mask_mem_b:
3409; VLX:       # %bb.0: # %entry
3410; VLX-NEXT:    kmovd %edi, %k1
3411; VLX-NEXT:    vpcmpeqq (%rsi){1to2}, %xmm0, %k0 {%k1}
3412; VLX-NEXT:    kmovd %k0, %eax
3413; VLX-NEXT:    retq
3414;
3415; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v32i1_mask_mem_b:
3416; NoVLX:       # %bb.0: # %entry
3417; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
3418; NoVLX-NEXT:    vpbroadcastq (%rsi), %xmm1
3419; NoVLX-NEXT:    kmovw %edi, %k1
3420; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
3421; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
3422; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
3423; NoVLX-NEXT:    kmovw %k0, %eax
3424; NoVLX-NEXT:    vzeroupper
3425; NoVLX-NEXT:    retq
3426entry:
3427  %0 = bitcast <2 x i64> %__a to <2 x i64>
3428  %load = load i64, i64* %__b
3429  %vec = insertelement <2 x i64> undef, i64 %load, i32 0
3430  %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
3431  %2 = icmp eq <2 x i64> %0, %1
3432  %3 = bitcast i8 %__u to <8 x i1>
3433  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
3434  %4 = and <2 x i1> %extract.i, %2
3435  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3436  %6 = bitcast <32 x i1> %5 to i32
3437  ret i32 %6
3438}
3439
3440
3441define zeroext i64 @test_vpcmpeqq_v2i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
3442; VLX-LABEL: test_vpcmpeqq_v2i1_v64i1_mask:
3443; VLX:       # %bb.0: # %entry
3444; VLX-NEXT:    vpcmpeqq %xmm1, %xmm0, %k0
3445; VLX-NEXT:    kmovq %k0, %rax
3446; VLX-NEXT:    retq
3447;
3448; NoVLX-LABEL: test_vpcmpeqq_v2i1_v64i1_mask:
3449; NoVLX:       # %bb.0: # %entry
3450; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
3451; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
3452; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0
3453; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
3454; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
3455; NoVLX-NEXT:    kmovw %k0, %eax
3456; NoVLX-NEXT:    movzwl %ax, %eax
3457; NoVLX-NEXT:    vzeroupper
3458; NoVLX-NEXT:    retq
3459entry:
3460  %0 = bitcast <2 x i64> %__a to <2 x i64>
3461  %1 = bitcast <2 x i64> %__b to <2 x i64>
3462  %2 = icmp eq <2 x i64> %0, %1
3463  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3464  %4 = bitcast <64 x i1> %3 to i64
3465  ret i64 %4
3466}
3467
3468define zeroext i64 @test_vpcmpeqq_v2i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
3469; VLX-LABEL: test_vpcmpeqq_v2i1_v64i1_mask_mem:
3470; VLX:       # %bb.0: # %entry
3471; VLX-NEXT:    vpcmpeqq (%rdi), %xmm0, %k0
3472; VLX-NEXT:    kmovq %k0, %rax
3473; VLX-NEXT:    retq
3474;
3475; NoVLX-LABEL: test_vpcmpeqq_v2i1_v64i1_mask_mem:
3476; NoVLX:       # %bb.0: # %entry
3477; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
3478; NoVLX-NEXT:    vmovdqa (%rdi), %xmm1
3479; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0
3480; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
3481; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
3482; NoVLX-NEXT:    kmovw %k0, %eax
3483; NoVLX-NEXT:    movzwl %ax, %eax
3484; NoVLX-NEXT:    vzeroupper
3485; NoVLX-NEXT:    retq
3486entry:
3487  %0 = bitcast <2 x i64> %__a to <2 x i64>
3488  %load = load <2 x i64>, <2 x i64>* %__b
3489  %1 = bitcast <2 x i64> %load to <2 x i64>
3490  %2 = icmp eq <2 x i64> %0, %1
3491  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3492  %4 = bitcast <64 x i1> %3 to i64
3493  ret i64 %4
3494}
3495
3496define zeroext i64 @test_masked_vpcmpeqq_v2i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
3497; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask:
3498; VLX:       # %bb.0: # %entry
3499; VLX-NEXT:    kmovd %edi, %k1
3500; VLX-NEXT:    vpcmpeqq %xmm1, %xmm0, %k0 {%k1}
3501; VLX-NEXT:    kmovq %k0, %rax
3502; VLX-NEXT:    retq
3503;
3504; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask:
3505; NoVLX:       # %bb.0: # %entry
3506; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
3507; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
3508; NoVLX-NEXT:    kmovw %edi, %k1
3509; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
3510; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
3511; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
3512; NoVLX-NEXT:    kmovw %k0, %eax
3513; NoVLX-NEXT:    movzwl %ax, %eax
3514; NoVLX-NEXT:    vzeroupper
3515; NoVLX-NEXT:    retq
3516entry:
3517  %0 = bitcast <2 x i64> %__a to <2 x i64>
3518  %1 = bitcast <2 x i64> %__b to <2 x i64>
3519  %2 = icmp eq <2 x i64> %0, %1
3520  %3 = bitcast i8 %__u to <8 x i1>
3521  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
3522  %4 = and <2 x i1> %2, %extract.i
3523  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3524  %6 = bitcast <64 x i1> %5 to i64
3525  ret i64 %6
3526}
3527
3528define zeroext i64 @test_masked_vpcmpeqq_v2i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
3529; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask_mem:
3530; VLX:       # %bb.0: # %entry
3531; VLX-NEXT:    kmovd %edi, %k1
3532; VLX-NEXT:    vpcmpeqq (%rsi), %xmm0, %k0 {%k1}
3533; VLX-NEXT:    kmovq %k0, %rax
3534; VLX-NEXT:    retq
3535;
3536; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask_mem:
3537; NoVLX:       # %bb.0: # %entry
3538; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
3539; NoVLX-NEXT:    vmovdqa (%rsi), %xmm1
3540; NoVLX-NEXT:    kmovw %edi, %k1
3541; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
3542; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
3543; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
3544; NoVLX-NEXT:    kmovw %k0, %eax
3545; NoVLX-NEXT:    movzwl %ax, %eax
3546; NoVLX-NEXT:    vzeroupper
3547; NoVLX-NEXT:    retq
3548entry:
3549  %0 = bitcast <2 x i64> %__a to <2 x i64>
3550  %load = load <2 x i64>, <2 x i64>* %__b
3551  %1 = bitcast <2 x i64> %load to <2 x i64>
3552  %2 = icmp eq <2 x i64> %0, %1
3553  %3 = bitcast i8 %__u to <8 x i1>
3554  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
3555  %4 = and <2 x i1> %2, %extract.i
3556  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3557  %6 = bitcast <64 x i1> %5 to i64
3558  ret i64 %6
3559}
3560
3561
3562define zeroext i64 @test_vpcmpeqq_v2i1_v64i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
3563; VLX-LABEL: test_vpcmpeqq_v2i1_v64i1_mask_mem_b:
3564; VLX:       # %bb.0: # %entry
3565; VLX-NEXT:    vpcmpeqq (%rdi){1to2}, %xmm0, %k0
3566; VLX-NEXT:    kmovq %k0, %rax
3567; VLX-NEXT:    retq
3568;
3569; NoVLX-LABEL: test_vpcmpeqq_v2i1_v64i1_mask_mem_b:
3570; NoVLX:       # %bb.0: # %entry
3571; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
3572; NoVLX-NEXT:    vpbroadcastq (%rdi), %xmm1
3573; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0
3574; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
3575; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
3576; NoVLX-NEXT:    kmovw %k0, %eax
3577; NoVLX-NEXT:    movzwl %ax, %eax
3578; NoVLX-NEXT:    vzeroupper
3579; NoVLX-NEXT:    retq
3580entry:
3581  %0 = bitcast <2 x i64> %__a to <2 x i64>
3582  %load = load i64, i64* %__b
3583  %vec = insertelement <2 x i64> undef, i64 %load, i32 0
3584  %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
3585  %2 = icmp eq <2 x i64> %0, %1
3586  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3587  %4 = bitcast <64 x i1> %3 to i64
3588  ret i64 %4
3589}
3590
3591define zeroext i64 @test_masked_vpcmpeqq_v2i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
3592; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask_mem_b:
3593; VLX:       # %bb.0: # %entry
3594; VLX-NEXT:    kmovd %edi, %k1
3595; VLX-NEXT:    vpcmpeqq (%rsi){1to2}, %xmm0, %k0 {%k1}
3596; VLX-NEXT:    kmovq %k0, %rax
3597; VLX-NEXT:    retq
3598;
3599; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask_mem_b:
3600; NoVLX:       # %bb.0: # %entry
3601; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
3602; NoVLX-NEXT:    vpbroadcastq (%rsi), %xmm1
3603; NoVLX-NEXT:    kmovw %edi, %k1
3604; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
3605; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
3606; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
3607; NoVLX-NEXT:    kmovw %k0, %eax
3608; NoVLX-NEXT:    movzwl %ax, %eax
3609; NoVLX-NEXT:    vzeroupper
3610; NoVLX-NEXT:    retq
3611entry:
3612  %0 = bitcast <2 x i64> %__a to <2 x i64>
3613  %load = load i64, i64* %__b
3614  %vec = insertelement <2 x i64> undef, i64 %load, i32 0
3615  %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
3616  %2 = icmp eq <2 x i64> %0, %1
3617  %3 = bitcast i8 %__u to <8 x i1>
3618  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
3619  %4 = and <2 x i1> %extract.i, %2
3620  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3621  %6 = bitcast <64 x i1> %5 to i64
3622  ret i64 %6
3623}
3624
3625
; Unmasked <4 x i64> equality compare of two register operands, returned as i8.
; The <4 x i1> result is widened to <8 x i1>: shuffle indices 0-3 take the
; compare bits and indices 4-7 select from zeroinitializer, so lanes 4-7 are
; zero. The AVX512VL run expects a ymm compare into %k0; the AVX512F-only run
; expects a zmm compare followed by a kshiftlw/kshiftrw pair by 12.
3626define zeroext i8 @test_vpcmpeqq_v4i1_v8i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
3627; VLX-LABEL: test_vpcmpeqq_v4i1_v8i1_mask:
3628; VLX:       # %bb.0: # %entry
3629; VLX-NEXT:    vpcmpeqq %ymm1, %ymm0, %k0
3630; VLX-NEXT:    kmovd %k0, %eax
3631; VLX-NEXT:    # kill: def $al killed $al killed $eax
3632; VLX-NEXT:    vzeroupper
3633; VLX-NEXT:    retq
3634;
3635; NoVLX-LABEL: test_vpcmpeqq_v4i1_v8i1_mask:
3636; NoVLX:       # %bb.0: # %entry
3637; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
3638; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
3639; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0
3640; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
3641; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
3642; NoVLX-NEXT:    kmovw %k0, %eax
3643; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
3644; NoVLX-NEXT:    vzeroupper
3645; NoVLX-NEXT:    retq
3646entry:
3647  %0 = bitcast <4 x i64> %__a to <4 x i64>
3648  %1 = bitcast <4 x i64> %__b to <4 x i64>
3649  %2 = icmp eq <4 x i64> %0, %1
3650  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
3651  %4 = bitcast <8 x i1> %3 to i8
3652  ret i8 %4
3653}
3654
; Unmasked <4 x i64> equality compare with the second operand loaded as a full
; vector from %__b; the result is returned as i8. Lanes 0-3 of the <8 x i1>
; hold the compare bits; lanes 4-7 select from zeroinitializer.
; The AVX512VL run expects the load folded into a ymm compare; the
; AVX512F-only run expects vmovdqa, a zmm compare and shifts by 12.
3655define zeroext i8 @test_vpcmpeqq_v4i1_v8i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
3656; VLX-LABEL: test_vpcmpeqq_v4i1_v8i1_mask_mem:
3657; VLX:       # %bb.0: # %entry
3658; VLX-NEXT:    vpcmpeqq (%rdi), %ymm0, %k0
3659; VLX-NEXT:    kmovd %k0, %eax
3660; VLX-NEXT:    # kill: def $al killed $al killed $eax
3661; VLX-NEXT:    vzeroupper
3662; VLX-NEXT:    retq
3663;
3664; NoVLX-LABEL: test_vpcmpeqq_v4i1_v8i1_mask_mem:
3665; NoVLX:       # %bb.0: # %entry
3666; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
3667; NoVLX-NEXT:    vmovdqa (%rdi), %ymm1
3668; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0
3669; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
3670; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
3671; NoVLX-NEXT:    kmovw %k0, %eax
3672; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
3673; NoVLX-NEXT:    vzeroupper
3674; NoVLX-NEXT:    retq
3675entry:
3676  %0 = bitcast <4 x i64> %__a to <4 x i64>
3677  %load = load <4 x i64>, <4 x i64>* %__b
3678  %1 = bitcast <4 x i64> %load to <4 x i64>
3679  %2 = icmp eq <4 x i64> %0, %1
3680  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
3681  %4 = bitcast <8 x i1> %3 to i8
3682  ret i8 %4
3683}
3684
; Masked <4 x i64> equality compare of two register operands, returned as i8.
; The compare result is ANDed with elements 0-3 of %__u (viewed as <8 x i1>),
; then widened to <8 x i1> with lanes 4-7 selected from zeroinitializer.
; Both runs expect %__u in %k1 as the compare's write mask; the AVX512F-only
; run widens to zmm and adds a kshiftlw/kshiftrw pair by 12.
3685define zeroext i8 @test_masked_vpcmpeqq_v4i1_v8i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
3686; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v8i1_mask:
3687; VLX:       # %bb.0: # %entry
3688; VLX-NEXT:    kmovd %edi, %k1
3689; VLX-NEXT:    vpcmpeqq %ymm1, %ymm0, %k0 {%k1}
3690; VLX-NEXT:    kmovd %k0, %eax
3691; VLX-NEXT:    # kill: def $al killed $al killed $eax
3692; VLX-NEXT:    vzeroupper
3693; VLX-NEXT:    retq
3694;
3695; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v8i1_mask:
3696; NoVLX:       # %bb.0: # %entry
3697; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
3698; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
3699; NoVLX-NEXT:    kmovw %edi, %k1
3700; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
3701; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
3702; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
3703; NoVLX-NEXT:    kmovw %k0, %eax
3704; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
3705; NoVLX-NEXT:    vzeroupper
3706; NoVLX-NEXT:    retq
3707entry:
3708  %0 = bitcast <4 x i64> %__a to <4 x i64>
3709  %1 = bitcast <4 x i64> %__b to <4 x i64>
3710  %2 = icmp eq <4 x i64> %0, %1
3711  %3 = bitcast i8 %__u to <8 x i1>
3712  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
3713  %4 = and <4 x i1> %2, %extract.i
3714  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
3715  %6 = bitcast <8 x i1> %5 to i8
3716  ret i8 %6
3717}
3718
; Masked <4 x i64> equality compare with the second operand loaded as a full
; vector from %__b; the result is returned as i8. The compare bits are ANDed
; with elements 0-3 of %__u and placed in lanes 0-3 of an <8 x i1>; lanes 4-7
; select from zeroinitializer. The AVX512VL run expects a masked ymm compare
; with a folded memory operand; the AVX512F-only run expects a separate
; vmovdqa, a masked zmm compare and shifts by 12.
3719define zeroext i8 @test_masked_vpcmpeqq_v4i1_v8i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
3720; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v8i1_mask_mem:
3721; VLX:       # %bb.0: # %entry
3722; VLX-NEXT:    kmovd %edi, %k1
3723; VLX-NEXT:    vpcmpeqq (%rsi), %ymm0, %k0 {%k1}
3724; VLX-NEXT:    kmovd %k0, %eax
3725; VLX-NEXT:    # kill: def $al killed $al killed $eax
3726; VLX-NEXT:    vzeroupper
3727; VLX-NEXT:    retq
3728;
3729; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v8i1_mask_mem:
3730; NoVLX:       # %bb.0: # %entry
3731; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
3732; NoVLX-NEXT:    vmovdqa (%rsi), %ymm1
3733; NoVLX-NEXT:    kmovw %edi, %k1
3734; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
3735; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
3736; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
3737; NoVLX-NEXT:    kmovw %k0, %eax
3738; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
3739; NoVLX-NEXT:    vzeroupper
3740; NoVLX-NEXT:    retq
3741entry:
3742  %0 = bitcast <4 x i64> %__a to <4 x i64>
3743  %load = load <4 x i64>, <4 x i64>* %__b
3744  %1 = bitcast <4 x i64> %load to <4 x i64>
3745  %2 = icmp eq <4 x i64> %0, %1
3746  %3 = bitcast i8 %__u to <8 x i1>
3747  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
3748  %4 = and <4 x i1> %2, %extract.i
3749  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
3750  %6 = bitcast <8 x i1> %5 to i8
3751  ret i8 %6
3752}
3753
3754
; Unmasked <4 x i64> equality compare against a broadcast scalar, returned as
; i8. One i64 is loaded from %__b and splat to all four lanes (insertelement
; followed by a shuffle with an all-zero index mask) before the compare.
; Lanes 0-3 of the <8 x i1> hold the compare bits; lanes 4-7 select from
; zeroinitializer. The AVX512VL run expects a folded {1to4} broadcast operand;
; the AVX512F-only run expects vpbroadcastq, a zmm compare and shifts by 12.
3755define zeroext i8 @test_vpcmpeqq_v4i1_v8i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
3756; VLX-LABEL: test_vpcmpeqq_v4i1_v8i1_mask_mem_b:
3757; VLX:       # %bb.0: # %entry
3758; VLX-NEXT:    vpcmpeqq (%rdi){1to4}, %ymm0, %k0
3759; VLX-NEXT:    kmovd %k0, %eax
3760; VLX-NEXT:    # kill: def $al killed $al killed $eax
3761; VLX-NEXT:    vzeroupper
3762; VLX-NEXT:    retq
3763;
3764; NoVLX-LABEL: test_vpcmpeqq_v4i1_v8i1_mask_mem_b:
3765; NoVLX:       # %bb.0: # %entry
3766; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
3767; NoVLX-NEXT:    vpbroadcastq (%rdi), %ymm1
3768; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0
3769; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
3770; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
3771; NoVLX-NEXT:    kmovw %k0, %eax
3772; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
3773; NoVLX-NEXT:    vzeroupper
3774; NoVLX-NEXT:    retq
3775entry:
3776  %0 = bitcast <4 x i64> %__a to <4 x i64>
3777  %load = load i64, i64* %__b
3778  %vec = insertelement <4 x i64> undef, i64 %load, i32 0
3779  %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
3780  %2 = icmp eq <4 x i64> %0, %1
3781  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
3782  %4 = bitcast <8 x i1> %3 to i8
3783  ret i8 %4
3784}
3785
; Masked <4 x i64> equality compare against a broadcast scalar, returned as i8.
; The i64 loaded from %__b is splat to all four lanes, compared with %__a, and
; the result ANDed with elements 0-3 of %__u (mask is the first `and` operand).
; Lanes 4-7 of the <8 x i1> select from zeroinitializer. The AVX512VL run
; expects a masked compare with a folded {1to4} broadcast; the AVX512F-only
; run expects vpbroadcastq, a masked zmm compare and shifts by 12.
3786define zeroext i8 @test_masked_vpcmpeqq_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
3787; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v8i1_mask_mem_b:
3788; VLX:       # %bb.0: # %entry
3789; VLX-NEXT:    kmovd %edi, %k1
3790; VLX-NEXT:    vpcmpeqq (%rsi){1to4}, %ymm0, %k0 {%k1}
3791; VLX-NEXT:    kmovd %k0, %eax
3792; VLX-NEXT:    # kill: def $al killed $al killed $eax
3793; VLX-NEXT:    vzeroupper
3794; VLX-NEXT:    retq
3795;
3796; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v8i1_mask_mem_b:
3797; NoVLX:       # %bb.0: # %entry
3798; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
3799; NoVLX-NEXT:    vpbroadcastq (%rsi), %ymm1
3800; NoVLX-NEXT:    kmovw %edi, %k1
3801; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
3802; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
3803; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
3804; NoVLX-NEXT:    kmovw %k0, %eax
3805; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
3806; NoVLX-NEXT:    vzeroupper
3807; NoVLX-NEXT:    retq
3808entry:
3809  %0 = bitcast <4 x i64> %__a to <4 x i64>
3810  %load = load i64, i64* %__b
3811  %vec = insertelement <4 x i64> undef, i64 %load, i32 0
3812  %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
3813  %2 = icmp eq <4 x i64> %0, %1
3814  %3 = bitcast i8 %__u to <8 x i1>
3815  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
3816  %4 = and <4 x i1> %extract.i, %2
3817  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
3818  %6 = bitcast <8 x i1> %5 to i8
3819  ret i8 %6
3820}
3821
3822
; Unmasked <4 x i64> equality compare of two register operands, returned as
; i16. The <4 x i1> result is widened to <16 x i1>: indices 0-3 take the
; compare bits, and every later index (4-7, repeated) selects from
; zeroinitializer, so lanes 4-15 are zero. The AVX512VL run expects a ymm
; compare into %k0; the AVX512F-only run expects a zmm compare and a
; kshiftlw/kshiftrw pair by 12.
3823define zeroext i16 @test_vpcmpeqq_v4i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
3824; VLX-LABEL: test_vpcmpeqq_v4i1_v16i1_mask:
3825; VLX:       # %bb.0: # %entry
3826; VLX-NEXT:    vpcmpeqq %ymm1, %ymm0, %k0
3827; VLX-NEXT:    kmovd %k0, %eax
3828; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
3829; VLX-NEXT:    vzeroupper
3830; VLX-NEXT:    retq
3831;
3832; NoVLX-LABEL: test_vpcmpeqq_v4i1_v16i1_mask:
3833; NoVLX:       # %bb.0: # %entry
3834; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
3835; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
3836; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0
3837; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
3838; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
3839; NoVLX-NEXT:    kmovw %k0, %eax
3840; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
3841; NoVLX-NEXT:    vzeroupper
3842; NoVLX-NEXT:    retq
3843entry:
3844  %0 = bitcast <4 x i64> %__a to <4 x i64>
3845  %1 = bitcast <4 x i64> %__b to <4 x i64>
3846  %2 = icmp eq <4 x i64> %0, %1
3847  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
3848  %4 = bitcast <16 x i1> %3 to i16
3849  ret i16 %4
3850}
3851
; Unmasked <4 x i64> equality compare with the second operand loaded as a full
; vector from %__b; the result is returned as i16. Lanes 0-3 of the <16 x i1>
; hold the compare bits; lanes 4-15 select from zeroinitializer.
; The AVX512VL run expects the load folded into a ymm compare; the
; AVX512F-only run expects vmovdqa, a zmm compare and shifts by 12.
3852define zeroext i16 @test_vpcmpeqq_v4i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
3853; VLX-LABEL: test_vpcmpeqq_v4i1_v16i1_mask_mem:
3854; VLX:       # %bb.0: # %entry
3855; VLX-NEXT:    vpcmpeqq (%rdi), %ymm0, %k0
3856; VLX-NEXT:    kmovd %k0, %eax
3857; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
3858; VLX-NEXT:    vzeroupper
3859; VLX-NEXT:    retq
3860;
3861; NoVLX-LABEL: test_vpcmpeqq_v4i1_v16i1_mask_mem:
3862; NoVLX:       # %bb.0: # %entry
3863; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
3864; NoVLX-NEXT:    vmovdqa (%rdi), %ymm1
3865; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0
3866; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
3867; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
3868; NoVLX-NEXT:    kmovw %k0, %eax
3869; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
3870; NoVLX-NEXT:    vzeroupper
3871; NoVLX-NEXT:    retq
3872entry:
3873  %0 = bitcast <4 x i64> %__a to <4 x i64>
3874  %load = load <4 x i64>, <4 x i64>* %__b
3875  %1 = bitcast <4 x i64> %load to <4 x i64>
3876  %2 = icmp eq <4 x i64> %0, %1
3877  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
3878  %4 = bitcast <16 x i1> %3 to i16
3879  ret i16 %4
3880}
3881
; Masked <4 x i64> equality compare of two register operands, returned as i16.
; The compare result is ANDed with elements 0-3 of %__u (viewed as <8 x i1>),
; then widened to <16 x i1> with lanes 4-15 selected from zeroinitializer.
; Both runs expect %__u in %k1 as the compare's write mask; the AVX512F-only
; run widens to zmm and adds a kshiftlw/kshiftrw pair by 12.
3882define zeroext i16 @test_masked_vpcmpeqq_v4i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
3883; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v16i1_mask:
3884; VLX:       # %bb.0: # %entry
3885; VLX-NEXT:    kmovd %edi, %k1
3886; VLX-NEXT:    vpcmpeqq %ymm1, %ymm0, %k0 {%k1}
3887; VLX-NEXT:    kmovd %k0, %eax
3888; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
3889; VLX-NEXT:    vzeroupper
3890; VLX-NEXT:    retq
3891;
3892; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v16i1_mask:
3893; NoVLX:       # %bb.0: # %entry
3894; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
3895; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
3896; NoVLX-NEXT:    kmovw %edi, %k1
3897; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
3898; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
3899; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
3900; NoVLX-NEXT:    kmovw %k0, %eax
3901; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
3902; NoVLX-NEXT:    vzeroupper
3903; NoVLX-NEXT:    retq
3904entry:
3905  %0 = bitcast <4 x i64> %__a to <4 x i64>
3906  %1 = bitcast <4 x i64> %__b to <4 x i64>
3907  %2 = icmp eq <4 x i64> %0, %1
3908  %3 = bitcast i8 %__u to <8 x i1>
3909  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
3910  %4 = and <4 x i1> %2, %extract.i
3911  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
3912  %6 = bitcast <16 x i1> %5 to i16
3913  ret i16 %6
3914}
3915
; Masked qword equality compare of %__a against a <4 x i64> loaded from %__b,
; result returned as an i16 mask.
; The compare result is ANDed with the low four bits of %__u and padded with
; zero lanes to <16 x i1> before the bitcast to i16.
; The avx512vl run expects the load folded into a ymm vpcmpeqq (%rsi); the
; avx512f-only run expects a separate vmovdqa into ymm1, a zmm compare, and a
; kshiftlw/kshiftrw $12 pair that keeps only the low four mask bits.
3916define zeroext i16 @test_masked_vpcmpeqq_v4i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
3917; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v16i1_mask_mem:
3918; VLX:       # %bb.0: # %entry
3919; VLX-NEXT:    kmovd %edi, %k1
3920; VLX-NEXT:    vpcmpeqq (%rsi), %ymm0, %k0 {%k1}
3921; VLX-NEXT:    kmovd %k0, %eax
3922; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
3923; VLX-NEXT:    vzeroupper
3924; VLX-NEXT:    retq
3925;
3926; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v16i1_mask_mem:
3927; NoVLX:       # %bb.0: # %entry
3928; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
3929; NoVLX-NEXT:    vmovdqa (%rsi), %ymm1
3930; NoVLX-NEXT:    kmovw %edi, %k1
3931; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
3932; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
3933; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
3934; NoVLX-NEXT:    kmovw %k0, %eax
3935; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
3936; NoVLX-NEXT:    vzeroupper
3937; NoVLX-NEXT:    retq
3938entry:
3939  %0 = bitcast <4 x i64> %__a to <4 x i64>
3940  %load = load <4 x i64>, <4 x i64>* %__b
3941  %1 = bitcast <4 x i64> %load to <4 x i64>
3942  %2 = icmp eq <4 x i64> %0, %1
3943  %3 = bitcast i8 %__u to <8 x i1>
3944  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
3945  %4 = and <4 x i1> %2, %extract.i
3946  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
3947  %6 = bitcast <16 x i1> %5 to i16
3948  ret i16 %6
3949}
3950
3951
; Unmasked qword equality compare of %__a against a scalar i64 loaded from
; %__b and splatted to all four lanes (insertelement + all-zero shuffle mask).
; The <4 x i1> result is padded with zero lanes to <16 x i1> and returned as i16.
; The avx512vl run expects the splat folded as a {1to4} broadcast memory
; operand; the avx512f-only run expects vpbroadcastq into ymm1, a zmm compare,
; and a kshiftlw/kshiftrw $12 pair that keeps only the low four mask bits.
3952define zeroext i16 @test_vpcmpeqq_v4i1_v16i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
3953; VLX-LABEL: test_vpcmpeqq_v4i1_v16i1_mask_mem_b:
3954; VLX:       # %bb.0: # %entry
3955; VLX-NEXT:    vpcmpeqq (%rdi){1to4}, %ymm0, %k0
3956; VLX-NEXT:    kmovd %k0, %eax
3957; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
3958; VLX-NEXT:    vzeroupper
3959; VLX-NEXT:    retq
3960;
3961; NoVLX-LABEL: test_vpcmpeqq_v4i1_v16i1_mask_mem_b:
3962; NoVLX:       # %bb.0: # %entry
3963; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
3964; NoVLX-NEXT:    vpbroadcastq (%rdi), %ymm1
3965; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0
3966; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
3967; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
3968; NoVLX-NEXT:    kmovw %k0, %eax
3969; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
3970; NoVLX-NEXT:    vzeroupper
3971; NoVLX-NEXT:    retq
3972entry:
3973  %0 = bitcast <4 x i64> %__a to <4 x i64>
3974  %load = load i64, i64* %__b
3975  %vec = insertelement <4 x i64> undef, i64 %load, i32 0
3976  %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
3977  %2 = icmp eq <4 x i64> %0, %1
3978  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
3979  %4 = bitcast <16 x i1> %3 to i16
3980  ret i16 %4
3981}
3982
; Masked qword equality compare of %__a against a scalar i64 loaded from %__b
; and splatted to all four lanes.
; The low four bits of %__u are ANDed with the compare result (mask is the
; left operand of the 'and' here), then the <4 x i1> value is padded with zero
; lanes to <16 x i1> and returned as i16.
; The avx512vl run expects a {1to4} broadcast operand under {%k1}; the
; avx512f-only run expects vpbroadcastq into ymm1, a zmm compare, and a
; kshiftlw/kshiftrw $12 pair that keeps only the low four mask bits.
3983define zeroext i16 @test_masked_vpcmpeqq_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
3984; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v16i1_mask_mem_b:
3985; VLX:       # %bb.0: # %entry
3986; VLX-NEXT:    kmovd %edi, %k1
3987; VLX-NEXT:    vpcmpeqq (%rsi){1to4}, %ymm0, %k0 {%k1}
3988; VLX-NEXT:    kmovd %k0, %eax
3989; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
3990; VLX-NEXT:    vzeroupper
3991; VLX-NEXT:    retq
3992;
3993; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v16i1_mask_mem_b:
3994; NoVLX:       # %bb.0: # %entry
3995; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
3996; NoVLX-NEXT:    vpbroadcastq (%rsi), %ymm1
3997; NoVLX-NEXT:    kmovw %edi, %k1
3998; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
3999; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
4000; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
4001; NoVLX-NEXT:    kmovw %k0, %eax
4002; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
4003; NoVLX-NEXT:    vzeroupper
4004; NoVLX-NEXT:    retq
4005entry:
4006  %0 = bitcast <4 x i64> %__a to <4 x i64>
4007  %load = load i64, i64* %__b
4008  %vec = insertelement <4 x i64> undef, i64 %load, i32 0
4009  %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
4010  %2 = icmp eq <4 x i64> %0, %1
4011  %3 = bitcast i8 %__u to <8 x i1>
4012  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4013  %4 = and <4 x i1> %extract.i, %2
4014  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
4015  %6 = bitcast <16 x i1> %5 to i16
4016  ret i16 %6
4017}
4018
4019
; Unmasked qword equality compare of two <4 x i64> register operands, result
; returned as an i32 mask.
; The <4 x i1> icmp result is padded to <32 x i1>; every shuffle index above 3
; selects a lane of the zeroinitializer operand, so bits 4-31 are zero.
; The avx512vl run expects a ymm vpcmpeqq and kmovd; the avx512f-only run
; expects the operands widened to zmm plus a kshiftlw/kshiftrw $12 pair that
; keeps only the low four mask bits before kmovw.
4020define zeroext i32 @test_vpcmpeqq_v4i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
4021; VLX-LABEL: test_vpcmpeqq_v4i1_v32i1_mask:
4022; VLX:       # %bb.0: # %entry
4023; VLX-NEXT:    vpcmpeqq %ymm1, %ymm0, %k0
4024; VLX-NEXT:    kmovd %k0, %eax
4025; VLX-NEXT:    vzeroupper
4026; VLX-NEXT:    retq
4027;
4028; NoVLX-LABEL: test_vpcmpeqq_v4i1_v32i1_mask:
4029; NoVLX:       # %bb.0: # %entry
4030; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
4031; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
4032; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0
4033; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
4034; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
4035; NoVLX-NEXT:    kmovw %k0, %eax
4036; NoVLX-NEXT:    vzeroupper
4037; NoVLX-NEXT:    retq
4038entry:
4039  %0 = bitcast <4 x i64> %__a to <4 x i64>
4040  %1 = bitcast <4 x i64> %__b to <4 x i64>
4041  %2 = icmp eq <4 x i64> %0, %1
4042  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
4043  %4 = bitcast <32 x i1> %3 to i32
4044  ret i32 %4
4045}
4046
; Unmasked qword equality compare of %__a against a <4 x i64> loaded from
; %__b, result returned as an i32 mask.
; The <4 x i1> icmp result is padded with zero lanes to <32 x i1> before the
; bitcast to i32.
; The avx512vl run expects the load folded into a ymm vpcmpeqq (%rdi); the
; avx512f-only run expects a separate vmovdqa into ymm1, a zmm compare, and a
; kshiftlw/kshiftrw $12 pair that keeps only the low four mask bits.
4047define zeroext i32 @test_vpcmpeqq_v4i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
4048; VLX-LABEL: test_vpcmpeqq_v4i1_v32i1_mask_mem:
4049; VLX:       # %bb.0: # %entry
4050; VLX-NEXT:    vpcmpeqq (%rdi), %ymm0, %k0
4051; VLX-NEXT:    kmovd %k0, %eax
4052; VLX-NEXT:    vzeroupper
4053; VLX-NEXT:    retq
4054;
4055; NoVLX-LABEL: test_vpcmpeqq_v4i1_v32i1_mask_mem:
4056; NoVLX:       # %bb.0: # %entry
4057; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
4058; NoVLX-NEXT:    vmovdqa (%rdi), %ymm1
4059; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0
4060; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
4061; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
4062; NoVLX-NEXT:    kmovw %k0, %eax
4063; NoVLX-NEXT:    vzeroupper
4064; NoVLX-NEXT:    retq
4065entry:
4066  %0 = bitcast <4 x i64> %__a to <4 x i64>
4067  %load = load <4 x i64>, <4 x i64>* %__b
4068  %1 = bitcast <4 x i64> %load to <4 x i64>
4069  %2 = icmp eq <4 x i64> %0, %1
4070  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
4071  %4 = bitcast <32 x i1> %3 to i32
4072  ret i32 %4
4073}
4074
; Masked qword equality compare of two <4 x i64> register operands, result
; returned as an i32 mask.
; The compare result is ANDed with the low four bits of %__u and padded with
; zero lanes to <32 x i1> before the bitcast to i32.
; The avx512vl run expects a ymm vpcmpeqq under {%k1}; the avx512f-only run
; expects the operands widened to zmm plus a kshiftlw/kshiftrw $12 pair that
; keeps only the low four mask bits.
4075define zeroext i32 @test_masked_vpcmpeqq_v4i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
4076; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask:
4077; VLX:       # %bb.0: # %entry
4078; VLX-NEXT:    kmovd %edi, %k1
4079; VLX-NEXT:    vpcmpeqq %ymm1, %ymm0, %k0 {%k1}
4080; VLX-NEXT:    kmovd %k0, %eax
4081; VLX-NEXT:    vzeroupper
4082; VLX-NEXT:    retq
4083;
4084; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask:
4085; NoVLX:       # %bb.0: # %entry
4086; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
4087; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
4088; NoVLX-NEXT:    kmovw %edi, %k1
4089; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
4090; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
4091; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
4092; NoVLX-NEXT:    kmovw %k0, %eax
4093; NoVLX-NEXT:    vzeroupper
4094; NoVLX-NEXT:    retq
4095entry:
4096  %0 = bitcast <4 x i64> %__a to <4 x i64>
4097  %1 = bitcast <4 x i64> %__b to <4 x i64>
4098  %2 = icmp eq <4 x i64> %0, %1
4099  %3 = bitcast i8 %__u to <8 x i1>
4100  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4101  %4 = and <4 x i1> %2, %extract.i
4102  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
4103  %6 = bitcast <32 x i1> %5 to i32
4104  ret i32 %6
4105}
4106
; Masked qword equality compare of %__a against a <4 x i64> loaded from %__b,
; result returned as an i32 mask.
; The compare result is ANDed with the low four bits of %__u and padded with
; zero lanes to <32 x i1> before the bitcast to i32.
; The avx512vl run expects the load folded into a ymm vpcmpeqq (%rsi) under
; {%k1}; the avx512f-only run expects vmovdqa into ymm1, a zmm compare, and a
; kshiftlw/kshiftrw $12 pair that keeps only the low four mask bits.
4107define zeroext i32 @test_masked_vpcmpeqq_v4i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
4108; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask_mem:
4109; VLX:       # %bb.0: # %entry
4110; VLX-NEXT:    kmovd %edi, %k1
4111; VLX-NEXT:    vpcmpeqq (%rsi), %ymm0, %k0 {%k1}
4112; VLX-NEXT:    kmovd %k0, %eax
4113; VLX-NEXT:    vzeroupper
4114; VLX-NEXT:    retq
4115;
4116; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask_mem:
4117; NoVLX:       # %bb.0: # %entry
4118; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
4119; NoVLX-NEXT:    vmovdqa (%rsi), %ymm1
4120; NoVLX-NEXT:    kmovw %edi, %k1
4121; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
4122; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
4123; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
4124; NoVLX-NEXT:    kmovw %k0, %eax
4125; NoVLX-NEXT:    vzeroupper
4126; NoVLX-NEXT:    retq
4127entry:
4128  %0 = bitcast <4 x i64> %__a to <4 x i64>
4129  %load = load <4 x i64>, <4 x i64>* %__b
4130  %1 = bitcast <4 x i64> %load to <4 x i64>
4131  %2 = icmp eq <4 x i64> %0, %1
4132  %3 = bitcast i8 %__u to <8 x i1>
4133  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4134  %4 = and <4 x i1> %2, %extract.i
4135  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
4136  %6 = bitcast <32 x i1> %5 to i32
4137  ret i32 %6
4138}
4139
4140
; Unmasked qword equality compare of %__a against a scalar i64 loaded from
; %__b and splatted to all four lanes, result returned as an i32 mask.
; The <4 x i1> icmp result is padded with zero lanes to <32 x i1> before the
; bitcast to i32.
; The avx512vl run expects the splat folded as a {1to4} broadcast memory
; operand; the avx512f-only run expects vpbroadcastq into ymm1, a zmm compare,
; and a kshiftlw/kshiftrw $12 pair that keeps only the low four mask bits.
4141define zeroext i32 @test_vpcmpeqq_v4i1_v32i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
4142; VLX-LABEL: test_vpcmpeqq_v4i1_v32i1_mask_mem_b:
4143; VLX:       # %bb.0: # %entry
4144; VLX-NEXT:    vpcmpeqq (%rdi){1to4}, %ymm0, %k0
4145; VLX-NEXT:    kmovd %k0, %eax
4146; VLX-NEXT:    vzeroupper
4147; VLX-NEXT:    retq
4148;
4149; NoVLX-LABEL: test_vpcmpeqq_v4i1_v32i1_mask_mem_b:
4150; NoVLX:       # %bb.0: # %entry
4151; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
4152; NoVLX-NEXT:    vpbroadcastq (%rdi), %ymm1
4153; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0
4154; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
4155; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
4156; NoVLX-NEXT:    kmovw %k0, %eax
4157; NoVLX-NEXT:    vzeroupper
4158; NoVLX-NEXT:    retq
4159entry:
4160  %0 = bitcast <4 x i64> %__a to <4 x i64>
4161  %load = load i64, i64* %__b
4162  %vec = insertelement <4 x i64> undef, i64 %load, i32 0
4163  %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
4164  %2 = icmp eq <4 x i64> %0, %1
4165  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
4166  %4 = bitcast <32 x i1> %3 to i32
4167  ret i32 %4
4168}
4169
; Masked qword equality compare of %__a against a scalar i64 loaded from %__b
; and splatted to all four lanes, result returned as an i32 mask.
; The low four bits of %__u are ANDed with the compare result (mask is the
; left operand of the 'and' here), then the value is padded with zero lanes to
; <32 x i1> before the bitcast to i32.
; The avx512vl run expects a {1to4} broadcast operand under {%k1}; the
; avx512f-only run expects vpbroadcastq into ymm1, a zmm compare, and a
; kshiftlw/kshiftrw $12 pair that keeps only the low four mask bits.
4170define zeroext i32 @test_masked_vpcmpeqq_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
4171; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask_mem_b:
4172; VLX:       # %bb.0: # %entry
4173; VLX-NEXT:    kmovd %edi, %k1
4174; VLX-NEXT:    vpcmpeqq (%rsi){1to4}, %ymm0, %k0 {%k1}
4175; VLX-NEXT:    kmovd %k0, %eax
4176; VLX-NEXT:    vzeroupper
4177; VLX-NEXT:    retq
4178;
4179; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask_mem_b:
4180; NoVLX:       # %bb.0: # %entry
4181; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
4182; NoVLX-NEXT:    vpbroadcastq (%rsi), %ymm1
4183; NoVLX-NEXT:    kmovw %edi, %k1
4184; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
4185; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
4186; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
4187; NoVLX-NEXT:    kmovw %k0, %eax
4188; NoVLX-NEXT:    vzeroupper
4189; NoVLX-NEXT:    retq
4190entry:
4191  %0 = bitcast <4 x i64> %__a to <4 x i64>
4192  %load = load i64, i64* %__b
4193  %vec = insertelement <4 x i64> undef, i64 %load, i32 0
4194  %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
4195  %2 = icmp eq <4 x i64> %0, %1
4196  %3 = bitcast i8 %__u to <8 x i1>
4197  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4198  %4 = and <4 x i1> %extract.i, %2
4199  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
4200  %6 = bitcast <32 x i1> %5 to i32
4201  ret i32 %6
4202}
4203
4204
; Unmasked qword equality compare of two <4 x i64> register operands, result
; returned as an i64 mask.
; The <4 x i1> icmp result is padded to <64 x i1>; every shuffle index above 3
; selects a lane of the zeroinitializer operand, so bits 4-63 are zero.
; The avx512vl run expects a ymm vpcmpeqq and kmovq into rax; the avx512f-only
; run expects a zmm compare, a kshiftlw/kshiftrw $12 pair that keeps only the
; low four mask bits, then kmovw followed by movzwl %ax, %eax.
4205define zeroext i64 @test_vpcmpeqq_v4i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
4206; VLX-LABEL: test_vpcmpeqq_v4i1_v64i1_mask:
4207; VLX:       # %bb.0: # %entry
4208; VLX-NEXT:    vpcmpeqq %ymm1, %ymm0, %k0
4209; VLX-NEXT:    kmovq %k0, %rax
4210; VLX-NEXT:    vzeroupper
4211; VLX-NEXT:    retq
4212;
4213; NoVLX-LABEL: test_vpcmpeqq_v4i1_v64i1_mask:
4214; NoVLX:       # %bb.0: # %entry
4215; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
4216; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
4217; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0
4218; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
4219; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
4220; NoVLX-NEXT:    kmovw %k0, %eax
4221; NoVLX-NEXT:    movzwl %ax, %eax
4222; NoVLX-NEXT:    vzeroupper
4223; NoVLX-NEXT:    retq
4224entry:
4225  %0 = bitcast <4 x i64> %__a to <4 x i64>
4226  %1 = bitcast <4 x i64> %__b to <4 x i64>
4227  %2 = icmp eq <4 x i64> %0, %1
4228  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
4229  %4 = bitcast <64 x i1> %3 to i64
4230  ret i64 %4
4231}
4232
; Unmasked qword equality compare of %__a against a <4 x i64> loaded from
; %__b, result returned as an i64 mask.
; The <4 x i1> icmp result is padded with zero lanes to <64 x i1> before the
; bitcast to i64.
; The avx512vl run expects the load folded into a ymm vpcmpeqq (%rdi) and kmovq
; into rax; the avx512f-only run expects vmovdqa into ymm1, a zmm compare, a
; kshiftlw/kshiftrw $12 pair, then kmovw followed by movzwl %ax, %eax.
4233define zeroext i64 @test_vpcmpeqq_v4i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
4234; VLX-LABEL: test_vpcmpeqq_v4i1_v64i1_mask_mem:
4235; VLX:       # %bb.0: # %entry
4236; VLX-NEXT:    vpcmpeqq (%rdi), %ymm0, %k0
4237; VLX-NEXT:    kmovq %k0, %rax
4238; VLX-NEXT:    vzeroupper
4239; VLX-NEXT:    retq
4240;
4241; NoVLX-LABEL: test_vpcmpeqq_v4i1_v64i1_mask_mem:
4242; NoVLX:       # %bb.0: # %entry
4243; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
4244; NoVLX-NEXT:    vmovdqa (%rdi), %ymm1
4245; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0
4246; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
4247; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
4248; NoVLX-NEXT:    kmovw %k0, %eax
4249; NoVLX-NEXT:    movzwl %ax, %eax
4250; NoVLX-NEXT:    vzeroupper
4251; NoVLX-NEXT:    retq
4252entry:
4253  %0 = bitcast <4 x i64> %__a to <4 x i64>
4254  %load = load <4 x i64>, <4 x i64>* %__b
4255  %1 = bitcast <4 x i64> %load to <4 x i64>
4256  %2 = icmp eq <4 x i64> %0, %1
4257  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
4258  %4 = bitcast <64 x i1> %3 to i64
4259  ret i64 %4
4260}
4261
; Masked qword equality compare of two <4 x i64> register operands, result
; returned as an i64 mask.
; The compare result is ANDed with the low four bits of %__u and padded with
; zero lanes to <64 x i1> before the bitcast to i64.
; The avx512vl run expects a ymm vpcmpeqq under {%k1} and kmovq into rax; the
; avx512f-only run expects a zmm compare, a kshiftlw/kshiftrw $12 pair, then
; kmovw followed by movzwl %ax, %eax.
4262define zeroext i64 @test_masked_vpcmpeqq_v4i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
4263; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v64i1_mask:
4264; VLX:       # %bb.0: # %entry
4265; VLX-NEXT:    kmovd %edi, %k1
4266; VLX-NEXT:    vpcmpeqq %ymm1, %ymm0, %k0 {%k1}
4267; VLX-NEXT:    kmovq %k0, %rax
4268; VLX-NEXT:    vzeroupper
4269; VLX-NEXT:    retq
4270;
4271; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v64i1_mask:
4272; NoVLX:       # %bb.0: # %entry
4273; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
4274; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
4275; NoVLX-NEXT:    kmovw %edi, %k1
4276; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
4277; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
4278; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
4279; NoVLX-NEXT:    kmovw %k0, %eax
4280; NoVLX-NEXT:    movzwl %ax, %eax
4281; NoVLX-NEXT:    vzeroupper
4282; NoVLX-NEXT:    retq
4283entry:
4284  %0 = bitcast <4 x i64> %__a to <4 x i64>
4285  %1 = bitcast <4 x i64> %__b to <4 x i64>
4286  %2 = icmp eq <4 x i64> %0, %1
4287  %3 = bitcast i8 %__u to <8 x i1>
4288  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4289  %4 = and <4 x i1> %2, %extract.i
4290  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
4291  %6 = bitcast <64 x i1> %5 to i64
4292  ret i64 %6
4293}
4294
; Masked qword equality compare of %__a against a <4 x i64> loaded from %__b,
; result returned as an i64 mask.
; The compare result is ANDed with the low four bits of %__u and padded with
; zero lanes to <64 x i1> before the bitcast to i64.
; The avx512vl run expects the load folded into a ymm vpcmpeqq (%rsi) under
; {%k1} and kmovq into rax; the avx512f-only run expects vmovdqa into ymm1, a
; zmm compare, a kshiftlw/kshiftrw $12 pair, then kmovw and movzwl %ax, %eax.
4295define zeroext i64 @test_masked_vpcmpeqq_v4i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
4296; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v64i1_mask_mem:
4297; VLX:       # %bb.0: # %entry
4298; VLX-NEXT:    kmovd %edi, %k1
4299; VLX-NEXT:    vpcmpeqq (%rsi), %ymm0, %k0 {%k1}
4300; VLX-NEXT:    kmovq %k0, %rax
4301; VLX-NEXT:    vzeroupper
4302; VLX-NEXT:    retq
4303;
4304; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v64i1_mask_mem:
4305; NoVLX:       # %bb.0: # %entry
4306; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
4307; NoVLX-NEXT:    vmovdqa (%rsi), %ymm1
4308; NoVLX-NEXT:    kmovw %edi, %k1
4309; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
4310; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
4311; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
4312; NoVLX-NEXT:    kmovw %k0, %eax
4313; NoVLX-NEXT:    movzwl %ax, %eax
4314; NoVLX-NEXT:    vzeroupper
4315; NoVLX-NEXT:    retq
4316entry:
4317  %0 = bitcast <4 x i64> %__a to <4 x i64>
4318  %load = load <4 x i64>, <4 x i64>* %__b
4319  %1 = bitcast <4 x i64> %load to <4 x i64>
4320  %2 = icmp eq <4 x i64> %0, %1
4321  %3 = bitcast i8 %__u to <8 x i1>
4322  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4323  %4 = and <4 x i1> %2, %extract.i
4324  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
4325  %6 = bitcast <64 x i1> %5 to i64
4326  ret i64 %6
4327}
4328
4329
; Unmasked qword equality compare of %__a against a scalar i64 loaded from
; %__b and splatted to all four lanes, result returned as an i64 mask.
; The <4 x i1> icmp result is padded with zero lanes to <64 x i1> before the
; bitcast to i64.
; The avx512vl run expects a {1to4} broadcast memory operand and kmovq into
; rax; the avx512f-only run expects vpbroadcastq into ymm1, a zmm compare, a
; kshiftlw/kshiftrw $12 pair, then kmovw followed by movzwl %ax, %eax.
4330define zeroext i64 @test_vpcmpeqq_v4i1_v64i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
4331; VLX-LABEL: test_vpcmpeqq_v4i1_v64i1_mask_mem_b:
4332; VLX:       # %bb.0: # %entry
4333; VLX-NEXT:    vpcmpeqq (%rdi){1to4}, %ymm0, %k0
4334; VLX-NEXT:    kmovq %k0, %rax
4335; VLX-NEXT:    vzeroupper
4336; VLX-NEXT:    retq
4337;
4338; NoVLX-LABEL: test_vpcmpeqq_v4i1_v64i1_mask_mem_b:
4339; NoVLX:       # %bb.0: # %entry
4340; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
4341; NoVLX-NEXT:    vpbroadcastq (%rdi), %ymm1
4342; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0
4343; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
4344; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
4345; NoVLX-NEXT:    kmovw %k0, %eax
4346; NoVLX-NEXT:    movzwl %ax, %eax
4347; NoVLX-NEXT:    vzeroupper
4348; NoVLX-NEXT:    retq
4349entry:
4350  %0 = bitcast <4 x i64> %__a to <4 x i64>
4351  %load = load i64, i64* %__b
4352  %vec = insertelement <4 x i64> undef, i64 %load, i32 0
4353  %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
4354  %2 = icmp eq <4 x i64> %0, %1
4355  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
4356  %4 = bitcast <64 x i1> %3 to i64
4357  ret i64 %4
4358}
4359
; Masked qword equality compare of %__a against a scalar i64 loaded from %__b
; and splatted to all four lanes, result returned as an i64 mask.
; The low four bits of %__u are ANDed with the compare result (mask is the
; left operand of the 'and' here), then the value is padded with zero lanes to
; <64 x i1> before the bitcast to i64.
; The avx512vl run expects a {1to4} broadcast operand under {%k1} and kmovq
; into rax; the avx512f-only run expects vpbroadcastq into ymm1, a zmm compare,
; a kshiftlw/kshiftrw $12 pair, then kmovw followed by movzwl %ax, %eax.
4360define zeroext i64 @test_masked_vpcmpeqq_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
4361; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v64i1_mask_mem_b:
4362; VLX:       # %bb.0: # %entry
4363; VLX-NEXT:    kmovd %edi, %k1
4364; VLX-NEXT:    vpcmpeqq (%rsi){1to4}, %ymm0, %k0 {%k1}
4365; VLX-NEXT:    kmovq %k0, %rax
4366; VLX-NEXT:    vzeroupper
4367; VLX-NEXT:    retq
4368;
4369; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v64i1_mask_mem_b:
4370; NoVLX:       # %bb.0: # %entry
4371; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
4372; NoVLX-NEXT:    vpbroadcastq (%rsi), %ymm1
4373; NoVLX-NEXT:    kmovw %edi, %k1
4374; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
4375; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
4376; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
4377; NoVLX-NEXT:    kmovw %k0, %eax
4378; NoVLX-NEXT:    movzwl %ax, %eax
4379; NoVLX-NEXT:    vzeroupper
4380; NoVLX-NEXT:    retq
4381entry:
4382  %0 = bitcast <4 x i64> %__a to <4 x i64>
4383  %load = load i64, i64* %__b
4384  %vec = insertelement <4 x i64> undef, i64 %load, i32 0
4385  %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
4386  %2 = icmp eq <4 x i64> %0, %1
4387  %3 = bitcast i8 %__u to <8 x i1>
4388  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4389  %4 = and <4 x i1> %extract.i, %2
4390  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
4391  %6 = bitcast <64 x i1> %5 to i64
4392  ret i64 %6
4393}
4394
4395
; Unmasked qword equality compare of two <8 x i64> register operands, result
; returned as an i16 mask.
; The <8 x i1> icmp result is padded to <16 x i1>; shuffle indices 8-15 select
; lanes of the zeroinitializer operand, so bits 8-15 are zero.
; Both runs expect a single zmm vpcmpeqq with no mask-shift cleanup; they
; differ only in the mask move (kmovd with avx512bw, kmovw with avx512f only).
4396define zeroext i16 @test_vpcmpeqq_v8i1_v16i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
4397; VLX-LABEL: test_vpcmpeqq_v8i1_v16i1_mask:
4398; VLX:       # %bb.0: # %entry
4399; VLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0
4400; VLX-NEXT:    kmovd %k0, %eax
4401; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
4402; VLX-NEXT:    vzeroupper
4403; VLX-NEXT:    retq
4404;
4405; NoVLX-LABEL: test_vpcmpeqq_v8i1_v16i1_mask:
4406; NoVLX:       # %bb.0: # %entry
4407; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0
4408; NoVLX-NEXT:    kmovw %k0, %eax
4409; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
4410; NoVLX-NEXT:    vzeroupper
4411; NoVLX-NEXT:    retq
4412entry:
4413  %0 = bitcast <8 x i64> %__a to <8 x i64>
4414  %1 = bitcast <8 x i64> %__b to <8 x i64>
4415  %2 = icmp eq <8 x i64> %0, %1
4416  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4417  %4 = bitcast <16 x i1> %3 to i16
4418  ret i16 %4
4419}
4420
; Unmasked qword equality compare of %__a against an <8 x i64> loaded from
; %__b, result returned as an i16 mask.
; The <8 x i1> icmp result is padded with zero lanes to <16 x i1> before the
; bitcast to i16.
; Both runs expect the load folded into a zmm vpcmpeqq (%rdi); they differ only
; in the mask move (kmovd with avx512bw, kmovw with avx512f only).
4421define zeroext i16 @test_vpcmpeqq_v8i1_v16i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
4422; VLX-LABEL: test_vpcmpeqq_v8i1_v16i1_mask_mem:
4423; VLX:       # %bb.0: # %entry
4424; VLX-NEXT:    vpcmpeqq (%rdi), %zmm0, %k0
4425; VLX-NEXT:    kmovd %k0, %eax
4426; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
4427; VLX-NEXT:    vzeroupper
4428; VLX-NEXT:    retq
4429;
4430; NoVLX-LABEL: test_vpcmpeqq_v8i1_v16i1_mask_mem:
4431; NoVLX:       # %bb.0: # %entry
4432; NoVLX-NEXT:    vpcmpeqq (%rdi), %zmm0, %k0
4433; NoVLX-NEXT:    kmovw %k0, %eax
4434; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
4435; NoVLX-NEXT:    vzeroupper
4436; NoVLX-NEXT:    retq
4437entry:
4438  %0 = bitcast <8 x i64> %__a to <8 x i64>
4439  %load = load <8 x i64>, <8 x i64>* %__b
4440  %1 = bitcast <8 x i64> %load to <8 x i64>
4441  %2 = icmp eq <8 x i64> %0, %1
4442  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4443  %4 = bitcast <16 x i1> %3 to i16
4444  ret i16 %4
4445}
4446
; Masked qword equality compare of two <8 x i64> register operands, result
; returned as an i16 mask.
; The <8 x i1> icmp result is ANDed with all eight bits of %__u, padded with
; zero lanes to <16 x i1>, and bitcast to i16.
; Both runs expect the mask moved into k1 and a zmm vpcmpeqq under {%k1}; they
; differ only in the mask-move mnemonics (kmovd vs. kmovw).
4447define zeroext i16 @test_masked_vpcmpeqq_v8i1_v16i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
4448; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v16i1_mask:
4449; VLX:       # %bb.0: # %entry
4450; VLX-NEXT:    kmovd %edi, %k1
4451; VLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
4452; VLX-NEXT:    kmovd %k0, %eax
4453; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
4454; VLX-NEXT:    vzeroupper
4455; VLX-NEXT:    retq
4456;
4457; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v16i1_mask:
4458; NoVLX:       # %bb.0: # %entry
4459; NoVLX-NEXT:    kmovw %edi, %k1
4460; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
4461; NoVLX-NEXT:    kmovw %k0, %eax
4462; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
4463; NoVLX-NEXT:    vzeroupper
4464; NoVLX-NEXT:    retq
4465entry:
4466  %0 = bitcast <8 x i64> %__a to <8 x i64>
4467  %1 = bitcast <8 x i64> %__b to <8 x i64>
4468  %2 = icmp eq <8 x i64> %0, %1
4469  %3 = bitcast i8 %__u to <8 x i1>
4470  %4 = and <8 x i1> %2, %3
4471  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4472  %6 = bitcast <16 x i1> %5 to i16
4473  ret i16 %6
4474}
4475
; Masked qword equality compare of %__a against an <8 x i64> loaded from
; %__b, result returned as an i16 mask.
; The <8 x i1> icmp result is ANDed with all eight bits of %__u, padded with
; zero lanes to <16 x i1>, and bitcast to i16.
; Both runs expect the load folded into a zmm vpcmpeqq (%rsi) under {%k1}; they
; differ only in the mask-move mnemonics (kmovd vs. kmovw).
4476define zeroext i16 @test_masked_vpcmpeqq_v8i1_v16i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
4477; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v16i1_mask_mem:
4478; VLX:       # %bb.0: # %entry
4479; VLX-NEXT:    kmovd %edi, %k1
4480; VLX-NEXT:    vpcmpeqq (%rsi), %zmm0, %k0 {%k1}
4481; VLX-NEXT:    kmovd %k0, %eax
4482; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
4483; VLX-NEXT:    vzeroupper
4484; VLX-NEXT:    retq
4485;
4486; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v16i1_mask_mem:
4487; NoVLX:       # %bb.0: # %entry
4488; NoVLX-NEXT:    kmovw %edi, %k1
4489; NoVLX-NEXT:    vpcmpeqq (%rsi), %zmm0, %k0 {%k1}
4490; NoVLX-NEXT:    kmovw %k0, %eax
4491; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
4492; NoVLX-NEXT:    vzeroupper
4493; NoVLX-NEXT:    retq
4494entry:
4495  %0 = bitcast <8 x i64> %__a to <8 x i64>
4496  %load = load <8 x i64>, <8 x i64>* %__b
4497  %1 = bitcast <8 x i64> %load to <8 x i64>
4498  %2 = icmp eq <8 x i64> %0, %1
4499  %3 = bitcast i8 %__u to <8 x i1>
4500  %4 = and <8 x i1> %2, %3
4501  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4502  %6 = bitcast <16 x i1> %5 to i16
4503  ret i16 %6
4504}
4505
4506
; Unmasked qword equality compare of %__a against a scalar i64 loaded from
; %__b and splatted to all eight lanes (insertelement + all-zero shuffle mask).
; The <8 x i1> icmp result is padded with zero lanes to <16 x i1> and returned
; as i16.
; Both runs expect the splat folded as a {1to8} broadcast memory operand of a
; zmm vpcmpeqq; they differ only in the mask move (kmovd vs. kmovw).
4507define zeroext i16 @test_vpcmpeqq_v8i1_v16i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
4508; VLX-LABEL: test_vpcmpeqq_v8i1_v16i1_mask_mem_b:
4509; VLX:       # %bb.0: # %entry
4510; VLX-NEXT:    vpcmpeqq (%rdi){1to8}, %zmm0, %k0
4511; VLX-NEXT:    kmovd %k0, %eax
4512; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
4513; VLX-NEXT:    vzeroupper
4514; VLX-NEXT:    retq
4515;
4516; NoVLX-LABEL: test_vpcmpeqq_v8i1_v16i1_mask_mem_b:
4517; NoVLX:       # %bb.0: # %entry
4518; NoVLX-NEXT:    vpcmpeqq (%rdi){1to8}, %zmm0, %k0
4519; NoVLX-NEXT:    kmovw %k0, %eax
4520; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
4521; NoVLX-NEXT:    vzeroupper
4522; NoVLX-NEXT:    retq
4523entry:
4524  %0 = bitcast <8 x i64> %__a to <8 x i64>
4525  %load = load i64, i64* %__b
4526  %vec = insertelement <8 x i64> undef, i64 %load, i32 0
4527  %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
4528  %2 = icmp eq <8 x i64> %0, %1
4529  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4530  %4 = bitcast <16 x i1> %3 to i16
4531  ret i16 %4
4532}
4533
; Masked qword equality compare of %__a against a scalar i64 loaded from %__b
; and splatted to all eight lanes, result returned as an i16 mask.
; All eight bits of %__u are ANDed with the compare result (mask is the left
; operand of the 'and' here), then the value is padded with zero lanes to
; <16 x i1> and bitcast to i16.
; Both runs expect a {1to8} broadcast operand of a zmm vpcmpeqq under {%k1};
; they differ only in the mask-move mnemonics (kmovd vs. kmovw).
4534define zeroext i16 @test_masked_vpcmpeqq_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
4535; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v16i1_mask_mem_b:
4536; VLX:       # %bb.0: # %entry
4537; VLX-NEXT:    kmovd %edi, %k1
4538; VLX-NEXT:    vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
4539; VLX-NEXT:    kmovd %k0, %eax
4540; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
4541; VLX-NEXT:    vzeroupper
4542; VLX-NEXT:    retq
4543;
4544; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v16i1_mask_mem_b:
4545; NoVLX:       # %bb.0: # %entry
4546; NoVLX-NEXT:    kmovw %edi, %k1
4547; NoVLX-NEXT:    vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
4548; NoVLX-NEXT:    kmovw %k0, %eax
4549; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
4550; NoVLX-NEXT:    vzeroupper
4551; NoVLX-NEXT:    retq
4552entry:
4553  %0 = bitcast <8 x i64> %__a to <8 x i64>
4554  %load = load i64, i64* %__b
4555  %vec = insertelement <8 x i64> undef, i64 %load, i32 0
4556  %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
4557  %2 = icmp eq <8 x i64> %0, %1
4558  %3 = bitcast i8 %__u to <8 x i1>
4559  %4 = and <8 x i1> %3, %2
4560  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4561  %6 = bitcast <16 x i1> %5 to i16
4562  ret i16 %6
4563}
4564
4565
; Lane-wise equality of two <8 x i64> register operands (the bitcasts are
; identity casts). The <8 x i1> result is widened to <32 x i1>: every shuffle
; index past 7 lies in 8-15 and therefore selects a lane of the
; zeroinitializer operand, so result bits 8-31 are zero.
; Both run lines are asserted to emit one vpcmpeqq into %k0 followed by a
; k-register move into %eax.
4566define zeroext i32 @test_vpcmpeqq_v8i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
4567; VLX-LABEL: test_vpcmpeqq_v8i1_v32i1_mask:
4568; VLX:       # %bb.0: # %entry
4569; VLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0
4570; VLX-NEXT:    kmovd %k0, %eax
4571; VLX-NEXT:    vzeroupper
4572; VLX-NEXT:    retq
4573;
4574; NoVLX-LABEL: test_vpcmpeqq_v8i1_v32i1_mask:
4575; NoVLX:       # %bb.0: # %entry
4576; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0
4577; NoVLX-NEXT:    kmovw %k0, %eax
4578; NoVLX-NEXT:    vzeroupper
4579; NoVLX-NEXT:    retq
4580entry:
4581  %0 = bitcast <8 x i64> %__a to <8 x i64>
4582  %1 = bitcast <8 x i64> %__b to <8 x i64>
4583  %2 = icmp eq <8 x i64> %0, %1
4584  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4585  %4 = bitcast <32 x i1> %3 to i32
4586  ret i32 %4
4587}
4588
; Same compare as the register form, but the second <8 x i64> operand is
; loaded from %__b. The <8 x i1> result is widened to <32 x i1> with shuffle
; indices 8-15 selecting zeroinitializer lanes, so result bits 8-31 are zero.
; Both run lines are asserted to fold the load into the compare as a (%rdi)
; memory operand.
4589define zeroext i32 @test_vpcmpeqq_v8i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
4590; VLX-LABEL: test_vpcmpeqq_v8i1_v32i1_mask_mem:
4591; VLX:       # %bb.0: # %entry
4592; VLX-NEXT:    vpcmpeqq (%rdi), %zmm0, %k0
4593; VLX-NEXT:    kmovd %k0, %eax
4594; VLX-NEXT:    vzeroupper
4595; VLX-NEXT:    retq
4596;
4597; NoVLX-LABEL: test_vpcmpeqq_v8i1_v32i1_mask_mem:
4598; NoVLX:       # %bb.0: # %entry
4599; NoVLX-NEXT:    vpcmpeqq (%rdi), %zmm0, %k0
4600; NoVLX-NEXT:    kmovw %k0, %eax
4601; NoVLX-NEXT:    vzeroupper
4602; NoVLX-NEXT:    retq
4603entry:
4604  %0 = bitcast <8 x i64> %__a to <8 x i64>
4605  %load = load <8 x i64>, <8 x i64>* %__b
4606  %1 = bitcast <8 x i64> %load to <8 x i64>
4607  %2 = icmp eq <8 x i64> %0, %1
4608  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4609  %4 = bitcast <32 x i1> %3 to i32
4610  ret i32 %4
4611}
4612
; Masked equality compare of two <8 x i64> register operands: the <8 x i1>
; compare result is ANDed with the bits of %__u, then widened to <32 x i1>
; (shuffle indices 8-15 select zeroinitializer lanes, so bits 8-31 are zero).
; Both run lines are asserted to move %__u into %k1 and use it as the
; write-mask of a single vpcmpeqq.
4613define zeroext i32 @test_masked_vpcmpeqq_v8i1_v32i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
4614; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v32i1_mask:
4615; VLX:       # %bb.0: # %entry
4616; VLX-NEXT:    kmovd %edi, %k1
4617; VLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
4618; VLX-NEXT:    kmovd %k0, %eax
4619; VLX-NEXT:    vzeroupper
4620; VLX-NEXT:    retq
4621;
4622; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v32i1_mask:
4623; NoVLX:       # %bb.0: # %entry
4624; NoVLX-NEXT:    kmovw %edi, %k1
4625; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
4626; NoVLX-NEXT:    kmovw %k0, %eax
4627; NoVLX-NEXT:    vzeroupper
4628; NoVLX-NEXT:    retq
4629entry:
4630  %0 = bitcast <8 x i64> %__a to <8 x i64>
4631  %1 = bitcast <8 x i64> %__b to <8 x i64>
4632  %2 = icmp eq <8 x i64> %0, %1
4633  %3 = bitcast i8 %__u to <8 x i1>
4634  %4 = and <8 x i1> %2, %3
4635  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4636  %6 = bitcast <32 x i1> %5 to i32
4637  ret i32 %6
4638}
4639
; Masked equality compare of <8 x i64> %__a against a full vector loaded from
; %__b. The <8 x i1> result is ANDed with the bits of %__u and widened to
; <32 x i1>; shuffle indices 8-15 select zeroinitializer lanes, so bits 8-31
; are zero.
; Both run lines are asserted to fold the load as a (%rsi) memory operand and
; to apply %__u through the %k1 write-mask.
4640define zeroext i32 @test_masked_vpcmpeqq_v8i1_v32i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
4641; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v32i1_mask_mem:
4642; VLX:       # %bb.0: # %entry
4643; VLX-NEXT:    kmovd %edi, %k1
4644; VLX-NEXT:    vpcmpeqq (%rsi), %zmm0, %k0 {%k1}
4645; VLX-NEXT:    kmovd %k0, %eax
4646; VLX-NEXT:    vzeroupper
4647; VLX-NEXT:    retq
4648;
4649; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v32i1_mask_mem:
4650; NoVLX:       # %bb.0: # %entry
4651; NoVLX-NEXT:    kmovw %edi, %k1
4652; NoVLX-NEXT:    vpcmpeqq (%rsi), %zmm0, %k0 {%k1}
4653; NoVLX-NEXT:    kmovw %k0, %eax
4654; NoVLX-NEXT:    vzeroupper
4655; NoVLX-NEXT:    retq
4656entry:
4657  %0 = bitcast <8 x i64> %__a to <8 x i64>
4658  %load = load <8 x i64>, <8 x i64>* %__b
4659  %1 = bitcast <8 x i64> %load to <8 x i64>
4660  %2 = icmp eq <8 x i64> %0, %1
4661  %3 = bitcast i8 %__u to <8 x i1>
4662  %4 = and <8 x i1> %2, %3
4663  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4664  %6 = bitcast <32 x i1> %5 to i32
4665  ret i32 %6
4666}
4667
4668
; Equality compare of <8 x i64> %__a against a scalar i64 loaded from %__b and
; splatted to all eight lanes (insertelement into lane 0 plus an all-zero
; shuffle mask). The <8 x i1> result is widened to <32 x i1>; shuffle indices
; 8-15 select zeroinitializer lanes, so bits 8-31 are zero.
; Both run lines are asserted to fold the splat into a {1to8} broadcast
; memory operand.
4669define zeroext i32 @test_vpcmpeqq_v8i1_v32i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
4670; VLX-LABEL: test_vpcmpeqq_v8i1_v32i1_mask_mem_b:
4671; VLX:       # %bb.0: # %entry
4672; VLX-NEXT:    vpcmpeqq (%rdi){1to8}, %zmm0, %k0
4673; VLX-NEXT:    kmovd %k0, %eax
4674; VLX-NEXT:    vzeroupper
4675; VLX-NEXT:    retq
4676;
4677; NoVLX-LABEL: test_vpcmpeqq_v8i1_v32i1_mask_mem_b:
4678; NoVLX:       # %bb.0: # %entry
4679; NoVLX-NEXT:    vpcmpeqq (%rdi){1to8}, %zmm0, %k0
4680; NoVLX-NEXT:    kmovw %k0, %eax
4681; NoVLX-NEXT:    vzeroupper
4682; NoVLX-NEXT:    retq
4683entry:
4684  %0 = bitcast <8 x i64> %__a to <8 x i64>
4685  %load = load i64, i64* %__b
4686  %vec = insertelement <8 x i64> undef, i64 %load, i32 0
4687  %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
4688  %2 = icmp eq <8 x i64> %0, %1
4689  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4690  %4 = bitcast <32 x i1> %3 to i32
4691  ret i32 %4
4692}
4693
; Masked equality compare of <8 x i64> %__a against a scalar i64 loaded from
; %__b and splatted to all eight lanes. The <8 x i1> result is ANDed with the
; bits of %__u and widened to <32 x i1>; shuffle indices 8-15 select
; zeroinitializer lanes, so bits 8-31 are zero.
; Both run lines are asserted to emit one vpcmpeqq with a {1to8} broadcast
; memory operand and %__u as the %k1 write-mask.
4694define zeroext i32 @test_masked_vpcmpeqq_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
4695; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v32i1_mask_mem_b:
4696; VLX:       # %bb.0: # %entry
4697; VLX-NEXT:    kmovd %edi, %k1
4698; VLX-NEXT:    vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
4699; VLX-NEXT:    kmovd %k0, %eax
4700; VLX-NEXT:    vzeroupper
4701; VLX-NEXT:    retq
4702;
4703; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v32i1_mask_mem_b:
4704; NoVLX:       # %bb.0: # %entry
4705; NoVLX-NEXT:    kmovw %edi, %k1
4706; NoVLX-NEXT:    vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
4707; NoVLX-NEXT:    kmovw %k0, %eax
4708; NoVLX-NEXT:    vzeroupper
4709; NoVLX-NEXT:    retq
4710entry:
4711  %0 = bitcast <8 x i64> %__a to <8 x i64>
4712  %load = load i64, i64* %__b
4713  %vec = insertelement <8 x i64> undef, i64 %load, i32 0
4714  %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
4715  %2 = icmp eq <8 x i64> %0, %1
4716  %3 = bitcast i8 %__u to <8 x i1>
4717  %4 = and <8 x i1> %3, %2
4718  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4719  %6 = bitcast <32 x i1> %5 to i32
4720  ret i32 %6
4721}
4722
4723
; Lane-wise equality of two <8 x i64> register operands with the <8 x i1>
; result widened to <64 x i1> and returned as i64. Every shuffle index past 7
; lies in 8-15 and selects a zeroinitializer lane, so result bits 8-63 are
; zero.
; The AVX512VL/BW/DQ run is asserted to read the mask with kmovq; the
; AVX512F-only run is asserted to use kmovw followed by movzwl.
4724define zeroext i64 @test_vpcmpeqq_v8i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
4725; VLX-LABEL: test_vpcmpeqq_v8i1_v64i1_mask:
4726; VLX:       # %bb.0: # %entry
4727; VLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0
4728; VLX-NEXT:    kmovq %k0, %rax
4729; VLX-NEXT:    vzeroupper
4730; VLX-NEXT:    retq
4731;
4732; NoVLX-LABEL: test_vpcmpeqq_v8i1_v64i1_mask:
4733; NoVLX:       # %bb.0: # %entry
4734; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0
4735; NoVLX-NEXT:    kmovw %k0, %eax
4736; NoVLX-NEXT:    movzwl %ax, %eax
4737; NoVLX-NEXT:    vzeroupper
4738; NoVLX-NEXT:    retq
4739entry:
4740  %0 = bitcast <8 x i64> %__a to <8 x i64>
4741  %1 = bitcast <8 x i64> %__b to <8 x i64>
4742  %2 = icmp eq <8 x i64> %0, %1
4743  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4744  %4 = bitcast <64 x i1> %3 to i64
4745  ret i64 %4
4746}
4747
; Equality compare of <8 x i64> %__a against a full vector loaded from %__b,
; with the <8 x i1> result widened to <64 x i1> and returned as i64. Shuffle
; indices 8-15 select zeroinitializer lanes, so result bits 8-63 are zero.
; Both run lines are asserted to fold the load as a (%rdi) memory operand;
; the AVX512F-only run additionally zero-extends the 16-bit mask with movzwl.
4748define zeroext i64 @test_vpcmpeqq_v8i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
4749; VLX-LABEL: test_vpcmpeqq_v8i1_v64i1_mask_mem:
4750; VLX:       # %bb.0: # %entry
4751; VLX-NEXT:    vpcmpeqq (%rdi), %zmm0, %k0
4752; VLX-NEXT:    kmovq %k0, %rax
4753; VLX-NEXT:    vzeroupper
4754; VLX-NEXT:    retq
4755;
4756; NoVLX-LABEL: test_vpcmpeqq_v8i1_v64i1_mask_mem:
4757; NoVLX:       # %bb.0: # %entry
4758; NoVLX-NEXT:    vpcmpeqq (%rdi), %zmm0, %k0
4759; NoVLX-NEXT:    kmovw %k0, %eax
4760; NoVLX-NEXT:    movzwl %ax, %eax
4761; NoVLX-NEXT:    vzeroupper
4762; NoVLX-NEXT:    retq
4763entry:
4764  %0 = bitcast <8 x i64> %__a to <8 x i64>
4765  %load = load <8 x i64>, <8 x i64>* %__b
4766  %1 = bitcast <8 x i64> %load to <8 x i64>
4767  %2 = icmp eq <8 x i64> %0, %1
4768  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4769  %4 = bitcast <64 x i1> %3 to i64
4770  ret i64 %4
4771}
4772
; Masked equality compare of two <8 x i64> register operands. The <8 x i1>
; result is ANDed with the bits of %__u, widened to <64 x i1> (shuffle indices
; 8-15 select zeroinitializer lanes, so bits 8-63 are zero) and returned as
; i64.
; Both run lines are asserted to apply %__u through the %k1 write-mask; the
; AVX512F-only run reads the mask with kmovw and zero-extends it with movzwl.
4773define zeroext i64 @test_masked_vpcmpeqq_v8i1_v64i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
4774; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v64i1_mask:
4775; VLX:       # %bb.0: # %entry
4776; VLX-NEXT:    kmovd %edi, %k1
4777; VLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
4778; VLX-NEXT:    kmovq %k0, %rax
4779; VLX-NEXT:    vzeroupper
4780; VLX-NEXT:    retq
4781;
4782; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v64i1_mask:
4783; NoVLX:       # %bb.0: # %entry
4784; NoVLX-NEXT:    kmovw %edi, %k1
4785; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
4786; NoVLX-NEXT:    kmovw %k0, %eax
4787; NoVLX-NEXT:    movzwl %ax, %eax
4788; NoVLX-NEXT:    vzeroupper
4789; NoVLX-NEXT:    retq
4790entry:
4791  %0 = bitcast <8 x i64> %__a to <8 x i64>
4792  %1 = bitcast <8 x i64> %__b to <8 x i64>
4793  %2 = icmp eq <8 x i64> %0, %1
4794  %3 = bitcast i8 %__u to <8 x i1>
4795  %4 = and <8 x i1> %2, %3
4796  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4797  %6 = bitcast <64 x i1> %5 to i64
4798  ret i64 %6
4799}
4800
; Masked equality compare of <8 x i64> %__a against a full vector loaded from
; %__b. The <8 x i1> result is ANDed with the bits of %__u, widened to
; <64 x i1> (shuffle indices 8-15 select zeroinitializer lanes, so bits 8-63
; are zero) and returned as i64.
; Both run lines are asserted to fold the load as a (%rsi) memory operand and
; to apply %__u through the %k1 write-mask.
4801define zeroext i64 @test_masked_vpcmpeqq_v8i1_v64i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
4802; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v64i1_mask_mem:
4803; VLX:       # %bb.0: # %entry
4804; VLX-NEXT:    kmovd %edi, %k1
4805; VLX-NEXT:    vpcmpeqq (%rsi), %zmm0, %k0 {%k1}
4806; VLX-NEXT:    kmovq %k0, %rax
4807; VLX-NEXT:    vzeroupper
4808; VLX-NEXT:    retq
4809;
4810; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v64i1_mask_mem:
4811; NoVLX:       # %bb.0: # %entry
4812; NoVLX-NEXT:    kmovw %edi, %k1
4813; NoVLX-NEXT:    vpcmpeqq (%rsi), %zmm0, %k0 {%k1}
4814; NoVLX-NEXT:    kmovw %k0, %eax
4815; NoVLX-NEXT:    movzwl %ax, %eax
4816; NoVLX-NEXT:    vzeroupper
4817; NoVLX-NEXT:    retq
4818entry:
4819  %0 = bitcast <8 x i64> %__a to <8 x i64>
4820  %load = load <8 x i64>, <8 x i64>* %__b
4821  %1 = bitcast <8 x i64> %load to <8 x i64>
4822  %2 = icmp eq <8 x i64> %0, %1
4823  %3 = bitcast i8 %__u to <8 x i1>
4824  %4 = and <8 x i1> %2, %3
4825  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4826  %6 = bitcast <64 x i1> %5 to i64
4827  ret i64 %6
4828}
4829
4830
; Equality compare of <8 x i64> %__a against a scalar i64 loaded from %__b and
; splatted to all eight lanes. The <8 x i1> result is widened to <64 x i1>
; (shuffle indices 8-15 select zeroinitializer lanes, so bits 8-63 are zero)
; and returned as i64.
; Both run lines are asserted to fold the splat into a {1to8} broadcast
; memory operand.
4831define zeroext i64 @test_vpcmpeqq_v8i1_v64i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
4832; VLX-LABEL: test_vpcmpeqq_v8i1_v64i1_mask_mem_b:
4833; VLX:       # %bb.0: # %entry
4834; VLX-NEXT:    vpcmpeqq (%rdi){1to8}, %zmm0, %k0
4835; VLX-NEXT:    kmovq %k0, %rax
4836; VLX-NEXT:    vzeroupper
4837; VLX-NEXT:    retq
4838;
4839; NoVLX-LABEL: test_vpcmpeqq_v8i1_v64i1_mask_mem_b:
4840; NoVLX:       # %bb.0: # %entry
4841; NoVLX-NEXT:    vpcmpeqq (%rdi){1to8}, %zmm0, %k0
4842; NoVLX-NEXT:    kmovw %k0, %eax
4843; NoVLX-NEXT:    movzwl %ax, %eax
4844; NoVLX-NEXT:    vzeroupper
4845; NoVLX-NEXT:    retq
4846entry:
4847  %0 = bitcast <8 x i64> %__a to <8 x i64>
4848  %load = load i64, i64* %__b
4849  %vec = insertelement <8 x i64> undef, i64 %load, i32 0
4850  %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
4851  %2 = icmp eq <8 x i64> %0, %1
4852  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4853  %4 = bitcast <64 x i1> %3 to i64
4854  ret i64 %4
4855}
4856
; Masked equality compare of <8 x i64> %__a against a scalar i64 loaded from
; %__b and splatted to all eight lanes. The <8 x i1> result is ANDed with the
; bits of %__u, widened to <64 x i1> (shuffle indices 8-15 select
; zeroinitializer lanes, so bits 8-63 are zero) and returned as i64.
; Both run lines are asserted to emit one vpcmpeqq with a {1to8} broadcast
; memory operand and %__u as the %k1 write-mask.
4857define zeroext i64 @test_masked_vpcmpeqq_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
4858; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v64i1_mask_mem_b:
4859; VLX:       # %bb.0: # %entry
4860; VLX-NEXT:    kmovd %edi, %k1
4861; VLX-NEXT:    vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
4862; VLX-NEXT:    kmovq %k0, %rax
4863; VLX-NEXT:    vzeroupper
4864; VLX-NEXT:    retq
4865;
4866; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v64i1_mask_mem_b:
4867; NoVLX:       # %bb.0: # %entry
4868; NoVLX-NEXT:    kmovw %edi, %k1
4869; NoVLX-NEXT:    vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
4870; NoVLX-NEXT:    kmovw %k0, %eax
4871; NoVLX-NEXT:    movzwl %ax, %eax
4872; NoVLX-NEXT:    vzeroupper
4873; NoVLX-NEXT:    retq
4874entry:
4875  %0 = bitcast <8 x i64> %__a to <8 x i64>
4876  %load = load i64, i64* %__b
4877  %vec = insertelement <8 x i64> undef, i64 %load, i32 0
4878  %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
4879  %2 = icmp eq <8 x i64> %0, %1
4880  %3 = bitcast i8 %__u to <8 x i1>
4881  %4 = and <8 x i1> %3, %2
4882  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4883  %6 = bitcast <64 x i1> %5 to i64
4884  ret i64 %6
4885}
4886
4887
; Signed greater-than compare of two 128-bit operands reinterpreted as
; <16 x i8>. The <16 x i1> result is widened to <32 x i1>: shuffle indices
; 16-31 select lanes of the zeroinitializer operand, so result bits 16-31 are
; zero.
; The AVX512VL/BW/DQ run is asserted to compare straight into %k0. The
; AVX512F-only run is asserted to compare in %xmm0, sign-extend the bytes to
; dwords with vpmovsxbd and build the mask with vptestmd.
4888define zeroext i32 @test_vpcmpsgtb_v16i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
4889; VLX-LABEL: test_vpcmpsgtb_v16i1_v32i1_mask:
4890; VLX:       # %bb.0: # %entry
4891; VLX-NEXT:    vpcmpgtb %xmm1, %xmm0, %k0
4892; VLX-NEXT:    kmovd %k0, %eax
4893; VLX-NEXT:    retq
4894;
4895; NoVLX-LABEL: test_vpcmpsgtb_v16i1_v32i1_mask:
4896; NoVLX:       # %bb.0: # %entry
4897; NoVLX-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm0
4898; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
4899; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
4900; NoVLX-NEXT:    kmovw %k0, %eax
4901; NoVLX-NEXT:    vzeroupper
4902; NoVLX-NEXT:    retq
4903entry:
4904  %0 = bitcast <2 x i64> %__a to <16 x i8>
4905  %1 = bitcast <2 x i64> %__b to <16 x i8>
4906  %2 = icmp sgt <16 x i8> %0, %1
4907  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
4908  %4 = bitcast <32 x i1> %3 to i32
4909  ret i32 %4
4910}
4911
; Signed greater-than compare of %__a against a 128-bit vector loaded from
; %__b, both reinterpreted as <16 x i8>. The <16 x i1> result is widened to
; <32 x i1>; shuffle indices 16-31 select zeroinitializer lanes, so result
; bits 16-31 are zero.
; Both run lines are asserted to fold the load as a (%rdi) memory operand of
; vpcmpgtb; the AVX512F-only run then converts the byte result to a mask via
; vpmovsxbd and vptestmd.
4912define zeroext i32 @test_vpcmpsgtb_v16i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
4913; VLX-LABEL: test_vpcmpsgtb_v16i1_v32i1_mask_mem:
4914; VLX:       # %bb.0: # %entry
4915; VLX-NEXT:    vpcmpgtb (%rdi), %xmm0, %k0
4916; VLX-NEXT:    kmovd %k0, %eax
4917; VLX-NEXT:    retq
4918;
4919; NoVLX-LABEL: test_vpcmpsgtb_v16i1_v32i1_mask_mem:
4920; NoVLX:       # %bb.0: # %entry
4921; NoVLX-NEXT:    vpcmpgtb (%rdi), %xmm0, %xmm0
4922; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
4923; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
4924; NoVLX-NEXT:    kmovw %k0, %eax
4925; NoVLX-NEXT:    vzeroupper
4926; NoVLX-NEXT:    retq
4927entry:
4928  %0 = bitcast <2 x i64> %__a to <16 x i8>
4929  %load = load <2 x i64>, <2 x i64>* %__b
4930  %1 = bitcast <2 x i64> %load to <16 x i8>
4931  %2 = icmp sgt <16 x i8> %0, %1
4932  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
4933  %4 = bitcast <32 x i1> %3 to i32
4934  ret i32 %4
4935}
4936
; Masked signed greater-than compare of two 128-bit register operands
; reinterpreted as <16 x i8>. The <16 x i1> result is ANDed with the bits of
; %__u and widened to <32 x i1>; shuffle indices 16-31 select zeroinitializer
; lanes, so result bits 16-31 are zero.
; The AVX512VL/BW/DQ run is asserted to apply %__u through the %k1
; write-mask. The AVX512F-only run is asserted to build the unmasked 16-bit
; mask first and apply %__u afterwards with a scalar andl.
4937define zeroext i32 @test_masked_vpcmpsgtb_v16i1_v32i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
4938; VLX-LABEL: test_masked_vpcmpsgtb_v16i1_v32i1_mask:
4939; VLX:       # %bb.0: # %entry
4940; VLX-NEXT:    kmovd %edi, %k1
4941; VLX-NEXT:    vpcmpgtb %xmm1, %xmm0, %k0 {%k1}
4942; VLX-NEXT:    kmovd %k0, %eax
4943; VLX-NEXT:    retq
4944;
4945; NoVLX-LABEL: test_masked_vpcmpsgtb_v16i1_v32i1_mask:
4946; NoVLX:       # %bb.0: # %entry
4947; NoVLX-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm0
4948; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
4949; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
4950; NoVLX-NEXT:    kmovw %k0, %eax
4951; NoVLX-NEXT:    andl %edi, %eax
4952; NoVLX-NEXT:    vzeroupper
4953; NoVLX-NEXT:    retq
4954entry:
4955  %0 = bitcast <2 x i64> %__a to <16 x i8>
4956  %1 = bitcast <2 x i64> %__b to <16 x i8>
4957  %2 = icmp sgt <16 x i8> %0, %1
4958  %3 = bitcast i16 %__u to <16 x i1>
4959  %4 = and <16 x i1> %2, %3
4960  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
4961  %6 = bitcast <32 x i1> %5 to i32
4962  ret i32 %6
4963}
4964
; Masked signed greater-than compare of %__a against a 128-bit vector loaded
; from %__b, both reinterpreted as <16 x i8>. The <16 x i1> result is ANDed
; with the bits of %__u and widened to <32 x i1>; shuffle indices 16-31
; select zeroinitializer lanes, so result bits 16-31 are zero.
; Both run lines are asserted to fold the load as a (%rsi) memory operand.
; The AVX512VL/BW/DQ run applies %__u through the %k1 write-mask; the
; AVX512F-only run applies it afterwards with a scalar andl.
4965define zeroext i32 @test_masked_vpcmpsgtb_v16i1_v32i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
4966; VLX-LABEL: test_masked_vpcmpsgtb_v16i1_v32i1_mask_mem:
4967; VLX:       # %bb.0: # %entry
4968; VLX-NEXT:    kmovd %edi, %k1
4969; VLX-NEXT:    vpcmpgtb (%rsi), %xmm0, %k0 {%k1}
4970; VLX-NEXT:    kmovd %k0, %eax
4971; VLX-NEXT:    retq
4972;
4973; NoVLX-LABEL: test_masked_vpcmpsgtb_v16i1_v32i1_mask_mem:
4974; NoVLX:       # %bb.0: # %entry
4975; NoVLX-NEXT:    vpcmpgtb (%rsi), %xmm0, %xmm0
4976; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
4977; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
4978; NoVLX-NEXT:    kmovw %k0, %eax
4979; NoVLX-NEXT:    andl %edi, %eax
4980; NoVLX-NEXT:    vzeroupper
4981; NoVLX-NEXT:    retq
4982entry:
4983  %0 = bitcast <2 x i64> %__a to <16 x i8>
4984  %load = load <2 x i64>, <2 x i64>* %__b
4985  %1 = bitcast <2 x i64> %load to <16 x i8>
4986  %2 = icmp sgt <16 x i8> %0, %1
4987  %3 = bitcast i16 %__u to <16 x i1>
4988  %4 = and <16 x i1> %2, %3
4989  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
4990  %6 = bitcast <32 x i1> %5 to i32
4991  ret i32 %6
4992}
4993
4994
; Signed greater-than compare of two 128-bit register operands reinterpreted
; as <16 x i8>, with the <16 x i1> result widened to <64 x i1> and returned as
; i64. Every shuffle index past 15 lies in 16-31 and selects a
; zeroinitializer lane, so result bits 16-63 are zero.
; The AVX512VL/BW/DQ run is asserted to compare into %k0 and read it with
; kmovq. The AVX512F-only run is asserted to go through vpmovsxbd and
; vptestmd, then zero-extend the 16-bit mask with movzwl.
4995define zeroext i64 @test_vpcmpsgtb_v16i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
4996; VLX-LABEL: test_vpcmpsgtb_v16i1_v64i1_mask:
4997; VLX:       # %bb.0: # %entry
4998; VLX-NEXT:    vpcmpgtb %xmm1, %xmm0, %k0
4999; VLX-NEXT:    kmovq %k0, %rax
5000; VLX-NEXT:    retq
5001;
5002; NoVLX-LABEL: test_vpcmpsgtb_v16i1_v64i1_mask:
5003; NoVLX:       # %bb.0: # %entry
5004; NoVLX-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm0
5005; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
5006; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
5007; NoVLX-NEXT:    kmovw %k0, %eax
5008; NoVLX-NEXT:    movzwl %ax, %eax
5009; NoVLX-NEXT:    vzeroupper
5010; NoVLX-NEXT:    retq
5011entry:
5012  %0 = bitcast <2 x i64> %__a to <16 x i8>
5013  %1 = bitcast <2 x i64> %__b to <16 x i8>
5014  %2 = icmp sgt <16 x i8> %0, %1
5015  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
5016  %4 = bitcast <64 x i1> %3 to i64
5017  ret i64 %4
5018}
5019
; Signed greater-than compare of %__a against a 128-bit vector loaded from
; %__b, both reinterpreted as <16 x i8>. The <16 x i1> result is widened to
; <64 x i1> (shuffle indices 16-31 select zeroinitializer lanes, so bits
; 16-63 are zero) and returned as i64.
; Both run lines are asserted to fold the load as a (%rdi) memory operand;
; the AVX512F-only run builds the mask via vpmovsxbd and vptestmd and
; zero-extends it with movzwl.
5020define zeroext i64 @test_vpcmpsgtb_v16i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
5021; VLX-LABEL: test_vpcmpsgtb_v16i1_v64i1_mask_mem:
5022; VLX:       # %bb.0: # %entry
5023; VLX-NEXT:    vpcmpgtb (%rdi), %xmm0, %k0
5024; VLX-NEXT:    kmovq %k0, %rax
5025; VLX-NEXT:    retq
5026;
5027; NoVLX-LABEL: test_vpcmpsgtb_v16i1_v64i1_mask_mem:
5028; NoVLX:       # %bb.0: # %entry
5029; NoVLX-NEXT:    vpcmpgtb (%rdi), %xmm0, %xmm0
5030; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
5031; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
5032; NoVLX-NEXT:    kmovw %k0, %eax
5033; NoVLX-NEXT:    movzwl %ax, %eax
5034; NoVLX-NEXT:    vzeroupper
5035; NoVLX-NEXT:    retq
5036entry:
5037  %0 = bitcast <2 x i64> %__a to <16 x i8>
5038  %load = load <2 x i64>, <2 x i64>* %__b
5039  %1 = bitcast <2 x i64> %load to <16 x i8>
5040  %2 = icmp sgt <16 x i8> %0, %1
5041  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
5042  %4 = bitcast <64 x i1> %3 to i64
5043  ret i64 %4
5044}
5045
; Masked signed greater-than compare of two 128-bit register operands
; reinterpreted as <16 x i8>. The <16 x i1> result is ANDed with the bits of
; %__u, widened to <64 x i1> (shuffle indices 16-31 select zeroinitializer
; lanes, so bits 16-63 are zero) and returned as i64.
; The AVX512VL/BW/DQ run is asserted to apply %__u through the %k1
; write-mask and read the result with kmovq. The AVX512F-only run is asserted
; to build the unmasked mask and apply %__u with a scalar andl.
5046define zeroext i64 @test_masked_vpcmpsgtb_v16i1_v64i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
5047; VLX-LABEL: test_masked_vpcmpsgtb_v16i1_v64i1_mask:
5048; VLX:       # %bb.0: # %entry
5049; VLX-NEXT:    kmovd %edi, %k1
5050; VLX-NEXT:    vpcmpgtb %xmm1, %xmm0, %k0 {%k1}
5051; VLX-NEXT:    kmovq %k0, %rax
5052; VLX-NEXT:    retq
5053;
5054; NoVLX-LABEL: test_masked_vpcmpsgtb_v16i1_v64i1_mask:
5055; NoVLX:       # %bb.0: # %entry
5056; NoVLX-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm0
5057; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
5058; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
5059; NoVLX-NEXT:    kmovw %k0, %eax
5060; NoVLX-NEXT:    andl %edi, %eax
5061; NoVLX-NEXT:    vzeroupper
5062; NoVLX-NEXT:    retq
5063entry:
5064  %0 = bitcast <2 x i64> %__a to <16 x i8>
5065  %1 = bitcast <2 x i64> %__b to <16 x i8>
5066  %2 = icmp sgt <16 x i8> %0, %1
5067  %3 = bitcast i16 %__u to <16 x i1>
5068  %4 = and <16 x i1> %2, %3
5069  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
5070  %6 = bitcast <64 x i1> %5 to i64
5071  ret i64 %6
5072}
5073
; Masked signed greater-than compare of %__a against a 128-bit vector loaded
; from %__b, both reinterpreted as <16 x i8>. The <16 x i1> result is ANDed
; with the bits of %__u, widened to <64 x i1> (shuffle indices 16-31 select
; zeroinitializer lanes, so bits 16-63 are zero) and returned as i64.
; Both run lines are asserted to fold the load as a (%rsi) memory operand.
; The AVX512VL/BW/DQ run applies %__u through the %k1 write-mask; the
; AVX512F-only run applies it afterwards with a scalar andl.
5074define zeroext i64 @test_masked_vpcmpsgtb_v16i1_v64i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
5075; VLX-LABEL: test_masked_vpcmpsgtb_v16i1_v64i1_mask_mem:
5076; VLX:       # %bb.0: # %entry
5077; VLX-NEXT:    kmovd %edi, %k1
5078; VLX-NEXT:    vpcmpgtb (%rsi), %xmm0, %k0 {%k1}
5079; VLX-NEXT:    kmovq %k0, %rax
5080; VLX-NEXT:    retq
5081;
5082; NoVLX-LABEL: test_masked_vpcmpsgtb_v16i1_v64i1_mask_mem:
5083; NoVLX:       # %bb.0: # %entry
5084; NoVLX-NEXT:    vpcmpgtb (%rsi), %xmm0, %xmm0
5085; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
5086; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
5087; NoVLX-NEXT:    kmovw %k0, %eax
5088; NoVLX-NEXT:    andl %edi, %eax
5089; NoVLX-NEXT:    vzeroupper
5090; NoVLX-NEXT:    retq
5091entry:
5092  %0 = bitcast <2 x i64> %__a to <16 x i8>
5093  %load = load <2 x i64>, <2 x i64>* %__b
5094  %1 = bitcast <2 x i64> %load to <16 x i8>
5095  %2 = icmp sgt <16 x i8> %0, %1
5096  %3 = bitcast i16 %__u to <16 x i1>
5097  %4 = and <16 x i1> %2, %3
5098  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
5099  %6 = bitcast <64 x i1> %5 to i64
5100  ret i64 %6
5101}
5102
5103
; Signed greater-than compare of two 256-bit register operands reinterpreted
; as <32 x i8>, with the <32 x i1> result widened to <64 x i1> and returned as
; i64. Shuffle indices 32-63 select lanes of the zeroinitializer operand, so
; result bits 32-63 are zero.
; The AVX512VL/BW/DQ run is asserted to compare straight into %k0. The
; AVX512F-only run is asserted to compare in %ymm0 and convert each 16-byte
; half separately (vpmovsxbd + vptestmd), combining them as
; (high << 16) | low in a general-purpose register.
5104define zeroext i64 @test_vpcmpsgtb_v32i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
5105; VLX-LABEL: test_vpcmpsgtb_v32i1_v64i1_mask:
5106; VLX:       # %bb.0: # %entry
5107; VLX-NEXT:    vpcmpgtb %ymm1, %ymm0, %k0
5108; VLX-NEXT:    kmovq %k0, %rax
5109; VLX-NEXT:    vzeroupper
5110; VLX-NEXT:    retq
5111;
5112; NoVLX-LABEL: test_vpcmpsgtb_v32i1_v64i1_mask:
5113; NoVLX:       # %bb.0: # %entry
5114; NoVLX-NEXT:    vpcmpgtb %ymm1, %ymm0, %ymm0
5115; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm1
5116; NoVLX-NEXT:    vptestmd %zmm1, %zmm1, %k0
5117; NoVLX-NEXT:    kmovw %k0, %ecx
5118; NoVLX-NEXT:    vextracti128 $1, %ymm0, %xmm0
5119; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
5120; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
5121; NoVLX-NEXT:    kmovw %k0, %eax
5122; NoVLX-NEXT:    shll $16, %eax
5123; NoVLX-NEXT:    orl %ecx, %eax
5124; NoVLX-NEXT:    vzeroupper
5125; NoVLX-NEXT:    retq
5126entry:
5127  %0 = bitcast <4 x i64> %__a to <32 x i8>
5128  %1 = bitcast <4 x i64> %__b to <32 x i8>
5129  %2 = icmp sgt <32 x i8> %0, %1
5130  %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
5131  %4 = bitcast <64 x i1> %3 to i64
5132  ret i64 %4
5133}
5134
; Signed greater-than compare of %__a against a 256-bit vector loaded from
; %__b, both reinterpreted as <32 x i8>. The <32 x i1> result is widened to
; <64 x i1> (shuffle indices 32-63 select zeroinitializer lanes, so bits
; 32-63 are zero) and returned as i64.
; Both run lines are asserted to fold the load as a (%rdi) memory operand.
; The AVX512F-only run converts each 16-byte half of the byte result to a
; 16-bit mask and combines them as (high << 16) | low.
5135define zeroext i64 @test_vpcmpsgtb_v32i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
5136; VLX-LABEL: test_vpcmpsgtb_v32i1_v64i1_mask_mem:
5137; VLX:       # %bb.0: # %entry
5138; VLX-NEXT:    vpcmpgtb (%rdi), %ymm0, %k0
5139; VLX-NEXT:    kmovq %k0, %rax
5140; VLX-NEXT:    vzeroupper
5141; VLX-NEXT:    retq
5142;
5143; NoVLX-LABEL: test_vpcmpsgtb_v32i1_v64i1_mask_mem:
5144; NoVLX:       # %bb.0: # %entry
5145; NoVLX-NEXT:    vpcmpgtb (%rdi), %ymm0, %ymm0
5146; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm1
5147; NoVLX-NEXT:    vptestmd %zmm1, %zmm1, %k0
5148; NoVLX-NEXT:    kmovw %k0, %ecx
5149; NoVLX-NEXT:    vextracti128 $1, %ymm0, %xmm0
5150; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
5151; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
5152; NoVLX-NEXT:    kmovw %k0, %eax
5153; NoVLX-NEXT:    shll $16, %eax
5154; NoVLX-NEXT:    orl %ecx, %eax
5155; NoVLX-NEXT:    vzeroupper
5156; NoVLX-NEXT:    retq
5157entry:
5158  %0 = bitcast <4 x i64> %__a to <32 x i8>
5159  %load = load <4 x i64>, <4 x i64>* %__b
5160  %1 = bitcast <4 x i64> %load to <32 x i8>
5161  %2 = icmp sgt <32 x i8> %0, %1
5162  %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
5163  %4 = bitcast <64 x i1> %3 to i64
5164  ret i64 %4
5165}
5166
; Masked signed greater-than compare of two 256-bit register operands
; reinterpreted as <32 x i8>. The <32 x i1> result is ANDed with the bits of
; %__u, widened to <64 x i1> (shuffle indices 32-63 select zeroinitializer
; lanes, so bits 32-63 are zero) and returned as i64.
; The AVX512VL/BW/DQ run is asserted to apply %__u through the %k1
; write-mask. The AVX512F-only run is asserted to process the two 16-byte
; halves separately: the low mask is ANDed with %edi, the high mask with
; %edi shifted right by 16, and the halves are merged as (high << 16) | low.
5167define zeroext i64 @test_masked_vpcmpsgtb_v32i1_v64i1_mask(i32 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
5168; VLX-LABEL: test_masked_vpcmpsgtb_v32i1_v64i1_mask:
5169; VLX:       # %bb.0: # %entry
5170; VLX-NEXT:    kmovd %edi, %k1
5171; VLX-NEXT:    vpcmpgtb %ymm1, %ymm0, %k0 {%k1}
5172; VLX-NEXT:    kmovq %k0, %rax
5173; VLX-NEXT:    vzeroupper
5174; VLX-NEXT:    retq
5175;
5176; NoVLX-LABEL: test_masked_vpcmpsgtb_v32i1_v64i1_mask:
5177; NoVLX:       # %bb.0: # %entry
5178; NoVLX-NEXT:    vpcmpgtb %ymm1, %ymm0, %ymm0
5179; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm1
5180; NoVLX-NEXT:    vptestmd %zmm1, %zmm1, %k0
5181; NoVLX-NEXT:    kmovw %k0, %eax
5182; NoVLX-NEXT:    andl %edi, %eax
5183; NoVLX-NEXT:    shrl $16, %edi
5184; NoVLX-NEXT:    vextracti128 $1, %ymm0, %xmm0
5185; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
5186; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
5187; NoVLX-NEXT:    kmovw %k0, %ecx
5188; NoVLX-NEXT:    andl %edi, %ecx
5189; NoVLX-NEXT:    shll $16, %ecx
5190; NoVLX-NEXT:    movzwl %ax, %eax
5191; NoVLX-NEXT:    orl %ecx, %eax
5192; NoVLX-NEXT:    vzeroupper
5193; NoVLX-NEXT:    retq
5194entry:
5195  %0 = bitcast <4 x i64> %__a to <32 x i8>
5196  %1 = bitcast <4 x i64> %__b to <32 x i8>
5197  %2 = icmp sgt <32 x i8> %0, %1
5198  %3 = bitcast i32 %__u to <32 x i1>
5199  %4 = and <32 x i1> %2, %3
5200  %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
5201  %6 = bitcast <64 x i1> %5 to i64
5202  ret i64 %6
5203}
5204
; Masked signed greater-than on 32 x i8, with the right-hand operand loaded
; from %__b. The result is ANDed with %__u (as <32 x i1>), widened to 64
; lanes and returned as i64. Because %__u occupies the first integer argument
; register, the assertions expect the pointer in %rsi.
5205define zeroext i64 @test_masked_vpcmpsgtb_v32i1_v64i1_mask_mem(i32 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
5206; VLX-LABEL: test_masked_vpcmpsgtb_v32i1_v64i1_mask_mem:
5207; VLX:       # %bb.0: # %entry
5208; VLX-NEXT:    kmovd %edi, %k1
5209; VLX-NEXT:    vpcmpgtb (%rsi), %ymm0, %k0 {%k1}
5210; VLX-NEXT:    kmovq %k0, %rax
5211; VLX-NEXT:    vzeroupper
5212; VLX-NEXT:    retq
5213;
5214; NoVLX-LABEL: test_masked_vpcmpsgtb_v32i1_v64i1_mask_mem:
5215; NoVLX:       # %bb.0: # %entry
5216; NoVLX-NEXT:    vpcmpgtb (%rsi), %ymm0, %ymm0
5217; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm1
5218; NoVLX-NEXT:    vptestmd %zmm1, %zmm1, %k0
5219; NoVLX-NEXT:    kmovw %k0, %eax
5220; NoVLX-NEXT:    andl %edi, %eax
5221; NoVLX-NEXT:    shrl $16, %edi
5222; NoVLX-NEXT:    vextracti128 $1, %ymm0, %xmm0
5223; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
5224; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
5225; NoVLX-NEXT:    kmovw %k0, %ecx
5226; NoVLX-NEXT:    andl %edi, %ecx
5227; NoVLX-NEXT:    shll $16, %ecx
5228; NoVLX-NEXT:    movzwl %ax, %eax
5229; NoVLX-NEXT:    orl %ecx, %eax
5230; NoVLX-NEXT:    vzeroupper
5231; NoVLX-NEXT:    retq
5232entry:
5233  %0 = bitcast <4 x i64> %__a to <32 x i8>
5234  %load = load <4 x i64>, <4 x i64>* %__b
5235  %1 = bitcast <4 x i64> %load to <32 x i8>
5236  %2 = icmp sgt <32 x i8> %0, %1
; Apply the caller-supplied mask lane-wise before widening.
5237  %3 = bitcast i32 %__u to <32 x i1>
5238  %4 = and <32 x i1> %2, %3
; Indices 32-63 select lanes of the zeroinitializer operand, so the upper
; 32 lanes of the widened mask are zero.
5239  %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
5240  %6 = bitcast <64 x i1> %5 to i64
5241  ret i64 %6
5242}
5243
5244
; Unmasked signed greater-than on 8 x i16 (register operands). The 8-lane
; result is widened to 16 lanes and returned as i16. The NoVLX assertions
; expect the xmm compare result to be sign-extended to zmm (vpmovsxwq) and
; turned into a k-mask with vptestmq.
5245define zeroext i16 @test_vpcmpsgtw_v8i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
5246; VLX-LABEL: test_vpcmpsgtw_v8i1_v16i1_mask:
5247; VLX:       # %bb.0: # %entry
5248; VLX-NEXT:    vpcmpgtw %xmm1, %xmm0, %k0
5249; VLX-NEXT:    kmovd %k0, %eax
5250; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
5251; VLX-NEXT:    retq
5252;
5253; NoVLX-LABEL: test_vpcmpsgtw_v8i1_v16i1_mask:
5254; NoVLX:       # %bb.0: # %entry
5255; NoVLX-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm0
5256; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
5257; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0
5258; NoVLX-NEXT:    kmovw %k0, %eax
5259; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
5260; NoVLX-NEXT:    vzeroupper
5261; NoVLX-NEXT:    retq
5262entry:
5263  %0 = bitcast <2 x i64> %__a to <8 x i16>
5264  %1 = bitcast <2 x i64> %__b to <8 x i16>
5265  %2 = icmp sgt <8 x i16> %0, %1
; Indices 8-15 select lanes of the zeroinitializer operand, so lanes 8-15 of
; the widened mask are zero.
5266  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
5267  %4 = bitcast <16 x i1> %3 to i16
5268  ret i16 %4
5269}
5270
; Unmasked signed greater-than on 8 x i16, with the right-hand operand loaded
; from %__b. The 8-lane result is widened to 16 lanes and returned as i16.
; Both sets of assertions expect the load to be folded into vpcmpgtw as a
; (%rdi) memory operand.
5271define zeroext i16 @test_vpcmpsgtw_v8i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
5272; VLX-LABEL: test_vpcmpsgtw_v8i1_v16i1_mask_mem:
5273; VLX:       # %bb.0: # %entry
5274; VLX-NEXT:    vpcmpgtw (%rdi), %xmm0, %k0
5275; VLX-NEXT:    kmovd %k0, %eax
5276; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
5277; VLX-NEXT:    retq
5278;
5279; NoVLX-LABEL: test_vpcmpsgtw_v8i1_v16i1_mask_mem:
5280; NoVLX:       # %bb.0: # %entry
5281; NoVLX-NEXT:    vpcmpgtw (%rdi), %xmm0, %xmm0
5282; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
5283; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0
5284; NoVLX-NEXT:    kmovw %k0, %eax
5285; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
5286; NoVLX-NEXT:    vzeroupper
5287; NoVLX-NEXT:    retq
5288entry:
5289  %0 = bitcast <2 x i64> %__a to <8 x i16>
5290  %load = load <2 x i64>, <2 x i64>* %__b
5291  %1 = bitcast <2 x i64> %load to <8 x i16>
5292  %2 = icmp sgt <8 x i16> %0, %1
; Indices 8-15 select lanes of the zeroinitializer operand, so lanes 8-15 of
; the widened mask are zero.
5293  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
5294  %4 = bitcast <16 x i1> %3 to i16
5295  ret i16 %4
5296}
5297
; Masked signed greater-than on 8 x i16 (register operands). The result is
; ANDed with %__u (as <8 x i1>), widened to 16 lanes and returned as i16.
; The NoVLX assertions expect the mask to be moved into %k1 and applied on
; vptestmq rather than on the compare itself.
5298define zeroext i16 @test_masked_vpcmpsgtw_v8i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
5299; VLX-LABEL: test_masked_vpcmpsgtw_v8i1_v16i1_mask:
5300; VLX:       # %bb.0: # %entry
5301; VLX-NEXT:    kmovd %edi, %k1
5302; VLX-NEXT:    vpcmpgtw %xmm1, %xmm0, %k0 {%k1}
5303; VLX-NEXT:    kmovd %k0, %eax
5304; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
5305; VLX-NEXT:    retq
5306;
5307; NoVLX-LABEL: test_masked_vpcmpsgtw_v8i1_v16i1_mask:
5308; NoVLX:       # %bb.0: # %entry
5309; NoVLX-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm0
5310; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
5311; NoVLX-NEXT:    kmovw %edi, %k1
5312; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0 {%k1}
5313; NoVLX-NEXT:    kmovw %k0, %eax
5314; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
5315; NoVLX-NEXT:    vzeroupper
5316; NoVLX-NEXT:    retq
5317entry:
5318  %0 = bitcast <2 x i64> %__a to <8 x i16>
5319  %1 = bitcast <2 x i64> %__b to <8 x i16>
5320  %2 = icmp sgt <8 x i16> %0, %1
; Apply the caller-supplied mask lane-wise before widening.
5321  %3 = bitcast i8 %__u to <8 x i1>
5322  %4 = and <8 x i1> %2, %3
; Indices 8-15 select lanes of the zeroinitializer operand, so lanes 8-15 of
; the widened mask are zero.
5323  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
5324  %6 = bitcast <16 x i1> %5 to i16
5325  ret i16 %6
5326}
5327
; Masked signed greater-than on 8 x i16, with the right-hand operand loaded
; from %__b. The result is ANDed with %__u (as <8 x i1>), widened to 16 lanes
; and returned as i16. The assertions expect the mask in %edi and the
; pointer in %rsi.
5328define zeroext i16 @test_masked_vpcmpsgtw_v8i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
5329; VLX-LABEL: test_masked_vpcmpsgtw_v8i1_v16i1_mask_mem:
5330; VLX:       # %bb.0: # %entry
5331; VLX-NEXT:    kmovd %edi, %k1
5332; VLX-NEXT:    vpcmpgtw (%rsi), %xmm0, %k0 {%k1}
5333; VLX-NEXT:    kmovd %k0, %eax
5334; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
5335; VLX-NEXT:    retq
5336;
5337; NoVLX-LABEL: test_masked_vpcmpsgtw_v8i1_v16i1_mask_mem:
5338; NoVLX:       # %bb.0: # %entry
5339; NoVLX-NEXT:    vpcmpgtw (%rsi), %xmm0, %xmm0
5340; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
5341; NoVLX-NEXT:    kmovw %edi, %k1
5342; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0 {%k1}
5343; NoVLX-NEXT:    kmovw %k0, %eax
5344; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
5345; NoVLX-NEXT:    vzeroupper
5346; NoVLX-NEXT:    retq
5347entry:
5348  %0 = bitcast <2 x i64> %__a to <8 x i16>
5349  %load = load <2 x i64>, <2 x i64>* %__b
5350  %1 = bitcast <2 x i64> %load to <8 x i16>
5351  %2 = icmp sgt <8 x i16> %0, %1
; Apply the caller-supplied mask lane-wise before widening.
5352  %3 = bitcast i8 %__u to <8 x i1>
5353  %4 = and <8 x i1> %2, %3
; Indices 8-15 select lanes of the zeroinitializer operand, so lanes 8-15 of
; the widened mask are zero.
5354  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
5355  %6 = bitcast <16 x i1> %5 to i16
5356  ret i16 %6
5357}
5358
5359
; Unmasked signed greater-than on 8 x i16 (register operands). The 8-lane
; result is widened to 32 lanes and returned as i32. Unlike the i16-returning
; variants, the assertions here contain no "# kill" sub-register line.
5360define zeroext i32 @test_vpcmpsgtw_v8i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
5361; VLX-LABEL: test_vpcmpsgtw_v8i1_v32i1_mask:
5362; VLX:       # %bb.0: # %entry
5363; VLX-NEXT:    vpcmpgtw %xmm1, %xmm0, %k0
5364; VLX-NEXT:    kmovd %k0, %eax
5365; VLX-NEXT:    retq
5366;
5367; NoVLX-LABEL: test_vpcmpsgtw_v8i1_v32i1_mask:
5368; NoVLX:       # %bb.0: # %entry
5369; NoVLX-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm0
5370; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
5371; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0
5372; NoVLX-NEXT:    kmovw %k0, %eax
5373; NoVLX-NEXT:    vzeroupper
5374; NoVLX-NEXT:    retq
5375entry:
5376  %0 = bitcast <2 x i64> %__a to <8 x i16>
5377  %1 = bitcast <2 x i64> %__b to <8 x i16>
5378  %2 = icmp sgt <8 x i16> %0, %1
; Every index past 7 is in the range 8-15 (repeated), i.e. a lane of the
; zeroinitializer operand, so lanes 8-31 of the widened mask are zero.
5379  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
5380  %4 = bitcast <32 x i1> %3 to i32
5381  ret i32 %4
5382}
5383
; Unmasked signed greater-than on 8 x i16, with the right-hand operand loaded
; from %__b. The 8-lane result is widened to 32 lanes and returned as i32.
; Both sets of assertions expect the load to be folded into vpcmpgtw.
5384define zeroext i32 @test_vpcmpsgtw_v8i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
5385; VLX-LABEL: test_vpcmpsgtw_v8i1_v32i1_mask_mem:
5386; VLX:       # %bb.0: # %entry
5387; VLX-NEXT:    vpcmpgtw (%rdi), %xmm0, %k0
5388; VLX-NEXT:    kmovd %k0, %eax
5389; VLX-NEXT:    retq
5390;
5391; NoVLX-LABEL: test_vpcmpsgtw_v8i1_v32i1_mask_mem:
5392; NoVLX:       # %bb.0: # %entry
5393; NoVLX-NEXT:    vpcmpgtw (%rdi), %xmm0, %xmm0
5394; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
5395; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0
5396; NoVLX-NEXT:    kmovw %k0, %eax
5397; NoVLX-NEXT:    vzeroupper
5398; NoVLX-NEXT:    retq
5399entry:
5400  %0 = bitcast <2 x i64> %__a to <8 x i16>
5401  %load = load <2 x i64>, <2 x i64>* %__b
5402  %1 = bitcast <2 x i64> %load to <8 x i16>
5403  %2 = icmp sgt <8 x i16> %0, %1
; Every index past 7 is in the range 8-15 (repeated), i.e. a lane of the
; zeroinitializer operand, so lanes 8-31 of the widened mask are zero.
5404  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
5405  %4 = bitcast <32 x i1> %3 to i32
5406  ret i32 %4
5407}
5408
; Masked signed greater-than on 8 x i16 (register operands). The result is
; ANDed with %__u (as <8 x i1>), widened to 32 lanes and returned as i32.
; The NoVLX assertions expect the mask to be applied via {%k1} on vptestmq.
5409define zeroext i32 @test_masked_vpcmpsgtw_v8i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
5410; VLX-LABEL: test_masked_vpcmpsgtw_v8i1_v32i1_mask:
5411; VLX:       # %bb.0: # %entry
5412; VLX-NEXT:    kmovd %edi, %k1
5413; VLX-NEXT:    vpcmpgtw %xmm1, %xmm0, %k0 {%k1}
5414; VLX-NEXT:    kmovd %k0, %eax
5415; VLX-NEXT:    retq
5416;
5417; NoVLX-LABEL: test_masked_vpcmpsgtw_v8i1_v32i1_mask:
5418; NoVLX:       # %bb.0: # %entry
5419; NoVLX-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm0
5420; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
5421; NoVLX-NEXT:    kmovw %edi, %k1
5422; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0 {%k1}
5423; NoVLX-NEXT:    kmovw %k0, %eax
5424; NoVLX-NEXT:    vzeroupper
5425; NoVLX-NEXT:    retq
5426entry:
5427  %0 = bitcast <2 x i64> %__a to <8 x i16>
5428  %1 = bitcast <2 x i64> %__b to <8 x i16>
5429  %2 = icmp sgt <8 x i16> %0, %1
; Apply the caller-supplied mask lane-wise before widening.
5430  %3 = bitcast i8 %__u to <8 x i1>
5431  %4 = and <8 x i1> %2, %3
; Every index past 7 is in the range 8-15 (repeated), i.e. a lane of the
; zeroinitializer operand, so lanes 8-31 of the widened mask are zero.
5432  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
5433  %6 = bitcast <32 x i1> %5 to i32
5434  ret i32 %6
5435}
5436
; Masked signed greater-than on 8 x i16, with the right-hand operand loaded
; from %__b. The result is ANDed with %__u (as <8 x i1>), widened to 32 lanes
; and returned as i32. The assertions expect the mask in %edi and the
; pointer in %rsi.
5437define zeroext i32 @test_masked_vpcmpsgtw_v8i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
5438; VLX-LABEL: test_masked_vpcmpsgtw_v8i1_v32i1_mask_mem:
5439; VLX:       # %bb.0: # %entry
5440; VLX-NEXT:    kmovd %edi, %k1
5441; VLX-NEXT:    vpcmpgtw (%rsi), %xmm0, %k0 {%k1}
5442; VLX-NEXT:    kmovd %k0, %eax
5443; VLX-NEXT:    retq
5444;
5445; NoVLX-LABEL: test_masked_vpcmpsgtw_v8i1_v32i1_mask_mem:
5446; NoVLX:       # %bb.0: # %entry
5447; NoVLX-NEXT:    vpcmpgtw (%rsi), %xmm0, %xmm0
5448; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
5449; NoVLX-NEXT:    kmovw %edi, %k1
5450; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0 {%k1}
5451; NoVLX-NEXT:    kmovw %k0, %eax
5452; NoVLX-NEXT:    vzeroupper
5453; NoVLX-NEXT:    retq
5454entry:
5455  %0 = bitcast <2 x i64> %__a to <8 x i16>
5456  %load = load <2 x i64>, <2 x i64>* %__b
5457  %1 = bitcast <2 x i64> %load to <8 x i16>
5458  %2 = icmp sgt <8 x i16> %0, %1
; Apply the caller-supplied mask lane-wise before widening.
5459  %3 = bitcast i8 %__u to <8 x i1>
5460  %4 = and <8 x i1> %2, %3
; Every index past 7 is in the range 8-15 (repeated), i.e. a lane of the
; zeroinitializer operand, so lanes 8-31 of the widened mask are zero.
5461  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
5462  %6 = bitcast <32 x i1> %5 to i32
5463  ret i32 %6
5464}
5465
5466
; Unmasked signed greater-than on 8 x i16 (register operands). The 8-lane
; result is widened to 64 lanes and returned as i64. The VLX assertions read
; the mask with kmovq; the NoVLX assertions read it with kmovw and then
; expect a movzwl of %ax.
5467define zeroext i64 @test_vpcmpsgtw_v8i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
5468; VLX-LABEL: test_vpcmpsgtw_v8i1_v64i1_mask:
5469; VLX:       # %bb.0: # %entry
5470; VLX-NEXT:    vpcmpgtw %xmm1, %xmm0, %k0
5471; VLX-NEXT:    kmovq %k0, %rax
5472; VLX-NEXT:    retq
5473;
5474; NoVLX-LABEL: test_vpcmpsgtw_v8i1_v64i1_mask:
5475; NoVLX:       # %bb.0: # %entry
5476; NoVLX-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm0
5477; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
5478; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0
5479; NoVLX-NEXT:    kmovw %k0, %eax
5480; NoVLX-NEXT:    movzwl %ax, %eax
5481; NoVLX-NEXT:    vzeroupper
5482; NoVLX-NEXT:    retq
5483entry:
5484  %0 = bitcast <2 x i64> %__a to <8 x i16>
5485  %1 = bitcast <2 x i64> %__b to <8 x i16>
5486  %2 = icmp sgt <8 x i16> %0, %1
; Every index past 7 is in the range 8-15 (repeated), i.e. a lane of the
; zeroinitializer operand, so lanes 8-63 of the widened mask are zero.
5487  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
5488  %4 = bitcast <64 x i1> %3 to i64
5489  ret i64 %4
5490}
5491
; Unmasked signed greater-than on 8 x i16, with the right-hand operand loaded
; from %__b. The 8-lane result is widened to 64 lanes and returned as i64.
; Both sets of assertions expect the load to be folded into vpcmpgtw.
5492define zeroext i64 @test_vpcmpsgtw_v8i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
5493; VLX-LABEL: test_vpcmpsgtw_v8i1_v64i1_mask_mem:
5494; VLX:       # %bb.0: # %entry
5495; VLX-NEXT:    vpcmpgtw (%rdi), %xmm0, %k0
5496; VLX-NEXT:    kmovq %k0, %rax
5497; VLX-NEXT:    retq
5498;
5499; NoVLX-LABEL: test_vpcmpsgtw_v8i1_v64i1_mask_mem:
5500; NoVLX:       # %bb.0: # %entry
5501; NoVLX-NEXT:    vpcmpgtw (%rdi), %xmm0, %xmm0
5502; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
5503; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0
5504; NoVLX-NEXT:    kmovw %k0, %eax
5505; NoVLX-NEXT:    movzwl %ax, %eax
5506; NoVLX-NEXT:    vzeroupper
5507; NoVLX-NEXT:    retq
5508entry:
5509  %0 = bitcast <2 x i64> %__a to <8 x i16>
5510  %load = load <2 x i64>, <2 x i64>* %__b
5511  %1 = bitcast <2 x i64> %load to <8 x i16>
5512  %2 = icmp sgt <8 x i16> %0, %1
; Every index past 7 is in the range 8-15 (repeated), i.e. a lane of the
; zeroinitializer operand, so lanes 8-63 of the widened mask are zero.
5513  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
5514  %4 = bitcast <64 x i1> %3 to i64
5515  ret i64 %4
5516}
5517
; Masked signed greater-than on 8 x i16 (register operands). The result is
; ANDed with %__u (as <8 x i1>), widened to 64 lanes and returned as i64.
; The NoVLX assertions expect the mask to be applied via {%k1} on vptestmq,
; followed by kmovw and a movzwl of %ax.
5518define zeroext i64 @test_masked_vpcmpsgtw_v8i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
5519; VLX-LABEL: test_masked_vpcmpsgtw_v8i1_v64i1_mask:
5520; VLX:       # %bb.0: # %entry
5521; VLX-NEXT:    kmovd %edi, %k1
5522; VLX-NEXT:    vpcmpgtw %xmm1, %xmm0, %k0 {%k1}
5523; VLX-NEXT:    kmovq %k0, %rax
5524; VLX-NEXT:    retq
5525;
5526; NoVLX-LABEL: test_masked_vpcmpsgtw_v8i1_v64i1_mask:
5527; NoVLX:       # %bb.0: # %entry
5528; NoVLX-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm0
5529; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
5530; NoVLX-NEXT:    kmovw %edi, %k1
5531; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0 {%k1}
5532; NoVLX-NEXT:    kmovw %k0, %eax
5533; NoVLX-NEXT:    movzwl %ax, %eax
5534; NoVLX-NEXT:    vzeroupper
5535; NoVLX-NEXT:    retq
5536entry:
5537  %0 = bitcast <2 x i64> %__a to <8 x i16>
5538  %1 = bitcast <2 x i64> %__b to <8 x i16>
5539  %2 = icmp sgt <8 x i16> %0, %1
; Apply the caller-supplied mask lane-wise before widening.
5540  %3 = bitcast i8 %__u to <8 x i1>
5541  %4 = and <8 x i1> %2, %3
; Every index past 7 is in the range 8-15 (repeated), i.e. a lane of the
; zeroinitializer operand, so lanes 8-63 of the widened mask are zero.
5542  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
5543  %6 = bitcast <64 x i1> %5 to i64
5544  ret i64 %6
5545}
5546
; Masked signed greater-than on 8 x i16, with the right-hand operand loaded
; from %__b. The result is ANDed with %__u (as <8 x i1>), widened to 64 lanes
; and returned as i64. The assertions expect the mask in %edi and the
; pointer in %rsi.
5547define zeroext i64 @test_masked_vpcmpsgtw_v8i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
5548; VLX-LABEL: test_masked_vpcmpsgtw_v8i1_v64i1_mask_mem:
5549; VLX:       # %bb.0: # %entry
5550; VLX-NEXT:    kmovd %edi, %k1
5551; VLX-NEXT:    vpcmpgtw (%rsi), %xmm0, %k0 {%k1}
5552; VLX-NEXT:    kmovq %k0, %rax
5553; VLX-NEXT:    retq
5554;
5555; NoVLX-LABEL: test_masked_vpcmpsgtw_v8i1_v64i1_mask_mem:
5556; NoVLX:       # %bb.0: # %entry
5557; NoVLX-NEXT:    vpcmpgtw (%rsi), %xmm0, %xmm0
5558; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
5559; NoVLX-NEXT:    kmovw %edi, %k1
5560; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0 {%k1}
5561; NoVLX-NEXT:    kmovw %k0, %eax
5562; NoVLX-NEXT:    movzwl %ax, %eax
5563; NoVLX-NEXT:    vzeroupper
5564; NoVLX-NEXT:    retq
5565entry:
5566  %0 = bitcast <2 x i64> %__a to <8 x i16>
5567  %load = load <2 x i64>, <2 x i64>* %__b
5568  %1 = bitcast <2 x i64> %load to <8 x i16>
5569  %2 = icmp sgt <8 x i16> %0, %1
; Apply the caller-supplied mask lane-wise before widening.
5570  %3 = bitcast i8 %__u to <8 x i1>
5571  %4 = and <8 x i1> %2, %3
; Every index past 7 is in the range 8-15 (repeated), i.e. a lane of the
; zeroinitializer operand, so lanes 8-63 of the widened mask are zero.
5572  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
5573  %6 = bitcast <64 x i1> %5 to i64
5574  ret i64 %6
5575}
5576
5577
; Unmasked signed greater-than on 16 x i16 (register operands). The 16-lane
; result is widened to 32 lanes and returned as i32. The NoVLX assertions
; expect the ymm compare result to be sign-extended to zmm (vpmovsxwd) and
; turned into a k-mask with vptestmd.
5578define zeroext i32 @test_vpcmpsgtw_v16i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
5579; VLX-LABEL: test_vpcmpsgtw_v16i1_v32i1_mask:
5580; VLX:       # %bb.0: # %entry
5581; VLX-NEXT:    vpcmpgtw %ymm1, %ymm0, %k0
5582; VLX-NEXT:    kmovd %k0, %eax
5583; VLX-NEXT:    vzeroupper
5584; VLX-NEXT:    retq
5585;
5586; NoVLX-LABEL: test_vpcmpsgtw_v16i1_v32i1_mask:
5587; NoVLX:       # %bb.0: # %entry
5588; NoVLX-NEXT:    vpcmpgtw %ymm1, %ymm0, %ymm0
5589; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
5590; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
5591; NoVLX-NEXT:    kmovw %k0, %eax
5592; NoVLX-NEXT:    vzeroupper
5593; NoVLX-NEXT:    retq
5594entry:
5595  %0 = bitcast <4 x i64> %__a to <16 x i16>
5596  %1 = bitcast <4 x i64> %__b to <16 x i16>
5597  %2 = icmp sgt <16 x i16> %0, %1
; Indices 16-31 select lanes of the zeroinitializer operand, so lanes 16-31
; of the widened mask are zero.
5598  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
5599  %4 = bitcast <32 x i1> %3 to i32
5600  ret i32 %4
5601}
5602
; Unmasked signed greater-than on 16 x i16, with the right-hand operand
; loaded from %__b. The 16-lane result is widened to 32 lanes and returned as
; i32. Both sets of assertions expect the load to be folded into vpcmpgtw.
5603define zeroext i32 @test_vpcmpsgtw_v16i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
5604; VLX-LABEL: test_vpcmpsgtw_v16i1_v32i1_mask_mem:
5605; VLX:       # %bb.0: # %entry
5606; VLX-NEXT:    vpcmpgtw (%rdi), %ymm0, %k0
5607; VLX-NEXT:    kmovd %k0, %eax
5608; VLX-NEXT:    vzeroupper
5609; VLX-NEXT:    retq
5610;
5611; NoVLX-LABEL: test_vpcmpsgtw_v16i1_v32i1_mask_mem:
5612; NoVLX:       # %bb.0: # %entry
5613; NoVLX-NEXT:    vpcmpgtw (%rdi), %ymm0, %ymm0
5614; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
5615; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
5616; NoVLX-NEXT:    kmovw %k0, %eax
5617; NoVLX-NEXT:    vzeroupper
5618; NoVLX-NEXT:    retq
5619entry:
5620  %0 = bitcast <4 x i64> %__a to <16 x i16>
5621  %load = load <4 x i64>, <4 x i64>* %__b
5622  %1 = bitcast <4 x i64> %load to <16 x i16>
5623  %2 = icmp sgt <16 x i16> %0, %1
; Indices 16-31 select lanes of the zeroinitializer operand, so lanes 16-31
; of the widened mask are zero.
5624  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
5625  %4 = bitcast <32 x i1> %3 to i32
5626  ret i32 %4
5627}
5628
; Masked signed greater-than on 16 x i16 (register operands). The result is
; ANDed with %__u (as <16 x i1>), widened to 32 lanes and returned as i32.
; The NoVLX assertions expect the mask to be applied with a scalar andl
; against %edi after the k-mask has been moved to %eax.
5629define zeroext i32 @test_masked_vpcmpsgtw_v16i1_v32i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
5630; VLX-LABEL: test_masked_vpcmpsgtw_v16i1_v32i1_mask:
5631; VLX:       # %bb.0: # %entry
5632; VLX-NEXT:    kmovd %edi, %k1
5633; VLX-NEXT:    vpcmpgtw %ymm1, %ymm0, %k0 {%k1}
5634; VLX-NEXT:    kmovd %k0, %eax
5635; VLX-NEXT:    vzeroupper
5636; VLX-NEXT:    retq
5637;
5638; NoVLX-LABEL: test_masked_vpcmpsgtw_v16i1_v32i1_mask:
5639; NoVLX:       # %bb.0: # %entry
5640; NoVLX-NEXT:    vpcmpgtw %ymm1, %ymm0, %ymm0
5641; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
5642; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
5643; NoVLX-NEXT:    kmovw %k0, %eax
5644; NoVLX-NEXT:    andl %edi, %eax
5645; NoVLX-NEXT:    vzeroupper
5646; NoVLX-NEXT:    retq
5647entry:
5648  %0 = bitcast <4 x i64> %__a to <16 x i16>
5649  %1 = bitcast <4 x i64> %__b to <16 x i16>
5650  %2 = icmp sgt <16 x i16> %0, %1
; Apply the caller-supplied mask lane-wise before widening.
5651  %3 = bitcast i16 %__u to <16 x i1>
5652  %4 = and <16 x i1> %2, %3
; Indices 16-31 select lanes of the zeroinitializer operand, so lanes 16-31
; of the widened mask are zero.
5653  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
5654  %6 = bitcast <32 x i1> %5 to i32
5655  ret i32 %6
5656}
5657
; Masked signed greater-than on 16 x i16, with the right-hand operand loaded
; from %__b. The result is ANDed with %__u (as <16 x i1>), widened to 32
; lanes and returned as i32. The assertions expect the mask in %edi and the
; pointer in %rsi.
5658define zeroext i32 @test_masked_vpcmpsgtw_v16i1_v32i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
5659; VLX-LABEL: test_masked_vpcmpsgtw_v16i1_v32i1_mask_mem:
5660; VLX:       # %bb.0: # %entry
5661; VLX-NEXT:    kmovd %edi, %k1
5662; VLX-NEXT:    vpcmpgtw (%rsi), %ymm0, %k0 {%k1}
5663; VLX-NEXT:    kmovd %k0, %eax
5664; VLX-NEXT:    vzeroupper
5665; VLX-NEXT:    retq
5666;
5667; NoVLX-LABEL: test_masked_vpcmpsgtw_v16i1_v32i1_mask_mem:
5668; NoVLX:       # %bb.0: # %entry
5669; NoVLX-NEXT:    vpcmpgtw (%rsi), %ymm0, %ymm0
5670; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
5671; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
5672; NoVLX-NEXT:    kmovw %k0, %eax
5673; NoVLX-NEXT:    andl %edi, %eax
5674; NoVLX-NEXT:    vzeroupper
5675; NoVLX-NEXT:    retq
5676entry:
5677  %0 = bitcast <4 x i64> %__a to <16 x i16>
5678  %load = load <4 x i64>, <4 x i64>* %__b
5679  %1 = bitcast <4 x i64> %load to <16 x i16>
5680  %2 = icmp sgt <16 x i16> %0, %1
; Apply the caller-supplied mask lane-wise before widening.
5681  %3 = bitcast i16 %__u to <16 x i1>
5682  %4 = and <16 x i1> %2, %3
; Indices 16-31 select lanes of the zeroinitializer operand, so lanes 16-31
; of the widened mask are zero.
5683  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
5684  %6 = bitcast <32 x i1> %5 to i32
5685  ret i32 %6
5686}
5687
5688
; Unmasked signed greater-than on 16 x i16 (register operands). The 16-lane
; result is widened to 64 lanes and returned as i64. The NoVLX assertions
; read the mask with kmovw and then expect a movzwl of %ax.
5689define zeroext i64 @test_vpcmpsgtw_v16i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
5690; VLX-LABEL: test_vpcmpsgtw_v16i1_v64i1_mask:
5691; VLX:       # %bb.0: # %entry
5692; VLX-NEXT:    vpcmpgtw %ymm1, %ymm0, %k0
5693; VLX-NEXT:    kmovq %k0, %rax
5694; VLX-NEXT:    vzeroupper
5695; VLX-NEXT:    retq
5696;
5697; NoVLX-LABEL: test_vpcmpsgtw_v16i1_v64i1_mask:
5698; NoVLX:       # %bb.0: # %entry
5699; NoVLX-NEXT:    vpcmpgtw %ymm1, %ymm0, %ymm0
5700; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
5701; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
5702; NoVLX-NEXT:    kmovw %k0, %eax
5703; NoVLX-NEXT:    movzwl %ax, %eax
5704; NoVLX-NEXT:    vzeroupper
5705; NoVLX-NEXT:    retq
5706entry:
5707  %0 = bitcast <4 x i64> %__a to <16 x i16>
5708  %1 = bitcast <4 x i64> %__b to <16 x i16>
5709  %2 = icmp sgt <16 x i16> %0, %1
; Every index past 15 is in the range 16-31 (repeated), i.e. a lane of the
; zeroinitializer operand, so lanes 16-63 of the widened mask are zero.
5710  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
5711  %4 = bitcast <64 x i1> %3 to i64
5712  ret i64 %4
5713}
5714
; Unmasked signed greater-than on 16 x i16, with the right-hand operand
; loaded from %__b. The 16-lane result is widened to 64 lanes and returned as
; i64. Both sets of assertions expect the load to be folded into vpcmpgtw.
5715define zeroext i64 @test_vpcmpsgtw_v16i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
5716; VLX-LABEL: test_vpcmpsgtw_v16i1_v64i1_mask_mem:
5717; VLX:       # %bb.0: # %entry
5718; VLX-NEXT:    vpcmpgtw (%rdi), %ymm0, %k0
5719; VLX-NEXT:    kmovq %k0, %rax
5720; VLX-NEXT:    vzeroupper
5721; VLX-NEXT:    retq
5722;
5723; NoVLX-LABEL: test_vpcmpsgtw_v16i1_v64i1_mask_mem:
5724; NoVLX:       # %bb.0: # %entry
5725; NoVLX-NEXT:    vpcmpgtw (%rdi), %ymm0, %ymm0
5726; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
5727; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
5728; NoVLX-NEXT:    kmovw %k0, %eax
5729; NoVLX-NEXT:    movzwl %ax, %eax
5730; NoVLX-NEXT:    vzeroupper
5731; NoVLX-NEXT:    retq
5732entry:
5733  %0 = bitcast <4 x i64> %__a to <16 x i16>
5734  %load = load <4 x i64>, <4 x i64>* %__b
5735  %1 = bitcast <4 x i64> %load to <16 x i16>
5736  %2 = icmp sgt <16 x i16> %0, %1
; Every index past 15 is in the range 16-31 (repeated), i.e. a lane of the
; zeroinitializer operand, so lanes 16-63 of the widened mask are zero.
5737  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
5738  %4 = bitcast <64 x i1> %3 to i64
5739  ret i64 %4
5740}
5741
; Masked signed greater-than on 16 x i16 (register operands). The result is
; ANDed with %__u (as <16 x i1>), widened to 64 lanes and returned as i64.
; The NoVLX assertions expect a scalar andl against %edi and, unlike the
; unmasked v16i1_v64i1 tests, no movzwl afterwards.
5742define zeroext i64 @test_masked_vpcmpsgtw_v16i1_v64i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
5743; VLX-LABEL: test_masked_vpcmpsgtw_v16i1_v64i1_mask:
5744; VLX:       # %bb.0: # %entry
5745; VLX-NEXT:    kmovd %edi, %k1
5746; VLX-NEXT:    vpcmpgtw %ymm1, %ymm0, %k0 {%k1}
5747; VLX-NEXT:    kmovq %k0, %rax
5748; VLX-NEXT:    vzeroupper
5749; VLX-NEXT:    retq
5750;
5751; NoVLX-LABEL: test_masked_vpcmpsgtw_v16i1_v64i1_mask:
5752; NoVLX:       # %bb.0: # %entry
5753; NoVLX-NEXT:    vpcmpgtw %ymm1, %ymm0, %ymm0
5754; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
5755; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
5756; NoVLX-NEXT:    kmovw %k0, %eax
5757; NoVLX-NEXT:    andl %edi, %eax
5758; NoVLX-NEXT:    vzeroupper
5759; NoVLX-NEXT:    retq
5760entry:
5761  %0 = bitcast <4 x i64> %__a to <16 x i16>
5762  %1 = bitcast <4 x i64> %__b to <16 x i16>
5763  %2 = icmp sgt <16 x i16> %0, %1
; Apply the caller-supplied mask lane-wise before widening.
5764  %3 = bitcast i16 %__u to <16 x i1>
5765  %4 = and <16 x i1> %2, %3
; Every index past 15 is in the range 16-31 (repeated), i.e. a lane of the
; zeroinitializer operand, so lanes 16-63 of the widened mask are zero.
5766  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
5767  %6 = bitcast <64 x i1> %5 to i64
5768  ret i64 %6
5769}
5770
; Masked signed greater-than on 16 x i16, with the right-hand operand loaded
; from %__b. The result is ANDed with %__u (as <16 x i1>), widened to 64
; lanes and returned as i64. The assertions expect the mask in %edi and the
; pointer in %rsi.
5771define zeroext i64 @test_masked_vpcmpsgtw_v16i1_v64i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
5772; VLX-LABEL: test_masked_vpcmpsgtw_v16i1_v64i1_mask_mem:
5773; VLX:       # %bb.0: # %entry
5774; VLX-NEXT:    kmovd %edi, %k1
5775; VLX-NEXT:    vpcmpgtw (%rsi), %ymm0, %k0 {%k1}
5776; VLX-NEXT:    kmovq %k0, %rax
5777; VLX-NEXT:    vzeroupper
5778; VLX-NEXT:    retq
5779;
5780; NoVLX-LABEL: test_masked_vpcmpsgtw_v16i1_v64i1_mask_mem:
5781; NoVLX:       # %bb.0: # %entry
5782; NoVLX-NEXT:    vpcmpgtw (%rsi), %ymm0, %ymm0
5783; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
5784; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
5785; NoVLX-NEXT:    kmovw %k0, %eax
5786; NoVLX-NEXT:    andl %edi, %eax
5787; NoVLX-NEXT:    vzeroupper
5788; NoVLX-NEXT:    retq
5789entry:
5790  %0 = bitcast <4 x i64> %__a to <16 x i16>
5791  %load = load <4 x i64>, <4 x i64>* %__b
5792  %1 = bitcast <4 x i64> %load to <16 x i16>
5793  %2 = icmp sgt <16 x i16> %0, %1
; Apply the caller-supplied mask lane-wise before widening.
5794  %3 = bitcast i16 %__u to <16 x i1>
5795  %4 = and <16 x i1> %2, %3
; Every index past 15 is in the range 16-31 (repeated), i.e. a lane of the
; zeroinitializer operand, so lanes 16-63 of the widened mask are zero.
5796  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
5797  %6 = bitcast <64 x i1> %5 to i64
5798  ret i64 %6
5799}
5800
5801
; Signed greater-than compare of thirty-two i16 lanes held in registers; the
; 32-lane result is zero-extended to an i64.
; With AVX512BW/VL the assertions expect a single zmm vpcmpgtw into a
; k-register. With AVX512F only, the upper 256 bits of each input are
; extracted, the two halves are compared separately, and the two 16-bit masks
; are merged with shll $16 / orl.
5802define zeroext i64 @test_vpcmpsgtw_v32i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
5803; VLX-LABEL: test_vpcmpsgtw_v32i1_v64i1_mask:
5804; VLX:       # %bb.0: # %entry
5805; VLX-NEXT:    vpcmpgtw %zmm1, %zmm0, %k0
5806; VLX-NEXT:    kmovq %k0, %rax
5807; VLX-NEXT:    vzeroupper
5808; VLX-NEXT:    retq
5809;
5810; NoVLX-LABEL: test_vpcmpsgtw_v32i1_v64i1_mask:
5811; NoVLX:       # %bb.0: # %entry
5812; NoVLX-NEXT:    vextracti64x4 $1, %zmm0, %ymm2
5813; NoVLX-NEXT:    vextracti64x4 $1, %zmm1, %ymm3
5814; NoVLX-NEXT:    vpcmpgtw %ymm3, %ymm2, %ymm2
5815; NoVLX-NEXT:    vpcmpgtw %ymm1, %ymm0, %ymm0
5816; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
5817; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
5818; NoVLX-NEXT:    kmovw %k0, %ecx
5819; NoVLX-NEXT:    vpmovsxwd %ymm2, %zmm0
5820; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
5821; NoVLX-NEXT:    kmovw %k0, %eax
5822; NoVLX-NEXT:    shll $16, %eax
5823; NoVLX-NEXT:    orl %ecx, %eax
5824; NoVLX-NEXT:    vzeroupper
5825; NoVLX-NEXT:    retq
5826entry:
5827  %0 = bitcast <8 x i64> %__a to <32 x i16>
5828  %1 = bitcast <8 x i64> %__b to <32 x i16>
5829  %2 = icmp sgt <32 x i16> %0, %1
  ; Shuffle indices >= 32 select lanes of the zeroinitializer operand, so
  ; result bits 32..63 are zero.
5830  %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
5831  %4 = bitcast <64 x i1> %3 to i64
5832  ret i64 %4
5833}
5834
; Signed greater-than compare of thirty-two i16 lanes, second operand loaded
; from %__b; the 32-lane result is zero-extended to an i64.
; With AVX512BW/VL the assertions expect the load folded into one zmm
; vpcmpgtw. With AVX512F only, the compare is split into two ymm halves that
; read (%rdi) and 32(%rdi), and the two 16-bit masks are merged with
; shll $16 / orl.
5835define zeroext i64 @test_vpcmpsgtw_v32i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
5836; VLX-LABEL: test_vpcmpsgtw_v32i1_v64i1_mask_mem:
5837; VLX:       # %bb.0: # %entry
5838; VLX-NEXT:    vpcmpgtw (%rdi), %zmm0, %k0
5839; VLX-NEXT:    kmovq %k0, %rax
5840; VLX-NEXT:    vzeroupper
5841; VLX-NEXT:    retq
5842;
5843; NoVLX-LABEL: test_vpcmpsgtw_v32i1_v64i1_mask_mem:
5844; NoVLX:       # %bb.0: # %entry
5845; NoVLX-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
5846; NoVLX-NEXT:    vpcmpgtw (%rdi), %ymm0, %ymm0
5847; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
5848; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
5849; NoVLX-NEXT:    kmovw %k0, %ecx
5850; NoVLX-NEXT:    vpcmpgtw 32(%rdi), %ymm1, %ymm0
5851; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
5852; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
5853; NoVLX-NEXT:    kmovw %k0, %eax
5854; NoVLX-NEXT:    shll $16, %eax
5855; NoVLX-NEXT:    orl %ecx, %eax
5856; NoVLX-NEXT:    vzeroupper
5857; NoVLX-NEXT:    retq
5858entry:
5859  %0 = bitcast <8 x i64> %__a to <32 x i16>
5860  %load = load <8 x i64>, <8 x i64>* %__b
5861  %1 = bitcast <8 x i64> %load to <32 x i16>
5862  %2 = icmp sgt <32 x i16> %0, %1
  ; Shuffle indices >= 32 select lanes of the zeroinitializer operand, so
  ; result bits 32..63 are zero.
5863  %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
5864  %4 = bitcast <64 x i1> %3 to i64
5865  ret i64 %4
5866}
5867
; Masked signed greater-than compare of thirty-two i16 lanes held in
; registers; the 32-lane result is zero-extended to an i64.
; With AVX512BW/VL the assertions expect one masked zmm vpcmpgtw. With AVX512F
; only, the low and high 16 lanes are compared separately, each 16-bit mask is
; ANDed with the matching half of %__u (the high half obtained by shrl $16 on
; %edi), and the halves are merged with shll $16 / orl.
5868define zeroext i64 @test_masked_vpcmpsgtw_v32i1_v64i1_mask(i32 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
5869; VLX-LABEL: test_masked_vpcmpsgtw_v32i1_v64i1_mask:
5870; VLX:       # %bb.0: # %entry
5871; VLX-NEXT:    kmovd %edi, %k1
5872; VLX-NEXT:    vpcmpgtw %zmm1, %zmm0, %k0 {%k1}
5873; VLX-NEXT:    kmovq %k0, %rax
5874; VLX-NEXT:    vzeroupper
5875; VLX-NEXT:    retq
5876;
5877; NoVLX-LABEL: test_masked_vpcmpsgtw_v32i1_v64i1_mask:
5878; NoVLX:       # %bb.0: # %entry
5879; NoVLX-NEXT:    vpcmpgtw %ymm1, %ymm0, %ymm2
5880; NoVLX-NEXT:    vpmovsxwd %ymm2, %zmm2
5881; NoVLX-NEXT:    vptestmd %zmm2, %zmm2, %k0
5882; NoVLX-NEXT:    kmovw %k0, %eax
5883; NoVLX-NEXT:    andl %edi, %eax
5884; NoVLX-NEXT:    shrl $16, %edi
5885; NoVLX-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
5886; NoVLX-NEXT:    vextracti64x4 $1, %zmm1, %ymm1
5887; NoVLX-NEXT:    vpcmpgtw %ymm1, %ymm0, %ymm0
5888; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
5889; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
5890; NoVLX-NEXT:    kmovw %k0, %ecx
5891; NoVLX-NEXT:    andl %edi, %ecx
5892; NoVLX-NEXT:    shll $16, %ecx
5893; NoVLX-NEXT:    movzwl %ax, %eax
5894; NoVLX-NEXT:    orl %ecx, %eax
5895; NoVLX-NEXT:    vzeroupper
5896; NoVLX-NEXT:    retq
5897entry:
5898  %0 = bitcast <8 x i64> %__a to <32 x i16>
5899  %1 = bitcast <8 x i64> %__b to <32 x i16>
5900  %2 = icmp sgt <32 x i16> %0, %1
  ; Reinterpret the scalar mask %__u as one bit per lane and apply it.
5901  %3 = bitcast i32 %__u to <32 x i1>
5902  %4 = and <32 x i1> %2, %3
  ; Shuffle indices >= 32 select lanes of the zeroinitializer operand, so
  ; result bits 32..63 are zero.
5903  %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
5904  %6 = bitcast <64 x i1> %5 to i64
5905  ret i64 %6
5906}
5907
; Masked signed greater-than compare of thirty-two i16 lanes, second operand
; loaded from %__b; the 32-lane result is zero-extended to an i64.
; With AVX512BW/VL the assertions expect one masked zmm vpcmpgtw with the load
; folded. With AVX512F only, two ymm compares read (%rsi) and 32(%rsi), each
; 16-bit mask is ANDed with the matching half of %__u, and the halves are
; merged with shll $16 / orl.
5908define zeroext i64 @test_masked_vpcmpsgtw_v32i1_v64i1_mask_mem(i32 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
5909; VLX-LABEL: test_masked_vpcmpsgtw_v32i1_v64i1_mask_mem:
5910; VLX:       # %bb.0: # %entry
5911; VLX-NEXT:    kmovd %edi, %k1
5912; VLX-NEXT:    vpcmpgtw (%rsi), %zmm0, %k0 {%k1}
5913; VLX-NEXT:    kmovq %k0, %rax
5914; VLX-NEXT:    vzeroupper
5915; VLX-NEXT:    retq
5916;
5917; NoVLX-LABEL: test_masked_vpcmpsgtw_v32i1_v64i1_mask_mem:
5918; NoVLX:       # %bb.0: # %entry
5919; NoVLX-NEXT:    vpcmpgtw (%rsi), %ymm0, %ymm1
5920; NoVLX-NEXT:    vpmovsxwd %ymm1, %zmm1
5921; NoVLX-NEXT:    vptestmd %zmm1, %zmm1, %k0
5922; NoVLX-NEXT:    kmovw %k0, %eax
5923; NoVLX-NEXT:    andl %edi, %eax
5924; NoVLX-NEXT:    shrl $16, %edi
5925; NoVLX-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
5926; NoVLX-NEXT:    vpcmpgtw 32(%rsi), %ymm0, %ymm0
5927; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
5928; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
5929; NoVLX-NEXT:    kmovw %k0, %ecx
5930; NoVLX-NEXT:    andl %edi, %ecx
5931; NoVLX-NEXT:    shll $16, %ecx
5932; NoVLX-NEXT:    movzwl %ax, %eax
5933; NoVLX-NEXT:    orl %ecx, %eax
5934; NoVLX-NEXT:    vzeroupper
5935; NoVLX-NEXT:    retq
5936entry:
5937  %0 = bitcast <8 x i64> %__a to <32 x i16>
5938  %load = load <8 x i64>, <8 x i64>* %__b
5939  %1 = bitcast <8 x i64> %load to <32 x i16>
5940  %2 = icmp sgt <32 x i16> %0, %1
  ; Reinterpret the scalar mask %__u as one bit per lane and apply it.
5941  %3 = bitcast i32 %__u to <32 x i1>
5942  %4 = and <32 x i1> %2, %3
  ; Shuffle indices >= 32 select lanes of the zeroinitializer operand, so
  ; result bits 32..63 are zero.
5943  %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
5944  %6 = bitcast <64 x i1> %5 to i64
5945  ret i64 %6
5946}
5947
5948
; Signed greater-than compare of four i32 lanes held in registers; the 4-lane
; result is zero-extended to an i8.
; With AVX512VL the assertions expect an xmm vpcmpgtd into a k-register. With
; AVX512F only, the compare is done on the enclosing zmm registers and a
; kshiftlw $12 / kshiftrw $12 pair keeps only mask bits 0..3.
5949define zeroext i8 @test_vpcmpsgtd_v4i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
5950; VLX-LABEL: test_vpcmpsgtd_v4i1_v8i1_mask:
5951; VLX:       # %bb.0: # %entry
5952; VLX-NEXT:    vpcmpgtd %xmm1, %xmm0, %k0
5953; VLX-NEXT:    kmovd %k0, %eax
5954; VLX-NEXT:    # kill: def $al killed $al killed $eax
5955; VLX-NEXT:    retq
5956;
5957; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v8i1_mask:
5958; NoVLX:       # %bb.0: # %entry
5959; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
5960; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
5961; NoVLX-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0
5962; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
5963; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
5964; NoVLX-NEXT:    kmovw %k0, %eax
5965; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
5966; NoVLX-NEXT:    vzeroupper
5967; NoVLX-NEXT:    retq
5968entry:
5969  %0 = bitcast <2 x i64> %__a to <4 x i32>
5970  %1 = bitcast <2 x i64> %__b to <4 x i32>
5971  %2 = icmp sgt <4 x i32> %0, %1
  ; Shuffle indices >= 4 select lanes of the zeroinitializer operand, so
  ; result bits 4..7 are zero.
5972  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
5973  %4 = bitcast <8 x i1> %3 to i8
5974  ret i8 %4
5975}
5976
; Signed greater-than compare of four i32 lanes, second operand loaded from
; %__b; the 4-lane result is zero-extended to an i8.
; With AVX512VL the assertions expect the load folded into an xmm vpcmpgtd.
; With AVX512F only, the operand is loaded with vmovdqa, compared on zmm
; registers, and a kshiftlw $12 / kshiftrw $12 pair keeps only mask bits 0..3.
5977define zeroext i8 @test_vpcmpsgtd_v4i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
5978; VLX-LABEL: test_vpcmpsgtd_v4i1_v8i1_mask_mem:
5979; VLX:       # %bb.0: # %entry
5980; VLX-NEXT:    vpcmpgtd (%rdi), %xmm0, %k0
5981; VLX-NEXT:    kmovd %k0, %eax
5982; VLX-NEXT:    # kill: def $al killed $al killed $eax
5983; VLX-NEXT:    retq
5984;
5985; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v8i1_mask_mem:
5986; NoVLX:       # %bb.0: # %entry
5987; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
5988; NoVLX-NEXT:    vmovdqa (%rdi), %xmm1
5989; NoVLX-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0
5990; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
5991; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
5992; NoVLX-NEXT:    kmovw %k0, %eax
5993; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
5994; NoVLX-NEXT:    vzeroupper
5995; NoVLX-NEXT:    retq
5996entry:
5997  %0 = bitcast <2 x i64> %__a to <4 x i32>
5998  %load = load <2 x i64>, <2 x i64>* %__b
5999  %1 = bitcast <2 x i64> %load to <4 x i32>
6000  %2 = icmp sgt <4 x i32> %0, %1
  ; Shuffle indices >= 4 select lanes of the zeroinitializer operand, so
  ; result bits 4..7 are zero.
6001  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
6002  %4 = bitcast <8 x i1> %3 to i8
6003  ret i8 %4
6004}
6005
; Masked signed greater-than compare of four i32 lanes held in registers; the
; 4-lane result is zero-extended to an i8. Only the low four bits of %__u are
; used as the lane mask.
; With AVX512VL the assertions expect a masked xmm vpcmpgtd. With AVX512F
; only, a masked zmm compare is followed by kshiftlw $12 / kshiftrw $12, which
; keeps only mask bits 0..3.
6006define zeroext i8 @test_masked_vpcmpsgtd_v4i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
6007; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v8i1_mask:
6008; VLX:       # %bb.0: # %entry
6009; VLX-NEXT:    kmovd %edi, %k1
6010; VLX-NEXT:    vpcmpgtd %xmm1, %xmm0, %k0 {%k1}
6011; VLX-NEXT:    kmovd %k0, %eax
6012; VLX-NEXT:    # kill: def $al killed $al killed $eax
6013; VLX-NEXT:    retq
6014;
6015; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v8i1_mask:
6016; NoVLX:       # %bb.0: # %entry
6017; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
6018; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
6019; NoVLX-NEXT:    kmovw %edi, %k1
6020; NoVLX-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
6021; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
6022; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
6023; NoVLX-NEXT:    kmovw %k0, %eax
6024; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
6025; NoVLX-NEXT:    vzeroupper
6026; NoVLX-NEXT:    retq
6027entry:
6028  %0 = bitcast <2 x i64> %__a to <4 x i32>
6029  %1 = bitcast <2 x i64> %__b to <4 x i32>
6030  %2 = icmp sgt <4 x i32> %0, %1
  ; View %__u as eight i1 lanes and keep lanes 0..3 as the compare mask.
6031  %3 = bitcast i8 %__u to <8 x i1>
6032  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6033  %4 = and <4 x i1> %2, %extract.i
  ; Shuffle indices >= 4 select lanes of the zeroinitializer operand, so
  ; result bits 4..7 are zero.
6034  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
6035  %6 = bitcast <8 x i1> %5 to i8
6036  ret i8 %6
6037}
6038
; Masked signed greater-than compare of four i32 lanes, second operand loaded
; from %__b; the 4-lane result is zero-extended to an i8. Only the low four
; bits of %__u are used as the lane mask.
; With AVX512VL the assertions expect a masked xmm vpcmpgtd with the load
; folded. With AVX512F only, the operand is loaded with vmovdqa, compared
; under mask on zmm registers, and kshiftlw $12 / kshiftrw $12 keeps only mask
; bits 0..3.
6039define zeroext i8 @test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
6040; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem:
6041; VLX:       # %bb.0: # %entry
6042; VLX-NEXT:    kmovd %edi, %k1
6043; VLX-NEXT:    vpcmpgtd (%rsi), %xmm0, %k0 {%k1}
6044; VLX-NEXT:    kmovd %k0, %eax
6045; VLX-NEXT:    # kill: def $al killed $al killed $eax
6046; VLX-NEXT:    retq
6047;
6048; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem:
6049; NoVLX:       # %bb.0: # %entry
6050; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
6051; NoVLX-NEXT:    vmovdqa (%rsi), %xmm1
6052; NoVLX-NEXT:    kmovw %edi, %k1
6053; NoVLX-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
6054; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
6055; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
6056; NoVLX-NEXT:    kmovw %k0, %eax
6057; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
6058; NoVLX-NEXT:    vzeroupper
6059; NoVLX-NEXT:    retq
6060entry:
6061  %0 = bitcast <2 x i64> %__a to <4 x i32>
6062  %load = load <2 x i64>, <2 x i64>* %__b
6063  %1 = bitcast <2 x i64> %load to <4 x i32>
6064  %2 = icmp sgt <4 x i32> %0, %1
  ; View %__u as eight i1 lanes and keep lanes 0..3 as the compare mask.
6065  %3 = bitcast i8 %__u to <8 x i1>
6066  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6067  %4 = and <4 x i1> %2, %extract.i
  ; Shuffle indices >= 4 select lanes of the zeroinitializer operand, so
  ; result bits 4..7 are zero.
6068  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
6069  %6 = bitcast <8 x i1> %5 to i8
6070  ret i8 %6
6071}
6072
6073
; Signed greater-than compare of four i32 lanes against a scalar i32 loaded
; from %__b and splatted to all lanes; the 4-lane result is zero-extended to
; an i8.
; With AVX512VL the assertions expect the splat folded as a {1to4} broadcast
; memory operand. With AVX512F only, vpbroadcastd materialises the splat, the
; compare runs on zmm registers, and kshiftlw $12 / kshiftrw $12 keeps only
; mask bits 0..3.
6074define zeroext i8 @test_vpcmpsgtd_v4i1_v8i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
6075; VLX-LABEL: test_vpcmpsgtd_v4i1_v8i1_mask_mem_b:
6076; VLX:       # %bb.0: # %entry
6077; VLX-NEXT:    vpcmpgtd (%rdi){1to4}, %xmm0, %k0
6078; VLX-NEXT:    kmovd %k0, %eax
6079; VLX-NEXT:    # kill: def $al killed $al killed $eax
6080; VLX-NEXT:    retq
6081;
6082; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v8i1_mask_mem_b:
6083; NoVLX:       # %bb.0: # %entry
6084; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
6085; NoVLX-NEXT:    vpbroadcastd (%rdi), %xmm1
6086; NoVLX-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0
6087; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
6088; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
6089; NoVLX-NEXT:    kmovw %k0, %eax
6090; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
6091; NoVLX-NEXT:    vzeroupper
6092; NoVLX-NEXT:    retq
6093entry:
6094  %0 = bitcast <2 x i64> %__a to <4 x i32>
  ; Splat the loaded scalar: insert into lane 0, then shuffle with an
  ; all-zero index mask.
6095  %load = load i32, i32* %__b
6096  %vec = insertelement <4 x i32> undef, i32 %load, i32 0
6097  %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
6098  %2 = icmp sgt <4 x i32> %0, %1
  ; Shuffle indices >= 4 select lanes of the zeroinitializer operand, so
  ; result bits 4..7 are zero.
6099  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
6100  %4 = bitcast <8 x i1> %3 to i8
6101  ret i8 %4
6102}
6103
; Masked signed greater-than compare of four i32 lanes against a scalar i32
; loaded from %__b and splatted to all lanes; the 4-lane result is
; zero-extended to an i8. Only the low four bits of %__u are used as the lane
; mask.
; With AVX512VL the assertions expect a masked vpcmpgtd with a {1to4}
; broadcast memory operand. With AVX512F only, vpbroadcastd materialises the
; splat, the masked compare runs on zmm registers, and kshiftlw $12 /
; kshiftrw $12 keeps only mask bits 0..3.
6104define zeroext i8 @test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
6105; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem_b:
6106; VLX:       # %bb.0: # %entry
6107; VLX-NEXT:    kmovd %edi, %k1
6108; VLX-NEXT:    vpcmpgtd (%rsi){1to4}, %xmm0, %k0 {%k1}
6109; VLX-NEXT:    kmovd %k0, %eax
6110; VLX-NEXT:    # kill: def $al killed $al killed $eax
6111; VLX-NEXT:    retq
6112;
6113; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem_b:
6114; NoVLX:       # %bb.0: # %entry
6115; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
6116; NoVLX-NEXT:    vpbroadcastd (%rsi), %xmm1
6117; NoVLX-NEXT:    kmovw %edi, %k1
6118; NoVLX-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
6119; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
6120; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
6121; NoVLX-NEXT:    kmovw %k0, %eax
6122; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
6123; NoVLX-NEXT:    vzeroupper
6124; NoVLX-NEXT:    retq
6125entry:
6126  %0 = bitcast <2 x i64> %__a to <4 x i32>
  ; Splat the loaded scalar: insert into lane 0, then shuffle with an
  ; all-zero index mask.
6127  %load = load i32, i32* %__b
6128  %vec = insertelement <4 x i32> undef, i32 %load, i32 0
6129  %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
6130  %2 = icmp sgt <4 x i32> %0, %1
  ; View %__u as eight i1 lanes and keep lanes 0..3 as the compare mask.
6131  %3 = bitcast i8 %__u to <8 x i1>
6132  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ; Note the operand order here is mask first, compare result second.
6133  %4 = and <4 x i1> %extract.i, %2
  ; Shuffle indices >= 4 select lanes of the zeroinitializer operand, so
  ; result bits 4..7 are zero.
6134  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
6135  %6 = bitcast <8 x i1> %5 to i8
6136  ret i8 %6
6137}
6138
6139
; Signed greater-than compare of four i32 lanes held in registers; the 4-lane
; result is zero-extended to an i16.
; With AVX512VL the assertions expect an xmm vpcmpgtd into a k-register. With
; AVX512F only, the compare is done on the enclosing zmm registers and a
; kshiftlw $12 / kshiftrw $12 pair keeps only mask bits 0..3.
6140define zeroext i16 @test_vpcmpsgtd_v4i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
6141; VLX-LABEL: test_vpcmpsgtd_v4i1_v16i1_mask:
6142; VLX:       # %bb.0: # %entry
6143; VLX-NEXT:    vpcmpgtd %xmm1, %xmm0, %k0
6144; VLX-NEXT:    kmovd %k0, %eax
6145; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
6146; VLX-NEXT:    retq
6147;
6148; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v16i1_mask:
6149; NoVLX:       # %bb.0: # %entry
6150; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
6151; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
6152; NoVLX-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0
6153; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
6154; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
6155; NoVLX-NEXT:    kmovw %k0, %eax
6156; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
6157; NoVLX-NEXT:    vzeroupper
6158; NoVLX-NEXT:    retq
6159entry:
6160  %0 = bitcast <2 x i64> %__a to <4 x i32>
6161  %1 = bitcast <2 x i64> %__b to <4 x i32>
6162  %2 = icmp sgt <4 x i32> %0, %1
  ; Shuffle indices >= 4 select lanes of the zeroinitializer operand, so
  ; result bits 4..15 are zero.
6163  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
6164  %4 = bitcast <16 x i1> %3 to i16
6165  ret i16 %4
6166}
6167
; Signed greater-than compare of four i32 lanes, second operand loaded from
; %__b; the 4-lane result is zero-extended to an i16.
; With AVX512VL the assertions expect the load folded into an xmm vpcmpgtd.
; With AVX512F only, the operand is loaded with vmovdqa, compared on zmm
; registers, and a kshiftlw $12 / kshiftrw $12 pair keeps only mask bits 0..3.
6168define zeroext i16 @test_vpcmpsgtd_v4i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
6169; VLX-LABEL: test_vpcmpsgtd_v4i1_v16i1_mask_mem:
6170; VLX:       # %bb.0: # %entry
6171; VLX-NEXT:    vpcmpgtd (%rdi), %xmm0, %k0
6172; VLX-NEXT:    kmovd %k0, %eax
6173; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
6174; VLX-NEXT:    retq
6175;
6176; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v16i1_mask_mem:
6177; NoVLX:       # %bb.0: # %entry
6178; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
6179; NoVLX-NEXT:    vmovdqa (%rdi), %xmm1
6180; NoVLX-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0
6181; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
6182; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
6183; NoVLX-NEXT:    kmovw %k0, %eax
6184; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
6185; NoVLX-NEXT:    vzeroupper
6186; NoVLX-NEXT:    retq
6187entry:
6188  %0 = bitcast <2 x i64> %__a to <4 x i32>
6189  %load = load <2 x i64>, <2 x i64>* %__b
6190  %1 = bitcast <2 x i64> %load to <4 x i32>
6191  %2 = icmp sgt <4 x i32> %0, %1
  ; Shuffle indices >= 4 select lanes of the zeroinitializer operand, so
  ; result bits 4..15 are zero.
6192  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
6193  %4 = bitcast <16 x i1> %3 to i16
6194  ret i16 %4
6195}
6196
; Masked signed greater-than compare of four i32 lanes held in registers; the
; 4-lane result is zero-extended to an i16. Only the low four bits of %__u
; are used as the lane mask.
; With AVX512VL the assertions expect a masked xmm vpcmpgtd. With AVX512F
; only, a masked zmm compare is followed by kshiftlw $12 / kshiftrw $12, which
; keeps only mask bits 0..3.
6197define zeroext i16 @test_masked_vpcmpsgtd_v4i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
6198; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v16i1_mask:
6199; VLX:       # %bb.0: # %entry
6200; VLX-NEXT:    kmovd %edi, %k1
6201; VLX-NEXT:    vpcmpgtd %xmm1, %xmm0, %k0 {%k1}
6202; VLX-NEXT:    kmovd %k0, %eax
6203; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
6204; VLX-NEXT:    retq
6205;
6206; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v16i1_mask:
6207; NoVLX:       # %bb.0: # %entry
6208; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
6209; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
6210; NoVLX-NEXT:    kmovw %edi, %k1
6211; NoVLX-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
6212; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
6213; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
6214; NoVLX-NEXT:    kmovw %k0, %eax
6215; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
6216; NoVLX-NEXT:    vzeroupper
6217; NoVLX-NEXT:    retq
6218entry:
6219  %0 = bitcast <2 x i64> %__a to <4 x i32>
6220  %1 = bitcast <2 x i64> %__b to <4 x i32>
6221  %2 = icmp sgt <4 x i32> %0, %1
  ; View %__u as eight i1 lanes and keep lanes 0..3 as the compare mask.
6222  %3 = bitcast i8 %__u to <8 x i1>
6223  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6224  %4 = and <4 x i1> %2, %extract.i
  ; Shuffle indices >= 4 select lanes of the zeroinitializer operand, so
  ; result bits 4..15 are zero.
6225  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
6226  %6 = bitcast <16 x i1> %5 to i16
6227  ret i16 %6
6228}
6229
; Masked signed greater-than compare of four i32 lanes, second operand loaded
; from %__b; the 4-lane result is zero-extended to an i16. Only the low four
; bits of %__u are used as the lane mask.
; With AVX512VL the assertions expect a masked xmm vpcmpgtd with the load
; folded. With AVX512F only, the operand is loaded with vmovdqa, compared
; under mask on zmm registers, and kshiftlw $12 / kshiftrw $12 keeps only mask
; bits 0..3.
6230define zeroext i16 @test_masked_vpcmpsgtd_v4i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
6231; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v16i1_mask_mem:
6232; VLX:       # %bb.0: # %entry
6233; VLX-NEXT:    kmovd %edi, %k1
6234; VLX-NEXT:    vpcmpgtd (%rsi), %xmm0, %k0 {%k1}
6235; VLX-NEXT:    kmovd %k0, %eax
6236; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
6237; VLX-NEXT:    retq
6238;
6239; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v16i1_mask_mem:
6240; NoVLX:       # %bb.0: # %entry
6241; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
6242; NoVLX-NEXT:    vmovdqa (%rsi), %xmm1
6243; NoVLX-NEXT:    kmovw %edi, %k1
6244; NoVLX-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
6245; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
6246; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
6247; NoVLX-NEXT:    kmovw %k0, %eax
6248; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
6249; NoVLX-NEXT:    vzeroupper
6250; NoVLX-NEXT:    retq
6251entry:
6252  %0 = bitcast <2 x i64> %__a to <4 x i32>
6253  %load = load <2 x i64>, <2 x i64>* %__b
6254  %1 = bitcast <2 x i64> %load to <4 x i32>
6255  %2 = icmp sgt <4 x i32> %0, %1
  ; View %__u as eight i1 lanes and keep lanes 0..3 as the compare mask.
6256  %3 = bitcast i8 %__u to <8 x i1>
6257  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6258  %4 = and <4 x i1> %2, %extract.i
  ; Shuffle indices >= 4 select lanes of the zeroinitializer operand, so
  ; result bits 4..15 are zero.
6259  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
6260  %6 = bitcast <16 x i1> %5 to i16
6261  ret i16 %6
6262}
6263
6264
; Signed greater-than compare of four i32 lanes against a scalar i32 loaded
; from %__b and splatted to all lanes; the 4-lane result is zero-extended to
; an i16.
; With AVX512VL the assertions expect the splat folded as a {1to4} broadcast
; memory operand. With AVX512F only, vpbroadcastd materialises the splat, the
; compare runs on zmm registers, and kshiftlw $12 / kshiftrw $12 keeps only
; mask bits 0..3.
6265define zeroext i16 @test_vpcmpsgtd_v4i1_v16i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
6266; VLX-LABEL: test_vpcmpsgtd_v4i1_v16i1_mask_mem_b:
6267; VLX:       # %bb.0: # %entry
6268; VLX-NEXT:    vpcmpgtd (%rdi){1to4}, %xmm0, %k0
6269; VLX-NEXT:    kmovd %k0, %eax
6270; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
6271; VLX-NEXT:    retq
6272;
6273; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v16i1_mask_mem_b:
6274; NoVLX:       # %bb.0: # %entry
6275; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
6276; NoVLX-NEXT:    vpbroadcastd (%rdi), %xmm1
6277; NoVLX-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0
6278; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
6279; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
6280; NoVLX-NEXT:    kmovw %k0, %eax
6281; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
6282; NoVLX-NEXT:    vzeroupper
6283; NoVLX-NEXT:    retq
6284entry:
6285  %0 = bitcast <2 x i64> %__a to <4 x i32>
  ; Splat the loaded scalar: insert into lane 0, then shuffle with an
  ; all-zero index mask.
6286  %load = load i32, i32* %__b
6287  %vec = insertelement <4 x i32> undef, i32 %load, i32 0
6288  %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
6289  %2 = icmp sgt <4 x i32> %0, %1
  ; Shuffle indices >= 4 select lanes of the zeroinitializer operand, so
  ; result bits 4..15 are zero.
6290  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
6291  %4 = bitcast <16 x i1> %3 to i16
6292  ret i16 %4
6293}
6294
; Masked signed greater-than compare of four i32 lanes against a scalar i32
; loaded from %__b and splatted to all lanes; the 4-lane result is
; zero-extended to an i16. Only the low four bits of %__u are used as the
; lane mask.
; With AVX512VL the assertions expect a masked vpcmpgtd with a {1to4}
; broadcast memory operand. With AVX512F only, vpbroadcastd materialises the
; splat, the masked compare runs on zmm registers, and kshiftlw $12 /
; kshiftrw $12 keeps only mask bits 0..3.
6295define zeroext i16 @test_masked_vpcmpsgtd_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
6296; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v16i1_mask_mem_b:
6297; VLX:       # %bb.0: # %entry
6298; VLX-NEXT:    kmovd %edi, %k1
6299; VLX-NEXT:    vpcmpgtd (%rsi){1to4}, %xmm0, %k0 {%k1}
6300; VLX-NEXT:    kmovd %k0, %eax
6301; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
6302; VLX-NEXT:    retq
6303;
6304; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v16i1_mask_mem_b:
6305; NoVLX:       # %bb.0: # %entry
6306; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
6307; NoVLX-NEXT:    vpbroadcastd (%rsi), %xmm1
6308; NoVLX-NEXT:    kmovw %edi, %k1
6309; NoVLX-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
6310; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
6311; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
6312; NoVLX-NEXT:    kmovw %k0, %eax
6313; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
6314; NoVLX-NEXT:    vzeroupper
6315; NoVLX-NEXT:    retq
6316entry:
6317  %0 = bitcast <2 x i64> %__a to <4 x i32>
  ; Splat the loaded scalar: insert into lane 0, then shuffle with an
  ; all-zero index mask.
6318  %load = load i32, i32* %__b
6319  %vec = insertelement <4 x i32> undef, i32 %load, i32 0
6320  %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
6321  %2 = icmp sgt <4 x i32> %0, %1
  ; View %__u as eight i1 lanes and keep lanes 0..3 as the compare mask.
6322  %3 = bitcast i8 %__u to <8 x i1>
6323  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ; Note the operand order here is mask first, compare result second.
6324  %4 = and <4 x i1> %extract.i, %2
  ; Shuffle indices >= 4 select lanes of the zeroinitializer operand, so
  ; result bits 4..15 are zero.
6325  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
6326  %6 = bitcast <16 x i1> %5 to i16
6327  ret i16 %6
6328}
6329
6330
; Signed greater-than compare of four i32 lanes held in registers; the 4-lane
; result is zero-extended to an i32.
; With AVX512VL the assertions expect an xmm vpcmpgtd into a k-register. With
; AVX512F only, the compare is done on the enclosing zmm registers and a
; kshiftlw $12 / kshiftrw $12 pair keeps only mask bits 0..3.
6331define zeroext i32 @test_vpcmpsgtd_v4i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
6332; VLX-LABEL: test_vpcmpsgtd_v4i1_v32i1_mask:
6333; VLX:       # %bb.0: # %entry
6334; VLX-NEXT:    vpcmpgtd %xmm1, %xmm0, %k0
6335; VLX-NEXT:    kmovd %k0, %eax
6336; VLX-NEXT:    retq
6337;
6338; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v32i1_mask:
6339; NoVLX:       # %bb.0: # %entry
6340; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
6341; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
6342; NoVLX-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0
6343; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
6344; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
6345; NoVLX-NEXT:    kmovw %k0, %eax
6346; NoVLX-NEXT:    vzeroupper
6347; NoVLX-NEXT:    retq
6348entry:
6349  %0 = bitcast <2 x i64> %__a to <4 x i32>
6350  %1 = bitcast <2 x i64> %__b to <4 x i32>
6351  %2 = icmp sgt <4 x i32> %0, %1
  ; Shuffle indices >= 4 select lanes of the zeroinitializer operand, so
  ; result bits 4..31 are zero.
6352  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
6353  %4 = bitcast <32 x i1> %3 to i32
6354  ret i32 %4
6355}
6356
; Signed greater-than compare of four i32 lanes, second operand loaded from
; %__b; the 4-lane result is zero-extended to an i32.
; With AVX512VL the assertions expect the load folded into an xmm vpcmpgtd.
; With AVX512F only, the operand is loaded with vmovdqa, compared on zmm
; registers, and a kshiftlw $12 / kshiftrw $12 pair keeps only mask bits 0..3.
6357define zeroext i32 @test_vpcmpsgtd_v4i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
6358; VLX-LABEL: test_vpcmpsgtd_v4i1_v32i1_mask_mem:
6359; VLX:       # %bb.0: # %entry
6360; VLX-NEXT:    vpcmpgtd (%rdi), %xmm0, %k0
6361; VLX-NEXT:    kmovd %k0, %eax
6362; VLX-NEXT:    retq
6363;
6364; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v32i1_mask_mem:
6365; NoVLX:       # %bb.0: # %entry
6366; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
6367; NoVLX-NEXT:    vmovdqa (%rdi), %xmm1
6368; NoVLX-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0
6369; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
6370; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
6371; NoVLX-NEXT:    kmovw %k0, %eax
6372; NoVLX-NEXT:    vzeroupper
6373; NoVLX-NEXT:    retq
6374entry:
6375  %0 = bitcast <2 x i64> %__a to <4 x i32>
6376  %load = load <2 x i64>, <2 x i64>* %__b
6377  %1 = bitcast <2 x i64> %load to <4 x i32>
6378  %2 = icmp sgt <4 x i32> %0, %1
  ; Shuffle indices >= 4 select lanes of the zeroinitializer operand, so
  ; result bits 4..31 are zero.
6379  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
6380  %4 = bitcast <32 x i1> %3 to i32
6381  ret i32 %4
6382}
6383
; Masked signed greater-than compare of four i32 lanes held in registers; the
; 4-lane result is zero-extended to an i32. Only the low four bits of %__u
; are used as the lane mask.
; With AVX512VL the assertions expect a masked xmm vpcmpgtd. With AVX512F
; only, a masked zmm compare is followed by kshiftlw $12 / kshiftrw $12, which
; keeps only mask bits 0..3.
6384define zeroext i32 @test_masked_vpcmpsgtd_v4i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
6385; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask:
6386; VLX:       # %bb.0: # %entry
6387; VLX-NEXT:    kmovd %edi, %k1
6388; VLX-NEXT:    vpcmpgtd %xmm1, %xmm0, %k0 {%k1}
6389; VLX-NEXT:    kmovd %k0, %eax
6390; VLX-NEXT:    retq
6391;
6392; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask:
6393; NoVLX:       # %bb.0: # %entry
6394; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
6395; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
6396; NoVLX-NEXT:    kmovw %edi, %k1
6397; NoVLX-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
6398; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
6399; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
6400; NoVLX-NEXT:    kmovw %k0, %eax
6401; NoVLX-NEXT:    vzeroupper
6402; NoVLX-NEXT:    retq
6403entry:
6404  %0 = bitcast <2 x i64> %__a to <4 x i32>
6405  %1 = bitcast <2 x i64> %__b to <4 x i32>
6406  %2 = icmp sgt <4 x i32> %0, %1
  ; View %__u as eight i1 lanes and keep lanes 0..3 as the compare mask.
6407  %3 = bitcast i8 %__u to <8 x i1>
6408  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6409  %4 = and <4 x i1> %2, %extract.i
  ; Shuffle indices >= 4 select lanes of the zeroinitializer operand, so
  ; result bits 4..31 are zero.
6410  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
6411  %6 = bitcast <32 x i1> %5 to i32
6412  ret i32 %6
6413}
6414
; Masked signed greater-than compare of four i32 lanes, second operand loaded
; from %__b; the 4-lane result is zero-extended to an i32. Only the low four
; bits of %__u are used as the lane mask.
; With AVX512VL the assertions expect a masked xmm vpcmpgtd with the load
; folded. With AVX512F only, the operand is loaded with vmovdqa, compared
; under mask on zmm registers, and kshiftlw $12 / kshiftrw $12 keeps only mask
; bits 0..3.
6415define zeroext i32 @test_masked_vpcmpsgtd_v4i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
6416; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask_mem:
6417; VLX:       # %bb.0: # %entry
6418; VLX-NEXT:    kmovd %edi, %k1
6419; VLX-NEXT:    vpcmpgtd (%rsi), %xmm0, %k0 {%k1}
6420; VLX-NEXT:    kmovd %k0, %eax
6421; VLX-NEXT:    retq
6422;
6423; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask_mem:
6424; NoVLX:       # %bb.0: # %entry
6425; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
6426; NoVLX-NEXT:    vmovdqa (%rsi), %xmm1
6427; NoVLX-NEXT:    kmovw %edi, %k1
6428; NoVLX-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
6429; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
6430; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
6431; NoVLX-NEXT:    kmovw %k0, %eax
6432; NoVLX-NEXT:    vzeroupper
6433; NoVLX-NEXT:    retq
6434entry:
6435  %0 = bitcast <2 x i64> %__a to <4 x i32>
6436  %load = load <2 x i64>, <2 x i64>* %__b
6437  %1 = bitcast <2 x i64> %load to <4 x i32>
6438  %2 = icmp sgt <4 x i32> %0, %1
  ; View %__u as eight i1 lanes and keep lanes 0..3 as the compare mask.
6439  %3 = bitcast i8 %__u to <8 x i1>
6440  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6441  %4 = and <4 x i1> %2, %extract.i
  ; Shuffle indices >= 4 select lanes of the zeroinitializer operand, so
  ; result bits 4..31 are zero.
6442  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
6443  %6 = bitcast <32 x i1> %5 to i32
6444  ret i32 %6
6445}
6446
6447
; Signed greater-than compare of four i32 lanes against a scalar i32 loaded
; from %__b and splatted to all lanes; the 4-lane result is zero-extended to
; an i32.
; With AVX512VL the assertions expect the splat folded as a {1to4} broadcast
; memory operand. With AVX512F only, vpbroadcastd materialises the splat, the
; compare runs on zmm registers, and kshiftlw $12 / kshiftrw $12 keeps only
; mask bits 0..3.
6448define zeroext i32 @test_vpcmpsgtd_v4i1_v32i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
6449; VLX-LABEL: test_vpcmpsgtd_v4i1_v32i1_mask_mem_b:
6450; VLX:       # %bb.0: # %entry
6451; VLX-NEXT:    vpcmpgtd (%rdi){1to4}, %xmm0, %k0
6452; VLX-NEXT:    kmovd %k0, %eax
6453; VLX-NEXT:    retq
6454;
6455; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v32i1_mask_mem_b:
6456; NoVLX:       # %bb.0: # %entry
6457; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
6458; NoVLX-NEXT:    vpbroadcastd (%rdi), %xmm1
6459; NoVLX-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0
6460; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
6461; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
6462; NoVLX-NEXT:    kmovw %k0, %eax
6463; NoVLX-NEXT:    vzeroupper
6464; NoVLX-NEXT:    retq
6465entry:
6466  %0 = bitcast <2 x i64> %__a to <4 x i32>
  ; Splat the loaded scalar: insert into lane 0, then shuffle with an
  ; all-zero index mask.
6467  %load = load i32, i32* %__b
6468  %vec = insertelement <4 x i32> undef, i32 %load, i32 0
6469  %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
6470  %2 = icmp sgt <4 x i32> %0, %1
  ; Shuffle indices >= 4 select lanes of the zeroinitializer operand, so
  ; result bits 4..31 are zero.
6471  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
6472  %4 = bitcast <32 x i1> %3 to i32
6473  ret i32 %4
6474}
6475
; Masked form of the v4i32 signed greater-than compare against a splatted i32
; loaded from %__b. The low four bits of %__u (taken from its <8 x i1> bitcast)
; are ANDed with the compare result; note the mask is the first `and` operand
; here. The <4 x i1> value is widened to <32 x i1> with zero upper lanes and
; returned as an i32 bitmask.
; Expected codegen: the AVX512VL run uses a {1to4} broadcast compare under
; write-mask %k1; the AVX512F-only run compares in zmm under %k1 and clears the
; extra mask bits with kshiftlw/kshiftrw $12.
6476define zeroext i32 @test_masked_vpcmpsgtd_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
6477; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask_mem_b:
6478; VLX:       # %bb.0: # %entry
6479; VLX-NEXT:    kmovd %edi, %k1
6480; VLX-NEXT:    vpcmpgtd (%rsi){1to4}, %xmm0, %k0 {%k1}
6481; VLX-NEXT:    kmovd %k0, %eax
6482; VLX-NEXT:    retq
6483;
6484; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask_mem_b:
6485; NoVLX:       # %bb.0: # %entry
6486; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
6487; NoVLX-NEXT:    vpbroadcastd (%rsi), %xmm1
6488; NoVLX-NEXT:    kmovw %edi, %k1
6489; NoVLX-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
6490; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
6491; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
6492; NoVLX-NEXT:    kmovw %k0, %eax
6493; NoVLX-NEXT:    vzeroupper
6494; NoVLX-NEXT:    retq
6495entry:
6496  %0 = bitcast <2 x i64> %__a to <4 x i32>
6497  %load = load i32, i32* %__b
6498  %vec = insertelement <4 x i32> undef, i32 %load, i32 0
6499  %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
6500  %2 = icmp sgt <4 x i32> %0, %1
6501  %3 = bitcast i8 %__u to <8 x i1>
6502  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6503  %4 = and <4 x i1> %extract.i, %2
6504  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
6505  %6 = bitcast <32 x i1> %5 to i32
6506  ret i32 %6
6507}
6508
6509
; Signed greater-than compare of the four i32 lanes of %__a and %__b (both
; passed in registers). The <4 x i1> result is widened to <64 x i1> (shuffle
; indices 4-7 pick the zeroinitializer operand, so the upper 60 bits are zero)
; and returned as an i64 bitmask.
; Expected codegen: the AVX512VL run is a single xmm vpcmpgtd plus kmovq; the
; AVX512F-only run compares in zmm, clears the extra mask bits with
; kshiftlw/kshiftrw $12, and zero-extends the 16-bit mask with movzwl.
6510define zeroext i64 @test_vpcmpsgtd_v4i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
6511; VLX-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask:
6512; VLX:       # %bb.0: # %entry
6513; VLX-NEXT:    vpcmpgtd %xmm1, %xmm0, %k0
6514; VLX-NEXT:    kmovq %k0, %rax
6515; VLX-NEXT:    retq
6516;
6517; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask:
6518; NoVLX:       # %bb.0: # %entry
6519; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
6520; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
6521; NoVLX-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0
6522; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
6523; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
6524; NoVLX-NEXT:    kmovw %k0, %eax
6525; NoVLX-NEXT:    movzwl %ax, %eax
6526; NoVLX-NEXT:    vzeroupper
6527; NoVLX-NEXT:    retq
6528entry:
6529  %0 = bitcast <2 x i64> %__a to <4 x i32>
6530  %1 = bitcast <2 x i64> %__b to <4 x i32>
6531  %2 = icmp sgt <4 x i32> %0, %1
6532  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
6533  %4 = bitcast <64 x i1> %3 to i64
6534  ret i64 %4
6535}
6536
; Signed greater-than compare of the four i32 lanes of %__a against a full
; <2 x i64> vector loaded from %__b. The <4 x i1> result is widened to
; <64 x i1> with zero upper lanes and returned as an i64 bitmask.
; Expected codegen: the AVX512VL run folds the load into vpcmpgtd; the
; AVX512F-only run loads with vmovdqa, compares in zmm, clears the extra mask
; bits with kshiftlw/kshiftrw $12, and zero-extends with movzwl.
6537define zeroext i64 @test_vpcmpsgtd_v4i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
6538; VLX-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask_mem:
6539; VLX:       # %bb.0: # %entry
6540; VLX-NEXT:    vpcmpgtd (%rdi), %xmm0, %k0
6541; VLX-NEXT:    kmovq %k0, %rax
6542; VLX-NEXT:    retq
6543;
6544; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask_mem:
6545; NoVLX:       # %bb.0: # %entry
6546; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
6547; NoVLX-NEXT:    vmovdqa (%rdi), %xmm1
6548; NoVLX-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0
6549; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
6550; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
6551; NoVLX-NEXT:    kmovw %k0, %eax
6552; NoVLX-NEXT:    movzwl %ax, %eax
6553; NoVLX-NEXT:    vzeroupper
6554; NoVLX-NEXT:    retq
6555entry:
6556  %0 = bitcast <2 x i64> %__a to <4 x i32>
6557  %load = load <2 x i64>, <2 x i64>* %__b
6558  %1 = bitcast <2 x i64> %load to <4 x i32>
6559  %2 = icmp sgt <4 x i32> %0, %1
6560  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
6561  %4 = bitcast <64 x i1> %3 to i64
6562  ret i64 %4
6563}
6564
; Masked signed greater-than compare of the four i32 lanes of %__a and %__b.
; The compare result is ANDed with the low four bits of %__u (taken from its
; <8 x i1> bitcast), widened to <64 x i1> with zero upper lanes, and returned
; as an i64 bitmask.
; Expected codegen: the AVX512VL run is an xmm vpcmpgtd under write-mask %k1;
; the AVX512F-only run compares in zmm under %k1, clears the extra mask bits
; with kshiftlw/kshiftrw $12, and zero-extends with movzwl.
6565define zeroext i64 @test_masked_vpcmpsgtd_v4i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
6566; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v64i1_mask:
6567; VLX:       # %bb.0: # %entry
6568; VLX-NEXT:    kmovd %edi, %k1
6569; VLX-NEXT:    vpcmpgtd %xmm1, %xmm0, %k0 {%k1}
6570; VLX-NEXT:    kmovq %k0, %rax
6571; VLX-NEXT:    retq
6572;
6573; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v64i1_mask:
6574; NoVLX:       # %bb.0: # %entry
6575; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
6576; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
6577; NoVLX-NEXT:    kmovw %edi, %k1
6578; NoVLX-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
6579; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
6580; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
6581; NoVLX-NEXT:    kmovw %k0, %eax
6582; NoVLX-NEXT:    movzwl %ax, %eax
6583; NoVLX-NEXT:    vzeroupper
6584; NoVLX-NEXT:    retq
6585entry:
6586  %0 = bitcast <2 x i64> %__a to <4 x i32>
6587  %1 = bitcast <2 x i64> %__b to <4 x i32>
6588  %2 = icmp sgt <4 x i32> %0, %1
6589  %3 = bitcast i8 %__u to <8 x i1>
6590  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6591  %4 = and <4 x i1> %2, %extract.i
6592  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
6593  %6 = bitcast <64 x i1> %5 to i64
6594  ret i64 %6
6595}
6596
; Masked signed greater-than compare of the four i32 lanes of %__a against a
; full <2 x i64> vector loaded from %__b. The compare result is ANDed with the
; low four bits of %__u, widened to <64 x i1> with zero upper lanes, and
; returned as an i64 bitmask.
; Expected codegen: the AVX512VL run folds the load into vpcmpgtd under
; write-mask %k1; the AVX512F-only run loads with vmovdqa, compares in zmm
; under %k1, clears the extra mask bits with kshiftlw/kshiftrw $12, and
; zero-extends with movzwl.
6597define zeroext i64 @test_masked_vpcmpsgtd_v4i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
6598; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v64i1_mask_mem:
6599; VLX:       # %bb.0: # %entry
6600; VLX-NEXT:    kmovd %edi, %k1
6601; VLX-NEXT:    vpcmpgtd (%rsi), %xmm0, %k0 {%k1}
6602; VLX-NEXT:    kmovq %k0, %rax
6603; VLX-NEXT:    retq
6604;
6605; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v64i1_mask_mem:
6606; NoVLX:       # %bb.0: # %entry
6607; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
6608; NoVLX-NEXT:    vmovdqa (%rsi), %xmm1
6609; NoVLX-NEXT:    kmovw %edi, %k1
6610; NoVLX-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
6611; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
6612; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
6613; NoVLX-NEXT:    kmovw %k0, %eax
6614; NoVLX-NEXT:    movzwl %ax, %eax
6615; NoVLX-NEXT:    vzeroupper
6616; NoVLX-NEXT:    retq
6617entry:
6618  %0 = bitcast <2 x i64> %__a to <4 x i32>
6619  %load = load <2 x i64>, <2 x i64>* %__b
6620  %1 = bitcast <2 x i64> %load to <4 x i32>
6621  %2 = icmp sgt <4 x i32> %0, %1
6622  %3 = bitcast i8 %__u to <8 x i1>
6623  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6624  %4 = and <4 x i1> %2, %extract.i
6625  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
6626  %6 = bitcast <64 x i1> %5 to i64
6627  ret i64 %6
6628}
6629
6630
; Signed greater-than compare of the four i32 lanes of %__a against one i32
; loaded from %__b and splatted to every lane. The <4 x i1> result is widened
; to <64 x i1> with zero upper lanes and returned as an i64 bitmask.
; Expected codegen: the AVX512VL run folds the load into vpcmpgtd as a {1to4}
; broadcast; the AVX512F-only run uses vpbroadcastd, compares in zmm, clears
; the extra mask bits with kshiftlw/kshiftrw $12, and zero-extends with movzwl.
6631define zeroext i64 @test_vpcmpsgtd_v4i1_v64i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
6632; VLX-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask_mem_b:
6633; VLX:       # %bb.0: # %entry
6634; VLX-NEXT:    vpcmpgtd (%rdi){1to4}, %xmm0, %k0
6635; VLX-NEXT:    kmovq %k0, %rax
6636; VLX-NEXT:    retq
6637;
6638; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask_mem_b:
6639; NoVLX:       # %bb.0: # %entry
6640; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
6641; NoVLX-NEXT:    vpbroadcastd (%rdi), %xmm1
6642; NoVLX-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0
6643; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
6644; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
6645; NoVLX-NEXT:    kmovw %k0, %eax
6646; NoVLX-NEXT:    movzwl %ax, %eax
6647; NoVLX-NEXT:    vzeroupper
6648; NoVLX-NEXT:    retq
6649entry:
6650  %0 = bitcast <2 x i64> %__a to <4 x i32>
6651  %load = load i32, i32* %__b
6652  %vec = insertelement <4 x i32> undef, i32 %load, i32 0
6653  %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
6654  %2 = icmp sgt <4 x i32> %0, %1
6655  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
6656  %4 = bitcast <64 x i1> %3 to i64
6657  ret i64 %4
6658}
6659
; Masked form of the v4i32 signed greater-than compare against a splatted i32
; loaded from %__b. The low four bits of %__u are ANDed with the compare
; result; note the mask is the first `and` operand here. The <4 x i1> value is
; widened to <64 x i1> with zero upper lanes and returned as an i64 bitmask.
; Expected codegen: the AVX512VL run uses a {1to4} broadcast compare under
; write-mask %k1; the AVX512F-only run uses vpbroadcastd, compares in zmm under
; %k1, clears the extra mask bits with kshiftlw/kshiftrw $12, and zero-extends
; with movzwl.
6660define zeroext i64 @test_masked_vpcmpsgtd_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
6661; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v64i1_mask_mem_b:
6662; VLX:       # %bb.0: # %entry
6663; VLX-NEXT:    kmovd %edi, %k1
6664; VLX-NEXT:    vpcmpgtd (%rsi){1to4}, %xmm0, %k0 {%k1}
6665; VLX-NEXT:    kmovq %k0, %rax
6666; VLX-NEXT:    retq
6667;
6668; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v64i1_mask_mem_b:
6669; NoVLX:       # %bb.0: # %entry
6670; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
6671; NoVLX-NEXT:    vpbroadcastd (%rsi), %xmm1
6672; NoVLX-NEXT:    kmovw %edi, %k1
6673; NoVLX-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
6674; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
6675; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
6676; NoVLX-NEXT:    kmovw %k0, %eax
6677; NoVLX-NEXT:    movzwl %ax, %eax
6678; NoVLX-NEXT:    vzeroupper
6679; NoVLX-NEXT:    retq
6680entry:
6681  %0 = bitcast <2 x i64> %__a to <4 x i32>
6682  %load = load i32, i32* %__b
6683  %vec = insertelement <4 x i32> undef, i32 %load, i32 0
6684  %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
6685  %2 = icmp sgt <4 x i32> %0, %1
6686  %3 = bitcast i8 %__u to <8 x i1>
6687  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6688  %4 = and <4 x i1> %extract.i, %2
6689  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
6690  %6 = bitcast <64 x i1> %5 to i64
6691  ret i64 %6
6692}
6693
6694
; Signed greater-than compare of the eight i32 lanes of %__a and %__b (both
; passed in registers). The <8 x i1> result is widened to <16 x i1> (shuffle
; indices 8-15 pick the zeroinitializer operand, so the upper 8 bits are zero)
; and returned as an i16 bitmask.
; Expected codegen: the AVX512VL run is a single ymm vpcmpgtd; the AVX512F-only
; run compares in zmm and clears the upper mask bits with the
; kshiftlw/kshiftrw $8 pair.
6695define zeroext i16 @test_vpcmpsgtd_v8i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
6696; VLX-LABEL: test_vpcmpsgtd_v8i1_v16i1_mask:
6697; VLX:       # %bb.0: # %entry
6698; VLX-NEXT:    vpcmpgtd %ymm1, %ymm0, %k0
6699; VLX-NEXT:    kmovd %k0, %eax
6700; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
6701; VLX-NEXT:    vzeroupper
6702; VLX-NEXT:    retq
6703;
6704; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v16i1_mask:
6705; NoVLX:       # %bb.0: # %entry
6706; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
6707; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
6708; NoVLX-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0
6709; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
6710; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
6711; NoVLX-NEXT:    kmovw %k0, %eax
6712; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
6713; NoVLX-NEXT:    vzeroupper
6714; NoVLX-NEXT:    retq
6715entry:
6716  %0 = bitcast <4 x i64> %__a to <8 x i32>
6717  %1 = bitcast <4 x i64> %__b to <8 x i32>
6718  %2 = icmp sgt <8 x i32> %0, %1
6719  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6720  %4 = bitcast <16 x i1> %3 to i16
6721  ret i16 %4
6722}
6723
; Signed greater-than compare of the eight i32 lanes of %__a against a full
; <4 x i64> vector loaded from %__b. The <8 x i1> result is widened to
; <16 x i1> with zero upper lanes and returned as an i16 bitmask.
; Expected codegen: the AVX512VL run folds the load into the ymm vpcmpgtd; the
; AVX512F-only run loads with vmovdqa, compares in zmm, and clears the upper
; mask bits with kshiftlw/kshiftrw $8.
6724define zeroext i16 @test_vpcmpsgtd_v8i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
6725; VLX-LABEL: test_vpcmpsgtd_v8i1_v16i1_mask_mem:
6726; VLX:       # %bb.0: # %entry
6727; VLX-NEXT:    vpcmpgtd (%rdi), %ymm0, %k0
6728; VLX-NEXT:    kmovd %k0, %eax
6729; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
6730; VLX-NEXT:    vzeroupper
6731; VLX-NEXT:    retq
6732;
6733; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v16i1_mask_mem:
6734; NoVLX:       # %bb.0: # %entry
6735; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
6736; NoVLX-NEXT:    vmovdqa (%rdi), %ymm1
6737; NoVLX-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0
6738; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
6739; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
6740; NoVLX-NEXT:    kmovw %k0, %eax
6741; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
6742; NoVLX-NEXT:    vzeroupper
6743; NoVLX-NEXT:    retq
6744entry:
6745  %0 = bitcast <4 x i64> %__a to <8 x i32>
6746  %load = load <4 x i64>, <4 x i64>* %__b
6747  %1 = bitcast <4 x i64> %load to <8 x i32>
6748  %2 = icmp sgt <8 x i32> %0, %1
6749  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6750  %4 = bitcast <16 x i1> %3 to i16
6751  ret i16 %4
6752}
6753
; Masked signed greater-than compare of the eight i32 lanes of %__a and %__b.
; The compare result is ANDed with %__u reinterpreted as <8 x i1>, widened to
; <16 x i1> with zero upper lanes, and returned as an i16 bitmask.
; Expected codegen: the AVX512VL run is a ymm vpcmpgtd under write-mask %k1;
; the AVX512F-only run compares in zmm under %k1 and clears the upper mask bits
; with kshiftlw/kshiftrw $8.
6754define zeroext i16 @test_masked_vpcmpsgtd_v8i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
6755; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v16i1_mask:
6756; VLX:       # %bb.0: # %entry
6757; VLX-NEXT:    kmovd %edi, %k1
6758; VLX-NEXT:    vpcmpgtd %ymm1, %ymm0, %k0 {%k1}
6759; VLX-NEXT:    kmovd %k0, %eax
6760; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
6761; VLX-NEXT:    vzeroupper
6762; VLX-NEXT:    retq
6763;
6764; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v16i1_mask:
6765; NoVLX:       # %bb.0: # %entry
6766; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
6767; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
6768; NoVLX-NEXT:    kmovw %edi, %k1
6769; NoVLX-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
6770; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
6771; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
6772; NoVLX-NEXT:    kmovw %k0, %eax
6773; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
6774; NoVLX-NEXT:    vzeroupper
6775; NoVLX-NEXT:    retq
6776entry:
6777  %0 = bitcast <4 x i64> %__a to <8 x i32>
6778  %1 = bitcast <4 x i64> %__b to <8 x i32>
6779  %2 = icmp sgt <8 x i32> %0, %1
6780  %3 = bitcast i8 %__u to <8 x i1>
6781  %4 = and <8 x i1> %2, %3
6782  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6783  %6 = bitcast <16 x i1> %5 to i16
6784  ret i16 %6
6785}
6786
; Masked signed greater-than compare of the eight i32 lanes of %__a against a
; full <4 x i64> vector loaded from %__b. The compare result is ANDed with
; %__u reinterpreted as <8 x i1>, widened to <16 x i1> with zero upper lanes,
; and returned as an i16 bitmask.
; Expected codegen: the AVX512VL run folds the load into the ymm vpcmpgtd under
; write-mask %k1; the AVX512F-only run loads with vmovdqa, compares in zmm
; under %k1, and clears the upper mask bits with kshiftlw/kshiftrw $8.
6787define zeroext i16 @test_masked_vpcmpsgtd_v8i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
6788; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v16i1_mask_mem:
6789; VLX:       # %bb.0: # %entry
6790; VLX-NEXT:    kmovd %edi, %k1
6791; VLX-NEXT:    vpcmpgtd (%rsi), %ymm0, %k0 {%k1}
6792; VLX-NEXT:    kmovd %k0, %eax
6793; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
6794; VLX-NEXT:    vzeroupper
6795; VLX-NEXT:    retq
6796;
6797; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v16i1_mask_mem:
6798; NoVLX:       # %bb.0: # %entry
6799; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
6800; NoVLX-NEXT:    vmovdqa (%rsi), %ymm1
6801; NoVLX-NEXT:    kmovw %edi, %k1
6802; NoVLX-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
6803; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
6804; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
6805; NoVLX-NEXT:    kmovw %k0, %eax
6806; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
6807; NoVLX-NEXT:    vzeroupper
6808; NoVLX-NEXT:    retq
6809entry:
6810  %0 = bitcast <4 x i64> %__a to <8 x i32>
6811  %load = load <4 x i64>, <4 x i64>* %__b
6812  %1 = bitcast <4 x i64> %load to <8 x i32>
6813  %2 = icmp sgt <8 x i32> %0, %1
6814  %3 = bitcast i8 %__u to <8 x i1>
6815  %4 = and <8 x i1> %2, %3
6816  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6817  %6 = bitcast <16 x i1> %5 to i16
6818  ret i16 %6
6819}
6820
6821
; Signed greater-than compare of the eight i32 lanes of %__a against one i32
; loaded from %__b and splatted to every lane. The <8 x i1> result is widened
; to <16 x i1> with zero upper lanes and returned as an i16 bitmask.
; Expected codegen: the AVX512VL run folds the load into vpcmpgtd as a {1to8}
; broadcast; the AVX512F-only run uses vpbroadcastd into ymm, compares in zmm,
; and clears the upper mask bits with kshiftlw/kshiftrw $8.
6822define zeroext i16 @test_vpcmpsgtd_v8i1_v16i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
6823; VLX-LABEL: test_vpcmpsgtd_v8i1_v16i1_mask_mem_b:
6824; VLX:       # %bb.0: # %entry
6825; VLX-NEXT:    vpcmpgtd (%rdi){1to8}, %ymm0, %k0
6826; VLX-NEXT:    kmovd %k0, %eax
6827; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
6828; VLX-NEXT:    vzeroupper
6829; VLX-NEXT:    retq
6830;
6831; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v16i1_mask_mem_b:
6832; NoVLX:       # %bb.0: # %entry
6833; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
6834; NoVLX-NEXT:    vpbroadcastd (%rdi), %ymm1
6835; NoVLX-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0
6836; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
6837; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
6838; NoVLX-NEXT:    kmovw %k0, %eax
6839; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
6840; NoVLX-NEXT:    vzeroupper
6841; NoVLX-NEXT:    retq
6842entry:
6843  %0 = bitcast <4 x i64> %__a to <8 x i32>
6844  %load = load i32, i32* %__b
6845  %vec = insertelement <8 x i32> undef, i32 %load, i32 0
6846  %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
6847  %2 = icmp sgt <8 x i32> %0, %1
6848  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6849  %4 = bitcast <16 x i1> %3 to i16
6850  ret i16 %4
6851}
6852
; Masked form of the v8i32 signed greater-than compare against a splatted i32
; loaded from %__b. %__u reinterpreted as <8 x i1> is ANDed with the compare
; result; note the mask is the first `and` operand here. The <8 x i1> value is
; widened to <16 x i1> with zero upper lanes and returned as an i16 bitmask.
; Expected codegen: the AVX512VL run uses a {1to8} broadcast compare under
; write-mask %k1; the AVX512F-only run uses vpbroadcastd, compares in zmm under
; %k1, and clears the upper mask bits with kshiftlw/kshiftrw $8.
6853define zeroext i16 @test_masked_vpcmpsgtd_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
6854; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v16i1_mask_mem_b:
6855; VLX:       # %bb.0: # %entry
6856; VLX-NEXT:    kmovd %edi, %k1
6857; VLX-NEXT:    vpcmpgtd (%rsi){1to8}, %ymm0, %k0 {%k1}
6858; VLX-NEXT:    kmovd %k0, %eax
6859; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
6860; VLX-NEXT:    vzeroupper
6861; VLX-NEXT:    retq
6862;
6863; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v16i1_mask_mem_b:
6864; NoVLX:       # %bb.0: # %entry
6865; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
6866; NoVLX-NEXT:    vpbroadcastd (%rsi), %ymm1
6867; NoVLX-NEXT:    kmovw %edi, %k1
6868; NoVLX-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
6869; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
6870; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
6871; NoVLX-NEXT:    kmovw %k0, %eax
6872; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
6873; NoVLX-NEXT:    vzeroupper
6874; NoVLX-NEXT:    retq
6875entry:
6876  %0 = bitcast <4 x i64> %__a to <8 x i32>
6877  %load = load i32, i32* %__b
6878  %vec = insertelement <8 x i32> undef, i32 %load, i32 0
6879  %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
6880  %2 = icmp sgt <8 x i32> %0, %1
6881  %3 = bitcast i8 %__u to <8 x i1>
6882  %4 = and <8 x i1> %3, %2
6883  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6884  %6 = bitcast <16 x i1> %5 to i16
6885  ret i16 %6
6886}
6887
6888
; Signed greater-than compare of the eight i32 lanes of %__a and %__b (both
; passed in registers). The <8 x i1> result is widened to <32 x i1> (shuffle
; indices 8-15 pick the zeroinitializer operand, so the upper 24 bits are zero)
; and returned as an i32 bitmask.
; Expected codegen: the AVX512VL run is a single ymm vpcmpgtd plus kmovd; the
; AVX512F-only run compares in zmm and clears the upper mask bits with the
; kshiftlw/kshiftrw $8 pair.
6889define zeroext i32 @test_vpcmpsgtd_v8i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
6890; VLX-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask:
6891; VLX:       # %bb.0: # %entry
6892; VLX-NEXT:    vpcmpgtd %ymm1, %ymm0, %k0
6893; VLX-NEXT:    kmovd %k0, %eax
6894; VLX-NEXT:    vzeroupper
6895; VLX-NEXT:    retq
6896;
6897; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask:
6898; NoVLX:       # %bb.0: # %entry
6899; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
6900; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
6901; NoVLX-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0
6902; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
6903; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
6904; NoVLX-NEXT:    kmovw %k0, %eax
6905; NoVLX-NEXT:    vzeroupper
6906; NoVLX-NEXT:    retq
6907entry:
6908  %0 = bitcast <4 x i64> %__a to <8 x i32>
6909  %1 = bitcast <4 x i64> %__b to <8 x i32>
6910  %2 = icmp sgt <8 x i32> %0, %1
6911  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6912  %4 = bitcast <32 x i1> %3 to i32
6913  ret i32 %4
6914}
6915
; Signed greater-than compare of the eight i32 lanes of %__a against a full
; <4 x i64> vector loaded from %__b. The <8 x i1> result is widened to
; <32 x i1> with zero upper lanes and returned as an i32 bitmask.
; Expected codegen: the AVX512VL run folds the load into the ymm vpcmpgtd; the
; AVX512F-only run loads with vmovdqa, compares in zmm, and clears the upper
; mask bits with kshiftlw/kshiftrw $8.
6916define zeroext i32 @test_vpcmpsgtd_v8i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
6917; VLX-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask_mem:
6918; VLX:       # %bb.0: # %entry
6919; VLX-NEXT:    vpcmpgtd (%rdi), %ymm0, %k0
6920; VLX-NEXT:    kmovd %k0, %eax
6921; VLX-NEXT:    vzeroupper
6922; VLX-NEXT:    retq
6923;
6924; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask_mem:
6925; NoVLX:       # %bb.0: # %entry
6926; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
6927; NoVLX-NEXT:    vmovdqa (%rdi), %ymm1
6928; NoVLX-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0
6929; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
6930; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
6931; NoVLX-NEXT:    kmovw %k0, %eax
6932; NoVLX-NEXT:    vzeroupper
6933; NoVLX-NEXT:    retq
6934entry:
6935  %0 = bitcast <4 x i64> %__a to <8 x i32>
6936  %load = load <4 x i64>, <4 x i64>* %__b
6937  %1 = bitcast <4 x i64> %load to <8 x i32>
6938  %2 = icmp sgt <8 x i32> %0, %1
6939  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6940  %4 = bitcast <32 x i1> %3 to i32
6941  ret i32 %4
6942}
6943
; Masked signed greater-than compare of the eight i32 lanes of %__a and %__b.
; The compare result is ANDed with %__u reinterpreted as <8 x i1>, widened to
; <32 x i1> with zero upper lanes, and returned as an i32 bitmask.
; Expected codegen: the AVX512VL run is a ymm vpcmpgtd under write-mask %k1;
; the AVX512F-only run compares in zmm under %k1 and clears the upper mask bits
; with kshiftlw/kshiftrw $8.
6944define zeroext i32 @test_masked_vpcmpsgtd_v8i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
6945; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask:
6946; VLX:       # %bb.0: # %entry
6947; VLX-NEXT:    kmovd %edi, %k1
6948; VLX-NEXT:    vpcmpgtd %ymm1, %ymm0, %k0 {%k1}
6949; VLX-NEXT:    kmovd %k0, %eax
6950; VLX-NEXT:    vzeroupper
6951; VLX-NEXT:    retq
6952;
6953; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask:
6954; NoVLX:       # %bb.0: # %entry
6955; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
6956; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
6957; NoVLX-NEXT:    kmovw %edi, %k1
6958; NoVLX-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
6959; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
6960; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
6961; NoVLX-NEXT:    kmovw %k0, %eax
6962; NoVLX-NEXT:    vzeroupper
6963; NoVLX-NEXT:    retq
6964entry:
6965  %0 = bitcast <4 x i64> %__a to <8 x i32>
6966  %1 = bitcast <4 x i64> %__b to <8 x i32>
6967  %2 = icmp sgt <8 x i32> %0, %1
6968  %3 = bitcast i8 %__u to <8 x i1>
6969  %4 = and <8 x i1> %2, %3
6970  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6971  %6 = bitcast <32 x i1> %5 to i32
6972  ret i32 %6
6973}
6974
; Masked signed greater-than compare of the eight i32 lanes of %__a against a
; full <4 x i64> vector loaded from %__b. The compare result is ANDed with
; %__u reinterpreted as <8 x i1>, widened to <32 x i1> with zero upper lanes,
; and returned as an i32 bitmask.
; Expected codegen: the AVX512VL run folds the load into the ymm vpcmpgtd under
; write-mask %k1; the AVX512F-only run loads with vmovdqa, compares in zmm
; under %k1, and clears the upper mask bits with kshiftlw/kshiftrw $8.
6975define zeroext i32 @test_masked_vpcmpsgtd_v8i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
6976; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask_mem:
6977; VLX:       # %bb.0: # %entry
6978; VLX-NEXT:    kmovd %edi, %k1
6979; VLX-NEXT:    vpcmpgtd (%rsi), %ymm0, %k0 {%k1}
6980; VLX-NEXT:    kmovd %k0, %eax
6981; VLX-NEXT:    vzeroupper
6982; VLX-NEXT:    retq
6983;
6984; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask_mem:
6985; NoVLX:       # %bb.0: # %entry
6986; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
6987; NoVLX-NEXT:    vmovdqa (%rsi), %ymm1
6988; NoVLX-NEXT:    kmovw %edi, %k1
6989; NoVLX-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
6990; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
6991; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
6992; NoVLX-NEXT:    kmovw %k0, %eax
6993; NoVLX-NEXT:    vzeroupper
6994; NoVLX-NEXT:    retq
6995entry:
6996  %0 = bitcast <4 x i64> %__a to <8 x i32>
6997  %load = load <4 x i64>, <4 x i64>* %__b
6998  %1 = bitcast <4 x i64> %load to <8 x i32>
6999  %2 = icmp sgt <8 x i32> %0, %1
7000  %3 = bitcast i8 %__u to <8 x i1>
7001  %4 = and <8 x i1> %2, %3
7002  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7003  %6 = bitcast <32 x i1> %5 to i32
7004  ret i32 %6
7005}
7006
7007
; Signed greater-than compare of the eight i32 lanes of %__a against one i32
; loaded from %__b and splatted to every lane. The <8 x i1> result is widened
; to <32 x i1> with zero upper lanes and returned as an i32 bitmask.
; Expected codegen: the AVX512VL run folds the load into vpcmpgtd as a {1to8}
; broadcast; the AVX512F-only run uses vpbroadcastd into ymm, compares in zmm,
; and clears the upper mask bits with kshiftlw/kshiftrw $8.
7008define zeroext i32 @test_vpcmpsgtd_v8i1_v32i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
7009; VLX-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask_mem_b:
7010; VLX:       # %bb.0: # %entry
7011; VLX-NEXT:    vpcmpgtd (%rdi){1to8}, %ymm0, %k0
7012; VLX-NEXT:    kmovd %k0, %eax
7013; VLX-NEXT:    vzeroupper
7014; VLX-NEXT:    retq
7015;
7016; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask_mem_b:
7017; NoVLX:       # %bb.0: # %entry
7018; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
7019; NoVLX-NEXT:    vpbroadcastd (%rdi), %ymm1
7020; NoVLX-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0
7021; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
7022; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
7023; NoVLX-NEXT:    kmovw %k0, %eax
7024; NoVLX-NEXT:    vzeroupper
7025; NoVLX-NEXT:    retq
7026entry:
7027  %0 = bitcast <4 x i64> %__a to <8 x i32>
7028  %load = load i32, i32* %__b
7029  %vec = insertelement <8 x i32> undef, i32 %load, i32 0
7030  %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
7031  %2 = icmp sgt <8 x i32> %0, %1
7032  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7033  %4 = bitcast <32 x i1> %3 to i32
7034  ret i32 %4
7035}
7036
; Masked form of the v8i32 signed greater-than compare against a splatted i32
; loaded from %__b. %__u reinterpreted as <8 x i1> is ANDed with the compare
; result; note the mask is the first `and` operand here. The <8 x i1> value is
; widened to <32 x i1> with zero upper lanes and returned as an i32 bitmask.
; Expected codegen: the AVX512VL run uses a {1to8} broadcast compare under
; write-mask %k1; the AVX512F-only run uses vpbroadcastd, compares in zmm under
; %k1, and clears the upper mask bits with kshiftlw/kshiftrw $8.
7037define zeroext i32 @test_masked_vpcmpsgtd_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
7038; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask_mem_b:
7039; VLX:       # %bb.0: # %entry
7040; VLX-NEXT:    kmovd %edi, %k1
7041; VLX-NEXT:    vpcmpgtd (%rsi){1to8}, %ymm0, %k0 {%k1}
7042; VLX-NEXT:    kmovd %k0, %eax
7043; VLX-NEXT:    vzeroupper
7044; VLX-NEXT:    retq
7045;
7046; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask_mem_b:
7047; NoVLX:       # %bb.0: # %entry
7048; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
7049; NoVLX-NEXT:    vpbroadcastd (%rsi), %ymm1
7050; NoVLX-NEXT:    kmovw %edi, %k1
7051; NoVLX-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
7052; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
7053; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
7054; NoVLX-NEXT:    kmovw %k0, %eax
7055; NoVLX-NEXT:    vzeroupper
7056; NoVLX-NEXT:    retq
7057entry:
7058  %0 = bitcast <4 x i64> %__a to <8 x i32>
7059  %load = load i32, i32* %__b
7060  %vec = insertelement <8 x i32> undef, i32 %load, i32 0
7061  %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
7062  %2 = icmp sgt <8 x i32> %0, %1
7063  %3 = bitcast i8 %__u to <8 x i1>
7064  %4 = and <8 x i1> %3, %2
7065  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7066  %6 = bitcast <32 x i1> %5 to i32
7067  ret i32 %6
7068}
7069
7070
; Signed greater-than compare of the eight i32 lanes of %__a and %__b (both
; passed in registers). The <8 x i1> result is widened to <64 x i1> (shuffle
; indices 8-15 pick the zeroinitializer operand, so the upper 56 bits are zero)
; and returned as an i64 bitmask.
; Expected codegen: the AVX512VL run is a single ymm vpcmpgtd plus kmovq; the
; AVX512F-only run compares in zmm, clears the upper mask bits with
; kshiftlw/kshiftrw $8, and zero-extends the 16-bit mask with movzwl.
7071define zeroext i64 @test_vpcmpsgtd_v8i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
7072; VLX-LABEL: test_vpcmpsgtd_v8i1_v64i1_mask:
7073; VLX:       # %bb.0: # %entry
7074; VLX-NEXT:    vpcmpgtd %ymm1, %ymm0, %k0
7075; VLX-NEXT:    kmovq %k0, %rax
7076; VLX-NEXT:    vzeroupper
7077; VLX-NEXT:    retq
7078;
7079; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v64i1_mask:
7080; NoVLX:       # %bb.0: # %entry
7081; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
7082; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
7083; NoVLX-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0
7084; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
7085; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
7086; NoVLX-NEXT:    kmovw %k0, %eax
7087; NoVLX-NEXT:    movzwl %ax, %eax
7088; NoVLX-NEXT:    vzeroupper
7089; NoVLX-NEXT:    retq
7090entry:
7091  %0 = bitcast <4 x i64> %__a to <8 x i32>
7092  %1 = bitcast <4 x i64> %__b to <8 x i32>
7093  %2 = icmp sgt <8 x i32> %0, %1
7094  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7095  %4 = bitcast <64 x i1> %3 to i64
7096  ret i64 %4
7097}
7098
; Signed greater-than compare of the eight i32 lanes of %__a against a full
; <4 x i64> vector loaded from %__b. The <8 x i1> result is widened to
; <64 x i1> with zero upper lanes and returned as an i64 bitmask.
; Expected codegen: the AVX512VL run folds the load into the ymm vpcmpgtd; the
; AVX512F-only run loads with vmovdqa, compares in zmm, clears the upper mask
; bits with kshiftlw/kshiftrw $8, and zero-extends with movzwl.
7099define zeroext i64 @test_vpcmpsgtd_v8i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
7100; VLX-LABEL: test_vpcmpsgtd_v8i1_v64i1_mask_mem:
7101; VLX:       # %bb.0: # %entry
7102; VLX-NEXT:    vpcmpgtd (%rdi), %ymm0, %k0
7103; VLX-NEXT:    kmovq %k0, %rax
7104; VLX-NEXT:    vzeroupper
7105; VLX-NEXT:    retq
7106;
7107; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v64i1_mask_mem:
7108; NoVLX:       # %bb.0: # %entry
7109; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
7110; NoVLX-NEXT:    vmovdqa (%rdi), %ymm1
7111; NoVLX-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0
7112; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
7113; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
7114; NoVLX-NEXT:    kmovw %k0, %eax
7115; NoVLX-NEXT:    movzwl %ax, %eax
7116; NoVLX-NEXT:    vzeroupper
7117; NoVLX-NEXT:    retq
7118entry:
7119  %0 = bitcast <4 x i64> %__a to <8 x i32>
7120  %load = load <4 x i64>, <4 x i64>* %__b
7121  %1 = bitcast <4 x i64> %load to <8 x i32>
7122  %2 = icmp sgt <8 x i32> %0, %1
7123  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7124  %4 = bitcast <64 x i1> %3 to i64
7125  ret i64 %4
7126}
7127
; Masked <8 x i32> `icmp sgt` test: the compare result is ANDed with the i8
; argument %__u reinterpreted as <8 x i1>.
; The masked <8 x i1> value is widened to <64 x i1> (shuffle indices 8-15
; select the zeroinitializer operand, so lanes 8-63 are false) and returned as
; an i64.
; Both check prefixes expect %edi moved into k1 and used as the {%k1}
; write-mask on vpcmpgtd; NoVLX checks additionally expect the zmm widening and
; the kshiftlw/kshiftrw $8 pair.
7128define zeroext i64 @test_masked_vpcmpsgtd_v8i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
7129; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v64i1_mask:
7130; VLX:       # %bb.0: # %entry
7131; VLX-NEXT:    kmovd %edi, %k1
7132; VLX-NEXT:    vpcmpgtd %ymm1, %ymm0, %k0 {%k1}
7133; VLX-NEXT:    kmovq %k0, %rax
7134; VLX-NEXT:    vzeroupper
7135; VLX-NEXT:    retq
7136;
7137; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v64i1_mask:
7138; NoVLX:       # %bb.0: # %entry
7139; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
7140; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
7141; NoVLX-NEXT:    kmovw %edi, %k1
7142; NoVLX-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
7143; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
7144; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
7145; NoVLX-NEXT:    kmovw %k0, %eax
7146; NoVLX-NEXT:    movzwl %ax, %eax
7147; NoVLX-NEXT:    vzeroupper
7148; NoVLX-NEXT:    retq
7149entry:
7150  %0 = bitcast <4 x i64> %__a to <8 x i32>
7151  %1 = bitcast <4 x i64> %__b to <8 x i32>
7152  %2 = icmp sgt <8 x i32> %0, %1
7153  %3 = bitcast i8 %__u to <8 x i1>
7154  %4 = and <8 x i1> %2, %3
7155  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7156  %6 = bitcast <64 x i1> %5 to i64
7157  ret i64 %6
7158}
7159
; Masked, memory-operand <8 x i32> `icmp sgt` test: the right-hand vector is
; loaded from %__b and the compare result is ANDed with the i8 argument %__u
; reinterpreted as <8 x i1>.
; The masked value is widened to <64 x i1> (shuffle indices 8-15 select the
; zeroinitializer operand, so lanes 8-63 are false) and returned as an i64.
; VLX checks expect the load folded as (%rsi) with a {%k1} write-mask; NoVLX
; checks expect a vmovdqa into ymm1 followed by the masked zmm compare.
7160define zeroext i64 @test_masked_vpcmpsgtd_v8i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
7161; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v64i1_mask_mem:
7162; VLX:       # %bb.0: # %entry
7163; VLX-NEXT:    kmovd %edi, %k1
7164; VLX-NEXT:    vpcmpgtd (%rsi), %ymm0, %k0 {%k1}
7165; VLX-NEXT:    kmovq %k0, %rax
7166; VLX-NEXT:    vzeroupper
7167; VLX-NEXT:    retq
7168;
7169; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v64i1_mask_mem:
7170; NoVLX:       # %bb.0: # %entry
7171; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
7172; NoVLX-NEXT:    vmovdqa (%rsi), %ymm1
7173; NoVLX-NEXT:    kmovw %edi, %k1
7174; NoVLX-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
7175; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
7176; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
7177; NoVLX-NEXT:    kmovw %k0, %eax
7178; NoVLX-NEXT:    movzwl %ax, %eax
7179; NoVLX-NEXT:    vzeroupper
7180; NoVLX-NEXT:    retq
7181entry:
7182  %0 = bitcast <4 x i64> %__a to <8 x i32>
7183  %load = load <4 x i64>, <4 x i64>* %__b
7184  %1 = bitcast <4 x i64> %load to <8 x i32>
7185  %2 = icmp sgt <8 x i32> %0, %1
7186  %3 = bitcast i8 %__u to <8 x i1>
7187  %4 = and <8 x i1> %2, %3
7188  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7189  %6 = bitcast <64 x i1> %5 to i64
7190  ret i64 %6
7191}
7192
7193
; Broadcast-operand variant of the unmasked <8 x i32> `icmp sgt` test: one i32
; is loaded from %__b and splatted to all eight lanes (insertelement into lane
; 0, then a shufflevector with an all-zero index mask).
; The <8 x i1> result is widened to <64 x i1> (shuffle indices 8-15 select the
; zeroinitializer operand, so lanes 8-63 are false) and returned as an i64.
; VLX checks expect the embedded-broadcast form (%rdi){1to8}; NoVLX checks
; expect an explicit vpbroadcastd into ymm1 before the zmm compare.
7194define zeroext i64 @test_vpcmpsgtd_v8i1_v64i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
7195; VLX-LABEL: test_vpcmpsgtd_v8i1_v64i1_mask_mem_b:
7196; VLX:       # %bb.0: # %entry
7197; VLX-NEXT:    vpcmpgtd (%rdi){1to8}, %ymm0, %k0
7198; VLX-NEXT:    kmovq %k0, %rax
7199; VLX-NEXT:    vzeroupper
7200; VLX-NEXT:    retq
7201;
7202; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v64i1_mask_mem_b:
7203; NoVLX:       # %bb.0: # %entry
7204; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
7205; NoVLX-NEXT:    vpbroadcastd (%rdi), %ymm1
7206; NoVLX-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0
7207; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
7208; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
7209; NoVLX-NEXT:    kmovw %k0, %eax
7210; NoVLX-NEXT:    movzwl %ax, %eax
7211; NoVLX-NEXT:    vzeroupper
7212; NoVLX-NEXT:    retq
7213entry:
7214  %0 = bitcast <4 x i64> %__a to <8 x i32>
7215  %load = load i32, i32* %__b
7216  %vec = insertelement <8 x i32> undef, i32 %load, i32 0
7217  %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
7218  %2 = icmp sgt <8 x i32> %0, %1
7219  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7220  %4 = bitcast <64 x i1> %3 to i64
7221  ret i64 %4
7222}
7223
; Masked, broadcast-operand <8 x i32> `icmp sgt` test: one i32 loaded from
; %__b is splatted to all eight lanes, and the compare result is ANDed with the
; i8 argument %__u reinterpreted as <8 x i1> (mask is the left `and` operand
; here).
; The masked value is widened to <64 x i1> (shuffle indices 8-15 select the
; zeroinitializer operand, so lanes 8-63 are false) and returned as an i64.
; VLX checks expect (%rsi){1to8} with a {%k1} write-mask; NoVLX checks expect
; vpbroadcastd into ymm1 followed by the masked zmm compare.
7224define zeroext i64 @test_masked_vpcmpsgtd_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
7225; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v64i1_mask_mem_b:
7226; VLX:       # %bb.0: # %entry
7227; VLX-NEXT:    kmovd %edi, %k1
7228; VLX-NEXT:    vpcmpgtd (%rsi){1to8}, %ymm0, %k0 {%k1}
7229; VLX-NEXT:    kmovq %k0, %rax
7230; VLX-NEXT:    vzeroupper
7231; VLX-NEXT:    retq
7232;
7233; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v64i1_mask_mem_b:
7234; NoVLX:       # %bb.0: # %entry
7235; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
7236; NoVLX-NEXT:    vpbroadcastd (%rsi), %ymm1
7237; NoVLX-NEXT:    kmovw %edi, %k1
7238; NoVLX-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
7239; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
7240; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
7241; NoVLX-NEXT:    kmovw %k0, %eax
7242; NoVLX-NEXT:    movzwl %ax, %eax
7243; NoVLX-NEXT:    vzeroupper
7244; NoVLX-NEXT:    retq
7245entry:
7246  %0 = bitcast <4 x i64> %__a to <8 x i32>
7247  %load = load i32, i32* %__b
7248  %vec = insertelement <8 x i32> undef, i32 %load, i32 0
7249  %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
7250  %2 = icmp sgt <8 x i32> %0, %1
7251  %3 = bitcast i8 %__u to <8 x i1>
7252  %4 = and <8 x i1> %3, %2
7253  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7254  %6 = bitcast <64 x i1> %5 to i64
7255  ret i64 %6
7256}
7257
7258
; Unmasked `icmp sgt` on <16 x i32>; both <8 x i64> arguments are bitcast
; first. The <16 x i1> result is widened to <32 x i1> by a shufflevector
; against zeroinitializer: shuffle indices 16-31 select the zero operand, so
; result lanes 16-31 are false. The widened mask is returned as an i32.
; Both check prefixes expect a single zmm vpcmpgtd into k0; VLX checks read the
; mask with kmovd, NoVLX checks with kmovw.
7259define zeroext i32 @test_vpcmpsgtd_v16i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
7260; VLX-LABEL: test_vpcmpsgtd_v16i1_v32i1_mask:
7261; VLX:       # %bb.0: # %entry
7262; VLX-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0
7263; VLX-NEXT:    kmovd %k0, %eax
7264; VLX-NEXT:    vzeroupper
7265; VLX-NEXT:    retq
7266;
7267; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v32i1_mask:
7268; NoVLX:       # %bb.0: # %entry
7269; NoVLX-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0
7270; NoVLX-NEXT:    kmovw %k0, %eax
7271; NoVLX-NEXT:    vzeroupper
7272; NoVLX-NEXT:    retq
7273entry:
7274  %0 = bitcast <8 x i64> %__a to <16 x i32>
7275  %1 = bitcast <8 x i64> %__b to <16 x i32>
7276  %2 = icmp sgt <16 x i32> %0, %1
7277  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
7278  %4 = bitcast <32 x i1> %3 to i32
7279  ret i32 %4
7280}
7281
; Memory-operand variant of the unmasked <16 x i32> `icmp sgt` test: the
; right-hand vector is loaded from the <8 x i64>* argument %__b.
; The <16 x i1> result is widened to <32 x i1> (shuffle indices 16-31 select
; the zeroinitializer operand, so lanes 16-31 are false) and returned as i32.
; Both check prefixes expect the load folded into vpcmpgtd as (%rdi).
7282define zeroext i32 @test_vpcmpsgtd_v16i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
7283; VLX-LABEL: test_vpcmpsgtd_v16i1_v32i1_mask_mem:
7284; VLX:       # %bb.0: # %entry
7285; VLX-NEXT:    vpcmpgtd (%rdi), %zmm0, %k0
7286; VLX-NEXT:    kmovd %k0, %eax
7287; VLX-NEXT:    vzeroupper
7288; VLX-NEXT:    retq
7289;
7290; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v32i1_mask_mem:
7291; NoVLX:       # %bb.0: # %entry
7292; NoVLX-NEXT:    vpcmpgtd (%rdi), %zmm0, %k0
7293; NoVLX-NEXT:    kmovw %k0, %eax
7294; NoVLX-NEXT:    vzeroupper
7295; NoVLX-NEXT:    retq
7296entry:
7297  %0 = bitcast <8 x i64> %__a to <16 x i32>
7298  %load = load <8 x i64>, <8 x i64>* %__b
7299  %1 = bitcast <8 x i64> %load to <16 x i32>
7300  %2 = icmp sgt <16 x i32> %0, %1
7301  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
7302  %4 = bitcast <32 x i1> %3 to i32
7303  ret i32 %4
7304}
7305
; Masked <16 x i32> `icmp sgt` test: the compare result is ANDed with the i16
; argument %__u reinterpreted as <16 x i1>.
; The masked <16 x i1> value is widened to <32 x i1> (shuffle indices 16-31
; select the zeroinitializer operand, so lanes 16-31 are false) and returned
; as an i32.
; VLX checks expect %edi moved into k1 and used as a {%k1} write-mask; NoVLX
; checks expect an unmasked compare with the mask applied by `andl %edi, %eax`.
7306define zeroext i32 @test_masked_vpcmpsgtd_v16i1_v32i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
7307; VLX-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask:
7308; VLX:       # %bb.0: # %entry
7309; VLX-NEXT:    kmovd %edi, %k1
7310; VLX-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
7311; VLX-NEXT:    kmovd %k0, %eax
7312; VLX-NEXT:    vzeroupper
7313; VLX-NEXT:    retq
7314;
7315; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask:
7316; NoVLX:       # %bb.0: # %entry
7317; NoVLX-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0
7318; NoVLX-NEXT:    kmovw %k0, %eax
7319; NoVLX-NEXT:    andl %edi, %eax
7320; NoVLX-NEXT:    vzeroupper
7321; NoVLX-NEXT:    retq
7322entry:
7323  %0 = bitcast <8 x i64> %__a to <16 x i32>
7324  %1 = bitcast <8 x i64> %__b to <16 x i32>
7325  %2 = icmp sgt <16 x i32> %0, %1
7326  %3 = bitcast i16 %__u to <16 x i1>
7327  %4 = and <16 x i1> %2, %3
7328  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
7329  %6 = bitcast <32 x i1> %5 to i32
7330  ret i32 %6
7331}
7332
; Masked, memory-operand <16 x i32> `icmp sgt` test: the right-hand vector is
; loaded from %__b and the compare result is ANDed with the i16 argument %__u
; reinterpreted as <16 x i1>.
; The masked value is widened to <32 x i1> (shuffle indices 16-31 select the
; zeroinitializer operand, so lanes 16-31 are false) and returned as an i32.
; VLX checks expect the folded load (%rsi) with a {%k1} write-mask; NoVLX
; checks expect an unmasked folded compare followed by `andl %edi, %eax`.
7333define zeroext i32 @test_masked_vpcmpsgtd_v16i1_v32i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
7334; VLX-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask_mem:
7335; VLX:       # %bb.0: # %entry
7336; VLX-NEXT:    kmovd %edi, %k1
7337; VLX-NEXT:    vpcmpgtd (%rsi), %zmm0, %k0 {%k1}
7338; VLX-NEXT:    kmovd %k0, %eax
7339; VLX-NEXT:    vzeroupper
7340; VLX-NEXT:    retq
7341;
7342; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask_mem:
7343; NoVLX:       # %bb.0: # %entry
7344; NoVLX-NEXT:    vpcmpgtd (%rsi), %zmm0, %k0
7345; NoVLX-NEXT:    kmovw %k0, %eax
7346; NoVLX-NEXT:    andl %edi, %eax
7347; NoVLX-NEXT:    vzeroupper
7348; NoVLX-NEXT:    retq
7349entry:
7350  %0 = bitcast <8 x i64> %__a to <16 x i32>
7351  %load = load <8 x i64>, <8 x i64>* %__b
7352  %1 = bitcast <8 x i64> %load to <16 x i32>
7353  %2 = icmp sgt <16 x i32> %0, %1
7354  %3 = bitcast i16 %__u to <16 x i1>
7355  %4 = and <16 x i1> %2, %3
7356  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
7357  %6 = bitcast <32 x i1> %5 to i32
7358  ret i32 %6
7359}
7360
7361
; Broadcast-operand variant of the unmasked <16 x i32> `icmp sgt` test: one
; i32 is loaded from %__b and splatted to all sixteen lanes (insertelement into
; lane 0, then a shufflevector with an all-zero index mask).
; The <16 x i1> result is widened to <32 x i1> (shuffle indices 16-31 select
; the zeroinitializer operand, so lanes 16-31 are false) and returned as i32.
; Both check prefixes expect the embedded-broadcast form (%rdi){1to16}.
7362define zeroext i32 @test_vpcmpsgtd_v16i1_v32i1_mask_mem_b(<8 x i64> %__a, i32* %__b) local_unnamed_addr {
7363; VLX-LABEL: test_vpcmpsgtd_v16i1_v32i1_mask_mem_b:
7364; VLX:       # %bb.0: # %entry
7365; VLX-NEXT:    vpcmpgtd (%rdi){1to16}, %zmm0, %k0
7366; VLX-NEXT:    kmovd %k0, %eax
7367; VLX-NEXT:    vzeroupper
7368; VLX-NEXT:    retq
7369;
7370; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v32i1_mask_mem_b:
7371; NoVLX:       # %bb.0: # %entry
7372; NoVLX-NEXT:    vpcmpgtd (%rdi){1to16}, %zmm0, %k0
7373; NoVLX-NEXT:    kmovw %k0, %eax
7374; NoVLX-NEXT:    vzeroupper
7375; NoVLX-NEXT:    retq
7376entry:
7377  %0 = bitcast <8 x i64> %__a to <16 x i32>
7378  %load = load i32, i32* %__b
7379  %vec = insertelement <16 x i32> undef, i32 %load, i32 0
7380  %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
7381  %2 = icmp sgt <16 x i32> %0, %1
7382  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
7383  %4 = bitcast <32 x i1> %3 to i32
7384  ret i32 %4
7385}
7386
; Masked, broadcast-operand <16 x i32> `icmp sgt` test: one i32 loaded from
; %__b is splatted to all sixteen lanes, and the compare result is ANDed with
; the i16 argument %__u reinterpreted as <16 x i1> (mask is the left `and`
; operand here).
; The masked value is widened to <32 x i1> (shuffle indices 16-31 select the
; zeroinitializer operand, so lanes 16-31 are false) and returned as an i32.
; VLX checks expect (%rsi){1to16} with a {%k1} write-mask; NoVLX checks expect
; an unmasked broadcast compare followed by `andl %edi, %eax`.
7387define zeroext i32 @test_masked_vpcmpsgtd_v16i1_v32i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, i32* %__b) local_unnamed_addr {
7388; VLX-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask_mem_b:
7389; VLX:       # %bb.0: # %entry
7390; VLX-NEXT:    kmovd %edi, %k1
7391; VLX-NEXT:    vpcmpgtd (%rsi){1to16}, %zmm0, %k0 {%k1}
7392; VLX-NEXT:    kmovd %k0, %eax
7393; VLX-NEXT:    vzeroupper
7394; VLX-NEXT:    retq
7395;
7396; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask_mem_b:
7397; NoVLX:       # %bb.0: # %entry
7398; NoVLX-NEXT:    vpcmpgtd (%rsi){1to16}, %zmm0, %k0
7399; NoVLX-NEXT:    kmovw %k0, %eax
7400; NoVLX-NEXT:    andl %edi, %eax
7401; NoVLX-NEXT:    vzeroupper
7402; NoVLX-NEXT:    retq
7403entry:
7404  %0 = bitcast <8 x i64> %__a to <16 x i32>
7405  %load = load i32, i32* %__b
7406  %vec = insertelement <16 x i32> undef, i32 %load, i32 0
7407  %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
7408  %2 = icmp sgt <16 x i32> %0, %1
7409  %3 = bitcast i16 %__u to <16 x i1>
7410  %4 = and <16 x i1> %3, %2
7411  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
7412  %6 = bitcast <32 x i1> %5 to i32
7413  ret i32 %6
7414}
7415
7416
; Unmasked `icmp sgt` on <16 x i32>; both <8 x i64> arguments are bitcast
; first. The <16 x i1> result is widened to <64 x i1> by a shufflevector
; against zeroinitializer: shuffle indices 16-31 select the zero operand, so
; result lanes 16-63 are false. The widened mask is returned as an i64.
; VLX checks expect the mask read with kmovq; NoVLX checks expect kmovw
; followed by movzwl to zero-extend the 16-bit mask.
7417define zeroext i64 @test_vpcmpsgtd_v16i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
7418; VLX-LABEL: test_vpcmpsgtd_v16i1_v64i1_mask:
7419; VLX:       # %bb.0: # %entry
7420; VLX-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0
7421; VLX-NEXT:    kmovq %k0, %rax
7422; VLX-NEXT:    vzeroupper
7423; VLX-NEXT:    retq
7424;
7425; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v64i1_mask:
7426; NoVLX:       # %bb.0: # %entry
7427; NoVLX-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0
7428; NoVLX-NEXT:    kmovw %k0, %eax
7429; NoVLX-NEXT:    movzwl %ax, %eax
7430; NoVLX-NEXT:    vzeroupper
7431; NoVLX-NEXT:    retq
7432entry:
7433  %0 = bitcast <8 x i64> %__a to <16 x i32>
7434  %1 = bitcast <8 x i64> %__b to <16 x i32>
7435  %2 = icmp sgt <16 x i32> %0, %1
7436  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
7437  %4 = bitcast <64 x i1> %3 to i64
7438  ret i64 %4
7439}
7440
; Memory-operand variant of the unmasked <16 x i32> `icmp sgt` test with an
; i64 result: the right-hand vector is loaded from the <8 x i64>* argument
; %__b. The <16 x i1> result is widened to <64 x i1> (shuffle indices 16-31
; select the zeroinitializer operand, so lanes 16-63 are false).
; Both check prefixes expect the load folded into vpcmpgtd as (%rdi); NoVLX
; checks read the mask with kmovw followed by movzwl.
7441define zeroext i64 @test_vpcmpsgtd_v16i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
7442; VLX-LABEL: test_vpcmpsgtd_v16i1_v64i1_mask_mem:
7443; VLX:       # %bb.0: # %entry
7444; VLX-NEXT:    vpcmpgtd (%rdi), %zmm0, %k0
7445; VLX-NEXT:    kmovq %k0, %rax
7446; VLX-NEXT:    vzeroupper
7447; VLX-NEXT:    retq
7448;
7449; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v64i1_mask_mem:
7450; NoVLX:       # %bb.0: # %entry
7451; NoVLX-NEXT:    vpcmpgtd (%rdi), %zmm0, %k0
7452; NoVLX-NEXT:    kmovw %k0, %eax
7453; NoVLX-NEXT:    movzwl %ax, %eax
7454; NoVLX-NEXT:    vzeroupper
7455; NoVLX-NEXT:    retq
7456entry:
7457  %0 = bitcast <8 x i64> %__a to <16 x i32>
7458  %load = load <8 x i64>, <8 x i64>* %__b
7459  %1 = bitcast <8 x i64> %load to <16 x i32>
7460  %2 = icmp sgt <16 x i32> %0, %1
7461  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
7462  %4 = bitcast <64 x i1> %3 to i64
7463  ret i64 %4
7464}
7465
; Masked <16 x i32> `icmp sgt` test with an i64 result: the compare result is
; ANDed with the i16 argument %__u reinterpreted as <16 x i1>.
; The masked <16 x i1> value is widened to <64 x i1> (shuffle indices 16-31
; select the zeroinitializer operand, so lanes 16-63 are false).
; VLX checks expect %edi moved into k1 and used as a {%k1} write-mask, with the
; result read by kmovq; NoVLX checks expect an unmasked compare with the mask
; applied by `andl %edi, %eax`.
7466define zeroext i64 @test_masked_vpcmpsgtd_v16i1_v64i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
7467; VLX-LABEL: test_masked_vpcmpsgtd_v16i1_v64i1_mask:
7468; VLX:       # %bb.0: # %entry
7469; VLX-NEXT:    kmovd %edi, %k1
7470; VLX-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
7471; VLX-NEXT:    kmovq %k0, %rax
7472; VLX-NEXT:    vzeroupper
7473; VLX-NEXT:    retq
7474;
7475; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v64i1_mask:
7476; NoVLX:       # %bb.0: # %entry
7477; NoVLX-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0
7478; NoVLX-NEXT:    kmovw %k0, %eax
7479; NoVLX-NEXT:    andl %edi, %eax
7480; NoVLX-NEXT:    vzeroupper
7481; NoVLX-NEXT:    retq
7482entry:
7483  %0 = bitcast <8 x i64> %__a to <16 x i32>
7484  %1 = bitcast <8 x i64> %__b to <16 x i32>
7485  %2 = icmp sgt <16 x i32> %0, %1
7486  %3 = bitcast i16 %__u to <16 x i1>
7487  %4 = and <16 x i1> %2, %3
7488  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
7489  %6 = bitcast <64 x i1> %5 to i64
7490  ret i64 %6
7491}
7492
; Masked, memory-operand <16 x i32> `icmp sgt` test with an i64 result: the
; right-hand vector is loaded from %__b and the compare result is ANDed with
; the i16 argument %__u reinterpreted as <16 x i1>.
; The masked value is widened to <64 x i1> (shuffle indices 16-31 select the
; zeroinitializer operand, so lanes 16-63 are false).
; VLX checks expect the folded load (%rsi) with a {%k1} write-mask; NoVLX
; checks expect an unmasked folded compare followed by `andl %edi, %eax`.
7493define zeroext i64 @test_masked_vpcmpsgtd_v16i1_v64i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
7494; VLX-LABEL: test_masked_vpcmpsgtd_v16i1_v64i1_mask_mem:
7495; VLX:       # %bb.0: # %entry
7496; VLX-NEXT:    kmovd %edi, %k1
7497; VLX-NEXT:    vpcmpgtd (%rsi), %zmm0, %k0 {%k1}
7498; VLX-NEXT:    kmovq %k0, %rax
7499; VLX-NEXT:    vzeroupper
7500; VLX-NEXT:    retq
7501;
7502; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v64i1_mask_mem:
7503; NoVLX:       # %bb.0: # %entry
7504; NoVLX-NEXT:    vpcmpgtd (%rsi), %zmm0, %k0
7505; NoVLX-NEXT:    kmovw %k0, %eax
7506; NoVLX-NEXT:    andl %edi, %eax
7507; NoVLX-NEXT:    vzeroupper
7508; NoVLX-NEXT:    retq
7509entry:
7510  %0 = bitcast <8 x i64> %__a to <16 x i32>
7511  %load = load <8 x i64>, <8 x i64>* %__b
7512  %1 = bitcast <8 x i64> %load to <16 x i32>
7513  %2 = icmp sgt <16 x i32> %0, %1
7514  %3 = bitcast i16 %__u to <16 x i1>
7515  %4 = and <16 x i1> %2, %3
7516  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
7517  %6 = bitcast <64 x i1> %5 to i64
7518  ret i64 %6
7519}
7520
7521
; Broadcast-operand variant of the unmasked <16 x i32> `icmp sgt` test with an
; i64 result: one i32 is loaded from %__b and splatted to all sixteen lanes
; (insertelement into lane 0, then a shufflevector with an all-zero index
; mask). The <16 x i1> result is widened to <64 x i1> (shuffle indices 16-31
; select the zeroinitializer operand, so lanes 16-63 are false).
; Both check prefixes expect the embedded-broadcast form (%rdi){1to16}; NoVLX
; checks read the mask with kmovw followed by movzwl.
7522define zeroext i64 @test_vpcmpsgtd_v16i1_v64i1_mask_mem_b(<8 x i64> %__a, i32* %__b) local_unnamed_addr {
7523; VLX-LABEL: test_vpcmpsgtd_v16i1_v64i1_mask_mem_b:
7524; VLX:       # %bb.0: # %entry
7525; VLX-NEXT:    vpcmpgtd (%rdi){1to16}, %zmm0, %k0
7526; VLX-NEXT:    kmovq %k0, %rax
7527; VLX-NEXT:    vzeroupper
7528; VLX-NEXT:    retq
7529;
7530; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v64i1_mask_mem_b:
7531; NoVLX:       # %bb.0: # %entry
7532; NoVLX-NEXT:    vpcmpgtd (%rdi){1to16}, %zmm0, %k0
7533; NoVLX-NEXT:    kmovw %k0, %eax
7534; NoVLX-NEXT:    movzwl %ax, %eax
7535; NoVLX-NEXT:    vzeroupper
7536; NoVLX-NEXT:    retq
7537entry:
7538  %0 = bitcast <8 x i64> %__a to <16 x i32>
7539  %load = load i32, i32* %__b
7540  %vec = insertelement <16 x i32> undef, i32 %load, i32 0
7541  %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
7542  %2 = icmp sgt <16 x i32> %0, %1
7543  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
7544  %4 = bitcast <64 x i1> %3 to i64
7545  ret i64 %4
7546}
7547
; Masked, broadcast-operand <16 x i32> `icmp sgt` test with an i64 result: one
; i32 loaded from %__b is splatted to all sixteen lanes, and the compare result
; is ANDed with the i16 argument %__u reinterpreted as <16 x i1> (mask is the
; left `and` operand here).
; The masked value is widened to <64 x i1> (shuffle indices 16-31 select the
; zeroinitializer operand, so lanes 16-63 are false).
; VLX checks expect (%rsi){1to16} with a {%k1} write-mask; NoVLX checks expect
; an unmasked broadcast compare followed by `andl %edi, %eax`.
7548define zeroext i64 @test_masked_vpcmpsgtd_v16i1_v64i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, i32* %__b) local_unnamed_addr {
7549; VLX-LABEL: test_masked_vpcmpsgtd_v16i1_v64i1_mask_mem_b:
7550; VLX:       # %bb.0: # %entry
7551; VLX-NEXT:    kmovd %edi, %k1
7552; VLX-NEXT:    vpcmpgtd (%rsi){1to16}, %zmm0, %k0 {%k1}
7553; VLX-NEXT:    kmovq %k0, %rax
7554; VLX-NEXT:    vzeroupper
7555; VLX-NEXT:    retq
7556;
7557; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v64i1_mask_mem_b:
7558; NoVLX:       # %bb.0: # %entry
7559; NoVLX-NEXT:    vpcmpgtd (%rsi){1to16}, %zmm0, %k0
7560; NoVLX-NEXT:    kmovw %k0, %eax
7561; NoVLX-NEXT:    andl %edi, %eax
7562; NoVLX-NEXT:    vzeroupper
7563; NoVLX-NEXT:    retq
7564entry:
7565  %0 = bitcast <8 x i64> %__a to <16 x i32>
7566  %load = load i32, i32* %__b
7567  %vec = insertelement <16 x i32> undef, i32 %load, i32 0
7568  %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
7569  %2 = icmp sgt <16 x i32> %0, %1
7570  %3 = bitcast i16 %__u to <16 x i1>
7571  %4 = and <16 x i1> %3, %2
7572  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
7573  %6 = bitcast <64 x i1> %5 to i64
7574  ret i64 %6
7575}
7576
7577
; Unmasked `icmp sgt` on <2 x i64>; the two bitcasts are same-type
; (<2 x i64> to <2 x i64>). The <2 x i1> result is widened to <4 x i1> by a
; shufflevector against zeroinitializer: shuffle indices 2 and 3 select the
; zero operand, so result lanes 2-3 are false. The mask is returned as an i4.
; VLX checks expect an xmm vpcmpgtq into k0 read with kmovb; NoVLX checks
; expect a zmm vpcmpgtq, a kshiftlw/kshiftrw $14 pair, and `andl $3, %eax`.
7578define zeroext i4 @test_vpcmpsgtq_v2i1_v4i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
7579; VLX-LABEL: test_vpcmpsgtq_v2i1_v4i1_mask:
7580; VLX:       # %bb.0: # %entry
7581; VLX-NEXT:    vpcmpgtq %xmm1, %xmm0, %k0
7582; VLX-NEXT:    kmovb %k0, %eax
7583; VLX-NEXT:    retq
7584;
7585; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v4i1_mask:
7586; NoVLX:       # %bb.0: # %entry
7587; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
7588; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
7589; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0
7590; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
7591; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
7592; NoVLX-NEXT:    kmovw %k0, %eax
7593; NoVLX-NEXT:    andl $3, %eax
7594; NoVLX-NEXT:    vzeroupper
7595; NoVLX-NEXT:    retq
7596entry:
7597  %0 = bitcast <2 x i64> %__a to <2 x i64>
7598  %1 = bitcast <2 x i64> %__b to <2 x i64>
7599  %2 = icmp sgt <2 x i64> %0, %1
7600  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
7601  %4 = bitcast <4 x i1> %3 to i4
7602  ret i4 %4
7603}
7604
; Memory-operand variant of the unmasked <2 x i64> `icmp sgt` test: the
; right-hand vector is loaded from the <2 x i64>* argument %__b.
; The <2 x i1> result is widened to <4 x i1> (shuffle indices 2 and 3 select
; the zeroinitializer operand, so lanes 2-3 are false) and returned as an i4.
; VLX checks expect the load folded into vpcmpgtq as (%rdi); NoVLX checks
; expect a separate vmovdqa into xmm1 before the zmm compare.
7605define zeroext i4 @test_vpcmpsgtq_v2i1_v4i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
7606; VLX-LABEL: test_vpcmpsgtq_v2i1_v4i1_mask_mem:
7607; VLX:       # %bb.0: # %entry
7608; VLX-NEXT:    vpcmpgtq (%rdi), %xmm0, %k0
7609; VLX-NEXT:    kmovb %k0, %eax
7610; VLX-NEXT:    retq
7611;
7612; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v4i1_mask_mem:
7613; NoVLX:       # %bb.0: # %entry
7614; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
7615; NoVLX-NEXT:    vmovdqa (%rdi), %xmm1
7616; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0
7617; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
7618; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
7619; NoVLX-NEXT:    kmovw %k0, %eax
7620; NoVLX-NEXT:    andl $3, %eax
7621; NoVLX-NEXT:    vzeroupper
7622; NoVLX-NEXT:    retq
7623entry:
7624  %0 = bitcast <2 x i64> %__a to <2 x i64>
7625  %load = load <2 x i64>, <2 x i64>* %__b
7626  %1 = bitcast <2 x i64> %load to <2 x i64>
7627  %2 = icmp sgt <2 x i64> %0, %1
7628  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
7629  %4 = bitcast <4 x i1> %3 to i4
7630  ret i4 %4
7631}
7632
; Masked <2 x i64> `icmp sgt` test: the i8 argument %__u is reinterpreted as
; <8 x i1>, its lanes 0 and 1 are extracted with a shufflevector, and the
; resulting <2 x i1> is ANDed with the compare result.
; The masked value is widened to <4 x i1> (shuffle indices 2 and 3 select the
; zeroinitializer operand, so lanes 2-3 are false) and returned as an i4.
; Both check prefixes expect %edi moved into k1 and used as the {%k1}
; write-mask on vpcmpgtq; NoVLX checks additionally expect the zmm widening,
; the kshiftlw/kshiftrw $14 pair, and `andl $3, %eax`.
7633define zeroext i4 @test_masked_vpcmpsgtq_v2i1_v4i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
7634; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask:
7635; VLX:       # %bb.0: # %entry
7636; VLX-NEXT:    kmovd %edi, %k1
7637; VLX-NEXT:    vpcmpgtq %xmm1, %xmm0, %k0 {%k1}
7638; VLX-NEXT:    kmovb %k0, %eax
7639; VLX-NEXT:    retq
7640;
7641; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask:
7642; NoVLX:       # %bb.0: # %entry
7643; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
7644; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
7645; NoVLX-NEXT:    kmovw %edi, %k1
7646; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
7647; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
7648; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
7649; NoVLX-NEXT:    kmovw %k0, %eax
7650; NoVLX-NEXT:    andl $3, %eax
7651; NoVLX-NEXT:    vzeroupper
7652; NoVLX-NEXT:    retq
7653entry:
7654  %0 = bitcast <2 x i64> %__a to <2 x i64>
7655  %1 = bitcast <2 x i64> %__b to <2 x i64>
7656  %2 = icmp sgt <2 x i64> %0, %1
7657  %3 = bitcast i8 %__u to <8 x i1>
7658  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
7659  %4 = and <2 x i1> %2, %extract.i
7660  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
7661  %6 = bitcast <4 x i1> %5 to i4
7662  ret i4 %6
7663}
7664
; Masked, memory-operand <2 x i64> `icmp sgt` test: the right-hand vector is
; loaded from %__b; lanes 0 and 1 of %__u (reinterpreted as <8 x i1>) are
; extracted and ANDed with the compare result.
; The masked value is widened to <4 x i1> (shuffle indices 2 and 3 select the
; zeroinitializer operand, so lanes 2-3 are false) and returned as an i4.
; VLX checks expect the load folded as (%rsi) with a {%k1} write-mask; NoVLX
; checks expect a vmovdqa into xmm1 followed by the masked zmm compare.
7665define zeroext i4 @test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
7666; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem:
7667; VLX:       # %bb.0: # %entry
7668; VLX-NEXT:    kmovd %edi, %k1
7669; VLX-NEXT:    vpcmpgtq (%rsi), %xmm0, %k0 {%k1}
7670; VLX-NEXT:    kmovb %k0, %eax
7671; VLX-NEXT:    retq
7672;
7673; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem:
7674; NoVLX:       # %bb.0: # %entry
7675; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
7676; NoVLX-NEXT:    vmovdqa (%rsi), %xmm1
7677; NoVLX-NEXT:    kmovw %edi, %k1
7678; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
7679; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
7680; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
7681; NoVLX-NEXT:    kmovw %k0, %eax
7682; NoVLX-NEXT:    andl $3, %eax
7683; NoVLX-NEXT:    vzeroupper
7684; NoVLX-NEXT:    retq
7685entry:
7686  %0 = bitcast <2 x i64> %__a to <2 x i64>
7687  %load = load <2 x i64>, <2 x i64>* %__b
7688  %1 = bitcast <2 x i64> %load to <2 x i64>
7689  %2 = icmp sgt <2 x i64> %0, %1
7690  %3 = bitcast i8 %__u to <8 x i1>
7691  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
7692  %4 = and <2 x i1> %2, %extract.i
7693  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
7694  %6 = bitcast <4 x i1> %5 to i4
7695  ret i4 %6
7696}
7697
7698
; Broadcast-operand variant of the unmasked <2 x i64> `icmp sgt` test: one i64
; is loaded from %__b and splatted to both lanes (insertelement into lane 0,
; then a shufflevector with an all-zero index mask).
; The <2 x i1> result is widened to <4 x i1> (shuffle indices 2 and 3 select
; the zeroinitializer operand, so lanes 2-3 are false) and returned as an i4.
; VLX checks expect the embedded-broadcast form (%rdi){1to2}; NoVLX checks
; expect an explicit vpbroadcastq into xmm1 before the zmm compare.
7699define zeroext i4 @test_vpcmpsgtq_v2i1_v4i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
7700; VLX-LABEL: test_vpcmpsgtq_v2i1_v4i1_mask_mem_b:
7701; VLX:       # %bb.0: # %entry
7702; VLX-NEXT:    vpcmpgtq (%rdi){1to2}, %xmm0, %k0
7703; VLX-NEXT:    kmovb %k0, %eax
7704; VLX-NEXT:    retq
7705;
7706; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v4i1_mask_mem_b:
7707; NoVLX:       # %bb.0: # %entry
7708; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
7709; NoVLX-NEXT:    vpbroadcastq (%rdi), %xmm1
7710; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0
7711; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
7712; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
7713; NoVLX-NEXT:    kmovw %k0, %eax
7714; NoVLX-NEXT:    andl $3, %eax
7715; NoVLX-NEXT:    vzeroupper
7716; NoVLX-NEXT:    retq
7717entry:
7718  %0 = bitcast <2 x i64> %__a to <2 x i64>
7719  %load = load i64, i64* %__b
7720  %vec = insertelement <2 x i64> undef, i64 %load, i32 0
7721  %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
7722  %2 = icmp sgt <2 x i64> %0, %1
7723  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
7724  %4 = bitcast <4 x i1> %3 to i4
7725  ret i4 %4
7726}
7727
; Masked, broadcast-operand <2 x i64> `icmp sgt` test: one i64 loaded from
; %__b is splatted to both lanes; lanes 0 and 1 of %__u (reinterpreted as
; <8 x i1>) are extracted and ANDed with the compare result (mask is the left
; `and` operand here).
; The masked value is widened to <4 x i1> (shuffle indices 2 and 3 select the
; zeroinitializer operand, so lanes 2-3 are false) and returned as an i4.
; VLX checks expect (%rsi){1to2} with a {%k1} write-mask; NoVLX checks expect
; vpbroadcastq into xmm1 followed by the masked zmm compare.
7728define zeroext i4 @test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
7729; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem_b:
7730; VLX:       # %bb.0: # %entry
7731; VLX-NEXT:    kmovd %edi, %k1
7732; VLX-NEXT:    vpcmpgtq (%rsi){1to2}, %xmm0, %k0 {%k1}
7733; VLX-NEXT:    kmovb %k0, %eax
7734; VLX-NEXT:    retq
7735;
7736; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem_b:
7737; NoVLX:       # %bb.0: # %entry
7738; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
7739; NoVLX-NEXT:    vpbroadcastq (%rsi), %xmm1
7740; NoVLX-NEXT:    kmovw %edi, %k1
7741; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
7742; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
7743; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
7744; NoVLX-NEXT:    kmovw %k0, %eax
7745; NoVLX-NEXT:    andl $3, %eax
7746; NoVLX-NEXT:    vzeroupper
7747; NoVLX-NEXT:    retq
7748entry:
7749  %0 = bitcast <2 x i64> %__a to <2 x i64>
7750  %load = load i64, i64* %__b
7751  %vec = insertelement <2 x i64> undef, i64 %load, i32 0
7752  %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
7753  %2 = icmp sgt <2 x i64> %0, %1
7754  %3 = bitcast i8 %__u to <8 x i1>
7755  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
7756  %4 = and <2 x i1> %extract.i, %2
7757  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
7758  %6 = bitcast <4 x i1> %5 to i4
7759  ret i4 %6
7760}
7761
7762
; Signed-greater-than compare of %__a against %__b, per i64 lane. The <2 x i1>
; result is padded with zero lanes to <8 x i1> and returned as an i8 bitmask.
; The VLX run expects a single xmm vpcmpgtq into %k0; the NoVLX run expects a
; zmm-wide compare followed by kshiftlw/kshiftrw $14 to clear all but the low
; two mask bits.
define zeroext i8 @test_vpcmpsgtq_v2i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtq_v2i1_v8i1_mask:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vpcmpgtq %xmm1, %xmm0, %k0
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    # kill: def $al killed $al killed $eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v8i1_mask:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0
; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x i64>
  %1 = bitcast <2 x i64> %__b to <2 x i64>
  %2 = icmp sgt <2 x i64> %0, %1
  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %4 = bitcast <8 x i1> %3 to i8
  ret i8 %4
}
7790
; Signed-greater-than compare of %__a against a <2 x i64> loaded from %__b.
; The <2 x i1> result is padded with zero lanes to <8 x i1> and returned as an
; i8 bitmask.
; The VLX run expects vpcmpgtq with the load folded as a memory operand; the
; NoVLX run expects a vmovdqa load, a zmm-wide compare, and kshiftlw/kshiftrw
; $14 to clear all but the low two mask bits.
define zeroext i8 @test_vpcmpsgtq_v2i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtq_v2i1_v8i1_mask_mem:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vpcmpgtq (%rdi), %xmm0, %k0
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    # kill: def $al killed $al killed $eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v8i1_mask_mem:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT:    vmovdqa (%rdi), %xmm1
; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0
; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x i64>
  %load = load <2 x i64>, <2 x i64>* %__b
  %1 = bitcast <2 x i64> %load to <2 x i64>
  %2 = icmp sgt <2 x i64> %0, %1
  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %4 = bitcast <8 x i1> %3 to i8
  ret i8 %4
}
7819
; Masked signed-greater-than compare of %__a against %__b, per i64 lane. The
; compare result is ANDed with elements 0 and 1 of the <8 x i1> view of %__u,
; padded with zero lanes to <8 x i1>, and returned as an i8 bitmask.
; The VLX run expects an xmm vpcmpgtq under write-mask %k1; the NoVLX run
; expects a masked zmm-wide compare followed by kshiftlw/kshiftrw $14 to clear
; all but the low two mask bits.
define zeroext i8 @test_masked_vpcmpsgtq_v2i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v8i1_mask:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpgtq %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    # kill: def $al killed $al killed $eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v8i1_mask:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT:    kmovw %edi, %k1
; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x i64>
  %1 = bitcast <2 x i64> %__b to <2 x i64>
  %2 = icmp sgt <2 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %4 = and <2 x i1> %2, %extract.i
  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %6 = bitcast <8 x i1> %5 to i8
  ret i8 %6
}
7852
; Masked signed-greater-than compare of %__a against a <2 x i64> loaded from
; %__b. The compare result is ANDed with elements 0 and 1 of the <8 x i1> view
; of %__u, padded with zero lanes to <8 x i1>, and returned as an i8 bitmask.
; The VLX run expects a memory-operand vpcmpgtq under write-mask %k1; the NoVLX
; run expects a vmovdqa load, a masked zmm-wide compare, and kshiftlw/kshiftrw
; $14 to clear all but the low two mask bits.
define zeroext i8 @test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpgtq (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    # kill: def $al killed $al killed $eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT:    vmovdqa (%rsi), %xmm1
; NoVLX-NEXT:    kmovw %edi, %k1
; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x i64>
  %load = load <2 x i64>, <2 x i64>* %__b
  %1 = bitcast <2 x i64> %load to <2 x i64>
  %2 = icmp sgt <2 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %4 = and <2 x i1> %2, %extract.i
  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %6 = bitcast <8 x i1> %5 to i8
  ret i8 %6
}
7886
7887
; Signed-greater-than compare of %__a against a scalar i64 loaded from %__b
; and splatted to both lanes. The <2 x i1> result is padded with zero lanes to
; <8 x i1> and returned as an i8 bitmask.
; The VLX run expects vpcmpgtq with an embedded {1to2} broadcast; the NoVLX run
; expects vpbroadcastq, a zmm-wide compare, and kshiftlw/kshiftrw $14 to clear
; all but the low two mask bits.
define zeroext i8 @test_vpcmpsgtq_v2i1_v8i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtq_v2i1_v8i1_mask_mem_b:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vpcmpgtq (%rdi){1to2}, %xmm0, %k0
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    # kill: def $al killed $al killed $eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v8i1_mask_mem_b:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT:    vpbroadcastq (%rdi), %xmm1
; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0
; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x i64>
  %load = load i64, i64* %__b
  %vec = insertelement <2 x i64> undef, i64 %load, i32 0
  %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %2 = icmp sgt <2 x i64> %0, %1
  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %4 = bitcast <8 x i1> %3 to i8
  ret i8 %4
}
7917
; Masked signed-greater-than compare of %__a against a scalar i64 loaded from
; %__b and splatted to both lanes. The compare result is ANDed with elements 0
; and 1 of the <8 x i1> view of %__u, padded with zero lanes to <8 x i1>, and
; returned as an i8 bitmask.
; The VLX run expects a {1to2} broadcast vpcmpgtq under write-mask %k1; the
; NoVLX run expects vpbroadcastq, a masked zmm-wide compare, and
; kshiftlw/kshiftrw $14 to clear all but the low two mask bits.
define zeroext i8 @test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem_b:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpgtq (%rsi){1to2}, %xmm0, %k0 {%k1}
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    # kill: def $al killed $al killed $eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem_b:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT:    vpbroadcastq (%rsi), %xmm1
; NoVLX-NEXT:    kmovw %edi, %k1
; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x i64>
  %load = load i64, i64* %__b
  %vec = insertelement <2 x i64> undef, i64 %load, i32 0
  %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %2 = icmp sgt <2 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %4 = and <2 x i1> %extract.i, %2
  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %6 = bitcast <8 x i1> %5 to i8
  ret i8 %6
}
7952
7953
; Signed-greater-than compare of %__a against %__b, per i64 lane. The <2 x i1>
; result is padded with zero lanes to <16 x i1> and returned as an i16 bitmask.
; The VLX run expects a single xmm vpcmpgtq into %k0; the NoVLX run expects a
; zmm-wide compare followed by kshiftlw/kshiftrw $14 to clear all but the low
; two mask bits.
define zeroext i16 @test_vpcmpsgtq_v2i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtq_v2i1_v16i1_mask:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vpcmpgtq %xmm1, %xmm0, %k0
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v16i1_mask:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0
; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x i64>
  %1 = bitcast <2 x i64> %__b to <2 x i64>
  %2 = icmp sgt <2 x i64> %0, %1
  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %4 = bitcast <16 x i1> %3 to i16
  ret i16 %4
}
7981
; Signed-greater-than compare of %__a against a <2 x i64> loaded from %__b.
; The <2 x i1> result is padded with zero lanes to <16 x i1> and returned as an
; i16 bitmask.
; The VLX run expects vpcmpgtq with the load folded as a memory operand; the
; NoVLX run expects a vmovdqa load, a zmm-wide compare, and kshiftlw/kshiftrw
; $14 to clear all but the low two mask bits.
define zeroext i16 @test_vpcmpsgtq_v2i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtq_v2i1_v16i1_mask_mem:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vpcmpgtq (%rdi), %xmm0, %k0
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v16i1_mask_mem:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT:    vmovdqa (%rdi), %xmm1
; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0
; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x i64>
  %load = load <2 x i64>, <2 x i64>* %__b
  %1 = bitcast <2 x i64> %load to <2 x i64>
  %2 = icmp sgt <2 x i64> %0, %1
  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %4 = bitcast <16 x i1> %3 to i16
  ret i16 %4
}
8010
; Masked signed-greater-than compare of %__a against %__b, per i64 lane. The
; compare result is ANDed with elements 0 and 1 of the <8 x i1> view of %__u,
; padded with zero lanes to <16 x i1>, and returned as an i16 bitmask.
; The VLX run expects an xmm vpcmpgtq under write-mask %k1; the NoVLX run
; expects a masked zmm-wide compare followed by kshiftlw/kshiftrw $14 to clear
; all but the low two mask bits.
define zeroext i16 @test_masked_vpcmpsgtq_v2i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v16i1_mask:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpgtq %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v16i1_mask:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT:    kmovw %edi, %k1
; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x i64>
  %1 = bitcast <2 x i64> %__b to <2 x i64>
  %2 = icmp sgt <2 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %4 = and <2 x i1> %2, %extract.i
  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %6 = bitcast <16 x i1> %5 to i16
  ret i16 %6
}
8043
; Masked signed-greater-than compare of %__a against a <2 x i64> loaded from
; %__b. The compare result is ANDed with elements 0 and 1 of the <8 x i1> view
; of %__u, padded with zero lanes to <16 x i1>, and returned as an i16 bitmask.
; The VLX run expects a memory-operand vpcmpgtq under write-mask %k1; the NoVLX
; run expects a vmovdqa load, a masked zmm-wide compare, and kshiftlw/kshiftrw
; $14 to clear all but the low two mask bits.
define zeroext i16 @test_masked_vpcmpsgtq_v2i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v16i1_mask_mem:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpgtq (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v16i1_mask_mem:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT:    vmovdqa (%rsi), %xmm1
; NoVLX-NEXT:    kmovw %edi, %k1
; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x i64>
  %load = load <2 x i64>, <2 x i64>* %__b
  %1 = bitcast <2 x i64> %load to <2 x i64>
  %2 = icmp sgt <2 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %4 = and <2 x i1> %2, %extract.i
  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %6 = bitcast <16 x i1> %5 to i16
  ret i16 %6
}
8077
8078
; Signed-greater-than compare of %__a against a scalar i64 loaded from %__b
; and splatted to both lanes. The <2 x i1> result is padded with zero lanes to
; <16 x i1> and returned as an i16 bitmask.
; The VLX run expects vpcmpgtq with an embedded {1to2} broadcast; the NoVLX run
; expects vpbroadcastq, a zmm-wide compare, and kshiftlw/kshiftrw $14 to clear
; all but the low two mask bits.
define zeroext i16 @test_vpcmpsgtq_v2i1_v16i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtq_v2i1_v16i1_mask_mem_b:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vpcmpgtq (%rdi){1to2}, %xmm0, %k0
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v16i1_mask_mem_b:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT:    vpbroadcastq (%rdi), %xmm1
; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0
; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x i64>
  %load = load i64, i64* %__b
  %vec = insertelement <2 x i64> undef, i64 %load, i32 0
  %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %2 = icmp sgt <2 x i64> %0, %1
  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %4 = bitcast <16 x i1> %3 to i16
  ret i16 %4
}
8108
; Masked signed-greater-than compare of %__a against a scalar i64 loaded from
; %__b and splatted to both lanes. The compare result is ANDed with elements 0
; and 1 of the <8 x i1> view of %__u, padded with zero lanes to <16 x i1>, and
; returned as an i16 bitmask.
; The VLX run expects a {1to2} broadcast vpcmpgtq under write-mask %k1; the
; NoVLX run expects vpbroadcastq, a masked zmm-wide compare, and
; kshiftlw/kshiftrw $14 to clear all but the low two mask bits.
define zeroext i16 @test_masked_vpcmpsgtq_v2i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v16i1_mask_mem_b:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpgtq (%rsi){1to2}, %xmm0, %k0 {%k1}
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v16i1_mask_mem_b:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT:    vpbroadcastq (%rsi), %xmm1
; NoVLX-NEXT:    kmovw %edi, %k1
; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x i64>
  %load = load i64, i64* %__b
  %vec = insertelement <2 x i64> undef, i64 %load, i32 0
  %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %2 = icmp sgt <2 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %4 = and <2 x i1> %extract.i, %2
  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %6 = bitcast <16 x i1> %5 to i16
  ret i16 %6
}
8143
8144
; Signed-greater-than compare of %__a against %__b, per i64 lane. The <2 x i1>
; result is padded with zero lanes to <32 x i1> and returned as an i32 bitmask.
; The VLX run expects a single xmm vpcmpgtq into %k0; the NoVLX run expects a
; zmm-wide compare followed by kshiftlw/kshiftrw $14 to clear all but the low
; two mask bits.
define zeroext i32 @test_vpcmpsgtq_v2i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtq_v2i1_v32i1_mask:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vpcmpgtq %xmm1, %xmm0, %k0
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v32i1_mask:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0
; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x i64>
  %1 = bitcast <2 x i64> %__b to <2 x i64>
  %2 = icmp sgt <2 x i64> %0, %1
  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}
8170
; Signed-greater-than compare of %__a against a <2 x i64> loaded from %__b.
; The <2 x i1> result is padded with zero lanes to <32 x i1> and returned as an
; i32 bitmask.
; The VLX run expects vpcmpgtq with the load folded as a memory operand; the
; NoVLX run expects a vmovdqa load, a zmm-wide compare, and kshiftlw/kshiftrw
; $14 to clear all but the low two mask bits.
define zeroext i32 @test_vpcmpsgtq_v2i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtq_v2i1_v32i1_mask_mem:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vpcmpgtq (%rdi), %xmm0, %k0
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v32i1_mask_mem:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT:    vmovdqa (%rdi), %xmm1
; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0
; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x i64>
  %load = load <2 x i64>, <2 x i64>* %__b
  %1 = bitcast <2 x i64> %load to <2 x i64>
  %2 = icmp sgt <2 x i64> %0, %1
  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}
8197
; Masked signed-greater-than compare of %__a against %__b, per i64 lane. The
; compare result is ANDed with elements 0 and 1 of the <8 x i1> view of %__u,
; padded with zero lanes to <32 x i1>, and returned as an i32 bitmask.
; The VLX run expects an xmm vpcmpgtq under write-mask %k1; the NoVLX run
; expects a masked zmm-wide compare followed by kshiftlw/kshiftrw $14 to clear
; all but the low two mask bits.
define zeroext i32 @test_masked_vpcmpsgtq_v2i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpgtq %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT:    kmovw %edi, %k1
; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x i64>
  %1 = bitcast <2 x i64> %__b to <2 x i64>
  %2 = icmp sgt <2 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %4 = and <2 x i1> %2, %extract.i
  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}
8228
; Masked signed-greater-than compare of %__a against a <2 x i64> loaded from
; %__b. The compare result is ANDed with elements 0 and 1 of the <8 x i1> view
; of %__u, padded with zero lanes to <32 x i1>, and returned as an i32 bitmask.
; The VLX run expects a memory-operand vpcmpgtq under write-mask %k1; the NoVLX
; run expects a vmovdqa load, a masked zmm-wide compare, and kshiftlw/kshiftrw
; $14 to clear all but the low two mask bits.
define zeroext i32 @test_masked_vpcmpsgtq_v2i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask_mem:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpgtq (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask_mem:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT:    vmovdqa (%rsi), %xmm1
; NoVLX-NEXT:    kmovw %edi, %k1
; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x i64>
  %load = load <2 x i64>, <2 x i64>* %__b
  %1 = bitcast <2 x i64> %load to <2 x i64>
  %2 = icmp sgt <2 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %4 = and <2 x i1> %2, %extract.i
  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}
8260
8261
; Signed-greater-than compare of %__a against a scalar i64 loaded from %__b
; and splatted to both lanes. The <2 x i1> result is padded with zero lanes to
; <32 x i1> and returned as an i32 bitmask.
; The VLX run expects vpcmpgtq with an embedded {1to2} broadcast; the NoVLX run
; expects vpbroadcastq, a zmm-wide compare, and kshiftlw/kshiftrw $14 to clear
; all but the low two mask bits.
define zeroext i32 @test_vpcmpsgtq_v2i1_v32i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtq_v2i1_v32i1_mask_mem_b:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vpcmpgtq (%rdi){1to2}, %xmm0, %k0
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v32i1_mask_mem_b:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT:    vpbroadcastq (%rdi), %xmm1
; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0
; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x i64>
  %load = load i64, i64* %__b
  %vec = insertelement <2 x i64> undef, i64 %load, i32 0
  %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %2 = icmp sgt <2 x i64> %0, %1
  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}
8289
; Masked signed-greater-than compare of %__a against a scalar i64 loaded from
; %__b and splatted to both lanes. The compare result is ANDed with elements 0
; and 1 of the <8 x i1> view of %__u, padded with zero lanes to <32 x i1>, and
; returned as an i32 bitmask.
; The VLX run expects a {1to2} broadcast vpcmpgtq under write-mask %k1; the
; NoVLX run expects vpbroadcastq, a masked zmm-wide compare, and
; kshiftlw/kshiftrw $14 to clear all but the low two mask bits.
define zeroext i32 @test_masked_vpcmpsgtq_v2i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask_mem_b:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpgtq (%rsi){1to2}, %xmm0, %k0 {%k1}
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask_mem_b:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT:    vpbroadcastq (%rsi), %xmm1
; NoVLX-NEXT:    kmovw %edi, %k1
; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x i64>
  %load = load i64, i64* %__b
  %vec = insertelement <2 x i64> undef, i64 %load, i32 0
  %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %2 = icmp sgt <2 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %4 = and <2 x i1> %extract.i, %2
  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}
8322
8323
8324define zeroext i64 @test_vpcmpsgtq_v2i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
8325; VLX-LABEL: test_vpcmpsgtq_v2i1_v64i1_mask:
8326; VLX:       # %bb.0: # %entry
8327; VLX-NEXT:    vpcmpgtq %xmm1, %xmm0, %k0
8328; VLX-NEXT:    kmovq %k0, %rax
8329; VLX-NEXT:    retq
8330;
8331; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v64i1_mask:
8332; NoVLX:       # %bb.0: # %entry
8333; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
8334; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
8335; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0
8336; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
8337; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
8338; NoVLX-NEXT:    kmovw %k0, %eax
8339; NoVLX-NEXT:    movzwl %ax, %eax
8340; NoVLX-NEXT:    vzeroupper
8341; NoVLX-NEXT:    retq
8342entry:
8343  %0 = bitcast <2 x i64> %__a to <2 x i64>
8344  %1 = bitcast <2 x i64> %__b to <2 x i64>
8345  %2 = icmp sgt <2 x i64> %0, %1
8346  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
8347  %4 = bitcast <64 x i1> %3 to i64
8348  ret i64 %4
8349}
8350
8351define zeroext i64 @test_vpcmpsgtq_v2i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
8352; VLX-LABEL: test_vpcmpsgtq_v2i1_v64i1_mask_mem:
8353; VLX:       # %bb.0: # %entry
8354; VLX-NEXT:    vpcmpgtq (%rdi), %xmm0, %k0
8355; VLX-NEXT:    kmovq %k0, %rax
8356; VLX-NEXT:    retq
8357;
8358; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v64i1_mask_mem:
8359; NoVLX:       # %bb.0: # %entry
8360; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
8361; NoVLX-NEXT:    vmovdqa (%rdi), %xmm1
8362; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0
8363; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
8364; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
8365; NoVLX-NEXT:    kmovw %k0, %eax
8366; NoVLX-NEXT:    movzwl %ax, %eax
8367; NoVLX-NEXT:    vzeroupper
8368; NoVLX-NEXT:    retq
8369entry:
8370  %0 = bitcast <2 x i64> %__a to <2 x i64>
8371  %load = load <2 x i64>, <2 x i64>* %__b
8372  %1 = bitcast <2 x i64> %load to <2 x i64>
8373  %2 = icmp sgt <2 x i64> %0, %1
8374  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
8375  %4 = bitcast <64 x i1> %3 to i64
8376  ret i64 %4
8377}
8378
8379define zeroext i64 @test_masked_vpcmpsgtq_v2i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
8380; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v64i1_mask:
8381; VLX:       # %bb.0: # %entry
8382; VLX-NEXT:    kmovd %edi, %k1
8383; VLX-NEXT:    vpcmpgtq %xmm1, %xmm0, %k0 {%k1}
8384; VLX-NEXT:    kmovq %k0, %rax
8385; VLX-NEXT:    retq
8386;
8387; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v64i1_mask:
8388; NoVLX:       # %bb.0: # %entry
8389; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
8390; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
8391; NoVLX-NEXT:    kmovw %edi, %k1
8392; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
8393; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
8394; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
8395; NoVLX-NEXT:    kmovw %k0, %eax
8396; NoVLX-NEXT:    movzwl %ax, %eax
8397; NoVLX-NEXT:    vzeroupper
8398; NoVLX-NEXT:    retq
8399entry:
8400  %0 = bitcast <2 x i64> %__a to <2 x i64>
8401  %1 = bitcast <2 x i64> %__b to <2 x i64>
8402  %2 = icmp sgt <2 x i64> %0, %1
8403  %3 = bitcast i8 %__u to <8 x i1>
8404  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
8405  %4 = and <2 x i1> %2, %extract.i
8406  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
8407  %6 = bitcast <64 x i1> %5 to i64
8408  ret i64 %6
8409}
8410
8411define zeroext i64 @test_masked_vpcmpsgtq_v2i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
8412; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v64i1_mask_mem:
8413; VLX:       # %bb.0: # %entry
8414; VLX-NEXT:    kmovd %edi, %k1
8415; VLX-NEXT:    vpcmpgtq (%rsi), %xmm0, %k0 {%k1}
8416; VLX-NEXT:    kmovq %k0, %rax
8417; VLX-NEXT:    retq
8418;
8419; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v64i1_mask_mem:
8420; NoVLX:       # %bb.0: # %entry
8421; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
8422; NoVLX-NEXT:    vmovdqa (%rsi), %xmm1
8423; NoVLX-NEXT:    kmovw %edi, %k1
8424; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
8425; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
8426; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
8427; NoVLX-NEXT:    kmovw %k0, %eax
8428; NoVLX-NEXT:    movzwl %ax, %eax
8429; NoVLX-NEXT:    vzeroupper
8430; NoVLX-NEXT:    retq
8431entry:
8432  %0 = bitcast <2 x i64> %__a to <2 x i64>
8433  %load = load <2 x i64>, <2 x i64>* %__b
8434  %1 = bitcast <2 x i64> %load to <2 x i64>
8435  %2 = icmp sgt <2 x i64> %0, %1
8436  %3 = bitcast i8 %__u to <8 x i1>
8437  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
8438  %4 = and <2 x i1> %2, %extract.i
8439  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
8440  %6 = bitcast <64 x i1> %5 to i64
8441  ret i64 %6
8442}
8443
8444
8445define zeroext i64 @test_vpcmpsgtq_v2i1_v64i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
8446; VLX-LABEL: test_vpcmpsgtq_v2i1_v64i1_mask_mem_b:
8447; VLX:       # %bb.0: # %entry
8448; VLX-NEXT:    vpcmpgtq (%rdi){1to2}, %xmm0, %k0
8449; VLX-NEXT:    kmovq %k0, %rax
8450; VLX-NEXT:    retq
8451;
8452; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v64i1_mask_mem_b:
8453; NoVLX:       # %bb.0: # %entry
8454; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
8455; NoVLX-NEXT:    vpbroadcastq (%rdi), %xmm1
8456; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0
8457; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
8458; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
8459; NoVLX-NEXT:    kmovw %k0, %eax
8460; NoVLX-NEXT:    movzwl %ax, %eax
8461; NoVLX-NEXT:    vzeroupper
8462; NoVLX-NEXT:    retq
8463entry:
8464  %0 = bitcast <2 x i64> %__a to <2 x i64>
8465  %load = load i64, i64* %__b
8466  %vec = insertelement <2 x i64> undef, i64 %load, i32 0
8467  %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
8468  %2 = icmp sgt <2 x i64> %0, %1
8469  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
8470  %4 = bitcast <64 x i1> %3 to i64
8471  ret i64 %4
8472}
8473
8474define zeroext i64 @test_masked_vpcmpsgtq_v2i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
8475; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v64i1_mask_mem_b:
8476; VLX:       # %bb.0: # %entry
8477; VLX-NEXT:    kmovd %edi, %k1
8478; VLX-NEXT:    vpcmpgtq (%rsi){1to2}, %xmm0, %k0 {%k1}
8479; VLX-NEXT:    kmovq %k0, %rax
8480; VLX-NEXT:    retq
8481;
8482; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v64i1_mask_mem_b:
8483; NoVLX:       # %bb.0: # %entry
8484; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
8485; NoVLX-NEXT:    vpbroadcastq (%rsi), %xmm1
8486; NoVLX-NEXT:    kmovw %edi, %k1
8487; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
8488; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
8489; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
8490; NoVLX-NEXT:    kmovw %k0, %eax
8491; NoVLX-NEXT:    movzwl %ax, %eax
8492; NoVLX-NEXT:    vzeroupper
8493; NoVLX-NEXT:    retq
8494entry:
8495  %0 = bitcast <2 x i64> %__a to <2 x i64>
8496  %load = load i64, i64* %__b
8497  %vec = insertelement <2 x i64> undef, i64 %load, i32 0
8498  %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
8499  %2 = icmp sgt <2 x i64> %0, %1
8500  %3 = bitcast i8 %__u to <8 x i1>
8501  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
8502  %4 = and <2 x i1> %extract.i, %2
8503  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
8504  %6 = bitcast <64 x i1> %5 to i64
8505  ret i64 %6
8506}
8507
8508
; Signed greater-than compare (icmp sgt) of two <4 x i64> register operands;
; the 4-bit result is widened to <8 x i1> with zero upper lanes and returned
; as i8.
; The VLX run expects a ymm vpcmpgtq writing %k0. The NoVLX run expects the
; compare on zmm registers, then kshiftlw/kshiftrw by 12, which keeps only the
; low 4 of the 16 mask bits.
8509define zeroext i8 @test_vpcmpsgtq_v4i1_v8i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
8510; VLX-LABEL: test_vpcmpsgtq_v4i1_v8i1_mask:
8511; VLX:       # %bb.0: # %entry
8512; VLX-NEXT:    vpcmpgtq %ymm1, %ymm0, %k0
8513; VLX-NEXT:    kmovd %k0, %eax
8514; VLX-NEXT:    # kill: def $al killed $al killed $eax
8515; VLX-NEXT:    vzeroupper
8516; VLX-NEXT:    retq
8517;
8518; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v8i1_mask:
8519; NoVLX:       # %bb.0: # %entry
8520; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
8521; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
8522; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0
8523; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
8524; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
8525; NoVLX-NEXT:    kmovw %k0, %eax
8526; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
8527; NoVLX-NEXT:    vzeroupper
8528; NoVLX-NEXT:    retq
8529entry:
8530  %0 = bitcast <4 x i64> %__a to <4 x i64>
8531  %1 = bitcast <4 x i64> %__b to <4 x i64>
8532  %2 = icmp sgt <4 x i64> %0, %1
  ; Shuffle indices 4..7 select lanes of the zeroinitializer operand, so the
  ; upper four lanes of the widened vector are zero.
8533  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
8534  %4 = bitcast <8 x i1> %3 to i8
8535  ret i8 %4
8536}
8537
; Signed greater-than compare of a <4 x i64> register operand against a
; <4 x i64> vector loaded from %__b; the 4-bit result is widened to <8 x i1>
; with zero upper lanes and returned as i8.
; The VLX run expects the load folded into vpcmpgtq (%rdi). The NoVLX run
; expects a separate vmovdqa load, the compare on zmm registers, and
; kshiftlw/kshiftrw by 12 to keep only the low 4 of the 16 mask bits.
8538define zeroext i8 @test_vpcmpsgtq_v4i1_v8i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
8539; VLX-LABEL: test_vpcmpsgtq_v4i1_v8i1_mask_mem:
8540; VLX:       # %bb.0: # %entry
8541; VLX-NEXT:    vpcmpgtq (%rdi), %ymm0, %k0
8542; VLX-NEXT:    kmovd %k0, %eax
8543; VLX-NEXT:    # kill: def $al killed $al killed $eax
8544; VLX-NEXT:    vzeroupper
8545; VLX-NEXT:    retq
8546;
8547; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v8i1_mask_mem:
8548; NoVLX:       # %bb.0: # %entry
8549; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
8550; NoVLX-NEXT:    vmovdqa (%rdi), %ymm1
8551; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0
8552; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
8553; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
8554; NoVLX-NEXT:    kmovw %k0, %eax
8555; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
8556; NoVLX-NEXT:    vzeroupper
8557; NoVLX-NEXT:    retq
8558entry:
8559  %0 = bitcast <4 x i64> %__a to <4 x i64>
8560  %load = load <4 x i64>, <4 x i64>* %__b
8561  %1 = bitcast <4 x i64> %load to <4 x i64>
8562  %2 = icmp sgt <4 x i64> %0, %1
  ; Shuffle indices 4..7 select lanes of the zeroinitializer operand, so the
  ; upper four lanes of the widened vector are zero.
8563  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
8564  %4 = bitcast <8 x i1> %3 to i8
8565  ret i8 %4
8566}
8567
; Masked signed greater-than compare of two <4 x i64> register operands; the
; 4-bit result is widened to <8 x i1> with zero upper lanes and returned as i8.
; The VLX run expects a write-masked ymm vpcmpgtq. The NoVLX run expects the
; masked compare on zmm registers, then kshiftlw/kshiftrw by 12, which keeps
; only the low 4 of the 16 mask bits.
8568define zeroext i8 @test_masked_vpcmpsgtq_v4i1_v8i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
8569; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v8i1_mask:
8570; VLX:       # %bb.0: # %entry
8571; VLX-NEXT:    kmovd %edi, %k1
8572; VLX-NEXT:    vpcmpgtq %ymm1, %ymm0, %k0 {%k1}
8573; VLX-NEXT:    kmovd %k0, %eax
8574; VLX-NEXT:    # kill: def $al killed $al killed $eax
8575; VLX-NEXT:    vzeroupper
8576; VLX-NEXT:    retq
8577;
8578; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v8i1_mask:
8579; NoVLX:       # %bb.0: # %entry
8580; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
8581; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
8582; NoVLX-NEXT:    kmovw %edi, %k1
8583; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
8584; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
8585; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
8586; NoVLX-NEXT:    kmovw %k0, %eax
8587; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
8588; NoVLX-NEXT:    vzeroupper
8589; NoVLX-NEXT:    retq
8590entry:
8591  %0 = bitcast <4 x i64> %__a to <4 x i64>
8592  %1 = bitcast <4 x i64> %__b to <4 x i64>
8593  %2 = icmp sgt <4 x i64> %0, %1
  ; Bits 0..3 of %__u form the per-lane mask that is ANDed with the compare.
8594  %3 = bitcast i8 %__u to <8 x i1>
8595  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
8596  %4 = and <4 x i1> %2, %extract.i
  ; Shuffle indices 4..7 select lanes of the zeroinitializer operand, so the
  ; upper four lanes of the widened vector are zero.
8597  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
8598  %6 = bitcast <8 x i1> %5 to i8
8599  ret i8 %6
8600}
8601
; Masked signed greater-than compare of a <4 x i64> register operand against a
; <4 x i64> vector loaded from %__b; the 4-bit result is widened to <8 x i1>
; with zero upper lanes and returned as i8.
; The VLX run expects the load folded into a write-masked vpcmpgtq (%rsi). The
; NoVLX run expects a separate vmovdqa load, the masked compare on zmm
; registers, and kshiftlw/kshiftrw by 12 to keep only the low 4 mask bits.
8602define zeroext i8 @test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
8603; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem:
8604; VLX:       # %bb.0: # %entry
8605; VLX-NEXT:    kmovd %edi, %k1
8606; VLX-NEXT:    vpcmpgtq (%rsi), %ymm0, %k0 {%k1}
8607; VLX-NEXT:    kmovd %k0, %eax
8608; VLX-NEXT:    # kill: def $al killed $al killed $eax
8609; VLX-NEXT:    vzeroupper
8610; VLX-NEXT:    retq
8611;
8612; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem:
8613; NoVLX:       # %bb.0: # %entry
8614; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
8615; NoVLX-NEXT:    vmovdqa (%rsi), %ymm1
8616; NoVLX-NEXT:    kmovw %edi, %k1
8617; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
8618; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
8619; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
8620; NoVLX-NEXT:    kmovw %k0, %eax
8621; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
8622; NoVLX-NEXT:    vzeroupper
8623; NoVLX-NEXT:    retq
8624entry:
8625  %0 = bitcast <4 x i64> %__a to <4 x i64>
8626  %load = load <4 x i64>, <4 x i64>* %__b
8627  %1 = bitcast <4 x i64> %load to <4 x i64>
8628  %2 = icmp sgt <4 x i64> %0, %1
  ; Bits 0..3 of %__u form the per-lane mask that is ANDed with the compare.
8629  %3 = bitcast i8 %__u to <8 x i1>
8630  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
8631  %4 = and <4 x i1> %2, %extract.i
  ; Shuffle indices 4..7 select lanes of the zeroinitializer operand, so the
  ; upper four lanes of the widened vector are zero.
8632  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
8633  %6 = bitcast <8 x i1> %5 to i8
8634  ret i8 %6
8635}
8636
8637
; Signed greater-than compare of a <4 x i64> register operand against a scalar
; i64 loaded from %__b and splatted to all four lanes; the 4-bit result is
; widened to <8 x i1> with zero upper lanes and returned as i8.
; The VLX run expects the embedded-broadcast form vpcmpgtq (%rdi){1to4}. The
; NoVLX run expects a separate vpbroadcastq, the compare on zmm registers, and
; kshiftlw/kshiftrw by 12 to keep only the low 4 of the 16 mask bits.
8638define zeroext i8 @test_vpcmpsgtq_v4i1_v8i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
8639; VLX-LABEL: test_vpcmpsgtq_v4i1_v8i1_mask_mem_b:
8640; VLX:       # %bb.0: # %entry
8641; VLX-NEXT:    vpcmpgtq (%rdi){1to4}, %ymm0, %k0
8642; VLX-NEXT:    kmovd %k0, %eax
8643; VLX-NEXT:    # kill: def $al killed $al killed $eax
8644; VLX-NEXT:    vzeroupper
8645; VLX-NEXT:    retq
8646;
8647; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v8i1_mask_mem_b:
8648; NoVLX:       # %bb.0: # %entry
8649; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
8650; NoVLX-NEXT:    vpbroadcastq (%rdi), %ymm1
8651; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0
8652; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
8653; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
8654; NoVLX-NEXT:    kmovw %k0, %eax
8655; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
8656; NoVLX-NEXT:    vzeroupper
8657; NoVLX-NEXT:    retq
8658entry:
8659  %0 = bitcast <4 x i64> %__a to <4 x i64>
  ; Splat the loaded scalar: insert it at lane 0, then shuffle lane 0 into
  ; every lane.
8660  %load = load i64, i64* %__b
8661  %vec = insertelement <4 x i64> undef, i64 %load, i32 0
8662  %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
8663  %2 = icmp sgt <4 x i64> %0, %1
  ; Shuffle indices 4..7 select lanes of the zeroinitializer operand, so the
  ; upper four lanes of the widened vector are zero.
8664  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
8665  %4 = bitcast <8 x i1> %3 to i8
8666  ret i8 %4
8667}
8668
; Masked signed greater-than compare of a <4 x i64> register operand against a
; scalar i64 loaded from %__b and splatted to all four lanes; the 4-bit result
; is widened to <8 x i1> with zero upper lanes and returned as i8.
; The VLX run expects a write-masked embedded-broadcast vpcmpgtq
; (%rsi){1to4}. The NoVLX run expects a separate vpbroadcastq, the masked
; compare on zmm registers, and kshiftlw/kshiftrw by 12 to keep the low 4 bits.
8669define zeroext i8 @test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
8670; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem_b:
8671; VLX:       # %bb.0: # %entry
8672; VLX-NEXT:    kmovd %edi, %k1
8673; VLX-NEXT:    vpcmpgtq (%rsi){1to4}, %ymm0, %k0 {%k1}
8674; VLX-NEXT:    kmovd %k0, %eax
8675; VLX-NEXT:    # kill: def $al killed $al killed $eax
8676; VLX-NEXT:    vzeroupper
8677; VLX-NEXT:    retq
8678;
8679; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem_b:
8680; NoVLX:       # %bb.0: # %entry
8681; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
8682; NoVLX-NEXT:    vpbroadcastq (%rsi), %ymm1
8683; NoVLX-NEXT:    kmovw %edi, %k1
8684; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
8685; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
8686; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
8687; NoVLX-NEXT:    kmovw %k0, %eax
8688; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
8689; NoVLX-NEXT:    vzeroupper
8690; NoVLX-NEXT:    retq
8691entry:
8692  %0 = bitcast <4 x i64> %__a to <4 x i64>
  ; Splat the loaded scalar: insert it at lane 0, then shuffle lane 0 into
  ; every lane.
8693  %load = load i64, i64* %__b
8694  %vec = insertelement <4 x i64> undef, i64 %load, i32 0
8695  %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
8696  %2 = icmp sgt <4 x i64> %0, %1
  ; Bits 0..3 of %__u form the per-lane mask. Here the mask is the first
  ; operand of the `and` and the compare result is the second.
8697  %3 = bitcast i8 %__u to <8 x i1>
8698  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
8699  %4 = and <4 x i1> %extract.i, %2
  ; Shuffle indices 4..7 select lanes of the zeroinitializer operand, so the
  ; upper four lanes of the widened vector are zero.
8700  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
8701  %6 = bitcast <8 x i1> %5 to i8
8702  ret i8 %6
8703}
8704
8705
; Signed greater-than compare (icmp sgt) of two <4 x i64> register operands;
; the 4-bit result is widened to <16 x i1> with zero upper lanes and returned
; as i16.
; The VLX run expects a ymm vpcmpgtq writing %k0. The NoVLX run expects the
; compare on zmm registers, then kshiftlw/kshiftrw by 12, which keeps only the
; low 4 of the 16 mask bits.
8706define zeroext i16 @test_vpcmpsgtq_v4i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
8707; VLX-LABEL: test_vpcmpsgtq_v4i1_v16i1_mask:
8708; VLX:       # %bb.0: # %entry
8709; VLX-NEXT:    vpcmpgtq %ymm1, %ymm0, %k0
8710; VLX-NEXT:    kmovd %k0, %eax
8711; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
8712; VLX-NEXT:    vzeroupper
8713; VLX-NEXT:    retq
8714;
8715; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v16i1_mask:
8716; NoVLX:       # %bb.0: # %entry
8717; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
8718; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
8719; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0
8720; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
8721; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
8722; NoVLX-NEXT:    kmovw %k0, %eax
8723; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
8724; NoVLX-NEXT:    vzeroupper
8725; NoVLX-NEXT:    retq
8726entry:
8727  %0 = bitcast <4 x i64> %__a to <4 x i64>
8728  %1 = bitcast <4 x i64> %__b to <4 x i64>
8729  %2 = icmp sgt <4 x i64> %0, %1
  ; Shuffle indices 4..7 select lanes of the zeroinitializer operand, so lanes
  ; 4..15 of the widened vector are zero.
8730  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
8731  %4 = bitcast <16 x i1> %3 to i16
8732  ret i16 %4
8733}
8734
; Signed greater-than compare of a <4 x i64> register operand against a
; <4 x i64> vector loaded from %__b; the 4-bit result is widened to <16 x i1>
; with zero upper lanes and returned as i16.
; The VLX run expects the load folded into vpcmpgtq (%rdi). The NoVLX run
; expects a separate vmovdqa load, the compare on zmm registers, and
; kshiftlw/kshiftrw by 12 to keep only the low 4 of the 16 mask bits.
8735define zeroext i16 @test_vpcmpsgtq_v4i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
8736; VLX-LABEL: test_vpcmpsgtq_v4i1_v16i1_mask_mem:
8737; VLX:       # %bb.0: # %entry
8738; VLX-NEXT:    vpcmpgtq (%rdi), %ymm0, %k0
8739; VLX-NEXT:    kmovd %k0, %eax
8740; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
8741; VLX-NEXT:    vzeroupper
8742; VLX-NEXT:    retq
8743;
8744; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v16i1_mask_mem:
8745; NoVLX:       # %bb.0: # %entry
8746; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
8747; NoVLX-NEXT:    vmovdqa (%rdi), %ymm1
8748; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0
8749; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
8750; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
8751; NoVLX-NEXT:    kmovw %k0, %eax
8752; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
8753; NoVLX-NEXT:    vzeroupper
8754; NoVLX-NEXT:    retq
8755entry:
8756  %0 = bitcast <4 x i64> %__a to <4 x i64>
8757  %load = load <4 x i64>, <4 x i64>* %__b
8758  %1 = bitcast <4 x i64> %load to <4 x i64>
8759  %2 = icmp sgt <4 x i64> %0, %1
  ; Shuffle indices 4..7 select lanes of the zeroinitializer operand, so lanes
  ; 4..15 of the widened vector are zero.
8760  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
8761  %4 = bitcast <16 x i1> %3 to i16
8762  ret i16 %4
8763}
8764
; Masked signed greater-than compare of two <4 x i64> register operands; the
; 4-bit result is widened to <16 x i1> with zero upper lanes and returned as
; i16.
; The VLX run expects a write-masked ymm vpcmpgtq. The NoVLX run expects the
; masked compare on zmm registers, then kshiftlw/kshiftrw by 12, which keeps
; only the low 4 of the 16 mask bits.
8765define zeroext i16 @test_masked_vpcmpsgtq_v4i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
8766; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v16i1_mask:
8767; VLX:       # %bb.0: # %entry
8768; VLX-NEXT:    kmovd %edi, %k1
8769; VLX-NEXT:    vpcmpgtq %ymm1, %ymm0, %k0 {%k1}
8770; VLX-NEXT:    kmovd %k0, %eax
8771; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
8772; VLX-NEXT:    vzeroupper
8773; VLX-NEXT:    retq
8774;
8775; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v16i1_mask:
8776; NoVLX:       # %bb.0: # %entry
8777; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
8778; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
8779; NoVLX-NEXT:    kmovw %edi, %k1
8780; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
8781; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
8782; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
8783; NoVLX-NEXT:    kmovw %k0, %eax
8784; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
8785; NoVLX-NEXT:    vzeroupper
8786; NoVLX-NEXT:    retq
8787entry:
8788  %0 = bitcast <4 x i64> %__a to <4 x i64>
8789  %1 = bitcast <4 x i64> %__b to <4 x i64>
8790  %2 = icmp sgt <4 x i64> %0, %1
  ; Bits 0..3 of %__u form the per-lane mask that is ANDed with the compare.
8791  %3 = bitcast i8 %__u to <8 x i1>
8792  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
8793  %4 = and <4 x i1> %2, %extract.i
  ; Shuffle indices 4..7 select lanes of the zeroinitializer operand, so lanes
  ; 4..15 of the widened vector are zero.
8794  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
8795  %6 = bitcast <16 x i1> %5 to i16
8796  ret i16 %6
8797}
8798
; Masked signed greater-than compare of a <4 x i64> register operand against a
; <4 x i64> vector loaded from %__b; the 4-bit result is widened to <16 x i1>
; with zero upper lanes and returned as i16.
; The VLX run expects the load folded into a write-masked vpcmpgtq (%rsi). The
; NoVLX run expects a separate vmovdqa load, the masked compare on zmm
; registers, and kshiftlw/kshiftrw by 12 to keep only the low 4 mask bits.
8799define zeroext i16 @test_masked_vpcmpsgtq_v4i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
8800; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v16i1_mask_mem:
8801; VLX:       # %bb.0: # %entry
8802; VLX-NEXT:    kmovd %edi, %k1
8803; VLX-NEXT:    vpcmpgtq (%rsi), %ymm0, %k0 {%k1}
8804; VLX-NEXT:    kmovd %k0, %eax
8805; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
8806; VLX-NEXT:    vzeroupper
8807; VLX-NEXT:    retq
8808;
8809; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v16i1_mask_mem:
8810; NoVLX:       # %bb.0: # %entry
8811; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
8812; NoVLX-NEXT:    vmovdqa (%rsi), %ymm1
8813; NoVLX-NEXT:    kmovw %edi, %k1
8814; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
8815; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
8816; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
8817; NoVLX-NEXT:    kmovw %k0, %eax
8818; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
8819; NoVLX-NEXT:    vzeroupper
8820; NoVLX-NEXT:    retq
8821entry:
8822  %0 = bitcast <4 x i64> %__a to <4 x i64>
8823  %load = load <4 x i64>, <4 x i64>* %__b
8824  %1 = bitcast <4 x i64> %load to <4 x i64>
8825  %2 = icmp sgt <4 x i64> %0, %1
  ; Bits 0..3 of %__u form the per-lane mask that is ANDed with the compare.
8826  %3 = bitcast i8 %__u to <8 x i1>
8827  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
8828  %4 = and <4 x i1> %2, %extract.i
  ; Shuffle indices 4..7 select lanes of the zeroinitializer operand, so lanes
  ; 4..15 of the widened vector are zero.
8829  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
8830  %6 = bitcast <16 x i1> %5 to i16
8831  ret i16 %6
8832}
8833
8834
; Signed greater-than compare of a <4 x i64> register operand against a scalar
; i64 loaded from %__b and splatted to all four lanes; the 4-bit result is
; widened to <16 x i1> with zero upper lanes and returned as i16.
; The VLX run expects the embedded-broadcast form vpcmpgtq (%rdi){1to4}. The
; NoVLX run expects a separate vpbroadcastq, the compare on zmm registers, and
; kshiftlw/kshiftrw by 12 to keep only the low 4 of the 16 mask bits.
8835define zeroext i16 @test_vpcmpsgtq_v4i1_v16i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
8836; VLX-LABEL: test_vpcmpsgtq_v4i1_v16i1_mask_mem_b:
8837; VLX:       # %bb.0: # %entry
8838; VLX-NEXT:    vpcmpgtq (%rdi){1to4}, %ymm0, %k0
8839; VLX-NEXT:    kmovd %k0, %eax
8840; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
8841; VLX-NEXT:    vzeroupper
8842; VLX-NEXT:    retq
8843;
8844; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v16i1_mask_mem_b:
8845; NoVLX:       # %bb.0: # %entry
8846; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
8847; NoVLX-NEXT:    vpbroadcastq (%rdi), %ymm1
8848; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0
8849; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
8850; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
8851; NoVLX-NEXT:    kmovw %k0, %eax
8852; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
8853; NoVLX-NEXT:    vzeroupper
8854; NoVLX-NEXT:    retq
8855entry:
8856  %0 = bitcast <4 x i64> %__a to <4 x i64>
  ; Splat the loaded scalar: insert it at lane 0, then shuffle lane 0 into
  ; every lane.
8857  %load = load i64, i64* %__b
8858  %vec = insertelement <4 x i64> undef, i64 %load, i32 0
8859  %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
8860  %2 = icmp sgt <4 x i64> %0, %1
  ; Shuffle indices 4..7 select lanes of the zeroinitializer operand, so lanes
  ; 4..15 of the widened vector are zero.
8861  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
8862  %4 = bitcast <16 x i1> %3 to i16
8863  ret i16 %4
8864}
8865
; Masked signed greater-than compare of a <4 x i64> register operand against a
; scalar i64 loaded from %__b and splatted to all four lanes; the 4-bit result
; is widened to <16 x i1> with zero upper lanes and returned as i16.
; The VLX run expects a write-masked embedded-broadcast vpcmpgtq
; (%rsi){1to4}. The NoVLX run expects a separate vpbroadcastq, the masked
; compare on zmm registers, and kshiftlw/kshiftrw by 12 to keep the low 4 bits.
8866define zeroext i16 @test_masked_vpcmpsgtq_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
8867; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v16i1_mask_mem_b:
8868; VLX:       # %bb.0: # %entry
8869; VLX-NEXT:    kmovd %edi, %k1
8870; VLX-NEXT:    vpcmpgtq (%rsi){1to4}, %ymm0, %k0 {%k1}
8871; VLX-NEXT:    kmovd %k0, %eax
8872; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
8873; VLX-NEXT:    vzeroupper
8874; VLX-NEXT:    retq
8875;
8876; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v16i1_mask_mem_b:
8877; NoVLX:       # %bb.0: # %entry
8878; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
8879; NoVLX-NEXT:    vpbroadcastq (%rsi), %ymm1
8880; NoVLX-NEXT:    kmovw %edi, %k1
8881; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
8882; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
8883; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
8884; NoVLX-NEXT:    kmovw %k0, %eax
8885; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
8886; NoVLX-NEXT:    vzeroupper
8887; NoVLX-NEXT:    retq
8888entry:
8889  %0 = bitcast <4 x i64> %__a to <4 x i64>
  ; Splat the loaded scalar: insert it at lane 0, then shuffle lane 0 into
  ; every lane.
8890  %load = load i64, i64* %__b
8891  %vec = insertelement <4 x i64> undef, i64 %load, i32 0
8892  %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
8893  %2 = icmp sgt <4 x i64> %0, %1
  ; Bits 0..3 of %__u form the per-lane mask. Here the mask is the first
  ; operand of the `and` and the compare result is the second.
8894  %3 = bitcast i8 %__u to <8 x i1>
8895  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
8896  %4 = and <4 x i1> %extract.i, %2
  ; Shuffle indices 4..7 select lanes of the zeroinitializer operand, so lanes
  ; 4..15 of the widened vector are zero.
8897  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
8898  %6 = bitcast <16 x i1> %5 to i16
8899  ret i16 %6
8900}
8901
8902
; Signed greater-than compare (icmp sgt) of two <4 x i64> register operands;
; the 4-bit result is widened to <32 x i1> with zero upper lanes and returned
; as i32.
; The VLX run expects a ymm vpcmpgtq writing %k0, read back with kmovd. The
; NoVLX run expects the compare on zmm registers, then kshiftlw/kshiftrw by
; 12, which keeps only the low 4 of the 16 mask bits.
8903define zeroext i32 @test_vpcmpsgtq_v4i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
8904; VLX-LABEL: test_vpcmpsgtq_v4i1_v32i1_mask:
8905; VLX:       # %bb.0: # %entry
8906; VLX-NEXT:    vpcmpgtq %ymm1, %ymm0, %k0
8907; VLX-NEXT:    kmovd %k0, %eax
8908; VLX-NEXT:    vzeroupper
8909; VLX-NEXT:    retq
8910;
8911; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v32i1_mask:
8912; NoVLX:       # %bb.0: # %entry
8913; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
8914; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
8915; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0
8916; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
8917; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
8918; NoVLX-NEXT:    kmovw %k0, %eax
8919; NoVLX-NEXT:    vzeroupper
8920; NoVLX-NEXT:    retq
8921entry:
8922  %0 = bitcast <4 x i64> %__a to <4 x i64>
8923  %1 = bitcast <4 x i64> %__b to <4 x i64>
8924  %2 = icmp sgt <4 x i64> %0, %1
  ; Shuffle indices 4..7 select lanes of the zeroinitializer operand, so lanes
  ; 4..31 of the widened vector are zero.
8925  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
8926  %4 = bitcast <32 x i1> %3 to i32
8927  ret i32 %4
8928}
8929
; Signed greater-than compare of a <4 x i64> register operand against a
; <4 x i64> vector loaded from %__b; the 4-bit result is widened to <32 x i1>
; with zero upper lanes and returned as i32.
; The VLX run expects the load folded into vpcmpgtq (%rdi). The NoVLX run
; expects a separate vmovdqa load, the compare on zmm registers, and
; kshiftlw/kshiftrw by 12 to keep only the low 4 of the 16 mask bits.
8930define zeroext i32 @test_vpcmpsgtq_v4i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
8931; VLX-LABEL: test_vpcmpsgtq_v4i1_v32i1_mask_mem:
8932; VLX:       # %bb.0: # %entry
8933; VLX-NEXT:    vpcmpgtq (%rdi), %ymm0, %k0
8934; VLX-NEXT:    kmovd %k0, %eax
8935; VLX-NEXT:    vzeroupper
8936; VLX-NEXT:    retq
8937;
8938; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v32i1_mask_mem:
8939; NoVLX:       # %bb.0: # %entry
8940; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
8941; NoVLX-NEXT:    vmovdqa (%rdi), %ymm1
8942; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0
8943; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
8944; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
8945; NoVLX-NEXT:    kmovw %k0, %eax
8946; NoVLX-NEXT:    vzeroupper
8947; NoVLX-NEXT:    retq
8948entry:
8949  %0 = bitcast <4 x i64> %__a to <4 x i64>
8950  %load = load <4 x i64>, <4 x i64>* %__b
8951  %1 = bitcast <4 x i64> %load to <4 x i64>
8952  %2 = icmp sgt <4 x i64> %0, %1
  ; Shuffle indices 4..7 select lanes of the zeroinitializer operand, so lanes
  ; 4..31 of the widened vector are zero.
8953  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
8954  %4 = bitcast <32 x i1> %3 to i32
8955  ret i32 %4
8956}
8957
; Masked signed greater-than compare of two <4 x i64> register operands; the
; 4-bit result is widened to <32 x i1> with zero upper lanes and returned as
; i32.
; The VLX run expects a write-masked ymm vpcmpgtq. The NoVLX run expects the
; masked compare on zmm registers, then kshiftlw/kshiftrw by 12, which keeps
; only the low 4 of the 16 mask bits.
8958define zeroext i32 @test_masked_vpcmpsgtq_v4i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
8959; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v32i1_mask:
8960; VLX:       # %bb.0: # %entry
8961; VLX-NEXT:    kmovd %edi, %k1
8962; VLX-NEXT:    vpcmpgtq %ymm1, %ymm0, %k0 {%k1}
8963; VLX-NEXT:    kmovd %k0, %eax
8964; VLX-NEXT:    vzeroupper
8965; VLX-NEXT:    retq
8966;
8967; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v32i1_mask:
8968; NoVLX:       # %bb.0: # %entry
8969; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
8970; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
8971; NoVLX-NEXT:    kmovw %edi, %k1
8972; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
8973; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
8974; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
8975; NoVLX-NEXT:    kmovw %k0, %eax
8976; NoVLX-NEXT:    vzeroupper
8977; NoVLX-NEXT:    retq
8978entry:
8979  %0 = bitcast <4 x i64> %__a to <4 x i64>
8980  %1 = bitcast <4 x i64> %__b to <4 x i64>
8981  %2 = icmp sgt <4 x i64> %0, %1
  ; Bits 0..3 of %__u form the per-lane mask that is ANDed with the compare.
8982  %3 = bitcast i8 %__u to <8 x i1>
8983  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
8984  %4 = and <4 x i1> %2, %extract.i
  ; Shuffle indices 4..7 select lanes of the zeroinitializer operand, so lanes
  ; 4..31 of the widened vector are zero.
8985  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
8986  %6 = bitcast <32 x i1> %5 to i32
8987  ret i32 %6
8988}
8989
; Masked signed greater-than compare of a <4 x i64> register operand against a
; <4 x i64> vector loaded from %__b; the 4-bit result is widened to <32 x i1>
; with zero upper lanes and returned as i32.
; The VLX run expects the load folded into a write-masked vpcmpgtq (%rsi). The
; NoVLX run expects a separate vmovdqa load, the masked compare on zmm
; registers, and kshiftlw/kshiftrw by 12 to keep only the low 4 mask bits.
8990define zeroext i32 @test_masked_vpcmpsgtq_v4i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
8991; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v32i1_mask_mem:
8992; VLX:       # %bb.0: # %entry
8993; VLX-NEXT:    kmovd %edi, %k1
8994; VLX-NEXT:    vpcmpgtq (%rsi), %ymm0, %k0 {%k1}
8995; VLX-NEXT:    kmovd %k0, %eax
8996; VLX-NEXT:    vzeroupper
8997; VLX-NEXT:    retq
8998;
8999; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v32i1_mask_mem:
9000; NoVLX:       # %bb.0: # %entry
9001; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
9002; NoVLX-NEXT:    vmovdqa (%rsi), %ymm1
9003; NoVLX-NEXT:    kmovw %edi, %k1
9004; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
9005; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
9006; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
9007; NoVLX-NEXT:    kmovw %k0, %eax
9008; NoVLX-NEXT:    vzeroupper
9009; NoVLX-NEXT:    retq
9010entry:
9011  %0 = bitcast <4 x i64> %__a to <4 x i64>
9012  %load = load <4 x i64>, <4 x i64>* %__b
9013  %1 = bitcast <4 x i64> %load to <4 x i64>
9014  %2 = icmp sgt <4 x i64> %0, %1
  ; Bits 0..3 of %__u form the per-lane mask that is ANDed with the compare.
9015  %3 = bitcast i8 %__u to <8 x i1>
9016  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
9017  %4 = and <4 x i1> %2, %extract.i
  ; Shuffle indices 4..7 select lanes of the zeroinitializer operand, so lanes
  ; 4..31 of the widened vector are zero.
9018  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
9019  %6 = bitcast <32 x i1> %5 to i32
9020  ret i32 %6
9021}
9022
9023
; Signed greater-than compare of a <4 x i64> register operand against a scalar
; i64 loaded from %__b and splatted to all four lanes; the 4-bit result is
; widened to <32 x i1> with zero upper lanes and returned as i32.
; The VLX run expects the embedded-broadcast form vpcmpgtq (%rdi){1to4}. The
; NoVLX run expects a separate vpbroadcastq, the compare on zmm registers, and
; kshiftlw/kshiftrw by 12 to keep only the low 4 of the 16 mask bits.
9024define zeroext i32 @test_vpcmpsgtq_v4i1_v32i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
9025; VLX-LABEL: test_vpcmpsgtq_v4i1_v32i1_mask_mem_b:
9026; VLX:       # %bb.0: # %entry
9027; VLX-NEXT:    vpcmpgtq (%rdi){1to4}, %ymm0, %k0
9028; VLX-NEXT:    kmovd %k0, %eax
9029; VLX-NEXT:    vzeroupper
9030; VLX-NEXT:    retq
9031;
9032; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v32i1_mask_mem_b:
9033; NoVLX:       # %bb.0: # %entry
9034; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
9035; NoVLX-NEXT:    vpbroadcastq (%rdi), %ymm1
9036; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0
9037; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
9038; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
9039; NoVLX-NEXT:    kmovw %k0, %eax
9040; NoVLX-NEXT:    vzeroupper
9041; NoVLX-NEXT:    retq
9042entry:
9043  %0 = bitcast <4 x i64> %__a to <4 x i64>
  ; Splat the loaded scalar: insert it at lane 0, then shuffle lane 0 into
  ; every lane.
9044  %load = load i64, i64* %__b
9045  %vec = insertelement <4 x i64> undef, i64 %load, i32 0
9046  %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
9047  %2 = icmp sgt <4 x i64> %0, %1
  ; Shuffle indices 4..7 select lanes of the zeroinitializer operand, so lanes
  ; 4..31 of the widened vector are zero.
9048  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
9049  %4 = bitcast <32 x i1> %3 to i32
9050  ret i32 %4
9051}
9052
; Masked signed greater-than compare of a <4 x i64> register operand against a
; scalar i64 loaded from %__b and splatted to all four lanes; the 4-bit result
; is widened to <32 x i1> with zero upper lanes and returned as i32.
; The VLX run expects a write-masked embedded-broadcast vpcmpgtq
; (%rsi){1to4}. The NoVLX run expects a separate vpbroadcastq, the masked
; compare on zmm registers, and kshiftlw/kshiftrw by 12 to keep the low 4 bits.
9053define zeroext i32 @test_masked_vpcmpsgtq_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
9054; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v32i1_mask_mem_b:
9055; VLX:       # %bb.0: # %entry
9056; VLX-NEXT:    kmovd %edi, %k1
9057; VLX-NEXT:    vpcmpgtq (%rsi){1to4}, %ymm0, %k0 {%k1}
9058; VLX-NEXT:    kmovd %k0, %eax
9059; VLX-NEXT:    vzeroupper
9060; VLX-NEXT:    retq
9061;
9062; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v32i1_mask_mem_b:
9063; NoVLX:       # %bb.0: # %entry
9064; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
9065; NoVLX-NEXT:    vpbroadcastq (%rsi), %ymm1
9066; NoVLX-NEXT:    kmovw %edi, %k1
9067; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
9068; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
9069; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
9070; NoVLX-NEXT:    kmovw %k0, %eax
9071; NoVLX-NEXT:    vzeroupper
9072; NoVLX-NEXT:    retq
9073entry:
9074  %0 = bitcast <4 x i64> %__a to <4 x i64>
  ; Splat the loaded scalar: insert it at lane 0, then shuffle lane 0 into
  ; every lane.
9075  %load = load i64, i64* %__b
9076  %vec = insertelement <4 x i64> undef, i64 %load, i32 0
9077  %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
9078  %2 = icmp sgt <4 x i64> %0, %1
  ; Bits 0..3 of %__u form the per-lane mask. Here the mask is the first
  ; operand of the `and` and the compare result is the second.
9079  %3 = bitcast i8 %__u to <8 x i1>
9080  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
9081  %4 = and <4 x i1> %extract.i, %2
  ; Shuffle indices 4..7 select lanes of the zeroinitializer operand, so lanes
  ; 4..31 of the widened vector are zero.
9082  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
9083  %6 = bitcast <32 x i1> %5 to i32
9084  ret i32 %6
9085}
9086
9087
; Signed greater-than compare of two <4 x i64> register operands. The <4 x i1>
; result is widened to <64 x i1> with zero upper lanes and returned as an i64
; bitmask.
; The VLX run expects a ymm vpcmpgtq into a mask register read back with kmovq;
; the NoVLX run expects a zmm-wide compare, a kshiftlw/kshiftrw $12 pair that
; keeps only the low four mask bits, and a movzwl of the result.
9088define zeroext i64 @test_vpcmpsgtq_v4i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
9089; VLX-LABEL: test_vpcmpsgtq_v4i1_v64i1_mask:
9090; VLX:       # %bb.0: # %entry
9091; VLX-NEXT:    vpcmpgtq %ymm1, %ymm0, %k0
9092; VLX-NEXT:    kmovq %k0, %rax
9093; VLX-NEXT:    vzeroupper
9094; VLX-NEXT:    retq
9095;
9096; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v64i1_mask:
9097; NoVLX:       # %bb.0: # %entry
9098; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
9099; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
9100; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0
9101; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
9102; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
9103; NoVLX-NEXT:    kmovw %k0, %eax
9104; NoVLX-NEXT:    movzwl %ax, %eax
9105; NoVLX-NEXT:    vzeroupper
9106; NoVLX-NEXT:    retq
9107entry:
9108  %0 = bitcast <4 x i64> %__a to <4 x i64>
9109  %1 = bitcast <4 x i64> %__b to <4 x i64>
9110  %2 = icmp sgt <4 x i64> %0, %1
  ; Lanes 0-3 carry the compare bits; all higher lanes read the zeroinitializer operand.
9111  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
9112  %4 = bitcast <64 x i1> %3 to i64
9113  ret i64 %4
9114}
9115
; Signed greater-than compare of %__a against a <4 x i64> vector loaded from
; %__b. The <4 x i1> result is widened to <64 x i1> with zero upper lanes and
; returned as an i64 bitmask.
; The VLX run expects the load folded into vpcmpgtq as a memory operand; the
; NoVLX run expects a separate vmovdqa load, a zmm-wide compare, a
; kshiftlw/kshiftrw $12 pair, and a movzwl of the result.
9116define zeroext i64 @test_vpcmpsgtq_v4i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
9117; VLX-LABEL: test_vpcmpsgtq_v4i1_v64i1_mask_mem:
9118; VLX:       # %bb.0: # %entry
9119; VLX-NEXT:    vpcmpgtq (%rdi), %ymm0, %k0
9120; VLX-NEXT:    kmovq %k0, %rax
9121; VLX-NEXT:    vzeroupper
9122; VLX-NEXT:    retq
9123;
9124; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v64i1_mask_mem:
9125; NoVLX:       # %bb.0: # %entry
9126; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
9127; NoVLX-NEXT:    vmovdqa (%rdi), %ymm1
9128; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0
9129; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
9130; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
9131; NoVLX-NEXT:    kmovw %k0, %eax
9132; NoVLX-NEXT:    movzwl %ax, %eax
9133; NoVLX-NEXT:    vzeroupper
9134; NoVLX-NEXT:    retq
9135entry:
9136  %0 = bitcast <4 x i64> %__a to <4 x i64>
9137  %load = load <4 x i64>, <4 x i64>* %__b
9138  %1 = bitcast <4 x i64> %load to <4 x i64>
9139  %2 = icmp sgt <4 x i64> %0, %1
  ; Lanes 0-3 carry the compare bits; all higher lanes read the zeroinitializer operand.
9140  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
9141  %4 = bitcast <64 x i1> %3 to i64
9142  ret i64 %4
9143}
9144
; Masked signed greater-than compare of two <4 x i64> register operands. The
; compare result is ANDed with the low four bits of %__u, widened to <64 x i1>
; with zero upper lanes, and returned as an i64 bitmask.
; The VLX run expects a write-masked ymm vpcmpgtq; the NoVLX run expects a
; write-masked zmm-wide compare, a kshiftlw/kshiftrw $12 pair, and a movzwl of
; the result.
9145define zeroext i64 @test_masked_vpcmpsgtq_v4i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
9146; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask:
9147; VLX:       # %bb.0: # %entry
9148; VLX-NEXT:    kmovd %edi, %k1
9149; VLX-NEXT:    vpcmpgtq %ymm1, %ymm0, %k0 {%k1}
9150; VLX-NEXT:    kmovq %k0, %rax
9151; VLX-NEXT:    vzeroupper
9152; VLX-NEXT:    retq
9153;
9154; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask:
9155; NoVLX:       # %bb.0: # %entry
9156; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
9157; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
9158; NoVLX-NEXT:    kmovw %edi, %k1
9159; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
9160; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
9161; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
9162; NoVLX-NEXT:    kmovw %k0, %eax
9163; NoVLX-NEXT:    movzwl %ax, %eax
9164; NoVLX-NEXT:    vzeroupper
9165; NoVLX-NEXT:    retq
9166entry:
9167  %0 = bitcast <4 x i64> %__a to <4 x i64>
9168  %1 = bitcast <4 x i64> %__b to <4 x i64>
9169  %2 = icmp sgt <4 x i64> %0, %1
  ; Only the low four bits of the i8 mask take part in the AND.
9170  %3 = bitcast i8 %__u to <8 x i1>
9171  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
9172  %4 = and <4 x i1> %2, %extract.i
  ; Lanes 0-3 carry the masked compare bits; all higher lanes read the zeroinitializer operand.
9173  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
9174  %6 = bitcast <64 x i1> %5 to i64
9175  ret i64 %6
9176}
9177
; Masked signed greater-than compare of %__a against a <4 x i64> vector loaded
; from %__b. The compare result is ANDed with the low four bits of %__u, widened
; to <64 x i1> with zero upper lanes, and returned as an i64 bitmask.
; The VLX run expects a write-masked vpcmpgtq with the load folded in; the NoVLX
; run expects a separate vmovdqa load, a write-masked zmm-wide compare, a
; kshiftlw/kshiftrw $12 pair, and a movzwl of the result.
9178define zeroext i64 @test_masked_vpcmpsgtq_v4i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
9179; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask_mem:
9180; VLX:       # %bb.0: # %entry
9181; VLX-NEXT:    kmovd %edi, %k1
9182; VLX-NEXT:    vpcmpgtq (%rsi), %ymm0, %k0 {%k1}
9183; VLX-NEXT:    kmovq %k0, %rax
9184; VLX-NEXT:    vzeroupper
9185; VLX-NEXT:    retq
9186;
9187; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask_mem:
9188; NoVLX:       # %bb.0: # %entry
9189; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
9190; NoVLX-NEXT:    vmovdqa (%rsi), %ymm1
9191; NoVLX-NEXT:    kmovw %edi, %k1
9192; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
9193; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
9194; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
9195; NoVLX-NEXT:    kmovw %k0, %eax
9196; NoVLX-NEXT:    movzwl %ax, %eax
9197; NoVLX-NEXT:    vzeroupper
9198; NoVLX-NEXT:    retq
9199entry:
9200  %0 = bitcast <4 x i64> %__a to <4 x i64>
9201  %load = load <4 x i64>, <4 x i64>* %__b
9202  %1 = bitcast <4 x i64> %load to <4 x i64>
9203  %2 = icmp sgt <4 x i64> %0, %1
  ; Only the low four bits of the i8 mask take part in the AND.
9204  %3 = bitcast i8 %__u to <8 x i1>
9205  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
9206  %4 = and <4 x i1> %2, %extract.i
  ; Lanes 0-3 carry the masked compare bits; all higher lanes read the zeroinitializer operand.
9207  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
9208  %6 = bitcast <64 x i1> %5 to i64
9209  ret i64 %6
9210}
9211
9212
; Signed greater-than compare of the four i64 lanes of %__a against a scalar i64
; loaded from %__b and splatted to every lane. The <4 x i1> result is widened to
; <64 x i1> with zero upper lanes and returned as an i64 bitmask.
; The VLX run expects vpcmpgtq with a {1to4} broadcast memory operand; the NoVLX
; run expects a vpbroadcastq load, a zmm-wide compare, a kshiftlw/kshiftrw $12
; pair, and a movzwl of the result.
9213define zeroext i64 @test_vpcmpsgtq_v4i1_v64i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
9214; VLX-LABEL: test_vpcmpsgtq_v4i1_v64i1_mask_mem_b:
9215; VLX:       # %bb.0: # %entry
9216; VLX-NEXT:    vpcmpgtq (%rdi){1to4}, %ymm0, %k0
9217; VLX-NEXT:    kmovq %k0, %rax
9218; VLX-NEXT:    vzeroupper
9219; VLX-NEXT:    retq
9220;
9221; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v64i1_mask_mem_b:
9222; NoVLX:       # %bb.0: # %entry
9223; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
9224; NoVLX-NEXT:    vpbroadcastq (%rdi), %ymm1
9225; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0
9226; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
9227; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
9228; NoVLX-NEXT:    kmovw %k0, %eax
9229; NoVLX-NEXT:    movzwl %ax, %eax
9230; NoVLX-NEXT:    vzeroupper
9231; NoVLX-NEXT:    retq
9232entry:
9233  %0 = bitcast <4 x i64> %__a to <4 x i64>
9234  %load = load i64, i64* %__b
  ; Splat the loaded scalar: insert it into lane 0, then shuffle lane 0 into every lane.
9235  %vec = insertelement <4 x i64> undef, i64 %load, i32 0
9236  %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
9237  %2 = icmp sgt <4 x i64> %0, %1
  ; Lanes 0-3 carry the compare bits; all higher lanes read the zeroinitializer operand.
9238  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
9239  %4 = bitcast <64 x i1> %3 to i64
9240  ret i64 %4
9241}
9242
; Masked signed greater-than compare of the four i64 lanes of %__a against a
; scalar i64 loaded from %__b and splatted to every lane. The compare result is
; ANDed with the low four bits of %__u, widened to <64 x i1> with zero upper
; lanes, and returned as an i64 bitmask.
; The VLX run expects one write-masked vpcmpgtq with a {1to4} broadcast memory
; operand; the NoVLX run expects a vpbroadcastq load, a write-masked zmm-wide
; compare, a kshiftlw/kshiftrw $12 pair, and a movzwl of the result.
9243define zeroext i64 @test_masked_vpcmpsgtq_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
9244; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask_mem_b:
9245; VLX:       # %bb.0: # %entry
9246; VLX-NEXT:    kmovd %edi, %k1
9247; VLX-NEXT:    vpcmpgtq (%rsi){1to4}, %ymm0, %k0 {%k1}
9248; VLX-NEXT:    kmovq %k0, %rax
9249; VLX-NEXT:    vzeroupper
9250; VLX-NEXT:    retq
9251;
9252; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask_mem_b:
9253; NoVLX:       # %bb.0: # %entry
9254; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
9255; NoVLX-NEXT:    vpbroadcastq (%rsi), %ymm1
9256; NoVLX-NEXT:    kmovw %edi, %k1
9257; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
9258; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
9259; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
9260; NoVLX-NEXT:    kmovw %k0, %eax
9261; NoVLX-NEXT:    movzwl %ax, %eax
9262; NoVLX-NEXT:    vzeroupper
9263; NoVLX-NEXT:    retq
9264entry:
9265  %0 = bitcast <4 x i64> %__a to <4 x i64>
9266  %load = load i64, i64* %__b
  ; Splat the loaded scalar: insert it into lane 0, then shuffle lane 0 into every lane.
9267  %vec = insertelement <4 x i64> undef, i64 %load, i32 0
9268  %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
9269  %2 = icmp sgt <4 x i64> %0, %1
  ; Only the low four bits of the i8 mask take part in the AND.
9270  %3 = bitcast i8 %__u to <8 x i1>
9271  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
9272  %4 = and <4 x i1> %extract.i, %2
  ; Lanes 0-3 carry the masked compare bits; all higher lanes read the zeroinitializer operand.
9273  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
9274  %6 = bitcast <64 x i1> %5 to i64
9275  ret i64 %6
9276}
9277
9278
; Signed greater-than compare of two <8 x i64> register operands. The <8 x i1>
; result is widened to <16 x i1> with zero upper lanes and returned as an i16
; bitmask.
; Both runs expect a single zmm vpcmpgtq into a mask register; they differ only
; in the mask-to-GPR move (kmovd for VLX, kmovw for NoVLX).
9279define zeroext i16 @test_vpcmpsgtq_v8i1_v16i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
9280; VLX-LABEL: test_vpcmpsgtq_v8i1_v16i1_mask:
9281; VLX:       # %bb.0: # %entry
9282; VLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0
9283; VLX-NEXT:    kmovd %k0, %eax
9284; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
9285; VLX-NEXT:    vzeroupper
9286; VLX-NEXT:    retq
9287;
9288; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v16i1_mask:
9289; NoVLX:       # %bb.0: # %entry
9290; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0
9291; NoVLX-NEXT:    kmovw %k0, %eax
9292; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
9293; NoVLX-NEXT:    vzeroupper
9294; NoVLX-NEXT:    retq
9295entry:
9296  %0 = bitcast <8 x i64> %__a to <8 x i64>
9297  %1 = bitcast <8 x i64> %__b to <8 x i64>
9298  %2 = icmp sgt <8 x i64> %0, %1
  ; Lanes 0-7 carry the compare bits; lanes 8-15 read the zeroinitializer operand.
9299  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9300  %4 = bitcast <16 x i1> %3 to i16
9301  ret i16 %4
9302}
9303
; Signed greater-than compare of %__a against an <8 x i64> vector loaded from
; %__b. The <8 x i1> result is widened to <16 x i1> with zero upper lanes and
; returned as an i16 bitmask.
; Both runs expect the load folded into a zmm vpcmpgtq as a memory operand.
9304define zeroext i16 @test_vpcmpsgtq_v8i1_v16i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
9305; VLX-LABEL: test_vpcmpsgtq_v8i1_v16i1_mask_mem:
9306; VLX:       # %bb.0: # %entry
9307; VLX-NEXT:    vpcmpgtq (%rdi), %zmm0, %k0
9308; VLX-NEXT:    kmovd %k0, %eax
9309; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
9310; VLX-NEXT:    vzeroupper
9311; VLX-NEXT:    retq
9312;
9313; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v16i1_mask_mem:
9314; NoVLX:       # %bb.0: # %entry
9315; NoVLX-NEXT:    vpcmpgtq (%rdi), %zmm0, %k0
9316; NoVLX-NEXT:    kmovw %k0, %eax
9317; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
9318; NoVLX-NEXT:    vzeroupper
9319; NoVLX-NEXT:    retq
9320entry:
9321  %0 = bitcast <8 x i64> %__a to <8 x i64>
9322  %load = load <8 x i64>, <8 x i64>* %__b
9323  %1 = bitcast <8 x i64> %load to <8 x i64>
9324  %2 = icmp sgt <8 x i64> %0, %1
  ; Lanes 0-7 carry the compare bits; lanes 8-15 read the zeroinitializer operand.
9325  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9326  %4 = bitcast <16 x i1> %3 to i16
9327  ret i16 %4
9328}
9329
; Masked signed greater-than compare of two <8 x i64> register operands. The
; compare result is ANDed with the eight bits of %__u, widened to <16 x i1> with
; zero upper lanes, and returned as an i16 bitmask.
; Both runs expect %__u moved into %k1 and a single write-masked zmm vpcmpgtq.
9330define zeroext i16 @test_masked_vpcmpsgtq_v8i1_v16i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
9331; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v16i1_mask:
9332; VLX:       # %bb.0: # %entry
9333; VLX-NEXT:    kmovd %edi, %k1
9334; VLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
9335; VLX-NEXT:    kmovd %k0, %eax
9336; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
9337; VLX-NEXT:    vzeroupper
9338; VLX-NEXT:    retq
9339;
9340; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v16i1_mask:
9341; NoVLX:       # %bb.0: # %entry
9342; NoVLX-NEXT:    kmovw %edi, %k1
9343; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
9344; NoVLX-NEXT:    kmovw %k0, %eax
9345; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
9346; NoVLX-NEXT:    vzeroupper
9347; NoVLX-NEXT:    retq
9348entry:
9349  %0 = bitcast <8 x i64> %__a to <8 x i64>
9350  %1 = bitcast <8 x i64> %__b to <8 x i64>
9351  %2 = icmp sgt <8 x i64> %0, %1
9352  %3 = bitcast i8 %__u to <8 x i1>
9353  %4 = and <8 x i1> %2, %3
  ; Lanes 0-7 carry the masked compare bits; lanes 8-15 read the zeroinitializer operand.
9354  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9355  %6 = bitcast <16 x i1> %5 to i16
9356  ret i16 %6
9357}
9358
; Masked signed greater-than compare of %__a against an <8 x i64> vector loaded
; from %__b. The compare result is ANDed with the eight bits of %__u, widened to
; <16 x i1> with zero upper lanes, and returned as an i16 bitmask.
; Both runs expect a single write-masked zmm vpcmpgtq with the load folded in.
9359define zeroext i16 @test_masked_vpcmpsgtq_v8i1_v16i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
9360; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v16i1_mask_mem:
9361; VLX:       # %bb.0: # %entry
9362; VLX-NEXT:    kmovd %edi, %k1
9363; VLX-NEXT:    vpcmpgtq (%rsi), %zmm0, %k0 {%k1}
9364; VLX-NEXT:    kmovd %k0, %eax
9365; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
9366; VLX-NEXT:    vzeroupper
9367; VLX-NEXT:    retq
9368;
9369; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v16i1_mask_mem:
9370; NoVLX:       # %bb.0: # %entry
9371; NoVLX-NEXT:    kmovw %edi, %k1
9372; NoVLX-NEXT:    vpcmpgtq (%rsi), %zmm0, %k0 {%k1}
9373; NoVLX-NEXT:    kmovw %k0, %eax
9374; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
9375; NoVLX-NEXT:    vzeroupper
9376; NoVLX-NEXT:    retq
9377entry:
9378  %0 = bitcast <8 x i64> %__a to <8 x i64>
9379  %load = load <8 x i64>, <8 x i64>* %__b
9380  %1 = bitcast <8 x i64> %load to <8 x i64>
9381  %2 = icmp sgt <8 x i64> %0, %1
9382  %3 = bitcast i8 %__u to <8 x i1>
9383  %4 = and <8 x i1> %2, %3
  ; Lanes 0-7 carry the masked compare bits; lanes 8-15 read the zeroinitializer operand.
9384  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9385  %6 = bitcast <16 x i1> %5 to i16
9386  ret i16 %6
9387}
9388
9389
; Signed greater-than compare of the eight i64 lanes of %__a against a scalar
; i64 loaded from %__b and splatted to every lane. The <8 x i1> result is
; widened to <16 x i1> with zero upper lanes and returned as an i16 bitmask.
; Both runs expect a single zmm vpcmpgtq with a {1to8} broadcast memory operand.
9390define zeroext i16 @test_vpcmpsgtq_v8i1_v16i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
9391; VLX-LABEL: test_vpcmpsgtq_v8i1_v16i1_mask_mem_b:
9392; VLX:       # %bb.0: # %entry
9393; VLX-NEXT:    vpcmpgtq (%rdi){1to8}, %zmm0, %k0
9394; VLX-NEXT:    kmovd %k0, %eax
9395; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
9396; VLX-NEXT:    vzeroupper
9397; VLX-NEXT:    retq
9398;
9399; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v16i1_mask_mem_b:
9400; NoVLX:       # %bb.0: # %entry
9401; NoVLX-NEXT:    vpcmpgtq (%rdi){1to8}, %zmm0, %k0
9402; NoVLX-NEXT:    kmovw %k0, %eax
9403; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
9404; NoVLX-NEXT:    vzeroupper
9405; NoVLX-NEXT:    retq
9406entry:
9407  %0 = bitcast <8 x i64> %__a to <8 x i64>
9408  %load = load i64, i64* %__b
  ; Splat the loaded scalar: insert it into lane 0, then shuffle lane 0 into every lane.
9409  %vec = insertelement <8 x i64> undef, i64 %load, i32 0
9410  %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
9411  %2 = icmp sgt <8 x i64> %0, %1
  ; Lanes 0-7 carry the compare bits; lanes 8-15 read the zeroinitializer operand.
9412  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9413  %4 = bitcast <16 x i1> %3 to i16
9414  ret i16 %4
9415}
9416
; Masked signed greater-than compare of the eight i64 lanes of %__a against a
; scalar i64 loaded from %__b and splatted to every lane. The compare result is
; ANDed with the eight bits of %__u, widened to <16 x i1> with zero upper lanes,
; and returned as an i16 bitmask.
; Both runs expect a single write-masked zmm vpcmpgtq with a {1to8} broadcast
; memory operand.
9417define zeroext i16 @test_masked_vpcmpsgtq_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
9418; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v16i1_mask_mem_b:
9419; VLX:       # %bb.0: # %entry
9420; VLX-NEXT:    kmovd %edi, %k1
9421; VLX-NEXT:    vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1}
9422; VLX-NEXT:    kmovd %k0, %eax
9423; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
9424; VLX-NEXT:    vzeroupper
9425; VLX-NEXT:    retq
9426;
9427; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v16i1_mask_mem_b:
9428; NoVLX:       # %bb.0: # %entry
9429; NoVLX-NEXT:    kmovw %edi, %k1
9430; NoVLX-NEXT:    vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1}
9431; NoVLX-NEXT:    kmovw %k0, %eax
9432; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
9433; NoVLX-NEXT:    vzeroupper
9434; NoVLX-NEXT:    retq
9435entry:
9436  %0 = bitcast <8 x i64> %__a to <8 x i64>
9437  %load = load i64, i64* %__b
  ; Splat the loaded scalar: insert it into lane 0, then shuffle lane 0 into every lane.
9438  %vec = insertelement <8 x i64> undef, i64 %load, i32 0
9439  %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
9440  %2 = icmp sgt <8 x i64> %0, %1
9441  %3 = bitcast i8 %__u to <8 x i1>
9442  %4 = and <8 x i1> %3, %2
  ; Lanes 0-7 carry the masked compare bits; lanes 8-15 read the zeroinitializer operand.
9443  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9444  %6 = bitcast <16 x i1> %5 to i16
9445  ret i16 %6
9446}
9447
9448
; Signed greater-than compare of two <8 x i64> register operands. The <8 x i1>
; result is widened to <32 x i1> with zero upper lanes and returned as an i32
; bitmask.
; Both runs expect a single zmm vpcmpgtq followed by a mask-to-GPR move
; (kmovd for VLX, kmovw for NoVLX).
9449define zeroext i32 @test_vpcmpsgtq_v8i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
9450; VLX-LABEL: test_vpcmpsgtq_v8i1_v32i1_mask:
9451; VLX:       # %bb.0: # %entry
9452; VLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0
9453; VLX-NEXT:    kmovd %k0, %eax
9454; VLX-NEXT:    vzeroupper
9455; VLX-NEXT:    retq
9456;
9457; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v32i1_mask:
9458; NoVLX:       # %bb.0: # %entry
9459; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0
9460; NoVLX-NEXT:    kmovw %k0, %eax
9461; NoVLX-NEXT:    vzeroupper
9462; NoVLX-NEXT:    retq
9463entry:
9464  %0 = bitcast <8 x i64> %__a to <8 x i64>
9465  %1 = bitcast <8 x i64> %__b to <8 x i64>
9466  %2 = icmp sgt <8 x i64> %0, %1
  ; Lanes 0-7 carry the compare bits; all higher lanes read the zeroinitializer operand.
9467  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9468  %4 = bitcast <32 x i1> %3 to i32
9469  ret i32 %4
9470}
9471
; Signed greater-than compare of %__a against an <8 x i64> vector loaded from
; %__b. The <8 x i1> result is widened to <32 x i1> with zero upper lanes and
; returned as an i32 bitmask.
; Both runs expect the load folded into a zmm vpcmpgtq as a memory operand.
9472define zeroext i32 @test_vpcmpsgtq_v8i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
9473; VLX-LABEL: test_vpcmpsgtq_v8i1_v32i1_mask_mem:
9474; VLX:       # %bb.0: # %entry
9475; VLX-NEXT:    vpcmpgtq (%rdi), %zmm0, %k0
9476; VLX-NEXT:    kmovd %k0, %eax
9477; VLX-NEXT:    vzeroupper
9478; VLX-NEXT:    retq
9479;
9480; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v32i1_mask_mem:
9481; NoVLX:       # %bb.0: # %entry
9482; NoVLX-NEXT:    vpcmpgtq (%rdi), %zmm0, %k0
9483; NoVLX-NEXT:    kmovw %k0, %eax
9484; NoVLX-NEXT:    vzeroupper
9485; NoVLX-NEXT:    retq
9486entry:
9487  %0 = bitcast <8 x i64> %__a to <8 x i64>
9488  %load = load <8 x i64>, <8 x i64>* %__b
9489  %1 = bitcast <8 x i64> %load to <8 x i64>
9490  %2 = icmp sgt <8 x i64> %0, %1
  ; Lanes 0-7 carry the compare bits; all higher lanes read the zeroinitializer operand.
9491  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9492  %4 = bitcast <32 x i1> %3 to i32
9493  ret i32 %4
9494}
9495
; Masked signed greater-than compare of two <8 x i64> register operands. The
; compare result is ANDed with the eight bits of %__u, widened to <32 x i1> with
; zero upper lanes, and returned as an i32 bitmask.
; Both runs expect %__u moved into %k1 and a single write-masked zmm vpcmpgtq.
9496define zeroext i32 @test_masked_vpcmpsgtq_v8i1_v32i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
9497; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v32i1_mask:
9498; VLX:       # %bb.0: # %entry
9499; VLX-NEXT:    kmovd %edi, %k1
9500; VLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
9501; VLX-NEXT:    kmovd %k0, %eax
9502; VLX-NEXT:    vzeroupper
9503; VLX-NEXT:    retq
9504;
9505; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v32i1_mask:
9506; NoVLX:       # %bb.0: # %entry
9507; NoVLX-NEXT:    kmovw %edi, %k1
9508; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
9509; NoVLX-NEXT:    kmovw %k0, %eax
9510; NoVLX-NEXT:    vzeroupper
9511; NoVLX-NEXT:    retq
9512entry:
9513  %0 = bitcast <8 x i64> %__a to <8 x i64>
9514  %1 = bitcast <8 x i64> %__b to <8 x i64>
9515  %2 = icmp sgt <8 x i64> %0, %1
9516  %3 = bitcast i8 %__u to <8 x i1>
9517  %4 = and <8 x i1> %2, %3
  ; Lanes 0-7 carry the masked compare bits; all higher lanes read the zeroinitializer operand.
9518  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9519  %6 = bitcast <32 x i1> %5 to i32
9520  ret i32 %6
9521}
9522
; Masked signed greater-than compare of %__a against an <8 x i64> vector loaded
; from %__b. The compare result is ANDed with the eight bits of %__u, widened to
; <32 x i1> with zero upper lanes, and returned as an i32 bitmask.
; Both runs expect a single write-masked zmm vpcmpgtq with the load folded in.
9523define zeroext i32 @test_masked_vpcmpsgtq_v8i1_v32i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
9524; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v32i1_mask_mem:
9525; VLX:       # %bb.0: # %entry
9526; VLX-NEXT:    kmovd %edi, %k1
9527; VLX-NEXT:    vpcmpgtq (%rsi), %zmm0, %k0 {%k1}
9528; VLX-NEXT:    kmovd %k0, %eax
9529; VLX-NEXT:    vzeroupper
9530; VLX-NEXT:    retq
9531;
9532; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v32i1_mask_mem:
9533; NoVLX:       # %bb.0: # %entry
9534; NoVLX-NEXT:    kmovw %edi, %k1
9535; NoVLX-NEXT:    vpcmpgtq (%rsi), %zmm0, %k0 {%k1}
9536; NoVLX-NEXT:    kmovw %k0, %eax
9537; NoVLX-NEXT:    vzeroupper
9538; NoVLX-NEXT:    retq
9539entry:
9540  %0 = bitcast <8 x i64> %__a to <8 x i64>
9541  %load = load <8 x i64>, <8 x i64>* %__b
9542  %1 = bitcast <8 x i64> %load to <8 x i64>
9543  %2 = icmp sgt <8 x i64> %0, %1
9544  %3 = bitcast i8 %__u to <8 x i1>
9545  %4 = and <8 x i1> %2, %3
  ; Lanes 0-7 carry the masked compare bits; all higher lanes read the zeroinitializer operand.
9546  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9547  %6 = bitcast <32 x i1> %5 to i32
9548  ret i32 %6
9549}
9550
9551
; Signed greater-than compare of the eight i64 lanes of %__a against a scalar
; i64 loaded from %__b and splatted to every lane. The <8 x i1> result is
; widened to <32 x i1> with zero upper lanes and returned as an i32 bitmask.
; Both runs expect a single zmm vpcmpgtq with a {1to8} broadcast memory operand.
9552define zeroext i32 @test_vpcmpsgtq_v8i1_v32i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
9553; VLX-LABEL: test_vpcmpsgtq_v8i1_v32i1_mask_mem_b:
9554; VLX:       # %bb.0: # %entry
9555; VLX-NEXT:    vpcmpgtq (%rdi){1to8}, %zmm0, %k0
9556; VLX-NEXT:    kmovd %k0, %eax
9557; VLX-NEXT:    vzeroupper
9558; VLX-NEXT:    retq
9559;
9560; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v32i1_mask_mem_b:
9561; NoVLX:       # %bb.0: # %entry
9562; NoVLX-NEXT:    vpcmpgtq (%rdi){1to8}, %zmm0, %k0
9563; NoVLX-NEXT:    kmovw %k0, %eax
9564; NoVLX-NEXT:    vzeroupper
9565; NoVLX-NEXT:    retq
9566entry:
9567  %0 = bitcast <8 x i64> %__a to <8 x i64>
9568  %load = load i64, i64* %__b
  ; Splat the loaded scalar: insert it into lane 0, then shuffle lane 0 into every lane.
9569  %vec = insertelement <8 x i64> undef, i64 %load, i32 0
9570  %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
9571  %2 = icmp sgt <8 x i64> %0, %1
  ; Lanes 0-7 carry the compare bits; all higher lanes read the zeroinitializer operand.
9572  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9573  %4 = bitcast <32 x i1> %3 to i32
9574  ret i32 %4
9575}
9576
; Masked signed greater-than compare of the eight i64 lanes of %__a against a
; scalar i64 loaded from %__b and splatted to every lane. The compare result is
; ANDed with the eight bits of %__u, widened to <32 x i1> with zero upper lanes,
; and returned as an i32 bitmask.
; Both runs expect a single write-masked zmm vpcmpgtq with a {1to8} broadcast
; memory operand.
9577define zeroext i32 @test_masked_vpcmpsgtq_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
9578; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v32i1_mask_mem_b:
9579; VLX:       # %bb.0: # %entry
9580; VLX-NEXT:    kmovd %edi, %k1
9581; VLX-NEXT:    vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1}
9582; VLX-NEXT:    kmovd %k0, %eax
9583; VLX-NEXT:    vzeroupper
9584; VLX-NEXT:    retq
9585;
9586; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v32i1_mask_mem_b:
9587; NoVLX:       # %bb.0: # %entry
9588; NoVLX-NEXT:    kmovw %edi, %k1
9589; NoVLX-NEXT:    vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1}
9590; NoVLX-NEXT:    kmovw %k0, %eax
9591; NoVLX-NEXT:    vzeroupper
9592; NoVLX-NEXT:    retq
9593entry:
9594  %0 = bitcast <8 x i64> %__a to <8 x i64>
9595  %load = load i64, i64* %__b
  ; Splat the loaded scalar: insert it into lane 0, then shuffle lane 0 into every lane.
9596  %vec = insertelement <8 x i64> undef, i64 %load, i32 0
9597  %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
9598  %2 = icmp sgt <8 x i64> %0, %1
9599  %3 = bitcast i8 %__u to <8 x i1>
9600  %4 = and <8 x i1> %3, %2
  ; Lanes 0-7 carry the masked compare bits; all higher lanes read the zeroinitializer operand.
9601  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9602  %6 = bitcast <32 x i1> %5 to i32
9603  ret i32 %6
9604}
9605
9606
; Signed greater-than compare of two <8 x i64> register operands. The <8 x i1>
; result is widened to <64 x i1> with zero upper lanes and returned as an i64
; bitmask.
; The VLX run expects a zmm vpcmpgtq read back with kmovq; the NoVLX run expects
; the same compare followed by kmovw and a movzwl of the result.
9607define zeroext i64 @test_vpcmpsgtq_v8i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
9608; VLX-LABEL: test_vpcmpsgtq_v8i1_v64i1_mask:
9609; VLX:       # %bb.0: # %entry
9610; VLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0
9611; VLX-NEXT:    kmovq %k0, %rax
9612; VLX-NEXT:    vzeroupper
9613; VLX-NEXT:    retq
9614;
9615; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v64i1_mask:
9616; NoVLX:       # %bb.0: # %entry
9617; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0
9618; NoVLX-NEXT:    kmovw %k0, %eax
9619; NoVLX-NEXT:    movzwl %ax, %eax
9620; NoVLX-NEXT:    vzeroupper
9621; NoVLX-NEXT:    retq
9622entry:
9623  %0 = bitcast <8 x i64> %__a to <8 x i64>
9624  %1 = bitcast <8 x i64> %__b to <8 x i64>
9625  %2 = icmp sgt <8 x i64> %0, %1
  ; Lanes 0-7 carry the compare bits; all higher lanes read the zeroinitializer operand.
9626  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9627  %4 = bitcast <64 x i1> %3 to i64
9628  ret i64 %4
9629}
9630
; Signed greater-than compare of %__a against an <8 x i64> vector loaded from
; %__b. The <8 x i1> result is widened to <64 x i1> with zero upper lanes and
; returned as an i64 bitmask.
; Both runs expect the load folded into a zmm vpcmpgtq; the NoVLX run also
; expects a movzwl after the kmovw.
9631define zeroext i64 @test_vpcmpsgtq_v8i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
9632; VLX-LABEL: test_vpcmpsgtq_v8i1_v64i1_mask_mem:
9633; VLX:       # %bb.0: # %entry
9634; VLX-NEXT:    vpcmpgtq (%rdi), %zmm0, %k0
9635; VLX-NEXT:    kmovq %k0, %rax
9636; VLX-NEXT:    vzeroupper
9637; VLX-NEXT:    retq
9638;
9639; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v64i1_mask_mem:
9640; NoVLX:       # %bb.0: # %entry
9641; NoVLX-NEXT:    vpcmpgtq (%rdi), %zmm0, %k0
9642; NoVLX-NEXT:    kmovw %k0, %eax
9643; NoVLX-NEXT:    movzwl %ax, %eax
9644; NoVLX-NEXT:    vzeroupper
9645; NoVLX-NEXT:    retq
9646entry:
9647  %0 = bitcast <8 x i64> %__a to <8 x i64>
9648  %load = load <8 x i64>, <8 x i64>* %__b
9649  %1 = bitcast <8 x i64> %load to <8 x i64>
9650  %2 = icmp sgt <8 x i64> %0, %1
  ; Lanes 0-7 carry the compare bits; all higher lanes read the zeroinitializer operand.
9651  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9652  %4 = bitcast <64 x i1> %3 to i64
9653  ret i64 %4
9654}
9655
; Masked signed greater-than compare of two <8 x i64> register operands. The
; compare result is ANDed with the eight bits of %__u, widened to <64 x i1> with
; zero upper lanes, and returned as an i64 bitmask.
; Both runs expect %__u moved into %k1 and a write-masked zmm vpcmpgtq; the
; NoVLX run also expects a movzwl after the kmovw.
9656define zeroext i64 @test_masked_vpcmpsgtq_v8i1_v64i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
9657; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v64i1_mask:
9658; VLX:       # %bb.0: # %entry
9659; VLX-NEXT:    kmovd %edi, %k1
9660; VLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
9661; VLX-NEXT:    kmovq %k0, %rax
9662; VLX-NEXT:    vzeroupper
9663; VLX-NEXT:    retq
9664;
9665; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v64i1_mask:
9666; NoVLX:       # %bb.0: # %entry
9667; NoVLX-NEXT:    kmovw %edi, %k1
9668; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
9669; NoVLX-NEXT:    kmovw %k0, %eax
9670; NoVLX-NEXT:    movzwl %ax, %eax
9671; NoVLX-NEXT:    vzeroupper
9672; NoVLX-NEXT:    retq
9673entry:
9674  %0 = bitcast <8 x i64> %__a to <8 x i64>
9675  %1 = bitcast <8 x i64> %__b to <8 x i64>
9676  %2 = icmp sgt <8 x i64> %0, %1
9677  %3 = bitcast i8 %__u to <8 x i1>
9678  %4 = and <8 x i1> %2, %3
  ; Lanes 0-7 carry the masked compare bits; all higher lanes read the zeroinitializer operand.
9679  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9680  %6 = bitcast <64 x i1> %5 to i64
9681  ret i64 %6
9682}
9683
; Masked signed greater-than compare of %__a against an <8 x i64> vector loaded
; from %__b. The compare result is ANDed with the eight bits of %__u, widened to
; <64 x i1> with zero upper lanes, and returned as an i64 bitmask.
; Both runs expect a write-masked zmm vpcmpgtq with the load folded in; the
; NoVLX run also expects a movzwl after the kmovw.
9684define zeroext i64 @test_masked_vpcmpsgtq_v8i1_v64i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
9685; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v64i1_mask_mem:
9686; VLX:       # %bb.0: # %entry
9687; VLX-NEXT:    kmovd %edi, %k1
9688; VLX-NEXT:    vpcmpgtq (%rsi), %zmm0, %k0 {%k1}
9689; VLX-NEXT:    kmovq %k0, %rax
9690; VLX-NEXT:    vzeroupper
9691; VLX-NEXT:    retq
9692;
9693; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v64i1_mask_mem:
9694; NoVLX:       # %bb.0: # %entry
9695; NoVLX-NEXT:    kmovw %edi, %k1
9696; NoVLX-NEXT:    vpcmpgtq (%rsi), %zmm0, %k0 {%k1}
9697; NoVLX-NEXT:    kmovw %k0, %eax
9698; NoVLX-NEXT:    movzwl %ax, %eax
9699; NoVLX-NEXT:    vzeroupper
9700; NoVLX-NEXT:    retq
9701entry:
9702  %0 = bitcast <8 x i64> %__a to <8 x i64>
9703  %load = load <8 x i64>, <8 x i64>* %__b
9704  %1 = bitcast <8 x i64> %load to <8 x i64>
9705  %2 = icmp sgt <8 x i64> %0, %1
9706  %3 = bitcast i8 %__u to <8 x i1>
9707  %4 = and <8 x i1> %2, %3
  ; Lanes 0-7 carry the masked compare bits; all higher lanes read the zeroinitializer operand.
9708  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9709  %6 = bitcast <64 x i1> %5 to i64
9710  ret i64 %6
9711}
9712
9713
; Signed greater-than compare of the eight i64 lanes of %__a against a scalar
; i64 loaded from %__b and splatted to every lane. The <8 x i1> result is
; widened to <64 x i1> with zero upper lanes and returned as an i64 bitmask.
; Both runs expect a zmm vpcmpgtq with a {1to8} broadcast memory operand; the
; NoVLX run also expects a movzwl after the kmovw.
9714define zeroext i64 @test_vpcmpsgtq_v8i1_v64i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
9715; VLX-LABEL: test_vpcmpsgtq_v8i1_v64i1_mask_mem_b:
9716; VLX:       # %bb.0: # %entry
9717; VLX-NEXT:    vpcmpgtq (%rdi){1to8}, %zmm0, %k0
9718; VLX-NEXT:    kmovq %k0, %rax
9719; VLX-NEXT:    vzeroupper
9720; VLX-NEXT:    retq
9721;
9722; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v64i1_mask_mem_b:
9723; NoVLX:       # %bb.0: # %entry
9724; NoVLX-NEXT:    vpcmpgtq (%rdi){1to8}, %zmm0, %k0
9725; NoVLX-NEXT:    kmovw %k0, %eax
9726; NoVLX-NEXT:    movzwl %ax, %eax
9727; NoVLX-NEXT:    vzeroupper
9728; NoVLX-NEXT:    retq
9729entry:
9730  %0 = bitcast <8 x i64> %__a to <8 x i64>
9731  %load = load i64, i64* %__b
  ; Splat the loaded scalar: insert it into lane 0, then shuffle lane 0 into every lane.
9732  %vec = insertelement <8 x i64> undef, i64 %load, i32 0
9733  %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
9734  %2 = icmp sgt <8 x i64> %0, %1
  ; Lanes 0-7 carry the compare bits; all higher lanes read the zeroinitializer operand.
9735  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9736  %4 = bitcast <64 x i1> %3 to i64
9737  ret i64 %4
9738}
9739
; Masked signed greater-than compare of <8 x i64> %__a against a single i64
; loaded from %__b and splatted to all 8 lanes; the 8 result bits are ANDed
; with %__u and returned in the low bits of an i64. Both expected sequences
; use the {1to8} broadcast memory operand under write-mask %k1.
; The VLX/NoVLX assertions are autogenerated.
9740define zeroext i64 @test_masked_vpcmpsgtq_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
9741; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v64i1_mask_mem_b:
9742; VLX:       # %bb.0: # %entry
9743; VLX-NEXT:    kmovd %edi, %k1
9744; VLX-NEXT:    vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1}
9745; VLX-NEXT:    kmovq %k0, %rax
9746; VLX-NEXT:    vzeroupper
9747; VLX-NEXT:    retq
9748;
9749; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v64i1_mask_mem_b:
9750; NoVLX:       # %bb.0: # %entry
9751; NoVLX-NEXT:    kmovw %edi, %k1
9752; NoVLX-NEXT:    vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1}
9753; NoVLX-NEXT:    kmovw %k0, %eax
9754; NoVLX-NEXT:    movzwl %ax, %eax
9755; NoVLX-NEXT:    vzeroupper
9756; NoVLX-NEXT:    retq
9757entry:
  ; Same-type bitcast; leaves %__a unchanged.
9758  %0 = bitcast <8 x i64> %__a to <8 x i64>
  ; Splat the loaded scalar: insert it into lane 0, then shuffle with an
  ; all-zero index mask so every lane reads lane 0.
9759  %load = load i64, i64* %__b
9760  %vec = insertelement <8 x i64> undef, i64 %load, i32 0
9761  %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
9762  %2 = icmp sgt <8 x i64> %0, %1
  ; Keep only the compare bits whose corresponding bit of %__u is set
  ; (note the mask is the first 'and' operand in this variant).
9763  %3 = bitcast i8 %__u to <8 x i1>
9764  %4 = and <8 x i1> %3, %2
  ; Widen to 64 lanes: mask indices 0-7 take the lanes of %4, every index >= 8
  ; takes a lane of the zeroinitializer operand, so bits 8-63 are zero.
9765  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9766  %6 = bitcast <64 x i1> %5 to i64
9767  ret i64 %6
9768}
9769
9770
; Unmasked signed greater-or-equal compare of two 128-bit arguments viewed as
; <16 x i8>; the 16 result bits are returned in the low bits of an i32.
; The VLX/NoVLX assertions are the autogenerated expectations for the two RUN
; configurations at the top of the file.
9771define zeroext i32 @test_vpcmpsgeb_v16i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
9772; VLX-LABEL: test_vpcmpsgeb_v16i1_v32i1_mask:
9773; VLX:       # %bb.0: # %entry
9774; VLX-NEXT:    vpcmpnltb %xmm1, %xmm0, %k0
9775; VLX-NEXT:    kmovd %k0, %eax
9776; VLX-NEXT:    retq
9777;
9778; NoVLX-LABEL: test_vpcmpsgeb_v16i1_v32i1_mask:
9779; NoVLX:       # %bb.0: # %entry
9780; NoVLX-NEXT:    vpcmpgtb %xmm0, %xmm1, %xmm0
9781; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
9782; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
9783; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
9784; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
9785; NoVLX-NEXT:    kmovw %k0, %eax
9786; NoVLX-NEXT:    vzeroupper
9787; NoVLX-NEXT:    retq
9788entry:
  ; Reinterpret both 128-bit inputs as 16 byte lanes and compare lane-wise.
9789  %0 = bitcast <2 x i64> %__a to <16 x i8>
9790  %1 = bitcast <2 x i64> %__b to <16 x i8>
9791  %2 = icmp sge <16 x i8> %0, %1
  ; Widen to 32 lanes: mask indices 0-15 take the lanes of %2, indices 16-31
  ; take the lanes of the zeroinitializer operand, so bits 16-31 are zero.
9792  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
9793  %4 = bitcast <32 x i1> %3 to i32
9794  ret i32 %4
9795}
9796
; Unmasked signed greater-or-equal compare of %__a against a 128-bit vector
; loaded from %__b, both viewed as <16 x i8>; the 16 result bits are returned
; in the low bits of an i32. The VLX expectation folds the load into the
; compare, while the NoVLX expectation loads with a separate vmovdqa.
; The VLX/NoVLX assertions are autogenerated.
9797define zeroext i32 @test_vpcmpsgeb_v16i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
9798; VLX-LABEL: test_vpcmpsgeb_v16i1_v32i1_mask_mem:
9799; VLX:       # %bb.0: # %entry
9800; VLX-NEXT:    vpcmpnltb (%rdi), %xmm0, %k0
9801; VLX-NEXT:    kmovd %k0, %eax
9802; VLX-NEXT:    retq
9803;
9804; NoVLX-LABEL: test_vpcmpsgeb_v16i1_v32i1_mask_mem:
9805; NoVLX:       # %bb.0: # %entry
9806; NoVLX-NEXT:    vmovdqa (%rdi), %xmm1
9807; NoVLX-NEXT:    vpcmpgtb %xmm0, %xmm1, %xmm0
9808; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
9809; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
9810; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
9811; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
9812; NoVLX-NEXT:    kmovw %k0, %eax
9813; NoVLX-NEXT:    vzeroupper
9814; NoVLX-NEXT:    retq
9815entry:
  ; Reinterpret the argument and the loaded vector as 16 byte lanes each.
9816  %0 = bitcast <2 x i64> %__a to <16 x i8>
9817  %load = load <2 x i64>, <2 x i64>* %__b
9818  %1 = bitcast <2 x i64> %load to <16 x i8>
9819  %2 = icmp sge <16 x i8> %0, %1
  ; Widen to 32 lanes: mask indices 0-15 take the lanes of %2, indices 16-31
  ; take the lanes of the zeroinitializer operand, so bits 16-31 are zero.
9820  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
9821  %4 = bitcast <32 x i1> %3 to i32
9822  ret i32 %4
9823}
9824
; Masked signed greater-or-equal compare of two 128-bit arguments viewed as
; <16 x i8>; the 16 result bits are ANDed with %__u and returned in the low
; bits of an i32. The VLX expectation applies the mask through %k1, while the
; NoVLX expectation applies it with a scalar andl on the extracted bits.
; The VLX/NoVLX assertions are autogenerated.
9825define zeroext i32 @test_masked_vpcmpsgeb_v16i1_v32i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
9826; VLX-LABEL: test_masked_vpcmpsgeb_v16i1_v32i1_mask:
9827; VLX:       # %bb.0: # %entry
9828; VLX-NEXT:    kmovd %edi, %k1
9829; VLX-NEXT:    vpcmpnltb %xmm1, %xmm0, %k0 {%k1}
9830; VLX-NEXT:    kmovd %k0, %eax
9831; VLX-NEXT:    retq
9832;
9833; NoVLX-LABEL: test_masked_vpcmpsgeb_v16i1_v32i1_mask:
9834; NoVLX:       # %bb.0: # %entry
9835; NoVLX-NEXT:    vpcmpgtb %xmm0, %xmm1, %xmm0
9836; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
9837; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
9838; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
9839; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
9840; NoVLX-NEXT:    kmovw %k0, %eax
9841; NoVLX-NEXT:    andl %edi, %eax
9842; NoVLX-NEXT:    vzeroupper
9843; NoVLX-NEXT:    retq
9844entry:
  ; Reinterpret both 128-bit inputs as 16 byte lanes and compare lane-wise.
9845  %0 = bitcast <2 x i64> %__a to <16 x i8>
9846  %1 = bitcast <2 x i64> %__b to <16 x i8>
9847  %2 = icmp sge <16 x i8> %0, %1
  ; Keep only the compare bits whose corresponding bit of %__u is set.
9848  %3 = bitcast i16 %__u to <16 x i1>
9849  %4 = and <16 x i1> %2, %3
  ; Widen to 32 lanes: mask indices 0-15 take the lanes of %4, indices 16-31
  ; take the lanes of the zeroinitializer operand, so bits 16-31 are zero.
9850  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
9851  %6 = bitcast <32 x i1> %5 to i32
9852  ret i32 %6
9853}
9854
; Masked signed greater-or-equal compare of %__a against a 128-bit vector
; loaded from %__b, both viewed as <16 x i8>; the 16 result bits are ANDed
; with %__u and returned in the low bits of an i32.
; The VLX/NoVLX assertions are the autogenerated expectations for the two RUN
; configurations at the top of the file.
9855define zeroext i32 @test_masked_vpcmpsgeb_v16i1_v32i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
9856; VLX-LABEL: test_masked_vpcmpsgeb_v16i1_v32i1_mask_mem:
9857; VLX:       # %bb.0: # %entry
9858; VLX-NEXT:    kmovd %edi, %k1
9859; VLX-NEXT:    vpcmpnltb (%rsi), %xmm0, %k0 {%k1}
9860; VLX-NEXT:    kmovd %k0, %eax
9861; VLX-NEXT:    retq
9862;
9863; NoVLX-LABEL: test_masked_vpcmpsgeb_v16i1_v32i1_mask_mem:
9864; NoVLX:       # %bb.0: # %entry
9865; NoVLX-NEXT:    vmovdqa (%rsi), %xmm1
9866; NoVLX-NEXT:    vpcmpgtb %xmm0, %xmm1, %xmm0
9867; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
9868; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
9869; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
9870; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
9871; NoVLX-NEXT:    kmovw %k0, %eax
9872; NoVLX-NEXT:    andl %edi, %eax
9873; NoVLX-NEXT:    vzeroupper
9874; NoVLX-NEXT:    retq
9875entry:
  ; Reinterpret the argument and the loaded vector as 16 byte lanes each.
9876  %0 = bitcast <2 x i64> %__a to <16 x i8>
9877  %load = load <2 x i64>, <2 x i64>* %__b
9878  %1 = bitcast <2 x i64> %load to <16 x i8>
9879  %2 = icmp sge <16 x i8> %0, %1
  ; Keep only the compare bits whose corresponding bit of %__u is set.
9880  %3 = bitcast i16 %__u to <16 x i1>
9881  %4 = and <16 x i1> %2, %3
  ; Widen to 32 lanes: mask indices 0-15 take the lanes of %4, indices 16-31
  ; take the lanes of the zeroinitializer operand, so bits 16-31 are zero.
9882  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
9883  %6 = bitcast <32 x i1> %5 to i32
9884  ret i32 %6
9885}
9886
9887
; Unmasked signed greater-or-equal compare of two 128-bit arguments viewed as
; <16 x i8>; the 16 result bits are returned in the low bits of an i64.
; The VLX/NoVLX assertions are the autogenerated expectations for the two RUN
; configurations at the top of the file.
9888define zeroext i64 @test_vpcmpsgeb_v16i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
9889; VLX-LABEL: test_vpcmpsgeb_v16i1_v64i1_mask:
9890; VLX:       # %bb.0: # %entry
9891; VLX-NEXT:    vpcmpnltb %xmm1, %xmm0, %k0
9892; VLX-NEXT:    kmovq %k0, %rax
9893; VLX-NEXT:    retq
9894;
9895; NoVLX-LABEL: test_vpcmpsgeb_v16i1_v64i1_mask:
9896; NoVLX:       # %bb.0: # %entry
9897; NoVLX-NEXT:    vpcmpgtb %xmm0, %xmm1, %xmm0
9898; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
9899; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
9900; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
9901; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
9902; NoVLX-NEXT:    kmovw %k0, %eax
9903; NoVLX-NEXT:    movzwl %ax, %eax
9904; NoVLX-NEXT:    vzeroupper
9905; NoVLX-NEXT:    retq
9906entry:
  ; Reinterpret both 128-bit inputs as 16 byte lanes and compare lane-wise.
9907  %0 = bitcast <2 x i64> %__a to <16 x i8>
9908  %1 = bitcast <2 x i64> %__b to <16 x i8>
9909  %2 = icmp sge <16 x i8> %0, %1
  ; Widen to 64 lanes: mask indices 0-15 take the lanes of %2, every index
  ; >= 16 takes a lane of the zeroinitializer operand, so bits 16-63 are zero.
9910  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
9911  %4 = bitcast <64 x i1> %3 to i64
9912  ret i64 %4
9913}
9914
; Unmasked signed greater-or-equal compare of %__a against a 128-bit vector
; loaded from %__b, both viewed as <16 x i8>; the 16 result bits are returned
; in the low bits of an i64.
; The VLX/NoVLX assertions are the autogenerated expectations for the two RUN
; configurations at the top of the file.
9915define zeroext i64 @test_vpcmpsgeb_v16i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
9916; VLX-LABEL: test_vpcmpsgeb_v16i1_v64i1_mask_mem:
9917; VLX:       # %bb.0: # %entry
9918; VLX-NEXT:    vpcmpnltb (%rdi), %xmm0, %k0
9919; VLX-NEXT:    kmovq %k0, %rax
9920; VLX-NEXT:    retq
9921;
9922; NoVLX-LABEL: test_vpcmpsgeb_v16i1_v64i1_mask_mem:
9923; NoVLX:       # %bb.0: # %entry
9924; NoVLX-NEXT:    vmovdqa (%rdi), %xmm1
9925; NoVLX-NEXT:    vpcmpgtb %xmm0, %xmm1, %xmm0
9926; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
9927; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
9928; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
9929; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
9930; NoVLX-NEXT:    kmovw %k0, %eax
9931; NoVLX-NEXT:    movzwl %ax, %eax
9932; NoVLX-NEXT:    vzeroupper
9933; NoVLX-NEXT:    retq
9934entry:
  ; Reinterpret the argument and the loaded vector as 16 byte lanes each.
9935  %0 = bitcast <2 x i64> %__a to <16 x i8>
9936  %load = load <2 x i64>, <2 x i64>* %__b
9937  %1 = bitcast <2 x i64> %load to <16 x i8>
9938  %2 = icmp sge <16 x i8> %0, %1
  ; Widen to 64 lanes: mask indices 0-15 take the lanes of %2, every index
  ; >= 16 takes a lane of the zeroinitializer operand, so bits 16-63 are zero.
9939  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
9940  %4 = bitcast <64 x i1> %3 to i64
9941  ret i64 %4
9942}
9943
; Masked signed greater-or-equal compare of two 128-bit arguments viewed as
; <16 x i8>; the 16 result bits are ANDed with %__u and returned in the low
; bits of an i64.
; The VLX/NoVLX assertions are the autogenerated expectations for the two RUN
; configurations at the top of the file.
9944define zeroext i64 @test_masked_vpcmpsgeb_v16i1_v64i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
9945; VLX-LABEL: test_masked_vpcmpsgeb_v16i1_v64i1_mask:
9946; VLX:       # %bb.0: # %entry
9947; VLX-NEXT:    kmovd %edi, %k1
9948; VLX-NEXT:    vpcmpnltb %xmm1, %xmm0, %k0 {%k1}
9949; VLX-NEXT:    kmovq %k0, %rax
9950; VLX-NEXT:    retq
9951;
9952; NoVLX-LABEL: test_masked_vpcmpsgeb_v16i1_v64i1_mask:
9953; NoVLX:       # %bb.0: # %entry
9954; NoVLX-NEXT:    vpcmpgtb %xmm0, %xmm1, %xmm0
9955; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
9956; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
9957; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
9958; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
9959; NoVLX-NEXT:    kmovw %k0, %eax
9960; NoVLX-NEXT:    andl %edi, %eax
9961; NoVLX-NEXT:    vzeroupper
9962; NoVLX-NEXT:    retq
9963entry:
  ; Reinterpret both 128-bit inputs as 16 byte lanes and compare lane-wise.
9964  %0 = bitcast <2 x i64> %__a to <16 x i8>
9965  %1 = bitcast <2 x i64> %__b to <16 x i8>
9966  %2 = icmp sge <16 x i8> %0, %1
  ; Keep only the compare bits whose corresponding bit of %__u is set.
9967  %3 = bitcast i16 %__u to <16 x i1>
9968  %4 = and <16 x i1> %2, %3
  ; Widen to 64 lanes: mask indices 0-15 take the lanes of %4, every index
  ; >= 16 takes a lane of the zeroinitializer operand, so bits 16-63 are zero.
9969  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
9970  %6 = bitcast <64 x i1> %5 to i64
9971  ret i64 %6
9972}
9973
; Masked signed greater-or-equal compare of %__a against a 128-bit vector
; loaded from %__b, both viewed as <16 x i8>; the 16 result bits are ANDed
; with %__u and returned in the low bits of an i64.
; The VLX/NoVLX assertions are the autogenerated expectations for the two RUN
; configurations at the top of the file.
9974define zeroext i64 @test_masked_vpcmpsgeb_v16i1_v64i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
9975; VLX-LABEL: test_masked_vpcmpsgeb_v16i1_v64i1_mask_mem:
9976; VLX:       # %bb.0: # %entry
9977; VLX-NEXT:    kmovd %edi, %k1
9978; VLX-NEXT:    vpcmpnltb (%rsi), %xmm0, %k0 {%k1}
9979; VLX-NEXT:    kmovq %k0, %rax
9980; VLX-NEXT:    retq
9981;
9982; NoVLX-LABEL: test_masked_vpcmpsgeb_v16i1_v64i1_mask_mem:
9983; NoVLX:       # %bb.0: # %entry
9984; NoVLX-NEXT:    vmovdqa (%rsi), %xmm1
9985; NoVLX-NEXT:    vpcmpgtb %xmm0, %xmm1, %xmm0
9986; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
9987; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
9988; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
9989; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
9990; NoVLX-NEXT:    kmovw %k0, %eax
9991; NoVLX-NEXT:    andl %edi, %eax
9992; NoVLX-NEXT:    vzeroupper
9993; NoVLX-NEXT:    retq
9994entry:
  ; Reinterpret the argument and the loaded vector as 16 byte lanes each.
9995  %0 = bitcast <2 x i64> %__a to <16 x i8>
9996  %load = load <2 x i64>, <2 x i64>* %__b
9997  %1 = bitcast <2 x i64> %load to <16 x i8>
9998  %2 = icmp sge <16 x i8> %0, %1
  ; Keep only the compare bits whose corresponding bit of %__u is set.
9999  %3 = bitcast i16 %__u to <16 x i1>
10000  %4 = and <16 x i1> %2, %3
  ; Widen to 64 lanes: mask indices 0-15 take the lanes of %4, every index
  ; >= 16 takes a lane of the zeroinitializer operand, so bits 16-63 are zero.
10001  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
10002  %6 = bitcast <64 x i1> %5 to i64
10003  ret i64 %6
10004}
10005
10006
; Unmasked signed greater-or-equal compare of two 256-bit arguments viewed as
; <32 x i8>; the 32 result bits are returned in the low bits of an i64.
; The NoVLX expectation builds the result from two 16-bit halves
; (kmovw each, then shll $16 / orl).
; The VLX/NoVLX assertions are autogenerated.
10007define zeroext i64 @test_vpcmpsgeb_v32i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
10008; VLX-LABEL: test_vpcmpsgeb_v32i1_v64i1_mask:
10009; VLX:       # %bb.0: # %entry
10010; VLX-NEXT:    vpcmpnltb %ymm1, %ymm0, %k0
10011; VLX-NEXT:    kmovq %k0, %rax
10012; VLX-NEXT:    vzeroupper
10013; VLX-NEXT:    retq
10014;
10015; NoVLX-LABEL: test_vpcmpsgeb_v32i1_v64i1_mask:
10016; NoVLX:       # %bb.0: # %entry
10017; NoVLX-NEXT:    vpcmpgtb %ymm0, %ymm1, %ymm0
10018; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
10019; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm1
10020; NoVLX-NEXT:    vpslld $31, %zmm1, %zmm1
10021; NoVLX-NEXT:    vptestmd %zmm1, %zmm1, %k0
10022; NoVLX-NEXT:    kmovw %k0, %ecx
10023; NoVLX-NEXT:    vextracti128 $1, %ymm0, %xmm0
10024; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
10025; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
10026; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
10027; NoVLX-NEXT:    kmovw %k0, %eax
10028; NoVLX-NEXT:    shll $16, %eax
10029; NoVLX-NEXT:    orl %ecx, %eax
10030; NoVLX-NEXT:    vzeroupper
10031; NoVLX-NEXT:    retq
10032entry:
  ; Reinterpret both 256-bit inputs as 32 byte lanes and compare lane-wise.
10033  %0 = bitcast <4 x i64> %__a to <32 x i8>
10034  %1 = bitcast <4 x i64> %__b to <32 x i8>
10035  %2 = icmp sge <32 x i8> %0, %1
  ; Widen to 64 lanes: mask indices 0-31 take the lanes of %2, indices 32-63
  ; take the lanes of the zeroinitializer operand, so bits 32-63 are zero.
10036  %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
10037  %4 = bitcast <64 x i1> %3 to i64
10038  ret i64 %4
10039}
10040
; Unmasked signed greater-or-equal compare of %__a against a 256-bit vector
; loaded from %__b, both viewed as <32 x i8>; the 32 result bits are returned
; in the low bits of an i64. The NoVLX expectation builds the result from two
; 16-bit halves (kmovw each, then shll $16 / orl).
; The VLX/NoVLX assertions are autogenerated.
10041define zeroext i64 @test_vpcmpsgeb_v32i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
10042; VLX-LABEL: test_vpcmpsgeb_v32i1_v64i1_mask_mem:
10043; VLX:       # %bb.0: # %entry
10044; VLX-NEXT:    vpcmpnltb (%rdi), %ymm0, %k0
10045; VLX-NEXT:    kmovq %k0, %rax
10046; VLX-NEXT:    vzeroupper
10047; VLX-NEXT:    retq
10048;
10049; NoVLX-LABEL: test_vpcmpsgeb_v32i1_v64i1_mask_mem:
10050; NoVLX:       # %bb.0: # %entry
10051; NoVLX-NEXT:    vmovdqa (%rdi), %ymm1
10052; NoVLX-NEXT:    vpcmpgtb %ymm0, %ymm1, %ymm0
10053; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
10054; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm1
10055; NoVLX-NEXT:    vpslld $31, %zmm1, %zmm1
10056; NoVLX-NEXT:    vptestmd %zmm1, %zmm1, %k0
10057; NoVLX-NEXT:    kmovw %k0, %ecx
10058; NoVLX-NEXT:    vextracti128 $1, %ymm0, %xmm0
10059; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
10060; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
10061; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
10062; NoVLX-NEXT:    kmovw %k0, %eax
10063; NoVLX-NEXT:    shll $16, %eax
10064; NoVLX-NEXT:    orl %ecx, %eax
10065; NoVLX-NEXT:    vzeroupper
10066; NoVLX-NEXT:    retq
10067entry:
  ; Reinterpret the argument and the loaded vector as 32 byte lanes each.
10068  %0 = bitcast <4 x i64> %__a to <32 x i8>
10069  %load = load <4 x i64>, <4 x i64>* %__b
10070  %1 = bitcast <4 x i64> %load to <32 x i8>
10071  %2 = icmp sge <32 x i8> %0, %1
  ; Widen to 64 lanes: mask indices 0-31 take the lanes of %2, indices 32-63
  ; take the lanes of the zeroinitializer operand, so bits 32-63 are zero.
10072  %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
10073  %4 = bitcast <64 x i1> %3 to i64
10074  ret i64 %4
10075}
10076
; Masked signed greater-or-equal compare of two 256-bit arguments viewed as
; <32 x i8>; the 32 result bits are ANDed with %__u and returned in the low
; bits of an i64. The NoVLX expectation handles the two 16-bit halves
; separately, masking each with the matching half of %edi before merging.
; The VLX/NoVLX assertions are autogenerated.
10077define zeroext i64 @test_masked_vpcmpsgeb_v32i1_v64i1_mask(i32 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
10078; VLX-LABEL: test_masked_vpcmpsgeb_v32i1_v64i1_mask:
10079; VLX:       # %bb.0: # %entry
10080; VLX-NEXT:    kmovd %edi, %k1
10081; VLX-NEXT:    vpcmpnltb %ymm1, %ymm0, %k0 {%k1}
10082; VLX-NEXT:    kmovq %k0, %rax
10083; VLX-NEXT:    vzeroupper
10084; VLX-NEXT:    retq
10085;
10086; NoVLX-LABEL: test_masked_vpcmpsgeb_v32i1_v64i1_mask:
10087; NoVLX:       # %bb.0: # %entry
10088; NoVLX-NEXT:    vpcmpgtb %ymm0, %ymm1, %ymm0
10089; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
10090; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm1
10091; NoVLX-NEXT:    vpslld $31, %zmm1, %zmm1
10092; NoVLX-NEXT:    vptestmd %zmm1, %zmm1, %k0
10093; NoVLX-NEXT:    kmovw %k0, %eax
10094; NoVLX-NEXT:    andl %edi, %eax
10095; NoVLX-NEXT:    shrl $16, %edi
10096; NoVLX-NEXT:    vextracti128 $1, %ymm0, %xmm0
10097; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
10098; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
10099; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
10100; NoVLX-NEXT:    kmovw %k0, %ecx
10101; NoVLX-NEXT:    andl %edi, %ecx
10102; NoVLX-NEXT:    shll $16, %ecx
10103; NoVLX-NEXT:    movzwl %ax, %eax
10104; NoVLX-NEXT:    orl %ecx, %eax
10105; NoVLX-NEXT:    vzeroupper
10106; NoVLX-NEXT:    retq
10107entry:
  ; Reinterpret both 256-bit inputs as 32 byte lanes and compare lane-wise.
10108  %0 = bitcast <4 x i64> %__a to <32 x i8>
10109  %1 = bitcast <4 x i64> %__b to <32 x i8>
10110  %2 = icmp sge <32 x i8> %0, %1
  ; Keep only the compare bits whose corresponding bit of %__u is set.
10111  %3 = bitcast i32 %__u to <32 x i1>
10112  %4 = and <32 x i1> %2, %3
  ; Widen to 64 lanes: mask indices 0-31 take the lanes of %4, indices 32-63
  ; take the lanes of the zeroinitializer operand, so bits 32-63 are zero.
10113  %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
10114  %6 = bitcast <64 x i1> %5 to i64
10115  ret i64 %6
10116}
10117
; Masked signed greater-or-equal compare of %__a against a 256-bit vector
; loaded from %__b, both viewed as <32 x i8>; the 32 result bits are ANDed
; with %__u and returned in the low bits of an i64. The NoVLX expectation
; handles the two 16-bit halves separately, masking each with the matching
; half of %edi before merging.
; The VLX/NoVLX assertions are autogenerated.
10118define zeroext i64 @test_masked_vpcmpsgeb_v32i1_v64i1_mask_mem(i32 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
10119; VLX-LABEL: test_masked_vpcmpsgeb_v32i1_v64i1_mask_mem:
10120; VLX:       # %bb.0: # %entry
10121; VLX-NEXT:    kmovd %edi, %k1
10122; VLX-NEXT:    vpcmpnltb (%rsi), %ymm0, %k0 {%k1}
10123; VLX-NEXT:    kmovq %k0, %rax
10124; VLX-NEXT:    vzeroupper
10125; VLX-NEXT:    retq
10126;
10127; NoVLX-LABEL: test_masked_vpcmpsgeb_v32i1_v64i1_mask_mem:
10128; NoVLX:       # %bb.0: # %entry
10129; NoVLX-NEXT:    vmovdqa (%rsi), %ymm1
10130; NoVLX-NEXT:    vpcmpgtb %ymm0, %ymm1, %ymm0
10131; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
10132; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm1
10133; NoVLX-NEXT:    vpslld $31, %zmm1, %zmm1
10134; NoVLX-NEXT:    vptestmd %zmm1, %zmm1, %k0
10135; NoVLX-NEXT:    kmovw %k0, %eax
10136; NoVLX-NEXT:    andl %edi, %eax
10137; NoVLX-NEXT:    shrl $16, %edi
10138; NoVLX-NEXT:    vextracti128 $1, %ymm0, %xmm0
10139; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
10140; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
10141; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
10142; NoVLX-NEXT:    kmovw %k0, %ecx
10143; NoVLX-NEXT:    andl %edi, %ecx
10144; NoVLX-NEXT:    shll $16, %ecx
10145; NoVLX-NEXT:    movzwl %ax, %eax
10146; NoVLX-NEXT:    orl %ecx, %eax
10147; NoVLX-NEXT:    vzeroupper
10148; NoVLX-NEXT:    retq
10149entry:
  ; Reinterpret the argument and the loaded vector as 32 byte lanes each.
10150  %0 = bitcast <4 x i64> %__a to <32 x i8>
10151  %load = load <4 x i64>, <4 x i64>* %__b
10152  %1 = bitcast <4 x i64> %load to <32 x i8>
10153  %2 = icmp sge <32 x i8> %0, %1
  ; Keep only the compare bits whose corresponding bit of %__u is set.
10154  %3 = bitcast i32 %__u to <32 x i1>
10155  %4 = and <32 x i1> %2, %3
  ; Widen to 64 lanes: mask indices 0-31 take the lanes of %4, indices 32-63
  ; take the lanes of the zeroinitializer operand, so bits 32-63 are zero.
10156  %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
10157  %6 = bitcast <64 x i1> %5 to i64
10158  ret i64 %6
10159}
10160
10161
; Unmasked signed greater-or-equal compare of two 128-bit arguments viewed as
; <8 x i16>; the 8 result bits are returned in the low bits of an i16.
; The VLX/NoVLX assertions are the autogenerated expectations for the two RUN
; configurations at the top of the file.
10162define zeroext i16 @test_vpcmpsgew_v8i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
10163; VLX-LABEL: test_vpcmpsgew_v8i1_v16i1_mask:
10164; VLX:       # %bb.0: # %entry
10165; VLX-NEXT:    vpcmpnltw %xmm1, %xmm0, %k0
10166; VLX-NEXT:    kmovd %k0, %eax
10167; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
10168; VLX-NEXT:    retq
10169;
10170; NoVLX-LABEL: test_vpcmpsgew_v8i1_v16i1_mask:
10171; NoVLX:       # %bb.0: # %entry
10172; NoVLX-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
10173; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
10174; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
10175; NoVLX-NEXT:    vpsllq $63, %zmm0, %zmm0
10176; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0
10177; NoVLX-NEXT:    kmovw %k0, %eax
10178; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
10179; NoVLX-NEXT:    vzeroupper
10180; NoVLX-NEXT:    retq
10181entry:
  ; Reinterpret both 128-bit inputs as 8 word lanes and compare lane-wise.
10182  %0 = bitcast <2 x i64> %__a to <8 x i16>
10183  %1 = bitcast <2 x i64> %__b to <8 x i16>
10184  %2 = icmp sge <8 x i16> %0, %1
  ; Widen to 16 lanes: mask indices 0-7 take the lanes of %2, indices 8-15
  ; take the lanes of the zeroinitializer operand, so bits 8-15 are zero.
10185  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
10186  %4 = bitcast <16 x i1> %3 to i16
10187  ret i16 %4
10188}
10189
; Unmasked signed greater-or-equal compare of %__a against a 128-bit vector
; loaded from %__b, both viewed as <8 x i16>; the 8 result bits are returned
; in the low bits of an i16.
; The VLX/NoVLX assertions are the autogenerated expectations for the two RUN
; configurations at the top of the file.
10190define zeroext i16 @test_vpcmpsgew_v8i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
10191; VLX-LABEL: test_vpcmpsgew_v8i1_v16i1_mask_mem:
10192; VLX:       # %bb.0: # %entry
10193; VLX-NEXT:    vpcmpnltw (%rdi), %xmm0, %k0
10194; VLX-NEXT:    kmovd %k0, %eax
10195; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
10196; VLX-NEXT:    retq
10197;
10198; NoVLX-LABEL: test_vpcmpsgew_v8i1_v16i1_mask_mem:
10199; NoVLX:       # %bb.0: # %entry
10200; NoVLX-NEXT:    vmovdqa (%rdi), %xmm1
10201; NoVLX-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
10202; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
10203; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
10204; NoVLX-NEXT:    vpsllq $63, %zmm0, %zmm0
10205; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0
10206; NoVLX-NEXT:    kmovw %k0, %eax
10207; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
10208; NoVLX-NEXT:    vzeroupper
10209; NoVLX-NEXT:    retq
10210entry:
  ; Reinterpret the argument and the loaded vector as 8 word lanes each.
10211  %0 = bitcast <2 x i64> %__a to <8 x i16>
10212  %load = load <2 x i64>, <2 x i64>* %__b
10213  %1 = bitcast <2 x i64> %load to <8 x i16>
10214  %2 = icmp sge <8 x i16> %0, %1
  ; Widen to 16 lanes: mask indices 0-7 take the lanes of %2, indices 8-15
  ; take the lanes of the zeroinitializer operand, so bits 8-15 are zero.
10215  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
10216  %4 = bitcast <16 x i1> %3 to i16
10217  ret i16 %4
10218}
10219
; Masked signed greater-or-equal compare of two 128-bit arguments viewed as
; <8 x i16>; the 8 result bits are ANDed with %__u and returned in the low
; bits of an i16. Both expectations apply the mask through %k1 (on the
; compare for VLX, on the vptestmq for NoVLX).
; The VLX/NoVLX assertions are autogenerated.
10220define zeroext i16 @test_masked_vpcmpsgew_v8i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
10221; VLX-LABEL: test_masked_vpcmpsgew_v8i1_v16i1_mask:
10222; VLX:       # %bb.0: # %entry
10223; VLX-NEXT:    kmovd %edi, %k1
10224; VLX-NEXT:    vpcmpnltw %xmm1, %xmm0, %k0 {%k1}
10225; VLX-NEXT:    kmovd %k0, %eax
10226; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
10227; VLX-NEXT:    retq
10228;
10229; NoVLX-LABEL: test_masked_vpcmpsgew_v8i1_v16i1_mask:
10230; NoVLX:       # %bb.0: # %entry
10231; NoVLX-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
10232; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
10233; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
10234; NoVLX-NEXT:    vpsllq $63, %zmm0, %zmm0
10235; NoVLX-NEXT:    kmovw %edi, %k1
10236; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0 {%k1}
10237; NoVLX-NEXT:    kmovw %k0, %eax
10238; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
10239; NoVLX-NEXT:    vzeroupper
10240; NoVLX-NEXT:    retq
10241entry:
  ; Reinterpret both 128-bit inputs as 8 word lanes and compare lane-wise.
10242  %0 = bitcast <2 x i64> %__a to <8 x i16>
10243  %1 = bitcast <2 x i64> %__b to <8 x i16>
10244  %2 = icmp sge <8 x i16> %0, %1
  ; Keep only the compare bits whose corresponding bit of %__u is set.
10245  %3 = bitcast i8 %__u to <8 x i1>
10246  %4 = and <8 x i1> %2, %3
  ; Widen to 16 lanes: mask indices 0-7 take the lanes of %4, indices 8-15
  ; take the lanes of the zeroinitializer operand, so bits 8-15 are zero.
10247  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
10248  %6 = bitcast <16 x i1> %5 to i16
10249  ret i16 %6
10250}
10251
; Masked signed greater-or-equal compare of %__a against a 128-bit vector
; loaded from %__b, both viewed as <8 x i16>; the 8 result bits are ANDed
; with %__u and returned in the low bits of an i16.
; The VLX/NoVLX assertions are the autogenerated expectations for the two RUN
; configurations at the top of the file.
10252define zeroext i16 @test_masked_vpcmpsgew_v8i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
10253; VLX-LABEL: test_masked_vpcmpsgew_v8i1_v16i1_mask_mem:
10254; VLX:       # %bb.0: # %entry
10255; VLX-NEXT:    kmovd %edi, %k1
10256; VLX-NEXT:    vpcmpnltw (%rsi), %xmm0, %k0 {%k1}
10257; VLX-NEXT:    kmovd %k0, %eax
10258; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
10259; VLX-NEXT:    retq
10260;
10261; NoVLX-LABEL: test_masked_vpcmpsgew_v8i1_v16i1_mask_mem:
10262; NoVLX:       # %bb.0: # %entry
10263; NoVLX-NEXT:    vmovdqa (%rsi), %xmm1
10264; NoVLX-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
10265; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
10266; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
10267; NoVLX-NEXT:    vpsllq $63, %zmm0, %zmm0
10268; NoVLX-NEXT:    kmovw %edi, %k1
10269; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0 {%k1}
10270; NoVLX-NEXT:    kmovw %k0, %eax
10271; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
10272; NoVLX-NEXT:    vzeroupper
10273; NoVLX-NEXT:    retq
10274entry:
  ; Reinterpret the argument and the loaded vector as 8 word lanes each.
10275  %0 = bitcast <2 x i64> %__a to <8 x i16>
10276  %load = load <2 x i64>, <2 x i64>* %__b
10277  %1 = bitcast <2 x i64> %load to <8 x i16>
10278  %2 = icmp sge <8 x i16> %0, %1
  ; Keep only the compare bits whose corresponding bit of %__u is set.
10279  %3 = bitcast i8 %__u to <8 x i1>
10280  %4 = and <8 x i1> %2, %3
  ; Widen to 16 lanes: mask indices 0-7 take the lanes of %4, indices 8-15
  ; take the lanes of the zeroinitializer operand, so bits 8-15 are zero.
10281  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
10282  %6 = bitcast <16 x i1> %5 to i16
10283  ret i16 %6
10284}
10285
10286
; Unmasked signed greater-or-equal compare of two 128-bit arguments viewed as
; <8 x i16>; the 8 result bits are returned in the low bits of an i32.
; The VLX/NoVLX assertions are the autogenerated expectations for the two RUN
; configurations at the top of the file.
10287define zeroext i32 @test_vpcmpsgew_v8i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
10288; VLX-LABEL: test_vpcmpsgew_v8i1_v32i1_mask:
10289; VLX:       # %bb.0: # %entry
10290; VLX-NEXT:    vpcmpnltw %xmm1, %xmm0, %k0
10291; VLX-NEXT:    kmovd %k0, %eax
10292; VLX-NEXT:    retq
10293;
10294; NoVLX-LABEL: test_vpcmpsgew_v8i1_v32i1_mask:
10295; NoVLX:       # %bb.0: # %entry
10296; NoVLX-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
10297; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
10298; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
10299; NoVLX-NEXT:    vpsllq $63, %zmm0, %zmm0
10300; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0
10301; NoVLX-NEXT:    kmovw %k0, %eax
10302; NoVLX-NEXT:    vzeroupper
10303; NoVLX-NEXT:    retq
10304entry:
  ; Reinterpret both 128-bit inputs as 8 word lanes and compare lane-wise.
10305  %0 = bitcast <2 x i64> %__a to <8 x i16>
10306  %1 = bitcast <2 x i64> %__b to <8 x i16>
10307  %2 = icmp sge <8 x i16> %0, %1
  ; Widen to 32 lanes: mask indices 0-7 take the lanes of %2, every index >= 8
  ; takes a lane of the zeroinitializer operand, so bits 8-31 are zero.
10308  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
10309  %4 = bitcast <32 x i1> %3 to i32
10310  ret i32 %4
10311}
10312
; Unmasked signed greater-or-equal compare of %__a against a 128-bit vector
; loaded from %__b, both viewed as <8 x i16>; the 8 result bits are returned
; in the low bits of an i32.
; The VLX/NoVLX assertions are the autogenerated expectations for the two RUN
; configurations at the top of the file.
10313define zeroext i32 @test_vpcmpsgew_v8i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
10314; VLX-LABEL: test_vpcmpsgew_v8i1_v32i1_mask_mem:
10315; VLX:       # %bb.0: # %entry
10316; VLX-NEXT:    vpcmpnltw (%rdi), %xmm0, %k0
10317; VLX-NEXT:    kmovd %k0, %eax
10318; VLX-NEXT:    retq
10319;
10320; NoVLX-LABEL: test_vpcmpsgew_v8i1_v32i1_mask_mem:
10321; NoVLX:       # %bb.0: # %entry
10322; NoVLX-NEXT:    vmovdqa (%rdi), %xmm1
10323; NoVLX-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
10324; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
10325; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
10326; NoVLX-NEXT:    vpsllq $63, %zmm0, %zmm0
10327; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0
10328; NoVLX-NEXT:    kmovw %k0, %eax
10329; NoVLX-NEXT:    vzeroupper
10330; NoVLX-NEXT:    retq
10331entry:
  ; Reinterpret the argument and the loaded vector as 8 word lanes each.
10332  %0 = bitcast <2 x i64> %__a to <8 x i16>
10333  %load = load <2 x i64>, <2 x i64>* %__b
10334  %1 = bitcast <2 x i64> %load to <8 x i16>
10335  %2 = icmp sge <8 x i16> %0, %1
  ; Widen to 32 lanes: mask indices 0-7 take the lanes of %2, every index >= 8
  ; takes a lane of the zeroinitializer operand, so bits 8-31 are zero.
10336  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
10337  %4 = bitcast <32 x i1> %3 to i32
10338  ret i32 %4
10339}
10340
; Masked signed >= compare (icmp sge) of the eight i16 lanes of %__a and %__b.
; The <8 x i1> result is ANDed with %__u reinterpreted as <8 x i1>, widened to
; <32 x i1> by a shufflevector whose indices >= 8 all select zeroinitializer
; lanes, then bitcast to i32 and returned.
; Expected code: with AVX512VL+BW one vpcmpnltw under write-mask %k1; with
; AVX512F only, vpcmpgtw on swapped operands is inverted by vpternlogq $15 and
; the mask is applied on the final vptestmq.
10341define zeroext i32 @test_masked_vpcmpsgew_v8i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
10342; VLX-LABEL: test_masked_vpcmpsgew_v8i1_v32i1_mask:
10343; VLX:       # %bb.0: # %entry
10344; VLX-NEXT:    kmovd %edi, %k1
10345; VLX-NEXT:    vpcmpnltw %xmm1, %xmm0, %k0 {%k1}
10346; VLX-NEXT:    kmovd %k0, %eax
10347; VLX-NEXT:    retq
10348;
10349; NoVLX-LABEL: test_masked_vpcmpsgew_v8i1_v32i1_mask:
10350; NoVLX:       # %bb.0: # %entry
10351; NoVLX-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
10352; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
10353; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
10354; NoVLX-NEXT:    vpsllq $63, %zmm0, %zmm0
10355; NoVLX-NEXT:    kmovw %edi, %k1
10356; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0 {%k1}
10357; NoVLX-NEXT:    kmovw %k0, %eax
10358; NoVLX-NEXT:    vzeroupper
10359; NoVLX-NEXT:    retq
10360entry:
10361  %0 = bitcast <2 x i64> %__a to <8 x i16>
10362  %1 = bitcast <2 x i64> %__b to <8 x i16>
10363  %2 = icmp sge <8 x i16> %0, %1
10364  %3 = bitcast i8 %__u to <8 x i1>
10365  %4 = and <8 x i1> %2, %3
10366  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
10367  %6 = bitcast <32 x i1> %5 to i32
10368  ret i32 %6
10369}
10370
; Masked signed >= compare (icmp sge) of the eight i16 lanes of %__a against
; the vector loaded through %__b. The <8 x i1> result is ANDed with %__u
; reinterpreted as <8 x i1>, widened to <32 x i1> by a shufflevector whose
; indices >= 8 all select zeroinitializer lanes, then bitcast to i32.
; Expected code: with AVX512VL+BW the load folds into one vpcmpnltw under
; write-mask %k1; with AVX512F only, the operand is loaded with vmovdqa,
; vpcmpgtw on swapped operands is inverted by vpternlogq $15, and the mask is
; applied on the final vptestmq.
10371define zeroext i32 @test_masked_vpcmpsgew_v8i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
10372; VLX-LABEL: test_masked_vpcmpsgew_v8i1_v32i1_mask_mem:
10373; VLX:       # %bb.0: # %entry
10374; VLX-NEXT:    kmovd %edi, %k1
10375; VLX-NEXT:    vpcmpnltw (%rsi), %xmm0, %k0 {%k1}
10376; VLX-NEXT:    kmovd %k0, %eax
10377; VLX-NEXT:    retq
10378;
10379; NoVLX-LABEL: test_masked_vpcmpsgew_v8i1_v32i1_mask_mem:
10380; NoVLX:       # %bb.0: # %entry
10381; NoVLX-NEXT:    vmovdqa (%rsi), %xmm1
10382; NoVLX-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
10383; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
10384; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
10385; NoVLX-NEXT:    vpsllq $63, %zmm0, %zmm0
10386; NoVLX-NEXT:    kmovw %edi, %k1
10387; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0 {%k1}
10388; NoVLX-NEXT:    kmovw %k0, %eax
10389; NoVLX-NEXT:    vzeroupper
10390; NoVLX-NEXT:    retq
10391entry:
10392  %0 = bitcast <2 x i64> %__a to <8 x i16>
10393  %load = load <2 x i64>, <2 x i64>* %__b
10394  %1 = bitcast <2 x i64> %load to <8 x i16>
10395  %2 = icmp sge <8 x i16> %0, %1
10396  %3 = bitcast i8 %__u to <8 x i1>
10397  %4 = and <8 x i1> %2, %3
10398  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
10399  %6 = bitcast <32 x i1> %5 to i32
10400  ret i32 %6
10401}
10402
10403
; Signed >= compare (icmp sge) of the eight i16 lanes of %__a and %__b. The
; <8 x i1> result is widened to <64 x i1> by a shufflevector whose indices >= 8
; all select zeroinitializer lanes, then bitcast to i64 and returned.
; Expected code: with AVX512VL+BW one vpcmpnltw read back with kmovq; with
; AVX512F only, vpcmpgtw on swapped operands is inverted by vpternlogq $15,
; turned into a 16-bit mask, and zero-extended with movzwl.
10404define zeroext i64 @test_vpcmpsgew_v8i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
10405; VLX-LABEL: test_vpcmpsgew_v8i1_v64i1_mask:
10406; VLX:       # %bb.0: # %entry
10407; VLX-NEXT:    vpcmpnltw %xmm1, %xmm0, %k0
10408; VLX-NEXT:    kmovq %k0, %rax
10409; VLX-NEXT:    retq
10410;
10411; NoVLX-LABEL: test_vpcmpsgew_v8i1_v64i1_mask:
10412; NoVLX:       # %bb.0: # %entry
10413; NoVLX-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
10414; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
10415; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
10416; NoVLX-NEXT:    vpsllq $63, %zmm0, %zmm0
10417; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0
10418; NoVLX-NEXT:    kmovw %k0, %eax
10419; NoVLX-NEXT:    movzwl %ax, %eax
10420; NoVLX-NEXT:    vzeroupper
10421; NoVLX-NEXT:    retq
10422entry:
10423  %0 = bitcast <2 x i64> %__a to <8 x i16>
10424  %1 = bitcast <2 x i64> %__b to <8 x i16>
10425  %2 = icmp sge <8 x i16> %0, %1
10426  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
10427  %4 = bitcast <64 x i1> %3 to i64
10428  ret i64 %4
10429}
10430
; Signed >= compare (icmp sge) of the eight i16 lanes of %__a against the
; vector loaded through %__b. The <8 x i1> result is widened to <64 x i1> by a
; shufflevector whose indices >= 8 all select zeroinitializer lanes, then
; bitcast to i64 and returned.
; Expected code: with AVX512VL+BW the load folds into one vpcmpnltw read back
; with kmovq; with AVX512F only, the operand is loaded with vmovdqa, vpcmpgtw
; on swapped operands is inverted by vpternlogq $15, and the 16-bit mask is
; zero-extended with movzwl.
10431define zeroext i64 @test_vpcmpsgew_v8i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
10432; VLX-LABEL: test_vpcmpsgew_v8i1_v64i1_mask_mem:
10433; VLX:       # %bb.0: # %entry
10434; VLX-NEXT:    vpcmpnltw (%rdi), %xmm0, %k0
10435; VLX-NEXT:    kmovq %k0, %rax
10436; VLX-NEXT:    retq
10437;
10438; NoVLX-LABEL: test_vpcmpsgew_v8i1_v64i1_mask_mem:
10439; NoVLX:       # %bb.0: # %entry
10440; NoVLX-NEXT:    vmovdqa (%rdi), %xmm1
10441; NoVLX-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
10442; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
10443; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
10444; NoVLX-NEXT:    vpsllq $63, %zmm0, %zmm0
10445; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0
10446; NoVLX-NEXT:    kmovw %k0, %eax
10447; NoVLX-NEXT:    movzwl %ax, %eax
10448; NoVLX-NEXT:    vzeroupper
10449; NoVLX-NEXT:    retq
10450entry:
10451  %0 = bitcast <2 x i64> %__a to <8 x i16>
10452  %load = load <2 x i64>, <2 x i64>* %__b
10453  %1 = bitcast <2 x i64> %load to <8 x i16>
10454  %2 = icmp sge <8 x i16> %0, %1
10455  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
10456  %4 = bitcast <64 x i1> %3 to i64
10457  ret i64 %4
10458}
10459
; Masked signed >= compare (icmp sge) of the eight i16 lanes of %__a and %__b.
; The <8 x i1> result is ANDed with %__u reinterpreted as <8 x i1>, widened to
; <64 x i1> by a shufflevector whose indices >= 8 all select zeroinitializer
; lanes, then bitcast to i64 and returned.
; Expected code: with AVX512VL+BW one vpcmpnltw under write-mask %k1 read back
; with kmovq; with AVX512F only, vpcmpgtw on swapped operands is inverted by
; vpternlogq $15, the mask is applied on vptestmq, and the 16-bit result is
; zero-extended with movzwl.
10460define zeroext i64 @test_masked_vpcmpsgew_v8i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
10461; VLX-LABEL: test_masked_vpcmpsgew_v8i1_v64i1_mask:
10462; VLX:       # %bb.0: # %entry
10463; VLX-NEXT:    kmovd %edi, %k1
10464; VLX-NEXT:    vpcmpnltw %xmm1, %xmm0, %k0 {%k1}
10465; VLX-NEXT:    kmovq %k0, %rax
10466; VLX-NEXT:    retq
10467;
10468; NoVLX-LABEL: test_masked_vpcmpsgew_v8i1_v64i1_mask:
10469; NoVLX:       # %bb.0: # %entry
10470; NoVLX-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
10471; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
10472; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
10473; NoVLX-NEXT:    vpsllq $63, %zmm0, %zmm0
10474; NoVLX-NEXT:    kmovw %edi, %k1
10475; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0 {%k1}
10476; NoVLX-NEXT:    kmovw %k0, %eax
10477; NoVLX-NEXT:    movzwl %ax, %eax
10478; NoVLX-NEXT:    vzeroupper
10479; NoVLX-NEXT:    retq
10480entry:
10481  %0 = bitcast <2 x i64> %__a to <8 x i16>
10482  %1 = bitcast <2 x i64> %__b to <8 x i16>
10483  %2 = icmp sge <8 x i16> %0, %1
10484  %3 = bitcast i8 %__u to <8 x i1>
10485  %4 = and <8 x i1> %2, %3
10486  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
10487  %6 = bitcast <64 x i1> %5 to i64
10488  ret i64 %6
10489}
10490
; Masked signed >= compare (icmp sge) of the eight i16 lanes of %__a against
; the vector loaded through %__b. The <8 x i1> result is ANDed with %__u
; reinterpreted as <8 x i1>, widened to <64 x i1> by a shufflevector whose
; indices >= 8 all select zeroinitializer lanes, then bitcast to i64.
; Expected code: with AVX512VL+BW the load folds into one vpcmpnltw under
; write-mask %k1 read back with kmovq; with AVX512F only, the operand is loaded
; with vmovdqa, vpcmpgtw on swapped operands is inverted by vpternlogq $15, the
; mask is applied on vptestmq, and the result is zero-extended with movzwl.
10491define zeroext i64 @test_masked_vpcmpsgew_v8i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
10492; VLX-LABEL: test_masked_vpcmpsgew_v8i1_v64i1_mask_mem:
10493; VLX:       # %bb.0: # %entry
10494; VLX-NEXT:    kmovd %edi, %k1
10495; VLX-NEXT:    vpcmpnltw (%rsi), %xmm0, %k0 {%k1}
10496; VLX-NEXT:    kmovq %k0, %rax
10497; VLX-NEXT:    retq
10498;
10499; NoVLX-LABEL: test_masked_vpcmpsgew_v8i1_v64i1_mask_mem:
10500; NoVLX:       # %bb.0: # %entry
10501; NoVLX-NEXT:    vmovdqa (%rsi), %xmm1
10502; NoVLX-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
10503; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
10504; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
10505; NoVLX-NEXT:    vpsllq $63, %zmm0, %zmm0
10506; NoVLX-NEXT:    kmovw %edi, %k1
10507; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0 {%k1}
10508; NoVLX-NEXT:    kmovw %k0, %eax
10509; NoVLX-NEXT:    movzwl %ax, %eax
10510; NoVLX-NEXT:    vzeroupper
10511; NoVLX-NEXT:    retq
10512entry:
10513  %0 = bitcast <2 x i64> %__a to <8 x i16>
10514  %load = load <2 x i64>, <2 x i64>* %__b
10515  %1 = bitcast <2 x i64> %load to <8 x i16>
10516  %2 = icmp sge <8 x i16> %0, %1
10517  %3 = bitcast i8 %__u to <8 x i1>
10518  %4 = and <8 x i1> %2, %3
10519  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
10520  %6 = bitcast <64 x i1> %5 to i64
10521  ret i64 %6
10522}
10523
10524
; Signed >= compare (icmp sge) of the sixteen i16 lanes of %__a and %__b. The
; <16 x i1> result is widened to <32 x i1> by a shufflevector whose indices
; >= 16 select zeroinitializer lanes, then bitcast to i32 and returned.
; Expected code: with AVX512VL+BW one ymm vpcmpnltw writing a mask register;
; with AVX512F only, vpcmpgtw on swapped operands is inverted by vpternlogq $15
; and turned into a mask via vpmovsxwd/vpslld/vptestmd.
10525define zeroext i32 @test_vpcmpsgew_v16i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
10526; VLX-LABEL: test_vpcmpsgew_v16i1_v32i1_mask:
10527; VLX:       # %bb.0: # %entry
10528; VLX-NEXT:    vpcmpnltw %ymm1, %ymm0, %k0
10529; VLX-NEXT:    kmovd %k0, %eax
10530; VLX-NEXT:    vzeroupper
10531; VLX-NEXT:    retq
10532;
10533; NoVLX-LABEL: test_vpcmpsgew_v16i1_v32i1_mask:
10534; NoVLX:       # %bb.0: # %entry
10535; NoVLX-NEXT:    vpcmpgtw %ymm0, %ymm1, %ymm0
10536; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
10537; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
10538; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
10539; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
10540; NoVLX-NEXT:    kmovw %k0, %eax
10541; NoVLX-NEXT:    vzeroupper
10542; NoVLX-NEXT:    retq
10543entry:
10544  %0 = bitcast <4 x i64> %__a to <16 x i16>
10545  %1 = bitcast <4 x i64> %__b to <16 x i16>
10546  %2 = icmp sge <16 x i16> %0, %1
10547  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
10548  %4 = bitcast <32 x i1> %3 to i32
10549  ret i32 %4
10550}
10551
; Signed >= compare (icmp sge) of the sixteen i16 lanes of %__a against the
; vector loaded through %__b. The <16 x i1> result is widened to <32 x i1> by a
; shufflevector whose indices >= 16 select zeroinitializer lanes, then bitcast
; to i32 and returned.
; Expected code: with AVX512VL+BW the load folds into one ymm vpcmpnltw; with
; AVX512F only, the operand is loaded with vmovdqa, vpcmpgtw on swapped
; operands is inverted by vpternlogq $15, and the result is turned into a mask
; via vpmovsxwd/vpslld/vptestmd.
10552define zeroext i32 @test_vpcmpsgew_v16i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
10553; VLX-LABEL: test_vpcmpsgew_v16i1_v32i1_mask_mem:
10554; VLX:       # %bb.0: # %entry
10555; VLX-NEXT:    vpcmpnltw (%rdi), %ymm0, %k0
10556; VLX-NEXT:    kmovd %k0, %eax
10557; VLX-NEXT:    vzeroupper
10558; VLX-NEXT:    retq
10559;
10560; NoVLX-LABEL: test_vpcmpsgew_v16i1_v32i1_mask_mem:
10561; NoVLX:       # %bb.0: # %entry
10562; NoVLX-NEXT:    vmovdqa (%rdi), %ymm1
10563; NoVLX-NEXT:    vpcmpgtw %ymm0, %ymm1, %ymm0
10564; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
10565; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
10566; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
10567; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
10568; NoVLX-NEXT:    kmovw %k0, %eax
10569; NoVLX-NEXT:    vzeroupper
10570; NoVLX-NEXT:    retq
10571entry:
10572  %0 = bitcast <4 x i64> %__a to <16 x i16>
10573  %load = load <4 x i64>, <4 x i64>* %__b
10574  %1 = bitcast <4 x i64> %load to <16 x i16>
10575  %2 = icmp sge <16 x i16> %0, %1
10576  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
10577  %4 = bitcast <32 x i1> %3 to i32
10578  ret i32 %4
10579}
10580
; Masked signed >= compare (icmp sge) of the sixteen i16 lanes of %__a and
; %__b. The <16 x i1> result is ANDed with %__u reinterpreted as <16 x i1>,
; widened to <32 x i1> by a shufflevector whose indices >= 16 select
; zeroinitializer lanes, then bitcast to i32 and returned.
; Expected code: with AVX512VL+BW one ymm vpcmpnltw under write-mask %k1; with
; AVX512F only, vpcmpgtw on swapped operands is inverted by vpternlogq $15 and
; the mask is applied afterwards as a scalar andl with %edi.
10581define zeroext i32 @test_masked_vpcmpsgew_v16i1_v32i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
10582; VLX-LABEL: test_masked_vpcmpsgew_v16i1_v32i1_mask:
10583; VLX:       # %bb.0: # %entry
10584; VLX-NEXT:    kmovd %edi, %k1
10585; VLX-NEXT:    vpcmpnltw %ymm1, %ymm0, %k0 {%k1}
10586; VLX-NEXT:    kmovd %k0, %eax
10587; VLX-NEXT:    vzeroupper
10588; VLX-NEXT:    retq
10589;
10590; NoVLX-LABEL: test_masked_vpcmpsgew_v16i1_v32i1_mask:
10591; NoVLX:       # %bb.0: # %entry
10592; NoVLX-NEXT:    vpcmpgtw %ymm0, %ymm1, %ymm0
10593; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
10594; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
10595; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
10596; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
10597; NoVLX-NEXT:    kmovw %k0, %eax
10598; NoVLX-NEXT:    andl %edi, %eax
10599; NoVLX-NEXT:    vzeroupper
10600; NoVLX-NEXT:    retq
10601entry:
10602  %0 = bitcast <4 x i64> %__a to <16 x i16>
10603  %1 = bitcast <4 x i64> %__b to <16 x i16>
10604  %2 = icmp sge <16 x i16> %0, %1
10605  %3 = bitcast i16 %__u to <16 x i1>
10606  %4 = and <16 x i1> %2, %3
10607  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
10608  %6 = bitcast <32 x i1> %5 to i32
10609  ret i32 %6
10610}
10611
; Masked signed >= compare (icmp sge) of the sixteen i16 lanes of %__a against
; the vector loaded through %__b. The <16 x i1> result is ANDed with %__u
; reinterpreted as <16 x i1>, widened to <32 x i1> by a shufflevector whose
; indices >= 16 select zeroinitializer lanes, then bitcast to i32.
; Expected code: with AVX512VL+BW the load folds into one ymm vpcmpnltw under
; write-mask %k1; with AVX512F only, the operand is loaded with vmovdqa,
; vpcmpgtw on swapped operands is inverted by vpternlogq $15, and the mask is
; applied afterwards as a scalar andl with %edi.
10612define zeroext i32 @test_masked_vpcmpsgew_v16i1_v32i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
10613; VLX-LABEL: test_masked_vpcmpsgew_v16i1_v32i1_mask_mem:
10614; VLX:       # %bb.0: # %entry
10615; VLX-NEXT:    kmovd %edi, %k1
10616; VLX-NEXT:    vpcmpnltw (%rsi), %ymm0, %k0 {%k1}
10617; VLX-NEXT:    kmovd %k0, %eax
10618; VLX-NEXT:    vzeroupper
10619; VLX-NEXT:    retq
10620;
10621; NoVLX-LABEL: test_masked_vpcmpsgew_v16i1_v32i1_mask_mem:
10622; NoVLX:       # %bb.0: # %entry
10623; NoVLX-NEXT:    vmovdqa (%rsi), %ymm1
10624; NoVLX-NEXT:    vpcmpgtw %ymm0, %ymm1, %ymm0
10625; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
10626; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
10627; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
10628; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
10629; NoVLX-NEXT:    kmovw %k0, %eax
10630; NoVLX-NEXT:    andl %edi, %eax
10631; NoVLX-NEXT:    vzeroupper
10632; NoVLX-NEXT:    retq
10633entry:
10634  %0 = bitcast <4 x i64> %__a to <16 x i16>
10635  %load = load <4 x i64>, <4 x i64>* %__b
10636  %1 = bitcast <4 x i64> %load to <16 x i16>
10637  %2 = icmp sge <16 x i16> %0, %1
10638  %3 = bitcast i16 %__u to <16 x i1>
10639  %4 = and <16 x i1> %2, %3
10640  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
10641  %6 = bitcast <32 x i1> %5 to i32
10642  ret i32 %6
10643}
10644
10645
; Signed >= compare (icmp sge) of the sixteen i16 lanes of %__a and %__b. The
; <16 x i1> result is widened to <64 x i1> by a shufflevector whose indices
; >= 16 all select zeroinitializer lanes, then bitcast to i64 and returned.
; Expected code: with AVX512VL+BW one ymm vpcmpnltw read back with kmovq; with
; AVX512F only, vpcmpgtw on swapped operands is inverted by vpternlogq $15,
; turned into a 16-bit mask, and zero-extended with movzwl.
10646define zeroext i64 @test_vpcmpsgew_v16i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
10647; VLX-LABEL: test_vpcmpsgew_v16i1_v64i1_mask:
10648; VLX:       # %bb.0: # %entry
10649; VLX-NEXT:    vpcmpnltw %ymm1, %ymm0, %k0
10650; VLX-NEXT:    kmovq %k0, %rax
10651; VLX-NEXT:    vzeroupper
10652; VLX-NEXT:    retq
10653;
10654; NoVLX-LABEL: test_vpcmpsgew_v16i1_v64i1_mask:
10655; NoVLX:       # %bb.0: # %entry
10656; NoVLX-NEXT:    vpcmpgtw %ymm0, %ymm1, %ymm0
10657; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
10658; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
10659; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
10660; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
10661; NoVLX-NEXT:    kmovw %k0, %eax
10662; NoVLX-NEXT:    movzwl %ax, %eax
10663; NoVLX-NEXT:    vzeroupper
10664; NoVLX-NEXT:    retq
10665entry:
10666  %0 = bitcast <4 x i64> %__a to <16 x i16>
10667  %1 = bitcast <4 x i64> %__b to <16 x i16>
10668  %2 = icmp sge <16 x i16> %0, %1
10669  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
10670  %4 = bitcast <64 x i1> %3 to i64
10671  ret i64 %4
10672}
10673
; Signed >= compare (icmp sge) of the sixteen i16 lanes of %__a against the
; vector loaded through %__b. The <16 x i1> result is widened to <64 x i1> by a
; shufflevector whose indices >= 16 all select zeroinitializer lanes, then
; bitcast to i64 and returned.
; Expected code: with AVX512VL+BW the load folds into one ymm vpcmpnltw read
; back with kmovq; with AVX512F only, the operand is loaded with vmovdqa,
; vpcmpgtw on swapped operands is inverted by vpternlogq $15, and the 16-bit
; mask is zero-extended with movzwl.
10674define zeroext i64 @test_vpcmpsgew_v16i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
10675; VLX-LABEL: test_vpcmpsgew_v16i1_v64i1_mask_mem:
10676; VLX:       # %bb.0: # %entry
10677; VLX-NEXT:    vpcmpnltw (%rdi), %ymm0, %k0
10678; VLX-NEXT:    kmovq %k0, %rax
10679; VLX-NEXT:    vzeroupper
10680; VLX-NEXT:    retq
10681;
10682; NoVLX-LABEL: test_vpcmpsgew_v16i1_v64i1_mask_mem:
10683; NoVLX:       # %bb.0: # %entry
10684; NoVLX-NEXT:    vmovdqa (%rdi), %ymm1
10685; NoVLX-NEXT:    vpcmpgtw %ymm0, %ymm1, %ymm0
10686; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
10687; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
10688; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
10689; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
10690; NoVLX-NEXT:    kmovw %k0, %eax
10691; NoVLX-NEXT:    movzwl %ax, %eax
10692; NoVLX-NEXT:    vzeroupper
10693; NoVLX-NEXT:    retq
10694entry:
10695  %0 = bitcast <4 x i64> %__a to <16 x i16>
10696  %load = load <4 x i64>, <4 x i64>* %__b
10697  %1 = bitcast <4 x i64> %load to <16 x i16>
10698  %2 = icmp sge <16 x i16> %0, %1
10699  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
10700  %4 = bitcast <64 x i1> %3 to i64
10701  ret i64 %4
10702}
10703
; Masked signed >= compare (icmp sge) of the sixteen i16 lanes of %__a and
; %__b. The <16 x i1> result is ANDed with %__u reinterpreted as <16 x i1>,
; widened to <64 x i1> by a shufflevector whose indices >= 16 all select
; zeroinitializer lanes, then bitcast to i64 and returned.
; Expected code: with AVX512VL+BW one ymm vpcmpnltw under write-mask %k1 read
; back with kmovq; with AVX512F only, vpcmpgtw on swapped operands is inverted
; by vpternlogq $15 and the mask is applied afterwards as a scalar andl with
; %edi.
10704define zeroext i64 @test_masked_vpcmpsgew_v16i1_v64i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
10705; VLX-LABEL: test_masked_vpcmpsgew_v16i1_v64i1_mask:
10706; VLX:       # %bb.0: # %entry
10707; VLX-NEXT:    kmovd %edi, %k1
10708; VLX-NEXT:    vpcmpnltw %ymm1, %ymm0, %k0 {%k1}
10709; VLX-NEXT:    kmovq %k0, %rax
10710; VLX-NEXT:    vzeroupper
10711; VLX-NEXT:    retq
10712;
10713; NoVLX-LABEL: test_masked_vpcmpsgew_v16i1_v64i1_mask:
10714; NoVLX:       # %bb.0: # %entry
10715; NoVLX-NEXT:    vpcmpgtw %ymm0, %ymm1, %ymm0
10716; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
10717; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
10718; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
10719; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
10720; NoVLX-NEXT:    kmovw %k0, %eax
10721; NoVLX-NEXT:    andl %edi, %eax
10722; NoVLX-NEXT:    vzeroupper
10723; NoVLX-NEXT:    retq
10724entry:
10725  %0 = bitcast <4 x i64> %__a to <16 x i16>
10726  %1 = bitcast <4 x i64> %__b to <16 x i16>
10727  %2 = icmp sge <16 x i16> %0, %1
10728  %3 = bitcast i16 %__u to <16 x i1>
10729  %4 = and <16 x i1> %2, %3
10730  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
10731  %6 = bitcast <64 x i1> %5 to i64
10732  ret i64 %6
10733}
10734
; Masked signed >= compare (icmp sge) of the sixteen i16 lanes of %__a against
; the vector loaded through %__b. The <16 x i1> result is ANDed with %__u
; reinterpreted as <16 x i1>, widened to <64 x i1> by a shufflevector whose
; indices >= 16 all select zeroinitializer lanes, then bitcast to i64.
; Expected code: with AVX512VL+BW the load folds into one ymm vpcmpnltw under
; write-mask %k1 read back with kmovq; with AVX512F only, the operand is loaded
; with vmovdqa, vpcmpgtw on swapped operands is inverted by vpternlogq $15, and
; the mask is applied afterwards as a scalar andl with %edi.
10735define zeroext i64 @test_masked_vpcmpsgew_v16i1_v64i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
10736; VLX-LABEL: test_masked_vpcmpsgew_v16i1_v64i1_mask_mem:
10737; VLX:       # %bb.0: # %entry
10738; VLX-NEXT:    kmovd %edi, %k1
10739; VLX-NEXT:    vpcmpnltw (%rsi), %ymm0, %k0 {%k1}
10740; VLX-NEXT:    kmovq %k0, %rax
10741; VLX-NEXT:    vzeroupper
10742; VLX-NEXT:    retq
10743;
10744; NoVLX-LABEL: test_masked_vpcmpsgew_v16i1_v64i1_mask_mem:
10745; NoVLX:       # %bb.0: # %entry
10746; NoVLX-NEXT:    vmovdqa (%rsi), %ymm1
10747; NoVLX-NEXT:    vpcmpgtw %ymm0, %ymm1, %ymm0
10748; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
10749; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
10750; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
10751; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
10752; NoVLX-NEXT:    kmovw %k0, %eax
10753; NoVLX-NEXT:    andl %edi, %eax
10754; NoVLX-NEXT:    vzeroupper
10755; NoVLX-NEXT:    retq
10756entry:
10757  %0 = bitcast <4 x i64> %__a to <16 x i16>
10758  %load = load <4 x i64>, <4 x i64>* %__b
10759  %1 = bitcast <4 x i64> %load to <16 x i16>
10760  %2 = icmp sge <16 x i16> %0, %1
10761  %3 = bitcast i16 %__u to <16 x i1>
10762  %4 = and <16 x i1> %2, %3
10763  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
10764  %6 = bitcast <64 x i1> %5 to i64
10765  ret i64 %6
10766}
10767
10768
; Signed >= compare (icmp sge) of the thirty-two i16 lanes of %__a and %__b.
; The <32 x i1> result is widened to <64 x i1> by a shufflevector whose indices
; >= 32 select zeroinitializer lanes, then bitcast to i64 and returned.
; Expected code: with AVX512VL+BW one zmm vpcmpnltw read back with kmovq; with
; AVX512F only, the vectors are split into 256-bit halves (vextracti64x4 $1),
; each half is compared with vpcmpgtw on swapped operands and inverted by
; vpternlogq $15, and the two 16-bit masks are merged with shll $16 / orl.
10769define zeroext i64 @test_vpcmpsgew_v32i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
10770; VLX-LABEL: test_vpcmpsgew_v32i1_v64i1_mask:
10771; VLX:       # %bb.0: # %entry
10772; VLX-NEXT:    vpcmpnltw %zmm1, %zmm0, %k0
10773; VLX-NEXT:    kmovq %k0, %rax
10774; VLX-NEXT:    vzeroupper
10775; VLX-NEXT:    retq
10776;
10777; NoVLX-LABEL: test_vpcmpsgew_v32i1_v64i1_mask:
10778; NoVLX:       # %bb.0: # %entry
10779; NoVLX-NEXT:    vextracti64x4 $1, %zmm0, %ymm2
10780; NoVLX-NEXT:    vextracti64x4 $1, %zmm1, %ymm3
10781; NoVLX-NEXT:    vpcmpgtw %ymm2, %ymm3, %ymm2
10782; NoVLX-NEXT:    vpcmpgtw %ymm0, %ymm1, %ymm0
10783; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
10784; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
10785; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
10786; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
10787; NoVLX-NEXT:    kmovw %k0, %ecx
10788; NoVLX-NEXT:    vpternlogq $15, %zmm2, %zmm2, %zmm2
10789; NoVLX-NEXT:    vpmovsxwd %ymm2, %zmm0
10790; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
10791; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
10792; NoVLX-NEXT:    kmovw %k0, %eax
10793; NoVLX-NEXT:    shll $16, %eax
10794; NoVLX-NEXT:    orl %ecx, %eax
10795; NoVLX-NEXT:    vzeroupper
10796; NoVLX-NEXT:    retq
10797entry:
10798  %0 = bitcast <8 x i64> %__a to <32 x i16>
10799  %1 = bitcast <8 x i64> %__b to <32 x i16>
10800  %2 = icmp sge <32 x i16> %0, %1
10801  %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
10802  %4 = bitcast <64 x i1> %3 to i64
10803  ret i64 %4
10804}
10805
; Signed >= compare (icmp sge) of the thirty-two i16 lanes of %__a against the
; vector loaded through %__b. The <32 x i1> result is widened to <64 x i1> by a
; shufflevector whose indices >= 32 select zeroinitializer lanes, then bitcast
; to i64 and returned.
; Expected code: with AVX512VL+BW the load folds into one zmm vpcmpnltw read
; back with kmovq; with AVX512F only, the memory operand is loaded as two
; 256-bit halves ((%rdi) and 32(%rdi)), each half is compared with vpcmpgtw on
; swapped operands and inverted by vpternlogq $15, and the two 16-bit masks are
; merged with shll $16 / orl.
10806define zeroext i64 @test_vpcmpsgew_v32i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
10807; VLX-LABEL: test_vpcmpsgew_v32i1_v64i1_mask_mem:
10808; VLX:       # %bb.0: # %entry
10809; VLX-NEXT:    vpcmpnltw (%rdi), %zmm0, %k0
10810; VLX-NEXT:    kmovq %k0, %rax
10811; VLX-NEXT:    vzeroupper
10812; VLX-NEXT:    retq
10813;
10814; NoVLX-LABEL: test_vpcmpsgew_v32i1_v64i1_mask_mem:
10815; NoVLX:       # %bb.0: # %entry
10816; NoVLX-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
10817; NoVLX-NEXT:    vmovdqa (%rdi), %ymm2
10818; NoVLX-NEXT:    vmovdqa 32(%rdi), %ymm3
10819; NoVLX-NEXT:    vpcmpgtw %ymm1, %ymm3, %ymm1
10820; NoVLX-NEXT:    vpcmpgtw %ymm0, %ymm2, %ymm0
10821; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
10822; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
10823; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
10824; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
10825; NoVLX-NEXT:    kmovw %k0, %ecx
10826; NoVLX-NEXT:    vpternlogq $15, %zmm1, %zmm1, %zmm1
10827; NoVLX-NEXT:    vpmovsxwd %ymm1, %zmm0
10828; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
10829; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
10830; NoVLX-NEXT:    kmovw %k0, %eax
10831; NoVLX-NEXT:    shll $16, %eax
10832; NoVLX-NEXT:    orl %ecx, %eax
10833; NoVLX-NEXT:    vzeroupper
10834; NoVLX-NEXT:    retq
10835entry:
10836  %0 = bitcast <8 x i64> %__a to <32 x i16>
10837  %load = load <8 x i64>, <8 x i64>* %__b
10838  %1 = bitcast <8 x i64> %load to <32 x i16>
10839  %2 = icmp sge <32 x i16> %0, %1
10840  %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
10841  %4 = bitcast <64 x i1> %3 to i64
10842  ret i64 %4
10843}
10844
; Masked signed >= compare (icmp sge) of the thirty-two i16 lanes of %__a and
; %__b. The <32 x i1> result is ANDed with %__u reinterpreted as <32 x i1>,
; widened to <64 x i1> by a shufflevector whose indices >= 32 select
; zeroinitializer lanes, then bitcast to i64 and returned.
; Expected code: with AVX512VL+BW one zmm vpcmpnltw under write-mask %k1 read
; back with kmovq; with AVX512F only, each 256-bit half is compared with
; vpcmpgtw on swapped operands and inverted by vpternlogq $15, the low half is
; masked with %edi and the high half with %edi shifted right by 16, and the
; halves are merged with shll $16 / movzwl / orl.
10845define zeroext i64 @test_masked_vpcmpsgew_v32i1_v64i1_mask(i32 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
10846; VLX-LABEL: test_masked_vpcmpsgew_v32i1_v64i1_mask:
10847; VLX:       # %bb.0: # %entry
10848; VLX-NEXT:    kmovd %edi, %k1
10849; VLX-NEXT:    vpcmpnltw %zmm1, %zmm0, %k0 {%k1}
10850; VLX-NEXT:    kmovq %k0, %rax
10851; VLX-NEXT:    vzeroupper
10852; VLX-NEXT:    retq
10853;
10854; NoVLX-LABEL: test_masked_vpcmpsgew_v32i1_v64i1_mask:
10855; NoVLX:       # %bb.0: # %entry
10856; NoVLX-NEXT:    vpcmpgtw %ymm0, %ymm1, %ymm2
10857; NoVLX-NEXT:    vpternlogq $15, %zmm2, %zmm2, %zmm2
10858; NoVLX-NEXT:    vpmovsxwd %ymm2, %zmm2
10859; NoVLX-NEXT:    vpslld $31, %zmm2, %zmm2
10860; NoVLX-NEXT:    vptestmd %zmm2, %zmm2, %k0
10861; NoVLX-NEXT:    kmovw %k0, %eax
10862; NoVLX-NEXT:    andl %edi, %eax
10863; NoVLX-NEXT:    shrl $16, %edi
10864; NoVLX-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
10865; NoVLX-NEXT:    vextracti64x4 $1, %zmm1, %ymm1
10866; NoVLX-NEXT:    vpcmpgtw %ymm0, %ymm1, %ymm0
10867; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
10868; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
10869; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
10870; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
10871; NoVLX-NEXT:    kmovw %k0, %ecx
10872; NoVLX-NEXT:    andl %edi, %ecx
10873; NoVLX-NEXT:    shll $16, %ecx
10874; NoVLX-NEXT:    movzwl %ax, %eax
10875; NoVLX-NEXT:    orl %ecx, %eax
10876; NoVLX-NEXT:    vzeroupper
10877; NoVLX-NEXT:    retq
10878entry:
10879  %0 = bitcast <8 x i64> %__a to <32 x i16>
10880  %1 = bitcast <8 x i64> %__b to <32 x i16>
10881  %2 = icmp sge <32 x i16> %0, %1
10882  %3 = bitcast i32 %__u to <32 x i1>
10883  %4 = and <32 x i1> %2, %3
10884  %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
10885  %6 = bitcast <64 x i1> %5 to i64
10886  ret i64 %6
10887}
10888
; Masked signed >= compare (icmp sge) of the thirty-two i16 lanes of %__a
; against the vector loaded through %__b. The <32 x i1> result is ANDed with
; %__u reinterpreted as <32 x i1>, widened to <64 x i1> by a shufflevector
; whose indices >= 32 select zeroinitializer lanes, then bitcast to i64.
; Expected code: with AVX512VL+BW the load folds into one zmm vpcmpnltw under
; write-mask %k1 read back with kmovq; with AVX512F only, the memory operand is
; loaded as two 256-bit halves ((%rsi) and 32(%rsi)), each half is compared
; with vpcmpgtw on swapped operands and inverted by vpternlogq $15, the low
; half is masked with %edi and the high half with %edi shifted right by 16, and
; the halves are merged with shll $16 / movzwl / orl.
10889define zeroext i64 @test_masked_vpcmpsgew_v32i1_v64i1_mask_mem(i32 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
10890; VLX-LABEL: test_masked_vpcmpsgew_v32i1_v64i1_mask_mem:
10891; VLX:       # %bb.0: # %entry
10892; VLX-NEXT:    kmovd %edi, %k1
10893; VLX-NEXT:    vpcmpnltw (%rsi), %zmm0, %k0 {%k1}
10894; VLX-NEXT:    kmovq %k0, %rax
10895; VLX-NEXT:    vzeroupper
10896; VLX-NEXT:    retq
10897;
10898; NoVLX-LABEL: test_masked_vpcmpsgew_v32i1_v64i1_mask_mem:
10899; NoVLX:       # %bb.0: # %entry
10900; NoVLX-NEXT:    vmovdqa (%rsi), %ymm1
10901; NoVLX-NEXT:    vpcmpgtw %ymm0, %ymm1, %ymm1
10902; NoVLX-NEXT:    vpternlogq $15, %zmm1, %zmm1, %zmm1
10903; NoVLX-NEXT:    vpmovsxwd %ymm1, %zmm1
10904; NoVLX-NEXT:    vpslld $31, %zmm1, %zmm1
10905; NoVLX-NEXT:    vptestmd %zmm1, %zmm1, %k0
10906; NoVLX-NEXT:    kmovw %k0, %eax
10907; NoVLX-NEXT:    andl %edi, %eax
10908; NoVLX-NEXT:    shrl $16, %edi
10909; NoVLX-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
10910; NoVLX-NEXT:    vmovdqa 32(%rsi), %ymm1
10911; NoVLX-NEXT:    vpcmpgtw %ymm0, %ymm1, %ymm0
10912; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
10913; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
10914; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
10915; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
10916; NoVLX-NEXT:    kmovw %k0, %ecx
10917; NoVLX-NEXT:    andl %edi, %ecx
10918; NoVLX-NEXT:    shll $16, %ecx
10919; NoVLX-NEXT:    movzwl %ax, %eax
10920; NoVLX-NEXT:    orl %ecx, %eax
10921; NoVLX-NEXT:    vzeroupper
10922; NoVLX-NEXT:    retq
10923entry:
10924  %0 = bitcast <8 x i64> %__a to <32 x i16>
10925  %load = load <8 x i64>, <8 x i64>* %__b
10926  %1 = bitcast <8 x i64> %load to <32 x i16>
10927  %2 = icmp sge <32 x i16> %0, %1
10928  %3 = bitcast i32 %__u to <32 x i1>
10929  %4 = and <32 x i1> %2, %3
10930  %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
10931  %6 = bitcast <64 x i1> %5 to i64
10932  ret i64 %6
10933}
10934
10935
; Signed >= compare of two <4 x i32> register operands; the 4-lane result is
; zero-padded to <8 x i1> and returned as an i8 bitmask.
; With AVX512VL a single 128-bit vpcmpnltd writes the mask register. Without
; it the compare is done on zmm registers and the kshiftlw/kshiftrw $12 pair
; keeps only the low 4 bits of the 16-bit mask.
10936define zeroext i8 @test_vpcmpsged_v4i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
10937; VLX-LABEL: test_vpcmpsged_v4i1_v8i1_mask:
10938; VLX:       # %bb.0: # %entry
10939; VLX-NEXT:    vpcmpnltd %xmm1, %xmm0, %k0
10940; VLX-NEXT:    kmovd %k0, %eax
10941; VLX-NEXT:    # kill: def $al killed $al killed $eax
10942; VLX-NEXT:    retq
10943;
10944; NoVLX-LABEL: test_vpcmpsged_v4i1_v8i1_mask:
10945; NoVLX:       # %bb.0: # %entry
10946; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
10947; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
10948; NoVLX-NEXT:    vpcmpnltd %zmm1, %zmm0, %k0
10949; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
10950; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
10951; NoVLX-NEXT:    kmovw %k0, %eax
10952; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
10953; NoVLX-NEXT:    vzeroupper
10954; NoVLX-NEXT:    retq
10955entry:
10956  %0 = bitcast <2 x i64> %__a to <4 x i32>
10957  %1 = bitcast <2 x i64> %__b to <4 x i32>
10958  %2 = icmp sge <4 x i32> %0, %1
  ; Indices 0-3 pick the compare lanes; indices 4-7 pick from the
  ; zeroinitializer operand, so the added lanes are all false.
10959  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
10960  %4 = bitcast <8 x i1> %3 to i8
10961  ret i8 %4
10962}
10963
; Signed >= compare of a <4 x i32> register operand against a vector loaded
; from %__b; the 4-lane result is zero-padded to <8 x i1> and returned as i8.
; With AVX512VL the load is folded into vpcmpnltd as a memory operand.
; Without it the vector is loaded into xmm1 first, the compare runs on zmm
; registers, and kshiftlw/kshiftrw $12 keeps only the low 4 mask bits.
10964define zeroext i8 @test_vpcmpsged_v4i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
10965; VLX-LABEL: test_vpcmpsged_v4i1_v8i1_mask_mem:
10966; VLX:       # %bb.0: # %entry
10967; VLX-NEXT:    vpcmpnltd (%rdi), %xmm0, %k0
10968; VLX-NEXT:    kmovd %k0, %eax
10969; VLX-NEXT:    # kill: def $al killed $al killed $eax
10970; VLX-NEXT:    retq
10971;
10972; NoVLX-LABEL: test_vpcmpsged_v4i1_v8i1_mask_mem:
10973; NoVLX:       # %bb.0: # %entry
10974; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
10975; NoVLX-NEXT:    vmovdqa (%rdi), %xmm1
10976; NoVLX-NEXT:    vpcmpnltd %zmm1, %zmm0, %k0
10977; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
10978; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
10979; NoVLX-NEXT:    kmovw %k0, %eax
10980; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
10981; NoVLX-NEXT:    vzeroupper
10982; NoVLX-NEXT:    retq
10983entry:
10984  %0 = bitcast <2 x i64> %__a to <4 x i32>
10985  %load = load <2 x i64>, <2 x i64>* %__b
10986  %1 = bitcast <2 x i64> %load to <4 x i32>
10987  %2 = icmp sge <4 x i32> %0, %1
  ; Indices 0-3 pick the compare lanes; indices 4-7 pick from the
  ; zeroinitializer operand, so the added lanes are all false.
10988  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
10989  %4 = bitcast <8 x i1> %3 to i8
10990  ret i8 %4
10991}
10992
; Masked signed >= compare of two <4 x i32> register operands. The compare
; result is ANDed with the low 4 bits of %__u, zero-padded to <8 x i1> and
; returned as an i8 bitmask.
; In both configurations %edi is moved into %k1 and used as the write mask of
; vpcmpnltd. Without AVX512VL the compare runs on zmm registers and
; kshiftlw/kshiftrw $12 keeps only the low 4 mask bits.
10993define zeroext i8 @test_masked_vpcmpsged_v4i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
10994; VLX-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask:
10995; VLX:       # %bb.0: # %entry
10996; VLX-NEXT:    kmovd %edi, %k1
10997; VLX-NEXT:    vpcmpnltd %xmm1, %xmm0, %k0 {%k1}
10998; VLX-NEXT:    kmovd %k0, %eax
10999; VLX-NEXT:    # kill: def $al killed $al killed $eax
11000; VLX-NEXT:    retq
11001;
11002; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask:
11003; NoVLX:       # %bb.0: # %entry
11004; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
11005; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
11006; NoVLX-NEXT:    kmovw %edi, %k1
11007; NoVLX-NEXT:    vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
11008; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
11009; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
11010; NoVLX-NEXT:    kmovw %k0, %eax
11011; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
11012; NoVLX-NEXT:    vzeroupper
11013; NoVLX-NEXT:    retq
11014entry:
11015  %0 = bitcast <2 x i64> %__a to <4 x i32>
11016  %1 = bitcast <2 x i64> %__b to <4 x i32>
11017  %2 = icmp sge <4 x i32> %0, %1
11018  %3 = bitcast i8 %__u to <8 x i1>
  ; Only lanes 0-3 of the i8 mask take part in the AND below.
11019  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
11020  %4 = and <4 x i1> %2, %extract.i
  ; Indices 0-3 pick the masked compare lanes; indices 4-7 pick from the
  ; zeroinitializer operand, so the added lanes are all false.
11021  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
11022  %6 = bitcast <8 x i1> %5 to i8
11023  ret i8 %6
11024}
11025
; Masked signed >= compare of a <4 x i32> register operand against a vector
; loaded from %__b. The compare result is ANDed with the low 4 bits of %__u,
; zero-padded to <8 x i1> and returned as an i8 bitmask.
; %edi is moved into %k1 and used as the write mask. With AVX512VL the load is
; folded into vpcmpnltd; without it the vector is loaded into xmm1, compared
; on zmm registers, and kshiftlw/kshiftrw $12 keeps the low 4 mask bits.
11026define zeroext i8 @test_masked_vpcmpsged_v4i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
11027; VLX-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask_mem:
11028; VLX:       # %bb.0: # %entry
11029; VLX-NEXT:    kmovd %edi, %k1
11030; VLX-NEXT:    vpcmpnltd (%rsi), %xmm0, %k0 {%k1}
11031; VLX-NEXT:    kmovd %k0, %eax
11032; VLX-NEXT:    # kill: def $al killed $al killed $eax
11033; VLX-NEXT:    retq
11034;
11035; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask_mem:
11036; NoVLX:       # %bb.0: # %entry
11037; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
11038; NoVLX-NEXT:    vmovdqa (%rsi), %xmm1
11039; NoVLX-NEXT:    kmovw %edi, %k1
11040; NoVLX-NEXT:    vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
11041; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
11042; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
11043; NoVLX-NEXT:    kmovw %k0, %eax
11044; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
11045; NoVLX-NEXT:    vzeroupper
11046; NoVLX-NEXT:    retq
11047entry:
11048  %0 = bitcast <2 x i64> %__a to <4 x i32>
11049  %load = load <2 x i64>, <2 x i64>* %__b
11050  %1 = bitcast <2 x i64> %load to <4 x i32>
11051  %2 = icmp sge <4 x i32> %0, %1
11052  %3 = bitcast i8 %__u to <8 x i1>
  ; Only lanes 0-3 of the i8 mask take part in the AND below.
11053  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
11054  %4 = and <4 x i1> %2, %extract.i
  ; Indices 0-3 pick the masked compare lanes; indices 4-7 pick from the
  ; zeroinitializer operand, so the added lanes are all false.
11055  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
11056  %6 = bitcast <8 x i1> %5 to i8
11057  ret i8 %6
11058}
11059
11060
; Signed >= compare of a <4 x i32> register operand against a scalar i32
; loaded from %__b and splatted to all four lanes. The 4-lane result is
; zero-padded to <8 x i1> and returned as an i8 bitmask.
; With AVX512VL the splat becomes an embedded {1to4} broadcast operand of
; vpcmpnltd. Without it vpbroadcastd fills xmm1, the compare runs on zmm
; registers, and kshiftlw/kshiftrw $12 keeps only the low 4 mask bits.
11061define zeroext i8 @test_vpcmpsged_v4i1_v8i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
11062; VLX-LABEL: test_vpcmpsged_v4i1_v8i1_mask_mem_b:
11063; VLX:       # %bb.0: # %entry
11064; VLX-NEXT:    vpcmpnltd (%rdi){1to4}, %xmm0, %k0
11065; VLX-NEXT:    kmovd %k0, %eax
11066; VLX-NEXT:    # kill: def $al killed $al killed $eax
11067; VLX-NEXT:    retq
11068;
11069; NoVLX-LABEL: test_vpcmpsged_v4i1_v8i1_mask_mem_b:
11070; NoVLX:       # %bb.0: # %entry
11071; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
11072; NoVLX-NEXT:    vpbroadcastd (%rdi), %xmm1
11073; NoVLX-NEXT:    vpcmpnltd %zmm1, %zmm0, %k0
11074; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
11075; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
11076; NoVLX-NEXT:    kmovw %k0, %eax
11077; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
11078; NoVLX-NEXT:    vzeroupper
11079; NoVLX-NEXT:    retq
11080entry:
11081  %0 = bitcast <2 x i64> %__a to <4 x i32>
11082  %load = load i32, i32* %__b
  ; Insert the scalar into lane 0, then replicate lane 0 into every lane.
11083  %vec = insertelement <4 x i32> undef, i32 %load, i32 0
11084  %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
11085  %2 = icmp sge <4 x i32> %0, %1
  ; Indices 0-3 pick the compare lanes; indices 4-7 pick from the
  ; zeroinitializer operand, so the added lanes are all false.
11086  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
11087  %4 = bitcast <8 x i1> %3 to i8
11088  ret i8 %4
11089}
11090
; Masked signed >= compare of a <4 x i32> register operand against a scalar
; i32 loaded from %__b and splatted to all four lanes. The result is ANDed
; with the low 4 bits of %__u, zero-padded to <8 x i1> and returned as i8.
; %edi is moved into %k1 and used as the write mask. With AVX512VL the splat
; is an embedded {1to4} broadcast; without it vpbroadcastd fills xmm1, the
; compare runs on zmm, and kshiftlw/kshiftrw $12 keeps the low 4 mask bits.
11091define zeroext i8 @test_masked_vpcmpsged_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
11092; VLX-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask_mem_b:
11093; VLX:       # %bb.0: # %entry
11094; VLX-NEXT:    kmovd %edi, %k1
11095; VLX-NEXT:    vpcmpnltd (%rsi){1to4}, %xmm0, %k0 {%k1}
11096; VLX-NEXT:    kmovd %k0, %eax
11097; VLX-NEXT:    # kill: def $al killed $al killed $eax
11098; VLX-NEXT:    retq
11099;
11100; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask_mem_b:
11101; NoVLX:       # %bb.0: # %entry
11102; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
11103; NoVLX-NEXT:    vpbroadcastd (%rsi), %xmm1
11104; NoVLX-NEXT:    kmovw %edi, %k1
11105; NoVLX-NEXT:    vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
11106; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
11107; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
11108; NoVLX-NEXT:    kmovw %k0, %eax
11109; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
11110; NoVLX-NEXT:    vzeroupper
11111; NoVLX-NEXT:    retq
11112entry:
11113  %0 = bitcast <2 x i64> %__a to <4 x i32>
11114  %load = load i32, i32* %__b
  ; Insert the scalar into lane 0, then replicate lane 0 into every lane.
11115  %vec = insertelement <4 x i32> undef, i32 %load, i32 0
11116  %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
11117  %2 = icmp sge <4 x i32> %0, %1
11118  %3 = bitcast i8 %__u to <8 x i1>
  ; Only lanes 0-3 of the i8 mask take part in the AND below.
11119  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
11120  %4 = and <4 x i1> %extract.i, %2
  ; Indices 0-3 pick the masked compare lanes; indices 4-7 pick from the
  ; zeroinitializer operand, so the added lanes are all false.
11121  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
11122  %6 = bitcast <8 x i1> %5 to i8
11123  ret i8 %6
11124}
11125
11126
; Signed >= compare of two <4 x i32> register operands; the 4-lane result is
; zero-padded to <16 x i1> and returned as an i16 bitmask.
; With AVX512VL a single 128-bit vpcmpnltd writes the mask register. Without
; it the compare is done on zmm registers and the kshiftlw/kshiftrw $12 pair
; keeps only the low 4 bits of the 16-bit mask.
11127define zeroext i16 @test_vpcmpsged_v4i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
11128; VLX-LABEL: test_vpcmpsged_v4i1_v16i1_mask:
11129; VLX:       # %bb.0: # %entry
11130; VLX-NEXT:    vpcmpnltd %xmm1, %xmm0, %k0
11131; VLX-NEXT:    kmovd %k0, %eax
11132; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
11133; VLX-NEXT:    retq
11134;
11135; NoVLX-LABEL: test_vpcmpsged_v4i1_v16i1_mask:
11136; NoVLX:       # %bb.0: # %entry
11137; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
11138; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
11139; NoVLX-NEXT:    vpcmpnltd %zmm1, %zmm0, %k0
11140; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
11141; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
11142; NoVLX-NEXT:    kmovw %k0, %eax
11143; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
11144; NoVLX-NEXT:    vzeroupper
11145; NoVLX-NEXT:    retq
11146entry:
11147  %0 = bitcast <2 x i64> %__a to <4 x i32>
11148  %1 = bitcast <2 x i64> %__b to <4 x i32>
11149  %2 = icmp sge <4 x i32> %0, %1
  ; Indices 0-3 pick the compare lanes; every later index is in 4-7 and so
  ; picks from the zeroinitializer operand, making lanes 4-15 all false.
11150  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
11151  %4 = bitcast <16 x i1> %3 to i16
11152  ret i16 %4
11153}
11154
; Signed >= compare of a <4 x i32> register operand against a vector loaded
; from %__b; the 4-lane result is zero-padded to <16 x i1> and returned as i16.
; With AVX512VL the load is folded into vpcmpnltd as a memory operand.
; Without it the vector is loaded into xmm1 first, the compare runs on zmm
; registers, and kshiftlw/kshiftrw $12 keeps only the low 4 mask bits.
11155define zeroext i16 @test_vpcmpsged_v4i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
11156; VLX-LABEL: test_vpcmpsged_v4i1_v16i1_mask_mem:
11157; VLX:       # %bb.0: # %entry
11158; VLX-NEXT:    vpcmpnltd (%rdi), %xmm0, %k0
11159; VLX-NEXT:    kmovd %k0, %eax
11160; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
11161; VLX-NEXT:    retq
11162;
11163; NoVLX-LABEL: test_vpcmpsged_v4i1_v16i1_mask_mem:
11164; NoVLX:       # %bb.0: # %entry
11165; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
11166; NoVLX-NEXT:    vmovdqa (%rdi), %xmm1
11167; NoVLX-NEXT:    vpcmpnltd %zmm1, %zmm0, %k0
11168; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
11169; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
11170; NoVLX-NEXT:    kmovw %k0, %eax
11171; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
11172; NoVLX-NEXT:    vzeroupper
11173; NoVLX-NEXT:    retq
11174entry:
11175  %0 = bitcast <2 x i64> %__a to <4 x i32>
11176  %load = load <2 x i64>, <2 x i64>* %__b
11177  %1 = bitcast <2 x i64> %load to <4 x i32>
11178  %2 = icmp sge <4 x i32> %0, %1
  ; Indices 0-3 pick the compare lanes; every later index is in 4-7 and so
  ; picks from the zeroinitializer operand, making lanes 4-15 all false.
11179  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
11180  %4 = bitcast <16 x i1> %3 to i16
11181  ret i16 %4
11182}
11183
; Masked signed >= compare of two <4 x i32> register operands. The compare
; result is ANDed with the low 4 bits of %__u, zero-padded to <16 x i1> and
; returned as an i16 bitmask.
; In both configurations %edi is moved into %k1 and used as the write mask of
; vpcmpnltd. Without AVX512VL the compare runs on zmm registers and
; kshiftlw/kshiftrw $12 keeps only the low 4 mask bits.
11184define zeroext i16 @test_masked_vpcmpsged_v4i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
11185; VLX-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask:
11186; VLX:       # %bb.0: # %entry
11187; VLX-NEXT:    kmovd %edi, %k1
11188; VLX-NEXT:    vpcmpnltd %xmm1, %xmm0, %k0 {%k1}
11189; VLX-NEXT:    kmovd %k0, %eax
11190; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
11191; VLX-NEXT:    retq
11192;
11193; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask:
11194; NoVLX:       # %bb.0: # %entry
11195; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
11196; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
11197; NoVLX-NEXT:    kmovw %edi, %k1
11198; NoVLX-NEXT:    vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
11199; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
11200; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
11201; NoVLX-NEXT:    kmovw %k0, %eax
11202; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
11203; NoVLX-NEXT:    vzeroupper
11204; NoVLX-NEXT:    retq
11205entry:
11206  %0 = bitcast <2 x i64> %__a to <4 x i32>
11207  %1 = bitcast <2 x i64> %__b to <4 x i32>
11208  %2 = icmp sge <4 x i32> %0, %1
11209  %3 = bitcast i8 %__u to <8 x i1>
  ; Only lanes 0-3 of the i8 mask take part in the AND below.
11210  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
11211  %4 = and <4 x i1> %2, %extract.i
  ; Indices 0-3 pick the masked compare lanes; every later index is in 4-7 and
  ; so picks from the zeroinitializer operand, making lanes 4-15 all false.
11212  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
11213  %6 = bitcast <16 x i1> %5 to i16
11214  ret i16 %6
11215}
11216
; Masked signed >= compare of a <4 x i32> register operand against a vector
; loaded from %__b. The compare result is ANDed with the low 4 bits of %__u,
; zero-padded to <16 x i1> and returned as an i16 bitmask.
; %edi is moved into %k1 and used as the write mask. With AVX512VL the load is
; folded into vpcmpnltd; without it the vector is loaded into xmm1, compared
; on zmm registers, and kshiftlw/kshiftrw $12 keeps the low 4 mask bits.
11217define zeroext i16 @test_masked_vpcmpsged_v4i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
11218; VLX-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask_mem:
11219; VLX:       # %bb.0: # %entry
11220; VLX-NEXT:    kmovd %edi, %k1
11221; VLX-NEXT:    vpcmpnltd (%rsi), %xmm0, %k0 {%k1}
11222; VLX-NEXT:    kmovd %k0, %eax
11223; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
11224; VLX-NEXT:    retq
11225;
11226; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask_mem:
11227; NoVLX:       # %bb.0: # %entry
11228; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
11229; NoVLX-NEXT:    vmovdqa (%rsi), %xmm1
11230; NoVLX-NEXT:    kmovw %edi, %k1
11231; NoVLX-NEXT:    vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
11232; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
11233; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
11234; NoVLX-NEXT:    kmovw %k0, %eax
11235; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
11236; NoVLX-NEXT:    vzeroupper
11237; NoVLX-NEXT:    retq
11238entry:
11239  %0 = bitcast <2 x i64> %__a to <4 x i32>
11240  %load = load <2 x i64>, <2 x i64>* %__b
11241  %1 = bitcast <2 x i64> %load to <4 x i32>
11242  %2 = icmp sge <4 x i32> %0, %1
11243  %3 = bitcast i8 %__u to <8 x i1>
  ; Only lanes 0-3 of the i8 mask take part in the AND below.
11244  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
11245  %4 = and <4 x i1> %2, %extract.i
  ; Indices 0-3 pick the masked compare lanes; every later index is in 4-7 and
  ; so picks from the zeroinitializer operand, making lanes 4-15 all false.
11246  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
11247  %6 = bitcast <16 x i1> %5 to i16
11248  ret i16 %6
11249}
11250
11251
; Signed >= compare of a <4 x i32> register operand against a scalar i32
; loaded from %__b and splatted to all four lanes. The 4-lane result is
; zero-padded to <16 x i1> and returned as an i16 bitmask.
; With AVX512VL the splat becomes an embedded {1to4} broadcast operand of
; vpcmpnltd. Without it vpbroadcastd fills xmm1, the compare runs on zmm
; registers, and kshiftlw/kshiftrw $12 keeps only the low 4 mask bits.
11252define zeroext i16 @test_vpcmpsged_v4i1_v16i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
11253; VLX-LABEL: test_vpcmpsged_v4i1_v16i1_mask_mem_b:
11254; VLX:       # %bb.0: # %entry
11255; VLX-NEXT:    vpcmpnltd (%rdi){1to4}, %xmm0, %k0
11256; VLX-NEXT:    kmovd %k0, %eax
11257; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
11258; VLX-NEXT:    retq
11259;
11260; NoVLX-LABEL: test_vpcmpsged_v4i1_v16i1_mask_mem_b:
11261; NoVLX:       # %bb.0: # %entry
11262; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
11263; NoVLX-NEXT:    vpbroadcastd (%rdi), %xmm1
11264; NoVLX-NEXT:    vpcmpnltd %zmm1, %zmm0, %k0
11265; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
11266; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
11267; NoVLX-NEXT:    kmovw %k0, %eax
11268; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
11269; NoVLX-NEXT:    vzeroupper
11270; NoVLX-NEXT:    retq
11271entry:
11272  %0 = bitcast <2 x i64> %__a to <4 x i32>
11273  %load = load i32, i32* %__b
  ; Insert the scalar into lane 0, then replicate lane 0 into every lane.
11274  %vec = insertelement <4 x i32> undef, i32 %load, i32 0
11275  %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
11276  %2 = icmp sge <4 x i32> %0, %1
  ; Indices 0-3 pick the compare lanes; every later index is in 4-7 and so
  ; picks from the zeroinitializer operand, making lanes 4-15 all false.
11277  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
11278  %4 = bitcast <16 x i1> %3 to i16
11279  ret i16 %4
11280}
11281
; Masked signed >= compare of a <4 x i32> register operand against a scalar
; i32 loaded from %__b and splatted to all four lanes. The result is ANDed
; with the low 4 bits of %__u, zero-padded to <16 x i1> and returned as i16.
; %edi is moved into %k1 and used as the write mask. With AVX512VL the splat
; is an embedded {1to4} broadcast; without it vpbroadcastd fills xmm1, the
; compare runs on zmm, and kshiftlw/kshiftrw $12 keeps the low 4 mask bits.
11282define zeroext i16 @test_masked_vpcmpsged_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
11283; VLX-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask_mem_b:
11284; VLX:       # %bb.0: # %entry
11285; VLX-NEXT:    kmovd %edi, %k1
11286; VLX-NEXT:    vpcmpnltd (%rsi){1to4}, %xmm0, %k0 {%k1}
11287; VLX-NEXT:    kmovd %k0, %eax
11288; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
11289; VLX-NEXT:    retq
11290;
11291; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask_mem_b:
11292; NoVLX:       # %bb.0: # %entry
11293; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
11294; NoVLX-NEXT:    vpbroadcastd (%rsi), %xmm1
11295; NoVLX-NEXT:    kmovw %edi, %k1
11296; NoVLX-NEXT:    vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
11297; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
11298; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
11299; NoVLX-NEXT:    kmovw %k0, %eax
11300; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
11301; NoVLX-NEXT:    vzeroupper
11302; NoVLX-NEXT:    retq
11303entry:
11304  %0 = bitcast <2 x i64> %__a to <4 x i32>
11305  %load = load i32, i32* %__b
  ; Insert the scalar into lane 0, then replicate lane 0 into every lane.
11306  %vec = insertelement <4 x i32> undef, i32 %load, i32 0
11307  %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
11308  %2 = icmp sge <4 x i32> %0, %1
11309  %3 = bitcast i8 %__u to <8 x i1>
  ; Only lanes 0-3 of the i8 mask take part in the AND below.
11310  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
11311  %4 = and <4 x i1> %extract.i, %2
  ; Indices 0-3 pick the masked compare lanes; every later index is in 4-7 and
  ; so picks from the zeroinitializer operand, making lanes 4-15 all false.
11312  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
11313  %6 = bitcast <16 x i1> %5 to i16
11314  ret i16 %6
11315}
11316
11317
; Signed >= compare of two <4 x i32> register operands; the 4-lane result is
; zero-padded to <32 x i1> and returned as an i32 bitmask.
; With AVX512VL a single 128-bit vpcmpnltd writes the mask register. Without
; it the compare is done on zmm registers and the kshiftlw/kshiftrw $12 pair
; keeps only the low 4 bits of the 16-bit mask.
11318define zeroext i32 @test_vpcmpsged_v4i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
11319; VLX-LABEL: test_vpcmpsged_v4i1_v32i1_mask:
11320; VLX:       # %bb.0: # %entry
11321; VLX-NEXT:    vpcmpnltd %xmm1, %xmm0, %k0
11322; VLX-NEXT:    kmovd %k0, %eax
11323; VLX-NEXT:    retq
11324;
11325; NoVLX-LABEL: test_vpcmpsged_v4i1_v32i1_mask:
11326; NoVLX:       # %bb.0: # %entry
11327; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
11328; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
11329; NoVLX-NEXT:    vpcmpnltd %zmm1, %zmm0, %k0
11330; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
11331; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
11332; NoVLX-NEXT:    kmovw %k0, %eax
11333; NoVLX-NEXT:    vzeroupper
11334; NoVLX-NEXT:    retq
11335entry:
11336  %0 = bitcast <2 x i64> %__a to <4 x i32>
11337  %1 = bitcast <2 x i64> %__b to <4 x i32>
11338  %2 = icmp sge <4 x i32> %0, %1
  ; Indices 0-3 pick the compare lanes; every later index is in 4-7 and so
  ; picks from the zeroinitializer operand, making lanes 4-31 all false.
11339  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
11340  %4 = bitcast <32 x i1> %3 to i32
11341  ret i32 %4
11342}
11343
; Signed >= compare of a <4 x i32> register operand against a vector loaded
; from %__b; the 4-lane result is zero-padded to <32 x i1> and returned as i32.
; With AVX512VL the load is folded into vpcmpnltd as a memory operand.
; Without it the vector is loaded into xmm1 first, the compare runs on zmm
; registers, and kshiftlw/kshiftrw $12 keeps only the low 4 mask bits.
11344define zeroext i32 @test_vpcmpsged_v4i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
11345; VLX-LABEL: test_vpcmpsged_v4i1_v32i1_mask_mem:
11346; VLX:       # %bb.0: # %entry
11347; VLX-NEXT:    vpcmpnltd (%rdi), %xmm0, %k0
11348; VLX-NEXT:    kmovd %k0, %eax
11349; VLX-NEXT:    retq
11350;
11351; NoVLX-LABEL: test_vpcmpsged_v4i1_v32i1_mask_mem:
11352; NoVLX:       # %bb.0: # %entry
11353; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
11354; NoVLX-NEXT:    vmovdqa (%rdi), %xmm1
11355; NoVLX-NEXT:    vpcmpnltd %zmm1, %zmm0, %k0
11356; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
11357; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
11358; NoVLX-NEXT:    kmovw %k0, %eax
11359; NoVLX-NEXT:    vzeroupper
11360; NoVLX-NEXT:    retq
11361entry:
11362  %0 = bitcast <2 x i64> %__a to <4 x i32>
11363  %load = load <2 x i64>, <2 x i64>* %__b
11364  %1 = bitcast <2 x i64> %load to <4 x i32>
11365  %2 = icmp sge <4 x i32> %0, %1
  ; Indices 0-3 pick the compare lanes; every later index is in 4-7 and so
  ; picks from the zeroinitializer operand, making lanes 4-31 all false.
11366  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
11367  %4 = bitcast <32 x i1> %3 to i32
11368  ret i32 %4
11369}
11370
; Masked signed >= compare of two <4 x i32> register operands. The compare
; result is ANDed with the low 4 bits of %__u, zero-padded to <32 x i1> and
; returned as an i32 bitmask.
; In both configurations %edi is moved into %k1 and used as the write mask of
; vpcmpnltd. Without AVX512VL the compare runs on zmm registers and
; kshiftlw/kshiftrw $12 keeps only the low 4 mask bits.
11371define zeroext i32 @test_masked_vpcmpsged_v4i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
11372; VLX-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask:
11373; VLX:       # %bb.0: # %entry
11374; VLX-NEXT:    kmovd %edi, %k1
11375; VLX-NEXT:    vpcmpnltd %xmm1, %xmm0, %k0 {%k1}
11376; VLX-NEXT:    kmovd %k0, %eax
11377; VLX-NEXT:    retq
11378;
11379; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask:
11380; NoVLX:       # %bb.0: # %entry
11381; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
11382; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
11383; NoVLX-NEXT:    kmovw %edi, %k1
11384; NoVLX-NEXT:    vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
11385; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
11386; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
11387; NoVLX-NEXT:    kmovw %k0, %eax
11388; NoVLX-NEXT:    vzeroupper
11389; NoVLX-NEXT:    retq
11390entry:
11391  %0 = bitcast <2 x i64> %__a to <4 x i32>
11392  %1 = bitcast <2 x i64> %__b to <4 x i32>
11393  %2 = icmp sge <4 x i32> %0, %1
11394  %3 = bitcast i8 %__u to <8 x i1>
  ; Only lanes 0-3 of the i8 mask take part in the AND below.
11395  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
11396  %4 = and <4 x i1> %2, %extract.i
  ; Indices 0-3 pick the masked compare lanes; every later index is in 4-7 and
  ; so picks from the zeroinitializer operand, making lanes 4-31 all false.
11397  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
11398  %6 = bitcast <32 x i1> %5 to i32
11399  ret i32 %6
11400}
11401
; Masked signed >= compare of a <4 x i32> register operand against a vector
; loaded from %__b. The compare result is ANDed with the low 4 bits of %__u,
; zero-padded to <32 x i1> and returned as an i32 bitmask.
; %edi is moved into %k1 and used as the write mask. With AVX512VL the load is
; folded into vpcmpnltd; without it the vector is loaded into xmm1, compared
; on zmm registers, and kshiftlw/kshiftrw $12 keeps the low 4 mask bits.
11402define zeroext i32 @test_masked_vpcmpsged_v4i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
11403; VLX-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask_mem:
11404; VLX:       # %bb.0: # %entry
11405; VLX-NEXT:    kmovd %edi, %k1
11406; VLX-NEXT:    vpcmpnltd (%rsi), %xmm0, %k0 {%k1}
11407; VLX-NEXT:    kmovd %k0, %eax
11408; VLX-NEXT:    retq
11409;
11410; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask_mem:
11411; NoVLX:       # %bb.0: # %entry
11412; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
11413; NoVLX-NEXT:    vmovdqa (%rsi), %xmm1
11414; NoVLX-NEXT:    kmovw %edi, %k1
11415; NoVLX-NEXT:    vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
11416; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
11417; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
11418; NoVLX-NEXT:    kmovw %k0, %eax
11419; NoVLX-NEXT:    vzeroupper
11420; NoVLX-NEXT:    retq
11421entry:
11422  %0 = bitcast <2 x i64> %__a to <4 x i32>
11423  %load = load <2 x i64>, <2 x i64>* %__b
11424  %1 = bitcast <2 x i64> %load to <4 x i32>
11425  %2 = icmp sge <4 x i32> %0, %1
11426  %3 = bitcast i8 %__u to <8 x i1>
  ; Only lanes 0-3 of the i8 mask take part in the AND below.
11427  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
11428  %4 = and <4 x i1> %2, %extract.i
  ; Indices 0-3 pick the masked compare lanes; every later index is in 4-7 and
  ; so picks from the zeroinitializer operand, making lanes 4-31 all false.
11429  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
11430  %6 = bitcast <32 x i1> %5 to i32
11431  ret i32 %6
11432}
11433
11434
; Signed >= compare of a <4 x i32> register operand against a scalar i32
; loaded from %__b and splatted to all four lanes. The 4-lane result is
; zero-padded to <32 x i1> and returned as an i32 bitmask.
; With AVX512VL the splat becomes an embedded {1to4} broadcast operand of
; vpcmpnltd. Without it vpbroadcastd fills xmm1, the compare runs on zmm
; registers, and kshiftlw/kshiftrw $12 keeps only the low 4 mask bits.
11435define zeroext i32 @test_vpcmpsged_v4i1_v32i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
11436; VLX-LABEL: test_vpcmpsged_v4i1_v32i1_mask_mem_b:
11437; VLX:       # %bb.0: # %entry
11438; VLX-NEXT:    vpcmpnltd (%rdi){1to4}, %xmm0, %k0
11439; VLX-NEXT:    kmovd %k0, %eax
11440; VLX-NEXT:    retq
11441;
11442; NoVLX-LABEL: test_vpcmpsged_v4i1_v32i1_mask_mem_b:
11443; NoVLX:       # %bb.0: # %entry
11444; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
11445; NoVLX-NEXT:    vpbroadcastd (%rdi), %xmm1
11446; NoVLX-NEXT:    vpcmpnltd %zmm1, %zmm0, %k0
11447; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
11448; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
11449; NoVLX-NEXT:    kmovw %k0, %eax
11450; NoVLX-NEXT:    vzeroupper
11451; NoVLX-NEXT:    retq
11452entry:
11453  %0 = bitcast <2 x i64> %__a to <4 x i32>
11454  %load = load i32, i32* %__b
  ; Insert the scalar into lane 0, then replicate lane 0 into every lane.
11455  %vec = insertelement <4 x i32> undef, i32 %load, i32 0
11456  %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
11457  %2 = icmp sge <4 x i32> %0, %1
  ; Indices 0-3 pick the compare lanes; every later index is in 4-7 and so
  ; picks from the zeroinitializer operand, making lanes 4-31 all false.
11458  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
11459  %4 = bitcast <32 x i1> %3 to i32
11460  ret i32 %4
11461}
11462
; Masked signed >= compare of a <4 x i32> register operand against a scalar
; i32 loaded from %__b and splatted to all four lanes. The result is ANDed
; with the low 4 bits of %__u, zero-padded to <32 x i1> and returned as i32.
; %edi is moved into %k1 and used as the write mask. With AVX512VL the splat
; is an embedded {1to4} broadcast; without it vpbroadcastd fills xmm1, the
; compare runs on zmm, and kshiftlw/kshiftrw $12 keeps the low 4 mask bits.
11463define zeroext i32 @test_masked_vpcmpsged_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
11464; VLX-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask_mem_b:
11465; VLX:       # %bb.0: # %entry
11466; VLX-NEXT:    kmovd %edi, %k1
11467; VLX-NEXT:    vpcmpnltd (%rsi){1to4}, %xmm0, %k0 {%k1}
11468; VLX-NEXT:    kmovd %k0, %eax
11469; VLX-NEXT:    retq
11470;
11471; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask_mem_b:
11472; NoVLX:       # %bb.0: # %entry
11473; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
11474; NoVLX-NEXT:    vpbroadcastd (%rsi), %xmm1
11475; NoVLX-NEXT:    kmovw %edi, %k1
11476; NoVLX-NEXT:    vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
11477; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
11478; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
11479; NoVLX-NEXT:    kmovw %k0, %eax
11480; NoVLX-NEXT:    vzeroupper
11481; NoVLX-NEXT:    retq
11482entry:
11483  %0 = bitcast <2 x i64> %__a to <4 x i32>
11484  %load = load i32, i32* %__b
  ; Insert the scalar into lane 0, then replicate lane 0 into every lane.
11485  %vec = insertelement <4 x i32> undef, i32 %load, i32 0
11486  %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
11487  %2 = icmp sge <4 x i32> %0, %1
11488  %3 = bitcast i8 %__u to <8 x i1>
  ; Only lanes 0-3 of the i8 mask take part in the AND below.
11489  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
11490  %4 = and <4 x i1> %extract.i, %2
  ; Indices 0-3 pick the masked compare lanes; every later index is in 4-7 and
  ; so picks from the zeroinitializer operand, making lanes 4-31 all false.
11491  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
11492  %6 = bitcast <32 x i1> %5 to i32
11493  ret i32 %6
11494}
11495
11496
; Signed >= compare of two <4 x i32> register operands; the 4-lane result is
; zero-padded to <64 x i1> and returned as an i64 bitmask.
; With AVX512VL a single 128-bit vpcmpnltd writes the mask register, which is
; read back with kmovq. Without it the compare is done on zmm registers, the
; kshiftlw/kshiftrw $12 pair keeps only the low 4 mask bits, and the 16-bit
; mask value is zero-extended with movzwl.
11497define zeroext i64 @test_vpcmpsged_v4i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
11498; VLX-LABEL: test_vpcmpsged_v4i1_v64i1_mask:
11499; VLX:       # %bb.0: # %entry
11500; VLX-NEXT:    vpcmpnltd %xmm1, %xmm0, %k0
11501; VLX-NEXT:    kmovq %k0, %rax
11502; VLX-NEXT:    retq
11503;
11504; NoVLX-LABEL: test_vpcmpsged_v4i1_v64i1_mask:
11505; NoVLX:       # %bb.0: # %entry
11506; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
11507; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
11508; NoVLX-NEXT:    vpcmpnltd %zmm1, %zmm0, %k0
11509; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
11510; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
11511; NoVLX-NEXT:    kmovw %k0, %eax
11512; NoVLX-NEXT:    movzwl %ax, %eax
11513; NoVLX-NEXT:    vzeroupper
11514; NoVLX-NEXT:    retq
11515entry:
11516  %0 = bitcast <2 x i64> %__a to <4 x i32>
11517  %1 = bitcast <2 x i64> %__b to <4 x i32>
11518  %2 = icmp sge <4 x i32> %0, %1
  ; Indices 0-3 pick the compare lanes; every later index is in 4-7 and so
  ; picks from the zeroinitializer operand, making lanes 4-63 all false.
11519  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
11520  %4 = bitcast <64 x i1> %3 to i64
11521  ret i64 %4
11522}
11523
; Signed >= compare of a <4 x i32> register operand against a vector loaded
; from %__b; the 4-lane result is zero-padded to <64 x i1> and returned as i64.
; With AVX512VL the load is folded into vpcmpnltd and the mask is read back
; with kmovq. Without it the vector is loaded into xmm1, compared on zmm
; registers, kshiftlw/kshiftrw $12 keeps only the low 4 mask bits, and the
; 16-bit mask value is zero-extended with movzwl.
11524define zeroext i64 @test_vpcmpsged_v4i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
11525; VLX-LABEL: test_vpcmpsged_v4i1_v64i1_mask_mem:
11526; VLX:       # %bb.0: # %entry
11527; VLX-NEXT:    vpcmpnltd (%rdi), %xmm0, %k0
11528; VLX-NEXT:    kmovq %k0, %rax
11529; VLX-NEXT:    retq
11530;
11531; NoVLX-LABEL: test_vpcmpsged_v4i1_v64i1_mask_mem:
11532; NoVLX:       # %bb.0: # %entry
11533; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
11534; NoVLX-NEXT:    vmovdqa (%rdi), %xmm1
11535; NoVLX-NEXT:    vpcmpnltd %zmm1, %zmm0, %k0
11536; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
11537; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
11538; NoVLX-NEXT:    kmovw %k0, %eax
11539; NoVLX-NEXT:    movzwl %ax, %eax
11540; NoVLX-NEXT:    vzeroupper
11541; NoVLX-NEXT:    retq
11542entry:
11543  %0 = bitcast <2 x i64> %__a to <4 x i32>
11544  %load = load <2 x i64>, <2 x i64>* %__b
11545  %1 = bitcast <2 x i64> %load to <4 x i32>
11546  %2 = icmp sge <4 x i32> %0, %1
  ; Indices 0-3 pick the compare lanes; every later index is in 4-7 and so
  ; picks from the zeroinitializer operand, making lanes 4-63 all false.
11547  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
11548  %4 = bitcast <64 x i1> %3 to i64
11549  ret i64 %4
11550}
11551
; Masked signed >= compare of two <4 x i32> register operands. The compare
; result is ANDed with the low 4 bits of %__u, zero-padded to <64 x i1> and
; returned as an i64 bitmask.
; In both configurations %edi is moved into %k1 and used as the write mask of
; vpcmpnltd. Without AVX512VL the compare runs on zmm registers,
; kshiftlw/kshiftrw $12 keeps only the low 4 mask bits, and the 16-bit mask
; value is zero-extended with movzwl.
11552define zeroext i64 @test_masked_vpcmpsged_v4i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
11553; VLX-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask:
11554; VLX:       # %bb.0: # %entry
11555; VLX-NEXT:    kmovd %edi, %k1
11556; VLX-NEXT:    vpcmpnltd %xmm1, %xmm0, %k0 {%k1}
11557; VLX-NEXT:    kmovq %k0, %rax
11558; VLX-NEXT:    retq
11559;
11560; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask:
11561; NoVLX:       # %bb.0: # %entry
11562; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
11563; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
11564; NoVLX-NEXT:    kmovw %edi, %k1
11565; NoVLX-NEXT:    vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
11566; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
11567; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
11568; NoVLX-NEXT:    kmovw %k0, %eax
11569; NoVLX-NEXT:    movzwl %ax, %eax
11570; NoVLX-NEXT:    vzeroupper
11571; NoVLX-NEXT:    retq
11572entry:
11573  %0 = bitcast <2 x i64> %__a to <4 x i32>
11574  %1 = bitcast <2 x i64> %__b to <4 x i32>
11575  %2 = icmp sge <4 x i32> %0, %1
11576  %3 = bitcast i8 %__u to <8 x i1>
  ; Only lanes 0-3 of the i8 mask take part in the AND below.
11577  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
11578  %4 = and <4 x i1> %2, %extract.i
  ; Indices 0-3 pick the masked compare lanes; every later index is in 4-7 and
  ; so picks from the zeroinitializer operand, making lanes 4-63 all false.
11579  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
11580  %6 = bitcast <64 x i1> %5 to i64
11581  ret i64 %6
11582}
11583
; Masked signed >= compare of a <4 x i32> register operand against a vector
; loaded from %__b. The compare result is ANDed with the low 4 bits of %__u,
; zero-padded to <64 x i1> and returned as an i64 bitmask.
; %edi is moved into %k1 and used as the write mask. With AVX512VL the load is
; folded into vpcmpnltd; without it the vector is loaded into xmm1, compared
; on zmm registers, kshiftlw/kshiftrw $12 keeps the low 4 mask bits, and the
; 16-bit mask value is zero-extended with movzwl.
11584define zeroext i64 @test_masked_vpcmpsged_v4i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
11585; VLX-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask_mem:
11586; VLX:       # %bb.0: # %entry
11587; VLX-NEXT:    kmovd %edi, %k1
11588; VLX-NEXT:    vpcmpnltd (%rsi), %xmm0, %k0 {%k1}
11589; VLX-NEXT:    kmovq %k0, %rax
11590; VLX-NEXT:    retq
11591;
11592; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask_mem:
11593; NoVLX:       # %bb.0: # %entry
11594; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
11595; NoVLX-NEXT:    vmovdqa (%rsi), %xmm1
11596; NoVLX-NEXT:    kmovw %edi, %k1
11597; NoVLX-NEXT:    vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
11598; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
11599; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
11600; NoVLX-NEXT:    kmovw %k0, %eax
11601; NoVLX-NEXT:    movzwl %ax, %eax
11602; NoVLX-NEXT:    vzeroupper
11603; NoVLX-NEXT:    retq
11604entry:
11605  %0 = bitcast <2 x i64> %__a to <4 x i32>
11606  %load = load <2 x i64>, <2 x i64>* %__b
11607  %1 = bitcast <2 x i64> %load to <4 x i32>
11608  %2 = icmp sge <4 x i32> %0, %1
11609  %3 = bitcast i8 %__u to <8 x i1>
  ; Only lanes 0-3 of the i8 mask take part in the AND below.
11610  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
11611  %4 = and <4 x i1> %2, %extract.i
  ; Indices 0-3 pick the masked compare lanes; every later index is in 4-7 and
  ; so picks from the zeroinitializer operand, making lanes 4-63 all false.
11612  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
11613  %6 = bitcast <64 x i1> %5 to i64
11614  ret i64 %6
11615}
11616
11617
; Unmasked signed >= compare of a <4 x i32> vector against a scalar i32 loaded
; from %__b and splatted to all lanes; the 4 results are widened to i64.
; Expected code: with avx512vl a {1to4} broadcast memory operand on the
; compare; with avx512f only, a separate vpbroadcastd, a zmm compare and a
; kshiftlw/kshiftrw pair by 12.
11618define zeroext i64 @test_vpcmpsged_v4i1_v64i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
11619; VLX-LABEL: test_vpcmpsged_v4i1_v64i1_mask_mem_b:
11620; VLX:       # %bb.0: # %entry
11621; VLX-NEXT:    vpcmpnltd (%rdi){1to4}, %xmm0, %k0
11622; VLX-NEXT:    kmovq %k0, %rax
11623; VLX-NEXT:    retq
11624;
11625; NoVLX-LABEL: test_vpcmpsged_v4i1_v64i1_mask_mem_b:
11626; NoVLX:       # %bb.0: # %entry
11627; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
11628; NoVLX-NEXT:    vpbroadcastd (%rdi), %xmm1
11629; NoVLX-NEXT:    vpcmpnltd %zmm1, %zmm0, %k0
11630; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
11631; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
11632; NoVLX-NEXT:    kmovw %k0, %eax
11633; NoVLX-NEXT:    movzwl %ax, %eax
11634; NoVLX-NEXT:    vzeroupper
11635; NoVLX-NEXT:    retq
11636entry:
11637  %0 = bitcast <2 x i64> %__a to <4 x i32>
11638  %load = load i32, i32* %__b
  ; Splat the loaded scalar: insert it at element 0, then shuffle with an
  ; all-zero index mask so every lane reads element 0.
11639  %vec = insertelement <4 x i32> undef, i32 %load, i32 0
11640  %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
11641  %2 = icmp sge <4 x i32> %0, %1
  ; Indices 0-3 pick the compare results; index values 4-7 refer to the
  ; zeroinitializer operand, so result elements 4-63 are all zero.
11642  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
11643  %4 = bitcast <64 x i1> %3 to i64
11644  ret i64 %4
11645}
11646
; Masked signed >= compare of a <4 x i32> vector against a scalar i32 loaded
; from %__b and splatted to all lanes; the 4 masked results are widened to i64.
; Expected code: with avx512vl a masked compare with a {1to4} broadcast memory
; operand; with avx512f only, a separate vpbroadcastd, a masked zmm compare and
; a kshiftlw/kshiftrw pair by 12.
11647define zeroext i64 @test_masked_vpcmpsged_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
11648; VLX-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask_mem_b:
11649; VLX:       # %bb.0: # %entry
11650; VLX-NEXT:    kmovd %edi, %k1
11651; VLX-NEXT:    vpcmpnltd (%rsi){1to4}, %xmm0, %k0 {%k1}
11652; VLX-NEXT:    kmovq %k0, %rax
11653; VLX-NEXT:    retq
11654;
11655; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask_mem_b:
11656; NoVLX:       # %bb.0: # %entry
11657; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
11658; NoVLX-NEXT:    vpbroadcastd (%rsi), %xmm1
11659; NoVLX-NEXT:    kmovw %edi, %k1
11660; NoVLX-NEXT:    vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
11661; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
11662; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
11663; NoVLX-NEXT:    kmovw %k0, %eax
11664; NoVLX-NEXT:    movzwl %ax, %eax
11665; NoVLX-NEXT:    vzeroupper
11666; NoVLX-NEXT:    retq
11667entry:
11668  %0 = bitcast <2 x i64> %__a to <4 x i32>
11669  %load = load i32, i32* %__b
  ; Splat the loaded scalar: insert it at element 0, then shuffle with an
  ; all-zero index mask so every lane reads element 0.
11670  %vec = insertelement <4 x i32> undef, i32 %load, i32 0
11671  %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
11672  %2 = icmp sge <4 x i32> %0, %1
11673  %3 = bitcast i8 %__u to <8 x i1>
  ; Only elements 0-3 of the 8-bit mask are used, matching the 4 compare lanes.
11674  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ; Note the operand order here is mask first, compare result second.
11675  %4 = and <4 x i1> %extract.i, %2
  ; Indices 0-3 pick the masked compare results; index values 4-7 refer to the
  ; zeroinitializer operand, so result elements 4-63 are all zero.
11676  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
11677  %6 = bitcast <64 x i1> %5 to i64
11678  ret i64 %6
11679}
11680
11681
; Unmasked signed >= compare of two <8 x i32> vectors (bitcast from the
; <4 x i64> arguments), widened to a 16-element mask and returned as i16.
; Expected code: with avx512vl a single ymm vpcmpnltd; with avx512f only, a
; zmm compare followed by a kshiftlw/kshiftrw pair by 8.
11682define zeroext i16 @test_vpcmpsged_v8i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
11683; VLX-LABEL: test_vpcmpsged_v8i1_v16i1_mask:
11684; VLX:       # %bb.0: # %entry
11685; VLX-NEXT:    vpcmpnltd %ymm1, %ymm0, %k0
11686; VLX-NEXT:    kmovd %k0, %eax
11687; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
11688; VLX-NEXT:    vzeroupper
11689; VLX-NEXT:    retq
11690;
11691; NoVLX-LABEL: test_vpcmpsged_v8i1_v16i1_mask:
11692; NoVLX:       # %bb.0: # %entry
11693; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
11694; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
11695; NoVLX-NEXT:    vpcmpnltd %zmm1, %zmm0, %k0
11696; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
11697; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
11698; NoVLX-NEXT:    kmovw %k0, %eax
11699; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
11700; NoVLX-NEXT:    vzeroupper
11701; NoVLX-NEXT:    retq
11702entry:
11703  %0 = bitcast <4 x i64> %__a to <8 x i32>
11704  %1 = bitcast <4 x i64> %__b to <8 x i32>
11705  %2 = icmp sge <8 x i32> %0, %1
  ; Indices 0-7 pick the compare results; indices 8-15 refer to the
  ; zeroinitializer operand, so result elements 8-15 are all zero.
11706  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
11707  %4 = bitcast <16 x i1> %3 to i16
11708  ret i16 %4
11709}
11710
; Unmasked <8 x i32> signed >= compare widened to i16, with the right-hand
; operand loaded from the pointer %__b.
; Expected code: with avx512vl the load is folded into the compare's memory
; operand; with avx512f only, a separate vmovdqa precedes a zmm compare and a
; kshiftlw/kshiftrw pair by 8.
11711define zeroext i16 @test_vpcmpsged_v8i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
11712; VLX-LABEL: test_vpcmpsged_v8i1_v16i1_mask_mem:
11713; VLX:       # %bb.0: # %entry
11714; VLX-NEXT:    vpcmpnltd (%rdi), %ymm0, %k0
11715; VLX-NEXT:    kmovd %k0, %eax
11716; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
11717; VLX-NEXT:    vzeroupper
11718; VLX-NEXT:    retq
11719;
11720; NoVLX-LABEL: test_vpcmpsged_v8i1_v16i1_mask_mem:
11721; NoVLX:       # %bb.0: # %entry
11722; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
11723; NoVLX-NEXT:    vmovdqa (%rdi), %ymm1
11724; NoVLX-NEXT:    vpcmpnltd %zmm1, %zmm0, %k0
11725; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
11726; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
11727; NoVLX-NEXT:    kmovw %k0, %eax
11728; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
11729; NoVLX-NEXT:    vzeroupper
11730; NoVLX-NEXT:    retq
11731entry:
11732  %0 = bitcast <4 x i64> %__a to <8 x i32>
11733  %load = load <4 x i64>, <4 x i64>* %__b
11734  %1 = bitcast <4 x i64> %load to <8 x i32>
11735  %2 = icmp sge <8 x i32> %0, %1
  ; Indices 0-7 pick the compare results; indices 8-15 refer to the
  ; zeroinitializer operand, so result elements 8-15 are all zero.
11736  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
11737  %4 = bitcast <16 x i1> %3 to i16
11738  ret i16 %4
11739}
11740
; Masked signed >= compare of two <8 x i32> vectors (bitcast from the
; <4 x i64> arguments), widened to a 16-element mask and returned as i16.
; Expected code: with avx512vl a single masked ymm vpcmpnltd; with avx512f
; only, a masked zmm compare followed by a kshiftlw/kshiftrw pair by 8.
11741define zeroext i16 @test_masked_vpcmpsged_v8i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
11742; VLX-LABEL: test_masked_vpcmpsged_v8i1_v16i1_mask:
11743; VLX:       # %bb.0: # %entry
11744; VLX-NEXT:    kmovd %edi, %k1
11745; VLX-NEXT:    vpcmpnltd %ymm1, %ymm0, %k0 {%k1}
11746; VLX-NEXT:    kmovd %k0, %eax
11747; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
11748; VLX-NEXT:    vzeroupper
11749; VLX-NEXT:    retq
11750;
11751; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v16i1_mask:
11752; NoVLX:       # %bb.0: # %entry
11753; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
11754; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
11755; NoVLX-NEXT:    kmovw %edi, %k1
11756; NoVLX-NEXT:    vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
11757; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
11758; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
11759; NoVLX-NEXT:    kmovw %k0, %eax
11760; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
11761; NoVLX-NEXT:    vzeroupper
11762; NoVLX-NEXT:    retq
11763entry:
11764  %0 = bitcast <4 x i64> %__a to <8 x i32>
11765  %1 = bitcast <4 x i64> %__b to <8 x i32>
11766  %2 = icmp sge <8 x i32> %0, %1
  ; All 8 bits of %__u act as the per-lane mask for the 8 compare results.
11767  %3 = bitcast i8 %__u to <8 x i1>
11768  %4 = and <8 x i1> %2, %3
  ; Indices 0-7 pick the masked compare results; indices 8-15 refer to the
  ; zeroinitializer operand, so result elements 8-15 are all zero.
11769  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
11770  %6 = bitcast <16 x i1> %5 to i16
11771  ret i16 %6
11772}
11773
; Masked <8 x i32> signed >= compare widened to i16, with the right-hand
; operand loaded from the pointer %__b.
; Expected code: with avx512vl the load is folded into the masked compare;
; with avx512f only, a separate vmovdqa precedes a masked zmm compare and a
; kshiftlw/kshiftrw pair by 8.
11774define zeroext i16 @test_masked_vpcmpsged_v8i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
11775; VLX-LABEL: test_masked_vpcmpsged_v8i1_v16i1_mask_mem:
11776; VLX:       # %bb.0: # %entry
11777; VLX-NEXT:    kmovd %edi, %k1
11778; VLX-NEXT:    vpcmpnltd (%rsi), %ymm0, %k0 {%k1}
11779; VLX-NEXT:    kmovd %k0, %eax
11780; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
11781; VLX-NEXT:    vzeroupper
11782; VLX-NEXT:    retq
11783;
11784; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v16i1_mask_mem:
11785; NoVLX:       # %bb.0: # %entry
11786; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
11787; NoVLX-NEXT:    vmovdqa (%rsi), %ymm1
11788; NoVLX-NEXT:    kmovw %edi, %k1
11789; NoVLX-NEXT:    vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
11790; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
11791; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
11792; NoVLX-NEXT:    kmovw %k0, %eax
11793; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
11794; NoVLX-NEXT:    vzeroupper
11795; NoVLX-NEXT:    retq
11796entry:
11797  %0 = bitcast <4 x i64> %__a to <8 x i32>
11798  %load = load <4 x i64>, <4 x i64>* %__b
11799  %1 = bitcast <4 x i64> %load to <8 x i32>
11800  %2 = icmp sge <8 x i32> %0, %1
  ; All 8 bits of %__u act as the per-lane mask for the 8 compare results.
11801  %3 = bitcast i8 %__u to <8 x i1>
11802  %4 = and <8 x i1> %2, %3
  ; Indices 0-7 pick the masked compare results; indices 8-15 refer to the
  ; zeroinitializer operand, so result elements 8-15 are all zero.
11803  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
11804  %6 = bitcast <16 x i1> %5 to i16
11805  ret i16 %6
11806}
11807
11808
; Unmasked signed >= compare of an <8 x i32> vector against a scalar i32 loaded
; from %__b and splatted to all lanes; the 8 results are widened to i16.
; Expected code: with avx512vl a {1to8} broadcast memory operand on the
; compare; with avx512f only, a separate vpbroadcastd, a zmm compare and a
; kshiftlw/kshiftrw pair by 8.
11809define zeroext i16 @test_vpcmpsged_v8i1_v16i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
11810; VLX-LABEL: test_vpcmpsged_v8i1_v16i1_mask_mem_b:
11811; VLX:       # %bb.0: # %entry
11812; VLX-NEXT:    vpcmpnltd (%rdi){1to8}, %ymm0, %k0
11813; VLX-NEXT:    kmovd %k0, %eax
11814; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
11815; VLX-NEXT:    vzeroupper
11816; VLX-NEXT:    retq
11817;
11818; NoVLX-LABEL: test_vpcmpsged_v8i1_v16i1_mask_mem_b:
11819; NoVLX:       # %bb.0: # %entry
11820; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
11821; NoVLX-NEXT:    vpbroadcastd (%rdi), %ymm1
11822; NoVLX-NEXT:    vpcmpnltd %zmm1, %zmm0, %k0
11823; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
11824; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
11825; NoVLX-NEXT:    kmovw %k0, %eax
11826; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
11827; NoVLX-NEXT:    vzeroupper
11828; NoVLX-NEXT:    retq
11829entry:
11830  %0 = bitcast <4 x i64> %__a to <8 x i32>
11831  %load = load i32, i32* %__b
  ; Splat the loaded scalar: insert it at element 0, then shuffle with an
  ; all-zero index mask so every lane reads element 0.
11832  %vec = insertelement <8 x i32> undef, i32 %load, i32 0
11833  %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
11834  %2 = icmp sge <8 x i32> %0, %1
  ; Indices 0-7 pick the compare results; indices 8-15 refer to the
  ; zeroinitializer operand, so result elements 8-15 are all zero.
11835  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
11836  %4 = bitcast <16 x i1> %3 to i16
11837  ret i16 %4
11838}
11839
; Masked signed >= compare of an <8 x i32> vector against a scalar i32 loaded
; from %__b and splatted to all lanes; the 8 masked results are widened to i16.
; Expected code: with avx512vl a masked compare with a {1to8} broadcast memory
; operand; with avx512f only, a separate vpbroadcastd, a masked zmm compare and
; a kshiftlw/kshiftrw pair by 8.
11840define zeroext i16 @test_masked_vpcmpsged_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
11841; VLX-LABEL: test_masked_vpcmpsged_v8i1_v16i1_mask_mem_b:
11842; VLX:       # %bb.0: # %entry
11843; VLX-NEXT:    kmovd %edi, %k1
11844; VLX-NEXT:    vpcmpnltd (%rsi){1to8}, %ymm0, %k0 {%k1}
11845; VLX-NEXT:    kmovd %k0, %eax
11846; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
11847; VLX-NEXT:    vzeroupper
11848; VLX-NEXT:    retq
11849;
11850; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v16i1_mask_mem_b:
11851; NoVLX:       # %bb.0: # %entry
11852; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
11853; NoVLX-NEXT:    vpbroadcastd (%rsi), %ymm1
11854; NoVLX-NEXT:    kmovw %edi, %k1
11855; NoVLX-NEXT:    vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
11856; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
11857; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
11858; NoVLX-NEXT:    kmovw %k0, %eax
11859; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
11860; NoVLX-NEXT:    vzeroupper
11861; NoVLX-NEXT:    retq
11862entry:
11863  %0 = bitcast <4 x i64> %__a to <8 x i32>
11864  %load = load i32, i32* %__b
  ; Splat the loaded scalar: insert it at element 0, then shuffle with an
  ; all-zero index mask so every lane reads element 0.
11865  %vec = insertelement <8 x i32> undef, i32 %load, i32 0
11866  %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
11867  %2 = icmp sge <8 x i32> %0, %1
  ; All 8 bits of %__u act as the per-lane mask; note the and takes the mask
  ; first and the compare result second.
11868  %3 = bitcast i8 %__u to <8 x i1>
11869  %4 = and <8 x i1> %3, %2
  ; Indices 0-7 pick the masked compare results; indices 8-15 refer to the
  ; zeroinitializer operand, so result elements 8-15 are all zero.
11870  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
11871  %6 = bitcast <16 x i1> %5 to i16
11872  ret i16 %6
11873}
11874
11875
; Unmasked signed >= compare of two <8 x i32> vectors (bitcast from the
; <4 x i64> arguments), widened to a 32-element mask and returned as i32.
; Expected code: with avx512vl a single ymm vpcmpnltd; with avx512f only, a
; zmm compare followed by a kshiftlw/kshiftrw pair by 8.
11876define zeroext i32 @test_vpcmpsged_v8i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
11877; VLX-LABEL: test_vpcmpsged_v8i1_v32i1_mask:
11878; VLX:       # %bb.0: # %entry
11879; VLX-NEXT:    vpcmpnltd %ymm1, %ymm0, %k0
11880; VLX-NEXT:    kmovd %k0, %eax
11881; VLX-NEXT:    vzeroupper
11882; VLX-NEXT:    retq
11883;
11884; NoVLX-LABEL: test_vpcmpsged_v8i1_v32i1_mask:
11885; NoVLX:       # %bb.0: # %entry
11886; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
11887; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
11888; NoVLX-NEXT:    vpcmpnltd %zmm1, %zmm0, %k0
11889; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
11890; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
11891; NoVLX-NEXT:    kmovw %k0, %eax
11892; NoVLX-NEXT:    vzeroupper
11893; NoVLX-NEXT:    retq
11894entry:
11895  %0 = bitcast <4 x i64> %__a to <8 x i32>
11896  %1 = bitcast <4 x i64> %__b to <8 x i32>
11897  %2 = icmp sge <8 x i32> %0, %1
  ; Indices 0-7 pick the compare results; index values 8-15 refer to the
  ; zeroinitializer operand, so result elements 8-31 are all zero.
11898  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
11899  %4 = bitcast <32 x i1> %3 to i32
11900  ret i32 %4
11901}
11902
; Unmasked <8 x i32> signed >= compare widened to i32, with the right-hand
; operand loaded from the pointer %__b.
; Expected code: with avx512vl the load is folded into the compare's memory
; operand; with avx512f only, a separate vmovdqa precedes a zmm compare and a
; kshiftlw/kshiftrw pair by 8.
11903define zeroext i32 @test_vpcmpsged_v8i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
11904; VLX-LABEL: test_vpcmpsged_v8i1_v32i1_mask_mem:
11905; VLX:       # %bb.0: # %entry
11906; VLX-NEXT:    vpcmpnltd (%rdi), %ymm0, %k0
11907; VLX-NEXT:    kmovd %k0, %eax
11908; VLX-NEXT:    vzeroupper
11909; VLX-NEXT:    retq
11910;
11911; NoVLX-LABEL: test_vpcmpsged_v8i1_v32i1_mask_mem:
11912; NoVLX:       # %bb.0: # %entry
11913; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
11914; NoVLX-NEXT:    vmovdqa (%rdi), %ymm1
11915; NoVLX-NEXT:    vpcmpnltd %zmm1, %zmm0, %k0
11916; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
11917; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
11918; NoVLX-NEXT:    kmovw %k0, %eax
11919; NoVLX-NEXT:    vzeroupper
11920; NoVLX-NEXT:    retq
11921entry:
11922  %0 = bitcast <4 x i64> %__a to <8 x i32>
11923  %load = load <4 x i64>, <4 x i64>* %__b
11924  %1 = bitcast <4 x i64> %load to <8 x i32>
11925  %2 = icmp sge <8 x i32> %0, %1
  ; Indices 0-7 pick the compare results; index values 8-15 refer to the
  ; zeroinitializer operand, so result elements 8-31 are all zero.
11926  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
11927  %4 = bitcast <32 x i1> %3 to i32
11928  ret i32 %4
11929}
11930
; Masked signed >= compare of two <8 x i32> vectors (bitcast from the
; <4 x i64> arguments), widened to a 32-element mask and returned as i32.
; Expected code: with avx512vl a single masked ymm vpcmpnltd; with avx512f
; only, a masked zmm compare followed by a kshiftlw/kshiftrw pair by 8.
11931define zeroext i32 @test_masked_vpcmpsged_v8i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
11932; VLX-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask:
11933; VLX:       # %bb.0: # %entry
11934; VLX-NEXT:    kmovd %edi, %k1
11935; VLX-NEXT:    vpcmpnltd %ymm1, %ymm0, %k0 {%k1}
11936; VLX-NEXT:    kmovd %k0, %eax
11937; VLX-NEXT:    vzeroupper
11938; VLX-NEXT:    retq
11939;
11940; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask:
11941; NoVLX:       # %bb.0: # %entry
11942; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
11943; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
11944; NoVLX-NEXT:    kmovw %edi, %k1
11945; NoVLX-NEXT:    vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
11946; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
11947; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
11948; NoVLX-NEXT:    kmovw %k0, %eax
11949; NoVLX-NEXT:    vzeroupper
11950; NoVLX-NEXT:    retq
11951entry:
11952  %0 = bitcast <4 x i64> %__a to <8 x i32>
11953  %1 = bitcast <4 x i64> %__b to <8 x i32>
11954  %2 = icmp sge <8 x i32> %0, %1
  ; All 8 bits of %__u act as the per-lane mask for the 8 compare results.
11955  %3 = bitcast i8 %__u to <8 x i1>
11956  %4 = and <8 x i1> %2, %3
  ; Indices 0-7 pick the masked compare results; index values 8-15 refer to
  ; the zeroinitializer operand, so result elements 8-31 are all zero.
11957  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
11958  %6 = bitcast <32 x i1> %5 to i32
11959  ret i32 %6
11960}
11961
; Masked <8 x i32> signed >= compare widened to i32, with the right-hand
; operand loaded from the pointer %__b.
; Expected code: with avx512vl the load is folded into the masked compare;
; with avx512f only, a separate vmovdqa precedes a masked zmm compare and a
; kshiftlw/kshiftrw pair by 8.
11962define zeroext i32 @test_masked_vpcmpsged_v8i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
11963; VLX-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask_mem:
11964; VLX:       # %bb.0: # %entry
11965; VLX-NEXT:    kmovd %edi, %k1
11966; VLX-NEXT:    vpcmpnltd (%rsi), %ymm0, %k0 {%k1}
11967; VLX-NEXT:    kmovd %k0, %eax
11968; VLX-NEXT:    vzeroupper
11969; VLX-NEXT:    retq
11970;
11971; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask_mem:
11972; NoVLX:       # %bb.0: # %entry
11973; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
11974; NoVLX-NEXT:    vmovdqa (%rsi), %ymm1
11975; NoVLX-NEXT:    kmovw %edi, %k1
11976; NoVLX-NEXT:    vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
11977; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
11978; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
11979; NoVLX-NEXT:    kmovw %k0, %eax
11980; NoVLX-NEXT:    vzeroupper
11981; NoVLX-NEXT:    retq
11982entry:
11983  %0 = bitcast <4 x i64> %__a to <8 x i32>
11984  %load = load <4 x i64>, <4 x i64>* %__b
11985  %1 = bitcast <4 x i64> %load to <8 x i32>
11986  %2 = icmp sge <8 x i32> %0, %1
  ; All 8 bits of %__u act as the per-lane mask for the 8 compare results.
11987  %3 = bitcast i8 %__u to <8 x i1>
11988  %4 = and <8 x i1> %2, %3
  ; Indices 0-7 pick the masked compare results; index values 8-15 refer to
  ; the zeroinitializer operand, so result elements 8-31 are all zero.
11989  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
11990  %6 = bitcast <32 x i1> %5 to i32
11991  ret i32 %6
11992}
11993
11994
; Unmasked signed >= compare of an <8 x i32> vector against a scalar i32 loaded
; from %__b and splatted to all lanes; the 8 results are widened to i32.
; Expected code: with avx512vl a {1to8} broadcast memory operand on the
; compare; with avx512f only, a separate vpbroadcastd, a zmm compare and a
; kshiftlw/kshiftrw pair by 8.
11995define zeroext i32 @test_vpcmpsged_v8i1_v32i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
11996; VLX-LABEL: test_vpcmpsged_v8i1_v32i1_mask_mem_b:
11997; VLX:       # %bb.0: # %entry
11998; VLX-NEXT:    vpcmpnltd (%rdi){1to8}, %ymm0, %k0
11999; VLX-NEXT:    kmovd %k0, %eax
12000; VLX-NEXT:    vzeroupper
12001; VLX-NEXT:    retq
12002;
12003; NoVLX-LABEL: test_vpcmpsged_v8i1_v32i1_mask_mem_b:
12004; NoVLX:       # %bb.0: # %entry
12005; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
12006; NoVLX-NEXT:    vpbroadcastd (%rdi), %ymm1
12007; NoVLX-NEXT:    vpcmpnltd %zmm1, %zmm0, %k0
12008; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
12009; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
12010; NoVLX-NEXT:    kmovw %k0, %eax
12011; NoVLX-NEXT:    vzeroupper
12012; NoVLX-NEXT:    retq
12013entry:
12014  %0 = bitcast <4 x i64> %__a to <8 x i32>
12015  %load = load i32, i32* %__b
  ; Splat the loaded scalar: insert it at element 0, then shuffle with an
  ; all-zero index mask so every lane reads element 0.
12016  %vec = insertelement <8 x i32> undef, i32 %load, i32 0
12017  %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
12018  %2 = icmp sge <8 x i32> %0, %1
  ; Indices 0-7 pick the compare results; index values 8-15 refer to the
  ; zeroinitializer operand, so result elements 8-31 are all zero.
12019  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
12020  %4 = bitcast <32 x i1> %3 to i32
12021  ret i32 %4
12022}
12023
; Masked signed >= compare of an <8 x i32> vector against a scalar i32 loaded
; from %__b and splatted to all lanes; the 8 masked results are widened to i32.
; Expected code: with avx512vl a masked compare with a {1to8} broadcast memory
; operand; with avx512f only, a separate vpbroadcastd, a masked zmm compare and
; a kshiftlw/kshiftrw pair by 8.
12024define zeroext i32 @test_masked_vpcmpsged_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
12025; VLX-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask_mem_b:
12026; VLX:       # %bb.0: # %entry
12027; VLX-NEXT:    kmovd %edi, %k1
12028; VLX-NEXT:    vpcmpnltd (%rsi){1to8}, %ymm0, %k0 {%k1}
12029; VLX-NEXT:    kmovd %k0, %eax
12030; VLX-NEXT:    vzeroupper
12031; VLX-NEXT:    retq
12032;
12033; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask_mem_b:
12034; NoVLX:       # %bb.0: # %entry
12035; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
12036; NoVLX-NEXT:    vpbroadcastd (%rsi), %ymm1
12037; NoVLX-NEXT:    kmovw %edi, %k1
12038; NoVLX-NEXT:    vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
12039; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
12040; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
12041; NoVLX-NEXT:    kmovw %k0, %eax
12042; NoVLX-NEXT:    vzeroupper
12043; NoVLX-NEXT:    retq
12044entry:
12045  %0 = bitcast <4 x i64> %__a to <8 x i32>
12046  %load = load i32, i32* %__b
  ; Splat the loaded scalar: insert it at element 0, then shuffle with an
  ; all-zero index mask so every lane reads element 0.
12047  %vec = insertelement <8 x i32> undef, i32 %load, i32 0
12048  %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
12049  %2 = icmp sge <8 x i32> %0, %1
  ; All 8 bits of %__u act as the per-lane mask; note the and takes the mask
  ; first and the compare result second.
12050  %3 = bitcast i8 %__u to <8 x i1>
12051  %4 = and <8 x i1> %3, %2
  ; Indices 0-7 pick the masked compare results; index values 8-15 refer to
  ; the zeroinitializer operand, so result elements 8-31 are all zero.
12052  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
12053  %6 = bitcast <32 x i1> %5 to i32
12054  ret i32 %6
12055}
12056
12057
; Unmasked signed >= compare of two <8 x i32> vectors (bitcast from the
; <4 x i64> arguments), widened to a 64-element mask and returned as i64.
; Expected code: with avx512vl a single ymm vpcmpnltd read back with kmovq;
; with avx512f only, a zmm compare, a kshiftlw/kshiftrw pair by 8, and a
; movzwl of the 16-bit mask.
12058define zeroext i64 @test_vpcmpsged_v8i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
12059; VLX-LABEL: test_vpcmpsged_v8i1_v64i1_mask:
12060; VLX:       # %bb.0: # %entry
12061; VLX-NEXT:    vpcmpnltd %ymm1, %ymm0, %k0
12062; VLX-NEXT:    kmovq %k0, %rax
12063; VLX-NEXT:    vzeroupper
12064; VLX-NEXT:    retq
12065;
12066; NoVLX-LABEL: test_vpcmpsged_v8i1_v64i1_mask:
12067; NoVLX:       # %bb.0: # %entry
12068; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
12069; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
12070; NoVLX-NEXT:    vpcmpnltd %zmm1, %zmm0, %k0
12071; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
12072; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
12073; NoVLX-NEXT:    kmovw %k0, %eax
12074; NoVLX-NEXT:    movzwl %ax, %eax
12075; NoVLX-NEXT:    vzeroupper
12076; NoVLX-NEXT:    retq
12077entry:
12078  %0 = bitcast <4 x i64> %__a to <8 x i32>
12079  %1 = bitcast <4 x i64> %__b to <8 x i32>
12080  %2 = icmp sge <8 x i32> %0, %1
  ; Indices 0-7 pick the compare results; index values 8-15 refer to the
  ; zeroinitializer operand, so result elements 8-63 are all zero.
12081  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
12082  %4 = bitcast <64 x i1> %3 to i64
12083  ret i64 %4
12084}
12085
; Unmasked <8 x i32> signed >= compare widened to i64, with the right-hand
; operand loaded from the pointer %__b.
; Expected code: with avx512vl the load is folded into the compare's memory
; operand; with avx512f only, a separate vmovdqa precedes a zmm compare, a
; kshiftlw/kshiftrw pair by 8, and a movzwl of the 16-bit mask.
12086define zeroext i64 @test_vpcmpsged_v8i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
12087; VLX-LABEL: test_vpcmpsged_v8i1_v64i1_mask_mem:
12088; VLX:       # %bb.0: # %entry
12089; VLX-NEXT:    vpcmpnltd (%rdi), %ymm0, %k0
12090; VLX-NEXT:    kmovq %k0, %rax
12091; VLX-NEXT:    vzeroupper
12092; VLX-NEXT:    retq
12093;
12094; NoVLX-LABEL: test_vpcmpsged_v8i1_v64i1_mask_mem:
12095; NoVLX:       # %bb.0: # %entry
12096; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
12097; NoVLX-NEXT:    vmovdqa (%rdi), %ymm1
12098; NoVLX-NEXT:    vpcmpnltd %zmm1, %zmm0, %k0
12099; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
12100; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
12101; NoVLX-NEXT:    kmovw %k0, %eax
12102; NoVLX-NEXT:    movzwl %ax, %eax
12103; NoVLX-NEXT:    vzeroupper
12104; NoVLX-NEXT:    retq
12105entry:
12106  %0 = bitcast <4 x i64> %__a to <8 x i32>
12107  %load = load <4 x i64>, <4 x i64>* %__b
12108  %1 = bitcast <4 x i64> %load to <8 x i32>
12109  %2 = icmp sge <8 x i32> %0, %1
  ; Indices 0-7 pick the compare results; index values 8-15 refer to the
  ; zeroinitializer operand, so result elements 8-63 are all zero.
12110  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
12111  %4 = bitcast <64 x i1> %3 to i64
12112  ret i64 %4
12113}
12114
; Masked signed >= compare of two <8 x i32> vectors (bitcast from the
; <4 x i64> arguments), widened to a 64-element mask and returned as i64.
; Expected code: with avx512vl a single masked ymm vpcmpnltd read back with
; kmovq; with avx512f only, a masked zmm compare, a kshiftlw/kshiftrw pair by
; 8, and a movzwl of the 16-bit mask.
12115define zeroext i64 @test_masked_vpcmpsged_v8i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
12116; VLX-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask:
12117; VLX:       # %bb.0: # %entry
12118; VLX-NEXT:    kmovd %edi, %k1
12119; VLX-NEXT:    vpcmpnltd %ymm1, %ymm0, %k0 {%k1}
12120; VLX-NEXT:    kmovq %k0, %rax
12121; VLX-NEXT:    vzeroupper
12122; VLX-NEXT:    retq
12123;
12124; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask:
12125; NoVLX:       # %bb.0: # %entry
12126; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
12127; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
12128; NoVLX-NEXT:    kmovw %edi, %k1
12129; NoVLX-NEXT:    vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
12130; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
12131; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
12132; NoVLX-NEXT:    kmovw %k0, %eax
12133; NoVLX-NEXT:    movzwl %ax, %eax
12134; NoVLX-NEXT:    vzeroupper
12135; NoVLX-NEXT:    retq
12136entry:
12137  %0 = bitcast <4 x i64> %__a to <8 x i32>
12138  %1 = bitcast <4 x i64> %__b to <8 x i32>
12139  %2 = icmp sge <8 x i32> %0, %1
  ; All 8 bits of %__u act as the per-lane mask for the 8 compare results.
12140  %3 = bitcast i8 %__u to <8 x i1>
12141  %4 = and <8 x i1> %2, %3
  ; Indices 0-7 pick the masked compare results; index values 8-15 refer to
  ; the zeroinitializer operand, so result elements 8-63 are all zero.
12142  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
12143  %6 = bitcast <64 x i1> %5 to i64
12144  ret i64 %6
12145}
12146
; Masked <8 x i32> signed >= compare widened to i64, with the right-hand
; operand loaded from the pointer %__b.
; Expected code: with avx512vl the load is folded into the masked compare;
; with avx512f only, a separate vmovdqa precedes a masked zmm compare, a
; kshiftlw/kshiftrw pair by 8, and a movzwl of the 16-bit mask.
12147define zeroext i64 @test_masked_vpcmpsged_v8i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
12148; VLX-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask_mem:
12149; VLX:       # %bb.0: # %entry
12150; VLX-NEXT:    kmovd %edi, %k1
12151; VLX-NEXT:    vpcmpnltd (%rsi), %ymm0, %k0 {%k1}
12152; VLX-NEXT:    kmovq %k0, %rax
12153; VLX-NEXT:    vzeroupper
12154; VLX-NEXT:    retq
12155;
12156; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask_mem:
12157; NoVLX:       # %bb.0: # %entry
12158; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
12159; NoVLX-NEXT:    vmovdqa (%rsi), %ymm1
12160; NoVLX-NEXT:    kmovw %edi, %k1
12161; NoVLX-NEXT:    vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
12162; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
12163; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
12164; NoVLX-NEXT:    kmovw %k0, %eax
12165; NoVLX-NEXT:    movzwl %ax, %eax
12166; NoVLX-NEXT:    vzeroupper
12167; NoVLX-NEXT:    retq
12168entry:
12169  %0 = bitcast <4 x i64> %__a to <8 x i32>
12170  %load = load <4 x i64>, <4 x i64>* %__b
12171  %1 = bitcast <4 x i64> %load to <8 x i32>
12172  %2 = icmp sge <8 x i32> %0, %1
  ; All 8 bits of %__u act as the per-lane mask for the 8 compare results.
12173  %3 = bitcast i8 %__u to <8 x i1>
12174  %4 = and <8 x i1> %2, %3
  ; Indices 0-7 pick the masked compare results; index values 8-15 refer to
  ; the zeroinitializer operand, so result elements 8-63 are all zero.
12175  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
12176  %6 = bitcast <64 x i1> %5 to i64
12177  ret i64 %6
12178}
12179
12180
; Unmasked signed >= compare of an <8 x i32> vector against a scalar i32 loaded
; from %__b and splatted to all lanes; the 8 results are widened to i64.
; Expected code: with avx512vl a {1to8} broadcast memory operand on the
; compare; with avx512f only, a separate vpbroadcastd, a zmm compare, a
; kshiftlw/kshiftrw pair by 8, and a movzwl of the 16-bit mask.
12181define zeroext i64 @test_vpcmpsged_v8i1_v64i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
12182; VLX-LABEL: test_vpcmpsged_v8i1_v64i1_mask_mem_b:
12183; VLX:       # %bb.0: # %entry
12184; VLX-NEXT:    vpcmpnltd (%rdi){1to8}, %ymm0, %k0
12185; VLX-NEXT:    kmovq %k0, %rax
12186; VLX-NEXT:    vzeroupper
12187; VLX-NEXT:    retq
12188;
12189; NoVLX-LABEL: test_vpcmpsged_v8i1_v64i1_mask_mem_b:
12190; NoVLX:       # %bb.0: # %entry
12191; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
12192; NoVLX-NEXT:    vpbroadcastd (%rdi), %ymm1
12193; NoVLX-NEXT:    vpcmpnltd %zmm1, %zmm0, %k0
12194; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
12195; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
12196; NoVLX-NEXT:    kmovw %k0, %eax
12197; NoVLX-NEXT:    movzwl %ax, %eax
12198; NoVLX-NEXT:    vzeroupper
12199; NoVLX-NEXT:    retq
12200entry:
12201  %0 = bitcast <4 x i64> %__a to <8 x i32>
12202  %load = load i32, i32* %__b
  ; Splat the loaded scalar: insert it at element 0, then shuffle with an
  ; all-zero index mask so every lane reads element 0.
12203  %vec = insertelement <8 x i32> undef, i32 %load, i32 0
12204  %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
12205  %2 = icmp sge <8 x i32> %0, %1
  ; Indices 0-7 pick the compare results; index values 8-15 refer to the
  ; zeroinitializer operand, so result elements 8-63 are all zero.
12206  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
12207  %4 = bitcast <64 x i1> %3 to i64
12208  ret i64 %4
12209}
12210
; Masked form. Signed >= compare of the eight i32 lanes of %__a against one i32
; loaded from %__b and splatted, ANDed lane-wise with the 8-bit mask %__u, then
; widened to 64 lanes with zero fill and returned as an i64 bitmask.
; Both check prefixes expect %__u moved into k1 and used as the write-mask of
; the compare. The NoVLX sequence additionally broadcasts into ymm1, compares
; at zmm width and applies a kshiftlw/kshiftrw $8 pair to the result.
12211define zeroext i64 @test_masked_vpcmpsged_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
12212; VLX-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask_mem_b:
12213; VLX:       # %bb.0: # %entry
12214; VLX-NEXT:    kmovd %edi, %k1
12215; VLX-NEXT:    vpcmpnltd (%rsi){1to8}, %ymm0, %k0 {%k1}
12216; VLX-NEXT:    kmovq %k0, %rax
12217; VLX-NEXT:    vzeroupper
12218; VLX-NEXT:    retq
12219;
12220; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask_mem_b:
12221; NoVLX:       # %bb.0: # %entry
12222; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
12223; NoVLX-NEXT:    vpbroadcastd (%rsi), %ymm1
12224; NoVLX-NEXT:    kmovw %edi, %k1
12225; NoVLX-NEXT:    vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
12226; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
12227; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
12228; NoVLX-NEXT:    kmovw %k0, %eax
12229; NoVLX-NEXT:    movzwl %ax, %eax
12230; NoVLX-NEXT:    vzeroupper
12231; NoVLX-NEXT:    retq
12232entry:
12233  %0 = bitcast <4 x i64> %__a to <8 x i32>
12234  %load = load i32, i32* %__b
12235  %vec = insertelement <8 x i32> undef, i32 %load, i32 0
12236  %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
12237  %2 = icmp sge <8 x i32> %0, %1
12238  %3 = bitcast i8 %__u to <8 x i1>
12239  %4 = and <8 x i1> %3, %2
  ; Indices 0-7 pick the masked compare lanes; indices 8-15 pick from the
  ; zeroinitializer operand, so result lanes 8-63 are all zero.
12240  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
12241  %6 = bitcast <64 x i1> %5 to i64
12242  ret i64 %6
12243}
12244
12245
; Signed >= compare of the sixteen i32 lanes of %__a and %__b, both passed in
; registers. The <16 x i1> result is widened to 32 lanes with zero fill and
; returned as an i32 bitmask.
; Both check prefixes expect a single zmm vpcmpnltd; they differ only in the
; mask-to-GPR move (kmovd under VLX, kmovw under NoVLX).
12246define zeroext i32 @test_vpcmpsged_v16i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
12247; VLX-LABEL: test_vpcmpsged_v16i1_v32i1_mask:
12248; VLX:       # %bb.0: # %entry
12249; VLX-NEXT:    vpcmpnltd %zmm1, %zmm0, %k0
12250; VLX-NEXT:    kmovd %k0, %eax
12251; VLX-NEXT:    vzeroupper
12252; VLX-NEXT:    retq
12253;
12254; NoVLX-LABEL: test_vpcmpsged_v16i1_v32i1_mask:
12255; NoVLX:       # %bb.0: # %entry
12256; NoVLX-NEXT:    vpcmpnltd %zmm1, %zmm0, %k0
12257; NoVLX-NEXT:    kmovw %k0, %eax
12258; NoVLX-NEXT:    vzeroupper
12259; NoVLX-NEXT:    retq
12260entry:
12261  %0 = bitcast <8 x i64> %__a to <16 x i32>
12262  %1 = bitcast <8 x i64> %__b to <16 x i32>
12263  %2 = icmp sge <16 x i32> %0, %1
  ; Indices 0-15 pick the compare lanes; indices 16-31 pick zeros from the
  ; zeroinitializer operand.
12264  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
12265  %4 = bitcast <32 x i1> %3 to i32
12266  ret i32 %4
12267}
12268
; Signed >= compare of the sixteen i32 lanes of %__a against a full vector
; loaded from %__b. The <16 x i1> result is widened to 32 lanes with zero fill
; and returned as an i32 bitmask.
; Both check prefixes expect the load folded into vpcmpnltd as a (%rdi) memory
; operand; they differ only in the mask-to-GPR move (kmovd vs. kmovw).
12269define zeroext i32 @test_vpcmpsged_v16i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
12270; VLX-LABEL: test_vpcmpsged_v16i1_v32i1_mask_mem:
12271; VLX:       # %bb.0: # %entry
12272; VLX-NEXT:    vpcmpnltd (%rdi), %zmm0, %k0
12273; VLX-NEXT:    kmovd %k0, %eax
12274; VLX-NEXT:    vzeroupper
12275; VLX-NEXT:    retq
12276;
12277; NoVLX-LABEL: test_vpcmpsged_v16i1_v32i1_mask_mem:
12278; NoVLX:       # %bb.0: # %entry
12279; NoVLX-NEXT:    vpcmpnltd (%rdi), %zmm0, %k0
12280; NoVLX-NEXT:    kmovw %k0, %eax
12281; NoVLX-NEXT:    vzeroupper
12282; NoVLX-NEXT:    retq
12283entry:
12284  %0 = bitcast <8 x i64> %__a to <16 x i32>
12285  %load = load <8 x i64>, <8 x i64>* %__b
12286  %1 = bitcast <8 x i64> %load to <16 x i32>
12287  %2 = icmp sge <16 x i32> %0, %1
  ; Indices 0-15 pick the compare lanes; indices 16-31 pick zeros from the
  ; zeroinitializer operand.
12288  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
12289  %4 = bitcast <32 x i1> %3 to i32
12290  ret i32 %4
12291}
12292
; Masked form. Signed >= compare of the sixteen i32 lanes of %__a and %__b,
; ANDed lane-wise with the 16-bit mask %__u, widened to 32 lanes with zero fill
; and returned as an i32 bitmask.
; Checks under the VLX prefix expect %__u in k1 as the compare's write-mask.
; Checks under the NoVLX prefix expect an unmasked compare followed by an
; `andl %edi, %eax` that applies the mask in a general-purpose register.
12293define zeroext i32 @test_masked_vpcmpsged_v16i1_v32i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
12294; VLX-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask:
12295; VLX:       # %bb.0: # %entry
12296; VLX-NEXT:    kmovd %edi, %k1
12297; VLX-NEXT:    vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
12298; VLX-NEXT:    kmovd %k0, %eax
12299; VLX-NEXT:    vzeroupper
12300; VLX-NEXT:    retq
12301;
12302; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask:
12303; NoVLX:       # %bb.0: # %entry
12304; NoVLX-NEXT:    vpcmpnltd %zmm1, %zmm0, %k0
12305; NoVLX-NEXT:    kmovw %k0, %eax
12306; NoVLX-NEXT:    andl %edi, %eax
12307; NoVLX-NEXT:    vzeroupper
12308; NoVLX-NEXT:    retq
12309entry:
12310  %0 = bitcast <8 x i64> %__a to <16 x i32>
12311  %1 = bitcast <8 x i64> %__b to <16 x i32>
12312  %2 = icmp sge <16 x i32> %0, %1
12313  %3 = bitcast i16 %__u to <16 x i1>
12314  %4 = and <16 x i1> %2, %3
  ; Indices 0-15 pick the masked compare lanes; indices 16-31 pick zeros from
  ; the zeroinitializer operand.
12315  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
12316  %6 = bitcast <32 x i1> %5 to i32
12317  ret i32 %6
12318}
12319
; Masked form with a memory operand. Signed >= compare of the sixteen i32 lanes
; of %__a against a full vector loaded from %__b, ANDed lane-wise with the
; 16-bit mask %__u, widened to 32 lanes with zero fill and returned as i32.
; Both check prefixes expect the load folded into vpcmpnltd. The VLX sequence
; uses %__u as the k1 write-mask; the NoVLX sequence applies it afterwards with
; `andl %edi, %eax`.
12320define zeroext i32 @test_masked_vpcmpsged_v16i1_v32i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
12321; VLX-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask_mem:
12322; VLX:       # %bb.0: # %entry
12323; VLX-NEXT:    kmovd %edi, %k1
12324; VLX-NEXT:    vpcmpnltd (%rsi), %zmm0, %k0 {%k1}
12325; VLX-NEXT:    kmovd %k0, %eax
12326; VLX-NEXT:    vzeroupper
12327; VLX-NEXT:    retq
12328;
12329; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask_mem:
12330; NoVLX:       # %bb.0: # %entry
12331; NoVLX-NEXT:    vpcmpnltd (%rsi), %zmm0, %k0
12332; NoVLX-NEXT:    kmovw %k0, %eax
12333; NoVLX-NEXT:    andl %edi, %eax
12334; NoVLX-NEXT:    vzeroupper
12335; NoVLX-NEXT:    retq
12336entry:
12337  %0 = bitcast <8 x i64> %__a to <16 x i32>
12338  %load = load <8 x i64>, <8 x i64>* %__b
12339  %1 = bitcast <8 x i64> %load to <16 x i32>
12340  %2 = icmp sge <16 x i32> %0, %1
12341  %3 = bitcast i16 %__u to <16 x i1>
12342  %4 = and <16 x i1> %2, %3
  ; Indices 0-15 pick the masked compare lanes; indices 16-31 pick zeros from
  ; the zeroinitializer operand.
12343  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
12344  %6 = bitcast <32 x i1> %5 to i32
12345  ret i32 %6
12346}
12347
12348
; Broadcast form. Signed >= compare of the sixteen i32 lanes of %__a against
; one i32 loaded from %__b and splatted to every lane. The <16 x i1> result is
; widened to 32 lanes with zero fill and returned as an i32 bitmask.
; Both check prefixes expect the scalar load folded into vpcmpnltd as a
; {1to16} broadcast operand; they differ only in kmovd vs. kmovw.
12349define zeroext i32 @test_vpcmpsged_v16i1_v32i1_mask_mem_b(<8 x i64> %__a, i32* %__b) local_unnamed_addr {
12350; VLX-LABEL: test_vpcmpsged_v16i1_v32i1_mask_mem_b:
12351; VLX:       # %bb.0: # %entry
12352; VLX-NEXT:    vpcmpnltd (%rdi){1to16}, %zmm0, %k0
12353; VLX-NEXT:    kmovd %k0, %eax
12354; VLX-NEXT:    vzeroupper
12355; VLX-NEXT:    retq
12356;
12357; NoVLX-LABEL: test_vpcmpsged_v16i1_v32i1_mask_mem_b:
12358; NoVLX:       # %bb.0: # %entry
12359; NoVLX-NEXT:    vpcmpnltd (%rdi){1to16}, %zmm0, %k0
12360; NoVLX-NEXT:    kmovw %k0, %eax
12361; NoVLX-NEXT:    vzeroupper
12362; NoVLX-NEXT:    retq
12363entry:
12364  %0 = bitcast <8 x i64> %__a to <16 x i32>
12365  %load = load i32, i32* %__b
12366  %vec = insertelement <16 x i32> undef, i32 %load, i32 0
12367  %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
12368  %2 = icmp sge <16 x i32> %0, %1
  ; Indices 0-15 pick the compare lanes; indices 16-31 pick zeros from the
  ; zeroinitializer operand.
12369  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
12370  %4 = bitcast <32 x i1> %3 to i32
12371  ret i32 %4
12372}
12373
; Masked broadcast form. Signed >= compare of the sixteen i32 lanes of %__a
; against one i32 loaded from %__b and splatted, ANDed lane-wise with the
; 16-bit mask %__u, widened to 32 lanes with zero fill and returned as i32.
; Both check prefixes expect a {1to16} broadcast operand. The VLX sequence uses
; %__u as the k1 write-mask; the NoVLX sequence applies it afterwards with
; `andl %edi, %eax`.
12374define zeroext i32 @test_masked_vpcmpsged_v16i1_v32i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, i32* %__b) local_unnamed_addr {
12375; VLX-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask_mem_b:
12376; VLX:       # %bb.0: # %entry
12377; VLX-NEXT:    kmovd %edi, %k1
12378; VLX-NEXT:    vpcmpnltd (%rsi){1to16}, %zmm0, %k0 {%k1}
12379; VLX-NEXT:    kmovd %k0, %eax
12380; VLX-NEXT:    vzeroupper
12381; VLX-NEXT:    retq
12382;
12383; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask_mem_b:
12384; NoVLX:       # %bb.0: # %entry
12385; NoVLX-NEXT:    vpcmpnltd (%rsi){1to16}, %zmm0, %k0
12386; NoVLX-NEXT:    kmovw %k0, %eax
12387; NoVLX-NEXT:    andl %edi, %eax
12388; NoVLX-NEXT:    vzeroupper
12389; NoVLX-NEXT:    retq
12390entry:
12391  %0 = bitcast <8 x i64> %__a to <16 x i32>
12392  %load = load i32, i32* %__b
12393  %vec = insertelement <16 x i32> undef, i32 %load, i32 0
12394  %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
12395  %2 = icmp sge <16 x i32> %0, %1
12396  %3 = bitcast i16 %__u to <16 x i1>
12397  %4 = and <16 x i1> %3, %2
  ; Indices 0-15 pick the masked compare lanes; indices 16-31 pick zeros from
  ; the zeroinitializer operand.
12398  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
12399  %6 = bitcast <32 x i1> %5 to i32
12400  ret i32 %6
12401}
12402
12403
; Signed >= compare of the sixteen i32 lanes of %__a and %__b, both passed in
; registers. The <16 x i1> result is widened to 64 lanes with zero fill and
; returned as an i64 bitmask.
; Checks under the VLX prefix expect the mask read with kmovq. Checks under the
; NoVLX prefix expect kmovw followed by `movzwl %ax, %eax`.
12404define zeroext i64 @test_vpcmpsged_v16i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
12405; VLX-LABEL: test_vpcmpsged_v16i1_v64i1_mask:
12406; VLX:       # %bb.0: # %entry
12407; VLX-NEXT:    vpcmpnltd %zmm1, %zmm0, %k0
12408; VLX-NEXT:    kmovq %k0, %rax
12409; VLX-NEXT:    vzeroupper
12410; VLX-NEXT:    retq
12411;
12412; NoVLX-LABEL: test_vpcmpsged_v16i1_v64i1_mask:
12413; NoVLX:       # %bb.0: # %entry
12414; NoVLX-NEXT:    vpcmpnltd %zmm1, %zmm0, %k0
12415; NoVLX-NEXT:    kmovw %k0, %eax
12416; NoVLX-NEXT:    movzwl %ax, %eax
12417; NoVLX-NEXT:    vzeroupper
12418; NoVLX-NEXT:    retq
12419entry:
12420  %0 = bitcast <8 x i64> %__a to <16 x i32>
12421  %1 = bitcast <8 x i64> %__b to <16 x i32>
12422  %2 = icmp sge <16 x i32> %0, %1
  ; Indices 0-15 pick the compare lanes; every later index lies in 16-31 and
  ; picks a zero from the zeroinitializer operand.
12423  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
12424  %4 = bitcast <64 x i1> %3 to i64
12425  ret i64 %4
12426}
12427
; Signed >= compare of the sixteen i32 lanes of %__a against a full vector
; loaded from %__b. The <16 x i1> result is widened to 64 lanes with zero fill
; and returned as an i64 bitmask.
; Both check prefixes expect the load folded into vpcmpnltd. The VLX sequence
; reads the mask with kmovq; the NoVLX sequence uses kmovw plus movzwl.
12428define zeroext i64 @test_vpcmpsged_v16i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
12429; VLX-LABEL: test_vpcmpsged_v16i1_v64i1_mask_mem:
12430; VLX:       # %bb.0: # %entry
12431; VLX-NEXT:    vpcmpnltd (%rdi), %zmm0, %k0
12432; VLX-NEXT:    kmovq %k0, %rax
12433; VLX-NEXT:    vzeroupper
12434; VLX-NEXT:    retq
12435;
12436; NoVLX-LABEL: test_vpcmpsged_v16i1_v64i1_mask_mem:
12437; NoVLX:       # %bb.0: # %entry
12438; NoVLX-NEXT:    vpcmpnltd (%rdi), %zmm0, %k0
12439; NoVLX-NEXT:    kmovw %k0, %eax
12440; NoVLX-NEXT:    movzwl %ax, %eax
12441; NoVLX-NEXT:    vzeroupper
12442; NoVLX-NEXT:    retq
12443entry:
12444  %0 = bitcast <8 x i64> %__a to <16 x i32>
12445  %load = load <8 x i64>, <8 x i64>* %__b
12446  %1 = bitcast <8 x i64> %load to <16 x i32>
12447  %2 = icmp sge <16 x i32> %0, %1
  ; Indices 0-15 pick the compare lanes; every later index lies in 16-31 and
  ; picks a zero from the zeroinitializer operand.
12448  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
12449  %4 = bitcast <64 x i1> %3 to i64
12450  ret i64 %4
12451}
12452
; Masked form. Signed >= compare of the sixteen i32 lanes of %__a and %__b,
; ANDed lane-wise with the 16-bit mask %__u, widened to 64 lanes with zero fill
; and returned as an i64 bitmask.
; Checks under the VLX prefix expect %__u in k1 as the compare's write-mask and
; a kmovq of the result. Checks under the NoVLX prefix expect an unmasked
; compare, kmovw, then `andl %edi, %eax` to apply the mask.
12453define zeroext i64 @test_masked_vpcmpsged_v16i1_v64i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
12454; VLX-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask:
12455; VLX:       # %bb.0: # %entry
12456; VLX-NEXT:    kmovd %edi, %k1
12457; VLX-NEXT:    vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
12458; VLX-NEXT:    kmovq %k0, %rax
12459; VLX-NEXT:    vzeroupper
12460; VLX-NEXT:    retq
12461;
12462; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask:
12463; NoVLX:       # %bb.0: # %entry
12464; NoVLX-NEXT:    vpcmpnltd %zmm1, %zmm0, %k0
12465; NoVLX-NEXT:    kmovw %k0, %eax
12466; NoVLX-NEXT:    andl %edi, %eax
12467; NoVLX-NEXT:    vzeroupper
12468; NoVLX-NEXT:    retq
12469entry:
12470  %0 = bitcast <8 x i64> %__a to <16 x i32>
12471  %1 = bitcast <8 x i64> %__b to <16 x i32>
12472  %2 = icmp sge <16 x i32> %0, %1
12473  %3 = bitcast i16 %__u to <16 x i1>
12474  %4 = and <16 x i1> %2, %3
  ; Indices 0-15 pick the masked compare lanes; every later index lies in
  ; 16-31 and picks a zero from the zeroinitializer operand.
12475  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
12476  %6 = bitcast <64 x i1> %5 to i64
12477  ret i64 %6
12478}
12479
; Masked form with a memory operand. Signed >= compare of the sixteen i32 lanes
; of %__a against a full vector loaded from %__b, ANDed lane-wise with the
; 16-bit mask %__u, widened to 64 lanes with zero fill and returned as i64.
; Both check prefixes expect the load folded into vpcmpnltd. The VLX sequence
; uses %__u as the k1 write-mask and kmovq; the NoVLX sequence uses kmovw and
; applies the mask with `andl %edi, %eax`.
12480define zeroext i64 @test_masked_vpcmpsged_v16i1_v64i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
12481; VLX-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask_mem:
12482; VLX:       # %bb.0: # %entry
12483; VLX-NEXT:    kmovd %edi, %k1
12484; VLX-NEXT:    vpcmpnltd (%rsi), %zmm0, %k0 {%k1}
12485; VLX-NEXT:    kmovq %k0, %rax
12486; VLX-NEXT:    vzeroupper
12487; VLX-NEXT:    retq
12488;
12489; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask_mem:
12490; NoVLX:       # %bb.0: # %entry
12491; NoVLX-NEXT:    vpcmpnltd (%rsi), %zmm0, %k0
12492; NoVLX-NEXT:    kmovw %k0, %eax
12493; NoVLX-NEXT:    andl %edi, %eax
12494; NoVLX-NEXT:    vzeroupper
12495; NoVLX-NEXT:    retq
12496entry:
12497  %0 = bitcast <8 x i64> %__a to <16 x i32>
12498  %load = load <8 x i64>, <8 x i64>* %__b
12499  %1 = bitcast <8 x i64> %load to <16 x i32>
12500  %2 = icmp sge <16 x i32> %0, %1
12501  %3 = bitcast i16 %__u to <16 x i1>
12502  %4 = and <16 x i1> %2, %3
  ; Indices 0-15 pick the masked compare lanes; every later index lies in
  ; 16-31 and picks a zero from the zeroinitializer operand.
12503  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
12504  %6 = bitcast <64 x i1> %5 to i64
12505  ret i64 %6
12506}
12507
12508
; Broadcast form. Signed >= compare of the sixteen i32 lanes of %__a against
; one i32 loaded from %__b and splatted to every lane. The <16 x i1> result is
; widened to 64 lanes with zero fill and returned as an i64 bitmask.
; Both check prefixes expect a {1to16} broadcast operand on vpcmpnltd. The VLX
; sequence reads the mask with kmovq; the NoVLX sequence uses kmovw plus
; `movzwl %ax, %eax`.
12509define zeroext i64 @test_vpcmpsged_v16i1_v64i1_mask_mem_b(<8 x i64> %__a, i32* %__b) local_unnamed_addr {
12510; VLX-LABEL: test_vpcmpsged_v16i1_v64i1_mask_mem_b:
12511; VLX:       # %bb.0: # %entry
12512; VLX-NEXT:    vpcmpnltd (%rdi){1to16}, %zmm0, %k0
12513; VLX-NEXT:    kmovq %k0, %rax
12514; VLX-NEXT:    vzeroupper
12515; VLX-NEXT:    retq
12516;
12517; NoVLX-LABEL: test_vpcmpsged_v16i1_v64i1_mask_mem_b:
12518; NoVLX:       # %bb.0: # %entry
12519; NoVLX-NEXT:    vpcmpnltd (%rdi){1to16}, %zmm0, %k0
12520; NoVLX-NEXT:    kmovw %k0, %eax
12521; NoVLX-NEXT:    movzwl %ax, %eax
12522; NoVLX-NEXT:    vzeroupper
12523; NoVLX-NEXT:    retq
12524entry:
12525  %0 = bitcast <8 x i64> %__a to <16 x i32>
12526  %load = load i32, i32* %__b
12527  %vec = insertelement <16 x i32> undef, i32 %load, i32 0
12528  %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
12529  %2 = icmp sge <16 x i32> %0, %1
  ; Indices 0-15 pick the compare lanes; every later index lies in 16-31 and
  ; picks a zero from the zeroinitializer operand.
12530  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
12531  %4 = bitcast <64 x i1> %3 to i64
12532  ret i64 %4
12533}
12534
; Masked broadcast form. Signed >= compare of the sixteen i32 lanes of %__a
; against one i32 loaded from %__b and splatted, ANDed lane-wise with the
; 16-bit mask %__u, widened to 64 lanes with zero fill and returned as i64.
; Both check prefixes expect a {1to16} broadcast operand. The VLX sequence uses
; %__u as the k1 write-mask and kmovq; the NoVLX sequence uses kmovw and
; applies the mask with `andl %edi, %eax`.
12535define zeroext i64 @test_masked_vpcmpsged_v16i1_v64i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, i32* %__b) local_unnamed_addr {
12536; VLX-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask_mem_b:
12537; VLX:       # %bb.0: # %entry
12538; VLX-NEXT:    kmovd %edi, %k1
12539; VLX-NEXT:    vpcmpnltd (%rsi){1to16}, %zmm0, %k0 {%k1}
12540; VLX-NEXT:    kmovq %k0, %rax
12541; VLX-NEXT:    vzeroupper
12542; VLX-NEXT:    retq
12543;
12544; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask_mem_b:
12545; NoVLX:       # %bb.0: # %entry
12546; NoVLX-NEXT:    vpcmpnltd (%rsi){1to16}, %zmm0, %k0
12547; NoVLX-NEXT:    kmovw %k0, %eax
12548; NoVLX-NEXT:    andl %edi, %eax
12549; NoVLX-NEXT:    vzeroupper
12550; NoVLX-NEXT:    retq
12551entry:
12552  %0 = bitcast <8 x i64> %__a to <16 x i32>
12553  %load = load i32, i32* %__b
12554  %vec = insertelement <16 x i32> undef, i32 %load, i32 0
12555  %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
12556  %2 = icmp sge <16 x i32> %0, %1
12557  %3 = bitcast i16 %__u to <16 x i1>
12558  %4 = and <16 x i1> %3, %2
  ; Indices 0-15 pick the masked compare lanes; every later index lies in
  ; 16-31 and picks a zero from the zeroinitializer operand.
12559  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
12560  %6 = bitcast <64 x i1> %5 to i64
12561  ret i64 %6
12562}
12563
12564
; Signed >= compare of the two i64 lanes of %__a and %__b, both passed in
; registers. The <2 x i1> result is widened to 4 lanes with zero fill and
; returned as an i4 bitmask. The two leading bitcasts are identity casts.
; Checks under the VLX prefix expect an xmm vpcmpnltq and kmovb. Checks under
; the NoVLX prefix expect a zmm-wide compare, a kshiftlw/kshiftrw $14 pair that
; keeps only the low 2 mask bits, kmovw, and `andl $3, %eax`.
12565define zeroext i4 @test_vpcmpsgeq_v2i1_v4i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
12566; VLX-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask:
12567; VLX:       # %bb.0: # %entry
12568; VLX-NEXT:    vpcmpnltq %xmm1, %xmm0, %k0
12569; VLX-NEXT:    kmovb %k0, %eax
12570; VLX-NEXT:    retq
12571;
12572; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask:
12573; NoVLX:       # %bb.0: # %entry
12574; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
12575; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
12576; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0
12577; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
12578; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
12579; NoVLX-NEXT:    kmovw %k0, %eax
12580; NoVLX-NEXT:    andl $3, %eax
12581; NoVLX-NEXT:    vzeroupper
12582; NoVLX-NEXT:    retq
12583entry:
12584  %0 = bitcast <2 x i64> %__a to <2 x i64>
12585  %1 = bitcast <2 x i64> %__b to <2 x i64>
12586  %2 = icmp sge <2 x i64> %0, %1
  ; Indices 0-1 pick the compare lanes; indices 2-3 pick zeros from the
  ; zeroinitializer operand.
12587  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
12588  %4 = bitcast <4 x i1> %3 to i4
12589  ret i4 %4
12590}
12591
; Signed >= compare of the two i64 lanes of %__a against a <2 x i64> vector
; loaded from %__b. The <2 x i1> result is widened to 4 lanes with zero fill
; and returned as an i4 bitmask.
; Checks under the VLX prefix expect the load folded into an xmm vpcmpnltq.
; Checks under the NoVLX prefix expect a separate vmovdqa into xmm1, a zmm-wide
; compare, a kshiftlw/kshiftrw $14 pair, kmovw, and `andl $3, %eax`.
12592define zeroext i4 @test_vpcmpsgeq_v2i1_v4i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
12593; VLX-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask_mem:
12594; VLX:       # %bb.0: # %entry
12595; VLX-NEXT:    vpcmpnltq (%rdi), %xmm0, %k0
12596; VLX-NEXT:    kmovb %k0, %eax
12597; VLX-NEXT:    retq
12598;
12599; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask_mem:
12600; NoVLX:       # %bb.0: # %entry
12601; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
12602; NoVLX-NEXT:    vmovdqa (%rdi), %xmm1
12603; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0
12604; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
12605; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
12606; NoVLX-NEXT:    kmovw %k0, %eax
12607; NoVLX-NEXT:    andl $3, %eax
12608; NoVLX-NEXT:    vzeroupper
12609; NoVLX-NEXT:    retq
12610entry:
12611  %0 = bitcast <2 x i64> %__a to <2 x i64>
12612  %load = load <2 x i64>, <2 x i64>* %__b
12613  %1 = bitcast <2 x i64> %load to <2 x i64>
12614  %2 = icmp sge <2 x i64> %0, %1
  ; Indices 0-1 pick the compare lanes; indices 2-3 pick zeros from the
  ; zeroinitializer operand.
12615  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
12616  %4 = bitcast <4 x i1> %3 to i4
12617  ret i4 %4
12618}
12619
; Masked form. Signed >= compare of the two i64 lanes of %__a and %__b, ANDed
; lane-wise with bits 0 and 1 of the i8 mask %__u, widened to 4 lanes with zero
; fill and returned as an i4 bitmask.
; Both check prefixes expect %__u moved into k1 and used as the compare's
; write-mask. The NoVLX sequence compares at zmm width and then applies a
; kshiftlw/kshiftrw $14 pair, kmovw and `andl $3, %eax`.
12620define zeroext i4 @test_masked_vpcmpsgeq_v2i1_v4i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
12621; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask:
12622; VLX:       # %bb.0: # %entry
12623; VLX-NEXT:    kmovd %edi, %k1
12624; VLX-NEXT:    vpcmpnltq %xmm1, %xmm0, %k0 {%k1}
12625; VLX-NEXT:    kmovb %k0, %eax
12626; VLX-NEXT:    retq
12627;
12628; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask:
12629; NoVLX:       # %bb.0: # %entry
12630; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
12631; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
12632; NoVLX-NEXT:    kmovw %edi, %k1
12633; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
12634; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
12635; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
12636; NoVLX-NEXT:    kmovw %k0, %eax
12637; NoVLX-NEXT:    andl $3, %eax
12638; NoVLX-NEXT:    vzeroupper
12639; NoVLX-NEXT:    retq
12640entry:
12641  %0 = bitcast <2 x i64> %__a to <2 x i64>
12642  %1 = bitcast <2 x i64> %__b to <2 x i64>
12643  %2 = icmp sge <2 x i64> %0, %1
12644  %3 = bitcast i8 %__u to <8 x i1>
  ; Only mask lanes 0 and 1 of the 8-lane mask take part in the AND.
12645  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
12646  %4 = and <2 x i1> %2, %extract.i
  ; Indices 0-1 pick the masked compare lanes; indices 2-3 pick zeros from the
  ; zeroinitializer operand.
12647  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
12648  %6 = bitcast <4 x i1> %5 to i4
12649  ret i4 %6
12650}
12651
; Masked form with a memory operand. Signed >= compare of the two i64 lanes of
; %__a against a <2 x i64> vector loaded from %__b, ANDed lane-wise with bits 0
; and 1 of the i8 mask %__u, widened to 4 lanes with zero fill, returned as i4.
; Both check prefixes expect %__u in k1 as the compare's write-mask. The VLX
; sequence folds the load; the NoVLX sequence loads with vmovdqa, compares at
; zmm width, then applies a kshiftlw/kshiftrw $14 pair and `andl $3, %eax`.
12652define zeroext i4 @test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
12653; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem:
12654; VLX:       # %bb.0: # %entry
12655; VLX-NEXT:    kmovd %edi, %k1
12656; VLX-NEXT:    vpcmpnltq (%rsi), %xmm0, %k0 {%k1}
12657; VLX-NEXT:    kmovb %k0, %eax
12658; VLX-NEXT:    retq
12659;
12660; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem:
12661; NoVLX:       # %bb.0: # %entry
12662; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
12663; NoVLX-NEXT:    vmovdqa (%rsi), %xmm1
12664; NoVLX-NEXT:    kmovw %edi, %k1
12665; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
12666; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
12667; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
12668; NoVLX-NEXT:    kmovw %k0, %eax
12669; NoVLX-NEXT:    andl $3, %eax
12670; NoVLX-NEXT:    vzeroupper
12671; NoVLX-NEXT:    retq
12672entry:
12673  %0 = bitcast <2 x i64> %__a to <2 x i64>
12674  %load = load <2 x i64>, <2 x i64>* %__b
12675  %1 = bitcast <2 x i64> %load to <2 x i64>
12676  %2 = icmp sge <2 x i64> %0, %1
12677  %3 = bitcast i8 %__u to <8 x i1>
  ; Only mask lanes 0 and 1 of the 8-lane mask take part in the AND.
12678  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
12679  %4 = and <2 x i1> %2, %extract.i
  ; Indices 0-1 pick the masked compare lanes; indices 2-3 pick zeros from the
  ; zeroinitializer operand.
12680  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
12681  %6 = bitcast <4 x i1> %5 to i4
12682  ret i4 %6
12683}
12684
12685
; Broadcast form. Signed >= compare of the two i64 lanes of %__a against one
; i64 loaded from %__b and splatted to both lanes. The <2 x i1> result is
; widened to 4 lanes with zero fill and returned as an i4 bitmask.
; Checks under the VLX prefix expect the load folded as a {1to2} broadcast
; operand. Checks under the NoVLX prefix expect a separate vpbroadcastq into
; xmm1, a zmm-wide compare, a kshiftlw/kshiftrw $14 pair, and `andl $3, %eax`.
12686define zeroext i4 @test_vpcmpsgeq_v2i1_v4i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
12687; VLX-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask_mem_b:
12688; VLX:       # %bb.0: # %entry
12689; VLX-NEXT:    vpcmpnltq (%rdi){1to2}, %xmm0, %k0
12690; VLX-NEXT:    kmovb %k0, %eax
12691; VLX-NEXT:    retq
12692;
12693; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask_mem_b:
12694; NoVLX:       # %bb.0: # %entry
12695; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
12696; NoVLX-NEXT:    vpbroadcastq (%rdi), %xmm1
12697; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0
12698; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
12699; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
12700; NoVLX-NEXT:    kmovw %k0, %eax
12701; NoVLX-NEXT:    andl $3, %eax
12702; NoVLX-NEXT:    vzeroupper
12703; NoVLX-NEXT:    retq
12704entry:
12705  %0 = bitcast <2 x i64> %__a to <2 x i64>
12706  %load = load i64, i64* %__b
12707  %vec = insertelement <2 x i64> undef, i64 %load, i32 0
12708  %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
12709  %2 = icmp sge <2 x i64> %0, %1
  ; Indices 0-1 pick the compare lanes; indices 2-3 pick zeros from the
  ; zeroinitializer operand.
12710  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
12711  %4 = bitcast <4 x i1> %3 to i4
12712  ret i4 %4
12713}
12714
; Masked broadcast form. Signed >= compare of the two i64 lanes of %__a against
; one i64 loaded from %__b and splatted, ANDed lane-wise with bits 0 and 1 of
; the i8 mask %__u, widened to 4 lanes with zero fill and returned as i4.
; Both check prefixes expect %__u in k1 as the compare's write-mask. The VLX
; sequence folds the load as {1to2}; the NoVLX sequence uses vpbroadcastq, a
; zmm-wide compare, a kshiftlw/kshiftrw $14 pair and `andl $3, %eax`.
12715define zeroext i4 @test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
12716; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem_b:
12717; VLX:       # %bb.0: # %entry
12718; VLX-NEXT:    kmovd %edi, %k1
12719; VLX-NEXT:    vpcmpnltq (%rsi){1to2}, %xmm0, %k0 {%k1}
12720; VLX-NEXT:    kmovb %k0, %eax
12721; VLX-NEXT:    retq
12722;
12723; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem_b:
12724; NoVLX:       # %bb.0: # %entry
12725; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
12726; NoVLX-NEXT:    vpbroadcastq (%rsi), %xmm1
12727; NoVLX-NEXT:    kmovw %edi, %k1
12728; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
12729; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
12730; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
12731; NoVLX-NEXT:    kmovw %k0, %eax
12732; NoVLX-NEXT:    andl $3, %eax
12733; NoVLX-NEXT:    vzeroupper
12734; NoVLX-NEXT:    retq
12735entry:
12736  %0 = bitcast <2 x i64> %__a to <2 x i64>
12737  %load = load i64, i64* %__b
12738  %vec = insertelement <2 x i64> undef, i64 %load, i32 0
12739  %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
12740  %2 = icmp sge <2 x i64> %0, %1
12741  %3 = bitcast i8 %__u to <8 x i1>
  ; Only mask lanes 0 and 1 of the 8-lane mask take part in the AND.
12742  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
12743  %4 = and <2 x i1> %extract.i, %2
  ; Indices 0-1 pick the masked compare lanes; indices 2-3 pick zeros from the
  ; zeroinitializer operand.
12744  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
12745  %6 = bitcast <4 x i1> %5 to i4
12746  ret i4 %6
12747}
12748
12749
; Signed >= compare of the two i64 lanes of %__a and %__b, both passed in
; registers. The <2 x i1> result is widened to 8 lanes with zero fill and
; returned as an i8 bitmask. The two leading bitcasts are identity casts.
; Checks under the VLX prefix expect an xmm vpcmpnltq and kmovd. Checks under
; the NoVLX prefix expect a zmm-wide compare, a kshiftlw/kshiftrw $14 pair that
; keeps only the low 2 mask bits, and kmovw.
12750define zeroext i8 @test_vpcmpsgeq_v2i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
12751; VLX-LABEL: test_vpcmpsgeq_v2i1_v8i1_mask:
12752; VLX:       # %bb.0: # %entry
12753; VLX-NEXT:    vpcmpnltq %xmm1, %xmm0, %k0
12754; VLX-NEXT:    kmovd %k0, %eax
12755; VLX-NEXT:    # kill: def $al killed $al killed $eax
12756; VLX-NEXT:    retq
12757;
12758; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v8i1_mask:
12759; NoVLX:       # %bb.0: # %entry
12760; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
12761; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
12762; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0
12763; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
12764; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
12765; NoVLX-NEXT:    kmovw %k0, %eax
12766; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
12767; NoVLX-NEXT:    vzeroupper
12768; NoVLX-NEXT:    retq
12769entry:
12770  %0 = bitcast <2 x i64> %__a to <2 x i64>
12771  %1 = bitcast <2 x i64> %__b to <2 x i64>
12772  %2 = icmp sge <2 x i64> %0, %1
  ; Indices 0-1 pick the compare lanes; the remaining indices are all 2 or 3
  ; and pick zeros from the zeroinitializer operand.
12773  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
12774  %4 = bitcast <8 x i1> %3 to i8
12775  ret i8 %4
12776}
12777
; Signed >= compare of the two i64 lanes of %__a against a <2 x i64> vector
; loaded from %__b. The <2 x i1> result is widened to 8 lanes with zero fill
; and returned as an i8 bitmask.
; Checks under the VLX prefix expect the load folded into an xmm vpcmpnltq.
; Checks under the NoVLX prefix expect a separate vmovdqa into xmm1, a zmm-wide
; compare, a kshiftlw/kshiftrw $14 pair, and kmovw.
12778define zeroext i8 @test_vpcmpsgeq_v2i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
12779; VLX-LABEL: test_vpcmpsgeq_v2i1_v8i1_mask_mem:
12780; VLX:       # %bb.0: # %entry
12781; VLX-NEXT:    vpcmpnltq (%rdi), %xmm0, %k0
12782; VLX-NEXT:    kmovd %k0, %eax
12783; VLX-NEXT:    # kill: def $al killed $al killed $eax
12784; VLX-NEXT:    retq
12785;
12786; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v8i1_mask_mem:
12787; NoVLX:       # %bb.0: # %entry
12788; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
12789; NoVLX-NEXT:    vmovdqa (%rdi), %xmm1
12790; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0
12791; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
12792; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
12793; NoVLX-NEXT:    kmovw %k0, %eax
12794; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
12795; NoVLX-NEXT:    vzeroupper
12796; NoVLX-NEXT:    retq
12797entry:
12798  %0 = bitcast <2 x i64> %__a to <2 x i64>
12799  %load = load <2 x i64>, <2 x i64>* %__b
12800  %1 = bitcast <2 x i64> %load to <2 x i64>
12801  %2 = icmp sge <2 x i64> %0, %1
  ; Indices 0-1 pick the compare lanes; the remaining indices are all 2 or 3
  ; and pick zeros from the zeroinitializer operand.
12802  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
12803  %4 = bitcast <8 x i1> %3 to i8
12804  ret i8 %4
12805}
12806
; Masked form. Signed >= compare of the two i64 lanes of %__a and %__b, ANDed
; lane-wise with bits 0 and 1 of the i8 mask %__u, widened to 8 lanes with zero
; fill and returned as an i8 bitmask.
; Both check prefixes expect %__u moved into k1 and used as the compare's
; write-mask. The NoVLX sequence compares at zmm width and then applies a
; kshiftlw/kshiftrw $14 pair before kmovw.
12807define zeroext i8 @test_masked_vpcmpsgeq_v2i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
12808; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v8i1_mask:
12809; VLX:       # %bb.0: # %entry
12810; VLX-NEXT:    kmovd %edi, %k1
12811; VLX-NEXT:    vpcmpnltq %xmm1, %xmm0, %k0 {%k1}
12812; VLX-NEXT:    kmovd %k0, %eax
12813; VLX-NEXT:    # kill: def $al killed $al killed $eax
12814; VLX-NEXT:    retq
12815;
12816; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v8i1_mask:
12817; NoVLX:       # %bb.0: # %entry
12818; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
12819; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
12820; NoVLX-NEXT:    kmovw %edi, %k1
12821; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
12822; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
12823; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
12824; NoVLX-NEXT:    kmovw %k0, %eax
12825; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
12826; NoVLX-NEXT:    vzeroupper
12827; NoVLX-NEXT:    retq
12828entry:
12829  %0 = bitcast <2 x i64> %__a to <2 x i64>
12830  %1 = bitcast <2 x i64> %__b to <2 x i64>
12831  %2 = icmp sge <2 x i64> %0, %1
12832  %3 = bitcast i8 %__u to <8 x i1>
  ; Only mask lanes 0 and 1 of the 8-lane mask take part in the AND.
12833  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
12834  %4 = and <2 x i1> %2, %extract.i
  ; Indices 0-1 pick the masked compare lanes; the remaining indices are all
  ; 2 or 3 and pick zeros from the zeroinitializer operand.
12835  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
12836  %6 = bitcast <8 x i1> %5 to i8
12837  ret i8 %6
12838}
12839
; Masked form with a memory operand. Signed >= compare of the two i64 lanes of
; %__a against a <2 x i64> vector loaded from %__b, ANDed lane-wise with bits 0
; and 1 of the i8 mask %__u, widened to 8 lanes with zero fill, returned as i8.
; Both check prefixes expect %__u in k1 as the compare's write-mask. The VLX
; sequence folds the load; the NoVLX sequence loads with vmovdqa, compares at
; zmm width, then applies a kshiftlw/kshiftrw $14 pair before kmovw.
12840define zeroext i8 @test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
12841; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem:
12842; VLX:       # %bb.0: # %entry
12843; VLX-NEXT:    kmovd %edi, %k1
12844; VLX-NEXT:    vpcmpnltq (%rsi), %xmm0, %k0 {%k1}
12845; VLX-NEXT:    kmovd %k0, %eax
12846; VLX-NEXT:    # kill: def $al killed $al killed $eax
12847; VLX-NEXT:    retq
12848;
12849; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem:
12850; NoVLX:       # %bb.0: # %entry
12851; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
12852; NoVLX-NEXT:    vmovdqa (%rsi), %xmm1
12853; NoVLX-NEXT:    kmovw %edi, %k1
12854; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
12855; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
12856; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
12857; NoVLX-NEXT:    kmovw %k0, %eax
12858; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
12859; NoVLX-NEXT:    vzeroupper
12860; NoVLX-NEXT:    retq
12861entry:
12862  %0 = bitcast <2 x i64> %__a to <2 x i64>
12863  %load = load <2 x i64>, <2 x i64>* %__b
12864  %1 = bitcast <2 x i64> %load to <2 x i64>
12865  %2 = icmp sge <2 x i64> %0, %1
12866  %3 = bitcast i8 %__u to <8 x i1>
  ; Only mask lanes 0 and 1 of the 8-lane mask take part in the AND.
12867  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
12868  %4 = and <2 x i1> %2, %extract.i
  ; Indices 0-1 pick the masked compare lanes; the remaining indices are all
  ; 2 or 3 and pick zeros from the zeroinitializer operand.
12869  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
12870  %6 = bitcast <8 x i1> %5 to i8
12871  ret i8 %6
12872}
12873
12874
; Signed greater-or-equal (icmp sge) compare of %__a against an i64 loaded
; from %__b and splatted to both lanes. The <2 x i1> result is padded with
; zero lanes to <8 x i1> and returned as i8.
; With AVX512VL the assertions expect the splat folded into vpcmpnltq as a
; {1to2} memory operand; without it a separate vpbroadcastq feeds a zmm
; compare and the kshiftlw/kshiftrw $14 pair clears mask bits 2-15.
12875define zeroext i8 @test_vpcmpsgeq_v2i1_v8i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
12876; VLX-LABEL: test_vpcmpsgeq_v2i1_v8i1_mask_mem_b:
12877; VLX:       # %bb.0: # %entry
12878; VLX-NEXT:    vpcmpnltq (%rdi){1to2}, %xmm0, %k0
12879; VLX-NEXT:    kmovd %k0, %eax
12880; VLX-NEXT:    # kill: def $al killed $al killed $eax
12881; VLX-NEXT:    retq
12882;
12883; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v8i1_mask_mem_b:
12884; NoVLX:       # %bb.0: # %entry
12885; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
12886; NoVLX-NEXT:    vpbroadcastq (%rdi), %xmm1
12887; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0
12888; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
12889; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
12890; NoVLX-NEXT:    kmovw %k0, %eax
12891; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
12892; NoVLX-NEXT:    vzeroupper
12893; NoVLX-NEXT:    retq
12894entry:
12895  %0 = bitcast <2 x i64> %__a to <2 x i64>
  ; Load one scalar and splat it: insert into lane 0, then shuffle lane 0 into both lanes.
12896  %load = load i64, i64* %__b
12897  %vec = insertelement <2 x i64> undef, i64 %load, i32 0
12898  %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
12899  %2 = icmp sge <2 x i64> %0, %1
  ; Indices 2 and 3 select zeroinitializer lanes, so only the low two result lanes can be set.
12900  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
12901  %4 = bitcast <8 x i1> %3 to i8
12902  ret i8 %4
12903}
12904
; Masked signed greater-or-equal (icmp sge) compare of %__a against an i64
; loaded from %__b and splatted to both lanes. The two result bits are ANDed
; with the low two bits of %__u, padded with zero lanes to <8 x i1> and
; returned as i8.
; With AVX512VL the assertions expect a masked vpcmpnltq with a {1to2} memory
; operand; without it a separate vpbroadcastq feeds a masked zmm compare and
; the kshiftlw/kshiftrw $14 pair clears mask bits 2-15.
12905define zeroext i8 @test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
12906; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem_b:
12907; VLX:       # %bb.0: # %entry
12908; VLX-NEXT:    kmovd %edi, %k1
12909; VLX-NEXT:    vpcmpnltq (%rsi){1to2}, %xmm0, %k0 {%k1}
12910; VLX-NEXT:    kmovd %k0, %eax
12911; VLX-NEXT:    # kill: def $al killed $al killed $eax
12912; VLX-NEXT:    retq
12913;
12914; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem_b:
12915; NoVLX:       # %bb.0: # %entry
12916; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
12917; NoVLX-NEXT:    vpbroadcastq (%rsi), %xmm1
12918; NoVLX-NEXT:    kmovw %edi, %k1
12919; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
12920; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
12921; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
12922; NoVLX-NEXT:    kmovw %k0, %eax
12923; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
12924; NoVLX-NEXT:    vzeroupper
12925; NoVLX-NEXT:    retq
12926entry:
12927  %0 = bitcast <2 x i64> %__a to <2 x i64>
  ; Load one scalar and splat it: insert into lane 0, then shuffle lane 0 into both lanes.
12928  %load = load i64, i64* %__b
12929  %vec = insertelement <2 x i64> undef, i64 %load, i32 0
12930  %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
12931  %2 = icmp sge <2 x i64> %0, %1
  ; Reinterpret the i8 mask as 8 lanes and keep lanes 0 and 1.
12932  %3 = bitcast i8 %__u to <8 x i1>
12933  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
12934  %4 = and <2 x i1> %extract.i, %2
  ; Indices 2 and 3 select zeroinitializer lanes, so only the low two result lanes can be set.
12935  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
12936  %6 = bitcast <8 x i1> %5 to i8
12937  ret i8 %6
12938}
12939
12940
; Signed greater-or-equal (icmp sge) compare of the two i64 lanes of %__a and
; %__b. The <2 x i1> result is padded with zero lanes to <16 x i1> and
; returned as i16.
; With AVX512VL the assertions expect a single xmm vpcmpnltq writing a mask
; register; without it the operands are treated as zmm and the
; kshiftlw/kshiftrw $14 pair clears mask bits 2-15.
12941define zeroext i16 @test_vpcmpsgeq_v2i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
12942; VLX-LABEL: test_vpcmpsgeq_v2i1_v16i1_mask:
12943; VLX:       # %bb.0: # %entry
12944; VLX-NEXT:    vpcmpnltq %xmm1, %xmm0, %k0
12945; VLX-NEXT:    kmovd %k0, %eax
12946; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
12947; VLX-NEXT:    retq
12948;
12949; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v16i1_mask:
12950; NoVLX:       # %bb.0: # %entry
12951; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
12952; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
12953; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0
12954; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
12955; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
12956; NoVLX-NEXT:    kmovw %k0, %eax
12957; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
12958; NoVLX-NEXT:    vzeroupper
12959; NoVLX-NEXT:    retq
12960entry:
  ; Both bitcasts are no-ops: source and destination types are identical.
12961  %0 = bitcast <2 x i64> %__a to <2 x i64>
12962  %1 = bitcast <2 x i64> %__b to <2 x i64>
12963  %2 = icmp sge <2 x i64> %0, %1
  ; Indices 2 and 3 select zeroinitializer lanes, so only the low two result lanes can be set.
12964  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
12965  %4 = bitcast <16 x i1> %3 to i16
12966  ret i16 %4
12967}
12968
; Signed greater-or-equal (icmp sge) compare of %__a against a <2 x i64>
; loaded from %__b. The <2 x i1> result is padded with zero lanes to
; <16 x i1> and returned as i16.
; With AVX512VL the assertions expect the load folded into an xmm vpcmpnltq;
; without it the vector is loaded with vmovdqa, compared on zmm, and the
; kshiftlw/kshiftrw $14 pair clears mask bits 2-15.
12969define zeroext i16 @test_vpcmpsgeq_v2i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
12970; VLX-LABEL: test_vpcmpsgeq_v2i1_v16i1_mask_mem:
12971; VLX:       # %bb.0: # %entry
12972; VLX-NEXT:    vpcmpnltq (%rdi), %xmm0, %k0
12973; VLX-NEXT:    kmovd %k0, %eax
12974; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
12975; VLX-NEXT:    retq
12976;
12977; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v16i1_mask_mem:
12978; NoVLX:       # %bb.0: # %entry
12979; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
12980; NoVLX-NEXT:    vmovdqa (%rdi), %xmm1
12981; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0
12982; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
12983; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
12984; NoVLX-NEXT:    kmovw %k0, %eax
12985; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
12986; NoVLX-NEXT:    vzeroupper
12987; NoVLX-NEXT:    retq
12988entry:
12989  %0 = bitcast <2 x i64> %__a to <2 x i64>
12990  %load = load <2 x i64>, <2 x i64>* %__b
12991  %1 = bitcast <2 x i64> %load to <2 x i64>
12992  %2 = icmp sge <2 x i64> %0, %1
  ; Indices 2 and 3 select zeroinitializer lanes, so only the low two result lanes can be set.
12993  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
12994  %4 = bitcast <16 x i1> %3 to i16
12995  ret i16 %4
12996}
12997
; Masked signed greater-or-equal (icmp sge) compare of the two i64 lanes of
; %__a and %__b. The two result bits are ANDed with the low two bits of %__u,
; padded with zero lanes to <16 x i1> and returned as i16.
; With AVX512VL the assertions expect a masked xmm vpcmpnltq; without it the
; operands are treated as zmm and the kshiftlw/kshiftrw $14 pair clears mask
; bits 2-15.
12998define zeroext i16 @test_masked_vpcmpsgeq_v2i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
12999; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v16i1_mask:
13000; VLX:       # %bb.0: # %entry
13001; VLX-NEXT:    kmovd %edi, %k1
13002; VLX-NEXT:    vpcmpnltq %xmm1, %xmm0, %k0 {%k1}
13003; VLX-NEXT:    kmovd %k0, %eax
13004; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
13005; VLX-NEXT:    retq
13006;
13007; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v16i1_mask:
13008; NoVLX:       # %bb.0: # %entry
13009; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
13010; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
13011; NoVLX-NEXT:    kmovw %edi, %k1
13012; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
13013; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
13014; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
13015; NoVLX-NEXT:    kmovw %k0, %eax
13016; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
13017; NoVLX-NEXT:    vzeroupper
13018; NoVLX-NEXT:    retq
13019entry:
13020  %0 = bitcast <2 x i64> %__a to <2 x i64>
13021  %1 = bitcast <2 x i64> %__b to <2 x i64>
13022  %2 = icmp sge <2 x i64> %0, %1
  ; Reinterpret the i8 mask as 8 lanes and keep lanes 0 and 1.
13023  %3 = bitcast i8 %__u to <8 x i1>
13024  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
13025  %4 = and <2 x i1> %2, %extract.i
  ; Indices 2 and 3 select zeroinitializer lanes, so only the low two result lanes can be set.
13026  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
13027  %6 = bitcast <16 x i1> %5 to i16
13028  ret i16 %6
13029}
13030
; Masked signed greater-or-equal (icmp sge) compare of %__a against a
; <2 x i64> loaded from %__b. The two result bits are ANDed with the low two
; bits of %__u, padded with zero lanes to <16 x i1> and returned as i16.
; With AVX512VL the assertions expect the load folded into a masked xmm
; vpcmpnltq; without it the vector is loaded with vmovdqa, compared on zmm,
; and the kshiftlw/kshiftrw $14 pair clears mask bits 2-15.
13031define zeroext i16 @test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
13032; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem:
13033; VLX:       # %bb.0: # %entry
13034; VLX-NEXT:    kmovd %edi, %k1
13035; VLX-NEXT:    vpcmpnltq (%rsi), %xmm0, %k0 {%k1}
13036; VLX-NEXT:    kmovd %k0, %eax
13037; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
13038; VLX-NEXT:    retq
13039;
13040; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem:
13041; NoVLX:       # %bb.0: # %entry
13042; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
13043; NoVLX-NEXT:    vmovdqa (%rsi), %xmm1
13044; NoVLX-NEXT:    kmovw %edi, %k1
13045; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
13046; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
13047; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
13048; NoVLX-NEXT:    kmovw %k0, %eax
13049; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
13050; NoVLX-NEXT:    vzeroupper
13051; NoVLX-NEXT:    retq
13052entry:
13053  %0 = bitcast <2 x i64> %__a to <2 x i64>
13054  %load = load <2 x i64>, <2 x i64>* %__b
13055  %1 = bitcast <2 x i64> %load to <2 x i64>
13056  %2 = icmp sge <2 x i64> %0, %1
  ; Reinterpret the i8 mask as 8 lanes and keep lanes 0 and 1.
13057  %3 = bitcast i8 %__u to <8 x i1>
13058  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
13059  %4 = and <2 x i1> %2, %extract.i
  ; Indices 2 and 3 select zeroinitializer lanes, so only the low two result lanes can be set.
13060  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
13061  %6 = bitcast <16 x i1> %5 to i16
13062  ret i16 %6
13063}
13064
13065
; Signed greater-or-equal (icmp sge) compare of %__a against an i64 loaded
; from %__b and splatted to both lanes. The <2 x i1> result is padded with
; zero lanes to <16 x i1> and returned as i16.
; With AVX512VL the assertions expect the splat folded into vpcmpnltq as a
; {1to2} memory operand; without it a separate vpbroadcastq feeds a zmm
; compare and the kshiftlw/kshiftrw $14 pair clears mask bits 2-15.
13066define zeroext i16 @test_vpcmpsgeq_v2i1_v16i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
13067; VLX-LABEL: test_vpcmpsgeq_v2i1_v16i1_mask_mem_b:
13068; VLX:       # %bb.0: # %entry
13069; VLX-NEXT:    vpcmpnltq (%rdi){1to2}, %xmm0, %k0
13070; VLX-NEXT:    kmovd %k0, %eax
13071; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
13072; VLX-NEXT:    retq
13073;
13074; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v16i1_mask_mem_b:
13075; NoVLX:       # %bb.0: # %entry
13076; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
13077; NoVLX-NEXT:    vpbroadcastq (%rdi), %xmm1
13078; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0
13079; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
13080; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
13081; NoVLX-NEXT:    kmovw %k0, %eax
13082; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
13083; NoVLX-NEXT:    vzeroupper
13084; NoVLX-NEXT:    retq
13085entry:
13086  %0 = bitcast <2 x i64> %__a to <2 x i64>
  ; Load one scalar and splat it: insert into lane 0, then shuffle lane 0 into both lanes.
13087  %load = load i64, i64* %__b
13088  %vec = insertelement <2 x i64> undef, i64 %load, i32 0
13089  %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
13090  %2 = icmp sge <2 x i64> %0, %1
  ; Indices 2 and 3 select zeroinitializer lanes, so only the low two result lanes can be set.
13091  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
13092  %4 = bitcast <16 x i1> %3 to i16
13093  ret i16 %4
13094}
13095
; Masked signed greater-or-equal (icmp sge) compare of %__a against an i64
; loaded from %__b and splatted to both lanes. The two result bits are ANDed
; with the low two bits of %__u, padded with zero lanes to <16 x i1> and
; returned as i16.
; With AVX512VL the assertions expect a masked vpcmpnltq with a {1to2} memory
; operand; without it a separate vpbroadcastq feeds a masked zmm compare and
; the kshiftlw/kshiftrw $14 pair clears mask bits 2-15.
13096define zeroext i16 @test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
13097; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem_b:
13098; VLX:       # %bb.0: # %entry
13099; VLX-NEXT:    kmovd %edi, %k1
13100; VLX-NEXT:    vpcmpnltq (%rsi){1to2}, %xmm0, %k0 {%k1}
13101; VLX-NEXT:    kmovd %k0, %eax
13102; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
13103; VLX-NEXT:    retq
13104;
13105; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem_b:
13106; NoVLX:       # %bb.0: # %entry
13107; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
13108; NoVLX-NEXT:    vpbroadcastq (%rsi), %xmm1
13109; NoVLX-NEXT:    kmovw %edi, %k1
13110; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
13111; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
13112; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
13113; NoVLX-NEXT:    kmovw %k0, %eax
13114; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
13115; NoVLX-NEXT:    vzeroupper
13116; NoVLX-NEXT:    retq
13117entry:
13118  %0 = bitcast <2 x i64> %__a to <2 x i64>
  ; Load one scalar and splat it: insert into lane 0, then shuffle lane 0 into both lanes.
13119  %load = load i64, i64* %__b
13120  %vec = insertelement <2 x i64> undef, i64 %load, i32 0
13121  %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
13122  %2 = icmp sge <2 x i64> %0, %1
  ; Reinterpret the i8 mask as 8 lanes and keep lanes 0 and 1.
13123  %3 = bitcast i8 %__u to <8 x i1>
13124  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
13125  %4 = and <2 x i1> %extract.i, %2
  ; Indices 2 and 3 select zeroinitializer lanes, so only the low two result lanes can be set.
13126  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
13127  %6 = bitcast <16 x i1> %5 to i16
13128  ret i16 %6
13129}
13130
13131
; Signed greater-or-equal (icmp sge) compare of the two i64 lanes of %__a and
; %__b. The <2 x i1> result is padded with zero lanes to <32 x i1> and
; returned as i32.
; With AVX512VL the assertions expect a single xmm vpcmpnltq followed by
; kmovd; without it the operands are treated as zmm and the
; kshiftlw/kshiftrw $14 pair clears mask bits 2-15 before kmovw.
13132define zeroext i32 @test_vpcmpsgeq_v2i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
13133; VLX-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask:
13134; VLX:       # %bb.0: # %entry
13135; VLX-NEXT:    vpcmpnltq %xmm1, %xmm0, %k0
13136; VLX-NEXT:    kmovd %k0, %eax
13137; VLX-NEXT:    retq
13138;
13139; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask:
13140; NoVLX:       # %bb.0: # %entry
13141; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
13142; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
13143; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0
13144; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
13145; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
13146; NoVLX-NEXT:    kmovw %k0, %eax
13147; NoVLX-NEXT:    vzeroupper
13148; NoVLX-NEXT:    retq
13149entry:
  ; Both bitcasts are no-ops: source and destination types are identical.
13150  %0 = bitcast <2 x i64> %__a to <2 x i64>
13151  %1 = bitcast <2 x i64> %__b to <2 x i64>
13152  %2 = icmp sge <2 x i64> %0, %1
  ; Indices 2 and 3 select zeroinitializer lanes, so only the low two result lanes can be set.
13153  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
13154  %4 = bitcast <32 x i1> %3 to i32
13155  ret i32 %4
13156}
13157
; Signed greater-or-equal (icmp sge) compare of %__a against a <2 x i64>
; loaded from %__b. The <2 x i1> result is padded with zero lanes to
; <32 x i1> and returned as i32.
; With AVX512VL the assertions expect the load folded into an xmm vpcmpnltq;
; without it the vector is loaded with vmovdqa, compared on zmm, and the
; kshiftlw/kshiftrw $14 pair clears mask bits 2-15.
13158define zeroext i32 @test_vpcmpsgeq_v2i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
13159; VLX-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask_mem:
13160; VLX:       # %bb.0: # %entry
13161; VLX-NEXT:    vpcmpnltq (%rdi), %xmm0, %k0
13162; VLX-NEXT:    kmovd %k0, %eax
13163; VLX-NEXT:    retq
13164;
13165; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask_mem:
13166; NoVLX:       # %bb.0: # %entry
13167; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
13168; NoVLX-NEXT:    vmovdqa (%rdi), %xmm1
13169; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0
13170; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
13171; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
13172; NoVLX-NEXT:    kmovw %k0, %eax
13173; NoVLX-NEXT:    vzeroupper
13174; NoVLX-NEXT:    retq
13175entry:
13176  %0 = bitcast <2 x i64> %__a to <2 x i64>
13177  %load = load <2 x i64>, <2 x i64>* %__b
13178  %1 = bitcast <2 x i64> %load to <2 x i64>
13179  %2 = icmp sge <2 x i64> %0, %1
  ; Indices 2 and 3 select zeroinitializer lanes, so only the low two result lanes can be set.
13180  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
13181  %4 = bitcast <32 x i1> %3 to i32
13182  ret i32 %4
13183}
13184
; Masked signed greater-or-equal (icmp sge) compare of the two i64 lanes of
; %__a and %__b. The two result bits are ANDed with the low two bits of %__u,
; padded with zero lanes to <32 x i1> and returned as i32.
; With AVX512VL the assertions expect a masked xmm vpcmpnltq; without it the
; operands are treated as zmm and the kshiftlw/kshiftrw $14 pair clears mask
; bits 2-15.
13185define zeroext i32 @test_masked_vpcmpsgeq_v2i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
13186; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask:
13187; VLX:       # %bb.0: # %entry
13188; VLX-NEXT:    kmovd %edi, %k1
13189; VLX-NEXT:    vpcmpnltq %xmm1, %xmm0, %k0 {%k1}
13190; VLX-NEXT:    kmovd %k0, %eax
13191; VLX-NEXT:    retq
13192;
13193; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask:
13194; NoVLX:       # %bb.0: # %entry
13195; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
13196; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
13197; NoVLX-NEXT:    kmovw %edi, %k1
13198; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
13199; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
13200; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
13201; NoVLX-NEXT:    kmovw %k0, %eax
13202; NoVLX-NEXT:    vzeroupper
13203; NoVLX-NEXT:    retq
13204entry:
13205  %0 = bitcast <2 x i64> %__a to <2 x i64>
13206  %1 = bitcast <2 x i64> %__b to <2 x i64>
13207  %2 = icmp sge <2 x i64> %0, %1
  ; Reinterpret the i8 mask as 8 lanes and keep lanes 0 and 1.
13208  %3 = bitcast i8 %__u to <8 x i1>
13209  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
13210  %4 = and <2 x i1> %2, %extract.i
  ; Indices 2 and 3 select zeroinitializer lanes, so only the low two result lanes can be set.
13211  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
13212  %6 = bitcast <32 x i1> %5 to i32
13213  ret i32 %6
13214}
13215
; Masked signed greater-or-equal (icmp sge) compare of %__a against a
; <2 x i64> loaded from %__b. The two result bits are ANDed with the low two
; bits of %__u, padded with zero lanes to <32 x i1> and returned as i32.
; With AVX512VL the assertions expect the load folded into a masked xmm
; vpcmpnltq; without it the vector is loaded with vmovdqa, compared on zmm,
; and the kshiftlw/kshiftrw $14 pair clears mask bits 2-15.
13216define zeroext i32 @test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
13217; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem:
13218; VLX:       # %bb.0: # %entry
13219; VLX-NEXT:    kmovd %edi, %k1
13220; VLX-NEXT:    vpcmpnltq (%rsi), %xmm0, %k0 {%k1}
13221; VLX-NEXT:    kmovd %k0, %eax
13222; VLX-NEXT:    retq
13223;
13224; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem:
13225; NoVLX:       # %bb.0: # %entry
13226; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
13227; NoVLX-NEXT:    vmovdqa (%rsi), %xmm1
13228; NoVLX-NEXT:    kmovw %edi, %k1
13229; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
13230; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
13231; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
13232; NoVLX-NEXT:    kmovw %k0, %eax
13233; NoVLX-NEXT:    vzeroupper
13234; NoVLX-NEXT:    retq
13235entry:
13236  %0 = bitcast <2 x i64> %__a to <2 x i64>
13237  %load = load <2 x i64>, <2 x i64>* %__b
13238  %1 = bitcast <2 x i64> %load to <2 x i64>
13239  %2 = icmp sge <2 x i64> %0, %1
  ; Reinterpret the i8 mask as 8 lanes and keep lanes 0 and 1.
13240  %3 = bitcast i8 %__u to <8 x i1>
13241  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
13242  %4 = and <2 x i1> %2, %extract.i
  ; Indices 2 and 3 select zeroinitializer lanes, so only the low two result lanes can be set.
13243  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
13244  %6 = bitcast <32 x i1> %5 to i32
13245  ret i32 %6
13246}
13247
13248
; Signed greater-or-equal (icmp sge) compare of %__a against an i64 loaded
; from %__b and splatted to both lanes. The <2 x i1> result is padded with
; zero lanes to <32 x i1> and returned as i32.
; With AVX512VL the assertions expect the splat folded into vpcmpnltq as a
; {1to2} memory operand; without it a separate vpbroadcastq feeds a zmm
; compare and the kshiftlw/kshiftrw $14 pair clears mask bits 2-15.
13249define zeroext i32 @test_vpcmpsgeq_v2i1_v32i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
13250; VLX-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask_mem_b:
13251; VLX:       # %bb.0: # %entry
13252; VLX-NEXT:    vpcmpnltq (%rdi){1to2}, %xmm0, %k0
13253; VLX-NEXT:    kmovd %k0, %eax
13254; VLX-NEXT:    retq
13255;
13256; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask_mem_b:
13257; NoVLX:       # %bb.0: # %entry
13258; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
13259; NoVLX-NEXT:    vpbroadcastq (%rdi), %xmm1
13260; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0
13261; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
13262; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
13263; NoVLX-NEXT:    kmovw %k0, %eax
13264; NoVLX-NEXT:    vzeroupper
13265; NoVLX-NEXT:    retq
13266entry:
13267  %0 = bitcast <2 x i64> %__a to <2 x i64>
  ; Load one scalar and splat it: insert into lane 0, then shuffle lane 0 into both lanes.
13268  %load = load i64, i64* %__b
13269  %vec = insertelement <2 x i64> undef, i64 %load, i32 0
13270  %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
13271  %2 = icmp sge <2 x i64> %0, %1
  ; Indices 2 and 3 select zeroinitializer lanes, so only the low two result lanes can be set.
13272  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
13273  %4 = bitcast <32 x i1> %3 to i32
13274  ret i32 %4
13275}
13276
; Masked signed greater-or-equal (icmp sge) compare of %__a against an i64
; loaded from %__b and splatted to both lanes. The two result bits are ANDed
; with the low two bits of %__u, padded with zero lanes to <32 x i1> and
; returned as i32.
; With AVX512VL the assertions expect a masked vpcmpnltq with a {1to2} memory
; operand; without it a separate vpbroadcastq feeds a masked zmm compare and
; the kshiftlw/kshiftrw $14 pair clears mask bits 2-15.
13277define zeroext i32 @test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
13278; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem_b:
13279; VLX:       # %bb.0: # %entry
13280; VLX-NEXT:    kmovd %edi, %k1
13281; VLX-NEXT:    vpcmpnltq (%rsi){1to2}, %xmm0, %k0 {%k1}
13282; VLX-NEXT:    kmovd %k0, %eax
13283; VLX-NEXT:    retq
13284;
13285; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem_b:
13286; NoVLX:       # %bb.0: # %entry
13287; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
13288; NoVLX-NEXT:    vpbroadcastq (%rsi), %xmm1
13289; NoVLX-NEXT:    kmovw %edi, %k1
13290; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
13291; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
13292; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
13293; NoVLX-NEXT:    kmovw %k0, %eax
13294; NoVLX-NEXT:    vzeroupper
13295; NoVLX-NEXT:    retq
13296entry:
13297  %0 = bitcast <2 x i64> %__a to <2 x i64>
  ; Load one scalar and splat it: insert into lane 0, then shuffle lane 0 into both lanes.
13298  %load = load i64, i64* %__b
13299  %vec = insertelement <2 x i64> undef, i64 %load, i32 0
13300  %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
13301  %2 = icmp sge <2 x i64> %0, %1
  ; Reinterpret the i8 mask as 8 lanes and keep lanes 0 and 1.
13302  %3 = bitcast i8 %__u to <8 x i1>
13303  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
13304  %4 = and <2 x i1> %extract.i, %2
  ; Indices 2 and 3 select zeroinitializer lanes, so only the low two result lanes can be set.
13305  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
13306  %6 = bitcast <32 x i1> %5 to i32
13307  ret i32 %6
13308}
13309
13310
13311define zeroext i64 @test_vpcmpsgeq_v2i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
13312; VLX-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask:
13313; VLX:       # %bb.0: # %entry
13314; VLX-NEXT:    vpcmpnltq %xmm1, %xmm0, %k0
13315; VLX-NEXT:    kmovq %k0, %rax
13316; VLX-NEXT:    retq
13317;
13318; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask:
13319; NoVLX:       # %bb.0: # %entry
13320; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
13321; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
13322; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0
13323; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
13324; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
13325; NoVLX-NEXT:    kmovw %k0, %eax
13326; NoVLX-NEXT:    movzwl %ax, %eax
13327; NoVLX-NEXT:    vzeroupper
13328; NoVLX-NEXT:    retq
13329entry:
13330  %0 = bitcast <2 x i64> %__a to <2 x i64>
13331  %1 = bitcast <2 x i64> %__b to <2 x i64>
13332  %2 = icmp sge <2 x i64> %0, %1
13333  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
13334  %4 = bitcast <64 x i1> %3 to i64
13335  ret i64 %4
13336}
13337
13338define zeroext i64 @test_vpcmpsgeq_v2i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
13339; VLX-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask_mem:
13340; VLX:       # %bb.0: # %entry
13341; VLX-NEXT:    vpcmpnltq (%rdi), %xmm0, %k0
13342; VLX-NEXT:    kmovq %k0, %rax
13343; VLX-NEXT:    retq
13344;
13345; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask_mem:
13346; NoVLX:       # %bb.0: # %entry
13347; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
13348; NoVLX-NEXT:    vmovdqa (%rdi), %xmm1
13349; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0
13350; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
13351; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
13352; NoVLX-NEXT:    kmovw %k0, %eax
13353; NoVLX-NEXT:    movzwl %ax, %eax
13354; NoVLX-NEXT:    vzeroupper
13355; NoVLX-NEXT:    retq
13356entry:
13357  %0 = bitcast <2 x i64> %__a to <2 x i64>
13358  %load = load <2 x i64>, <2 x i64>* %__b
13359  %1 = bitcast <2 x i64> %load to <2 x i64>
13360  %2 = icmp sge <2 x i64> %0, %1
13361  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
13362  %4 = bitcast <64 x i1> %3 to i64
13363  ret i64 %4
13364}
13365
13366define zeroext i64 @test_masked_vpcmpsgeq_v2i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
13367; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask:
13368; VLX:       # %bb.0: # %entry
13369; VLX-NEXT:    kmovd %edi, %k1
13370; VLX-NEXT:    vpcmpnltq %xmm1, %xmm0, %k0 {%k1}
13371; VLX-NEXT:    kmovq %k0, %rax
13372; VLX-NEXT:    retq
13373;
13374; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask:
13375; NoVLX:       # %bb.0: # %entry
13376; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
13377; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
13378; NoVLX-NEXT:    kmovw %edi, %k1
13379; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
13380; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
13381; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
13382; NoVLX-NEXT:    kmovw %k0, %eax
13383; NoVLX-NEXT:    movzwl %ax, %eax
13384; NoVLX-NEXT:    vzeroupper
13385; NoVLX-NEXT:    retq
13386entry:
13387  %0 = bitcast <2 x i64> %__a to <2 x i64>
13388  %1 = bitcast <2 x i64> %__b to <2 x i64>
13389  %2 = icmp sge <2 x i64> %0, %1
13390  %3 = bitcast i8 %__u to <8 x i1>
13391  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
13392  %4 = and <2 x i1> %2, %extract.i
13393  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
13394  %6 = bitcast <64 x i1> %5 to i64
13395  ret i64 %6
13396}
13397
13398define zeroext i64 @test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
13399; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem:
13400; VLX:       # %bb.0: # %entry
13401; VLX-NEXT:    kmovd %edi, %k1
13402; VLX-NEXT:    vpcmpnltq (%rsi), %xmm0, %k0 {%k1}
13403; VLX-NEXT:    kmovq %k0, %rax
13404; VLX-NEXT:    retq
13405;
13406; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem:
13407; NoVLX:       # %bb.0: # %entry
13408; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
13409; NoVLX-NEXT:    vmovdqa (%rsi), %xmm1
13410; NoVLX-NEXT:    kmovw %edi, %k1
13411; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
13412; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
13413; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
13414; NoVLX-NEXT:    kmovw %k0, %eax
13415; NoVLX-NEXT:    movzwl %ax, %eax
13416; NoVLX-NEXT:    vzeroupper
13417; NoVLX-NEXT:    retq
13418entry:
13419  %0 = bitcast <2 x i64> %__a to <2 x i64>
13420  %load = load <2 x i64>, <2 x i64>* %__b
13421  %1 = bitcast <2 x i64> %load to <2 x i64>
13422  %2 = icmp sge <2 x i64> %0, %1
13423  %3 = bitcast i8 %__u to <8 x i1>
13424  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
13425  %4 = and <2 x i1> %2, %extract.i
13426  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
13427  %6 = bitcast <64 x i1> %5 to i64
13428  ret i64 %6
13429}
13430
13431
13432define zeroext i64 @test_vpcmpsgeq_v2i1_v64i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
13433; VLX-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask_mem_b:
13434; VLX:       # %bb.0: # %entry
13435; VLX-NEXT:    vpcmpnltq (%rdi){1to2}, %xmm0, %k0
13436; VLX-NEXT:    kmovq %k0, %rax
13437; VLX-NEXT:    retq
13438;
13439; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask_mem_b:
13440; NoVLX:       # %bb.0: # %entry
13441; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
13442; NoVLX-NEXT:    vpbroadcastq (%rdi), %xmm1
13443; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0
13444; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
13445; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
13446; NoVLX-NEXT:    kmovw %k0, %eax
13447; NoVLX-NEXT:    movzwl %ax, %eax
13448; NoVLX-NEXT:    vzeroupper
13449; NoVLX-NEXT:    retq
13450entry:
13451  %0 = bitcast <2 x i64> %__a to <2 x i64>
13452  %load = load i64, i64* %__b
13453  %vec = insertelement <2 x i64> undef, i64 %load, i32 0
13454  %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
13455  %2 = icmp sge <2 x i64> %0, %1
13456  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
13457  %4 = bitcast <64 x i1> %3 to i64
13458  ret i64 %4
13459}
13460
13461define zeroext i64 @test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
13462; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem_b:
13463; VLX:       # %bb.0: # %entry
13464; VLX-NEXT:    kmovd %edi, %k1
13465; VLX-NEXT:    vpcmpnltq (%rsi){1to2}, %xmm0, %k0 {%k1}
13466; VLX-NEXT:    kmovq %k0, %rax
13467; VLX-NEXT:    retq
13468;
13469; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem_b:
13470; NoVLX:       # %bb.0: # %entry
13471; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
13472; NoVLX-NEXT:    vpbroadcastq (%rsi), %xmm1
13473; NoVLX-NEXT:    kmovw %edi, %k1
13474; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
13475; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
13476; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
13477; NoVLX-NEXT:    kmovw %k0, %eax
13478; NoVLX-NEXT:    movzwl %ax, %eax
13479; NoVLX-NEXT:    vzeroupper
13480; NoVLX-NEXT:    retq
13481entry:
13482  %0 = bitcast <2 x i64> %__a to <2 x i64>
13483  %load = load i64, i64* %__b
13484  %vec = insertelement <2 x i64> undef, i64 %load, i32 0
13485  %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
13486  %2 = icmp sge <2 x i64> %0, %1
13487  %3 = bitcast i8 %__u to <8 x i1>
13488  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
13489  %4 = and <2 x i1> %extract.i, %2
13490  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
13491  %6 = bitcast <64 x i1> %5 to i64
13492  ret i64 %6
13493}
13494
13495
; Signed >= compare of <4 x i64> %__a and %__b (the same-type bitcasts are
; no-ops). The 4-bit result is widened to 8 mask bits, with shuffle indices
; 4-7 selecting lanes of the zeroinitializer operand, and returned as i8.
; The VLX run expects a single ymm vpcmpnltq.
; The NoVLX run expects both operands to be treated as zmm and a
; kshiftlw/kshiftrw $12 pair that keeps only the low 4 mask bits.
13496define zeroext i8 @test_vpcmpsgeq_v4i1_v8i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
13497; VLX-LABEL: test_vpcmpsgeq_v4i1_v8i1_mask:
13498; VLX:       # %bb.0: # %entry
13499; VLX-NEXT:    vpcmpnltq %ymm1, %ymm0, %k0
13500; VLX-NEXT:    kmovd %k0, %eax
13501; VLX-NEXT:    # kill: def $al killed $al killed $eax
13502; VLX-NEXT:    vzeroupper
13503; VLX-NEXT:    retq
13504;
13505; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v8i1_mask:
13506; NoVLX:       # %bb.0: # %entry
13507; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
13508; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
13509; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0
13510; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
13511; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
13512; NoVLX-NEXT:    kmovw %k0, %eax
13513; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
13514; NoVLX-NEXT:    vzeroupper
13515; NoVLX-NEXT:    retq
13516entry:
13517  %0 = bitcast <4 x i64> %__a to <4 x i64>
13518  %1 = bitcast <4 x i64> %__b to <4 x i64>
13519  %2 = icmp sge <4 x i64> %0, %1
13520  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
13521  %4 = bitcast <8 x i1> %3 to i8
13522  ret i8 %4
13523}
13524
; Signed >= compare of <4 x i64> %__a against the <4 x i64> loaded from %__b
; (the same-type bitcasts are no-ops). The 4-bit result is widened to 8 mask
; bits, with shuffle indices 4-7 selecting lanes of the zeroinitializer
; operand, and returned as i8.
; The VLX run expects the load folded into the vpcmpnltq memory operand.
; The NoVLX run expects a separate vmovdqa, a zmm compare, and a
; kshiftlw/kshiftrw $12 pair that keeps only the low 4 mask bits.
13525define zeroext i8 @test_vpcmpsgeq_v4i1_v8i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
13526; VLX-LABEL: test_vpcmpsgeq_v4i1_v8i1_mask_mem:
13527; VLX:       # %bb.0: # %entry
13528; VLX-NEXT:    vpcmpnltq (%rdi), %ymm0, %k0
13529; VLX-NEXT:    kmovd %k0, %eax
13530; VLX-NEXT:    # kill: def $al killed $al killed $eax
13531; VLX-NEXT:    vzeroupper
13532; VLX-NEXT:    retq
13533;
13534; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v8i1_mask_mem:
13535; NoVLX:       # %bb.0: # %entry
13536; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
13537; NoVLX-NEXT:    vmovdqa (%rdi), %ymm1
13538; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0
13539; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
13540; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
13541; NoVLX-NEXT:    kmovw %k0, %eax
13542; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
13543; NoVLX-NEXT:    vzeroupper
13544; NoVLX-NEXT:    retq
13545entry:
13546  %0 = bitcast <4 x i64> %__a to <4 x i64>
13547  %load = load <4 x i64>, <4 x i64>* %__b
13548  %1 = bitcast <4 x i64> %load to <4 x i64>
13549  %2 = icmp sge <4 x i64> %0, %1
13550  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
13551  %4 = bitcast <8 x i1> %3 to i8
13552  ret i8 %4
13553}
13554
; Masked signed >= compare of <4 x i64> %__a and %__b (the same-type bitcasts
; are no-ops). The 4-bit result is ANDed with the first four mask elements of
; %__u, widened to 8 mask bits (shuffle indices 4-7 select lanes of the
; zeroinitializer operand), and returned as i8.
; The VLX run expects a ymm vpcmpnltq under write mask %k1.
; The NoVLX run expects a zmm compare under %k1 and a kshiftlw/kshiftrw $12
; pair that keeps only the low 4 mask bits.
13555define zeroext i8 @test_masked_vpcmpsgeq_v4i1_v8i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
13556; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask:
13557; VLX:       # %bb.0: # %entry
13558; VLX-NEXT:    kmovd %edi, %k1
13559; VLX-NEXT:    vpcmpnltq %ymm1, %ymm0, %k0 {%k1}
13560; VLX-NEXT:    kmovd %k0, %eax
13561; VLX-NEXT:    # kill: def $al killed $al killed $eax
13562; VLX-NEXT:    vzeroupper
13563; VLX-NEXT:    retq
13564;
13565; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask:
13566; NoVLX:       # %bb.0: # %entry
13567; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
13568; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
13569; NoVLX-NEXT:    kmovw %edi, %k1
13570; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
13571; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
13572; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
13573; NoVLX-NEXT:    kmovw %k0, %eax
13574; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
13575; NoVLX-NEXT:    vzeroupper
13576; NoVLX-NEXT:    retq
13577entry:
13578  %0 = bitcast <4 x i64> %__a to <4 x i64>
13579  %1 = bitcast <4 x i64> %__b to <4 x i64>
13580  %2 = icmp sge <4 x i64> %0, %1
13581  %3 = bitcast i8 %__u to <8 x i1>
13582  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
13583  %4 = and <4 x i1> %2, %extract.i
13584  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
13585  %6 = bitcast <8 x i1> %5 to i8
13586  ret i8 %6
13587}
13588
; Masked signed >= compare of <4 x i64> %__a against the <4 x i64> loaded from
; %__b (the same-type bitcasts are no-ops). The 4-bit result is ANDed with the
; first four mask elements of %__u, widened to 8 mask bits (shuffle indices
; 4-7 select lanes of the zeroinitializer operand), and returned as i8.
; The VLX run expects the load folded into a ymm vpcmpnltq under %k1.
; The NoVLX run expects a separate vmovdqa, a zmm compare under %k1, and a
; kshiftlw/kshiftrw $12 pair that keeps only the low 4 mask bits.
13589define zeroext i8 @test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
13590; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem:
13591; VLX:       # %bb.0: # %entry
13592; VLX-NEXT:    kmovd %edi, %k1
13593; VLX-NEXT:    vpcmpnltq (%rsi), %ymm0, %k0 {%k1}
13594; VLX-NEXT:    kmovd %k0, %eax
13595; VLX-NEXT:    # kill: def $al killed $al killed $eax
13596; VLX-NEXT:    vzeroupper
13597; VLX-NEXT:    retq
13598;
13599; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem:
13600; NoVLX:       # %bb.0: # %entry
13601; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
13602; NoVLX-NEXT:    vmovdqa (%rsi), %ymm1
13603; NoVLX-NEXT:    kmovw %edi, %k1
13604; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
13605; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
13606; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
13607; NoVLX-NEXT:    kmovw %k0, %eax
13608; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
13609; NoVLX-NEXT:    vzeroupper
13610; NoVLX-NEXT:    retq
13611entry:
13612  %0 = bitcast <4 x i64> %__a to <4 x i64>
13613  %load = load <4 x i64>, <4 x i64>* %__b
13614  %1 = bitcast <4 x i64> %load to <4 x i64>
13615  %2 = icmp sge <4 x i64> %0, %1
13616  %3 = bitcast i8 %__u to <8 x i1>
13617  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
13618  %4 = and <4 x i1> %2, %extract.i
13619  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
13620  %6 = bitcast <8 x i1> %5 to i8
13621  ret i8 %6
13622}
13623
13624
; Signed >= compare of <4 x i64> %__a against the i64 loaded from %__b and
; splatted to all four lanes (the same-type bitcast of %__a is a no-op).
; The 4-bit result is widened to 8 mask bits, with shuffle indices 4-7
; selecting lanes of the zeroinitializer operand, and returned as i8.
; The VLX run expects the load folded as a {1to4} broadcast operand.
; The NoVLX run expects a separate vpbroadcastq, a zmm compare, and a
; kshiftlw/kshiftrw $12 pair that keeps only the low 4 mask bits.
13625define zeroext i8 @test_vpcmpsgeq_v4i1_v8i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
13626; VLX-LABEL: test_vpcmpsgeq_v4i1_v8i1_mask_mem_b:
13627; VLX:       # %bb.0: # %entry
13628; VLX-NEXT:    vpcmpnltq (%rdi){1to4}, %ymm0, %k0
13629; VLX-NEXT:    kmovd %k0, %eax
13630; VLX-NEXT:    # kill: def $al killed $al killed $eax
13631; VLX-NEXT:    vzeroupper
13632; VLX-NEXT:    retq
13633;
13634; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v8i1_mask_mem_b:
13635; NoVLX:       # %bb.0: # %entry
13636; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
13637; NoVLX-NEXT:    vpbroadcastq (%rdi), %ymm1
13638; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0
13639; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
13640; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
13641; NoVLX-NEXT:    kmovw %k0, %eax
13642; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
13643; NoVLX-NEXT:    vzeroupper
13644; NoVLX-NEXT:    retq
13645entry:
13646  %0 = bitcast <4 x i64> %__a to <4 x i64>
13647  %load = load i64, i64* %__b
13648  %vec = insertelement <4 x i64> undef, i64 %load, i32 0
13649  %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
13650  %2 = icmp sge <4 x i64> %0, %1
13651  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
13652  %4 = bitcast <8 x i1> %3 to i8
13653  ret i8 %4
13654}
13655
; Masked signed >= compare of <4 x i64> %__a against the i64 loaded from %__b
; and splatted to all four lanes (the same-type bitcast of %__a is a no-op).
; The 4-bit result is ANDed with the first four mask elements of %__u,
; widened to 8 mask bits (shuffle indices 4-7 select lanes of the
; zeroinitializer operand), and returned as i8.
; The VLX run expects the load folded as a {1to4} broadcast under %k1.
; The NoVLX run expects a separate vpbroadcastq, a zmm compare under %k1, and
; a kshiftlw/kshiftrw $12 pair that keeps only the low 4 mask bits.
13656define zeroext i8 @test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
13657; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem_b:
13658; VLX:       # %bb.0: # %entry
13659; VLX-NEXT:    kmovd %edi, %k1
13660; VLX-NEXT:    vpcmpnltq (%rsi){1to4}, %ymm0, %k0 {%k1}
13661; VLX-NEXT:    kmovd %k0, %eax
13662; VLX-NEXT:    # kill: def $al killed $al killed $eax
13663; VLX-NEXT:    vzeroupper
13664; VLX-NEXT:    retq
13665;
13666; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem_b:
13667; NoVLX:       # %bb.0: # %entry
13668; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
13669; NoVLX-NEXT:    vpbroadcastq (%rsi), %ymm1
13670; NoVLX-NEXT:    kmovw %edi, %k1
13671; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
13672; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
13673; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
13674; NoVLX-NEXT:    kmovw %k0, %eax
13675; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
13676; NoVLX-NEXT:    vzeroupper
13677; NoVLX-NEXT:    retq
13678entry:
13679  %0 = bitcast <4 x i64> %__a to <4 x i64>
13680  %load = load i64, i64* %__b
13681  %vec = insertelement <4 x i64> undef, i64 %load, i32 0
13682  %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
13683  %2 = icmp sge <4 x i64> %0, %1
13684  %3 = bitcast i8 %__u to <8 x i1>
13685  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
13686  %4 = and <4 x i1> %extract.i, %2
13687  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
13688  %6 = bitcast <8 x i1> %5 to i8
13689  ret i8 %6
13690}
13691
13692
; Signed >= compare of <4 x i64> %__a and %__b (the same-type bitcasts are
; no-ops). The 4-bit result is widened to 16 mask bits and returned as i16.
; Every shuffle index above 3 is in the range 4-7 and so selects a lane of the
; zeroinitializer operand, leaving the upper bits zero.
; The VLX run expects a single ymm vpcmpnltq.
; The NoVLX run expects both operands to be treated as zmm and a
; kshiftlw/kshiftrw $12 pair that keeps only the low 4 mask bits.
13693define zeroext i16 @test_vpcmpsgeq_v4i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
13694; VLX-LABEL: test_vpcmpsgeq_v4i1_v16i1_mask:
13695; VLX:       # %bb.0: # %entry
13696; VLX-NEXT:    vpcmpnltq %ymm1, %ymm0, %k0
13697; VLX-NEXT:    kmovd %k0, %eax
13698; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
13699; VLX-NEXT:    vzeroupper
13700; VLX-NEXT:    retq
13701;
13702; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v16i1_mask:
13703; NoVLX:       # %bb.0: # %entry
13704; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
13705; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
13706; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0
13707; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
13708; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
13709; NoVLX-NEXT:    kmovw %k0, %eax
13710; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
13711; NoVLX-NEXT:    vzeroupper
13712; NoVLX-NEXT:    retq
13713entry:
13714  %0 = bitcast <4 x i64> %__a to <4 x i64>
13715  %1 = bitcast <4 x i64> %__b to <4 x i64>
13716  %2 = icmp sge <4 x i64> %0, %1
13717  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
13718  %4 = bitcast <16 x i1> %3 to i16
13719  ret i16 %4
13720}
13721
; Signed >= compare of <4 x i64> %__a against the <4 x i64> loaded from %__b
; (the same-type bitcasts are no-ops). The 4-bit result is widened to 16 mask
; bits and returned as i16. Every shuffle index above 3 is in the range 4-7
; and so selects a lane of the zeroinitializer operand.
; The VLX run expects the load folded into the vpcmpnltq memory operand.
; The NoVLX run expects a separate vmovdqa, a zmm compare, and a
; kshiftlw/kshiftrw $12 pair that keeps only the low 4 mask bits.
13722define zeroext i16 @test_vpcmpsgeq_v4i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
13723; VLX-LABEL: test_vpcmpsgeq_v4i1_v16i1_mask_mem:
13724; VLX:       # %bb.0: # %entry
13725; VLX-NEXT:    vpcmpnltq (%rdi), %ymm0, %k0
13726; VLX-NEXT:    kmovd %k0, %eax
13727; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
13728; VLX-NEXT:    vzeroupper
13729; VLX-NEXT:    retq
13730;
13731; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v16i1_mask_mem:
13732; NoVLX:       # %bb.0: # %entry
13733; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
13734; NoVLX-NEXT:    vmovdqa (%rdi), %ymm1
13735; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0
13736; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
13737; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
13738; NoVLX-NEXT:    kmovw %k0, %eax
13739; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
13740; NoVLX-NEXT:    vzeroupper
13741; NoVLX-NEXT:    retq
13742entry:
13743  %0 = bitcast <4 x i64> %__a to <4 x i64>
13744  %load = load <4 x i64>, <4 x i64>* %__b
13745  %1 = bitcast <4 x i64> %load to <4 x i64>
13746  %2 = icmp sge <4 x i64> %0, %1
13747  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
13748  %4 = bitcast <16 x i1> %3 to i16
13749  ret i16 %4
13750}
13751
; Masked signed >= compare of <4 x i64> %__a and %__b (the same-type bitcasts
; are no-ops). The 4-bit result is ANDed with the first four mask elements of
; %__u, widened to 16 mask bits, and returned as i16. Every shuffle index
; above 3 is in the range 4-7 and so selects a lane of the zeroinitializer
; operand.
; The VLX run expects a ymm vpcmpnltq under write mask %k1.
; The NoVLX run expects a zmm compare under %k1 and a kshiftlw/kshiftrw $12
; pair that keeps only the low 4 mask bits.
13752define zeroext i16 @test_masked_vpcmpsgeq_v4i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
13753; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v16i1_mask:
13754; VLX:       # %bb.0: # %entry
13755; VLX-NEXT:    kmovd %edi, %k1
13756; VLX-NEXT:    vpcmpnltq %ymm1, %ymm0, %k0 {%k1}
13757; VLX-NEXT:    kmovd %k0, %eax
13758; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
13759; VLX-NEXT:    vzeroupper
13760; VLX-NEXT:    retq
13761;
13762; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v16i1_mask:
13763; NoVLX:       # %bb.0: # %entry
13764; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
13765; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
13766; NoVLX-NEXT:    kmovw %edi, %k1
13767; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
13768; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
13769; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
13770; NoVLX-NEXT:    kmovw %k0, %eax
13771; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
13772; NoVLX-NEXT:    vzeroupper
13773; NoVLX-NEXT:    retq
13774entry:
13775  %0 = bitcast <4 x i64> %__a to <4 x i64>
13776  %1 = bitcast <4 x i64> %__b to <4 x i64>
13777  %2 = icmp sge <4 x i64> %0, %1
13778  %3 = bitcast i8 %__u to <8 x i1>
13779  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
13780  %4 = and <4 x i1> %2, %extract.i
13781  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
13782  %6 = bitcast <16 x i1> %5 to i16
13783  ret i16 %6
13784}
13785
; Masked signed >= compare of <4 x i64> %__a against the <4 x i64> loaded from
; %__b (the same-type bitcasts are no-ops). The 4-bit result is ANDed with the
; first four mask elements of %__u, widened to 16 mask bits, and returned as
; i16. Every shuffle index above 3 is in the range 4-7 and so selects a lane
; of the zeroinitializer operand.
; The VLX run expects the load folded into a ymm vpcmpnltq under %k1.
; The NoVLX run expects a separate vmovdqa, a zmm compare under %k1, and a
; kshiftlw/kshiftrw $12 pair that keeps only the low 4 mask bits.
13786define zeroext i16 @test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
13787; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem:
13788; VLX:       # %bb.0: # %entry
13789; VLX-NEXT:    kmovd %edi, %k1
13790; VLX-NEXT:    vpcmpnltq (%rsi), %ymm0, %k0 {%k1}
13791; VLX-NEXT:    kmovd %k0, %eax
13792; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
13793; VLX-NEXT:    vzeroupper
13794; VLX-NEXT:    retq
13795;
13796; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem:
13797; NoVLX:       # %bb.0: # %entry
13798; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
13799; NoVLX-NEXT:    vmovdqa (%rsi), %ymm1
13800; NoVLX-NEXT:    kmovw %edi, %k1
13801; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
13802; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
13803; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
13804; NoVLX-NEXT:    kmovw %k0, %eax
13805; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
13806; NoVLX-NEXT:    vzeroupper
13807; NoVLX-NEXT:    retq
13808entry:
13809  %0 = bitcast <4 x i64> %__a to <4 x i64>
13810  %load = load <4 x i64>, <4 x i64>* %__b
13811  %1 = bitcast <4 x i64> %load to <4 x i64>
13812  %2 = icmp sge <4 x i64> %0, %1
13813  %3 = bitcast i8 %__u to <8 x i1>
13814  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
13815  %4 = and <4 x i1> %2, %extract.i
13816  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
13817  %6 = bitcast <16 x i1> %5 to i16
13818  ret i16 %6
13819}
13820
13821
; Signed >= compare of <4 x i64> %__a against the i64 loaded from %__b and
; splatted to all four lanes (the same-type bitcast of %__a is a no-op).
; The 4-bit result is widened to 16 mask bits and returned as i16. Every
; shuffle index above 3 is in the range 4-7 and so selects a lane of the
; zeroinitializer operand.
; The VLX run expects the load folded as a {1to4} broadcast operand.
; The NoVLX run expects a separate vpbroadcastq, a zmm compare, and a
; kshiftlw/kshiftrw $12 pair that keeps only the low 4 mask bits.
13822define zeroext i16 @test_vpcmpsgeq_v4i1_v16i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
13823; VLX-LABEL: test_vpcmpsgeq_v4i1_v16i1_mask_mem_b:
13824; VLX:       # %bb.0: # %entry
13825; VLX-NEXT:    vpcmpnltq (%rdi){1to4}, %ymm0, %k0
13826; VLX-NEXT:    kmovd %k0, %eax
13827; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
13828; VLX-NEXT:    vzeroupper
13829; VLX-NEXT:    retq
13830;
13831; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v16i1_mask_mem_b:
13832; NoVLX:       # %bb.0: # %entry
13833; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
13834; NoVLX-NEXT:    vpbroadcastq (%rdi), %ymm1
13835; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0
13836; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
13837; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
13838; NoVLX-NEXT:    kmovw %k0, %eax
13839; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
13840; NoVLX-NEXT:    vzeroupper
13841; NoVLX-NEXT:    retq
13842entry:
13843  %0 = bitcast <4 x i64> %__a to <4 x i64>
13844  %load = load i64, i64* %__b
13845  %vec = insertelement <4 x i64> undef, i64 %load, i32 0
13846  %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
13847  %2 = icmp sge <4 x i64> %0, %1
13848  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
13849  %4 = bitcast <16 x i1> %3 to i16
13850  ret i16 %4
13851}
13852
; Masked signed >= compare of <4 x i64> %__a against the i64 loaded from %__b
; and splatted to all four lanes (the same-type bitcast of %__a is a no-op).
; The 4-bit result is ANDed with the first four mask elements of %__u,
; widened to 16 mask bits, and returned as i16. Every shuffle index above 3
; is in the range 4-7 and so selects a lane of the zeroinitializer operand.
; The VLX run expects the load folded as a {1to4} broadcast under %k1.
; The NoVLX run expects a separate vpbroadcastq, a zmm compare under %k1, and
; a kshiftlw/kshiftrw $12 pair that keeps only the low 4 mask bits.
13853define zeroext i16 @test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
13854; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem_b:
13855; VLX:       # %bb.0: # %entry
13856; VLX-NEXT:    kmovd %edi, %k1
13857; VLX-NEXT:    vpcmpnltq (%rsi){1to4}, %ymm0, %k0 {%k1}
13858; VLX-NEXT:    kmovd %k0, %eax
13859; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
13860; VLX-NEXT:    vzeroupper
13861; VLX-NEXT:    retq
13862;
13863; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem_b:
13864; NoVLX:       # %bb.0: # %entry
13865; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
13866; NoVLX-NEXT:    vpbroadcastq (%rsi), %ymm1
13867; NoVLX-NEXT:    kmovw %edi, %k1
13868; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
13869; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
13870; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
13871; NoVLX-NEXT:    kmovw %k0, %eax
13872; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
13873; NoVLX-NEXT:    vzeroupper
13874; NoVLX-NEXT:    retq
13875entry:
13876  %0 = bitcast <4 x i64> %__a to <4 x i64>
13877  %load = load i64, i64* %__b
13878  %vec = insertelement <4 x i64> undef, i64 %load, i32 0
13879  %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
13880  %2 = icmp sge <4 x i64> %0, %1
13881  %3 = bitcast i8 %__u to <8 x i1>
13882  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
13883  %4 = and <4 x i1> %extract.i, %2
13884  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
13885  %6 = bitcast <16 x i1> %5 to i16
13886  ret i16 %6
13887}
13888
13889
; Signed >= compare of <4 x i64> %__a and %__b (the same-type bitcasts are
; no-ops). The 4-bit result is widened to 32 mask bits and returned as i32.
; Every shuffle index above 3 is in the range 4-7 and so selects a lane of the
; zeroinitializer operand, leaving the upper bits zero.
; The VLX run expects a single ymm vpcmpnltq followed by kmovd.
; The NoVLX run expects both operands to be treated as zmm and a
; kshiftlw/kshiftrw $12 pair that keeps only the low 4 mask bits.
13890define zeroext i32 @test_vpcmpsgeq_v4i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
13891; VLX-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask:
13892; VLX:       # %bb.0: # %entry
13893; VLX-NEXT:    vpcmpnltq %ymm1, %ymm0, %k0
13894; VLX-NEXT:    kmovd %k0, %eax
13895; VLX-NEXT:    vzeroupper
13896; VLX-NEXT:    retq
13897;
13898; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask:
13899; NoVLX:       # %bb.0: # %entry
13900; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
13901; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
13902; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0
13903; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
13904; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
13905; NoVLX-NEXT:    kmovw %k0, %eax
13906; NoVLX-NEXT:    vzeroupper
13907; NoVLX-NEXT:    retq
13908entry:
13909  %0 = bitcast <4 x i64> %__a to <4 x i64>
13910  %1 = bitcast <4 x i64> %__b to <4 x i64>
13911  %2 = icmp sge <4 x i64> %0, %1
13912  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
13913  %4 = bitcast <32 x i1> %3 to i32
13914  ret i32 %4
13915}
13916
; Signed >= compare of <4 x i64> %__a against the <4 x i64> loaded from %__b
; (the same-type bitcasts are no-ops). The 4-bit result is widened to 32 mask
; bits and returned as i32. Every shuffle index above 3 is in the range 4-7
; and so selects a lane of the zeroinitializer operand.
; The VLX run expects the load folded into the vpcmpnltq memory operand.
; The NoVLX run expects a separate vmovdqa, a zmm compare, and a
; kshiftlw/kshiftrw $12 pair that keeps only the low 4 mask bits.
13917define zeroext i32 @test_vpcmpsgeq_v4i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
13918; VLX-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask_mem:
13919; VLX:       # %bb.0: # %entry
13920; VLX-NEXT:    vpcmpnltq (%rdi), %ymm0, %k0
13921; VLX-NEXT:    kmovd %k0, %eax
13922; VLX-NEXT:    vzeroupper
13923; VLX-NEXT:    retq
13924;
13925; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask_mem:
13926; NoVLX:       # %bb.0: # %entry
13927; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
13928; NoVLX-NEXT:    vmovdqa (%rdi), %ymm1
13929; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0
13930; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
13931; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
13932; NoVLX-NEXT:    kmovw %k0, %eax
13933; NoVLX-NEXT:    vzeroupper
13934; NoVLX-NEXT:    retq
13935entry:
13936  %0 = bitcast <4 x i64> %__a to <4 x i64>
13937  %load = load <4 x i64>, <4 x i64>* %__b
13938  %1 = bitcast <4 x i64> %load to <4 x i64>
13939  %2 = icmp sge <4 x i64> %0, %1
13940  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
13941  %4 = bitcast <32 x i1> %3 to i32
13942  ret i32 %4
13943}
13944
; Masked signed >= compare of <4 x i64> %__a and %__b (the same-type bitcasts
; are no-ops). The 4-bit result is ANDed with the first four mask elements of
; %__u, widened to 32 mask bits, and returned as i32. Every shuffle index
; above 3 is in the range 4-7 and so selects a lane of the zeroinitializer
; operand.
; The VLX run expects a ymm vpcmpnltq under write mask %k1.
; The NoVLX run expects a zmm compare under %k1 and a kshiftlw/kshiftrw $12
; pair that keeps only the low 4 mask bits.
13945define zeroext i32 @test_masked_vpcmpsgeq_v4i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
13946; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask:
13947; VLX:       # %bb.0: # %entry
13948; VLX-NEXT:    kmovd %edi, %k1
13949; VLX-NEXT:    vpcmpnltq %ymm1, %ymm0, %k0 {%k1}
13950; VLX-NEXT:    kmovd %k0, %eax
13951; VLX-NEXT:    vzeroupper
13952; VLX-NEXT:    retq
13953;
13954; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask:
13955; NoVLX:       # %bb.0: # %entry
13956; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
13957; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
13958; NoVLX-NEXT:    kmovw %edi, %k1
13959; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
13960; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
13961; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
13962; NoVLX-NEXT:    kmovw %k0, %eax
13963; NoVLX-NEXT:    vzeroupper
13964; NoVLX-NEXT:    retq
13965entry:
13966  %0 = bitcast <4 x i64> %__a to <4 x i64>
13967  %1 = bitcast <4 x i64> %__b to <4 x i64>
13968  %2 = icmp sge <4 x i64> %0, %1
13969  %3 = bitcast i8 %__u to <8 x i1>
13970  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
13971  %4 = and <4 x i1> %2, %extract.i
13972  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
13973  %6 = bitcast <32 x i1> %5 to i32
13974  ret i32 %6
13975}
13976
; Masked signed >= compare of <4 x i64> %__a against the <4 x i64> loaded from
; %__b (the same-type bitcasts are no-ops). The 4-bit result is ANDed with the
; first four mask elements of %__u, widened to 32 mask bits, and returned as
; i32. Every shuffle index above 3 is in the range 4-7 and so selects a lane
; of the zeroinitializer operand.
; The VLX run expects the load folded into a ymm vpcmpnltq under %k1.
; The NoVLX run expects a separate vmovdqa, a zmm compare under %k1, and a
; kshiftlw/kshiftrw $12 pair that keeps only the low 4 mask bits.
13977define zeroext i32 @test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
13978; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem:
13979; VLX:       # %bb.0: # %entry
13980; VLX-NEXT:    kmovd %edi, %k1
13981; VLX-NEXT:    vpcmpnltq (%rsi), %ymm0, %k0 {%k1}
13982; VLX-NEXT:    kmovd %k0, %eax
13983; VLX-NEXT:    vzeroupper
13984; VLX-NEXT:    retq
13985;
13986; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem:
13987; NoVLX:       # %bb.0: # %entry
13988; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
13989; NoVLX-NEXT:    vmovdqa (%rsi), %ymm1
13990; NoVLX-NEXT:    kmovw %edi, %k1
13991; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
13992; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
13993; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
13994; NoVLX-NEXT:    kmovw %k0, %eax
13995; NoVLX-NEXT:    vzeroupper
13996; NoVLX-NEXT:    retq
13997entry:
13998  %0 = bitcast <4 x i64> %__a to <4 x i64>
13999  %load = load <4 x i64>, <4 x i64>* %__b
14000  %1 = bitcast <4 x i64> %load to <4 x i64>
14001  %2 = icmp sge <4 x i64> %0, %1
14002  %3 = bitcast i8 %__u to <8 x i1>
14003  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
14004  %4 = and <4 x i1> %2, %extract.i
14005  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
14006  %6 = bitcast <32 x i1> %5 to i32
14007  ret i32 %6
14008}
14009
14010
; Signed >= compare of <4 x i64> %__a against the i64 loaded from %__b and
; splatted to all four lanes (the same-type bitcast of %__a is a no-op).
; The 4-bit result is widened to 32 mask bits and returned as i32. Every
; shuffle index above 3 is in the range 4-7 and so selects a lane of the
; zeroinitializer operand.
; The VLX run expects the load folded as a {1to4} broadcast operand.
; The NoVLX run expects a separate vpbroadcastq, a zmm compare, and a
; kshiftlw/kshiftrw $12 pair that keeps only the low 4 mask bits.
14011define zeroext i32 @test_vpcmpsgeq_v4i1_v32i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
14012; VLX-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask_mem_b:
14013; VLX:       # %bb.0: # %entry
14014; VLX-NEXT:    vpcmpnltq (%rdi){1to4}, %ymm0, %k0
14015; VLX-NEXT:    kmovd %k0, %eax
14016; VLX-NEXT:    vzeroupper
14017; VLX-NEXT:    retq
14018;
14019; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask_mem_b:
14020; NoVLX:       # %bb.0: # %entry
14021; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
14022; NoVLX-NEXT:    vpbroadcastq (%rdi), %ymm1
14023; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0
14024; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
14025; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
14026; NoVLX-NEXT:    kmovw %k0, %eax
14027; NoVLX-NEXT:    vzeroupper
14028; NoVLX-NEXT:    retq
14029entry:
14030  %0 = bitcast <4 x i64> %__a to <4 x i64>
14031  %load = load i64, i64* %__b
14032  %vec = insertelement <4 x i64> undef, i64 %load, i32 0
14033  %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
14034  %2 = icmp sge <4 x i64> %0, %1
14035  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
14036  %4 = bitcast <32 x i1> %3 to i32
14037  ret i32 %4
14038}
14039
; Masked signed >= compare of <4 x i64> %__a against the i64 loaded from %__b
; and splatted to all four lanes (the same-type bitcast of %__a is a no-op).
; The 4-bit result is ANDed with the first four mask elements of %__u,
; widened to 32 mask bits, and returned as i32. Every shuffle index above 3
; is in the range 4-7 and so selects a lane of the zeroinitializer operand.
; The VLX run expects the load folded as a {1to4} broadcast under %k1.
; The NoVLX run expects a separate vpbroadcastq, a zmm compare under %k1, and
; a kshiftlw/kshiftrw $12 pair that keeps only the low 4 mask bits.
14040define zeroext i32 @test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
14041; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem_b:
14042; VLX:       # %bb.0: # %entry
14043; VLX-NEXT:    kmovd %edi, %k1
14044; VLX-NEXT:    vpcmpnltq (%rsi){1to4}, %ymm0, %k0 {%k1}
14045; VLX-NEXT:    kmovd %k0, %eax
14046; VLX-NEXT:    vzeroupper
14047; VLX-NEXT:    retq
14048;
14049; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem_b:
14050; NoVLX:       # %bb.0: # %entry
14051; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
14052; NoVLX-NEXT:    vpbroadcastq (%rsi), %ymm1
14053; NoVLX-NEXT:    kmovw %edi, %k1
14054; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
14055; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
14056; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
14057; NoVLX-NEXT:    kmovw %k0, %eax
14058; NoVLX-NEXT:    vzeroupper
14059; NoVLX-NEXT:    retq
14060entry:
14061  %0 = bitcast <4 x i64> %__a to <4 x i64>
14062  %load = load i64, i64* %__b
14063  %vec = insertelement <4 x i64> undef, i64 %load, i32 0
14064  %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
14065  %2 = icmp sge <4 x i64> %0, %1
14066  %3 = bitcast i8 %__u to <8 x i1>
14067  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
14068  %4 = and <4 x i1> %extract.i, %2
14069  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
14070  %6 = bitcast <32 x i1> %5 to i32
14071  ret i32 %6
14072}
14073
14074
; Signed >= compare of <4 x i64> %__a and %__b (the same-type bitcasts are
; no-ops). The 4-bit result is widened to 64 mask bits and returned as i64.
; Every shuffle index above 3 is in the range 4-7 and so selects a lane of the
; zeroinitializer operand, leaving the upper bits zero.
; The VLX run expects a single ymm vpcmpnltq followed by kmovq.
; The NoVLX run expects both operands to be treated as zmm, a
; kshiftlw/kshiftrw $12 pair that keeps only the low 4 mask bits, and a
; kmovw plus movzwl to zero-extend the 16-bit mask into the result.
14075define zeroext i64 @test_vpcmpsgeq_v4i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
14076; VLX-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask:
14077; VLX:       # %bb.0: # %entry
14078; VLX-NEXT:    vpcmpnltq %ymm1, %ymm0, %k0
14079; VLX-NEXT:    kmovq %k0, %rax
14080; VLX-NEXT:    vzeroupper
14081; VLX-NEXT:    retq
14082;
14083; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask:
14084; NoVLX:       # %bb.0: # %entry
14085; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
14086; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
14087; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0
14088; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
14089; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
14090; NoVLX-NEXT:    kmovw %k0, %eax
14091; NoVLX-NEXT:    movzwl %ax, %eax
14092; NoVLX-NEXT:    vzeroupper
14093; NoVLX-NEXT:    retq
14094entry:
14095  %0 = bitcast <4 x i64> %__a to <4 x i64>
14096  %1 = bitcast <4 x i64> %__b to <4 x i64>
14097  %2 = icmp sge <4 x i64> %0, %1
14098  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
14099  %4 = bitcast <64 x i1> %3 to i64
14100  ret i64 %4
14101}
14102
; Signed >= compare of <4 x i64> %__a against the <4 x i64> loaded from %__b
; (the same-type bitcasts are no-ops). The 4-bit result is widened to 64 mask
; bits and returned as i64. Every shuffle index above 3 is in the range 4-7
; and so selects a lane of the zeroinitializer operand.
; The VLX run expects the load folded into the vpcmpnltq memory operand,
; followed by kmovq.
; The NoVLX run expects a separate vmovdqa, a zmm compare, a
; kshiftlw/kshiftrw $12 pair that keeps only the low 4 mask bits, and a
; kmovw plus movzwl to zero-extend the 16-bit mask into the result.
14103define zeroext i64 @test_vpcmpsgeq_v4i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
14104; VLX-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask_mem:
14105; VLX:       # %bb.0: # %entry
14106; VLX-NEXT:    vpcmpnltq (%rdi), %ymm0, %k0
14107; VLX-NEXT:    kmovq %k0, %rax
14108; VLX-NEXT:    vzeroupper
14109; VLX-NEXT:    retq
14110;
14111; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask_mem:
14112; NoVLX:       # %bb.0: # %entry
14113; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
14114; NoVLX-NEXT:    vmovdqa (%rdi), %ymm1
14115; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0
14116; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
14117; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
14118; NoVLX-NEXT:    kmovw %k0, %eax
14119; NoVLX-NEXT:    movzwl %ax, %eax
14120; NoVLX-NEXT:    vzeroupper
14121; NoVLX-NEXT:    retq
14122entry:
14123  %0 = bitcast <4 x i64> %__a to <4 x i64>
14124  %load = load <4 x i64>, <4 x i64>* %__b
14125  %1 = bitcast <4 x i64> %load to <4 x i64>
14126  %2 = icmp sge <4 x i64> %0, %1
14127  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
14128  %4 = bitcast <64 x i1> %3 to i64
14129  ret i64 %4
14130}
14131
; Masked signed >= compare of <4 x i64> %__a and %__b (the same-type bitcasts
; are no-ops). The 4-bit result is ANDed with the first four mask elements of
; %__u, widened to 64 mask bits, and returned as i64. Every shuffle index
; above 3 is in the range 4-7 and so selects a lane of the zeroinitializer
; operand.
; The VLX run expects a ymm vpcmpnltq under write mask %k1, followed by kmovq.
; The NoVLX run expects a zmm compare under %k1, a kshiftlw/kshiftrw $12 pair
; that keeps only the low 4 mask bits, and a kmovw plus movzwl to zero-extend
; the 16-bit mask into the result.
14132define zeroext i64 @test_masked_vpcmpsgeq_v4i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
14133; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask:
14134; VLX:       # %bb.0: # %entry
14135; VLX-NEXT:    kmovd %edi, %k1
14136; VLX-NEXT:    vpcmpnltq %ymm1, %ymm0, %k0 {%k1}
14137; VLX-NEXT:    kmovq %k0, %rax
14138; VLX-NEXT:    vzeroupper
14139; VLX-NEXT:    retq
14140;
14141; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask:
14142; NoVLX:       # %bb.0: # %entry
14143; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
14144; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
14145; NoVLX-NEXT:    kmovw %edi, %k1
14146; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
14147; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
14148; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
14149; NoVLX-NEXT:    kmovw %k0, %eax
14150; NoVLX-NEXT:    movzwl %ax, %eax
14151; NoVLX-NEXT:    vzeroupper
14152; NoVLX-NEXT:    retq
14153entry:
14154  %0 = bitcast <4 x i64> %__a to <4 x i64>
14155  %1 = bitcast <4 x i64> %__b to <4 x i64>
14156  %2 = icmp sge <4 x i64> %0, %1
14157  %3 = bitcast i8 %__u to <8 x i1>
14158  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
14159  %4 = and <4 x i1> %2, %extract.i
14160  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
14161  %6 = bitcast <64 x i1> %5 to i64
14162  ret i64 %6
14163}
14164
; Masked signed >= compare of a <4 x i64> register operand against a full
; <4 x i64> vector loaded from %__b.
; The <4 x i1> result is ANDed with the low four lanes of the i8 mask %__u and
; widened to <64 x i1> with zero lanes above bit 3, then returned as i64.
; The +avx512vl run expects the load folded into a write-masked ymm vpcmpnltq;
; the avx512f-only run expects a separate vmovdqa load, a zmm compare, and a
; kshiftlw/kshiftrw $12 pair.
14165define zeroext i64 @test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
14166; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem:
14167; VLX:       # %bb.0: # %entry
14168; VLX-NEXT:    kmovd %edi, %k1
14169; VLX-NEXT:    vpcmpnltq (%rsi), %ymm0, %k0 {%k1}
14170; VLX-NEXT:    kmovq %k0, %rax
14171; VLX-NEXT:    vzeroupper
14172; VLX-NEXT:    retq
14173;
14174; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem:
14175; NoVLX:       # %bb.0: # %entry
14176; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
14177; NoVLX-NEXT:    vmovdqa (%rsi), %ymm1
14178; NoVLX-NEXT:    kmovw %edi, %k1
14179; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
14180; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
14181; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
14182; NoVLX-NEXT:    kmovw %k0, %eax
14183; NoVLX-NEXT:    movzwl %ax, %eax
14184; NoVLX-NEXT:    vzeroupper
14185; NoVLX-NEXT:    retq
14186entry:
14187  %0 = bitcast <4 x i64> %__a to <4 x i64>
14188  %load = load <4 x i64>, <4 x i64>* %__b
14189  %1 = bitcast <4 x i64> %load to <4 x i64>
14190  %2 = icmp sge <4 x i64> %0, %1
14191  %3 = bitcast i8 %__u to <8 x i1>
14192  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
14193  %4 = and <4 x i1> %2, %extract.i
14194  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
14195  %6 = bitcast <64 x i1> %5 to i64
14196  ret i64 %6
14197}
14198
14199
; Unmasked signed >= compare of a <4 x i64> register operand against a scalar
; i64 loaded from %__b and splatted to all four lanes (insertelement into lane
; 0 followed by an all-zero-index shuffle).
; The <4 x i1> result is widened to <64 x i1> with zero lanes above bit 3 and
; returned as i64.
; The +avx512vl run expects the broadcast folded as a {1to4} memory operand;
; the avx512f-only run expects vpbroadcastq, a zmm compare, and a
; kshiftlw/kshiftrw $12 pair.
14200define zeroext i64 @test_vpcmpsgeq_v4i1_v64i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
14201; VLX-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask_mem_b:
14202; VLX:       # %bb.0: # %entry
14203; VLX-NEXT:    vpcmpnltq (%rdi){1to4}, %ymm0, %k0
14204; VLX-NEXT:    kmovq %k0, %rax
14205; VLX-NEXT:    vzeroupper
14206; VLX-NEXT:    retq
14207;
14208; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask_mem_b:
14209; NoVLX:       # %bb.0: # %entry
14210; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
14211; NoVLX-NEXT:    vpbroadcastq (%rdi), %ymm1
14212; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0
14213; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
14214; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
14215; NoVLX-NEXT:    kmovw %k0, %eax
14216; NoVLX-NEXT:    movzwl %ax, %eax
14217; NoVLX-NEXT:    vzeroupper
14218; NoVLX-NEXT:    retq
14219entry:
14220  %0 = bitcast <4 x i64> %__a to <4 x i64>
14221  %load = load i64, i64* %__b
14222  %vec = insertelement <4 x i64> undef, i64 %load, i32 0
14223  %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
14224  %2 = icmp sge <4 x i64> %0, %1
14225  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
14226  %4 = bitcast <64 x i1> %3 to i64
14227  ret i64 %4
14228}
14229
; Masked signed >= compare of a <4 x i64> register operand against a scalar
; i64 loaded from %__b and splatted to all four lanes.
; The low four lanes of the i8 mask %__u are ANDed with the compare result
; (mask is the first `and` operand here), widened to <64 x i1> with zero lanes
; above bit 3, and returned as i64.
; The +avx512vl run expects a write-masked ymm vpcmpnltq with a {1to4} memory
; operand; the avx512f-only run expects vpbroadcastq, a write-masked zmm
; compare, and a kshiftlw/kshiftrw $12 pair.
14230define zeroext i64 @test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
14231; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem_b:
14232; VLX:       # %bb.0: # %entry
14233; VLX-NEXT:    kmovd %edi, %k1
14234; VLX-NEXT:    vpcmpnltq (%rsi){1to4}, %ymm0, %k0 {%k1}
14235; VLX-NEXT:    kmovq %k0, %rax
14236; VLX-NEXT:    vzeroupper
14237; VLX-NEXT:    retq
14238;
14239; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem_b:
14240; NoVLX:       # %bb.0: # %entry
14241; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
14242; NoVLX-NEXT:    vpbroadcastq (%rsi), %ymm1
14243; NoVLX-NEXT:    kmovw %edi, %k1
14244; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
14245; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
14246; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
14247; NoVLX-NEXT:    kmovw %k0, %eax
14248; NoVLX-NEXT:    movzwl %ax, %eax
14249; NoVLX-NEXT:    vzeroupper
14250; NoVLX-NEXT:    retq
14251entry:
14252  %0 = bitcast <4 x i64> %__a to <4 x i64>
14253  %load = load i64, i64* %__b
14254  %vec = insertelement <4 x i64> undef, i64 %load, i32 0
14255  %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
14256  %2 = icmp sge <4 x i64> %0, %1
14257  %3 = bitcast i8 %__u to <8 x i1>
14258  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
14259  %4 = and <4 x i1> %extract.i, %2
14260  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
14261  %6 = bitcast <64 x i1> %5 to i64
14262  ret i64 %6
14263}
14264
14265
; Unmasked signed >= compare of two <8 x i64> register operands.
; The <8 x i1> result is widened to <16 x i1> (indices 8-15 select lanes of the
; zeroinitializer operand) and returned as i16.
; Both run configurations expect a single zmm vpcmpnltq into %k0; they differ
; only in the mask-to-GPR move (kmovd with +avx512vl/bw/dq, kmovw otherwise).
14266define zeroext i16 @test_vpcmpsgeq_v8i1_v16i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
14267; VLX-LABEL: test_vpcmpsgeq_v8i1_v16i1_mask:
14268; VLX:       # %bb.0: # %entry
14269; VLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0
14270; VLX-NEXT:    kmovd %k0, %eax
14271; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
14272; VLX-NEXT:    vzeroupper
14273; VLX-NEXT:    retq
14274;
14275; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v16i1_mask:
14276; NoVLX:       # %bb.0: # %entry
14277; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0
14278; NoVLX-NEXT:    kmovw %k0, %eax
14279; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
14280; NoVLX-NEXT:    vzeroupper
14281; NoVLX-NEXT:    retq
14282entry:
14283  %0 = bitcast <8 x i64> %__a to <8 x i64>
14284  %1 = bitcast <8 x i64> %__b to <8 x i64>
14285  %2 = icmp sge <8 x i64> %0, %1
14286  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14287  %4 = bitcast <16 x i1> %3 to i16
14288  ret i16 %4
14289}
14290
; Unmasked signed >= compare of an <8 x i64> register operand against a full
; <8 x i64> vector loaded from %__b.
; The <8 x i1> result is widened to <16 x i1> with zero upper lanes and
; returned as i16.
; Both run configurations expect the load folded into a zmm vpcmpnltq.
14291define zeroext i16 @test_vpcmpsgeq_v8i1_v16i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
14292; VLX-LABEL: test_vpcmpsgeq_v8i1_v16i1_mask_mem:
14293; VLX:       # %bb.0: # %entry
14294; VLX-NEXT:    vpcmpnltq (%rdi), %zmm0, %k0
14295; VLX-NEXT:    kmovd %k0, %eax
14296; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
14297; VLX-NEXT:    vzeroupper
14298; VLX-NEXT:    retq
14299;
14300; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v16i1_mask_mem:
14301; NoVLX:       # %bb.0: # %entry
14302; NoVLX-NEXT:    vpcmpnltq (%rdi), %zmm0, %k0
14303; NoVLX-NEXT:    kmovw %k0, %eax
14304; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
14305; NoVLX-NEXT:    vzeroupper
14306; NoVLX-NEXT:    retq
14307entry:
14308  %0 = bitcast <8 x i64> %__a to <8 x i64>
14309  %load = load <8 x i64>, <8 x i64>* %__b
14310  %1 = bitcast <8 x i64> %load to <8 x i64>
14311  %2 = icmp sge <8 x i64> %0, %1
14312  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14313  %4 = bitcast <16 x i1> %3 to i16
14314  ret i16 %4
14315}
14316
; Masked signed >= compare of two <8 x i64> register operands.
; The <8 x i1> result is ANDed with the i8 mask %__u (bitcast to <8 x i1>),
; widened to <16 x i1> with zero upper lanes, and returned as i16.
; Both run configurations expect %edi moved into %k1 and a single write-masked
; zmm vpcmpnltq.
14317define zeroext i16 @test_masked_vpcmpsgeq_v8i1_v16i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
14318; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v16i1_mask:
14319; VLX:       # %bb.0: # %entry
14320; VLX-NEXT:    kmovd %edi, %k1
14321; VLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
14322; VLX-NEXT:    kmovd %k0, %eax
14323; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
14324; VLX-NEXT:    vzeroupper
14325; VLX-NEXT:    retq
14326;
14327; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v16i1_mask:
14328; NoVLX:       # %bb.0: # %entry
14329; NoVLX-NEXT:    kmovw %edi, %k1
14330; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
14331; NoVLX-NEXT:    kmovw %k0, %eax
14332; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
14333; NoVLX-NEXT:    vzeroupper
14334; NoVLX-NEXT:    retq
14335entry:
14336  %0 = bitcast <8 x i64> %__a to <8 x i64>
14337  %1 = bitcast <8 x i64> %__b to <8 x i64>
14338  %2 = icmp sge <8 x i64> %0, %1
14339  %3 = bitcast i8 %__u to <8 x i1>
14340  %4 = and <8 x i1> %2, %3
14341  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14342  %6 = bitcast <16 x i1> %5 to i16
14343  ret i16 %6
14344}
14345
; Masked signed >= compare of an <8 x i64> register operand against a full
; <8 x i64> vector loaded from %__b.
; The <8 x i1> result is ANDed with the i8 mask %__u, widened to <16 x i1>
; with zero upper lanes, and returned as i16.
; Both run configurations expect the load folded into a write-masked zmm
; vpcmpnltq.
14346define zeroext i16 @test_masked_vpcmpsgeq_v8i1_v16i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
14347; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v16i1_mask_mem:
14348; VLX:       # %bb.0: # %entry
14349; VLX-NEXT:    kmovd %edi, %k1
14350; VLX-NEXT:    vpcmpnltq (%rsi), %zmm0, %k0 {%k1}
14351; VLX-NEXT:    kmovd %k0, %eax
14352; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
14353; VLX-NEXT:    vzeroupper
14354; VLX-NEXT:    retq
14355;
14356; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v16i1_mask_mem:
14357; NoVLX:       # %bb.0: # %entry
14358; NoVLX-NEXT:    kmovw %edi, %k1
14359; NoVLX-NEXT:    vpcmpnltq (%rsi), %zmm0, %k0 {%k1}
14360; NoVLX-NEXT:    kmovw %k0, %eax
14361; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
14362; NoVLX-NEXT:    vzeroupper
14363; NoVLX-NEXT:    retq
14364entry:
14365  %0 = bitcast <8 x i64> %__a to <8 x i64>
14366  %load = load <8 x i64>, <8 x i64>* %__b
14367  %1 = bitcast <8 x i64> %load to <8 x i64>
14368  %2 = icmp sge <8 x i64> %0, %1
14369  %3 = bitcast i8 %__u to <8 x i1>
14370  %4 = and <8 x i1> %2, %3
14371  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14372  %6 = bitcast <16 x i1> %5 to i16
14373  ret i16 %6
14374}
14375
14376
; Unmasked signed >= compare of an <8 x i64> register operand against a scalar
; i64 loaded from %__b and splatted to all eight lanes (insertelement into lane
; 0 followed by an all-zero-index shuffle).
; The <8 x i1> result is widened to <16 x i1> with zero upper lanes and
; returned as i16.
; Both run configurations expect the broadcast folded as a {1to8} memory
; operand of a zmm vpcmpnltq.
14377define zeroext i16 @test_vpcmpsgeq_v8i1_v16i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
14378; VLX-LABEL: test_vpcmpsgeq_v8i1_v16i1_mask_mem_b:
14379; VLX:       # %bb.0: # %entry
14380; VLX-NEXT:    vpcmpnltq (%rdi){1to8}, %zmm0, %k0
14381; VLX-NEXT:    kmovd %k0, %eax
14382; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
14383; VLX-NEXT:    vzeroupper
14384; VLX-NEXT:    retq
14385;
14386; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v16i1_mask_mem_b:
14387; NoVLX:       # %bb.0: # %entry
14388; NoVLX-NEXT:    vpcmpnltq (%rdi){1to8}, %zmm0, %k0
14389; NoVLX-NEXT:    kmovw %k0, %eax
14390; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
14391; NoVLX-NEXT:    vzeroupper
14392; NoVLX-NEXT:    retq
14393entry:
14394  %0 = bitcast <8 x i64> %__a to <8 x i64>
14395  %load = load i64, i64* %__b
14396  %vec = insertelement <8 x i64> undef, i64 %load, i32 0
14397  %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
14398  %2 = icmp sge <8 x i64> %0, %1
14399  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14400  %4 = bitcast <16 x i1> %3 to i16
14401  ret i16 %4
14402}
14403
; Masked signed >= compare of an <8 x i64> register operand against a scalar
; i64 loaded from %__b and splatted to all eight lanes.
; The i8 mask %__u is ANDed with the compare result (mask is the first `and`
; operand here), widened to <16 x i1> with zero upper lanes, and returned as
; i16.
; Both run configurations expect a write-masked zmm vpcmpnltq with a {1to8}
; memory operand.
14404define zeroext i16 @test_masked_vpcmpsgeq_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
14405; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v16i1_mask_mem_b:
14406; VLX:       # %bb.0: # %entry
14407; VLX-NEXT:    kmovd %edi, %k1
14408; VLX-NEXT:    vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1}
14409; VLX-NEXT:    kmovd %k0, %eax
14410; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
14411; VLX-NEXT:    vzeroupper
14412; VLX-NEXT:    retq
14413;
14414; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v16i1_mask_mem_b:
14415; NoVLX:       # %bb.0: # %entry
14416; NoVLX-NEXT:    kmovw %edi, %k1
14417; NoVLX-NEXT:    vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1}
14418; NoVLX-NEXT:    kmovw %k0, %eax
14419; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
14420; NoVLX-NEXT:    vzeroupper
14421; NoVLX-NEXT:    retq
14422entry:
14423  %0 = bitcast <8 x i64> %__a to <8 x i64>
14424  %load = load i64, i64* %__b
14425  %vec = insertelement <8 x i64> undef, i64 %load, i32 0
14426  %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
14427  %2 = icmp sge <8 x i64> %0, %1
14428  %3 = bitcast i8 %__u to <8 x i1>
14429  %4 = and <8 x i1> %3, %2
14430  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14431  %6 = bitcast <16 x i1> %5 to i16
14432  ret i16 %6
14433}
14434
14435
; Unmasked signed >= compare of two <8 x i64> register operands.
; The <8 x i1> result is widened to <32 x i1>: every index above 7 is in the
; range 8-15 and so selects a lane of the zeroinitializer operand. Returned as
; i32.
; Both run configurations expect a single zmm vpcmpnltq followed by a
; mask-to-GPR move (kmovd with +avx512vl/bw/dq, kmovw otherwise).
14436define zeroext i32 @test_vpcmpsgeq_v8i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
14437; VLX-LABEL: test_vpcmpsgeq_v8i1_v32i1_mask:
14438; VLX:       # %bb.0: # %entry
14439; VLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0
14440; VLX-NEXT:    kmovd %k0, %eax
14441; VLX-NEXT:    vzeroupper
14442; VLX-NEXT:    retq
14443;
14444; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v32i1_mask:
14445; NoVLX:       # %bb.0: # %entry
14446; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0
14447; NoVLX-NEXT:    kmovw %k0, %eax
14448; NoVLX-NEXT:    vzeroupper
14449; NoVLX-NEXT:    retq
14450entry:
14451  %0 = bitcast <8 x i64> %__a to <8 x i64>
14452  %1 = bitcast <8 x i64> %__b to <8 x i64>
14453  %2 = icmp sge <8 x i64> %0, %1
14454  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14455  %4 = bitcast <32 x i1> %3 to i32
14456  ret i32 %4
14457}
14458
; Unmasked signed >= compare of an <8 x i64> register operand against a full
; <8 x i64> vector loaded from %__b.
; The <8 x i1> result is widened to <32 x i1> with zero lanes above bit 7 and
; returned as i32.
; Both run configurations expect the load folded into a zmm vpcmpnltq.
14459define zeroext i32 @test_vpcmpsgeq_v8i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
14460; VLX-LABEL: test_vpcmpsgeq_v8i1_v32i1_mask_mem:
14461; VLX:       # %bb.0: # %entry
14462; VLX-NEXT:    vpcmpnltq (%rdi), %zmm0, %k0
14463; VLX-NEXT:    kmovd %k0, %eax
14464; VLX-NEXT:    vzeroupper
14465; VLX-NEXT:    retq
14466;
14467; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v32i1_mask_mem:
14468; NoVLX:       # %bb.0: # %entry
14469; NoVLX-NEXT:    vpcmpnltq (%rdi), %zmm0, %k0
14470; NoVLX-NEXT:    kmovw %k0, %eax
14471; NoVLX-NEXT:    vzeroupper
14472; NoVLX-NEXT:    retq
14473entry:
14474  %0 = bitcast <8 x i64> %__a to <8 x i64>
14475  %load = load <8 x i64>, <8 x i64>* %__b
14476  %1 = bitcast <8 x i64> %load to <8 x i64>
14477  %2 = icmp sge <8 x i64> %0, %1
14478  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14479  %4 = bitcast <32 x i1> %3 to i32
14480  ret i32 %4
14481}
14482
; Masked signed >= compare of two <8 x i64> register operands.
; The <8 x i1> result is ANDed with the i8 mask %__u, widened to <32 x i1>
; with zero lanes above bit 7, and returned as i32.
; Both run configurations expect %edi moved into %k1 and a single write-masked
; zmm vpcmpnltq.
14483define zeroext i32 @test_masked_vpcmpsgeq_v8i1_v32i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
14484; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v32i1_mask:
14485; VLX:       # %bb.0: # %entry
14486; VLX-NEXT:    kmovd %edi, %k1
14487; VLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
14488; VLX-NEXT:    kmovd %k0, %eax
14489; VLX-NEXT:    vzeroupper
14490; VLX-NEXT:    retq
14491;
14492; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v32i1_mask:
14493; NoVLX:       # %bb.0: # %entry
14494; NoVLX-NEXT:    kmovw %edi, %k1
14495; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
14496; NoVLX-NEXT:    kmovw %k0, %eax
14497; NoVLX-NEXT:    vzeroupper
14498; NoVLX-NEXT:    retq
14499entry:
14500  %0 = bitcast <8 x i64> %__a to <8 x i64>
14501  %1 = bitcast <8 x i64> %__b to <8 x i64>
14502  %2 = icmp sge <8 x i64> %0, %1
14503  %3 = bitcast i8 %__u to <8 x i1>
14504  %4 = and <8 x i1> %2, %3
14505  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14506  %6 = bitcast <32 x i1> %5 to i32
14507  ret i32 %6
14508}
14509
; Masked signed >= compare of an <8 x i64> register operand against a full
; <8 x i64> vector loaded from %__b.
; The <8 x i1> result is ANDed with the i8 mask %__u, widened to <32 x i1>
; with zero lanes above bit 7, and returned as i32.
; Both run configurations expect the load folded into a write-masked zmm
; vpcmpnltq.
14510define zeroext i32 @test_masked_vpcmpsgeq_v8i1_v32i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
14511; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v32i1_mask_mem:
14512; VLX:       # %bb.0: # %entry
14513; VLX-NEXT:    kmovd %edi, %k1
14514; VLX-NEXT:    vpcmpnltq (%rsi), %zmm0, %k0 {%k1}
14515; VLX-NEXT:    kmovd %k0, %eax
14516; VLX-NEXT:    vzeroupper
14517; VLX-NEXT:    retq
14518;
14519; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v32i1_mask_mem:
14520; NoVLX:       # %bb.0: # %entry
14521; NoVLX-NEXT:    kmovw %edi, %k1
14522; NoVLX-NEXT:    vpcmpnltq (%rsi), %zmm0, %k0 {%k1}
14523; NoVLX-NEXT:    kmovw %k0, %eax
14524; NoVLX-NEXT:    vzeroupper
14525; NoVLX-NEXT:    retq
14526entry:
14527  %0 = bitcast <8 x i64> %__a to <8 x i64>
14528  %load = load <8 x i64>, <8 x i64>* %__b
14529  %1 = bitcast <8 x i64> %load to <8 x i64>
14530  %2 = icmp sge <8 x i64> %0, %1
14531  %3 = bitcast i8 %__u to <8 x i1>
14532  %4 = and <8 x i1> %2, %3
14533  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14534  %6 = bitcast <32 x i1> %5 to i32
14535  ret i32 %6
14536}
14537
14538
; Unmasked signed >= compare of an <8 x i64> register operand against a scalar
; i64 loaded from %__b and splatted to all eight lanes.
; The <8 x i1> result is widened to <32 x i1> with zero lanes above bit 7 and
; returned as i32.
; Both run configurations expect the broadcast folded as a {1to8} memory
; operand of a zmm vpcmpnltq.
14539define zeroext i32 @test_vpcmpsgeq_v8i1_v32i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
14540; VLX-LABEL: test_vpcmpsgeq_v8i1_v32i1_mask_mem_b:
14541; VLX:       # %bb.0: # %entry
14542; VLX-NEXT:    vpcmpnltq (%rdi){1to8}, %zmm0, %k0
14543; VLX-NEXT:    kmovd %k0, %eax
14544; VLX-NEXT:    vzeroupper
14545; VLX-NEXT:    retq
14546;
14547; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v32i1_mask_mem_b:
14548; NoVLX:       # %bb.0: # %entry
14549; NoVLX-NEXT:    vpcmpnltq (%rdi){1to8}, %zmm0, %k0
14550; NoVLX-NEXT:    kmovw %k0, %eax
14551; NoVLX-NEXT:    vzeroupper
14552; NoVLX-NEXT:    retq
14553entry:
14554  %0 = bitcast <8 x i64> %__a to <8 x i64>
14555  %load = load i64, i64* %__b
14556  %vec = insertelement <8 x i64> undef, i64 %load, i32 0
14557  %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
14558  %2 = icmp sge <8 x i64> %0, %1
14559  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14560  %4 = bitcast <32 x i1> %3 to i32
14561  ret i32 %4
14562}
14563
; Masked signed >= compare of an <8 x i64> register operand against a scalar
; i64 loaded from %__b and splatted to all eight lanes.
; The i8 mask %__u is ANDed with the compare result (mask is the first `and`
; operand here), widened to <32 x i1> with zero lanes above bit 7, and
; returned as i32.
; Both run configurations expect a write-masked zmm vpcmpnltq with a {1to8}
; memory operand.
14564define zeroext i32 @test_masked_vpcmpsgeq_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
14565; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v32i1_mask_mem_b:
14566; VLX:       # %bb.0: # %entry
14567; VLX-NEXT:    kmovd %edi, %k1
14568; VLX-NEXT:    vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1}
14569; VLX-NEXT:    kmovd %k0, %eax
14570; VLX-NEXT:    vzeroupper
14571; VLX-NEXT:    retq
14572;
14573; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v32i1_mask_mem_b:
14574; NoVLX:       # %bb.0: # %entry
14575; NoVLX-NEXT:    kmovw %edi, %k1
14576; NoVLX-NEXT:    vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1}
14577; NoVLX-NEXT:    kmovw %k0, %eax
14578; NoVLX-NEXT:    vzeroupper
14579; NoVLX-NEXT:    retq
14580entry:
14581  %0 = bitcast <8 x i64> %__a to <8 x i64>
14582  %load = load i64, i64* %__b
14583  %vec = insertelement <8 x i64> undef, i64 %load, i32 0
14584  %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
14585  %2 = icmp sge <8 x i64> %0, %1
14586  %3 = bitcast i8 %__u to <8 x i1>
14587  %4 = and <8 x i1> %3, %2
14588  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14589  %6 = bitcast <32 x i1> %5 to i32
14590  ret i32 %6
14591}
14592
14593
; Unmasked signed >= compare of two <8 x i64> register operands.
; The <8 x i1> result is widened to <64 x i1>: every index above 7 is in the
; range 8-15 and so selects a lane of the zeroinitializer operand. Returned as
; i64.
; The +avx512vl/bw/dq run expects the mask read with kmovq; the avx512f-only
; run expects kmovw followed by movzwl.
14594define zeroext i64 @test_vpcmpsgeq_v8i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
14595; VLX-LABEL: test_vpcmpsgeq_v8i1_v64i1_mask:
14596; VLX:       # %bb.0: # %entry
14597; VLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0
14598; VLX-NEXT:    kmovq %k0, %rax
14599; VLX-NEXT:    vzeroupper
14600; VLX-NEXT:    retq
14601;
14602; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v64i1_mask:
14603; NoVLX:       # %bb.0: # %entry
14604; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0
14605; NoVLX-NEXT:    kmovw %k0, %eax
14606; NoVLX-NEXT:    movzwl %ax, %eax
14607; NoVLX-NEXT:    vzeroupper
14608; NoVLX-NEXT:    retq
14609entry:
14610  %0 = bitcast <8 x i64> %__a to <8 x i64>
14611  %1 = bitcast <8 x i64> %__b to <8 x i64>
14612  %2 = icmp sge <8 x i64> %0, %1
14613  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14614  %4 = bitcast <64 x i1> %3 to i64
14615  ret i64 %4
14616}
14617
; Unmasked signed >= compare of an <8 x i64> register operand against a full
; <8 x i64> vector loaded from %__b.
; The <8 x i1> result is widened to <64 x i1> with zero lanes above bit 7 and
; returned as i64.
; Both run configurations expect the load folded into a zmm vpcmpnltq; the
; mask is then read with kmovq (+avx512vl/bw/dq) or kmovw plus movzwl
; (avx512f only).
14618define zeroext i64 @test_vpcmpsgeq_v8i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
14619; VLX-LABEL: test_vpcmpsgeq_v8i1_v64i1_mask_mem:
14620; VLX:       # %bb.0: # %entry
14621; VLX-NEXT:    vpcmpnltq (%rdi), %zmm0, %k0
14622; VLX-NEXT:    kmovq %k0, %rax
14623; VLX-NEXT:    vzeroupper
14624; VLX-NEXT:    retq
14625;
14626; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v64i1_mask_mem:
14627; NoVLX:       # %bb.0: # %entry
14628; NoVLX-NEXT:    vpcmpnltq (%rdi), %zmm0, %k0
14629; NoVLX-NEXT:    kmovw %k0, %eax
14630; NoVLX-NEXT:    movzwl %ax, %eax
14631; NoVLX-NEXT:    vzeroupper
14632; NoVLX-NEXT:    retq
14633entry:
14634  %0 = bitcast <8 x i64> %__a to <8 x i64>
14635  %load = load <8 x i64>, <8 x i64>* %__b
14636  %1 = bitcast <8 x i64> %load to <8 x i64>
14637  %2 = icmp sge <8 x i64> %0, %1
14638  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14639  %4 = bitcast <64 x i1> %3 to i64
14640  ret i64 %4
14641}
14642
; Masked signed >= compare of two <8 x i64> register operands.
; The <8 x i1> result is ANDed with the i8 mask %__u, widened to <64 x i1>
; with zero lanes above bit 7, and returned as i64.
; Both run configurations expect a single write-masked zmm vpcmpnltq; the mask
; is then read with kmovq (+avx512vl/bw/dq) or kmovw plus movzwl (avx512f
; only).
14643define zeroext i64 @test_masked_vpcmpsgeq_v8i1_v64i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
14644; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v64i1_mask:
14645; VLX:       # %bb.0: # %entry
14646; VLX-NEXT:    kmovd %edi, %k1
14647; VLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
14648; VLX-NEXT:    kmovq %k0, %rax
14649; VLX-NEXT:    vzeroupper
14650; VLX-NEXT:    retq
14651;
14652; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v64i1_mask:
14653; NoVLX:       # %bb.0: # %entry
14654; NoVLX-NEXT:    kmovw %edi, %k1
14655; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
14656; NoVLX-NEXT:    kmovw %k0, %eax
14657; NoVLX-NEXT:    movzwl %ax, %eax
14658; NoVLX-NEXT:    vzeroupper
14659; NoVLX-NEXT:    retq
14660entry:
14661  %0 = bitcast <8 x i64> %__a to <8 x i64>
14662  %1 = bitcast <8 x i64> %__b to <8 x i64>
14663  %2 = icmp sge <8 x i64> %0, %1
14664  %3 = bitcast i8 %__u to <8 x i1>
14665  %4 = and <8 x i1> %2, %3
14666  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14667  %6 = bitcast <64 x i1> %5 to i64
14668  ret i64 %6
14669}
14670
; Masked signed >= compare of an <8 x i64> register operand against a full
; <8 x i64> vector loaded from %__b.
; The <8 x i1> result is ANDed with the i8 mask %__u, widened to <64 x i1>
; with zero lanes above bit 7, and returned as i64.
; Both run configurations expect the load folded into a write-masked zmm
; vpcmpnltq; the mask is then read with kmovq (+avx512vl/bw/dq) or kmovw plus
; movzwl (avx512f only).
14671define zeroext i64 @test_masked_vpcmpsgeq_v8i1_v64i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
14672; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v64i1_mask_mem:
14673; VLX:       # %bb.0: # %entry
14674; VLX-NEXT:    kmovd %edi, %k1
14675; VLX-NEXT:    vpcmpnltq (%rsi), %zmm0, %k0 {%k1}
14676; VLX-NEXT:    kmovq %k0, %rax
14677; VLX-NEXT:    vzeroupper
14678; VLX-NEXT:    retq
14679;
14680; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v64i1_mask_mem:
14681; NoVLX:       # %bb.0: # %entry
14682; NoVLX-NEXT:    kmovw %edi, %k1
14683; NoVLX-NEXT:    vpcmpnltq (%rsi), %zmm0, %k0 {%k1}
14684; NoVLX-NEXT:    kmovw %k0, %eax
14685; NoVLX-NEXT:    movzwl %ax, %eax
14686; NoVLX-NEXT:    vzeroupper
14687; NoVLX-NEXT:    retq
14688entry:
14689  %0 = bitcast <8 x i64> %__a to <8 x i64>
14690  %load = load <8 x i64>, <8 x i64>* %__b
14691  %1 = bitcast <8 x i64> %load to <8 x i64>
14692  %2 = icmp sge <8 x i64> %0, %1
14693  %3 = bitcast i8 %__u to <8 x i1>
14694  %4 = and <8 x i1> %2, %3
14695  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14696  %6 = bitcast <64 x i1> %5 to i64
14697  ret i64 %6
14698}
14699
14700
; Unmasked signed >= compare of an <8 x i64> register operand against a scalar
; i64 loaded from %__b and splatted to all eight lanes.
; The <8 x i1> result is widened to <64 x i1> with zero lanes above bit 7 and
; returned as i64.
; Both run configurations expect the broadcast folded as a {1to8} memory
; operand of a zmm vpcmpnltq; the mask is then read with kmovq
; (+avx512vl/bw/dq) or kmovw plus movzwl (avx512f only).
14701define zeroext i64 @test_vpcmpsgeq_v8i1_v64i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
14702; VLX-LABEL: test_vpcmpsgeq_v8i1_v64i1_mask_mem_b:
14703; VLX:       # %bb.0: # %entry
14704; VLX-NEXT:    vpcmpnltq (%rdi){1to8}, %zmm0, %k0
14705; VLX-NEXT:    kmovq %k0, %rax
14706; VLX-NEXT:    vzeroupper
14707; VLX-NEXT:    retq
14708;
14709; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v64i1_mask_mem_b:
14710; NoVLX:       # %bb.0: # %entry
14711; NoVLX-NEXT:    vpcmpnltq (%rdi){1to8}, %zmm0, %k0
14712; NoVLX-NEXT:    kmovw %k0, %eax
14713; NoVLX-NEXT:    movzwl %ax, %eax
14714; NoVLX-NEXT:    vzeroupper
14715; NoVLX-NEXT:    retq
14716entry:
14717  %0 = bitcast <8 x i64> %__a to <8 x i64>
14718  %load = load i64, i64* %__b
14719  %vec = insertelement <8 x i64> undef, i64 %load, i32 0
14720  %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
14721  %2 = icmp sge <8 x i64> %0, %1
14722  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14723  %4 = bitcast <64 x i1> %3 to i64
14724  ret i64 %4
14725}
14726
; Masked signed >= compare of an <8 x i64> register operand against a scalar
; i64 loaded from %__b and splatted to all eight lanes.
; The i8 mask %__u is ANDed with the compare result (mask is the first `and`
; operand here), widened to <64 x i1> with zero lanes above bit 7, and
; returned as i64.
; Both run configurations expect a write-masked zmm vpcmpnltq with a {1to8}
; memory operand; the mask is then read with kmovq (+avx512vl/bw/dq) or kmovw
; plus movzwl (avx512f only).
14727define zeroext i64 @test_masked_vpcmpsgeq_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
14728; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v64i1_mask_mem_b:
14729; VLX:       # %bb.0: # %entry
14730; VLX-NEXT:    kmovd %edi, %k1
14731; VLX-NEXT:    vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1}
14732; VLX-NEXT:    kmovq %k0, %rax
14733; VLX-NEXT:    vzeroupper
14734; VLX-NEXT:    retq
14735;
14736; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v64i1_mask_mem_b:
14737; NoVLX:       # %bb.0: # %entry
14738; NoVLX-NEXT:    kmovw %edi, %k1
14739; NoVLX-NEXT:    vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1}
14740; NoVLX-NEXT:    kmovw %k0, %eax
14741; NoVLX-NEXT:    movzwl %ax, %eax
14742; NoVLX-NEXT:    vzeroupper
14743; NoVLX-NEXT:    retq
14744entry:
14745  %0 = bitcast <8 x i64> %__a to <8 x i64>
14746  %load = load i64, i64* %__b
14747  %vec = insertelement <8 x i64> undef, i64 %load, i32 0
14748  %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
14749  %2 = icmp sge <8 x i64> %0, %1
14750  %3 = bitcast i8 %__u to <8 x i1>
14751  %4 = and <8 x i1> %3, %2
14752  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14753  %6 = bitcast <64 x i1> %5 to i64
14754  ret i64 %6
14755}
14756
14757
; Unmasked unsigned < compare of two 128-bit register operands viewed as
; <16 x i8> (both <2 x i64> arguments are bitcast first).
; The <16 x i1> result is widened to <32 x i1> (indices 16-31 select lanes of
; the zeroinitializer operand) and returned as i32.
; The +avx512vl/bw run expects a single xmm vpcmpltub into %k0. The
; avx512f-only run expects the sequence vpmaxub, vpcmpeqb, vpternlogq $15,
; vpmovsxbd, vpslld $31, vptestmd before the mask is read with kmovw.
14758define zeroext i32 @test_vpcmpultb_v16i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
14759; VLX-LABEL: test_vpcmpultb_v16i1_v32i1_mask:
14760; VLX:       # %bb.0: # %entry
14761; VLX-NEXT:    vpcmpltub %xmm1, %xmm0, %k0
14762; VLX-NEXT:    kmovd %k0, %eax
14763; VLX-NEXT:    retq
14764;
14765; NoVLX-LABEL: test_vpcmpultb_v16i1_v32i1_mask:
14766; NoVLX:       # %bb.0: # %entry
14767; NoVLX-NEXT:    vpmaxub %xmm1, %xmm0, %xmm1
14768; NoVLX-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
14769; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
14770; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
14771; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
14772; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
14773; NoVLX-NEXT:    kmovw %k0, %eax
14774; NoVLX-NEXT:    vzeroupper
14775; NoVLX-NEXT:    retq
14776entry:
14777  %0 = bitcast <2 x i64> %__a to <16 x i8>
14778  %1 = bitcast <2 x i64> %__b to <16 x i8>
14779  %2 = icmp ult <16 x i8> %0, %1
14780  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
14781  %4 = bitcast <32 x i1> %3 to i32
14782  ret i32 %4
14783}
14784
; Memory-operand variant: unsigned less-than over sixteen i8 lanes where the
; right-hand side is loaded through %__b; the <16 x i1> result is widened to
; <32 x i1> and returned as an i32 bitmask.
; Expected codegen folds the load into the first vector instruction:
; vpcmpltub (%rdi) with +avx512bw,+avx512vl, vpmaxub (%rdi) with +avx512f only.
14785define zeroext i32 @test_vpcmpultb_v16i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
14786; VLX-LABEL: test_vpcmpultb_v16i1_v32i1_mask_mem:
14787; VLX:       # %bb.0: # %entry
14788; VLX-NEXT:    vpcmpltub (%rdi), %xmm0, %k0
14789; VLX-NEXT:    kmovd %k0, %eax
14790; VLX-NEXT:    retq
14791;
14792; NoVLX-LABEL: test_vpcmpultb_v16i1_v32i1_mask_mem:
14793; NoVLX:       # %bb.0: # %entry
14794; NoVLX-NEXT:    vpmaxub (%rdi), %xmm0, %xmm1
14795; NoVLX-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
14796; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
14797; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
14798; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
14799; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
14800; NoVLX-NEXT:    kmovw %k0, %eax
14801; NoVLX-NEXT:    vzeroupper
14802; NoVLX-NEXT:    retq
14803entry:
14804  %0 = bitcast <2 x i64> %__a to <16 x i8>
14805  %load = load <2 x i64>, <2 x i64>* %__b
14806  %1 = bitcast <2 x i64> %load to <16 x i8>
14807  %2 = icmp ult <16 x i8> %0, %1
  ; Shuffle indices 16..31 select from the zeroinitializer operand, so result lanes 16..31 are zero.
14808  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
14809  %4 = bitcast <32 x i1> %3 to i32
14810  ret i32 %4
14811}
14812
; Masked variant: unsigned less-than over sixteen i8 lanes, ANDed lane-wise with
; the 16-bit mask %__u, widened to <32 x i1> and returned as an i32 bitmask.
; Expected codegen: with +avx512bw,+avx512vl %__u becomes the {%k1} write-mask of
; vpcmpltub; with +avx512f only the compare is emulated (vpmaxub / vpcmpeqb /
; vpternlogq $15) and %__u is applied afterwards with a scalar andl.
14813define zeroext i32 @test_masked_vpcmpultb_v16i1_v32i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
14814; VLX-LABEL: test_masked_vpcmpultb_v16i1_v32i1_mask:
14815; VLX:       # %bb.0: # %entry
14816; VLX-NEXT:    kmovd %edi, %k1
14817; VLX-NEXT:    vpcmpltub %xmm1, %xmm0, %k0 {%k1}
14818; VLX-NEXT:    kmovd %k0, %eax
14819; VLX-NEXT:    retq
14820;
14821; NoVLX-LABEL: test_masked_vpcmpultb_v16i1_v32i1_mask:
14822; NoVLX:       # %bb.0: # %entry
14823; NoVLX-NEXT:    vpmaxub %xmm1, %xmm0, %xmm1
14824; NoVLX-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
14825; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
14826; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
14827; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
14828; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
14829; NoVLX-NEXT:    kmovw %k0, %eax
14830; NoVLX-NEXT:    andl %edi, %eax
14831; NoVLX-NEXT:    vzeroupper
14832; NoVLX-NEXT:    retq
14833entry:
14834  %0 = bitcast <2 x i64> %__a to <16 x i8>
14835  %1 = bitcast <2 x i64> %__b to <16 x i8>
14836  %2 = icmp ult <16 x i8> %0, %1
14837  %3 = bitcast i16 %__u to <16 x i1>
14838  %4 = and <16 x i1> %2, %3
  ; Shuffle indices 16..31 select from the zeroinitializer operand, so result lanes 16..31 are zero.
14839  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
14840  %6 = bitcast <32 x i1> %5 to i32
14841  ret i32 %6
14842}
14843
; Masked, memory-operand variant: unsigned less-than over sixteen i8 lanes with
; the right-hand side loaded through %__b, ANDed with the 16-bit mask %__u,
; widened to <32 x i1> and returned as an i32 bitmask.
; Expected codegen folds the load ((%rsi), since %__u occupies %edi) and applies
; %__u either as the {%k1} write-mask (+avx512bw,+avx512vl) or with a trailing
; scalar andl (+avx512f only).
14844define zeroext i32 @test_masked_vpcmpultb_v16i1_v32i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
14845; VLX-LABEL: test_masked_vpcmpultb_v16i1_v32i1_mask_mem:
14846; VLX:       # %bb.0: # %entry
14847; VLX-NEXT:    kmovd %edi, %k1
14848; VLX-NEXT:    vpcmpltub (%rsi), %xmm0, %k0 {%k1}
14849; VLX-NEXT:    kmovd %k0, %eax
14850; VLX-NEXT:    retq
14851;
14852; NoVLX-LABEL: test_masked_vpcmpultb_v16i1_v32i1_mask_mem:
14853; NoVLX:       # %bb.0: # %entry
14854; NoVLX-NEXT:    vpmaxub (%rsi), %xmm0, %xmm1
14855; NoVLX-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
14856; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
14857; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
14858; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
14859; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
14860; NoVLX-NEXT:    kmovw %k0, %eax
14861; NoVLX-NEXT:    andl %edi, %eax
14862; NoVLX-NEXT:    vzeroupper
14863; NoVLX-NEXT:    retq
14864entry:
14865  %0 = bitcast <2 x i64> %__a to <16 x i8>
14866  %load = load <2 x i64>, <2 x i64>* %__b
14867  %1 = bitcast <2 x i64> %load to <16 x i8>
14868  %2 = icmp ult <16 x i8> %0, %1
14869  %3 = bitcast i16 %__u to <16 x i1>
14870  %4 = and <16 x i1> %2, %3
  ; Shuffle indices 16..31 select from the zeroinitializer operand, so result lanes 16..31 are zero.
14871  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
14872  %6 = bitcast <32 x i1> %5 to i32
14873  ret i32 %6
14874}
14875
14876
; Unsigned less-than over sixteen i8 lanes (both operands in registers); the
; <16 x i1> result is widened to <64 x i1> and returned as an i64 bitmask.
; Expected codegen: with +avx512bw,+avx512vl one vpcmpltub read back with kmovq;
; with +avx512f only the compare is emulated (vpmaxub / vpcmpeqb / vpternlogq $15)
; and the 16 mask bits are zero-extended with movzwl.
14877define zeroext i64 @test_vpcmpultb_v16i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
14878; VLX-LABEL: test_vpcmpultb_v16i1_v64i1_mask:
14879; VLX:       # %bb.0: # %entry
14880; VLX-NEXT:    vpcmpltub %xmm1, %xmm0, %k0
14881; VLX-NEXT:    kmovq %k0, %rax
14882; VLX-NEXT:    retq
14883;
14884; NoVLX-LABEL: test_vpcmpultb_v16i1_v64i1_mask:
14885; NoVLX:       # %bb.0: # %entry
14886; NoVLX-NEXT:    vpmaxub %xmm1, %xmm0, %xmm1
14887; NoVLX-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
14888; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
14889; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
14890; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
14891; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
14892; NoVLX-NEXT:    kmovw %k0, %eax
14893; NoVLX-NEXT:    movzwl %ax, %eax
14894; NoVLX-NEXT:    vzeroupper
14895; NoVLX-NEXT:    retq
14896entry:
14897  %0 = bitcast <2 x i64> %__a to <16 x i8>
14898  %1 = bitcast <2 x i64> %__b to <16 x i8>
14899  %2 = icmp ult <16 x i8> %0, %1
  ; Every shuffle index >= 16 selects from the zeroinitializer operand, so result lanes 16..63 are zero.
14900  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
14901  %4 = bitcast <64 x i1> %3 to i64
14902  ret i64 %4
14903}
14904
; Memory-operand variant: unsigned less-than over sixteen i8 lanes where the
; right-hand side is loaded through %__b; the <16 x i1> result is widened to
; <64 x i1> and returned as an i64 bitmask.
; Expected codegen folds the load into vpcmpltub (%rdi) with +avx512bw,+avx512vl,
; or into vpmaxub (%rdi) with +avx512f only, where movzwl then zero-extends the
; 16 mask bits.
14905define zeroext i64 @test_vpcmpultb_v16i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
14906; VLX-LABEL: test_vpcmpultb_v16i1_v64i1_mask_mem:
14907; VLX:       # %bb.0: # %entry
14908; VLX-NEXT:    vpcmpltub (%rdi), %xmm0, %k0
14909; VLX-NEXT:    kmovq %k0, %rax
14910; VLX-NEXT:    retq
14911;
14912; NoVLX-LABEL: test_vpcmpultb_v16i1_v64i1_mask_mem:
14913; NoVLX:       # %bb.0: # %entry
14914; NoVLX-NEXT:    vpmaxub (%rdi), %xmm0, %xmm1
14915; NoVLX-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
14916; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
14917; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
14918; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
14919; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
14920; NoVLX-NEXT:    kmovw %k0, %eax
14921; NoVLX-NEXT:    movzwl %ax, %eax
14922; NoVLX-NEXT:    vzeroupper
14923; NoVLX-NEXT:    retq
14924entry:
14925  %0 = bitcast <2 x i64> %__a to <16 x i8>
14926  %load = load <2 x i64>, <2 x i64>* %__b
14927  %1 = bitcast <2 x i64> %load to <16 x i8>
14928  %2 = icmp ult <16 x i8> %0, %1
  ; Every shuffle index >= 16 selects from the zeroinitializer operand, so result lanes 16..63 are zero.
14929  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
14930  %4 = bitcast <64 x i1> %3 to i64
14931  ret i64 %4
14932}
14933
; Masked variant: unsigned less-than over sixteen i8 lanes, ANDed lane-wise with
; the 16-bit mask %__u, widened to <64 x i1> and returned as an i64 bitmask.
; Expected codegen: with +avx512bw,+avx512vl %__u becomes the {%k1} write-mask of
; vpcmpltub; with +avx512f only the compare is emulated (vpmaxub / vpcmpeqb /
; vpternlogq $15) and %__u is applied afterwards with a scalar andl.
14934define zeroext i64 @test_masked_vpcmpultb_v16i1_v64i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
14935; VLX-LABEL: test_masked_vpcmpultb_v16i1_v64i1_mask:
14936; VLX:       # %bb.0: # %entry
14937; VLX-NEXT:    kmovd %edi, %k1
14938; VLX-NEXT:    vpcmpltub %xmm1, %xmm0, %k0 {%k1}
14939; VLX-NEXT:    kmovq %k0, %rax
14940; VLX-NEXT:    retq
14941;
14942; NoVLX-LABEL: test_masked_vpcmpultb_v16i1_v64i1_mask:
14943; NoVLX:       # %bb.0: # %entry
14944; NoVLX-NEXT:    vpmaxub %xmm1, %xmm0, %xmm1
14945; NoVLX-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
14946; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
14947; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
14948; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
14949; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
14950; NoVLX-NEXT:    kmovw %k0, %eax
14951; NoVLX-NEXT:    andl %edi, %eax
14952; NoVLX-NEXT:    vzeroupper
14953; NoVLX-NEXT:    retq
14954entry:
14955  %0 = bitcast <2 x i64> %__a to <16 x i8>
14956  %1 = bitcast <2 x i64> %__b to <16 x i8>
14957  %2 = icmp ult <16 x i8> %0, %1
14958  %3 = bitcast i16 %__u to <16 x i1>
14959  %4 = and <16 x i1> %2, %3
  ; Every shuffle index >= 16 selects from the zeroinitializer operand, so result lanes 16..63 are zero.
14960  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
14961  %6 = bitcast <64 x i1> %5 to i64
14962  ret i64 %6
14963}
14964
; Masked, memory-operand variant: unsigned less-than over sixteen i8 lanes with
; the right-hand side loaded through %__b, ANDed with the 16-bit mask %__u,
; widened to <64 x i1> and returned as an i64 bitmask.
; Expected codegen folds the load ((%rsi), since %__u occupies %edi) and applies
; %__u either as the {%k1} write-mask (+avx512bw,+avx512vl) or with a trailing
; scalar andl (+avx512f only).
14965define zeroext i64 @test_masked_vpcmpultb_v16i1_v64i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
14966; VLX-LABEL: test_masked_vpcmpultb_v16i1_v64i1_mask_mem:
14967; VLX:       # %bb.0: # %entry
14968; VLX-NEXT:    kmovd %edi, %k1
14969; VLX-NEXT:    vpcmpltub (%rsi), %xmm0, %k0 {%k1}
14970; VLX-NEXT:    kmovq %k0, %rax
14971; VLX-NEXT:    retq
14972;
14973; NoVLX-LABEL: test_masked_vpcmpultb_v16i1_v64i1_mask_mem:
14974; NoVLX:       # %bb.0: # %entry
14975; NoVLX-NEXT:    vpmaxub (%rsi), %xmm0, %xmm1
14976; NoVLX-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
14977; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
14978; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
14979; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
14980; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
14981; NoVLX-NEXT:    kmovw %k0, %eax
14982; NoVLX-NEXT:    andl %edi, %eax
14983; NoVLX-NEXT:    vzeroupper
14984; NoVLX-NEXT:    retq
14985entry:
14986  %0 = bitcast <2 x i64> %__a to <16 x i8>
14987  %load = load <2 x i64>, <2 x i64>* %__b
14988  %1 = bitcast <2 x i64> %load to <16 x i8>
14989  %2 = icmp ult <16 x i8> %0, %1
14990  %3 = bitcast i16 %__u to <16 x i1>
14991  %4 = and <16 x i1> %2, %3
  ; Every shuffle index >= 16 selects from the zeroinitializer operand, so result lanes 16..63 are zero.
14992  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
14993  %6 = bitcast <64 x i1> %5 to i64
14994  ret i64 %6
14995}
14996
14997
; Unsigned less-than over thirty-two i8 lanes (both operands in registers); the
; <32 x i1> result is widened to <64 x i1> and returned as an i64 bitmask.
; Expected codegen: with +avx512bw,+avx512vl one ymm vpcmpltub read back with
; kmovq; with +avx512f only the compare is emulated on ymm (vpmaxub / vpcmpeqb /
; vpternlogq $15), then each 128-bit half is converted to 16 mask bits
; separately (high half via vextracti128) and the halves are merged with
; shll $16 / orl.
14998define zeroext i64 @test_vpcmpultb_v32i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
14999; VLX-LABEL: test_vpcmpultb_v32i1_v64i1_mask:
15000; VLX:       # %bb.0: # %entry
15001; VLX-NEXT:    vpcmpltub %ymm1, %ymm0, %k0
15002; VLX-NEXT:    kmovq %k0, %rax
15003; VLX-NEXT:    vzeroupper
15004; VLX-NEXT:    retq
15005;
15006; NoVLX-LABEL: test_vpcmpultb_v32i1_v64i1_mask:
15007; NoVLX:       # %bb.0: # %entry
15008; NoVLX-NEXT:    vpmaxub %ymm1, %ymm0, %ymm1
15009; NoVLX-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
15010; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
15011; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm1
15012; NoVLX-NEXT:    vpslld $31, %zmm1, %zmm1
15013; NoVLX-NEXT:    vptestmd %zmm1, %zmm1, %k0
15014; NoVLX-NEXT:    kmovw %k0, %ecx
15015; NoVLX-NEXT:    vextracti128 $1, %ymm0, %xmm0
15016; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
15017; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
15018; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
15019; NoVLX-NEXT:    kmovw %k0, %eax
15020; NoVLX-NEXT:    shll $16, %eax
15021; NoVLX-NEXT:    orl %ecx, %eax
15022; NoVLX-NEXT:    vzeroupper
15023; NoVLX-NEXT:    retq
15024entry:
15025  %0 = bitcast <4 x i64> %__a to <32 x i8>
15026  %1 = bitcast <4 x i64> %__b to <32 x i8>
15027  %2 = icmp ult <32 x i8> %0, %1
  ; Shuffle indices 32..63 select from the zeroinitializer operand, so result lanes 32..63 are zero.
15028  %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
15029  %4 = bitcast <64 x i1> %3 to i64
15030  ret i64 %4
15031}
15032
; Memory-operand variant: unsigned less-than over thirty-two i8 lanes where the
; right-hand side is loaded through %__b; the <32 x i1> result is widened to
; <64 x i1> and returned as an i64 bitmask.
; Expected codegen folds the load into vpcmpltub (%rdi) with +avx512bw,+avx512vl,
; or into vpmaxub (%rdi) with +avx512f only, where the two 128-bit halves are
; then converted to 16 mask bits each and merged with shll $16 / orl.
15033define zeroext i64 @test_vpcmpultb_v32i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
15034; VLX-LABEL: test_vpcmpultb_v32i1_v64i1_mask_mem:
15035; VLX:       # %bb.0: # %entry
15036; VLX-NEXT:    vpcmpltub (%rdi), %ymm0, %k0
15037; VLX-NEXT:    kmovq %k0, %rax
15038; VLX-NEXT:    vzeroupper
15039; VLX-NEXT:    retq
15040;
15041; NoVLX-LABEL: test_vpcmpultb_v32i1_v64i1_mask_mem:
15042; NoVLX:       # %bb.0: # %entry
15043; NoVLX-NEXT:    vpmaxub (%rdi), %ymm0, %ymm1
15044; NoVLX-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
15045; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
15046; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm1
15047; NoVLX-NEXT:    vpslld $31, %zmm1, %zmm1
15048; NoVLX-NEXT:    vptestmd %zmm1, %zmm1, %k0
15049; NoVLX-NEXT:    kmovw %k0, %ecx
15050; NoVLX-NEXT:    vextracti128 $1, %ymm0, %xmm0
15051; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
15052; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
15053; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
15054; NoVLX-NEXT:    kmovw %k0, %eax
15055; NoVLX-NEXT:    shll $16, %eax
15056; NoVLX-NEXT:    orl %ecx, %eax
15057; NoVLX-NEXT:    vzeroupper
15058; NoVLX-NEXT:    retq
15059entry:
15060  %0 = bitcast <4 x i64> %__a to <32 x i8>
15061  %load = load <4 x i64>, <4 x i64>* %__b
15062  %1 = bitcast <4 x i64> %load to <32 x i8>
15063  %2 = icmp ult <32 x i8> %0, %1
  ; Shuffle indices 32..63 select from the zeroinitializer operand, so result lanes 32..63 are zero.
15064  %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
15065  %4 = bitcast <64 x i1> %3 to i64
15066  ret i64 %4
15067}
15068
; Masked variant: unsigned less-than over thirty-two i8 lanes, ANDed lane-wise
; with the 32-bit mask %__u, widened to <64 x i1> and returned as an i64 bitmask.
; Expected codegen: with +avx512bw,+avx512vl %__u becomes the {%k1} write-mask of
; the ymm vpcmpltub; with +avx512f only each 128-bit half yields 16 mask bits
; that are ANDed with the matching half of %__u (the upper half after
; shrl $16, %edi) before the halves are merged with shll $16 / orl.
15069define zeroext i64 @test_masked_vpcmpultb_v32i1_v64i1_mask(i32 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
15070; VLX-LABEL: test_masked_vpcmpultb_v32i1_v64i1_mask:
15071; VLX:       # %bb.0: # %entry
15072; VLX-NEXT:    kmovd %edi, %k1
15073; VLX-NEXT:    vpcmpltub %ymm1, %ymm0, %k0 {%k1}
15074; VLX-NEXT:    kmovq %k0, %rax
15075; VLX-NEXT:    vzeroupper
15076; VLX-NEXT:    retq
15077;
15078; NoVLX-LABEL: test_masked_vpcmpultb_v32i1_v64i1_mask:
15079; NoVLX:       # %bb.0: # %entry
15080; NoVLX-NEXT:    vpmaxub %ymm1, %ymm0, %ymm1
15081; NoVLX-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
15082; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
15083; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm1
15084; NoVLX-NEXT:    vpslld $31, %zmm1, %zmm1
15085; NoVLX-NEXT:    vptestmd %zmm1, %zmm1, %k0
15086; NoVLX-NEXT:    kmovw %k0, %eax
15087; NoVLX-NEXT:    andl %edi, %eax
15088; NoVLX-NEXT:    shrl $16, %edi
15089; NoVLX-NEXT:    vextracti128 $1, %ymm0, %xmm0
15090; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
15091; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
15092; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
15093; NoVLX-NEXT:    kmovw %k0, %ecx
15094; NoVLX-NEXT:    andl %edi, %ecx
15095; NoVLX-NEXT:    shll $16, %ecx
15096; NoVLX-NEXT:    movzwl %ax, %eax
15097; NoVLX-NEXT:    orl %ecx, %eax
15098; NoVLX-NEXT:    vzeroupper
15099; NoVLX-NEXT:    retq
15100entry:
15101  %0 = bitcast <4 x i64> %__a to <32 x i8>
15102  %1 = bitcast <4 x i64> %__b to <32 x i8>
15103  %2 = icmp ult <32 x i8> %0, %1
15104  %3 = bitcast i32 %__u to <32 x i1>
15105  %4 = and <32 x i1> %2, %3
  ; Shuffle indices 32..63 select from the zeroinitializer operand, so result lanes 32..63 are zero.
15106  %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
15107  %6 = bitcast <64 x i1> %5 to i64
15108  ret i64 %6
15109}
15110
; Masked, memory-operand variant: unsigned less-than over thirty-two i8 lanes
; with the right-hand side loaded through %__b, ANDed with the 32-bit mask %__u,
; widened to <64 x i1> and returned as an i64 bitmask.
; Expected codegen folds the load ((%rsi), since %__u occupies %edi). With
; +avx512bw,+avx512vl %__u is the {%k1} write-mask; with +avx512f only each
; 16-bit half of the result is ANDed with the matching half of %__u before the
; halves are merged with shll $16 / orl.
15111define zeroext i64 @test_masked_vpcmpultb_v32i1_v64i1_mask_mem(i32 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
15112; VLX-LABEL: test_masked_vpcmpultb_v32i1_v64i1_mask_mem:
15113; VLX:       # %bb.0: # %entry
15114; VLX-NEXT:    kmovd %edi, %k1
15115; VLX-NEXT:    vpcmpltub (%rsi), %ymm0, %k0 {%k1}
15116; VLX-NEXT:    kmovq %k0, %rax
15117; VLX-NEXT:    vzeroupper
15118; VLX-NEXT:    retq
15119;
15120; NoVLX-LABEL: test_masked_vpcmpultb_v32i1_v64i1_mask_mem:
15121; NoVLX:       # %bb.0: # %entry
15122; NoVLX-NEXT:    vpmaxub (%rsi), %ymm0, %ymm1
15123; NoVLX-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
15124; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
15125; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm1
15126; NoVLX-NEXT:    vpslld $31, %zmm1, %zmm1
15127; NoVLX-NEXT:    vptestmd %zmm1, %zmm1, %k0
15128; NoVLX-NEXT:    kmovw %k0, %eax
15129; NoVLX-NEXT:    andl %edi, %eax
15130; NoVLX-NEXT:    shrl $16, %edi
15131; NoVLX-NEXT:    vextracti128 $1, %ymm0, %xmm0
15132; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
15133; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
15134; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
15135; NoVLX-NEXT:    kmovw %k0, %ecx
15136; NoVLX-NEXT:    andl %edi, %ecx
15137; NoVLX-NEXT:    shll $16, %ecx
15138; NoVLX-NEXT:    movzwl %ax, %eax
15139; NoVLX-NEXT:    orl %ecx, %eax
15140; NoVLX-NEXT:    vzeroupper
15141; NoVLX-NEXT:    retq
15142entry:
15143  %0 = bitcast <4 x i64> %__a to <32 x i8>
15144  %load = load <4 x i64>, <4 x i64>* %__b
15145  %1 = bitcast <4 x i64> %load to <32 x i8>
15146  %2 = icmp ult <32 x i8> %0, %1
15147  %3 = bitcast i32 %__u to <32 x i1>
15148  %4 = and <32 x i1> %2, %3
  ; Shuffle indices 32..63 select from the zeroinitializer operand, so result lanes 32..63 are zero.
15149  %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
15150  %6 = bitcast <64 x i1> %5 to i64
15151  ret i64 %6
15152}
15153
15154
; Unsigned less-than over eight i16 lanes (both operands in registers); the
; <8 x i1> result is widened to <16 x i1> and returned as an i16 bitmask.
; Expected codegen: with +avx512bw,+avx512vl one vpcmpltuw into a mask register;
; with +avx512f only the compare is emulated as NOT(a == umax(a, b)) using
; vpmaxuw / vpcmpeqw / vpternlogq $15, then turned into mask bits with
; vpmovsxwq / vpsllq $63 / vptestmq.
15155define zeroext i16 @test_vpcmpultw_v8i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
15156; VLX-LABEL: test_vpcmpultw_v8i1_v16i1_mask:
15157; VLX:       # %bb.0: # %entry
15158; VLX-NEXT:    vpcmpltuw %xmm1, %xmm0, %k0
15159; VLX-NEXT:    kmovd %k0, %eax
15160; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
15161; VLX-NEXT:    retq
15162;
15163; NoVLX-LABEL: test_vpcmpultw_v8i1_v16i1_mask:
15164; NoVLX:       # %bb.0: # %entry
15165; NoVLX-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm1
15166; NoVLX-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
15167; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
15168; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
15169; NoVLX-NEXT:    vpsllq $63, %zmm0, %zmm0
15170; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0
15171; NoVLX-NEXT:    kmovw %k0, %eax
15172; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
15173; NoVLX-NEXT:    vzeroupper
15174; NoVLX-NEXT:    retq
15175entry:
15176  %0 = bitcast <2 x i64> %__a to <8 x i16>
15177  %1 = bitcast <2 x i64> %__b to <8 x i16>
15178  %2 = icmp ult <8 x i16> %0, %1
  ; Shuffle indices 8..15 select from the zeroinitializer operand, so result lanes 8..15 are zero.
15179  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
15180  %4 = bitcast <16 x i1> %3 to i16
15181  ret i16 %4
15182}
15183
; Memory-operand variant: unsigned less-than over eight i16 lanes where the
; right-hand side is loaded through %__b; the <8 x i1> result is widened to
; <16 x i1> and returned as an i16 bitmask.
; Expected codegen folds the load into the first vector instruction:
; vpcmpltuw (%rdi) with +avx512bw,+avx512vl, vpmaxuw (%rdi) with +avx512f only.
15184define zeroext i16 @test_vpcmpultw_v8i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
15185; VLX-LABEL: test_vpcmpultw_v8i1_v16i1_mask_mem:
15186; VLX:       # %bb.0: # %entry
15187; VLX-NEXT:    vpcmpltuw (%rdi), %xmm0, %k0
15188; VLX-NEXT:    kmovd %k0, %eax
15189; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
15190; VLX-NEXT:    retq
15191;
15192; NoVLX-LABEL: test_vpcmpultw_v8i1_v16i1_mask_mem:
15193; NoVLX:       # %bb.0: # %entry
15194; NoVLX-NEXT:    vpmaxuw (%rdi), %xmm0, %xmm1
15195; NoVLX-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
15196; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
15197; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
15198; NoVLX-NEXT:    vpsllq $63, %zmm0, %zmm0
15199; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0
15200; NoVLX-NEXT:    kmovw %k0, %eax
15201; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
15202; NoVLX-NEXT:    vzeroupper
15203; NoVLX-NEXT:    retq
15204entry:
15205  %0 = bitcast <2 x i64> %__a to <8 x i16>
15206  %load = load <2 x i64>, <2 x i64>* %__b
15207  %1 = bitcast <2 x i64> %load to <8 x i16>
15208  %2 = icmp ult <8 x i16> %0, %1
  ; Shuffle indices 8..15 select from the zeroinitializer operand, so result lanes 8..15 are zero.
15209  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
15210  %4 = bitcast <16 x i1> %3 to i16
15211  ret i16 %4
15212}
15213
; Masked variant: unsigned less-than over eight i16 lanes, ANDed lane-wise with
; the 8-bit mask %__u, widened to <16 x i1> and returned as an i16 bitmask.
; Expected codegen: with +avx512bw,+avx512vl %__u becomes the {%k1} write-mask of
; vpcmpltuw; with +avx512f only the compare is emulated (vpmaxuw / vpcmpeqw /
; vpternlogq $15) and %__u is applied as the {%k1} write-mask of the final
; vptestmq.
15214define zeroext i16 @test_masked_vpcmpultw_v8i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
15215; VLX-LABEL: test_masked_vpcmpultw_v8i1_v16i1_mask:
15216; VLX:       # %bb.0: # %entry
15217; VLX-NEXT:    kmovd %edi, %k1
15218; VLX-NEXT:    vpcmpltuw %xmm1, %xmm0, %k0 {%k1}
15219; VLX-NEXT:    kmovd %k0, %eax
15220; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
15221; VLX-NEXT:    retq
15222;
15223; NoVLX-LABEL: test_masked_vpcmpultw_v8i1_v16i1_mask:
15224; NoVLX:       # %bb.0: # %entry
15225; NoVLX-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm1
15226; NoVLX-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
15227; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
15228; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
15229; NoVLX-NEXT:    vpsllq $63, %zmm0, %zmm0
15230; NoVLX-NEXT:    kmovw %edi, %k1
15231; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0 {%k1}
15232; NoVLX-NEXT:    kmovw %k0, %eax
15233; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
15234; NoVLX-NEXT:    vzeroupper
15235; NoVLX-NEXT:    retq
15236entry:
15237  %0 = bitcast <2 x i64> %__a to <8 x i16>
15238  %1 = bitcast <2 x i64> %__b to <8 x i16>
15239  %2 = icmp ult <8 x i16> %0, %1
15240  %3 = bitcast i8 %__u to <8 x i1>
15241  %4 = and <8 x i1> %2, %3
  ; Shuffle indices 8..15 select from the zeroinitializer operand, so result lanes 8..15 are zero.
15242  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
15243  %6 = bitcast <16 x i1> %5 to i16
15244  ret i16 %6
15245}
15246
; Masked, memory-operand variant: unsigned less-than over eight i16 lanes with
; the right-hand side loaded through %__b, ANDed with the 8-bit mask %__u,
; widened to <16 x i1> and returned as an i16 bitmask.
; Expected codegen folds the load ((%rsi), since %__u occupies %edi) and applies
; %__u as a {%k1} write-mask: on vpcmpltuw with +avx512bw,+avx512vl, on the
; final vptestmq with +avx512f only.
15247define zeroext i16 @test_masked_vpcmpultw_v8i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
15248; VLX-LABEL: test_masked_vpcmpultw_v8i1_v16i1_mask_mem:
15249; VLX:       # %bb.0: # %entry
15250; VLX-NEXT:    kmovd %edi, %k1
15251; VLX-NEXT:    vpcmpltuw (%rsi), %xmm0, %k0 {%k1}
15252; VLX-NEXT:    kmovd %k0, %eax
15253; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
15254; VLX-NEXT:    retq
15255;
15256; NoVLX-LABEL: test_masked_vpcmpultw_v8i1_v16i1_mask_mem:
15257; NoVLX:       # %bb.0: # %entry
15258; NoVLX-NEXT:    vpmaxuw (%rsi), %xmm0, %xmm1
15259; NoVLX-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
15260; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
15261; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
15262; NoVLX-NEXT:    vpsllq $63, %zmm0, %zmm0
15263; NoVLX-NEXT:    kmovw %edi, %k1
15264; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0 {%k1}
15265; NoVLX-NEXT:    kmovw %k0, %eax
15266; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
15267; NoVLX-NEXT:    vzeroupper
15268; NoVLX-NEXT:    retq
15269entry:
15270  %0 = bitcast <2 x i64> %__a to <8 x i16>
15271  %load = load <2 x i64>, <2 x i64>* %__b
15272  %1 = bitcast <2 x i64> %load to <8 x i16>
15273  %2 = icmp ult <8 x i16> %0, %1
15274  %3 = bitcast i8 %__u to <8 x i1>
15275  %4 = and <8 x i1> %2, %3
  ; Shuffle indices 8..15 select from the zeroinitializer operand, so result lanes 8..15 are zero.
15276  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
15277  %6 = bitcast <16 x i1> %5 to i16
15278  ret i16 %6
15279}
15280
15281
; Unsigned less-than over eight i16 lanes (both operands in registers); the
; <8 x i1> result is widened to <32 x i1> and returned as an i32 bitmask.
; Expected codegen: with +avx512bw,+avx512vl one vpcmpltuw into a mask register;
; with +avx512f only the compare is emulated as NOT(a == umax(a, b)) using
; vpmaxuw / vpcmpeqw / vpternlogq $15, then turned into mask bits with
; vpmovsxwq / vpsllq $63 / vptestmq.
15282define zeroext i32 @test_vpcmpultw_v8i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
15283; VLX-LABEL: test_vpcmpultw_v8i1_v32i1_mask:
15284; VLX:       # %bb.0: # %entry
15285; VLX-NEXT:    vpcmpltuw %xmm1, %xmm0, %k0
15286; VLX-NEXT:    kmovd %k0, %eax
15287; VLX-NEXT:    retq
15288;
15289; NoVLX-LABEL: test_vpcmpultw_v8i1_v32i1_mask:
15290; NoVLX:       # %bb.0: # %entry
15291; NoVLX-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm1
15292; NoVLX-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
15293; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
15294; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
15295; NoVLX-NEXT:    vpsllq $63, %zmm0, %zmm0
15296; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0
15297; NoVLX-NEXT:    kmovw %k0, %eax
15298; NoVLX-NEXT:    vzeroupper
15299; NoVLX-NEXT:    retq
15300entry:
15301  %0 = bitcast <2 x i64> %__a to <8 x i16>
15302  %1 = bitcast <2 x i64> %__b to <8 x i16>
15303  %2 = icmp ult <8 x i16> %0, %1
  ; Every shuffle index >= 8 selects from the zeroinitializer operand, so result lanes 8..31 are zero.
15304  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
15305  %4 = bitcast <32 x i1> %3 to i32
15306  ret i32 %4
15307}
15308
; Memory-operand variant: unsigned less-than over eight i16 lanes where the
; right-hand side is loaded through %__b; the <8 x i1> result is widened to
; <32 x i1> and returned as an i32 bitmask.
; Expected codegen folds the load into the first vector instruction:
; vpcmpltuw (%rdi) with +avx512bw,+avx512vl, vpmaxuw (%rdi) with +avx512f only.
15309define zeroext i32 @test_vpcmpultw_v8i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
15310; VLX-LABEL: test_vpcmpultw_v8i1_v32i1_mask_mem:
15311; VLX:       # %bb.0: # %entry
15312; VLX-NEXT:    vpcmpltuw (%rdi), %xmm0, %k0
15313; VLX-NEXT:    kmovd %k0, %eax
15314; VLX-NEXT:    retq
15315;
15316; NoVLX-LABEL: test_vpcmpultw_v8i1_v32i1_mask_mem:
15317; NoVLX:       # %bb.0: # %entry
15318; NoVLX-NEXT:    vpmaxuw (%rdi), %xmm0, %xmm1
15319; NoVLX-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
15320; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
15321; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
15322; NoVLX-NEXT:    vpsllq $63, %zmm0, %zmm0
15323; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0
15324; NoVLX-NEXT:    kmovw %k0, %eax
15325; NoVLX-NEXT:    vzeroupper
15326; NoVLX-NEXT:    retq
15327entry:
15328  %0 = bitcast <2 x i64> %__a to <8 x i16>
15329  %load = load <2 x i64>, <2 x i64>* %__b
15330  %1 = bitcast <2 x i64> %load to <8 x i16>
15331  %2 = icmp ult <8 x i16> %0, %1
  ; Every shuffle index >= 8 selects from the zeroinitializer operand, so result lanes 8..31 are zero.
15332  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
15333  %4 = bitcast <32 x i1> %3 to i32
15334  ret i32 %4
15335}
15336
; Masked variant: unsigned less-than over eight i16 lanes, ANDed lane-wise with
; the 8-bit mask %__u, widened to <32 x i1> and returned as an i32 bitmask.
; Expected codegen: with +avx512bw,+avx512vl %__u becomes the {%k1} write-mask of
; vpcmpltuw; with +avx512f only the compare is emulated (vpmaxuw / vpcmpeqw /
; vpternlogq $15) and %__u is applied as the {%k1} write-mask of the final
; vptestmq.
15337define zeroext i32 @test_masked_vpcmpultw_v8i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
15338; VLX-LABEL: test_masked_vpcmpultw_v8i1_v32i1_mask:
15339; VLX:       # %bb.0: # %entry
15340; VLX-NEXT:    kmovd %edi, %k1
15341; VLX-NEXT:    vpcmpltuw %xmm1, %xmm0, %k0 {%k1}
15342; VLX-NEXT:    kmovd %k0, %eax
15343; VLX-NEXT:    retq
15344;
15345; NoVLX-LABEL: test_masked_vpcmpultw_v8i1_v32i1_mask:
15346; NoVLX:       # %bb.0: # %entry
15347; NoVLX-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm1
15348; NoVLX-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
15349; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
15350; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
15351; NoVLX-NEXT:    vpsllq $63, %zmm0, %zmm0
15352; NoVLX-NEXT:    kmovw %edi, %k1
15353; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0 {%k1}
15354; NoVLX-NEXT:    kmovw %k0, %eax
15355; NoVLX-NEXT:    vzeroupper
15356; NoVLX-NEXT:    retq
15357entry:
15358  %0 = bitcast <2 x i64> %__a to <8 x i16>
15359  %1 = bitcast <2 x i64> %__b to <8 x i16>
15360  %2 = icmp ult <8 x i16> %0, %1
15361  %3 = bitcast i8 %__u to <8 x i1>
15362  %4 = and <8 x i1> %2, %3
  ; Every shuffle index >= 8 selects from the zeroinitializer operand, so result lanes 8..31 are zero.
15363  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
15364  %6 = bitcast <32 x i1> %5 to i32
15365  ret i32 %6
15366}
15367
; Masked, memory-operand variant: unsigned less-than over eight i16 lanes with
; the right-hand side loaded through %__b, ANDed with the 8-bit mask %__u,
; widened to <32 x i1> and returned as an i32 bitmask.
; Expected codegen folds the load ((%rsi), since %__u occupies %edi) and applies
; %__u as a {%k1} write-mask: on vpcmpltuw with +avx512bw,+avx512vl, on the
; final vptestmq with +avx512f only.
15368define zeroext i32 @test_masked_vpcmpultw_v8i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
15369; VLX-LABEL: test_masked_vpcmpultw_v8i1_v32i1_mask_mem:
15370; VLX:       # %bb.0: # %entry
15371; VLX-NEXT:    kmovd %edi, %k1
15372; VLX-NEXT:    vpcmpltuw (%rsi), %xmm0, %k0 {%k1}
15373; VLX-NEXT:    kmovd %k0, %eax
15374; VLX-NEXT:    retq
15375;
15376; NoVLX-LABEL: test_masked_vpcmpultw_v8i1_v32i1_mask_mem:
15377; NoVLX:       # %bb.0: # %entry
15378; NoVLX-NEXT:    vpmaxuw (%rsi), %xmm0, %xmm1
15379; NoVLX-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
15380; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
15381; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
15382; NoVLX-NEXT:    vpsllq $63, %zmm0, %zmm0
15383; NoVLX-NEXT:    kmovw %edi, %k1
15384; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0 {%k1}
15385; NoVLX-NEXT:    kmovw %k0, %eax
15386; NoVLX-NEXT:    vzeroupper
15387; NoVLX-NEXT:    retq
15388entry:
15389  %0 = bitcast <2 x i64> %__a to <8 x i16>
15390  %load = load <2 x i64>, <2 x i64>* %__b
15391  %1 = bitcast <2 x i64> %load to <8 x i16>
15392  %2 = icmp ult <8 x i16> %0, %1
15393  %3 = bitcast i8 %__u to <8 x i1>
15394  %4 = and <8 x i1> %2, %3
  ; Every shuffle index >= 8 selects from the zeroinitializer operand, so result lanes 8..31 are zero.
15395  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
15396  %6 = bitcast <32 x i1> %5 to i32
15397  ret i32 %6
15398}
15399
15400
; Unsigned less-than over eight i16 lanes (both operands in registers); the
; <8 x i1> result is widened to <64 x i1> and returned as an i64 bitmask.
; Expected codegen: with +avx512bw,+avx512vl one vpcmpltuw read back with kmovq;
; with +avx512f only the compare is emulated (vpmaxuw / vpcmpeqw / vpternlogq $15),
; converted to mask bits via vpmovsxwq / vpsllq $63 / vptestmq, and the
; extracted mask is zero-extended with movzwl.
15401define zeroext i64 @test_vpcmpultw_v8i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
15402; VLX-LABEL: test_vpcmpultw_v8i1_v64i1_mask:
15403; VLX:       # %bb.0: # %entry
15404; VLX-NEXT:    vpcmpltuw %xmm1, %xmm0, %k0
15405; VLX-NEXT:    kmovq %k0, %rax
15406; VLX-NEXT:    retq
15407;
15408; NoVLX-LABEL: test_vpcmpultw_v8i1_v64i1_mask:
15409; NoVLX:       # %bb.0: # %entry
15410; NoVLX-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm1
15411; NoVLX-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
15412; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
15413; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
15414; NoVLX-NEXT:    vpsllq $63, %zmm0, %zmm0
15415; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0
15416; NoVLX-NEXT:    kmovw %k0, %eax
15417; NoVLX-NEXT:    movzwl %ax, %eax
15418; NoVLX-NEXT:    vzeroupper
15419; NoVLX-NEXT:    retq
15420entry:
15421  %0 = bitcast <2 x i64> %__a to <8 x i16>
15422  %1 = bitcast <2 x i64> %__b to <8 x i16>
15423  %2 = icmp ult <8 x i16> %0, %1
  ; Every shuffle index >= 8 selects from the zeroinitializer operand, so result lanes 8..63 are zero.
15424  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
15425  %4 = bitcast <64 x i1> %3 to i64
15426  ret i64 %4
15427}
15428
; Unsigned less-than compare of <8 x i16> vectors; the second operand is
; loaded from %__b. The <8 x i1> result is widened to <64 x i1> (shuffle
; indices 8-15 select lanes of the zeroinitializer operand, so the upper
; lanes are zero) and returned as i64.
; The avx512bw/vl run expects vpcmpltuw with a folded load; the avx512f-only
; run expects the load folded into vpmaxuw, then vpcmpeqw and vptestmq.
15429define zeroext i64 @test_vpcmpultw_v8i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
15430; VLX-LABEL: test_vpcmpultw_v8i1_v64i1_mask_mem:
15431; VLX:       # %bb.0: # %entry
15432; VLX-NEXT:    vpcmpltuw (%rdi), %xmm0, %k0
15433; VLX-NEXT:    kmovq %k0, %rax
15434; VLX-NEXT:    retq
15435;
15436; NoVLX-LABEL: test_vpcmpultw_v8i1_v64i1_mask_mem:
15437; NoVLX:       # %bb.0: # %entry
15438; NoVLX-NEXT:    vpmaxuw (%rdi), %xmm0, %xmm1
15439; NoVLX-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
15440; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
15441; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
15442; NoVLX-NEXT:    vpsllq $63, %zmm0, %zmm0
15443; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0
15444; NoVLX-NEXT:    kmovw %k0, %eax
15445; NoVLX-NEXT:    movzwl %ax, %eax
15446; NoVLX-NEXT:    vzeroupper
15447; NoVLX-NEXT:    retq
15448entry:
15449  %0 = bitcast <2 x i64> %__a to <8 x i16>
15450  %load = load <2 x i64>, <2 x i64>* %__b
15451  %1 = bitcast <2 x i64> %load to <8 x i16>
15452  %2 = icmp ult <8 x i16> %0, %1
15453  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
15454  %4 = bitcast <64 x i1> %3 to i64
15455  ret i64 %4
15456}
15457
; Masked unsigned less-than compare of two <8 x i16> register operands. The
; <8 x i1> result is ANDed with the bits of %__u, widened to <64 x i1>
; (shuffle indices 8-15 select lanes of the zeroinitializer operand, so the
; upper lanes are zero) and returned as i64.
; The avx512bw/vl run expects one vpcmpltuw under {%k1}; the avx512f-only run
; expects vpmaxuw/vpcmpeqw with the mask applied on the final vptestmq.
15458define zeroext i64 @test_masked_vpcmpultw_v8i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
15459; VLX-LABEL: test_masked_vpcmpultw_v8i1_v64i1_mask:
15460; VLX:       # %bb.0: # %entry
15461; VLX-NEXT:    kmovd %edi, %k1
15462; VLX-NEXT:    vpcmpltuw %xmm1, %xmm0, %k0 {%k1}
15463; VLX-NEXT:    kmovq %k0, %rax
15464; VLX-NEXT:    retq
15465;
15466; NoVLX-LABEL: test_masked_vpcmpultw_v8i1_v64i1_mask:
15467; NoVLX:       # %bb.0: # %entry
15468; NoVLX-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm1
15469; NoVLX-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
15470; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
15471; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
15472; NoVLX-NEXT:    vpsllq $63, %zmm0, %zmm0
15473; NoVLX-NEXT:    kmovw %edi, %k1
15474; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0 {%k1}
15475; NoVLX-NEXT:    kmovw %k0, %eax
15476; NoVLX-NEXT:    movzwl %ax, %eax
15477; NoVLX-NEXT:    vzeroupper
15478; NoVLX-NEXT:    retq
15479entry:
15480  %0 = bitcast <2 x i64> %__a to <8 x i16>
15481  %1 = bitcast <2 x i64> %__b to <8 x i16>
15482  %2 = icmp ult <8 x i16> %0, %1
15483  %3 = bitcast i8 %__u to <8 x i1>
15484  %4 = and <8 x i1> %2, %3
15485  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
15486  %6 = bitcast <64 x i1> %5 to i64
15487  ret i64 %6
15488}
15489
; Masked unsigned less-than compare of <8 x i16> vectors; the second operand
; is loaded from %__b. The <8 x i1> result is ANDed with the bits of %__u,
; widened to <64 x i1> (shuffle indices 8-15 select lanes of the
; zeroinitializer operand, so the upper lanes are zero) and returned as i64.
; The avx512bw/vl run expects one masked vpcmpltuw with a folded load; the
; avx512f-only run expects vpmaxuw (folded load), vpcmpeqw, masked vptestmq.
15490define zeroext i64 @test_masked_vpcmpultw_v8i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
15491; VLX-LABEL: test_masked_vpcmpultw_v8i1_v64i1_mask_mem:
15492; VLX:       # %bb.0: # %entry
15493; VLX-NEXT:    kmovd %edi, %k1
15494; VLX-NEXT:    vpcmpltuw (%rsi), %xmm0, %k0 {%k1}
15495; VLX-NEXT:    kmovq %k0, %rax
15496; VLX-NEXT:    retq
15497;
15498; NoVLX-LABEL: test_masked_vpcmpultw_v8i1_v64i1_mask_mem:
15499; NoVLX:       # %bb.0: # %entry
15500; NoVLX-NEXT:    vpmaxuw (%rsi), %xmm0, %xmm1
15501; NoVLX-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
15502; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
15503; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
15504; NoVLX-NEXT:    vpsllq $63, %zmm0, %zmm0
15505; NoVLX-NEXT:    kmovw %edi, %k1
15506; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0 {%k1}
15507; NoVLX-NEXT:    kmovw %k0, %eax
15508; NoVLX-NEXT:    movzwl %ax, %eax
15509; NoVLX-NEXT:    vzeroupper
15510; NoVLX-NEXT:    retq
15511entry:
15512  %0 = bitcast <2 x i64> %__a to <8 x i16>
15513  %load = load <2 x i64>, <2 x i64>* %__b
15514  %1 = bitcast <2 x i64> %load to <8 x i16>
15515  %2 = icmp ult <8 x i16> %0, %1
15516  %3 = bitcast i8 %__u to <8 x i1>
15517  %4 = and <8 x i1> %2, %3
15518  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
15519  %6 = bitcast <64 x i1> %5 to i64
15520  ret i64 %6
15521}
15522
15523
; Unsigned less-than compare of two <16 x i16> register operands. The
; <16 x i1> result is widened to <32 x i1> (shuffle indices 16-31 select
; lanes of the zeroinitializer operand, so the upper lanes are zero) and
; returned as i32.
; The avx512bw/vl run expects a ymm vpcmpltuw + kmovd; the avx512f-only run
; expects vpmaxuw/vpcmpeqw on ymm followed by a 512-bit vptestmd.
15524define zeroext i32 @test_vpcmpultw_v16i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
15525; VLX-LABEL: test_vpcmpultw_v16i1_v32i1_mask:
15526; VLX:       # %bb.0: # %entry
15527; VLX-NEXT:    vpcmpltuw %ymm1, %ymm0, %k0
15528; VLX-NEXT:    kmovd %k0, %eax
15529; VLX-NEXT:    vzeroupper
15530; VLX-NEXT:    retq
15531;
15532; NoVLX-LABEL: test_vpcmpultw_v16i1_v32i1_mask:
15533; NoVLX:       # %bb.0: # %entry
15534; NoVLX-NEXT:    vpmaxuw %ymm1, %ymm0, %ymm1
15535; NoVLX-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
15536; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
15537; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
15538; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
15539; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
15540; NoVLX-NEXT:    kmovw %k0, %eax
15541; NoVLX-NEXT:    vzeroupper
15542; NoVLX-NEXT:    retq
15543entry:
15544  %0 = bitcast <4 x i64> %__a to <16 x i16>
15545  %1 = bitcast <4 x i64> %__b to <16 x i16>
15546  %2 = icmp ult <16 x i16> %0, %1
15547  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
15548  %4 = bitcast <32 x i1> %3 to i32
15549  ret i32 %4
15550}
15551
; Unsigned less-than compare of <16 x i16> vectors; the second operand is
; loaded from %__b. The <16 x i1> result is widened to <32 x i1> (shuffle
; indices 16-31 select lanes of the zeroinitializer operand, so the upper
; lanes are zero) and returned as i32.
; The avx512bw/vl run expects vpcmpltuw with a folded load; the avx512f-only
; run expects the load folded into vpmaxuw, then vpcmpeqw and vptestmd.
15552define zeroext i32 @test_vpcmpultw_v16i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
15553; VLX-LABEL: test_vpcmpultw_v16i1_v32i1_mask_mem:
15554; VLX:       # %bb.0: # %entry
15555; VLX-NEXT:    vpcmpltuw (%rdi), %ymm0, %k0
15556; VLX-NEXT:    kmovd %k0, %eax
15557; VLX-NEXT:    vzeroupper
15558; VLX-NEXT:    retq
15559;
15560; NoVLX-LABEL: test_vpcmpultw_v16i1_v32i1_mask_mem:
15561; NoVLX:       # %bb.0: # %entry
15562; NoVLX-NEXT:    vpmaxuw (%rdi), %ymm0, %ymm1
15563; NoVLX-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
15564; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
15565; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
15566; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
15567; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
15568; NoVLX-NEXT:    kmovw %k0, %eax
15569; NoVLX-NEXT:    vzeroupper
15570; NoVLX-NEXT:    retq
15571entry:
15572  %0 = bitcast <4 x i64> %__a to <16 x i16>
15573  %load = load <4 x i64>, <4 x i64>* %__b
15574  %1 = bitcast <4 x i64> %load to <16 x i16>
15575  %2 = icmp ult <16 x i16> %0, %1
15576  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
15577  %4 = bitcast <32 x i1> %3 to i32
15578  ret i32 %4
15579}
15580
; Masked unsigned less-than compare of two <16 x i16> register operands. The
; <16 x i1> result is ANDed with the bits of %__u, widened to <32 x i1>
; (shuffle indices 16-31 select lanes of the zeroinitializer operand, so the
; upper lanes are zero) and returned as i32.
; The avx512bw/vl run expects one vpcmpltuw under {%k1}; the avx512f-only run
; expects the mask to be applied in a GPR with andl after the kmovw.
15581define zeroext i32 @test_masked_vpcmpultw_v16i1_v32i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
15582; VLX-LABEL: test_masked_vpcmpultw_v16i1_v32i1_mask:
15583; VLX:       # %bb.0: # %entry
15584; VLX-NEXT:    kmovd %edi, %k1
15585; VLX-NEXT:    vpcmpltuw %ymm1, %ymm0, %k0 {%k1}
15586; VLX-NEXT:    kmovd %k0, %eax
15587; VLX-NEXT:    vzeroupper
15588; VLX-NEXT:    retq
15589;
15590; NoVLX-LABEL: test_masked_vpcmpultw_v16i1_v32i1_mask:
15591; NoVLX:       # %bb.0: # %entry
15592; NoVLX-NEXT:    vpmaxuw %ymm1, %ymm0, %ymm1
15593; NoVLX-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
15594; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
15595; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
15596; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
15597; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
15598; NoVLX-NEXT:    kmovw %k0, %eax
15599; NoVLX-NEXT:    andl %edi, %eax
15600; NoVLX-NEXT:    vzeroupper
15601; NoVLX-NEXT:    retq
15602entry:
15603  %0 = bitcast <4 x i64> %__a to <16 x i16>
15604  %1 = bitcast <4 x i64> %__b to <16 x i16>
15605  %2 = icmp ult <16 x i16> %0, %1
15606  %3 = bitcast i16 %__u to <16 x i1>
15607  %4 = and <16 x i1> %2, %3
15608  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
15609  %6 = bitcast <32 x i1> %5 to i32
15610  ret i32 %6
15611}
15612
; Masked unsigned less-than compare of <16 x i16> vectors; the second operand
; is loaded from %__b. The <16 x i1> result is ANDed with the bits of %__u,
; widened to <32 x i1> (shuffle indices 16-31 select lanes of the
; zeroinitializer operand, so the upper lanes are zero) and returned as i32.
; The avx512bw/vl run expects one masked vpcmpltuw with a folded load; the
; avx512f-only run expects vpmaxuw (folded load) and a final andl with %edi.
15613define zeroext i32 @test_masked_vpcmpultw_v16i1_v32i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
15614; VLX-LABEL: test_masked_vpcmpultw_v16i1_v32i1_mask_mem:
15615; VLX:       # %bb.0: # %entry
15616; VLX-NEXT:    kmovd %edi, %k1
15617; VLX-NEXT:    vpcmpltuw (%rsi), %ymm0, %k0 {%k1}
15618; VLX-NEXT:    kmovd %k0, %eax
15619; VLX-NEXT:    vzeroupper
15620; VLX-NEXT:    retq
15621;
15622; NoVLX-LABEL: test_masked_vpcmpultw_v16i1_v32i1_mask_mem:
15623; NoVLX:       # %bb.0: # %entry
15624; NoVLX-NEXT:    vpmaxuw (%rsi), %ymm0, %ymm1
15625; NoVLX-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
15626; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
15627; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
15628; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
15629; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
15630; NoVLX-NEXT:    kmovw %k0, %eax
15631; NoVLX-NEXT:    andl %edi, %eax
15632; NoVLX-NEXT:    vzeroupper
15633; NoVLX-NEXT:    retq
15634entry:
15635  %0 = bitcast <4 x i64> %__a to <16 x i16>
15636  %load = load <4 x i64>, <4 x i64>* %__b
15637  %1 = bitcast <4 x i64> %load to <16 x i16>
15638  %2 = icmp ult <16 x i16> %0, %1
15639  %3 = bitcast i16 %__u to <16 x i1>
15640  %4 = and <16 x i1> %2, %3
15641  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
15642  %6 = bitcast <32 x i1> %5 to i32
15643  ret i32 %6
15644}
15645
15646
; Unsigned less-than compare of two <16 x i16> register operands. The
; <16 x i1> result is widened to <64 x i1> (shuffle indices 16-31 select
; lanes of the zeroinitializer operand, so the upper lanes are zero) and
; returned as i64.
; The avx512bw/vl run expects a ymm vpcmpltuw + kmovq; the avx512f-only run
; expects vpmaxuw/vpcmpeqw, vptestmd, then movzwl on the 16-bit mask.
15647define zeroext i64 @test_vpcmpultw_v16i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
15648; VLX-LABEL: test_vpcmpultw_v16i1_v64i1_mask:
15649; VLX:       # %bb.0: # %entry
15650; VLX-NEXT:    vpcmpltuw %ymm1, %ymm0, %k0
15651; VLX-NEXT:    kmovq %k0, %rax
15652; VLX-NEXT:    vzeroupper
15653; VLX-NEXT:    retq
15654;
15655; NoVLX-LABEL: test_vpcmpultw_v16i1_v64i1_mask:
15656; NoVLX:       # %bb.0: # %entry
15657; NoVLX-NEXT:    vpmaxuw %ymm1, %ymm0, %ymm1
15658; NoVLX-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
15659; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
15660; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
15661; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
15662; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
15663; NoVLX-NEXT:    kmovw %k0, %eax
15664; NoVLX-NEXT:    movzwl %ax, %eax
15665; NoVLX-NEXT:    vzeroupper
15666; NoVLX-NEXT:    retq
15667entry:
15668  %0 = bitcast <4 x i64> %__a to <16 x i16>
15669  %1 = bitcast <4 x i64> %__b to <16 x i16>
15670  %2 = icmp ult <16 x i16> %0, %1
15671  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
15672  %4 = bitcast <64 x i1> %3 to i64
15673  ret i64 %4
15674}
15675
; Unsigned less-than compare of <16 x i16> vectors; the second operand is
; loaded from %__b. The <16 x i1> result is widened to <64 x i1> (shuffle
; indices 16-31 select lanes of the zeroinitializer operand, so the upper
; lanes are zero) and returned as i64.
; The avx512bw/vl run expects vpcmpltuw with a folded load; the avx512f-only
; run expects the load folded into vpmaxuw, then vpcmpeqw, vptestmd, movzwl.
15676define zeroext i64 @test_vpcmpultw_v16i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
15677; VLX-LABEL: test_vpcmpultw_v16i1_v64i1_mask_mem:
15678; VLX:       # %bb.0: # %entry
15679; VLX-NEXT:    vpcmpltuw (%rdi), %ymm0, %k0
15680; VLX-NEXT:    kmovq %k0, %rax
15681; VLX-NEXT:    vzeroupper
15682; VLX-NEXT:    retq
15683;
15684; NoVLX-LABEL: test_vpcmpultw_v16i1_v64i1_mask_mem:
15685; NoVLX:       # %bb.0: # %entry
15686; NoVLX-NEXT:    vpmaxuw (%rdi), %ymm0, %ymm1
15687; NoVLX-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
15688; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
15689; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
15690; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
15691; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
15692; NoVLX-NEXT:    kmovw %k0, %eax
15693; NoVLX-NEXT:    movzwl %ax, %eax
15694; NoVLX-NEXT:    vzeroupper
15695; NoVLX-NEXT:    retq
15696entry:
15697  %0 = bitcast <4 x i64> %__a to <16 x i16>
15698  %load = load <4 x i64>, <4 x i64>* %__b
15699  %1 = bitcast <4 x i64> %load to <16 x i16>
15700  %2 = icmp ult <16 x i16> %0, %1
15701  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
15702  %4 = bitcast <64 x i1> %3 to i64
15703  ret i64 %4
15704}
15705
; Masked unsigned less-than compare of two <16 x i16> register operands. The
; <16 x i1> result is ANDed with the bits of %__u, widened to <64 x i1>
; (shuffle indices 16-31 select lanes of the zeroinitializer operand, so the
; upper lanes are zero) and returned as i64.
; The avx512bw/vl run expects one vpcmpltuw under {%k1} + kmovq; the
; avx512f-only run expects the mask applied with andl after the kmovw.
15706define zeroext i64 @test_masked_vpcmpultw_v16i1_v64i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
15707; VLX-LABEL: test_masked_vpcmpultw_v16i1_v64i1_mask:
15708; VLX:       # %bb.0: # %entry
15709; VLX-NEXT:    kmovd %edi, %k1
15710; VLX-NEXT:    vpcmpltuw %ymm1, %ymm0, %k0 {%k1}
15711; VLX-NEXT:    kmovq %k0, %rax
15712; VLX-NEXT:    vzeroupper
15713; VLX-NEXT:    retq
15714;
15715; NoVLX-LABEL: test_masked_vpcmpultw_v16i1_v64i1_mask:
15716; NoVLX:       # %bb.0: # %entry
15717; NoVLX-NEXT:    vpmaxuw %ymm1, %ymm0, %ymm1
15718; NoVLX-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
15719; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
15720; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
15721; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
15722; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
15723; NoVLX-NEXT:    kmovw %k0, %eax
15724; NoVLX-NEXT:    andl %edi, %eax
15725; NoVLX-NEXT:    vzeroupper
15726; NoVLX-NEXT:    retq
15727entry:
15728  %0 = bitcast <4 x i64> %__a to <16 x i16>
15729  %1 = bitcast <4 x i64> %__b to <16 x i16>
15730  %2 = icmp ult <16 x i16> %0, %1
15731  %3 = bitcast i16 %__u to <16 x i1>
15732  %4 = and <16 x i1> %2, %3
15733  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
15734  %6 = bitcast <64 x i1> %5 to i64
15735  ret i64 %6
15736}
15737
; Masked unsigned less-than compare of <16 x i16> vectors; the second operand
; is loaded from %__b. The <16 x i1> result is ANDed with the bits of %__u,
; widened to <64 x i1> (shuffle indices 16-31 select lanes of the
; zeroinitializer operand, so the upper lanes are zero) and returned as i64.
; The avx512bw/vl run expects one masked vpcmpltuw with a folded load; the
; avx512f-only run expects vpmaxuw (folded load) and a final andl with %edi.
15738define zeroext i64 @test_masked_vpcmpultw_v16i1_v64i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
15739; VLX-LABEL: test_masked_vpcmpultw_v16i1_v64i1_mask_mem:
15740; VLX:       # %bb.0: # %entry
15741; VLX-NEXT:    kmovd %edi, %k1
15742; VLX-NEXT:    vpcmpltuw (%rsi), %ymm0, %k0 {%k1}
15743; VLX-NEXT:    kmovq %k0, %rax
15744; VLX-NEXT:    vzeroupper
15745; VLX-NEXT:    retq
15746;
15747; NoVLX-LABEL: test_masked_vpcmpultw_v16i1_v64i1_mask_mem:
15748; NoVLX:       # %bb.0: # %entry
15749; NoVLX-NEXT:    vpmaxuw (%rsi), %ymm0, %ymm1
15750; NoVLX-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
15751; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
15752; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
15753; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
15754; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
15755; NoVLX-NEXT:    kmovw %k0, %eax
15756; NoVLX-NEXT:    andl %edi, %eax
15757; NoVLX-NEXT:    vzeroupper
15758; NoVLX-NEXT:    retq
15759entry:
15760  %0 = bitcast <4 x i64> %__a to <16 x i16>
15761  %load = load <4 x i64>, <4 x i64>* %__b
15762  %1 = bitcast <4 x i64> %load to <16 x i16>
15763  %2 = icmp ult <16 x i16> %0, %1
15764  %3 = bitcast i16 %__u to <16 x i1>
15765  %4 = and <16 x i1> %2, %3
15766  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
15767  %6 = bitcast <64 x i1> %5 to i64
15768  ret i64 %6
15769}
15770
15771
; Unsigned less-than compare of two <32 x i16> register operands. The
; <32 x i1> result is widened to <64 x i1> (shuffle indices 32-63 select
; lanes of the zeroinitializer operand, so the upper lanes are zero) and
; returned as i64.
; The avx512bw/vl run expects a single zmm vpcmpltuw + kmovq; the
; avx512f-only run expects each 256-bit half (vextracti64x4 $1 for the upper
; one) to be compared separately and the two 16-bit masks merged with
; shll $16 / orl.
15772define zeroext i64 @test_vpcmpultw_v32i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
15773; VLX-LABEL: test_vpcmpultw_v32i1_v64i1_mask:
15774; VLX:       # %bb.0: # %entry
15775; VLX-NEXT:    vpcmpltuw %zmm1, %zmm0, %k0
15776; VLX-NEXT:    kmovq %k0, %rax
15777; VLX-NEXT:    vzeroupper
15778; VLX-NEXT:    retq
15779;
15780; NoVLX-LABEL: test_vpcmpultw_v32i1_v64i1_mask:
15781; NoVLX:       # %bb.0: # %entry
15782; NoVLX-NEXT:    vextracti64x4 $1, %zmm0, %ymm2
15783; NoVLX-NEXT:    vextracti64x4 $1, %zmm1, %ymm3
15784; NoVLX-NEXT:    vpmaxuw %ymm1, %ymm0, %ymm1
15785; NoVLX-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
15786; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
15787; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
15788; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
15789; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
15790; NoVLX-NEXT:    kmovw %k0, %ecx
15791; NoVLX-NEXT:    vpmaxuw %ymm3, %ymm2, %ymm0
15792; NoVLX-NEXT:    vpcmpeqw %ymm0, %ymm2, %ymm0
15793; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
15794; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
15795; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
15796; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
15797; NoVLX-NEXT:    kmovw %k0, %eax
15798; NoVLX-NEXT:    shll $16, %eax
15799; NoVLX-NEXT:    orl %ecx, %eax
15800; NoVLX-NEXT:    vzeroupper
15801; NoVLX-NEXT:    retq
15802entry:
15803  %0 = bitcast <8 x i64> %__a to <32 x i16>
15804  %1 = bitcast <8 x i64> %__b to <32 x i16>
15805  %2 = icmp ult <32 x i16> %0, %1
15806  %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
15807  %4 = bitcast <64 x i1> %3 to i64
15808  ret i64 %4
15809}
15810
; Unsigned less-than compare of <32 x i16> vectors; the second operand is
; loaded from %__b. The <32 x i1> result is widened to <64 x i1> (shuffle
; indices 32-63 select lanes of the zeroinitializer operand, so the upper
; lanes are zero) and returned as i64.
; The avx512bw/vl run expects a zmm vpcmpltuw with a folded load; the
; avx512f-only run expects two 256-bit compares with loads folded from
; (%rdi) and 32(%rdi), merged with shll $16 / orl.
15811define zeroext i64 @test_vpcmpultw_v32i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
15812; VLX-LABEL: test_vpcmpultw_v32i1_v64i1_mask_mem:
15813; VLX:       # %bb.0: # %entry
15814; VLX-NEXT:    vpcmpltuw (%rdi), %zmm0, %k0
15815; VLX-NEXT:    kmovq %k0, %rax
15816; VLX-NEXT:    vzeroupper
15817; VLX-NEXT:    retq
15818;
15819; NoVLX-LABEL: test_vpcmpultw_v32i1_v64i1_mask_mem:
15820; NoVLX:       # %bb.0: # %entry
15821; NoVLX-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
15822; NoVLX-NEXT:    vpmaxuw (%rdi), %ymm0, %ymm2
15823; NoVLX-NEXT:    vpcmpeqw %ymm2, %ymm0, %ymm0
15824; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
15825; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
15826; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
15827; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
15828; NoVLX-NEXT:    kmovw %k0, %ecx
15829; NoVLX-NEXT:    vpmaxuw 32(%rdi), %ymm1, %ymm0
15830; NoVLX-NEXT:    vpcmpeqw %ymm0, %ymm1, %ymm0
15831; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
15832; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
15833; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
15834; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
15835; NoVLX-NEXT:    kmovw %k0, %eax
15836; NoVLX-NEXT:    shll $16, %eax
15837; NoVLX-NEXT:    orl %ecx, %eax
15838; NoVLX-NEXT:    vzeroupper
15839; NoVLX-NEXT:    retq
15840entry:
15841  %0 = bitcast <8 x i64> %__a to <32 x i16>
15842  %load = load <8 x i64>, <8 x i64>* %__b
15843  %1 = bitcast <8 x i64> %load to <32 x i16>
15844  %2 = icmp ult <32 x i16> %0, %1
15845  %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
15846  %4 = bitcast <64 x i1> %3 to i64
15847  ret i64 %4
15848}
15849
; Masked unsigned less-than compare of two <32 x i16> register operands. The
; <32 x i1> result is ANDed with the bits of %__u, widened to <64 x i1>
; (shuffle indices 32-63 select lanes of the zeroinitializer operand, so the
; upper lanes are zero) and returned as i64.
; The avx512bw/vl run expects one zmm vpcmpltuw under {%k1}; the avx512f-only
; run expects two 256-bit compares, each result ANDed with %edi (shifted
; right by 16 before the upper half is processed), then merged with
; shll $16 / orl.
15850define zeroext i64 @test_masked_vpcmpultw_v32i1_v64i1_mask(i32 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
15851; VLX-LABEL: test_masked_vpcmpultw_v32i1_v64i1_mask:
15852; VLX:       # %bb.0: # %entry
15853; VLX-NEXT:    kmovd %edi, %k1
15854; VLX-NEXT:    vpcmpltuw %zmm1, %zmm0, %k0 {%k1}
15855; VLX-NEXT:    kmovq %k0, %rax
15856; VLX-NEXT:    vzeroupper
15857; VLX-NEXT:    retq
15858;
15859; NoVLX-LABEL: test_masked_vpcmpultw_v32i1_v64i1_mask:
15860; NoVLX:       # %bb.0: # %entry
15861; NoVLX-NEXT:    vpmaxuw %ymm1, %ymm0, %ymm2
15862; NoVLX-NEXT:    vpcmpeqw %ymm2, %ymm0, %ymm2
15863; NoVLX-NEXT:    vpternlogq $15, %zmm2, %zmm2, %zmm2
15864; NoVLX-NEXT:    vpmovsxwd %ymm2, %zmm2
15865; NoVLX-NEXT:    vpslld $31, %zmm2, %zmm2
15866; NoVLX-NEXT:    vptestmd %zmm2, %zmm2, %k0
15867; NoVLX-NEXT:    kmovw %k0, %eax
15868; NoVLX-NEXT:    andl %edi, %eax
15869; NoVLX-NEXT:    shrl $16, %edi
15870; NoVLX-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
15871; NoVLX-NEXT:    vextracti64x4 $1, %zmm1, %ymm1
15872; NoVLX-NEXT:    vpmaxuw %ymm1, %ymm0, %ymm1
15873; NoVLX-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
15874; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
15875; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
15876; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
15877; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
15878; NoVLX-NEXT:    kmovw %k0, %ecx
15879; NoVLX-NEXT:    andl %edi, %ecx
15880; NoVLX-NEXT:    shll $16, %ecx
15881; NoVLX-NEXT:    movzwl %ax, %eax
15882; NoVLX-NEXT:    orl %ecx, %eax
15883; NoVLX-NEXT:    vzeroupper
15884; NoVLX-NEXT:    retq
15885entry:
15886  %0 = bitcast <8 x i64> %__a to <32 x i16>
15887  %1 = bitcast <8 x i64> %__b to <32 x i16>
15888  %2 = icmp ult <32 x i16> %0, %1
15889  %3 = bitcast i32 %__u to <32 x i1>
15890  %4 = and <32 x i1> %2, %3
15891  %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
15892  %6 = bitcast <64 x i1> %5 to i64
15893  ret i64 %6
15894}
15895
; Masked unsigned less-than compare of <32 x i16> vectors; the second operand
; is loaded from %__b. The <32 x i1> result is ANDed with the bits of %__u,
; widened to <64 x i1> (shuffle indices 32-63 select lanes of the
; zeroinitializer operand, so the upper lanes are zero) and returned as i64.
; The avx512bw/vl run expects one masked zmm vpcmpltuw with a folded load;
; the avx512f-only run expects two 256-bit compares with loads folded from
; (%rsi) and 32(%rsi), each result ANDed with the matching half of %edi and
; merged with shll $16 / orl.
15896define zeroext i64 @test_masked_vpcmpultw_v32i1_v64i1_mask_mem(i32 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
15897; VLX-LABEL: test_masked_vpcmpultw_v32i1_v64i1_mask_mem:
15898; VLX:       # %bb.0: # %entry
15899; VLX-NEXT:    kmovd %edi, %k1
15900; VLX-NEXT:    vpcmpltuw (%rsi), %zmm0, %k0 {%k1}
15901; VLX-NEXT:    kmovq %k0, %rax
15902; VLX-NEXT:    vzeroupper
15903; VLX-NEXT:    retq
15904;
15905; NoVLX-LABEL: test_masked_vpcmpultw_v32i1_v64i1_mask_mem:
15906; NoVLX:       # %bb.0: # %entry
15907; NoVLX-NEXT:    vpmaxuw (%rsi), %ymm0, %ymm1
15908; NoVLX-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm1
15909; NoVLX-NEXT:    vpternlogq $15, %zmm1, %zmm1, %zmm1
15910; NoVLX-NEXT:    vpmovsxwd %ymm1, %zmm1
15911; NoVLX-NEXT:    vpslld $31, %zmm1, %zmm1
15912; NoVLX-NEXT:    vptestmd %zmm1, %zmm1, %k0
15913; NoVLX-NEXT:    kmovw %k0, %eax
15914; NoVLX-NEXT:    andl %edi, %eax
15915; NoVLX-NEXT:    shrl $16, %edi
15916; NoVLX-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
15917; NoVLX-NEXT:    vpmaxuw 32(%rsi), %ymm0, %ymm1
15918; NoVLX-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
15919; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
15920; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
15921; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
15922; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
15923; NoVLX-NEXT:    kmovw %k0, %ecx
15924; NoVLX-NEXT:    andl %edi, %ecx
15925; NoVLX-NEXT:    shll $16, %ecx
15926; NoVLX-NEXT:    movzwl %ax, %eax
15927; NoVLX-NEXT:    orl %ecx, %eax
15928; NoVLX-NEXT:    vzeroupper
15929; NoVLX-NEXT:    retq
15930entry:
15931  %0 = bitcast <8 x i64> %__a to <32 x i16>
15932  %load = load <8 x i64>, <8 x i64>* %__b
15933  %1 = bitcast <8 x i64> %load to <32 x i16>
15934  %2 = icmp ult <32 x i16> %0, %1
15935  %3 = bitcast i32 %__u to <32 x i1>
15936  %4 = and <32 x i1> %2, %3
15937  %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
15938  %6 = bitcast <64 x i1> %5 to i64
15939  ret i64 %6
15940}
15941
15942
; Unsigned less-than compare of two <4 x i32> register operands. The
; <4 x i1> result is widened to <8 x i1> (shuffle indices 4-7 select lanes
; of the zeroinitializer operand, so the upper lanes are zero) and returned
; as i8.
; The avx512bw/vl run expects an xmm vpcmpltud + kmovd; the avx512f-only run
; expects the compare on zmm registers followed by a kshiftlw $12 /
; kshiftrw $12 pair on the resulting mask.
15943define zeroext i8 @test_vpcmpultd_v4i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
15944; VLX-LABEL: test_vpcmpultd_v4i1_v8i1_mask:
15945; VLX:       # %bb.0: # %entry
15946; VLX-NEXT:    vpcmpltud %xmm1, %xmm0, %k0
15947; VLX-NEXT:    kmovd %k0, %eax
15948; VLX-NEXT:    # kill: def $al killed $al killed $eax
15949; VLX-NEXT:    retq
15950;
15951; NoVLX-LABEL: test_vpcmpultd_v4i1_v8i1_mask:
15952; NoVLX:       # %bb.0: # %entry
15953; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
15954; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
15955; NoVLX-NEXT:    vpcmpltud %zmm1, %zmm0, %k0
15956; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
15957; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
15958; NoVLX-NEXT:    kmovw %k0, %eax
15959; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
15960; NoVLX-NEXT:    vzeroupper
15961; NoVLX-NEXT:    retq
15962entry:
15963  %0 = bitcast <2 x i64> %__a to <4 x i32>
15964  %1 = bitcast <2 x i64> %__b to <4 x i32>
15965  %2 = icmp ult <4 x i32> %0, %1
15966  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
15967  %4 = bitcast <8 x i1> %3 to i8
15968  ret i8 %4
15969}
15970
; Unsigned less-than compare of <4 x i32> vectors; the second operand is
; loaded from %__b. The <4 x i1> result is widened to <8 x i1> (shuffle
; indices 4-7 select lanes of the zeroinitializer operand, so the upper
; lanes are zero) and returned as i8.
; The avx512bw/vl run expects vpcmpltud with a folded load; the avx512f-only
; run expects a separate vmovdqa load, a zmm compare, and a kshiftlw $12 /
; kshiftrw $12 pair on the resulting mask.
15971define zeroext i8 @test_vpcmpultd_v4i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
15972; VLX-LABEL: test_vpcmpultd_v4i1_v8i1_mask_mem:
15973; VLX:       # %bb.0: # %entry
15974; VLX-NEXT:    vpcmpltud (%rdi), %xmm0, %k0
15975; VLX-NEXT:    kmovd %k0, %eax
15976; VLX-NEXT:    # kill: def $al killed $al killed $eax
15977; VLX-NEXT:    retq
15978;
15979; NoVLX-LABEL: test_vpcmpultd_v4i1_v8i1_mask_mem:
15980; NoVLX:       # %bb.0: # %entry
15981; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
15982; NoVLX-NEXT:    vmovdqa (%rdi), %xmm1
15983; NoVLX-NEXT:    vpcmpltud %zmm1, %zmm0, %k0
15984; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
15985; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
15986; NoVLX-NEXT:    kmovw %k0, %eax
15987; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
15988; NoVLX-NEXT:    vzeroupper
15989; NoVLX-NEXT:    retq
15990entry:
15991  %0 = bitcast <2 x i64> %__a to <4 x i32>
15992  %load = load <2 x i64>, <2 x i64>* %__b
15993  %1 = bitcast <2 x i64> %load to <4 x i32>
15994  %2 = icmp ult <4 x i32> %0, %1
15995  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
15996  %4 = bitcast <8 x i1> %3 to i8
15997  ret i8 %4
15998}
15999
; Masked unsigned less-than compare of two <4 x i32> register operands.
; %__u is bitcast to <8 x i1> and its low four lanes are extracted
; (%extract.i) and ANDed with the <4 x i1> compare result. That result is
; widened to <8 x i1> (shuffle indices 4-7 select lanes of the
; zeroinitializer operand, so the upper lanes are zero) and returned as i8.
; The avx512bw/vl run expects one xmm vpcmpltud under {%k1}; the avx512f-only
; run expects a masked zmm compare followed by a kshiftlw $12 / kshiftrw $12
; pair on the resulting mask.
16000define zeroext i8 @test_masked_vpcmpultd_v4i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
16001; VLX-LABEL: test_masked_vpcmpultd_v4i1_v8i1_mask:
16002; VLX:       # %bb.0: # %entry
16003; VLX-NEXT:    kmovd %edi, %k1
16004; VLX-NEXT:    vpcmpltud %xmm1, %xmm0, %k0 {%k1}
16005; VLX-NEXT:    kmovd %k0, %eax
16006; VLX-NEXT:    # kill: def $al killed $al killed $eax
16007; VLX-NEXT:    retq
16008;
16009; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v8i1_mask:
16010; NoVLX:       # %bb.0: # %entry
16011; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
16012; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
16013; NoVLX-NEXT:    kmovw %edi, %k1
16014; NoVLX-NEXT:    vpcmpltud %zmm1, %zmm0, %k0 {%k1}
16015; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
16016; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
16017; NoVLX-NEXT:    kmovw %k0, %eax
16018; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
16019; NoVLX-NEXT:    vzeroupper
16020; NoVLX-NEXT:    retq
16021entry:
16022  %0 = bitcast <2 x i64> %__a to <4 x i32>
16023  %1 = bitcast <2 x i64> %__b to <4 x i32>
16024  %2 = icmp ult <4 x i32> %0, %1
16025  %3 = bitcast i8 %__u to <8 x i1>
16026  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
16027  %4 = and <4 x i1> %2, %extract.i
16028  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
16029  %6 = bitcast <8 x i1> %5 to i8
16030  ret i8 %6
16031}
16032
; Masked unsigned less-than compare of %__a (viewed as <4 x i32>) against a
; vector loaded from %__b. Lanes 0-3 of the caller mask %__u (bitcast to
; <8 x i1>) are ANDed with the compare result, which is then widened to
; <8 x i1> (indices 4-7 pick zeroinitializer lanes) and returned as i8.
; In the AVX512VL run the assertions expect the load as a memory operand of
; a {%k1}-masked vpcmpltud. In the AVX512F-only run the load is a separate
; vmovdqa, the compare uses zmm registers, and kshiftlw/kshiftrw $12 clears
; mask bits 4-15.
16033define zeroext i8 @test_masked_vpcmpultd_v4i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
16034; VLX-LABEL: test_masked_vpcmpultd_v4i1_v8i1_mask_mem:
16035; VLX:       # %bb.0: # %entry
16036; VLX-NEXT:    kmovd %edi, %k1
16037; VLX-NEXT:    vpcmpltud (%rsi), %xmm0, %k0 {%k1}
16038; VLX-NEXT:    kmovd %k0, %eax
16039; VLX-NEXT:    # kill: def $al killed $al killed $eax
16040; VLX-NEXT:    retq
16041;
16042; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v8i1_mask_mem:
16043; NoVLX:       # %bb.0: # %entry
16044; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
16045; NoVLX-NEXT:    vmovdqa (%rsi), %xmm1
16046; NoVLX-NEXT:    kmovw %edi, %k1
16047; NoVLX-NEXT:    vpcmpltud %zmm1, %zmm0, %k0 {%k1}
16048; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
16049; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
16050; NoVLX-NEXT:    kmovw %k0, %eax
16051; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
16052; NoVLX-NEXT:    vzeroupper
16053; NoVLX-NEXT:    retq
16054entry:
16055  %0 = bitcast <2 x i64> %__a to <4 x i32>
16056  %load = load <2 x i64>, <2 x i64>* %__b
16057  %1 = bitcast <2 x i64> %load to <4 x i32>
16058  %2 = icmp ult <4 x i32> %0, %1
16059  %3 = bitcast i8 %__u to <8 x i1>
16060  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
16061  %4 = and <4 x i1> %2, %extract.i
16062  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
16063  %6 = bitcast <8 x i1> %5 to i8
16064  ret i8 %6
16065}
16066
16067
; Unsigned less-than compare of %__a (viewed as <4 x i32>) against a scalar
; i32 loaded from %__b and splatted to all four lanes (insertelement into
; lane 0, then a shuffle with an all-zero index mask). The <4 x i1> result is
; widened to <8 x i1> (indices 4-7 pick zeroinitializer lanes) and returned
; as i8.
; In the AVX512VL run the assertions expect the splat as a {1to4} broadcast
; memory operand of vpcmpltud. In the AVX512F-only run a separate
; vpbroadcastd fills xmm1, the compare uses zmm registers, and
; kshiftlw/kshiftrw $12 clears mask bits 4-15.
16068define zeroext i8 @test_vpcmpultd_v4i1_v8i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
16069; VLX-LABEL: test_vpcmpultd_v4i1_v8i1_mask_mem_b:
16070; VLX:       # %bb.0: # %entry
16071; VLX-NEXT:    vpcmpltud (%rdi){1to4}, %xmm0, %k0
16072; VLX-NEXT:    kmovd %k0, %eax
16073; VLX-NEXT:    # kill: def $al killed $al killed $eax
16074; VLX-NEXT:    retq
16075;
16076; NoVLX-LABEL: test_vpcmpultd_v4i1_v8i1_mask_mem_b:
16077; NoVLX:       # %bb.0: # %entry
16078; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
16079; NoVLX-NEXT:    vpbroadcastd (%rdi), %xmm1
16080; NoVLX-NEXT:    vpcmpltud %zmm1, %zmm0, %k0
16081; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
16082; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
16083; NoVLX-NEXT:    kmovw %k0, %eax
16084; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
16085; NoVLX-NEXT:    vzeroupper
16086; NoVLX-NEXT:    retq
16087entry:
16088  %0 = bitcast <2 x i64> %__a to <4 x i32>
16089  %load = load i32, i32* %__b
16090  %vec = insertelement <4 x i32> undef, i32 %load, i32 0
16091  %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
16092  %2 = icmp ult <4 x i32> %0, %1
16093  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
16094  %4 = bitcast <8 x i1> %3 to i8
16095  ret i8 %4
16096}
16097
; Masked unsigned less-than compare of %__a (viewed as <4 x i32>) against a
; scalar i32 loaded from %__b and splatted to all four lanes. Lanes 0-3 of
; the caller mask %__u (bitcast to <8 x i1>) are ANDed with the compare
; result; note the AND here takes the mask as its first operand. The result
; is widened to <8 x i1> (indices 4-7 pick zeroinitializer lanes) and
; returned as i8.
; In the AVX512VL run the assertions expect a {%k1}-masked vpcmpltud with a
; {1to4} broadcast memory operand. In the AVX512F-only run a separate
; vpbroadcastd fills xmm1, the compare uses zmm registers, and
; kshiftlw/kshiftrw $12 clears mask bits 4-15.
16098define zeroext i8 @test_masked_vpcmpultd_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
16099; VLX-LABEL: test_masked_vpcmpultd_v4i1_v8i1_mask_mem_b:
16100; VLX:       # %bb.0: # %entry
16101; VLX-NEXT:    kmovd %edi, %k1
16102; VLX-NEXT:    vpcmpltud (%rsi){1to4}, %xmm0, %k0 {%k1}
16103; VLX-NEXT:    kmovd %k0, %eax
16104; VLX-NEXT:    # kill: def $al killed $al killed $eax
16105; VLX-NEXT:    retq
16106;
16107; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v8i1_mask_mem_b:
16108; NoVLX:       # %bb.0: # %entry
16109; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
16110; NoVLX-NEXT:    vpbroadcastd (%rsi), %xmm1
16111; NoVLX-NEXT:    kmovw %edi, %k1
16112; NoVLX-NEXT:    vpcmpltud %zmm1, %zmm0, %k0 {%k1}
16113; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
16114; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
16115; NoVLX-NEXT:    kmovw %k0, %eax
16116; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
16117; NoVLX-NEXT:    vzeroupper
16118; NoVLX-NEXT:    retq
16119entry:
16120  %0 = bitcast <2 x i64> %__a to <4 x i32>
16121  %load = load i32, i32* %__b
16122  %vec = insertelement <4 x i32> undef, i32 %load, i32 0
16123  %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
16124  %2 = icmp ult <4 x i32> %0, %1
16125  %3 = bitcast i8 %__u to <8 x i1>
16126  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
16127  %4 = and <4 x i1> %extract.i, %2
16128  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
16129  %6 = bitcast <8 x i1> %5 to i8
16130  ret i8 %6
16131}
16132
16133
; Unsigned less-than compare of two register operands viewed as <4 x i32>;
; the <4 x i1> result is widened to <16 x i1> and returned as an i16 bitmask.
; Shuffle indices 0-3 take the compare lanes; every later index is in the
; 4-7 range, which selects zeroinitializer lanes, so result bits 4-15 are
; always zero.
; The AVX512VL run expects an xmm vpcmpltud into k0. The AVX512F-only run
; compares on zmm registers and clears mask bits 4-15 with the
; kshiftlw/kshiftrw $12 pair.
16134define zeroext i16 @test_vpcmpultd_v4i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
16135; VLX-LABEL: test_vpcmpultd_v4i1_v16i1_mask:
16136; VLX:       # %bb.0: # %entry
16137; VLX-NEXT:    vpcmpltud %xmm1, %xmm0, %k0
16138; VLX-NEXT:    kmovd %k0, %eax
16139; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
16140; VLX-NEXT:    retq
16141;
16142; NoVLX-LABEL: test_vpcmpultd_v4i1_v16i1_mask:
16143; NoVLX:       # %bb.0: # %entry
16144; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
16145; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
16146; NoVLX-NEXT:    vpcmpltud %zmm1, %zmm0, %k0
16147; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
16148; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
16149; NoVLX-NEXT:    kmovw %k0, %eax
16150; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
16151; NoVLX-NEXT:    vzeroupper
16152; NoVLX-NEXT:    retq
16153entry:
16154  %0 = bitcast <2 x i64> %__a to <4 x i32>
16155  %1 = bitcast <2 x i64> %__b to <4 x i32>
16156  %2 = icmp ult <4 x i32> %0, %1
16157  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
16158  %4 = bitcast <16 x i1> %3 to i16
16159  ret i16 %4
16160}
16161
; Unsigned less-than compare of %__a (viewed as <4 x i32>) against a vector
; loaded from %__b; the <4 x i1> result is widened to <16 x i1> and returned
; as i16. Shuffle indices past the first four are all in the 4-7 range,
; which selects zeroinitializer lanes, so result bits 4-15 are always zero.
; In the AVX512VL run the assertions expect the load as a memory operand of
; vpcmpltud. In the AVX512F-only run the load is a separate vmovdqa, the
; compare uses zmm registers, and kshiftlw/kshiftrw $12 clears mask bits
; 4-15.
16162define zeroext i16 @test_vpcmpultd_v4i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
16163; VLX-LABEL: test_vpcmpultd_v4i1_v16i1_mask_mem:
16164; VLX:       # %bb.0: # %entry
16165; VLX-NEXT:    vpcmpltud (%rdi), %xmm0, %k0
16166; VLX-NEXT:    kmovd %k0, %eax
16167; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
16168; VLX-NEXT:    retq
16169;
16170; NoVLX-LABEL: test_vpcmpultd_v4i1_v16i1_mask_mem:
16171; NoVLX:       # %bb.0: # %entry
16172; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
16173; NoVLX-NEXT:    vmovdqa (%rdi), %xmm1
16174; NoVLX-NEXT:    vpcmpltud %zmm1, %zmm0, %k0
16175; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
16176; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
16177; NoVLX-NEXT:    kmovw %k0, %eax
16178; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
16179; NoVLX-NEXT:    vzeroupper
16180; NoVLX-NEXT:    retq
16181entry:
16182  %0 = bitcast <2 x i64> %__a to <4 x i32>
16183  %load = load <2 x i64>, <2 x i64>* %__b
16184  %1 = bitcast <2 x i64> %load to <4 x i32>
16185  %2 = icmp ult <4 x i32> %0, %1
16186  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
16187  %4 = bitcast <16 x i1> %3 to i16
16188  ret i16 %4
16189}
16190
; Masked unsigned less-than compare of two register operands viewed as
; <4 x i32>. Lanes 0-3 of the caller mask %__u (bitcast to <8 x i1>) are
; ANDed with the compare result, which is then widened to <16 x i1> and
; returned as i16. Shuffle indices past the first four are all in the 4-7
; range (zeroinitializer lanes), so result bits 4-15 are always zero.
; Both runs expect the AND to be expressed as a {%k1} write-mask on
; vpcmpltud. The AVX512F-only run additionally compares on zmm registers and
; clears mask bits 4-15 with the kshiftlw/kshiftrw $12 pair.
16191define zeroext i16 @test_masked_vpcmpultd_v4i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
16192; VLX-LABEL: test_masked_vpcmpultd_v4i1_v16i1_mask:
16193; VLX:       # %bb.0: # %entry
16194; VLX-NEXT:    kmovd %edi, %k1
16195; VLX-NEXT:    vpcmpltud %xmm1, %xmm0, %k0 {%k1}
16196; VLX-NEXT:    kmovd %k0, %eax
16197; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
16198; VLX-NEXT:    retq
16199;
16200; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v16i1_mask:
16201; NoVLX:       # %bb.0: # %entry
16202; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
16203; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
16204; NoVLX-NEXT:    kmovw %edi, %k1
16205; NoVLX-NEXT:    vpcmpltud %zmm1, %zmm0, %k0 {%k1}
16206; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
16207; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
16208; NoVLX-NEXT:    kmovw %k0, %eax
16209; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
16210; NoVLX-NEXT:    vzeroupper
16211; NoVLX-NEXT:    retq
16212entry:
16213  %0 = bitcast <2 x i64> %__a to <4 x i32>
16214  %1 = bitcast <2 x i64> %__b to <4 x i32>
16215  %2 = icmp ult <4 x i32> %0, %1
16216  %3 = bitcast i8 %__u to <8 x i1>
16217  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
16218  %4 = and <4 x i1> %2, %extract.i
16219  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
16220  %6 = bitcast <16 x i1> %5 to i16
16221  ret i16 %6
16222}
16223
; Masked unsigned less-than compare of %__a (viewed as <4 x i32>) against a
; vector loaded from %__b. Lanes 0-3 of the caller mask %__u (bitcast to
; <8 x i1>) are ANDed with the compare result, which is then widened to
; <16 x i1> and returned as i16. Shuffle indices past the first four are all
; in the 4-7 range (zeroinitializer lanes), so result bits 4-15 are zero.
; In the AVX512VL run the assertions expect the load as a memory operand of
; a {%k1}-masked vpcmpltud. In the AVX512F-only run the load is a separate
; vmovdqa, the compare uses zmm registers, and kshiftlw/kshiftrw $12 clears
; mask bits 4-15.
16224define zeroext i16 @test_masked_vpcmpultd_v4i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
16225; VLX-LABEL: test_masked_vpcmpultd_v4i1_v16i1_mask_mem:
16226; VLX:       # %bb.0: # %entry
16227; VLX-NEXT:    kmovd %edi, %k1
16228; VLX-NEXT:    vpcmpltud (%rsi), %xmm0, %k0 {%k1}
16229; VLX-NEXT:    kmovd %k0, %eax
16230; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
16231; VLX-NEXT:    retq
16232;
16233; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v16i1_mask_mem:
16234; NoVLX:       # %bb.0: # %entry
16235; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
16236; NoVLX-NEXT:    vmovdqa (%rsi), %xmm1
16237; NoVLX-NEXT:    kmovw %edi, %k1
16238; NoVLX-NEXT:    vpcmpltud %zmm1, %zmm0, %k0 {%k1}
16239; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
16240; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
16241; NoVLX-NEXT:    kmovw %k0, %eax
16242; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
16243; NoVLX-NEXT:    vzeroupper
16244; NoVLX-NEXT:    retq
16245entry:
16246  %0 = bitcast <2 x i64> %__a to <4 x i32>
16247  %load = load <2 x i64>, <2 x i64>* %__b
16248  %1 = bitcast <2 x i64> %load to <4 x i32>
16249  %2 = icmp ult <4 x i32> %0, %1
16250  %3 = bitcast i8 %__u to <8 x i1>
16251  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
16252  %4 = and <4 x i1> %2, %extract.i
16253  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
16254  %6 = bitcast <16 x i1> %5 to i16
16255  ret i16 %6
16256}
16257
16258
; Unsigned less-than compare of %__a (viewed as <4 x i32>) against a scalar
; i32 loaded from %__b and splatted to all four lanes (insertelement into
; lane 0, then a shuffle with an all-zero index mask). The <4 x i1> result is
; widened to <16 x i1> and returned as i16; shuffle indices past the first
; four are all in the 4-7 range (zeroinitializer lanes), so bits 4-15 are
; zero.
; In the AVX512VL run the assertions expect the splat as a {1to4} broadcast
; memory operand of vpcmpltud. In the AVX512F-only run a separate
; vpbroadcastd fills xmm1, the compare uses zmm registers, and
; kshiftlw/kshiftrw $12 clears mask bits 4-15.
16259define zeroext i16 @test_vpcmpultd_v4i1_v16i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
16260; VLX-LABEL: test_vpcmpultd_v4i1_v16i1_mask_mem_b:
16261; VLX:       # %bb.0: # %entry
16262; VLX-NEXT:    vpcmpltud (%rdi){1to4}, %xmm0, %k0
16263; VLX-NEXT:    kmovd %k0, %eax
16264; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
16265; VLX-NEXT:    retq
16266;
16267; NoVLX-LABEL: test_vpcmpultd_v4i1_v16i1_mask_mem_b:
16268; NoVLX:       # %bb.0: # %entry
16269; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
16270; NoVLX-NEXT:    vpbroadcastd (%rdi), %xmm1
16271; NoVLX-NEXT:    vpcmpltud %zmm1, %zmm0, %k0
16272; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
16273; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
16274; NoVLX-NEXT:    kmovw %k0, %eax
16275; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
16276; NoVLX-NEXT:    vzeroupper
16277; NoVLX-NEXT:    retq
16278entry:
16279  %0 = bitcast <2 x i64> %__a to <4 x i32>
16280  %load = load i32, i32* %__b
16281  %vec = insertelement <4 x i32> undef, i32 %load, i32 0
16282  %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
16283  %2 = icmp ult <4 x i32> %0, %1
16284  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
16285  %4 = bitcast <16 x i1> %3 to i16
16286  ret i16 %4
16287}
16288
; Masked unsigned less-than compare of %__a (viewed as <4 x i32>) against a
; scalar i32 loaded from %__b and splatted to all four lanes. Lanes 0-3 of
; the caller mask %__u (bitcast to <8 x i1>) are ANDed with the compare
; result; note the AND here takes the mask as its first operand. The result
; is widened to <16 x i1> and returned as i16; shuffle indices past the
; first four are all in the 4-7 range (zeroinitializer lanes), so bits 4-15
; are zero.
; In the AVX512VL run the assertions expect a {%k1}-masked vpcmpltud with a
; {1to4} broadcast memory operand. In the AVX512F-only run a separate
; vpbroadcastd fills xmm1, the compare uses zmm registers, and
; kshiftlw/kshiftrw $12 clears mask bits 4-15.
16289define zeroext i16 @test_masked_vpcmpultd_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
16290; VLX-LABEL: test_masked_vpcmpultd_v4i1_v16i1_mask_mem_b:
16291; VLX:       # %bb.0: # %entry
16292; VLX-NEXT:    kmovd %edi, %k1
16293; VLX-NEXT:    vpcmpltud (%rsi){1to4}, %xmm0, %k0 {%k1}
16294; VLX-NEXT:    kmovd %k0, %eax
16295; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
16296; VLX-NEXT:    retq
16297;
16298; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v16i1_mask_mem_b:
16299; NoVLX:       # %bb.0: # %entry
16300; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
16301; NoVLX-NEXT:    vpbroadcastd (%rsi), %xmm1
16302; NoVLX-NEXT:    kmovw %edi, %k1
16303; NoVLX-NEXT:    vpcmpltud %zmm1, %zmm0, %k0 {%k1}
16304; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
16305; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
16306; NoVLX-NEXT:    kmovw %k0, %eax
16307; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
16308; NoVLX-NEXT:    vzeroupper
16309; NoVLX-NEXT:    retq
16310entry:
16311  %0 = bitcast <2 x i64> %__a to <4 x i32>
16312  %load = load i32, i32* %__b
16313  %vec = insertelement <4 x i32> undef, i32 %load, i32 0
16314  %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
16315  %2 = icmp ult <4 x i32> %0, %1
16316  %3 = bitcast i8 %__u to <8 x i1>
16317  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
16318  %4 = and <4 x i1> %extract.i, %2
16319  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
16320  %6 = bitcast <16 x i1> %5 to i16
16321  ret i16 %6
16322}
16323
16324
; Unsigned less-than compare of two register operands viewed as <4 x i32>;
; the <4 x i1> result is widened to <32 x i1> and returned as an i32 bitmask.
; Shuffle indices 0-3 take the compare lanes; every later index is in the
; 4-7 range, which selects zeroinitializer lanes, so result bits 4-31 are
; always zero.
; The AVX512VL run expects an xmm vpcmpltud into k0 read back with kmovd.
; The AVX512F-only run compares on zmm registers, clears mask bits 4-15 with
; the kshiftlw/kshiftrw $12 pair, and reads the mask with kmovw.
16325define zeroext i32 @test_vpcmpultd_v4i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
16326; VLX-LABEL: test_vpcmpultd_v4i1_v32i1_mask:
16327; VLX:       # %bb.0: # %entry
16328; VLX-NEXT:    vpcmpltud %xmm1, %xmm0, %k0
16329; VLX-NEXT:    kmovd %k0, %eax
16330; VLX-NEXT:    retq
16331;
16332; NoVLX-LABEL: test_vpcmpultd_v4i1_v32i1_mask:
16333; NoVLX:       # %bb.0: # %entry
16334; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
16335; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
16336; NoVLX-NEXT:    vpcmpltud %zmm1, %zmm0, %k0
16337; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
16338; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
16339; NoVLX-NEXT:    kmovw %k0, %eax
16340; NoVLX-NEXT:    vzeroupper
16341; NoVLX-NEXT:    retq
16342entry:
16343  %0 = bitcast <2 x i64> %__a to <4 x i32>
16344  %1 = bitcast <2 x i64> %__b to <4 x i32>
16345  %2 = icmp ult <4 x i32> %0, %1
16346  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
16347  %4 = bitcast <32 x i1> %3 to i32
16348  ret i32 %4
16349}
16350
; Unsigned less-than compare of %__a (viewed as <4 x i32>) against a vector
; loaded from %__b; the <4 x i1> result is widened to <32 x i1> and returned
; as i32. Shuffle indices past the first four are all in the 4-7 range,
; which selects zeroinitializer lanes, so result bits 4-31 are always zero.
; In the AVX512VL run the assertions expect the load as a memory operand of
; vpcmpltud. In the AVX512F-only run the load is a separate vmovdqa, the
; compare uses zmm registers, and kshiftlw/kshiftrw $12 clears mask bits
; 4-15 before kmovw reads the mask.
16351define zeroext i32 @test_vpcmpultd_v4i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
16352; VLX-LABEL: test_vpcmpultd_v4i1_v32i1_mask_mem:
16353; VLX:       # %bb.0: # %entry
16354; VLX-NEXT:    vpcmpltud (%rdi), %xmm0, %k0
16355; VLX-NEXT:    kmovd %k0, %eax
16356; VLX-NEXT:    retq
16357;
16358; NoVLX-LABEL: test_vpcmpultd_v4i1_v32i1_mask_mem:
16359; NoVLX:       # %bb.0: # %entry
16360; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
16361; NoVLX-NEXT:    vmovdqa (%rdi), %xmm1
16362; NoVLX-NEXT:    vpcmpltud %zmm1, %zmm0, %k0
16363; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
16364; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
16365; NoVLX-NEXT:    kmovw %k0, %eax
16366; NoVLX-NEXT:    vzeroupper
16367; NoVLX-NEXT:    retq
16368entry:
16369  %0 = bitcast <2 x i64> %__a to <4 x i32>
16370  %load = load <2 x i64>, <2 x i64>* %__b
16371  %1 = bitcast <2 x i64> %load to <4 x i32>
16372  %2 = icmp ult <4 x i32> %0, %1
16373  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
16374  %4 = bitcast <32 x i1> %3 to i32
16375  ret i32 %4
16376}
16377
; Masked unsigned less-than compare of two register operands viewed as
; <4 x i32>. Lanes 0-3 of the caller mask %__u (bitcast to <8 x i1>) are
; ANDed with the compare result, which is then widened to <32 x i1> and
; returned as i32. Shuffle indices past the first four are all in the 4-7
; range (zeroinitializer lanes), so result bits 4-31 are always zero.
; Both runs expect the AND to be expressed as a {%k1} write-mask on
; vpcmpltud. The AVX512F-only run additionally compares on zmm registers and
; clears mask bits 4-15 with the kshiftlw/kshiftrw $12 pair.
16378define zeroext i32 @test_masked_vpcmpultd_v4i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
16379; VLX-LABEL: test_masked_vpcmpultd_v4i1_v32i1_mask:
16380; VLX:       # %bb.0: # %entry
16381; VLX-NEXT:    kmovd %edi, %k1
16382; VLX-NEXT:    vpcmpltud %xmm1, %xmm0, %k0 {%k1}
16383; VLX-NEXT:    kmovd %k0, %eax
16384; VLX-NEXT:    retq
16385;
16386; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v32i1_mask:
16387; NoVLX:       # %bb.0: # %entry
16388; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
16389; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
16390; NoVLX-NEXT:    kmovw %edi, %k1
16391; NoVLX-NEXT:    vpcmpltud %zmm1, %zmm0, %k0 {%k1}
16392; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
16393; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
16394; NoVLX-NEXT:    kmovw %k0, %eax
16395; NoVLX-NEXT:    vzeroupper
16396; NoVLX-NEXT:    retq
16397entry:
16398  %0 = bitcast <2 x i64> %__a to <4 x i32>
16399  %1 = bitcast <2 x i64> %__b to <4 x i32>
16400  %2 = icmp ult <4 x i32> %0, %1
16401  %3 = bitcast i8 %__u to <8 x i1>
16402  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
16403  %4 = and <4 x i1> %2, %extract.i
16404  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
16405  %6 = bitcast <32 x i1> %5 to i32
16406  ret i32 %6
16407}
16408
; Masked unsigned less-than compare of %__a (viewed as <4 x i32>) against a
; vector loaded from %__b. Lanes 0-3 of the caller mask %__u (bitcast to
; <8 x i1>) are ANDed with the compare result, which is then widened to
; <32 x i1> and returned as i32. Shuffle indices past the first four are all
; in the 4-7 range (zeroinitializer lanes), so result bits 4-31 are zero.
; In the AVX512VL run the assertions expect the load as a memory operand of
; a {%k1}-masked vpcmpltud. In the AVX512F-only run the load is a separate
; vmovdqa, the compare uses zmm registers, and kshiftlw/kshiftrw $12 clears
; mask bits 4-15.
16409define zeroext i32 @test_masked_vpcmpultd_v4i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
16410; VLX-LABEL: test_masked_vpcmpultd_v4i1_v32i1_mask_mem:
16411; VLX:       # %bb.0: # %entry
16412; VLX-NEXT:    kmovd %edi, %k1
16413; VLX-NEXT:    vpcmpltud (%rsi), %xmm0, %k0 {%k1}
16414; VLX-NEXT:    kmovd %k0, %eax
16415; VLX-NEXT:    retq
16416;
16417; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v32i1_mask_mem:
16418; NoVLX:       # %bb.0: # %entry
16419; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
16420; NoVLX-NEXT:    vmovdqa (%rsi), %xmm1
16421; NoVLX-NEXT:    kmovw %edi, %k1
16422; NoVLX-NEXT:    vpcmpltud %zmm1, %zmm0, %k0 {%k1}
16423; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
16424; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
16425; NoVLX-NEXT:    kmovw %k0, %eax
16426; NoVLX-NEXT:    vzeroupper
16427; NoVLX-NEXT:    retq
16428entry:
16429  %0 = bitcast <2 x i64> %__a to <4 x i32>
16430  %load = load <2 x i64>, <2 x i64>* %__b
16431  %1 = bitcast <2 x i64> %load to <4 x i32>
16432  %2 = icmp ult <4 x i32> %0, %1
16433  %3 = bitcast i8 %__u to <8 x i1>
16434  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
16435  %4 = and <4 x i1> %2, %extract.i
16436  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
16437  %6 = bitcast <32 x i1> %5 to i32
16438  ret i32 %6
16439}
16440
16441
; Unsigned less-than compare of %__a (viewed as <4 x i32>) against a scalar
; i32 loaded from %__b and splatted to all four lanes (insertelement into
; lane 0, then a shuffle with an all-zero index mask). The <4 x i1> result is
; widened to <32 x i1> and returned as i32; shuffle indices past the first
; four are all in the 4-7 range (zeroinitializer lanes), so bits 4-31 are
; zero.
; In the AVX512VL run the assertions expect the splat as a {1to4} broadcast
; memory operand of vpcmpltud. In the AVX512F-only run a separate
; vpbroadcastd fills xmm1, the compare uses zmm registers, and
; kshiftlw/kshiftrw $12 clears mask bits 4-15.
16442define zeroext i32 @test_vpcmpultd_v4i1_v32i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
16443; VLX-LABEL: test_vpcmpultd_v4i1_v32i1_mask_mem_b:
16444; VLX:       # %bb.0: # %entry
16445; VLX-NEXT:    vpcmpltud (%rdi){1to4}, %xmm0, %k0
16446; VLX-NEXT:    kmovd %k0, %eax
16447; VLX-NEXT:    retq
16448;
16449; NoVLX-LABEL: test_vpcmpultd_v4i1_v32i1_mask_mem_b:
16450; NoVLX:       # %bb.0: # %entry
16451; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
16452; NoVLX-NEXT:    vpbroadcastd (%rdi), %xmm1
16453; NoVLX-NEXT:    vpcmpltud %zmm1, %zmm0, %k0
16454; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
16455; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
16456; NoVLX-NEXT:    kmovw %k0, %eax
16457; NoVLX-NEXT:    vzeroupper
16458; NoVLX-NEXT:    retq
16459entry:
16460  %0 = bitcast <2 x i64> %__a to <4 x i32>
16461  %load = load i32, i32* %__b
16462  %vec = insertelement <4 x i32> undef, i32 %load, i32 0
16463  %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
16464  %2 = icmp ult <4 x i32> %0, %1
16465  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
16466  %4 = bitcast <32 x i1> %3 to i32
16467  ret i32 %4
16468}
16469
; Masked unsigned less-than compare of %__a (viewed as <4 x i32>) against a
; scalar i32 loaded from %__b and splatted to all four lanes. Lanes 0-3 of
; the caller mask %__u (bitcast to <8 x i1>) are ANDed with the compare
; result; note the AND here takes the mask as its first operand. The result
; is widened to <32 x i1> and returned as i32; shuffle indices past the
; first four are all in the 4-7 range (zeroinitializer lanes), so bits 4-31
; are zero.
; In the AVX512VL run the assertions expect a {%k1}-masked vpcmpltud with a
; {1to4} broadcast memory operand. In the AVX512F-only run a separate
; vpbroadcastd fills xmm1, the compare uses zmm registers, and
; kshiftlw/kshiftrw $12 clears mask bits 4-15.
16470define zeroext i32 @test_masked_vpcmpultd_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
16471; VLX-LABEL: test_masked_vpcmpultd_v4i1_v32i1_mask_mem_b:
16472; VLX:       # %bb.0: # %entry
16473; VLX-NEXT:    kmovd %edi, %k1
16474; VLX-NEXT:    vpcmpltud (%rsi){1to4}, %xmm0, %k0 {%k1}
16475; VLX-NEXT:    kmovd %k0, %eax
16476; VLX-NEXT:    retq
16477;
16478; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v32i1_mask_mem_b:
16479; NoVLX:       # %bb.0: # %entry
16480; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
16481; NoVLX-NEXT:    vpbroadcastd (%rsi), %xmm1
16482; NoVLX-NEXT:    kmovw %edi, %k1
16483; NoVLX-NEXT:    vpcmpltud %zmm1, %zmm0, %k0 {%k1}
16484; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
16485; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
16486; NoVLX-NEXT:    kmovw %k0, %eax
16487; NoVLX-NEXT:    vzeroupper
16488; NoVLX-NEXT:    retq
16489entry:
16490  %0 = bitcast <2 x i64> %__a to <4 x i32>
16491  %load = load i32, i32* %__b
16492  %vec = insertelement <4 x i32> undef, i32 %load, i32 0
16493  %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
16494  %2 = icmp ult <4 x i32> %0, %1
16495  %3 = bitcast i8 %__u to <8 x i1>
16496  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
16497  %4 = and <4 x i1> %extract.i, %2
16498  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
16499  %6 = bitcast <32 x i1> %5 to i32
16500  ret i32 %6
16501}
16502
16503
; Unsigned less-than compare of two register operands viewed as <4 x i32>;
; the <4 x i1> result is widened to <64 x i1> and returned as an i64 bitmask.
; Shuffle indices 0-3 take the compare lanes; every later index is in the
; 4-7 range, which selects zeroinitializer lanes, so result bits 4-63 are
; always zero.
; The AVX512VL run expects an xmm vpcmpltud into k0 read back with kmovq.
; The AVX512F-only run compares on zmm registers, clears mask bits 4-15 with
; the kshiftlw/kshiftrw $12 pair, reads the mask with kmovw, and
; zero-extends the 16-bit value with movzwl.
16504define zeroext i64 @test_vpcmpultd_v4i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
16505; VLX-LABEL: test_vpcmpultd_v4i1_v64i1_mask:
16506; VLX:       # %bb.0: # %entry
16507; VLX-NEXT:    vpcmpltud %xmm1, %xmm0, %k0
16508; VLX-NEXT:    kmovq %k0, %rax
16509; VLX-NEXT:    retq
16510;
16511; NoVLX-LABEL: test_vpcmpultd_v4i1_v64i1_mask:
16512; NoVLX:       # %bb.0: # %entry
16513; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
16514; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
16515; NoVLX-NEXT:    vpcmpltud %zmm1, %zmm0, %k0
16516; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
16517; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
16518; NoVLX-NEXT:    kmovw %k0, %eax
16519; NoVLX-NEXT:    movzwl %ax, %eax
16520; NoVLX-NEXT:    vzeroupper
16521; NoVLX-NEXT:    retq
16522entry:
16523  %0 = bitcast <2 x i64> %__a to <4 x i32>
16524  %1 = bitcast <2 x i64> %__b to <4 x i32>
16525  %2 = icmp ult <4 x i32> %0, %1
16526  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
16527  %4 = bitcast <64 x i1> %3 to i64
16528  ret i64 %4
16529}
16530
; Unsigned less-than compare of %__a (viewed as <4 x i32>) against a vector
; loaded from %__b; the <4 x i1> result is widened to <64 x i1> and returned
; as i64. Shuffle indices past the first four are all in the 4-7 range,
; which selects zeroinitializer lanes, so result bits 4-63 are always zero.
; In the AVX512VL run the assertions expect the load as a memory operand of
; vpcmpltud, read back with kmovq. In the AVX512F-only run the load is a
; separate vmovdqa, the compare uses zmm registers, kshiftlw/kshiftrw $12
; clears mask bits 4-15, and movzwl zero-extends the 16-bit mask.
16531define zeroext i64 @test_vpcmpultd_v4i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
16532; VLX-LABEL: test_vpcmpultd_v4i1_v64i1_mask_mem:
16533; VLX:       # %bb.0: # %entry
16534; VLX-NEXT:    vpcmpltud (%rdi), %xmm0, %k0
16535; VLX-NEXT:    kmovq %k0, %rax
16536; VLX-NEXT:    retq
16537;
16538; NoVLX-LABEL: test_vpcmpultd_v4i1_v64i1_mask_mem:
16539; NoVLX:       # %bb.0: # %entry
16540; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
16541; NoVLX-NEXT:    vmovdqa (%rdi), %xmm1
16542; NoVLX-NEXT:    vpcmpltud %zmm1, %zmm0, %k0
16543; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
16544; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
16545; NoVLX-NEXT:    kmovw %k0, %eax
16546; NoVLX-NEXT:    movzwl %ax, %eax
16547; NoVLX-NEXT:    vzeroupper
16548; NoVLX-NEXT:    retq
16549entry:
16550  %0 = bitcast <2 x i64> %__a to <4 x i32>
16551  %load = load <2 x i64>, <2 x i64>* %__b
16552  %1 = bitcast <2 x i64> %load to <4 x i32>
16553  %2 = icmp ult <4 x i32> %0, %1
16554  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
16555  %4 = bitcast <64 x i1> %3 to i64
16556  ret i64 %4
16557}
16558
; Masked unsigned less-than compare of two register operands viewed as
; <4 x i32>. Lanes 0-3 of the caller mask %__u (bitcast to <8 x i1>) are
; ANDed with the compare result, which is then widened to <64 x i1> and
; returned as i64. Shuffle indices past the first four are all in the 4-7
; range (zeroinitializer lanes), so result bits 4-63 are always zero.
; Both runs expect the AND to be expressed as a {%k1} write-mask on
; vpcmpltud. The AVX512F-only run additionally compares on zmm registers,
; clears mask bits 4-15 with the kshiftlw/kshiftrw $12 pair, and
; zero-extends the 16-bit mask with movzwl.
16559define zeroext i64 @test_masked_vpcmpultd_v4i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
16560; VLX-LABEL: test_masked_vpcmpultd_v4i1_v64i1_mask:
16561; VLX:       # %bb.0: # %entry
16562; VLX-NEXT:    kmovd %edi, %k1
16563; VLX-NEXT:    vpcmpltud %xmm1, %xmm0, %k0 {%k1}
16564; VLX-NEXT:    kmovq %k0, %rax
16565; VLX-NEXT:    retq
16566;
16567; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v64i1_mask:
16568; NoVLX:       # %bb.0: # %entry
16569; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
16570; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
16571; NoVLX-NEXT:    kmovw %edi, %k1
16572; NoVLX-NEXT:    vpcmpltud %zmm1, %zmm0, %k0 {%k1}
16573; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
16574; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
16575; NoVLX-NEXT:    kmovw %k0, %eax
16576; NoVLX-NEXT:    movzwl %ax, %eax
16577; NoVLX-NEXT:    vzeroupper
16578; NoVLX-NEXT:    retq
16579entry:
16580  %0 = bitcast <2 x i64> %__a to <4 x i32>
16581  %1 = bitcast <2 x i64> %__b to <4 x i32>
16582  %2 = icmp ult <4 x i32> %0, %1
16583  %3 = bitcast i8 %__u to <8 x i1>
16584  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
16585  %4 = and <4 x i1> %2, %extract.i
16586  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
16587  %6 = bitcast <64 x i1> %5 to i64
16588  ret i64 %6
16589}
16590
; Masked unsigned less-than compare of %__a (viewed as <4 x i32>) against a
; vector loaded from %__b. Lanes 0-3 of the caller mask %__u (bitcast to
; <8 x i1>) are ANDed with the compare result, which is then widened to
; <64 x i1> and returned as i64. Shuffle indices past the first four are all
; in the 4-7 range (zeroinitializer lanes), so result bits 4-63 are zero.
; In the AVX512VL run the assertions expect the load as a memory operand of
; a {%k1}-masked vpcmpltud, read back with kmovq. In the AVX512F-only run
; the load is a separate vmovdqa, the compare uses zmm registers,
; kshiftlw/kshiftrw $12 clears mask bits 4-15, and movzwl zero-extends the
; 16-bit mask.
16591define zeroext i64 @test_masked_vpcmpultd_v4i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
16592; VLX-LABEL: test_masked_vpcmpultd_v4i1_v64i1_mask_mem:
16593; VLX:       # %bb.0: # %entry
16594; VLX-NEXT:    kmovd %edi, %k1
16595; VLX-NEXT:    vpcmpltud (%rsi), %xmm0, %k0 {%k1}
16596; VLX-NEXT:    kmovq %k0, %rax
16597; VLX-NEXT:    retq
16598;
16599; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v64i1_mask_mem:
16600; NoVLX:       # %bb.0: # %entry
16601; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
16602; NoVLX-NEXT:    vmovdqa (%rsi), %xmm1
16603; NoVLX-NEXT:    kmovw %edi, %k1
16604; NoVLX-NEXT:    vpcmpltud %zmm1, %zmm0, %k0 {%k1}
16605; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
16606; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
16607; NoVLX-NEXT:    kmovw %k0, %eax
16608; NoVLX-NEXT:    movzwl %ax, %eax
16609; NoVLX-NEXT:    vzeroupper
16610; NoVLX-NEXT:    retq
16611entry:
16612  %0 = bitcast <2 x i64> %__a to <4 x i32>
16613  %load = load <2 x i64>, <2 x i64>* %__b
16614  %1 = bitcast <2 x i64> %load to <4 x i32>
16615  %2 = icmp ult <4 x i32> %0, %1
16616  %3 = bitcast i8 %__u to <8 x i1>
16617  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
16618  %4 = and <4 x i1> %2, %extract.i
16619  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
16620  %6 = bitcast <64 x i1> %5 to i64
16621  ret i64 %6
16622}
16623
16624
; Unsigned less-than compare of %__a (viewed as <4 x i32>) against a scalar
; i32 loaded from %__b and splatted to all four lanes (insertelement into
; lane 0, then a shuffle with an all-zero index mask). The <4 x i1> result is
; widened to <64 x i1> and returned as i64; shuffle indices past the first
; four are all in the 4-7 range (zeroinitializer lanes), so bits 4-63 are
; zero.
; In the AVX512VL run the assertions expect the splat as a {1to4} broadcast
; memory operand of vpcmpltud, read back with kmovq. In the AVX512F-only run
; a separate vpbroadcastd fills xmm1, the compare uses zmm registers,
; kshiftlw/kshiftrw $12 clears mask bits 4-15, and movzwl zero-extends the
; 16-bit mask.
16625define zeroext i64 @test_vpcmpultd_v4i1_v64i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
16626; VLX-LABEL: test_vpcmpultd_v4i1_v64i1_mask_mem_b:
16627; VLX:       # %bb.0: # %entry
16628; VLX-NEXT:    vpcmpltud (%rdi){1to4}, %xmm0, %k0
16629; VLX-NEXT:    kmovq %k0, %rax
16630; VLX-NEXT:    retq
16631;
16632; NoVLX-LABEL: test_vpcmpultd_v4i1_v64i1_mask_mem_b:
16633; NoVLX:       # %bb.0: # %entry
16634; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
16635; NoVLX-NEXT:    vpbroadcastd (%rdi), %xmm1
16636; NoVLX-NEXT:    vpcmpltud %zmm1, %zmm0, %k0
16637; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
16638; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
16639; NoVLX-NEXT:    kmovw %k0, %eax
16640; NoVLX-NEXT:    movzwl %ax, %eax
16641; NoVLX-NEXT:    vzeroupper
16642; NoVLX-NEXT:    retq
16643entry:
16644  %0 = bitcast <2 x i64> %__a to <4 x i32>
16645  %load = load i32, i32* %__b
16646  %vec = insertelement <4 x i32> undef, i32 %load, i32 0
16647  %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
16648  %2 = icmp ult <4 x i32> %0, %1
16649  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
16650  %4 = bitcast <64 x i1> %3 to i64
16651  ret i64 %4
16652}
16653
; Masked unsigned less-than (icmp ult) on <4 x i32> against a splatted scalar
; i32 loaded from %__b. The i8 mask %__u is bitcast to <8 x i1>, its low four
; lanes are extracted and AND'ed with the compare result.
; The <4 x i1> value is zero-padded to <64 x i1> (indices 4-7 select lanes of
; the zeroinitializer operand) and returned as an i64 bitmask.
; Expected: with AVX512VL a write-masked {1to4} broadcast compare; with
; AVX512F only, a write-masked zmm compare followed by a kshiftlw/kshiftrw
; $12 pair that keeps just the low 4 mask bits.
16654define zeroext i64 @test_masked_vpcmpultd_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
16655; VLX-LABEL: test_masked_vpcmpultd_v4i1_v64i1_mask_mem_b:
16656; VLX:       # %bb.0: # %entry
16657; VLX-NEXT:    kmovd %edi, %k1
16658; VLX-NEXT:    vpcmpltud (%rsi){1to4}, %xmm0, %k0 {%k1}
16659; VLX-NEXT:    kmovq %k0, %rax
16660; VLX-NEXT:    retq
16661;
16662; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v64i1_mask_mem_b:
16663; NoVLX:       # %bb.0: # %entry
16664; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
16665; NoVLX-NEXT:    vpbroadcastd (%rsi), %xmm1
16666; NoVLX-NEXT:    kmovw %edi, %k1
16667; NoVLX-NEXT:    vpcmpltud %zmm1, %zmm0, %k0 {%k1}
16668; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
16669; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
16670; NoVLX-NEXT:    kmovw %k0, %eax
16671; NoVLX-NEXT:    movzwl %ax, %eax
16672; NoVLX-NEXT:    vzeroupper
16673; NoVLX-NEXT:    retq
16674entry:
16675  %0 = bitcast <2 x i64> %__a to <4 x i32>
16676  %load = load i32, i32* %__b
16677  %vec = insertelement <4 x i32> undef, i32 %load, i32 0
16678  %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
16679  %2 = icmp ult <4 x i32> %0, %1
16680  %3 = bitcast i8 %__u to <8 x i1>
16681  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
16682  %4 = and <4 x i1> %extract.i, %2
16683  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
16684  %6 = bitcast <64 x i1> %5 to i64
16685  ret i64 %6
16686}
16687
16688
; Unmasked unsigned less-than (icmp ult) on two <8 x i32> register operands.
; The <8 x i1> result is zero-padded to <16 x i1> (shuffle indices 8-15 select
; lanes of the zeroinitializer operand) and returned as an i16 bitmask.
; Expected: with AVX512VL a single ymm compare into a mask register; with
; AVX512F only, both operands are treated as zmm and a kshiftlw/kshiftrw $8
; pair keeps just the low 8 mask bits.
16689define zeroext i16 @test_vpcmpultd_v8i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
16690; VLX-LABEL: test_vpcmpultd_v8i1_v16i1_mask:
16691; VLX:       # %bb.0: # %entry
16692; VLX-NEXT:    vpcmpltud %ymm1, %ymm0, %k0
16693; VLX-NEXT:    kmovd %k0, %eax
16694; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
16695; VLX-NEXT:    vzeroupper
16696; VLX-NEXT:    retq
16697;
16698; NoVLX-LABEL: test_vpcmpultd_v8i1_v16i1_mask:
16699; NoVLX:       # %bb.0: # %entry
16700; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
16701; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
16702; NoVLX-NEXT:    vpcmpltud %zmm1, %zmm0, %k0
16703; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
16704; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
16705; NoVLX-NEXT:    kmovw %k0, %eax
16706; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
16707; NoVLX-NEXT:    vzeroupper
16708; NoVLX-NEXT:    retq
16709entry:
16710  %0 = bitcast <4 x i64> %__a to <8 x i32>
16711  %1 = bitcast <4 x i64> %__b to <8 x i32>
16712  %2 = icmp ult <8 x i32> %0, %1
16713  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
16714  %4 = bitcast <16 x i1> %3 to i16
16715  ret i16 %4
16716}
16717
; Unmasked unsigned less-than (icmp ult) on <8 x i32> where the right-hand
; operand is a full <4 x i64> vector loaded from %__b.
; The <8 x i1> result is zero-padded to <16 x i1> (indices 8-15 select lanes of
; the zeroinitializer operand) and returned as an i16 bitmask.
; Expected: with AVX512VL the load folds into the ymm compare; with AVX512F
; only, the vector is loaded with vmovdqa, compared in zmm registers, and a
; kshiftlw/kshiftrw $8 pair keeps just the low 8 mask bits.
16718define zeroext i16 @test_vpcmpultd_v8i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
16719; VLX-LABEL: test_vpcmpultd_v8i1_v16i1_mask_mem:
16720; VLX:       # %bb.0: # %entry
16721; VLX-NEXT:    vpcmpltud (%rdi), %ymm0, %k0
16722; VLX-NEXT:    kmovd %k0, %eax
16723; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
16724; VLX-NEXT:    vzeroupper
16725; VLX-NEXT:    retq
16726;
16727; NoVLX-LABEL: test_vpcmpultd_v8i1_v16i1_mask_mem:
16728; NoVLX:       # %bb.0: # %entry
16729; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
16730; NoVLX-NEXT:    vmovdqa (%rdi), %ymm1
16731; NoVLX-NEXT:    vpcmpltud %zmm1, %zmm0, %k0
16732; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
16733; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
16734; NoVLX-NEXT:    kmovw %k0, %eax
16735; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
16736; NoVLX-NEXT:    vzeroupper
16737; NoVLX-NEXT:    retq
16738entry:
16739  %0 = bitcast <4 x i64> %__a to <8 x i32>
16740  %load = load <4 x i64>, <4 x i64>* %__b
16741  %1 = bitcast <4 x i64> %load to <8 x i32>
16742  %2 = icmp ult <8 x i32> %0, %1
16743  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
16744  %4 = bitcast <16 x i1> %3 to i16
16745  ret i16 %4
16746}
16747
; Masked unsigned less-than (icmp ult) on two <8 x i32> register operands.
; The i8 mask %__u is bitcast to <8 x i1> and AND'ed with the compare result.
; The <8 x i1> value is zero-padded to <16 x i1> (indices 8-15 select lanes of
; the zeroinitializer operand) and returned as an i16 bitmask.
; Expected: with AVX512VL a write-masked ymm compare; with AVX512F only, a
; write-masked zmm compare followed by a kshiftlw/kshiftrw $8 pair that keeps
; just the low 8 mask bits.
16748define zeroext i16 @test_masked_vpcmpultd_v8i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
16749; VLX-LABEL: test_masked_vpcmpultd_v8i1_v16i1_mask:
16750; VLX:       # %bb.0: # %entry
16751; VLX-NEXT:    kmovd %edi, %k1
16752; VLX-NEXT:    vpcmpltud %ymm1, %ymm0, %k0 {%k1}
16753; VLX-NEXT:    kmovd %k0, %eax
16754; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
16755; VLX-NEXT:    vzeroupper
16756; VLX-NEXT:    retq
16757;
16758; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v16i1_mask:
16759; NoVLX:       # %bb.0: # %entry
16760; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
16761; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
16762; NoVLX-NEXT:    kmovw %edi, %k1
16763; NoVLX-NEXT:    vpcmpltud %zmm1, %zmm0, %k0 {%k1}
16764; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
16765; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
16766; NoVLX-NEXT:    kmovw %k0, %eax
16767; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
16768; NoVLX-NEXT:    vzeroupper
16769; NoVLX-NEXT:    retq
16770entry:
16771  %0 = bitcast <4 x i64> %__a to <8 x i32>
16772  %1 = bitcast <4 x i64> %__b to <8 x i32>
16773  %2 = icmp ult <8 x i32> %0, %1
16774  %3 = bitcast i8 %__u to <8 x i1>
16775  %4 = and <8 x i1> %2, %3
16776  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
16777  %6 = bitcast <16 x i1> %5 to i16
16778  ret i16 %6
16779}
16780
; Masked unsigned less-than (icmp ult) on <8 x i32> where the right-hand
; operand is a full <4 x i64> vector loaded from %__b. The i8 mask %__u is
; bitcast to <8 x i1> and AND'ed with the compare result.
; The <8 x i1> value is zero-padded to <16 x i1> (indices 8-15 select lanes of
; the zeroinitializer operand) and returned as an i16 bitmask.
; Expected: with AVX512VL a write-masked ymm compare with folded load; with
; AVX512F only, a vmovdqa load, a write-masked zmm compare, and a
; kshiftlw/kshiftrw $8 pair that keeps just the low 8 mask bits.
16781define zeroext i16 @test_masked_vpcmpultd_v8i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
16782; VLX-LABEL: test_masked_vpcmpultd_v8i1_v16i1_mask_mem:
16783; VLX:       # %bb.0: # %entry
16784; VLX-NEXT:    kmovd %edi, %k1
16785; VLX-NEXT:    vpcmpltud (%rsi), %ymm0, %k0 {%k1}
16786; VLX-NEXT:    kmovd %k0, %eax
16787; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
16788; VLX-NEXT:    vzeroupper
16789; VLX-NEXT:    retq
16790;
16791; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v16i1_mask_mem:
16792; NoVLX:       # %bb.0: # %entry
16793; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
16794; NoVLX-NEXT:    vmovdqa (%rsi), %ymm1
16795; NoVLX-NEXT:    kmovw %edi, %k1
16796; NoVLX-NEXT:    vpcmpltud %zmm1, %zmm0, %k0 {%k1}
16797; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
16798; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
16799; NoVLX-NEXT:    kmovw %k0, %eax
16800; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
16801; NoVLX-NEXT:    vzeroupper
16802; NoVLX-NEXT:    retq
16803entry:
16804  %0 = bitcast <4 x i64> %__a to <8 x i32>
16805  %load = load <4 x i64>, <4 x i64>* %__b
16806  %1 = bitcast <4 x i64> %load to <8 x i32>
16807  %2 = icmp ult <8 x i32> %0, %1
16808  %3 = bitcast i8 %__u to <8 x i1>
16809  %4 = and <8 x i1> %2, %3
16810  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
16811  %6 = bitcast <16 x i1> %5 to i16
16812  ret i16 %6
16813}
16814
16815
; Unmasked unsigned less-than (icmp ult) on <8 x i32>; the right-hand operand
; is a scalar i32 loaded from %__b and splatted to all eight lanes.
; The <8 x i1> result is zero-padded to <16 x i1> (indices 8-15 select lanes of
; the zeroinitializer operand) and returned as an i16 bitmask.
; Expected: with AVX512VL the splat folds into a {1to8} broadcast compare;
; with AVX512F only, a separate vpbroadcastd, a zmm compare, and a
; kshiftlw/kshiftrw $8 pair that keeps just the low 8 mask bits.
16816define zeroext i16 @test_vpcmpultd_v8i1_v16i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
16817; VLX-LABEL: test_vpcmpultd_v8i1_v16i1_mask_mem_b:
16818; VLX:       # %bb.0: # %entry
16819; VLX-NEXT:    vpcmpltud (%rdi){1to8}, %ymm0, %k0
16820; VLX-NEXT:    kmovd %k0, %eax
16821; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
16822; VLX-NEXT:    vzeroupper
16823; VLX-NEXT:    retq
16824;
16825; NoVLX-LABEL: test_vpcmpultd_v8i1_v16i1_mask_mem_b:
16826; NoVLX:       # %bb.0: # %entry
16827; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
16828; NoVLX-NEXT:    vpbroadcastd (%rdi), %ymm1
16829; NoVLX-NEXT:    vpcmpltud %zmm1, %zmm0, %k0
16830; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
16831; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
16832; NoVLX-NEXT:    kmovw %k0, %eax
16833; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
16834; NoVLX-NEXT:    vzeroupper
16835; NoVLX-NEXT:    retq
16836entry:
16837  %0 = bitcast <4 x i64> %__a to <8 x i32>
16838  %load = load i32, i32* %__b
16839  %vec = insertelement <8 x i32> undef, i32 %load, i32 0
16840  %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
16841  %2 = icmp ult <8 x i32> %0, %1
16842  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
16843  %4 = bitcast <16 x i1> %3 to i16
16844  ret i16 %4
16845}
16846
; Masked unsigned less-than (icmp ult) on <8 x i32> against a scalar i32
; loaded from %__b and splatted to all eight lanes. The i8 mask %__u is
; bitcast to <8 x i1> and AND'ed with the compare result.
; The <8 x i1> value is zero-padded to <16 x i1> (indices 8-15 select lanes of
; the zeroinitializer operand) and returned as an i16 bitmask.
; Expected: with AVX512VL a write-masked {1to8} broadcast compare; with
; AVX512F only, a vpbroadcastd, a write-masked zmm compare, and a
; kshiftlw/kshiftrw $8 pair that keeps just the low 8 mask bits.
16847define zeroext i16 @test_masked_vpcmpultd_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
16848; VLX-LABEL: test_masked_vpcmpultd_v8i1_v16i1_mask_mem_b:
16849; VLX:       # %bb.0: # %entry
16850; VLX-NEXT:    kmovd %edi, %k1
16851; VLX-NEXT:    vpcmpltud (%rsi){1to8}, %ymm0, %k0 {%k1}
16852; VLX-NEXT:    kmovd %k0, %eax
16853; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
16854; VLX-NEXT:    vzeroupper
16855; VLX-NEXT:    retq
16856;
16857; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v16i1_mask_mem_b:
16858; NoVLX:       # %bb.0: # %entry
16859; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
16860; NoVLX-NEXT:    vpbroadcastd (%rsi), %ymm1
16861; NoVLX-NEXT:    kmovw %edi, %k1
16862; NoVLX-NEXT:    vpcmpltud %zmm1, %zmm0, %k0 {%k1}
16863; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
16864; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
16865; NoVLX-NEXT:    kmovw %k0, %eax
16866; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
16867; NoVLX-NEXT:    vzeroupper
16868; NoVLX-NEXT:    retq
16869entry:
16870  %0 = bitcast <4 x i64> %__a to <8 x i32>
16871  %load = load i32, i32* %__b
16872  %vec = insertelement <8 x i32> undef, i32 %load, i32 0
16873  %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
16874  %2 = icmp ult <8 x i32> %0, %1
16875  %3 = bitcast i8 %__u to <8 x i1>
16876  %4 = and <8 x i1> %3, %2
16877  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
16878  %6 = bitcast <16 x i1> %5 to i16
16879  ret i16 %6
16880}
16881
16882
; Unmasked unsigned less-than (icmp ult) on two <8 x i32> register operands.
; The <8 x i1> result is zero-padded to <32 x i1> (every shuffle index in
; 8-15 selects a lane of the zeroinitializer operand) and returned as i32.
; Expected: with AVX512VL a single ymm compare into a mask register; with
; AVX512F only, a zmm compare followed by a kshiftlw/kshiftrw $8 pair that
; keeps just the low 8 mask bits.
16883define zeroext i32 @test_vpcmpultd_v8i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
16884; VLX-LABEL: test_vpcmpultd_v8i1_v32i1_mask:
16885; VLX:       # %bb.0: # %entry
16886; VLX-NEXT:    vpcmpltud %ymm1, %ymm0, %k0
16887; VLX-NEXT:    kmovd %k0, %eax
16888; VLX-NEXT:    vzeroupper
16889; VLX-NEXT:    retq
16890;
16891; NoVLX-LABEL: test_vpcmpultd_v8i1_v32i1_mask:
16892; NoVLX:       # %bb.0: # %entry
16893; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
16894; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
16895; NoVLX-NEXT:    vpcmpltud %zmm1, %zmm0, %k0
16896; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
16897; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
16898; NoVLX-NEXT:    kmovw %k0, %eax
16899; NoVLX-NEXT:    vzeroupper
16900; NoVLX-NEXT:    retq
16901entry:
16902  %0 = bitcast <4 x i64> %__a to <8 x i32>
16903  %1 = bitcast <4 x i64> %__b to <8 x i32>
16904  %2 = icmp ult <8 x i32> %0, %1
16905  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
16906  %4 = bitcast <32 x i1> %3 to i32
16907  ret i32 %4
16908}
16909
; Unmasked unsigned less-than (icmp ult) on <8 x i32> where the right-hand
; operand is a full <4 x i64> vector loaded from %__b.
; The <8 x i1> result is zero-padded to <32 x i1> (every shuffle index in
; 8-15 selects a lane of the zeroinitializer operand) and returned as i32.
; Expected: with AVX512VL the load folds into the ymm compare; with AVX512F
; only, a vmovdqa load, a zmm compare, and a kshiftlw/kshiftrw $8 pair that
; keeps just the low 8 mask bits.
16910define zeroext i32 @test_vpcmpultd_v8i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
16911; VLX-LABEL: test_vpcmpultd_v8i1_v32i1_mask_mem:
16912; VLX:       # %bb.0: # %entry
16913; VLX-NEXT:    vpcmpltud (%rdi), %ymm0, %k0
16914; VLX-NEXT:    kmovd %k0, %eax
16915; VLX-NEXT:    vzeroupper
16916; VLX-NEXT:    retq
16917;
16918; NoVLX-LABEL: test_vpcmpultd_v8i1_v32i1_mask_mem:
16919; NoVLX:       # %bb.0: # %entry
16920; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
16921; NoVLX-NEXT:    vmovdqa (%rdi), %ymm1
16922; NoVLX-NEXT:    vpcmpltud %zmm1, %zmm0, %k0
16923; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
16924; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
16925; NoVLX-NEXT:    kmovw %k0, %eax
16926; NoVLX-NEXT:    vzeroupper
16927; NoVLX-NEXT:    retq
16928entry:
16929  %0 = bitcast <4 x i64> %__a to <8 x i32>
16930  %load = load <4 x i64>, <4 x i64>* %__b
16931  %1 = bitcast <4 x i64> %load to <8 x i32>
16932  %2 = icmp ult <8 x i32> %0, %1
16933  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
16934  %4 = bitcast <32 x i1> %3 to i32
16935  ret i32 %4
16936}
16937
; Masked unsigned less-than (icmp ult) on two <8 x i32> register operands.
; The i8 mask %__u is bitcast to <8 x i1> and AND'ed with the compare result.
; The <8 x i1> value is zero-padded to <32 x i1> (every shuffle index in
; 8-15 selects a lane of the zeroinitializer operand) and returned as i32.
; Expected: with AVX512VL a write-masked ymm compare; with AVX512F only, a
; write-masked zmm compare followed by a kshiftlw/kshiftrw $8 pair that keeps
; just the low 8 mask bits.
16938define zeroext i32 @test_masked_vpcmpultd_v8i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
16939; VLX-LABEL: test_masked_vpcmpultd_v8i1_v32i1_mask:
16940; VLX:       # %bb.0: # %entry
16941; VLX-NEXT:    kmovd %edi, %k1
16942; VLX-NEXT:    vpcmpltud %ymm1, %ymm0, %k0 {%k1}
16943; VLX-NEXT:    kmovd %k0, %eax
16944; VLX-NEXT:    vzeroupper
16945; VLX-NEXT:    retq
16946;
16947; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v32i1_mask:
16948; NoVLX:       # %bb.0: # %entry
16949; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
16950; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
16951; NoVLX-NEXT:    kmovw %edi, %k1
16952; NoVLX-NEXT:    vpcmpltud %zmm1, %zmm0, %k0 {%k1}
16953; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
16954; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
16955; NoVLX-NEXT:    kmovw %k0, %eax
16956; NoVLX-NEXT:    vzeroupper
16957; NoVLX-NEXT:    retq
16958entry:
16959  %0 = bitcast <4 x i64> %__a to <8 x i32>
16960  %1 = bitcast <4 x i64> %__b to <8 x i32>
16961  %2 = icmp ult <8 x i32> %0, %1
16962  %3 = bitcast i8 %__u to <8 x i1>
16963  %4 = and <8 x i1> %2, %3
16964  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
16965  %6 = bitcast <32 x i1> %5 to i32
16966  ret i32 %6
16967}
16968
; Masked unsigned less-than (icmp ult) on <8 x i32> where the right-hand
; operand is a full <4 x i64> vector loaded from %__b. The i8 mask %__u is
; bitcast to <8 x i1> and AND'ed with the compare result.
; The <8 x i1> value is zero-padded to <32 x i1> (every shuffle index in
; 8-15 selects a lane of the zeroinitializer operand) and returned as i32.
; Expected: with AVX512VL a write-masked ymm compare with folded load; with
; AVX512F only, a vmovdqa load, a write-masked zmm compare, and a
; kshiftlw/kshiftrw $8 pair that keeps just the low 8 mask bits.
16969define zeroext i32 @test_masked_vpcmpultd_v8i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
16970; VLX-LABEL: test_masked_vpcmpultd_v8i1_v32i1_mask_mem:
16971; VLX:       # %bb.0: # %entry
16972; VLX-NEXT:    kmovd %edi, %k1
16973; VLX-NEXT:    vpcmpltud (%rsi), %ymm0, %k0 {%k1}
16974; VLX-NEXT:    kmovd %k0, %eax
16975; VLX-NEXT:    vzeroupper
16976; VLX-NEXT:    retq
16977;
16978; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v32i1_mask_mem:
16979; NoVLX:       # %bb.0: # %entry
16980; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
16981; NoVLX-NEXT:    vmovdqa (%rsi), %ymm1
16982; NoVLX-NEXT:    kmovw %edi, %k1
16983; NoVLX-NEXT:    vpcmpltud %zmm1, %zmm0, %k0 {%k1}
16984; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
16985; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
16986; NoVLX-NEXT:    kmovw %k0, %eax
16987; NoVLX-NEXT:    vzeroupper
16988; NoVLX-NEXT:    retq
16989entry:
16990  %0 = bitcast <4 x i64> %__a to <8 x i32>
16991  %load = load <4 x i64>, <4 x i64>* %__b
16992  %1 = bitcast <4 x i64> %load to <8 x i32>
16993  %2 = icmp ult <8 x i32> %0, %1
16994  %3 = bitcast i8 %__u to <8 x i1>
16995  %4 = and <8 x i1> %2, %3
16996  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
16997  %6 = bitcast <32 x i1> %5 to i32
16998  ret i32 %6
16999}
17000
17001
; Unmasked unsigned less-than (icmp ult) on <8 x i32>; the right-hand operand
; is a scalar i32 loaded from %__b and splatted to all eight lanes.
; The <8 x i1> result is zero-padded to <32 x i1> (every shuffle index in
; 8-15 selects a lane of the zeroinitializer operand) and returned as i32.
; Expected: with AVX512VL the splat folds into a {1to8} broadcast compare;
; with AVX512F only, a separate vpbroadcastd, a zmm compare, and a
; kshiftlw/kshiftrw $8 pair that keeps just the low 8 mask bits.
17002define zeroext i32 @test_vpcmpultd_v8i1_v32i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
17003; VLX-LABEL: test_vpcmpultd_v8i1_v32i1_mask_mem_b:
17004; VLX:       # %bb.0: # %entry
17005; VLX-NEXT:    vpcmpltud (%rdi){1to8}, %ymm0, %k0
17006; VLX-NEXT:    kmovd %k0, %eax
17007; VLX-NEXT:    vzeroupper
17008; VLX-NEXT:    retq
17009;
17010; NoVLX-LABEL: test_vpcmpultd_v8i1_v32i1_mask_mem_b:
17011; NoVLX:       # %bb.0: # %entry
17012; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
17013; NoVLX-NEXT:    vpbroadcastd (%rdi), %ymm1
17014; NoVLX-NEXT:    vpcmpltud %zmm1, %zmm0, %k0
17015; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
17016; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
17017; NoVLX-NEXT:    kmovw %k0, %eax
17018; NoVLX-NEXT:    vzeroupper
17019; NoVLX-NEXT:    retq
17020entry:
17021  %0 = bitcast <4 x i64> %__a to <8 x i32>
17022  %load = load i32, i32* %__b
17023  %vec = insertelement <8 x i32> undef, i32 %load, i32 0
17024  %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
17025  %2 = icmp ult <8 x i32> %0, %1
17026  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
17027  %4 = bitcast <32 x i1> %3 to i32
17028  ret i32 %4
17029}
17030
; Masked unsigned less-than (icmp ult) on <8 x i32> against a scalar i32
; loaded from %__b and splatted to all eight lanes. The i8 mask %__u is
; bitcast to <8 x i1> and AND'ed with the compare result.
; The <8 x i1> value is zero-padded to <32 x i1> (every shuffle index in
; 8-15 selects a lane of the zeroinitializer operand) and returned as i32.
; Expected: with AVX512VL a write-masked {1to8} broadcast compare; with
; AVX512F only, a vpbroadcastd, a write-masked zmm compare, and a
; kshiftlw/kshiftrw $8 pair that keeps just the low 8 mask bits.
17031define zeroext i32 @test_masked_vpcmpultd_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
17032; VLX-LABEL: test_masked_vpcmpultd_v8i1_v32i1_mask_mem_b:
17033; VLX:       # %bb.0: # %entry
17034; VLX-NEXT:    kmovd %edi, %k1
17035; VLX-NEXT:    vpcmpltud (%rsi){1to8}, %ymm0, %k0 {%k1}
17036; VLX-NEXT:    kmovd %k0, %eax
17037; VLX-NEXT:    vzeroupper
17038; VLX-NEXT:    retq
17039;
17040; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v32i1_mask_mem_b:
17041; NoVLX:       # %bb.0: # %entry
17042; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
17043; NoVLX-NEXT:    vpbroadcastd (%rsi), %ymm1
17044; NoVLX-NEXT:    kmovw %edi, %k1
17045; NoVLX-NEXT:    vpcmpltud %zmm1, %zmm0, %k0 {%k1}
17046; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
17047; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
17048; NoVLX-NEXT:    kmovw %k0, %eax
17049; NoVLX-NEXT:    vzeroupper
17050; NoVLX-NEXT:    retq
17051entry:
17052  %0 = bitcast <4 x i64> %__a to <8 x i32>
17053  %load = load i32, i32* %__b
17054  %vec = insertelement <8 x i32> undef, i32 %load, i32 0
17055  %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
17056  %2 = icmp ult <8 x i32> %0, %1
17057  %3 = bitcast i8 %__u to <8 x i1>
17058  %4 = and <8 x i1> %3, %2
17059  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
17060  %6 = bitcast <32 x i1> %5 to i32
17061  ret i32 %6
17062}
17063
17064
; Unmasked unsigned less-than (icmp ult) on two <8 x i32> register operands.
; The <8 x i1> result is zero-padded to <64 x i1> (every shuffle index in
; 8-15 selects a lane of the zeroinitializer operand) and returned as i64.
; Expected: with AVX512VL a single ymm compare read back with kmovq; with
; AVX512F only, a zmm compare, a kshiftlw/kshiftrw $8 pair that keeps just
; the low 8 mask bits, and a movzwl zero-extension of the result.
17065define zeroext i64 @test_vpcmpultd_v8i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
17066; VLX-LABEL: test_vpcmpultd_v8i1_v64i1_mask:
17067; VLX:       # %bb.0: # %entry
17068; VLX-NEXT:    vpcmpltud %ymm1, %ymm0, %k0
17069; VLX-NEXT:    kmovq %k0, %rax
17070; VLX-NEXT:    vzeroupper
17071; VLX-NEXT:    retq
17072;
17073; NoVLX-LABEL: test_vpcmpultd_v8i1_v64i1_mask:
17074; NoVLX:       # %bb.0: # %entry
17075; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
17076; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
17077; NoVLX-NEXT:    vpcmpltud %zmm1, %zmm0, %k0
17078; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
17079; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
17080; NoVLX-NEXT:    kmovw %k0, %eax
17081; NoVLX-NEXT:    movzwl %ax, %eax
17082; NoVLX-NEXT:    vzeroupper
17083; NoVLX-NEXT:    retq
17084entry:
17085  %0 = bitcast <4 x i64> %__a to <8 x i32>
17086  %1 = bitcast <4 x i64> %__b to <8 x i32>
17087  %2 = icmp ult <8 x i32> %0, %1
17088  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
17089  %4 = bitcast <64 x i1> %3 to i64
17090  ret i64 %4
17091}
17092
; Unmasked unsigned less-than (icmp ult) on <8 x i32> where the right-hand
; operand is a full <4 x i64> vector loaded from %__b.
; The <8 x i1> result is zero-padded to <64 x i1> (every shuffle index in
; 8-15 selects a lane of the zeroinitializer operand) and returned as i64.
; Expected: with AVX512VL the load folds into the ymm compare; with AVX512F
; only, a vmovdqa load, a zmm compare, a kshiftlw/kshiftrw $8 pair that keeps
; just the low 8 mask bits, and a movzwl zero-extension of the result.
17093define zeroext i64 @test_vpcmpultd_v8i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
17094; VLX-LABEL: test_vpcmpultd_v8i1_v64i1_mask_mem:
17095; VLX:       # %bb.0: # %entry
17096; VLX-NEXT:    vpcmpltud (%rdi), %ymm0, %k0
17097; VLX-NEXT:    kmovq %k0, %rax
17098; VLX-NEXT:    vzeroupper
17099; VLX-NEXT:    retq
17100;
17101; NoVLX-LABEL: test_vpcmpultd_v8i1_v64i1_mask_mem:
17102; NoVLX:       # %bb.0: # %entry
17103; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
17104; NoVLX-NEXT:    vmovdqa (%rdi), %ymm1
17105; NoVLX-NEXT:    vpcmpltud %zmm1, %zmm0, %k0
17106; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
17107; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
17108; NoVLX-NEXT:    kmovw %k0, %eax
17109; NoVLX-NEXT:    movzwl %ax, %eax
17110; NoVLX-NEXT:    vzeroupper
17111; NoVLX-NEXT:    retq
17112entry:
17113  %0 = bitcast <4 x i64> %__a to <8 x i32>
17114  %load = load <4 x i64>, <4 x i64>* %__b
17115  %1 = bitcast <4 x i64> %load to <8 x i32>
17116  %2 = icmp ult <8 x i32> %0, %1
17117  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
17118  %4 = bitcast <64 x i1> %3 to i64
17119  ret i64 %4
17120}
17121
; Masked unsigned less-than (icmp ult) on two <8 x i32> register operands.
; The i8 mask %__u is bitcast to <8 x i1> and AND'ed with the compare result.
; The <8 x i1> value is zero-padded to <64 x i1> (every shuffle index in
; 8-15 selects a lane of the zeroinitializer operand) and returned as i64.
; Expected: with AVX512VL a write-masked ymm compare read back with kmovq;
; with AVX512F only, a write-masked zmm compare, a kshiftlw/kshiftrw $8 pair
; that keeps just the low 8 mask bits, and a movzwl zero-extension.
17122define zeroext i64 @test_masked_vpcmpultd_v8i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
17123; VLX-LABEL: test_masked_vpcmpultd_v8i1_v64i1_mask:
17124; VLX:       # %bb.0: # %entry
17125; VLX-NEXT:    kmovd %edi, %k1
17126; VLX-NEXT:    vpcmpltud %ymm1, %ymm0, %k0 {%k1}
17127; VLX-NEXT:    kmovq %k0, %rax
17128; VLX-NEXT:    vzeroupper
17129; VLX-NEXT:    retq
17130;
17131; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v64i1_mask:
17132; NoVLX:       # %bb.0: # %entry
17133; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
17134; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
17135; NoVLX-NEXT:    kmovw %edi, %k1
17136; NoVLX-NEXT:    vpcmpltud %zmm1, %zmm0, %k0 {%k1}
17137; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
17138; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
17139; NoVLX-NEXT:    kmovw %k0, %eax
17140; NoVLX-NEXT:    movzwl %ax, %eax
17141; NoVLX-NEXT:    vzeroupper
17142; NoVLX-NEXT:    retq
17143entry:
17144  %0 = bitcast <4 x i64> %__a to <8 x i32>
17145  %1 = bitcast <4 x i64> %__b to <8 x i32>
17146  %2 = icmp ult <8 x i32> %0, %1
17147  %3 = bitcast i8 %__u to <8 x i1>
17148  %4 = and <8 x i1> %2, %3
17149  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
17150  %6 = bitcast <64 x i1> %5 to i64
17151  ret i64 %6
17152}
17153
; Masked unsigned less-than (icmp ult) on <8 x i32> where the right-hand
; operand is a full <4 x i64> vector loaded from %__b. The i8 mask %__u is
; bitcast to <8 x i1> and AND'ed with the compare result.
; The <8 x i1> value is zero-padded to <64 x i1> (every shuffle index in
; 8-15 selects a lane of the zeroinitializer operand) and returned as i64.
; Expected: with AVX512VL a write-masked ymm compare with folded load; with
; AVX512F only, a vmovdqa load, a write-masked zmm compare, a
; kshiftlw/kshiftrw $8 pair keeping the low 8 mask bits, and a movzwl.
17154define zeroext i64 @test_masked_vpcmpultd_v8i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
17155; VLX-LABEL: test_masked_vpcmpultd_v8i1_v64i1_mask_mem:
17156; VLX:       # %bb.0: # %entry
17157; VLX-NEXT:    kmovd %edi, %k1
17158; VLX-NEXT:    vpcmpltud (%rsi), %ymm0, %k0 {%k1}
17159; VLX-NEXT:    kmovq %k0, %rax
17160; VLX-NEXT:    vzeroupper
17161; VLX-NEXT:    retq
17162;
17163; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v64i1_mask_mem:
17164; NoVLX:       # %bb.0: # %entry
17165; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
17166; NoVLX-NEXT:    vmovdqa (%rsi), %ymm1
17167; NoVLX-NEXT:    kmovw %edi, %k1
17168; NoVLX-NEXT:    vpcmpltud %zmm1, %zmm0, %k0 {%k1}
17169; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
17170; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
17171; NoVLX-NEXT:    kmovw %k0, %eax
17172; NoVLX-NEXT:    movzwl %ax, %eax
17173; NoVLX-NEXT:    vzeroupper
17174; NoVLX-NEXT:    retq
17175entry:
17176  %0 = bitcast <4 x i64> %__a to <8 x i32>
17177  %load = load <4 x i64>, <4 x i64>* %__b
17178  %1 = bitcast <4 x i64> %load to <8 x i32>
17179  %2 = icmp ult <8 x i32> %0, %1
17180  %3 = bitcast i8 %__u to <8 x i1>
17181  %4 = and <8 x i1> %2, %3
17182  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
17183  %6 = bitcast <64 x i1> %5 to i64
17184  ret i64 %6
17185}
17186
17187
; Unmasked unsigned less-than (icmp ult) on <8 x i32>; the right-hand operand
; is a scalar i32 loaded from %__b and splatted to all eight lanes.
; The <8 x i1> result is zero-padded to <64 x i1> (every shuffle index in
; 8-15 selects a lane of the zeroinitializer operand) and returned as i64.
; Expected: with AVX512VL the splat folds into a {1to8} broadcast compare;
; with AVX512F only, a separate vpbroadcastd, a zmm compare, a
; kshiftlw/kshiftrw $8 pair keeping the low 8 mask bits, and a movzwl.
17188define zeroext i64 @test_vpcmpultd_v8i1_v64i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
17189; VLX-LABEL: test_vpcmpultd_v8i1_v64i1_mask_mem_b:
17190; VLX:       # %bb.0: # %entry
17191; VLX-NEXT:    vpcmpltud (%rdi){1to8}, %ymm0, %k0
17192; VLX-NEXT:    kmovq %k0, %rax
17193; VLX-NEXT:    vzeroupper
17194; VLX-NEXT:    retq
17195;
17196; NoVLX-LABEL: test_vpcmpultd_v8i1_v64i1_mask_mem_b:
17197; NoVLX:       # %bb.0: # %entry
17198; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
17199; NoVLX-NEXT:    vpbroadcastd (%rdi), %ymm1
17200; NoVLX-NEXT:    vpcmpltud %zmm1, %zmm0, %k0
17201; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
17202; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
17203; NoVLX-NEXT:    kmovw %k0, %eax
17204; NoVLX-NEXT:    movzwl %ax, %eax
17205; NoVLX-NEXT:    vzeroupper
17206; NoVLX-NEXT:    retq
17207entry:
17208  %0 = bitcast <4 x i64> %__a to <8 x i32>
17209  %load = load i32, i32* %__b
17210  %vec = insertelement <8 x i32> undef, i32 %load, i32 0
17211  %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
17212  %2 = icmp ult <8 x i32> %0, %1
17213  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
17214  %4 = bitcast <64 x i1> %3 to i64
17215  ret i64 %4
17216}
17217
; Masked unsigned less-than (icmp ult) on <8 x i32> against a scalar i32
; loaded from %__b and splatted to all eight lanes. The i8 mask %__u is
; bitcast to <8 x i1> and AND'ed with the compare result.
; The <8 x i1> value is zero-padded to <64 x i1> (every shuffle index in
; 8-15 selects a lane of the zeroinitializer operand) and returned as i64.
; Expected: with AVX512VL a write-masked {1to8} broadcast compare; with
; AVX512F only, a vpbroadcastd, a write-masked zmm compare, a
; kshiftlw/kshiftrw $8 pair keeping the low 8 mask bits, and a movzwl.
17218define zeroext i64 @test_masked_vpcmpultd_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
17219; VLX-LABEL: test_masked_vpcmpultd_v8i1_v64i1_mask_mem_b:
17220; VLX:       # %bb.0: # %entry
17221; VLX-NEXT:    kmovd %edi, %k1
17222; VLX-NEXT:    vpcmpltud (%rsi){1to8}, %ymm0, %k0 {%k1}
17223; VLX-NEXT:    kmovq %k0, %rax
17224; VLX-NEXT:    vzeroupper
17225; VLX-NEXT:    retq
17226;
17227; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v64i1_mask_mem_b:
17228; NoVLX:       # %bb.0: # %entry
17229; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
17230; NoVLX-NEXT:    vpbroadcastd (%rsi), %ymm1
17231; NoVLX-NEXT:    kmovw %edi, %k1
17232; NoVLX-NEXT:    vpcmpltud %zmm1, %zmm0, %k0 {%k1}
17233; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
17234; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
17235; NoVLX-NEXT:    kmovw %k0, %eax
17236; NoVLX-NEXT:    movzwl %ax, %eax
17237; NoVLX-NEXT:    vzeroupper
17238; NoVLX-NEXT:    retq
17239entry:
17240  %0 = bitcast <4 x i64> %__a to <8 x i32>
17241  %load = load i32, i32* %__b
17242  %vec = insertelement <8 x i32> undef, i32 %load, i32 0
17243  %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
17244  %2 = icmp ult <8 x i32> %0, %1
17245  %3 = bitcast i8 %__u to <8 x i1>
17246  %4 = and <8 x i1> %3, %2
17247  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
17248  %6 = bitcast <64 x i1> %5 to i64
17249  ret i64 %6
17250}
17251
17252
; Unmasked unsigned less-than (icmp ult) on two <16 x i32> register operands.
; The <16 x i1> result is zero-padded to <32 x i1> (shuffle indices 16-31
; select lanes of the zeroinitializer operand) and returned as an i32 bitmask.
; Expected: both run configurations emit one zmm compare into a mask register;
; they differ only in the mask-to-GPR move (kmovd versus kmovw), and no
; kshift cleanup is expected in either.
17253define zeroext i32 @test_vpcmpultd_v16i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
17254; VLX-LABEL: test_vpcmpultd_v16i1_v32i1_mask:
17255; VLX:       # %bb.0: # %entry
17256; VLX-NEXT:    vpcmpltud %zmm1, %zmm0, %k0
17257; VLX-NEXT:    kmovd %k0, %eax
17258; VLX-NEXT:    vzeroupper
17259; VLX-NEXT:    retq
17260;
17261; NoVLX-LABEL: test_vpcmpultd_v16i1_v32i1_mask:
17262; NoVLX:       # %bb.0: # %entry
17263; NoVLX-NEXT:    vpcmpltud %zmm1, %zmm0, %k0
17264; NoVLX-NEXT:    kmovw %k0, %eax
17265; NoVLX-NEXT:    vzeroupper
17266; NoVLX-NEXT:    retq
17267entry:
17268  %0 = bitcast <8 x i64> %__a to <16 x i32>
17269  %1 = bitcast <8 x i64> %__b to <16 x i32>
17270  %2 = icmp ult <16 x i32> %0, %1
17271  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
17272  %4 = bitcast <32 x i1> %3 to i32
17273  ret i32 %4
17274}
17275
; Unmasked unsigned less-than (icmp ult) on <16 x i32> where the right-hand
; operand is a full <8 x i64> vector loaded from %__b.
; The <16 x i1> result is zero-padded to <32 x i1> (shuffle indices 16-31
; select lanes of the zeroinitializer operand) and returned as an i32 bitmask.
; Expected: both run configurations fold the load into one zmm compare; they
; differ only in the mask-to-GPR move (kmovd versus kmovw).
17276define zeroext i32 @test_vpcmpultd_v16i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
17277; VLX-LABEL: test_vpcmpultd_v16i1_v32i1_mask_mem:
17278; VLX:       # %bb.0: # %entry
17279; VLX-NEXT:    vpcmpltud (%rdi), %zmm0, %k0
17280; VLX-NEXT:    kmovd %k0, %eax
17281; VLX-NEXT:    vzeroupper
17282; VLX-NEXT:    retq
17283;
17284; NoVLX-LABEL: test_vpcmpultd_v16i1_v32i1_mask_mem:
17285; NoVLX:       # %bb.0: # %entry
17286; NoVLX-NEXT:    vpcmpltud (%rdi), %zmm0, %k0
17287; NoVLX-NEXT:    kmovw %k0, %eax
17288; NoVLX-NEXT:    vzeroupper
17289; NoVLX-NEXT:    retq
17290entry:
17291  %0 = bitcast <8 x i64> %__a to <16 x i32>
17292  %load = load <8 x i64>, <8 x i64>* %__b
17293  %1 = bitcast <8 x i64> %load to <16 x i32>
17294  %2 = icmp ult <16 x i32> %0, %1
17295  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
17296  %4 = bitcast <32 x i1> %3 to i32
17297  ret i32 %4
17298}
17299
; Masked unsigned less-than compare of two <16 x i32> vectors. %__u is bitcast
; to <16 x i1> and ANDed with the compare result, which is then widened to
; <32 x i1> with zero lanes and returned as i32.
; The checked output for the run that enables AVX512BW/VL/DQ applies the mask
; as {%k1} on the compare; the AVX512F-only output compares unmasked and
; applies %edi afterwards with a scalar andl.
17300define zeroext i32 @test_masked_vpcmpultd_v16i1_v32i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
17301; VLX-LABEL: test_masked_vpcmpultd_v16i1_v32i1_mask:
17302; VLX:       # %bb.0: # %entry
17303; VLX-NEXT:    kmovd %edi, %k1
17304; VLX-NEXT:    vpcmpltud %zmm1, %zmm0, %k0 {%k1}
17305; VLX-NEXT:    kmovd %k0, %eax
17306; VLX-NEXT:    vzeroupper
17307; VLX-NEXT:    retq
17308;
17309; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v32i1_mask:
17310; NoVLX:       # %bb.0: # %entry
17311; NoVLX-NEXT:    vpcmpltud %zmm1, %zmm0, %k0
17312; NoVLX-NEXT:    kmovw %k0, %eax
17313; NoVLX-NEXT:    andl %edi, %eax
17314; NoVLX-NEXT:    vzeroupper
17315; NoVLX-NEXT:    retq
17316entry:
17317  %0 = bitcast <8 x i64> %__a to <16 x i32>
17318  %1 = bitcast <8 x i64> %__b to <16 x i32>
17319  %2 = icmp ult <16 x i32> %0, %1
17320  %3 = bitcast i16 %__u to <16 x i1>
17321  %4 = and <16 x i1> %2, %3
  ; Mask indices 16-31 pick lanes of the zeroinitializer operand, so the upper
  ; 16 bits of the returned value are zero.
17322  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
17323  %6 = bitcast <32 x i1> %5 to i32
17324  ret i32 %6
17325}
17326
; Masked unsigned less-than compare of %__a against a <8 x i64> vector loaded
; from %__b, both viewed as <16 x i32>. %__u is bitcast to <16 x i1> and ANDed
; with the compare result, which is widened to <32 x i1> with zero lanes and
; returned as i32.
; Both checked outputs fold the load into vpcmpltud as a (%rsi) operand. The
; output for the run that enables AVX512BW/VL/DQ applies the mask as {%k1};
; the AVX512F-only output applies %edi afterwards with a scalar andl.
17327define zeroext i32 @test_masked_vpcmpultd_v16i1_v32i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
17328; VLX-LABEL: test_masked_vpcmpultd_v16i1_v32i1_mask_mem:
17329; VLX:       # %bb.0: # %entry
17330; VLX-NEXT:    kmovd %edi, %k1
17331; VLX-NEXT:    vpcmpltud (%rsi), %zmm0, %k0 {%k1}
17332; VLX-NEXT:    kmovd %k0, %eax
17333; VLX-NEXT:    vzeroupper
17334; VLX-NEXT:    retq
17335;
17336; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v32i1_mask_mem:
17337; NoVLX:       # %bb.0: # %entry
17338; NoVLX-NEXT:    vpcmpltud (%rsi), %zmm0, %k0
17339; NoVLX-NEXT:    kmovw %k0, %eax
17340; NoVLX-NEXT:    andl %edi, %eax
17341; NoVLX-NEXT:    vzeroupper
17342; NoVLX-NEXT:    retq
17343entry:
17344  %0 = bitcast <8 x i64> %__a to <16 x i32>
17345  %load = load <8 x i64>, <8 x i64>* %__b
17346  %1 = bitcast <8 x i64> %load to <16 x i32>
17347  %2 = icmp ult <16 x i32> %0, %1
17348  %3 = bitcast i16 %__u to <16 x i1>
17349  %4 = and <16 x i1> %2, %3
  ; Mask indices 16-31 pick lanes of the zeroinitializer operand, so the upper
  ; 16 bits of the returned value are zero.
17350  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
17351  %6 = bitcast <32 x i1> %5 to i32
17352  ret i32 %6
17353}
17354
17355
; Unsigned less-than compare of %__a (viewed as <16 x i32>) against a scalar
; i32 loaded from %__b and splatted to all 16 lanes. The <16 x i1> result is
; widened to <32 x i1> with zero lanes and returned as i32. Both checked
; outputs use an embedded-broadcast {1to16} memory operand on vpcmpltud.
17356define zeroext i32 @test_vpcmpultd_v16i1_v32i1_mask_mem_b(<8 x i64> %__a, i32* %__b) local_unnamed_addr {
17357; VLX-LABEL: test_vpcmpultd_v16i1_v32i1_mask_mem_b:
17358; VLX:       # %bb.0: # %entry
17359; VLX-NEXT:    vpcmpltud (%rdi){1to16}, %zmm0, %k0
17360; VLX-NEXT:    kmovd %k0, %eax
17361; VLX-NEXT:    vzeroupper
17362; VLX-NEXT:    retq
17363;
17364; NoVLX-LABEL: test_vpcmpultd_v16i1_v32i1_mask_mem_b:
17365; NoVLX:       # %bb.0: # %entry
17366; NoVLX-NEXT:    vpcmpltud (%rdi){1to16}, %zmm0, %k0
17367; NoVLX-NEXT:    kmovw %k0, %eax
17368; NoVLX-NEXT:    vzeroupper
17369; NoVLX-NEXT:    retq
17370entry:
17371  %0 = bitcast <8 x i64> %__a to <16 x i32>
17372  %load = load i32, i32* %__b
  ; Splat the loaded scalar: it is inserted into lane 0, then an all-zero
  ; shuffle mask copies lane 0 to every lane.
17373  %vec = insertelement <16 x i32> undef, i32 %load, i32 0
17374  %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
17375  %2 = icmp ult <16 x i32> %0, %1
  ; Mask indices 16-31 pick lanes of the zeroinitializer operand, so the upper
  ; 16 bits of the returned value are zero.
17376  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
17377  %4 = bitcast <32 x i1> %3 to i32
17378  ret i32 %4
17379}
17380
; Masked unsigned less-than compare of %__a (viewed as <16 x i32>) against a
; scalar i32 loaded from %__b and splatted to all 16 lanes. %__u is bitcast to
; <16 x i1> and ANDed with the compare result, which is widened to <32 x i1>
; with zero lanes and returned as i32.
; Both checked outputs use a {1to16} broadcast operand. The output for the run
; that enables AVX512BW/VL/DQ applies the mask as {%k1}; the AVX512F-only
; output applies %edi afterwards with a scalar andl.
17381define zeroext i32 @test_masked_vpcmpultd_v16i1_v32i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, i32* %__b) local_unnamed_addr {
17382; VLX-LABEL: test_masked_vpcmpultd_v16i1_v32i1_mask_mem_b:
17383; VLX:       # %bb.0: # %entry
17384; VLX-NEXT:    kmovd %edi, %k1
17385; VLX-NEXT:    vpcmpltud (%rsi){1to16}, %zmm0, %k0 {%k1}
17386; VLX-NEXT:    kmovd %k0, %eax
17387; VLX-NEXT:    vzeroupper
17388; VLX-NEXT:    retq
17389;
17390; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v32i1_mask_mem_b:
17391; NoVLX:       # %bb.0: # %entry
17392; NoVLX-NEXT:    vpcmpltud (%rsi){1to16}, %zmm0, %k0
17393; NoVLX-NEXT:    kmovw %k0, %eax
17394; NoVLX-NEXT:    andl %edi, %eax
17395; NoVLX-NEXT:    vzeroupper
17396; NoVLX-NEXT:    retq
17397entry:
17398  %0 = bitcast <8 x i64> %__a to <16 x i32>
17399  %load = load i32, i32* %__b
  ; Splat the loaded scalar: it is inserted into lane 0, then an all-zero
  ; shuffle mask copies lane 0 to every lane.
17400  %vec = insertelement <16 x i32> undef, i32 %load, i32 0
17401  %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
17402  %2 = icmp ult <16 x i32> %0, %1
17403  %3 = bitcast i16 %__u to <16 x i1>
17404  %4 = and <16 x i1> %3, %2
  ; Mask indices 16-31 pick lanes of the zeroinitializer operand, so the upper
  ; 16 bits of the returned value are zero.
17405  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
17406  %6 = bitcast <32 x i1> %5 to i32
17407  ret i32 %6
17408}
17409
17410
; Compares two <16 x i32> vectors (passed as <8 x i64>) with unsigned
; less-than, widens the <16 x i1> result to <64 x i1> with zero lanes and
; returns it as an i64 bitmask.
; The checked output for the run that enables AVX512BW/VL/DQ reads the mask
; with kmovq; the AVX512F-only output uses kmovw followed by movzwl.
17411define zeroext i64 @test_vpcmpultd_v16i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
17412; VLX-LABEL: test_vpcmpultd_v16i1_v64i1_mask:
17413; VLX:       # %bb.0: # %entry
17414; VLX-NEXT:    vpcmpltud %zmm1, %zmm0, %k0
17415; VLX-NEXT:    kmovq %k0, %rax
17416; VLX-NEXT:    vzeroupper
17417; VLX-NEXT:    retq
17418;
17419; NoVLX-LABEL: test_vpcmpultd_v16i1_v64i1_mask:
17420; NoVLX:       # %bb.0: # %entry
17421; NoVLX-NEXT:    vpcmpltud %zmm1, %zmm0, %k0
17422; NoVLX-NEXT:    kmovw %k0, %eax
17423; NoVLX-NEXT:    movzwl %ax, %eax
17424; NoVLX-NEXT:    vzeroupper
17425; NoVLX-NEXT:    retq
17426entry:
17427  %0 = bitcast <8 x i64> %__a to <16 x i32>
17428  %1 = bitcast <8 x i64> %__b to <16 x i32>
17429  %2 = icmp ult <16 x i32> %0, %1
  ; After the first 16 entries every mask index is in the range 16-31, which
  ; selects the zeroinitializer operand; bits 16-63 of the result are zero.
17430  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
17431  %4 = bitcast <64 x i1> %3 to i64
17432  ret i64 %4
17433}
17434
; Unsigned less-than compare of %__a against a <8 x i64> vector loaded from
; %__b, both viewed as <16 x i32>. The <16 x i1> result is widened to
; <64 x i1> with zero lanes and returned as i64.
; Both checked outputs fold the load into vpcmpltud as a (%rdi) operand. The
; output for the run that enables AVX512BW/VL/DQ reads the mask with kmovq;
; the AVX512F-only output uses kmovw followed by movzwl.
17435define zeroext i64 @test_vpcmpultd_v16i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
17436; VLX-LABEL: test_vpcmpultd_v16i1_v64i1_mask_mem:
17437; VLX:       # %bb.0: # %entry
17438; VLX-NEXT:    vpcmpltud (%rdi), %zmm0, %k0
17439; VLX-NEXT:    kmovq %k0, %rax
17440; VLX-NEXT:    vzeroupper
17441; VLX-NEXT:    retq
17442;
17443; NoVLX-LABEL: test_vpcmpultd_v16i1_v64i1_mask_mem:
17444; NoVLX:       # %bb.0: # %entry
17445; NoVLX-NEXT:    vpcmpltud (%rdi), %zmm0, %k0
17446; NoVLX-NEXT:    kmovw %k0, %eax
17447; NoVLX-NEXT:    movzwl %ax, %eax
17448; NoVLX-NEXT:    vzeroupper
17449; NoVLX-NEXT:    retq
17450entry:
17451  %0 = bitcast <8 x i64> %__a to <16 x i32>
17452  %load = load <8 x i64>, <8 x i64>* %__b
17453  %1 = bitcast <8 x i64> %load to <16 x i32>
17454  %2 = icmp ult <16 x i32> %0, %1
  ; After the first 16 entries every mask index is in the range 16-31, which
  ; selects the zeroinitializer operand; bits 16-63 of the result are zero.
17455  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
17456  %4 = bitcast <64 x i1> %3 to i64
17457  ret i64 %4
17458}
17459
; Masked unsigned less-than compare of two <16 x i32> vectors. %__u is bitcast
; to <16 x i1> and ANDed with the compare result, which is then widened to
; <64 x i1> with zero lanes and returned as i64.
; The checked output for the run that enables AVX512BW/VL/DQ applies the mask
; as {%k1} and reads the result with kmovq; the AVX512F-only output compares
; unmasked and applies %edi afterwards with a scalar andl.
17460define zeroext i64 @test_masked_vpcmpultd_v16i1_v64i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
17461; VLX-LABEL: test_masked_vpcmpultd_v16i1_v64i1_mask:
17462; VLX:       # %bb.0: # %entry
17463; VLX-NEXT:    kmovd %edi, %k1
17464; VLX-NEXT:    vpcmpltud %zmm1, %zmm0, %k0 {%k1}
17465; VLX-NEXT:    kmovq %k0, %rax
17466; VLX-NEXT:    vzeroupper
17467; VLX-NEXT:    retq
17468;
17469; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v64i1_mask:
17470; NoVLX:       # %bb.0: # %entry
17471; NoVLX-NEXT:    vpcmpltud %zmm1, %zmm0, %k0
17472; NoVLX-NEXT:    kmovw %k0, %eax
17473; NoVLX-NEXT:    andl %edi, %eax
17474; NoVLX-NEXT:    vzeroupper
17475; NoVLX-NEXT:    retq
17476entry:
17477  %0 = bitcast <8 x i64> %__a to <16 x i32>
17478  %1 = bitcast <8 x i64> %__b to <16 x i32>
17479  %2 = icmp ult <16 x i32> %0, %1
17480  %3 = bitcast i16 %__u to <16 x i1>
17481  %4 = and <16 x i1> %2, %3
  ; After the first 16 entries every mask index is in the range 16-31, which
  ; selects the zeroinitializer operand; bits 16-63 of the result are zero.
17482  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
17483  %6 = bitcast <64 x i1> %5 to i64
17484  ret i64 %6
17485}
17486
; Masked unsigned less-than compare of %__a against a <8 x i64> vector loaded
; from %__b, both viewed as <16 x i32>. %__u is bitcast to <16 x i1> and ANDed
; with the compare result, which is widened to <64 x i1> with zero lanes and
; returned as i64.
; Both checked outputs fold the load into vpcmpltud as a (%rsi) operand. The
; output for the run that enables AVX512BW/VL/DQ applies the mask as {%k1};
; the AVX512F-only output applies %edi afterwards with a scalar andl.
17487define zeroext i64 @test_masked_vpcmpultd_v16i1_v64i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
17488; VLX-LABEL: test_masked_vpcmpultd_v16i1_v64i1_mask_mem:
17489; VLX:       # %bb.0: # %entry
17490; VLX-NEXT:    kmovd %edi, %k1
17491; VLX-NEXT:    vpcmpltud (%rsi), %zmm0, %k0 {%k1}
17492; VLX-NEXT:    kmovq %k0, %rax
17493; VLX-NEXT:    vzeroupper
17494; VLX-NEXT:    retq
17495;
17496; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v64i1_mask_mem:
17497; NoVLX:       # %bb.0: # %entry
17498; NoVLX-NEXT:    vpcmpltud (%rsi), %zmm0, %k0
17499; NoVLX-NEXT:    kmovw %k0, %eax
17500; NoVLX-NEXT:    andl %edi, %eax
17501; NoVLX-NEXT:    vzeroupper
17502; NoVLX-NEXT:    retq
17503entry:
17504  %0 = bitcast <8 x i64> %__a to <16 x i32>
17505  %load = load <8 x i64>, <8 x i64>* %__b
17506  %1 = bitcast <8 x i64> %load to <16 x i32>
17507  %2 = icmp ult <16 x i32> %0, %1
17508  %3 = bitcast i16 %__u to <16 x i1>
17509  %4 = and <16 x i1> %2, %3
  ; After the first 16 entries every mask index is in the range 16-31, which
  ; selects the zeroinitializer operand; bits 16-63 of the result are zero.
17510  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
17511  %6 = bitcast <64 x i1> %5 to i64
17512  ret i64 %6
17513}
17514
17515
; Unsigned less-than compare of %__a (viewed as <16 x i32>) against a scalar
; i32 loaded from %__b and splatted to all 16 lanes. The <16 x i1> result is
; widened to <64 x i1> with zero lanes and returned as i64.
; Both checked outputs use a {1to16} broadcast operand. The output for the run
; that enables AVX512BW/VL/DQ reads the mask with kmovq; the AVX512F-only
; output uses kmovw followed by movzwl.
17516define zeroext i64 @test_vpcmpultd_v16i1_v64i1_mask_mem_b(<8 x i64> %__a, i32* %__b) local_unnamed_addr {
17517; VLX-LABEL: test_vpcmpultd_v16i1_v64i1_mask_mem_b:
17518; VLX:       # %bb.0: # %entry
17519; VLX-NEXT:    vpcmpltud (%rdi){1to16}, %zmm0, %k0
17520; VLX-NEXT:    kmovq %k0, %rax
17521; VLX-NEXT:    vzeroupper
17522; VLX-NEXT:    retq
17523;
17524; NoVLX-LABEL: test_vpcmpultd_v16i1_v64i1_mask_mem_b:
17525; NoVLX:       # %bb.0: # %entry
17526; NoVLX-NEXT:    vpcmpltud (%rdi){1to16}, %zmm0, %k0
17527; NoVLX-NEXT:    kmovw %k0, %eax
17528; NoVLX-NEXT:    movzwl %ax, %eax
17529; NoVLX-NEXT:    vzeroupper
17530; NoVLX-NEXT:    retq
17531entry:
17532  %0 = bitcast <8 x i64> %__a to <16 x i32>
17533  %load = load i32, i32* %__b
  ; Splat the loaded scalar: it is inserted into lane 0, then an all-zero
  ; shuffle mask copies lane 0 to every lane.
17534  %vec = insertelement <16 x i32> undef, i32 %load, i32 0
17535  %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
17536  %2 = icmp ult <16 x i32> %0, %1
  ; After the first 16 entries every mask index is in the range 16-31, which
  ; selects the zeroinitializer operand; bits 16-63 of the result are zero.
17537  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
17538  %4 = bitcast <64 x i1> %3 to i64
17539  ret i64 %4
17540}
17541
; Masked unsigned less-than compare of %__a (viewed as <16 x i32>) against a
; scalar i32 loaded from %__b and splatted to all 16 lanes. %__u is bitcast to
; <16 x i1> and ANDed with the compare result, which is widened to <64 x i1>
; with zero lanes and returned as i64.
; Both checked outputs use a {1to16} broadcast operand. The output for the run
; that enables AVX512BW/VL/DQ applies the mask as {%k1}; the AVX512F-only
; output applies %edi afterwards with a scalar andl.
17542define zeroext i64 @test_masked_vpcmpultd_v16i1_v64i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, i32* %__b) local_unnamed_addr {
17543; VLX-LABEL: test_masked_vpcmpultd_v16i1_v64i1_mask_mem_b:
17544; VLX:       # %bb.0: # %entry
17545; VLX-NEXT:    kmovd %edi, %k1
17546; VLX-NEXT:    vpcmpltud (%rsi){1to16}, %zmm0, %k0 {%k1}
17547; VLX-NEXT:    kmovq %k0, %rax
17548; VLX-NEXT:    vzeroupper
17549; VLX-NEXT:    retq
17550;
17551; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v64i1_mask_mem_b:
17552; NoVLX:       # %bb.0: # %entry
17553; NoVLX-NEXT:    vpcmpltud (%rsi){1to16}, %zmm0, %k0
17554; NoVLX-NEXT:    kmovw %k0, %eax
17555; NoVLX-NEXT:    andl %edi, %eax
17556; NoVLX-NEXT:    vzeroupper
17557; NoVLX-NEXT:    retq
17558entry:
17559  %0 = bitcast <8 x i64> %__a to <16 x i32>
17560  %load = load i32, i32* %__b
  ; Splat the loaded scalar: it is inserted into lane 0, then an all-zero
  ; shuffle mask copies lane 0 to every lane.
17561  %vec = insertelement <16 x i32> undef, i32 %load, i32 0
17562  %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
17563  %2 = icmp ult <16 x i32> %0, %1
17564  %3 = bitcast i16 %__u to <16 x i1>
17565  %4 = and <16 x i1> %3, %2
  ; After the first 16 entries every mask index is in the range 16-31, which
  ; selects the zeroinitializer operand; bits 16-63 of the result are zero.
17566  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
17567  %6 = bitcast <64 x i1> %5 to i64
17568  ret i64 %6
17569}
17570
17571
; Compares two <2 x i64> vectors with unsigned less-than, widens the <2 x i1>
; result to <4 x i1> with zero lanes and returns it as an i4 bitmask.
; The checked output for the run that enables AVX512BW/VL/DQ compares xmm
; registers directly. The AVX512F-only output compares in zmm registers, then
; uses kshiftlw/kshiftrw by 14 and andl $3 before returning.
17572define zeroext i4 @test_vpcmpultq_v2i1_v4i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
17573; VLX-LABEL: test_vpcmpultq_v2i1_v4i1_mask:
17574; VLX:       # %bb.0: # %entry
17575; VLX-NEXT:    vpcmpltuq %xmm1, %xmm0, %k0
17576; VLX-NEXT:    kmovb %k0, %eax
17577; VLX-NEXT:    retq
17578;
17579; NoVLX-LABEL: test_vpcmpultq_v2i1_v4i1_mask:
17580; NoVLX:       # %bb.0: # %entry
17581; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
17582; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
17583; NoVLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0
17584; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
17585; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
17586; NoVLX-NEXT:    kmovw %k0, %eax
17587; NoVLX-NEXT:    andl $3, %eax
17588; NoVLX-NEXT:    vzeroupper
17589; NoVLX-NEXT:    retq
17590entry:
  ; Source and destination types are identical, so these bitcasts are no-ops.
17591  %0 = bitcast <2 x i64> %__a to <2 x i64>
17592  %1 = bitcast <2 x i64> %__b to <2 x i64>
17593  %2 = icmp ult <2 x i64> %0, %1
  ; Mask indices 2 and 3 pick lanes of the zeroinitializer operand, so only
  ; the low two result bits can be set.
17594  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
17595  %4 = bitcast <4 x i1> %3 to i4
17596  ret i4 %4
17597}
17598
; Unsigned less-than compare of %__a against a <2 x i64> vector loaded from
; %__b. The <2 x i1> result is widened to <4 x i1> with zero lanes and
; returned as i4.
; The checked output for the run that enables AVX512BW/VL/DQ folds the load
; into vpcmpltuq. The AVX512F-only output loads with vmovdqa, compares in zmm
; registers, then uses kshiftlw/kshiftrw by 14 and andl $3.
17599define zeroext i4 @test_vpcmpultq_v2i1_v4i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
17600; VLX-LABEL: test_vpcmpultq_v2i1_v4i1_mask_mem:
17601; VLX:       # %bb.0: # %entry
17602; VLX-NEXT:    vpcmpltuq (%rdi), %xmm0, %k0
17603; VLX-NEXT:    kmovb %k0, %eax
17604; VLX-NEXT:    retq
17605;
17606; NoVLX-LABEL: test_vpcmpultq_v2i1_v4i1_mask_mem:
17607; NoVLX:       # %bb.0: # %entry
17608; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
17609; NoVLX-NEXT:    vmovdqa (%rdi), %xmm1
17610; NoVLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0
17611; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
17612; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
17613; NoVLX-NEXT:    kmovw %k0, %eax
17614; NoVLX-NEXT:    andl $3, %eax
17615; NoVLX-NEXT:    vzeroupper
17616; NoVLX-NEXT:    retq
17617entry:
  ; Source and destination types are identical, so both bitcasts are no-ops.
17618  %0 = bitcast <2 x i64> %__a to <2 x i64>
17619  %load = load <2 x i64>, <2 x i64>* %__b
17620  %1 = bitcast <2 x i64> %load to <2 x i64>
17621  %2 = icmp ult <2 x i64> %0, %1
  ; Mask indices 2 and 3 pick lanes of the zeroinitializer operand, so only
  ; the low two result bits can be set.
17622  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
17623  %4 = bitcast <4 x i1> %3 to i4
17624  ret i4 %4
17625}
17626
; Masked unsigned less-than compare of two <2 x i64> vectors. %__u is bitcast
; to <8 x i1>, its lanes 0 and 1 are extracted and ANDed with the compare
; result, which is widened to <4 x i1> with zero lanes and returned as i4.
; Both checked outputs apply the mask as {%k1} on the compare. The
; AVX512F-only output compares in zmm registers, then uses kshiftlw/kshiftrw
; by 14 and andl $3.
17627define zeroext i4 @test_masked_vpcmpultq_v2i1_v4i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
17628; VLX-LABEL: test_masked_vpcmpultq_v2i1_v4i1_mask:
17629; VLX:       # %bb.0: # %entry
17630; VLX-NEXT:    kmovd %edi, %k1
17631; VLX-NEXT:    vpcmpltuq %xmm1, %xmm0, %k0 {%k1}
17632; VLX-NEXT:    kmovb %k0, %eax
17633; VLX-NEXT:    retq
17634;
17635; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v4i1_mask:
17636; NoVLX:       # %bb.0: # %entry
17637; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
17638; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
17639; NoVLX-NEXT:    kmovw %edi, %k1
17640; NoVLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
17641; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
17642; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
17643; NoVLX-NEXT:    kmovw %k0, %eax
17644; NoVLX-NEXT:    andl $3, %eax
17645; NoVLX-NEXT:    vzeroupper
17646; NoVLX-NEXT:    retq
17647entry:
  ; Source and destination types are identical, so these bitcasts are no-ops.
17648  %0 = bitcast <2 x i64> %__a to <2 x i64>
17649  %1 = bitcast <2 x i64> %__b to <2 x i64>
17650  %2 = icmp ult <2 x i64> %0, %1
17651  %3 = bitcast i8 %__u to <8 x i1>
  ; Only lanes 0 and 1 of the 8-lane mask are used.
17652  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
17653  %4 = and <2 x i1> %2, %extract.i
  ; Mask indices 2 and 3 pick lanes of the zeroinitializer operand, so only
  ; the low two result bits can be set.
17654  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
17655  %6 = bitcast <4 x i1> %5 to i4
17656  ret i4 %6
17657}
17658
; Masked unsigned less-than compare of %__a against a <2 x i64> vector loaded
; from %__b. %__u is bitcast to <8 x i1>, its lanes 0 and 1 are extracted and
; ANDed with the compare result, which is widened to <4 x i1> with zero lanes
; and returned as i4.
; Both checked outputs apply the mask as {%k1}. The output for the run that
; enables AVX512BW/VL/DQ folds the load into vpcmpltuq; the AVX512F-only
; output loads with vmovdqa, compares in zmm registers, then uses
; kshiftlw/kshiftrw by 14 and andl $3.
17659define zeroext i4 @test_masked_vpcmpultq_v2i1_v4i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
17660; VLX-LABEL: test_masked_vpcmpultq_v2i1_v4i1_mask_mem:
17661; VLX:       # %bb.0: # %entry
17662; VLX-NEXT:    kmovd %edi, %k1
17663; VLX-NEXT:    vpcmpltuq (%rsi), %xmm0, %k0 {%k1}
17664; VLX-NEXT:    kmovb %k0, %eax
17665; VLX-NEXT:    retq
17666;
17667; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v4i1_mask_mem:
17668; NoVLX:       # %bb.0: # %entry
17669; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
17670; NoVLX-NEXT:    vmovdqa (%rsi), %xmm1
17671; NoVLX-NEXT:    kmovw %edi, %k1
17672; NoVLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
17673; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
17674; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
17675; NoVLX-NEXT:    kmovw %k0, %eax
17676; NoVLX-NEXT:    andl $3, %eax
17677; NoVLX-NEXT:    vzeroupper
17678; NoVLX-NEXT:    retq
17679entry:
  ; Source and destination types are identical, so both bitcasts are no-ops.
17680  %0 = bitcast <2 x i64> %__a to <2 x i64>
17681  %load = load <2 x i64>, <2 x i64>* %__b
17682  %1 = bitcast <2 x i64> %load to <2 x i64>
17683  %2 = icmp ult <2 x i64> %0, %1
17684  %3 = bitcast i8 %__u to <8 x i1>
  ; Only lanes 0 and 1 of the 8-lane mask are used.
17685  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
17686  %4 = and <2 x i1> %2, %extract.i
  ; Mask indices 2 and 3 pick lanes of the zeroinitializer operand, so only
  ; the low two result bits can be set.
17687  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
17688  %6 = bitcast <4 x i1> %5 to i4
17689  ret i4 %6
17690}
17691
17692
; Unsigned less-than compare of %__a against a scalar i64 loaded from %__b and
; splatted to both lanes. The <2 x i1> result is widened to <4 x i1> with zero
; lanes and returned as i4.
; The checked output for the run that enables AVX512BW/VL/DQ uses a {1to2}
; broadcast operand on vpcmpltuq. The AVX512F-only output broadcasts with
; vpbroadcastq, compares in zmm registers, then uses kshiftlw/kshiftrw by 14
; and andl $3.
17693define zeroext i4 @test_vpcmpultq_v2i1_v4i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
17694; VLX-LABEL: test_vpcmpultq_v2i1_v4i1_mask_mem_b:
17695; VLX:       # %bb.0: # %entry
17696; VLX-NEXT:    vpcmpltuq (%rdi){1to2}, %xmm0, %k0
17697; VLX-NEXT:    kmovb %k0, %eax
17698; VLX-NEXT:    retq
17699;
17700; NoVLX-LABEL: test_vpcmpultq_v2i1_v4i1_mask_mem_b:
17701; NoVLX:       # %bb.0: # %entry
17702; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
17703; NoVLX-NEXT:    vpbroadcastq (%rdi), %xmm1
17704; NoVLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0
17705; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
17706; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
17707; NoVLX-NEXT:    kmovw %k0, %eax
17708; NoVLX-NEXT:    andl $3, %eax
17709; NoVLX-NEXT:    vzeroupper
17710; NoVLX-NEXT:    retq
17711entry:
  ; Source and destination types are identical, so this bitcast is a no-op.
17712  %0 = bitcast <2 x i64> %__a to <2 x i64>
17713  %load = load i64, i64* %__b
  ; Splat the loaded scalar: it is inserted into lane 0, then an all-zero
  ; shuffle mask copies lane 0 to both lanes.
17714  %vec = insertelement <2 x i64> undef, i64 %load, i32 0
17715  %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
17716  %2 = icmp ult <2 x i64> %0, %1
  ; Mask indices 2 and 3 pick lanes of the zeroinitializer operand, so only
  ; the low two result bits can be set.
17717  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
17718  %4 = bitcast <4 x i1> %3 to i4
17719  ret i4 %4
17720}
17721
; Masked unsigned less-than compare of %__a against a scalar i64 loaded from
; %__b and splatted to both lanes. %__u is bitcast to <8 x i1>, its lanes 0
; and 1 are extracted and ANDed with the compare result, which is widened to
; <4 x i1> with zero lanes and returned as i4.
; Both checked outputs apply the mask as {%k1}. The output for the run that
; enables AVX512BW/VL/DQ uses a {1to2} broadcast operand; the AVX512F-only
; output broadcasts with vpbroadcastq, compares in zmm registers, then uses
; kshiftlw/kshiftrw by 14 and andl $3.
17722define zeroext i4 @test_masked_vpcmpultq_v2i1_v4i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
17723; VLX-LABEL: test_masked_vpcmpultq_v2i1_v4i1_mask_mem_b:
17724; VLX:       # %bb.0: # %entry
17725; VLX-NEXT:    kmovd %edi, %k1
17726; VLX-NEXT:    vpcmpltuq (%rsi){1to2}, %xmm0, %k0 {%k1}
17727; VLX-NEXT:    kmovb %k0, %eax
17728; VLX-NEXT:    retq
17729;
17730; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v4i1_mask_mem_b:
17731; NoVLX:       # %bb.0: # %entry
17732; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
17733; NoVLX-NEXT:    vpbroadcastq (%rsi), %xmm1
17734; NoVLX-NEXT:    kmovw %edi, %k1
17735; NoVLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
17736; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
17737; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
17738; NoVLX-NEXT:    kmovw %k0, %eax
17739; NoVLX-NEXT:    andl $3, %eax
17740; NoVLX-NEXT:    vzeroupper
17741; NoVLX-NEXT:    retq
17742entry:
  ; Source and destination types are identical, so this bitcast is a no-op.
17743  %0 = bitcast <2 x i64> %__a to <2 x i64>
17744  %load = load i64, i64* %__b
  ; Splat the loaded scalar: it is inserted into lane 0, then an all-zero
  ; shuffle mask copies lane 0 to both lanes.
17745  %vec = insertelement <2 x i64> undef, i64 %load, i32 0
17746  %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
17747  %2 = icmp ult <2 x i64> %0, %1
17748  %3 = bitcast i8 %__u to <8 x i1>
  ; Only lanes 0 and 1 of the 8-lane mask are used.
17749  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
17750  %4 = and <2 x i1> %extract.i, %2
  ; Mask indices 2 and 3 pick lanes of the zeroinitializer operand, so only
  ; the low two result bits can be set.
17751  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
17752  %6 = bitcast <4 x i1> %5 to i4
17753  ret i4 %6
17754}
17755
17756
; Compares two <2 x i64> vectors with unsigned less-than, widens the <2 x i1>
; result to <8 x i1> with zero lanes and returns it as an i8 bitmask.
; The checked output for the run that enables AVX512BW/VL/DQ compares xmm
; registers directly. The AVX512F-only output compares in zmm registers and
; then uses kshiftlw/kshiftrw by 14 before moving the mask to %eax.
17757define zeroext i8 @test_vpcmpultq_v2i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
17758; VLX-LABEL: test_vpcmpultq_v2i1_v8i1_mask:
17759; VLX:       # %bb.0: # %entry
17760; VLX-NEXT:    vpcmpltuq %xmm1, %xmm0, %k0
17761; VLX-NEXT:    kmovd %k0, %eax
17762; VLX-NEXT:    # kill: def $al killed $al killed $eax
17763; VLX-NEXT:    retq
17764;
17765; NoVLX-LABEL: test_vpcmpultq_v2i1_v8i1_mask:
17766; NoVLX:       # %bb.0: # %entry
17767; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
17768; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
17769; NoVLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0
17770; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
17771; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
17772; NoVLX-NEXT:    kmovw %k0, %eax
17773; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
17774; NoVLX-NEXT:    vzeroupper
17775; NoVLX-NEXT:    retq
17776entry:
  ; Source and destination types are identical, so these bitcasts are no-ops.
17777  %0 = bitcast <2 x i64> %__a to <2 x i64>
17778  %1 = bitcast <2 x i64> %__b to <2 x i64>
17779  %2 = icmp ult <2 x i64> %0, %1
  ; Mask indices 2 and 3 pick lanes of the zeroinitializer operand, so only
  ; the low two result bits can be set.
17780  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
17781  %4 = bitcast <8 x i1> %3 to i8
17782  ret i8 %4
17783}
17784
; Unsigned less-than compare of %__a against a <2 x i64> vector loaded from
; %__b. The <2 x i1> result is widened to <8 x i1> with zero lanes and
; returned as i8.
; The checked output for the run that enables AVX512BW/VL/DQ folds the load
; into vpcmpltuq. The AVX512F-only output loads with vmovdqa, compares in zmm
; registers and then uses kshiftlw/kshiftrw by 14.
17785define zeroext i8 @test_vpcmpultq_v2i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
17786; VLX-LABEL: test_vpcmpultq_v2i1_v8i1_mask_mem:
17787; VLX:       # %bb.0: # %entry
17788; VLX-NEXT:    vpcmpltuq (%rdi), %xmm0, %k0
17789; VLX-NEXT:    kmovd %k0, %eax
17790; VLX-NEXT:    # kill: def $al killed $al killed $eax
17791; VLX-NEXT:    retq
17792;
17793; NoVLX-LABEL: test_vpcmpultq_v2i1_v8i1_mask_mem:
17794; NoVLX:       # %bb.0: # %entry
17795; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
17796; NoVLX-NEXT:    vmovdqa (%rdi), %xmm1
17797; NoVLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0
17798; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
17799; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
17800; NoVLX-NEXT:    kmovw %k0, %eax
17801; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
17802; NoVLX-NEXT:    vzeroupper
17803; NoVLX-NEXT:    retq
17804entry:
  ; Source and destination types are identical, so both bitcasts are no-ops.
17805  %0 = bitcast <2 x i64> %__a to <2 x i64>
17806  %load = load <2 x i64>, <2 x i64>* %__b
17807  %1 = bitcast <2 x i64> %load to <2 x i64>
17808  %2 = icmp ult <2 x i64> %0, %1
  ; Mask indices 2 and 3 pick lanes of the zeroinitializer operand, so only
  ; the low two result bits can be set.
17809  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
17810  %4 = bitcast <8 x i1> %3 to i8
17811  ret i8 %4
17812}
17813
; Masked unsigned less-than compare of two <2 x i64> vectors. %__u is bitcast
; to <8 x i1>, its lanes 0 and 1 are extracted and ANDed with the compare
; result, which is widened to <8 x i1> with zero lanes and returned as i8.
; Both checked outputs apply the mask as {%k1} on the compare. The
; AVX512F-only output compares in zmm registers and then uses
; kshiftlw/kshiftrw by 14.
17814define zeroext i8 @test_masked_vpcmpultq_v2i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
17815; VLX-LABEL: test_masked_vpcmpultq_v2i1_v8i1_mask:
17816; VLX:       # %bb.0: # %entry
17817; VLX-NEXT:    kmovd %edi, %k1
17818; VLX-NEXT:    vpcmpltuq %xmm1, %xmm0, %k0 {%k1}
17819; VLX-NEXT:    kmovd %k0, %eax
17820; VLX-NEXT:    # kill: def $al killed $al killed $eax
17821; VLX-NEXT:    retq
17822;
17823; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v8i1_mask:
17824; NoVLX:       # %bb.0: # %entry
17825; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
17826; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
17827; NoVLX-NEXT:    kmovw %edi, %k1
17828; NoVLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
17829; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
17830; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
17831; NoVLX-NEXT:    kmovw %k0, %eax
17832; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
17833; NoVLX-NEXT:    vzeroupper
17834; NoVLX-NEXT:    retq
17835entry:
  ; Source and destination types are identical, so these bitcasts are no-ops.
17836  %0 = bitcast <2 x i64> %__a to <2 x i64>
17837  %1 = bitcast <2 x i64> %__b to <2 x i64>
17838  %2 = icmp ult <2 x i64> %0, %1
17839  %3 = bitcast i8 %__u to <8 x i1>
  ; Only lanes 0 and 1 of the 8-lane mask are used.
17840  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
17841  %4 = and <2 x i1> %2, %extract.i
  ; Mask indices 2 and 3 pick lanes of the zeroinitializer operand, so only
  ; the low two result bits can be set.
17842  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
17843  %6 = bitcast <8 x i1> %5 to i8
17844  ret i8 %6
17845}
17846
; Masked unsigned less-than compare of %__a against a <2 x i64> vector loaded
; from %__b. %__u is bitcast to <8 x i1>, its lanes 0 and 1 are extracted and
; ANDed with the compare result, which is widened to <8 x i1> with zero lanes
; and returned as i8.
; Both checked outputs apply the mask as {%k1}. The output for the run that
; enables AVX512BW/VL/DQ folds the load into vpcmpltuq; the AVX512F-only
; output loads with vmovdqa, compares in zmm registers and then uses
; kshiftlw/kshiftrw by 14.
17847define zeroext i8 @test_masked_vpcmpultq_v2i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
17848; VLX-LABEL: test_masked_vpcmpultq_v2i1_v8i1_mask_mem:
17849; VLX:       # %bb.0: # %entry
17850; VLX-NEXT:    kmovd %edi, %k1
17851; VLX-NEXT:    vpcmpltuq (%rsi), %xmm0, %k0 {%k1}
17852; VLX-NEXT:    kmovd %k0, %eax
17853; VLX-NEXT:    # kill: def $al killed $al killed $eax
17854; VLX-NEXT:    retq
17855;
17856; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v8i1_mask_mem:
17857; NoVLX:       # %bb.0: # %entry
17858; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
17859; NoVLX-NEXT:    vmovdqa (%rsi), %xmm1
17860; NoVLX-NEXT:    kmovw %edi, %k1
17861; NoVLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
17862; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
17863; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
17864; NoVLX-NEXT:    kmovw %k0, %eax
17865; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
17866; NoVLX-NEXT:    vzeroupper
17867; NoVLX-NEXT:    retq
17868entry:
  ; Source and destination types are identical, so both bitcasts are no-ops.
17869  %0 = bitcast <2 x i64> %__a to <2 x i64>
17870  %load = load <2 x i64>, <2 x i64>* %__b
17871  %1 = bitcast <2 x i64> %load to <2 x i64>
17872  %2 = icmp ult <2 x i64> %0, %1
17873  %3 = bitcast i8 %__u to <8 x i1>
  ; Only lanes 0 and 1 of the 8-lane mask are used.
17874  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
17875  %4 = and <2 x i1> %2, %extract.i
  ; Mask indices 2 and 3 pick lanes of the zeroinitializer operand, so only
  ; the low two result bits can be set.
17876  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
17877  %6 = bitcast <8 x i1> %5 to i8
17878  ret i8 %6
17879}
17880
17881
; Unsigned less-than compare of %__a against a scalar i64 loaded from %__b and
; splatted to both lanes. The <2 x i1> result is widened to <8 x i1> with zero
; lanes and returned as i8.
; The checked output for the run that enables AVX512BW/VL/DQ uses a {1to2}
; broadcast operand on vpcmpltuq. The AVX512F-only output broadcasts with
; vpbroadcastq, compares in zmm registers and then uses kshiftlw/kshiftrw
; by 14.
17882define zeroext i8 @test_vpcmpultq_v2i1_v8i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
17883; VLX-LABEL: test_vpcmpultq_v2i1_v8i1_mask_mem_b:
17884; VLX:       # %bb.0: # %entry
17885; VLX-NEXT:    vpcmpltuq (%rdi){1to2}, %xmm0, %k0
17886; VLX-NEXT:    kmovd %k0, %eax
17887; VLX-NEXT:    # kill: def $al killed $al killed $eax
17888; VLX-NEXT:    retq
17889;
17890; NoVLX-LABEL: test_vpcmpultq_v2i1_v8i1_mask_mem_b:
17891; NoVLX:       # %bb.0: # %entry
17892; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
17893; NoVLX-NEXT:    vpbroadcastq (%rdi), %xmm1
17894; NoVLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0
17895; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
17896; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
17897; NoVLX-NEXT:    kmovw %k0, %eax
17898; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
17899; NoVLX-NEXT:    vzeroupper
17900; NoVLX-NEXT:    retq
17901entry:
  ; Source and destination types are identical, so this bitcast is a no-op.
17902  %0 = bitcast <2 x i64> %__a to <2 x i64>
17903  %load = load i64, i64* %__b
  ; Splat the loaded scalar: it is inserted into lane 0, then an all-zero
  ; shuffle mask copies lane 0 to both lanes.
17904  %vec = insertelement <2 x i64> undef, i64 %load, i32 0
17905  %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
17906  %2 = icmp ult <2 x i64> %0, %1
  ; Mask indices 2 and 3 pick lanes of the zeroinitializer operand, so only
  ; the low two result bits can be set.
17907  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
17908  %4 = bitcast <8 x i1> %3 to i8
17909  ret i8 %4
17910}
17911
; Masked unsigned less-than compare of %__a against a scalar i64 loaded from
; %__b and splatted to both lanes. %__u is bitcast to <8 x i1>, its lanes 0
; and 1 are extracted and ANDed with the compare result, which is widened to
; <8 x i1> with zero lanes and returned as i8.
; Both checked outputs apply the mask as {%k1}. The output for the run that
; enables AVX512BW/VL/DQ uses a {1to2} broadcast operand; the AVX512F-only
; output broadcasts with vpbroadcastq, compares in zmm registers and then
; uses kshiftlw/kshiftrw by 14.
17912define zeroext i8 @test_masked_vpcmpultq_v2i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
17913; VLX-LABEL: test_masked_vpcmpultq_v2i1_v8i1_mask_mem_b:
17914; VLX:       # %bb.0: # %entry
17915; VLX-NEXT:    kmovd %edi, %k1
17916; VLX-NEXT:    vpcmpltuq (%rsi){1to2}, %xmm0, %k0 {%k1}
17917; VLX-NEXT:    kmovd %k0, %eax
17918; VLX-NEXT:    # kill: def $al killed $al killed $eax
17919; VLX-NEXT:    retq
17920;
17921; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v8i1_mask_mem_b:
17922; NoVLX:       # %bb.0: # %entry
17923; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
17924; NoVLX-NEXT:    vpbroadcastq (%rsi), %xmm1
17925; NoVLX-NEXT:    kmovw %edi, %k1
17926; NoVLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
17927; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
17928; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
17929; NoVLX-NEXT:    kmovw %k0, %eax
17930; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
17931; NoVLX-NEXT:    vzeroupper
17932; NoVLX-NEXT:    retq
17933entry:
  ; Source and destination types are identical, so this bitcast is a no-op.
17934  %0 = bitcast <2 x i64> %__a to <2 x i64>
17935  %load = load i64, i64* %__b
  ; Splat the loaded scalar: it is inserted into lane 0, then an all-zero
  ; shuffle mask copies lane 0 to both lanes.
17936  %vec = insertelement <2 x i64> undef, i64 %load, i32 0
17937  %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
17938  %2 = icmp ult <2 x i64> %0, %1
17939  %3 = bitcast i8 %__u to <8 x i1>
  ; Only lanes 0 and 1 of the 8-lane mask are used.
17940  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
17941  %4 = and <2 x i1> %extract.i, %2
  ; Mask indices 2 and 3 pick lanes of the zeroinitializer operand, so only
  ; the low two result bits can be set.
17942  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
17943  %6 = bitcast <8 x i1> %5 to i8
17944  ret i8 %6
17945}
17946
17947
; Unsigned less-than compare of two <2 x i64> register operands. The 2-bit
; result is zero-extended to 16 lanes and returned as an i16 mask.
; VLX expects a single xmm compare into %k0; NoVLX expects both operands used
; as zmm registers and a kshiftlw/kshiftrw $14 pair that keeps mask bits 0-1.
17948define zeroext i16 @test_vpcmpultq_v2i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
17949; VLX-LABEL: test_vpcmpultq_v2i1_v16i1_mask:
17950; VLX:       # %bb.0: # %entry
17951; VLX-NEXT:    vpcmpltuq %xmm1, %xmm0, %k0
17952; VLX-NEXT:    kmovd %k0, %eax
17953; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
17954; VLX-NEXT:    retq
17955;
17956; NoVLX-LABEL: test_vpcmpultq_v2i1_v16i1_mask:
17957; NoVLX:       # %bb.0: # %entry
17958; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
17959; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
17960; NoVLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0
17961; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
17962; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
17963; NoVLX-NEXT:    kmovw %k0, %eax
17964; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
17965; NoVLX-NEXT:    vzeroupper
17966; NoVLX-NEXT:    retq
17967entry:
  ; Both bitcasts are same-type (no-op) casts.
17968  %0 = bitcast <2 x i64> %__a to <2 x i64>
17969  %1 = bitcast <2 x i64> %__b to <2 x i64>
17970  %2 = icmp ult <2 x i64> %0, %1
  ; Indices 2 and 3 select from the zeroinitializer operand, so lanes 2-15 are zero.
17971  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
17972  %4 = bitcast <16 x i1> %3 to i16
17973  ret i16 %4
17974}
17975
; Unsigned less-than compare of <2 x i64> %__a against a <2 x i64> loaded from
; %__b. The 2-bit result is zero-extended to 16 lanes and returned as an i16.
; VLX expects the load folded into the compare; NoVLX expects a separate
; vmovdqa, a zmm compare and a kshiftlw/kshiftrw $14 pair keeping mask bits 0-1.
17976define zeroext i16 @test_vpcmpultq_v2i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
17977; VLX-LABEL: test_vpcmpultq_v2i1_v16i1_mask_mem:
17978; VLX:       # %bb.0: # %entry
17979; VLX-NEXT:    vpcmpltuq (%rdi), %xmm0, %k0
17980; VLX-NEXT:    kmovd %k0, %eax
17981; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
17982; VLX-NEXT:    retq
17983;
17984; NoVLX-LABEL: test_vpcmpultq_v2i1_v16i1_mask_mem:
17985; NoVLX:       # %bb.0: # %entry
17986; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
17987; NoVLX-NEXT:    vmovdqa (%rdi), %xmm1
17988; NoVLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0
17989; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
17990; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
17991; NoVLX-NEXT:    kmovw %k0, %eax
17992; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
17993; NoVLX-NEXT:    vzeroupper
17994; NoVLX-NEXT:    retq
17995entry:
17996  %0 = bitcast <2 x i64> %__a to <2 x i64>
  ; Second compare operand comes from memory.
17997  %load = load <2 x i64>, <2 x i64>* %__b
17998  %1 = bitcast <2 x i64> %load to <2 x i64>
17999  %2 = icmp ult <2 x i64> %0, %1
  ; Indices 2 and 3 select from the zeroinitializer operand, so lanes 2-15 are zero.
18000  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
18001  %4 = bitcast <16 x i1> %3 to i16
18002  ret i16 %4
18003}
18004
; Masked unsigned less-than compare of two <2 x i64> register operands. The
; 2-bit result is ANDed with the low two bits of %__u, zero-extended to 16
; lanes and returned as an i16 mask.
; VLX expects %__u moved to %k1 and one masked xmm compare; NoVLX expects a
; masked zmm compare and a kshiftlw/kshiftrw $14 pair keeping mask bits 0-1.
18005define zeroext i16 @test_masked_vpcmpultq_v2i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
18006; VLX-LABEL: test_masked_vpcmpultq_v2i1_v16i1_mask:
18007; VLX:       # %bb.0: # %entry
18008; VLX-NEXT:    kmovd %edi, %k1
18009; VLX-NEXT:    vpcmpltuq %xmm1, %xmm0, %k0 {%k1}
18010; VLX-NEXT:    kmovd %k0, %eax
18011; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
18012; VLX-NEXT:    retq
18013;
18014; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v16i1_mask:
18015; NoVLX:       # %bb.0: # %entry
18016; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
18017; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
18018; NoVLX-NEXT:    kmovw %edi, %k1
18019; NoVLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
18020; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
18021; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
18022; NoVLX-NEXT:    kmovw %k0, %eax
18023; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
18024; NoVLX-NEXT:    vzeroupper
18025; NoVLX-NEXT:    retq
18026entry:
18027  %0 = bitcast <2 x i64> %__a to <2 x i64>
18028  %1 = bitcast <2 x i64> %__b to <2 x i64>
18029  %2 = icmp ult <2 x i64> %0, %1
  ; Only bits 0 and 1 of %__u take part in the mask.
18030  %3 = bitcast i8 %__u to <8 x i1>
18031  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
18032  %4 = and <2 x i1> %2, %extract.i
  ; Indices 2 and 3 select from the zeroinitializer operand, so lanes 2-15 are zero.
18033  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
18034  %6 = bitcast <16 x i1> %5 to i16
18035  ret i16 %6
18036}
18037
; Masked unsigned less-than compare of <2 x i64> %__a against a <2 x i64>
; loaded from %__b. The 2-bit result is ANDed with the low two bits of %__u,
; zero-extended to 16 lanes and returned as an i16 mask.
; VLX expects the load folded into one masked compare; NoVLX expects vmovdqa,
; a masked zmm compare and a kshiftlw/kshiftrw $14 pair keeping mask bits 0-1.
18038define zeroext i16 @test_masked_vpcmpultq_v2i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
18039; VLX-LABEL: test_masked_vpcmpultq_v2i1_v16i1_mask_mem:
18040; VLX:       # %bb.0: # %entry
18041; VLX-NEXT:    kmovd %edi, %k1
18042; VLX-NEXT:    vpcmpltuq (%rsi), %xmm0, %k0 {%k1}
18043; VLX-NEXT:    kmovd %k0, %eax
18044; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
18045; VLX-NEXT:    retq
18046;
18047; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v16i1_mask_mem:
18048; NoVLX:       # %bb.0: # %entry
18049; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
18050; NoVLX-NEXT:    vmovdqa (%rsi), %xmm1
18051; NoVLX-NEXT:    kmovw %edi, %k1
18052; NoVLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
18053; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
18054; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
18055; NoVLX-NEXT:    kmovw %k0, %eax
18056; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
18057; NoVLX-NEXT:    vzeroupper
18058; NoVLX-NEXT:    retq
18059entry:
18060  %0 = bitcast <2 x i64> %__a to <2 x i64>
  ; Second compare operand comes from memory.
18061  %load = load <2 x i64>, <2 x i64>* %__b
18062  %1 = bitcast <2 x i64> %load to <2 x i64>
18063  %2 = icmp ult <2 x i64> %0, %1
  ; Only bits 0 and 1 of %__u take part in the mask.
18064  %3 = bitcast i8 %__u to <8 x i1>
18065  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
18066  %4 = and <2 x i1> %2, %extract.i
  ; Indices 2 and 3 select from the zeroinitializer operand, so lanes 2-15 are zero.
18067  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
18068  %6 = bitcast <16 x i1> %5 to i16
18069  ret i16 %6
18070}
18071
18072
; Unsigned less-than compare of <2 x i64> %__a against the i64 loaded from
; %__b splatted to both lanes. The 2-bit result is zero-extended to 16 lanes
; and returned as an i16 mask.
; VLX expects a folded {1to2} broadcast compare; NoVLX expects vpbroadcastq,
; a zmm compare and a kshiftlw/kshiftrw $14 pair keeping mask bits 0-1.
18073define zeroext i16 @test_vpcmpultq_v2i1_v16i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
18074; VLX-LABEL: test_vpcmpultq_v2i1_v16i1_mask_mem_b:
18075; VLX:       # %bb.0: # %entry
18076; VLX-NEXT:    vpcmpltuq (%rdi){1to2}, %xmm0, %k0
18077; VLX-NEXT:    kmovd %k0, %eax
18078; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
18079; VLX-NEXT:    retq
18080;
18081; NoVLX-LABEL: test_vpcmpultq_v2i1_v16i1_mask_mem_b:
18082; NoVLX:       # %bb.0: # %entry
18083; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
18084; NoVLX-NEXT:    vpbroadcastq (%rdi), %xmm1
18085; NoVLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0
18086; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
18087; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
18088; NoVLX-NEXT:    kmovw %k0, %eax
18089; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
18090; NoVLX-NEXT:    vzeroupper
18091; NoVLX-NEXT:    retq
18092entry:
18093  %0 = bitcast <2 x i64> %__a to <2 x i64>
18094  %load = load i64, i64* %__b
  ; Splat the loaded scalar into both lanes.
18095  %vec = insertelement <2 x i64> undef, i64 %load, i32 0
18096  %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
18097  %2 = icmp ult <2 x i64> %0, %1
  ; Indices 2 and 3 select from the zeroinitializer operand, so lanes 2-15 are zero.
18098  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
18099  %4 = bitcast <16 x i1> %3 to i16
18100  ret i16 %4
18101}
18102
; Masked unsigned less-than compare of <2 x i64> %__a against the i64 loaded
; from %__b splatted to both lanes. The 2-bit result is ANDed with the low two
; bits of %__u, zero-extended to 16 lanes and returned as an i16 mask.
; VLX expects one masked compare with a folded {1to2} broadcast; NoVLX expects
; vpbroadcastq, a masked zmm compare and a kshiftlw/kshiftrw $14 pair.
18103define zeroext i16 @test_masked_vpcmpultq_v2i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
18104; VLX-LABEL: test_masked_vpcmpultq_v2i1_v16i1_mask_mem_b:
18105; VLX:       # %bb.0: # %entry
18106; VLX-NEXT:    kmovd %edi, %k1
18107; VLX-NEXT:    vpcmpltuq (%rsi){1to2}, %xmm0, %k0 {%k1}
18108; VLX-NEXT:    kmovd %k0, %eax
18109; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
18110; VLX-NEXT:    retq
18111;
18112; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v16i1_mask_mem_b:
18113; NoVLX:       # %bb.0: # %entry
18114; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
18115; NoVLX-NEXT:    vpbroadcastq (%rsi), %xmm1
18116; NoVLX-NEXT:    kmovw %edi, %k1
18117; NoVLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
18118; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
18119; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
18120; NoVLX-NEXT:    kmovw %k0, %eax
18121; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
18122; NoVLX-NEXT:    vzeroupper
18123; NoVLX-NEXT:    retq
18124entry:
18125  %0 = bitcast <2 x i64> %__a to <2 x i64>
18126  %load = load i64, i64* %__b
  ; Splat the loaded scalar into both lanes.
18127  %vec = insertelement <2 x i64> undef, i64 %load, i32 0
18128  %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
18129  %2 = icmp ult <2 x i64> %0, %1
  ; Only bits 0 and 1 of %__u take part in the mask.
18130  %3 = bitcast i8 %__u to <8 x i1>
18131  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
18132  %4 = and <2 x i1> %extract.i, %2
  ; Indices 2 and 3 select from the zeroinitializer operand, so lanes 2-15 are zero.
18133  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
18134  %6 = bitcast <16 x i1> %5 to i16
18135  ret i16 %6
18136}
18137
18138
; Unsigned less-than compare of two <2 x i64> register operands. The 2-bit
; result is zero-extended to 32 lanes and returned as an i32 mask.
; VLX expects a single xmm compare into %k0 read back with kmovd; NoVLX expects
; a zmm compare and a kshiftlw/kshiftrw $14 pair that keeps mask bits 0-1.
18139define zeroext i32 @test_vpcmpultq_v2i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
18140; VLX-LABEL: test_vpcmpultq_v2i1_v32i1_mask:
18141; VLX:       # %bb.0: # %entry
18142; VLX-NEXT:    vpcmpltuq %xmm1, %xmm0, %k0
18143; VLX-NEXT:    kmovd %k0, %eax
18144; VLX-NEXT:    retq
18145;
18146; NoVLX-LABEL: test_vpcmpultq_v2i1_v32i1_mask:
18147; NoVLX:       # %bb.0: # %entry
18148; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
18149; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
18150; NoVLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0
18151; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
18152; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
18153; NoVLX-NEXT:    kmovw %k0, %eax
18154; NoVLX-NEXT:    vzeroupper
18155; NoVLX-NEXT:    retq
18156entry:
  ; Both bitcasts are same-type (no-op) casts.
18157  %0 = bitcast <2 x i64> %__a to <2 x i64>
18158  %1 = bitcast <2 x i64> %__b to <2 x i64>
18159  %2 = icmp ult <2 x i64> %0, %1
  ; Indices 2 and 3 select from the zeroinitializer operand, so lanes 2-31 are zero.
18160  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
18161  %4 = bitcast <32 x i1> %3 to i32
18162  ret i32 %4
18163}
18164
; Unsigned less-than compare of <2 x i64> %__a against a <2 x i64> loaded from
; %__b. The 2-bit result is zero-extended to 32 lanes and returned as an i32.
; VLX expects the load folded into the compare; NoVLX expects a separate
; vmovdqa, a zmm compare and a kshiftlw/kshiftrw $14 pair keeping mask bits 0-1.
18165define zeroext i32 @test_vpcmpultq_v2i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
18166; VLX-LABEL: test_vpcmpultq_v2i1_v32i1_mask_mem:
18167; VLX:       # %bb.0: # %entry
18168; VLX-NEXT:    vpcmpltuq (%rdi), %xmm0, %k0
18169; VLX-NEXT:    kmovd %k0, %eax
18170; VLX-NEXT:    retq
18171;
18172; NoVLX-LABEL: test_vpcmpultq_v2i1_v32i1_mask_mem:
18173; NoVLX:       # %bb.0: # %entry
18174; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
18175; NoVLX-NEXT:    vmovdqa (%rdi), %xmm1
18176; NoVLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0
18177; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
18178; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
18179; NoVLX-NEXT:    kmovw %k0, %eax
18180; NoVLX-NEXT:    vzeroupper
18181; NoVLX-NEXT:    retq
18182entry:
18183  %0 = bitcast <2 x i64> %__a to <2 x i64>
  ; Second compare operand comes from memory.
18184  %load = load <2 x i64>, <2 x i64>* %__b
18185  %1 = bitcast <2 x i64> %load to <2 x i64>
18186  %2 = icmp ult <2 x i64> %0, %1
  ; Indices 2 and 3 select from the zeroinitializer operand, so lanes 2-31 are zero.
18187  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
18188  %4 = bitcast <32 x i1> %3 to i32
18189  ret i32 %4
18190}
18191
; Masked unsigned less-than compare of two <2 x i64> register operands. The
; 2-bit result is ANDed with the low two bits of %__u, zero-extended to 32
; lanes and returned as an i32 mask.
; VLX expects %__u moved to %k1 and one masked xmm compare; NoVLX expects a
; masked zmm compare and a kshiftlw/kshiftrw $14 pair keeping mask bits 0-1.
18192define zeroext i32 @test_masked_vpcmpultq_v2i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
18193; VLX-LABEL: test_masked_vpcmpultq_v2i1_v32i1_mask:
18194; VLX:       # %bb.0: # %entry
18195; VLX-NEXT:    kmovd %edi, %k1
18196; VLX-NEXT:    vpcmpltuq %xmm1, %xmm0, %k0 {%k1}
18197; VLX-NEXT:    kmovd %k0, %eax
18198; VLX-NEXT:    retq
18199;
18200; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v32i1_mask:
18201; NoVLX:       # %bb.0: # %entry
18202; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
18203; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
18204; NoVLX-NEXT:    kmovw %edi, %k1
18205; NoVLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
18206; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
18207; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
18208; NoVLX-NEXT:    kmovw %k0, %eax
18209; NoVLX-NEXT:    vzeroupper
18210; NoVLX-NEXT:    retq
18211entry:
18212  %0 = bitcast <2 x i64> %__a to <2 x i64>
18213  %1 = bitcast <2 x i64> %__b to <2 x i64>
18214  %2 = icmp ult <2 x i64> %0, %1
  ; Only bits 0 and 1 of %__u take part in the mask.
18215  %3 = bitcast i8 %__u to <8 x i1>
18216  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
18217  %4 = and <2 x i1> %2, %extract.i
  ; Indices 2 and 3 select from the zeroinitializer operand, so lanes 2-31 are zero.
18218  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
18219  %6 = bitcast <32 x i1> %5 to i32
18220  ret i32 %6
18221}
18222
; Masked unsigned less-than compare of <2 x i64> %__a against a <2 x i64>
; loaded from %__b. The 2-bit result is ANDed with the low two bits of %__u,
; zero-extended to 32 lanes and returned as an i32 mask.
; VLX expects the load folded into one masked compare; NoVLX expects vmovdqa,
; a masked zmm compare and a kshiftlw/kshiftrw $14 pair keeping mask bits 0-1.
18223define zeroext i32 @test_masked_vpcmpultq_v2i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
18224; VLX-LABEL: test_masked_vpcmpultq_v2i1_v32i1_mask_mem:
18225; VLX:       # %bb.0: # %entry
18226; VLX-NEXT:    kmovd %edi, %k1
18227; VLX-NEXT:    vpcmpltuq (%rsi), %xmm0, %k0 {%k1}
18228; VLX-NEXT:    kmovd %k0, %eax
18229; VLX-NEXT:    retq
18230;
18231; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v32i1_mask_mem:
18232; NoVLX:       # %bb.0: # %entry
18233; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
18234; NoVLX-NEXT:    vmovdqa (%rsi), %xmm1
18235; NoVLX-NEXT:    kmovw %edi, %k1
18236; NoVLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
18237; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
18238; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
18239; NoVLX-NEXT:    kmovw %k0, %eax
18240; NoVLX-NEXT:    vzeroupper
18241; NoVLX-NEXT:    retq
18242entry:
18243  %0 = bitcast <2 x i64> %__a to <2 x i64>
  ; Second compare operand comes from memory.
18244  %load = load <2 x i64>, <2 x i64>* %__b
18245  %1 = bitcast <2 x i64> %load to <2 x i64>
18246  %2 = icmp ult <2 x i64> %0, %1
  ; Only bits 0 and 1 of %__u take part in the mask.
18247  %3 = bitcast i8 %__u to <8 x i1>
18248  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
18249  %4 = and <2 x i1> %2, %extract.i
  ; Indices 2 and 3 select from the zeroinitializer operand, so lanes 2-31 are zero.
18250  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
18251  %6 = bitcast <32 x i1> %5 to i32
18252  ret i32 %6
18253}
18254
18255
; Unsigned less-than compare of <2 x i64> %__a against the i64 loaded from
; %__b splatted to both lanes. The 2-bit result is zero-extended to 32 lanes
; and returned as an i32 mask.
; VLX expects a folded {1to2} broadcast compare; NoVLX expects vpbroadcastq,
; a zmm compare and a kshiftlw/kshiftrw $14 pair keeping mask bits 0-1.
18256define zeroext i32 @test_vpcmpultq_v2i1_v32i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
18257; VLX-LABEL: test_vpcmpultq_v2i1_v32i1_mask_mem_b:
18258; VLX:       # %bb.0: # %entry
18259; VLX-NEXT:    vpcmpltuq (%rdi){1to2}, %xmm0, %k0
18260; VLX-NEXT:    kmovd %k0, %eax
18261; VLX-NEXT:    retq
18262;
18263; NoVLX-LABEL: test_vpcmpultq_v2i1_v32i1_mask_mem_b:
18264; NoVLX:       # %bb.0: # %entry
18265; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
18266; NoVLX-NEXT:    vpbroadcastq (%rdi), %xmm1
18267; NoVLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0
18268; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
18269; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
18270; NoVLX-NEXT:    kmovw %k0, %eax
18271; NoVLX-NEXT:    vzeroupper
18272; NoVLX-NEXT:    retq
18273entry:
18274  %0 = bitcast <2 x i64> %__a to <2 x i64>
18275  %load = load i64, i64* %__b
  ; Splat the loaded scalar into both lanes.
18276  %vec = insertelement <2 x i64> undef, i64 %load, i32 0
18277  %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
18278  %2 = icmp ult <2 x i64> %0, %1
  ; Indices 2 and 3 select from the zeroinitializer operand, so lanes 2-31 are zero.
18279  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
18280  %4 = bitcast <32 x i1> %3 to i32
18281  ret i32 %4
18282}
18283
; Masked unsigned less-than compare of <2 x i64> %__a against the i64 loaded
; from %__b splatted to both lanes. The 2-bit result is ANDed with the low two
; bits of %__u, zero-extended to 32 lanes and returned as an i32 mask.
; VLX expects one masked compare with a folded {1to2} broadcast; NoVLX expects
; vpbroadcastq, a masked zmm compare and a kshiftlw/kshiftrw $14 pair.
18284define zeroext i32 @test_masked_vpcmpultq_v2i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
18285; VLX-LABEL: test_masked_vpcmpultq_v2i1_v32i1_mask_mem_b:
18286; VLX:       # %bb.0: # %entry
18287; VLX-NEXT:    kmovd %edi, %k1
18288; VLX-NEXT:    vpcmpltuq (%rsi){1to2}, %xmm0, %k0 {%k1}
18289; VLX-NEXT:    kmovd %k0, %eax
18290; VLX-NEXT:    retq
18291;
18292; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v32i1_mask_mem_b:
18293; NoVLX:       # %bb.0: # %entry
18294; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
18295; NoVLX-NEXT:    vpbroadcastq (%rsi), %xmm1
18296; NoVLX-NEXT:    kmovw %edi, %k1
18297; NoVLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
18298; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
18299; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
18300; NoVLX-NEXT:    kmovw %k0, %eax
18301; NoVLX-NEXT:    vzeroupper
18302; NoVLX-NEXT:    retq
18303entry:
18304  %0 = bitcast <2 x i64> %__a to <2 x i64>
18305  %load = load i64, i64* %__b
  ; Splat the loaded scalar into both lanes.
18306  %vec = insertelement <2 x i64> undef, i64 %load, i32 0
18307  %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
18308  %2 = icmp ult <2 x i64> %0, %1
  ; Only bits 0 and 1 of %__u take part in the mask.
18309  %3 = bitcast i8 %__u to <8 x i1>
18310  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
18311  %4 = and <2 x i1> %extract.i, %2
  ; Indices 2 and 3 select from the zeroinitializer operand, so lanes 2-31 are zero.
18312  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
18313  %6 = bitcast <32 x i1> %5 to i32
18314  ret i32 %6
18315}
18316
18317
18318define zeroext i64 @test_vpcmpultq_v2i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
18319; VLX-LABEL: test_vpcmpultq_v2i1_v64i1_mask:
18320; VLX:       # %bb.0: # %entry
18321; VLX-NEXT:    vpcmpltuq %xmm1, %xmm0, %k0
18322; VLX-NEXT:    kmovq %k0, %rax
18323; VLX-NEXT:    retq
18324;
18325; NoVLX-LABEL: test_vpcmpultq_v2i1_v64i1_mask:
18326; NoVLX:       # %bb.0: # %entry
18327; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
18328; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
18329; NoVLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0
18330; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
18331; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
18332; NoVLX-NEXT:    kmovw %k0, %eax
18333; NoVLX-NEXT:    movzwl %ax, %eax
18334; NoVLX-NEXT:    vzeroupper
18335; NoVLX-NEXT:    retq
18336entry:
18337  %0 = bitcast <2 x i64> %__a to <2 x i64>
18338  %1 = bitcast <2 x i64> %__b to <2 x i64>
18339  %2 = icmp ult <2 x i64> %0, %1
18340  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
18341  %4 = bitcast <64 x i1> %3 to i64
18342  ret i64 %4
18343}
18344
18345define zeroext i64 @test_vpcmpultq_v2i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
18346; VLX-LABEL: test_vpcmpultq_v2i1_v64i1_mask_mem:
18347; VLX:       # %bb.0: # %entry
18348; VLX-NEXT:    vpcmpltuq (%rdi), %xmm0, %k0
18349; VLX-NEXT:    kmovq %k0, %rax
18350; VLX-NEXT:    retq
18351;
18352; NoVLX-LABEL: test_vpcmpultq_v2i1_v64i1_mask_mem:
18353; NoVLX:       # %bb.0: # %entry
18354; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
18355; NoVLX-NEXT:    vmovdqa (%rdi), %xmm1
18356; NoVLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0
18357; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
18358; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
18359; NoVLX-NEXT:    kmovw %k0, %eax
18360; NoVLX-NEXT:    movzwl %ax, %eax
18361; NoVLX-NEXT:    vzeroupper
18362; NoVLX-NEXT:    retq
18363entry:
18364  %0 = bitcast <2 x i64> %__a to <2 x i64>
18365  %load = load <2 x i64>, <2 x i64>* %__b
18366  %1 = bitcast <2 x i64> %load to <2 x i64>
18367  %2 = icmp ult <2 x i64> %0, %1
18368  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
18369  %4 = bitcast <64 x i1> %3 to i64
18370  ret i64 %4
18371}
18372
18373define zeroext i64 @test_masked_vpcmpultq_v2i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
18374; VLX-LABEL: test_masked_vpcmpultq_v2i1_v64i1_mask:
18375; VLX:       # %bb.0: # %entry
18376; VLX-NEXT:    kmovd %edi, %k1
18377; VLX-NEXT:    vpcmpltuq %xmm1, %xmm0, %k0 {%k1}
18378; VLX-NEXT:    kmovq %k0, %rax
18379; VLX-NEXT:    retq
18380;
18381; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v64i1_mask:
18382; NoVLX:       # %bb.0: # %entry
18383; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
18384; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
18385; NoVLX-NEXT:    kmovw %edi, %k1
18386; NoVLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
18387; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
18388; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
18389; NoVLX-NEXT:    kmovw %k0, %eax
18390; NoVLX-NEXT:    movzwl %ax, %eax
18391; NoVLX-NEXT:    vzeroupper
18392; NoVLX-NEXT:    retq
18393entry:
18394  %0 = bitcast <2 x i64> %__a to <2 x i64>
18395  %1 = bitcast <2 x i64> %__b to <2 x i64>
18396  %2 = icmp ult <2 x i64> %0, %1
18397  %3 = bitcast i8 %__u to <8 x i1>
18398  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
18399  %4 = and <2 x i1> %2, %extract.i
18400  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
18401  %6 = bitcast <64 x i1> %5 to i64
18402  ret i64 %6
18403}
18404
18405define zeroext i64 @test_masked_vpcmpultq_v2i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
18406; VLX-LABEL: test_masked_vpcmpultq_v2i1_v64i1_mask_mem:
18407; VLX:       # %bb.0: # %entry
18408; VLX-NEXT:    kmovd %edi, %k1
18409; VLX-NEXT:    vpcmpltuq (%rsi), %xmm0, %k0 {%k1}
18410; VLX-NEXT:    kmovq %k0, %rax
18411; VLX-NEXT:    retq
18412;
18413; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v64i1_mask_mem:
18414; NoVLX:       # %bb.0: # %entry
18415; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
18416; NoVLX-NEXT:    vmovdqa (%rsi), %xmm1
18417; NoVLX-NEXT:    kmovw %edi, %k1
18418; NoVLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
18419; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
18420; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
18421; NoVLX-NEXT:    kmovw %k0, %eax
18422; NoVLX-NEXT:    movzwl %ax, %eax
18423; NoVLX-NEXT:    vzeroupper
18424; NoVLX-NEXT:    retq
18425entry:
18426  %0 = bitcast <2 x i64> %__a to <2 x i64>
18427  %load = load <2 x i64>, <2 x i64>* %__b
18428  %1 = bitcast <2 x i64> %load to <2 x i64>
18429  %2 = icmp ult <2 x i64> %0, %1
18430  %3 = bitcast i8 %__u to <8 x i1>
18431  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
18432  %4 = and <2 x i1> %2, %extract.i
18433  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
18434  %6 = bitcast <64 x i1> %5 to i64
18435  ret i64 %6
18436}
18437
18438
18439define zeroext i64 @test_vpcmpultq_v2i1_v64i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
18440; VLX-LABEL: test_vpcmpultq_v2i1_v64i1_mask_mem_b:
18441; VLX:       # %bb.0: # %entry
18442; VLX-NEXT:    vpcmpltuq (%rdi){1to2}, %xmm0, %k0
18443; VLX-NEXT:    kmovq %k0, %rax
18444; VLX-NEXT:    retq
18445;
18446; NoVLX-LABEL: test_vpcmpultq_v2i1_v64i1_mask_mem_b:
18447; NoVLX:       # %bb.0: # %entry
18448; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
18449; NoVLX-NEXT:    vpbroadcastq (%rdi), %xmm1
18450; NoVLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0
18451; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
18452; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
18453; NoVLX-NEXT:    kmovw %k0, %eax
18454; NoVLX-NEXT:    movzwl %ax, %eax
18455; NoVLX-NEXT:    vzeroupper
18456; NoVLX-NEXT:    retq
18457entry:
18458  %0 = bitcast <2 x i64> %__a to <2 x i64>
18459  %load = load i64, i64* %__b
18460  %vec = insertelement <2 x i64> undef, i64 %load, i32 0
18461  %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
18462  %2 = icmp ult <2 x i64> %0, %1
18463  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
18464  %4 = bitcast <64 x i1> %3 to i64
18465  ret i64 %4
18466}
18467
18468define zeroext i64 @test_masked_vpcmpultq_v2i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
18469; VLX-LABEL: test_masked_vpcmpultq_v2i1_v64i1_mask_mem_b:
18470; VLX:       # %bb.0: # %entry
18471; VLX-NEXT:    kmovd %edi, %k1
18472; VLX-NEXT:    vpcmpltuq (%rsi){1to2}, %xmm0, %k0 {%k1}
18473; VLX-NEXT:    kmovq %k0, %rax
18474; VLX-NEXT:    retq
18475;
18476; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v64i1_mask_mem_b:
18477; NoVLX:       # %bb.0: # %entry
18478; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
18479; NoVLX-NEXT:    vpbroadcastq (%rsi), %xmm1
18480; NoVLX-NEXT:    kmovw %edi, %k1
18481; NoVLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
18482; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
18483; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
18484; NoVLX-NEXT:    kmovw %k0, %eax
18485; NoVLX-NEXT:    movzwl %ax, %eax
18486; NoVLX-NEXT:    vzeroupper
18487; NoVLX-NEXT:    retq
18488entry:
18489  %0 = bitcast <2 x i64> %__a to <2 x i64>
18490  %load = load i64, i64* %__b
18491  %vec = insertelement <2 x i64> undef, i64 %load, i32 0
18492  %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
18493  %2 = icmp ult <2 x i64> %0, %1
18494  %3 = bitcast i8 %__u to <8 x i1>
18495  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
18496  %4 = and <2 x i1> %extract.i, %2
18497  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
18498  %6 = bitcast <64 x i1> %5 to i64
18499  ret i64 %6
18500}
18501
18502
; Unsigned less-than compare of two <4 x i64> register operands. The 4-bit
; result is zero-extended to 8 lanes and returned as an i8 mask.
; VLX expects a single ymm compare into %k0; NoVLX expects both operands used
; as zmm registers and a kshiftlw/kshiftrw $12 pair that keeps mask bits 0-3.
18503define zeroext i8 @test_vpcmpultq_v4i1_v8i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
18504; VLX-LABEL: test_vpcmpultq_v4i1_v8i1_mask:
18505; VLX:       # %bb.0: # %entry
18506; VLX-NEXT:    vpcmpltuq %ymm1, %ymm0, %k0
18507; VLX-NEXT:    kmovd %k0, %eax
18508; VLX-NEXT:    # kill: def $al killed $al killed $eax
18509; VLX-NEXT:    vzeroupper
18510; VLX-NEXT:    retq
18511;
18512; NoVLX-LABEL: test_vpcmpultq_v4i1_v8i1_mask:
18513; NoVLX:       # %bb.0: # %entry
18514; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
18515; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
18516; NoVLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0
18517; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
18518; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
18519; NoVLX-NEXT:    kmovw %k0, %eax
18520; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
18521; NoVLX-NEXT:    vzeroupper
18522; NoVLX-NEXT:    retq
18523entry:
  ; Both bitcasts are same-type (no-op) casts.
18524  %0 = bitcast <4 x i64> %__a to <4 x i64>
18525  %1 = bitcast <4 x i64> %__b to <4 x i64>
18526  %2 = icmp ult <4 x i64> %0, %1
  ; Indices 4-7 select from the zeroinitializer operand, so lanes 4-7 are zero.
18527  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
18528  %4 = bitcast <8 x i1> %3 to i8
18529  ret i8 %4
18530}
18531
; Unsigned less-than compare of <4 x i64> %__a against a <4 x i64> loaded from
; %__b. The 4-bit result is zero-extended to 8 lanes and returned as an i8.
; VLX expects the load folded into the ymm compare; NoVLX expects a separate
; vmovdqa, a zmm compare and a kshiftlw/kshiftrw $12 pair keeping mask bits 0-3.
18532define zeroext i8 @test_vpcmpultq_v4i1_v8i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
18533; VLX-LABEL: test_vpcmpultq_v4i1_v8i1_mask_mem:
18534; VLX:       # %bb.0: # %entry
18535; VLX-NEXT:    vpcmpltuq (%rdi), %ymm0, %k0
18536; VLX-NEXT:    kmovd %k0, %eax
18537; VLX-NEXT:    # kill: def $al killed $al killed $eax
18538; VLX-NEXT:    vzeroupper
18539; VLX-NEXT:    retq
18540;
18541; NoVLX-LABEL: test_vpcmpultq_v4i1_v8i1_mask_mem:
18542; NoVLX:       # %bb.0: # %entry
18543; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
18544; NoVLX-NEXT:    vmovdqa (%rdi), %ymm1
18545; NoVLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0
18546; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
18547; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
18548; NoVLX-NEXT:    kmovw %k0, %eax
18549; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
18550; NoVLX-NEXT:    vzeroupper
18551; NoVLX-NEXT:    retq
18552entry:
18553  %0 = bitcast <4 x i64> %__a to <4 x i64>
  ; Second compare operand comes from memory.
18554  %load = load <4 x i64>, <4 x i64>* %__b
18555  %1 = bitcast <4 x i64> %load to <4 x i64>
18556  %2 = icmp ult <4 x i64> %0, %1
  ; Indices 4-7 select from the zeroinitializer operand, so lanes 4-7 are zero.
18557  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
18558  %4 = bitcast <8 x i1> %3 to i8
18559  ret i8 %4
18560}
18561
; Masked unsigned less-than compare of two <4 x i64> register operands. The
; 4-bit result is ANDed with the low four bits of %__u, zero-extended to 8
; lanes and returned as an i8 mask.
; VLX expects %__u moved to %k1 and one masked ymm compare; NoVLX expects a
; masked zmm compare and a kshiftlw/kshiftrw $12 pair keeping mask bits 0-3.
18562define zeroext i8 @test_masked_vpcmpultq_v4i1_v8i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
18563; VLX-LABEL: test_masked_vpcmpultq_v4i1_v8i1_mask:
18564; VLX:       # %bb.0: # %entry
18565; VLX-NEXT:    kmovd %edi, %k1
18566; VLX-NEXT:    vpcmpltuq %ymm1, %ymm0, %k0 {%k1}
18567; VLX-NEXT:    kmovd %k0, %eax
18568; VLX-NEXT:    # kill: def $al killed $al killed $eax
18569; VLX-NEXT:    vzeroupper
18570; VLX-NEXT:    retq
18571;
18572; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v8i1_mask:
18573; NoVLX:       # %bb.0: # %entry
18574; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
18575; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
18576; NoVLX-NEXT:    kmovw %edi, %k1
18577; NoVLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
18578; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
18579; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
18580; NoVLX-NEXT:    kmovw %k0, %eax
18581; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
18582; NoVLX-NEXT:    vzeroupper
18583; NoVLX-NEXT:    retq
18584entry:
18585  %0 = bitcast <4 x i64> %__a to <4 x i64>
18586  %1 = bitcast <4 x i64> %__b to <4 x i64>
18587  %2 = icmp ult <4 x i64> %0, %1
  ; Only bits 0-3 of %__u take part in the mask.
18588  %3 = bitcast i8 %__u to <8 x i1>
18589  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
18590  %4 = and <4 x i1> %2, %extract.i
  ; Indices 4-7 select from the zeroinitializer operand, so lanes 4-7 are zero.
18591  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
18592  %6 = bitcast <8 x i1> %5 to i8
18593  ret i8 %6
18594}
18595
; Masked unsigned less-than compare of %__a against a <4 x i64> loaded from
; %__b. The <4 x i1> compare result is ANDed with the low four bits of %__u,
; widened to <8 x i1> with zero upper lanes and returned as an i8 bitmask.
; Checked codegen, per run line: both runs move %__u (edi) into k1 as the
; write mask; the VLX run folds the load into a ymm vpcmpltuq, the NoVLX run
; loads into ymm1, compares at zmm width and keeps only the low four mask bits
; via the kshiftlw/kshiftrw $12 pair.
18596define zeroext i8 @test_masked_vpcmpultq_v4i1_v8i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
18597; VLX-LABEL: test_masked_vpcmpultq_v4i1_v8i1_mask_mem:
18598; VLX:       # %bb.0: # %entry
18599; VLX-NEXT:    kmovd %edi, %k1
18600; VLX-NEXT:    vpcmpltuq (%rsi), %ymm0, %k0 {%k1}
18601; VLX-NEXT:    kmovd %k0, %eax
18602; VLX-NEXT:    # kill: def $al killed $al killed $eax
18603; VLX-NEXT:    vzeroupper
18604; VLX-NEXT:    retq
18605;
18606; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v8i1_mask_mem:
18607; NoVLX:       # %bb.0: # %entry
18608; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
18609; NoVLX-NEXT:    vmovdqa (%rsi), %ymm1
18610; NoVLX-NEXT:    kmovw %edi, %k1
18611; NoVLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
18612; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
18613; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
18614; NoVLX-NEXT:    kmovw %k0, %eax
18615; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
18616; NoVLX-NEXT:    vzeroupper
18617; NoVLX-NEXT:    retq
18618entry:
  ; Same-type bitcast, i.e. a no-op.
18619  %0 = bitcast <4 x i64> %__a to <4 x i64>
  ; No explicit align on the load, so the ABI alignment of <4 x i64> applies.
18620  %load = load <4 x i64>, <4 x i64>* %__b
18621  %1 = bitcast <4 x i64> %load to <4 x i64>
18622  %2 = icmp ult <4 x i64> %0, %1
  ; View %__u as eight i1 lanes and keep lanes 0-3 as the per-element mask.
18623  %3 = bitcast i8 %__u to <8 x i1>
18624  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
18625  %4 = and <4 x i1> %2, %extract.i
  ; Indices 4-7 address the zeroinitializer operand, so result lanes above 3 are 0.
18626  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
18627  %6 = bitcast <8 x i1> %5 to i8
18628  ret i8 %6
18629}
18630
18631
; Unsigned less-than compare of %__a against a scalar i64 loaded from %__b and
; splatted to all four lanes. The <4 x i1> result is widened to <8 x i1> with
; zero upper lanes and returned as an i8 bitmask.
; Checked codegen, per run line: the VLX run uses the {1to4} embedded
; broadcast memory operand; the NoVLX run broadcasts into ymm1 with
; vpbroadcastq, compares at zmm width and keeps only the low four mask bits
; via the kshiftlw/kshiftrw $12 pair.
18632define zeroext i8 @test_vpcmpultq_v4i1_v8i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
18633; VLX-LABEL: test_vpcmpultq_v4i1_v8i1_mask_mem_b:
18634; VLX:       # %bb.0: # %entry
18635; VLX-NEXT:    vpcmpltuq (%rdi){1to4}, %ymm0, %k0
18636; VLX-NEXT:    kmovd %k0, %eax
18637; VLX-NEXT:    # kill: def $al killed $al killed $eax
18638; VLX-NEXT:    vzeroupper
18639; VLX-NEXT:    retq
18640;
18641; NoVLX-LABEL: test_vpcmpultq_v4i1_v8i1_mask_mem_b:
18642; NoVLX:       # %bb.0: # %entry
18643; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
18644; NoVLX-NEXT:    vpbroadcastq (%rdi), %ymm1
18645; NoVLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0
18646; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
18647; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
18648; NoVLX-NEXT:    kmovw %k0, %eax
18649; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
18650; NoVLX-NEXT:    vzeroupper
18651; NoVLX-NEXT:    retq
18652entry:
  ; Same-type bitcast, i.e. a no-op.
18653  %0 = bitcast <4 x i64> %__a to <4 x i64>
18654  %load = load i64, i64* %__b
  ; Splat idiom, lane 0 of %vec is replicated into all four lanes.
18655  %vec = insertelement <4 x i64> undef, i64 %load, i32 0
18656  %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
18657  %2 = icmp ult <4 x i64> %0, %1
  ; Indices 4-7 address the zeroinitializer operand, so result lanes above 3 are 0.
18658  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
18659  %4 = bitcast <8 x i1> %3 to i8
18660  ret i8 %4
18661}
18662
; Masked unsigned less-than compare of %__a against a scalar i64 loaded from
; %__b and splatted to all four lanes. The <4 x i1> compare result is ANDed
; with the low four bits of %__u, widened to <8 x i1> with zero upper lanes
; and returned as an i8 bitmask.
; Checked codegen, per run line: both runs move %__u (edi) into k1 as the
; write mask; the VLX run uses the {1to4} embedded broadcast operand, the
; NoVLX run broadcasts into ymm1, compares at zmm width and keeps only the low
; four mask bits via the kshiftlw/kshiftrw $12 pair.
18663define zeroext i8 @test_masked_vpcmpultq_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
18664; VLX-LABEL: test_masked_vpcmpultq_v4i1_v8i1_mask_mem_b:
18665; VLX:       # %bb.0: # %entry
18666; VLX-NEXT:    kmovd %edi, %k1
18667; VLX-NEXT:    vpcmpltuq (%rsi){1to4}, %ymm0, %k0 {%k1}
18668; VLX-NEXT:    kmovd %k0, %eax
18669; VLX-NEXT:    # kill: def $al killed $al killed $eax
18670; VLX-NEXT:    vzeroupper
18671; VLX-NEXT:    retq
18672;
18673; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v8i1_mask_mem_b:
18674; NoVLX:       # %bb.0: # %entry
18675; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
18676; NoVLX-NEXT:    vpbroadcastq (%rsi), %ymm1
18677; NoVLX-NEXT:    kmovw %edi, %k1
18678; NoVLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
18679; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
18680; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
18681; NoVLX-NEXT:    kmovw %k0, %eax
18682; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
18683; NoVLX-NEXT:    vzeroupper
18684; NoVLX-NEXT:    retq
18685entry:
  ; Same-type bitcast, i.e. a no-op.
18686  %0 = bitcast <4 x i64> %__a to <4 x i64>
18687  %load = load i64, i64* %__b
  ; Splat idiom, lane 0 of %vec is replicated into all four lanes.
18688  %vec = insertelement <4 x i64> undef, i64 %load, i32 0
18689  %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
18690  %2 = icmp ult <4 x i64> %0, %1
  ; View %__u as eight i1 lanes and keep lanes 0-3 as the per-element mask.
18691  %3 = bitcast i8 %__u to <8 x i1>
18692  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ; Operand order here is mask first, compare second (the non-broadcast masked tests use the reverse).
18693  %4 = and <4 x i1> %extract.i, %2
  ; Indices 4-7 address the zeroinitializer operand, so result lanes above 3 are 0.
18694  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
18695  %6 = bitcast <8 x i1> %5 to i8
18696  ret i8 %6
18697}
18698
18699
; Unsigned less-than compare of two <4 x i64> vectors. The <4 x i1> result is
; widened to <16 x i1> with zero upper lanes and returned as an i16 bitmask.
; Checked codegen, per run line: the VLX run compares at ymm width directly;
; the NoVLX run compares at zmm width and keeps only the low four mask bits
; via the kshiftlw/kshiftrw $12 pair.
18700define zeroext i16 @test_vpcmpultq_v4i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
18701; VLX-LABEL: test_vpcmpultq_v4i1_v16i1_mask:
18702; VLX:       # %bb.0: # %entry
18703; VLX-NEXT:    vpcmpltuq %ymm1, %ymm0, %k0
18704; VLX-NEXT:    kmovd %k0, %eax
18705; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
18706; VLX-NEXT:    vzeroupper
18707; VLX-NEXT:    retq
18708;
18709; NoVLX-LABEL: test_vpcmpultq_v4i1_v16i1_mask:
18710; NoVLX:       # %bb.0: # %entry
18711; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
18712; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
18713; NoVLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0
18714; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
18715; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
18716; NoVLX-NEXT:    kmovw %k0, %eax
18717; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
18718; NoVLX-NEXT:    vzeroupper
18719; NoVLX-NEXT:    retq
18720entry:
  ; Same-type bitcasts, i.e. no-ops.
18721  %0 = bitcast <4 x i64> %__a to <4 x i64>
18722  %1 = bitcast <4 x i64> %__b to <4 x i64>
18723  %2 = icmp ult <4 x i64> %0, %1
  ; Indices 4-7 address the zeroinitializer operand, so result lanes above 3 are 0.
18724  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
18725  %4 = bitcast <16 x i1> %3 to i16
18726  ret i16 %4
18727}
18728
; Unsigned less-than compare of %__a against a <4 x i64> loaded from %__b.
; The <4 x i1> result is widened to <16 x i1> with zero upper lanes and
; returned as an i16 bitmask.
; Checked codegen, per run line: the VLX run folds the load into a ymm
; vpcmpltuq; the NoVLX run loads into ymm1, compares at zmm width and keeps
; only the low four mask bits via the kshiftlw/kshiftrw $12 pair.
18729define zeroext i16 @test_vpcmpultq_v4i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
18730; VLX-LABEL: test_vpcmpultq_v4i1_v16i1_mask_mem:
18731; VLX:       # %bb.0: # %entry
18732; VLX-NEXT:    vpcmpltuq (%rdi), %ymm0, %k0
18733; VLX-NEXT:    kmovd %k0, %eax
18734; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
18735; VLX-NEXT:    vzeroupper
18736; VLX-NEXT:    retq
18737;
18738; NoVLX-LABEL: test_vpcmpultq_v4i1_v16i1_mask_mem:
18739; NoVLX:       # %bb.0: # %entry
18740; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
18741; NoVLX-NEXT:    vmovdqa (%rdi), %ymm1
18742; NoVLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0
18743; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
18744; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
18745; NoVLX-NEXT:    kmovw %k0, %eax
18746; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
18747; NoVLX-NEXT:    vzeroupper
18748; NoVLX-NEXT:    retq
18749entry:
  ; Same-type bitcast, i.e. a no-op.
18750  %0 = bitcast <4 x i64> %__a to <4 x i64>
  ; No explicit align on the load, so the ABI alignment of <4 x i64> applies.
18751  %load = load <4 x i64>, <4 x i64>* %__b
18752  %1 = bitcast <4 x i64> %load to <4 x i64>
18753  %2 = icmp ult <4 x i64> %0, %1
  ; Indices 4-7 address the zeroinitializer operand, so result lanes above 3 are 0.
18754  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
18755  %4 = bitcast <16 x i1> %3 to i16
18756  ret i16 %4
18757}
18758
; Masked unsigned less-than compare of two <4 x i64> vectors. The <4 x i1>
; compare result is ANDed with the low four bits of %__u, widened to
; <16 x i1> with zero upper lanes and returned as an i16 bitmask.
; Checked codegen, per run line: both runs move %__u (edi) into k1 and use it
; as the write mask of vpcmpltuq; the NoVLX run compares at zmm width and
; keeps only the low four mask bits via the kshiftlw/kshiftrw $12 pair.
18759define zeroext i16 @test_masked_vpcmpultq_v4i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
18760; VLX-LABEL: test_masked_vpcmpultq_v4i1_v16i1_mask:
18761; VLX:       # %bb.0: # %entry
18762; VLX-NEXT:    kmovd %edi, %k1
18763; VLX-NEXT:    vpcmpltuq %ymm1, %ymm0, %k0 {%k1}
18764; VLX-NEXT:    kmovd %k0, %eax
18765; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
18766; VLX-NEXT:    vzeroupper
18767; VLX-NEXT:    retq
18768;
18769; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v16i1_mask:
18770; NoVLX:       # %bb.0: # %entry
18771; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
18772; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
18773; NoVLX-NEXT:    kmovw %edi, %k1
18774; NoVLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
18775; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
18776; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
18777; NoVLX-NEXT:    kmovw %k0, %eax
18778; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
18779; NoVLX-NEXT:    vzeroupper
18780; NoVLX-NEXT:    retq
18781entry:
  ; Same-type bitcasts, i.e. no-ops.
18782  %0 = bitcast <4 x i64> %__a to <4 x i64>
18783  %1 = bitcast <4 x i64> %__b to <4 x i64>
18784  %2 = icmp ult <4 x i64> %0, %1
  ; View %__u as eight i1 lanes and keep lanes 0-3 as the per-element mask.
18785  %3 = bitcast i8 %__u to <8 x i1>
18786  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
18787  %4 = and <4 x i1> %2, %extract.i
  ; Indices 4-7 address the zeroinitializer operand, so result lanes above 3 are 0.
18788  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
18789  %6 = bitcast <16 x i1> %5 to i16
18790  ret i16 %6
18791}
18792
; Masked unsigned less-than compare of %__a against a <4 x i64> loaded from
; %__b. The <4 x i1> compare result is ANDed with the low four bits of %__u,
; widened to <16 x i1> with zero upper lanes and returned as an i16 bitmask.
; Checked codegen, per run line: both runs move %__u (edi) into k1 as the
; write mask; the VLX run folds the load into a ymm vpcmpltuq, the NoVLX run
; loads into ymm1, compares at zmm width and keeps only the low four mask bits
; via the kshiftlw/kshiftrw $12 pair.
18793define zeroext i16 @test_masked_vpcmpultq_v4i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
18794; VLX-LABEL: test_masked_vpcmpultq_v4i1_v16i1_mask_mem:
18795; VLX:       # %bb.0: # %entry
18796; VLX-NEXT:    kmovd %edi, %k1
18797; VLX-NEXT:    vpcmpltuq (%rsi), %ymm0, %k0 {%k1}
18798; VLX-NEXT:    kmovd %k0, %eax
18799; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
18800; VLX-NEXT:    vzeroupper
18801; VLX-NEXT:    retq
18802;
18803; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v16i1_mask_mem:
18804; NoVLX:       # %bb.0: # %entry
18805; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
18806; NoVLX-NEXT:    vmovdqa (%rsi), %ymm1
18807; NoVLX-NEXT:    kmovw %edi, %k1
18808; NoVLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
18809; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
18810; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
18811; NoVLX-NEXT:    kmovw %k0, %eax
18812; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
18813; NoVLX-NEXT:    vzeroupper
18814; NoVLX-NEXT:    retq
18815entry:
  ; Same-type bitcast, i.e. a no-op.
18816  %0 = bitcast <4 x i64> %__a to <4 x i64>
  ; No explicit align on the load, so the ABI alignment of <4 x i64> applies.
18817  %load = load <4 x i64>, <4 x i64>* %__b
18818  %1 = bitcast <4 x i64> %load to <4 x i64>
18819  %2 = icmp ult <4 x i64> %0, %1
  ; View %__u as eight i1 lanes and keep lanes 0-3 as the per-element mask.
18820  %3 = bitcast i8 %__u to <8 x i1>
18821  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
18822  %4 = and <4 x i1> %2, %extract.i
  ; Indices 4-7 address the zeroinitializer operand, so result lanes above 3 are 0.
18823  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
18824  %6 = bitcast <16 x i1> %5 to i16
18825  ret i16 %6
18826}
18827
18828
; Unsigned less-than compare of %__a against a scalar i64 loaded from %__b and
; splatted to all four lanes. The <4 x i1> result is widened to <16 x i1> with
; zero upper lanes and returned as an i16 bitmask.
; Checked codegen, per run line: the VLX run uses the {1to4} embedded
; broadcast memory operand; the NoVLX run broadcasts into ymm1 with
; vpbroadcastq, compares at zmm width and keeps only the low four mask bits
; via the kshiftlw/kshiftrw $12 pair.
18829define zeroext i16 @test_vpcmpultq_v4i1_v16i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
18830; VLX-LABEL: test_vpcmpultq_v4i1_v16i1_mask_mem_b:
18831; VLX:       # %bb.0: # %entry
18832; VLX-NEXT:    vpcmpltuq (%rdi){1to4}, %ymm0, %k0
18833; VLX-NEXT:    kmovd %k0, %eax
18834; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
18835; VLX-NEXT:    vzeroupper
18836; VLX-NEXT:    retq
18837;
18838; NoVLX-LABEL: test_vpcmpultq_v4i1_v16i1_mask_mem_b:
18839; NoVLX:       # %bb.0: # %entry
18840; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
18841; NoVLX-NEXT:    vpbroadcastq (%rdi), %ymm1
18842; NoVLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0
18843; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
18844; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
18845; NoVLX-NEXT:    kmovw %k0, %eax
18846; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
18847; NoVLX-NEXT:    vzeroupper
18848; NoVLX-NEXT:    retq
18849entry:
  ; Same-type bitcast, i.e. a no-op.
18850  %0 = bitcast <4 x i64> %__a to <4 x i64>
18851  %load = load i64, i64* %__b
  ; Splat idiom, lane 0 of %vec is replicated into all four lanes.
18852  %vec = insertelement <4 x i64> undef, i64 %load, i32 0
18853  %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
18854  %2 = icmp ult <4 x i64> %0, %1
  ; Indices 4-7 address the zeroinitializer operand, so result lanes above 3 are 0.
18855  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
18856  %4 = bitcast <16 x i1> %3 to i16
18857  ret i16 %4
18858}
18859
; Masked unsigned less-than compare of %__a against a scalar i64 loaded from
; %__b and splatted to all four lanes. The <4 x i1> compare result is ANDed
; with the low four bits of %__u, widened to <16 x i1> with zero upper lanes
; and returned as an i16 bitmask.
; Checked codegen, per run line: both runs move %__u (edi) into k1 as the
; write mask; the VLX run uses the {1to4} embedded broadcast operand, the
; NoVLX run broadcasts into ymm1, compares at zmm width and keeps only the low
; four mask bits via the kshiftlw/kshiftrw $12 pair.
18860define zeroext i16 @test_masked_vpcmpultq_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
18861; VLX-LABEL: test_masked_vpcmpultq_v4i1_v16i1_mask_mem_b:
18862; VLX:       # %bb.0: # %entry
18863; VLX-NEXT:    kmovd %edi, %k1
18864; VLX-NEXT:    vpcmpltuq (%rsi){1to4}, %ymm0, %k0 {%k1}
18865; VLX-NEXT:    kmovd %k0, %eax
18866; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
18867; VLX-NEXT:    vzeroupper
18868; VLX-NEXT:    retq
18869;
18870; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v16i1_mask_mem_b:
18871; NoVLX:       # %bb.0: # %entry
18872; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
18873; NoVLX-NEXT:    vpbroadcastq (%rsi), %ymm1
18874; NoVLX-NEXT:    kmovw %edi, %k1
18875; NoVLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
18876; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
18877; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
18878; NoVLX-NEXT:    kmovw %k0, %eax
18879; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
18880; NoVLX-NEXT:    vzeroupper
18881; NoVLX-NEXT:    retq
18882entry:
  ; Same-type bitcast, i.e. a no-op.
18883  %0 = bitcast <4 x i64> %__a to <4 x i64>
18884  %load = load i64, i64* %__b
  ; Splat idiom, lane 0 of %vec is replicated into all four lanes.
18885  %vec = insertelement <4 x i64> undef, i64 %load, i32 0
18886  %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
18887  %2 = icmp ult <4 x i64> %0, %1
  ; View %__u as eight i1 lanes and keep lanes 0-3 as the per-element mask.
18888  %3 = bitcast i8 %__u to <8 x i1>
18889  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ; Operand order here is mask first, compare second (the non-broadcast masked tests use the reverse).
18890  %4 = and <4 x i1> %extract.i, %2
  ; Indices 4-7 address the zeroinitializer operand, so result lanes above 3 are 0.
18891  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
18892  %6 = bitcast <16 x i1> %5 to i16
18893  ret i16 %6
18894}
18895
18896
; Unsigned less-than compare of two <4 x i64> vectors. The <4 x i1> result is
; widened to <32 x i1> with zero upper lanes and returned as an i32 bitmask.
; Checked codegen, per run line: the VLX run compares at ymm width directly;
; the NoVLX run compares at zmm width and keeps only the low four mask bits
; via the kshiftlw/kshiftrw $12 pair.
18897define zeroext i32 @test_vpcmpultq_v4i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
18898; VLX-LABEL: test_vpcmpultq_v4i1_v32i1_mask:
18899; VLX:       # %bb.0: # %entry
18900; VLX-NEXT:    vpcmpltuq %ymm1, %ymm0, %k0
18901; VLX-NEXT:    kmovd %k0, %eax
18902; VLX-NEXT:    vzeroupper
18903; VLX-NEXT:    retq
18904;
18905; NoVLX-LABEL: test_vpcmpultq_v4i1_v32i1_mask:
18906; NoVLX:       # %bb.0: # %entry
18907; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
18908; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
18909; NoVLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0
18910; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
18911; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
18912; NoVLX-NEXT:    kmovw %k0, %eax
18913; NoVLX-NEXT:    vzeroupper
18914; NoVLX-NEXT:    retq
18915entry:
  ; Same-type bitcasts, i.e. no-ops.
18916  %0 = bitcast <4 x i64> %__a to <4 x i64>
18917  %1 = bitcast <4 x i64> %__b to <4 x i64>
18918  %2 = icmp ult <4 x i64> %0, %1
  ; Indices 4-7 address the zeroinitializer operand, so result lanes above 3 are 0.
18919  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
18920  %4 = bitcast <32 x i1> %3 to i32
18921  ret i32 %4
18922}
18923
; Unsigned less-than compare of %__a against a <4 x i64> loaded from %__b.
; The <4 x i1> result is widened to <32 x i1> with zero upper lanes and
; returned as an i32 bitmask.
; Checked codegen, per run line: the VLX run folds the load into a ymm
; vpcmpltuq; the NoVLX run loads into ymm1, compares at zmm width and keeps
; only the low four mask bits via the kshiftlw/kshiftrw $12 pair.
18924define zeroext i32 @test_vpcmpultq_v4i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
18925; VLX-LABEL: test_vpcmpultq_v4i1_v32i1_mask_mem:
18926; VLX:       # %bb.0: # %entry
18927; VLX-NEXT:    vpcmpltuq (%rdi), %ymm0, %k0
18928; VLX-NEXT:    kmovd %k0, %eax
18929; VLX-NEXT:    vzeroupper
18930; VLX-NEXT:    retq
18931;
18932; NoVLX-LABEL: test_vpcmpultq_v4i1_v32i1_mask_mem:
18933; NoVLX:       # %bb.0: # %entry
18934; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
18935; NoVLX-NEXT:    vmovdqa (%rdi), %ymm1
18936; NoVLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0
18937; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
18938; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
18939; NoVLX-NEXT:    kmovw %k0, %eax
18940; NoVLX-NEXT:    vzeroupper
18941; NoVLX-NEXT:    retq
18942entry:
  ; Same-type bitcast, i.e. a no-op.
18943  %0 = bitcast <4 x i64> %__a to <4 x i64>
  ; No explicit align on the load, so the ABI alignment of <4 x i64> applies.
18944  %load = load <4 x i64>, <4 x i64>* %__b
18945  %1 = bitcast <4 x i64> %load to <4 x i64>
18946  %2 = icmp ult <4 x i64> %0, %1
  ; Indices 4-7 address the zeroinitializer operand, so result lanes above 3 are 0.
18947  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
18948  %4 = bitcast <32 x i1> %3 to i32
18949  ret i32 %4
18950}
18951
; Masked unsigned less-than compare of two <4 x i64> vectors. The <4 x i1>
; compare result is ANDed with the low four bits of %__u, widened to
; <32 x i1> with zero upper lanes and returned as an i32 bitmask.
; Checked codegen, per run line: both runs move %__u (edi) into k1 and use it
; as the write mask of vpcmpltuq; the NoVLX run compares at zmm width and
; keeps only the low four mask bits via the kshiftlw/kshiftrw $12 pair.
18952define zeroext i32 @test_masked_vpcmpultq_v4i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
18953; VLX-LABEL: test_masked_vpcmpultq_v4i1_v32i1_mask:
18954; VLX:       # %bb.0: # %entry
18955; VLX-NEXT:    kmovd %edi, %k1
18956; VLX-NEXT:    vpcmpltuq %ymm1, %ymm0, %k0 {%k1}
18957; VLX-NEXT:    kmovd %k0, %eax
18958; VLX-NEXT:    vzeroupper
18959; VLX-NEXT:    retq
18960;
18961; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v32i1_mask:
18962; NoVLX:       # %bb.0: # %entry
18963; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
18964; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
18965; NoVLX-NEXT:    kmovw %edi, %k1
18966; NoVLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
18967; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
18968; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
18969; NoVLX-NEXT:    kmovw %k0, %eax
18970; NoVLX-NEXT:    vzeroupper
18971; NoVLX-NEXT:    retq
18972entry:
  ; Same-type bitcasts, i.e. no-ops.
18973  %0 = bitcast <4 x i64> %__a to <4 x i64>
18974  %1 = bitcast <4 x i64> %__b to <4 x i64>
18975  %2 = icmp ult <4 x i64> %0, %1
  ; View %__u as eight i1 lanes and keep lanes 0-3 as the per-element mask.
18976  %3 = bitcast i8 %__u to <8 x i1>
18977  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
18978  %4 = and <4 x i1> %2, %extract.i
  ; Indices 4-7 address the zeroinitializer operand, so result lanes above 3 are 0.
18979  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
18980  %6 = bitcast <32 x i1> %5 to i32
18981  ret i32 %6
18982}
18983
; Masked unsigned less-than compare of %__a against a <4 x i64> loaded from
; %__b. The <4 x i1> compare result is ANDed with the low four bits of %__u,
; widened to <32 x i1> with zero upper lanes and returned as an i32 bitmask.
; Checked codegen, per run line: both runs move %__u (edi) into k1 as the
; write mask; the VLX run folds the load into a ymm vpcmpltuq, the NoVLX run
; loads into ymm1, compares at zmm width and keeps only the low four mask bits
; via the kshiftlw/kshiftrw $12 pair.
18984define zeroext i32 @test_masked_vpcmpultq_v4i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
18985; VLX-LABEL: test_masked_vpcmpultq_v4i1_v32i1_mask_mem:
18986; VLX:       # %bb.0: # %entry
18987; VLX-NEXT:    kmovd %edi, %k1
18988; VLX-NEXT:    vpcmpltuq (%rsi), %ymm0, %k0 {%k1}
18989; VLX-NEXT:    kmovd %k0, %eax
18990; VLX-NEXT:    vzeroupper
18991; VLX-NEXT:    retq
18992;
18993; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v32i1_mask_mem:
18994; NoVLX:       # %bb.0: # %entry
18995; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
18996; NoVLX-NEXT:    vmovdqa (%rsi), %ymm1
18997; NoVLX-NEXT:    kmovw %edi, %k1
18998; NoVLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
18999; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
19000; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
19001; NoVLX-NEXT:    kmovw %k0, %eax
19002; NoVLX-NEXT:    vzeroupper
19003; NoVLX-NEXT:    retq
19004entry:
  ; Same-type bitcast, i.e. a no-op.
19005  %0 = bitcast <4 x i64> %__a to <4 x i64>
  ; No explicit align on the load, so the ABI alignment of <4 x i64> applies.
19006  %load = load <4 x i64>, <4 x i64>* %__b
19007  %1 = bitcast <4 x i64> %load to <4 x i64>
19008  %2 = icmp ult <4 x i64> %0, %1
  ; View %__u as eight i1 lanes and keep lanes 0-3 as the per-element mask.
19009  %3 = bitcast i8 %__u to <8 x i1>
19010  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
19011  %4 = and <4 x i1> %2, %extract.i
  ; Indices 4-7 address the zeroinitializer operand, so result lanes above 3 are 0.
19012  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
19013  %6 = bitcast <32 x i1> %5 to i32
19014  ret i32 %6
19015}
19016
19017
; Unsigned less-than compare of %__a against a scalar i64 loaded from %__b and
; splatted to all four lanes. The <4 x i1> result is widened to <32 x i1> with
; zero upper lanes and returned as an i32 bitmask.
; Checked codegen, per run line: the VLX run uses the {1to4} embedded
; broadcast memory operand; the NoVLX run broadcasts into ymm1 with
; vpbroadcastq, compares at zmm width and keeps only the low four mask bits
; via the kshiftlw/kshiftrw $12 pair.
19018define zeroext i32 @test_vpcmpultq_v4i1_v32i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
19019; VLX-LABEL: test_vpcmpultq_v4i1_v32i1_mask_mem_b:
19020; VLX:       # %bb.0: # %entry
19021; VLX-NEXT:    vpcmpltuq (%rdi){1to4}, %ymm0, %k0
19022; VLX-NEXT:    kmovd %k0, %eax
19023; VLX-NEXT:    vzeroupper
19024; VLX-NEXT:    retq
19025;
19026; NoVLX-LABEL: test_vpcmpultq_v4i1_v32i1_mask_mem_b:
19027; NoVLX:       # %bb.0: # %entry
19028; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
19029; NoVLX-NEXT:    vpbroadcastq (%rdi), %ymm1
19030; NoVLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0
19031; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
19032; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
19033; NoVLX-NEXT:    kmovw %k0, %eax
19034; NoVLX-NEXT:    vzeroupper
19035; NoVLX-NEXT:    retq
19036entry:
  ; Same-type bitcast, i.e. a no-op.
19037  %0 = bitcast <4 x i64> %__a to <4 x i64>
19038  %load = load i64, i64* %__b
  ; Splat idiom, lane 0 of %vec is replicated into all four lanes.
19039  %vec = insertelement <4 x i64> undef, i64 %load, i32 0
19040  %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
19041  %2 = icmp ult <4 x i64> %0, %1
  ; Indices 4-7 address the zeroinitializer operand, so result lanes above 3 are 0.
19042  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
19043  %4 = bitcast <32 x i1> %3 to i32
19044  ret i32 %4
19045}
19046
; Masked unsigned less-than compare of %__a against a scalar i64 loaded from
; %__b and splatted to all four lanes. The <4 x i1> compare result is ANDed
; with the low four bits of %__u, widened to <32 x i1> with zero upper lanes
; and returned as an i32 bitmask.
; Checked codegen, per run line: both runs move %__u (edi) into k1 as the
; write mask; the VLX run uses the {1to4} embedded broadcast operand, the
; NoVLX run broadcasts into ymm1, compares at zmm width and keeps only the low
; four mask bits via the kshiftlw/kshiftrw $12 pair.
19047define zeroext i32 @test_masked_vpcmpultq_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
19048; VLX-LABEL: test_masked_vpcmpultq_v4i1_v32i1_mask_mem_b:
19049; VLX:       # %bb.0: # %entry
19050; VLX-NEXT:    kmovd %edi, %k1
19051; VLX-NEXT:    vpcmpltuq (%rsi){1to4}, %ymm0, %k0 {%k1}
19052; VLX-NEXT:    kmovd %k0, %eax
19053; VLX-NEXT:    vzeroupper
19054; VLX-NEXT:    retq
19055;
19056; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v32i1_mask_mem_b:
19057; NoVLX:       # %bb.0: # %entry
19058; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
19059; NoVLX-NEXT:    vpbroadcastq (%rsi), %ymm1
19060; NoVLX-NEXT:    kmovw %edi, %k1
19061; NoVLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
19062; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
19063; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
19064; NoVLX-NEXT:    kmovw %k0, %eax
19065; NoVLX-NEXT:    vzeroupper
19066; NoVLX-NEXT:    retq
19067entry:
  ; Same-type bitcast, i.e. a no-op.
19068  %0 = bitcast <4 x i64> %__a to <4 x i64>
19069  %load = load i64, i64* %__b
  ; Splat idiom, lane 0 of %vec is replicated into all four lanes.
19070  %vec = insertelement <4 x i64> undef, i64 %load, i32 0
19071  %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
19072  %2 = icmp ult <4 x i64> %0, %1
  ; View %__u as eight i1 lanes and keep lanes 0-3 as the per-element mask.
19073  %3 = bitcast i8 %__u to <8 x i1>
19074  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ; Operand order here is mask first, compare second (the non-broadcast masked tests use the reverse).
19075  %4 = and <4 x i1> %extract.i, %2
  ; Indices 4-7 address the zeroinitializer operand, so result lanes above 3 are 0.
19076  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
19077  %6 = bitcast <32 x i1> %5 to i32
19078  ret i32 %6
19079}
19080
19081
; Unsigned less-than compare of two <4 x i64> vectors. The <4 x i1> result is
; widened to <64 x i1> with zero upper lanes and returned as an i64 bitmask.
; Checked codegen, per run line: the VLX run compares at ymm width and reads
; the mask with kmovq; the NoVLX run compares at zmm width, keeps only the low
; four mask bits via the kshiftlw/kshiftrw $12 pair, then zero-extends the
; 16-bit mask value with movzwl.
19082define zeroext i64 @test_vpcmpultq_v4i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
19083; VLX-LABEL: test_vpcmpultq_v4i1_v64i1_mask:
19084; VLX:       # %bb.0: # %entry
19085; VLX-NEXT:    vpcmpltuq %ymm1, %ymm0, %k0
19086; VLX-NEXT:    kmovq %k0, %rax
19087; VLX-NEXT:    vzeroupper
19088; VLX-NEXT:    retq
19089;
19090; NoVLX-LABEL: test_vpcmpultq_v4i1_v64i1_mask:
19091; NoVLX:       # %bb.0: # %entry
19092; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
19093; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
19094; NoVLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0
19095; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
19096; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
19097; NoVLX-NEXT:    kmovw %k0, %eax
19098; NoVLX-NEXT:    movzwl %ax, %eax
19099; NoVLX-NEXT:    vzeroupper
19100; NoVLX-NEXT:    retq
19101entry:
  ; Same-type bitcasts, i.e. no-ops.
19102  %0 = bitcast <4 x i64> %__a to <4 x i64>
19103  %1 = bitcast <4 x i64> %__b to <4 x i64>
19104  %2 = icmp ult <4 x i64> %0, %1
  ; Indices 4-7 address the zeroinitializer operand, so result lanes above 3 are 0.
19105  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
19106  %4 = bitcast <64 x i1> %3 to i64
19107  ret i64 %4
19108}
19109
; Unsigned less-than compare of %__a against a <4 x i64> loaded from %__b.
; The <4 x i1> result is widened to <64 x i1> with zero upper lanes and
; returned as an i64 bitmask.
; Checked codegen, per run line: the VLX run folds the load into a ymm
; vpcmpltuq and reads the mask with kmovq; the NoVLX run loads into ymm1,
; compares at zmm width, keeps only the low four mask bits via the
; kshiftlw/kshiftrw $12 pair, then zero-extends with movzwl.
19110define zeroext i64 @test_vpcmpultq_v4i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
19111; VLX-LABEL: test_vpcmpultq_v4i1_v64i1_mask_mem:
19112; VLX:       # %bb.0: # %entry
19113; VLX-NEXT:    vpcmpltuq (%rdi), %ymm0, %k0
19114; VLX-NEXT:    kmovq %k0, %rax
19115; VLX-NEXT:    vzeroupper
19116; VLX-NEXT:    retq
19117;
19118; NoVLX-LABEL: test_vpcmpultq_v4i1_v64i1_mask_mem:
19119; NoVLX:       # %bb.0: # %entry
19120; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
19121; NoVLX-NEXT:    vmovdqa (%rdi), %ymm1
19122; NoVLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0
19123; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
19124; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
19125; NoVLX-NEXT:    kmovw %k0, %eax
19126; NoVLX-NEXT:    movzwl %ax, %eax
19127; NoVLX-NEXT:    vzeroupper
19128; NoVLX-NEXT:    retq
19129entry:
  ; Same-type bitcast, i.e. a no-op.
19130  %0 = bitcast <4 x i64> %__a to <4 x i64>
  ; No explicit align on the load, so the ABI alignment of <4 x i64> applies.
19131  %load = load <4 x i64>, <4 x i64>* %__b
19132  %1 = bitcast <4 x i64> %load to <4 x i64>
19133  %2 = icmp ult <4 x i64> %0, %1
  ; Indices 4-7 address the zeroinitializer operand, so result lanes above 3 are 0.
19134  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
19135  %4 = bitcast <64 x i1> %3 to i64
19136  ret i64 %4
19137}
19138
; Masked unsigned less-than compare of two <4 x i64> vectors. The <4 x i1>
; compare result is ANDed with the low four bits of %__u, widened to
; <64 x i1> with zero upper lanes and returned as an i64 bitmask.
; Checked codegen, per run line: both runs move %__u (edi) into k1 and use it
; as the write mask of vpcmpltuq; the VLX run reads the mask with kmovq, the
; NoVLX run compares at zmm width, keeps only the low four mask bits via the
; kshiftlw/kshiftrw $12 pair, then zero-extends with movzwl.
19139define zeroext i64 @test_masked_vpcmpultq_v4i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
19140; VLX-LABEL: test_masked_vpcmpultq_v4i1_v64i1_mask:
19141; VLX:       # %bb.0: # %entry
19142; VLX-NEXT:    kmovd %edi, %k1
19143; VLX-NEXT:    vpcmpltuq %ymm1, %ymm0, %k0 {%k1}
19144; VLX-NEXT:    kmovq %k0, %rax
19145; VLX-NEXT:    vzeroupper
19146; VLX-NEXT:    retq
19147;
19148; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v64i1_mask:
19149; NoVLX:       # %bb.0: # %entry
19150; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
19151; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
19152; NoVLX-NEXT:    kmovw %edi, %k1
19153; NoVLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
19154; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
19155; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
19156; NoVLX-NEXT:    kmovw %k0, %eax
19157; NoVLX-NEXT:    movzwl %ax, %eax
19158; NoVLX-NEXT:    vzeroupper
19159; NoVLX-NEXT:    retq
19160entry:
  ; Same-type bitcasts, i.e. no-ops.
19161  %0 = bitcast <4 x i64> %__a to <4 x i64>
19162  %1 = bitcast <4 x i64> %__b to <4 x i64>
19163  %2 = icmp ult <4 x i64> %0, %1
  ; View %__u as eight i1 lanes and keep lanes 0-3 as the per-element mask.
19164  %3 = bitcast i8 %__u to <8 x i1>
19165  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
19166  %4 = and <4 x i1> %2, %extract.i
  ; Indices 4-7 address the zeroinitializer operand, so result lanes above 3 are 0.
19167  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
19168  %6 = bitcast <64 x i1> %5 to i64
19169  ret i64 %6
19170}
19171
; Masked unsigned less-than compare of %__a against a <4 x i64> loaded from
; %__b. The <4 x i1> compare result is ANDed with the low four bits of %__u,
; widened to <64 x i1> with zero upper lanes and returned as an i64 bitmask.
; Checked codegen, per run line: both runs move %__u (edi) into k1 as the
; write mask; the VLX run folds the load into a ymm vpcmpltuq and reads the
; mask with kmovq, the NoVLX run loads into ymm1, compares at zmm width, keeps
; only the low four mask bits via the kshiftlw/kshiftrw $12 pair, then
; zero-extends with movzwl.
19172define zeroext i64 @test_masked_vpcmpultq_v4i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
19173; VLX-LABEL: test_masked_vpcmpultq_v4i1_v64i1_mask_mem:
19174; VLX:       # %bb.0: # %entry
19175; VLX-NEXT:    kmovd %edi, %k1
19176; VLX-NEXT:    vpcmpltuq (%rsi), %ymm0, %k0 {%k1}
19177; VLX-NEXT:    kmovq %k0, %rax
19178; VLX-NEXT:    vzeroupper
19179; VLX-NEXT:    retq
19180;
19181; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v64i1_mask_mem:
19182; NoVLX:       # %bb.0: # %entry
19183; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
19184; NoVLX-NEXT:    vmovdqa (%rsi), %ymm1
19185; NoVLX-NEXT:    kmovw %edi, %k1
19186; NoVLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
19187; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
19188; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
19189; NoVLX-NEXT:    kmovw %k0, %eax
19190; NoVLX-NEXT:    movzwl %ax, %eax
19191; NoVLX-NEXT:    vzeroupper
19192; NoVLX-NEXT:    retq
19193entry:
  ; Same-type bitcast, i.e. a no-op.
19194  %0 = bitcast <4 x i64> %__a to <4 x i64>
  ; No explicit align on the load, so the ABI alignment of <4 x i64> applies.
19195  %load = load <4 x i64>, <4 x i64>* %__b
19196  %1 = bitcast <4 x i64> %load to <4 x i64>
19197  %2 = icmp ult <4 x i64> %0, %1
  ; View %__u as eight i1 lanes and keep lanes 0-3 as the per-element mask.
19198  %3 = bitcast i8 %__u to <8 x i1>
19199  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
19200  %4 = and <4 x i1> %2, %extract.i
  ; Indices 4-7 address the zeroinitializer operand, so result lanes above 3 are 0.
19201  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
19202  %6 = bitcast <64 x i1> %5 to i64
19203  ret i64 %6
19204}
19205
19206
; Unsigned less-than compare of %__a against a scalar i64 loaded from %__b and
; splatted to all four lanes. The <4 x i1> result is widened to <64 x i1> with
; zero upper lanes and returned as an i64 bitmask.
; Checked codegen, per run line: the VLX run uses the {1to4} embedded
; broadcast memory operand and reads the mask with kmovq; the NoVLX run
; broadcasts into ymm1 with vpbroadcastq, compares at zmm width, keeps only
; the low four mask bits via the kshiftlw/kshiftrw $12 pair, then zero-extends
; with movzwl.
19207define zeroext i64 @test_vpcmpultq_v4i1_v64i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
19208; VLX-LABEL: test_vpcmpultq_v4i1_v64i1_mask_mem_b:
19209; VLX:       # %bb.0: # %entry
19210; VLX-NEXT:    vpcmpltuq (%rdi){1to4}, %ymm0, %k0
19211; VLX-NEXT:    kmovq %k0, %rax
19212; VLX-NEXT:    vzeroupper
19213; VLX-NEXT:    retq
19214;
19215; NoVLX-LABEL: test_vpcmpultq_v4i1_v64i1_mask_mem_b:
19216; NoVLX:       # %bb.0: # %entry
19217; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
19218; NoVLX-NEXT:    vpbroadcastq (%rdi), %ymm1
19219; NoVLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0
19220; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
19221; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
19222; NoVLX-NEXT:    kmovw %k0, %eax
19223; NoVLX-NEXT:    movzwl %ax, %eax
19224; NoVLX-NEXT:    vzeroupper
19225; NoVLX-NEXT:    retq
19226entry:
  ; Same-type bitcast, i.e. a no-op.
19227  %0 = bitcast <4 x i64> %__a to <4 x i64>
19228  %load = load i64, i64* %__b
  ; Splat idiom, lane 0 of %vec is replicated into all four lanes.
19229  %vec = insertelement <4 x i64> undef, i64 %load, i32 0
19230  %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
19231  %2 = icmp ult <4 x i64> %0, %1
  ; Indices 4-7 address the zeroinitializer operand, so result lanes above 3 are 0.
19232  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
19233  %4 = bitcast <64 x i1> %3 to i64
19234  ret i64 %4
19235}
19236
; Masked variant of the broadcast <4 x i64> unsigned less-than test: the scalar
; i64 loaded from %__b is splatted, compared against %__a, and the result is
; ANDed with the low four lanes of %__u (i8 reinterpreted as <8 x i1>). The
; <4 x i1> value is widened to <64 x i1> with false upper lanes and returned as
; an i64 bitmask.
; Expected assembly: %edi is moved into %k1 and the compare is issued under
; {%k1}. With avx512vl enabled the compare uses a {1to4} memory operand; in the
; avx512f-only run the operand is broadcast with vpbroadcastq, the compare is
; done on zmm registers, and kshiftlw/kshiftrw $12 clears mask bits 4-15.
19237define zeroext i64 @test_masked_vpcmpultq_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
19238; VLX-LABEL: test_masked_vpcmpultq_v4i1_v64i1_mask_mem_b:
19239; VLX:       # %bb.0: # %entry
19240; VLX-NEXT:    kmovd %edi, %k1
19241; VLX-NEXT:    vpcmpltuq (%rsi){1to4}, %ymm0, %k0 {%k1}
19242; VLX-NEXT:    kmovq %k0, %rax
19243; VLX-NEXT:    vzeroupper
19244; VLX-NEXT:    retq
19245;
19246; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v64i1_mask_mem_b:
19247; NoVLX:       # %bb.0: # %entry
19248; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
19249; NoVLX-NEXT:    vpbroadcastq (%rsi), %ymm1
19250; NoVLX-NEXT:    kmovw %edi, %k1
19251; NoVLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
19252; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
19253; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
19254; NoVLX-NEXT:    kmovw %k0, %eax
19255; NoVLX-NEXT:    movzwl %ax, %eax
19256; NoVLX-NEXT:    vzeroupper
19257; NoVLX-NEXT:    retq
19258entry:
19259  %0 = bitcast <4 x i64> %__a to <4 x i64>
19260  %load = load i64, i64* %__b
19261  %vec = insertelement <4 x i64> undef, i64 %load, i32 0
19262  %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
19263  %2 = icmp ult <4 x i64> %0, %1
19264  %3 = bitcast i8 %__u to <8 x i1>
  ; Only lanes 0-3 of the 8-lane mask apply to a 4-element compare.
19265  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
19266  %4 = and <4 x i1> %extract.i, %2
  ; Shuffle indices 0-3 take the masked compare lanes; indices 4-7 select lanes
  ; of the zeroinitializer operand, so every result lane above 3 is false.
19267  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
19268  %6 = bitcast <64 x i1> %5 to i64
19269  ret i64 %6
19270}
19271
19272
; Unsigned less-than compare of two <8 x i64> register operands. The <8 x i1>
; result is widened to <16 x i1> with false upper lanes and returned as an i16
; bitmask.
; Expected assembly: both runs emit one zmm vpcmpltuq into %k0 and move the
; mask to %eax (kmovd when avx512bw is enabled, kmovw with avx512f only); no
; mask shifting is expected.
19273define zeroext i16 @test_vpcmpultq_v8i1_v16i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
19274; VLX-LABEL: test_vpcmpultq_v8i1_v16i1_mask:
19275; VLX:       # %bb.0: # %entry
19276; VLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0
19277; VLX-NEXT:    kmovd %k0, %eax
19278; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
19279; VLX-NEXT:    vzeroupper
19280; VLX-NEXT:    retq
19281;
19282; NoVLX-LABEL: test_vpcmpultq_v8i1_v16i1_mask:
19283; NoVLX:       # %bb.0: # %entry
19284; NoVLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0
19285; NoVLX-NEXT:    kmovw %k0, %eax
19286; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
19287; NoVLX-NEXT:    vzeroupper
19288; NoVLX-NEXT:    retq
19289entry:
19290  %0 = bitcast <8 x i64> %__a to <8 x i64>
19291  %1 = bitcast <8 x i64> %__b to <8 x i64>
19292  %2 = icmp ult <8 x i64> %0, %1
  ; Shuffle indices 0-7 take the compare lanes; indices 8-15 select lanes of the
  ; zeroinitializer operand, so result lanes 8-15 are false.
19293  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
19294  %4 = bitcast <16 x i1> %3 to i16
19295  ret i16 %4
19296}
19297
; Unsigned less-than compare of <8 x i64> %__a against a full <8 x i64> vector
; loaded from %__b. The <8 x i1> result is widened to <16 x i1> with false
; upper lanes and returned as an i16 bitmask.
; Expected assembly: both runs fold the load into vpcmpltuq as a (%rdi) memory
; operand and then move %k0 to %eax (kmovd or kmovw depending on the run).
19298define zeroext i16 @test_vpcmpultq_v8i1_v16i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
19299; VLX-LABEL: test_vpcmpultq_v8i1_v16i1_mask_mem:
19300; VLX:       # %bb.0: # %entry
19301; VLX-NEXT:    vpcmpltuq (%rdi), %zmm0, %k0
19302; VLX-NEXT:    kmovd %k0, %eax
19303; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
19304; VLX-NEXT:    vzeroupper
19305; VLX-NEXT:    retq
19306;
19307; NoVLX-LABEL: test_vpcmpultq_v8i1_v16i1_mask_mem:
19308; NoVLX:       # %bb.0: # %entry
19309; NoVLX-NEXT:    vpcmpltuq (%rdi), %zmm0, %k0
19310; NoVLX-NEXT:    kmovw %k0, %eax
19311; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
19312; NoVLX-NEXT:    vzeroupper
19313; NoVLX-NEXT:    retq
19314entry:
19315  %0 = bitcast <8 x i64> %__a to <8 x i64>
19316  %load = load <8 x i64>, <8 x i64>* %__b
19317  %1 = bitcast <8 x i64> %load to <8 x i64>
19318  %2 = icmp ult <8 x i64> %0, %1
  ; Shuffle indices 0-7 take the compare lanes; indices 8-15 select lanes of the
  ; zeroinitializer operand, so result lanes 8-15 are false.
19319  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
19320  %4 = bitcast <16 x i1> %3 to i16
19321  ret i16 %4
19322}
19323
; Masked unsigned less-than compare of two <8 x i64> register operands: the
; compare result is ANDed with %__u (i8 reinterpreted as <8 x i1>), widened to
; <16 x i1> with false upper lanes, and returned as an i16 bitmask.
; Expected assembly: both runs move %edi into %k1, issue vpcmpltuq under
; {%k1}, and move %k0 to %eax; the explicit `and` does not appear as a
; separate instruction.
19324define zeroext i16 @test_masked_vpcmpultq_v8i1_v16i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
19325; VLX-LABEL: test_masked_vpcmpultq_v8i1_v16i1_mask:
19326; VLX:       # %bb.0: # %entry
19327; VLX-NEXT:    kmovd %edi, %k1
19328; VLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
19329; VLX-NEXT:    kmovd %k0, %eax
19330; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
19331; VLX-NEXT:    vzeroupper
19332; VLX-NEXT:    retq
19333;
19334; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v16i1_mask:
19335; NoVLX:       # %bb.0: # %entry
19336; NoVLX-NEXT:    kmovw %edi, %k1
19337; NoVLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
19338; NoVLX-NEXT:    kmovw %k0, %eax
19339; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
19340; NoVLX-NEXT:    vzeroupper
19341; NoVLX-NEXT:    retq
19342entry:
19343  %0 = bitcast <8 x i64> %__a to <8 x i64>
19344  %1 = bitcast <8 x i64> %__b to <8 x i64>
19345  %2 = icmp ult <8 x i64> %0, %1
19346  %3 = bitcast i8 %__u to <8 x i1>
19347  %4 = and <8 x i1> %2, %3
  ; Shuffle indices 0-7 take the masked compare lanes; indices 8-15 select lanes
  ; of the zeroinitializer operand, so result lanes 8-15 are false.
19348  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
19349  %6 = bitcast <16 x i1> %5 to i16
19350  ret i16 %6
19351}
19352
; Masked unsigned less-than compare of <8 x i64> %__a against a full vector
; loaded from %__b. The compare result is ANDed with %__u (i8 reinterpreted as
; <8 x i1>), widened to <16 x i1> with false upper lanes, and returned as an
; i16 bitmask.
; Expected assembly: both runs move %edi into %k1 and issue vpcmpltuq with a
; folded (%rsi) memory operand under {%k1}.
19353define zeroext i16 @test_masked_vpcmpultq_v8i1_v16i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
19354; VLX-LABEL: test_masked_vpcmpultq_v8i1_v16i1_mask_mem:
19355; VLX:       # %bb.0: # %entry
19356; VLX-NEXT:    kmovd %edi, %k1
19357; VLX-NEXT:    vpcmpltuq (%rsi), %zmm0, %k0 {%k1}
19358; VLX-NEXT:    kmovd %k0, %eax
19359; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
19360; VLX-NEXT:    vzeroupper
19361; VLX-NEXT:    retq
19362;
19363; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v16i1_mask_mem:
19364; NoVLX:       # %bb.0: # %entry
19365; NoVLX-NEXT:    kmovw %edi, %k1
19366; NoVLX-NEXT:    vpcmpltuq (%rsi), %zmm0, %k0 {%k1}
19367; NoVLX-NEXT:    kmovw %k0, %eax
19368; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
19369; NoVLX-NEXT:    vzeroupper
19370; NoVLX-NEXT:    retq
19371entry:
19372  %0 = bitcast <8 x i64> %__a to <8 x i64>
19373  %load = load <8 x i64>, <8 x i64>* %__b
19374  %1 = bitcast <8 x i64> %load to <8 x i64>
19375  %2 = icmp ult <8 x i64> %0, %1
19376  %3 = bitcast i8 %__u to <8 x i1>
19377  %4 = and <8 x i1> %2, %3
  ; Shuffle indices 0-7 take the masked compare lanes; indices 8-15 select lanes
  ; of the zeroinitializer operand, so result lanes 8-15 are false.
19378  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
19379  %6 = bitcast <16 x i1> %5 to i16
19380  ret i16 %6
19381}
19382
19383
; Unsigned less-than compare of <8 x i64> %__a against a scalar i64 that is
; loaded from %__b and splatted to all eight lanes. The <8 x i1> result is
; widened to <16 x i1> with false upper lanes and returned as an i16 bitmask.
; Expected assembly: both runs fold the load and splat into a single
; vpcmpltuq with a {1to8} broadcast memory operand.
19384define zeroext i16 @test_vpcmpultq_v8i1_v16i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
19385; VLX-LABEL: test_vpcmpultq_v8i1_v16i1_mask_mem_b:
19386; VLX:       # %bb.0: # %entry
19387; VLX-NEXT:    vpcmpltuq (%rdi){1to8}, %zmm0, %k0
19388; VLX-NEXT:    kmovd %k0, %eax
19389; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
19390; VLX-NEXT:    vzeroupper
19391; VLX-NEXT:    retq
19392;
19393; NoVLX-LABEL: test_vpcmpultq_v8i1_v16i1_mask_mem_b:
19394; NoVLX:       # %bb.0: # %entry
19395; NoVLX-NEXT:    vpcmpltuq (%rdi){1to8}, %zmm0, %k0
19396; NoVLX-NEXT:    kmovw %k0, %eax
19397; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
19398; NoVLX-NEXT:    vzeroupper
19399; NoVLX-NEXT:    retq
19400entry:
19401  %0 = bitcast <8 x i64> %__a to <8 x i64>
19402  %load = load i64, i64* %__b
19403  %vec = insertelement <8 x i64> undef, i64 %load, i32 0
19404  %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
19405  %2 = icmp ult <8 x i64> %0, %1
  ; Shuffle indices 0-7 take the compare lanes; indices 8-15 select lanes of the
  ; zeroinitializer operand, so result lanes 8-15 are false.
19406  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
19407  %4 = bitcast <16 x i1> %3 to i16
19408  ret i16 %4
19409}
19410
; Masked unsigned less-than compare of <8 x i64> %__a against a scalar i64
; loaded from %__b and splatted to all eight lanes. The result is ANDed with
; %__u (i8 reinterpreted as <8 x i1>; here the mask is the first `and`
; operand), widened to <16 x i1> with false upper lanes, and returned as i16.
; Expected assembly: both runs move %edi into %k1 and issue vpcmpltuq with a
; {1to8} broadcast memory operand under {%k1}.
19411define zeroext i16 @test_masked_vpcmpultq_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
19412; VLX-LABEL: test_masked_vpcmpultq_v8i1_v16i1_mask_mem_b:
19413; VLX:       # %bb.0: # %entry
19414; VLX-NEXT:    kmovd %edi, %k1
19415; VLX-NEXT:    vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1}
19416; VLX-NEXT:    kmovd %k0, %eax
19417; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
19418; VLX-NEXT:    vzeroupper
19419; VLX-NEXT:    retq
19420;
19421; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v16i1_mask_mem_b:
19422; NoVLX:       # %bb.0: # %entry
19423; NoVLX-NEXT:    kmovw %edi, %k1
19424; NoVLX-NEXT:    vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1}
19425; NoVLX-NEXT:    kmovw %k0, %eax
19426; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
19427; NoVLX-NEXT:    vzeroupper
19428; NoVLX-NEXT:    retq
19429entry:
19430  %0 = bitcast <8 x i64> %__a to <8 x i64>
19431  %load = load i64, i64* %__b
19432  %vec = insertelement <8 x i64> undef, i64 %load, i32 0
19433  %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
19434  %2 = icmp ult <8 x i64> %0, %1
19435  %3 = bitcast i8 %__u to <8 x i1>
19436  %4 = and <8 x i1> %3, %2
  ; Shuffle indices 0-7 take the masked compare lanes; indices 8-15 select lanes
  ; of the zeroinitializer operand, so result lanes 8-15 are false.
19437  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
19438  %6 = bitcast <16 x i1> %5 to i16
19439  ret i16 %6
19440}
19441
19442
; Unsigned less-than compare of two <8 x i64> register operands. The <8 x i1>
; result is widened to <32 x i1> with false upper lanes and returned as an i32
; bitmask.
; Expected assembly: both runs emit one zmm vpcmpltuq into %k0 and move the
; mask to %eax (kmovd when avx512bw is enabled, kmovw with avx512f only).
19443define zeroext i32 @test_vpcmpultq_v8i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
19444; VLX-LABEL: test_vpcmpultq_v8i1_v32i1_mask:
19445; VLX:       # %bb.0: # %entry
19446; VLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0
19447; VLX-NEXT:    kmovd %k0, %eax
19448; VLX-NEXT:    vzeroupper
19449; VLX-NEXT:    retq
19450;
19451; NoVLX-LABEL: test_vpcmpultq_v8i1_v32i1_mask:
19452; NoVLX:       # %bb.0: # %entry
19453; NoVLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0
19454; NoVLX-NEXT:    kmovw %k0, %eax
19455; NoVLX-NEXT:    vzeroupper
19456; NoVLX-NEXT:    retq
19457entry:
19458  %0 = bitcast <8 x i64> %__a to <8 x i64>
19459  %1 = bitcast <8 x i64> %__b to <8 x i64>
19460  %2 = icmp ult <8 x i64> %0, %1
  ; Shuffle indices 0-7 take the compare lanes; every later index is in 8-15 and
  ; selects a lane of the zeroinitializer operand, so result lanes 8-31 are false.
19461  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
19462  %4 = bitcast <32 x i1> %3 to i32
19463  ret i32 %4
19464}
19465
; Unsigned less-than compare of <8 x i64> %__a against a full <8 x i64> vector
; loaded from %__b. The <8 x i1> result is widened to <32 x i1> with false
; upper lanes and returned as an i32 bitmask.
; Expected assembly: both runs fold the load into vpcmpltuq as a (%rdi) memory
; operand and then move %k0 to %eax.
19466define zeroext i32 @test_vpcmpultq_v8i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
19467; VLX-LABEL: test_vpcmpultq_v8i1_v32i1_mask_mem:
19468; VLX:       # %bb.0: # %entry
19469; VLX-NEXT:    vpcmpltuq (%rdi), %zmm0, %k0
19470; VLX-NEXT:    kmovd %k0, %eax
19471; VLX-NEXT:    vzeroupper
19472; VLX-NEXT:    retq
19473;
19474; NoVLX-LABEL: test_vpcmpultq_v8i1_v32i1_mask_mem:
19475; NoVLX:       # %bb.0: # %entry
19476; NoVLX-NEXT:    vpcmpltuq (%rdi), %zmm0, %k0
19477; NoVLX-NEXT:    kmovw %k0, %eax
19478; NoVLX-NEXT:    vzeroupper
19479; NoVLX-NEXT:    retq
19480entry:
19481  %0 = bitcast <8 x i64> %__a to <8 x i64>
19482  %load = load <8 x i64>, <8 x i64>* %__b
19483  %1 = bitcast <8 x i64> %load to <8 x i64>
19484  %2 = icmp ult <8 x i64> %0, %1
  ; Shuffle indices 0-7 take the compare lanes; every later index is in 8-15 and
  ; selects a lane of the zeroinitializer operand, so result lanes 8-31 are false.
19485  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
19486  %4 = bitcast <32 x i1> %3 to i32
19487  ret i32 %4
19488}
19489
; Masked unsigned less-than compare of two <8 x i64> register operands: the
; compare result is ANDed with %__u (i8 reinterpreted as <8 x i1>), widened to
; <32 x i1> with false upper lanes, and returned as an i32 bitmask.
; Expected assembly: both runs move %edi into %k1, issue vpcmpltuq under
; {%k1}, and move %k0 to %eax.
19490define zeroext i32 @test_masked_vpcmpultq_v8i1_v32i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
19491; VLX-LABEL: test_masked_vpcmpultq_v8i1_v32i1_mask:
19492; VLX:       # %bb.0: # %entry
19493; VLX-NEXT:    kmovd %edi, %k1
19494; VLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
19495; VLX-NEXT:    kmovd %k0, %eax
19496; VLX-NEXT:    vzeroupper
19497; VLX-NEXT:    retq
19498;
19499; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v32i1_mask:
19500; NoVLX:       # %bb.0: # %entry
19501; NoVLX-NEXT:    kmovw %edi, %k1
19502; NoVLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
19503; NoVLX-NEXT:    kmovw %k0, %eax
19504; NoVLX-NEXT:    vzeroupper
19505; NoVLX-NEXT:    retq
19506entry:
19507  %0 = bitcast <8 x i64> %__a to <8 x i64>
19508  %1 = bitcast <8 x i64> %__b to <8 x i64>
19509  %2 = icmp ult <8 x i64> %0, %1
19510  %3 = bitcast i8 %__u to <8 x i1>
19511  %4 = and <8 x i1> %2, %3
  ; Shuffle indices 0-7 take the masked compare lanes; every later index is in
  ; 8-15 and selects a lane of the zeroinitializer operand, so result lanes 8-31
  ; are false.
19512  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
19513  %6 = bitcast <32 x i1> %5 to i32
19514  ret i32 %6
19515}
19516
; Masked unsigned less-than compare of <8 x i64> %__a against a full vector
; loaded from %__b. The compare result is ANDed with %__u (i8 reinterpreted as
; <8 x i1>), widened to <32 x i1> with false upper lanes, and returned as an
; i32 bitmask.
; Expected assembly: both runs move %edi into %k1 and issue vpcmpltuq with a
; folded (%rsi) memory operand under {%k1}.
19517define zeroext i32 @test_masked_vpcmpultq_v8i1_v32i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
19518; VLX-LABEL: test_masked_vpcmpultq_v8i1_v32i1_mask_mem:
19519; VLX:       # %bb.0: # %entry
19520; VLX-NEXT:    kmovd %edi, %k1
19521; VLX-NEXT:    vpcmpltuq (%rsi), %zmm0, %k0 {%k1}
19522; VLX-NEXT:    kmovd %k0, %eax
19523; VLX-NEXT:    vzeroupper
19524; VLX-NEXT:    retq
19525;
19526; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v32i1_mask_mem:
19527; NoVLX:       # %bb.0: # %entry
19528; NoVLX-NEXT:    kmovw %edi, %k1
19529; NoVLX-NEXT:    vpcmpltuq (%rsi), %zmm0, %k0 {%k1}
19530; NoVLX-NEXT:    kmovw %k0, %eax
19531; NoVLX-NEXT:    vzeroupper
19532; NoVLX-NEXT:    retq
19533entry:
19534  %0 = bitcast <8 x i64> %__a to <8 x i64>
19535  %load = load <8 x i64>, <8 x i64>* %__b
19536  %1 = bitcast <8 x i64> %load to <8 x i64>
19537  %2 = icmp ult <8 x i64> %0, %1
19538  %3 = bitcast i8 %__u to <8 x i1>
19539  %4 = and <8 x i1> %2, %3
  ; Shuffle indices 0-7 take the masked compare lanes; every later index is in
  ; 8-15 and selects a lane of the zeroinitializer operand, so result lanes 8-31
  ; are false.
19540  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
19541  %6 = bitcast <32 x i1> %5 to i32
19542  ret i32 %6
19543}
19544
19545
; Unsigned less-than compare of <8 x i64> %__a against a scalar i64 that is
; loaded from %__b and splatted to all eight lanes. The <8 x i1> result is
; widened to <32 x i1> with false upper lanes and returned as an i32 bitmask.
; Expected assembly: both runs fold the load and splat into a single
; vpcmpltuq with a {1to8} broadcast memory operand.
19546define zeroext i32 @test_vpcmpultq_v8i1_v32i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
19547; VLX-LABEL: test_vpcmpultq_v8i1_v32i1_mask_mem_b:
19548; VLX:       # %bb.0: # %entry
19549; VLX-NEXT:    vpcmpltuq (%rdi){1to8}, %zmm0, %k0
19550; VLX-NEXT:    kmovd %k0, %eax
19551; VLX-NEXT:    vzeroupper
19552; VLX-NEXT:    retq
19553;
19554; NoVLX-LABEL: test_vpcmpultq_v8i1_v32i1_mask_mem_b:
19555; NoVLX:       # %bb.0: # %entry
19556; NoVLX-NEXT:    vpcmpltuq (%rdi){1to8}, %zmm0, %k0
19557; NoVLX-NEXT:    kmovw %k0, %eax
19558; NoVLX-NEXT:    vzeroupper
19559; NoVLX-NEXT:    retq
19560entry:
19561  %0 = bitcast <8 x i64> %__a to <8 x i64>
19562  %load = load i64, i64* %__b
19563  %vec = insertelement <8 x i64> undef, i64 %load, i32 0
19564  %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
19565  %2 = icmp ult <8 x i64> %0, %1
  ; Shuffle indices 0-7 take the compare lanes; every later index is in 8-15 and
  ; selects a lane of the zeroinitializer operand, so result lanes 8-31 are false.
19566  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
19567  %4 = bitcast <32 x i1> %3 to i32
19568  ret i32 %4
19569}
19570
; Masked unsigned less-than compare of <8 x i64> %__a against a scalar i64
; loaded from %__b and splatted to all eight lanes. The result is ANDed with
; %__u (i8 reinterpreted as <8 x i1>; here the mask is the first `and`
; operand), widened to <32 x i1> with false upper lanes, and returned as i32.
; Expected assembly: both runs move %edi into %k1 and issue vpcmpltuq with a
; {1to8} broadcast memory operand under {%k1}.
19571define zeroext i32 @test_masked_vpcmpultq_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
19572; VLX-LABEL: test_masked_vpcmpultq_v8i1_v32i1_mask_mem_b:
19573; VLX:       # %bb.0: # %entry
19574; VLX-NEXT:    kmovd %edi, %k1
19575; VLX-NEXT:    vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1}
19576; VLX-NEXT:    kmovd %k0, %eax
19577; VLX-NEXT:    vzeroupper
19578; VLX-NEXT:    retq
19579;
19580; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v32i1_mask_mem_b:
19581; NoVLX:       # %bb.0: # %entry
19582; NoVLX-NEXT:    kmovw %edi, %k1
19583; NoVLX-NEXT:    vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1}
19584; NoVLX-NEXT:    kmovw %k0, %eax
19585; NoVLX-NEXT:    vzeroupper
19586; NoVLX-NEXT:    retq
19587entry:
19588  %0 = bitcast <8 x i64> %__a to <8 x i64>
19589  %load = load i64, i64* %__b
19590  %vec = insertelement <8 x i64> undef, i64 %load, i32 0
19591  %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
19592  %2 = icmp ult <8 x i64> %0, %1
19593  %3 = bitcast i8 %__u to <8 x i1>
19594  %4 = and <8 x i1> %3, %2
  ; Shuffle indices 0-7 take the masked compare lanes; every later index is in
  ; 8-15 and selects a lane of the zeroinitializer operand, so result lanes 8-31
  ; are false.
19595  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
19596  %6 = bitcast <32 x i1> %5 to i32
19597  ret i32 %6
19598}
19599
19600
; Unsigned less-than compare of two <8 x i64> register operands. The <8 x i1>
; result is widened to <64 x i1> with false upper lanes and returned as an i64
; bitmask.
; Expected assembly: one zmm vpcmpltuq into %k0 in both runs. The run with
; avx512bw enabled moves the mask out with kmovq; the avx512f-only run uses
; kmovw followed by movzwl %ax, %eax.
19601define zeroext i64 @test_vpcmpultq_v8i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
19602; VLX-LABEL: test_vpcmpultq_v8i1_v64i1_mask:
19603; VLX:       # %bb.0: # %entry
19604; VLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0
19605; VLX-NEXT:    kmovq %k0, %rax
19606; VLX-NEXT:    vzeroupper
19607; VLX-NEXT:    retq
19608;
19609; NoVLX-LABEL: test_vpcmpultq_v8i1_v64i1_mask:
19610; NoVLX:       # %bb.0: # %entry
19611; NoVLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0
19612; NoVLX-NEXT:    kmovw %k0, %eax
19613; NoVLX-NEXT:    movzwl %ax, %eax
19614; NoVLX-NEXT:    vzeroupper
19615; NoVLX-NEXT:    retq
19616entry:
19617  %0 = bitcast <8 x i64> %__a to <8 x i64>
19618  %1 = bitcast <8 x i64> %__b to <8 x i64>
19619  %2 = icmp ult <8 x i64> %0, %1
  ; Shuffle indices 0-7 take the compare lanes; every later index is in 8-15 and
  ; selects a lane of the zeroinitializer operand, so result lanes 8-63 are false.
19620  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
19621  %4 = bitcast <64 x i1> %3 to i64
19622  ret i64 %4
19623}
19624
; Unsigned less-than compare of <8 x i64> %__a against a full <8 x i64> vector
; loaded from %__b. The <8 x i1> result is widened to <64 x i1> with false
; upper lanes and returned as an i64 bitmask.
; Expected assembly: both runs fold the load into vpcmpltuq as a (%rdi) memory
; operand. The run with avx512bw enabled moves the mask out with kmovq; the
; avx512f-only run uses kmovw followed by movzwl %ax, %eax.
19625define zeroext i64 @test_vpcmpultq_v8i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
19626; VLX-LABEL: test_vpcmpultq_v8i1_v64i1_mask_mem:
19627; VLX:       # %bb.0: # %entry
19628; VLX-NEXT:    vpcmpltuq (%rdi), %zmm0, %k0
19629; VLX-NEXT:    kmovq %k0, %rax
19630; VLX-NEXT:    vzeroupper
19631; VLX-NEXT:    retq
19632;
19633; NoVLX-LABEL: test_vpcmpultq_v8i1_v64i1_mask_mem:
19634; NoVLX:       # %bb.0: # %entry
19635; NoVLX-NEXT:    vpcmpltuq (%rdi), %zmm0, %k0
19636; NoVLX-NEXT:    kmovw %k0, %eax
19637; NoVLX-NEXT:    movzwl %ax, %eax
19638; NoVLX-NEXT:    vzeroupper
19639; NoVLX-NEXT:    retq
19640entry:
19641  %0 = bitcast <8 x i64> %__a to <8 x i64>
19642  %load = load <8 x i64>, <8 x i64>* %__b
19643  %1 = bitcast <8 x i64> %load to <8 x i64>
19644  %2 = icmp ult <8 x i64> %0, %1
  ; Shuffle indices 0-7 take the compare lanes; every later index is in 8-15 and
  ; selects a lane of the zeroinitializer operand, so result lanes 8-63 are false.
19645  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
19646  %4 = bitcast <64 x i1> %3 to i64
19647  ret i64 %4
19648}
19649
; Masked unsigned less-than compare of two <8 x i64> register operands: the
; compare result is ANDed with %__u (i8 reinterpreted as <8 x i1>), widened to
; <64 x i1> with false upper lanes, and returned as an i64 bitmask.
; Expected assembly: both runs move %edi into %k1 and issue vpcmpltuq under
; {%k1}. The run with avx512bw enabled moves the mask out with kmovq; the
; avx512f-only run uses kmovw followed by movzwl %ax, %eax.
19650define zeroext i64 @test_masked_vpcmpultq_v8i1_v64i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
19651; VLX-LABEL: test_masked_vpcmpultq_v8i1_v64i1_mask:
19652; VLX:       # %bb.0: # %entry
19653; VLX-NEXT:    kmovd %edi, %k1
19654; VLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
19655; VLX-NEXT:    kmovq %k0, %rax
19656; VLX-NEXT:    vzeroupper
19657; VLX-NEXT:    retq
19658;
19659; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v64i1_mask:
19660; NoVLX:       # %bb.0: # %entry
19661; NoVLX-NEXT:    kmovw %edi, %k1
19662; NoVLX-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
19663; NoVLX-NEXT:    kmovw %k0, %eax
19664; NoVLX-NEXT:    movzwl %ax, %eax
19665; NoVLX-NEXT:    vzeroupper
19666; NoVLX-NEXT:    retq
19667entry:
19668  %0 = bitcast <8 x i64> %__a to <8 x i64>
19669  %1 = bitcast <8 x i64> %__b to <8 x i64>
19670  %2 = icmp ult <8 x i64> %0, %1
19671  %3 = bitcast i8 %__u to <8 x i1>
19672  %4 = and <8 x i1> %2, %3
  ; Shuffle indices 0-7 take the masked compare lanes; every later index is in
  ; 8-15 and selects a lane of the zeroinitializer operand, so result lanes 8-63
  ; are false.
19673  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
19674  %6 = bitcast <64 x i1> %5 to i64
19675  ret i64 %6
19676}
19677
; Masked unsigned less-than compare of <8 x i64> %__a against a full vector
; loaded from %__b. The compare result is ANDed with %__u (i8 reinterpreted as
; <8 x i1>), widened to <64 x i1> with false upper lanes, and returned as an
; i64 bitmask.
; Expected assembly: both runs move %edi into %k1 and issue vpcmpltuq with a
; folded (%rsi) memory operand under {%k1}. The run with avx512bw enabled moves
; the mask out with kmovq; the avx512f-only run uses kmovw followed by
; movzwl %ax, %eax.
19678define zeroext i64 @test_masked_vpcmpultq_v8i1_v64i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
19679; VLX-LABEL: test_masked_vpcmpultq_v8i1_v64i1_mask_mem:
19680; VLX:       # %bb.0: # %entry
19681; VLX-NEXT:    kmovd %edi, %k1
19682; VLX-NEXT:    vpcmpltuq (%rsi), %zmm0, %k0 {%k1}
19683; VLX-NEXT:    kmovq %k0, %rax
19684; VLX-NEXT:    vzeroupper
19685; VLX-NEXT:    retq
19686;
19687; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v64i1_mask_mem:
19688; NoVLX:       # %bb.0: # %entry
19689; NoVLX-NEXT:    kmovw %edi, %k1
19690; NoVLX-NEXT:    vpcmpltuq (%rsi), %zmm0, %k0 {%k1}
19691; NoVLX-NEXT:    kmovw %k0, %eax
19692; NoVLX-NEXT:    movzwl %ax, %eax
19693; NoVLX-NEXT:    vzeroupper
19694; NoVLX-NEXT:    retq
19695entry:
19696  %0 = bitcast <8 x i64> %__a to <8 x i64>
19697  %load = load <8 x i64>, <8 x i64>* %__b
19698  %1 = bitcast <8 x i64> %load to <8 x i64>
19699  %2 = icmp ult <8 x i64> %0, %1
19700  %3 = bitcast i8 %__u to <8 x i1>
19701  %4 = and <8 x i1> %2, %3
  ; Shuffle indices 0-7 take the masked compare lanes; every later index is in
  ; 8-15 and selects a lane of the zeroinitializer operand, so result lanes 8-63
  ; are false.
19702  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
19703  %6 = bitcast <64 x i1> %5 to i64
19704  ret i64 %6
19705}
19706
19707
; Unsigned less-than compare of <8 x i64> %__a against a scalar i64 that is
; loaded from %__b and splatted to all eight lanes. The <8 x i1> result is
; widened to <64 x i1> with false upper lanes and returned as an i64 bitmask.
; Expected assembly: both runs fold the load and splat into a single
; vpcmpltuq with a {1to8} broadcast memory operand. The run with avx512bw
; enabled moves the mask out with kmovq; the avx512f-only run uses kmovw
; followed by movzwl %ax, %eax.
19708define zeroext i64 @test_vpcmpultq_v8i1_v64i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
19709; VLX-LABEL: test_vpcmpultq_v8i1_v64i1_mask_mem_b:
19710; VLX:       # %bb.0: # %entry
19711; VLX-NEXT:    vpcmpltuq (%rdi){1to8}, %zmm0, %k0
19712; VLX-NEXT:    kmovq %k0, %rax
19713; VLX-NEXT:    vzeroupper
19714; VLX-NEXT:    retq
19715;
19716; NoVLX-LABEL: test_vpcmpultq_v8i1_v64i1_mask_mem_b:
19717; NoVLX:       # %bb.0: # %entry
19718; NoVLX-NEXT:    vpcmpltuq (%rdi){1to8}, %zmm0, %k0
19719; NoVLX-NEXT:    kmovw %k0, %eax
19720; NoVLX-NEXT:    movzwl %ax, %eax
19721; NoVLX-NEXT:    vzeroupper
19722; NoVLX-NEXT:    retq
19723entry:
19724  %0 = bitcast <8 x i64> %__a to <8 x i64>
19725  %load = load i64, i64* %__b
19726  %vec = insertelement <8 x i64> undef, i64 %load, i32 0
19727  %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
19728  %2 = icmp ult <8 x i64> %0, %1
  ; Shuffle indices 0-7 take the compare lanes; every later index is in 8-15 and
  ; selects a lane of the zeroinitializer operand, so result lanes 8-63 are false.
19729  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
19730  %4 = bitcast <64 x i1> %3 to i64
19731  ret i64 %4
19732}
19733
; Masked unsigned less-than compare of <8 x i64> %__a against a scalar i64
; loaded from %__b and splatted to all eight lanes. The result is ANDed with
; %__u (i8 reinterpreted as <8 x i1>; here the mask is the first `and`
; operand), widened to <64 x i1> with false upper lanes, and returned as i64.
; Expected assembly: both runs move %edi into %k1 and issue vpcmpltuq with a
; {1to8} broadcast memory operand under {%k1}. The run with avx512bw enabled
; moves the mask out with kmovq; the avx512f-only run uses kmovw followed by
; movzwl %ax, %eax.
19734define zeroext i64 @test_masked_vpcmpultq_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
19735; VLX-LABEL: test_masked_vpcmpultq_v8i1_v64i1_mask_mem_b:
19736; VLX:       # %bb.0: # %entry
19737; VLX-NEXT:    kmovd %edi, %k1
19738; VLX-NEXT:    vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1}
19739; VLX-NEXT:    kmovq %k0, %rax
19740; VLX-NEXT:    vzeroupper
19741; VLX-NEXT:    retq
19742;
19743; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v64i1_mask_mem_b:
19744; NoVLX:       # %bb.0: # %entry
19745; NoVLX-NEXT:    kmovw %edi, %k1
19746; NoVLX-NEXT:    vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1}
19747; NoVLX-NEXT:    kmovw %k0, %eax
19748; NoVLX-NEXT:    movzwl %ax, %eax
19749; NoVLX-NEXT:    vzeroupper
19750; NoVLX-NEXT:    retq
19751entry:
19752  %0 = bitcast <8 x i64> %__a to <8 x i64>
19753  %load = load i64, i64* %__b
19754  %vec = insertelement <8 x i64> undef, i64 %load, i32 0
19755  %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
19756  %2 = icmp ult <8 x i64> %0, %1
19757  %3 = bitcast i8 %__u to <8 x i1>
19758  %4 = and <8 x i1> %3, %2
  ; Shuffle indices 0-7 take the masked compare lanes; every later index is in
  ; 8-15 and selects a lane of the zeroinitializer operand, so result lanes 8-63
  ; are false.
19759  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
19760  %6 = bitcast <64 x i1> %5 to i64
19761  ret i64 %6
19762}
19763
19764
; Declaration of the llvm.x86.avx512.cmp.ps.512 intrinsic: two <16 x float>
; operands and two i32 operands, returning <16 x i1>.
; NOTE(review): none of the nearby tests call it; presumably tests later in the
; file do -- verify before removing.
19765declare <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float>, <16 x float>, i32, i32)
; Ordered-equal float compare of two register operands: both <2 x i64>
; arguments are reinterpreted as <4 x float> and compared with fcmp oeq. The
; <4 x i1> result is widened to <8 x i1> with false upper lanes and returned as
; an i8 bitmask.
; Expected assembly: with avx512vl enabled, vcmpeqps on xmm registers writes
; %k0 directly. In the avx512f-only run the compare is done on zmm registers
; and the kshiftlw/kshiftrw $12 pair clears mask bits 4-15.
19766define zeroext i8 @test_vcmpoeqps_v4i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
19767; VLX-LABEL: test_vcmpoeqps_v4i1_v8i1_mask:
19768; VLX:       # %bb.0: # %entry
19769; VLX-NEXT:    vcmpeqps %xmm1, %xmm0, %k0
19770; VLX-NEXT:    kmovd %k0, %eax
19771; VLX-NEXT:    # kill: def $al killed $al killed $eax
19772; VLX-NEXT:    retq
19773;
19774; NoVLX-LABEL: test_vcmpoeqps_v4i1_v8i1_mask:
19775; NoVLX:       # %bb.0: # %entry
19776; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
19777; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
19778; NoVLX-NEXT:    vcmpeqps %zmm1, %zmm0, %k0
19779; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
19780; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
19781; NoVLX-NEXT:    kmovw %k0, %eax
19782; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
19783; NoVLX-NEXT:    vzeroupper
19784; NoVLX-NEXT:    retq
19785entry:
19786  %0 = bitcast <2 x i64> %__a to <4 x float>
19787  %1 = bitcast <2 x i64> %__b to <4 x float>
19788  %2 = fcmp oeq <4 x float> %0, %1
  ; Shuffle indices 0-3 take the compare lanes; indices 4-7 select lanes of the
  ; zeroinitializer operand, so result lanes 4-7 are false.
19789  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
19790  %4 = bitcast <8 x i1> %3 to i8
19791  ret i8 %4
19792}
19793
; Ordered-equal float compare with a memory operand: %__a and the <2 x i64>
; loaded from %__b are reinterpreted as <4 x float> and compared with
; fcmp oeq. The <4 x i1> result is widened to <8 x i1> with false upper lanes
; and returned as an i8 bitmask.
; Expected assembly: with avx512vl enabled, the load is folded into vcmpeqps
; as a (%rdi) operand. In the avx512f-only run the operand is loaded with
; vmovaps into %xmm1, the compare is done on zmm registers, and the
; kshiftlw/kshiftrw $12 pair clears mask bits 4-15.
19794define zeroext i8 @test_vcmpoeqps_v4i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
19795; VLX-LABEL: test_vcmpoeqps_v4i1_v8i1_mask_mem:
19796; VLX:       # %bb.0: # %entry
19797; VLX-NEXT:    vcmpeqps (%rdi), %xmm0, %k0
19798; VLX-NEXT:    kmovd %k0, %eax
19799; VLX-NEXT:    # kill: def $al killed $al killed $eax
19800; VLX-NEXT:    retq
19801;
19802; NoVLX-LABEL: test_vcmpoeqps_v4i1_v8i1_mask_mem:
19803; NoVLX:       # %bb.0: # %entry
19804; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
19805; NoVLX-NEXT:    vmovaps (%rdi), %xmm1
19806; NoVLX-NEXT:    vcmpeqps %zmm1, %zmm0, %k0
19807; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
19808; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
19809; NoVLX-NEXT:    kmovw %k0, %eax
19810; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
19811; NoVLX-NEXT:    vzeroupper
19812; NoVLX-NEXT:    retq
19813entry:
19814  %0 = bitcast <2 x i64> %__a to <4 x float>
19815  %load = load <2 x i64>, <2 x i64>* %__b
19816  %1 = bitcast <2 x i64> %load to <4 x float>
19817  %2 = fcmp oeq <4 x float> %0, %1
  ; Shuffle indices 0-3 take the compare lanes; indices 4-7 select lanes of the
  ; zeroinitializer operand, so result lanes 4-7 are false.
19818  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
19819  %4 = bitcast <8 x i1> %3 to i8
19820  ret i8 %4
19821}
19822
; Ordered-equal float compare against a broadcast scalar: a float loaded from
; %__b is splatted to all four lanes and compared (fcmp oeq) with %__a
; reinterpreted as <4 x float>. The <4 x i1> result is widened to <8 x i1>
; with false upper lanes and returned as an i8 bitmask.
; Expected assembly: with avx512vl enabled, one vcmpeqps with a {1to4}
; broadcast memory operand. In the avx512f-only run the scalar is broadcast
; with vbroadcastss, the compare is done on zmm registers, and the
; kshiftlw/kshiftrw $12 pair clears mask bits 4-15.
19823define zeroext i8 @test_vcmpoeqps_v4i1_v8i1_mask_mem_b(<2 x i64> %__a, float* %__b) local_unnamed_addr {
19824; VLX-LABEL: test_vcmpoeqps_v4i1_v8i1_mask_mem_b:
19825; VLX:       # %bb.0: # %entry
19826; VLX-NEXT:    vcmpeqps (%rdi){1to4}, %xmm0, %k0
19827; VLX-NEXT:    kmovd %k0, %eax
19828; VLX-NEXT:    # kill: def $al killed $al killed $eax
19829; VLX-NEXT:    retq
19830;
19831; NoVLX-LABEL: test_vcmpoeqps_v4i1_v8i1_mask_mem_b:
19832; NoVLX:       # %bb.0: # %entry
19833; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
19834; NoVLX-NEXT:    vbroadcastss (%rdi), %xmm1
19835; NoVLX-NEXT:    vcmpeqps %zmm1, %zmm0, %k0
19836; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
19837; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
19838; NoVLX-NEXT:    kmovw %k0, %eax
19839; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
19840; NoVLX-NEXT:    vzeroupper
19841; NoVLX-NEXT:    retq
19842entry:
19843  %0 = bitcast <2 x i64> %__a to <4 x float>
19844  %load = load float, float* %__b
19845  %vec = insertelement <4 x float> undef, float %load, i32 0
19846  %1 = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
19847  %2 = fcmp oeq <4 x float> %0, %1
  ; Shuffle indices 0-3 take the compare lanes; indices 4-7 select lanes of the
  ; zeroinitializer operand, so result lanes 4-7 are false.
19848  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
19849  %4 = bitcast <8 x i1> %3 to i8
19850  ret i8 %4
19851}
19852
; Test: fcmp oeq of two <4 x float> register operands, ANDed with the i4 mask
; %__u (bitcast to <4 x i1>). The result is widened to <8 x i1> with zero upper
; lanes and returned as an i8 bitmask.
; VLX checks expect %__u moved into %k1 and used as the write-mask of an xmm
; vcmpeqps. NoVLX checks expect a write-masked zmm compare followed by a
; kshiftlw/kshiftrw $12 pair that zeroes the mask bits above the low four.
19853define zeroext i8 @test_masked_vcmpoeqps_v4i1_v8i1_mask(i4 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
19854; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v8i1_mask:
19855; VLX:       # %bb.0: # %entry
19856; VLX-NEXT:    kmovd %edi, %k1
19857; VLX-NEXT:    vcmpeqps %xmm1, %xmm0, %k0 {%k1}
19858; VLX-NEXT:    kmovd %k0, %eax
19859; VLX-NEXT:    # kill: def $al killed $al killed $eax
19860; VLX-NEXT:    retq
19861;
19862; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v8i1_mask:
19863; NoVLX:       # %bb.0: # %entry
19864; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
19865; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
19866; NoVLX-NEXT:    kmovw %edi, %k1
19867; NoVLX-NEXT:    vcmpeqps %zmm1, %zmm0, %k0 {%k1}
19868; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
19869; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
19870; NoVLX-NEXT:    kmovw %k0, %eax
19871; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
19872; NoVLX-NEXT:    vzeroupper
19873; NoVLX-NEXT:    retq
19874entry:
19875  %0 = bitcast <2 x i64> %__a to <4 x float>
19876  %1 = bitcast <2 x i64> %__b to <4 x float>
19877  %2 = fcmp oeq <4 x float> %0, %1
; Apply the caller-supplied 4-bit mask lane-wise to the compare result.
19878  %3 = bitcast i4 %__u to <4 x i1>
19879  %4 = and <4 x i1> %2, %3
; Indices 0-3 select the masked compare lanes; indices 4-7 select lanes of the
; zeroinitializer operand, so the upper four result lanes are zero.
19880  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
19881  %6 = bitcast <8 x i1> %5 to i8
19882  ret i8 %6
19883}
19884
; Test: fcmp oeq of <4 x float> %__a against a full vector loaded from %__b,
; ANDed with the i4 mask %__u (bitcast to <4 x i1>). The result is widened to
; <8 x i1> with zero upper lanes and returned as an i8 bitmask.
; VLX checks expect the load folded into a write-masked vcmpeqps (%rsi).
; NoVLX checks expect a separate vmovaps load, a write-masked zmm compare, and
; a kshiftlw/kshiftrw $12 pair that zeroes the mask bits above the low four.
19885define zeroext i8 @test_masked_vcmpoeqps_v4i1_v8i1_mask_mem(i4 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
19886; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v8i1_mask_mem:
19887; VLX:       # %bb.0: # %entry
19888; VLX-NEXT:    kmovd %edi, %k1
19889; VLX-NEXT:    vcmpeqps (%rsi), %xmm0, %k0 {%k1}
19890; VLX-NEXT:    kmovd %k0, %eax
19891; VLX-NEXT:    # kill: def $al killed $al killed $eax
19892; VLX-NEXT:    retq
19893;
19894; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v8i1_mask_mem:
19895; NoVLX:       # %bb.0: # %entry
19896; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
19897; NoVLX-NEXT:    kmovw %edi, %k1
19898; NoVLX-NEXT:    vmovaps (%rsi), %xmm1
19899; NoVLX-NEXT:    vcmpeqps %zmm1, %zmm0, %k0 {%k1}
19900; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
19901; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
19902; NoVLX-NEXT:    kmovw %k0, %eax
19903; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
19904; NoVLX-NEXT:    vzeroupper
19905; NoVLX-NEXT:    retq
19906entry:
19907  %0 = bitcast <2 x i64> %__a to <4 x float>
19908  %load = load <2 x i64>, <2 x i64>* %__b
19909  %1 = bitcast <2 x i64> %load to <4 x float>
19910  %2 = fcmp oeq <4 x float> %0, %1
; Apply the caller-supplied 4-bit mask lane-wise to the compare result.
19911  %3 = bitcast i4 %__u to <4 x i1>
19912  %4 = and <4 x i1> %2, %3
; Indices 0-3 select the masked compare lanes; indices 4-7 select lanes of the
; zeroinitializer operand, so the upper four result lanes are zero.
19913  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
19914  %6 = bitcast <8 x i1> %5 to i8
19915  ret i8 %6
19916}
19917
; Test: fcmp oeq of <4 x float> %__a against a scalar float loaded from %__b
; and splatted to all four lanes, ANDed with the i4 mask %__u. The result is
; widened to <8 x i1> with zero upper lanes and returned as an i8 bitmask.
; VLX checks expect a write-masked vcmpeqps with the broadcast folded in as
; (%rsi){1to4}. NoVLX checks expect a separate vbroadcastss, a write-masked
; zmm compare, and a kshiftlw/kshiftrw $12 pair that zeroes the mask bits
; above the low four.
19918define zeroext i8 @test_masked_vcmpoeqps_v4i1_v8i1_mask_mem_b(i4 zeroext %__u, <2 x i64> %__a, float* %__b) local_unnamed_addr {
19919; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v8i1_mask_mem_b:
19920; VLX:       # %bb.0: # %entry
19921; VLX-NEXT:    kmovd %edi, %k1
19922; VLX-NEXT:    vcmpeqps (%rsi){1to4}, %xmm0, %k0 {%k1}
19923; VLX-NEXT:    kmovd %k0, %eax
19924; VLX-NEXT:    # kill: def $al killed $al killed $eax
19925; VLX-NEXT:    retq
19926;
19927; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v8i1_mask_mem_b:
19928; NoVLX:       # %bb.0: # %entry
19929; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
19930; NoVLX-NEXT:    kmovw %edi, %k1
19931; NoVLX-NEXT:    vbroadcastss (%rsi), %xmm1
19932; NoVLX-NEXT:    vcmpeqps %zmm1, %zmm0, %k0 {%k1}
19933; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
19934; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
19935; NoVLX-NEXT:    kmovw %k0, %eax
19936; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
19937; NoVLX-NEXT:    vzeroupper
19938; NoVLX-NEXT:    retq
19939entry:
19940  %0 = bitcast <2 x i64> %__a to <4 x float>
19941  %load = load float, float* %__b
; insertelement + all-zero shuffle mask splats the loaded scalar to four lanes.
19942  %vec = insertelement <4 x float> undef, float %load, i32 0
19943  %1 = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
19944  %2 = fcmp oeq <4 x float> %0, %1
; Apply the caller-supplied 4-bit mask lane-wise to the compare result.
19945  %3 = bitcast i4 %__u to <4 x i1>
19946  %4 = and <4 x i1> %2, %3
; Indices 0-3 select the masked compare lanes; indices 4-7 select lanes of the
; zeroinitializer operand, so the upper four result lanes are zero.
19947  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
19948  %6 = bitcast <8 x i1> %5 to i8
19949  ret i8 %6
19950}
19951
19952
19953
; Test: unmasked fcmp oeq of two <4 x float> register operands. The <4 x i1>
; result is widened to <16 x i1> with zero upper lanes and returned as an i16
; bitmask.
; VLX checks expect a single xmm vcmpeqps into a mask register. NoVLX checks
; expect a zmm compare followed by a kshiftlw/kshiftrw $12 pair that zeroes
; the mask bits above the low four.
19954define zeroext i16 @test_vcmpoeqps_v4i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
19955; VLX-LABEL: test_vcmpoeqps_v4i1_v16i1_mask:
19956; VLX:       # %bb.0: # %entry
19957; VLX-NEXT:    vcmpeqps %xmm1, %xmm0, %k0
19958; VLX-NEXT:    kmovd %k0, %eax
19959; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
19960; VLX-NEXT:    retq
19961;
19962; NoVLX-LABEL: test_vcmpoeqps_v4i1_v16i1_mask:
19963; NoVLX:       # %bb.0: # %entry
19964; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
19965; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
19966; NoVLX-NEXT:    vcmpeqps %zmm1, %zmm0, %k0
19967; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
19968; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
19969; NoVLX-NEXT:    kmovw %k0, %eax
19970; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
19971; NoVLX-NEXT:    vzeroupper
19972; NoVLX-NEXT:    retq
19973entry:
19974  %0 = bitcast <2 x i64> %__a to <4 x float>
19975  %1 = bitcast <2 x i64> %__b to <4 x float>
19976  %2 = fcmp oeq <4 x float> %0, %1
; Indices 0-3 select the compare lanes; every later index is in 4-7 and so
; selects a lane of the zeroinitializer operand, zeroing result lanes 4-15.
19977  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
19978  %4 = bitcast <16 x i1> %3 to i16
19979  ret i16 %4
19980}
19981
; Test: unmasked fcmp oeq of <4 x float> %__a against a full vector loaded
; from %__b. The <4 x i1> result is widened to <16 x i1> with zero upper lanes
; and returned as an i16 bitmask.
; VLX checks expect the load folded into vcmpeqps (%rdi). NoVLX checks expect
; a separate vmovaps load, a zmm compare, and a kshiftlw/kshiftrw $12 pair
; that zeroes the mask bits above the low four.
19982define zeroext i16 @test_vcmpoeqps_v4i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
19983; VLX-LABEL: test_vcmpoeqps_v4i1_v16i1_mask_mem:
19984; VLX:       # %bb.0: # %entry
19985; VLX-NEXT:    vcmpeqps (%rdi), %xmm0, %k0
19986; VLX-NEXT:    kmovd %k0, %eax
19987; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
19988; VLX-NEXT:    retq
19989;
19990; NoVLX-LABEL: test_vcmpoeqps_v4i1_v16i1_mask_mem:
19991; NoVLX:       # %bb.0: # %entry
19992; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
19993; NoVLX-NEXT:    vmovaps (%rdi), %xmm1
19994; NoVLX-NEXT:    vcmpeqps %zmm1, %zmm0, %k0
19995; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
19996; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
19997; NoVLX-NEXT:    kmovw %k0, %eax
19998; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
19999; NoVLX-NEXT:    vzeroupper
20000; NoVLX-NEXT:    retq
20001entry:
20002  %0 = bitcast <2 x i64> %__a to <4 x float>
20003  %load = load <2 x i64>, <2 x i64>* %__b
20004  %1 = bitcast <2 x i64> %load to <4 x float>
20005  %2 = fcmp oeq <4 x float> %0, %1
; Indices 0-3 select the compare lanes; every later index is in 4-7 and so
; selects a lane of the zeroinitializer operand, zeroing result lanes 4-15.
20006  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
20007  %4 = bitcast <16 x i1> %3 to i16
20008  ret i16 %4
20009}
20010
; Test: unmasked fcmp oeq of <4 x float> %__a against a scalar float loaded
; from %__b and splatted to all four lanes. The <4 x i1> result is widened to
; <16 x i1> with zero upper lanes and returned as an i16 bitmask.
; VLX checks expect the broadcast folded into vcmpeqps as (%rdi){1to4}.
; NoVLX checks expect a separate vbroadcastss, a zmm compare, and a
; kshiftlw/kshiftrw $12 pair that zeroes the mask bits above the low four.
20011define zeroext i16 @test_vcmpoeqps_v4i1_v16i1_mask_mem_b(<2 x i64> %__a, float* %__b) local_unnamed_addr {
20012; VLX-LABEL: test_vcmpoeqps_v4i1_v16i1_mask_mem_b:
20013; VLX:       # %bb.0: # %entry
20014; VLX-NEXT:    vcmpeqps (%rdi){1to4}, %xmm0, %k0
20015; VLX-NEXT:    kmovd %k0, %eax
20016; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
20017; VLX-NEXT:    retq
20018;
20019; NoVLX-LABEL: test_vcmpoeqps_v4i1_v16i1_mask_mem_b:
20020; NoVLX:       # %bb.0: # %entry
20021; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
20022; NoVLX-NEXT:    vbroadcastss (%rdi), %xmm1
20023; NoVLX-NEXT:    vcmpeqps %zmm1, %zmm0, %k0
20024; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
20025; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
20026; NoVLX-NEXT:    kmovw %k0, %eax
20027; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
20028; NoVLX-NEXT:    vzeroupper
20029; NoVLX-NEXT:    retq
20030entry:
20031  %0 = bitcast <2 x i64> %__a to <4 x float>
20032  %load = load float, float* %__b
; insertelement + all-zero shuffle mask splats the loaded scalar to four lanes.
20033  %vec = insertelement <4 x float> undef, float %load, i32 0
20034  %1 = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
20035  %2 = fcmp oeq <4 x float> %0, %1
; Indices 0-3 select the compare lanes; every later index is in 4-7 and so
; selects a lane of the zeroinitializer operand, zeroing result lanes 4-15.
20036  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
20037  %4 = bitcast <16 x i1> %3 to i16
20038  ret i16 %4
20039}
20040
; Test: fcmp oeq of two <4 x float> register operands, ANDed with the i4 mask
; %__u (bitcast to <4 x i1>). The result is widened to <16 x i1> with zero
; upper lanes and returned as an i16 bitmask.
; VLX checks expect %__u moved into %k1 and used as the write-mask of an xmm
; vcmpeqps. NoVLX checks expect a write-masked zmm compare followed by a
; kshiftlw/kshiftrw $12 pair that zeroes the mask bits above the low four.
20041define zeroext i16 @test_masked_vcmpoeqps_v4i1_v16i1_mask(i4 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
20042; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v16i1_mask:
20043; VLX:       # %bb.0: # %entry
20044; VLX-NEXT:    kmovd %edi, %k1
20045; VLX-NEXT:    vcmpeqps %xmm1, %xmm0, %k0 {%k1}
20046; VLX-NEXT:    kmovd %k0, %eax
20047; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
20048; VLX-NEXT:    retq
20049;
20050; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v16i1_mask:
20051; NoVLX:       # %bb.0: # %entry
20052; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
20053; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
20054; NoVLX-NEXT:    kmovw %edi, %k1
20055; NoVLX-NEXT:    vcmpeqps %zmm1, %zmm0, %k0 {%k1}
20056; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
20057; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
20058; NoVLX-NEXT:    kmovw %k0, %eax
20059; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
20060; NoVLX-NEXT:    vzeroupper
20061; NoVLX-NEXT:    retq
20062entry:
20063  %0 = bitcast <2 x i64> %__a to <4 x float>
20064  %1 = bitcast <2 x i64> %__b to <4 x float>
20065  %2 = fcmp oeq <4 x float> %0, %1
; Apply the caller-supplied 4-bit mask lane-wise to the compare result.
20066  %3 = bitcast i4 %__u to <4 x i1>
20067  %4 = and <4 x i1> %2, %3
; Indices 0-3 select the masked compare lanes; every later index is in 4-7 and
; so selects a lane of the zeroinitializer operand, zeroing result lanes 4-15.
20068  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
20069  %6 = bitcast <16 x i1> %5 to i16
20070  ret i16 %6
20071}
20072
; Test: fcmp oeq of <4 x float> %__a against a full vector loaded from %__b,
; ANDed with the i4 mask %__u (bitcast to <4 x i1>). The result is widened to
; <16 x i1> with zero upper lanes and returned as an i16 bitmask.
; VLX checks expect the load folded into a write-masked vcmpeqps (%rsi).
; NoVLX checks expect a separate vmovaps load, a write-masked zmm compare, and
; a kshiftlw/kshiftrw $12 pair that zeroes the mask bits above the low four.
20073define zeroext i16 @test_masked_vcmpoeqps_v4i1_v16i1_mask_mem(i4 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
20074; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v16i1_mask_mem:
20075; VLX:       # %bb.0: # %entry
20076; VLX-NEXT:    kmovd %edi, %k1
20077; VLX-NEXT:    vcmpeqps (%rsi), %xmm0, %k0 {%k1}
20078; VLX-NEXT:    kmovd %k0, %eax
20079; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
20080; VLX-NEXT:    retq
20081;
20082; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v16i1_mask_mem:
20083; NoVLX:       # %bb.0: # %entry
20084; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
20085; NoVLX-NEXT:    kmovw %edi, %k1
20086; NoVLX-NEXT:    vmovaps (%rsi), %xmm1
20087; NoVLX-NEXT:    vcmpeqps %zmm1, %zmm0, %k0 {%k1}
20088; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
20089; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
20090; NoVLX-NEXT:    kmovw %k0, %eax
20091; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
20092; NoVLX-NEXT:    vzeroupper
20093; NoVLX-NEXT:    retq
20094entry:
20095  %0 = bitcast <2 x i64> %__a to <4 x float>
20096  %load = load <2 x i64>, <2 x i64>* %__b
20097  %1 = bitcast <2 x i64> %load to <4 x float>
20098  %2 = fcmp oeq <4 x float> %0, %1
; Apply the caller-supplied 4-bit mask lane-wise to the compare result.
20099  %3 = bitcast i4 %__u to <4 x i1>
20100  %4 = and <4 x i1> %2, %3
; Indices 0-3 select the masked compare lanes; every later index is in 4-7 and
; so selects a lane of the zeroinitializer operand, zeroing result lanes 4-15.
20101  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
20102  %6 = bitcast <16 x i1> %5 to i16
20103  ret i16 %6
20104}
20105
; Test: fcmp oeq of <4 x float> %__a against a scalar float loaded from %__b
; and splatted to all four lanes, ANDed with the i4 mask %__u. The result is
; widened to <16 x i1> with zero upper lanes and returned as an i16 bitmask.
; VLX checks expect a write-masked vcmpeqps with the broadcast folded in as
; (%rsi){1to4}. NoVLX checks expect a separate vbroadcastss, a write-masked
; zmm compare, and a kshiftlw/kshiftrw $12 pair that zeroes the mask bits
; above the low four.
20106define zeroext i16 @test_masked_vcmpoeqps_v4i1_v16i1_mask_mem_b(i4 zeroext %__u, <2 x i64> %__a, float* %__b) local_unnamed_addr {
20107; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v16i1_mask_mem_b:
20108; VLX:       # %bb.0: # %entry
20109; VLX-NEXT:    kmovd %edi, %k1
20110; VLX-NEXT:    vcmpeqps (%rsi){1to4}, %xmm0, %k0 {%k1}
20111; VLX-NEXT:    kmovd %k0, %eax
20112; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
20113; VLX-NEXT:    retq
20114;
20115; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v16i1_mask_mem_b:
20116; NoVLX:       # %bb.0: # %entry
20117; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
20118; NoVLX-NEXT:    kmovw %edi, %k1
20119; NoVLX-NEXT:    vbroadcastss (%rsi), %xmm1
20120; NoVLX-NEXT:    vcmpeqps %zmm1, %zmm0, %k0 {%k1}
20121; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
20122; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
20123; NoVLX-NEXT:    kmovw %k0, %eax
20124; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
20125; NoVLX-NEXT:    vzeroupper
20126; NoVLX-NEXT:    retq
20127entry:
20128  %0 = bitcast <2 x i64> %__a to <4 x float>
20129  %load = load float, float* %__b
; insertelement + all-zero shuffle mask splats the loaded scalar to four lanes.
20130  %vec = insertelement <4 x float> undef, float %load, i32 0
20131  %1 = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
20132  %2 = fcmp oeq <4 x float> %0, %1
; Apply the caller-supplied 4-bit mask lane-wise to the compare result.
20133  %3 = bitcast i4 %__u to <4 x i1>
20134  %4 = and <4 x i1> %2, %3
; Indices 0-3 select the masked compare lanes; every later index is in 4-7 and
; so selects a lane of the zeroinitializer operand, zeroing result lanes 4-15.
20135  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
20136  %6 = bitcast <16 x i1> %5 to i16
20137  ret i16 %6
20138}
20139
20140
20141
; Test: unmasked fcmp oeq of two <4 x float> register operands. The <4 x i1>
; result is widened to <32 x i1> with zero upper lanes and returned as an i32
; bitmask.
; VLX checks expect a single xmm vcmpeqps into a mask register read back with
; kmovd. NoVLX checks expect a zmm compare followed by a kshiftlw/kshiftrw $12
; pair that zeroes the mask bits above the low four, then kmovw.
20142define zeroext i32 @test_vcmpoeqps_v4i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
20143; VLX-LABEL: test_vcmpoeqps_v4i1_v32i1_mask:
20144; VLX:       # %bb.0: # %entry
20145; VLX-NEXT:    vcmpeqps %xmm1, %xmm0, %k0
20146; VLX-NEXT:    kmovd %k0, %eax
20147; VLX-NEXT:    retq
20148;
20149; NoVLX-LABEL: test_vcmpoeqps_v4i1_v32i1_mask:
20150; NoVLX:       # %bb.0: # %entry
20151; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
20152; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
20153; NoVLX-NEXT:    vcmpeqps %zmm1, %zmm0, %k0
20154; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
20155; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
20156; NoVLX-NEXT:    kmovw %k0, %eax
20157; NoVLX-NEXT:    vzeroupper
20158; NoVLX-NEXT:    retq
20159entry:
20160  %0 = bitcast <2 x i64> %__a to <4 x float>
20161  %1 = bitcast <2 x i64> %__b to <4 x float>
20162  %2 = fcmp oeq <4 x float> %0, %1
; Indices 0-3 select the compare lanes; every later index is in 4-7 and so
; selects a lane of the zeroinitializer operand, zeroing result lanes 4-31.
20163  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
20164  %4 = bitcast <32 x i1> %3 to i32
20165  ret i32 %4
20166}
20167
; Test: unmasked fcmp oeq of <4 x float> %__a against a full vector loaded
; from %__b. The <4 x i1> result is widened to <32 x i1> with zero upper lanes
; and returned as an i32 bitmask.
; VLX checks expect the load folded into vcmpeqps (%rdi). NoVLX checks expect
; a separate vmovaps load, a zmm compare, and a kshiftlw/kshiftrw $12 pair
; that zeroes the mask bits above the low four.
20168define zeroext i32 @test_vcmpoeqps_v4i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
20169; VLX-LABEL: test_vcmpoeqps_v4i1_v32i1_mask_mem:
20170; VLX:       # %bb.0: # %entry
20171; VLX-NEXT:    vcmpeqps (%rdi), %xmm0, %k0
20172; VLX-NEXT:    kmovd %k0, %eax
20173; VLX-NEXT:    retq
20174;
20175; NoVLX-LABEL: test_vcmpoeqps_v4i1_v32i1_mask_mem:
20176; NoVLX:       # %bb.0: # %entry
20177; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
20178; NoVLX-NEXT:    vmovaps (%rdi), %xmm1
20179; NoVLX-NEXT:    vcmpeqps %zmm1, %zmm0, %k0
20180; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
20181; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
20182; NoVLX-NEXT:    kmovw %k0, %eax
20183; NoVLX-NEXT:    vzeroupper
20184; NoVLX-NEXT:    retq
20185entry:
20186  %0 = bitcast <2 x i64> %__a to <4 x float>
20187  %load = load <2 x i64>, <2 x i64>* %__b
20188  %1 = bitcast <2 x i64> %load to <4 x float>
20189  %2 = fcmp oeq <4 x float> %0, %1
; Indices 0-3 select the compare lanes; every later index is in 4-7 and so
; selects a lane of the zeroinitializer operand, zeroing result lanes 4-31.
20190  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
20191  %4 = bitcast <32 x i1> %3 to i32
20192  ret i32 %4
20193}
20194
; Test: unmasked fcmp oeq of <4 x float> %__a against a scalar float loaded
; from %__b and splatted to all four lanes. The <4 x i1> result is widened to
; <32 x i1> with zero upper lanes and returned as an i32 bitmask.
; VLX checks expect the broadcast folded into vcmpeqps as (%rdi){1to4}.
; NoVLX checks expect a separate vbroadcastss, a zmm compare, and a
; kshiftlw/kshiftrw $12 pair that zeroes the mask bits above the low four.
20195define zeroext i32 @test_vcmpoeqps_v4i1_v32i1_mask_mem_b(<2 x i64> %__a, float* %__b) local_unnamed_addr {
20196; VLX-LABEL: test_vcmpoeqps_v4i1_v32i1_mask_mem_b:
20197; VLX:       # %bb.0: # %entry
20198; VLX-NEXT:    vcmpeqps (%rdi){1to4}, %xmm0, %k0
20199; VLX-NEXT:    kmovd %k0, %eax
20200; VLX-NEXT:    retq
20201;
20202; NoVLX-LABEL: test_vcmpoeqps_v4i1_v32i1_mask_mem_b:
20203; NoVLX:       # %bb.0: # %entry
20204; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
20205; NoVLX-NEXT:    vbroadcastss (%rdi), %xmm1
20206; NoVLX-NEXT:    vcmpeqps %zmm1, %zmm0, %k0
20207; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
20208; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
20209; NoVLX-NEXT:    kmovw %k0, %eax
20210; NoVLX-NEXT:    vzeroupper
20211; NoVLX-NEXT:    retq
20212entry:
20213  %0 = bitcast <2 x i64> %__a to <4 x float>
20214  %load = load float, float* %__b
; insertelement + all-zero shuffle mask splats the loaded scalar to four lanes.
20215  %vec = insertelement <4 x float> undef, float %load, i32 0
20216  %1 = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
20217  %2 = fcmp oeq <4 x float> %0, %1
; Indices 0-3 select the compare lanes; every later index is in 4-7 and so
; selects a lane of the zeroinitializer operand, zeroing result lanes 4-31.
20218  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
20219  %4 = bitcast <32 x i1> %3 to i32
20220  ret i32 %4
20221}
20222
; Test: fcmp oeq of two <4 x float> register operands, ANDed with the i4 mask
; %__u (bitcast to <4 x i1>). The result is widened to <32 x i1> with zero
; upper lanes and returned as an i32 bitmask.
; VLX checks expect %__u moved into %k1 and used as the write-mask of an xmm
; vcmpeqps. NoVLX checks expect a write-masked zmm compare followed by a
; kshiftlw/kshiftrw $12 pair that zeroes the mask bits above the low four.
20223define zeroext i32 @test_masked_vcmpoeqps_v4i1_v32i1_mask(i4 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
20224; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v32i1_mask:
20225; VLX:       # %bb.0: # %entry
20226; VLX-NEXT:    kmovd %edi, %k1
20227; VLX-NEXT:    vcmpeqps %xmm1, %xmm0, %k0 {%k1}
20228; VLX-NEXT:    kmovd %k0, %eax
20229; VLX-NEXT:    retq
20230;
20231; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v32i1_mask:
20232; NoVLX:       # %bb.0: # %entry
20233; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
20234; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
20235; NoVLX-NEXT:    kmovw %edi, %k1
20236; NoVLX-NEXT:    vcmpeqps %zmm1, %zmm0, %k0 {%k1}
20237; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
20238; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
20239; NoVLX-NEXT:    kmovw %k0, %eax
20240; NoVLX-NEXT:    vzeroupper
20241; NoVLX-NEXT:    retq
20242entry:
20243  %0 = bitcast <2 x i64> %__a to <4 x float>
20244  %1 = bitcast <2 x i64> %__b to <4 x float>
20245  %2 = fcmp oeq <4 x float> %0, %1
; Apply the caller-supplied 4-bit mask lane-wise to the compare result.
20246  %3 = bitcast i4 %__u to <4 x i1>
20247  %4 = and <4 x i1> %2, %3
; Indices 0-3 select the masked compare lanes; every later index is in 4-7 and
; so selects a lane of the zeroinitializer operand, zeroing result lanes 4-31.
20248  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
20249  %6 = bitcast <32 x i1> %5 to i32
20250  ret i32 %6
20251}
20252
; Test: fcmp oeq of <4 x float> %__a against a full vector loaded from %__b,
; ANDed with the i4 mask %__u (bitcast to <4 x i1>). The result is widened to
; <32 x i1> with zero upper lanes and returned as an i32 bitmask.
; VLX checks expect the load folded into a write-masked vcmpeqps (%rsi).
; NoVLX checks expect a separate vmovaps load, a write-masked zmm compare, and
; a kshiftlw/kshiftrw $12 pair that zeroes the mask bits above the low four.
20253define zeroext i32 @test_masked_vcmpoeqps_v4i1_v32i1_mask_mem(i4 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
20254; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v32i1_mask_mem:
20255; VLX:       # %bb.0: # %entry
20256; VLX-NEXT:    kmovd %edi, %k1
20257; VLX-NEXT:    vcmpeqps (%rsi), %xmm0, %k0 {%k1}
20258; VLX-NEXT:    kmovd %k0, %eax
20259; VLX-NEXT:    retq
20260;
20261; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v32i1_mask_mem:
20262; NoVLX:       # %bb.0: # %entry
20263; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
20264; NoVLX-NEXT:    kmovw %edi, %k1
20265; NoVLX-NEXT:    vmovaps (%rsi), %xmm1
20266; NoVLX-NEXT:    vcmpeqps %zmm1, %zmm0, %k0 {%k1}
20267; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
20268; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
20269; NoVLX-NEXT:    kmovw %k0, %eax
20270; NoVLX-NEXT:    vzeroupper
20271; NoVLX-NEXT:    retq
20272entry:
20273  %0 = bitcast <2 x i64> %__a to <4 x float>
20274  %load = load <2 x i64>, <2 x i64>* %__b
20275  %1 = bitcast <2 x i64> %load to <4 x float>
20276  %2 = fcmp oeq <4 x float> %0, %1
; Apply the caller-supplied 4-bit mask lane-wise to the compare result.
20277  %3 = bitcast i4 %__u to <4 x i1>
20278  %4 = and <4 x i1> %2, %3
; Indices 0-3 select the masked compare lanes; every later index is in 4-7 and
; so selects a lane of the zeroinitializer operand, zeroing result lanes 4-31.
20279  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
20280  %6 = bitcast <32 x i1> %5 to i32
20281  ret i32 %6
20282}
20283
; Test: fcmp oeq of <4 x float> %__a against a scalar float loaded from %__b
; and splatted to all four lanes, ANDed with the i4 mask %__u. The result is
; widened to <32 x i1> with zero upper lanes and returned as an i32 bitmask.
; VLX checks expect a write-masked vcmpeqps with the broadcast folded in as
; (%rsi){1to4}. NoVLX checks expect a separate vbroadcastss, a write-masked
; zmm compare, and a kshiftlw/kshiftrw $12 pair that zeroes the mask bits
; above the low four.
20284define zeroext i32 @test_masked_vcmpoeqps_v4i1_v32i1_mask_mem_b(i4 zeroext %__u, <2 x i64> %__a, float* %__b) local_unnamed_addr {
20285; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v32i1_mask_mem_b:
20286; VLX:       # %bb.0: # %entry
20287; VLX-NEXT:    kmovd %edi, %k1
20288; VLX-NEXT:    vcmpeqps (%rsi){1to4}, %xmm0, %k0 {%k1}
20289; VLX-NEXT:    kmovd %k0, %eax
20290; VLX-NEXT:    retq
20291;
20292; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v32i1_mask_mem_b:
20293; NoVLX:       # %bb.0: # %entry
20294; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
20295; NoVLX-NEXT:    kmovw %edi, %k1
20296; NoVLX-NEXT:    vbroadcastss (%rsi), %xmm1
20297; NoVLX-NEXT:    vcmpeqps %zmm1, %zmm0, %k0 {%k1}
20298; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
20299; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
20300; NoVLX-NEXT:    kmovw %k0, %eax
20301; NoVLX-NEXT:    vzeroupper
20302; NoVLX-NEXT:    retq
20303entry:
20304  %0 = bitcast <2 x i64> %__a to <4 x float>
20305  %load = load float, float* %__b
; insertelement + all-zero shuffle mask splats the loaded scalar to four lanes.
20306  %vec = insertelement <4 x float> undef, float %load, i32 0
20307  %1 = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
20308  %2 = fcmp oeq <4 x float> %0, %1
; Apply the caller-supplied 4-bit mask lane-wise to the compare result.
20309  %3 = bitcast i4 %__u to <4 x i1>
20310  %4 = and <4 x i1> %2, %3
; Indices 0-3 select the masked compare lanes; every later index is in 4-7 and
; so selects a lane of the zeroinitializer operand, zeroing result lanes 4-31.
20311  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
20312  %6 = bitcast <32 x i1> %5 to i32
20313  ret i32 %6
20314}
20315
20316
20317
; Test: unmasked fcmp oeq of two <4 x float> register operands. The <4 x i1>
; result is widened to <64 x i1> with zero upper lanes and returned as an i64
; bitmask.
; VLX checks expect a single xmm vcmpeqps into a mask register read back with
; kmovq. NoVLX checks expect a zmm compare, a kshiftlw/kshiftrw $12 pair that
; zeroes the mask bits above the low four, then kmovw plus movzwl %ax, %eax.
20318define zeroext i64 @test_vcmpoeqps_v4i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
20319; VLX-LABEL: test_vcmpoeqps_v4i1_v64i1_mask:
20320; VLX:       # %bb.0: # %entry
20321; VLX-NEXT:    vcmpeqps %xmm1, %xmm0, %k0
20322; VLX-NEXT:    kmovq %k0, %rax
20323; VLX-NEXT:    retq
20324;
20325; NoVLX-LABEL: test_vcmpoeqps_v4i1_v64i1_mask:
20326; NoVLX:       # %bb.0: # %entry
20327; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
20328; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
20329; NoVLX-NEXT:    vcmpeqps %zmm1, %zmm0, %k0
20330; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
20331; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
20332; NoVLX-NEXT:    kmovw %k0, %eax
20333; NoVLX-NEXT:    movzwl %ax, %eax
20334; NoVLX-NEXT:    vzeroupper
20335; NoVLX-NEXT:    retq
20336entry:
20337  %0 = bitcast <2 x i64> %__a to <4 x float>
20338  %1 = bitcast <2 x i64> %__b to <4 x float>
20339  %2 = fcmp oeq <4 x float> %0, %1
; Indices 0-3 select the compare lanes; every later index is in 4-7 and so
; selects a lane of the zeroinitializer operand, zeroing result lanes 4-63.
20340  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
20341  %4 = bitcast <64 x i1> %3 to i64
20342  ret i64 %4
20343}
20344
; Test: unmasked fcmp oeq of <4 x float> %__a against a full vector loaded
; from %__b. The <4 x i1> result is widened to <64 x i1> with zero upper lanes
; and returned as an i64 bitmask.
; VLX checks expect the load folded into vcmpeqps (%rdi) and kmovq. NoVLX
; checks expect a separate vmovaps load, a zmm compare, a kshiftlw/kshiftrw
; $12 pair that zeroes the mask bits above the low four, then kmovw plus
; movzwl %ax, %eax.
20345define zeroext i64 @test_vcmpoeqps_v4i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
20346; VLX-LABEL: test_vcmpoeqps_v4i1_v64i1_mask_mem:
20347; VLX:       # %bb.0: # %entry
20348; VLX-NEXT:    vcmpeqps (%rdi), %xmm0, %k0
20349; VLX-NEXT:    kmovq %k0, %rax
20350; VLX-NEXT:    retq
20351;
20352; NoVLX-LABEL: test_vcmpoeqps_v4i1_v64i1_mask_mem:
20353; NoVLX:       # %bb.0: # %entry
20354; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
20355; NoVLX-NEXT:    vmovaps (%rdi), %xmm1
20356; NoVLX-NEXT:    vcmpeqps %zmm1, %zmm0, %k0
20357; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
20358; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
20359; NoVLX-NEXT:    kmovw %k0, %eax
20360; NoVLX-NEXT:    movzwl %ax, %eax
20361; NoVLX-NEXT:    vzeroupper
20362; NoVLX-NEXT:    retq
20363entry:
20364  %0 = bitcast <2 x i64> %__a to <4 x float>
20365  %load = load <2 x i64>, <2 x i64>* %__b
20366  %1 = bitcast <2 x i64> %load to <4 x float>
20367  %2 = fcmp oeq <4 x float> %0, %1
; Indices 0-3 select the compare lanes; every later index is in 4-7 and so
; selects a lane of the zeroinitializer operand, zeroing result lanes 4-63.
20368  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
20369  %4 = bitcast <64 x i1> %3 to i64
20370  ret i64 %4
20371}
20372
; Test: unmasked fcmp oeq of <4 x float> %__a against a scalar float loaded
; from %__b and splatted to all four lanes. The <4 x i1> result is widened to
; <64 x i1> with zero upper lanes and returned as an i64 bitmask.
; VLX checks expect the broadcast folded into vcmpeqps as (%rdi){1to4} and
; kmovq. NoVLX checks expect a separate vbroadcastss, a zmm compare, a
; kshiftlw/kshiftrw $12 pair that zeroes the mask bits above the low four,
; then kmovw plus movzwl %ax, %eax.
20373define zeroext i64 @test_vcmpoeqps_v4i1_v64i1_mask_mem_b(<2 x i64> %__a, float* %__b) local_unnamed_addr {
20374; VLX-LABEL: test_vcmpoeqps_v4i1_v64i1_mask_mem_b:
20375; VLX:       # %bb.0: # %entry
20376; VLX-NEXT:    vcmpeqps (%rdi){1to4}, %xmm0, %k0
20377; VLX-NEXT:    kmovq %k0, %rax
20378; VLX-NEXT:    retq
20379;
20380; NoVLX-LABEL: test_vcmpoeqps_v4i1_v64i1_mask_mem_b:
20381; NoVLX:       # %bb.0: # %entry
20382; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
20383; NoVLX-NEXT:    vbroadcastss (%rdi), %xmm1
20384; NoVLX-NEXT:    vcmpeqps %zmm1, %zmm0, %k0
20385; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
20386; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
20387; NoVLX-NEXT:    kmovw %k0, %eax
20388; NoVLX-NEXT:    movzwl %ax, %eax
20389; NoVLX-NEXT:    vzeroupper
20390; NoVLX-NEXT:    retq
20391entry:
20392  %0 = bitcast <2 x i64> %__a to <4 x float>
20393  %load = load float, float* %__b
; insertelement + all-zero shuffle mask splats the loaded scalar to four lanes.
20394  %vec = insertelement <4 x float> undef, float %load, i32 0
20395  %1 = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
20396  %2 = fcmp oeq <4 x float> %0, %1
; Indices 0-3 select the compare lanes; every later index is in 4-7 and so
; selects a lane of the zeroinitializer operand, zeroing result lanes 4-63.
20397  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
20398  %4 = bitcast <64 x i1> %3 to i64
20399  ret i64 %4
20400}
20401
; Test: fcmp oeq of two <4 x float> register operands, ANDed with the i4 mask
; %__u (bitcast to <4 x i1>). The result is widened to <64 x i1> with zero
; upper lanes and returned as an i64 bitmask.
; VLX checks expect %__u moved into %k1 and used as the write-mask of an xmm
; vcmpeqps, read back with kmovq. NoVLX checks expect a write-masked zmm
; compare, a kshiftlw/kshiftrw $12 pair that zeroes the mask bits above the
; low four, then kmovw plus movzwl %ax, %eax.
20402define zeroext i64 @test_masked_vcmpoeqps_v4i1_v64i1_mask(i4 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
20403; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v64i1_mask:
20404; VLX:       # %bb.0: # %entry
20405; VLX-NEXT:    kmovd %edi, %k1
20406; VLX-NEXT:    vcmpeqps %xmm1, %xmm0, %k0 {%k1}
20407; VLX-NEXT:    kmovq %k0, %rax
20408; VLX-NEXT:    retq
20409;
20410; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v64i1_mask:
20411; NoVLX:       # %bb.0: # %entry
20412; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
20413; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
20414; NoVLX-NEXT:    kmovw %edi, %k1
20415; NoVLX-NEXT:    vcmpeqps %zmm1, %zmm0, %k0 {%k1}
20416; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
20417; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
20418; NoVLX-NEXT:    kmovw %k0, %eax
20419; NoVLX-NEXT:    movzwl %ax, %eax
20420; NoVLX-NEXT:    vzeroupper
20421; NoVLX-NEXT:    retq
20422entry:
20423  %0 = bitcast <2 x i64> %__a to <4 x float>
20424  %1 = bitcast <2 x i64> %__b to <4 x float>
20425  %2 = fcmp oeq <4 x float> %0, %1
; Apply the caller-supplied 4-bit mask lane-wise to the compare result.
20426  %3 = bitcast i4 %__u to <4 x i1>
20427  %4 = and <4 x i1> %2, %3
; Indices 0-3 select the masked compare lanes; every later index is in 4-7 and
; so selects a lane of the zeroinitializer operand, zeroing result lanes 4-63.
20428  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
20429  %6 = bitcast <64 x i1> %5 to i64
20430  ret i64 %6
20431}
20432
; Test: fcmp oeq of <4 x float> %__a against a full vector loaded from %__b,
; ANDed with the i4 mask %__u (bitcast to <4 x i1>). The result is widened to
; <64 x i1> with zero upper lanes and returned as an i64 bitmask.
; VLX checks expect the load folded into a write-masked vcmpeqps (%rsi), read
; back with kmovq. NoVLX checks expect a separate vmovaps load, a write-masked
; zmm compare, a kshiftlw/kshiftrw $12 pair that zeroes the mask bits above
; the low four, then kmovw plus movzwl %ax, %eax.
20433define zeroext i64 @test_masked_vcmpoeqps_v4i1_v64i1_mask_mem(i4 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
20434; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v64i1_mask_mem:
20435; VLX:       # %bb.0: # %entry
20436; VLX-NEXT:    kmovd %edi, %k1
20437; VLX-NEXT:    vcmpeqps (%rsi), %xmm0, %k0 {%k1}
20438; VLX-NEXT:    kmovq %k0, %rax
20439; VLX-NEXT:    retq
20440;
20441; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v64i1_mask_mem:
20442; NoVLX:       # %bb.0: # %entry
20443; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
20444; NoVLX-NEXT:    kmovw %edi, %k1
20445; NoVLX-NEXT:    vmovaps (%rsi), %xmm1
20446; NoVLX-NEXT:    vcmpeqps %zmm1, %zmm0, %k0 {%k1}
20447; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
20448; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
20449; NoVLX-NEXT:    kmovw %k0, %eax
20450; NoVLX-NEXT:    movzwl %ax, %eax
20451; NoVLX-NEXT:    vzeroupper
20452; NoVLX-NEXT:    retq
20453entry:
20454  %0 = bitcast <2 x i64> %__a to <4 x float>
20455  %load = load <2 x i64>, <2 x i64>* %__b
20456  %1 = bitcast <2 x i64> %load to <4 x float>
20457  %2 = fcmp oeq <4 x float> %0, %1
; Apply the caller-supplied 4-bit mask lane-wise to the compare result.
20458  %3 = bitcast i4 %__u to <4 x i1>
20459  %4 = and <4 x i1> %2, %3
; Indices 0-3 select the masked compare lanes; every later index is in 4-7 and
; so selects a lane of the zeroinitializer operand, zeroing result lanes 4-63.
20460  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
20461  %6 = bitcast <64 x i1> %5 to i64
20462  ret i64 %6
20463}
20464
; Test: fcmp oeq of <4 x float> %__a against a scalar float loaded from %__b
; and splatted to all four lanes, ANDed with the i4 mask %__u. The result is
; widened to <64 x i1> with zero upper lanes and returned as an i64 bitmask.
; VLX checks expect a write-masked vcmpeqps with the broadcast folded in as
; (%rsi){1to4}, read back with kmovq. NoVLX checks expect a separate
; vbroadcastss, a write-masked zmm compare, a kshiftlw/kshiftrw $12 pair that
; zeroes the mask bits above the low four, then kmovw plus movzwl %ax, %eax.
20465define zeroext i64 @test_masked_vcmpoeqps_v4i1_v64i1_mask_mem_b(i4 zeroext %__u, <2 x i64> %__a, float* %__b) local_unnamed_addr {
20466; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v64i1_mask_mem_b:
20467; VLX:       # %bb.0: # %entry
20468; VLX-NEXT:    kmovd %edi, %k1
20469; VLX-NEXT:    vcmpeqps (%rsi){1to4}, %xmm0, %k0 {%k1}
20470; VLX-NEXT:    kmovq %k0, %rax
20471; VLX-NEXT:    retq
20472;
20473; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v64i1_mask_mem_b:
20474; NoVLX:       # %bb.0: # %entry
20475; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
20476; NoVLX-NEXT:    kmovw %edi, %k1
20477; NoVLX-NEXT:    vbroadcastss (%rsi), %xmm1
20478; NoVLX-NEXT:    vcmpeqps %zmm1, %zmm0, %k0 {%k1}
20479; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
20480; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
20481; NoVLX-NEXT:    kmovw %k0, %eax
20482; NoVLX-NEXT:    movzwl %ax, %eax
20483; NoVLX-NEXT:    vzeroupper
20484; NoVLX-NEXT:    retq
20485entry:
20486  %0 = bitcast <2 x i64> %__a to <4 x float>
20487  %load = load float, float* %__b
; insertelement + all-zero shuffle mask splats the loaded scalar to four lanes.
20488  %vec = insertelement <4 x float> undef, float %load, i32 0
20489  %1 = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
20490  %2 = fcmp oeq <4 x float> %0, %1
; Apply the caller-supplied 4-bit mask lane-wise to the compare result.
20491  %3 = bitcast i4 %__u to <4 x i1>
20492  %4 = and <4 x i1> %2, %3
; Indices 0-3 select the masked compare lanes; every later index is in 4-7 and
; so selects a lane of the zeroinitializer operand, zeroing result lanes 4-63.
20493  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
20494  %6 = bitcast <64 x i1> %5 to i64
20495  ret i64 %6
20496}
20497
20498
20499
; Ordered-equal compare of two 8 x float vectors (both bitcast from <4 x i64>
; register arguments). The <8 x i1> result is zero-padded to <16 x i1> and
; returned as an i16 bitmask.
; Expected asm: the +avx512vl run compares on ymm directly; the +avx512f-only
; run compares on zmm and shifts the k-register left/right by 8.
20500define zeroext i16 @test_vcmpoeqps_v8i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
20501; VLX-LABEL: test_vcmpoeqps_v8i1_v16i1_mask:
20502; VLX:       # %bb.0: # %entry
20503; VLX-NEXT:    vcmpeqps %ymm1, %ymm0, %k0
20504; VLX-NEXT:    kmovd %k0, %eax
20505; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
20506; VLX-NEXT:    vzeroupper
20507; VLX-NEXT:    retq
20508;
20509; NoVLX-LABEL: test_vcmpoeqps_v8i1_v16i1_mask:
20510; NoVLX:       # %bb.0: # %entry
20511; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
20512; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
20513; NoVLX-NEXT:    vcmpeqps %zmm1, %zmm0, %k0
20514; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
20515; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
20516; NoVLX-NEXT:    kmovw %k0, %eax
20517; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
20518; NoVLX-NEXT:    vzeroupper
20519; NoVLX-NEXT:    retq
20520entry:
20521  %0 = bitcast <4 x i64> %__a to <8 x float>
20522  %1 = bitcast <4 x i64> %__b to <8 x float>
20523  %2 = fcmp oeq <8 x float> %0, %1
; Indices 0-7 select compare results; indices 8-15 select from the
; zeroinitializer operand, so the upper 8 lanes are zero.
20524  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
20525  %4 = bitcast <16 x i1> %3 to i16
20526  ret i16 %4
20527}
20528
; Ordered-equal compare of an 8 x float register vector against a full vector
; loaded from %__b. The <8 x i1> result is zero-padded to <16 x i1> and
; returned as an i16 bitmask.
; Expected asm: the +avx512vl run folds the load into the compare; the
; +avx512f-only run loads into ymm1 first, compares on zmm and shifts the
; k-register left/right by 8.
20529define zeroext i16 @test_vcmpoeqps_v8i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
20530; VLX-LABEL: test_vcmpoeqps_v8i1_v16i1_mask_mem:
20531; VLX:       # %bb.0: # %entry
20532; VLX-NEXT:    vcmpeqps (%rdi), %ymm0, %k0
20533; VLX-NEXT:    kmovd %k0, %eax
20534; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
20535; VLX-NEXT:    vzeroupper
20536; VLX-NEXT:    retq
20537;
20538; NoVLX-LABEL: test_vcmpoeqps_v8i1_v16i1_mask_mem:
20539; NoVLX:       # %bb.0: # %entry
20540; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
20541; NoVLX-NEXT:    vmovaps (%rdi), %ymm1
20542; NoVLX-NEXT:    vcmpeqps %zmm1, %zmm0, %k0
20543; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
20544; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
20545; NoVLX-NEXT:    kmovw %k0, %eax
20546; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
20547; NoVLX-NEXT:    vzeroupper
20548; NoVLX-NEXT:    retq
20549entry:
20550  %0 = bitcast <4 x i64> %__a to <8 x float>
; Second operand comes from memory as a whole <4 x i64> vector.
20551  %load = load <4 x i64>, <4 x i64>* %__b
20552  %1 = bitcast <4 x i64> %load to <8 x float>
20553  %2 = fcmp oeq <8 x float> %0, %1
; Indices 0-7 select compare results; indices 8-15 select from the
; zeroinitializer operand, so the upper 8 lanes are zero.
20554  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
20555  %4 = bitcast <16 x i1> %3 to i16
20556  ret i16 %4
20557}
20558
; Ordered-equal compare of an 8 x float register vector against a scalar float
; loaded from %__b and splatted to all 8 lanes. The <8 x i1> result is
; zero-padded to <16 x i1> and returned as an i16 bitmask.
; Expected asm: the +avx512vl run folds the load as a {1to8} broadcast operand;
; the +avx512f-only run uses a separate vbroadcastss, compares on zmm and
; shifts the k-register left/right by 8.
20559define zeroext i16 @test_vcmpoeqps_v8i1_v16i1_mask_mem_b(<4 x i64> %__a, float* %__b) local_unnamed_addr {
20560; VLX-LABEL: test_vcmpoeqps_v8i1_v16i1_mask_mem_b:
20561; VLX:       # %bb.0: # %entry
20562; VLX-NEXT:    vcmpeqps (%rdi){1to8}, %ymm0, %k0
20563; VLX-NEXT:    kmovd %k0, %eax
20564; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
20565; VLX-NEXT:    vzeroupper
20566; VLX-NEXT:    retq
20567;
20568; NoVLX-LABEL: test_vcmpoeqps_v8i1_v16i1_mask_mem_b:
20569; NoVLX:       # %bb.0: # %entry
20570; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
20571; NoVLX-NEXT:    vbroadcastss (%rdi), %ymm1
20572; NoVLX-NEXT:    vcmpeqps %zmm1, %zmm0, %k0
20573; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
20574; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
20575; NoVLX-NEXT:    kmovw %k0, %eax
20576; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
20577; NoVLX-NEXT:    vzeroupper
20578; NoVLX-NEXT:    retq
20579entry:
20580  %0 = bitcast <4 x i64> %__a to <8 x float>
; Splat the loaded scalar: insert into lane 0, then shuffle lane 0 to every lane.
20581  %load = load float, float* %__b
20582  %vec = insertelement <8 x float> undef, float %load, i32 0
20583  %1 = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
20584  %2 = fcmp oeq <8 x float> %0, %1
; Indices 0-7 select compare results; indices 8-15 select from the
; zeroinitializer operand, so the upper 8 lanes are zero.
20585  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
20586  %4 = bitcast <16 x i1> %3 to i16
20587  ret i16 %4
20588}
20589
; Masked ordered-equal compare of two 8 x float register vectors. The <8 x i1>
; result is ANDed with the i8 mask %__u, zero-padded to <16 x i1> and returned
; as an i16 bitmask.
; Expected asm: both runs move %edi into %k1 and use it as the compare's
; write-mask; the +avx512f-only run compares on zmm and shifts the k-register
; left/right by 8.
20590define zeroext i16 @test_masked_vcmpoeqps_v8i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
20591; VLX-LABEL: test_masked_vcmpoeqps_v8i1_v16i1_mask:
20592; VLX:       # %bb.0: # %entry
20593; VLX-NEXT:    kmovd %edi, %k1
20594; VLX-NEXT:    vcmpeqps %ymm1, %ymm0, %k0 {%k1}
20595; VLX-NEXT:    kmovd %k0, %eax
20596; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
20597; VLX-NEXT:    vzeroupper
20598; VLX-NEXT:    retq
20599;
20600; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v16i1_mask:
20601; NoVLX:       # %bb.0: # %entry
20602; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
20603; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
20604; NoVLX-NEXT:    kmovw %edi, %k1
20605; NoVLX-NEXT:    vcmpeqps %zmm1, %zmm0, %k0 {%k1}
20606; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
20607; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
20608; NoVLX-NEXT:    kmovw %k0, %eax
20609; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
20610; NoVLX-NEXT:    vzeroupper
20611; NoVLX-NEXT:    retq
20612entry:
20613  %0 = bitcast <4 x i64> %__a to <8 x float>
20614  %1 = bitcast <4 x i64> %__b to <8 x float>
20615  %2 = fcmp oeq <8 x float> %0, %1
; Apply the caller-supplied lane mask.
20616  %3 = bitcast i8 %__u to <8 x i1>
20617  %4 = and <8 x i1> %2, %3
; Indices 0-7 select masked compare results; indices 8-15 select from the
; zeroinitializer operand, so the upper 8 lanes are zero.
20618  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
20619  %6 = bitcast <16 x i1> %5 to i16
20620  ret i16 %6
20621}
20622
; Masked ordered-equal compare of an 8 x float register vector against a full
; vector loaded from %__b. The <8 x i1> result is ANDed with the i8 mask %__u,
; zero-padded to <16 x i1> and returned as an i16 bitmask.
; Expected asm: the +avx512vl run folds the load into the masked compare; the
; +avx512f-only run loads into ymm1 first, compares on zmm and shifts the
; k-register left/right by 8.
20623define zeroext i16 @test_masked_vcmpoeqps_v8i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
20624; VLX-LABEL: test_masked_vcmpoeqps_v8i1_v16i1_mask_mem:
20625; VLX:       # %bb.0: # %entry
20626; VLX-NEXT:    kmovd %edi, %k1
20627; VLX-NEXT:    vcmpeqps (%rsi), %ymm0, %k0 {%k1}
20628; VLX-NEXT:    kmovd %k0, %eax
20629; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
20630; VLX-NEXT:    vzeroupper
20631; VLX-NEXT:    retq
20632;
20633; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v16i1_mask_mem:
20634; NoVLX:       # %bb.0: # %entry
20635; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
20636; NoVLX-NEXT:    vmovaps (%rsi), %ymm1
20637; NoVLX-NEXT:    kmovw %edi, %k1
20638; NoVLX-NEXT:    vcmpeqps %zmm1, %zmm0, %k0 {%k1}
20639; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
20640; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
20641; NoVLX-NEXT:    kmovw %k0, %eax
20642; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
20643; NoVLX-NEXT:    vzeroupper
20644; NoVLX-NEXT:    retq
20645entry:
20646  %0 = bitcast <4 x i64> %__a to <8 x float>
; Second operand comes from memory as a whole <4 x i64> vector.
20647  %load = load <4 x i64>, <4 x i64>* %__b
20648  %1 = bitcast <4 x i64> %load to <8 x float>
20649  %2 = fcmp oeq <8 x float> %0, %1
; Apply the caller-supplied lane mask.
20650  %3 = bitcast i8 %__u to <8 x i1>
20651  %4 = and <8 x i1> %2, %3
; Indices 0-7 select masked compare results; indices 8-15 select from the
; zeroinitializer operand, so the upper 8 lanes are zero.
20652  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
20653  %6 = bitcast <16 x i1> %5 to i16
20654  ret i16 %6
20655}
20656
; Masked ordered-equal compare of an 8 x float register vector against a scalar
; float loaded from %__b and splatted to all 8 lanes. The <8 x i1> result is
; ANDed with the i8 mask %__u, zero-padded to <16 x i1> and returned as an i16
; bitmask.
; Expected asm: the +avx512vl run folds the load as a {1to8} broadcast operand;
; the +avx512f-only run uses a separate vbroadcastss, compares on zmm and
; shifts the k-register left/right by 8.
20657define zeroext i16 @test_masked_vcmpoeqps_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, float* %__b) local_unnamed_addr {
20658; VLX-LABEL: test_masked_vcmpoeqps_v8i1_v16i1_mask_mem_b:
20659; VLX:       # %bb.0: # %entry
20660; VLX-NEXT:    kmovd %edi, %k1
20661; VLX-NEXT:    vcmpeqps (%rsi){1to8}, %ymm0, %k0 {%k1}
20662; VLX-NEXT:    kmovd %k0, %eax
20663; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
20664; VLX-NEXT:    vzeroupper
20665; VLX-NEXT:    retq
20666;
20667; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v16i1_mask_mem_b:
20668; NoVLX:       # %bb.0: # %entry
20669; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
20670; NoVLX-NEXT:    vbroadcastss (%rsi), %ymm1
20671; NoVLX-NEXT:    kmovw %edi, %k1
20672; NoVLX-NEXT:    vcmpeqps %zmm1, %zmm0, %k0 {%k1}
20673; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
20674; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
20675; NoVLX-NEXT:    kmovw %k0, %eax
20676; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
20677; NoVLX-NEXT:    vzeroupper
20678; NoVLX-NEXT:    retq
20679entry:
20680  %0 = bitcast <4 x i64> %__a to <8 x float>
; Splat the loaded scalar: insert into lane 0, then shuffle lane 0 to every lane.
20681  %load = load float, float* %__b
20682  %vec = insertelement <8 x float> undef, float %load, i32 0
20683  %1 = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
20684  %2 = fcmp oeq <8 x float> %0, %1
; Apply the caller-supplied lane mask.
20685  %3 = bitcast i8 %__u to <8 x i1>
20686  %4 = and <8 x i1> %2, %3
; Indices 0-7 select masked compare results; indices 8-15 select from the
; zeroinitializer operand, so the upper 8 lanes are zero.
20687  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
20688  %6 = bitcast <16 x i1> %5 to i16
20689  ret i16 %6
20690}
20691
20692
20693
; Ordered-equal compare of two 8 x float register vectors. The <8 x i1> result
; is zero-padded to <32 x i1> and returned as an i32 bitmask.
; Expected asm: the +avx512vl run compares on ymm directly; the +avx512f-only
; run compares on zmm and shifts the k-register left/right by 8.
20694define zeroext i32 @test_vcmpoeqps_v8i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
20695; VLX-LABEL: test_vcmpoeqps_v8i1_v32i1_mask:
20696; VLX:       # %bb.0: # %entry
20697; VLX-NEXT:    vcmpeqps %ymm1, %ymm0, %k0
20698; VLX-NEXT:    kmovd %k0, %eax
20699; VLX-NEXT:    vzeroupper
20700; VLX-NEXT:    retq
20701;
20702; NoVLX-LABEL: test_vcmpoeqps_v8i1_v32i1_mask:
20703; NoVLX:       # %bb.0: # %entry
20704; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
20705; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
20706; NoVLX-NEXT:    vcmpeqps %zmm1, %zmm0, %k0
20707; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
20708; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
20709; NoVLX-NEXT:    kmovw %k0, %eax
20710; NoVLX-NEXT:    vzeroupper
20711; NoVLX-NEXT:    retq
20712entry:
20713  %0 = bitcast <4 x i64> %__a to <8 x float>
20714  %1 = bitcast <4 x i64> %__b to <8 x float>
20715  %2 = fcmp oeq <8 x float> %0, %1
; Indices 0-7 select compare results; every other index (8-15) selects from
; the zeroinitializer operand, so lanes 8-31 are zero.
20716  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
20717  %4 = bitcast <32 x i1> %3 to i32
20718  ret i32 %4
20719}
20720
; Ordered-equal compare of an 8 x float register vector against a full vector
; loaded from %__b. The <8 x i1> result is zero-padded to <32 x i1> and
; returned as an i32 bitmask.
; Expected asm: the +avx512vl run folds the load into the compare; the
; +avx512f-only run loads into ymm1 first, compares on zmm and shifts the
; k-register left/right by 8.
20721define zeroext i32 @test_vcmpoeqps_v8i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
20722; VLX-LABEL: test_vcmpoeqps_v8i1_v32i1_mask_mem:
20723; VLX:       # %bb.0: # %entry
20724; VLX-NEXT:    vcmpeqps (%rdi), %ymm0, %k0
20725; VLX-NEXT:    kmovd %k0, %eax
20726; VLX-NEXT:    vzeroupper
20727; VLX-NEXT:    retq
20728;
20729; NoVLX-LABEL: test_vcmpoeqps_v8i1_v32i1_mask_mem:
20730; NoVLX:       # %bb.0: # %entry
20731; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
20732; NoVLX-NEXT:    vmovaps (%rdi), %ymm1
20733; NoVLX-NEXT:    vcmpeqps %zmm1, %zmm0, %k0
20734; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
20735; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
20736; NoVLX-NEXT:    kmovw %k0, %eax
20737; NoVLX-NEXT:    vzeroupper
20738; NoVLX-NEXT:    retq
20739entry:
20740  %0 = bitcast <4 x i64> %__a to <8 x float>
; Second operand comes from memory as a whole <4 x i64> vector.
20741  %load = load <4 x i64>, <4 x i64>* %__b
20742  %1 = bitcast <4 x i64> %load to <8 x float>
20743  %2 = fcmp oeq <8 x float> %0, %1
; Indices 0-7 select compare results; every other index (8-15) selects from
; the zeroinitializer operand, so lanes 8-31 are zero.
20744  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
20745  %4 = bitcast <32 x i1> %3 to i32
20746  ret i32 %4
20747}
20748
; Ordered-equal compare of an 8 x float register vector against a scalar float
; loaded from %__b and splatted to all 8 lanes. The <8 x i1> result is
; zero-padded to <32 x i1> and returned as an i32 bitmask.
; Expected asm: the +avx512vl run folds the load as a {1to8} broadcast operand;
; the +avx512f-only run uses a separate vbroadcastss, compares on zmm and
; shifts the k-register left/right by 8.
20749define zeroext i32 @test_vcmpoeqps_v8i1_v32i1_mask_mem_b(<4 x i64> %__a, float* %__b) local_unnamed_addr {
20750; VLX-LABEL: test_vcmpoeqps_v8i1_v32i1_mask_mem_b:
20751; VLX:       # %bb.0: # %entry
20752; VLX-NEXT:    vcmpeqps (%rdi){1to8}, %ymm0, %k0
20753; VLX-NEXT:    kmovd %k0, %eax
20754; VLX-NEXT:    vzeroupper
20755; VLX-NEXT:    retq
20756;
20757; NoVLX-LABEL: test_vcmpoeqps_v8i1_v32i1_mask_mem_b:
20758; NoVLX:       # %bb.0: # %entry
20759; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
20760; NoVLX-NEXT:    vbroadcastss (%rdi), %ymm1
20761; NoVLX-NEXT:    vcmpeqps %zmm1, %zmm0, %k0
20762; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
20763; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
20764; NoVLX-NEXT:    kmovw %k0, %eax
20765; NoVLX-NEXT:    vzeroupper
20766; NoVLX-NEXT:    retq
20767entry:
20768  %0 = bitcast <4 x i64> %__a to <8 x float>
; Splat the loaded scalar: insert into lane 0, then shuffle lane 0 to every lane.
20769  %load = load float, float* %__b
20770  %vec = insertelement <8 x float> undef, float %load, i32 0
20771  %1 = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
20772  %2 = fcmp oeq <8 x float> %0, %1
; Indices 0-7 select compare results; every other index (8-15) selects from
; the zeroinitializer operand, so lanes 8-31 are zero.
20773  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
20774  %4 = bitcast <32 x i1> %3 to i32
20775  ret i32 %4
20776}
20777
; Masked ordered-equal compare of two 8 x float register vectors. The <8 x i1>
; result is ANDed with the i8 mask %__u, zero-padded to <32 x i1> and returned
; as an i32 bitmask.
; Expected asm: both runs move %edi into %k1 and use it as the compare's
; write-mask; the +avx512f-only run compares on zmm and shifts the k-register
; left/right by 8.
20778define zeroext i32 @test_masked_vcmpoeqps_v8i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
20779; VLX-LABEL: test_masked_vcmpoeqps_v8i1_v32i1_mask:
20780; VLX:       # %bb.0: # %entry
20781; VLX-NEXT:    kmovd %edi, %k1
20782; VLX-NEXT:    vcmpeqps %ymm1, %ymm0, %k0 {%k1}
20783; VLX-NEXT:    kmovd %k0, %eax
20784; VLX-NEXT:    vzeroupper
20785; VLX-NEXT:    retq
20786;
20787; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v32i1_mask:
20788; NoVLX:       # %bb.0: # %entry
20789; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
20790; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
20791; NoVLX-NEXT:    kmovw %edi, %k1
20792; NoVLX-NEXT:    vcmpeqps %zmm1, %zmm0, %k0 {%k1}
20793; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
20794; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
20795; NoVLX-NEXT:    kmovw %k0, %eax
20796; NoVLX-NEXT:    vzeroupper
20797; NoVLX-NEXT:    retq
20798entry:
20799  %0 = bitcast <4 x i64> %__a to <8 x float>
20800  %1 = bitcast <4 x i64> %__b to <8 x float>
20801  %2 = fcmp oeq <8 x float> %0, %1
; Apply the caller-supplied lane mask.
20802  %3 = bitcast i8 %__u to <8 x i1>
20803  %4 = and <8 x i1> %2, %3
; Indices 0-7 select masked compare results; every other index (8-15) selects
; from the zeroinitializer operand, so lanes 8-31 are zero.
20804  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
20805  %6 = bitcast <32 x i1> %5 to i32
20806  ret i32 %6
20807}
20808
; Masked ordered-equal compare of an 8 x float register vector against a full
; vector loaded from %__b. The <8 x i1> result is ANDed with the i8 mask %__u,
; zero-padded to <32 x i1> and returned as an i32 bitmask.
; Expected asm: the +avx512vl run folds the load into the masked compare; the
; +avx512f-only run loads into ymm1 first, compares on zmm and shifts the
; k-register left/right by 8.
20809define zeroext i32 @test_masked_vcmpoeqps_v8i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
20810; VLX-LABEL: test_masked_vcmpoeqps_v8i1_v32i1_mask_mem:
20811; VLX:       # %bb.0: # %entry
20812; VLX-NEXT:    kmovd %edi, %k1
20813; VLX-NEXT:    vcmpeqps (%rsi), %ymm0, %k0 {%k1}
20814; VLX-NEXT:    kmovd %k0, %eax
20815; VLX-NEXT:    vzeroupper
20816; VLX-NEXT:    retq
20817;
20818; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v32i1_mask_mem:
20819; NoVLX:       # %bb.0: # %entry
20820; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
20821; NoVLX-NEXT:    vmovaps (%rsi), %ymm1
20822; NoVLX-NEXT:    kmovw %edi, %k1
20823; NoVLX-NEXT:    vcmpeqps %zmm1, %zmm0, %k0 {%k1}
20824; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
20825; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
20826; NoVLX-NEXT:    kmovw %k0, %eax
20827; NoVLX-NEXT:    vzeroupper
20828; NoVLX-NEXT:    retq
20829entry:
20830  %0 = bitcast <4 x i64> %__a to <8 x float>
; Second operand comes from memory as a whole <4 x i64> vector.
20831  %load = load <4 x i64>, <4 x i64>* %__b
20832  %1 = bitcast <4 x i64> %load to <8 x float>
20833  %2 = fcmp oeq <8 x float> %0, %1
; Apply the caller-supplied lane mask.
20834  %3 = bitcast i8 %__u to <8 x i1>
20835  %4 = and <8 x i1> %2, %3
; Indices 0-7 select masked compare results; every other index (8-15) selects
; from the zeroinitializer operand, so lanes 8-31 are zero.
20836  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
20837  %6 = bitcast <32 x i1> %5 to i32
20838  ret i32 %6
20839}
20840
; Masked ordered-equal compare of an 8 x float register vector against a scalar
; float loaded from %__b and splatted to all 8 lanes. The <8 x i1> result is
; ANDed with the i8 mask %__u, zero-padded to <32 x i1> and returned as an i32
; bitmask.
; Expected asm: the +avx512vl run folds the load as a {1to8} broadcast operand;
; the +avx512f-only run uses a separate vbroadcastss, compares on zmm and
; shifts the k-register left/right by 8.
20841define zeroext i32 @test_masked_vcmpoeqps_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, float* %__b) local_unnamed_addr {
20842; VLX-LABEL: test_masked_vcmpoeqps_v8i1_v32i1_mask_mem_b:
20843; VLX:       # %bb.0: # %entry
20844; VLX-NEXT:    kmovd %edi, %k1
20845; VLX-NEXT:    vcmpeqps (%rsi){1to8}, %ymm0, %k0 {%k1}
20846; VLX-NEXT:    kmovd %k0, %eax
20847; VLX-NEXT:    vzeroupper
20848; VLX-NEXT:    retq
20849;
20850; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v32i1_mask_mem_b:
20851; NoVLX:       # %bb.0: # %entry
20852; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
20853; NoVLX-NEXT:    vbroadcastss (%rsi), %ymm1
20854; NoVLX-NEXT:    kmovw %edi, %k1
20855; NoVLX-NEXT:    vcmpeqps %zmm1, %zmm0, %k0 {%k1}
20856; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
20857; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
20858; NoVLX-NEXT:    kmovw %k0, %eax
20859; NoVLX-NEXT:    vzeroupper
20860; NoVLX-NEXT:    retq
20861entry:
20862  %0 = bitcast <4 x i64> %__a to <8 x float>
; Splat the loaded scalar: insert into lane 0, then shuffle lane 0 to every lane.
20863  %load = load float, float* %__b
20864  %vec = insertelement <8 x float> undef, float %load, i32 0
20865  %1 = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
20866  %2 = fcmp oeq <8 x float> %0, %1
; Apply the caller-supplied lane mask.
20867  %3 = bitcast i8 %__u to <8 x i1>
20868  %4 = and <8 x i1> %2, %3
; Indices 0-7 select masked compare results; every other index (8-15) selects
; from the zeroinitializer operand, so lanes 8-31 are zero.
20869  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
20870  %6 = bitcast <32 x i1> %5 to i32
20871  ret i32 %6
20872}
20873
20874
20875
; Ordered-equal compare of two 8 x float register vectors. The <8 x i1> result
; is zero-padded to <64 x i1> and returned as an i64 bitmask.
; Expected asm: the +avx512vl run compares on ymm and reads the mask with
; kmovq; the +avx512f-only run compares on zmm, shifts the k-register
; left/right by 8, then uses kmovw followed by movzwl.
20876define zeroext i64 @test_vcmpoeqps_v8i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
20877; VLX-LABEL: test_vcmpoeqps_v8i1_v64i1_mask:
20878; VLX:       # %bb.0: # %entry
20879; VLX-NEXT:    vcmpeqps %ymm1, %ymm0, %k0
20880; VLX-NEXT:    kmovq %k0, %rax
20881; VLX-NEXT:    vzeroupper
20882; VLX-NEXT:    retq
20883;
20884; NoVLX-LABEL: test_vcmpoeqps_v8i1_v64i1_mask:
20885; NoVLX:       # %bb.0: # %entry
20886; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
20887; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
20888; NoVLX-NEXT:    vcmpeqps %zmm1, %zmm0, %k0
20889; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
20890; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
20891; NoVLX-NEXT:    kmovw %k0, %eax
20892; NoVLX-NEXT:    movzwl %ax, %eax
20893; NoVLX-NEXT:    vzeroupper
20894; NoVLX-NEXT:    retq
20895entry:
20896  %0 = bitcast <4 x i64> %__a to <8 x float>
20897  %1 = bitcast <4 x i64> %__b to <8 x float>
20898  %2 = fcmp oeq <8 x float> %0, %1
; Indices 0-7 select compare results; every other index (8-15) selects from
; the zeroinitializer operand, so lanes 8-63 are zero.
20899  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
20900  %4 = bitcast <64 x i1> %3 to i64
20901  ret i64 %4
20902}
20903
; Ordered-equal compare of an 8 x float register vector against a full vector
; loaded from %__b. The <8 x i1> result is zero-padded to <64 x i1> and
; returned as an i64 bitmask.
; Expected asm: the +avx512vl run folds the load into the compare and reads
; the mask with kmovq; the +avx512f-only run loads into ymm1 first, compares on
; zmm, shifts the k-register left/right by 8, then uses kmovw and movzwl.
20904define zeroext i64 @test_vcmpoeqps_v8i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
20905; VLX-LABEL: test_vcmpoeqps_v8i1_v64i1_mask_mem:
20906; VLX:       # %bb.0: # %entry
20907; VLX-NEXT:    vcmpeqps (%rdi), %ymm0, %k0
20908; VLX-NEXT:    kmovq %k0, %rax
20909; VLX-NEXT:    vzeroupper
20910; VLX-NEXT:    retq
20911;
20912; NoVLX-LABEL: test_vcmpoeqps_v8i1_v64i1_mask_mem:
20913; NoVLX:       # %bb.0: # %entry
20914; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
20915; NoVLX-NEXT:    vmovaps (%rdi), %ymm1
20916; NoVLX-NEXT:    vcmpeqps %zmm1, %zmm0, %k0
20917; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
20918; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
20919; NoVLX-NEXT:    kmovw %k0, %eax
20920; NoVLX-NEXT:    movzwl %ax, %eax
20921; NoVLX-NEXT:    vzeroupper
20922; NoVLX-NEXT:    retq
20923entry:
20924  %0 = bitcast <4 x i64> %__a to <8 x float>
; Second operand comes from memory as a whole <4 x i64> vector.
20925  %load = load <4 x i64>, <4 x i64>* %__b
20926  %1 = bitcast <4 x i64> %load to <8 x float>
20927  %2 = fcmp oeq <8 x float> %0, %1
; Indices 0-7 select compare results; every other index (8-15) selects from
; the zeroinitializer operand, so lanes 8-63 are zero.
20928  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
20929  %4 = bitcast <64 x i1> %3 to i64
20930  ret i64 %4
20931}
20932
; Ordered-equal compare of an 8 x float register vector against a scalar float
; loaded from %__b and splatted to all 8 lanes. The <8 x i1> result is
; zero-padded to <64 x i1> and returned as an i64 bitmask.
; Expected asm: the +avx512vl run folds the load as a {1to8} broadcast operand
; and reads the mask with kmovq; the +avx512f-only run uses a separate
; vbroadcastss, compares on zmm, shifts the k-register left/right by 8, then
; uses kmovw and movzwl.
20933define zeroext i64 @test_vcmpoeqps_v8i1_v64i1_mask_mem_b(<4 x i64> %__a, float* %__b) local_unnamed_addr {
20934; VLX-LABEL: test_vcmpoeqps_v8i1_v64i1_mask_mem_b:
20935; VLX:       # %bb.0: # %entry
20936; VLX-NEXT:    vcmpeqps (%rdi){1to8}, %ymm0, %k0
20937; VLX-NEXT:    kmovq %k0, %rax
20938; VLX-NEXT:    vzeroupper
20939; VLX-NEXT:    retq
20940;
20941; NoVLX-LABEL: test_vcmpoeqps_v8i1_v64i1_mask_mem_b:
20942; NoVLX:       # %bb.0: # %entry
20943; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
20944; NoVLX-NEXT:    vbroadcastss (%rdi), %ymm1
20945; NoVLX-NEXT:    vcmpeqps %zmm1, %zmm0, %k0
20946; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
20947; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
20948; NoVLX-NEXT:    kmovw %k0, %eax
20949; NoVLX-NEXT:    movzwl %ax, %eax
20950; NoVLX-NEXT:    vzeroupper
20951; NoVLX-NEXT:    retq
20952entry:
20953  %0 = bitcast <4 x i64> %__a to <8 x float>
; Splat the loaded scalar: insert into lane 0, then shuffle lane 0 to every lane.
20954  %load = load float, float* %__b
20955  %vec = insertelement <8 x float> undef, float %load, i32 0
20956  %1 = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
20957  %2 = fcmp oeq <8 x float> %0, %1
; Indices 0-7 select compare results; every other index (8-15) selects from
; the zeroinitializer operand, so lanes 8-63 are zero.
20958  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
20959  %4 = bitcast <64 x i1> %3 to i64
20960  ret i64 %4
20961}
20962
; Masked ordered-equal compare of two 8 x float register vectors. The <8 x i1>
; result is ANDed with the i8 mask %__u, zero-padded to <64 x i1> and returned
; as an i64 bitmask.
; Expected asm: both runs move %edi into %k1 and use it as the compare's
; write-mask; the +avx512f-only run compares on zmm, shifts the k-register
; left/right by 8, then uses kmovw and movzwl.
20963define zeroext i64 @test_masked_vcmpoeqps_v8i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
20964; VLX-LABEL: test_masked_vcmpoeqps_v8i1_v64i1_mask:
20965; VLX:       # %bb.0: # %entry
20966; VLX-NEXT:    kmovd %edi, %k1
20967; VLX-NEXT:    vcmpeqps %ymm1, %ymm0, %k0 {%k1}
20968; VLX-NEXT:    kmovq %k0, %rax
20969; VLX-NEXT:    vzeroupper
20970; VLX-NEXT:    retq
20971;
20972; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v64i1_mask:
20973; NoVLX:       # %bb.0: # %entry
20974; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
20975; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
20976; NoVLX-NEXT:    kmovw %edi, %k1
20977; NoVLX-NEXT:    vcmpeqps %zmm1, %zmm0, %k0 {%k1}
20978; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
20979; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
20980; NoVLX-NEXT:    kmovw %k0, %eax
20981; NoVLX-NEXT:    movzwl %ax, %eax
20982; NoVLX-NEXT:    vzeroupper
20983; NoVLX-NEXT:    retq
20984entry:
20985  %0 = bitcast <4 x i64> %__a to <8 x float>
20986  %1 = bitcast <4 x i64> %__b to <8 x float>
20987  %2 = fcmp oeq <8 x float> %0, %1
; Apply the caller-supplied lane mask.
20988  %3 = bitcast i8 %__u to <8 x i1>
20989  %4 = and <8 x i1> %2, %3
; Indices 0-7 select masked compare results; every other index (8-15) selects
; from the zeroinitializer operand, so lanes 8-63 are zero.
20990  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
20991  %6 = bitcast <64 x i1> %5 to i64
20992  ret i64 %6
20993}
20994
; Masked ordered-equal compare of an 8 x float register vector against a full
; vector loaded from %__b. The <8 x i1> result is ANDed with the i8 mask %__u,
; zero-padded to <64 x i1> and returned as an i64 bitmask.
; Expected asm: the +avx512vl run folds the load into the masked compare and
; reads the mask with kmovq; the +avx512f-only run loads into ymm1 first,
; compares on zmm, shifts the k-register left/right by 8, then uses kmovw and
; movzwl.
20995define zeroext i64 @test_masked_vcmpoeqps_v8i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
20996; VLX-LABEL: test_masked_vcmpoeqps_v8i1_v64i1_mask_mem:
20997; VLX:       # %bb.0: # %entry
20998; VLX-NEXT:    kmovd %edi, %k1
20999; VLX-NEXT:    vcmpeqps (%rsi), %ymm0, %k0 {%k1}
21000; VLX-NEXT:    kmovq %k0, %rax
21001; VLX-NEXT:    vzeroupper
21002; VLX-NEXT:    retq
21003;
21004; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v64i1_mask_mem:
21005; NoVLX:       # %bb.0: # %entry
21006; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
21007; NoVLX-NEXT:    vmovaps (%rsi), %ymm1
21008; NoVLX-NEXT:    kmovw %edi, %k1
21009; NoVLX-NEXT:    vcmpeqps %zmm1, %zmm0, %k0 {%k1}
21010; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
21011; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
21012; NoVLX-NEXT:    kmovw %k0, %eax
21013; NoVLX-NEXT:    movzwl %ax, %eax
21014; NoVLX-NEXT:    vzeroupper
21015; NoVLX-NEXT:    retq
21016entry:
21017  %0 = bitcast <4 x i64> %__a to <8 x float>
; Second operand comes from memory as a whole <4 x i64> vector.
21018  %load = load <4 x i64>, <4 x i64>* %__b
21019  %1 = bitcast <4 x i64> %load to <8 x float>
21020  %2 = fcmp oeq <8 x float> %0, %1
; Apply the caller-supplied lane mask.
21021  %3 = bitcast i8 %__u to <8 x i1>
21022  %4 = and <8 x i1> %2, %3
; Indices 0-7 select masked compare results; every other index (8-15) selects
; from the zeroinitializer operand, so lanes 8-63 are zero.
21023  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
21024  %6 = bitcast <64 x i1> %5 to i64
21025  ret i64 %6
21026}
21027
; Masked ordered-equal compare of an 8 x float register vector against a scalar
; float loaded from %__b and splatted to all 8 lanes. The <8 x i1> result is
; ANDed with the i8 mask %__u, zero-padded to <64 x i1> and returned as an i64
; bitmask.
; Expected asm: the +avx512vl run folds the load as a {1to8} broadcast operand
; and reads the mask with kmovq; the +avx512f-only run uses a separate
; vbroadcastss, compares on zmm, shifts the k-register left/right by 8, then
; uses kmovw and movzwl.
21028define zeroext i64 @test_masked_vcmpoeqps_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, float* %__b) local_unnamed_addr {
21029; VLX-LABEL: test_masked_vcmpoeqps_v8i1_v64i1_mask_mem_b:
21030; VLX:       # %bb.0: # %entry
21031; VLX-NEXT:    kmovd %edi, %k1
21032; VLX-NEXT:    vcmpeqps (%rsi){1to8}, %ymm0, %k0 {%k1}
21033; VLX-NEXT:    kmovq %k0, %rax
21034; VLX-NEXT:    vzeroupper
21035; VLX-NEXT:    retq
21036;
21037; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v64i1_mask_mem_b:
21038; NoVLX:       # %bb.0: # %entry
21039; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
21040; NoVLX-NEXT:    vbroadcastss (%rsi), %ymm1
21041; NoVLX-NEXT:    kmovw %edi, %k1
21042; NoVLX-NEXT:    vcmpeqps %zmm1, %zmm0, %k0 {%k1}
21043; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
21044; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
21045; NoVLX-NEXT:    kmovw %k0, %eax
21046; NoVLX-NEXT:    movzwl %ax, %eax
21047; NoVLX-NEXT:    vzeroupper
21048; NoVLX-NEXT:    retq
21049entry:
21050  %0 = bitcast <4 x i64> %__a to <8 x float>
; Splat the loaded scalar: insert into lane 0, then shuffle lane 0 to every lane.
21051  %load = load float, float* %__b
21052  %vec = insertelement <8 x float> undef, float %load, i32 0
21053  %1 = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
21054  %2 = fcmp oeq <8 x float> %0, %1
; Apply the caller-supplied lane mask.
21055  %3 = bitcast i8 %__u to <8 x i1>
21056  %4 = and <8 x i1> %2, %3
; Indices 0-7 select masked compare results; every other index (8-15) selects
; from the zeroinitializer operand, so lanes 8-63 are zero.
21057  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
21058  %6 = bitcast <64 x i1> %5 to i64
21059  ret i64 %6
21060}
21061
21062
21063
; Ordered-equal compare of two 16 x float vectors (both bitcast from <8 x i64>
; register arguments). The <16 x i1> result is zero-padded to <32 x i1> and
; returned as an i32 bitmask.
; Expected asm: both runs compare on zmm with no k-register shifts; they differ
; only in reading the mask with kmovd (+avx512vl run) versus kmovw
; (+avx512f-only run).
21064define zeroext i32 @test_vcmpoeqps_v16i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
21065; VLX-LABEL: test_vcmpoeqps_v16i1_v32i1_mask:
21066; VLX:       # %bb.0: # %entry
21067; VLX-NEXT:    vcmpeqps %zmm1, %zmm0, %k0
21068; VLX-NEXT:    kmovd %k0, %eax
21069; VLX-NEXT:    vzeroupper
21070; VLX-NEXT:    retq
21071;
21072; NoVLX-LABEL: test_vcmpoeqps_v16i1_v32i1_mask:
21073; NoVLX:       # %bb.0: # %entry
21074; NoVLX-NEXT:    vcmpeqps %zmm1, %zmm0, %k0
21075; NoVLX-NEXT:    kmovw %k0, %eax
21076; NoVLX-NEXT:    vzeroupper
21077; NoVLX-NEXT:    retq
21078entry:
21079  %0 = bitcast <8 x i64> %__a to <16 x float>
21080  %1 = bitcast <8 x i64> %__b to <16 x float>
21081  %2 = fcmp oeq <16 x float> %0, %1
; Indices 0-15 select compare results; indices 16-31 select from the
; zeroinitializer operand, so the upper 16 lanes are zero.
21082  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
21083  %4 = bitcast <32 x i1> %3 to i32
21084  ret i32 %4
21085}
21086
; Ordered-equal compare of a 16 x float register vector against a full vector
; loaded from %__b. The <16 x i1> result is zero-padded to <32 x i1> and
; returned as an i32 bitmask.
; Expected asm: both runs fold the load into the zmm compare; they differ only
; in reading the mask with kmovd (+avx512vl run) versus kmovw (+avx512f-only
; run).
21087define zeroext i32 @test_vcmpoeqps_v16i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
21088; VLX-LABEL: test_vcmpoeqps_v16i1_v32i1_mask_mem:
21089; VLX:       # %bb.0: # %entry
21090; VLX-NEXT:    vcmpeqps (%rdi), %zmm0, %k0
21091; VLX-NEXT:    kmovd %k0, %eax
21092; VLX-NEXT:    vzeroupper
21093; VLX-NEXT:    retq
21094;
21095; NoVLX-LABEL: test_vcmpoeqps_v16i1_v32i1_mask_mem:
21096; NoVLX:       # %bb.0: # %entry
21097; NoVLX-NEXT:    vcmpeqps (%rdi), %zmm0, %k0
21098; NoVLX-NEXT:    kmovw %k0, %eax
21099; NoVLX-NEXT:    vzeroupper
21100; NoVLX-NEXT:    retq
21101entry:
21102  %0 = bitcast <8 x i64> %__a to <16 x float>
; Second operand comes from memory as a whole <8 x i64> vector.
21103  %load = load <8 x i64>, <8 x i64>* %__b
21104  %1 = bitcast <8 x i64> %load to <16 x float>
21105  %2 = fcmp oeq <16 x float> %0, %1
; Indices 0-15 select compare results; indices 16-31 select from the
; zeroinitializer operand, so the upper 16 lanes are zero.
21106  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
21107  %4 = bitcast <32 x i1> %3 to i32
21108  ret i32 %4
21109}
21110
; Ordered-equal compare of a 16 x float register vector against a scalar float
; loaded from %__b and splatted to all 16 lanes. The <16 x i1> result is
; zero-padded to <32 x i1> and returned as an i32 bitmask.
; Expected asm: both runs fold the load as a {1to16} broadcast operand; they
; differ only in reading the mask with kmovd (+avx512vl run) versus kmovw
; (+avx512f-only run).
21111define zeroext i32 @test_vcmpoeqps_v16i1_v32i1_mask_mem_b(<8 x i64> %__a, float* %__b) local_unnamed_addr {
21112; VLX-LABEL: test_vcmpoeqps_v16i1_v32i1_mask_mem_b:
21113; VLX:       # %bb.0: # %entry
21114; VLX-NEXT:    vcmpeqps (%rdi){1to16}, %zmm0, %k0
21115; VLX-NEXT:    kmovd %k0, %eax
21116; VLX-NEXT:    vzeroupper
21117; VLX-NEXT:    retq
21118;
21119; NoVLX-LABEL: test_vcmpoeqps_v16i1_v32i1_mask_mem_b:
21120; NoVLX:       # %bb.0: # %entry
21121; NoVLX-NEXT:    vcmpeqps (%rdi){1to16}, %zmm0, %k0
21122; NoVLX-NEXT:    kmovw %k0, %eax
21123; NoVLX-NEXT:    vzeroupper
21124; NoVLX-NEXT:    retq
21125entry:
21126  %0 = bitcast <8 x i64> %__a to <16 x float>
; Splat the loaded scalar: insert into lane 0, then shuffle lane 0 to every lane.
21127  %load = load float, float* %__b
21128  %vec = insertelement <16 x float> undef, float %load, i32 0
21129  %1 = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
21130  %2 = fcmp oeq <16 x float> %0, %1
; Indices 0-15 select compare results; indices 16-31 select from the
; zeroinitializer operand, so the upper 16 lanes are zero.
21131  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
21132  %4 = bitcast <32 x i1> %3 to i32
21133  ret i32 %4
21134}
21135
; Masked register-register variant: the fcmp oeq result of %__a and %__b
; (as <16 x float>) is ANDed with the i16 mask %__u (bitcast to <16 x i1>),
; zero-padded to 32 bits and returned as an i32.
; With avx512bw/vl/dq enabled the mask is expected to be moved into %k1 and
; applied as a write-mask on vcmpeqps; with plain avx512f the compare is
; unmasked and the mask is applied afterwards with andl on the GPR result.
21136define zeroext i32 @test_masked_vcmpoeqps_v16i1_v32i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
21137; VLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_mask:
21138; VLX:       # %bb.0: # %entry
21139; VLX-NEXT:    kmovd %edi, %k1
21140; VLX-NEXT:    vcmpeqps %zmm1, %zmm0, %k0 {%k1}
21141; VLX-NEXT:    kmovd %k0, %eax
21142; VLX-NEXT:    vzeroupper
21143; VLX-NEXT:    retq
21144;
21145; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_mask:
21146; NoVLX:       # %bb.0: # %entry
21147; NoVLX-NEXT:    vcmpeqps %zmm1, %zmm0, %k0
21148; NoVLX-NEXT:    kmovw %k0, %eax
21149; NoVLX-NEXT:    andl %edi, %eax
21150; NoVLX-NEXT:    vzeroupper
21151; NoVLX-NEXT:    retq
21152entry:
21153  %0 = bitcast <8 x i64> %__a to <16 x float>
21154  %1 = bitcast <8 x i64> %__b to <16 x float>
21155  %2 = fcmp oeq <16 x float> %0, %1
21156  %3 = bitcast i16 %__u to <16 x i1>
21157  %4 = and <16 x i1> %2, %3
21158  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
21159  %6 = bitcast <32 x i1> %5 to i32
21160  ret i32 %6
21161}
21162
; Masked memory-operand variant: %__a is compared (fcmp oeq, <16 x float>)
; against a 512-bit vector loaded from %__b, the result is ANDed with the i16
; mask %__u, zero-padded to 32 bits and returned as an i32.
; The load is expected to be folded into vcmpeqps in both configurations; the
; pointer arrives in %rsi because %__u occupies %edi. Mask handling differs:
; write-mask via %k1 with avx512bw/vl/dq, andl on the GPR with plain avx512f.
21163define zeroext i32 @test_masked_vcmpoeqps_v16i1_v32i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
21164; VLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_mask_mem:
21165; VLX:       # %bb.0: # %entry
21166; VLX-NEXT:    kmovd %edi, %k1
21167; VLX-NEXT:    vcmpeqps (%rsi), %zmm0, %k0 {%k1}
21168; VLX-NEXT:    kmovd %k0, %eax
21169; VLX-NEXT:    vzeroupper
21170; VLX-NEXT:    retq
21171;
21172; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_mask_mem:
21173; NoVLX:       # %bb.0: # %entry
21174; NoVLX-NEXT:    vcmpeqps (%rsi), %zmm0, %k0
21175; NoVLX-NEXT:    kmovw %k0, %eax
21176; NoVLX-NEXT:    andl %edi, %eax
21177; NoVLX-NEXT:    vzeroupper
21178; NoVLX-NEXT:    retq
21179entry:
21180  %0 = bitcast <8 x i64> %__a to <16 x float>
21181  %load = load <8 x i64>, <8 x i64>* %__b
21182  %1 = bitcast <8 x i64> %load to <16 x float>
21183  %2 = fcmp oeq <16 x float> %0, %1
21184  %3 = bitcast i16 %__u to <16 x i1>
21185  %4 = and <16 x i1> %2, %3
21186  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
21187  %6 = bitcast <32 x i1> %5 to i32
21188  ret i32 %6
21189}
21190
; Masked broadcast variant: a scalar float loaded from %__b is splatted to 16
; lanes, compared with %__a (fcmp oeq), ANDed with the i16 mask %__u,
; zero-padded to 32 bits and returned as an i32.
; Both configurations are expected to use the {1to16} embedded broadcast; the
; mask is a %k1 write-mask with avx512bw/vl/dq and a post-compare andl with
; plain avx512f.
21191define zeroext i32 @test_masked_vcmpoeqps_v16i1_v32i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, float* %__b) local_unnamed_addr {
21192; VLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_mask_mem_b:
21193; VLX:       # %bb.0: # %entry
21194; VLX-NEXT:    kmovd %edi, %k1
21195; VLX-NEXT:    vcmpeqps (%rsi){1to16}, %zmm0, %k0 {%k1}
21196; VLX-NEXT:    kmovd %k0, %eax
21197; VLX-NEXT:    vzeroupper
21198; VLX-NEXT:    retq
21199;
21200; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_mask_mem_b:
21201; NoVLX:       # %bb.0: # %entry
21202; NoVLX-NEXT:    vcmpeqps (%rsi){1to16}, %zmm0, %k0
21203; NoVLX-NEXT:    kmovw %k0, %eax
21204; NoVLX-NEXT:    andl %edi, %eax
21205; NoVLX-NEXT:    vzeroupper
21206; NoVLX-NEXT:    retq
21207entry:
21208  %0 = bitcast <8 x i64> %__a to <16 x float>
21209  %load = load float, float* %__b
21210  %vec = insertelement <16 x float> undef, float %load, i32 0
21211  %1 = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
21212  %2 = fcmp oeq <16 x float> %0, %1
21213  %3 = bitcast i16 %__u to <16 x i1>
21214  %4 = and <16 x i1> %2, %3
21215  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
21216  %6 = bitcast <32 x i1> %5 to i32
21217  ret i32 %6
21218}
21219
21220
21221
; Intrinsic variant: calls @llvm.x86.avx512.cmp.ps.512 on %__a and %__b (as
; <16 x float>) with trailing operands i32 2 and i32 8, then widens the 16-bit
; mask to i32 with zext instead of a shuffle.
; Both run configurations share one check prefix, i.e. identical output is
; expected.
; NOTE(review): despite "oeq" in the function name, the predicate operand is 2
; and the expected mnemonic is vcmpleps, not vcmpeqps. The i32 8 operand
; presumably selects the {sae} form seen in the expected output - confirm
; against the intrinsic definition.
21222define zeroext i32 @test_vcmpoeqps_v16i1_v32i1_sae_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
21223; CHECK-LABEL: test_vcmpoeqps_v16i1_v32i1_sae_mask:
21224; CHECK:       # %bb.0: # %entry
21225; CHECK-NEXT:    vcmpleps {sae}, %zmm1, %zmm0, %k0
21226; CHECK-NEXT:    kmovw %k0, %eax
21227; CHECK-NEXT:    vzeroupper
21228; CHECK-NEXT:    retq
21229entry:
21230  %0 = bitcast <8 x i64> %__a to <16 x float>
21231  %1 = bitcast <8 x i64> %__b to <16 x float>
21232  %2 = call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> %0, <16 x float> %1, i32 2, i32 8)
21233  %3 = bitcast <16 x i1> %2 to i16
21234  %4 = zext i16 %3 to i32
21235  ret i32 %4
21236}
21237
; Masked intrinsic variant: the <16 x i1> result of
; @llvm.x86.avx512.cmp.ps.512 (trailing operands i32 2, i32 8) is ANDed with
; the i16 mask %__u, bitcast to i16 and zero-extended to i32.
; Unlike the fcmp-based masked tests, both configurations are expected to
; apply the mask with andl on the GPR result rather than as a write-mask; they
; differ only in kmovd versus kmovw.
; NOTE(review): the name says "oeq" but the expected mnemonic is vcmpleps
; (predicate operand 2); i32 8 presumably selects {sae} - confirm.
21238define zeroext i32 @test_masked_vcmpoeqps_v16i1_v32i1_sae_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
21239; VLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_sae_mask:
21240; VLX:       # %bb.0: # %entry
21241; VLX-NEXT:    vcmpleps {sae}, %zmm1, %zmm0, %k0
21242; VLX-NEXT:    kmovd %k0, %eax
21243; VLX-NEXT:    andl %edi, %eax
21244; VLX-NEXT:    vzeroupper
21245; VLX-NEXT:    retq
21246;
21247; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_sae_mask:
21248; NoVLX:       # %bb.0: # %entry
21249; NoVLX-NEXT:    vcmpleps {sae}, %zmm1, %zmm0, %k0
21250; NoVLX-NEXT:    kmovw %k0, %eax
21251; NoVLX-NEXT:    andl %edi, %eax
21252; NoVLX-NEXT:    vzeroupper
21253; NoVLX-NEXT:    retq
21254entry:
21255  %0 = bitcast <8 x i64> %__a to <16 x float>
21256  %1 = bitcast <8 x i64> %__b to <16 x float>
21257  %2 = call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> %0, <16 x float> %1, i32 2, i32 8)
21258  %3 = bitcast i16 %__u to <16 x i1>
21259  %4 = and <16 x i1> %2, %3
21260  %5 = bitcast <16 x i1> %4 to i16
21261  %6 = zext i16 %5 to i32
21262  ret i32 %6
21263}
21264
21265
21266
; Compares %__a and %__b as <16 x float> with fcmp oeq, zero-pads the 16
; result bits to 64 and returns them as an i64. In the 64-entry shuffle mask
; only indices 0-15 select compare results; every later entry lies in 16-31
; and therefore selects a lane of the zeroinitializer operand.
; With avx512bw/vl/dq enabled the mask is expected to be read with a single
; kmovq; with plain avx512f it is read with kmovw and then zero-extended from
; 16 bits with movzwl.
21267define zeroext i64 @test_vcmpoeqps_v16i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
21268; VLX-LABEL: test_vcmpoeqps_v16i1_v64i1_mask:
21269; VLX:       # %bb.0: # %entry
21270; VLX-NEXT:    vcmpeqps %zmm1, %zmm0, %k0
21271; VLX-NEXT:    kmovq %k0, %rax
21272; VLX-NEXT:    vzeroupper
21273; VLX-NEXT:    retq
21274;
21275; NoVLX-LABEL: test_vcmpoeqps_v16i1_v64i1_mask:
21276; NoVLX:       # %bb.0: # %entry
21277; NoVLX-NEXT:    vcmpeqps %zmm1, %zmm0, %k0
21278; NoVLX-NEXT:    kmovw %k0, %eax
21279; NoVLX-NEXT:    movzwl %ax, %eax
21280; NoVLX-NEXT:    vzeroupper
21281; NoVLX-NEXT:    retq
21282entry:
21283  %0 = bitcast <8 x i64> %__a to <16 x float>
21284  %1 = bitcast <8 x i64> %__b to <16 x float>
21285  %2 = fcmp oeq <16 x float> %0, %1
21286  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
21287  %4 = bitcast <64 x i1> %3 to i64
21288  ret i64 %4
21289}
21290
; Memory-operand variant of the 16-to-64-bit widening test: %__a is compared
; (fcmp oeq, <16 x float>) against a 512-bit vector loaded from %__b; the 16
; result bits are zero-padded to 64 and returned as an i64.
; The load is expected to be folded into vcmpeqps in both configurations.
; Result extraction is kmovq with avx512bw/vl/dq, or kmovw + movzwl with plain
; avx512f.
21291define zeroext i64 @test_vcmpoeqps_v16i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
21292; VLX-LABEL: test_vcmpoeqps_v16i1_v64i1_mask_mem:
21293; VLX:       # %bb.0: # %entry
21294; VLX-NEXT:    vcmpeqps (%rdi), %zmm0, %k0
21295; VLX-NEXT:    kmovq %k0, %rax
21296; VLX-NEXT:    vzeroupper
21297; VLX-NEXT:    retq
21298;
21299; NoVLX-LABEL: test_vcmpoeqps_v16i1_v64i1_mask_mem:
21300; NoVLX:       # %bb.0: # %entry
21301; NoVLX-NEXT:    vcmpeqps (%rdi), %zmm0, %k0
21302; NoVLX-NEXT:    kmovw %k0, %eax
21303; NoVLX-NEXT:    movzwl %ax, %eax
21304; NoVLX-NEXT:    vzeroupper
21305; NoVLX-NEXT:    retq
21306entry:
21307  %0 = bitcast <8 x i64> %__a to <16 x float>
21308  %load = load <8 x i64>, <8 x i64>* %__b
21309  %1 = bitcast <8 x i64> %load to <16 x float>
21310  %2 = fcmp oeq <16 x float> %0, %1
21311  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
21312  %4 = bitcast <64 x i1> %3 to i64
21313  ret i64 %4
21314}
21315
; Broadcast variant of the 16-to-64-bit widening test: a scalar float loaded
; from %__b is splatted to 16 lanes and compared with %__a (fcmp oeq); the 16
; result bits are zero-padded to 64 and returned as an i64.
; Both configurations are expected to use the {1to16} embedded broadcast.
; Result extraction is kmovq with avx512bw/vl/dq, or kmovw + movzwl with plain
; avx512f.
21316define zeroext i64 @test_vcmpoeqps_v16i1_v64i1_mask_mem_b(<8 x i64> %__a, float* %__b) local_unnamed_addr {
21317; VLX-LABEL: test_vcmpoeqps_v16i1_v64i1_mask_mem_b:
21318; VLX:       # %bb.0: # %entry
21319; VLX-NEXT:    vcmpeqps (%rdi){1to16}, %zmm0, %k0
21320; VLX-NEXT:    kmovq %k0, %rax
21321; VLX-NEXT:    vzeroupper
21322; VLX-NEXT:    retq
21323;
21324; NoVLX-LABEL: test_vcmpoeqps_v16i1_v64i1_mask_mem_b:
21325; NoVLX:       # %bb.0: # %entry
21326; NoVLX-NEXT:    vcmpeqps (%rdi){1to16}, %zmm0, %k0
21327; NoVLX-NEXT:    kmovw %k0, %eax
21328; NoVLX-NEXT:    movzwl %ax, %eax
21329; NoVLX-NEXT:    vzeroupper
21330; NoVLX-NEXT:    retq
21331entry:
21332  %0 = bitcast <8 x i64> %__a to <16 x float>
21333  %load = load float, float* %__b
21334  %vec = insertelement <16 x float> undef, float %load, i32 0
21335  %1 = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
21336  %2 = fcmp oeq <16 x float> %0, %1
21337  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
21338  %4 = bitcast <64 x i1> %3 to i64
21339  ret i64 %4
21340}
21341
; Masked register-register variant with a 64-bit result: the fcmp oeq result
; of %__a and %__b (<16 x float>) is ANDed with the i16 mask %__u, zero-padded
; to 64 bits and returned as an i64.
; With avx512bw/vl/dq the mask is expected as a %k1 write-mask and the result
; is read with kmovq. With plain avx512f the compare is unmasked and the mask
; is applied with andl on the 32-bit GPR; no separate movzwl is expected here.
21342define zeroext i64 @test_masked_vcmpoeqps_v16i1_v64i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
21343; VLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_mask:
21344; VLX:       # %bb.0: # %entry
21345; VLX-NEXT:    kmovd %edi, %k1
21346; VLX-NEXT:    vcmpeqps %zmm1, %zmm0, %k0 {%k1}
21347; VLX-NEXT:    kmovq %k0, %rax
21348; VLX-NEXT:    vzeroupper
21349; VLX-NEXT:    retq
21350;
21351; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_mask:
21352; NoVLX:       # %bb.0: # %entry
21353; NoVLX-NEXT:    vcmpeqps %zmm1, %zmm0, %k0
21354; NoVLX-NEXT:    kmovw %k0, %eax
21355; NoVLX-NEXT:    andl %edi, %eax
21356; NoVLX-NEXT:    vzeroupper
21357; NoVLX-NEXT:    retq
21358entry:
21359  %0 = bitcast <8 x i64> %__a to <16 x float>
21360  %1 = bitcast <8 x i64> %__b to <16 x float>
21361  %2 = fcmp oeq <16 x float> %0, %1
21362  %3 = bitcast i16 %__u to <16 x i1>
21363  %4 = and <16 x i1> %2, %3
21364  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
21365  %6 = bitcast <64 x i1> %5 to i64
21366  ret i64 %6
21367}
21368
; Masked memory-operand variant with a 64-bit result: %__a is compared
; (fcmp oeq, <16 x float>) against a 512-bit vector loaded from %__b, ANDed
; with the i16 mask %__u, zero-padded to 64 bits and returned as an i64.
; The load (pointer in %rsi) is expected to be folded into vcmpeqps in both
; configurations. Mask handling is a %k1 write-mask plus kmovq with
; avx512bw/vl/dq, and kmovw plus andl on the GPR with plain avx512f.
21369define zeroext i64 @test_masked_vcmpoeqps_v16i1_v64i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
21370; VLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_mask_mem:
21371; VLX:       # %bb.0: # %entry
21372; VLX-NEXT:    kmovd %edi, %k1
21373; VLX-NEXT:    vcmpeqps (%rsi), %zmm0, %k0 {%k1}
21374; VLX-NEXT:    kmovq %k0, %rax
21375; VLX-NEXT:    vzeroupper
21376; VLX-NEXT:    retq
21377;
21378; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_mask_mem:
21379; NoVLX:       # %bb.0: # %entry
21380; NoVLX-NEXT:    vcmpeqps (%rsi), %zmm0, %k0
21381; NoVLX-NEXT:    kmovw %k0, %eax
21382; NoVLX-NEXT:    andl %edi, %eax
21383; NoVLX-NEXT:    vzeroupper
21384; NoVLX-NEXT:    retq
21385entry:
21386  %0 = bitcast <8 x i64> %__a to <16 x float>
21387  %load = load <8 x i64>, <8 x i64>* %__b
21388  %1 = bitcast <8 x i64> %load to <16 x float>
21389  %2 = fcmp oeq <16 x float> %0, %1
21390  %3 = bitcast i16 %__u to <16 x i1>
21391  %4 = and <16 x i1> %2, %3
21392  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
21393  %6 = bitcast <64 x i1> %5 to i64
21394  ret i64 %6
21395}
21396
; Masked broadcast variant with a 64-bit result: a scalar float loaded from
; %__b is splatted to 16 lanes, compared with %__a (fcmp oeq), ANDed with the
; i16 mask %__u, zero-padded to 64 bits and returned as an i64.
; Both configurations are expected to use the {1to16} embedded broadcast. The
; mask is a %k1 write-mask (result read with kmovq) with avx512bw/vl/dq, and
; a post-compare andl on the GPR with plain avx512f.
21397define zeroext i64 @test_masked_vcmpoeqps_v16i1_v64i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, float* %__b) local_unnamed_addr {
21398; VLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_mask_mem_b:
21399; VLX:       # %bb.0: # %entry
21400; VLX-NEXT:    kmovd %edi, %k1
21401; VLX-NEXT:    vcmpeqps (%rsi){1to16}, %zmm0, %k0 {%k1}
21402; VLX-NEXT:    kmovq %k0, %rax
21403; VLX-NEXT:    vzeroupper
21404; VLX-NEXT:    retq
21405;
21406; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_mask_mem_b:
21407; NoVLX:       # %bb.0: # %entry
21408; NoVLX-NEXT:    vcmpeqps (%rsi){1to16}, %zmm0, %k0
21409; NoVLX-NEXT:    kmovw %k0, %eax
21410; NoVLX-NEXT:    andl %edi, %eax
21411; NoVLX-NEXT:    vzeroupper
21412; NoVLX-NEXT:    retq
21413entry:
21414  %0 = bitcast <8 x i64> %__a to <16 x float>
21415  %load = load float, float* %__b
21416  %vec = insertelement <16 x float> undef, float %load, i32 0
21417  %1 = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
21418  %2 = fcmp oeq <16 x float> %0, %1
21419  %3 = bitcast i16 %__u to <16 x i1>
21420  %4 = and <16 x i1> %2, %3
21421  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
21422  %6 = bitcast <64 x i1> %5 to i64
21423  ret i64 %6
21424}
21425
21426
21427
; Intrinsic variant with a 64-bit result: calls @llvm.x86.avx512.cmp.ps.512
; on %__a and %__b (as <16 x float>) with trailing operands i32 2 and i32 8,
; bitcasts the mask to i16 and zero-extends it to i64.
; Both configurations are expected to read the mask into %eax and then
; zero-extend from 16 bits with movzwl; they differ only in kmovd versus
; kmovw.
; NOTE(review): the name says "oeq" but the expected mnemonic is vcmpleps
; (predicate operand 2); i32 8 presumably selects {sae} - confirm.
21428define zeroext i64 @test_vcmpoeqps_v16i1_v64i1_sae_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
21429; VLX-LABEL: test_vcmpoeqps_v16i1_v64i1_sae_mask:
21430; VLX:       # %bb.0: # %entry
21431; VLX-NEXT:    vcmpleps {sae}, %zmm1, %zmm0, %k0
21432; VLX-NEXT:    kmovd %k0, %eax
21433; VLX-NEXT:    movzwl %ax, %eax
21434; VLX-NEXT:    vzeroupper
21435; VLX-NEXT:    retq
21436;
21437; NoVLX-LABEL: test_vcmpoeqps_v16i1_v64i1_sae_mask:
21438; NoVLX:       # %bb.0: # %entry
21439; NoVLX-NEXT:    vcmpleps {sae}, %zmm1, %zmm0, %k0
21440; NoVLX-NEXT:    kmovw %k0, %eax
21441; NoVLX-NEXT:    movzwl %ax, %eax
21442; NoVLX-NEXT:    vzeroupper
21443; NoVLX-NEXT:    retq
21444entry:
21445  %0 = bitcast <8 x i64> %__a to <16 x float>
21446  %1 = bitcast <8 x i64> %__b to <16 x float>
21447  %2 = call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> %0, <16 x float> %1, i32 2, i32 8)
21448  %3 = bitcast <16 x i1> %2 to i16
21449  %4 = zext i16 %3 to i64
21450  ret i64 %4
21451}
21452
; Masked intrinsic variant with a 64-bit result: the <16 x i1> result of
; @llvm.x86.avx512.cmp.ps.512 (trailing operands i32 2, i32 8) is ANDed with
; the i16 mask %__u, bitcast to i16 and zero-extended to i64.
; Both configurations are expected to apply the mask with andl on the GPR
; result; they differ only in kmovd versus kmovw.
; NOTE(review): the name says "oeq" but the expected mnemonic is vcmpleps
; (predicate operand 2); i32 8 presumably selects {sae} - confirm.
21453define zeroext i64 @test_masked_vcmpoeqps_v16i1_v64i1_sae_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
21454; VLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_sae_mask:
21455; VLX:       # %bb.0: # %entry
21456; VLX-NEXT:    vcmpleps {sae}, %zmm1, %zmm0, %k0
21457; VLX-NEXT:    kmovd %k0, %eax
21458; VLX-NEXT:    andl %edi, %eax
21459; VLX-NEXT:    vzeroupper
21460; VLX-NEXT:    retq
21461;
21462; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_sae_mask:
21463; NoVLX:       # %bb.0: # %entry
21464; NoVLX-NEXT:    vcmpleps {sae}, %zmm1, %zmm0, %k0
21465; NoVLX-NEXT:    kmovw %k0, %eax
21466; NoVLX-NEXT:    andl %edi, %eax
21467; NoVLX-NEXT:    vzeroupper
21468; NoVLX-NEXT:    retq
21469entry:
21470  %0 = bitcast <8 x i64> %__a to <16 x float>
21471  %1 = bitcast <8 x i64> %__b to <16 x float>
21472  %2 = call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> %0, <16 x float> %1, i32 2, i32 8)
21473  %3 = bitcast i16 %__u to <16 x i1>
21474  %4 = and <16 x i1> %2, %3
21475  %5 = bitcast <16 x i1> %4 to i16
21476  %6 = zext i16 %5 to i64
21477  ret i64 %6
21478}
21479
21480
21481
21482declare <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double>, <8 x double>, i32, i32)
; Compares %__a and %__b as <2 x double> with fcmp oeq, zero-pads the 2
; result bits to 4 (shuffle indices 2 and 3 select lanes of the
; zeroinitializer operand) and returns them as an i4.
; With avx512bw/vl/dq a 128-bit vcmpeqpd into %k0 followed by kmovb is
; expected. With plain avx512f the operands are treated as zmm registers (see
; the kill annotations), the compare is 512-bit, and the
; kshiftlw $14 / kshiftrw $14 pair clears every mask bit except the low two
; before kmovw and andl $3.
21483define zeroext i4 @test_vcmpoeqpd_v2i1_v4i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
21484; VLX-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask:
21485; VLX:       # %bb.0: # %entry
21486; VLX-NEXT:    vcmpeqpd %xmm1, %xmm0, %k0
21487; VLX-NEXT:    kmovb %k0, %eax
21488; VLX-NEXT:    retq
21489;
21490; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask:
21491; NoVLX:       # %bb.0: # %entry
21492; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
21493; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
21494; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0
21495; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
21496; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
21497; NoVLX-NEXT:    kmovw %k0, %eax
21498; NoVLX-NEXT:    andl $3, %eax
21499; NoVLX-NEXT:    vzeroupper
21500; NoVLX-NEXT:    retq
21501entry:
21502  %0 = bitcast <2 x i64> %__a to <2 x double>
21503  %1 = bitcast <2 x i64> %__b to <2 x double>
21504  %2 = fcmp oeq <2 x double> %0, %1
21505  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
21506  %4 = bitcast <4 x i1> %3 to i4
21507  ret i4 %4
21508}
21509
; Memory-operand variant: %__a is compared (fcmp oeq, <2 x double>) against a
; 128-bit vector loaded from %__b; the 2 result bits are zero-padded to 4 and
; returned as an i4.
; With avx512bw/vl/dq the load is expected to be folded into a 128-bit
; vcmpeqpd. With plain avx512f the load is a separate vmovapd into %xmm1, the
; compare is done on zmm registers, and the kshiftlw/kshiftrw $14 pair keeps
; only the low two mask bits before kmovw and andl $3.
21510define zeroext i4 @test_vcmpoeqpd_v2i1_v4i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
21511; VLX-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask_mem:
21512; VLX:       # %bb.0: # %entry
21513; VLX-NEXT:    vcmpeqpd (%rdi), %xmm0, %k0
21514; VLX-NEXT:    kmovb %k0, %eax
21515; VLX-NEXT:    retq
21516;
21517; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask_mem:
21518; NoVLX:       # %bb.0: # %entry
21519; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
21520; NoVLX-NEXT:    vmovapd (%rdi), %xmm1
21521; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0
21522; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
21523; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
21524; NoVLX-NEXT:    kmovw %k0, %eax
21525; NoVLX-NEXT:    andl $3, %eax
21526; NoVLX-NEXT:    vzeroupper
21527; NoVLX-NEXT:    retq
21528entry:
21529  %0 = bitcast <2 x i64> %__a to <2 x double>
21530  %load = load <2 x i64>, <2 x i64>* %__b
21531  %1 = bitcast <2 x i64> %load to <2 x double>
21532  %2 = fcmp oeq <2 x double> %0, %1
21533  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
21534  %4 = bitcast <4 x i1> %3 to i4
21535  ret i4 %4
21536}
21537
; Broadcast variant: a scalar double loaded from %__b is splatted to both
; lanes (insertelement + <0, 0> shuffle) and compared with %__a (fcmp oeq);
; the 2 result bits are zero-padded to 4 and returned as an i4.
; With avx512bw/vl/dq the splat is expected to become a {1to2} embedded
; broadcast operand. With plain avx512f the splat is a separate vmovddup from
; memory, followed by a zmm compare and the kshiftlw/kshiftrw $14 pair that
; keeps only the low two mask bits.
21538define zeroext i4 @test_vcmpoeqpd_v2i1_v4i1_mask_mem_b(<2 x i64> %__a, double* %__b) local_unnamed_addr {
21539; VLX-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask_mem_b:
21540; VLX:       # %bb.0: # %entry
21541; VLX-NEXT:    vcmpeqpd (%rdi){1to2}, %xmm0, %k0
21542; VLX-NEXT:    kmovb %k0, %eax
21543; VLX-NEXT:    retq
21544;
21545; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask_mem_b:
21546; NoVLX:       # %bb.0: # %entry
21547; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
21548; NoVLX-NEXT:    vmovddup {{.*#+}} xmm1 = mem[0,0]
21549; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0
21550; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
21551; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
21552; NoVLX-NEXT:    kmovw %k0, %eax
21553; NoVLX-NEXT:    andl $3, %eax
21554; NoVLX-NEXT:    vzeroupper
21555; NoVLX-NEXT:    retq
21556entry:
21557  %0 = bitcast <2 x i64> %__a to <2 x double>
21558  %load = load double, double* %__b
21559  %vec = insertelement <2 x double> undef, double %load, i32 0
21560  %1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
21561  %2 = fcmp oeq <2 x double> %0, %1
21562  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
21563  %4 = bitcast <4 x i1> %3 to i4
21564  ret i4 %4
21565}
21566
; Masked register-register variant: the fcmp oeq result of %__a and %__b
; (<2 x double>) is ANDed with the i2 mask %__u (bitcast to <2 x i1>),
; zero-padded to 4 bits and returned as an i4.
; Both configurations are expected to load the mask into %k1 and use a
; write-masked vcmpeqpd. With avx512bw/vl/dq the compare is 128-bit and the
; result is read with kmovb; with plain avx512f the compare is done on zmm
; registers and the kshiftlw/kshiftrw $14 pair keeps only the low two mask
; bits before kmovw and andl $3.
21567define zeroext i4 @test_masked_vcmpoeqpd_v2i1_v4i1_mask(i2 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
21568; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v4i1_mask:
21569; VLX:       # %bb.0: # %entry
21570; VLX-NEXT:    kmovd %edi, %k1
21571; VLX-NEXT:    vcmpeqpd %xmm1, %xmm0, %k0 {%k1}
21572; VLX-NEXT:    kmovb %k0, %eax
21573; VLX-NEXT:    retq
21574;
21575; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v4i1_mask:
21576; NoVLX:       # %bb.0: # %entry
21577; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
21578; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
21579; NoVLX-NEXT:    kmovw %edi, %k1
21580; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
21581; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
21582; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
21583; NoVLX-NEXT:    kmovw %k0, %eax
21584; NoVLX-NEXT:    andl $3, %eax
21585; NoVLX-NEXT:    vzeroupper
21586; NoVLX-NEXT:    retq
21587entry:
21588  %0 = bitcast <2 x i64> %__a to <2 x double>
21589  %1 = bitcast <2 x i64> %__b to <2 x double>
21590  %2 = fcmp oeq <2 x double> %0, %1
21591  %3 = bitcast i2 %__u to <2 x i1>
21592  %4 = and <2 x i1> %2, %3
21593  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
21594  %6 = bitcast <4 x i1> %5 to i4
21595  ret i4 %6
21596}
21597
; Masked memory-operand variant: %__a is compared (fcmp oeq, <2 x double>)
; against a 128-bit vector loaded from %__b, ANDed with the i2 mask %__u,
; zero-padded to 4 bits and returned as an i4.
; Both configurations are expected to use a %k1 write-mask. With
; avx512bw/vl/dq the load (pointer in %rsi) is folded into a 128-bit
; vcmpeqpd; with plain avx512f it is a separate vmovapd, the compare is done
; on zmm registers, and the kshiftlw/kshiftrw $14 pair keeps only the low two
; mask bits.
21598define zeroext i4 @test_masked_vcmpoeqpd_v2i1_v4i1_mask_mem(i2 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
21599; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v4i1_mask_mem:
21600; VLX:       # %bb.0: # %entry
21601; VLX-NEXT:    kmovd %edi, %k1
21602; VLX-NEXT:    vcmpeqpd (%rsi), %xmm0, %k0 {%k1}
21603; VLX-NEXT:    kmovb %k0, %eax
21604; VLX-NEXT:    retq
21605;
21606; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v4i1_mask_mem:
21607; NoVLX:       # %bb.0: # %entry
21608; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
21609; NoVLX-NEXT:    kmovw %edi, %k1
21610; NoVLX-NEXT:    vmovapd (%rsi), %xmm1
21611; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
21612; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
21613; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
21614; NoVLX-NEXT:    kmovw %k0, %eax
21615; NoVLX-NEXT:    andl $3, %eax
21616; NoVLX-NEXT:    vzeroupper
21617; NoVLX-NEXT:    retq
21618entry:
21619  %0 = bitcast <2 x i64> %__a to <2 x double>
21620  %load = load <2 x i64>, <2 x i64>* %__b
21621  %1 = bitcast <2 x i64> %load to <2 x double>
21622  %2 = fcmp oeq <2 x double> %0, %1
21623  %3 = bitcast i2 %__u to <2 x i1>
21624  %4 = and <2 x i1> %2, %3
21625  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
21626  %6 = bitcast <4 x i1> %5 to i4
21627  ret i4 %6
21628}
21629
; Masked broadcast variant: a scalar double loaded from %__b is splatted to
; both lanes, compared with %__a (fcmp oeq), ANDed with the i2 mask %__u,
; zero-padded to 4 bits and returned as an i4.
; Both configurations are expected to use a %k1 write-mask. With
; avx512bw/vl/dq the splat becomes a {1to2} embedded broadcast operand; with
; plain avx512f it is a separate vmovddup from memory, followed by a zmm
; compare and the kshiftlw/kshiftrw $14 pair that keeps only the low two mask
; bits.
21630define zeroext i4 @test_masked_vcmpoeqpd_v2i1_v4i1_mask_mem_b(i2 zeroext %__u, <2 x i64> %__a, double* %__b) local_unnamed_addr {
21631; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v4i1_mask_mem_b:
21632; VLX:       # %bb.0: # %entry
21633; VLX-NEXT:    kmovd %edi, %k1
21634; VLX-NEXT:    vcmpeqpd (%rsi){1to2}, %xmm0, %k0 {%k1}
21635; VLX-NEXT:    kmovb %k0, %eax
21636; VLX-NEXT:    retq
21637;
21638; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v4i1_mask_mem_b:
21639; NoVLX:       # %bb.0: # %entry
21640; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
21641; NoVLX-NEXT:    kmovw %edi, %k1
21642; NoVLX-NEXT:    vmovddup {{.*#+}} xmm1 = mem[0,0]
21643; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
21644; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
21645; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
21646; NoVLX-NEXT:    kmovw %k0, %eax
21647; NoVLX-NEXT:    andl $3, %eax
21648; NoVLX-NEXT:    vzeroupper
21649; NoVLX-NEXT:    retq
21650entry:
21651  %0 = bitcast <2 x i64> %__a to <2 x double>
21652  %load = load double, double* %__b
21653  %vec = insertelement <2 x double> undef, double %load, i32 0
21654  %1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
21655  %2 = fcmp oeq <2 x double> %0, %1
21656  %3 = bitcast i2 %__u to <2 x i1>
21657  %4 = and <2 x i1> %2, %3
21658  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
21659  %6 = bitcast <4 x i1> %5 to i4
21660  ret i4 %6
21661}
21662
21663
21664
; Compares %__a and %__b as <2 x double> with fcmp oeq, zero-pads the 2
; result bits to 8 and returns them as an i8. In the 8-entry shuffle mask only
; indices 0 and 1 select compare results; the remaining entries are 2 or 3 and
; therefore select lanes of the zeroinitializer operand.
; With avx512bw/vl/dq a 128-bit vcmpeqpd followed by kmovd is expected. With
; plain avx512f the compare is done on zmm registers and the
; kshiftlw/kshiftrw $14 pair keeps only the low two mask bits; unlike the
; i4-returning tests, no andl $3 is expected afterwards.
21665define zeroext i8 @test_vcmpoeqpd_v2i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
21666; VLX-LABEL: test_vcmpoeqpd_v2i1_v8i1_mask:
21667; VLX:       # %bb.0: # %entry
21668; VLX-NEXT:    vcmpeqpd %xmm1, %xmm0, %k0
21669; VLX-NEXT:    kmovd %k0, %eax
21670; VLX-NEXT:    # kill: def $al killed $al killed $eax
21671; VLX-NEXT:    retq
21672;
21673; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v8i1_mask:
21674; NoVLX:       # %bb.0: # %entry
21675; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
21676; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
21677; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0
21678; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
21679; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
21680; NoVLX-NEXT:    kmovw %k0, %eax
21681; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
21682; NoVLX-NEXT:    vzeroupper
21683; NoVLX-NEXT:    retq
21684entry:
21685  %0 = bitcast <2 x i64> %__a to <2 x double>
21686  %1 = bitcast <2 x i64> %__b to <2 x double>
21687  %2 = fcmp oeq <2 x double> %0, %1
21688  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
21689  %4 = bitcast <8 x i1> %3 to i8
21690  ret i8 %4
21691}
21692
; Memory-operand variant with an 8-bit result: %__a is compared (fcmp oeq,
; <2 x double>) against a 128-bit vector loaded from %__b; the 2 result bits
; are zero-padded to 8 and returned as an i8.
; With avx512bw/vl/dq the load is expected to be folded into a 128-bit
; vcmpeqpd. With plain avx512f the load is a separate vmovapd, the compare is
; done on zmm registers, and the kshiftlw/kshiftrw $14 pair keeps only the low
; two mask bits.
21693define zeroext i8 @test_vcmpoeqpd_v2i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
21694; VLX-LABEL: test_vcmpoeqpd_v2i1_v8i1_mask_mem:
21695; VLX:       # %bb.0: # %entry
21696; VLX-NEXT:    vcmpeqpd (%rdi), %xmm0, %k0
21697; VLX-NEXT:    kmovd %k0, %eax
21698; VLX-NEXT:    # kill: def $al killed $al killed $eax
21699; VLX-NEXT:    retq
21700;
21701; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v8i1_mask_mem:
21702; NoVLX:       # %bb.0: # %entry
21703; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
21704; NoVLX-NEXT:    vmovapd (%rdi), %xmm1
21705; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0
21706; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
21707; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
21708; NoVLX-NEXT:    kmovw %k0, %eax
21709; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
21710; NoVLX-NEXT:    vzeroupper
21711; NoVLX-NEXT:    retq
21712entry:
21713  %0 = bitcast <2 x i64> %__a to <2 x double>
21714  %load = load <2 x i64>, <2 x i64>* %__b
21715  %1 = bitcast <2 x i64> %load to <2 x double>
21716  %2 = fcmp oeq <2 x double> %0, %1
21717  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
21718  %4 = bitcast <8 x i1> %3 to i8
21719  ret i8 %4
21720}
21721
; Broadcast variant with an 8-bit result: a scalar double loaded from %__b is
; splatted to both lanes and compared with %__a (fcmp oeq); the 2 result bits
; are zero-padded to 8 and returned as an i8.
; With avx512bw/vl/dq the splat is expected to become a {1to2} embedded
; broadcast operand. With plain avx512f it is a separate vmovddup from memory,
; followed by a zmm compare and the kshiftlw/kshiftrw $14 pair that keeps only
; the low two mask bits.
21722define zeroext i8 @test_vcmpoeqpd_v2i1_v8i1_mask_mem_b(<2 x i64> %__a, double* %__b) local_unnamed_addr {
21723; VLX-LABEL: test_vcmpoeqpd_v2i1_v8i1_mask_mem_b:
21724; VLX:       # %bb.0: # %entry
21725; VLX-NEXT:    vcmpeqpd (%rdi){1to2}, %xmm0, %k0
21726; VLX-NEXT:    kmovd %k0, %eax
21727; VLX-NEXT:    # kill: def $al killed $al killed $eax
21728; VLX-NEXT:    retq
21729;
21730; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v8i1_mask_mem_b:
21731; NoVLX:       # %bb.0: # %entry
21732; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
21733; NoVLX-NEXT:    vmovddup {{.*#+}} xmm1 = mem[0,0]
21734; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0
21735; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
21736; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
21737; NoVLX-NEXT:    kmovw %k0, %eax
21738; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
21739; NoVLX-NEXT:    vzeroupper
21740; NoVLX-NEXT:    retq
21741entry:
21742  %0 = bitcast <2 x i64> %__a to <2 x double>
21743  %load = load double, double* %__b
21744  %vec = insertelement <2 x double> undef, double %load, i32 0
21745  %1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
21746  %2 = fcmp oeq <2 x double> %0, %1
21747  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
21748  %4 = bitcast <8 x i1> %3 to i8
21749  ret i8 %4
21750}
21751
; Masked register-register variant with an 8-bit result: the fcmp oeq result
; of %__a and %__b (<2 x double>) is ANDed with the i2 mask %__u, zero-padded
; to 8 bits and returned as an i8.
; Both configurations are expected to load the mask into %k1 and use a
; write-masked vcmpeqpd. With avx512bw/vl/dq the compare is 128-bit; with
; plain avx512f it is done on zmm registers and the kshiftlw/kshiftrw $14 pair
; keeps only the low two mask bits.
21752define zeroext i8 @test_masked_vcmpoeqpd_v2i1_v8i1_mask(i2 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
21753; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v8i1_mask:
21754; VLX:       # %bb.0: # %entry
21755; VLX-NEXT:    kmovd %edi, %k1
21756; VLX-NEXT:    vcmpeqpd %xmm1, %xmm0, %k0 {%k1}
21757; VLX-NEXT:    kmovd %k0, %eax
21758; VLX-NEXT:    # kill: def $al killed $al killed $eax
21759; VLX-NEXT:    retq
21760;
21761; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v8i1_mask:
21762; NoVLX:       # %bb.0: # %entry
21763; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
21764; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
21765; NoVLX-NEXT:    kmovw %edi, %k1
21766; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
21767; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
21768; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
21769; NoVLX-NEXT:    kmovw %k0, %eax
21770; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
21771; NoVLX-NEXT:    vzeroupper
21772; NoVLX-NEXT:    retq
21773entry:
21774  %0 = bitcast <2 x i64> %__a to <2 x double>
21775  %1 = bitcast <2 x i64> %__b to <2 x double>
21776  %2 = fcmp oeq <2 x double> %0, %1
21777  %3 = bitcast i2 %__u to <2 x i1>
21778  %4 = and <2 x i1> %2, %3
21779  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
21780  %6 = bitcast <8 x i1> %5 to i8
21781  ret i8 %6
21782}
21783
; Masked memory-operand variant with an 8-bit result: %__a is compared
; (fcmp oeq, <2 x double>) against a 128-bit vector loaded from %__b, ANDed
; with the i2 mask %__u, zero-padded to 8 bits and returned as an i8.
; Both configurations are expected to use a %k1 write-mask. With
; avx512bw/vl/dq the load (pointer in %rsi) is folded into a 128-bit
; vcmpeqpd; with plain avx512f it is a separate vmovapd, the compare is done
; on zmm registers, and the kshiftlw/kshiftrw $14 pair keeps only the low two
; mask bits.
21784define zeroext i8 @test_masked_vcmpoeqpd_v2i1_v8i1_mask_mem(i2 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
21785; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v8i1_mask_mem:
21786; VLX:       # %bb.0: # %entry
21787; VLX-NEXT:    kmovd %edi, %k1
21788; VLX-NEXT:    vcmpeqpd (%rsi), %xmm0, %k0 {%k1}
21789; VLX-NEXT:    kmovd %k0, %eax
21790; VLX-NEXT:    # kill: def $al killed $al killed $eax
21791; VLX-NEXT:    retq
21792;
21793; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v8i1_mask_mem:
21794; NoVLX:       # %bb.0: # %entry
21795; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
21796; NoVLX-NEXT:    kmovw %edi, %k1
21797; NoVLX-NEXT:    vmovapd (%rsi), %xmm1
21798; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
21799; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
21800; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
21801; NoVLX-NEXT:    kmovw %k0, %eax
21802; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
21803; NoVLX-NEXT:    vzeroupper
21804; NoVLX-NEXT:    retq
21805entry:
21806  %0 = bitcast <2 x i64> %__a to <2 x double>
21807  %load = load <2 x i64>, <2 x i64>* %__b
21808  %1 = bitcast <2 x i64> %load to <2 x double>
21809  %2 = fcmp oeq <2 x double> %0, %1
21810  %3 = bitcast i2 %__u to <2 x i1>
21811  %4 = and <2 x i1> %2, %3
21812  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
21813  %6 = bitcast <8 x i1> %5 to i8
21814  ret i8 %6
21815}
21816
; Masked <2 x double> fcmp oeq whose second operand is a single double loaded
; from %__b and splatted to both lanes (broadcast form). The 2-lane result is
; ANDed with the i2 mask %__u, zero-padded to <8 x i1>, and returned as i8.
; The VLX checks expect an embedded {1to2} broadcast; the NoVLX checks expect
; vmovddup, a zmm-width compare, and kshiftlw/kshiftrw $14.
21817define zeroext i8 @test_masked_vcmpoeqpd_v2i1_v8i1_mask_mem_b(i2 zeroext %__u, <2 x i64> %__a, double* %__b) local_unnamed_addr {
21818; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v8i1_mask_mem_b:
21819; VLX:       # %bb.0: # %entry
21820; VLX-NEXT:    kmovd %edi, %k1
21821; VLX-NEXT:    vcmpeqpd (%rsi){1to2}, %xmm0, %k0 {%k1}
21822; VLX-NEXT:    kmovd %k0, %eax
21823; VLX-NEXT:    # kill: def $al killed $al killed $eax
21824; VLX-NEXT:    retq
21825;
21826; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v8i1_mask_mem_b:
21827; NoVLX:       # %bb.0: # %entry
21828; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
21829; NoVLX-NEXT:    kmovw %edi, %k1
21830; NoVLX-NEXT:    vmovddup {{.*#+}} xmm1 = mem[0,0]
21831; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
21832; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
21833; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
21834; NoVLX-NEXT:    kmovw %k0, %eax
21835; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
21836; NoVLX-NEXT:    vzeroupper
21837; NoVLX-NEXT:    retq
21838entry:
21839  %0 = bitcast <2 x i64> %__a to <2 x double>
21840  %load = load double, double* %__b
21841  %vec = insertelement <2 x double> undef, double %load, i32 0
21842  %1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
21843  %2 = fcmp oeq <2 x double> %0, %1
21844  %3 = bitcast i2 %__u to <2 x i1>
21845  %4 = and <2 x i1> %2, %3
  ; Shuffle indices 2 and 3 select zeroinitializer lanes, so every lane above the low two is false.
21846  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
21847  %6 = bitcast <8 x i1> %5 to i8
21848  ret i8 %6
21849}
21850
21851
21852
; Unmasked <2 x double> fcmp oeq with both operands in registers. The 2-lane
; result is zero-padded to <16 x i1> and returned as an i16 bitmask.
; The NoVLX checks (no AVX512VL) expect the compare at zmm width, followed by
; kshiftlw/kshiftrw $14, which keeps only the low two mask bits.
21853define zeroext i16 @test_vcmpoeqpd_v2i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
21854; VLX-LABEL: test_vcmpoeqpd_v2i1_v16i1_mask:
21855; VLX:       # %bb.0: # %entry
21856; VLX-NEXT:    vcmpeqpd %xmm1, %xmm0, %k0
21857; VLX-NEXT:    kmovd %k0, %eax
21858; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
21859; VLX-NEXT:    retq
21860;
21861; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v16i1_mask:
21862; NoVLX:       # %bb.0: # %entry
21863; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
21864; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
21865; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0
21866; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
21867; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
21868; NoVLX-NEXT:    kmovw %k0, %eax
21869; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
21870; NoVLX-NEXT:    vzeroupper
21871; NoVLX-NEXT:    retq
21872entry:
21873  %0 = bitcast <2 x i64> %__a to <2 x double>
21874  %1 = bitcast <2 x i64> %__b to <2 x double>
21875  %2 = fcmp oeq <2 x double> %0, %1
  ; Shuffle indices 2 and 3 select zeroinitializer lanes, so every lane above the low two is false.
21876  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
21877  %4 = bitcast <16 x i1> %3 to i16
21878  ret i16 %4
21879}
21880
; Unmasked <2 x double> fcmp oeq whose second operand is a full vector loaded
; from %__b. The 2-lane result is zero-padded to <16 x i1> and returned as an
; i16 bitmask.
; The VLX checks expect the load folded into vcmpeqpd; the NoVLX checks expect
; a separate vmovapd load, a zmm-width compare, and kshiftlw/kshiftrw $14.
21881define zeroext i16 @test_vcmpoeqpd_v2i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
21882; VLX-LABEL: test_vcmpoeqpd_v2i1_v16i1_mask_mem:
21883; VLX:       # %bb.0: # %entry
21884; VLX-NEXT:    vcmpeqpd (%rdi), %xmm0, %k0
21885; VLX-NEXT:    kmovd %k0, %eax
21886; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
21887; VLX-NEXT:    retq
21888;
21889; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v16i1_mask_mem:
21890; NoVLX:       # %bb.0: # %entry
21891; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
21892; NoVLX-NEXT:    vmovapd (%rdi), %xmm1
21893; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0
21894; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
21895; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
21896; NoVLX-NEXT:    kmovw %k0, %eax
21897; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
21898; NoVLX-NEXT:    vzeroupper
21899; NoVLX-NEXT:    retq
21900entry:
21901  %0 = bitcast <2 x i64> %__a to <2 x double>
21902  %load = load <2 x i64>, <2 x i64>* %__b
21903  %1 = bitcast <2 x i64> %load to <2 x double>
21904  %2 = fcmp oeq <2 x double> %0, %1
  ; Shuffle indices 2 and 3 select zeroinitializer lanes, so every lane above the low two is false.
21905  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
21906  %4 = bitcast <16 x i1> %3 to i16
21907  ret i16 %4
21908}
21909
; Unmasked <2 x double> fcmp oeq whose second operand is a single double loaded
; from %__b and splatted to both lanes (broadcast form). The 2-lane result is
; zero-padded to <16 x i1> and returned as an i16 bitmask.
; The VLX checks expect an embedded {1to2} broadcast; the NoVLX checks expect
; vmovddup, a zmm-width compare, and kshiftlw/kshiftrw $14.
21910define zeroext i16 @test_vcmpoeqpd_v2i1_v16i1_mask_mem_b(<2 x i64> %__a, double* %__b) local_unnamed_addr {
21911; VLX-LABEL: test_vcmpoeqpd_v2i1_v16i1_mask_mem_b:
21912; VLX:       # %bb.0: # %entry
21913; VLX-NEXT:    vcmpeqpd (%rdi){1to2}, %xmm0, %k0
21914; VLX-NEXT:    kmovd %k0, %eax
21915; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
21916; VLX-NEXT:    retq
21917;
21918; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v16i1_mask_mem_b:
21919; NoVLX:       # %bb.0: # %entry
21920; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
21921; NoVLX-NEXT:    vmovddup {{.*#+}} xmm1 = mem[0,0]
21922; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0
21923; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
21924; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
21925; NoVLX-NEXT:    kmovw %k0, %eax
21926; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
21927; NoVLX-NEXT:    vzeroupper
21928; NoVLX-NEXT:    retq
21929entry:
21930  %0 = bitcast <2 x i64> %__a to <2 x double>
21931  %load = load double, double* %__b
21932  %vec = insertelement <2 x double> undef, double %load, i32 0
21933  %1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
21934  %2 = fcmp oeq <2 x double> %0, %1
  ; Shuffle indices 2 and 3 select zeroinitializer lanes, so every lane above the low two is false.
21935  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
21936  %4 = bitcast <16 x i1> %3 to i16
21937  ret i16 %4
21938}
21939
; Masked <2 x double> fcmp oeq with both operands in registers. The 2-lane
; result is ANDed with the i2 mask %__u, zero-padded to <16 x i1>, and returned
; as an i16 bitmask.
; The NoVLX checks (no AVX512VL) expect the compare at zmm width, followed by
; kshiftlw/kshiftrw $14, which keeps only the low two mask bits.
21940define zeroext i16 @test_masked_vcmpoeqpd_v2i1_v16i1_mask(i2 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
21941; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v16i1_mask:
21942; VLX:       # %bb.0: # %entry
21943; VLX-NEXT:    kmovd %edi, %k1
21944; VLX-NEXT:    vcmpeqpd %xmm1, %xmm0, %k0 {%k1}
21945; VLX-NEXT:    kmovd %k0, %eax
21946; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
21947; VLX-NEXT:    retq
21948;
21949; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v16i1_mask:
21950; NoVLX:       # %bb.0: # %entry
21951; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
21952; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
21953; NoVLX-NEXT:    kmovw %edi, %k1
21954; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
21955; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
21956; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
21957; NoVLX-NEXT:    kmovw %k0, %eax
21958; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
21959; NoVLX-NEXT:    vzeroupper
21960; NoVLX-NEXT:    retq
21961entry:
21962  %0 = bitcast <2 x i64> %__a to <2 x double>
21963  %1 = bitcast <2 x i64> %__b to <2 x double>
21964  %2 = fcmp oeq <2 x double> %0, %1
21965  %3 = bitcast i2 %__u to <2 x i1>
21966  %4 = and <2 x i1> %2, %3
  ; Shuffle indices 2 and 3 select zeroinitializer lanes, so every lane above the low two is false.
21967  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
21968  %6 = bitcast <16 x i1> %5 to i16
21969  ret i16 %6
21970}
21971
; Masked <2 x double> fcmp oeq whose second operand is a full vector loaded
; from %__b. The 2-lane result is ANDed with the i2 mask %__u, zero-padded to
; <16 x i1>, and returned as an i16 bitmask.
; The VLX checks expect the load folded into vcmpeqpd; the NoVLX checks expect
; a separate vmovapd load, a zmm-width compare, and kshiftlw/kshiftrw $14.
21972define zeroext i16 @test_masked_vcmpoeqpd_v2i1_v16i1_mask_mem(i2 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
21973; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v16i1_mask_mem:
21974; VLX:       # %bb.0: # %entry
21975; VLX-NEXT:    kmovd %edi, %k1
21976; VLX-NEXT:    vcmpeqpd (%rsi), %xmm0, %k0 {%k1}
21977; VLX-NEXT:    kmovd %k0, %eax
21978; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
21979; VLX-NEXT:    retq
21980;
21981; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v16i1_mask_mem:
21982; NoVLX:       # %bb.0: # %entry
21983; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
21984; NoVLX-NEXT:    kmovw %edi, %k1
21985; NoVLX-NEXT:    vmovapd (%rsi), %xmm1
21986; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
21987; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
21988; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
21989; NoVLX-NEXT:    kmovw %k0, %eax
21990; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
21991; NoVLX-NEXT:    vzeroupper
21992; NoVLX-NEXT:    retq
21993entry:
21994  %0 = bitcast <2 x i64> %__a to <2 x double>
21995  %load = load <2 x i64>, <2 x i64>* %__b
21996  %1 = bitcast <2 x i64> %load to <2 x double>
21997  %2 = fcmp oeq <2 x double> %0, %1
21998  %3 = bitcast i2 %__u to <2 x i1>
21999  %4 = and <2 x i1> %2, %3
  ; Shuffle indices 2 and 3 select zeroinitializer lanes, so every lane above the low two is false.
22000  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
22001  %6 = bitcast <16 x i1> %5 to i16
22002  ret i16 %6
22003}
22004
; Masked <2 x double> fcmp oeq whose second operand is a single double loaded
; from %__b and splatted to both lanes (broadcast form). The 2-lane result is
; ANDed with the i2 mask %__u, zero-padded to <16 x i1>, and returned as i16.
; The VLX checks expect an embedded {1to2} broadcast; the NoVLX checks expect
; vmovddup, a zmm-width compare, and kshiftlw/kshiftrw $14.
22005define zeroext i16 @test_masked_vcmpoeqpd_v2i1_v16i1_mask_mem_b(i2 zeroext %__u, <2 x i64> %__a, double* %__b) local_unnamed_addr {
22006; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v16i1_mask_mem_b:
22007; VLX:       # %bb.0: # %entry
22008; VLX-NEXT:    kmovd %edi, %k1
22009; VLX-NEXT:    vcmpeqpd (%rsi){1to2}, %xmm0, %k0 {%k1}
22010; VLX-NEXT:    kmovd %k0, %eax
22011; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
22012; VLX-NEXT:    retq
22013;
22014; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v16i1_mask_mem_b:
22015; NoVLX:       # %bb.0: # %entry
22016; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
22017; NoVLX-NEXT:    kmovw %edi, %k1
22018; NoVLX-NEXT:    vmovddup {{.*#+}} xmm1 = mem[0,0]
22019; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
22020; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
22021; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
22022; NoVLX-NEXT:    kmovw %k0, %eax
22023; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
22024; NoVLX-NEXT:    vzeroupper
22025; NoVLX-NEXT:    retq
22026entry:
22027  %0 = bitcast <2 x i64> %__a to <2 x double>
22028  %load = load double, double* %__b
22029  %vec = insertelement <2 x double> undef, double %load, i32 0
22030  %1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
22031  %2 = fcmp oeq <2 x double> %0, %1
22032  %3 = bitcast i2 %__u to <2 x i1>
22033  %4 = and <2 x i1> %2, %3
  ; Shuffle indices 2 and 3 select zeroinitializer lanes, so every lane above the low two is false.
22034  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
22035  %6 = bitcast <16 x i1> %5 to i16
22036  ret i16 %6
22037}
22038
22039
22040
; Unmasked <2 x double> fcmp oeq with both operands in registers. The 2-lane
; result is zero-padded to <32 x i1> and returned as an i32 bitmask.
; The NoVLX checks (no AVX512VL) expect the compare at zmm width, followed by
; kshiftlw/kshiftrw $14, which keeps only the low two mask bits.
22041define zeroext i32 @test_vcmpoeqpd_v2i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
22042; VLX-LABEL: test_vcmpoeqpd_v2i1_v32i1_mask:
22043; VLX:       # %bb.0: # %entry
22044; VLX-NEXT:    vcmpeqpd %xmm1, %xmm0, %k0
22045; VLX-NEXT:    kmovd %k0, %eax
22046; VLX-NEXT:    retq
22047;
22048; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v32i1_mask:
22049; NoVLX:       # %bb.0: # %entry
22050; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
22051; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
22052; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0
22053; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
22054; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
22055; NoVLX-NEXT:    kmovw %k0, %eax
22056; NoVLX-NEXT:    vzeroupper
22057; NoVLX-NEXT:    retq
22058entry:
22059  %0 = bitcast <2 x i64> %__a to <2 x double>
22060  %1 = bitcast <2 x i64> %__b to <2 x double>
22061  %2 = fcmp oeq <2 x double> %0, %1
  ; Shuffle indices 2 and 3 select zeroinitializer lanes, so every lane above the low two is false.
22062  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
22063  %4 = bitcast <32 x i1> %3 to i32
22064  ret i32 %4
22065}
22066
; Unmasked <2 x double> fcmp oeq whose second operand is a full vector loaded
; from %__b. The 2-lane result is zero-padded to <32 x i1> and returned as an
; i32 bitmask.
; The VLX checks expect the load folded into vcmpeqpd; the NoVLX checks expect
; a separate vmovapd load, a zmm-width compare, and kshiftlw/kshiftrw $14.
22067define zeroext i32 @test_vcmpoeqpd_v2i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
22068; VLX-LABEL: test_vcmpoeqpd_v2i1_v32i1_mask_mem:
22069; VLX:       # %bb.0: # %entry
22070; VLX-NEXT:    vcmpeqpd (%rdi), %xmm0, %k0
22071; VLX-NEXT:    kmovd %k0, %eax
22072; VLX-NEXT:    retq
22073;
22074; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v32i1_mask_mem:
22075; NoVLX:       # %bb.0: # %entry
22076; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
22077; NoVLX-NEXT:    vmovapd (%rdi), %xmm1
22078; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0
22079; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
22080; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
22081; NoVLX-NEXT:    kmovw %k0, %eax
22082; NoVLX-NEXT:    vzeroupper
22083; NoVLX-NEXT:    retq
22084entry:
22085  %0 = bitcast <2 x i64> %__a to <2 x double>
22086  %load = load <2 x i64>, <2 x i64>* %__b
22087  %1 = bitcast <2 x i64> %load to <2 x double>
22088  %2 = fcmp oeq <2 x double> %0, %1
  ; Shuffle indices 2 and 3 select zeroinitializer lanes, so every lane above the low two is false.
22089  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
22090  %4 = bitcast <32 x i1> %3 to i32
22091  ret i32 %4
22092}
22093
; Unmasked <2 x double> fcmp oeq whose second operand is a single double loaded
; from %__b and splatted to both lanes (broadcast form). The 2-lane result is
; zero-padded to <32 x i1> and returned as an i32 bitmask.
; The VLX checks expect an embedded {1to2} broadcast; the NoVLX checks expect
; vmovddup, a zmm-width compare, and kshiftlw/kshiftrw $14.
22094define zeroext i32 @test_vcmpoeqpd_v2i1_v32i1_mask_mem_b(<2 x i64> %__a, double* %__b) local_unnamed_addr {
22095; VLX-LABEL: test_vcmpoeqpd_v2i1_v32i1_mask_mem_b:
22096; VLX:       # %bb.0: # %entry
22097; VLX-NEXT:    vcmpeqpd (%rdi){1to2}, %xmm0, %k0
22098; VLX-NEXT:    kmovd %k0, %eax
22099; VLX-NEXT:    retq
22100;
22101; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v32i1_mask_mem_b:
22102; NoVLX:       # %bb.0: # %entry
22103; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
22104; NoVLX-NEXT:    vmovddup {{.*#+}} xmm1 = mem[0,0]
22105; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0
22106; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
22107; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
22108; NoVLX-NEXT:    kmovw %k0, %eax
22109; NoVLX-NEXT:    vzeroupper
22110; NoVLX-NEXT:    retq
22111entry:
22112  %0 = bitcast <2 x i64> %__a to <2 x double>
22113  %load = load double, double* %__b
22114  %vec = insertelement <2 x double> undef, double %load, i32 0
22115  %1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
22116  %2 = fcmp oeq <2 x double> %0, %1
  ; Shuffle indices 2 and 3 select zeroinitializer lanes, so every lane above the low two is false.
22117  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
22118  %4 = bitcast <32 x i1> %3 to i32
22119  ret i32 %4
22120}
22121
; Masked <2 x double> fcmp oeq with both operands in registers. The 2-lane
; result is ANDed with the i2 mask %__u, zero-padded to <32 x i1>, and returned
; as an i32 bitmask.
; The NoVLX checks (no AVX512VL) expect the compare at zmm width, followed by
; kshiftlw/kshiftrw $14, which keeps only the low two mask bits.
22122define zeroext i32 @test_masked_vcmpoeqpd_v2i1_v32i1_mask(i2 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
22123; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v32i1_mask:
22124; VLX:       # %bb.0: # %entry
22125; VLX-NEXT:    kmovd %edi, %k1
22126; VLX-NEXT:    vcmpeqpd %xmm1, %xmm0, %k0 {%k1}
22127; VLX-NEXT:    kmovd %k0, %eax
22128; VLX-NEXT:    retq
22129;
22130; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v32i1_mask:
22131; NoVLX:       # %bb.0: # %entry
22132; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
22133; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
22134; NoVLX-NEXT:    kmovw %edi, %k1
22135; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
22136; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
22137; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
22138; NoVLX-NEXT:    kmovw %k0, %eax
22139; NoVLX-NEXT:    vzeroupper
22140; NoVLX-NEXT:    retq
22141entry:
22142  %0 = bitcast <2 x i64> %__a to <2 x double>
22143  %1 = bitcast <2 x i64> %__b to <2 x double>
22144  %2 = fcmp oeq <2 x double> %0, %1
22145  %3 = bitcast i2 %__u to <2 x i1>
22146  %4 = and <2 x i1> %2, %3
  ; Shuffle indices 2 and 3 select zeroinitializer lanes, so every lane above the low two is false.
22147  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
22148  %6 = bitcast <32 x i1> %5 to i32
22149  ret i32 %6
22150}
22151
; Masked <2 x double> fcmp oeq whose second operand is a full vector loaded
; from %__b. The 2-lane result is ANDed with the i2 mask %__u, zero-padded to
; <32 x i1>, and returned as an i32 bitmask.
; The VLX checks expect the load folded into vcmpeqpd; the NoVLX checks expect
; a separate vmovapd load, a zmm-width compare, and kshiftlw/kshiftrw $14.
22152define zeroext i32 @test_masked_vcmpoeqpd_v2i1_v32i1_mask_mem(i2 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
22153; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v32i1_mask_mem:
22154; VLX:       # %bb.0: # %entry
22155; VLX-NEXT:    kmovd %edi, %k1
22156; VLX-NEXT:    vcmpeqpd (%rsi), %xmm0, %k0 {%k1}
22157; VLX-NEXT:    kmovd %k0, %eax
22158; VLX-NEXT:    retq
22159;
22160; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v32i1_mask_mem:
22161; NoVLX:       # %bb.0: # %entry
22162; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
22163; NoVLX-NEXT:    kmovw %edi, %k1
22164; NoVLX-NEXT:    vmovapd (%rsi), %xmm1
22165; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
22166; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
22167; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
22168; NoVLX-NEXT:    kmovw %k0, %eax
22169; NoVLX-NEXT:    vzeroupper
22170; NoVLX-NEXT:    retq
22171entry:
22172  %0 = bitcast <2 x i64> %__a to <2 x double>
22173  %load = load <2 x i64>, <2 x i64>* %__b
22174  %1 = bitcast <2 x i64> %load to <2 x double>
22175  %2 = fcmp oeq <2 x double> %0, %1
22176  %3 = bitcast i2 %__u to <2 x i1>
22177  %4 = and <2 x i1> %2, %3
  ; Shuffle indices 2 and 3 select zeroinitializer lanes, so every lane above the low two is false.
22178  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
22179  %6 = bitcast <32 x i1> %5 to i32
22180  ret i32 %6
22181}
22182
; Masked <2 x double> fcmp oeq whose second operand is a single double loaded
; from %__b and splatted to both lanes (broadcast form). The 2-lane result is
; ANDed with the i2 mask %__u, zero-padded to <32 x i1>, and returned as i32.
; The VLX checks expect an embedded {1to2} broadcast; the NoVLX checks expect
; vmovddup, a zmm-width compare, and kshiftlw/kshiftrw $14.
22183define zeroext i32 @test_masked_vcmpoeqpd_v2i1_v32i1_mask_mem_b(i2 zeroext %__u, <2 x i64> %__a, double* %__b) local_unnamed_addr {
22184; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v32i1_mask_mem_b:
22185; VLX:       # %bb.0: # %entry
22186; VLX-NEXT:    kmovd %edi, %k1
22187; VLX-NEXT:    vcmpeqpd (%rsi){1to2}, %xmm0, %k0 {%k1}
22188; VLX-NEXT:    kmovd %k0, %eax
22189; VLX-NEXT:    retq
22190;
22191; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v32i1_mask_mem_b:
22192; NoVLX:       # %bb.0: # %entry
22193; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
22194; NoVLX-NEXT:    kmovw %edi, %k1
22195; NoVLX-NEXT:    vmovddup {{.*#+}} xmm1 = mem[0,0]
22196; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
22197; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
22198; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
22199; NoVLX-NEXT:    kmovw %k0, %eax
22200; NoVLX-NEXT:    vzeroupper
22201; NoVLX-NEXT:    retq
22202entry:
22203  %0 = bitcast <2 x i64> %__a to <2 x double>
22204  %load = load double, double* %__b
22205  %vec = insertelement <2 x double> undef, double %load, i32 0
22206  %1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
22207  %2 = fcmp oeq <2 x double> %0, %1
22208  %3 = bitcast i2 %__u to <2 x i1>
22209  %4 = and <2 x i1> %2, %3
  ; Shuffle indices 2 and 3 select zeroinitializer lanes, so every lane above the low two is false.
22210  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
22211  %6 = bitcast <32 x i1> %5 to i32
22212  ret i32 %6
22213}
22214
22215
22216
22217define zeroext i64 @test_vcmpoeqpd_v2i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
22218; VLX-LABEL: test_vcmpoeqpd_v2i1_v64i1_mask:
22219; VLX:       # %bb.0: # %entry
22220; VLX-NEXT:    vcmpeqpd %xmm1, %xmm0, %k0
22221; VLX-NEXT:    kmovq %k0, %rax
22222; VLX-NEXT:    retq
22223;
22224; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v64i1_mask:
22225; NoVLX:       # %bb.0: # %entry
22226; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
22227; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
22228; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0
22229; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
22230; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
22231; NoVLX-NEXT:    kmovw %k0, %eax
22232; NoVLX-NEXT:    movzwl %ax, %eax
22233; NoVLX-NEXT:    vzeroupper
22234; NoVLX-NEXT:    retq
22235entry:
22236  %0 = bitcast <2 x i64> %__a to <2 x double>
22237  %1 = bitcast <2 x i64> %__b to <2 x double>
22238  %2 = fcmp oeq <2 x double> %0, %1
22239  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
22240  %4 = bitcast <64 x i1> %3 to i64
22241  ret i64 %4
22242}
22243
22244define zeroext i64 @test_vcmpoeqpd_v2i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
22245; VLX-LABEL: test_vcmpoeqpd_v2i1_v64i1_mask_mem:
22246; VLX:       # %bb.0: # %entry
22247; VLX-NEXT:    vcmpeqpd (%rdi), %xmm0, %k0
22248; VLX-NEXT:    kmovq %k0, %rax
22249; VLX-NEXT:    retq
22250;
22251; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v64i1_mask_mem:
22252; NoVLX:       # %bb.0: # %entry
22253; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
22254; NoVLX-NEXT:    vmovapd (%rdi), %xmm1
22255; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0
22256; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
22257; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
22258; NoVLX-NEXT:    kmovw %k0, %eax
22259; NoVLX-NEXT:    movzwl %ax, %eax
22260; NoVLX-NEXT:    vzeroupper
22261; NoVLX-NEXT:    retq
22262entry:
22263  %0 = bitcast <2 x i64> %__a to <2 x double>
22264  %load = load <2 x i64>, <2 x i64>* %__b
22265  %1 = bitcast <2 x i64> %load to <2 x double>
22266  %2 = fcmp oeq <2 x double> %0, %1
22267  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
22268  %4 = bitcast <64 x i1> %3 to i64
22269  ret i64 %4
22270}
22271
22272define zeroext i64 @test_vcmpoeqpd_v2i1_v64i1_mask_mem_b(<2 x i64> %__a, double* %__b) local_unnamed_addr {
22273; VLX-LABEL: test_vcmpoeqpd_v2i1_v64i1_mask_mem_b:
22274; VLX:       # %bb.0: # %entry
22275; VLX-NEXT:    vcmpeqpd (%rdi){1to2}, %xmm0, %k0
22276; VLX-NEXT:    kmovq %k0, %rax
22277; VLX-NEXT:    retq
22278;
22279; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v64i1_mask_mem_b:
22280; NoVLX:       # %bb.0: # %entry
22281; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
22282; NoVLX-NEXT:    vmovddup {{.*#+}} xmm1 = mem[0,0]
22283; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0
22284; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
22285; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
22286; NoVLX-NEXT:    kmovw %k0, %eax
22287; NoVLX-NEXT:    movzwl %ax, %eax
22288; NoVLX-NEXT:    vzeroupper
22289; NoVLX-NEXT:    retq
22290entry:
22291  %0 = bitcast <2 x i64> %__a to <2 x double>
22292  %load = load double, double* %__b
22293  %vec = insertelement <2 x double> undef, double %load, i32 0
22294  %1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
22295  %2 = fcmp oeq <2 x double> %0, %1
22296  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
22297  %4 = bitcast <64 x i1> %3 to i64
22298  ret i64 %4
22299}
22300
22301define zeroext i64 @test_masked_vcmpoeqpd_v2i1_v64i1_mask(i2 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
22302; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v64i1_mask:
22303; VLX:       # %bb.0: # %entry
22304; VLX-NEXT:    kmovd %edi, %k1
22305; VLX-NEXT:    vcmpeqpd %xmm1, %xmm0, %k0 {%k1}
22306; VLX-NEXT:    kmovq %k0, %rax
22307; VLX-NEXT:    retq
22308;
22309; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v64i1_mask:
22310; NoVLX:       # %bb.0: # %entry
22311; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
22312; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
22313; NoVLX-NEXT:    kmovw %edi, %k1
22314; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
22315; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
22316; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
22317; NoVLX-NEXT:    kmovw %k0, %eax
22318; NoVLX-NEXT:    movzwl %ax, %eax
22319; NoVLX-NEXT:    vzeroupper
22320; NoVLX-NEXT:    retq
22321entry:
22322  %0 = bitcast <2 x i64> %__a to <2 x double>
22323  %1 = bitcast <2 x i64> %__b to <2 x double>
22324  %2 = fcmp oeq <2 x double> %0, %1
22325  %3 = bitcast i2 %__u to <2 x i1>
22326  %4 = and <2 x i1> %2, %3
22327  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
22328  %6 = bitcast <64 x i1> %5 to i64
22329  ret i64 %6
22330}
22331
22332define zeroext i64 @test_masked_vcmpoeqpd_v2i1_v64i1_mask_mem(i2 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
22333; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v64i1_mask_mem:
22334; VLX:       # %bb.0: # %entry
22335; VLX-NEXT:    kmovd %edi, %k1
22336; VLX-NEXT:    vcmpeqpd (%rsi), %xmm0, %k0 {%k1}
22337; VLX-NEXT:    kmovq %k0, %rax
22338; VLX-NEXT:    retq
22339;
22340; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v64i1_mask_mem:
22341; NoVLX:       # %bb.0: # %entry
22342; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
22343; NoVLX-NEXT:    kmovw %edi, %k1
22344; NoVLX-NEXT:    vmovapd (%rsi), %xmm1
22345; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
22346; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
22347; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
22348; NoVLX-NEXT:    kmovw %k0, %eax
22349; NoVLX-NEXT:    movzwl %ax, %eax
22350; NoVLX-NEXT:    vzeroupper
22351; NoVLX-NEXT:    retq
22352entry:
22353  %0 = bitcast <2 x i64> %__a to <2 x double>
22354  %load = load <2 x i64>, <2 x i64>* %__b
22355  %1 = bitcast <2 x i64> %load to <2 x double>
22356  %2 = fcmp oeq <2 x double> %0, %1
22357  %3 = bitcast i2 %__u to <2 x i1>
22358  %4 = and <2 x i1> %2, %3
22359  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
22360  %6 = bitcast <64 x i1> %5 to i64
22361  ret i64 %6
22362}
22363
22364define zeroext i64 @test_masked_vcmpoeqpd_v2i1_v64i1_mask_mem_b(i2 zeroext %__u, <2 x i64> %__a, double* %__b) local_unnamed_addr {
22365; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v64i1_mask_mem_b:
22366; VLX:       # %bb.0: # %entry
22367; VLX-NEXT:    kmovd %edi, %k1
22368; VLX-NEXT:    vcmpeqpd (%rsi){1to2}, %xmm0, %k0 {%k1}
22369; VLX-NEXT:    kmovq %k0, %rax
22370; VLX-NEXT:    retq
22371;
22372; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v64i1_mask_mem_b:
22373; NoVLX:       # %bb.0: # %entry
22374; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
22375; NoVLX-NEXT:    kmovw %edi, %k1
22376; NoVLX-NEXT:    vmovddup {{.*#+}} xmm1 = mem[0,0]
22377; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
22378; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
22379; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
22380; NoVLX-NEXT:    kmovw %k0, %eax
22381; NoVLX-NEXT:    movzwl %ax, %eax
22382; NoVLX-NEXT:    vzeroupper
22383; NoVLX-NEXT:    retq
22384entry:
22385  %0 = bitcast <2 x i64> %__a to <2 x double>
22386  %load = load double, double* %__b
22387  %vec = insertelement <2 x double> undef, double %load, i32 0
22388  %1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
22389  %2 = fcmp oeq <2 x double> %0, %1
22390  %3 = bitcast i2 %__u to <2 x i1>
22391  %4 = and <2 x i1> %2, %3
22392  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
22393  %6 = bitcast <64 x i1> %5 to i64
22394  ret i64 %6
22395}
22396
22397
22398
; Unmasked <4 x double> fcmp oeq with both operands in registers. The 4-lane
; result is zero-padded to <8 x i1> and returned as an i8 bitmask.
; The VLX checks expect a ymm compare into %k0; the NoVLX checks (no AVX512VL)
; expect the compare at zmm width, followed by kshiftlw/kshiftrw $12, which
; keeps only the low four mask bits.
22399define zeroext i8 @test_vcmpoeqpd_v4i1_v8i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
22400; VLX-LABEL: test_vcmpoeqpd_v4i1_v8i1_mask:
22401; VLX:       # %bb.0: # %entry
22402; VLX-NEXT:    vcmpeqpd %ymm1, %ymm0, %k0
22403; VLX-NEXT:    kmovd %k0, %eax
22404; VLX-NEXT:    # kill: def $al killed $al killed $eax
22405; VLX-NEXT:    vzeroupper
22406; VLX-NEXT:    retq
22407;
22408; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v8i1_mask:
22409; NoVLX:       # %bb.0: # %entry
22410; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
22411; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
22412; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0
22413; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
22414; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
22415; NoVLX-NEXT:    kmovw %k0, %eax
22416; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
22417; NoVLX-NEXT:    vzeroupper
22418; NoVLX-NEXT:    retq
22419entry:
22420  %0 = bitcast <4 x i64> %__a to <4 x double>
22421  %1 = bitcast <4 x i64> %__b to <4 x double>
22422  %2 = fcmp oeq <4 x double> %0, %1
  ; Shuffle indices 4-7 select zeroinitializer lanes, so the upper four lanes are false.
22423  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
22424  %4 = bitcast <8 x i1> %3 to i8
22425  ret i8 %4
22426}
22427
; Unmasked <4 x double> fcmp oeq whose second operand is a full vector loaded
; from %__b. The 4-lane result is zero-padded to <8 x i1> and returned as an
; i8 bitmask.
; The VLX checks expect the load folded into a ymm vcmpeqpd; the NoVLX checks
; expect a separate vmovapd load, a zmm-width compare, and kshiftlw/kshiftrw
; $12, which keeps only the low four mask bits.
22428define zeroext i8 @test_vcmpoeqpd_v4i1_v8i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
22429; VLX-LABEL: test_vcmpoeqpd_v4i1_v8i1_mask_mem:
22430; VLX:       # %bb.0: # %entry
22431; VLX-NEXT:    vcmpeqpd (%rdi), %ymm0, %k0
22432; VLX-NEXT:    kmovd %k0, %eax
22433; VLX-NEXT:    # kill: def $al killed $al killed $eax
22434; VLX-NEXT:    vzeroupper
22435; VLX-NEXT:    retq
22436;
22437; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v8i1_mask_mem:
22438; NoVLX:       # %bb.0: # %entry
22439; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
22440; NoVLX-NEXT:    vmovapd (%rdi), %ymm1
22441; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0
22442; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
22443; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
22444; NoVLX-NEXT:    kmovw %k0, %eax
22445; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
22446; NoVLX-NEXT:    vzeroupper
22447; NoVLX-NEXT:    retq
22448entry:
22449  %0 = bitcast <4 x i64> %__a to <4 x double>
22450  %load = load <4 x i64>, <4 x i64>* %__b
22451  %1 = bitcast <4 x i64> %load to <4 x double>
22452  %2 = fcmp oeq <4 x double> %0, %1
  ; Shuffle indices 4-7 select zeroinitializer lanes, so the upper four lanes are false.
22453  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
22454  %4 = bitcast <8 x i1> %3 to i8
22455  ret i8 %4
22456}
22457
; Broadcast variant of the v4i1 -> v8i1 ordered-equal compare. A scalar double
; is loaded from %__b, inserted into lane 0 and splatted to all four lanes by
; an all-zero-index shufflevector before the fcmp oeq. The <4 x i1> result is
; zero-extended to <8 x i1> and returned as i8.
; The VLX checks expect the embedded-broadcast form (%rdi){1to4}; the NoVLX
; checks expect an explicit vbroadcastsd into %ymm1 followed by the zmm compare
; and the kshiftlw/kshiftrw $12 pair.
22458define zeroext i8 @test_vcmpoeqpd_v4i1_v8i1_mask_mem_b(<4 x i64> %__a, double* %__b) local_unnamed_addr {
22459; VLX-LABEL: test_vcmpoeqpd_v4i1_v8i1_mask_mem_b:
22460; VLX:       # %bb.0: # %entry
22461; VLX-NEXT:    vcmpeqpd (%rdi){1to4}, %ymm0, %k0
22462; VLX-NEXT:    kmovd %k0, %eax
22463; VLX-NEXT:    # kill: def $al killed $al killed $eax
22464; VLX-NEXT:    vzeroupper
22465; VLX-NEXT:    retq
22466;
22467; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v8i1_mask_mem_b:
22468; NoVLX:       # %bb.0: # %entry
22469; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
22470; NoVLX-NEXT:    vbroadcastsd (%rdi), %ymm1
22471; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0
22472; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
22473; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
22474; NoVLX-NEXT:    kmovw %k0, %eax
22475; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
22476; NoVLX-NEXT:    vzeroupper
22477; NoVLX-NEXT:    retq
22478entry:
22479  %0 = bitcast <4 x i64> %__a to <4 x double>
22480  %load = load double, double* %__b
22481  %vec = insertelement <4 x double> undef, double %load, i32 0
22482  %1 = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
22483  %2 = fcmp oeq <4 x double> %0, %1
22484  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
22485  %4 = bitcast <8 x i1> %3 to i8
22486  ret i8 %4
22487}
22488
; Masked v4i1 -> v8i1 ordered-equal compare. The fcmp oeq result on the two
; <4 x double> operands is ANDed with the i4 argument %__u (bitcast to
; <4 x i1>), zero-extended to <8 x i1> (shuffle indices 4-7 pick
; zeroinitializer lanes) and returned as i8.
; Both check sets expect %__u to be moved from %edi into %k1 and applied as the
; {%k1} write-mask of vcmpeqpd; the NoVLX checks additionally expect zmm
; operands and the kshiftlw/kshiftrw $12 pair.
22489define zeroext i8 @test_masked_vcmpoeqpd_v4i1_v8i1_mask(i4 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
22490; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v8i1_mask:
22491; VLX:       # %bb.0: # %entry
22492; VLX-NEXT:    kmovd %edi, %k1
22493; VLX-NEXT:    vcmpeqpd %ymm1, %ymm0, %k0 {%k1}
22494; VLX-NEXT:    kmovd %k0, %eax
22495; VLX-NEXT:    # kill: def $al killed $al killed $eax
22496; VLX-NEXT:    vzeroupper
22497; VLX-NEXT:    retq
22498;
22499; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v8i1_mask:
22500; NoVLX:       # %bb.0: # %entry
22501; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
22502; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
22503; NoVLX-NEXT:    kmovw %edi, %k1
22504; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
22505; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
22506; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
22507; NoVLX-NEXT:    kmovw %k0, %eax
22508; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
22509; NoVLX-NEXT:    vzeroupper
22510; NoVLX-NEXT:    retq
22511entry:
22512  %0 = bitcast <4 x i64> %__a to <4 x double>
22513  %1 = bitcast <4 x i64> %__b to <4 x double>
22514  %2 = fcmp oeq <4 x double> %0, %1
22515  %3 = bitcast i4 %__u to <4 x i1>
22516  %4 = and <4 x i1> %2, %3
22517  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
22518  %6 = bitcast <8 x i1> %5 to i8
22519  ret i8 %6
22520}
22521
; Masked, memory-operand v4i1 -> v8i1 ordered-equal compare. The second operand
; is loaded from %__b as <4 x i64>; the fcmp oeq result is ANDed with the i4
; argument %__u (bitcast to <4 x i1>), zero-extended to <8 x i1> and returned
; as i8.
; The VLX checks expect the load folded as (%rsi) under the {%k1} write-mask;
; the NoVLX checks expect a separate vmovapd into %ymm1, a masked zmm compare
; and the kshiftlw/kshiftrw $12 pair.
22522define zeroext i8 @test_masked_vcmpoeqpd_v4i1_v8i1_mask_mem(i4 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
22523; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v8i1_mask_mem:
22524; VLX:       # %bb.0: # %entry
22525; VLX-NEXT:    kmovd %edi, %k1
22526; VLX-NEXT:    vcmpeqpd (%rsi), %ymm0, %k0 {%k1}
22527; VLX-NEXT:    kmovd %k0, %eax
22528; VLX-NEXT:    # kill: def $al killed $al killed $eax
22529; VLX-NEXT:    vzeroupper
22530; VLX-NEXT:    retq
22531;
22532; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v8i1_mask_mem:
22533; NoVLX:       # %bb.0: # %entry
22534; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
22535; NoVLX-NEXT:    kmovw %edi, %k1
22536; NoVLX-NEXT:    vmovapd (%rsi), %ymm1
22537; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
22538; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
22539; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
22540; NoVLX-NEXT:    kmovw %k0, %eax
22541; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
22542; NoVLX-NEXT:    vzeroupper
22543; NoVLX-NEXT:    retq
22544entry:
22545  %0 = bitcast <4 x i64> %__a to <4 x double>
22546  %load = load <4 x i64>, <4 x i64>* %__b
22547  %1 = bitcast <4 x i64> %load to <4 x double>
22548  %2 = fcmp oeq <4 x double> %0, %1
22549  %3 = bitcast i4 %__u to <4 x i1>
22550  %4 = and <4 x i1> %2, %3
22551  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
22552  %6 = bitcast <8 x i1> %5 to i8
22553  ret i8 %6
22554}
22555
; Masked, broadcast v4i1 -> v8i1 ordered-equal compare. A scalar double loaded
; from %__b is splatted to four lanes (insertelement + all-zero-index shuffle),
; compared with fcmp oeq, ANDed with the i4 argument %__u (bitcast to
; <4 x i1>), zero-extended to <8 x i1> and returned as i8.
; The VLX checks expect the embedded broadcast (%rsi){1to4} under {%k1}; the
; NoVLX checks expect vbroadcastsd into %ymm1, a masked zmm compare and the
; kshiftlw/kshiftrw $12 pair.
22556define zeroext i8 @test_masked_vcmpoeqpd_v4i1_v8i1_mask_mem_b(i4 zeroext %__u, <4 x i64> %__a, double* %__b) local_unnamed_addr {
22557; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v8i1_mask_mem_b:
22558; VLX:       # %bb.0: # %entry
22559; VLX-NEXT:    kmovd %edi, %k1
22560; VLX-NEXT:    vcmpeqpd (%rsi){1to4}, %ymm0, %k0 {%k1}
22561; VLX-NEXT:    kmovd %k0, %eax
22562; VLX-NEXT:    # kill: def $al killed $al killed $eax
22563; VLX-NEXT:    vzeroupper
22564; VLX-NEXT:    retq
22565;
22566; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v8i1_mask_mem_b:
22567; NoVLX:       # %bb.0: # %entry
22568; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
22569; NoVLX-NEXT:    kmovw %edi, %k1
22570; NoVLX-NEXT:    vbroadcastsd (%rsi), %ymm1
22571; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
22572; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
22573; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
22574; NoVLX-NEXT:    kmovw %k0, %eax
22575; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
22576; NoVLX-NEXT:    vzeroupper
22577; NoVLX-NEXT:    retq
22578entry:
22579  %0 = bitcast <4 x i64> %__a to <4 x double>
22580  %load = load double, double* %__b
22581  %vec = insertelement <4 x double> undef, double %load, i32 0
22582  %1 = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
22583  %2 = fcmp oeq <4 x double> %0, %1
22584  %3 = bitcast i4 %__u to <4 x i1>
22585  %4 = and <4 x i1> %2, %3
22586  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
22587  %6 = bitcast <8 x i1> %5 to i8
22588  ret i8 %6
22589}
22590
22591
22592
; Ordered-equal (fcmp oeq) compare of two <4 x double> values bitcast from the
; <4 x i64> arguments, with the <4 x i1> result widened to <16 x i1> and
; returned as an i16 bitmask. Every shuffle index past 3 is in the range 4-7,
; i.e. a zeroinitializer lane, so result bits 4-15 are zero.
; The VLX checks expect a ymm vcmpeqpd writing %k0; the NoVLX checks expect a
; zmm compare followed by kshiftlw/kshiftrw $12, which leaves only the low four
; mask bits.
22593define zeroext i16 @test_vcmpoeqpd_v4i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
22594; VLX-LABEL: test_vcmpoeqpd_v4i1_v16i1_mask:
22595; VLX:       # %bb.0: # %entry
22596; VLX-NEXT:    vcmpeqpd %ymm1, %ymm0, %k0
22597; VLX-NEXT:    kmovd %k0, %eax
22598; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
22599; VLX-NEXT:    vzeroupper
22600; VLX-NEXT:    retq
22601;
22602; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v16i1_mask:
22603; NoVLX:       # %bb.0: # %entry
22604; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
22605; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
22606; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0
22607; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
22608; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
22609; NoVLX-NEXT:    kmovw %k0, %eax
22610; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
22611; NoVLX-NEXT:    vzeroupper
22612; NoVLX-NEXT:    retq
22613entry:
22614  %0 = bitcast <4 x i64> %__a to <4 x double>
22615  %1 = bitcast <4 x i64> %__b to <4 x double>
22616  %2 = fcmp oeq <4 x double> %0, %1
22617  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
22618  %4 = bitcast <16 x i1> %3 to i16
22619  ret i16 %4
22620}
22621
; Memory-operand variant of the v4i1 -> v16i1 ordered-equal compare. The second
; operand is loaded from %__b as <4 x i64> and bitcast to <4 x double>; the
; <4 x i1> result is widened to <16 x i1> with zeroinitializer lanes in
; positions 4-15 and returned as i16.
; The VLX checks expect the load folded into vcmpeqpd as (%rdi); the NoVLX
; checks expect a separate vmovapd into %ymm1 before the zmm compare and the
; kshiftlw/kshiftrw $12 pair.
22622define zeroext i16 @test_vcmpoeqpd_v4i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
22623; VLX-LABEL: test_vcmpoeqpd_v4i1_v16i1_mask_mem:
22624; VLX:       # %bb.0: # %entry
22625; VLX-NEXT:    vcmpeqpd (%rdi), %ymm0, %k0
22626; VLX-NEXT:    kmovd %k0, %eax
22627; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
22628; VLX-NEXT:    vzeroupper
22629; VLX-NEXT:    retq
22630;
22631; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v16i1_mask_mem:
22632; NoVLX:       # %bb.0: # %entry
22633; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
22634; NoVLX-NEXT:    vmovapd (%rdi), %ymm1
22635; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0
22636; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
22637; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
22638; NoVLX-NEXT:    kmovw %k0, %eax
22639; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
22640; NoVLX-NEXT:    vzeroupper
22641; NoVLX-NEXT:    retq
22642entry:
22643  %0 = bitcast <4 x i64> %__a to <4 x double>
22644  %load = load <4 x i64>, <4 x i64>* %__b
22645  %1 = bitcast <4 x i64> %load to <4 x double>
22646  %2 = fcmp oeq <4 x double> %0, %1
22647  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
22648  %4 = bitcast <16 x i1> %3 to i16
22649  ret i16 %4
22650}
22651
; Broadcast variant of the v4i1 -> v16i1 ordered-equal compare. A scalar double
; loaded from %__b is splatted to four lanes (insertelement + all-zero-index
; shuffle) before the fcmp oeq; the <4 x i1> result is widened to <16 x i1>
; with zeroinitializer lanes in positions 4-15 and returned as i16.
; The VLX checks expect the embedded-broadcast form (%rdi){1to4}; the NoVLX
; checks expect an explicit vbroadcastsd into %ymm1 followed by the zmm compare
; and the kshiftlw/kshiftrw $12 pair.
22652define zeroext i16 @test_vcmpoeqpd_v4i1_v16i1_mask_mem_b(<4 x i64> %__a, double* %__b) local_unnamed_addr {
22653; VLX-LABEL: test_vcmpoeqpd_v4i1_v16i1_mask_mem_b:
22654; VLX:       # %bb.0: # %entry
22655; VLX-NEXT:    vcmpeqpd (%rdi){1to4}, %ymm0, %k0
22656; VLX-NEXT:    kmovd %k0, %eax
22657; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
22658; VLX-NEXT:    vzeroupper
22659; VLX-NEXT:    retq
22660;
22661; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v16i1_mask_mem_b:
22662; NoVLX:       # %bb.0: # %entry
22663; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
22664; NoVLX-NEXT:    vbroadcastsd (%rdi), %ymm1
22665; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0
22666; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
22667; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
22668; NoVLX-NEXT:    kmovw %k0, %eax
22669; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
22670; NoVLX-NEXT:    vzeroupper
22671; NoVLX-NEXT:    retq
22672entry:
22673  %0 = bitcast <4 x i64> %__a to <4 x double>
22674  %load = load double, double* %__b
22675  %vec = insertelement <4 x double> undef, double %load, i32 0
22676  %1 = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
22677  %2 = fcmp oeq <4 x double> %0, %1
22678  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
22679  %4 = bitcast <16 x i1> %3 to i16
22680  ret i16 %4
22681}
22682
; Masked v4i1 -> v16i1 ordered-equal compare. The fcmp oeq result on the two
; <4 x double> operands is ANDed with the i4 argument %__u (bitcast to
; <4 x i1>), widened to <16 x i1> with zeroinitializer lanes in positions 4-15
; and returned as i16.
; Both check sets expect %__u to be moved from %edi into %k1 and applied as the
; {%k1} write-mask of vcmpeqpd; the NoVLX checks additionally expect zmm
; operands and the kshiftlw/kshiftrw $12 pair.
22683define zeroext i16 @test_masked_vcmpoeqpd_v4i1_v16i1_mask(i4 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
22684; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v16i1_mask:
22685; VLX:       # %bb.0: # %entry
22686; VLX-NEXT:    kmovd %edi, %k1
22687; VLX-NEXT:    vcmpeqpd %ymm1, %ymm0, %k0 {%k1}
22688; VLX-NEXT:    kmovd %k0, %eax
22689; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
22690; VLX-NEXT:    vzeroupper
22691; VLX-NEXT:    retq
22692;
22693; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v16i1_mask:
22694; NoVLX:       # %bb.0: # %entry
22695; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
22696; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
22697; NoVLX-NEXT:    kmovw %edi, %k1
22698; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
22699; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
22700; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
22701; NoVLX-NEXT:    kmovw %k0, %eax
22702; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
22703; NoVLX-NEXT:    vzeroupper
22704; NoVLX-NEXT:    retq
22705entry:
22706  %0 = bitcast <4 x i64> %__a to <4 x double>
22707  %1 = bitcast <4 x i64> %__b to <4 x double>
22708  %2 = fcmp oeq <4 x double> %0, %1
22709  %3 = bitcast i4 %__u to <4 x i1>
22710  %4 = and <4 x i1> %2, %3
22711  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
22712  %6 = bitcast <16 x i1> %5 to i16
22713  ret i16 %6
22714}
22715
; Masked, memory-operand v4i1 -> v16i1 ordered-equal compare. The second
; operand is loaded from %__b as <4 x i64>; the fcmp oeq result is ANDed with
; the i4 argument %__u (bitcast to <4 x i1>), widened to <16 x i1> with
; zeroinitializer lanes in positions 4-15 and returned as i16.
; The VLX checks expect the load folded as (%rsi) under the {%k1} write-mask;
; the NoVLX checks expect a separate vmovapd into %ymm1, a masked zmm compare
; and the kshiftlw/kshiftrw $12 pair.
22716define zeroext i16 @test_masked_vcmpoeqpd_v4i1_v16i1_mask_mem(i4 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
22717; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v16i1_mask_mem:
22718; VLX:       # %bb.0: # %entry
22719; VLX-NEXT:    kmovd %edi, %k1
22720; VLX-NEXT:    vcmpeqpd (%rsi), %ymm0, %k0 {%k1}
22721; VLX-NEXT:    kmovd %k0, %eax
22722; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
22723; VLX-NEXT:    vzeroupper
22724; VLX-NEXT:    retq
22725;
22726; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v16i1_mask_mem:
22727; NoVLX:       # %bb.0: # %entry
22728; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
22729; NoVLX-NEXT:    kmovw %edi, %k1
22730; NoVLX-NEXT:    vmovapd (%rsi), %ymm1
22731; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
22732; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
22733; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
22734; NoVLX-NEXT:    kmovw %k0, %eax
22735; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
22736; NoVLX-NEXT:    vzeroupper
22737; NoVLX-NEXT:    retq
22738entry:
22739  %0 = bitcast <4 x i64> %__a to <4 x double>
22740  %load = load <4 x i64>, <4 x i64>* %__b
22741  %1 = bitcast <4 x i64> %load to <4 x double>
22742  %2 = fcmp oeq <4 x double> %0, %1
22743  %3 = bitcast i4 %__u to <4 x i1>
22744  %4 = and <4 x i1> %2, %3
22745  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
22746  %6 = bitcast <16 x i1> %5 to i16
22747  ret i16 %6
22748}
22749
; Masked, broadcast v4i1 -> v16i1 ordered-equal compare. A scalar double loaded
; from %__b is splatted to four lanes (insertelement + all-zero-index shuffle),
; compared with fcmp oeq, ANDed with the i4 argument %__u (bitcast to
; <4 x i1>), widened to <16 x i1> with zeroinitializer lanes in positions 4-15
; and returned as i16.
; The VLX checks expect the embedded broadcast (%rsi){1to4} under {%k1}; the
; NoVLX checks expect vbroadcastsd into %ymm1, a masked zmm compare and the
; kshiftlw/kshiftrw $12 pair.
22750define zeroext i16 @test_masked_vcmpoeqpd_v4i1_v16i1_mask_mem_b(i4 zeroext %__u, <4 x i64> %__a, double* %__b) local_unnamed_addr {
22751; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v16i1_mask_mem_b:
22752; VLX:       # %bb.0: # %entry
22753; VLX-NEXT:    kmovd %edi, %k1
22754; VLX-NEXT:    vcmpeqpd (%rsi){1to4}, %ymm0, %k0 {%k1}
22755; VLX-NEXT:    kmovd %k0, %eax
22756; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
22757; VLX-NEXT:    vzeroupper
22758; VLX-NEXT:    retq
22759;
22760; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v16i1_mask_mem_b:
22761; NoVLX:       # %bb.0: # %entry
22762; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
22763; NoVLX-NEXT:    kmovw %edi, %k1
22764; NoVLX-NEXT:    vbroadcastsd (%rsi), %ymm1
22765; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
22766; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
22767; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
22768; NoVLX-NEXT:    kmovw %k0, %eax
22769; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
22770; NoVLX-NEXT:    vzeroupper
22771; NoVLX-NEXT:    retq
22772entry:
22773  %0 = bitcast <4 x i64> %__a to <4 x double>
22774  %load = load double, double* %__b
22775  %vec = insertelement <4 x double> undef, double %load, i32 0
22776  %1 = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
22777  %2 = fcmp oeq <4 x double> %0, %1
22778  %3 = bitcast i4 %__u to <4 x i1>
22779  %4 = and <4 x i1> %2, %3
22780  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
22781  %6 = bitcast <16 x i1> %5 to i16
22782  ret i16 %6
22783}
22784
22785
22786
; Ordered-equal (fcmp oeq) compare of two <4 x double> values bitcast from the
; <4 x i64> arguments, with the <4 x i1> result widened to <32 x i1> and
; returned as an i32 bitmask. Every shuffle index past 3 is in the range 4-7,
; i.e. a zeroinitializer lane, so result bits 4-31 are zero.
; The VLX checks expect a ymm vcmpeqpd writing %k0 and a kmovd to %eax; the
; NoVLX checks expect a zmm compare, kshiftlw/kshiftrw $12 to keep only the low
; four mask bits, and a kmovw to %eax.
22787define zeroext i32 @test_vcmpoeqpd_v4i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
22788; VLX-LABEL: test_vcmpoeqpd_v4i1_v32i1_mask:
22789; VLX:       # %bb.0: # %entry
22790; VLX-NEXT:    vcmpeqpd %ymm1, %ymm0, %k0
22791; VLX-NEXT:    kmovd %k0, %eax
22792; VLX-NEXT:    vzeroupper
22793; VLX-NEXT:    retq
22794;
22795; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v32i1_mask:
22796; NoVLX:       # %bb.0: # %entry
22797; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
22798; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
22799; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0
22800; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
22801; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
22802; NoVLX-NEXT:    kmovw %k0, %eax
22803; NoVLX-NEXT:    vzeroupper
22804; NoVLX-NEXT:    retq
22805entry:
22806  %0 = bitcast <4 x i64> %__a to <4 x double>
22807  %1 = bitcast <4 x i64> %__b to <4 x double>
22808  %2 = fcmp oeq <4 x double> %0, %1
22809  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
22810  %4 = bitcast <32 x i1> %3 to i32
22811  ret i32 %4
22812}
22813
; Memory-operand variant of the v4i1 -> v32i1 ordered-equal compare. The second
; operand is loaded from %__b as <4 x i64> and bitcast to <4 x double>; the
; <4 x i1> result is widened to <32 x i1> with zeroinitializer lanes in
; positions 4-31 and returned as i32.
; The VLX checks expect the load folded into vcmpeqpd as (%rdi); the NoVLX
; checks expect a separate vmovapd into %ymm1 before the zmm compare and the
; kshiftlw/kshiftrw $12 pair.
22814define zeroext i32 @test_vcmpoeqpd_v4i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
22815; VLX-LABEL: test_vcmpoeqpd_v4i1_v32i1_mask_mem:
22816; VLX:       # %bb.0: # %entry
22817; VLX-NEXT:    vcmpeqpd (%rdi), %ymm0, %k0
22818; VLX-NEXT:    kmovd %k0, %eax
22819; VLX-NEXT:    vzeroupper
22820; VLX-NEXT:    retq
22821;
22822; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v32i1_mask_mem:
22823; NoVLX:       # %bb.0: # %entry
22824; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
22825; NoVLX-NEXT:    vmovapd (%rdi), %ymm1
22826; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0
22827; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
22828; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
22829; NoVLX-NEXT:    kmovw %k0, %eax
22830; NoVLX-NEXT:    vzeroupper
22831; NoVLX-NEXT:    retq
22832entry:
22833  %0 = bitcast <4 x i64> %__a to <4 x double>
22834  %load = load <4 x i64>, <4 x i64>* %__b
22835  %1 = bitcast <4 x i64> %load to <4 x double>
22836  %2 = fcmp oeq <4 x double> %0, %1
22837  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
22838  %4 = bitcast <32 x i1> %3 to i32
22839  ret i32 %4
22840}
22841
; Broadcast variant of the v4i1 -> v32i1 ordered-equal compare. A scalar double
; loaded from %__b is splatted to four lanes (insertelement + all-zero-index
; shuffle) before the fcmp oeq; the <4 x i1> result is widened to <32 x i1>
; with zeroinitializer lanes in positions 4-31 and returned as i32.
; The VLX checks expect the embedded-broadcast form (%rdi){1to4}; the NoVLX
; checks expect an explicit vbroadcastsd into %ymm1 followed by the zmm compare
; and the kshiftlw/kshiftrw $12 pair.
22842define zeroext i32 @test_vcmpoeqpd_v4i1_v32i1_mask_mem_b(<4 x i64> %__a, double* %__b) local_unnamed_addr {
22843; VLX-LABEL: test_vcmpoeqpd_v4i1_v32i1_mask_mem_b:
22844; VLX:       # %bb.0: # %entry
22845; VLX-NEXT:    vcmpeqpd (%rdi){1to4}, %ymm0, %k0
22846; VLX-NEXT:    kmovd %k0, %eax
22847; VLX-NEXT:    vzeroupper
22848; VLX-NEXT:    retq
22849;
22850; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v32i1_mask_mem_b:
22851; NoVLX:       # %bb.0: # %entry
22852; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
22853; NoVLX-NEXT:    vbroadcastsd (%rdi), %ymm1
22854; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0
22855; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
22856; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
22857; NoVLX-NEXT:    kmovw %k0, %eax
22858; NoVLX-NEXT:    vzeroupper
22859; NoVLX-NEXT:    retq
22860entry:
22861  %0 = bitcast <4 x i64> %__a to <4 x double>
22862  %load = load double, double* %__b
22863  %vec = insertelement <4 x double> undef, double %load, i32 0
22864  %1 = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
22865  %2 = fcmp oeq <4 x double> %0, %1
22866  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
22867  %4 = bitcast <32 x i1> %3 to i32
22868  ret i32 %4
22869}
22870
; Masked v4i1 -> v32i1 ordered-equal compare. The fcmp oeq result on the two
; <4 x double> operands is ANDed with the i4 argument %__u (bitcast to
; <4 x i1>), widened to <32 x i1> with zeroinitializer lanes in positions 4-31
; and returned as i32.
; Both check sets expect %__u to be moved from %edi into %k1 and applied as the
; {%k1} write-mask of vcmpeqpd; the NoVLX checks additionally expect zmm
; operands and the kshiftlw/kshiftrw $12 pair.
22871define zeroext i32 @test_masked_vcmpoeqpd_v4i1_v32i1_mask(i4 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
22872; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v32i1_mask:
22873; VLX:       # %bb.0: # %entry
22874; VLX-NEXT:    kmovd %edi, %k1
22875; VLX-NEXT:    vcmpeqpd %ymm1, %ymm0, %k0 {%k1}
22876; VLX-NEXT:    kmovd %k0, %eax
22877; VLX-NEXT:    vzeroupper
22878; VLX-NEXT:    retq
22879;
22880; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v32i1_mask:
22881; NoVLX:       # %bb.0: # %entry
22882; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
22883; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
22884; NoVLX-NEXT:    kmovw %edi, %k1
22885; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
22886; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
22887; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
22888; NoVLX-NEXT:    kmovw %k0, %eax
22889; NoVLX-NEXT:    vzeroupper
22890; NoVLX-NEXT:    retq
22891entry:
22892  %0 = bitcast <4 x i64> %__a to <4 x double>
22893  %1 = bitcast <4 x i64> %__b to <4 x double>
22894  %2 = fcmp oeq <4 x double> %0, %1
22895  %3 = bitcast i4 %__u to <4 x i1>
22896  %4 = and <4 x i1> %2, %3
22897  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
22898  %6 = bitcast <32 x i1> %5 to i32
22899  ret i32 %6
22900}
22901
; Masked, memory-operand v4i1 -> v32i1 ordered-equal compare. The second
; operand is loaded from %__b as <4 x i64>; the fcmp oeq result is ANDed with
; the i4 argument %__u (bitcast to <4 x i1>), widened to <32 x i1> with
; zeroinitializer lanes in positions 4-31 and returned as i32.
; The VLX checks expect the load folded as (%rsi) under the {%k1} write-mask;
; the NoVLX checks expect a separate vmovapd into %ymm1, a masked zmm compare
; and the kshiftlw/kshiftrw $12 pair.
22902define zeroext i32 @test_masked_vcmpoeqpd_v4i1_v32i1_mask_mem(i4 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
22903; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v32i1_mask_mem:
22904; VLX:       # %bb.0: # %entry
22905; VLX-NEXT:    kmovd %edi, %k1
22906; VLX-NEXT:    vcmpeqpd (%rsi), %ymm0, %k0 {%k1}
22907; VLX-NEXT:    kmovd %k0, %eax
22908; VLX-NEXT:    vzeroupper
22909; VLX-NEXT:    retq
22910;
22911; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v32i1_mask_mem:
22912; NoVLX:       # %bb.0: # %entry
22913; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
22914; NoVLX-NEXT:    kmovw %edi, %k1
22915; NoVLX-NEXT:    vmovapd (%rsi), %ymm1
22916; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
22917; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
22918; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
22919; NoVLX-NEXT:    kmovw %k0, %eax
22920; NoVLX-NEXT:    vzeroupper
22921; NoVLX-NEXT:    retq
22922entry:
22923  %0 = bitcast <4 x i64> %__a to <4 x double>
22924  %load = load <4 x i64>, <4 x i64>* %__b
22925  %1 = bitcast <4 x i64> %load to <4 x double>
22926  %2 = fcmp oeq <4 x double> %0, %1
22927  %3 = bitcast i4 %__u to <4 x i1>
22928  %4 = and <4 x i1> %2, %3
22929  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
22930  %6 = bitcast <32 x i1> %5 to i32
22931  ret i32 %6
22932}
22933
; Masked, broadcast v4i1 -> v32i1 ordered-equal compare. A scalar double loaded
; from %__b is splatted to four lanes (insertelement + all-zero-index shuffle),
; compared with fcmp oeq, ANDed with the i4 argument %__u (bitcast to
; <4 x i1>), widened to <32 x i1> with zeroinitializer lanes in positions 4-31
; and returned as i32.
; The VLX checks expect the embedded broadcast (%rsi){1to4} under {%k1}; the
; NoVLX checks expect vbroadcastsd into %ymm1, a masked zmm compare and the
; kshiftlw/kshiftrw $12 pair.
22934define zeroext i32 @test_masked_vcmpoeqpd_v4i1_v32i1_mask_mem_b(i4 zeroext %__u, <4 x i64> %__a, double* %__b) local_unnamed_addr {
22935; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v32i1_mask_mem_b:
22936; VLX:       # %bb.0: # %entry
22937; VLX-NEXT:    kmovd %edi, %k1
22938; VLX-NEXT:    vcmpeqpd (%rsi){1to4}, %ymm0, %k0 {%k1}
22939; VLX-NEXT:    kmovd %k0, %eax
22940; VLX-NEXT:    vzeroupper
22941; VLX-NEXT:    retq
22942;
22943; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v32i1_mask_mem_b:
22944; NoVLX:       # %bb.0: # %entry
22945; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
22946; NoVLX-NEXT:    kmovw %edi, %k1
22947; NoVLX-NEXT:    vbroadcastsd (%rsi), %ymm1
22948; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
22949; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
22950; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
22951; NoVLX-NEXT:    kmovw %k0, %eax
22952; NoVLX-NEXT:    vzeroupper
22953; NoVLX-NEXT:    retq
22954entry:
22955  %0 = bitcast <4 x i64> %__a to <4 x double>
22956  %load = load double, double* %__b
22957  %vec = insertelement <4 x double> undef, double %load, i32 0
22958  %1 = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
22959  %2 = fcmp oeq <4 x double> %0, %1
22960  %3 = bitcast i4 %__u to <4 x i1>
22961  %4 = and <4 x i1> %2, %3
22962  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
22963  %6 = bitcast <32 x i1> %5 to i32
22964  ret i32 %6
22965}
22966
22967
22968
; Ordered-equal (fcmp oeq) compare of two <4 x double> values bitcast from the
; <4 x i64> arguments, with the <4 x i1> result widened to <64 x i1> and
; returned as an i64 bitmask. Every shuffle index past 3 is in the range 4-7,
; i.e. a zeroinitializer lane, so result bits 4-63 are zero.
; The VLX checks expect a ymm vcmpeqpd writing %k0 and a kmovq to %rax; the
; NoVLX checks expect a zmm compare, kshiftlw/kshiftrw $12 to keep only the low
; four mask bits, then kmovw to %eax followed by movzwl %ax, %eax.
22969define zeroext i64 @test_vcmpoeqpd_v4i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
22970; VLX-LABEL: test_vcmpoeqpd_v4i1_v64i1_mask:
22971; VLX:       # %bb.0: # %entry
22972; VLX-NEXT:    vcmpeqpd %ymm1, %ymm0, %k0
22973; VLX-NEXT:    kmovq %k0, %rax
22974; VLX-NEXT:    vzeroupper
22975; VLX-NEXT:    retq
22976;
22977; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v64i1_mask:
22978; NoVLX:       # %bb.0: # %entry
22979; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
22980; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
22981; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0
22982; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
22983; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
22984; NoVLX-NEXT:    kmovw %k0, %eax
22985; NoVLX-NEXT:    movzwl %ax, %eax
22986; NoVLX-NEXT:    vzeroupper
22987; NoVLX-NEXT:    retq
22988entry:
22989  %0 = bitcast <4 x i64> %__a to <4 x double>
22990  %1 = bitcast <4 x i64> %__b to <4 x double>
22991  %2 = fcmp oeq <4 x double> %0, %1
22992  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
22993  %4 = bitcast <64 x i1> %3 to i64
22994  ret i64 %4
22995}
22996
; Memory-operand variant of the v4i1 -> v64i1 ordered-equal compare. The second
; operand is loaded from %__b as <4 x i64> and bitcast to <4 x double>; the
; <4 x i1> result is widened to <64 x i1> with zeroinitializer lanes in
; positions 4-63 and returned as i64.
; The VLX checks expect the load folded into vcmpeqpd as (%rdi) and a kmovq to
; %rax; the NoVLX checks expect a separate vmovapd into %ymm1, the zmm compare,
; the kshiftlw/kshiftrw $12 pair, then kmovw plus movzwl %ax, %eax.
22997define zeroext i64 @test_vcmpoeqpd_v4i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
22998; VLX-LABEL: test_vcmpoeqpd_v4i1_v64i1_mask_mem:
22999; VLX:       # %bb.0: # %entry
23000; VLX-NEXT:    vcmpeqpd (%rdi), %ymm0, %k0
23001; VLX-NEXT:    kmovq %k0, %rax
23002; VLX-NEXT:    vzeroupper
23003; VLX-NEXT:    retq
23004;
23005; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v64i1_mask_mem:
23006; NoVLX:       # %bb.0: # %entry
23007; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
23008; NoVLX-NEXT:    vmovapd (%rdi), %ymm1
23009; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0
23010; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
23011; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
23012; NoVLX-NEXT:    kmovw %k0, %eax
23013; NoVLX-NEXT:    movzwl %ax, %eax
23014; NoVLX-NEXT:    vzeroupper
23015; NoVLX-NEXT:    retq
23016entry:
23017  %0 = bitcast <4 x i64> %__a to <4 x double>
23018  %load = load <4 x i64>, <4 x i64>* %__b
23019  %1 = bitcast <4 x i64> %load to <4 x double>
23020  %2 = fcmp oeq <4 x double> %0, %1
23021  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
23022  %4 = bitcast <64 x i1> %3 to i64
23023  ret i64 %4
23024}
23025
; Broadcast variant of the v4i1 -> v64i1 ordered-equal compare. A scalar double
; loaded from %__b is splatted to four lanes (insertelement + all-zero-index
; shuffle) before the fcmp oeq; the <4 x i1> result is widened to <64 x i1>
; with zeroinitializer lanes in positions 4-63 and returned as i64.
; The VLX checks expect the embedded-broadcast form (%rdi){1to4} and a kmovq to
; %rax; the NoVLX checks expect vbroadcastsd into %ymm1, the zmm compare, the
; kshiftlw/kshiftrw $12 pair, then kmovw plus movzwl %ax, %eax.
23026define zeroext i64 @test_vcmpoeqpd_v4i1_v64i1_mask_mem_b(<4 x i64> %__a, double* %__b) local_unnamed_addr {
23027; VLX-LABEL: test_vcmpoeqpd_v4i1_v64i1_mask_mem_b:
23028; VLX:       # %bb.0: # %entry
23029; VLX-NEXT:    vcmpeqpd (%rdi){1to4}, %ymm0, %k0
23030; VLX-NEXT:    kmovq %k0, %rax
23031; VLX-NEXT:    vzeroupper
23032; VLX-NEXT:    retq
23033;
23034; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v64i1_mask_mem_b:
23035; NoVLX:       # %bb.0: # %entry
23036; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
23037; NoVLX-NEXT:    vbroadcastsd (%rdi), %ymm1
23038; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0
23039; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
23040; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
23041; NoVLX-NEXT:    kmovw %k0, %eax
23042; NoVLX-NEXT:    movzwl %ax, %eax
23043; NoVLX-NEXT:    vzeroupper
23044; NoVLX-NEXT:    retq
23045entry:
23046  %0 = bitcast <4 x i64> %__a to <4 x double>
23047  %load = load double, double* %__b
23048  %vec = insertelement <4 x double> undef, double %load, i32 0
23049  %1 = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
23050  %2 = fcmp oeq <4 x double> %0, %1
23051  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
23052  %4 = bitcast <64 x i1> %3 to i64
23053  ret i64 %4
23054}
23055
; Masked v4i1 -> v64i1 ordered-equal compare. The fcmp oeq result on the two
; <4 x double> operands is ANDed with the i4 argument %__u (bitcast to
; <4 x i1>), widened to <64 x i1> with zeroinitializer lanes in positions 4-63
; and returned as i64.
; Both check sets expect %__u to be moved from %edi into %k1 and applied as the
; {%k1} write-mask of vcmpeqpd. The VLX checks then expect kmovq to %rax; the
; NoVLX checks expect zmm operands, the kshiftlw/kshiftrw $12 pair, then kmovw
; plus movzwl %ax, %eax.
23056define zeroext i64 @test_masked_vcmpoeqpd_v4i1_v64i1_mask(i4 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
23057; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v64i1_mask:
23058; VLX:       # %bb.0: # %entry
23059; VLX-NEXT:    kmovd %edi, %k1
23060; VLX-NEXT:    vcmpeqpd %ymm1, %ymm0, %k0 {%k1}
23061; VLX-NEXT:    kmovq %k0, %rax
23062; VLX-NEXT:    vzeroupper
23063; VLX-NEXT:    retq
23064;
23065; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v64i1_mask:
23066; NoVLX:       # %bb.0: # %entry
23067; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
23068; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
23069; NoVLX-NEXT:    kmovw %edi, %k1
23070; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
23071; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
23072; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
23073; NoVLX-NEXT:    kmovw %k0, %eax
23074; NoVLX-NEXT:    movzwl %ax, %eax
23075; NoVLX-NEXT:    vzeroupper
23076; NoVLX-NEXT:    retq
23077entry:
23078  %0 = bitcast <4 x i64> %__a to <4 x double>
23079  %1 = bitcast <4 x i64> %__b to <4 x double>
23080  %2 = fcmp oeq <4 x double> %0, %1
23081  %3 = bitcast i4 %__u to <4 x i1>
23082  %4 = and <4 x i1> %2, %3
23083  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
23084  %6 = bitcast <64 x i1> %5 to i64
23085  ret i64 %6
23086}
23087
; Masked, memory-operand v4i1 -> v64i1 ordered-equal compare. The second
; operand is loaded from %__b as <4 x i64>; the fcmp oeq result is ANDed with
; the i4 argument %__u (bitcast to <4 x i1>), widened to <64 x i1> with
; zeroinitializer lanes in positions 4-63 and returned as i64.
; The VLX checks expect the load folded as (%rsi) under the {%k1} write-mask
; and a kmovq to %rax; the NoVLX checks expect a separate vmovapd into %ymm1, a
; masked zmm compare, the kshiftlw/kshiftrw $12 pair, then kmovw plus
; movzwl %ax, %eax.
23088define zeroext i64 @test_masked_vcmpoeqpd_v4i1_v64i1_mask_mem(i4 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
23089; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v64i1_mask_mem:
23090; VLX:       # %bb.0: # %entry
23091; VLX-NEXT:    kmovd %edi, %k1
23092; VLX-NEXT:    vcmpeqpd (%rsi), %ymm0, %k0 {%k1}
23093; VLX-NEXT:    kmovq %k0, %rax
23094; VLX-NEXT:    vzeroupper
23095; VLX-NEXT:    retq
23096;
23097; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v64i1_mask_mem:
23098; NoVLX:       # %bb.0: # %entry
23099; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
23100; NoVLX-NEXT:    kmovw %edi, %k1
23101; NoVLX-NEXT:    vmovapd (%rsi), %ymm1
23102; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
23103; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
23104; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
23105; NoVLX-NEXT:    kmovw %k0, %eax
23106; NoVLX-NEXT:    movzwl %ax, %eax
23107; NoVLX-NEXT:    vzeroupper
23108; NoVLX-NEXT:    retq
23109entry:
23110  %0 = bitcast <4 x i64> %__a to <4 x double>
23111  %load = load <4 x i64>, <4 x i64>* %__b
23112  %1 = bitcast <4 x i64> %load to <4 x double>
23113  %2 = fcmp oeq <4 x double> %0, %1
23114  %3 = bitcast i4 %__u to <4 x i1>
23115  %4 = and <4 x i1> %2, %3
23116  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
23117  %6 = bitcast <64 x i1> %5 to i64
23118  ret i64 %6
23119}
23120
; Broadcast variant of the masked <4 x double> oeq compare: a single double is
; loaded from %__b and splatted to all four lanes (insertelement plus an
; all-zero shuffle mask) before the compare. The result is ANDed with %__u,
; widened to <64 x i1> with zero lanes and returned as an i64.
; The VLX expectations use a {1to4} embedded-broadcast memory operand; the
; NoVLX expectations use a separate vbroadcastsd and a zmm-wide compare.
23121define zeroext i64 @test_masked_vcmpoeqpd_v4i1_v64i1_mask_mem_b(i4 zeroext %__u, <4 x i64> %__a, double* %__b) local_unnamed_addr {
23122; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v64i1_mask_mem_b:
23123; VLX:       # %bb.0: # %entry
23124; VLX-NEXT:    kmovd %edi, %k1
23125; VLX-NEXT:    vcmpeqpd (%rsi){1to4}, %ymm0, %k0 {%k1}
23126; VLX-NEXT:    kmovq %k0, %rax
23127; VLX-NEXT:    vzeroupper
23128; VLX-NEXT:    retq
23129;
23130; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v64i1_mask_mem_b:
23131; NoVLX:       # %bb.0: # %entry
23132; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
23133; NoVLX-NEXT:    kmovw %edi, %k1
23134; NoVLX-NEXT:    vbroadcastsd (%rsi), %ymm1
23135; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
23136; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
23137; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
23138; NoVLX-NEXT:    kmovw %k0, %eax
23139; NoVLX-NEXT:    movzwl %ax, %eax
23140; NoVLX-NEXT:    vzeroupper
23141; NoVLX-NEXT:    retq
23142entry:
23143  %0 = bitcast <4 x i64> %__a to <4 x double>
23144  %load = load double, double* %__b
23145  %vec = insertelement <4 x double> undef, double %load, i32 0
23146  %1 = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
23147  %2 = fcmp oeq <4 x double> %0, %1
23148  %3 = bitcast i4 %__u to <4 x i1>
23149  %4 = and <4 x i1> %2, %3
23150  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
23151  %6 = bitcast <64 x i1> %5 to i64
23152  ret i64 %6
23153}
23154
23155
23156
; Unmasked ordered-equal compare of two <8 x double> values (bitcast from the
; <8 x i64> arguments). The <8 x i1> result is widened to <16 x i1> with
; zeroinitializer lanes in positions 8-15 and returned as an i16.
; Both run configurations are expected to emit a single zmm vcmpeqpd into %k0;
; they differ only in the mask move (kmovd for VLX, kmovw for NoVLX).
23157define zeroext i16 @test_vcmpoeqpd_v8i1_v16i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
23158; VLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_mask:
23159; VLX:       # %bb.0: # %entry
23160; VLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0
23161; VLX-NEXT:    kmovd %k0, %eax
23162; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
23163; VLX-NEXT:    vzeroupper
23164; VLX-NEXT:    retq
23165;
23166; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_mask:
23167; NoVLX:       # %bb.0: # %entry
23168; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0
23169; NoVLX-NEXT:    kmovw %k0, %eax
23170; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
23171; NoVLX-NEXT:    vzeroupper
23172; NoVLX-NEXT:    retq
23173entry:
23174  %0 = bitcast <8 x i64> %__a to <8 x double>
23175  %1 = bitcast <8 x i64> %__b to <8 x double>
23176  %2 = fcmp oeq <8 x double> %0, %1
23177  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
23178  %4 = bitcast <16 x i1> %3 to i16
23179  ret i16 %4
23180}
23181
; Unmasked <8 x double> oeq compare with the second operand loaded from the
; pointer %__b. The <8 x i1> result is widened to <16 x i1> with zero lanes and
; returned as an i16.
; Both run configurations are expected to fold the load into vcmpeqpd as a
; (%rdi) memory operand.
23182define zeroext i16 @test_vcmpoeqpd_v8i1_v16i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
23183; VLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_mask_mem:
23184; VLX:       # %bb.0: # %entry
23185; VLX-NEXT:    vcmpeqpd (%rdi), %zmm0, %k0
23186; VLX-NEXT:    kmovd %k0, %eax
23187; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
23188; VLX-NEXT:    vzeroupper
23189; VLX-NEXT:    retq
23190;
23191; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_mask_mem:
23192; NoVLX:       # %bb.0: # %entry
23193; NoVLX-NEXT:    vcmpeqpd (%rdi), %zmm0, %k0
23194; NoVLX-NEXT:    kmovw %k0, %eax
23195; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
23196; NoVLX-NEXT:    vzeroupper
23197; NoVLX-NEXT:    retq
23198entry:
23199  %0 = bitcast <8 x i64> %__a to <8 x double>
23200  %load = load <8 x i64>, <8 x i64>* %__b
23201  %1 = bitcast <8 x i64> %load to <8 x double>
23202  %2 = fcmp oeq <8 x double> %0, %1
23203  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
23204  %4 = bitcast <16 x i1> %3 to i16
23205  ret i16 %4
23206}
23207
; Unmasked <8 x double> oeq compare against a broadcast scalar: one double is
; loaded from %__b and splatted to all eight lanes before the compare. The
; <8 x i1> result is widened to <16 x i1> with zero lanes and returned as i16.
; Both run configurations are expected to use a {1to8} embedded-broadcast
; memory operand on vcmpeqpd.
23208define zeroext i16 @test_vcmpoeqpd_v8i1_v16i1_mask_mem_b(<8 x i64> %__a, double* %__b) local_unnamed_addr {
23209; VLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_mask_mem_b:
23210; VLX:       # %bb.0: # %entry
23211; VLX-NEXT:    vcmpeqpd (%rdi){1to8}, %zmm0, %k0
23212; VLX-NEXT:    kmovd %k0, %eax
23213; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
23214; VLX-NEXT:    vzeroupper
23215; VLX-NEXT:    retq
23216;
23217; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_mask_mem_b:
23218; NoVLX:       # %bb.0: # %entry
23219; NoVLX-NEXT:    vcmpeqpd (%rdi){1to8}, %zmm0, %k0
23220; NoVLX-NEXT:    kmovw %k0, %eax
23221; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
23222; NoVLX-NEXT:    vzeroupper
23223; NoVLX-NEXT:    retq
23224entry:
23225  %0 = bitcast <8 x i64> %__a to <8 x double>
23226  %load = load double, double* %__b
23227  %vec = insertelement <8 x double> undef, double %load, i32 0
23228  %1 = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
23229  %2 = fcmp oeq <8 x double> %0, %1
23230  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
23231  %4 = bitcast <16 x i1> %3 to i16
23232  ret i16 %4
23233}
23234
; Masked <8 x double> oeq compare: the <8 x i1> compare result is ANDed with
; the 8-bit mask %__u, widened to <16 x i1> with zero lanes and returned as an
; i16.
; Both run configurations are expected to move %edi into %k1 and apply it as a
; {%k1} write-mask on the zmm vcmpeqpd.
23235define zeroext i16 @test_masked_vcmpoeqpd_v8i1_v16i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
23236; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v16i1_mask:
23237; VLX:       # %bb.0: # %entry
23238; VLX-NEXT:    kmovd %edi, %k1
23239; VLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
23240; VLX-NEXT:    kmovd %k0, %eax
23241; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
23242; VLX-NEXT:    vzeroupper
23243; VLX-NEXT:    retq
23244;
23245; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v16i1_mask:
23246; NoVLX:       # %bb.0: # %entry
23247; NoVLX-NEXT:    kmovw %edi, %k1
23248; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
23249; NoVLX-NEXT:    kmovw %k0, %eax
23250; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
23251; NoVLX-NEXT:    vzeroupper
23252; NoVLX-NEXT:    retq
23253entry:
23254  %0 = bitcast <8 x i64> %__a to <8 x double>
23255  %1 = bitcast <8 x i64> %__b to <8 x double>
23256  %2 = fcmp oeq <8 x double> %0, %1
23257  %3 = bitcast i8 %__u to <8 x i1>
23258  %4 = and <8 x i1> %2, %3
23259  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
23260  %6 = bitcast <16 x i1> %5 to i16
23261  ret i16 %6
23262}
23263
; Masked <8 x double> oeq compare with the second operand loaded from %__b.
; The compare result is ANDed with the 8-bit mask %__u, widened to <16 x i1>
; with zero lanes and returned as an i16.
; Both run configurations are expected to fold the load into vcmpeqpd as a
; (%rsi) memory operand under a {%k1} write-mask.
23264define zeroext i16 @test_masked_vcmpoeqpd_v8i1_v16i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
23265; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v16i1_mask_mem:
23266; VLX:       # %bb.0: # %entry
23267; VLX-NEXT:    kmovd %edi, %k1
23268; VLX-NEXT:    vcmpeqpd (%rsi), %zmm0, %k0 {%k1}
23269; VLX-NEXT:    kmovd %k0, %eax
23270; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
23271; VLX-NEXT:    vzeroupper
23272; VLX-NEXT:    retq
23273;
23274; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v16i1_mask_mem:
23275; NoVLX:       # %bb.0: # %entry
23276; NoVLX-NEXT:    kmovw %edi, %k1
23277; NoVLX-NEXT:    vcmpeqpd (%rsi), %zmm0, %k0 {%k1}
23278; NoVLX-NEXT:    kmovw %k0, %eax
23279; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
23280; NoVLX-NEXT:    vzeroupper
23281; NoVLX-NEXT:    retq
23282entry:
23283  %0 = bitcast <8 x i64> %__a to <8 x double>
23284  %load = load <8 x i64>, <8 x i64>* %__b
23285  %1 = bitcast <8 x i64> %load to <8 x double>
23286  %2 = fcmp oeq <8 x double> %0, %1
23287  %3 = bitcast i8 %__u to <8 x i1>
23288  %4 = and <8 x i1> %2, %3
23289  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
23290  %6 = bitcast <16 x i1> %5 to i16
23291  ret i16 %6
23292}
23293
; Masked <8 x double> oeq compare against a broadcast scalar: one double is
; loaded from %__b and splatted to all eight lanes. The compare result is ANDed
; with the 8-bit mask %__u, widened to <16 x i1> with zero lanes and returned
; as an i16.
; Both run configurations are expected to use a {1to8} embedded-broadcast
; memory operand under a {%k1} write-mask.
23294define zeroext i16 @test_masked_vcmpoeqpd_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, double* %__b) local_unnamed_addr {
23295; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v16i1_mask_mem_b:
23296; VLX:       # %bb.0: # %entry
23297; VLX-NEXT:    kmovd %edi, %k1
23298; VLX-NEXT:    vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1}
23299; VLX-NEXT:    kmovd %k0, %eax
23300; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
23301; VLX-NEXT:    vzeroupper
23302; VLX-NEXT:    retq
23303;
23304; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v16i1_mask_mem_b:
23305; NoVLX:       # %bb.0: # %entry
23306; NoVLX-NEXT:    kmovw %edi, %k1
23307; NoVLX-NEXT:    vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1}
23308; NoVLX-NEXT:    kmovw %k0, %eax
23309; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
23310; NoVLX-NEXT:    vzeroupper
23311; NoVLX-NEXT:    retq
23312entry:
23313  %0 = bitcast <8 x i64> %__a to <8 x double>
23314  %load = load double, double* %__b
23315  %vec = insertelement <8 x double> undef, double %load, i32 0
23316  %1 = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
23317  %2 = fcmp oeq <8 x double> %0, %1
23318  %3 = bitcast i8 %__u to <8 x i1>
23319  %4 = and <8 x i1> %2, %3
23320  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
23321  %6 = bitcast <16 x i1> %5 to i16
23322  ret i16 %6
23323}
23324
23325
23326
; Intrinsic-based <8 x double> compare: calls @llvm.x86.avx512.cmp.pd.512 with
; the immediates i32 2 and i32 8, bitcasts the <8 x i1> result to i8 and
; zero-extends it to i16.
; Both run configurations expect "vcmplepd {sae}" followed by a movzbl that
; performs the zero-extension.
; NOTE(review): the function name says "oeq" while the expected instruction is
; vcmplepd - confirm the predicate immediate is intentional.
23327define zeroext i16 @test_vcmpoeqpd_v8i1_v16i1_sae_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
23328; VLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_sae_mask:
23329; VLX:       # %bb.0: # %entry
23330; VLX-NEXT:    vcmplepd {sae}, %zmm1, %zmm0, %k0
23331; VLX-NEXT:    kmovd %k0, %eax
23332; VLX-NEXT:    movzbl %al, %eax
23333; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
23334; VLX-NEXT:    vzeroupper
23335; VLX-NEXT:    retq
23336;
23337; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_sae_mask:
23338; NoVLX:       # %bb.0: # %entry
23339; NoVLX-NEXT:    vcmplepd {sae}, %zmm1, %zmm0, %k0
23340; NoVLX-NEXT:    kmovw %k0, %eax
23341; NoVLX-NEXT:    movzbl %al, %eax
23342; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
23343; NoVLX-NEXT:    vzeroupper
23344; NoVLX-NEXT:    retq
23345entry:
23346  %0 = bitcast <8 x i64> %__a to <8 x double>
23347  %1 = bitcast <8 x i64> %__b to <8 x double>
23348  %2 = call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, i32 8)
23349  %3 = bitcast <8 x i1> %2 to i8
23350  %4 = zext i8 %3 to i16
23351  ret i16 %4
23352}
23353
; Masked intrinsic-based <8 x double> compare: calls
; @llvm.x86.avx512.cmp.pd.512 with the immediates i32 2 and i32 8, ANDs the
; <8 x i1> result with the 8-bit mask %__u, bitcasts to i8 and zero-extends to
; i16.
; Both run configurations expect an unmasked "vcmplepd {sae}" with the mask
; applied afterwards in a general-purpose register (andb %dil, %al).
; NOTE(review): the function name says "oeq" while the expected instruction is
; vcmplepd - confirm the predicate immediate is intentional.
23354define zeroext i16 @test_masked_vcmpoeqpd_v8i1_v16i1_sae_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
23355; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v16i1_sae_mask:
23356; VLX:       # %bb.0: # %entry
23357; VLX-NEXT:    vcmplepd {sae}, %zmm1, %zmm0, %k0
23358; VLX-NEXT:    kmovd %k0, %eax
23359; VLX-NEXT:    andb %dil, %al
23360; VLX-NEXT:    movzbl %al, %eax
23361; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
23362; VLX-NEXT:    vzeroupper
23363; VLX-NEXT:    retq
23364;
23365; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v16i1_sae_mask:
23366; NoVLX:       # %bb.0: # %entry
23367; NoVLX-NEXT:    vcmplepd {sae}, %zmm1, %zmm0, %k0
23368; NoVLX-NEXT:    kmovw %k0, %eax
23369; NoVLX-NEXT:    andb %dil, %al
23370; NoVLX-NEXT:    movzbl %al, %eax
23371; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
23372; NoVLX-NEXT:    vzeroupper
23373; NoVLX-NEXT:    retq
23374entry:
23375  %0 = bitcast <8 x i64> %__a to <8 x double>
23376  %1 = bitcast <8 x i64> %__b to <8 x double>
23377  %2 = call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, i32 8)
23378  %3 = bitcast i8 %__u to <8 x i1>
23379  %4 = and <8 x i1> %2, %3
23380  %5 = bitcast <8 x i1> %4 to i8
23381  %6 = zext i8 %5 to i16
23382  ret i16 %6
23383}
23384
23385
23386
; Unmasked ordered-equal compare of two <8 x double> values (bitcast from the
; <8 x i64> arguments). The <8 x i1> result is widened to <32 x i1>, with every
; lane above the first eight taken from a zero vector, and returned as an i32.
; Both run configurations are expected to emit a single zmm vcmpeqpd into %k0
; and move the mask straight to %eax.
23387define zeroext i32 @test_vcmpoeqpd_v8i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
23388; VLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_mask:
23389; VLX:       # %bb.0: # %entry
23390; VLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0
23391; VLX-NEXT:    kmovd %k0, %eax
23392; VLX-NEXT:    vzeroupper
23393; VLX-NEXT:    retq
23394;
23395; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_mask:
23396; NoVLX:       # %bb.0: # %entry
23397; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0
23398; NoVLX-NEXT:    kmovw %k0, %eax
23399; NoVLX-NEXT:    vzeroupper
23400; NoVLX-NEXT:    retq
23401entry:
23402  %0 = bitcast <8 x i64> %__a to <8 x double>
23403  %1 = bitcast <8 x i64> %__b to <8 x double>
23404  %2 = fcmp oeq <8 x double> %0, %1
23405  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
23406  %4 = bitcast <32 x i1> %3 to i32
23407  ret i32 %4
23408}
23409
; Unmasked <8 x double> oeq compare with the second operand loaded from the
; pointer %__b. The <8 x i1> result is widened to <32 x i1> with zero lanes and
; returned as an i32.
; Both run configurations are expected to fold the load into vcmpeqpd as a
; (%rdi) memory operand.
23410define zeroext i32 @test_vcmpoeqpd_v8i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
23411; VLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_mask_mem:
23412; VLX:       # %bb.0: # %entry
23413; VLX-NEXT:    vcmpeqpd (%rdi), %zmm0, %k0
23414; VLX-NEXT:    kmovd %k0, %eax
23415; VLX-NEXT:    vzeroupper
23416; VLX-NEXT:    retq
23417;
23418; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_mask_mem:
23419; NoVLX:       # %bb.0: # %entry
23420; NoVLX-NEXT:    vcmpeqpd (%rdi), %zmm0, %k0
23421; NoVLX-NEXT:    kmovw %k0, %eax
23422; NoVLX-NEXT:    vzeroupper
23423; NoVLX-NEXT:    retq
23424entry:
23425  %0 = bitcast <8 x i64> %__a to <8 x double>
23426  %load = load <8 x i64>, <8 x i64>* %__b
23427  %1 = bitcast <8 x i64> %load to <8 x double>
23428  %2 = fcmp oeq <8 x double> %0, %1
23429  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
23430  %4 = bitcast <32 x i1> %3 to i32
23431  ret i32 %4
23432}
23433
; Unmasked <8 x double> oeq compare against a broadcast scalar: one double is
; loaded from %__b and splatted to all eight lanes before the compare. The
; <8 x i1> result is widened to <32 x i1> with zero lanes and returned as i32.
; Both run configurations are expected to use a {1to8} embedded-broadcast
; memory operand on vcmpeqpd.
23434define zeroext i32 @test_vcmpoeqpd_v8i1_v32i1_mask_mem_b(<8 x i64> %__a, double* %__b) local_unnamed_addr {
23435; VLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_mask_mem_b:
23436; VLX:       # %bb.0: # %entry
23437; VLX-NEXT:    vcmpeqpd (%rdi){1to8}, %zmm0, %k0
23438; VLX-NEXT:    kmovd %k0, %eax
23439; VLX-NEXT:    vzeroupper
23440; VLX-NEXT:    retq
23441;
23442; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_mask_mem_b:
23443; NoVLX:       # %bb.0: # %entry
23444; NoVLX-NEXT:    vcmpeqpd (%rdi){1to8}, %zmm0, %k0
23445; NoVLX-NEXT:    kmovw %k0, %eax
23446; NoVLX-NEXT:    vzeroupper
23447; NoVLX-NEXT:    retq
23448entry:
23449  %0 = bitcast <8 x i64> %__a to <8 x double>
23450  %load = load double, double* %__b
23451  %vec = insertelement <8 x double> undef, double %load, i32 0
23452  %1 = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
23453  %2 = fcmp oeq <8 x double> %0, %1
23454  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
23455  %4 = bitcast <32 x i1> %3 to i32
23456  ret i32 %4
23457}
23458
; Masked <8 x double> oeq compare: the <8 x i1> compare result is ANDed with
; the 8-bit mask %__u, widened to <32 x i1> with zero lanes and returned as an
; i32.
; Both run configurations are expected to move %edi into %k1 and apply it as a
; {%k1} write-mask on the zmm vcmpeqpd.
23459define zeroext i32 @test_masked_vcmpoeqpd_v8i1_v32i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
23460; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v32i1_mask:
23461; VLX:       # %bb.0: # %entry
23462; VLX-NEXT:    kmovd %edi, %k1
23463; VLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
23464; VLX-NEXT:    kmovd %k0, %eax
23465; VLX-NEXT:    vzeroupper
23466; VLX-NEXT:    retq
23467;
23468; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v32i1_mask:
23469; NoVLX:       # %bb.0: # %entry
23470; NoVLX-NEXT:    kmovw %edi, %k1
23471; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
23472; NoVLX-NEXT:    kmovw %k0, %eax
23473; NoVLX-NEXT:    vzeroupper
23474; NoVLX-NEXT:    retq
23475entry:
23476  %0 = bitcast <8 x i64> %__a to <8 x double>
23477  %1 = bitcast <8 x i64> %__b to <8 x double>
23478  %2 = fcmp oeq <8 x double> %0, %1
23479  %3 = bitcast i8 %__u to <8 x i1>
23480  %4 = and <8 x i1> %2, %3
23481  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
23482  %6 = bitcast <32 x i1> %5 to i32
23483  ret i32 %6
23484}
23485
; Masked <8 x double> oeq compare with the second operand loaded from %__b.
; The compare result is ANDed with the 8-bit mask %__u, widened to <32 x i1>
; with zero lanes and returned as an i32.
; Both run configurations are expected to fold the load into vcmpeqpd as a
; (%rsi) memory operand under a {%k1} write-mask.
23486define zeroext i32 @test_masked_vcmpoeqpd_v8i1_v32i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
23487; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v32i1_mask_mem:
23488; VLX:       # %bb.0: # %entry
23489; VLX-NEXT:    kmovd %edi, %k1
23490; VLX-NEXT:    vcmpeqpd (%rsi), %zmm0, %k0 {%k1}
23491; VLX-NEXT:    kmovd %k0, %eax
23492; VLX-NEXT:    vzeroupper
23493; VLX-NEXT:    retq
23494;
23495; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v32i1_mask_mem:
23496; NoVLX:       # %bb.0: # %entry
23497; NoVLX-NEXT:    kmovw %edi, %k1
23498; NoVLX-NEXT:    vcmpeqpd (%rsi), %zmm0, %k0 {%k1}
23499; NoVLX-NEXT:    kmovw %k0, %eax
23500; NoVLX-NEXT:    vzeroupper
23501; NoVLX-NEXT:    retq
23502entry:
23503  %0 = bitcast <8 x i64> %__a to <8 x double>
23504  %load = load <8 x i64>, <8 x i64>* %__b
23505  %1 = bitcast <8 x i64> %load to <8 x double>
23506  %2 = fcmp oeq <8 x double> %0, %1
23507  %3 = bitcast i8 %__u to <8 x i1>
23508  %4 = and <8 x i1> %2, %3
23509  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
23510  %6 = bitcast <32 x i1> %5 to i32
23511  ret i32 %6
23512}
23513
; Masked <8 x double> oeq compare against a broadcast scalar: one double is
; loaded from %__b and splatted to all eight lanes. The compare result is ANDed
; with the 8-bit mask %__u, widened to <32 x i1> with zero lanes and returned
; as an i32.
; Both run configurations are expected to use a {1to8} embedded-broadcast
; memory operand under a {%k1} write-mask.
23514define zeroext i32 @test_masked_vcmpoeqpd_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, double* %__b) local_unnamed_addr {
23515; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v32i1_mask_mem_b:
23516; VLX:       # %bb.0: # %entry
23517; VLX-NEXT:    kmovd %edi, %k1
23518; VLX-NEXT:    vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1}
23519; VLX-NEXT:    kmovd %k0, %eax
23520; VLX-NEXT:    vzeroupper
23521; VLX-NEXT:    retq
23522;
23523; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v32i1_mask_mem_b:
23524; NoVLX:       # %bb.0: # %entry
23525; NoVLX-NEXT:    kmovw %edi, %k1
23526; NoVLX-NEXT:    vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1}
23527; NoVLX-NEXT:    kmovw %k0, %eax
23528; NoVLX-NEXT:    vzeroupper
23529; NoVLX-NEXT:    retq
23530entry:
23531  %0 = bitcast <8 x i64> %__a to <8 x double>
23532  %load = load double, double* %__b
23533  %vec = insertelement <8 x double> undef, double %load, i32 0
23534  %1 = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
23535  %2 = fcmp oeq <8 x double> %0, %1
23536  %3 = bitcast i8 %__u to <8 x i1>
23537  %4 = and <8 x i1> %2, %3
23538  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
23539  %6 = bitcast <32 x i1> %5 to i32
23540  ret i32 %6
23541}
23542
23543
23544
; Intrinsic-based <8 x double> compare: calls @llvm.x86.avx512.cmp.pd.512 with
; the immediates i32 2 and i32 8, bitcasts the <8 x i1> result to i8 and
; zero-extends it to i32.
; Both run configurations expect "vcmplepd {sae}". The VLX expectations read
; the mask with a single kmovb; the NoVLX expectations use kmovw followed by
; movzbl for the zero-extension.
; NOTE(review): the function name says "oeq" while the expected instruction is
; vcmplepd - confirm the predicate immediate is intentional.
23545define zeroext i32 @test_vcmpoeqpd_v8i1_v32i1_sae_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
23546; VLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_sae_mask:
23547; VLX:       # %bb.0: # %entry
23548; VLX-NEXT:    vcmplepd {sae}, %zmm1, %zmm0, %k0
23549; VLX-NEXT:    kmovb %k0, %eax
23550; VLX-NEXT:    vzeroupper
23551; VLX-NEXT:    retq
23552;
23553; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_sae_mask:
23554; NoVLX:       # %bb.0: # %entry
23555; NoVLX-NEXT:    vcmplepd {sae}, %zmm1, %zmm0, %k0
23556; NoVLX-NEXT:    kmovw %k0, %eax
23557; NoVLX-NEXT:    movzbl %al, %eax
23558; NoVLX-NEXT:    vzeroupper
23559; NoVLX-NEXT:    retq
23560entry:
23561  %0 = bitcast <8 x i64> %__a to <8 x double>
23562  %1 = bitcast <8 x i64> %__b to <8 x double>
23563  %2 = call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, i32 8)
23564  %3 = bitcast <8 x i1> %2 to i8
23565  %4 = zext i8 %3 to i32
23566  ret i32 %4
23567}
23568
; Masked intrinsic-based <8 x double> compare: calls
; @llvm.x86.avx512.cmp.pd.512 with the immediates i32 2 and i32 8, ANDs the
; <8 x i1> result with the 8-bit mask %__u, bitcasts to i8 and zero-extends to
; i32.
; Both run configurations expect an unmasked "vcmplepd {sae}" with the mask
; applied afterwards in a general-purpose register (andb %dil, %al) and a
; movzbl for the zero-extension.
; NOTE(review): the function name says "oeq" while the expected instruction is
; vcmplepd - confirm the predicate immediate is intentional.
23569define zeroext i32 @test_masked_vcmpoeqpd_v8i1_v32i1_sae_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
23570; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v32i1_sae_mask:
23571; VLX:       # %bb.0: # %entry
23572; VLX-NEXT:    vcmplepd {sae}, %zmm1, %zmm0, %k0
23573; VLX-NEXT:    kmovd %k0, %eax
23574; VLX-NEXT:    andb %dil, %al
23575; VLX-NEXT:    movzbl %al, %eax
23576; VLX-NEXT:    vzeroupper
23577; VLX-NEXT:    retq
23578;
23579; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v32i1_sae_mask:
23580; NoVLX:       # %bb.0: # %entry
23581; NoVLX-NEXT:    vcmplepd {sae}, %zmm1, %zmm0, %k0
23582; NoVLX-NEXT:    kmovw %k0, %eax
23583; NoVLX-NEXT:    andb %dil, %al
23584; NoVLX-NEXT:    movzbl %al, %eax
23585; NoVLX-NEXT:    vzeroupper
23586; NoVLX-NEXT:    retq
23587entry:
23588  %0 = bitcast <8 x i64> %__a to <8 x double>
23589  %1 = bitcast <8 x i64> %__b to <8 x double>
23590  %2 = call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, i32 8)
23591  %3 = bitcast i8 %__u to <8 x i1>
23592  %4 = and <8 x i1> %2, %3
23593  %5 = bitcast <8 x i1> %4 to i8
23594  %6 = zext i8 %5 to i32
23595  ret i32 %6
23596}
23597
23598
23599
; Unmasked ordered-equal compare of two <8 x double> values (bitcast from the
; <8 x i64> arguments). The <8 x i1> result is widened to <64 x i1>, with every
; lane above the first eight taken from a zero vector, and returned as an i64.
; The VLX expectations read the mask with kmovq into %rax; the NoVLX
; expectations use kmovw followed by movzwl.
23600define zeroext i64 @test_vcmpoeqpd_v8i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
23601; VLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_mask:
23602; VLX:       # %bb.0: # %entry
23603; VLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0
23604; VLX-NEXT:    kmovq %k0, %rax
23605; VLX-NEXT:    vzeroupper
23606; VLX-NEXT:    retq
23607;
23608; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_mask:
23609; NoVLX:       # %bb.0: # %entry
23610; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0
23611; NoVLX-NEXT:    kmovw %k0, %eax
23612; NoVLX-NEXT:    movzwl %ax, %eax
23613; NoVLX-NEXT:    vzeroupper
23614; NoVLX-NEXT:    retq
23615entry:
23616  %0 = bitcast <8 x i64> %__a to <8 x double>
23617  %1 = bitcast <8 x i64> %__b to <8 x double>
23618  %2 = fcmp oeq <8 x double> %0, %1
23619  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
23620  %4 = bitcast <64 x i1> %3 to i64
23621  ret i64 %4
23622}
23623
; Unmasked <8 x double> oeq compare with the second operand loaded from the
; pointer %__b. The <8 x i1> result is widened to <64 x i1> with zero lanes and
; returned as an i64.
; Both run configurations are expected to fold the load into vcmpeqpd as a
; (%rdi) memory operand; VLX reads the mask with kmovq, NoVLX with kmovw plus
; movzwl.
23624define zeroext i64 @test_vcmpoeqpd_v8i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
23625; VLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_mask_mem:
23626; VLX:       # %bb.0: # %entry
23627; VLX-NEXT:    vcmpeqpd (%rdi), %zmm0, %k0
23628; VLX-NEXT:    kmovq %k0, %rax
23629; VLX-NEXT:    vzeroupper
23630; VLX-NEXT:    retq
23631;
23632; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_mask_mem:
23633; NoVLX:       # %bb.0: # %entry
23634; NoVLX-NEXT:    vcmpeqpd (%rdi), %zmm0, %k0
23635; NoVLX-NEXT:    kmovw %k0, %eax
23636; NoVLX-NEXT:    movzwl %ax, %eax
23637; NoVLX-NEXT:    vzeroupper
23638; NoVLX-NEXT:    retq
23639entry:
23640  %0 = bitcast <8 x i64> %__a to <8 x double>
23641  %load = load <8 x i64>, <8 x i64>* %__b
23642  %1 = bitcast <8 x i64> %load to <8 x double>
23643  %2 = fcmp oeq <8 x double> %0, %1
23644  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
23645  %4 = bitcast <64 x i1> %3 to i64
23646  ret i64 %4
23647}
23648
; Unmasked <8 x double> oeq compare against a broadcast scalar: one double is
; loaded from %__b and splatted to all eight lanes before the compare. The
; <8 x i1> result is widened to <64 x i1> with zero lanes and returned as i64.
; Both run configurations are expected to use a {1to8} embedded-broadcast
; memory operand; VLX reads the mask with kmovq, NoVLX with kmovw plus movzwl.
23649define zeroext i64 @test_vcmpoeqpd_v8i1_v64i1_mask_mem_b(<8 x i64> %__a, double* %__b) local_unnamed_addr {
23650; VLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_mask_mem_b:
23651; VLX:       # %bb.0: # %entry
23652; VLX-NEXT:    vcmpeqpd (%rdi){1to8}, %zmm0, %k0
23653; VLX-NEXT:    kmovq %k0, %rax
23654; VLX-NEXT:    vzeroupper
23655; VLX-NEXT:    retq
23656;
23657; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_mask_mem_b:
23658; NoVLX:       # %bb.0: # %entry
23659; NoVLX-NEXT:    vcmpeqpd (%rdi){1to8}, %zmm0, %k0
23660; NoVLX-NEXT:    kmovw %k0, %eax
23661; NoVLX-NEXT:    movzwl %ax, %eax
23662; NoVLX-NEXT:    vzeroupper
23663; NoVLX-NEXT:    retq
23664entry:
23665  %0 = bitcast <8 x i64> %__a to <8 x double>
23666  %load = load double, double* %__b
23667  %vec = insertelement <8 x double> undef, double %load, i32 0
23668  %1 = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
23669  %2 = fcmp oeq <8 x double> %0, %1
23670  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
23671  %4 = bitcast <64 x i1> %3 to i64
23672  ret i64 %4
23673}
23674
; Masked <8 x double> oeq compare: the <8 x i1> compare result is ANDed with
; the 8-bit mask %__u, widened to <64 x i1> with zero lanes and returned as an
; i64.
; Both run configurations are expected to move %edi into %k1 and apply it as a
; {%k1} write-mask on the zmm vcmpeqpd; VLX reads the mask with kmovq, NoVLX
; with kmovw plus movzwl.
23675define zeroext i64 @test_masked_vcmpoeqpd_v8i1_v64i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
23676; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v64i1_mask:
23677; VLX:       # %bb.0: # %entry
23678; VLX-NEXT:    kmovd %edi, %k1
23679; VLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
23680; VLX-NEXT:    kmovq %k0, %rax
23681; VLX-NEXT:    vzeroupper
23682; VLX-NEXT:    retq
23683;
23684; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v64i1_mask:
23685; NoVLX:       # %bb.0: # %entry
23686; NoVLX-NEXT:    kmovw %edi, %k1
23687; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
23688; NoVLX-NEXT:    kmovw %k0, %eax
23689; NoVLX-NEXT:    movzwl %ax, %eax
23690; NoVLX-NEXT:    vzeroupper
23691; NoVLX-NEXT:    retq
23692entry:
23693  %0 = bitcast <8 x i64> %__a to <8 x double>
23694  %1 = bitcast <8 x i64> %__b to <8 x double>
23695  %2 = fcmp oeq <8 x double> %0, %1
23696  %3 = bitcast i8 %__u to <8 x i1>
23697  %4 = and <8 x i1> %2, %3
23698  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
23699  %6 = bitcast <64 x i1> %5 to i64
23700  ret i64 %6
23701}
23702
; Masked <8 x double> oeq compare with the second operand loaded from %__b.
; The compare result is ANDed with the 8-bit mask %__u, widened to <64 x i1>
; with zero lanes and returned as an i64.
; Both run configurations are expected to fold the load into vcmpeqpd as a
; (%rsi) memory operand under a {%k1} write-mask; VLX reads the mask with
; kmovq, NoVLX with kmovw plus movzwl.
23703define zeroext i64 @test_masked_vcmpoeqpd_v8i1_v64i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
23704; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v64i1_mask_mem:
23705; VLX:       # %bb.0: # %entry
23706; VLX-NEXT:    kmovd %edi, %k1
23707; VLX-NEXT:    vcmpeqpd (%rsi), %zmm0, %k0 {%k1}
23708; VLX-NEXT:    kmovq %k0, %rax
23709; VLX-NEXT:    vzeroupper
23710; VLX-NEXT:    retq
23711;
23712; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v64i1_mask_mem:
23713; NoVLX:       # %bb.0: # %entry
23714; NoVLX-NEXT:    kmovw %edi, %k1
23715; NoVLX-NEXT:    vcmpeqpd (%rsi), %zmm0, %k0 {%k1}
23716; NoVLX-NEXT:    kmovw %k0, %eax
23717; NoVLX-NEXT:    movzwl %ax, %eax
23718; NoVLX-NEXT:    vzeroupper
23719; NoVLX-NEXT:    retq
23720entry:
23721  %0 = bitcast <8 x i64> %__a to <8 x double>
23722  %load = load <8 x i64>, <8 x i64>* %__b
23723  %1 = bitcast <8 x i64> %load to <8 x double>
23724  %2 = fcmp oeq <8 x double> %0, %1
23725  %3 = bitcast i8 %__u to <8 x i1>
23726  %4 = and <8 x i1> %2, %3
23727  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
23728  %6 = bitcast <64 x i1> %5 to i64
23729  ret i64 %6
23730}
23731
; Masked <8 x double> oeq compare against a broadcast scalar: one double is
; loaded from %__b and splatted to all eight lanes. The compare result is ANDed
; with the 8-bit mask %__u, widened to <64 x i1> with zero lanes and returned
; as an i64.
; Both run configurations are expected to use a {1to8} embedded-broadcast
; memory operand under a {%k1} write-mask; VLX reads the mask with kmovq,
; NoVLX with kmovw plus movzwl.
23732define zeroext i64 @test_masked_vcmpoeqpd_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, double* %__b) local_unnamed_addr {
23733; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v64i1_mask_mem_b:
23734; VLX:       # %bb.0: # %entry
23735; VLX-NEXT:    kmovd %edi, %k1
23736; VLX-NEXT:    vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1}
23737; VLX-NEXT:    kmovq %k0, %rax
23738; VLX-NEXT:    vzeroupper
23739; VLX-NEXT:    retq
23740;
23741; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v64i1_mask_mem_b:
23742; NoVLX:       # %bb.0: # %entry
23743; NoVLX-NEXT:    kmovw %edi, %k1
23744; NoVLX-NEXT:    vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1}
23745; NoVLX-NEXT:    kmovw %k0, %eax
23746; NoVLX-NEXT:    movzwl %ax, %eax
23747; NoVLX-NEXT:    vzeroupper
23748; NoVLX-NEXT:    retq
23749entry:
23750  %0 = bitcast <8 x i64> %__a to <8 x double>
23751  %load = load double, double* %__b
23752  %vec = insertelement <8 x double> undef, double %load, i32 0
23753  %1 = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
23754  %2 = fcmp oeq <8 x double> %0, %1
23755  %3 = bitcast i8 %__u to <8 x i1>
23756  %4 = and <8 x i1> %2, %3
23757  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
23758  %6 = bitcast <64 x i1> %5 to i64
23759  ret i64 %6
23760}
23761
23762
23763
; Unmasked 512-bit double compare issued through the
; @llvm.x86.avx512.cmp.pd.512 intrinsic with immediates (i32 2, i32 8).
; The <8 x i1> result is bitcast to i8 and zero-extended to the i64 return
; value.
; Both sets of checks expect the compare to be printed as vcmplepd {sae},
; followed by a move out of %k0 and an explicit movzbl to clear bits 8 and up.
; NOTE(review): the function name says "oeq" but the expected instruction is
; vcmplepd -- presumably the name was carried over from the neighbouring
; tests; confirm before relying on the name.
23764define zeroext i64 @test_vcmpoeqpd_v8i1_v64i1_sae_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
23765; VLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_sae_mask:
23766; VLX:       # %bb.0: # %entry
23767; VLX-NEXT:    vcmplepd {sae}, %zmm1, %zmm0, %k0
23768; VLX-NEXT:    kmovd %k0, %eax
23769; VLX-NEXT:    movzbl %al, %eax
23770; VLX-NEXT:    vzeroupper
23771; VLX-NEXT:    retq
23772;
23773; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_sae_mask:
23774; NoVLX:       # %bb.0: # %entry
23775; NoVLX-NEXT:    vcmplepd {sae}, %zmm1, %zmm0, %k0
23776; NoVLX-NEXT:    kmovw %k0, %eax
23777; NoVLX-NEXT:    movzbl %al, %eax
23778; NoVLX-NEXT:    vzeroupper
23779; NoVLX-NEXT:    retq
23780entry:
23781  %0 = bitcast <8 x i64> %__a to <8 x double>
23782  %1 = bitcast <8 x i64> %__b to <8 x double>
23783  %2 = call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, i32 8)
23784  %3 = bitcast <8 x i1> %2 to i8
23785  %4 = zext i8 %3 to i64
23786  ret i64 %4
23787}
23788
; Masked form of the {sae} double compare: the <8 x i1> result of
; @llvm.x86.avx512.cmp.pd.512 (immediates i32 2, i32 8) is ANDed with %__u
; reinterpreted as <8 x i1>, bitcast to i8 and zero-extended to i64.
; Both sets of checks expect an unmasked vcmplepd {sae}; the mask is applied
; afterwards in a general-purpose register (andb %dil, %al) rather than as a
; {%k1} write mask on the compare, and movzbl clears bits 8 and up.
; NOTE(review): the function name says "oeq" but the expected instruction is
; vcmplepd -- presumably a carried-over name; confirm before relying on it.
23789define zeroext i64 @test_masked_vcmpoeqpd_v8i1_v64i1_sae_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
23790; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v64i1_sae_mask:
23791; VLX:       # %bb.0: # %entry
23792; VLX-NEXT:    vcmplepd {sae}, %zmm1, %zmm0, %k0
23793; VLX-NEXT:    kmovd %k0, %eax
23794; VLX-NEXT:    andb %dil, %al
23795; VLX-NEXT:    movzbl %al, %eax
23796; VLX-NEXT:    vzeroupper
23797; VLX-NEXT:    retq
23798;
23799; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v64i1_sae_mask:
23800; NoVLX:       # %bb.0: # %entry
23801; NoVLX-NEXT:    vcmplepd {sae}, %zmm1, %zmm0, %k0
23802; NoVLX-NEXT:    kmovw %k0, %eax
23803; NoVLX-NEXT:    andb %dil, %al
23804; NoVLX-NEXT:    movzbl %al, %eax
23805; NoVLX-NEXT:    vzeroupper
23806; NoVLX-NEXT:    retq
23807entry:
23808  %0 = bitcast <8 x i64> %__a to <8 x double>
23809  %1 = bitcast <8 x i64> %__b to <8 x double>
23810  %2 = call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, i32 8)
23811  %3 = bitcast i8 %__u to <8 x i1>
23812  %4 = and <8 x i1> %2, %3
23813  %5 = bitcast <8 x i1> %4 to i8
23814  %6 = zext i8 %5 to i64
23815  ret i64 %6
23816}
23817
23818; Test that we understand that cmpps with rounding zeros the upper bits of the mask register.
; The <16 x i1> result of @llvm.x86.avx512.cmp.ps.512 (immediates i32 2,
; i32 8) is round-tripped through i16, then widened to <32 x i1> by a
; shufflevector that takes lanes 16-31 from the zeroinitializer operand, and
; finally bitcast to the i32 return value.
; Both sets of checks expect vcmpleps {sae} followed directly by a move out of
; %k0, with no additional zero-extension instruction for the upper 16 bits.
23819define i32 @test_cmpm_rnd_zero(<16 x float> %a, <16 x float> %b) {
23820; VLX-LABEL: test_cmpm_rnd_zero:
23821; VLX:       # %bb.0:
23822; VLX-NEXT:    vcmpleps {sae}, %zmm1, %zmm0, %k0
23823; VLX-NEXT:    kmovd %k0, %eax
23824; VLX-NEXT:    vzeroupper
23825; VLX-NEXT:    retq
23826;
23827; NoVLX-LABEL: test_cmpm_rnd_zero:
23828; NoVLX:       # %bb.0:
23829; NoVLX-NEXT:    vcmpleps {sae}, %zmm1, %zmm0, %k0
23830; NoVLX-NEXT:    kmovw %k0, %eax
23831; NoVLX-NEXT:    vzeroupper
23832; NoVLX-NEXT:    retq
23833  %res = call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> %a, <16 x float> %b, i32 2, i32 8)
23834  %1 = bitcast <16 x i1> %res to i16
23835  %cast = bitcast i16 %1 to <16 x i1>
23836  %shuffle = shufflevector <16 x i1> %cast, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
23837  %cast2 = bitcast <32 x i1> %shuffle to i32
23838  ret i32 %cast2
23839}
23840
; Builds an 8-bit mask whose low four bits are zero and whose high four bits
; hold the per-lane result of "%a != 0".
;   * The shufflevector indices 4-7 select the zeroinitializer operand for
;     result lanes 0-3; indices 0-3 place the icmp result in lanes 4-7.
; Expected lowering:
;   * VLX checks    -- vptestmd on %xmm0, then a single kshiftlb by 4.
;   * NoVLX checks  -- the input is treated as %zmm0 for vptestmd, then the
;     low four mask bits are moved to bits 4-7 with kshiftlw by 12 followed by
;     kshiftrw by 8.
23841define i8 @mask_zero_lower(<4 x i32> %a) {
23842; VLX-LABEL: mask_zero_lower:
23843; VLX:       # %bb.0:
23844; VLX-NEXT:    vptestmd %xmm0, %xmm0, %k0
23845; VLX-NEXT:    kshiftlb $4, %k0, %k0
23846; VLX-NEXT:    kmovd %k0, %eax
23847; VLX-NEXT:    # kill: def $al killed $al killed $eax
23848; VLX-NEXT:    retq
23849;
23850; NoVLX-LABEL: mask_zero_lower:
23851; NoVLX:       # %bb.0:
23852; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
23853; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
23854; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
23855; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
23856; NoVLX-NEXT:    kmovw %k0, %eax
23857; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
23858; NoVLX-NEXT:    vzeroupper
23859; NoVLX-NEXT:    retq
23860  %cmp = icmp ne <4 x i32> %a, zeroinitializer
23861  %concat = shufflevector <4 x i1> %cmp, <4 x i1> zeroinitializer, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
23862  %cast = bitcast <8 x i1> %concat to i8
23863  ret i8 %cast
23864}
23865