• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=avx512f | FileCheck %s --check-prefixes=X86,X86-AVX512F
3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f | FileCheck %s --check-prefixes=X64,X64-AVX512F
4; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=avx512bw | FileCheck %s --check-prefixes=X86,X86-AVX512BW
5; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512bw | FileCheck %s --check-prefixes=X64,X64-AVX512BW
6
; select00: a scalar i1 condition (%a == 255) picks between an all-zero
; <16 x i32> vector and %b, and the picked value is then xor'ed with %b.
; The checks on both targets expect a compare-and-branch around a zmm register
; copy, followed by a single vpxord.
7define <16 x i32> @select00(i32 %a, <16 x i32> %b) nounwind {
8; X86-LABEL: select00:
9; X86:       # %bb.0:
10; X86-NEXT:    cmpl $255, {{[0-9]+}}(%esp)
11; X86-NEXT:    vpxor %xmm1, %xmm1, %xmm1
12; X86-NEXT:    je .LBB0_2
13; X86-NEXT:  # %bb.1:
14; X86-NEXT:    vmovdqa64 %zmm0, %zmm1
15; X86-NEXT:  .LBB0_2:
16; X86-NEXT:    vpxord %zmm1, %zmm0, %zmm0
17; X86-NEXT:    retl
18;
19; X64-LABEL: select00:
20; X64:       # %bb.0:
21; X64-NEXT:    cmpl $255, %edi
22; X64-NEXT:    vpxor %xmm1, %xmm1, %xmm1
23; X64-NEXT:    je .LBB0_2
24; X64-NEXT:  # %bb.1:
25; X64-NEXT:    vmovdqa64 %zmm0, %zmm1
26; X64-NEXT:  .LBB0_2:
27; X64-NEXT:    vpxord %zmm1, %zmm0, %zmm0
28; X64-NEXT:    retq
29  %cmpres = icmp eq i32 %a, 255
30  %selres = select i1 %cmpres, <16 x i32> zeroinitializer, <16 x i32> %b
31  %res = xor <16 x i32> %b, %selres
32  ret <16 x i32> %res
33}
34
; select01: same shape as select00 but on <8 x i64>: a scalar i1 condition
; (%a == 255) picks between zero and %b, and the result is xor'ed with %b.
; The checks expect the 64-bit-element form vpxorq for the final xor.
35define <8 x i64> @select01(i32 %a, <8 x i64> %b) nounwind {
36; X86-LABEL: select01:
37; X86:       # %bb.0:
38; X86-NEXT:    cmpl $255, {{[0-9]+}}(%esp)
39; X86-NEXT:    vpxor %xmm1, %xmm1, %xmm1
40; X86-NEXT:    je .LBB1_2
41; X86-NEXT:  # %bb.1:
42; X86-NEXT:    vmovdqa64 %zmm0, %zmm1
43; X86-NEXT:  .LBB1_2:
44; X86-NEXT:    vpxorq %zmm1, %zmm0, %zmm0
45; X86-NEXT:    retl
46;
47; X64-LABEL: select01:
48; X64:       # %bb.0:
49; X64-NEXT:    cmpl $255, %edi
50; X64-NEXT:    vpxor %xmm1, %xmm1, %xmm1
51; X64-NEXT:    je .LBB1_2
52; X64-NEXT:  # %bb.1:
53; X64-NEXT:    vmovdqa64 %zmm0, %zmm1
54; X64-NEXT:  .LBB1_2:
55; X64-NEXT:    vpxorq %zmm1, %zmm0, %zmm0
56; X64-NEXT:    retq
57  %cmpres = icmp eq i32 %a, 255
58  %selres = select i1 %cmpres, <8 x i64> zeroinitializer, <8 x i64> %b
59  %res = xor <8 x i64> %b, %selres
60  ret <8 x i64> %res
61}
62
; select02: scalar float select driven by an ordered >= compare
; (%a oge %eps ? %c : %b).
; The 32-bit checks expect vucomiss plus a cmov between two stack addresses and
; an x87 load of the result; the 64-bit checks expect a compare into mask
; register %k1 followed by a masked vmovss.
63define float @select02(float %a, float %b, float %c, float %eps) {
64; X86-LABEL: select02:
65; X86:       # %bb.0:
66; X86-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
67; X86-NEXT:    vucomiss {{[0-9]+}}(%esp), %xmm0
68; X86-NEXT:    leal {{[0-9]+}}(%esp), %eax
69; X86-NEXT:    leal {{[0-9]+}}(%esp), %ecx
70; X86-NEXT:    cmovael %eax, %ecx
71; X86-NEXT:    flds (%ecx)
72; X86-NEXT:    retl
73;
74; X64-LABEL: select02:
75; X64:       # %bb.0:
76; X64-NEXT:    vcmpless %xmm0, %xmm3, %k1
77; X64-NEXT:    vmovss %xmm2, %xmm1, %xmm1 {%k1}
78; X64-NEXT:    vmovaps %xmm1, %xmm0
79; X64-NEXT:    retq
80  %cmp = fcmp oge float %a, %eps
81  %cond = select i1 %cmp, float %c, float %b
82  ret float %cond
83}
84
; select03: double-precision counterpart of select02
; (%a oge %eps ? %c : %b).
; The 32-bit checks expect vucomisd plus a cmov between stack addresses and an
; x87 load; the 64-bit checks expect a compare into %k1 and a masked vmovsd.
85define double @select03(double %a, double %b, double %c, double %eps) {
86; X86-LABEL: select03:
87; X86:       # %bb.0:
88; X86-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
89; X86-NEXT:    vucomisd {{[0-9]+}}(%esp), %xmm0
90; X86-NEXT:    leal {{[0-9]+}}(%esp), %eax
91; X86-NEXT:    leal {{[0-9]+}}(%esp), %ecx
92; X86-NEXT:    cmovael %eax, %ecx
93; X86-NEXT:    fldl (%ecx)
94; X86-NEXT:    retl
95;
96; X64-LABEL: select03:
97; X64:       # %bb.0:
98; X64-NEXT:    vcmplesd %xmm0, %xmm3, %k1
99; X64-NEXT:    vmovsd %xmm2, %xmm1, %xmm1 {%k1}
100; X64-NEXT:    vmovapd %xmm1, %xmm0
101; X64-NEXT:    retq
102  %cmp = fcmp oge double %a, %eps
103  %cond = select i1 %cmp, double %c, double %b
104  ret double %cond
105}
106
; select04: vector select with a constant mask over <16 x double>: the first
; eight lanes come from %a and the last eight lanes come from %b.
; The 64-bit checks expect this to reduce to a single zmm register move; the
; 32-bit checks expect one aligned 64-byte load from the incoming stack area
; inside a 64-byte-realigned frame.
; NOTE(review): presumably %a arrives in zmm0/zmm1 and %b in zmm2/zmm3, so only
; the upper half needs replacing -- confirm against the calling convention.
107define <16 x double> @select04(<16 x double> %a, <16 x double> %b) {
108; X86-LABEL: select04:
109; X86:       # %bb.0:
110; X86-NEXT:    pushl %ebp
111; X86-NEXT:    .cfi_def_cfa_offset 8
112; X86-NEXT:    .cfi_offset %ebp, -8
113; X86-NEXT:    movl %esp, %ebp
114; X86-NEXT:    .cfi_def_cfa_register %ebp
115; X86-NEXT:    andl $-64, %esp
116; X86-NEXT:    subl $64, %esp
117; X86-NEXT:    vmovaps 8(%ebp), %zmm1
118; X86-NEXT:    movl %ebp, %esp
119; X86-NEXT:    popl %ebp
120; X86-NEXT:    .cfi_def_cfa %esp, 4
121; X86-NEXT:    retl
122;
123; X64-LABEL: select04:
124; X64:       # %bb.0:
125; X64-NEXT:    vmovaps %zmm3, %zmm1
126; X64-NEXT:    retq
  ; Constant condition: lanes 0-7 are true (take %a), lanes 8-15 are false (take %b).
127  %sel = select <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>, <16 x double> %a, <16 x double> %b
128  ret <16 x double> %sel
129}
130
; select05: <8 x i1> select whose true operand is all-ones, with the mask and
; the false operand both bitcast from i8 arguments. Per lane this is
; "mask ? 1 : a", and the checks expect it to be emitted as a plain scalar
; OR of the two i8 arguments (orb / orl), with no mask registers involved.
131define i8 @select05(i8 %a.0, i8 %m) {
132; X86-LABEL: select05:
133; X86:       # %bb.0:
134; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
135; X86-NEXT:    orb {{[0-9]+}}(%esp), %al
136; X86-NEXT:    retl
137;
138; X64-LABEL: select05:
139; X64:       # %bb.0:
140; X64-NEXT:    movl %edi, %eax
141; X64-NEXT:    orl %esi, %eax
142; X64-NEXT:    # kill: def $al killed $al killed $eax
143; X64-NEXT:    retq
144  %mask = bitcast i8 %m to <8 x i1>
145  %a = bitcast i8 %a.0 to <8 x i1>
146  %r = select <8 x i1> %mask, <8 x i1> <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>, <8 x i1> %a
147  %res = bitcast <8 x i1> %r to i8
148  ret i8 %res;
149}
150
; select05_mem: same select as select05 (mask ? all-ones : a), but the mask and
; the false operand are loaded as <8 x i1> from memory instead of being bitcast
; from i8 arguments.
; The checks expect both operands to be loaded straight into mask registers
; with kmovw and combined with korw. The AVX512F and AVX512BW expectations
; differ only in the final mask-to-GPR move (kmovw vs kmovd).
151define i8 @select05_mem(<8 x i1>* %a.0, <8 x i1>* %m) {
152; X86-AVX512F-LABEL: select05_mem:
153; X86-AVX512F:       # %bb.0:
154; X86-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
155; X86-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %ecx
156; X86-AVX512F-NEXT:    kmovw (%ecx), %k0
157; X86-AVX512F-NEXT:    kmovw (%eax), %k1
158; X86-AVX512F-NEXT:    korw %k1, %k0, %k0
159; X86-AVX512F-NEXT:    kmovw %k0, %eax
160; X86-AVX512F-NEXT:    # kill: def $al killed $al killed $eax
161; X86-AVX512F-NEXT:    retl
162;
163; X64-AVX512F-LABEL: select05_mem:
164; X64-AVX512F:       # %bb.0:
165; X64-AVX512F-NEXT:    kmovw (%rsi), %k0
166; X64-AVX512F-NEXT:    kmovw (%rdi), %k1
167; X64-AVX512F-NEXT:    korw %k1, %k0, %k0
168; X64-AVX512F-NEXT:    kmovw %k0, %eax
169; X64-AVX512F-NEXT:    # kill: def $al killed $al killed $eax
170; X64-AVX512F-NEXT:    retq
171;
172; X86-AVX512BW-LABEL: select05_mem:
173; X86-AVX512BW:       # %bb.0:
174; X86-AVX512BW-NEXT:    movl {{[0-9]+}}(%esp), %eax
175; X86-AVX512BW-NEXT:    movl {{[0-9]+}}(%esp), %ecx
176; X86-AVX512BW-NEXT:    kmovw (%ecx), %k0
177; X86-AVX512BW-NEXT:    kmovw (%eax), %k1
178; X86-AVX512BW-NEXT:    korw %k1, %k0, %k0
179; X86-AVX512BW-NEXT:    kmovd %k0, %eax
180; X86-AVX512BW-NEXT:    # kill: def $al killed $al killed $eax
181; X86-AVX512BW-NEXT:    retl
182;
183; X64-AVX512BW-LABEL: select05_mem:
184; X64-AVX512BW:       # %bb.0:
185; X64-AVX512BW-NEXT:    kmovw (%rsi), %k0
186; X64-AVX512BW-NEXT:    kmovw (%rdi), %k1
187; X64-AVX512BW-NEXT:    korw %k1, %k0, %k0
188; X64-AVX512BW-NEXT:    kmovd %k0, %eax
189; X64-AVX512BW-NEXT:    # kill: def $al killed $al killed $eax
190; X64-AVX512BW-NEXT:    retq
191  %mask = load <8 x i1> , <8 x i1>* %m
192  %a = load <8 x i1> , <8 x i1>* %a.0
193  %r = select <8 x i1> %mask, <8 x i1> <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>, <8 x i1> %a
194  %res = bitcast <8 x i1> %r to i8
195  ret i8 %res;
196}
197
; select06: <8 x i1> select whose false operand is all-zero, with the mask and
; the true operand both bitcast from i8 arguments. Per lane this is
; "mask ? a : 0", and the checks expect it to be emitted as a plain scalar
; AND of the two i8 arguments (andb / andl).
198define i8 @select06(i8 %a.0, i8 %m) {
199; X86-LABEL: select06:
200; X86:       # %bb.0:
201; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
202; X86-NEXT:    andb {{[0-9]+}}(%esp), %al
203; X86-NEXT:    retl
204;
205; X64-LABEL: select06:
206; X64:       # %bb.0:
207; X64-NEXT:    movl %edi, %eax
208; X64-NEXT:    andl %esi, %eax
209; X64-NEXT:    # kill: def $al killed $al killed $eax
210; X64-NEXT:    retq
211  %mask = bitcast i8 %m to <8 x i1>
212  %a = bitcast i8 %a.0 to <8 x i1>
213  %r = select <8 x i1> %mask, <8 x i1> %a, <8 x i1> zeroinitializer
214  %res = bitcast <8 x i1> %r to i8
215  ret i8 %res;
216}
217
; select06_mem: same select as select06 (mask ? a : zero), but the mask and the
; true operand are loaded as <8 x i1> from memory.
; The checks expect both operands to be loaded into mask registers with kmovw
; and combined with kandw. The AVX512F and AVX512BW expectations differ only in
; the final mask-to-GPR move (kmovw vs kmovd).
218define i8 @select06_mem(<8 x i1>* %a.0, <8 x i1>* %m) {
219; X86-AVX512F-LABEL: select06_mem:
220; X86-AVX512F:       # %bb.0:
221; X86-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
222; X86-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %ecx
223; X86-AVX512F-NEXT:    kmovw (%ecx), %k0
224; X86-AVX512F-NEXT:    kmovw (%eax), %k1
225; X86-AVX512F-NEXT:    kandw %k1, %k0, %k0
226; X86-AVX512F-NEXT:    kmovw %k0, %eax
227; X86-AVX512F-NEXT:    # kill: def $al killed $al killed $eax
228; X86-AVX512F-NEXT:    retl
229;
230; X64-AVX512F-LABEL: select06_mem:
231; X64-AVX512F:       # %bb.0:
232; X64-AVX512F-NEXT:    kmovw (%rsi), %k0
233; X64-AVX512F-NEXT:    kmovw (%rdi), %k1
234; X64-AVX512F-NEXT:    kandw %k1, %k0, %k0
235; X64-AVX512F-NEXT:    kmovw %k0, %eax
236; X64-AVX512F-NEXT:    # kill: def $al killed $al killed $eax
237; X64-AVX512F-NEXT:    retq
238;
239; X86-AVX512BW-LABEL: select06_mem:
240; X86-AVX512BW:       # %bb.0:
241; X86-AVX512BW-NEXT:    movl {{[0-9]+}}(%esp), %eax
242; X86-AVX512BW-NEXT:    movl {{[0-9]+}}(%esp), %ecx
243; X86-AVX512BW-NEXT:    kmovw (%ecx), %k0
244; X86-AVX512BW-NEXT:    kmovw (%eax), %k1
245; X86-AVX512BW-NEXT:    kandw %k1, %k0, %k0
246; X86-AVX512BW-NEXT:    kmovd %k0, %eax
247; X86-AVX512BW-NEXT:    # kill: def $al killed $al killed $eax
248; X86-AVX512BW-NEXT:    retl
249;
250; X64-AVX512BW-LABEL: select06_mem:
251; X64-AVX512BW:       # %bb.0:
252; X64-AVX512BW-NEXT:    kmovw (%rsi), %k0
253; X64-AVX512BW-NEXT:    kmovw (%rdi), %k1
254; X64-AVX512BW-NEXT:    kandw %k1, %k0, %k0
255; X64-AVX512BW-NEXT:    kmovd %k0, %eax
256; X64-AVX512BW-NEXT:    # kill: def $al killed $al killed $eax
257; X64-AVX512BW-NEXT:    retq
258  %mask = load <8 x i1> , <8 x i1>* %m
259  %a = load <8 x i1> , <8 x i1>* %a.0
260  %r = select <8 x i1> %mask, <8 x i1> %a, <8 x i1> zeroinitializer
261  %res = bitcast <8 x i1> %r to i8
262  ret i8 %res;
263}
; select07: general <8 x i1> select (mask ? a : b) where all three operands are
; bitcast from i8 arguments, so neither side is a constant.
; The checks expect the three values to be moved into mask registers and the
; select to be built from kandnw + kandw + korw. The AVX512BW expectations use
; kmovd where the AVX512F ones use kmovw for the GPR <-> mask register moves.
264define i8 @select07(i8 %a.0, i8 %b.0, i8 %m) {
265; X86-AVX512F-LABEL: select07:
266; X86-AVX512F:       # %bb.0:
267; X86-AVX512F-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
268; X86-AVX512F-NEXT:    kmovw %eax, %k0
269; X86-AVX512F-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
270; X86-AVX512F-NEXT:    kmovw %eax, %k1
271; X86-AVX512F-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
272; X86-AVX512F-NEXT:    kmovw %eax, %k2
273; X86-AVX512F-NEXT:    kandnw %k2, %k0, %k2
274; X86-AVX512F-NEXT:    kandw %k0, %k1, %k0
275; X86-AVX512F-NEXT:    korw %k2, %k0, %k0
276; X86-AVX512F-NEXT:    kmovw %k0, %eax
277; X86-AVX512F-NEXT:    # kill: def $al killed $al killed $eax
278; X86-AVX512F-NEXT:    retl
279;
280; X64-AVX512F-LABEL: select07:
281; X64-AVX512F:       # %bb.0:
282; X64-AVX512F-NEXT:    kmovw %edx, %k0
283; X64-AVX512F-NEXT:    kmovw %edi, %k1
284; X64-AVX512F-NEXT:    kmovw %esi, %k2
285; X64-AVX512F-NEXT:    kandnw %k2, %k0, %k2
286; X64-AVX512F-NEXT:    kandw %k0, %k1, %k0
287; X64-AVX512F-NEXT:    korw %k2, %k0, %k0
288; X64-AVX512F-NEXT:    kmovw %k0, %eax
289; X64-AVX512F-NEXT:    # kill: def $al killed $al killed $eax
290; X64-AVX512F-NEXT:    retq
291;
292; X86-AVX512BW-LABEL: select07:
293; X86-AVX512BW:       # %bb.0:
294; X86-AVX512BW-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
295; X86-AVX512BW-NEXT:    kmovd %eax, %k0
296; X86-AVX512BW-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
297; X86-AVX512BW-NEXT:    kmovd %eax, %k1
298; X86-AVX512BW-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
299; X86-AVX512BW-NEXT:    kmovd %eax, %k2
300; X86-AVX512BW-NEXT:    kandnw %k2, %k0, %k2
301; X86-AVX512BW-NEXT:    kandw %k0, %k1, %k0
302; X86-AVX512BW-NEXT:    korw %k2, %k0, %k0
303; X86-AVX512BW-NEXT:    kmovd %k0, %eax
304; X86-AVX512BW-NEXT:    # kill: def $al killed $al killed $eax
305; X86-AVX512BW-NEXT:    retl
306;
307; X64-AVX512BW-LABEL: select07:
308; X64-AVX512BW:       # %bb.0:
309; X64-AVX512BW-NEXT:    kmovd %edx, %k0
310; X64-AVX512BW-NEXT:    kmovd %edi, %k1
311; X64-AVX512BW-NEXT:    kmovd %esi, %k2
312; X64-AVX512BW-NEXT:    kandnw %k2, %k0, %k2
313; X64-AVX512BW-NEXT:    kandw %k0, %k1, %k0
314; X64-AVX512BW-NEXT:    korw %k2, %k0, %k0
315; X64-AVX512BW-NEXT:    kmovd %k0, %eax
316; X64-AVX512BW-NEXT:    # kill: def $al killed $al killed $eax
317; X64-AVX512BW-NEXT:    retq
318  %mask = bitcast i8 %m to <8 x i1>
319  %a = bitcast i8 %a.0 to <8 x i1>
320  %b = bitcast i8 %b.0 to <8 x i1>
321  %r = select <8 x i1> %mask, <8 x i1> %a, <8 x i1> %b
322  %res = bitcast <8 x i1> %r to i8
323  ret i8 %res;
324}
325
; pr30249: i64 select between the constants 1 and 2 with an undef condition.
; The checks expect the function to simply return the constant 1 on both
; targets (on the 32-bit target the upper half in %edx is zeroed).
; NOTE(review): the name suggests an upstream bug-tracker entry; the report
; itself is not part of this file.
326define i64 @pr30249() {
327; X86-LABEL: pr30249:
328; X86:       # %bb.0:
329; X86-NEXT:    movl $1, %eax
330; X86-NEXT:    xorl %edx, %edx
331; X86-NEXT:    retl
332;
333; X64-LABEL: pr30249:
334; X64:       # %bb.0:
335; X64-NEXT:    movl $1, %eax
336; X64-NEXT:    retq
337  %v = select i1 undef , i64 1, i64 2
338  ret i64 %v
339}
340
; pr30561_f64: scalar double select on an i1 argument (%c ? %a : %b).
; The 32-bit checks expect a testb on the condition and a cmov between two
; stack addresses followed by an x87 load. The 64-bit checks expect the
; condition to be moved into %k1 (kmovw for AVX512F, kmovd for AVX512BW) and a
; masked vmovsd.
341define double @pr30561_f64(double %b, double %a, i1 %c) {
342; X86-LABEL: pr30561_f64:
343; X86:       # %bb.0:
344; X86-NEXT:    testb $1, {{[0-9]+}}(%esp)
345; X86-NEXT:    leal {{[0-9]+}}(%esp), %eax
346; X86-NEXT:    leal {{[0-9]+}}(%esp), %ecx
347; X86-NEXT:    cmovnel %eax, %ecx
348; X86-NEXT:    fldl (%ecx)
349; X86-NEXT:    retl
350;
351; X64-AVX512F-LABEL: pr30561_f64:
352; X64-AVX512F:       # %bb.0:
353; X64-AVX512F-NEXT:    kmovw %edi, %k1
354; X64-AVX512F-NEXT:    vmovsd %xmm1, %xmm0, %xmm0 {%k1}
355; X64-AVX512F-NEXT:    retq
356;
357; X64-AVX512BW-LABEL: pr30561_f64:
358; X64-AVX512BW:       # %bb.0:
359; X64-AVX512BW-NEXT:    kmovd %edi, %k1
360; X64-AVX512BW-NEXT:    vmovsd %xmm1, %xmm0, %xmm0 {%k1}
361; X64-AVX512BW-NEXT:    retq
362  %cond = select i1 %c, double %a, double %b
363  ret double %cond
364}
365
; pr30561_f32: single-precision counterpart of pr30561_f64
; (%c ? %a : %b on float).
; The 32-bit checks expect testb + cmov between stack addresses + flds. The
; 64-bit checks expect the condition in %k1 (kmovw for AVX512F, kmovd for
; AVX512BW) and a masked vmovss.
366define float @pr30561_f32(float %b, float %a, i1 %c) {
367; X86-LABEL: pr30561_f32:
368; X86:       # %bb.0:
369; X86-NEXT:    testb $1, {{[0-9]+}}(%esp)
370; X86-NEXT:    leal {{[0-9]+}}(%esp), %eax
371; X86-NEXT:    leal {{[0-9]+}}(%esp), %ecx
372; X86-NEXT:    cmovnel %eax, %ecx
373; X86-NEXT:    flds (%ecx)
374; X86-NEXT:    retl
375;
376; X64-AVX512F-LABEL: pr30561_f32:
377; X64-AVX512F:       # %bb.0:
378; X64-AVX512F-NEXT:    kmovw %edi, %k1
379; X64-AVX512F-NEXT:    vmovss %xmm1, %xmm0, %xmm0 {%k1}
380; X64-AVX512F-NEXT:    retq
381;
382; X64-AVX512BW-LABEL: pr30561_f32:
383; X64-AVX512BW:       # %bb.0:
384; X64-AVX512BW-NEXT:    kmovd %edi, %k1
385; X64-AVX512BW-NEXT:    vmovss %xmm1, %xmm0, %xmm0 {%k1}
386; X64-AVX512BW-NEXT:    retq
387  %cond = select i1 %c, float %a, float %b
388  ret float %cond
389}
390
; pr31515: <16 x i16> select whose condition is the AND of two <16 x i1>
; arguments and whose true operand is zero: lanes where (%a & %b) is set become
; zero, all other lanes keep %c.
; The AVX512F checks expect the mask to be widened to 16-bit lanes
; (vpmovzxbw, then vpsllw/vpsraw by 15) and applied with vpandn. The AVX512BW
; checks expect the mask to be moved into a mask register (vpmovb2m), inverted
; with knotw, and applied with a zero-masking vmovdqu16.
391define <16 x i16> @pr31515(<16 x i1> %a, <16 x i1> %b, <16 x i16> %c) nounwind {
392; X86-AVX512F-LABEL: pr31515:
393; X86-AVX512F:       # %bb.0:
394; X86-AVX512F-NEXT:    vpand %xmm1, %xmm0, %xmm0
395; X86-AVX512F-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
396; X86-AVX512F-NEXT:    vpsllw $15, %ymm0, %ymm0
397; X86-AVX512F-NEXT:    vpsraw $15, %ymm0, %ymm0
398; X86-AVX512F-NEXT:    vpandn %ymm2, %ymm0, %ymm0
399; X86-AVX512F-NEXT:    retl
400;
401; X64-AVX512F-LABEL: pr31515:
402; X64-AVX512F:       # %bb.0:
403; X64-AVX512F-NEXT:    vpand %xmm1, %xmm0, %xmm0
404; X64-AVX512F-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
405; X64-AVX512F-NEXT:    vpsllw $15, %ymm0, %ymm0
406; X64-AVX512F-NEXT:    vpsraw $15, %ymm0, %ymm0
407; X64-AVX512F-NEXT:    vpandn %ymm2, %ymm0, %ymm0
408; X64-AVX512F-NEXT:    retq
409;
410; X86-AVX512BW-LABEL: pr31515:
411; X86-AVX512BW:       # %bb.0:
412; X86-AVX512BW-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
413; X86-AVX512BW-NEXT:    vpand %xmm1, %xmm0, %xmm0
414; X86-AVX512BW-NEXT:    vpsllw $7, %xmm0, %xmm0
415; X86-AVX512BW-NEXT:    vpmovb2m %zmm0, %k0
416; X86-AVX512BW-NEXT:    knotw %k0, %k1
417; X86-AVX512BW-NEXT:    vmovdqu16 %zmm2, %zmm0 {%k1} {z}
418; X86-AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
419; X86-AVX512BW-NEXT:    retl
420;
421; X64-AVX512BW-LABEL: pr31515:
422; X64-AVX512BW:       # %bb.0:
423; X64-AVX512BW-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
424; X64-AVX512BW-NEXT:    vpand %xmm1, %xmm0, %xmm0
425; X64-AVX512BW-NEXT:    vpsllw $7, %xmm0, %xmm0
426; X64-AVX512BW-NEXT:    vpmovb2m %zmm0, %k0
427; X64-AVX512BW-NEXT:    knotw %k0, %k1
428; X64-AVX512BW-NEXT:    vmovdqu16 %zmm2, %zmm0 {%k1} {z}
429; X64-AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
430; X64-AVX512BW-NEXT:    retq
431  %mask = and <16 x i1> %a, %b
432  %res = select <16 x i1> %mask, <16 x i16> zeroinitializer, <16 x i16> %c
433  ret <16 x i16> %res
434}
435
; pr42355_v32i16: scalar i1 condition selecting between two whole <32 x i16>
; vectors (%c ? %x : %y).
; The checks expect the same code for every run line: a testb on the condition
; and a branch around a single zmm register move.
436define <32 x i16> @pr42355_v32i16(i1 %c, <32 x i16> %x, <32 x i16> %y) {
437; X86-LABEL: pr42355_v32i16:
438; X86:       # %bb.0:
439; X86-NEXT:    testb $1, {{[0-9]+}}(%esp)
440; X86-NEXT:    jne .LBB14_2
441; X86-NEXT:  # %bb.1:
442; X86-NEXT:    vmovaps %zmm1, %zmm0
443; X86-NEXT:  .LBB14_2:
444; X86-NEXT:    retl
445;
446; X64-LABEL: pr42355_v32i16:
447; X64:       # %bb.0:
448; X64-NEXT:    testb $1, %dil
449; X64-NEXT:    jne .LBB14_2
450; X64-NEXT:  # %bb.1:
451; X64-NEXT:    vmovaps %zmm1, %zmm0
452; X64-NEXT:  .LBB14_2:
453; X64-NEXT:    retq
454  %a = select i1 %c, <32 x i16> %x, <32 x i16> %y
455  ret <32 x i16> %a
456}
457
; pr42355_v64i8: byte-element counterpart of pr42355_v32i16: a scalar i1
; condition selects between two whole <64 x i8> vectors (%c ? %x : %y).
; The checks expect a testb on the condition and a branch around a single zmm
; register move for every run line.
458define <64 x i8> @pr42355_v64i8(i1 %c, <64 x i8> %x, <64 x i8> %y) {
459; X86-LABEL: pr42355_v64i8:
460; X86:       # %bb.0:
461; X86-NEXT:    testb $1, {{[0-9]+}}(%esp)
462; X86-NEXT:    jne .LBB15_2
463; X86-NEXT:  # %bb.1:
464; X86-NEXT:    vmovaps %zmm1, %zmm0
465; X86-NEXT:  .LBB15_2:
466; X86-NEXT:    retl
467;
468; X64-LABEL: pr42355_v64i8:
469; X64:       # %bb.0:
470; X64-NEXT:    testb $1, %dil
471; X64-NEXT:    jne .LBB15_2
472; X64-NEXT:  # %bb.1:
473; X64-NEXT:    vmovaps %zmm1, %zmm0
474; X64-NEXT:  .LBB15_2:
475; X64-NEXT:    retq
476  %a = select i1 %c, <64 x i8> %x, <64 x i8> %y
477  ret <64 x i8> %a
478}
479
480; This would crash because AVX512 has legal vector select
481; condition values that are not 256/512-bit vectors.
482
; narrowExtractedVectorSelect_crash: compares each i64 lane of %arg against
; zero, uses the resulting <16 x i1> to select between %arg1 and zero on
; <16 x i16>, then zero-extends the result to <16 x i64>.
; Every configuration is expected to build the 16-bit condition from two
; vptestmq results joined with kunpckbw. The AVX512F checks then materialize
; the mask as a vector (vpternlogd + vpmovdw) and apply it with vpand, while
; the AVX512BW checks apply it directly with a zero-masking vmovdqu16. Both
; finish with two vpmovzxwq extensions.
; NOTE(review): attribute group #0 is referenced below, but its definition is
; not visible in this part of the file -- confirm whether it exists.
483define <16 x i64> @narrowExtractedVectorSelect_crash(<16 x i64> %arg, <16 x i16> %arg1) #0 {
484; X86-AVX512F-LABEL: narrowExtractedVectorSelect_crash:
485; X86-AVX512F:       # %bb.0:
486; X86-AVX512F-NEXT:    vptestmq %zmm0, %zmm0, %k0
487; X86-AVX512F-NEXT:    vptestmq %zmm1, %zmm1, %k1
488; X86-AVX512F-NEXT:    kunpckbw %k0, %k1, %k1
489; X86-AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
490; X86-AVX512F-NEXT:    vpmovdw %zmm0, %ymm0
491; X86-AVX512F-NEXT:    vpand %ymm2, %ymm0, %ymm1
492; X86-AVX512F-NEXT:    vpmovzxwq {{.*#+}} zmm0 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero
493; X86-AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm1
494; X86-AVX512F-NEXT:    vpmovzxwq {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero
495; X86-AVX512F-NEXT:    retl
496;
497; X64-AVX512F-LABEL: narrowExtractedVectorSelect_crash:
498; X64-AVX512F:       # %bb.0:
499; X64-AVX512F-NEXT:    vptestmq %zmm0, %zmm0, %k0
500; X64-AVX512F-NEXT:    vptestmq %zmm1, %zmm1, %k1
501; X64-AVX512F-NEXT:    kunpckbw %k0, %k1, %k1
502; X64-AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
503; X64-AVX512F-NEXT:    vpmovdw %zmm0, %ymm0
504; X64-AVX512F-NEXT:    vpand %ymm2, %ymm0, %ymm1
505; X64-AVX512F-NEXT:    vpmovzxwq {{.*#+}} zmm0 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero
506; X64-AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm1
507; X64-AVX512F-NEXT:    vpmovzxwq {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero
508; X64-AVX512F-NEXT:    retq
509;
510; X86-AVX512BW-LABEL: narrowExtractedVectorSelect_crash:
511; X86-AVX512BW:       # %bb.0:
512; X86-AVX512BW-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
513; X86-AVX512BW-NEXT:    vptestmq %zmm0, %zmm0, %k0
514; X86-AVX512BW-NEXT:    vptestmq %zmm1, %zmm1, %k1
515; X86-AVX512BW-NEXT:    kunpckbw %k0, %k1, %k1
516; X86-AVX512BW-NEXT:    vmovdqu16 %zmm2, %zmm1 {%k1} {z}
517; X86-AVX512BW-NEXT:    vpmovzxwq {{.*#+}} zmm0 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero
518; X86-AVX512BW-NEXT:    vextracti128 $1, %ymm1, %xmm1
519; X86-AVX512BW-NEXT:    vpmovzxwq {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero
520; X86-AVX512BW-NEXT:    retl
521;
522; X64-AVX512BW-LABEL: narrowExtractedVectorSelect_crash:
523; X64-AVX512BW:       # %bb.0:
524; X64-AVX512BW-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
525; X64-AVX512BW-NEXT:    vptestmq %zmm0, %zmm0, %k0
526; X64-AVX512BW-NEXT:    vptestmq %zmm1, %zmm1, %k1
527; X64-AVX512BW-NEXT:    kunpckbw %k0, %k1, %k1
528; X64-AVX512BW-NEXT:    vmovdqu16 %zmm2, %zmm1 {%k1} {z}
529; X64-AVX512BW-NEXT:    vpmovzxwq {{.*#+}} zmm0 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero
530; X64-AVX512BW-NEXT:    vextracti128 $1, %ymm1, %xmm1
531; X64-AVX512BW-NEXT:    vpmovzxwq {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero
532; X64-AVX512BW-NEXT:    retq
533  %tmp = icmp ne <16 x i64> %arg, zeroinitializer
534  %tmp2 = select <16 x i1> %tmp, <16 x i16> %arg1, <16 x i16> zeroinitializer
535  %tmp3 = zext <16 x i16> %tmp2 to <16 x i64>
536  ret <16 x i64> %tmp3
537}
538
; vselect_v1i1: vector select on single-element i1 vectors. Loads <1 x i1>
; values from %x, %y and %w, uses the value from %w as the (vector) condition
; to choose between the %x value and the %y value, and stores the result back
; through %x.
; The checks expect each byte to be loaded and moved into a mask register, the
; select to be built from kandnw + kandw + korw, and the result to be reduced
; to its low bit (kshiftlw/kshiftrw by 15) before the byte store. The AVX512BW
; expectations use kmovd where the AVX512F ones use kmovw.
539define void @vselect_v1i1(<1 x i1>* %w, <1 x i1>* %x, <1 x i1>* %y) nounwind {
540; X86-AVX512F-LABEL: vselect_v1i1:
541; X86-AVX512F:       # %bb.0:
542; X86-AVX512F-NEXT:    pushl %esi
543; X86-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
544; X86-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %ecx
545; X86-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %edx
546; X86-AVX512F-NEXT:    movzbl (%edx), %esi
547; X86-AVX512F-NEXT:    kmovw %esi, %k0
548; X86-AVX512F-NEXT:    movzbl (%ecx), %ecx
549; X86-AVX512F-NEXT:    kmovw %ecx, %k1
550; X86-AVX512F-NEXT:    movzbl (%eax), %eax
551; X86-AVX512F-NEXT:    kmovw %eax, %k2
552; X86-AVX512F-NEXT:    kandnw %k1, %k2, %k1
553; X86-AVX512F-NEXT:    kandw %k2, %k0, %k0
554; X86-AVX512F-NEXT:    korw %k1, %k0, %k0
555; X86-AVX512F-NEXT:    kshiftlw $15, %k0, %k0
556; X86-AVX512F-NEXT:    kshiftrw $15, %k0, %k0
557; X86-AVX512F-NEXT:    kmovw %k0, %eax
558; X86-AVX512F-NEXT:    movb %al, (%edx)
559; X86-AVX512F-NEXT:    popl %esi
560; X86-AVX512F-NEXT:    retl
561;
562; X64-AVX512F-LABEL: vselect_v1i1:
563; X64-AVX512F:       # %bb.0:
564; X64-AVX512F-NEXT:    movzbl (%rsi), %eax
565; X64-AVX512F-NEXT:    kmovw %eax, %k0
566; X64-AVX512F-NEXT:    movzbl (%rdx), %eax
567; X64-AVX512F-NEXT:    kmovw %eax, %k1
568; X64-AVX512F-NEXT:    movzbl (%rdi), %eax
569; X64-AVX512F-NEXT:    kmovw %eax, %k2
570; X64-AVX512F-NEXT:    kandnw %k1, %k2, %k1
571; X64-AVX512F-NEXT:    kandw %k2, %k0, %k0
572; X64-AVX512F-NEXT:    korw %k1, %k0, %k0
573; X64-AVX512F-NEXT:    kshiftlw $15, %k0, %k0
574; X64-AVX512F-NEXT:    kshiftrw $15, %k0, %k0
575; X64-AVX512F-NEXT:    kmovw %k0, %eax
576; X64-AVX512F-NEXT:    movb %al, (%rsi)
577; X64-AVX512F-NEXT:    retq
578;
579; X86-AVX512BW-LABEL: vselect_v1i1:
580; X86-AVX512BW:       # %bb.0:
581; X86-AVX512BW-NEXT:    pushl %esi
582; X86-AVX512BW-NEXT:    movl {{[0-9]+}}(%esp), %eax
583; X86-AVX512BW-NEXT:    movl {{[0-9]+}}(%esp), %ecx
584; X86-AVX512BW-NEXT:    movl {{[0-9]+}}(%esp), %edx
585; X86-AVX512BW-NEXT:    movzbl (%edx), %esi
586; X86-AVX512BW-NEXT:    kmovd %esi, %k0
587; X86-AVX512BW-NEXT:    movzbl (%ecx), %ecx
588; X86-AVX512BW-NEXT:    kmovd %ecx, %k1
589; X86-AVX512BW-NEXT:    movzbl (%eax), %eax
590; X86-AVX512BW-NEXT:    kmovd %eax, %k2
591; X86-AVX512BW-NEXT:    kandnw %k1, %k2, %k1
592; X86-AVX512BW-NEXT:    kandw %k2, %k0, %k0
593; X86-AVX512BW-NEXT:    korw %k1, %k0, %k0
594; X86-AVX512BW-NEXT:    kshiftlw $15, %k0, %k0
595; X86-AVX512BW-NEXT:    kshiftrw $15, %k0, %k0
596; X86-AVX512BW-NEXT:    kmovd %k0, %eax
597; X86-AVX512BW-NEXT:    movb %al, (%edx)
598; X86-AVX512BW-NEXT:    popl %esi
599; X86-AVX512BW-NEXT:    retl
600;
601; X64-AVX512BW-LABEL: vselect_v1i1:
602; X64-AVX512BW:       # %bb.0:
603; X64-AVX512BW-NEXT:    movzbl (%rsi), %eax
604; X64-AVX512BW-NEXT:    kmovd %eax, %k0
605; X64-AVX512BW-NEXT:    movzbl (%rdx), %eax
606; X64-AVX512BW-NEXT:    kmovd %eax, %k1
607; X64-AVX512BW-NEXT:    movzbl (%rdi), %eax
608; X64-AVX512BW-NEXT:    kmovd %eax, %k2
609; X64-AVX512BW-NEXT:    kandnw %k1, %k2, %k1
610; X64-AVX512BW-NEXT:    kandw %k2, %k0, %k0
611; X64-AVX512BW-NEXT:    korw %k1, %k0, %k0
612; X64-AVX512BW-NEXT:    kshiftlw $15, %k0, %k0
613; X64-AVX512BW-NEXT:    kshiftrw $15, %k0, %k0
614; X64-AVX512BW-NEXT:    kmovd %k0, %eax
615; X64-AVX512BW-NEXT:    movb %al, (%rsi)
616; X64-AVX512BW-NEXT:    retq
617  %a = load <1 x i1>, <1 x i1>* %x
618  %b = load <1 x i1>, <1 x i1>* %y
619  %b2 = load <1 x i1>, <1 x i1>* %w
620  %c = select <1 x i1> %b2, <1 x i1> %a, <1 x i1> %b
621  store <1 x i1> %c, <1 x i1>* %x
622  ret void
623}
624
625; Scalar condition with v1i1 operands
; Loads <1 x i1> values from %x, %y and %w, xors the %y and %w values together,
; then uses the scalar i1 argument %z to choose between the %x value and that
; xor. The result is stored back through %x.
; The checks expect a branch on %z: the fall-through path loads two bytes and
; combines them with kxorw, the taken path loads a single byte. Both paths join
; at a common tail that reduces the mask to its low bit (kshiftlw/kshiftrw by
; 15) and stores one byte. The AVX512BW expectations use kmovd where the
; AVX512F ones use kmovw.
626define void @select_v1i1(<1 x i1>* %w, <1 x i1>* %x, <1 x i1>* %y, i1 %z) nounwind {
627; X86-AVX512F-LABEL: select_v1i1:
628; X86-AVX512F:       # %bb.0:
629; X86-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
630; X86-AVX512F-NEXT:    testb $1, {{[0-9]+}}(%esp)
631; X86-AVX512F-NEXT:    jne .LBB18_1
632; X86-AVX512F-NEXT:  # %bb.2:
633; X86-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %ecx
634; X86-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %edx
635; X86-AVX512F-NEXT:    movzbl (%edx), %edx
636; X86-AVX512F-NEXT:    kmovw %edx, %k0
637; X86-AVX512F-NEXT:    movzbl (%ecx), %ecx
638; X86-AVX512F-NEXT:    kmovw %ecx, %k1
639; X86-AVX512F-NEXT:    kxorw %k1, %k0, %k0
640; X86-AVX512F-NEXT:    jmp .LBB18_3
641; X86-AVX512F-NEXT:  .LBB18_1:
642; X86-AVX512F-NEXT:    movzbl (%eax), %ecx
643; X86-AVX512F-NEXT:    kmovw %ecx, %k0
644; X86-AVX512F-NEXT:  .LBB18_3:
645; X86-AVX512F-NEXT:    kshiftlw $15, %k0, %k0
646; X86-AVX512F-NEXT:    kshiftrw $15, %k0, %k0
647; X86-AVX512F-NEXT:    kmovw %k0, %ecx
648; X86-AVX512F-NEXT:    movb %cl, (%eax)
649; X86-AVX512F-NEXT:    retl
650;
651; X64-AVX512F-LABEL: select_v1i1:
652; X64-AVX512F:       # %bb.0:
653; X64-AVX512F-NEXT:    testb $1, %cl
654; X64-AVX512F-NEXT:    jne .LBB18_1
655; X64-AVX512F-NEXT:  # %bb.2:
656; X64-AVX512F-NEXT:    movzbl (%rdx), %eax
657; X64-AVX512F-NEXT:    kmovw %eax, %k0
658; X64-AVX512F-NEXT:    movzbl (%rdi), %eax
659; X64-AVX512F-NEXT:    kmovw %eax, %k1
660; X64-AVX512F-NEXT:    kxorw %k1, %k0, %k0
661; X64-AVX512F-NEXT:    jmp .LBB18_3
662; X64-AVX512F-NEXT:  .LBB18_1:
663; X64-AVX512F-NEXT:    movzbl (%rsi), %eax
664; X64-AVX512F-NEXT:    kmovw %eax, %k0
665; X64-AVX512F-NEXT:  .LBB18_3:
666; X64-AVX512F-NEXT:    kshiftlw $15, %k0, %k0
667; X64-AVX512F-NEXT:    kshiftrw $15, %k0, %k0
668; X64-AVX512F-NEXT:    kmovw %k0, %eax
669; X64-AVX512F-NEXT:    movb %al, (%rsi)
670; X64-AVX512F-NEXT:    retq
671;
672; X86-AVX512BW-LABEL: select_v1i1:
673; X86-AVX512BW:       # %bb.0:
674; X86-AVX512BW-NEXT:    movl {{[0-9]+}}(%esp), %eax
675; X86-AVX512BW-NEXT:    testb $1, {{[0-9]+}}(%esp)
676; X86-AVX512BW-NEXT:    jne .LBB18_1
677; X86-AVX512BW-NEXT:  # %bb.2:
678; X86-AVX512BW-NEXT:    movl {{[0-9]+}}(%esp), %ecx
679; X86-AVX512BW-NEXT:    movl {{[0-9]+}}(%esp), %edx
680; X86-AVX512BW-NEXT:    movzbl (%edx), %edx
681; X86-AVX512BW-NEXT:    kmovd %edx, %k0
682; X86-AVX512BW-NEXT:    movzbl (%ecx), %ecx
683; X86-AVX512BW-NEXT:    kmovd %ecx, %k1
684; X86-AVX512BW-NEXT:    kxorw %k1, %k0, %k0
685; X86-AVX512BW-NEXT:    jmp .LBB18_3
686; X86-AVX512BW-NEXT:  .LBB18_1:
687; X86-AVX512BW-NEXT:    movzbl (%eax), %ecx
688; X86-AVX512BW-NEXT:    kmovd %ecx, %k0
689; X86-AVX512BW-NEXT:  .LBB18_3:
690; X86-AVX512BW-NEXT:    kshiftlw $15, %k0, %k0
691; X86-AVX512BW-NEXT:    kshiftrw $15, %k0, %k0
692; X86-AVX512BW-NEXT:    kmovd %k0, %ecx
693; X86-AVX512BW-NEXT:    movb %cl, (%eax)
694; X86-AVX512BW-NEXT:    retl
695;
696; X64-AVX512BW-LABEL: select_v1i1:
697; X64-AVX512BW:       # %bb.0:
698; X64-AVX512BW-NEXT:    testb $1, %cl
699; X64-AVX512BW-NEXT:    jne .LBB18_1
700; X64-AVX512BW-NEXT:  # %bb.2:
701; X64-AVX512BW-NEXT:    movzbl (%rdx), %eax
702; X64-AVX512BW-NEXT:    kmovd %eax, %k0
703; X64-AVX512BW-NEXT:    movzbl (%rdi), %eax
704; X64-AVX512BW-NEXT:    kmovd %eax, %k1
705; X64-AVX512BW-NEXT:    kxorw %k1, %k0, %k0
706; X64-AVX512BW-NEXT:    jmp .LBB18_3
707; X64-AVX512BW-NEXT:  .LBB18_1:
708; X64-AVX512BW-NEXT:    movzbl (%rsi), %eax
709; X64-AVX512BW-NEXT:    kmovd %eax, %k0
710; X64-AVX512BW-NEXT:  .LBB18_3:
711; X64-AVX512BW-NEXT:    kshiftlw $15, %k0, %k0
712; X64-AVX512BW-NEXT:    kshiftrw $15, %k0, %k0
713; X64-AVX512BW-NEXT:    kmovd %k0, %eax
714; X64-AVX512BW-NEXT:    movb %al, (%rsi)
715; X64-AVX512BW-NEXT:    retq
716  %a = load <1 x i1>, <1 x i1>* %x
717  %b = load <1 x i1>, <1 x i1>* %y
718  %b2 = load <1 x i1>, <1 x i1>* %w
719  %b3 = xor <1 x i1> %b, %b2
720  %c = select i1 %z, <1 x i1> %a, <1 x i1> %b3
721  store <1 x i1> %c, <1 x i1>* %x
722  ret void
723}
724
725; Regression test from https://github.com/JuliaLang/julia/issues/36955
; Two chained <8 x i1> selects driven by one floating-point compare and its
; negation: lanes where %a is NOT (unordered or) greater than zero become true,
; and the remaining lanes take %mask only where the compare is false (zero
; otherwise). The result is returned as an i8 bitmask.
; The checks expect a single vcmplepd into %k1, a test of the mask bits that is
; itself masked by %k1 (vptestmq for AVX512F, vpcmpgtw for AVX512BW), and a
; korw of the two mask registers.
726define i8 @julia_issue36955(<8 x i1> %mask, <8 x double> %a) {
727; X86-AVX512F-LABEL: julia_issue36955:
728; X86-AVX512F:       # %bb.0:
729; X86-AVX512F-NEXT:    vpmovsxwq %xmm0, %zmm0
730; X86-AVX512F-NEXT:    vpsllq $63, %zmm0, %zmm0
731; X86-AVX512F-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
732; X86-AVX512F-NEXT:    vcmplepd %zmm2, %zmm1, %k1
733; X86-AVX512F-NEXT:    vptestmq %zmm0, %zmm0, %k0 {%k1}
734; X86-AVX512F-NEXT:    korw %k0, %k1, %k0
735; X86-AVX512F-NEXT:    kmovw %k0, %eax
736; X86-AVX512F-NEXT:    # kill: def $al killed $al killed $eax
737; X86-AVX512F-NEXT:    vzeroupper
738; X86-AVX512F-NEXT:    retl
739;
740; X64-AVX512F-LABEL: julia_issue36955:
741; X64-AVX512F:       # %bb.0:
742; X64-AVX512F-NEXT:    vpmovsxwq %xmm0, %zmm0
743; X64-AVX512F-NEXT:    vpsllq $63, %zmm0, %zmm0
744; X64-AVX512F-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
745; X64-AVX512F-NEXT:    vcmplepd %zmm2, %zmm1, %k1
746; X64-AVX512F-NEXT:    vptestmq %zmm0, %zmm0, %k0 {%k1}
747; X64-AVX512F-NEXT:    korw %k0, %k1, %k0
748; X64-AVX512F-NEXT:    kmovw %k0, %eax
749; X64-AVX512F-NEXT:    # kill: def $al killed $al killed $eax
750; X64-AVX512F-NEXT:    vzeroupper
751; X64-AVX512F-NEXT:    retq
752;
753; X86-AVX512BW-LABEL: julia_issue36955:
754; X86-AVX512BW:       # %bb.0:
755; X86-AVX512BW-NEXT:    vpsllw $15, %xmm0, %xmm0
756; X86-AVX512BW-NEXT:    vpxor %xmm2, %xmm2, %xmm2
757; X86-AVX512BW-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
758; X86-AVX512BW-NEXT:    vcmplepd %zmm3, %zmm1, %k1
759; X86-AVX512BW-NEXT:    vpcmpgtw %zmm0, %zmm2, %k0 {%k1}
760; X86-AVX512BW-NEXT:    korw %k0, %k1, %k0
761; X86-AVX512BW-NEXT:    kmovd %k0, %eax
762; X86-AVX512BW-NEXT:    # kill: def $al killed $al killed $eax
763; X86-AVX512BW-NEXT:    vzeroupper
764; X86-AVX512BW-NEXT:    retl
765;
766; X64-AVX512BW-LABEL: julia_issue36955:
767; X64-AVX512BW:       # %bb.0:
768; X64-AVX512BW-NEXT:    vpsllw $15, %xmm0, %xmm0
769; X64-AVX512BW-NEXT:    vpxor %xmm2, %xmm2, %xmm2
770; X64-AVX512BW-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
771; X64-AVX512BW-NEXT:    vcmplepd %zmm3, %zmm1, %k1
772; X64-AVX512BW-NEXT:    vpcmpgtw %zmm0, %zmm2, %k0 {%k1}
773; X64-AVX512BW-NEXT:    korw %k0, %k1, %k0
774; X64-AVX512BW-NEXT:    kmovd %k0, %eax
775; X64-AVX512BW-NEXT:    # kill: def $al killed $al killed $eax
776; X64-AVX512BW-NEXT:    vzeroupper
777; X64-AVX512BW-NEXT:    retq
778  %fcmp = fcmp ugt <8 x double> %a, zeroinitializer
  ; %xor is the lane-wise negation of %fcmp (xor with all-true).
779  %xor = xor <8 x i1> %fcmp, <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
780  %select1 = select <8 x i1> %fcmp, <8 x i1> zeroinitializer, <8 x i1> %mask
781  %select2 = select <8 x i1> %xor, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i1> %select1
782  %ret = bitcast <8 x i1> %select2 to i8
783  ret i8 %ret
784}
785