; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin9 -mcpu=knl | FileCheck %s --check-prefix=ALL_X64 --check-prefix=KNL
; RUN: llc < %s -mtriple=x86_64-apple-darwin9 -mcpu=skx | FileCheck %s --check-prefix=ALL_X64 --check-prefix=SKX
; RUN: llc < %s -mtriple=i686-apple-darwin9 -mcpu=knl | FileCheck %s --check-prefix=KNL_X32

; test1 - return an all-false <16 x i1> constant.
; The assertions below expect every configuration to materialise the result by
; zeroing %xmm0 with vxorps, i.e. the <16 x i1> value is returned in %xmm0.
define <16 x i1> @test1() {
; ALL_X64-LABEL: test1:
; ALL_X64:       ## %bb.0:
; ALL_X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; ALL_X64-NEXT:    retq
;
; KNL_X32-LABEL: test1:
; KNL_X32:       ## %bb.0:
; KNL_X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; KNL_X32-NEXT:    retl
  ret <16 x i1> zeroinitializer
}

; test2 - bitwise AND of two <16 x i1> arguments.
; The assertions expect a single vandps of %xmm1 into %xmm0 on every
; configuration: both mask arguments arrive in XMM registers and the result
; is returned in %xmm0.
define <16 x i1> @test2(<16 x i1>%a, <16 x i1>%b) {
; ALL_X64-LABEL: test2:
; ALL_X64:       ## %bb.0:
; ALL_X64-NEXT:    vandps %xmm1, %xmm0, %xmm0
; ALL_X64-NEXT:    retq
;
; KNL_X32-LABEL: test2:
; KNL_X32:       ## %bb.0:
; KNL_X32-NEXT:    vandps %xmm1, %xmm0, %xmm0
; KNL_X32-NEXT:    retl
  %c = and <16 x i1>%a, %b
  ret <16 x i1> %c
}

; test3 - bitwise AND of two <8 x i1> arguments.
; Same shape as the 16-lane variant: the assertions expect one vandps on
; %xmm0/%xmm1 for all three configurations.
define <8 x i1> @test3(<8 x i1>%a, <8 x i1>%b) {
; ALL_X64-LABEL: test3:
; ALL_X64:       ## %bb.0:
; ALL_X64-NEXT:    vandps %xmm1, %xmm0, %xmm0
; ALL_X64-NEXT:    retq
;
; KNL_X32-LABEL: test3:
; KNL_X32:       ## %bb.0:
; KNL_X32-NEXT:    vandps %xmm1, %xmm0, %xmm0
; KNL_X32-NEXT:    retl
  %c = and <8 x i1>%a, %b
  ret <8 x i1> %c
}

; test4 - bitwise AND of two <4 x i1> arguments.
; The assertions expect one vandps on %xmm0/%xmm1 for all three
; configurations, matching the 8- and 16-lane variants.
define <4 x i1> @test4(<4 x i1>%a, <4 x i1>%b) {
; ALL_X64-LABEL: test4:
; ALL_X64:       ## %bb.0:
; ALL_X64-NEXT:    vandps %xmm1, %xmm0, %xmm0
; ALL_X64-NEXT:    retq
;
; KNL_X32-LABEL: test4:
; KNL_X32:       ## %bb.0:
; KNL_X32-NEXT:    vandps %xmm1, %xmm0, %xmm0
; KNL_X32-NEXT:    retl
  %c = and <4 x i1>%a, %b
  ret <4 x i1> %c
}

; External callee that takes and returns <8 x i1>. It has no body in this
; file; it exists so that callers must pass and receive an 8-lane mask across
; a real call boundary.
declare <8 x i1> @func8xi1(<8 x i1> %a)

; test5 - pass an <8 x i1> compare result to a call and sign-extend the
; <8 x i1> the call returns.
;
; What the assertions pin down:
;  * with -mcpu=knl (both triples) the compare produces a YMM vector that is
;    narrowed with vpmovdw so the argument is in %xmm0;
;  * with -mcpu=skx the compare writes mask register %k0, vpmovm2w expands it
;    into %xmm0, and a vzeroupper is emitted before the call;
;  * in every configuration the returned value is widened with vpmovzxwd and
;    then sign-extended from bit 0 with the vpslld/vpsrad $31 pair.
define <8 x i32> @test5(<8 x i32>%a, <8 x i32>%b) {
; KNL-LABEL: test5:
; KNL:       ## %bb.0:
; KNL-NEXT:    pushq %rax
; KNL-NEXT:    .cfi_def_cfa_offset 16
; KNL-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
; KNL-NEXT:    vpmovdw %zmm0, %ymm0
; KNL-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $ymm0
; KNL-NEXT:    callq _func8xi1
; KNL-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; KNL-NEXT:    vpslld $31, %ymm0, %ymm0
; KNL-NEXT:    vpsrad $31, %ymm0, %ymm0
; KNL-NEXT:    popq %rax
; KNL-NEXT:    retq
;
; SKX-LABEL: test5:
; SKX:       ## %bb.0:
; SKX-NEXT:    pushq %rax
; SKX-NEXT:    .cfi_def_cfa_offset 16
; SKX-NEXT:    vpcmpgtd %ymm1, %ymm0, %k0
; SKX-NEXT:    vpmovm2w %k0, %xmm0
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    callq _func8xi1
; SKX-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; SKX-NEXT:    vpslld $31, %ymm0, %ymm0
; SKX-NEXT:    vpsrad $31, %ymm0, %ymm0
; SKX-NEXT:    popq %rax
; SKX-NEXT:    retq
;
; KNL_X32-LABEL: test5:
; KNL_X32:       ## %bb.0:
; KNL_X32-NEXT:    subl $12, %esp
; KNL_X32-NEXT:    .cfi_def_cfa_offset 16
; KNL_X32-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
; KNL_X32-NEXT:    vpmovdw %zmm0, %ymm0
; KNL_X32-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $ymm0
; KNL_X32-NEXT:    calll _func8xi1
; KNL_X32-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; KNL_X32-NEXT:    vpslld $31, %ymm0, %ymm0
; KNL_X32-NEXT:    vpsrad $31, %ymm0, %ymm0
; KNL_X32-NEXT:    addl $12, %esp
; KNL_X32-NEXT:    retl
  %cmpRes = icmp sgt <8 x i32>%a, %b
  %resi = call <8 x i1> @func8xi1(<8 x i1> %cmpRes)
  %res = sext <8 x i1>%resi to <8 x i32>
  ret <8 x i32> %res
}

; External callee that takes and returns <16 x i1>. It has no body in this
; file; it exists so that callers must pass and receive a 16-lane mask across
; a real call boundary.
declare <16 x i1> @func16xi1(<16 x i1> %a)

; test6 - 16-lane version of the compare -> call -> sign-extend pattern.
;
; What the assertions pin down:
;  * with -mcpu=knl (both triples) the compare writes %k1, a zero-masked
;    vpternlogd $255 turns the mask into a ZMM vector, and vpmovdb narrows
;    it so the <16 x i1> argument is in %xmm0;
;  * with -mcpu=skx the compare writes %k0 and vpmovm2b expands it directly
;    into %xmm0, followed by vzeroupper before the call;
;  * in every configuration the returned value is widened with vpmovzxbd and
;    sign-extended from bit 0 with the vpslld/vpsrad $31 pair.
define <16 x i32> @test6(<16 x i32>%a, <16 x i32>%b) {
; KNL-LABEL: test6:
; KNL:       ## %bb.0:
; KNL-NEXT:    pushq %rax
; KNL-NEXT:    .cfi_def_cfa_offset 16
; KNL-NEXT:    vpcmpgtd %zmm1, %zmm0, %k1
; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT:    vpmovdb %zmm0, %xmm0
; KNL-NEXT:    callq _func16xi1
; KNL-NEXT:    vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
; KNL-NEXT:    vpsrad $31, %zmm0, %zmm0
; KNL-NEXT:    popq %rax
; KNL-NEXT:    retq
;
; SKX-LABEL: test6:
; SKX:       ## %bb.0:
; SKX-NEXT:    pushq %rax
; SKX-NEXT:    .cfi_def_cfa_offset 16
; SKX-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0
; SKX-NEXT:    vpmovm2b %k0, %xmm0
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    callq _func16xi1
; SKX-NEXT:    vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; SKX-NEXT:    vpslld $31, %zmm0, %zmm0
; SKX-NEXT:    vpsrad $31, %zmm0, %zmm0
; SKX-NEXT:    popq %rax
; SKX-NEXT:    retq
;
; KNL_X32-LABEL: test6:
; KNL_X32:       ## %bb.0:
; KNL_X32-NEXT:    subl $12, %esp
; KNL_X32-NEXT:    .cfi_def_cfa_offset 16
; KNL_X32-NEXT:    vpcmpgtd %zmm1, %zmm0, %k1
; KNL_X32-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; KNL_X32-NEXT:    vpmovdb %zmm0, %xmm0
; KNL_X32-NEXT:    calll _func16xi1
; KNL_X32-NEXT:    vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; KNL_X32-NEXT:    vpslld $31, %zmm0, %zmm0
; KNL_X32-NEXT:    vpsrad $31, %zmm0, %zmm0
; KNL_X32-NEXT:    addl $12, %esp
; KNL_X32-NEXT:    retl
  %cmpRes = icmp sgt <16 x i32>%a, %b
  %resi = call <16 x i1> @func16xi1(<16 x i1> %cmpRes)
  %res = sext <16 x i1>%resi to <16 x i32>
  ret <16 x i32> %res
}

; External callee that takes and returns <4 x i1>. It has no body in this
; file; it exists so that callers must pass and receive a 4-lane mask across
; a real call boundary.
declare <4 x i1> @func4xi1(<4 x i1> %a)

; test7 - 4-lane version of the compare -> call -> sign-extend pattern.
; The assertions expect the XMM compare result to be passed to the callee
; as-is (no narrowing or mask-register step on either CPU), and the returned
; value to be sign-extended from bit 0 with the vpslld/vpsrad $31 pair.
define <4 x i32> @test7(<4 x i32>%a, <4 x i32>%b) {
; ALL_X64-LABEL: test7:
; ALL_X64:       ## %bb.0:
; ALL_X64-NEXT:    pushq %rax
; ALL_X64-NEXT:    .cfi_def_cfa_offset 16
; ALL_X64-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
; ALL_X64-NEXT:    callq _func4xi1
; ALL_X64-NEXT:    vpslld $31, %xmm0, %xmm0
; ALL_X64-NEXT:    vpsrad $31, %xmm0, %xmm0
; ALL_X64-NEXT:    popq %rax
; ALL_X64-NEXT:    retq
;
; KNL_X32-LABEL: test7:
; KNL_X32:       ## %bb.0:
; KNL_X32-NEXT:    subl $12, %esp
; KNL_X32-NEXT:    .cfi_def_cfa_offset 16
; KNL_X32-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
; KNL_X32-NEXT:    calll _func4xi1
; KNL_X32-NEXT:    vpslld $31, %xmm0, %xmm0
; KNL_X32-NEXT:    vpsrad $31, %xmm0, %xmm0
; KNL_X32-NEXT:    addl $12, %esp
; KNL_X32-NEXT:    retl
  %cmpRes = icmp sgt <4 x i32>%a, %b
  %resi = call <4 x i1> @func4xi1(<4 x i1> %cmpRes)
  %res = sext <4 x i1>%resi to <4 x i32>
  ret <4 x i32> %res
}

; test7a - pass an <8 x i1> compare result to a call, then AND the returned
; mask with a constant alternating true/false pattern and return it as
; <8 x i1>.
;
; The argument set-up matches the 8-lane sign-extend test (vpmovdw with
; -mcpu=knl, %k0 + vpmovm2w + vzeroupper with -mcpu=skx). After the call the
; assertions expect a single AND against a constant-pool operand: vandps with
; -mcpu=knl, vpand with -mcpu=skx, RIP-relative on x86-64 and the absolute
; symbol LCPI7_0 on i686.
define <8 x i1> @test7a(<8 x i32>%a, <8 x i32>%b) {
; KNL-LABEL: test7a:
; KNL:       ## %bb.0:
; KNL-NEXT:    pushq %rax
; KNL-NEXT:    .cfi_def_cfa_offset 16
; KNL-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
; KNL-NEXT:    vpmovdw %zmm0, %ymm0
; KNL-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $ymm0
; KNL-NEXT:    callq _func8xi1
; KNL-NEXT:    vandps {{.*}}(%rip), %xmm0, %xmm0
; KNL-NEXT:    popq %rax
; KNL-NEXT:    retq
;
; SKX-LABEL: test7a:
; SKX:       ## %bb.0:
; SKX-NEXT:    pushq %rax
; SKX-NEXT:    .cfi_def_cfa_offset 16
; SKX-NEXT:    vpcmpgtd %ymm1, %ymm0, %k0
; SKX-NEXT:    vpmovm2w %k0, %xmm0
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    callq _func8xi1
; SKX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; SKX-NEXT:    popq %rax
; SKX-NEXT:    retq
;
; KNL_X32-LABEL: test7a:
; KNL_X32:       ## %bb.0:
; KNL_X32-NEXT:    subl $12, %esp
; KNL_X32-NEXT:    .cfi_def_cfa_offset 16
; KNL_X32-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
; KNL_X32-NEXT:    vpmovdw %zmm0, %ymm0
; KNL_X32-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $ymm0
; KNL_X32-NEXT:    calll _func8xi1
; KNL_X32-NEXT:    vandps LCPI7_0, %xmm0, %xmm0
; KNL_X32-NEXT:    addl $12, %esp
; KNL_X32-NEXT:    retl
  %cmpRes = icmp sgt <8 x i32>%a, %b
  %resi = call <8 x i1> @func8xi1(<8 x i1> %cmpRes)
  %res = and <8 x i1>%resi,  <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>
  ret <8 x i1> %res
}

; test8 - select between two <16 x i8> vectors on a scalar i1 argument.
; The assertions expect only bit 0 of the condition to be tested (testb $1),
; read from %dil on x86-64 and from a stack slot on i686, followed by a
; conditional branch around a vmovaps rather than a vector blend.
define <16 x i8> @test8(<16 x i8> %a1, <16 x i8> %a2, i1 %cond) {
; ALL_X64-LABEL: test8:
; ALL_X64:       ## %bb.0:
; ALL_X64-NEXT:    testb $1, %dil
; ALL_X64-NEXT:    jne LBB8_2
; ALL_X64-NEXT:  ## %bb.1:
; ALL_X64-NEXT:    vmovaps %xmm1, %xmm0
; ALL_X64-NEXT:  LBB8_2:
; ALL_X64-NEXT:    retq
;
; KNL_X32-LABEL: test8:
; KNL_X32:       ## %bb.0:
; KNL_X32-NEXT:    testb $1, {{[0-9]+}}(%esp)
; KNL_X32-NEXT:    jne LBB8_2
; KNL_X32-NEXT:  ## %bb.1:
; KNL_X32-NEXT:    vmovaps %xmm1, %xmm0
; KNL_X32-NEXT:  LBB8_2:
; KNL_X32-NEXT:    retl
  %res = select i1 %cond, <16 x i8> %a1, <16 x i8> %a2
  ret <16 x i8> %res
}

; test9 - return the i1 result of an unordered-or-greater-than double compare.
; The assertions expect a scalar vucomisd followed by setb into %al, i.e. the
; i1 is returned in a general-purpose register, not a mask register. On i686
; both operands are read from the stack.
define i1 @test9(double %a, double %b) {
; ALL_X64-LABEL: test9:
; ALL_X64:       ## %bb.0:
; ALL_X64-NEXT:    vucomisd %xmm0, %xmm1
; ALL_X64-NEXT:    setb %al
; ALL_X64-NEXT:    retq
;
; KNL_X32-LABEL: test9:
; KNL_X32:       ## %bb.0:
; KNL_X32-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; KNL_X32-NEXT:    vucomisd {{[0-9]+}}(%esp), %xmm0
; KNL_X32-NEXT:    setb %al
; KNL_X32-NEXT:    retl
  %c = fcmp ugt double %a, %b
  ret i1 %c
}

; test10 - select between two i32 arguments on a scalar i1 argument.
; The assertions expect bit 0 of the condition to be tested with testb $1.
; On x86-64 the choice is a register cmov; on i686 the code forms the
; addresses of the two stack-passed operands, picks one with cmovnel, and
; loads the result through it.
define i32 @test10(i32 %a, i32 %b, i1 %cond) {
; ALL_X64-LABEL: test10:
; ALL_X64:       ## %bb.0:
; ALL_X64-NEXT:    testb $1, %dl
; ALL_X64-NEXT:    cmovel %esi, %edi
; ALL_X64-NEXT:    movl %edi, %eax
; ALL_X64-NEXT:    retq
;
; KNL_X32-LABEL: test10:
; KNL_X32:       ## %bb.0:
; KNL_X32-NEXT:    testb $1, {{[0-9]+}}(%esp)
; KNL_X32-NEXT:    leal {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT:    leal {{[0-9]+}}(%esp), %ecx
; KNL_X32-NEXT:    cmovnel %eax, %ecx
; KNL_X32-NEXT:    movl (%ecx), %eax
; KNL_X32-NEXT:    retl
  %c = select i1 %cond, i32 %a, i32 %b
  ret i32 %c
}

; test11 - return the i1 result of a signed greater-than i32 compare.
; The assertions expect cmpl + setg with the result in %al; on i686 the
; operands are read from the stack.
define i1 @test11(i32 %a, i32 %b) {
; ALL_X64-LABEL: test11:
; ALL_X64:       ## %bb.0:
; ALL_X64-NEXT:    cmpl %esi, %edi
; ALL_X64-NEXT:    setg %al
; ALL_X64-NEXT:    retq
;
; KNL_X32-LABEL: test11:
; KNL_X32:       ## %bb.0:
; KNL_X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT:    cmpl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT:    setg %al
; KNL_X32-NEXT:    retl
  %c = icmp sgt i32 %a, %b
  ret i1 %c
}

; test12 - round-trip a scalar i1 through two calls: obtain it from @test11,
; forward it as the i1 argument of @test10, then use it again to select
; between that call's result and 0.
;
; What the assertions pin down:
;  * the i1 returned in %al is zero-extended with movzbl before being passed
;    on (in %edx on x86-64, in a stack slot on i686);
;  * the value is kept in a callee-saved register (%ebx) across the second
;    call and bit 0 is re-tested with testb $1 for the final cmov;
;  * the prologue/epilogue save and restore the callee-saved registers used,
;    with matching .cfi directives.
define i32 @test12(i32 %a1, i32 %a2, i32 %b1) {
; ALL_X64-LABEL: test12:
; ALL_X64:       ## %bb.0:
; ALL_X64-NEXT:    pushq %rbp
; ALL_X64-NEXT:    .cfi_def_cfa_offset 16
; ALL_X64-NEXT:    pushq %r14
; ALL_X64-NEXT:    .cfi_def_cfa_offset 24
; ALL_X64-NEXT:    pushq %rbx
; ALL_X64-NEXT:    .cfi_def_cfa_offset 32
; ALL_X64-NEXT:    .cfi_offset %rbx, -32
; ALL_X64-NEXT:    .cfi_offset %r14, -24
; ALL_X64-NEXT:    .cfi_offset %rbp, -16
; ALL_X64-NEXT:    movl %esi, %r14d
; ALL_X64-NEXT:    movl %edi, %ebp
; ALL_X64-NEXT:    movl %edx, %esi
; ALL_X64-NEXT:    callq _test11
; ALL_X64-NEXT:    movzbl %al, %ebx
; ALL_X64-NEXT:    movl %ebp, %edi
; ALL_X64-NEXT:    movl %r14d, %esi
; ALL_X64-NEXT:    movl %ebx, %edx
; ALL_X64-NEXT:    callq _test10
; ALL_X64-NEXT:    xorl %ecx, %ecx
; ALL_X64-NEXT:    testb $1, %bl
; ALL_X64-NEXT:    cmovel %ecx, %eax
; ALL_X64-NEXT:    popq %rbx
; ALL_X64-NEXT:    popq %r14
; ALL_X64-NEXT:    popq %rbp
; ALL_X64-NEXT:    retq
;
; KNL_X32-LABEL: test12:
; KNL_X32:       ## %bb.0:
; KNL_X32-NEXT:    pushl %ebx
; KNL_X32-NEXT:    .cfi_def_cfa_offset 8
; KNL_X32-NEXT:    pushl %edi
; KNL_X32-NEXT:    .cfi_def_cfa_offset 12
; KNL_X32-NEXT:    pushl %esi
; KNL_X32-NEXT:    .cfi_def_cfa_offset 16
; KNL_X32-NEXT:    subl $16, %esp
; KNL_X32-NEXT:    .cfi_def_cfa_offset 32
; KNL_X32-NEXT:    .cfi_offset %esi, -16
; KNL_X32-NEXT:    .cfi_offset %edi, -12
; KNL_X32-NEXT:    .cfi_offset %ebx, -8
; KNL_X32-NEXT:    movl {{[0-9]+}}(%esp), %esi
; KNL_X32-NEXT:    movl {{[0-9]+}}(%esp), %edi
; KNL_X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; KNL_X32-NEXT:    movl %edi, (%esp)
; KNL_X32-NEXT:    calll _test11
; KNL_X32-NEXT:    movl %eax, %ebx
; KNL_X32-NEXT:    movzbl %al, %eax
; KNL_X32-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; KNL_X32-NEXT:    movl %esi, {{[0-9]+}}(%esp)
; KNL_X32-NEXT:    movl %edi, (%esp)
; KNL_X32-NEXT:    calll _test10
; KNL_X32-NEXT:    xorl %ecx, %ecx
; KNL_X32-NEXT:    testb $1, %bl
; KNL_X32-NEXT:    cmovel %ecx, %eax
; KNL_X32-NEXT:    addl $16, %esp
; KNL_X32-NEXT:    popl %esi
; KNL_X32-NEXT:    popl %edi
; KNL_X32-NEXT:    popl %ebx
; KNL_X32-NEXT:    retl
  %cond = call i1 @test11(i32 %a1, i32 %b1)
  %res = call i32 @test10(i32 %a1, i32 %a2, i1 %cond)
  %res1 = select i1 %cond, i32 %res, i32 0
  ret i32 %res1
}

; test13 - load a <1 x i1> from memory and return it.
; The assertions expect the value to be returned in %al. With -mcpu=knl (both
; triples) it is loaded with movzbl; with -mcpu=skx it is loaded into mask
; register %k0 with kmovb and then moved to %eax with kmovd.
define <1 x i1> @test13(<1 x i1>* %foo) {
; KNL-LABEL: test13:
; KNL:       ## %bb.0:
; KNL-NEXT:    movzbl (%rdi), %eax
; KNL-NEXT:    ## kill: def $al killed $al killed $eax
; KNL-NEXT:    retq
;
; SKX-LABEL: test13:
; SKX:       ## %bb.0:
; SKX-NEXT:    kmovb (%rdi), %k0
; SKX-NEXT:    kmovd %k0, %eax
; SKX-NEXT:    ## kill: def $al killed $al killed $eax
; SKX-NEXT:    retq
;
; KNL_X32-LABEL: test13:
; KNL_X32:       ## %bb.0:
; KNL_X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT:    movzbl (%eax), %eax
; KNL_X32-NEXT:    ## kill: def $al killed $al killed $eax
; KNL_X32-NEXT:    retl
  %bar = load <1 x i1>, <1 x i1>* %foo
  ret <1 x i1> %bar
}
