; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; Test patterns which generate lzcnt instructions.
; E.g.: zext(or(setcc(cmp), setcc(cmp))) -> shr(or(lzcnt, lzcnt))
; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=btver2 | FileCheck --check-prefix=ALL --check-prefix=FASTLZCNT %s
; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=btver2 -mattr=-fast-lzcnt | FileCheck --check-prefix=ALL --check-prefix=NOFASTLZCNT %s
; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=znver1 | FileCheck --check-prefix=ALL --check-prefix=FASTLZCNT %s
; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=znver1 -mattr=-fast-lzcnt | FileCheck --check-prefix=ALL --check-prefix=NOFASTLZCNT %s
; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=znver2 | FileCheck --check-prefix=ALL --check-prefix=FASTLZCNT %s
; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=znver2 -mattr=-fast-lzcnt | FileCheck --check-prefix=ALL --check-prefix=NOFASTLZCNT %s

; Test one 32-bit input, output is 32-bit, no transformations expected.
; There is a single compare and no `or` of two setcc results, so the
; zext(or(setcc, setcc)) pattern does not apply; both llc configurations are
; expected to emit the same xorl/testl/sete sequence.
define i32 @test_zext_cmp0(i32 %a) {
; ALL-LABEL: test_zext_cmp0:
; ALL:       # %bb.0: # %entry
; ALL-NEXT:    xorl %eax, %eax
; ALL-NEXT:    testl %edi, %edi
; ALL-NEXT:    sete %al
; ALL-NEXT:    retq
entry:
  %cmp = icmp eq i32 %a, 0
  %conv = zext i1 %cmp to i32
  ret i32 %conv
}

; Test two 32-bit inputs, output is 32-bit.
; With fast-lzcnt, each `icmp eq ..., 0` is expected to become an lzcntl; the
; two counts are OR'ed and shifted right by 5. lzcnt of a 32-bit value is 32
; (bit 5 set) only when the value is zero, so bit 5 of the OR is the result.
; Without fast-lzcnt, the testl/sete/orb/movzbl sequence is expected instead.
define i32 @test_zext_cmp1(i32 %a, i32 %b) {
; FASTLZCNT-LABEL: test_zext_cmp1:
; FASTLZCNT:       # %bb.0:
; FASTLZCNT-NEXT:    lzcntl %edi, %ecx
; FASTLZCNT-NEXT:    lzcntl %esi, %eax
; FASTLZCNT-NEXT:    orl %ecx, %eax
; FASTLZCNT-NEXT:    shrl $5, %eax
; FASTLZCNT-NEXT:    retq
;
; NOFASTLZCNT-LABEL: test_zext_cmp1:
; NOFASTLZCNT:       # %bb.0:
; NOFASTLZCNT-NEXT:    testl %edi, %edi
; NOFASTLZCNT-NEXT:    sete %al
; NOFASTLZCNT-NEXT:    testl %esi, %esi
; NOFASTLZCNT-NEXT:    sete %cl
; NOFASTLZCNT-NEXT:    orb %al, %cl
; NOFASTLZCNT-NEXT:    movzbl %cl, %eax
; NOFASTLZCNT-NEXT:    retq
  %cmp = icmp eq i32 %a, 0
  %cmp1 = icmp eq i32 %b, 0
  %or = or i1 %cmp, %cmp1
  %lor.ext = zext i1 %or to i32
  ret i32 %lor.ext
}

; Test two 64-bit inputs, output is 64-bit.
; Same idea as the two-input 32-bit test (test_zext_cmp1), but the counts come
; from lzcntq and the shift amount is 6: a 64-bit lzcnt yields 64 (bit 6 set)
; only for a zero input. The OR and the shift are expected on the 32-bit
; sub-registers (orl/shrl).
define i64 @test_zext_cmp2(i64 %a, i64 %b) {
; FASTLZCNT-LABEL: test_zext_cmp2:
; FASTLZCNT:       # %bb.0:
; FASTLZCNT-NEXT:    lzcntq %rdi, %rcx
; FASTLZCNT-NEXT:    lzcntq %rsi, %rax
; FASTLZCNT-NEXT:    orl %ecx, %eax
; FASTLZCNT-NEXT:    shrl $6, %eax
; FASTLZCNT-NEXT:    retq
;
; NOFASTLZCNT-LABEL: test_zext_cmp2:
; NOFASTLZCNT:       # %bb.0:
; NOFASTLZCNT-NEXT:    testq %rdi, %rdi
; NOFASTLZCNT-NEXT:    sete %al
; NOFASTLZCNT-NEXT:    testq %rsi, %rsi
; NOFASTLZCNT-NEXT:    sete %cl
; NOFASTLZCNT-NEXT:    orb %al, %cl
; NOFASTLZCNT-NEXT:    movzbl %cl, %eax
; NOFASTLZCNT-NEXT:    retq
  %cmp = icmp eq i64 %a, 0
  %cmp1 = icmp eq i64 %b, 0
  %or = or i1 %cmp, %cmp1
  %lor.ext = zext i1 %or to i64
  ret i64 %lor.ext
}

; Test two 16-bit inputs, output is 16-bit.
; The transform is disabled for the 16-bit case, as we still have to clear the
; upper 16 bits, adding one more instruction.
; Both llc configurations are therefore expected to emit the same
; testw/sete/orb/movzbl sequence.
define i16 @test_zext_cmp3(i16 %a, i16 %b) {
; ALL-LABEL: test_zext_cmp3:
; ALL:       # %bb.0:
; ALL-NEXT:    testw %di, %di
; ALL-NEXT:    sete %al
; ALL-NEXT:    testw %si, %si
; ALL-NEXT:    sete %cl
; ALL-NEXT:    orb %al, %cl
; ALL-NEXT:    movzbl %cl, %eax
; ALL-NEXT:    # kill: def $ax killed $ax killed $eax
; ALL-NEXT:    retq
  %cmp = icmp eq i16 %a, 0
  %cmp1 = icmp eq i16 %b, 0
  %or = or i1 %cmp, %cmp1
  %lor.ext = zext i1 %or to i16
  ret i16 %lor.ext
}

; Test two 32-bit inputs, output is 64-bit.
; The expected fast-lzcnt sequence is lzcntl/orl/shrl $5 with no separate
; extension instruction for the i64 result: writing a 32-bit register already
; zeroes the upper 32 bits of the 64-bit register.
define i64 @test_zext_cmp4(i32 %a, i32 %b) {
; FASTLZCNT-LABEL: test_zext_cmp4:
; FASTLZCNT:       # %bb.0: # %entry
; FASTLZCNT-NEXT:    lzcntl %edi, %ecx
; FASTLZCNT-NEXT:    lzcntl %esi, %eax
; FASTLZCNT-NEXT:    orl %ecx, %eax
; FASTLZCNT-NEXT:    shrl $5, %eax
; FASTLZCNT-NEXT:    retq
;
; NOFASTLZCNT-LABEL: test_zext_cmp4:
; NOFASTLZCNT:       # %bb.0: # %entry
; NOFASTLZCNT-NEXT:    testl %edi, %edi
; NOFASTLZCNT-NEXT:    sete %al
; NOFASTLZCNT-NEXT:    testl %esi, %esi
; NOFASTLZCNT-NEXT:    sete %cl
; NOFASTLZCNT-NEXT:    orb %al, %cl
; NOFASTLZCNT-NEXT:    movzbl %cl, %eax
; NOFASTLZCNT-NEXT:    retq
entry:
  %cmp = icmp eq i32 %a, 0
  %cmp1 = icmp eq i32 %b, 0
  %0 = or i1 %cmp, %cmp1
  %conv = zext i1 %0 to i64
  ret i64 %conv
}

; Test two 64-bit inputs, output is 32-bit.
; The expected fast-lzcnt sequence uses lzcntq with a shift by 6. Because the
; result is only i32, the output also carries a `# kill` annotation recording
; that $eax is taken from $rax.
define i32 @test_zext_cmp5(i64 %a, i64 %b) {
; FASTLZCNT-LABEL: test_zext_cmp5:
; FASTLZCNT:       # %bb.0: # %entry
; FASTLZCNT-NEXT:    lzcntq %rdi, %rcx
; FASTLZCNT-NEXT:    lzcntq %rsi, %rax
; FASTLZCNT-NEXT:    orl %ecx, %eax
; FASTLZCNT-NEXT:    shrl $6, %eax
; FASTLZCNT-NEXT:    # kill: def $eax killed $eax killed $rax
; FASTLZCNT-NEXT:    retq
;
; NOFASTLZCNT-LABEL: test_zext_cmp5:
; NOFASTLZCNT:       # %bb.0: # %entry
; NOFASTLZCNT-NEXT:    testq %rdi, %rdi
; NOFASTLZCNT-NEXT:    sete %al
; NOFASTLZCNT-NEXT:    testq %rsi, %rsi
; NOFASTLZCNT-NEXT:    sete %cl
; NOFASTLZCNT-NEXT:    orb %al, %cl
; NOFASTLZCNT-NEXT:    movzbl %cl, %eax
; NOFASTLZCNT-NEXT:    retq
entry:
  %cmp = icmp eq i64 %a, 0
  %cmp1 = icmp eq i64 %b, 0
  %0 = or i1 %cmp, %cmp1
  %lor.ext = zext i1 %0 to i32
  ret i32 %lor.ext
}

; Test three 32-bit inputs, output is 32-bit.
; The chained `or` of three compares is expected to fold into three lzcntl
; results combined by two orl instructions, followed by a single shrl $5.
define i32 @test_zext_cmp6(i32 %a, i32 %b, i32 %c) {
; FASTLZCNT-LABEL: test_zext_cmp6:
; FASTLZCNT:       # %bb.0: # %entry
; FASTLZCNT-NEXT:    lzcntl %edi, %eax
; FASTLZCNT-NEXT:    lzcntl %esi, %ecx
; FASTLZCNT-NEXT:    orl %eax, %ecx
; FASTLZCNT-NEXT:    lzcntl %edx, %eax
; FASTLZCNT-NEXT:    orl %ecx, %eax
; FASTLZCNT-NEXT:    shrl $5, %eax
; FASTLZCNT-NEXT:    retq
;
; NOFASTLZCNT-LABEL: test_zext_cmp6:
; NOFASTLZCNT:       # %bb.0: # %entry
; NOFASTLZCNT-NEXT:    testl %edi, %edi
; NOFASTLZCNT-NEXT:    sete %al
; NOFASTLZCNT-NEXT:    testl %esi, %esi
; NOFASTLZCNT-NEXT:    sete %cl
; NOFASTLZCNT-NEXT:    orb %al, %cl
; NOFASTLZCNT-NEXT:    testl %edx, %edx
; NOFASTLZCNT-NEXT:    sete %al
; NOFASTLZCNT-NEXT:    orb %cl, %al
; NOFASTLZCNT-NEXT:    movzbl %al, %eax
; NOFASTLZCNT-NEXT:    retq
entry:
  %cmp = icmp eq i32 %a, 0
  %cmp1 = icmp eq i32 %b, 0
  %or.cond = or i1 %cmp, %cmp1
  %cmp2 = icmp eq i32 %c, 0
  %.cmp2 = or i1 %or.cond, %cmp2
  %lor.ext = zext i1 %.cmp2 to i32
  ret i32 %lor.ext
}

; Test three 32-bit inputs, output is 32-bit, but compared to test_zext_cmp6 test,
; %.cmp2 inputs' order is inverted.
; The expected assembly is the same as for test_zext_cmp6 under both check
; prefixes, showing that the operand order of the final `or` does not affect
; the generated code.
define i32 @test_zext_cmp7(i32 %a, i32 %b, i32 %c) {
; FASTLZCNT-LABEL: test_zext_cmp7:
; FASTLZCNT:       # %bb.0: # %entry
; FASTLZCNT-NEXT:    lzcntl %edi, %eax
; FASTLZCNT-NEXT:    lzcntl %esi, %ecx
; FASTLZCNT-NEXT:    orl %eax, %ecx
; FASTLZCNT-NEXT:    lzcntl %edx, %eax
; FASTLZCNT-NEXT:    orl %ecx, %eax
; FASTLZCNT-NEXT:    shrl $5, %eax
; FASTLZCNT-NEXT:    retq
;
; NOFASTLZCNT-LABEL: test_zext_cmp7:
; NOFASTLZCNT:       # %bb.0: # %entry
; NOFASTLZCNT-NEXT:    testl %edi, %edi
; NOFASTLZCNT-NEXT:    sete %al
; NOFASTLZCNT-NEXT:    testl %esi, %esi
; NOFASTLZCNT-NEXT:    sete %cl
; NOFASTLZCNT-NEXT:    orb %al, %cl
; NOFASTLZCNT-NEXT:    testl %edx, %edx
; NOFASTLZCNT-NEXT:    sete %al
; NOFASTLZCNT-NEXT:    orb %cl, %al
; NOFASTLZCNT-NEXT:    movzbl %al, %eax
; NOFASTLZCNT-NEXT:    retq
entry:
  %cmp = icmp eq i32 %a, 0
  %cmp1 = icmp eq i32 %b, 0
  %or.cond = or i1 %cmp, %cmp1
  %cmp2 = icmp eq i32 %c, 0
  %.cmp2 = or i1 %cmp2, %or.cond
  %lor.ext = zext i1 %.cmp2 to i32
  ret i32 %lor.ext
}

; Test four 32-bit inputs, output is 32-bit.
; With fast-lzcnt, four lzcntl results are expected to be merged by three orl
; instructions and a single shrl $5; otherwise four testl/sete pairs are
; merged by three orb instructions and one movzbl.
define i32 @test_zext_cmp8(i32 %a, i32 %b, i32 %c, i32 %d) {
; FASTLZCNT-LABEL: test_zext_cmp8:
; FASTLZCNT:       # %bb.0: # %entry
; FASTLZCNT-NEXT:    lzcntl %edi, %eax
; FASTLZCNT-NEXT:    lzcntl %esi, %esi
; FASTLZCNT-NEXT:    lzcntl %edx, %edx
; FASTLZCNT-NEXT:    orl %eax, %esi
; FASTLZCNT-NEXT:    lzcntl %ecx, %eax
; FASTLZCNT-NEXT:    orl %edx, %eax
; FASTLZCNT-NEXT:    orl %esi, %eax
; FASTLZCNT-NEXT:    shrl $5, %eax
; FASTLZCNT-NEXT:    retq
;
; NOFASTLZCNT-LABEL: test_zext_cmp8:
; NOFASTLZCNT:       # %bb.0: # %entry
; NOFASTLZCNT-NEXT:    testl %edi, %edi
; NOFASTLZCNT-NEXT:    sete %dil
; NOFASTLZCNT-NEXT:    testl %esi, %esi
; NOFASTLZCNT-NEXT:    sete %al
; NOFASTLZCNT-NEXT:    orb %dil, %al
; NOFASTLZCNT-NEXT:    testl %edx, %edx
; NOFASTLZCNT-NEXT:    sete %dl
; NOFASTLZCNT-NEXT:    testl %ecx, %ecx
; NOFASTLZCNT-NEXT:    sete %cl
; NOFASTLZCNT-NEXT:    orb %dl, %cl
; NOFASTLZCNT-NEXT:    orb %al, %cl
; NOFASTLZCNT-NEXT:    movzbl %cl, %eax
; NOFASTLZCNT-NEXT:    retq
entry:
  %cmp = icmp eq i32 %a, 0
  %cmp1 = icmp eq i32 %b, 0
  %or.cond = or i1 %cmp, %cmp1
  %cmp3 = icmp eq i32 %c, 0
  %or.cond5 = or i1 %or.cond, %cmp3
  %cmp4 = icmp eq i32 %d, 0
  %.cmp4 = or i1 %or.cond5, %cmp4
  %lor.ext = zext i1 %.cmp4 to i32
  ret i32 %lor.ext
}

; Test one 32-bit input, one 64-bit input, output is 32-bit.
; The inputs have different widths, so the expected fast-lzcnt code shifts
; each count by its own amount (shrl $5 for the i32 count, shrl $6 for the
; i64 count) before the orl, rather than sharing one shift after the OR.
define i32 @test_zext_cmp9(i32 %a, i64 %b) {
; FASTLZCNT-LABEL: test_zext_cmp9:
; FASTLZCNT:       # %bb.0: # %entry
; FASTLZCNT-NEXT:    lzcntq %rsi, %rax
; FASTLZCNT-NEXT:    lzcntl %edi, %ecx
; FASTLZCNT-NEXT:    shrl $5, %ecx
; FASTLZCNT-NEXT:    shrl $6, %eax
; FASTLZCNT-NEXT:    orl %ecx, %eax
; FASTLZCNT-NEXT:    # kill: def $eax killed $eax killed $rax
; FASTLZCNT-NEXT:    retq
;
; NOFASTLZCNT-LABEL: test_zext_cmp9:
; NOFASTLZCNT:       # %bb.0: # %entry
; NOFASTLZCNT-NEXT:    testl %edi, %edi
; NOFASTLZCNT-NEXT:    sete %al
; NOFASTLZCNT-NEXT:    testq %rsi, %rsi
; NOFASTLZCNT-NEXT:    sete %cl
; NOFASTLZCNT-NEXT:    orb %al, %cl
; NOFASTLZCNT-NEXT:    movzbl %cl, %eax
; NOFASTLZCNT-NEXT:    retq
entry:
  %cmp = icmp eq i32 %a, 0
  %cmp1 = icmp eq i64 %b, 0
  %0 = or i1 %cmp, %cmp1
  %lor.ext = zext i1 %0 to i32
  ret i32 %lor.ext
}

; Test two 128-bit inputs, output is 32-bit, no transformations expected.
; Each i128 value is rebuilt from two i64 halves (zext / shl by 64 / or) and
; compared against zero. Both llc configurations are expected to test each
; value with orq + sete on its halves rather than with lzcnt.
define i32 @test_zext_cmp10(i64 %a.coerce0, i64 %a.coerce1, i64 %b.coerce0, i64 %b.coerce1) {
; ALL-LABEL: test_zext_cmp10:
; ALL:       # %bb.0: # %entry
; ALL-NEXT:    orq %rsi, %rdi
; ALL-NEXT:    sete %al
; ALL-NEXT:    orq %rcx, %rdx
; ALL-NEXT:    sete %cl
; ALL-NEXT:    orb %al, %cl
; ALL-NEXT:    movzbl %cl, %eax
; ALL-NEXT:    retq
entry:
  %a.sroa.2.0.insert.ext = zext i64 %a.coerce1 to i128
  %a.sroa.2.0.insert.shift = shl nuw i128 %a.sroa.2.0.insert.ext, 64
  %a.sroa.0.0.insert.ext = zext i64 %a.coerce0 to i128
  %a.sroa.0.0.insert.insert = or i128 %a.sroa.2.0.insert.shift, %a.sroa.0.0.insert.ext
  %b.sroa.2.0.insert.ext = zext i64 %b.coerce1 to i128
  %b.sroa.2.0.insert.shift = shl nuw i128 %b.sroa.2.0.insert.ext, 64
  %b.sroa.0.0.insert.ext = zext i64 %b.coerce0 to i128
  %b.sroa.0.0.insert.insert = or i128 %b.sroa.2.0.insert.shift, %b.sroa.0.0.insert.ext
  %cmp = icmp eq i128 %a.sroa.0.0.insert.insert, 0
  %cmp3 = icmp eq i128 %b.sroa.0.0.insert.insert, 0
  %0 = or i1 %cmp, %cmp3
  %lor.ext = zext i1 %0 to i32
  ret i32 %lor.ext
}

; PR31902 Fix a crash in combineOrCmpEqZeroToCtlzSrl under fast math.
; The compares here are `fcmp fast oeq` against 0.0 in a function carrying
; "no-nans-fp-math"="true", i.e. floating-point rather than integer compares.
; No lzcnt is expected; both llc configurations should emit vucomisd + sete
; for each operand.
define i32 @test_zext_cmp11(double %a, double %b) "no-nans-fp-math"="true" {
;
; ALL-LABEL: test_zext_cmp11:
; ALL:       # %bb.0: # %entry
; ALL-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
; ALL-NEXT:    vucomisd %xmm2, %xmm0
; ALL-NEXT:    sete %al
; ALL-NEXT:    vucomisd %xmm2, %xmm1
; ALL-NEXT:    sete %cl
; ALL-NEXT:    orb %al, %cl
; ALL-NEXT:    movzbl %cl, %eax
; ALL-NEXT:    retq
entry:
  %cmp = fcmp fast oeq double %a, 0.000000e+00
  %cmp1 = fcmp fast oeq double %b, 0.000000e+00
  %0 = or i1 %cmp, %cmp1
  %conv = zext i1 %0 to i32
  ret i32 %conv
}
