• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; Test patterns which generate lzcnt instructions.
3; E.g.: zext(or(setcc(cmp), setcc(cmp))) -> shr(or(lzcnt, lzcnt))
4; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=btver2 | FileCheck --check-prefix=ALL --check-prefix=FASTLZCNT %s
5; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=btver2 -mattr=-fast-lzcnt | FileCheck --check-prefix=ALL --check-prefix=NOFASTLZCNT %s
6; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=znver1 | FileCheck --check-prefix=ALL --check-prefix=FASTLZCNT %s
7; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=znver1 -mattr=-fast-lzcnt | FileCheck --check-prefix=ALL --check-prefix=NOFASTLZCNT %s
8
9; Test one 32-bit input, output is 32-bit, no transformations expected.
; A single icmp-eq-zero feeds the zext directly (there is no `or` of two
; compares), so both configurations are expected to keep the plain
; xor/test/sete sequence and emit no lzcnt.
10define i32 @test_zext_cmp0(i32 %a) {
11; ALL-LABEL: test_zext_cmp0:
12; ALL:       # %bb.0: # %entry
13; ALL-NEXT:    xorl %eax, %eax
14; ALL-NEXT:    testl %edi, %edi
15; ALL-NEXT:    sete %al
16; ALL-NEXT:    retq
17entry:
18  %cmp = icmp eq i32 %a, 0
19  %conv = zext i1 %cmp to i32
20  ret i32 %conv
21}
22
23; Test two 32-bit inputs, output is 32-bit.
; Computes zext(or(a == 0, b == 0)) to i32.
; With fast-lzcnt enabled the expected code is shr(or(lzcnt a, lzcnt b), 5):
; a 32-bit lzcnt yields 32 only for a zero input, so bit 5 of the OR is set
; iff at least one input is zero.
; With fast-lzcnt disabled the test/sete/orb/movzbl sequence is expected.
24define i32 @test_zext_cmp1(i32 %a, i32 %b) {
25; FASTLZCNT-LABEL: test_zext_cmp1:
26; FASTLZCNT:       # %bb.0:
27; FASTLZCNT-NEXT:    lzcntl %edi, %ecx
28; FASTLZCNT-NEXT:    lzcntl %esi, %eax
29; FASTLZCNT-NEXT:    orl %ecx, %eax
30; FASTLZCNT-NEXT:    shrl $5, %eax
31; FASTLZCNT-NEXT:    retq
32;
33; NOFASTLZCNT-LABEL: test_zext_cmp1:
34; NOFASTLZCNT:       # %bb.0:
35; NOFASTLZCNT-NEXT:    testl %edi, %edi
36; NOFASTLZCNT-NEXT:    sete %al
37; NOFASTLZCNT-NEXT:    testl %esi, %esi
38; NOFASTLZCNT-NEXT:    sete %cl
39; NOFASTLZCNT-NEXT:    orb %al, %cl
40; NOFASTLZCNT-NEXT:    movzbl %cl, %eax
41; NOFASTLZCNT-NEXT:    retq
42  %cmp = icmp eq i32 %a, 0
43  %cmp1 = icmp eq i32 %b, 0
44  %or = or i1 %cmp, %cmp1
45  %lor.ext = zext i1 %or to i32
46  ret i32 %lor.ext
47}
48
49; Test two 64-bit inputs, output is 64-bit.
; 64-bit variant of the two-input pattern: zext(or(a == 0, b == 0)) to i64.
; With fast-lzcnt enabled, two lzcntq results are OR-ed and shifted right by 6
; (a 64-bit lzcnt yields 64 only for a zero input, so bit 6 carries the
; answer). The expected or/shift are the 32-bit forms (orl/shrl).
; With fast-lzcnt disabled the testq/sete/orb/movzbl sequence is expected.
50define i64 @test_zext_cmp2(i64 %a, i64 %b) {
51; FASTLZCNT-LABEL: test_zext_cmp2:
52; FASTLZCNT:       # %bb.0:
53; FASTLZCNT-NEXT:    lzcntq %rdi, %rcx
54; FASTLZCNT-NEXT:    lzcntq %rsi, %rax
55; FASTLZCNT-NEXT:    orl %ecx, %eax
56; FASTLZCNT-NEXT:    shrl $6, %eax
57; FASTLZCNT-NEXT:    retq
58;
59; NOFASTLZCNT-LABEL: test_zext_cmp2:
60; NOFASTLZCNT:       # %bb.0:
61; NOFASTLZCNT-NEXT:    testq %rdi, %rdi
62; NOFASTLZCNT-NEXT:    sete %al
63; NOFASTLZCNT-NEXT:    testq %rsi, %rsi
64; NOFASTLZCNT-NEXT:    sete %cl
65; NOFASTLZCNT-NEXT:    orb %al, %cl
66; NOFASTLZCNT-NEXT:    movzbl %cl, %eax
67; NOFASTLZCNT-NEXT:    retq
68  %cmp = icmp eq i64 %a, 0
69  %cmp1 = icmp eq i64 %b, 0
70  %or = or i1 %cmp, %cmp1
71  %lor.ext = zext i1 %or to i64
72  ret i64 %lor.ext
73}
74
75; Test two 16-bit inputs, output is 16-bit.
76; The transform is disabled for the 16-bit case, as we still have to clear the
77; upper 16-bits, adding one more instruction.
; Computes zext(or(a == 0, b == 0)) to i16. A single set of checks covers both
; configurations: the testw/sete/orb/movzbl sequence is expected whether or
; not fast-lzcnt is enabled, i.e. no lzcnt is emitted for i16 operands.
78define i16 @test_zext_cmp3(i16 %a, i16 %b) {
79; ALL-LABEL: test_zext_cmp3:
80; ALL:       # %bb.0:
81; ALL-NEXT:    testw %di, %di
82; ALL-NEXT:    sete %al
83; ALL-NEXT:    testw %si, %si
84; ALL-NEXT:    sete %cl
85; ALL-NEXT:    orb %al, %cl
86; ALL-NEXT:    movzbl %cl, %eax
87; ALL-NEXT:    # kill: def $ax killed $ax killed $eax
88; ALL-NEXT:    retq
89  %cmp = icmp eq i16 %a, 0
90  %cmp1 = icmp eq i16 %b, 0
91  %or = or i1 %cmp, %cmp1
92  %lor.ext = zext i1 %or to i16
93  ret i16 %lor.ext
94}
95
96; Test two 32-bit inputs, output is 64-bit.
; The compares are on i32 operands but the i1 result is zero-extended to i64.
; With fast-lzcnt enabled the expected code is still lzcntl/orl/shrl $5, and
; no separate extension instruction appears in the expected output.
; With fast-lzcnt disabled the testl/sete/orb/movzbl sequence is expected.
97define i64 @test_zext_cmp4(i32 %a, i32 %b) {
98; FASTLZCNT-LABEL: test_zext_cmp4:
99; FASTLZCNT:       # %bb.0: # %entry
100; FASTLZCNT-NEXT:    lzcntl %edi, %ecx
101; FASTLZCNT-NEXT:    lzcntl %esi, %eax
102; FASTLZCNT-NEXT:    orl %ecx, %eax
103; FASTLZCNT-NEXT:    shrl $5, %eax
104; FASTLZCNT-NEXT:    retq
105;
106; NOFASTLZCNT-LABEL: test_zext_cmp4:
107; NOFASTLZCNT:       # %bb.0: # %entry
108; NOFASTLZCNT-NEXT:    testl %edi, %edi
109; NOFASTLZCNT-NEXT:    sete %al
110; NOFASTLZCNT-NEXT:    testl %esi, %esi
111; NOFASTLZCNT-NEXT:    sete %cl
112; NOFASTLZCNT-NEXT:    orb %al, %cl
113; NOFASTLZCNT-NEXT:    movzbl %cl, %eax
114; NOFASTLZCNT-NEXT:    retq
115entry:
116  %cmp = icmp eq i32 %a, 0
117  %cmp1 = icmp eq i32 %b, 0
118  %0 = or i1 %cmp, %cmp1
119  %conv = zext i1 %0 to i64
120  ret i64 %conv
121}
122
123; Test two 64-bit inputs, output is 32-bit.
; The compares are on i64 operands but the i1 result is zero-extended to i32.
; With fast-lzcnt enabled the expected code uses lzcntq on both inputs, then
; orl and shrl $6; the `# kill` line in the expected output is an assembler
; comment noting that only the %eax part of %rax is returned.
; With fast-lzcnt disabled the testq/sete/orb/movzbl sequence is expected.
124define i32 @test_zext_cmp5(i64 %a, i64 %b) {
125; FASTLZCNT-LABEL: test_zext_cmp5:
126; FASTLZCNT:       # %bb.0: # %entry
127; FASTLZCNT-NEXT:    lzcntq %rdi, %rcx
128; FASTLZCNT-NEXT:    lzcntq %rsi, %rax
129; FASTLZCNT-NEXT:    orl %ecx, %eax
130; FASTLZCNT-NEXT:    shrl $6, %eax
131; FASTLZCNT-NEXT:    # kill: def $eax killed $eax killed $rax
132; FASTLZCNT-NEXT:    retq
133;
134; NOFASTLZCNT-LABEL: test_zext_cmp5:
135; NOFASTLZCNT:       # %bb.0: # %entry
136; NOFASTLZCNT-NEXT:    testq %rdi, %rdi
137; NOFASTLZCNT-NEXT:    sete %al
138; NOFASTLZCNT-NEXT:    testq %rsi, %rsi
139; NOFASTLZCNT-NEXT:    sete %cl
140; NOFASTLZCNT-NEXT:    orb %al, %cl
141; NOFASTLZCNT-NEXT:    movzbl %cl, %eax
142; NOFASTLZCNT-NEXT:    retq
143entry:
144  %cmp = icmp eq i64 %a, 0
145  %cmp1 = icmp eq i64 %b, 0
146  %0 = or i1 %cmp, %cmp1
147  %lor.ext = zext i1 %0 to i32
148  ret i32 %lor.ext
149}
150
151; Test three 32-bit inputs, output is 32-bit.
; Three compares against zero are combined by a chain of two `or`s, with the
; earlier `or` result as the left operand of the final `or`.
; With fast-lzcnt enabled the whole chain is expected to fold into three
; lzcntl, two orl and a single trailing shrl $5.
; With fast-lzcnt disabled three test/sete pairs combined with orb are expected.
152define i32 @test_zext_cmp6(i32 %a, i32 %b, i32 %c) {
153; FASTLZCNT-LABEL: test_zext_cmp6:
154; FASTLZCNT:       # %bb.0: # %entry
155; FASTLZCNT-NEXT:    lzcntl %edi, %eax
156; FASTLZCNT-NEXT:    lzcntl %esi, %ecx
157; FASTLZCNT-NEXT:    orl %eax, %ecx
158; FASTLZCNT-NEXT:    lzcntl %edx, %eax
159; FASTLZCNT-NEXT:    orl %ecx, %eax
160; FASTLZCNT-NEXT:    shrl $5, %eax
161; FASTLZCNT-NEXT:    retq
162;
163; NOFASTLZCNT-LABEL: test_zext_cmp6:
164; NOFASTLZCNT:       # %bb.0: # %entry
165; NOFASTLZCNT-NEXT:    testl %edi, %edi
166; NOFASTLZCNT-NEXT:    sete %al
167; NOFASTLZCNT-NEXT:    testl %esi, %esi
168; NOFASTLZCNT-NEXT:    sete %cl
169; NOFASTLZCNT-NEXT:    orb %al, %cl
170; NOFASTLZCNT-NEXT:    testl %edx, %edx
171; NOFASTLZCNT-NEXT:    sete %al
172; NOFASTLZCNT-NEXT:    orb %cl, %al
173; NOFASTLZCNT-NEXT:    movzbl %al, %eax
174; NOFASTLZCNT-NEXT:    retq
175entry:
176  %cmp = icmp eq i32 %a, 0
177  %cmp1 = icmp eq i32 %b, 0
178  %or.cond = or i1 %cmp, %cmp1
179  %cmp2 = icmp eq i32 %c, 0
180  %.cmp2 = or i1 %or.cond, %cmp2
181  %lor.ext = zext i1 %.cmp2 to i32
182  ret i32 %lor.ext
183}
184
185; Test three 32-bit inputs, output is 32-bit, but compared to test_zext_cmp6 test,
186; %.cmp2 inputs' order is inverted.
; Same three-compare chain as test_zext_cmp6, except that the final `or` takes
; the new compare as its left operand and the earlier `or` as its right one.
; The expected output is instruction-for-instruction the same as for
; test_zext_cmp6 in both configurations, showing that operand order of the
; `or` does not affect the result.
187define i32 @test_zext_cmp7(i32 %a, i32 %b, i32 %c) {
188; FASTLZCNT-LABEL: test_zext_cmp7:
189; FASTLZCNT:       # %bb.0: # %entry
190; FASTLZCNT-NEXT:    lzcntl %edi, %eax
191; FASTLZCNT-NEXT:    lzcntl %esi, %ecx
192; FASTLZCNT-NEXT:    orl %eax, %ecx
193; FASTLZCNT-NEXT:    lzcntl %edx, %eax
194; FASTLZCNT-NEXT:    orl %ecx, %eax
195; FASTLZCNT-NEXT:    shrl $5, %eax
196; FASTLZCNT-NEXT:    retq
197;
198; NOFASTLZCNT-LABEL: test_zext_cmp7:
199; NOFASTLZCNT:       # %bb.0: # %entry
200; NOFASTLZCNT-NEXT:    testl %edi, %edi
201; NOFASTLZCNT-NEXT:    sete %al
202; NOFASTLZCNT-NEXT:    testl %esi, %esi
203; NOFASTLZCNT-NEXT:    sete %cl
204; NOFASTLZCNT-NEXT:    orb %al, %cl
205; NOFASTLZCNT-NEXT:    testl %edx, %edx
206; NOFASTLZCNT-NEXT:    sete %al
207; NOFASTLZCNT-NEXT:    orb %cl, %al
208; NOFASTLZCNT-NEXT:    movzbl %al, %eax
209; NOFASTLZCNT-NEXT:    retq
210entry:
211  %cmp = icmp eq i32 %a, 0
212  %cmp1 = icmp eq i32 %b, 0
213  %or.cond = or i1 %cmp, %cmp1
214  %cmp2 = icmp eq i32 %c, 0
215  %.cmp2 = or i1 %cmp2, %or.cond
216  %lor.ext = zext i1 %.cmp2 to i32
217  ret i32 %lor.ext
218}
219
220; Test four 32-bit inputs, output is 32-bit.
; Four compares against zero are combined by a chain of three `or`s.
; With fast-lzcnt enabled the expected code is four lzcntl, three orl and a
; single trailing shrl $5.
; With fast-lzcnt disabled four test/sete pairs combined with orb are expected.
221define i32 @test_zext_cmp8(i32 %a, i32 %b, i32 %c, i32 %d) {
222; FASTLZCNT-LABEL: test_zext_cmp8:
223; FASTLZCNT:       # %bb.0: # %entry
224; FASTLZCNT-NEXT:    lzcntl %edi, %eax
225; FASTLZCNT-NEXT:    lzcntl %esi, %esi
226; FASTLZCNT-NEXT:    lzcntl %edx, %edx
227; FASTLZCNT-NEXT:    orl %eax, %esi
228; FASTLZCNT-NEXT:    lzcntl %ecx, %eax
229; FASTLZCNT-NEXT:    orl %edx, %eax
230; FASTLZCNT-NEXT:    orl %esi, %eax
231; FASTLZCNT-NEXT:    shrl $5, %eax
232; FASTLZCNT-NEXT:    retq
233;
234; NOFASTLZCNT-LABEL: test_zext_cmp8:
235; NOFASTLZCNT:       # %bb.0: # %entry
236; NOFASTLZCNT-NEXT:    testl %edi, %edi
237; NOFASTLZCNT-NEXT:    sete %dil
238; NOFASTLZCNT-NEXT:    testl %esi, %esi
239; NOFASTLZCNT-NEXT:    sete %al
240; NOFASTLZCNT-NEXT:    orb %dil, %al
241; NOFASTLZCNT-NEXT:    testl %edx, %edx
242; NOFASTLZCNT-NEXT:    sete %dl
243; NOFASTLZCNT-NEXT:    testl %ecx, %ecx
244; NOFASTLZCNT-NEXT:    sete %cl
245; NOFASTLZCNT-NEXT:    orb %dl, %cl
246; NOFASTLZCNT-NEXT:    orb %al, %cl
247; NOFASTLZCNT-NEXT:    movzbl %cl, %eax
248; NOFASTLZCNT-NEXT:    retq
249entry:
250  %cmp = icmp eq i32 %a, 0
251  %cmp1 = icmp eq i32 %b, 0
252  %or.cond = or i1 %cmp, %cmp1
253  %cmp3 = icmp eq i32 %c, 0
254  %or.cond5 = or i1 %or.cond, %cmp3
255  %cmp4 = icmp eq i32 %d, 0
256  %.cmp4 = or i1 %or.cond5, %cmp4
257  %lor.ext = zext i1 %.cmp4 to i32
258  ret i32 %lor.ext
259}
260
261; Test one 32-bit input, one 64-bit input, output is 32-bit.
; The two compared operands have different widths (i32 and i64), so the
; "is zero" bit sits at a different position in each lzcnt result.
; With fast-lzcnt enabled each lzcnt is therefore expected to be shifted on
; its own (by 5 for the 32-bit input, by 6 for the 64-bit input) before the
; results are OR-ed, instead of one shift after the OR.
; With fast-lzcnt disabled the test/sete/orb/movzbl sequence is expected.
262define i32 @test_zext_cmp9(i32 %a, i64 %b) {
263; FASTLZCNT-LABEL: test_zext_cmp9:
264; FASTLZCNT:       # %bb.0: # %entry
265; FASTLZCNT-NEXT:    lzcntq %rsi, %rax
266; FASTLZCNT-NEXT:    lzcntl %edi, %ecx
267; FASTLZCNT-NEXT:    shrl $5, %ecx
268; FASTLZCNT-NEXT:    shrl $6, %eax
269; FASTLZCNT-NEXT:    orl %ecx, %eax
270; FASTLZCNT-NEXT:    # kill: def $eax killed $eax killed $rax
271; FASTLZCNT-NEXT:    retq
272;
273; NOFASTLZCNT-LABEL: test_zext_cmp9:
274; NOFASTLZCNT:       # %bb.0: # %entry
275; NOFASTLZCNT-NEXT:    testl %edi, %edi
276; NOFASTLZCNT-NEXT:    sete %al
277; NOFASTLZCNT-NEXT:    testq %rsi, %rsi
278; NOFASTLZCNT-NEXT:    sete %cl
279; NOFASTLZCNT-NEXT:    orb %al, %cl
280; NOFASTLZCNT-NEXT:    movzbl %cl, %eax
281; NOFASTLZCNT-NEXT:    retq
282entry:
283  %cmp = icmp eq i32 %a, 0
284  %cmp1 = icmp eq i64 %b, 0
285  %0 = or i1 %cmp, %cmp1
286  %lor.ext = zext i1 %0 to i32
287  ret i32 %lor.ext
288}
289
290; Test 2 128-bit inputs, output is 32-bit, no transformations expected.
; Each i128 operand is rebuilt from two i64 arguments: %*.coerce1 is
; zero-extended and shifted left by 64 to form the high half, then OR-ed with
; the zero-extended %*.coerce0 as the low half.
; A single set of checks covers both configurations: each i128 == 0 compare is
; expected to become an orq of the two halves followed by sete, and no lzcnt
; is emitted.
291define i32 @test_zext_cmp10(i64 %a.coerce0, i64 %a.coerce1, i64 %b.coerce0, i64 %b.coerce1) {
292; ALL-LABEL: test_zext_cmp10:
293; ALL:       # %bb.0: # %entry
294; ALL-NEXT:    orq %rsi, %rdi
295; ALL-NEXT:    sete %al
296; ALL-NEXT:    orq %rcx, %rdx
297; ALL-NEXT:    sete %cl
298; ALL-NEXT:    orb %al, %cl
299; ALL-NEXT:    movzbl %cl, %eax
300; ALL-NEXT:    retq
301entry:
302  %a.sroa.2.0.insert.ext = zext i64 %a.coerce1 to i128
303  %a.sroa.2.0.insert.shift = shl nuw i128 %a.sroa.2.0.insert.ext, 64
304  %a.sroa.0.0.insert.ext = zext i64 %a.coerce0 to i128
305  %a.sroa.0.0.insert.insert = or i128 %a.sroa.2.0.insert.shift, %a.sroa.0.0.insert.ext
306  %b.sroa.2.0.insert.ext = zext i64 %b.coerce1 to i128
307  %b.sroa.2.0.insert.shift = shl nuw i128 %b.sroa.2.0.insert.ext, 64
308  %b.sroa.0.0.insert.ext = zext i64 %b.coerce0 to i128
309  %b.sroa.0.0.insert.insert = or i128 %b.sroa.2.0.insert.shift, %b.sroa.0.0.insert.ext
310  %cmp = icmp eq i128 %a.sroa.0.0.insert.insert, 0
311  %cmp3 = icmp eq i128 %b.sroa.0.0.insert.insert, 0
312  %0 = or i1 %cmp, %cmp3
313  %lor.ext = zext i1 %0 to i32
314  ret i32 %lor.ext
315}
316
317; PR31902 Fix a crash in combineOrCmpEqZeroToCtlzSrl under fast math.
; The two compares here are floating-point (`fcmp fast oeq` against 0.0) in a
; function carrying the "no-nans-fp-math"="true" attribute, rather than
; integer compares against zero.
; A single set of checks covers both configurations: the vucomisd/sete/orb/
; movzbl sequence is expected and no lzcnt is emitted.
318define i32 @test_zext_cmp11(double %a, double %b) "no-nans-fp-math"="true" {
319;
320; ALL-LABEL: test_zext_cmp11:
321; ALL:       # %bb.0: # %entry
322; ALL-NEXT:    vxorps %xmm2, %xmm2, %xmm2
323; ALL-NEXT:    vucomisd %xmm2, %xmm0
324; ALL-NEXT:    sete %al
325; ALL-NEXT:    vucomisd %xmm2, %xmm1
326; ALL-NEXT:    sete %cl
327; ALL-NEXT:    orb %al, %cl
328; ALL-NEXT:    movzbl %cl, %eax
329; ALL-NEXT:    retq
330entry:
331  %cmp = fcmp fast oeq double %a, 0.000000e+00
332  %cmp1 = fcmp fast oeq double %b, 0.000000e+00
333  %0 = or i1 %cmp, %cmp1
334  %conv = zext i1 %0 to i32
335  ret i32 %conv
336}
337