; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefix=CHECK --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=CHECK --check-prefix=X64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+bmi,+lzcnt | FileCheck %s --check-prefix=CHECK --check-prefix=X32-CLZ
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi,+lzcnt | FileCheck %s --check-prefix=CHECK --check-prefix=X64-CLZ

declare i8 @llvm.cttz.i8(i8, i1)
declare i16 @llvm.cttz.i16(i16, i1)
declare i32 @llvm.cttz.i32(i32, i1)
declare i64 @llvm.cttz.i64(i64, i1)

declare i8 @llvm.ctlz.i8(i8, i1)
declare i16 @llvm.ctlz.i16(i16, i1)
declare i32 @llvm.ctlz.i32(i32, i1)
declare i64 @llvm.ctlz.i64(i64, i1)

define i8 @cttz_i8(i8 %x)  {
; X32-LABEL: cttz_i8:
; X32:       # %bb.0:
; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    bsfl %eax, %eax
; X32-NEXT:    # kill: def $al killed $al killed $eax
; X32-NEXT:    retl
;
; X64-LABEL: cttz_i8:
; X64:       # %bb.0:
; X64-NEXT:    movzbl %dil, %eax
; X64-NEXT:    bsfl %eax, %eax
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    retq
;
; X32-CLZ-LABEL: cttz_i8:
; X32-CLZ:       # %bb.0:
; X32-CLZ-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X32-CLZ-NEXT:    tzcntl %eax, %eax
; X32-CLZ-NEXT:    # kill: def $al killed $al killed $eax
; X32-CLZ-NEXT:    retl
;
; X64-CLZ-LABEL: cttz_i8:
; X64-CLZ:       # %bb.0:
; X64-CLZ-NEXT:    movzbl %dil, %eax
; X64-CLZ-NEXT:    tzcntl %eax, %eax
; X64-CLZ-NEXT:    # kill: def $al killed $al killed $eax
; X64-CLZ-NEXT:    retq
  %tmp = call i8 @llvm.cttz.i8( i8 %x, i1 true )
  ret i8 %tmp
}
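
; Illustrative sketch, not part of the autogenerated checks above (the name
; @cttz_i8_widen_sketch is hypothetical): the cttz_i8 checks encode a widening
; of the i8 count to a 32-bit bsf/tzcnt. For a nonzero value, zero-extending to
; i32 does not change the lowest set bit, so the trailing-zero count is
; unchanged and can simply be truncated back to i8.
define i8 @cttz_i8_widen_sketch(i8 %x) {
  %ext = zext i8 %x to i32
  %cnt = call i32 @llvm.cttz.i32(i32 %ext, i1 true)
  %res = trunc i32 %cnt to i8
  ret i8 %res
}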

define i16 @cttz_i16(i16 %x)  {
; X32-LABEL: cttz_i16:
; X32:       # %bb.0:
; X32-NEXT:    bsfw {{[0-9]+}}(%esp), %ax
; X32-NEXT:    retl
;
; X64-LABEL: cttz_i16:
; X64:       # %bb.0:
; X64-NEXT:    bsfw %di, %ax
; X64-NEXT:    retq
;
; X32-CLZ-LABEL: cttz_i16:
; X32-CLZ:       # %bb.0:
; X32-CLZ-NEXT:    tzcntw {{[0-9]+}}(%esp), %ax
; X32-CLZ-NEXT:    retl
;
; X64-CLZ-LABEL: cttz_i16:
; X64-CLZ:       # %bb.0:
; X64-CLZ-NEXT:    tzcntw %di, %ax
; X64-CLZ-NEXT:    retq
  %tmp = call i16 @llvm.cttz.i16( i16 %x, i1 true )
  ret i16 %tmp
}

define i32 @cttz_i32(i32 %x)  {
; X32-LABEL: cttz_i32:
; X32:       # %bb.0:
; X32-NEXT:    bsfl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    retl
;
; X64-LABEL: cttz_i32:
; X64:       # %bb.0:
; X64-NEXT:    bsfl %edi, %eax
; X64-NEXT:    retq
;
; X32-CLZ-LABEL: cttz_i32:
; X32-CLZ:       # %bb.0:
; X32-CLZ-NEXT:    tzcntl {{[0-9]+}}(%esp), %eax
; X32-CLZ-NEXT:    retl
;
; X64-CLZ-LABEL: cttz_i32:
; X64-CLZ:       # %bb.0:
; X64-CLZ-NEXT:    tzcntl %edi, %eax
; X64-CLZ-NEXT:    retq
  %tmp = call i32 @llvm.cttz.i32( i32 %x, i1 true )
  ret i32 %tmp
}

define i64 @cttz_i64(i64 %x)  {
; X32-LABEL: cttz_i64:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    testl %eax, %eax
; X32-NEXT:    jne .LBB3_1
; X32-NEXT:  # %bb.2:
; X32-NEXT:    bsfl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    addl $32, %eax
; X32-NEXT:    xorl %edx, %edx
; X32-NEXT:    retl
; X32-NEXT:  .LBB3_1:
; X32-NEXT:    bsfl %eax, %eax
; X32-NEXT:    xorl %edx, %edx
; X32-NEXT:    retl
;
; X64-LABEL: cttz_i64:
; X64:       # %bb.0:
; X64-NEXT:    bsfq %rdi, %rax
; X64-NEXT:    retq
;
; X32-CLZ-LABEL: cttz_i64:
; X32-CLZ:       # %bb.0:
; X32-CLZ-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-CLZ-NEXT:    testl %eax, %eax
; X32-CLZ-NEXT:    jne .LBB3_1
; X32-CLZ-NEXT:  # %bb.2:
; X32-CLZ-NEXT:    tzcntl {{[0-9]+}}(%esp), %eax
; X32-CLZ-NEXT:    addl $32, %eax
; X32-CLZ-NEXT:    xorl %edx, %edx
; X32-CLZ-NEXT:    retl
; X32-CLZ-NEXT:  .LBB3_1:
; X32-CLZ-NEXT:    tzcntl %eax, %eax
; X32-CLZ-NEXT:    xorl %edx, %edx
; X32-CLZ-NEXT:    retl
;
; X64-CLZ-LABEL: cttz_i64:
; X64-CLZ:       # %bb.0:
; X64-CLZ-NEXT:    tzcntq %rdi, %rax
; X64-CLZ-NEXT:    retq
  %tmp = call i64 @llvm.cttz.i64( i64 %x, i1 true )
  ret i64 %tmp
}

define i8 @ctlz_i8(i8 %x) {
; X32-LABEL: ctlz_i8:
; X32:       # %bb.0:
; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    bsrl %eax, %eax
; X32-NEXT:    xorl $7, %eax
; X32-NEXT:    # kill: def $al killed $al killed $eax
; X32-NEXT:    retl
;
; X64-LABEL: ctlz_i8:
; X64:       # %bb.0:
; X64-NEXT:    movzbl %dil, %eax
; X64-NEXT:    bsrl %eax, %eax
; X64-NEXT:    xorl $7, %eax
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    retq
;
; X32-CLZ-LABEL: ctlz_i8:
; X32-CLZ:       # %bb.0:
; X32-CLZ-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X32-CLZ-NEXT:    lzcntl %eax, %eax
; X32-CLZ-NEXT:    addl $-24, %eax
; X32-CLZ-NEXT:    # kill: def $al killed $al killed $eax
; X32-CLZ-NEXT:    retl
;
; X64-CLZ-LABEL: ctlz_i8:
; X64-CLZ:       # %bb.0:
; X64-CLZ-NEXT:    movzbl %dil, %eax
; X64-CLZ-NEXT:    lzcntl %eax, %eax
; X64-CLZ-NEXT:    addl $-24, %eax
; X64-CLZ-NEXT:    # kill: def $al killed $al killed $eax
; X64-CLZ-NEXT:    retq
  %tmp2 = call i8 @llvm.ctlz.i8( i8 %x, i1 true )
  ret i8 %tmp2
}
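
; Illustrative sketch, not part of the autogenerated checks above (the name
; @ctlz_i8_adjust_sketch is hypothetical), of the 'addl $-24' in the CLZ checks
; for ctlz_i8: zero-extending an i8 to i32 adds exactly 24 leading zero bits,
; so for a nonzero x, ctlz.i8(x) == ctlz.i32(zext(x)) - 24. For example,
; x = 0x10 has 3 leading zeros as an i8; as an i32 it has 27, and 27 - 24 == 3.
define i8 @ctlz_i8_adjust_sketch(i8 %x) {
  %ext = zext i8 %x to i32
  %lz32 = call i32 @llvm.ctlz.i32(i32 %ext, i1 true)
  %adj = sub i32 %lz32, 24
  %res = trunc i32 %adj to i8
  ret i8 %res
}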

define i16 @ctlz_i16(i16 %x) {
; X32-LABEL: ctlz_i16:
; X32:       # %bb.0:
; X32-NEXT:    bsrw {{[0-9]+}}(%esp), %ax
; X32-NEXT:    xorl $15, %eax
; X32-NEXT:    # kill: def $ax killed $ax killed $eax
; X32-NEXT:    retl
;
; X64-LABEL: ctlz_i16:
; X64:       # %bb.0:
; X64-NEXT:    bsrw %di, %ax
; X64-NEXT:    xorl $15, %eax
; X64-NEXT:    # kill: def $ax killed $ax killed $eax
; X64-NEXT:    retq
;
; X32-CLZ-LABEL: ctlz_i16:
; X32-CLZ:       # %bb.0:
; X32-CLZ-NEXT:    lzcntw {{[0-9]+}}(%esp), %ax
; X32-CLZ-NEXT:    retl
;
; X64-CLZ-LABEL: ctlz_i16:
; X64-CLZ:       # %bb.0:
; X64-CLZ-NEXT:    lzcntw %di, %ax
; X64-CLZ-NEXT:    retq
  %tmp2 = call i16 @llvm.ctlz.i16( i16 %x, i1 true )
  ret i16 %tmp2
}

define i32 @ctlz_i32(i32 %x) {
; X32-LABEL: ctlz_i32:
; X32:       # %bb.0:
; X32-NEXT:    bsrl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    xorl $31, %eax
; X32-NEXT:    retl
;
; X64-LABEL: ctlz_i32:
; X64:       # %bb.0:
; X64-NEXT:    bsrl %edi, %eax
; X64-NEXT:    xorl $31, %eax
; X64-NEXT:    retq
;
; X32-CLZ-LABEL: ctlz_i32:
; X32-CLZ:       # %bb.0:
; X32-CLZ-NEXT:    lzcntl {{[0-9]+}}(%esp), %eax
; X32-CLZ-NEXT:    retl
;
; X64-CLZ-LABEL: ctlz_i32:
; X64-CLZ:       # %bb.0:
; X64-CLZ-NEXT:    lzcntl %edi, %eax
; X64-CLZ-NEXT:    retq
  %tmp = call i32 @llvm.ctlz.i32( i32 %x, i1 true )
  ret i32 %tmp
}

define i64 @ctlz_i64(i64 %x) {
; X32-LABEL: ctlz_i64:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    testl %eax, %eax
; X32-NEXT:    jne .LBB7_1
; X32-NEXT:  # %bb.2:
; X32-NEXT:    bsrl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    xorl $31, %eax
; X32-NEXT:    addl $32, %eax
; X32-NEXT:    xorl %edx, %edx
; X32-NEXT:    retl
; X32-NEXT:  .LBB7_1:
; X32-NEXT:    bsrl %eax, %eax
; X32-NEXT:    xorl $31, %eax
; X32-NEXT:    xorl %edx, %edx
; X32-NEXT:    retl
;
; X64-LABEL: ctlz_i64:
; X64:       # %bb.0:
; X64-NEXT:    bsrq %rdi, %rax
; X64-NEXT:    xorq $63, %rax
; X64-NEXT:    retq
;
; X32-CLZ-LABEL: ctlz_i64:
; X32-CLZ:       # %bb.0:
; X32-CLZ-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-CLZ-NEXT:    testl %eax, %eax
; X32-CLZ-NEXT:    jne .LBB7_1
; X32-CLZ-NEXT:  # %bb.2:
; X32-CLZ-NEXT:    lzcntl {{[0-9]+}}(%esp), %eax
; X32-CLZ-NEXT:    addl $32, %eax
; X32-CLZ-NEXT:    xorl %edx, %edx
; X32-CLZ-NEXT:    retl
; X32-CLZ-NEXT:  .LBB7_1:
; X32-CLZ-NEXT:    lzcntl %eax, %eax
; X32-CLZ-NEXT:    xorl %edx, %edx
; X32-CLZ-NEXT:    retl
;
; X64-CLZ-LABEL: ctlz_i64:
; X64-CLZ:       # %bb.0:
; X64-CLZ-NEXT:    lzcntq %rdi, %rax
; X64-CLZ-NEXT:    retq
  %tmp = call i64 @llvm.ctlz.i64( i64 %x, i1 true )
  ret i64 %tmp
}
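
; Illustrative sketch, not part of the autogenerated checks above (the name
; @ctlz_i64_split_sketch is hypothetical), of the two-register expansion the
; X32 checks for ctlz_i64 encode: for a nonzero %x, if the high 32 bits are
; nonzero the result is ctlz(hi), otherwise it is 32 + ctlz(lo). The result
; always fits in 32 bits, which is why %edx is simply zeroed for the upper
; half of the returned i64.
define i64 @ctlz_i64_split_sketch(i64 %x) {
  %hi64 = lshr i64 %x, 32
  %hi = trunc i64 %hi64 to i32
  %lo = trunc i64 %x to i32
  %hizero = icmp eq i32 %hi, 0
  %lzhi = call i32 @llvm.ctlz.i32(i32 %hi, i1 true)
  %lzlo = call i32 @llvm.ctlz.i32(i32 %lo, i1 true)
  %lzlo32 = add i32 %lzlo, 32
  %res32 = select i1 %hizero, i32 %lzlo32, i32 %lzhi
  %res = zext i32 %res32 to i64
  ret i64 %res
}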

; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
define i8 @ctlz_i8_zero_test(i8 %n) {
; X32-LABEL: ctlz_i8_zero_test:
; X32:       # %bb.0:
; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
; X32-NEXT:    testb %al, %al
; X32-NEXT:    je .LBB8_1
; X32-NEXT:  # %bb.2: # %cond.false
; X32-NEXT:    movzbl %al, %eax
; X32-NEXT:    bsrl %eax, %eax
; X32-NEXT:    xorl $7, %eax
; X32-NEXT:    # kill: def $al killed $al killed $eax
; X32-NEXT:    retl
; X32-NEXT:  .LBB8_1:
; X32-NEXT:    movb $8, %al
; X32-NEXT:    # kill: def $al killed $al killed $eax
; X32-NEXT:    retl
;
; X64-LABEL: ctlz_i8_zero_test:
; X64:       # %bb.0:
; X64-NEXT:    testb %dil, %dil
; X64-NEXT:    je .LBB8_1
; X64-NEXT:  # %bb.2: # %cond.false
; X64-NEXT:    movzbl %dil, %eax
; X64-NEXT:    bsrl %eax, %eax
; X64-NEXT:    xorl $7, %eax
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    retq
; X64-NEXT:  .LBB8_1:
; X64-NEXT:    movb $8, %al
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    retq
;
; X32-CLZ-LABEL: ctlz_i8_zero_test:
; X32-CLZ:       # %bb.0:
; X32-CLZ-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X32-CLZ-NEXT:    lzcntl %eax, %eax
; X32-CLZ-NEXT:    addl $-24, %eax
; X32-CLZ-NEXT:    # kill: def $al killed $al killed $eax
; X32-CLZ-NEXT:    retl
;
; X64-CLZ-LABEL: ctlz_i8_zero_test:
; X64-CLZ:       # %bb.0:
; X64-CLZ-NEXT:    movzbl %dil, %eax
; X64-CLZ-NEXT:    lzcntl %eax, %eax
; X64-CLZ-NEXT:    addl $-24, %eax
; X64-CLZ-NEXT:    # kill: def $al killed $al killed $eax
; X64-CLZ-NEXT:    retq
  %tmp1 = call i8 @llvm.ctlz.i8(i8 %n, i1 false)
  ret i8 %tmp1
}

; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
define i16 @ctlz_i16_zero_test(i16 %n) {
; X32-LABEL: ctlz_i16_zero_test:
; X32:       # %bb.0:
; X32-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    testw %ax, %ax
; X32-NEXT:    je .LBB9_1
; X32-NEXT:  # %bb.2: # %cond.false
; X32-NEXT:    bsrw %ax, %ax
; X32-NEXT:    xorl $15, %eax
; X32-NEXT:    # kill: def $ax killed $ax killed $eax
; X32-NEXT:    retl
; X32-NEXT:  .LBB9_1:
; X32-NEXT:    movw $16, %ax
; X32-NEXT:    # kill: def $ax killed $ax killed $eax
; X32-NEXT:    retl
;
; X64-LABEL: ctlz_i16_zero_test:
; X64:       # %bb.0:
; X64-NEXT:    testw %di, %di
; X64-NEXT:    je .LBB9_1
; X64-NEXT:  # %bb.2: # %cond.false
; X64-NEXT:    bsrw %di, %ax
; X64-NEXT:    xorl $15, %eax
; X64-NEXT:    # kill: def $ax killed $ax killed $eax
; X64-NEXT:    retq
; X64-NEXT:  .LBB9_1:
; X64-NEXT:    movw $16, %ax
; X64-NEXT:    # kill: def $ax killed $ax killed $eax
; X64-NEXT:    retq
;
; X32-CLZ-LABEL: ctlz_i16_zero_test:
; X32-CLZ:       # %bb.0:
; X32-CLZ-NEXT:    lzcntw {{[0-9]+}}(%esp), %ax
; X32-CLZ-NEXT:    retl
;
; X64-CLZ-LABEL: ctlz_i16_zero_test:
; X64-CLZ:       # %bb.0:
; X64-CLZ-NEXT:    lzcntw %di, %ax
; X64-CLZ-NEXT:    retq
  %tmp1 = call i16 @llvm.ctlz.i16(i16 %n, i1 false)
  ret i16 %tmp1
}

; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
define i32 @ctlz_i32_zero_test(i32 %n) {
; X32-LABEL: ctlz_i32_zero_test:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    testl %eax, %eax
; X32-NEXT:    je .LBB10_1
; X32-NEXT:  # %bb.2: # %cond.false
; X32-NEXT:    bsrl %eax, %eax
; X32-NEXT:    xorl $31, %eax
; X32-NEXT:    retl
; X32-NEXT:  .LBB10_1:
; X32-NEXT:    movl $32, %eax
; X32-NEXT:    retl
;
; X64-LABEL: ctlz_i32_zero_test:
; X64:       # %bb.0:
; X64-NEXT:    testl %edi, %edi
; X64-NEXT:    je .LBB10_1
; X64-NEXT:  # %bb.2: # %cond.false
; X64-NEXT:    bsrl %edi, %eax
; X64-NEXT:    xorl $31, %eax
; X64-NEXT:    retq
; X64-NEXT:  .LBB10_1:
; X64-NEXT:    movl $32, %eax
; X64-NEXT:    retq
;
; X32-CLZ-LABEL: ctlz_i32_zero_test:
; X32-CLZ:       # %bb.0:
; X32-CLZ-NEXT:    lzcntl {{[0-9]+}}(%esp), %eax
; X32-CLZ-NEXT:    retl
;
; X64-CLZ-LABEL: ctlz_i32_zero_test:
; X64-CLZ:       # %bb.0:
; X64-CLZ-NEXT:    lzcntl %edi, %eax
; X64-CLZ-NEXT:    retq
  %tmp1 = call i32 @llvm.ctlz.i32(i32 %n, i1 false)
  ret i32 %tmp1
}
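
; Illustrative sketch, not part of the autogenerated checks above (the name
; @ctlz_i32_despeculate_sketch is hypothetical), of how the zero-defined form
; relates to the zero-undef form: ctlz(n, false) behaves like a zero check
; selecting 32, and CodeGenPrepare turns that check into the test-and-branch
; seen in the X32/X64 checks above. With lzcnt available no check is needed,
; because lzcnt of 0 is already 32.
define i32 @ctlz_i32_despeculate_sketch(i32 %n) {
  %iszero = icmp eq i32 %n, 0
  %lz = call i32 @llvm.ctlz.i32(i32 %n, i1 true)
  %res = select i1 %iszero, i32 32, i32 %lz
  ret i32 %res
}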

; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
define i64 @ctlz_i64_zero_test(i64 %n) {
; X32-LABEL: ctlz_i64_zero_test:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    bsrl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    movl $63, %eax
; X32-NEXT:    je .LBB11_2
; X32-NEXT:  # %bb.1:
; X32-NEXT:    movl %edx, %eax
; X32-NEXT:  .LBB11_2:
; X32-NEXT:    testl %ecx, %ecx
; X32-NEXT:    jne .LBB11_3
; X32-NEXT:  # %bb.4:
; X32-NEXT:    xorl $31, %eax
; X32-NEXT:    addl $32, %eax
; X32-NEXT:    xorl %edx, %edx
; X32-NEXT:    retl
; X32-NEXT:  .LBB11_3:
; X32-NEXT:    bsrl %ecx, %eax
; X32-NEXT:    xorl $31, %eax
; X32-NEXT:    xorl %edx, %edx
; X32-NEXT:    retl
;
; X64-LABEL: ctlz_i64_zero_test:
; X64:       # %bb.0:
; X64-NEXT:    testq %rdi, %rdi
; X64-NEXT:    je .LBB11_1
; X64-NEXT:  # %bb.2: # %cond.false
; X64-NEXT:    bsrq %rdi, %rax
; X64-NEXT:    xorq $63, %rax
; X64-NEXT:    retq
; X64-NEXT:  .LBB11_1:
; X64-NEXT:    movl $64, %eax
; X64-NEXT:    retq
;
; X32-CLZ-LABEL: ctlz_i64_zero_test:
; X32-CLZ:       # %bb.0:
; X32-CLZ-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-CLZ-NEXT:    testl %eax, %eax
; X32-CLZ-NEXT:    jne .LBB11_1
; X32-CLZ-NEXT:  # %bb.2:
; X32-CLZ-NEXT:    lzcntl {{[0-9]+}}(%esp), %eax
; X32-CLZ-NEXT:    addl $32, %eax
; X32-CLZ-NEXT:    xorl %edx, %edx
; X32-CLZ-NEXT:    retl
; X32-CLZ-NEXT:  .LBB11_1:
; X32-CLZ-NEXT:    lzcntl %eax, %eax
; X32-CLZ-NEXT:    xorl %edx, %edx
; X32-CLZ-NEXT:    retl
;
; X64-CLZ-LABEL: ctlz_i64_zero_test:
; X64-CLZ:       # %bb.0:
; X64-CLZ-NEXT:    lzcntq %rdi, %rax
; X64-CLZ-NEXT:    retq
  %tmp1 = call i64 @llvm.ctlz.i64(i64 %n, i1 false)
  ret i64 %tmp1
}

; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
define i8 @cttz_i8_zero_test(i8 %n) {
; X32-LABEL: cttz_i8_zero_test:
; X32:       # %bb.0:
; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
; X32-NEXT:    testb %al, %al
; X32-NEXT:    je .LBB12_1
; X32-NEXT:  # %bb.2: # %cond.false
; X32-NEXT:    movzbl %al, %eax
; X32-NEXT:    bsfl %eax, %eax
; X32-NEXT:    # kill: def $al killed $al killed $eax
; X32-NEXT:    retl
; X32-NEXT:  .LBB12_1:
; X32-NEXT:    movb $8, %al
; X32-NEXT:    # kill: def $al killed $al killed $eax
; X32-NEXT:    retl
;
; X64-LABEL: cttz_i8_zero_test:
; X64:       # %bb.0:
; X64-NEXT:    testb %dil, %dil
; X64-NEXT:    je .LBB12_1
; X64-NEXT:  # %bb.2: # %cond.false
; X64-NEXT:    movzbl %dil, %eax
; X64-NEXT:    bsfl %eax, %eax
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    retq
; X64-NEXT:  .LBB12_1:
; X64-NEXT:    movb $8, %al
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    retq
;
; X32-CLZ-LABEL: cttz_i8_zero_test:
; X32-CLZ:       # %bb.0:
; X32-CLZ-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X32-CLZ-NEXT:    orl $256, %eax # imm = 0x100
; X32-CLZ-NEXT:    tzcntl %eax, %eax
; X32-CLZ-NEXT:    # kill: def $al killed $al killed $eax
; X32-CLZ-NEXT:    retl
;
; X64-CLZ-LABEL: cttz_i8_zero_test:
; X64-CLZ:       # %bb.0:
; X64-CLZ-NEXT:    movzbl %dil, %eax
; X64-CLZ-NEXT:    orl $256, %eax # imm = 0x100
; X64-CLZ-NEXT:    tzcntl %eax, %eax
; X64-CLZ-NEXT:    # kill: def $al killed $al killed $eax
; X64-CLZ-NEXT:    retq
  %tmp1 = call i8 @llvm.cttz.i8(i8 %n, i1 false)
  ret i8 %tmp1
}
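
; Illustrative sketch, not part of the autogenerated checks above (the name
; @cttz_i8_or256_sketch is hypothetical), of the 'orl $256' trick in the CLZ
; checks for cttz_i8_zero_test: setting bit 8 leaves bits 0-7 untouched but
; guarantees the 32-bit value is nonzero, so tzcnt needs no zero check. When
; %x == 0 the lowest set bit is bit 8 and the count is 8, which is exactly
; cttz.i8(0, false).
define i8 @cttz_i8_or256_sketch(i8 %x) {
  %ext = zext i8 %x to i32
  %nz = or i32 %ext, 256
  %cnt = call i32 @llvm.cttz.i32(i32 %nz, i1 true)
  %res = trunc i32 %cnt to i8
  ret i8 %res
}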

; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
define i16 @cttz_i16_zero_test(i16 %n) {
; X32-LABEL: cttz_i16_zero_test:
; X32:       # %bb.0:
; X32-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    testw %ax, %ax
; X32-NEXT:    je .LBB13_1
; X32-NEXT:  # %bb.2: # %cond.false
; X32-NEXT:    bsfw %ax, %ax
; X32-NEXT:    retl
; X32-NEXT:  .LBB13_1:
; X32-NEXT:    movw $16, %ax
; X32-NEXT:    retl
;
; X64-LABEL: cttz_i16_zero_test:
; X64:       # %bb.0:
; X64-NEXT:    testw %di, %di
; X64-NEXT:    je .LBB13_1
; X64-NEXT:  # %bb.2: # %cond.false
; X64-NEXT:    bsfw %di, %ax
; X64-NEXT:    retq
; X64-NEXT:  .LBB13_1:
; X64-NEXT:    movw $16, %ax
; X64-NEXT:    retq
;
; X32-CLZ-LABEL: cttz_i16_zero_test:
; X32-CLZ:       # %bb.0:
; X32-CLZ-NEXT:    tzcntw {{[0-9]+}}(%esp), %ax
; X32-CLZ-NEXT:    retl
;
; X64-CLZ-LABEL: cttz_i16_zero_test:
; X64-CLZ:       # %bb.0:
; X64-CLZ-NEXT:    tzcntw %di, %ax
; X64-CLZ-NEXT:    retq
  %tmp1 = call i16 @llvm.cttz.i16(i16 %n, i1 false)
  ret i16 %tmp1
}

; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
define i32 @cttz_i32_zero_test(i32 %n) {
; X32-LABEL: cttz_i32_zero_test:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    testl %eax, %eax
; X32-NEXT:    je .LBB14_1
; X32-NEXT:  # %bb.2: # %cond.false
; X32-NEXT:    bsfl %eax, %eax
; X32-NEXT:    retl
; X32-NEXT:  .LBB14_1:
; X32-NEXT:    movl $32, %eax
; X32-NEXT:    retl
;
; X64-LABEL: cttz_i32_zero_test:
; X64:       # %bb.0:
; X64-NEXT:    testl %edi, %edi
; X64-NEXT:    je .LBB14_1
; X64-NEXT:  # %bb.2: # %cond.false
; X64-NEXT:    bsfl %edi, %eax
; X64-NEXT:    retq
; X64-NEXT:  .LBB14_1:
; X64-NEXT:    movl $32, %eax
; X64-NEXT:    retq
;
; X32-CLZ-LABEL: cttz_i32_zero_test:
; X32-CLZ:       # %bb.0:
; X32-CLZ-NEXT:    tzcntl {{[0-9]+}}(%esp), %eax
; X32-CLZ-NEXT:    retl
;
; X64-CLZ-LABEL: cttz_i32_zero_test:
; X64-CLZ:       # %bb.0:
; X64-CLZ-NEXT:    tzcntl %edi, %eax
; X64-CLZ-NEXT:    retq
  %tmp1 = call i32 @llvm.cttz.i32(i32 %n, i1 false)
  ret i32 %tmp1
}

; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
define i64 @cttz_i64_zero_test(i64 %n) {
; X32-LABEL: cttz_i64_zero_test:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    bsfl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    movl $32, %eax
; X32-NEXT:    je .LBB15_2
; X32-NEXT:  # %bb.1:
; X32-NEXT:    movl %edx, %eax
; X32-NEXT:  .LBB15_2:
; X32-NEXT:    testl %ecx, %ecx
; X32-NEXT:    jne .LBB15_3
; X32-NEXT:  # %bb.4:
; X32-NEXT:    addl $32, %eax
; X32-NEXT:    xorl %edx, %edx
; X32-NEXT:    retl
; X32-NEXT:  .LBB15_3:
; X32-NEXT:    bsfl %ecx, %eax
; X32-NEXT:    xorl %edx, %edx
; X32-NEXT:    retl
;
; X64-LABEL: cttz_i64_zero_test:
; X64:       # %bb.0:
; X64-NEXT:    testq %rdi, %rdi
; X64-NEXT:    je .LBB15_1
; X64-NEXT:  # %bb.2: # %cond.false
; X64-NEXT:    bsfq %rdi, %rax
; X64-NEXT:    retq
; X64-NEXT:  .LBB15_1:
; X64-NEXT:    movl $64, %eax
; X64-NEXT:    retq
;
; X32-CLZ-LABEL: cttz_i64_zero_test:
; X32-CLZ:       # %bb.0:
; X32-CLZ-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-CLZ-NEXT:    testl %eax, %eax
; X32-CLZ-NEXT:    jne .LBB15_1
; X32-CLZ-NEXT:  # %bb.2:
; X32-CLZ-NEXT:    tzcntl {{[0-9]+}}(%esp), %eax
; X32-CLZ-NEXT:    addl $32, %eax
; X32-CLZ-NEXT:    xorl %edx, %edx
; X32-CLZ-NEXT:    retl
; X32-CLZ-NEXT:  .LBB15_1:
; X32-CLZ-NEXT:    tzcntl %eax, %eax
; X32-CLZ-NEXT:    xorl %edx, %edx
; X32-CLZ-NEXT:    retl
;
; X64-CLZ-LABEL: cttz_i64_zero_test:
; X64-CLZ:       # %bb.0:
; X64-CLZ-NEXT:    tzcntq %rdi, %rax
; X64-CLZ-NEXT:    retq
  %tmp1 = call i64 @llvm.cttz.i64(i64 %n, i1 false)
  ret i64 %tmp1
}

; Don't generate the cmovne when the source is known non-zero (and bsr would
; not set ZF).
; rdar://9490949
; FIXME: The compare and branch are produced late in IR (by CodeGenPrepare), and
;        codegen doesn't know how to delete the movl and je.
define i32 @ctlz_i32_fold_cmov(i32 %n) {
; X32-LABEL: ctlz_i32_fold_cmov:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    orl $1, %eax
; X32-NEXT:    je .LBB16_1
; X32-NEXT:  # %bb.2: # %cond.false
; X32-NEXT:    bsrl %eax, %eax
; X32-NEXT:    xorl $31, %eax
; X32-NEXT:    retl
; X32-NEXT:  .LBB16_1:
; X32-NEXT:    movl $32, %eax
; X32-NEXT:    retl
;
; X64-LABEL: ctlz_i32_fold_cmov:
; X64:       # %bb.0:
; X64-NEXT:    orl $1, %edi
; X64-NEXT:    je .LBB16_1
; X64-NEXT:  # %bb.2: # %cond.false
; X64-NEXT:    bsrl %edi, %eax
; X64-NEXT:    xorl $31, %eax
; X64-NEXT:    retq
; X64-NEXT:  .LBB16_1:
; X64-NEXT:    movl $32, %eax
; X64-NEXT:    retq
;
; X32-CLZ-LABEL: ctlz_i32_fold_cmov:
; X32-CLZ:       # %bb.0:
; X32-CLZ-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-CLZ-NEXT:    orl $1, %eax
; X32-CLZ-NEXT:    lzcntl %eax, %eax
; X32-CLZ-NEXT:    retl
;
; X64-CLZ-LABEL: ctlz_i32_fold_cmov:
; X64-CLZ:       # %bb.0:
; X64-CLZ-NEXT:    orl $1, %edi
; X64-CLZ-NEXT:    lzcntl %edi, %eax
; X64-CLZ-NEXT:    retq
  %or = or i32 %n, 1
  %tmp1 = call i32 @llvm.ctlz.i32(i32 %or, i1 false)
  ret i32 %tmp1
}

; Don't generate any xors when a 'ctlz' intrinsic is actually used to compute
; the index of the most significant set bit, which is what 'bsr' does natively.
; FIXME: We should probably select BSR instead of LZCNT in these circumstances.
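; (For a nonzero i32, ctlz(x) == 31 - bsr(x) and 0 <= ctlz(x) <= 31, so xor-ing
; with 31 undoes the subtraction: (31 - bsr(x)) ^ 31 == bsr(x). Hence the plain
; bsrl with no xor in the X32/X64 checks below.)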
define i32 @ctlz_bsr(i32 %n) {
; X32-LABEL: ctlz_bsr:
; X32:       # %bb.0:
; X32-NEXT:    bsrl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    retl
;
; X64-LABEL: ctlz_bsr:
; X64:       # %bb.0:
; X64-NEXT:    bsrl %edi, %eax
; X64-NEXT:    retq
;
; X32-CLZ-LABEL: ctlz_bsr:
; X32-CLZ:       # %bb.0:
; X32-CLZ-NEXT:    lzcntl {{[0-9]+}}(%esp), %eax
; X32-CLZ-NEXT:    xorl $31, %eax
; X32-CLZ-NEXT:    retl
;
; X64-CLZ-LABEL: ctlz_bsr:
; X64-CLZ:       # %bb.0:
; X64-CLZ-NEXT:    lzcntl %edi, %eax
; X64-CLZ-NEXT:    xorl $31, %eax
; X64-CLZ-NEXT:    retq
  %ctlz = call i32 @llvm.ctlz.i32(i32 %n, i1 true)
  %bsr = xor i32 %ctlz, 31
  ret i32 %bsr
}

; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
; FIXME: The compare and branch are produced late in IR (by CodeGenPrepare), and
;        codegen doesn't know how to combine the $32 and $31 into $63.
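; (On the zero path, 'movl $32' followed by 'xorl $31' is just 'movl $63' since
; 32 ^ 31 == 63, and the back-to-back 'xorl $31' on the non-zero path cancel.)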
define i32 @ctlz_bsr_zero_test(i32 %n) {
; X32-LABEL: ctlz_bsr_zero_test:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    testl %eax, %eax
; X32-NEXT:    je .LBB18_1
; X32-NEXT:  # %bb.2: # %cond.false
; X32-NEXT:    bsrl %eax, %eax
; X32-NEXT:    xorl $31, %eax
; X32-NEXT:    xorl $31, %eax
; X32-NEXT:    retl
; X32-NEXT:  .LBB18_1:
; X32-NEXT:    movl $32, %eax
; X32-NEXT:    xorl $31, %eax
; X32-NEXT:    retl
;
; X64-LABEL: ctlz_bsr_zero_test:
; X64:       # %bb.0:
; X64-NEXT:    testl %edi, %edi
; X64-NEXT:    je .LBB18_1
; X64-NEXT:  # %bb.2: # %cond.false
; X64-NEXT:    bsrl %edi, %eax
; X64-NEXT:    xorl $31, %eax
; X64-NEXT:    xorl $31, %eax
; X64-NEXT:    retq
; X64-NEXT:  .LBB18_1:
; X64-NEXT:    movl $32, %eax
; X64-NEXT:    xorl $31, %eax
; X64-NEXT:    retq
;
; X32-CLZ-LABEL: ctlz_bsr_zero_test:
; X32-CLZ:       # %bb.0:
; X32-CLZ-NEXT:    lzcntl {{[0-9]+}}(%esp), %eax
; X32-CLZ-NEXT:    xorl $31, %eax
; X32-CLZ-NEXT:    retl
;
; X64-CLZ-LABEL: ctlz_bsr_zero_test:
; X64-CLZ:       # %bb.0:
; X64-CLZ-NEXT:    lzcntl %edi, %eax
; X64-CLZ-NEXT:    xorl $31, %eax
; X64-CLZ-NEXT:    retq
  %ctlz = call i32 @llvm.ctlz.i32(i32 %n, i1 false)
  %bsr = xor i32 %ctlz, 31
  ret i32 %bsr
}

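; In the two 'knownbits' tests below, the 'or' pins a bit so the zero-undef
; count is known to be 0 or 1; the trailing 'and i8 %tmp, 1' in the IR is then
; a no-op, so no mask instruction appears in the checks.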
define i8 @cttz_i8_knownbits(i8 %x)  {
; X32-LABEL: cttz_i8_knownbits:
; X32:       # %bb.0:
; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
; X32-NEXT:    orb $2, %al
; X32-NEXT:    movzbl %al, %eax
; X32-NEXT:    bsfl %eax, %eax
; X32-NEXT:    # kill: def $al killed $al killed $eax
; X32-NEXT:    retl
;
; X64-LABEL: cttz_i8_knownbits:
; X64:       # %bb.0:
; X64-NEXT:    orb $2, %dil
; X64-NEXT:    movzbl %dil, %eax
; X64-NEXT:    bsfl %eax, %eax
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    retq
;
; X32-CLZ-LABEL: cttz_i8_knownbits:
; X32-CLZ:       # %bb.0:
; X32-CLZ-NEXT:    movb {{[0-9]+}}(%esp), %al
; X32-CLZ-NEXT:    orb $2, %al
; X32-CLZ-NEXT:    movzbl %al, %eax
; X32-CLZ-NEXT:    tzcntl %eax, %eax
; X32-CLZ-NEXT:    # kill: def $al killed $al killed $eax
; X32-CLZ-NEXT:    retl
;
; X64-CLZ-LABEL: cttz_i8_knownbits:
; X64-CLZ:       # %bb.0:
; X64-CLZ-NEXT:    orb $2, %dil
; X64-CLZ-NEXT:    movzbl %dil, %eax
; X64-CLZ-NEXT:    tzcntl %eax, %eax
; X64-CLZ-NEXT:    # kill: def $al killed $al killed $eax
; X64-CLZ-NEXT:    retq
  %x2 = or i8 %x, 2
  %tmp = call i8 @llvm.cttz.i8(i8 %x2, i1 true )
  %tmp2 = and i8 %tmp, 1
  ret i8 %tmp2
}

define i8 @ctlz_i8_knownbits(i8 %x)  {
; X32-LABEL: ctlz_i8_knownbits:
; X32:       # %bb.0:
; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
; X32-NEXT:    orb $64, %al
; X32-NEXT:    movzbl %al, %eax
; X32-NEXT:    bsrl %eax, %eax
; X32-NEXT:    xorl $7, %eax
; X32-NEXT:    # kill: def $al killed $al killed $eax
; X32-NEXT:    retl
;
; X64-LABEL: ctlz_i8_knownbits:
; X64:       # %bb.0:
; X64-NEXT:    orb $64, %dil
; X64-NEXT:    movzbl %dil, %eax
; X64-NEXT:    bsrl %eax, %eax
; X64-NEXT:    xorl $7, %eax
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    retq
;
; X32-CLZ-LABEL: ctlz_i8_knownbits:
; X32-CLZ:       # %bb.0:
; X32-CLZ-NEXT:    movb {{[0-9]+}}(%esp), %al
; X32-CLZ-NEXT:    orb $64, %al
; X32-CLZ-NEXT:    movzbl %al, %eax
; X32-CLZ-NEXT:    lzcntl %eax, %eax
; X32-CLZ-NEXT:    addl $-24, %eax
; X32-CLZ-NEXT:    # kill: def $al killed $al killed $eax
; X32-CLZ-NEXT:    retl
;
; X64-CLZ-LABEL: ctlz_i8_knownbits:
; X64-CLZ:       # %bb.0:
; X64-CLZ-NEXT:    orb $64, %dil
; X64-CLZ-NEXT:    movzbl %dil, %eax
; X64-CLZ-NEXT:    lzcntl %eax, %eax
; X64-CLZ-NEXT:    addl $-24, %eax
; X64-CLZ-NEXT:    # kill: def $al killed $al killed $eax
; X64-CLZ-NEXT:    retq

  %x2 = or i8 %x, 64
  %tmp = call i8 @llvm.ctlz.i8(i8 %x2, i1 true )
  %tmp2 = and i8 %tmp, 1
  ret i8 %tmp2
}

; Make sure we can detect that the input is non-zero and avoid cmov after BSR
; This is relevant for 32-bit mode without lzcnt
define i64 @ctlz_i64_zero_test_knownneverzero(i64 %n) {
; X32-LABEL: ctlz_i64_zero_test_knownneverzero:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    testl %eax, %eax
; X32-NEXT:    jne .LBB21_1
; X32-NEXT:  # %bb.2:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    orl $1, %eax
; X32-NEXT:    bsrl %eax, %eax
; X32-NEXT:    xorl $31, %eax
; X32-NEXT:    orl $32, %eax
; X32-NEXT:    xorl %edx, %edx
; X32-NEXT:    retl
; X32-NEXT:  .LBB21_1:
; X32-NEXT:    bsrl %eax, %eax
; X32-NEXT:    xorl $31, %eax
; X32-NEXT:    xorl %edx, %edx
; X32-NEXT:    retl
;
; X64-LABEL: ctlz_i64_zero_test_knownneverzero:
; X64:       # %bb.0:
; X64-NEXT:    orq $1, %rdi
; X64-NEXT:    je .LBB21_1
; X64-NEXT:  # %bb.2: # %cond.false
; X64-NEXT:    bsrq %rdi, %rax
; X64-NEXT:    xorq $63, %rax
; X64-NEXT:    retq
; X64-NEXT:  .LBB21_1:
; X64-NEXT:    movl $64, %eax
; X64-NEXT:    retq
;
; X32-CLZ-LABEL: ctlz_i64_zero_test_knownneverzero:
; X32-CLZ:       # %bb.0:
; X32-CLZ-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-CLZ-NEXT:    testl %eax, %eax
; X32-CLZ-NEXT:    jne .LBB21_1
; X32-CLZ-NEXT:  # %bb.2:
; X32-CLZ-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-CLZ-NEXT:    orl $1, %eax
; X32-CLZ-NEXT:    lzcntl %eax, %eax
; X32-CLZ-NEXT:    orl $32, %eax
; X32-CLZ-NEXT:    xorl %edx, %edx
; X32-CLZ-NEXT:    retl
; X32-CLZ-NEXT:  .LBB21_1:
; X32-CLZ-NEXT:    lzcntl %eax, %eax
; X32-CLZ-NEXT:    xorl %edx, %edx
; X32-CLZ-NEXT:    retl
;
; X64-CLZ-LABEL: ctlz_i64_zero_test_knownneverzero:
; X64-CLZ:       # %bb.0:
; X64-CLZ-NEXT:    orq $1, %rdi
; X64-CLZ-NEXT:    lzcntq %rdi, %rax
; X64-CLZ-NEXT:    retq
  %o = or i64 %n, 1
  %tmp1 = call i64 @llvm.ctlz.i64(i64 %o, i1 false)
  ret i64 %tmp1
}

; Make sure we can detect that the input is non-zero and avoid cmov after BSF
; This is relevant for 32-bit mode without tzcnt
define i64 @cttz_i64_zero_test_knownneverzero(i64 %n) {
; X32-LABEL: cttz_i64_zero_test_knownneverzero:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    testl %eax, %eax
; X32-NEXT:    jne .LBB22_1
; X32-NEXT:  # %bb.2:
; X32-NEXT:    movl $-2147483648, %eax # imm = 0x80000000
; X32-NEXT:    orl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    bsfl %eax, %eax
; X32-NEXT:    orl $32, %eax
; X32-NEXT:    xorl %edx, %edx
; X32-NEXT:    retl
; X32-NEXT:  .LBB22_1:
; X32-NEXT:    bsfl %eax, %eax
; X32-NEXT:    xorl %edx, %edx
; X32-NEXT:    retl
;
; X64-LABEL: cttz_i64_zero_test_knownneverzero:
; X64:       # %bb.0:
; X64-NEXT:    movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
; X64-NEXT:    orq %rdi, %rax
; X64-NEXT:    je .LBB22_1
; X64-NEXT:  # %bb.2: # %cond.false
; X64-NEXT:    bsfq %rax, %rax
; X64-NEXT:    retq
; X64-NEXT:  .LBB22_1:
; X64-NEXT:    movl $64, %eax
; X64-NEXT:    retq
;
; X32-CLZ-LABEL: cttz_i64_zero_test_knownneverzero:
; X32-CLZ:       # %bb.0:
; X32-CLZ-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-CLZ-NEXT:    testl %eax, %eax
; X32-CLZ-NEXT:    jne .LBB22_1
; X32-CLZ-NEXT:  # %bb.2:
; X32-CLZ-NEXT:    movl $-2147483648, %eax # imm = 0x80000000
; X32-CLZ-NEXT:    orl {{[0-9]+}}(%esp), %eax
; X32-CLZ-NEXT:    tzcntl %eax, %eax
; X32-CLZ-NEXT:    orl $32, %eax
; X32-CLZ-NEXT:    xorl %edx, %edx
; X32-CLZ-NEXT:    retl
; X32-CLZ-NEXT:  .LBB22_1:
; X32-CLZ-NEXT:    tzcntl %eax, %eax
; X32-CLZ-NEXT:    xorl %edx, %edx
; X32-CLZ-NEXT:    retl
;
; X64-CLZ-LABEL: cttz_i64_zero_test_knownneverzero:
; X64-CLZ:       # %bb.0:
; X64-CLZ-NEXT:    movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
; X64-CLZ-NEXT:    orq %rdi, %rax
; X64-CLZ-NEXT:    tzcntq %rax, %rax
; X64-CLZ-NEXT:    retq
  %o = or i64 %n, -9223372036854775808 ; 0x8000000000000000
  %tmp1 = call i64 @llvm.cttz.i64(i64 %o, i1 false)
  ret i64 %tmp1
}