; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s

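; Tests for lowering the llvm.cttz.* (count trailing zeros) and llvm.ctlz.*
; (count leading zeros) intrinsics to the x86 bsf/bsr instructions.
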
declare i8 @llvm.cttz.i8(i8, i1)
declare i16 @llvm.cttz.i16(i16, i1)
declare i32 @llvm.cttz.i32(i32, i1)
declare i64 @llvm.cttz.i64(i64, i1)
declare i8 @llvm.ctlz.i8(i8, i1)
declare i16 @llvm.ctlz.i16(i16, i1)
declare i32 @llvm.ctlz.i32(i32, i1)
declare i64 @llvm.ctlz.i64(i64, i1)

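; With the is_zero_undef flag (the i1 operand) set to true, the input may be
; assumed non-zero, so cttz lowers directly to bsf (bit scan forward), which
; returns the index of the lowest set bit. The i8 case is zero-extended first
; so the 32-bit bsfl can be used; the '# kill' line is a register-liveness
; annotation emitted by the compiler, not an instruction.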
define i8 @cttz_i8(i8 %x) {
; CHECK-LABEL: cttz_i8:
; CHECK:       # BB#0:
; CHECK-NEXT:    movzbl %dil, %eax
; CHECK-NEXT:    bsfl %eax, %eax
; CHECK-NEXT:    # kill
; CHECK-NEXT:    retq
  %tmp = call i8 @llvm.cttz.i8( i8 %x, i1 true )
  ret i8 %tmp
}

define i16 @cttz_i16(i16 %x) {
; CHECK-LABEL: cttz_i16:
; CHECK:       # BB#0:
; CHECK-NEXT:    bsfw %di, %ax
; CHECK-NEXT:    retq
  %tmp = call i16 @llvm.cttz.i16( i16 %x, i1 true )
  ret i16 %tmp
}

define i32 @cttz_i32(i32 %x) {
; CHECK-LABEL: cttz_i32:
; CHECK:       # BB#0:
; CHECK-NEXT:    bsfl %edi, %eax
; CHECK-NEXT:    retq
  %tmp = call i32 @llvm.cttz.i32( i32 %x, i1 true )
  ret i32 %tmp
}

define i64 @cttz_i64(i64 %x) {
; CHECK-LABEL: cttz_i64:
; CHECK:       # BB#0:
; CHECK-NEXT:    bsfq %rdi, %rax
; CHECK-NEXT:    retq
  %tmp = call i64 @llvm.cttz.i64( i64 %x, i1 true )
  ret i64 %tmp
}

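; Likewise, with is_zero_undef true, ctlz lowers to bsr (bit scan reverse),
; which returns the index of the highest set bit; ctlz is then recovered as
; (width - 1) - bsr, computed with an xor because width - 1 is all ones in
; the index bits. For example, bsrl of 0x00010000 yields 16, and
; 16 xor 31 = 15, the number of leading zeros in that 32-bit value.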
define i8 @ctlz_i8(i8 %x) {
; CHECK-LABEL: ctlz_i8:
; CHECK:       # BB#0:
; CHECK-NEXT:    movzbl %dil, %eax
; CHECK-NEXT:    bsrl %eax, %eax
; CHECK-NEXT:    xorl $7, %eax
; CHECK-NEXT:    # kill
; CHECK-NEXT:    retq
  %tmp2 = call i8 @llvm.ctlz.i8( i8 %x, i1 true )
  ret i8 %tmp2
}

define i16 @ctlz_i16(i16 %x) {
; CHECK-LABEL: ctlz_i16:
; CHECK:       # BB#0:
; CHECK-NEXT:    bsrw %di, %ax
; CHECK-NEXT:    xorl $15, %eax
; CHECK-NEXT:    # kill
; CHECK-NEXT:    retq
  %tmp2 = call i16 @llvm.ctlz.i16( i16 %x, i1 true )
  ret i16 %tmp2
}

define i32 @ctlz_i32(i32 %x) {
; CHECK-LABEL: ctlz_i32:
; CHECK:       # BB#0:
; CHECK-NEXT:    bsrl %edi, %eax
; CHECK-NEXT:    xorl $31, %eax
; CHECK-NEXT:    retq
  %tmp = call i32 @llvm.ctlz.i32( i32 %x, i1 true )
  ret i32 %tmp
}

define i64 @ctlz_i64(i64 %x) {
; CHECK-LABEL: ctlz_i64:
; CHECK:       # BB#0:
; CHECK-NEXT:    bsrq %rdi, %rax
; CHECK-NEXT:    xorq $63, %rax
; CHECK-NEXT:    retq
  %tmp = call i64 @llvm.ctlz.i64( i64 %x, i1 true )
  ret i64 %tmp
}

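; With is_zero_undef false, the intrinsics are defined to return the bit width
; for a zero input, but bsf/bsr leave the destination undefined when the
; source is zero. The lowering therefore preloads the bit width into the
; result register, tests the input, and branches around the bsr/xor when the
; input is zero.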
define i8 @ctlz_i8_zero_test(i8 %n) {
; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.

; CHECK-LABEL: ctlz_i8_zero_test:
; CHECK:       # BB#0:
; CHECK-NEXT:    movb $8, %al
; CHECK-NEXT:    testb %dil, %dil
; CHECK-NEXT:    je .LBB8_2
; CHECK-NEXT:  # BB#1: # %cond.false
; CHECK-NEXT:    movzbl %dil, %eax
; CHECK-NEXT:    bsrl %eax, %eax
; CHECK-NEXT:    xorl $7, %eax
; CHECK-NEXT:  .LBB8_2: # %cond.end
; CHECK-NEXT:    # kill
; CHECK-NEXT:    retq
  %tmp1 = call i8 @llvm.ctlz.i8(i8 %n, i1 false)
  ret i8 %tmp1
}

define i16 @ctlz_i16_zero_test(i16 %n) {
; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.

; CHECK-LABEL: ctlz_i16_zero_test:
; CHECK:       # BB#0:
; CHECK-NEXT:    movw $16, %ax
; CHECK-NEXT:    testw %di, %di
; CHECK-NEXT:    je .LBB9_2
; CHECK-NEXT:  # BB#1: # %cond.false
; CHECK-NEXT:    bsrw %di, %ax
; CHECK-NEXT:    xorl $15, %eax
; CHECK-NEXT:  .LBB9_2: # %cond.end
; CHECK-NEXT:    # kill
; CHECK-NEXT:    retq
  %tmp1 = call i16 @llvm.ctlz.i16(i16 %n, i1 false)
  ret i16 %tmp1
}

define i32 @ctlz_i32_zero_test(i32 %n) {
; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.

; CHECK-LABEL: ctlz_i32_zero_test:
; CHECK:       # BB#0:
; CHECK-NEXT:    movl $32, %eax
; CHECK-NEXT:    testl %edi, %edi
; CHECK-NEXT:    je .LBB10_2
; CHECK-NEXT:  # BB#1: # %cond.false
; CHECK-NEXT:    bsrl %edi, %eax
; CHECK-NEXT:    xorl $31, %eax
; CHECK-NEXT:  .LBB10_2: # %cond.end
; CHECK-NEXT:    retq
  %tmp1 = call i32 @llvm.ctlz.i32(i32 %n, i1 false)
  ret i32 %tmp1
}

define i64 @ctlz_i64_zero_test(i64 %n) {
; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.

; CHECK-LABEL: ctlz_i64_zero_test:
; CHECK:       # BB#0:
; CHECK-NEXT:    movl $64, %eax
; CHECK-NEXT:    testq %rdi, %rdi
; CHECK-NEXT:    je .LBB11_2
; CHECK-NEXT:  # BB#1: # %cond.false
; CHECK-NEXT:    bsrq %rdi, %rax
; CHECK-NEXT:    xorq $63, %rax
; CHECK-NEXT:  .LBB11_2: # %cond.end
; CHECK-NEXT:    retq
  %tmp1 = call i64 @llvm.ctlz.i64(i64 %n, i1 false)
  ret i64 %tmp1
}

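; The cttz zero tests below follow the same shape, but no xor is needed on
; the non-zero path because the bsf result is already the trailing-zero count.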
define i8 @cttz_i8_zero_test(i8 %n) {
; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.

; CHECK-LABEL: cttz_i8_zero_test:
; CHECK:       # BB#0:
; CHECK-NEXT:    movb $8, %al
; CHECK-NEXT:    testb %dil, %dil
; CHECK-NEXT:    je .LBB12_2
; CHECK-NEXT:  # BB#1: # %cond.false
; CHECK-NEXT:    movzbl %dil, %eax
; CHECK-NEXT:    bsfl %eax, %eax
; CHECK-NEXT:  .LBB12_2: # %cond.end
; CHECK-NEXT:    # kill
; CHECK-NEXT:    retq
  %tmp1 = call i8 @llvm.cttz.i8(i8 %n, i1 false)
  ret i8 %tmp1
}

define i16 @cttz_i16_zero_test(i16 %n) {
; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.

; CHECK-LABEL: cttz_i16_zero_test:
; CHECK:       # BB#0:
; CHECK-NEXT:    movw $16, %ax
; CHECK-NEXT:    testw %di, %di
; CHECK-NEXT:    je .LBB13_2
; CHECK-NEXT:  # BB#1: # %cond.false
; CHECK-NEXT:    bsfw %di, %ax
; CHECK-NEXT:  .LBB13_2: # %cond.end
; CHECK-NEXT:    retq
  %tmp1 = call i16 @llvm.cttz.i16(i16 %n, i1 false)
  ret i16 %tmp1
}

define i32 @cttz_i32_zero_test(i32 %n) {
; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.

; CHECK-LABEL: cttz_i32_zero_test:
; CHECK:       # BB#0:
; CHECK-NEXT:    movl $32, %eax
; CHECK-NEXT:    testl %edi, %edi
; CHECK-NEXT:    je .LBB14_2
; CHECK-NEXT:  # BB#1: # %cond.false
; CHECK-NEXT:    bsfl %edi, %eax
; CHECK-NEXT:  .LBB14_2: # %cond.end
; CHECK-NEXT:    retq
  %tmp1 = call i32 @llvm.cttz.i32(i32 %n, i1 false)
  ret i32 %tmp1
}

define i64 @cttz_i64_zero_test(i64 %n) {
; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.

; CHECK-LABEL: cttz_i64_zero_test:
; CHECK:       # BB#0:
; CHECK-NEXT:    movl $64, %eax
; CHECK-NEXT:    testq %rdi, %rdi
; CHECK-NEXT:    je .LBB15_2
; CHECK-NEXT:  # BB#1: # %cond.false
; CHECK-NEXT:    bsfq %rdi, %rax
; CHECK-NEXT:  .LBB15_2: # %cond.end
; CHECK-NEXT:    retq
  %tmp1 = call i64 @llvm.cttz.i64(i64 %n, i1 false)
  ret i64 %tmp1
}

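; Note: 'orl $1, %edi' sets EFLAGS from a result that always has bit 0 set,
; so ZF is never set, and the intervening movl does not touch EFLAGS; the je
; below is therefore never taken. The FIXME inside the function explains why
; the dead movl/je pair is not yet removed.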
define i32 @ctlz_i32_fold_cmov(i32 %n) {
; Don't generate the cmovne when the source is known non-zero (and bsr would
; not set ZF).
; rdar://9490949
; FIXME: The compare and branch are produced late in IR (by CodeGenPrepare), and
;        codegen doesn't know how to delete the movl and je.

; CHECK-LABEL: ctlz_i32_fold_cmov:
; CHECK:       # BB#0:
; CHECK-NEXT:    orl $1, %edi
; CHECK-NEXT:    movl $32, %eax
; CHECK-NEXT:    je .LBB16_2
; CHECK-NEXT:  # BB#1: # %cond.false
; CHECK-NEXT:    bsrl %edi, %eax
; CHECK-NEXT:    xorl $31, %eax
; CHECK-NEXT:  .LBB16_2: # %cond.end
; CHECK-NEXT:    retq
  %or = or i32 %n, 1
  %tmp1 = call i32 @llvm.ctlz.i32(i32 %or, i1 false)
  ret i32 %tmp1
}

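; Since ctlz(x) = bsr(x) xor 31 for non-zero x, xor'ing the ctlz result with
; 31 again cancels out and recovers bsr(x), so a bare bsrl suffices here.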
define i32 @ctlz_bsr(i32 %n) {
; Don't generate any xors when a 'ctlz' intrinsic is actually used to compute
; the most significant bit, which is what 'bsr' does natively.

; CHECK-LABEL: ctlz_bsr:
; CHECK:       # BB#0:
; CHECK-NEXT:    bsrl %edi, %eax
; CHECK-NEXT:    retq
  %ctlz = call i32 @llvm.ctlz.i32(i32 %n, i1 true)
  %bsr = xor i32 %ctlz, 31
  ret i32 %bsr
}

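; Note: on the zero path below, the preloaded $32 still flows through the
; trailing 'xorl $31', yielding 32 xor 31 = 63. Folding the two constants
; into a single 'movl $63' would also let the two xors cancel on the non-zero
; path, which is the missed combine the FIXME describes.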
define i32 @ctlz_bsr_zero_test(i32 %n) {
; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
; FIXME: The compare and branch are produced late in IR (by CodeGenPrepare), and
;        codegen doesn't know how to combine the $32 and $31 into $63.

; CHECK-LABEL: ctlz_bsr_zero_test:
; CHECK:       # BB#0:
; CHECK-NEXT:    movl $32, %eax
; CHECK-NEXT:    testl %edi, %edi
; CHECK-NEXT:    je .LBB18_2
; CHECK-NEXT:  # BB#1: # %cond.false
; CHECK-NEXT:    bsrl %edi, %eax
; CHECK-NEXT:    xorl $31, %eax
; CHECK-NEXT:  .LBB18_2: # %cond.end
; CHECK-NEXT:    xorl $31, %eax
; CHECK-NEXT:    retq
  %ctlz = call i32 @llvm.ctlz.i32(i32 %n, i1 false)
  %bsr = xor i32 %ctlz, 31
  ret i32 %bsr
}