; Regression tests for x86-64 lowering of the llvm.cttz.* / llvm.ctlz.*
; intrinsics: with the zero-is-undef flag set they should lower to bare
; bsf/bsr, and with it clear they should get an explicit zero test and
; branch (plus the xor that converts bsr's bit index into a leading-zero
; count for ctlz).
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s

declare i8 @llvm.cttz.i8(i8, i1)
declare i16 @llvm.cttz.i16(i16, i1)
declare i32 @llvm.cttz.i32(i32, i1)
declare i64 @llvm.cttz.i64(i64, i1)
declare i8 @llvm.ctlz.i8(i8, i1)
declare i16 @llvm.ctlz.i16(i16, i1)
declare i32 @llvm.ctlz.i32(i32, i1)
declare i64 @llvm.ctlz.i64(i64, i1)

define i8 @cttz_i8(i8 %x) {
; CHECK-LABEL: cttz_i8:
; CHECK:       # BB#0:
; CHECK-NEXT:    movzbl %dil, %eax
; CHECK-NEXT:    bsfl %eax, %eax
; CHECK-NEXT:    # kill
; CHECK-NEXT:    retq
  %tmp = call i8 @llvm.cttz.i8( i8 %x, i1 true )
  ret i8 %tmp
}

define i16 @cttz_i16(i16 %x) {
; CHECK-LABEL: cttz_i16:
; CHECK:       # BB#0:
; CHECK-NEXT:    bsfw %di, %ax
; CHECK-NEXT:    retq
  %tmp = call i16 @llvm.cttz.i16( i16 %x, i1 true )
  ret i16 %tmp
}

define i32 @cttz_i32(i32 %x) {
; CHECK-LABEL: cttz_i32:
; CHECK:       # BB#0:
; CHECK-NEXT:    bsfl %edi, %eax
; CHECK-NEXT:    retq
  %tmp = call i32 @llvm.cttz.i32( i32 %x, i1 true )
  ret i32 %tmp
}

define i64 @cttz_i64(i64 %x) {
; CHECK-LABEL: cttz_i64:
; CHECK:       # BB#0:
; CHECK-NEXT:    bsfq %rdi, %rax
; CHECK-NEXT:    retq
  %tmp = call i64 @llvm.cttz.i64( i64 %x, i1 true )
  ret i64 %tmp
}

define i8 @ctlz_i8(i8 %x) {
; CHECK-LABEL: ctlz_i8:
; CHECK:       # BB#0:
; CHECK-NEXT:    movzbl %dil, %eax
; CHECK-NEXT:    bsrl %eax, %eax
; CHECK-NEXT:    xorl $7, %eax
; CHECK-NEXT:    # kill
; CHECK-NEXT:    retq
  %tmp2 = call i8 @llvm.ctlz.i8( i8 %x, i1 true )
  ret i8 %tmp2
}

define i16 @ctlz_i16(i16 %x) {
; CHECK-LABEL: ctlz_i16:
; CHECK:       # BB#0:
; CHECK-NEXT:    bsrw %di, %ax
; CHECK-NEXT:    xorl $15, %eax
; CHECK-NEXT:    # kill
; CHECK-NEXT:    retq
  %tmp2 = call i16 @llvm.ctlz.i16( i16 %x, i1 true )
  ret i16 %tmp2
}

define i32 @ctlz_i32(i32 %x) {
; CHECK-LABEL: ctlz_i32:
; CHECK:       # BB#0:
; CHECK-NEXT:    bsrl %edi, %eax
; CHECK-NEXT:    xorl $31, %eax
; CHECK-NEXT:    retq
  %tmp = call i32 @llvm.ctlz.i32( i32 %x, i1 true )
  ret i32 %tmp
}

define i64 @ctlz_i64(i64 %x) {
; CHECK-LABEL: ctlz_i64:
; CHECK:       # BB#0:
; CHECK-NEXT:    bsrq %rdi, %rax
; CHECK-NEXT:    xorq $63, %rax
; CHECK-NEXT:    retq
  %tmp = call i64 @llvm.ctlz.i64( i64 %x, i1 true )
  ret i64 %tmp
}

define i8 @ctlz_i8_zero_test(i8 %n) {
; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.

; CHECK-LABEL: ctlz_i8_zero_test:
; CHECK:       # BB#0:
; CHECK-NEXT:    movb $8, %al
; CHECK-NEXT:    testb %dil, %dil
; CHECK-NEXT:    je .LBB8_2
; CHECK-NEXT:  # BB#1: # %cond.false
; CHECK-NEXT:    movzbl %dil, %eax
; CHECK-NEXT:    bsrl %eax, %eax
; CHECK-NEXT:    xorl $7, %eax
; CHECK-NEXT:  .LBB8_2: # %cond.end
; CHECK-NEXT:    # kill
; CHECK-NEXT:    retq
  %tmp1 = call i8 @llvm.ctlz.i8(i8 %n, i1 false)
  ret i8 %tmp1
}

define i16 @ctlz_i16_zero_test(i16 %n) {
; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.

; CHECK-LABEL: ctlz_i16_zero_test:
; CHECK:       # BB#0:
; CHECK-NEXT:    movw $16, %ax
; CHECK-NEXT:    testw %di, %di
; CHECK-NEXT:    je .LBB9_2
; CHECK-NEXT:  # BB#1: # %cond.false
; CHECK-NEXT:    bsrw %di, %ax
; CHECK-NEXT:    xorl $15, %eax
; CHECK-NEXT:  .LBB9_2: # %cond.end
; CHECK-NEXT:    # kill
; CHECK-NEXT:    retq
  %tmp1 = call i16 @llvm.ctlz.i16(i16 %n, i1 false)
  ret i16 %tmp1
}

define i32 @ctlz_i32_zero_test(i32 %n) {
; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.

; CHECK-LABEL: ctlz_i32_zero_test:
; CHECK:       # BB#0:
; CHECK-NEXT:    movl $32, %eax
; CHECK-NEXT:    testl %edi, %edi
; CHECK-NEXT:    je .LBB10_2
; CHECK-NEXT:  # BB#1: # %cond.false
; CHECK-NEXT:    bsrl %edi, %eax
; CHECK-NEXT:    xorl $31, %eax
; CHECK-NEXT:  .LBB10_2: # %cond.end
; CHECK-NEXT:    retq
  %tmp1 = call i32 @llvm.ctlz.i32(i32 %n, i1 false)
  ret i32 %tmp1
}

define i64 @ctlz_i64_zero_test(i64 %n) {
; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.

; CHECK-LABEL: ctlz_i64_zero_test:
; CHECK:       # BB#0:
; CHECK-NEXT:    movl $64, %eax
; CHECK-NEXT:    testq %rdi, %rdi
; CHECK-NEXT:    je .LBB11_2
; CHECK-NEXT:  # BB#1: # %cond.false
; CHECK-NEXT:    bsrq %rdi, %rax
; CHECK-NEXT:    xorq $63, %rax
; CHECK-NEXT:  .LBB11_2: # %cond.end
; CHECK-NEXT:    retq
  %tmp1 = call i64 @llvm.ctlz.i64(i64 %n, i1 false)
  ret i64 %tmp1
}

define i8 @cttz_i8_zero_test(i8 %n) {
; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.

; CHECK-LABEL: cttz_i8_zero_test:
; CHECK:       # BB#0:
; CHECK-NEXT:    movb $8, %al
; CHECK-NEXT:    testb %dil, %dil
; CHECK-NEXT:    je .LBB12_2
; CHECK-NEXT:  # BB#1: # %cond.false
; CHECK-NEXT:    movzbl %dil, %eax
; CHECK-NEXT:    bsfl %eax, %eax
; CHECK-NEXT:  .LBB12_2: # %cond.end
; CHECK-NEXT:    # kill
; CHECK-NEXT:    retq
  %tmp1 = call i8 @llvm.cttz.i8(i8 %n, i1 false)
  ret i8 %tmp1
}

define i16 @cttz_i16_zero_test(i16 %n) {
; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.

; CHECK-LABEL: cttz_i16_zero_test:
; CHECK:       # BB#0:
; CHECK-NEXT:    movw $16, %ax
; CHECK-NEXT:    testw %di, %di
; CHECK-NEXT:    je .LBB13_2
; CHECK-NEXT:  # BB#1: # %cond.false
; CHECK-NEXT:    bsfw %di, %ax
; CHECK-NEXT:  .LBB13_2: # %cond.end
; CHECK-NEXT:    retq
  %tmp1 = call i16 @llvm.cttz.i16(i16 %n, i1 false)
  ret i16 %tmp1
}

define i32 @cttz_i32_zero_test(i32 %n) {
; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.

; CHECK-LABEL: cttz_i32_zero_test:
; CHECK:       # BB#0:
; CHECK-NEXT:    movl $32, %eax
; CHECK-NEXT:    testl %edi, %edi
; CHECK-NEXT:    je .LBB14_2
; CHECK-NEXT:  # BB#1: # %cond.false
; CHECK-NEXT:    bsfl %edi, %eax
; CHECK-NEXT:  .LBB14_2: # %cond.end
; CHECK-NEXT:    retq
  %tmp1 = call i32 @llvm.cttz.i32(i32 %n, i1 false)
  ret i32 %tmp1
}

define i64 @cttz_i64_zero_test(i64 %n) {
; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.

; CHECK-LABEL: cttz_i64_zero_test:
; CHECK:       # BB#0:
; CHECK-NEXT:    movl $64, %eax
; CHECK-NEXT:    testq %rdi, %rdi
; CHECK-NEXT:    je .LBB15_2
; CHECK-NEXT:  # BB#1: # %cond.false
; CHECK-NEXT:    bsfq %rdi, %rax
; CHECK-NEXT:  .LBB15_2: # %cond.end
; CHECK-NEXT:    retq
  %tmp1 = call i64 @llvm.cttz.i64(i64 %n, i1 false)
  ret i64 %tmp1
}

define i32 @ctlz_i32_fold_cmov(i32 %n) {
; Don't generate the cmovne when the source is known non-zero (and bsr would
; not set ZF).
; rdar://9490949
; FIXME: The compare and branch are produced late in IR (by CodeGenPrepare), and
; codegen doesn't know how to delete the movl and je.

; CHECK-LABEL: ctlz_i32_fold_cmov:
; CHECK:       # BB#0:
; CHECK-NEXT:    orl $1, %edi
; CHECK-NEXT:    movl $32, %eax
; CHECK-NEXT:    je .LBB16_2
; CHECK-NEXT:  # BB#1: # %cond.false
; CHECK-NEXT:    bsrl %edi, %eax
; CHECK-NEXT:    xorl $31, %eax
; CHECK-NEXT:  .LBB16_2: # %cond.end
; CHECK-NEXT:    retq
  %or = or i32 %n, 1
  %tmp1 = call i32 @llvm.ctlz.i32(i32 %or, i1 false)
  ret i32 %tmp1
}

define i32 @ctlz_bsr(i32 %n) {
; Don't generate any xors when a 'ctlz' intrinsic is actually used to compute
; the most significant bit, which is what 'bsr' does natively.

; CHECK-LABEL: ctlz_bsr:
; CHECK:       # BB#0:
; CHECK-NEXT:    bsrl %edi, %eax
; CHECK-NEXT:    retq
  %ctlz = call i32 @llvm.ctlz.i32(i32 %n, i1 true)
  %bsr = xor i32 %ctlz, 31
  ret i32 %bsr
}

define i32 @ctlz_bsr_zero_test(i32 %n) {
; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
; FIXME: The compare and branch are produced late in IR (by CodeGenPrepare), and
; codegen doesn't know how to combine the $32 and $31 into $63.

; CHECK-LABEL: ctlz_bsr_zero_test:
; CHECK:       # BB#0:
; CHECK-NEXT:    movl $32, %eax
; CHECK-NEXT:    testl %edi, %edi
; CHECK-NEXT:    je .LBB18_2
; CHECK-NEXT:  # BB#1: # %cond.false
; CHECK-NEXT:    bsrl %edi, %eax
; CHECK-NEXT:    xorl $31, %eax
; CHECK-NEXT:  .LBB18_2: # %cond.end
; CHECK-NEXT:    xorl $31, %eax
; CHECK-NEXT:    retq
  %ctlz = call i32 @llvm.ctlz.i32(i32 %n, i1 false)
  %bsr = xor i32 %ctlz, 31
  ret i32 %bsr
}