; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+cmov,+bmi | FileCheck %s --check-prefixes=X86,X86-SLOW-BEXTR
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+cmov,+bmi,+bmi2 | FileCheck %s --check-prefixes=X86,X86-SLOW-BEXTR
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi | FileCheck %s --check-prefixes=X64,X64-SLOW-BEXTR
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi,+bmi2 | FileCheck %s --check-prefixes=X64,X64-SLOW-BEXTR
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+cmov,+bmi,+fast-bextr | FileCheck %s --check-prefixes=X86,X86-FAST-BEXTR
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi,+fast-bextr | FileCheck %s --check-prefixes=X64,X64-FAST-BEXTR

define i32 @andn32(i32 %x, i32 %y) {
; X86-LABEL: andn32:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax
; X86-NEXT: retl
;
; X64-LABEL: andn32:
; X64: # %bb.0:
; X64-NEXT: andnl %esi, %edi, %eax
; X64-NEXT: retq
  %tmp1 = xor i32 %x, -1
  %tmp2 = and i32 %y, %tmp1
  ret i32 %tmp2
}

define i32 @andn32_load(i32 %x, i32* %y) {
; X86-LABEL: andn32_load:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: andnl (%eax), %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: andn32_load:
; X64: # %bb.0:
; X64-NEXT: andnl (%rsi), %edi, %eax
; X64-NEXT: retq
  %y1 = load i32, i32* %y
  %tmp1 = xor i32 %x, -1
  %tmp2 = and i32 %y1, %tmp1
  ret i32 %tmp2
}

define i64 @andn64(i64 %x, i64 %y) {
; X86-LABEL: andn64:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax
; X86-NEXT: andnl {{[0-9]+}}(%esp), %ecx, %edx
; X86-NEXT: retl
;
; X64-LABEL: andn64:
; X64: # %bb.0:
; X64-NEXT: andnq %rsi, %rdi, %rax
; X64-NEXT: retq
  %tmp1 = xor i64 %x, -1
  %tmp2 = and i64 %tmp1, %y
  ret i64 %tmp2
}

; Don't choose a 'test' if an 'andn' can be used.
define i1 @andn_cmp(i32 %x, i32 %y) {
; X86-LABEL: andn_cmp:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax
; X86-NEXT: sete %al
; X86-NEXT: retl
;
; X64-LABEL: andn_cmp:
; X64: # %bb.0:
; X64-NEXT: andnl %esi, %edi, %eax
; X64-NEXT: sete %al
; X64-NEXT: retq
  %notx = xor i32 %x, -1
  %and = and i32 %notx, %y
  %cmp = icmp eq i32 %and, 0
  ret i1 %cmp
}

; Recognize a disguised andn in the following 4 tests.
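; ((x & y) == y) is equivalent to ((~x & y) == 0), so each compare below can be
; lowered to a single 'andn' feeding sete/setne.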
define i1 @and_cmp1(i32 %x, i32 %y) {
; X86-LABEL: and_cmp1:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax
; X86-NEXT: sete %al
; X86-NEXT: retl
;
; X64-LABEL: and_cmp1:
; X64: # %bb.0:
; X64-NEXT: andnl %esi, %edi, %eax
; X64-NEXT: sete %al
; X64-NEXT: retq
  %and = and i32 %x, %y
  %cmp = icmp eq i32 %and, %y
  ret i1 %cmp
}

define i1 @and_cmp2(i32 %x, i32 %y) {
; X86-LABEL: and_cmp2:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax
; X86-NEXT: setne %al
; X86-NEXT: retl
;
; X64-LABEL: and_cmp2:
; X64: # %bb.0:
; X64-NEXT: andnl %esi, %edi, %eax
; X64-NEXT: setne %al
; X64-NEXT: retq
  %and = and i32 %y, %x
  %cmp = icmp ne i32 %and, %y
  ret i1 %cmp
}

define i1 @and_cmp3(i32 %x, i32 %y) {
; X86-LABEL: and_cmp3:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax
; X86-NEXT: sete %al
; X86-NEXT: retl
;
; X64-LABEL: and_cmp3:
; X64: # %bb.0:
; X64-NEXT: andnl %esi, %edi, %eax
; X64-NEXT: sete %al
; X64-NEXT: retq
  %and = and i32 %x, %y
  %cmp = icmp eq i32 %y, %and
  ret i1 %cmp
}

define i1 @and_cmp4(i32 %x, i32 %y) {
; X86-LABEL: and_cmp4:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax
; X86-NEXT: setne %al
; X86-NEXT: retl
;
; X64-LABEL: and_cmp4:
; X64: # %bb.0:
; X64-NEXT: andnl %esi, %edi, %eax
; X64-NEXT: setne %al
; X64-NEXT: retq
  %and = and i32 %y, %x
  %cmp = icmp ne i32 %y, %and
  ret i1 %cmp
}

; A mask and compare against constant is ok for an 'andn' too
; even though the BMI instruction doesn't have an immediate form.
define i1 @and_cmp_const(i32 %x) {
; X86-LABEL: and_cmp_const:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andl $43, %eax
; X86-NEXT: cmpl $43, %eax
; X86-NEXT: sete %al
; X86-NEXT: retl
;
; X64-LABEL: and_cmp_const:
; X64: # %bb.0:
; X64-NEXT: andl $43, %edi
; X64-NEXT: cmpl $43, %edi
; X64-NEXT: sete %al
; X64-NEXT: retq
  %and = and i32 %x, 43
  %cmp = icmp eq i32 %and, 43
  ret i1 %cmp
}

; But don't use 'andn' if the mask is a power-of-two.
define i1 @and_cmp_const_power_of_two(i32 %x, i32 %y) {
; X86-LABEL: and_cmp_const_power_of_two:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: btl %ecx, %eax
; X86-NEXT: setae %al
; X86-NEXT: retl
;
; X64-LABEL: and_cmp_const_power_of_two:
; X64: # %bb.0:
; X64-NEXT: btl %esi, %edi
; X64-NEXT: setae %al
; X64-NEXT: retq
  %shl = shl i32 1, %y
  %and = and i32 %x, %shl
  %cmp = icmp ne i32 %and, %shl
  ret i1 %cmp
}

; Don't transform to 'andn' if there's another use of the 'and'.
define i32 @and_cmp_not_one_use(i32 %x) {
; X86-LABEL: and_cmp_not_one_use:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: andl $37, %ecx
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: cmpl $37, %ecx
; X86-NEXT: sete %al
; X86-NEXT: addl %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: and_cmp_not_one_use:
; X64: # %bb.0:
; X64-NEXT: andl $37, %edi
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpl $37, %edi
; X64-NEXT: sete %al
; X64-NEXT: addl %edi, %eax
; X64-NEXT: retq
  %and = and i32 %x, 37
  %cmp = icmp eq i32 %and, 37
  %ext = zext i1 %cmp to i32
  %add = add i32 %and, %ext
  ret i32 %add
}

; Verify that we're not transforming invalid comparison predicates.
define i1 @not_an_andn1(i32 %x, i32 %y) {
; X86-LABEL: not_an_andn1:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: andl %eax, %ecx
; X86-NEXT: cmpl %ecx, %eax
; X86-NEXT: setg %al
; X86-NEXT: retl
;
; X64-LABEL: not_an_andn1:
; X64: # %bb.0:
; X64-NEXT: andl %esi, %edi
; X64-NEXT: cmpl %edi, %esi
; X64-NEXT: setg %al
; X64-NEXT: retq
  %and = and i32 %x, %y
  %cmp = icmp sgt i32 %y, %and
  ret i1 %cmp
}

define i1 @not_an_andn2(i32 %x, i32 %y) {
; X86-LABEL: not_an_andn2:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: andl %eax, %ecx
; X86-NEXT: cmpl %ecx, %eax
; X86-NEXT: setbe %al
; X86-NEXT: retl
;
; X64-LABEL: not_an_andn2:
; X64: # %bb.0:
; X64-NEXT: andl %esi, %edi
; X64-NEXT: cmpl %edi, %esi
; X64-NEXT: setbe %al
; X64-NEXT: retq
  %and = and i32 %y, %x
  %cmp = icmp ule i32 %y, %and
  ret i1 %cmp
}

; Don't choose a 'test' if an 'andn' can be used.
define i1 @andn_cmp_swap_ops(i64 %x, i64 %y) {
; X86-LABEL: andn_cmp_swap_ops:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: andnl {{[0-9]+}}(%esp), %ecx, %ecx
; X86-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax
; X86-NEXT: orl %ecx, %eax
; X86-NEXT: sete %al
; X86-NEXT: retl
;
; X64-LABEL: andn_cmp_swap_ops:
; X64: # %bb.0:
; X64-NEXT: andnq %rsi, %rdi, %rax
; X64-NEXT: sete %al
; X64-NEXT: retq
  %notx = xor i64 %x, -1
  %and = and i64 %y, %notx
  %cmp = icmp eq i64 %and, 0
  ret i1 %cmp
}

; Use a 'test' (not an 'and') because 'andn' only works for i32/i64.
define i1 @andn_cmp_i8(i8 %x, i8 %y) {
; X86-LABEL: andn_cmp_i8:
; X86: # %bb.0:
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: notb %al
; X86-NEXT: testb %al, {{[0-9]+}}(%esp)
; X86-NEXT: sete %al
; X86-NEXT: retl
;
; X64-LABEL: andn_cmp_i8:
; X64: # %bb.0:
; X64-NEXT: notb %sil
; X64-NEXT: testb %sil, %dil
; X64-NEXT: sete %al
; X64-NEXT: retq
  %noty = xor i8 %y, -1
  %and = and i8 %x, %noty
  %cmp = icmp eq i8 %and, 0
  ret i1 %cmp
}

declare i32 @llvm.x86.bmi.bextr.32(i32, i32)

define i32 @bextr32(i32 %x, i32 %y) {
; X86-LABEL: bextr32:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax
; X86-NEXT: retl
;
; X64-LABEL: bextr32:
; X64: # %bb.0:
; X64-NEXT: bextrl %esi, %edi, %eax
; X64-NEXT: retq
  %tmp = tail call i32 @llvm.x86.bmi.bextr.32(i32 %x, i32 %y)
  ret i32 %tmp
}

define i32 @bextr32_load(i32* %x, i32 %y) {
; X86-LABEL: bextr32_load:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: bextrl %eax, (%ecx), %eax
; X86-NEXT: retl
;
; X64-LABEL: bextr32_load:
; X64: # %bb.0:
; X64-NEXT: bextrl %esi, (%rdi), %eax
; X64-NEXT: retq
  %x1 = load i32, i32* %x
  %tmp = tail call i32 @llvm.x86.bmi.bextr.32(i32 %x1, i32 %y)
  ret i32 %tmp
}

define i32 @bextr32b(i32 %x) uwtable ssp {
; X86-SLOW-BEXTR-LABEL: bextr32b:
; X86-SLOW-BEXTR: # %bb.0:
; X86-SLOW-BEXTR-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SLOW-BEXTR-NEXT: shrl $4, %eax
; X86-SLOW-BEXTR-NEXT: andl $4095, %eax # imm = 0xFFF
; X86-SLOW-BEXTR-NEXT: retl
;
; X64-SLOW-BEXTR-LABEL: bextr32b:
; X64-SLOW-BEXTR: # %bb.0:
; X64-SLOW-BEXTR-NEXT: movl %edi, %eax
; X64-SLOW-BEXTR-NEXT: shrl $4, %eax
; X64-SLOW-BEXTR-NEXT: andl $4095, %eax # imm = 0xFFF
; X64-SLOW-BEXTR-NEXT: retq
;
; X86-FAST-BEXTR-LABEL: bextr32b:
; X86-FAST-BEXTR: # %bb.0:
; X86-FAST-BEXTR-NEXT: movl $3076, %eax # imm = 0xC04
; X86-FAST-BEXTR-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax
; X86-FAST-BEXTR-NEXT: retl
;
; X64-FAST-BEXTR-LABEL: bextr32b:
; X64-FAST-BEXTR: # %bb.0:
; X64-FAST-BEXTR-NEXT: movl $3076, %eax # imm = 0xC04
; X64-FAST-BEXTR-NEXT: bextrl %eax, %edi, %eax
; X64-FAST-BEXTR-NEXT: retq
  %1 = lshr i32 %x, 4
  %2 = and i32 %1, 4095
  ret i32 %2
}

; Make sure we still use AH subreg trick to extract 15:8
define i32 @bextr32_subreg(i32 %x) uwtable ssp {
; X86-LABEL: bextr32_subreg:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: retl
;
; X64-LABEL: bextr32_subreg:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %eax
; X64-NEXT: movzbl %ah, %eax
; X64-NEXT: retq
  %1 = lshr i32 %x, 8
  %2 = and i32 %1, 255
  ret i32 %2
}

define i32 @bextr32b_load(i32* %x) uwtable ssp {
; X86-SLOW-BEXTR-LABEL: bextr32b_load:
; X86-SLOW-BEXTR: # %bb.0:
; X86-SLOW-BEXTR-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SLOW-BEXTR-NEXT: movl (%eax), %eax
; X86-SLOW-BEXTR-NEXT: shrl $4, %eax
; X86-SLOW-BEXTR-NEXT: andl $4095, %eax # imm = 0xFFF
; X86-SLOW-BEXTR-NEXT: retl
;
; X64-SLOW-BEXTR-LABEL: bextr32b_load:
; X64-SLOW-BEXTR: # %bb.0:
; X64-SLOW-BEXTR-NEXT: movl (%rdi), %eax
; X64-SLOW-BEXTR-NEXT: shrl $4, %eax
; X64-SLOW-BEXTR-NEXT: andl $4095, %eax # imm = 0xFFF
; X64-SLOW-BEXTR-NEXT: retq
;
; X86-FAST-BEXTR-LABEL: bextr32b_load:
; X86-FAST-BEXTR: # %bb.0:
; X86-FAST-BEXTR-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-FAST-BEXTR-NEXT: movl $3076, %ecx # imm = 0xC04
; X86-FAST-BEXTR-NEXT: bextrl %ecx, (%eax), %eax
; X86-FAST-BEXTR-NEXT: retl
;
; X64-FAST-BEXTR-LABEL: bextr32b_load:
; X64-FAST-BEXTR: # %bb.0:
; X64-FAST-BEXTR-NEXT: movl $3076, %eax # imm = 0xC04
; X64-FAST-BEXTR-NEXT: bextrl %eax, (%rdi), %eax
; X64-FAST-BEXTR-NEXT: retq
  %1 = load i32, i32* %x
  %2 = lshr i32 %1, 4
  %3 = and i32 %2, 4095
  ret i32 %3
}

; PR34042
define i32 @bextr32c(i32 %x, i16 zeroext %y) {
; X86-LABEL: bextr32c:
; X86: # %bb.0:
; X86-NEXT: movswl {{[0-9]+}}(%esp), %eax
; X86-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax
; X86-NEXT: retl
;
; X64-LABEL: bextr32c:
; X64: # %bb.0:
; X64-NEXT: bextrl %esi, %edi, %eax
; X64-NEXT: retq
  %tmp0 = sext i16 %y to i32
  %tmp1 = tail call i32 @llvm.x86.bmi.bextr.32(i32 %x, i32 %tmp0)
  ret i32 %tmp1
}

define i32 @non_bextr32(i32 %x) {
; X86-LABEL: non_bextr32:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: shrl $2, %eax
; X86-NEXT: andl $111, %eax
; X86-NEXT: retl
;
; X64-LABEL: non_bextr32:
; X64: # %bb.0: # %entry
; X64-NEXT: movl %edi, %eax
; X64-NEXT: shrl $2, %eax
; X64-NEXT: andl $111, %eax
; X64-NEXT: retq
entry:
  %shr = lshr i32 %x, 2
  %and = and i32 %shr, 111
  ret i32 %and
}

define i32 @blsi32(i32 %x) {
; X86-LABEL: blsi32:
; X86: # %bb.0:
; X86-NEXT: blsil {{[0-9]+}}(%esp), %eax
; X86-NEXT: retl
;
; X64-LABEL: blsi32:
; X64: # %bb.0:
; X64-NEXT: blsil %edi, %eax
; X64-NEXT: retq
  %tmp = sub i32 0, %x
  %tmp2 = and i32 %x, %tmp
  ret i32 %tmp2
}

define i32 @blsi32_load(i32* %x) {
; X86-LABEL: blsi32_load:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: blsil (%eax), %eax
; X86-NEXT: retl
;
; X64-LABEL: blsi32_load:
; X64: # %bb.0:
; X64-NEXT: blsil (%rdi), %eax
; X64-NEXT: retq
  %x1 = load i32, i32* %x
  %tmp = sub i32 0, %x1
  %tmp2 = and i32 %x1, %tmp
  ret i32 %tmp2
}

define i32 @blsi32_z(i32 %a, i32 %b) nounwind {
; X86-LABEL: blsi32_z:
; X86: # %bb.0:
; X86-NEXT: blsil {{[0-9]+}}(%esp), %eax
; X86-NEXT: jne .LBB24_2
; X86-NEXT: # %bb.1:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: .LBB24_2:
; X86-NEXT: retl
;
; X64-LABEL: blsi32_z:
; X64: # %bb.0:
; X64-NEXT: blsil %edi, %eax
; X64-NEXT: cmovel %esi, %eax
; X64-NEXT: retq
  %t0 = sub i32 0, %a
  %t1 = and i32 %t0, %a
  %t2 = icmp eq i32 %t1, 0
  %t3 = select i1 %t2, i32 %b, i32 %t1
  ret i32 %t3
}

define i32 @blsi32_z2(i32 %a, i32 %b, i32 %c) nounwind {
; X86-LABEL: blsi32_z2:
; X86: # %bb.0:
; X86-NEXT: blsil {{[0-9]+}}(%esp), %eax
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-NEXT: leal {{[0-9]+}}(%esp), %ecx
; X86-NEXT: cmovel %eax, %ecx
; X86-NEXT: movl (%ecx), %eax
; X86-NEXT: retl
;
; X64-LABEL: blsi32_z2:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %eax
; X64-NEXT: blsil %edi, %ecx
; X64-NEXT: cmovnel %edx, %eax
; X64-NEXT: retq
  %t0 = sub i32 0, %a
  %t1 = and i32 %t0, %a
  %t2 = icmp eq i32 %t1, 0
  %t3 = select i1 %t2, i32 %b, i32 %c
  ret i32 %t3
}

define i64 @blsi64(i64 %x) {
; X86-LABEL: blsi64:
; X86: # %bb.0:
; X86-NEXT: pushl %esi
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: .cfi_offset %esi, -8
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: negl %eax
; X86-NEXT: sbbl %esi, %edx
; X86-NEXT: andl %esi, %edx
; X86-NEXT: andl %ecx, %eax
; X86-NEXT: popl %esi
; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl
;
; X64-LABEL: blsi64:
; X64: # %bb.0:
; X64-NEXT: blsiq %rdi, %rax
; X64-NEXT: retq
  %tmp = sub i64 0, %x
  %tmp2 = and i64 %tmp, %x
  ret i64 %tmp2
}

define i64 @blsi64_z(i64 %a, i64 %b) nounwind {
; X86-LABEL: blsi64_z:
; X86: # %bb.0:
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: negl %eax
; X86-NEXT: sbbl %esi, %edx
; X86-NEXT: andl %esi, %edx
; X86-NEXT: andl %ecx, %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: orl %edx, %ecx
; X86-NEXT: jne .LBB27_2
; X86-NEXT: # %bb.1:
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: .LBB27_2:
; X86-NEXT: popl %esi
; X86-NEXT: retl
;
; X64-LABEL: blsi64_z:
; X64: # %bb.0:
; X64-NEXT: blsiq %rdi, %rax
; X64-NEXT: cmoveq %rsi, %rax
; X64-NEXT: retq
  %t0 = sub i64 0, %a
  %t1 = and i64 %t0, %a
  %t2 = icmp eq i64 %t1, 0
  %t3 = select i1 %t2, i64 %b, i64 %t1
  ret i64 %t3
}

define i64 @blsi64_z2(i64 %a, i64 %b, i64 %c) nounwind {
; X86-LABEL: blsi64_z2:
; X86: # %bb.0:
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: movl %eax, %esi
; X86-NEXT: negl %esi
; X86-NEXT: sbbl %ecx, %edx
; X86-NEXT: andl %ecx, %edx
; X86-NEXT: andl %eax, %esi
; X86-NEXT: orl %edx, %esi
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-NEXT: leal {{[0-9]+}}(%esp), %ecx
; X86-NEXT: cmovel %eax, %ecx
; X86-NEXT: movl (%ecx), %eax
; X86-NEXT: movl 4(%ecx), %edx
; X86-NEXT: popl %esi
; X86-NEXT: retl
;
; X64-LABEL: blsi64_z2:
; X64: # %bb.0:
; X64-NEXT: movq %rsi, %rax
; X64-NEXT: blsiq %rdi, %rcx
; X64-NEXT: cmovneq %rdx, %rax
; X64-NEXT: retq
  %t0 = sub i64 0, %a
  %t1 = and i64 %t0, %a
  %t2 = icmp eq i64 %t1, 0
  %t3 = select i1 %t2, i64 %b, i64 %c
  ret i64 %t3
}

define i32 @blsmsk32(i32 %x) {
; X86-LABEL: blsmsk32:
; X86: # %bb.0:
; X86-NEXT: blsmskl {{[0-9]+}}(%esp), %eax
; X86-NEXT: retl
;
; X64-LABEL: blsmsk32:
; X64: # %bb.0:
; X64-NEXT: blsmskl %edi, %eax
; X64-NEXT: retq
  %tmp = sub i32 %x, 1
  %tmp2 = xor i32 %x, %tmp
  ret i32 %tmp2
}

define i32 @blsmsk32_load(i32* %x) {
; X86-LABEL: blsmsk32_load:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: blsmskl (%eax), %eax
; X86-NEXT: retl
;
; X64-LABEL: blsmsk32_load:
; X64: # %bb.0:
; X64-NEXT: blsmskl (%rdi), %eax
; X64-NEXT: retq
  %x1 = load i32, i32* %x
  %tmp = sub i32 %x1, 1
  %tmp2 = xor i32 %x1, %tmp
  ret i32 %tmp2
}

define i32 @blsmsk32_z(i32 %a, i32 %b) nounwind {
; X86-LABEL: blsmsk32_z:
; X86: # %bb.0:
; X86-NEXT: blsmskl {{[0-9]+}}(%esp), %eax
; X86-NEXT: jne .LBB31_2
; X86-NEXT: # %bb.1:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: .LBB31_2:
; X86-NEXT: retl
;
; X64-LABEL: blsmsk32_z:
; X64: # %bb.0:
; X64-NEXT: blsmskl %edi, %eax
; X64-NEXT: cmovel %esi, %eax
; X64-NEXT: retq
  %t0 = sub i32 %a, 1
  %t1 = xor i32 %t0, %a
  %t2 = icmp eq i32 %t1, 0
  %t3 = select i1 %t2, i32 %b, i32 %t1
  ret i32 %t3
}

define i32 @blsmsk32_z2(i32 %a, i32 %b, i32 %c) nounwind {
; X86-LABEL: blsmsk32_z2:
; X86: # %bb.0:
; X86-NEXT: blsmskl {{[0-9]+}}(%esp), %eax
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-NEXT: leal {{[0-9]+}}(%esp), %ecx
; X86-NEXT: cmovel %eax, %ecx
; X86-NEXT: movl (%ecx), %eax
; X86-NEXT: retl
;
; X64-LABEL: blsmsk32_z2:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %eax
; X64-NEXT: blsmskl %edi, %ecx
; X64-NEXT: cmovnel %edx, %eax
; X64-NEXT: retq
  %t0 = sub i32 %a, 1
  %t1 = xor i32 %t0, %a
  %t2 = icmp eq i32 %t1, 0
  %t3 = select i1 %t2, i32 %b, i32 %c
  ret i32 %t3
}

define i64 @blsmsk64(i64 %x) {
; X86-LABEL: blsmsk64:
; X86: # %bb.0:
; X86-NEXT: pushl %esi
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: .cfi_offset %esi, -8
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: addl $-1, %eax
; X86-NEXT: movl %esi, %edx
; X86-NEXT: adcl $-1, %edx
; X86-NEXT: xorl %ecx, %eax
; X86-NEXT: xorl %esi, %edx
; X86-NEXT: popl %esi
; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl
;
; X64-LABEL: blsmsk64:
; X64: # %bb.0:
; X64-NEXT: blsmskq %rdi, %rax
; X64-NEXT: retq
  %tmp = sub i64 %x, 1
  %tmp2 = xor i64 %tmp, %x
  ret i64 %tmp2
}

define i64 @blsmsk64_z(i64 %a, i64 %b) nounwind {
; X86-LABEL: blsmsk64_z:
; X86: # %bb.0:
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: addl $-1, %eax
; X86-NEXT: movl %esi, %edx
; X86-NEXT: adcl $-1, %edx
; X86-NEXT: xorl %ecx, %eax
; X86-NEXT: xorl %esi, %edx
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: orl %edx, %ecx
; X86-NEXT: jne .LBB34_2
; X86-NEXT: # %bb.1:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: .LBB34_2:
; X86-NEXT: popl %esi
; X86-NEXT: retl
;
; X64-LABEL: blsmsk64_z:
; X64: # %bb.0:
; X64-NEXT: blsmskq %rdi, %rax
; X64-NEXT: cmoveq %rsi, %rax
; X64-NEXT: retq
  %t0 = sub i64 %a, 1
  %t1 = xor i64 %t0, %a
  %t2 = icmp eq i64 %t1, 0
  %t3 = select i1 %t2, i64 %b, i64 %t1
  ret i64 %t3
}

define i64 @blsmsk64_z2(i64 %a, i64 %b, i64 %c) nounwind {
; X86-LABEL: blsmsk64_z2:
; X86: # %bb.0:
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl %eax, %edx
; X86-NEXT: addl $-1, %edx
; X86-NEXT: movl %ecx, %esi
; X86-NEXT: adcl $-1, %esi
; X86-NEXT: xorl %eax, %edx
; X86-NEXT: xorl %ecx, %esi
; X86-NEXT: orl %edx, %esi
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-NEXT: leal {{[0-9]+}}(%esp), %ecx
; X86-NEXT: cmovel %eax, %ecx
; X86-NEXT: movl (%ecx), %eax
; X86-NEXT: movl 4(%ecx), %edx
; X86-NEXT: popl %esi
; X86-NEXT: retl
;
; X64-LABEL: blsmsk64_z2:
; X64: # %bb.0:
; X64-NEXT: movq %rsi, %rax
; X64-NEXT: blsmskq %rdi, %rcx
; X64-NEXT: cmovneq %rdx, %rax
; X64-NEXT: retq
  %t0 = sub i64 %a, 1
  %t1 = xor i64 %t0, %a
  %t2 = icmp eq i64 %t1, 0
  %t3 = select i1 %t2, i64 %b, i64 %c
  ret i64 %t3
}

define i32 @blsr32(i32 %x) {
; X86-LABEL: blsr32:
; X86: # %bb.0:
; X86-NEXT: blsrl {{[0-9]+}}(%esp), %eax
; X86-NEXT: retl
;
; X64-LABEL: blsr32:
; X64: # %bb.0:
; X64-NEXT: blsrl %edi, %eax
; X64-NEXT: retq
  %tmp = sub i32 %x, 1
  %tmp2 = and i32 %x, %tmp
  ret i32 %tmp2
}

define i32 @blsr32_load(i32* %x) {
; X86-LABEL: blsr32_load:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: blsrl (%eax), %eax
; X86-NEXT: retl
;
; X64-LABEL: blsr32_load:
; X64: # %bb.0:
; X64-NEXT: blsrl (%rdi), %eax
; X64-NEXT: retq
  %x1 = load i32, i32* %x
  %tmp = sub i32 %x1, 1
  %tmp2 = and i32 %x1, %tmp
  ret i32 %tmp2
}

define i32 @blsr32_z(i32 %a, i32 %b) nounwind {
; X86-LABEL: blsr32_z:
; X86: # %bb.0:
; X86-NEXT: blsrl {{[0-9]+}}(%esp), %eax
; X86-NEXT: jne .LBB38_2
; X86-NEXT: # %bb.1:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: .LBB38_2:
; X86-NEXT: retl
;
; X64-LABEL: blsr32_z:
; X64: # %bb.0:
; X64-NEXT: blsrl %edi, %eax
; X64-NEXT: cmovel %esi, %eax
; X64-NEXT: retq
  %t0 = sub i32 %a, 1
  %t1 = and i32 %t0, %a
  %t2 = icmp eq i32 %t1, 0
  %t3 = select i1 %t2, i32 %b, i32 %t1
  ret i32 %t3
}

define i32 @blsr32_z2(i32 %a, i32 %b, i32 %c) nounwind {
; X86-LABEL: blsr32_z2:
; X86: # %bb.0:
; X86-NEXT: blsrl {{[0-9]+}}(%esp), %eax
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-NEXT: leal {{[0-9]+}}(%esp), %ecx
; X86-NEXT: cmovel %eax, %ecx
; X86-NEXT: movl (%ecx), %eax
; X86-NEXT: retl
;
; X64-LABEL: blsr32_z2:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %eax
; X64-NEXT: blsrl %edi, %ecx
; X64-NEXT: cmovnel %edx, %eax
; X64-NEXT: retq
  %t0 = sub i32 %a, 1
  %t1 = and i32 %t0, %a
  %t2 = icmp eq i32 %t1, 0
  %t3 = select i1 %t2, i32 %b, i32 %c
  ret i32 %t3
}

define i64 @blsr64(i64 %x) {
; X86-LABEL: blsr64:
; X86: # %bb.0:
; X86-NEXT: pushl %esi
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: .cfi_offset %esi, -8
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: addl $-1, %eax
; X86-NEXT: movl %esi, %edx
; X86-NEXT: adcl $-1, %edx
; X86-NEXT: andl %ecx, %eax
; X86-NEXT: andl %esi, %edx
; X86-NEXT: popl %esi
; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl
;
; X64-LABEL: blsr64:
; X64: # %bb.0:
; X64-NEXT: blsrq %rdi, %rax
; X64-NEXT: retq
  %tmp = sub i64 %x, 1
  %tmp2 = and i64 %tmp, %x
  ret i64 %tmp2
}

define i64 @blsr64_z(i64 %a, i64 %b) nounwind {
; X86-LABEL: blsr64_z:
; X86: # %bb.0:
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: addl $-1, %eax
; X86-NEXT: movl %esi, %edx
; X86-NEXT: adcl $-1, %edx
; X86-NEXT: andl %ecx, %eax
; X86-NEXT: andl %esi, %edx
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: orl %edx, %ecx
; X86-NEXT: jne .LBB41_2
; X86-NEXT: # %bb.1:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: .LBB41_2:
; X86-NEXT: popl %esi
; X86-NEXT: retl
;
; X64-LABEL: blsr64_z:
; X64: # %bb.0:
; X64-NEXT: blsrq %rdi, %rax
; X64-NEXT: cmoveq %rsi, %rax
; X64-NEXT: retq
  %t0 = sub i64 %a, 1
  %t1 = and i64 %t0, %a
  %t2 = icmp eq i64 %t1, 0
  %t3 = select i1 %t2, i64 %b, i64 %t1
  ret i64 %t3
}

define i64 @blsr64_z2(i64 %a, i64 %b, i64 %c) nounwind {
; X86-LABEL: blsr64_z2:
; X86: # %bb.0:
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl %eax, %edx
; X86-NEXT: addl $-1, %edx
; X86-NEXT: movl %ecx, %esi
; X86-NEXT: adcl $-1, %esi
; X86-NEXT: andl %eax, %edx
; X86-NEXT: andl %ecx, %esi
; X86-NEXT: orl %edx, %esi
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-NEXT: leal {{[0-9]+}}(%esp), %ecx
; X86-NEXT: cmovel %eax, %ecx
; X86-NEXT: movl (%ecx), %eax
; X86-NEXT: movl 4(%ecx), %edx
; X86-NEXT: popl %esi
; X86-NEXT: retl
;
; X64-LABEL: blsr64_z2:
; X64: # %bb.0:
; X64-NEXT: movq %rsi, %rax
; X64-NEXT: blsrq %rdi, %rcx
; X64-NEXT: cmovneq %rdx, %rax
; X64-NEXT: retq
  %t0 = sub i64 %a, 1
  %t1 = and i64 %t0, %a
  %t2 = icmp eq i64 %t1, 0
  %t3 = select i1 %t2, i64 %b, i64 %c
  ret i64 %t3
}

; PR35792 - https://bugs.llvm.org/show_bug.cgi?id=35792

define i64 @blsr_disguised_constant(i64 %x) {
; X86-LABEL: blsr_disguised_constant:
; X86: # %bb.0:
; X86-NEXT: blsrl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movzwl %ax, %eax
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: retl
;
; X64-LABEL: blsr_disguised_constant:
; X64: # %bb.0:
; X64-NEXT: blsrl %edi, %eax
; X64-NEXT: movzwl %ax, %eax
; X64-NEXT: retq
  %a1 = and i64 %x, 65535
  %a2 = add i64 %x, 65535
  %r = and i64 %a1, %a2
  ret i64 %r
}

; The add here used to get shrunk, but the and did not, thus hiding the blsr pattern.
; We now use the knowledge that the upper bits of the shift guarantee the and result has 0s in the upper bits to reduce it too.
define i64 @blsr_disguised_shrunk_add(i64 %x) {
; X86-LABEL: blsr_disguised_shrunk_add:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: shrl $16, %eax
; X86-NEXT: blsrl %eax, %eax
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: retl
;
; X64-LABEL: blsr_disguised_shrunk_add:
; X64: # %bb.0:
; X64-NEXT: shrq $48, %rdi
; X64-NEXT: blsrl %edi, %eax
; X64-NEXT: retq
  %a = lshr i64 %x, 48
  %b = add i64 %a, -1
  %c = and i64 %b, %a
  ret i64 %c
}

; FIXME: We should not be using the S flag from BEXTR.
define void @pr40060(i32, i32) {
; X86-LABEL: pr40060:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax
; X86-NEXT: testl %eax, %eax
; X86-NEXT: js .LBB45_1
; X86-NEXT: # %bb.2:
; X86-NEXT: jmp bar # TAILCALL
; X86-NEXT: .LBB45_1:
; X86-NEXT: retl
;
; X64-LABEL: pr40060:
; X64: # %bb.0:
; X64-NEXT: bextrl %esi, %edi, %eax
; X64-NEXT: testl %eax, %eax
; X64-NEXT: js .LBB45_1
; X64-NEXT: # %bb.2:
; X64-NEXT: jmp bar # TAILCALL
; X64-NEXT: .LBB45_1:
; X64-NEXT: retq
  %3 = tail call i32 @llvm.x86.bmi.bextr.32(i32 %0, i32 %1)
  %4 = icmp sgt i32 %3, -1
  br i1 %4, label %5, label %6

  tail call void @bar()
  br label %6

  ret void
}

define i32 @blsr32_branch(i32 %x) {
; X86-LABEL: blsr32_branch:
; X86: # %bb.0:
; X86-NEXT: pushl %esi
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: .cfi_offset %esi, -8
; X86-NEXT: blsrl {{[0-9]+}}(%esp), %esi
; X86-NEXT: jne .LBB46_2
; X86-NEXT: # %bb.1:
; X86-NEXT: calll bar
; X86-NEXT: .LBB46_2:
; X86-NEXT: movl %esi, %eax
; X86-NEXT: popl %esi
; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl
;
; X64-LABEL: blsr32_branch:
; X64: # %bb.0:
; X64-NEXT: pushq %rbx
; X64-NEXT: .cfi_def_cfa_offset 16
; X64-NEXT: .cfi_offset %rbx, -16
; X64-NEXT: blsrl %edi, %ebx
; X64-NEXT: jne .LBB46_2
; X64-NEXT: # %bb.1:
; X64-NEXT: callq bar
; X64-NEXT: .LBB46_2:
; X64-NEXT: movl %ebx, %eax
; X64-NEXT: popq %rbx
; X64-NEXT: .cfi_def_cfa_offset 8
; X64-NEXT: retq
  %tmp = sub i32 %x, 1
  %tmp2 = and i32 %x, %tmp
  %cmp = icmp eq i32 %tmp2, 0
  br i1 %cmp, label %1, label %2

  tail call void @bar()
  br label %2
  ret i32 %tmp2
}

define i64 @blsr64_branch(i64 %x) {
; X86-LABEL: blsr64_branch:
; X86: # %bb.0:
; X86-NEXT: pushl %edi
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: pushl %esi
; X86-NEXT: .cfi_def_cfa_offset 12
; X86-NEXT: .cfi_offset %esi, -12
; X86-NEXT: .cfi_offset %edi, -8
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl %eax, %esi
; X86-NEXT: addl $-1, %esi
; X86-NEXT: movl %ecx, %edi
; X86-NEXT: adcl $-1, %edi
; X86-NEXT: andl %eax, %esi
; X86-NEXT: andl %ecx, %edi
; X86-NEXT: movl %esi, %eax
; X86-NEXT: orl %edi, %eax
; X86-NEXT: jne .LBB47_2
; X86-NEXT: # %bb.1:
; X86-NEXT: calll bar
; X86-NEXT: .LBB47_2:
; X86-NEXT: movl %esi, %eax
; X86-NEXT: movl %edi, %edx
; X86-NEXT: popl %esi
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: popl %edi
; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl
;
; X64-LABEL: blsr64_branch:
; X64: # %bb.0:
; X64-NEXT: pushq %rbx
; X64-NEXT: .cfi_def_cfa_offset 16
; X64-NEXT: .cfi_offset %rbx, -16
; X64-NEXT: blsrq %rdi, %rbx
; X64-NEXT: jne .LBB47_2
; X64-NEXT: # %bb.1:
; X64-NEXT: callq bar
; X64-NEXT: .LBB47_2:
; X64-NEXT: movq %rbx, %rax
; X64-NEXT: popq %rbx
; X64-NEXT: .cfi_def_cfa_offset 8
; X64-NEXT: retq
  %tmp = sub i64 %x, 1
  %tmp2 = and i64 %x, %tmp
  %cmp = icmp eq i64 %tmp2, 0
  br i1 %cmp, label %1, label %2

  tail call void @bar()
  br label %2
  ret i64 %tmp2
}

define i32 @blsi32_branch(i32 %x) {
; X86-LABEL: blsi32_branch:
; X86: # %bb.0:
; X86-NEXT: pushl %esi
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: .cfi_offset %esi, -8
; X86-NEXT: blsil {{[0-9]+}}(%esp), %esi
; X86-NEXT: jne .LBB48_2
; X86-NEXT: # %bb.1:
; X86-NEXT: calll bar
; X86-NEXT: .LBB48_2:
; X86-NEXT: movl %esi, %eax
; X86-NEXT: popl %esi
; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl
;
; X64-LABEL: blsi32_branch:
; X64: # %bb.0:
; X64-NEXT: pushq %rbx
; X64-NEXT: .cfi_def_cfa_offset 16
; X64-NEXT: .cfi_offset %rbx, -16
; X64-NEXT: blsil %edi, %ebx
; X64-NEXT: jne .LBB48_2
; X64-NEXT: # %bb.1:
; X64-NEXT: callq bar
; X64-NEXT: .LBB48_2:
; X64-NEXT: movl %ebx, %eax
; X64-NEXT: popq %rbx
; X64-NEXT: .cfi_def_cfa_offset 8
; X64-NEXT: retq
  %tmp = sub i32 0, %x
  %tmp2 = and i32 %x, %tmp
  %cmp = icmp eq i32 %tmp2, 0
  br i1 %cmp, label %1, label %2

  tail call void @bar()
  br label %2
  ret i32 %tmp2
}

define i64 @blsi64_branch(i64 %x) {
; X86-LABEL: blsi64_branch:
; X86: # %bb.0:
; X86-NEXT: pushl %edi
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: pushl %esi
; X86-NEXT: .cfi_def_cfa_offset 12
; X86-NEXT: .cfi_offset %esi, -12
; X86-NEXT: .cfi_offset %edi, -8
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: xorl %esi, %esi
; X86-NEXT: movl %eax, %edi
; X86-NEXT: negl %edi
; X86-NEXT: sbbl %ecx, %esi
; X86-NEXT: andl %ecx, %esi
; X86-NEXT: andl %eax, %edi
; X86-NEXT: movl %edi, %eax
; X86-NEXT: orl %esi, %eax
; X86-NEXT: jne .LBB49_2
; X86-NEXT: # %bb.1:
; X86-NEXT: calll bar
; X86-NEXT: .LBB49_2:
; X86-NEXT: movl %edi, %eax
; X86-NEXT: movl %esi, %edx
; X86-NEXT: popl %esi
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: popl %edi
; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl
;
; X64-LABEL: blsi64_branch:
; X64: # %bb.0:
; X64-NEXT: pushq %rbx
; X64-NEXT: .cfi_def_cfa_offset 16
; X64-NEXT: .cfi_offset %rbx, -16
; X64-NEXT: blsiq %rdi, %rbx
; X64-NEXT: jne .LBB49_2
; X64-NEXT: # %bb.1:
; X64-NEXT: callq bar
; X64-NEXT: .LBB49_2:
; X64-NEXT: movq %rbx, %rax
; X64-NEXT: popq %rbx
; X64-NEXT: .cfi_def_cfa_offset 8
; X64-NEXT: retq
  %tmp = sub i64 0, %x
  %tmp2 = and i64 %x, %tmp
  %cmp = icmp eq i64 %tmp2, 0
  br i1 %cmp, label %1, label %2

  tail call void @bar()
  br label %2
  ret i64 %tmp2
}

declare dso_local void @bar()

define void @pr42118_i32(i32 %x) {
; X86-LABEL: pr42118_i32:
; X86: # %bb.0:
; X86-NEXT: blsrl {{[0-9]+}}(%esp), %eax
; X86-NEXT: jne .LBB50_1
; X86-NEXT: # %bb.2:
; X86-NEXT: jmp bar # TAILCALL
; X86-NEXT: .LBB50_1:
; X86-NEXT: retl
;
; X64-LABEL: pr42118_i32:
; X64: # %bb.0:
; X64-NEXT: blsrl %edi, %eax
; X64-NEXT: jne .LBB50_1
; X64-NEXT: # %bb.2:
; X64-NEXT: jmp bar # TAILCALL
; X64-NEXT: .LBB50_1:
; X64-NEXT: retq
  %tmp = sub i32 0, %x
  %tmp1 = and i32 %tmp, %x
  %cmp = icmp eq i32 %tmp1, %x
  br i1 %cmp, label %1, label %2

  tail call void @bar()
  br label %2

  ret void
}

define void @pr42118_i64(i64 %x) {
; X86-LABEL: pr42118_i64:
; X86: # %bb.0:
; X86-NEXT: pushl %esi
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: .cfi_offset %esi, -8
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl %eax, %edx
; X86-NEXT: addl $-1, %edx
; X86-NEXT: movl %ecx, %esi
; X86-NEXT: adcl $-1, %esi
; X86-NEXT: andl %eax, %edx
; X86-NEXT: andl %ecx, %esi
; X86-NEXT: orl %edx, %esi
; X86-NEXT: jne .LBB51_1
; X86-NEXT: # %bb.2:
; X86-NEXT: popl %esi
; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: jmp bar # TAILCALL
; X86-NEXT: .LBB51_1:
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: popl %esi
; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl
;
; X64-LABEL: pr42118_i64:
; X64: # %bb.0:
; X64-NEXT: blsrq %rdi, %rax
; X64-NEXT: jne .LBB51_1
; X64-NEXT: # %bb.2:
; X64-NEXT: jmp bar # TAILCALL
; X64-NEXT: .LBB51_1:
; X64-NEXT: retq
  %tmp = sub i64 0, %x
  %tmp1 = and i64 %tmp, %x
  %cmp = icmp eq i64 %tmp1, %x
  br i1 %cmp, label %1, label %2

  tail call void @bar()
  br label %2

  ret void
}

define i32 @blsi_cflag_32(i32 %x, i32 %y) nounwind {
; X86-LABEL: blsi_cflag_32:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: testl %eax, %eax
; X86-NEXT: jne .LBB52_1
; X86-NEXT: # %bb.2:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: retl
; X86-NEXT: .LBB52_1:
; X86-NEXT: blsil %eax, %eax
; X86-NEXT: retl
;
; X64-LABEL: blsi_cflag_32:
; X64: # %bb.0:
; X64-NEXT: blsil %edi, %eax
; X64-NEXT: cmovael %esi, %eax
; X64-NEXT: retq
  %tobool = icmp eq i32 %x, 0
  %sub = sub nsw i32 0, %x
  %and = and i32 %sub, %x
  %cond = select i1 %tobool, i32 %y, i32 %and
  ret i32 %cond
}

define i64 @blsi_cflag_64(i64 %x, i64 %y) nounwind {
; X86-LABEL: blsi_cflag_64:
; X86: # %bb.0:
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: negl %eax
; X86-NEXT: sbbl %esi, %edx
; X86-NEXT: movl %ecx, %edi
; X86-NEXT: orl %esi, %edi
; X86-NEXT: jne .LBB53_1
; X86-NEXT: # %bb.2:
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: jmp .LBB53_3
; X86-NEXT: .LBB53_1:
; X86-NEXT: andl %esi, %edx
; X86-NEXT: andl %ecx, %eax
; X86-NEXT: .LBB53_3:
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: retl
;
; X64-LABEL: blsi_cflag_64:
; X64: # %bb.0:
; X64-NEXT: blsiq %rdi, %rax
; X64-NEXT: cmovaeq %rsi, %rax
; X64-NEXT: retq
  %tobool = icmp eq i64 %x, 0
  %sub = sub nsw i64 0, %x
  %and = and i64 %sub, %x
  %cond = select i1 %tobool, i64 %y, i64 %and
  ret i64 %cond
}