; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+bmi | FileCheck %s --check-prefixes=CHECK,X86,BMI1,X86-BMI1
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+bmi,+bmi2 | FileCheck %s --check-prefixes=CHECK,X86,BMI2,X86-BMI2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi | FileCheck %s --check-prefixes=CHECK,X64,BMI1,X64-BMI1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi,+bmi2 | FileCheck %s --check-prefixes=CHECK,X64,BMI2,X64-BMI2

define i32 @andn32(i32 %x, i32 %y) {
; X86-LABEL: andn32:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %eax
; X86-NEXT:    retl
;
; X64-LABEL: andn32:
; X64:       # %bb.0:
; X64-NEXT:    andnl %esi, %edi, %eax
; X64-NEXT:    retq
  %tmp1 = xor i32 %x, -1
  %tmp2 = and i32 %y, %tmp1
  ret i32 %tmp2
}

define i32 @andn32_load(i32 %x, i32* %y) {
; X86-LABEL: andn32_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    andnl (%eax), %ecx, %eax
; X86-NEXT:    retl
;
; X64-LABEL: andn32_load:
; X64:       # %bb.0:
; X64-NEXT:    andnl (%rsi), %edi, %eax
; X64-NEXT:    retq
  %y1 = load i32, i32* %y
  %tmp1 = xor i32 %x, -1
  %tmp2 = and i32 %y1, %tmp1
  ret i32 %tmp2
}

define i64 @andn64(i64 %x, i64 %y) {
; X86-LABEL: andn64:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %eax
; X86-NEXT:    andnl {{[0-9]+}}(%esp), %ecx, %edx
; X86-NEXT:    retl
;
; X64-LABEL: andn64:
; X64:       # %bb.0:
; X64-NEXT:    andnq %rsi, %rdi, %rax
; X64-NEXT:    retq
  %tmp1 = xor i64 %x, -1
  %tmp2 = and i64 %tmp1, %y
  ret i64 %tmp2
}

; Don't choose a 'test' if an 'andn' can be used.
define i1 @andn_cmp(i32 %x, i32 %y) {
; X86-LABEL: andn_cmp:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %eax
; X86-NEXT:    sete %al
; X86-NEXT:    retl
;
; X64-LABEL: andn_cmp:
; X64:       # %bb.0:
; X64-NEXT:    andnl %esi, %edi, %eax
; X64-NEXT:    sete %al
; X64-NEXT:    retq
  %notx = xor i32 %x, -1
  %and = and i32 %notx, %y
  %cmp = icmp eq i32 %and, 0
  ret i1 %cmp
}

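; For reference: 'andn' computes dst = ~src1 & src2 and sets ZF/SF from the result
; (CF and OF are cleared), so the pattern matched in @andn_cmp above,
;   %notx = xor i32 %x, -1
;   %and = and i32 %notx, %y
;   %cmp = icmp eq i32 %and, 0
; can feed 'sete' straight from the andnl flags without a separate 'test'.
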
; Recognize a disguised andn in the following 4 tests.
define i1 @and_cmp1(i32 %x, i32 %y) {
; X86-LABEL: and_cmp1:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %eax
; X86-NEXT:    sete %al
; X86-NEXT:    retl
;
; X64-LABEL: and_cmp1:
; X64:       # %bb.0:
; X64-NEXT:    andnl %esi, %edi, %eax
; X64-NEXT:    sete %al
; X64-NEXT:    retq
  %and = and i32 %x, %y
  %cmp = icmp eq i32 %and, %y
  ret i1 %cmp
}

define i1 @and_cmp2(i32 %x, i32 %y) {
; X86-LABEL: and_cmp2:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %eax
; X86-NEXT:    setne %al
; X86-NEXT:    retl
;
; X64-LABEL: and_cmp2:
; X64:       # %bb.0:
; X64-NEXT:    andnl %esi, %edi, %eax
; X64-NEXT:    setne %al
; X64-NEXT:    retq
  %and = and i32 %y, %x
  %cmp = icmp ne i32 %and, %y
  ret i1 %cmp
}

define i1 @and_cmp3(i32 %x, i32 %y) {
; X86-LABEL: and_cmp3:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %eax
; X86-NEXT:    sete %al
; X86-NEXT:    retl
;
; X64-LABEL: and_cmp3:
; X64:       # %bb.0:
; X64-NEXT:    andnl %esi, %edi, %eax
; X64-NEXT:    sete %al
; X64-NEXT:    retq
  %and = and i32 %x, %y
  %cmp = icmp eq i32 %y, %and
  ret i1 %cmp
}

define i1 @and_cmp4(i32 %x, i32 %y) {
; X86-LABEL: and_cmp4:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %eax
; X86-NEXT:    setne %al
; X86-NEXT:    retl
;
; X64-LABEL: and_cmp4:
; X64:       # %bb.0:
; X64-NEXT:    andnl %esi, %edi, %eax
; X64-NEXT:    setne %al
; X64-NEXT:    retq
  %and = and i32 %y, %x
  %cmp = icmp ne i32 %y, %and
  ret i1 %cmp
}

; A mask and compare against a constant is ok for an 'andn' too
; even though the BMI instruction doesn't have an immediate form.
define i1 @and_cmp_const(i32 %x) {
; X86-LABEL: and_cmp_const:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    notl %eax
; X86-NEXT:    andl $43, %eax
; X86-NEXT:    sete %al
; X86-NEXT:    retl
;
; X64-LABEL: and_cmp_const:
; X64:       # %bb.0:
; X64-NEXT:    notl %edi
; X64-NEXT:    andl $43, %edi
; X64-NEXT:    sete %al
; X64-NEXT:    retq
  %and = and i32 %x, 43
  %cmp = icmp eq i32 %and, 43
  ret i1 %cmp
}

; But don't use 'andn' if the mask is a power-of-two.
define i1 @and_cmp_const_power_of_two(i32 %x, i32 %y) {
; X86-LABEL: and_cmp_const_power_of_two:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    btl %ecx, %eax
; X86-NEXT:    setae %al
; X86-NEXT:    retl
;
; X64-LABEL: and_cmp_const_power_of_two:
; X64:       # %bb.0:
; X64-NEXT:    btl %esi, %edi
; X64-NEXT:    setae %al
; X64-NEXT:    retq
  %shl = shl i32 1, %y
  %and = and i32 %x, %shl
  %cmp = icmp ne i32 %and, %shl
  ret i1 %cmp
}

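; For reference: the and_cmp1..and_cmp4 tests rely on the identity
; (x & y) == y <=> (y & ~x) == 0, so the masked equality check can reuse the ZF
; produced by andnl. When the mask is a single bit (1 << y), testing that bit with
; 'bt' (which copies it into CF for setae/setb) is preferred over materializing ~x.
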
; Don't transform to 'andn' if there's another use of the 'and'.
define i32 @and_cmp_not_one_use(i32 %x) {
; X86-LABEL: and_cmp_not_one_use:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    andl $37, %ecx
; X86-NEXT:    xorl %eax, %eax
; X86-NEXT:    cmpl $37, %ecx
; X86-NEXT:    sete %al
; X86-NEXT:    addl %ecx, %eax
; X86-NEXT:    retl
;
; X64-LABEL: and_cmp_not_one_use:
; X64:       # %bb.0:
; X64-NEXT:    andl $37, %edi
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    cmpl $37, %edi
; X64-NEXT:    sete %al
; X64-NEXT:    addl %edi, %eax
; X64-NEXT:    retq
  %and = and i32 %x, 37
  %cmp = icmp eq i32 %and, 37
  %ext = zext i1 %cmp to i32
  %add = add i32 %and, %ext
  ret i32 %add
}

; Verify that we're not transforming invalid comparison predicates.
define i1 @not_an_andn1(i32 %x, i32 %y) {
; X86-LABEL: not_an_andn1:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    andl %eax, %ecx
; X86-NEXT:    cmpl %ecx, %eax
; X86-NEXT:    setg %al
; X86-NEXT:    retl
;
; X64-LABEL: not_an_andn1:
; X64:       # %bb.0:
; X64-NEXT:    andl %esi, %edi
; X64-NEXT:    cmpl %edi, %esi
; X64-NEXT:    setg %al
; X64-NEXT:    retq
  %and = and i32 %x, %y
  %cmp = icmp sgt i32 %y, %and
  ret i1 %cmp
}

define i1 @not_an_andn2(i32 %x, i32 %y) {
; X86-LABEL: not_an_andn2:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    andl %eax, %ecx
; X86-NEXT:    cmpl %ecx, %eax
; X86-NEXT:    setbe %al
; X86-NEXT:    retl
;
; X64-LABEL: not_an_andn2:
; X64:       # %bb.0:
; X64-NEXT:    andl %esi, %edi
; X64-NEXT:    cmpl %edi, %esi
; X64-NEXT:    setbe %al
; X64-NEXT:    retq
  %and = and i32 %y, %x
  %cmp = icmp ule i32 %y, %and
  ret i1 %cmp
}

; Don't choose a 'test' if an 'andn' can be used.
define i1 @andn_cmp_swap_ops(i64 %x, i64 %y) {
; X86-LABEL: andn_cmp_swap_ops:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    andnl {{[0-9]+}}(%esp), %ecx, %ecx
; X86-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %eax
; X86-NEXT:    orl %ecx, %eax
; X86-NEXT:    sete %al
; X86-NEXT:    retl
;
; X64-LABEL: andn_cmp_swap_ops:
; X64:       # %bb.0:
; X64-NEXT:    andnq %rsi, %rdi, %rax
; X64-NEXT:    sete %al
; X64-NEXT:    retq
  %notx = xor i64 %x, -1
  %and = and i64 %y, %notx
  %cmp = icmp eq i64 %and, 0
  ret i1 %cmp
}

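; For reference: only equality/inequality of the 'and' result can be rewritten as
; 'andn'; the ordered predicates (sgt, ule) above still need a real 'cmp'. For the
; i64 case on i686, @andn_cmp_swap_ops splits the value into two andnl results that
; are or'ed together so a single sete can test all 64 bits for zero.
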
; Use a 'test' (not an 'and') because 'andn' only works for i32/i64.
define i1 @andn_cmp_i8(i8 %x, i8 %y) {
; X86-LABEL: andn_cmp_i8:
; X86:       # %bb.0:
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    notb %al
; X86-NEXT:    testb %al, {{[0-9]+}}(%esp)
; X86-NEXT:    sete %al
; X86-NEXT:    retl
;
; X64-LABEL: andn_cmp_i8:
; X64:       # %bb.0:
; X64-NEXT:    notb %sil
; X64-NEXT:    testb %sil, %dil
; X64-NEXT:    sete %al
; X64-NEXT:    retq
  %noty = xor i8 %y, -1
  %and = and i8 %x, %noty
  %cmp = icmp eq i8 %and, 0
  ret i1 %cmp
}

declare i32 @llvm.x86.bmi.bextr.32(i32, i32)

define i32 @bextr32(i32 %x, i32 %y) {
; X86-LABEL: bextr32:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
;
; X64-LABEL: bextr32:
; X64:       # %bb.0:
; X64-NEXT:    bextrl %esi, %edi, %eax
; X64-NEXT:    retq
  %tmp = tail call i32 @llvm.x86.bmi.bextr.32(i32 %x, i32 %y)
  ret i32 %tmp
}

define i32 @bextr32_load(i32* %x, i32 %y) {
; X86-LABEL: bextr32_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    bextrl %eax, (%ecx), %eax
; X86-NEXT:    retl
;
; X64-LABEL: bextr32_load:
; X64:       # %bb.0:
; X64-NEXT:    bextrl %esi, (%rdi), %eax
; X64-NEXT:    retq
  %x1 = load i32, i32* %x
  %tmp = tail call i32 @llvm.x86.bmi.bextr.32(i32 %x1, i32 %y)
  ret i32 %tmp
}

define i32 @bextr32b(i32 %x) uwtable ssp {
; X86-LABEL: bextr32b:
; X86:       # %bb.0:
; X86-NEXT:    movl $3076, %eax # imm = 0xC04
; X86-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
;
; X64-LABEL: bextr32b:
; X64:       # %bb.0:
; X64-NEXT:    movl $3076, %eax # imm = 0xC04
; X64-NEXT:    bextrl %eax, %edi, %eax
; X64-NEXT:    retq
  %1 = lshr i32 %x, 4
  %2 = and i32 %1, 4095
  ret i32 %2
}

; Make sure we still use the AH subreg trick to extract bits 15:8.
define i32 @bextr32_subreg(i32 %x) uwtable ssp {
; X86-LABEL: bextr32_subreg:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
;
; X64-LABEL: bextr32_subreg:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    movzbl %ah, %eax
; X64-NEXT:    retq
  %1 = lshr i32 %x, 8
  %2 = and i32 %1, 255
  ret i32 %2
}

define i32 @bextr32b_load(i32* %x) uwtable ssp {
; X86-LABEL: bextr32b_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl $3076, %ecx # imm = 0xC04
; X86-NEXT:    bextrl %ecx, (%eax), %eax
; X86-NEXT:    retl
;
; X64-LABEL: bextr32b_load:
; X64:       # %bb.0:
; X64-NEXT:    movl $3076, %eax # imm = 0xC04
; X64-NEXT:    bextrl %eax, (%rdi), %eax
; X64-NEXT:    retq
  %1 = load i32, i32* %x
  %2 = lshr i32 %1, 4
  %3 = and i32 %2, 4095
  ret i32 %3
}

; PR34042
define i32 @bextr32c(i32 %x, i16 zeroext %y) {
; X86-LABEL: bextr32c:
; X86:       # %bb.0:
; X86-NEXT:    movswl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
;
; X64-LABEL: bextr32c:
; X64:       # %bb.0:
; X64-NEXT:    bextrl %esi, %edi, %eax
; X64-NEXT:    retq
  %tmp0 = sext i16 %y to i32
  %tmp1 = tail call i32 @llvm.x86.bmi.bextr.32(i32 %x, i32 %tmp0)
  ret i32 %tmp1
}

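; For reference: the bextr control operand packs the start bit in bits 7:0 and the
; field length in bits 15:8, so $3076 (0xC04) means "extract 12 bits starting at
; bit 4", i.e. (x >> 4) & 4095 as in @bextr32b. Extracting bits 15:8 is instead
; done via the AH subregister (movzbl %ah), as @bextr32_subreg checks.
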
define i32 @non_bextr32(i32 %x) {
; X86-LABEL: non_bextr32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    shrl $2, %eax
; X86-NEXT:    andl $111, %eax
; X86-NEXT:    retl
;
; X64-LABEL: non_bextr32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    shrl $2, %edi
; X64-NEXT:    andl $111, %edi
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    retq
entry:
  %shr = lshr i32 %x, 2
  %and = and i32 %shr, 111
  ret i32 %and
}

define i32 @blsi32(i32 %x) {
; X86-LABEL: blsi32:
; X86:       # %bb.0:
; X86-NEXT:    blsil {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
;
; X64-LABEL: blsi32:
; X64:       # %bb.0:
; X64-NEXT:    blsil %edi, %eax
; X64-NEXT:    retq
  %tmp = sub i32 0, %x
  %tmp2 = and i32 %x, %tmp
  ret i32 %tmp2
}

define i32 @blsi32_load(i32* %x) {
; X86-LABEL: blsi32_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    blsil (%eax), %eax
; X86-NEXT:    retl
;
; X64-LABEL: blsi32_load:
; X64:       # %bb.0:
; X64-NEXT:    blsil (%rdi), %eax
; X64-NEXT:    retq
  %x1 = load i32, i32* %x
  %tmp = sub i32 0, %x1
  %tmp2 = and i32 %x1, %tmp
  ret i32 %tmp2
}

define i64 @blsi64(i64 %x) {
; X86-LABEL: blsi64:
; X86:       # %bb.0:
; X86-NEXT:    pushl %esi
; X86-NEXT:    .cfi_def_cfa_offset 8
; X86-NEXT:    .cfi_offset %esi, -8
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    xorl %edx, %edx
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:    negl %eax
; X86-NEXT:    sbbl %esi, %edx
; X86-NEXT:    andl %esi, %edx
; X86-NEXT:    andl %ecx, %eax
; X86-NEXT:    popl %esi
; X86-NEXT:    .cfi_def_cfa_offset 4
; X86-NEXT:    retl
;
; X64-LABEL: blsi64:
; X64:       # %bb.0:
; X64-NEXT:    blsiq %rdi, %rax
; X64-NEXT:    retq
  %tmp = sub i64 0, %x
  %tmp2 = and i64 %tmp, %x
  ret i64 %tmp2
}

define i32 @blsmsk32(i32 %x) {
; X86-LABEL: blsmsk32:
; X86:       # %bb.0:
; X86-NEXT:    blsmskl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
;
; X64-LABEL: blsmsk32:
; X64:       # %bb.0:
; X64-NEXT:    blsmskl %edi, %eax
; X64-NEXT:    retq
  %tmp = sub i32 %x, 1
  %tmp2 = xor i32 %x, %tmp
  ret i32 %tmp2
}

define i32 @blsmsk32_load(i32* %x) {
; X86-LABEL: blsmsk32_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    blsmskl (%eax), %eax
; X86-NEXT:    retl
;
; X64-LABEL: blsmsk32_load:
; X64:       # %bb.0:
; X64-NEXT:    blsmskl (%rdi), %eax
; X64-NEXT:    retq
  %x1 = load i32, i32* %x
  %tmp = sub i32 %x1, 1
  %tmp2 = xor i32 %x1, %tmp
  ret i32 %tmp2
}

define i64 @blsmsk64(i64 %x) {
; X86-LABEL: blsmsk64:
; X86:       # %bb.0:
; X86-NEXT:    pushl %esi
; X86-NEXT:    .cfi_def_cfa_offset 8
; X86-NEXT:    .cfi_offset %esi, -8
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:    addl $-1, %eax
; X86-NEXT:    movl %esi, %edx
; X86-NEXT:    adcl $-1, %edx
; X86-NEXT:    xorl %ecx, %eax
; X86-NEXT:    xorl %esi, %edx
; X86-NEXT:    popl %esi
; X86-NEXT:    .cfi_def_cfa_offset 4
; X86-NEXT:    retl
;
; X64-LABEL: blsmsk64:
; X64:       # %bb.0:
; X64-NEXT:    blsmskq %rdi, %rax
; X64-NEXT:    retq
  %tmp = sub i64 %x, 1
  %tmp2 = xor i64 %tmp, %x
  ret i64 %tmp2
}

define i32 @blsr32(i32 %x) {
; X86-LABEL: blsr32:
; X86:       # %bb.0:
; X86-NEXT:    blsrl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
;
; X64-LABEL: blsr32:
; X64:       # %bb.0:
; X64-NEXT:    blsrl %edi, %eax
; X64-NEXT:    retq
  %tmp = sub i32 %x, 1
  %tmp2 = and i32 %x, %tmp
  ret i32 %tmp2
}

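; For reference, the BMI1 identities exercised by the blsi/blsmsk/blsr tests:
;   blsi:   x & -x       isolates the lowest set bit
;   blsmsk: x ^ (x - 1)  masks up to and including the lowest set bit
;   blsr:   x & (x - 1)  clears the lowest set bit
; The 64-bit forms only exist in 64-bit mode, so the i686 RUN lines expand the i64
; tests into 32-bit halves chained through the carry flag (neg/sbb or add/adc).
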
define i32 @blsr32_load(i32* %x) {
; X86-LABEL: blsr32_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    blsrl (%eax), %eax
; X86-NEXT:    retl
;
; X64-LABEL: blsr32_load:
; X64:       # %bb.0:
; X64-NEXT:    blsrl (%rdi), %eax
; X64-NEXT:    retq
  %x1 = load i32, i32* %x
  %tmp = sub i32 %x1, 1
  %tmp2 = and i32 %x1, %tmp
  ret i32 %tmp2
}

define i64 @blsr64(i64 %x) {
; X86-LABEL: blsr64:
; X86:       # %bb.0:
; X86-NEXT:    pushl %esi
; X86-NEXT:    .cfi_def_cfa_offset 8
; X86-NEXT:    .cfi_offset %esi, -8
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:    addl $-1, %eax
; X86-NEXT:    movl %esi, %edx
; X86-NEXT:    adcl $-1, %edx
; X86-NEXT:    andl %ecx, %eax
; X86-NEXT:    andl %esi, %edx
; X86-NEXT:    popl %esi
; X86-NEXT:    .cfi_def_cfa_offset 4
; X86-NEXT:    retl
;
; X64-LABEL: blsr64:
; X64:       # %bb.0:
; X64-NEXT:    blsrq %rdi, %rax
; X64-NEXT:    retq
  %tmp = sub i64 %x, 1
  %tmp2 = and i64 %tmp, %x
  ret i64 %tmp2
}

; PR35792 - https://bugs.llvm.org/show_bug.cgi?id=35792

define i64 @blsr_disguised_constant(i64 %x) {
; X86-LABEL: blsr_disguised_constant:
; X86:       # %bb.0:
; X86-NEXT:    blsrl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movzwl %ax, %eax
; X86-NEXT:    xorl %edx, %edx
; X86-NEXT:    retl
;
; X64-LABEL: blsr_disguised_constant:
; X64:       # %bb.0:
; X64-NEXT:    blsrl %edi, %eax
; X64-NEXT:    movzwl %ax, %eax
; X64-NEXT:    retq
  %a1 = and i64 %x, 65535
  %a2 = add i64 %x, 65535
  %r = and i64 %a1, %a2
  ret i64 %r
}

; The add here used to get shrunk, but the and did not, which hid the blsr pattern.
; We now use the knowledge that the shift leaves the upper bits zero, so the and
; result also has zero upper bits and can be shrunk too.
define i64 @blsr_disguised_shrunk_add(i64 %x) {
; X86-LABEL: blsr_disguised_shrunk_add:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    shrl $16, %eax
; X86-NEXT:    blsrl %eax, %eax
; X86-NEXT:    xorl %edx, %edx
; X86-NEXT:    retl
;
; X64-LABEL: blsr_disguised_shrunk_add:
; X64:       # %bb.0:
; X64-NEXT:    shrq $48, %rdi
; X64-NEXT:    blsrl %edi, %eax
; X64-NEXT:    retq
  %a = lshr i64 %x, 48
  %b = add i64 %a, -1
  %c = and i64 %b, %a
  ret i64 %c
}
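
; For reference: in @blsr_disguised_constant above, the result is masked to 16 bits,
; and modulo 2^16 adding 65535 is the same as subtracting 1, so the expression
; reduces to blsr of the low 16 bits (blsrl followed by movzwl).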