; RUN: llc -mtriple=i686-linux -pre-RA-sched=source < %s | FileCheck %s
; RUN: opt -disable-output -debugify < %s

declare void @error(i32 %i, i32 %a, i32 %b)

define i32 @test_ifchains(i32 %i, i32* %a, i32 %b) {
; Test a chain of ifs, where the block guarded by the if is error handling code
; that is not expected to run.
; CHECK-LABEL: test_ifchains:
; CHECK: %entry
; CHECK-NOT: .p2align
; CHECK: %else1
; CHECK-NOT: .p2align
; CHECK: %else2
; CHECK-NOT: .p2align
; CHECK: %else3
; CHECK-NOT: .p2align
; CHECK: %else4
; CHECK-NOT: .p2align
; CHECK: %exit
; CHECK: %then1
; CHECK: %then2
; CHECK: %then3
; CHECK: %then4
; CHECK: %then5

entry:
  %gep1 = getelementptr i32, i32* %a, i32 1
  %val1 = load i32, i32* %gep1
  %cond1 = icmp ugt i32 %val1, 1
  br i1 %cond1, label %then1, label %else1, !prof !0

then1:
  call void @error(i32 %i, i32 1, i32 %b)
  br label %else1

else1:
  %gep2 = getelementptr i32, i32* %a, i32 2
  %val2 = load i32, i32* %gep2
  %cond2 = icmp ugt i32 %val2, 2
  br i1 %cond2, label %then2, label %else2, !prof !0

then2:
  call void @error(i32 %i, i32 1, i32 %b)
  br label %else2

else2:
  %gep3 = getelementptr i32, i32* %a, i32 3
  %val3 = load i32, i32* %gep3
  %cond3 = icmp ugt i32 %val3, 3
  br i1 %cond3, label %then3, label %else3, !prof !0

then3:
  call void @error(i32 %i, i32 1, i32 %b)
  br label %else3

else3:
  %gep4 = getelementptr i32, i32* %a, i32 4
  %val4 = load i32, i32* %gep4
  %cond4 = icmp ugt i32 %val4, 4
  br i1 %cond4, label %then4, label %else4, !prof !0

then4:
  call void @error(i32 %i, i32 1, i32 %b)
  br label %else4

else4:
  %gep5 = getelementptr i32, i32* %a, i32 3
  %val5 = load i32, i32* %gep5
  %cond5 = icmp ugt i32 %val5, 3
  br i1 %cond5, label %then5, label %exit, !prof !0

then5:
  call void @error(i32 %i, i32 1, i32 %b)
  br label %exit

exit:
  ret i32 %b
}

define i32 @test_loop_cold_blocks(i32 %i, i32* %a) {
; Check that we sink cold loop blocks after the hot loop body.
; CHECK-LABEL: test_loop_cold_blocks:
; CHECK: %entry
; CHECK-NOT: .p2align
; CHECK: %unlikely1
; CHECK-NOT: .p2align
; CHECK: %unlikely2
; CHECK: .p2align
; CHECK: %body1
; CHECK: %body2
; CHECK: %body3
; CHECK: %exit

entry:
  br label %body1

body1:
  %iv = phi i32 [ 0, %entry ], [ %next, %body3 ]
  %base = phi i32 [ 0, %entry ], [ %sum, %body3 ]
  %unlikelycond1 = icmp slt i32 %base, 42
  br i1 %unlikelycond1, label %unlikely1, label %body2, !prof !0

unlikely1:
  call void @error(i32 %i, i32 1, i32 %base)
  br label %body2

body2:
  %unlikelycond2 = icmp sgt i32 %base, 21
  br i1 %unlikelycond2, label %unlikely2, label %body3, !prof !0

unlikely2:
  call void @error(i32 %i, i32 2, i32 %base)
  br label %body3

body3:
  %arrayidx = getelementptr inbounds i32, i32* %a, i32 %iv
  %0 = load i32, i32* %arrayidx
  %sum = add nsw i32 %0, %base
  %next = add i32 %iv, 1
  %exitcond = icmp eq i32 %next, %i
  br i1 %exitcond, label %exit, label %body1

exit:
  ret i32 %sum
}

!0 = !{!"branch_weights", i32 4, i32 64}

define i32 @test_loop_early_exits(i32 %i, i32* %a) {
; Check that we sink early exit blocks out of loop bodies.
; CHECK-LABEL: test_loop_early_exits:
; CHECK: %entry
; CHECK: %body1
; CHECK: %body2
; CHECK: %body3
; CHECK: %body4
; CHECK: %exit
; CHECK: %bail1
; CHECK: %bail2
; CHECK: %bail3

entry:
  br label %body1

body1:
  %iv = phi i32 [ 0, %entry ], [ %next, %body4 ]
  %base = phi i32 [ 0, %entry ], [ %sum, %body4 ]
  %bailcond1 = icmp eq i32 %base, 42
  br i1 %bailcond1, label %bail1, label %body2

bail1:
  ret i32 -1

body2:
  %bailcond2 = icmp eq i32 %base, 43
  br i1 %bailcond2, label %bail2, label %body3

bail2:
  ret i32 -2

body3:
  %bailcond3 = icmp eq i32 %base, 44
  br i1 %bailcond3, label %bail3, label %body4

bail3:
  ret i32 -3

body4:
  %arrayidx = getelementptr inbounds i32, i32* %a, i32 %iv
  %0 = load i32, i32* %arrayidx
  %sum = add nsw i32 %0, %base
  %next = add i32 %iv, 1
  %exitcond = icmp eq i32 %next, %i
  br i1 %exitcond, label %exit, label %body1

exit:
  ret i32 %sum
}

; Tail duplication during layout can entirely remove body0 by duplicating it
; into the entry block and into body1. This is a good thing but it isn't what
; this test is looking for. So to make the blocks longer so they don't get
; duplicated, we add some calls to dummy.
declare void @dummy()

define i32 @test_loop_rotate(i32 %i, i32* %a) {
; Check that we rotate conditional exits from the loop to the bottom of the
; loop, eliminating unconditional branches to the top.
; CHECK-LABEL: test_loop_rotate:
; CHECK: %entry
; CHECK: %body1
; CHECK: %body0
; CHECK: %exit

entry:
  br label %body0

body0:
  %iv = phi i32 [ 0, %entry ], [ %next, %body1 ]
  %base = phi i32 [ 0, %entry ], [ %sum, %body1 ]
  %next = add i32 %iv, 1
  %exitcond = icmp eq i32 %next, %i
  call void @dummy()
  call void @dummy()
  br i1 %exitcond, label %exit, label %body1

body1:
  %arrayidx = getelementptr inbounds i32, i32* %a, i32 %iv
  %0 = load i32, i32* %arrayidx
  %sum = add nsw i32 %0, %base
  %bailcond1 = icmp eq i32 %sum, 42
  br label %body0

exit:
  ret i32 %base
}

define i32 @test_no_loop_rotate(i32 %i, i32* %a) {
; Check that we don't try to rotate a loop which is already laid out with
; fallthrough opportunities into the top and out of the bottom.
; CHECK-LABEL: test_no_loop_rotate:
; CHECK: %entry
; CHECK: %body0
; CHECK: %body1
; CHECK: %exit

entry:
  br label %body0

body0:
  %iv = phi i32 [ 0, %entry ], [ %next, %body1 ]
  %base = phi i32 [ 0, %entry ], [ %sum, %body1 ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i32 %iv
  %0 = load i32, i32* %arrayidx
  %sum = add nsw i32 %0, %base
  %bailcond1 = icmp eq i32 %sum, 42
  br i1 %bailcond1, label %exit, label %body1

body1:
  %next = add i32 %iv, 1
  %exitcond = icmp eq i32 %next, %i
  br i1 %exitcond, label %exit, label %body0

exit:
  ret i32 %base
}

define i32 @test_loop_align(i32 %i, i32* %a) {
; Check that we provide basic loop body alignment with the block placement
; pass.
; CHECK-LABEL: test_loop_align:
; CHECK: %entry
; CHECK: .p2align [[ALIGN:[0-9]+]],
; CHECK-NEXT: %body
; CHECK: %exit

entry:
  br label %body

body:
  %iv = phi i32 [ 0, %entry ], [ %next, %body ]
  %base = phi i32 [ 0, %entry ], [ %sum, %body ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i32 %iv
  %0 = load i32, i32* %arrayidx
  %sum = add nsw i32 %0, %base
  %next = add i32 %iv, 1
  %exitcond = icmp eq i32 %next, %i
  br i1 %exitcond, label %exit, label %body

exit:
  ret i32 %sum
}

define i32 @test_nested_loop_align(i32 %i, i32* %a, i32* %b) {
; Check that we provide nested loop body alignment.
; CHECK-LABEL: test_nested_loop_align:
; CHECK: %entry
; CHECK: .p2align [[ALIGN]],
; CHECK-NEXT: %loop.body.1
; CHECK: .p2align [[ALIGN]],
; CHECK-NEXT: %inner.loop.body
; CHECK-NOT: .p2align
; CHECK: %exit

entry:
  br label %loop.body.1

loop.body.1:
  %iv = phi i32 [ 0, %entry ], [ %next, %loop.body.2 ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i32 %iv
  %bidx = load i32, i32* %arrayidx
  br label %inner.loop.body

inner.loop.body:
  %inner.iv = phi i32 [ 0, %loop.body.1 ], [ %inner.next, %inner.loop.body ]
  %base = phi i32 [ 0, %loop.body.1 ], [ %sum, %inner.loop.body ]
  %scaled_idx = mul i32 %bidx, %iv
  %inner.arrayidx = getelementptr inbounds i32, i32* %b, i32 %scaled_idx
  %0 = load i32, i32* %inner.arrayidx
  %sum = add nsw i32 %0, %base
  %inner.next = add i32 %iv, 1
  %inner.exitcond = icmp eq i32 %inner.next, %i
  br i1 %inner.exitcond, label %loop.body.2, label %inner.loop.body

loop.body.2:
  %next = add i32 %iv, 1
  %exitcond = icmp eq i32 %next, %i
  br i1 %exitcond, label %exit, label %loop.body.1

exit:
  ret i32 %sum
}

define void @unnatural_cfg1() {
; Test that we can handle a loop with an inner unnatural loop at the end of
; a function. This is a gross CFG reduced out of the single source GCC.
; CHECK-LABEL: unnatural_cfg1
; CHECK: %entry
; CHECK: %loop.header
; CHECK: %loop.body2
; CHECK: %loop.body3

entry:
  br label %loop.header

loop.header:
  br label %loop.body1

loop.body1:
  br i1 undef, label %loop.body3, label %loop.body2

loop.body2:
  %ptr = load i32*, i32** undef, align 4
  br label %loop.body3

loop.body3:
  %myptr = phi i32* [ %ptr2, %loop.body5 ], [ %ptr, %loop.body2 ], [ undef, %loop.body1 ]
  %bcmyptr = bitcast i32* %myptr to i32*
  %val = load i32, i32* %bcmyptr, align 4
  %comp = icmp eq i32 %val, 48
  br i1 %comp, label %loop.body4, label %loop.body5

loop.body4:
  br i1 undef, label %loop.header, label %loop.body5

loop.body5:
  %ptr2 = load i32*, i32** undef, align 4
  br label %loop.body3
}

define void @unnatural_cfg2() {
; Test that we can handle a loop with a nested natural loop *and* an unnatural
; loop. This was reduced from a crash on block placement when run over
; single-source GCC.
; CHECK-LABEL: unnatural_cfg2
; CHECK: %entry
; CHECK: %loop.header
; CHECK: %loop.body1
; CHECK: %loop.body2
; CHECK: %loop.body4
; CHECK: %loop.inner2.begin
; CHECK: %loop.inner2.begin
; CHECK: %loop.body3
; CHECK: %loop.inner1.begin
; CHECK: %bail

entry:
  br label %loop.header

loop.header:
  %comp0 = icmp eq i32* undef, null
  br i1 %comp0, label %bail, label %loop.body1

loop.body1:
  %val0 = load i32*, i32** undef, align 4
  br i1 undef, label %loop.body2, label %loop.inner1.begin

loop.body2:
  br i1 undef, label %loop.body4, label %loop.body3

loop.body3:
  %ptr1 = getelementptr inbounds i32, i32* %val0, i32 0
  %castptr1 = bitcast i32* %ptr1 to i32**
  %val1 = load i32*, i32** %castptr1, align 4
  br label %loop.inner1.begin

loop.inner1.begin:
  %valphi = phi i32* [ %val2, %loop.inner1.end ], [ %val1, %loop.body3 ], [ %val0, %loop.body1 ]
  %castval = bitcast i32* %valphi to i32*
  %comp1 = icmp eq i32 undef, 48
  br i1 %comp1, label %loop.inner1.end, label %loop.body4

loop.inner1.end:
  %ptr2 = getelementptr inbounds i32, i32* %valphi, i32 0
  %castptr2 = bitcast i32* %ptr2 to i32**
  %val2 = load i32*, i32** %castptr2, align 4
  br label %loop.inner1.begin

loop.body4.dead:
  br label %loop.body4

loop.body4:
  %comp2 = icmp ult i32 undef, 3
  br i1 %comp2, label %loop.inner2.begin, label %loop.end

loop.inner2.begin:
  br i1 false, label %loop.end, label %loop.inner2.end

loop.inner2.end:
  %comp3 = icmp eq i32 undef, 1769472
  br i1 %comp3, label %loop.end, label %loop.inner2.begin

loop.end:
  br label %loop.header

bail:
  unreachable
}

define i32 @problematic_switch() {
; This function's CFG caused overlow in the machine branch probability
; calculation, triggering asserts. Make sure we don't crash on it.
; CHECK: problematic_switch

entry:
  switch i32 undef, label %exit [
    i32 879, label %bogus
    i32 877, label %step
    i32 876, label %step
    i32 875, label %step
    i32 874, label %step
    i32 873, label %step
    i32 872, label %step
    i32 868, label %step
    i32 867, label %step
    i32 866, label %step
    i32 861, label %step
    i32 860, label %step
    i32 856, label %step
    i32 855, label %step
    i32 854, label %step
    i32 831, label %step
    i32 830, label %step
    i32 829, label %step
    i32 828, label %step
    i32 815, label %step
    i32 814, label %step
    i32 811, label %step
    i32 806, label %step
    i32 805, label %step
    i32 804, label %step
    i32 803, label %step
    i32 802, label %step
    i32 801, label %step
    i32 800, label %step
    i32 799, label %step
    i32 798, label %step
    i32 797, label %step
    i32 796, label %step
    i32 795, label %step
  ]
bogus:
  unreachable
step:
  br label %exit
exit:
  %merge = phi i32 [ 3, %step ], [ 6, %entry ]
  ret i32 %merge
}

define void @fpcmp_unanalyzable_branch(i1 %cond) {
; This function's CFG contains an once-unanalyzable branch (une on floating
; points). As now it becomes analyzable, we should get best layout in which each
; edge in 'entry' -> 'entry.if.then_crit_edge' -> 'if.then' -> 'if.end' is
; fall-through.
; CHECK-LABEL: fpcmp_unanalyzable_branch:
; CHECK: # %bb.0: # %entry
; CHECK: # %bb.1: # %entry.if.then_crit_edge
; CHECK: .LBB10_5: # %if.then
; CHECK: .LBB10_6: # %if.end
; CHECK: # %bb.3: # %exit
; CHECK: jne .LBB10_4
; CHECK-NEXT: jnp .LBB10_6
; CHECK: jmp .LBB10_5

entry:
; Note that this branch must be strongly biased toward
; 'entry.if.then_crit_edge' to ensure that we would try to form a chain for
; 'entry' -> 'entry.if.then_crit_edge' -> 'if.then' -> 'if.end'.
491 br i1 %cond, label %entry.if.then_crit_edge, label %lor.lhs.false, !prof !1 492 493entry.if.then_crit_edge: 494 %.pre14 = load i8, i8* undef, align 1 495 br label %if.then 496 497lor.lhs.false: 498 br i1 undef, label %if.end, label %exit 499 500exit: 501 %cmp.i = fcmp une double 0.000000e+00, undef 502 br i1 %cmp.i, label %if.then, label %if.end, !prof !3 503 504if.then: 505 %0 = phi i8 [ %.pre14, %entry.if.then_crit_edge ], [ undef, %exit ] 506 %1 = and i8 %0, 1 507 store i8 %1, i8* undef, align 4 508 br label %if.end 509 510if.end: 511 ret void 512} 513 514!1 = !{!"branch_weights", i32 1000, i32 1} 515!3 = !{!"branch_weights", i32 1, i32 1000} 516 517declare i32 @f() 518declare i32 @g() 519declare i32 @h(i32 %x) 520 521define i32 @test_global_cfg_break_profitability() { 522; Check that our metrics for the profitability of a CFG break are global rather 523; than local. A successor may be very hot, but if the current block isn't, it 524; doesn't matter. Within this test the 'then' block is slightly warmer than the 525; 'else' block, but not nearly enough to merit merging it with the exit block 526; even though the probability of 'then' branching to the 'exit' block is very 527; high. 528; CHECK: test_global_cfg_break_profitability 529; CHECK: calll {{_?}}f 530; CHECK: calll {{_?}}g 531; CHECK: calll {{_?}}h 532; CHECK: ret 533 534entry: 535 br i1 undef, label %then, label %else, !prof !2 536 537then: 538 %then.result = call i32 @f() 539 br label %exit 540 541else: 542 %else.result = call i32 @g() 543 br label %exit 544 545exit: 546 %result = phi i32 [ %then.result, %then ], [ %else.result, %else ] 547 %result2 = call i32 @h(i32 %result) 548 ret i32 %result 549} 550 551!2 = !{!"branch_weights", i32 3, i32 1} 552 553declare i32 @__gxx_personality_v0(...) 554 555define void @test_eh_lpad_successor() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { 556; Some times the landing pad ends up as the first successor of an invoke block. 
; When this happens, a strange result used to fall out of updateTerminators: we
; didn't correctly locate the fallthrough successor, assuming blindly that the
; first one was the fallthrough successor. As a result, we would add an
; erroneous jump to the landing pad thinking *that* was the default successor.
; CHECK-LABEL: test_eh_lpad_successor
; CHECK: %entry
; CHECK-NOT: jmp
; CHECK: %loop

entry:
  invoke i32 @f() to label %preheader unwind label %lpad

preheader:
  br label %loop

lpad:
  %lpad.val = landingpad { i8*, i32 }
          cleanup
  resume { i8*, i32 } %lpad.val

loop:
  br label %loop
}

declare void @fake_throw() noreturn

define void @test_eh_throw() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
; For blocks containing a 'throw' (or similar functionality), we have
; a no-return invoke. In this case, only EH successors will exist, and
; fallthrough simply won't occur. Make sure we don't crash trying to update
; terminators for such constructs.
;
; CHECK-LABEL: test_eh_throw
; CHECK: %entry
; CHECK: %cleanup

entry:
  invoke void @fake_throw() to label %continue unwind label %cleanup

continue:
  unreachable

cleanup:
  %0 = landingpad { i8*, i32 }
          cleanup
  unreachable
}

define void @test_unnatural_cfg_backwards_inner_loop() {
; Test that when we encounter an unnatural CFG structure after having formed
; a chain for an inner loop which happened to be laid out backwards we don't
; attempt to merge onto the wrong end of the inner loop just because we find it
; first. This was reduced from a crasher in GCC's single source.
;
; CHECK-LABEL: test_unnatural_cfg_backwards_inner_loop
; CHECK: %entry
; CHECK: %loop2b
; CHECK: %loop3

entry:
  br i1 undef, label %loop2a, label %body

body:
  br label %loop2a

loop1:
  %next.load = load i32*, i32** undef
  br i1 %comp.a, label %loop2a, label %loop2b

loop2a:
  %var = phi i32* [ null, %entry ], [ null, %body ], [ %next.phi, %loop1 ]
  %next.var = phi i32* [ null, %entry ], [ undef, %body ], [ %next.load, %loop1 ]
  %comp.a = icmp eq i32* %var, null
  br label %loop3

loop2b:
  %gep = getelementptr inbounds i32, i32* %var.phi, i32 0
  %next.ptr = bitcast i32* %gep to i32**
  store i32* %next.phi, i32** %next.ptr
  br label %loop3

loop3:
  %var.phi = phi i32* [ %next.phi, %loop2b ], [ %var, %loop2a ]
  %next.phi = phi i32* [ %next.load, %loop2b ], [ %next.var, %loop2a ]
  br label %loop1
}

define void @unanalyzable_branch_to_loop_header() {
; Ensure that we can handle unanalyzable branches into loop headers. We
; pre-form chains for unanalyzable branches, and will find the tail end of that
; at the start of the loop. This function uses floating point comparison
; fallthrough because that happens to always produce unanalyzable branches on
; x86.
;
; CHECK-LABEL: unanalyzable_branch_to_loop_header
; CHECK: %entry
; CHECK: %loop
; CHECK: %exit

entry:
  %cmp = fcmp une double 0.000000e+00, undef
  br i1 %cmp, label %loop, label %exit

loop:
  %cond = icmp eq i8 undef, 42
  br i1 %cond, label %exit, label %loop

exit:
  ret void
}

define void @unanalyzable_branch_to_best_succ(i1 %cond) {
; Ensure that we can handle unanalyzable branches where the destination block
; gets selected as the optimal successor to merge.
;
; This branch is now analyzable and hence the destination block becomes the
; hotter one. The right order is entry->bar->exit->foo.
;
; CHECK-LABEL: unanalyzable_branch_to_best_succ
; CHECK: %entry
; CHECK: %bar
; CHECK: %exit
; CHECK: %foo

entry:
  ; Bias this branch toward bar to ensure we form that chain.
  br i1 %cond, label %bar, label %foo, !prof !1

foo:
  %cmp = fcmp une double 0.000000e+00, undef
  br i1 %cmp, label %bar, label %exit

bar:
  call i32 @f()
  br label %exit

exit:
  ret void
}

define void @unanalyzable_branch_to_free_block(float %x) {
; Ensure that we can handle unanalyzable branches where the destination block
; gets selected as the best free block in the CFG.
;
; CHECK-LABEL: unanalyzable_branch_to_free_block
; CHECK: %entry
; CHECK: %a
; CHECK: %b
; CHECK: %c
; CHECK: %exit

entry:
  br i1 undef, label %a, label %b

a:
  call i32 @f()
  br label %c

b:
  %cmp = fcmp une float %x, undef
  br i1 %cmp, label %c, label %exit

c:
  call i32 @g()
  br label %exit

exit:
  ret void
}

define void @many_unanalyzable_branches() {
; Ensure that we don't crash as we're building up many unanalyzable branches,
; blocks, and loops.
;
; CHECK-LABEL: many_unanalyzable_branches
; CHECK: %entry
; CHECK: %exit

entry:
  br label %0

  %val0 = load volatile float, float* undef
  %cmp0 = fcmp une float %val0, undef
  br i1 %cmp0, label %1, label %0
  %val1 = load volatile float, float* undef
  %cmp1 = fcmp une float %val1, undef
  br i1 %cmp1, label %2, label %1
  %val2 = load volatile float, float* undef
  %cmp2 = fcmp une float %val2, undef
  br i1 %cmp2, label %3, label %2
  %val3 = load volatile float, float* undef
  %cmp3 = fcmp une float %val3, undef
  br i1 %cmp3, label %4, label %3
  %val4 = load volatile float, float* undef
  %cmp4 = fcmp une float %val4, undef
  br i1 %cmp4, label %5, label %4
  %val5 = load volatile float, float* undef
  %cmp5 = fcmp une float %val5, undef
  br i1 %cmp5, label %6, label %5
  %val6 = load volatile float, float* undef
  %cmp6 = fcmp une float %val6, undef
  br i1 %cmp6, label %7, label %6
  %val7 = load volatile float, float* undef
  %cmp7 = fcmp une float %val7, undef
  br i1 %cmp7, label %8, label %7
  %val8 = load volatile float, float* undef
  %cmp8 = fcmp une float %val8, undef
  br i1 %cmp8, label %9, label %8
  %val9 = load volatile float, float* undef
  %cmp9 = fcmp une float %val9, undef
  br i1 %cmp9, label %10, label %9
  %val10 = load volatile float, float* undef
  %cmp10 = fcmp une float %val10, undef
  br i1 %cmp10, label %11, label %10
  %val11 = load volatile float, float* undef
  %cmp11 = fcmp une float %val11, undef
  br i1 %cmp11, label %12, label %11
  %val12 = load volatile float, float* undef
  %cmp12 = fcmp une float %val12, undef
  br i1 %cmp12, label %13, label %12
  %val13 = load volatile float, float* undef
  %cmp13 = fcmp une float %val13, undef
  br i1 %cmp13, label %14, label %13
  %val14 = load volatile float, float* undef
  %cmp14 = fcmp une float %val14, undef
  br i1 %cmp14, label %15, label %14
  %val15 = load volatile float, float* undef
  %cmp15 = fcmp une float %val15, undef
  br i1 %cmp15, label %16, label %15
  %val16 = load volatile float, float* undef
  %cmp16 = fcmp une float %val16, undef
  br i1 %cmp16, label %17, label %16
  %val17 = load volatile float, float* undef
  %cmp17 = fcmp une float %val17, undef
  br i1 %cmp17, label %18, label %17
  %val18 = load volatile float, float* undef
  %cmp18 = fcmp une float %val18, undef
  br i1 %cmp18, label %19, label %18
  %val19 = load volatile float, float* undef
  %cmp19 = fcmp une float %val19, undef
  br i1 %cmp19, label %20, label %19
  %val20 = load volatile float, float* undef
  %cmp20 = fcmp une float %val20, undef
  br i1 %cmp20, label %21, label %20
  %val21 = load volatile float, float* undef
  %cmp21 = fcmp une float %val21, undef
  br i1 %cmp21, label %22, label %21
  %val22 = load volatile float, float* undef
  %cmp22 = fcmp une float %val22, undef
  br i1 %cmp22, label %23, label %22
  %val23 = load volatile float, float* undef
  %cmp23 = fcmp une float %val23, undef
  br i1 %cmp23, label %24, label %23
  %val24 = load volatile float, float* undef
  %cmp24 = fcmp une float %val24, undef
  br i1 %cmp24, label %25, label %24
  %val25 = load volatile float, float* undef
  %cmp25 = fcmp une float %val25, undef
  br i1 %cmp25, label %26, label %25
  %val26 = load volatile float, float* undef
  %cmp26 = fcmp une float %val26, undef
  br i1 %cmp26, label %27, label %26
  %val27 = load volatile float, float* undef
  %cmp27 = fcmp une float %val27, undef
  br i1 %cmp27, label %28, label %27
  %val28 = load volatile float, float* undef
  %cmp28 = fcmp une float %val28, undef
  br i1 %cmp28, label %29, label %28
  %val29 = load volatile float, float* undef
  %cmp29 = fcmp une float %val29, undef
  br i1 %cmp29, label %30, label %29
  %val30 = load volatile float, float* undef
  %cmp30 = fcmp une float %val30, undef
  br i1 %cmp30, label %31, label %30
  %val31 = load volatile float, float* undef
  %cmp31 = fcmp une float %val31, undef
  br i1 %cmp31, label %32, label %31
  %val32 = load volatile float, float* undef
  %cmp32 = fcmp une float %val32, undef
  br i1 %cmp32, label %33, label %32
  %val33 = load volatile float, float* undef
  %cmp33 = fcmp une float %val33, undef
  br i1 %cmp33, label %34, label %33
  %val34 = load volatile float, float* undef
  %cmp34 = fcmp une float %val34, undef
  br i1 %cmp34, label %35, label %34
  %val35 = load volatile float, float* undef
  %cmp35 = fcmp une float %val35, undef
  br i1 %cmp35, label %36, label %35
  %val36 = load volatile float, float* undef
  %cmp36 = fcmp une float %val36, undef
  br i1 %cmp36, label %37, label %36
  %val37 = load volatile float, float* undef
  %cmp37 = fcmp une float %val37, undef
  br i1 %cmp37, label %38, label %37
  %val38 = load volatile float, float* undef
  %cmp38 = fcmp une float %val38, undef
  br i1 %cmp38, label %39, label %38
  %val39 = load volatile float, float* undef
  %cmp39 = fcmp une float %val39, undef
  br i1 %cmp39, label %40, label %39
  %val40 = load volatile float, float* undef
  %cmp40 = fcmp une float %val40, undef
  br i1 %cmp40, label %41, label %40
  %val41 = load volatile float, float* undef
  %cmp41 = fcmp une float %val41, undef
  br i1 %cmp41, label %42, label %41
  %val42 = load volatile float, float* undef
  %cmp42 = fcmp une float %val42, undef
  br i1 %cmp42, label %43, label %42
  %val43 = load volatile float, float* undef
  %cmp43 = fcmp une float %val43, undef
  br i1 %cmp43, label %44, label %43
  %val44 = load volatile float, float* undef
  %cmp44 = fcmp une float %val44, undef
  br i1 %cmp44, label %45, label %44
  %val45 = load volatile float, float* undef
  %cmp45 = fcmp une float %val45, undef
  br i1 %cmp45, label %46, label %45
  %val46 = load volatile float, float* undef
  %cmp46 = fcmp une float %val46, undef
  br i1 %cmp46, label %47, label %46
  %val47 = load volatile float, float* undef
  %cmp47 = fcmp une float %val47, undef
  br i1 %cmp47, label %48, label %47
  %val48 = load volatile float, float* undef
  %cmp48 = fcmp une float %val48, undef
  br i1 %cmp48, label %49, label %48
  %val49 = load volatile float, float* undef
  %cmp49 = fcmp une float %val49, undef
  br i1 %cmp49, label %50, label %49
  %val50 = load volatile float, float* undef
  %cmp50 = fcmp une float %val50, undef
  br i1 %cmp50, label %51, label %50
  %val51 = load volatile float, float* undef
  %cmp51 = fcmp une float %val51, undef
  br i1 %cmp51, label %52, label %51
  %val52 = load volatile float, float* undef
  %cmp52 = fcmp une float %val52, undef
  br i1 %cmp52, label %53, label %52
  %val53 = load volatile float, float* undef
  %cmp53 = fcmp une float %val53, undef
  br i1 %cmp53, label %54, label %53
  %val54 = load volatile float, float* undef
  %cmp54 = fcmp une float %val54, undef
  br i1 %cmp54, label %55, label %54
  %val55 = load volatile float, float* undef
  %cmp55 = fcmp une float %val55, undef
  br i1 %cmp55, label %56, label %55
  %val56 = load volatile float, float* undef
  %cmp56 = fcmp une float %val56, undef
  br i1 %cmp56, label %57, label %56
  %val57 = load volatile float, float* undef
  %cmp57 = fcmp une float %val57, undef
  br i1 %cmp57, label %58, label %57
  %val58 = load volatile float, float* undef
  %cmp58 = fcmp une float %val58, undef
  br i1 %cmp58, label %59, label %58
  %val59 = load volatile float, float* undef
  %cmp59 = fcmp une float %val59, undef
  br i1 %cmp59, label %60, label %59
  %val60 = load volatile float, float* undef
  %cmp60 = fcmp une float %val60, undef
  br i1 %cmp60, label %61, label %60
  %val61 = load volatile float, float* undef
  %cmp61 = fcmp une float %val61, undef
  br i1 %cmp61, label %62, label %61
  %val62 = load volatile float, float* undef
  %cmp62 = fcmp une float %val62, undef
  br i1 %cmp62, label %63, label %62
  %val63 = load volatile float, float* undef
  %cmp63 = fcmp une float %val63, undef
  br i1 %cmp63, label %64, label %63
  %val64 = load volatile float, float* undef
  %cmp64 = fcmp une float %val64, undef
  br i1 %cmp64, label %65, label %64

  br label %exit
exit:
  ret void
}

define void @benchmark_heapsort(i32 %n, double* nocapture %ra) {
; This test case comes from the heapsort benchmark, and exemplifies several
; important aspects to block placement in the presence of loops:
; 1) Loop rotation needs to *ensure* that the desired exiting edge can be
; a fallthrough.
; 2) The exiting edge from the loop which is rotated to be laid out at the
; bottom of the loop needs to be exiting into the nearest enclosing loop (to
; which there is an exit). Otherwise, we force that enclosing loop into
; strange layouts that are siginificantly less efficient, often times making
; it discontiguous.
;
; CHECK-LABEL: @benchmark_heapsort
; CHECK: %entry
; First rotated loop top.
; CHECK: .p2align
; CHECK: %while.end
; %for.cond gets completely tail-duplicated away.
; CHECK: %if.then
; CHECK: %if.else
; CHECK: %if.end10
; Second rotated loop top
; CHECK: .p2align
; CHECK: %if.then24
; CHECK: %while.cond.outer
; Third rotated loop top
; CHECK: .p2align
; CHECK: %while.cond
; CHECK: %while.body
; CHECK: %land.lhs.true
; CHECK: %if.then19
; CHECK: %if.end20
; CHECK: %if.then8
; CHECK: ret

entry:
  %shr = ashr i32 %n, 1
  %add = add nsw i32 %shr, 1
  %arrayidx3 = getelementptr inbounds double, double* %ra, i64 1
  br label %for.cond

for.cond:
  %ir.0 = phi i32 [ %n, %entry ], [ %ir.1, %while.end ]
  %l.0 = phi i32 [ %add, %entry ], [ %l.1, %while.end ]
  %cmp = icmp sgt i32 %l.0, 1
  br i1 %cmp, label %if.then, label %if.else

if.then:
  %dec = add nsw i32 %l.0, -1
  %idxprom = sext i32 %dec to i64
  %arrayidx = getelementptr inbounds double, double* %ra, i64 %idxprom
  %0 = load double, double* %arrayidx, align 8
  br label %if.end10

if.else:
  %idxprom1 = sext i32 %ir.0 to i64
  %arrayidx2 = getelementptr inbounds double, double* %ra, i64 %idxprom1
  %1 = load double, double* %arrayidx2, align 8
  %2 = load double, double* %arrayidx3, align 8
  store double %2, double* %arrayidx2, align 8
  %dec6 = add nsw i32 %ir.0, -1
  %cmp7 = icmp eq i32 %dec6, 1
  br i1 %cmp7, label %if.then8, label %if.end10

if.then8:
  store double %1, double* %arrayidx3, align 8
  ret void

if.end10:
  %ir.1 = phi i32 [ %ir.0, %if.then ], [ %dec6, %if.else ]
  %l.1 = phi i32 [ %dec, %if.then ], [ %l.0, %if.else ]
  %rra.0 = phi double [ %0, %if.then ], [ %1, %if.else ]
  %add31 = add nsw i32 %ir.1, 1
  br label %while.cond.outer

while.cond.outer:
  %j.0.ph.in = phi i32 [ %l.1, %if.end10 ], [ %j.1, %if.then24 ]
  %j.0.ph = shl i32 %j.0.ph.in, 1
  br label %while.cond

while.cond:
  %j.0 = phi i32 [ %add31, %if.end20 ], [ %j.0.ph, %while.cond.outer ]
  %cmp11 = icmp sgt i32 %j.0, %ir.1
  br i1 %cmp11, label %while.end, label %while.body

while.body:
  %cmp12 = icmp slt i32 %j.0, %ir.1
  br i1 %cmp12, label %land.lhs.true, label %if.end20

land.lhs.true:
  %idxprom13 = sext i32 %j.0 to i64
  %arrayidx14 = getelementptr inbounds double, double* %ra, i64 %idxprom13
  %3 = load double, double* %arrayidx14, align 8
  %add15 = add nsw i32 %j.0, 1
  %idxprom16 = sext i32 %add15 to i64
  %arrayidx17 = getelementptr inbounds double, double* %ra, i64 %idxprom16
  %4 = load double, double* %arrayidx17, align 8
  %cmp18 = fcmp olt double %3, %4
  br i1 %cmp18, label %if.then19, label %if.end20

if.then19:
  br label %if.end20

if.end20:
  %j.1 = phi i32 [ %add15, %if.then19 ], [ %j.0, %land.lhs.true ], [ %j.0, %while.body ]
  %idxprom21 = sext i32 %j.1 to i64
  %arrayidx22 = getelementptr inbounds double, double* %ra, i64 %idxprom21
  %5 = load double, double* %arrayidx22, align 8
  %cmp23 = fcmp olt double %rra.0, %5
  br i1 %cmp23, label %if.then24, label %while.cond

if.then24:
  %idxprom27 = sext i32 %j.0.ph.in to i64
  %arrayidx28 = getelementptr inbounds double, double* %ra, i64 %idxprom27
  store double %5, double* %arrayidx28, align 8
  br label %while.cond.outer

while.end:
  %idxprom33 = sext i32 %j.0.ph.in to i64
  %arrayidx34 = getelementptr inbounds double, double* %ra, i64 %idxprom33
  store double %rra.0, double* %arrayidx34, align 8
  br label %for.cond
}

declare void @cold_function() cold

define i32 @test_cold_calls(i32* %a) {
; Test that edges to blocks post-dominated by cold calls are
; marked as not expected to be taken. They should be laid out
; at the bottom.
1068; CHECK-LABEL: test_cold_calls: 1069; CHECK: %entry 1070; CHECK: %else 1071; CHECK: %exit 1072; CHECK: %then 1073 1074entry: 1075 %gep1 = getelementptr i32, i32* %a, i32 1 1076 %val1 = load i32, i32* %gep1 1077 %cond1 = icmp ugt i32 %val1, 1 1078 br i1 %cond1, label %then, label %else 1079 1080then: 1081 call void @cold_function() 1082 br label %exit 1083 1084else: 1085 %gep2 = getelementptr i32, i32* %a, i32 2 1086 %val2 = load i32, i32* %gep2 1087 br label %exit 1088 1089exit: 1090 %ret = phi i32 [ %val1, %then ], [ %val2, %else ] 1091 ret i32 %ret 1092} 1093 1094; Make sure we put landingpads out of the way. 1095declare i32 @pers(...) 1096 1097declare i32 @foo(); 1098 1099declare i32 @bar(); 1100 1101define i32 @test_lp(i32 %a) personality i32 (...)* @pers { 1102; CHECK-LABEL: test_lp: 1103; CHECK: %entry 1104; CHECK: %hot 1105; CHECK: %then 1106; CHECK: %cold 1107; CHECK: %coldlp 1108; CHECK: %hotlp 1109; CHECK: %lpret 1110entry: 1111 %0 = icmp sgt i32 %a, 1 1112 br i1 %0, label %hot, label %cold, !prof !4 1113 1114hot: 1115 %1 = invoke i32 @foo() 1116 to label %then unwind label %hotlp 1117 1118cold: 1119 %2 = invoke i32 @bar() 1120 to label %then unwind label %coldlp 1121 1122then: 1123 %3 = phi i32 [ %1, %hot ], [ %2, %cold ] 1124 ret i32 %3 1125 1126hotlp: 1127 %4 = landingpad { i8*, i32 } 1128 cleanup 1129 br label %lpret 1130 1131coldlp: 1132 %5 = landingpad { i8*, i32 } 1133 cleanup 1134 br label %lpret 1135 1136lpret: 1137 %6 = phi i32 [-1, %hotlp], [-2, %coldlp] 1138 %7 = add i32 %6, 42 1139 ret i32 %7 1140} 1141 1142!4 = !{!"branch_weights", i32 65536, i32 0} 1143 1144; Make sure that ehpad are scheduled from the least probable one 1145; to the most probable one. See selectBestCandidateBlock as to why. 
declare void @clean();

define void @test_flow_unwind() personality i32 (...)* @pers {
; CHECK-LABEL: test_flow_unwind:
; CHECK: %entry
; CHECK: %then
; CHECK: %exit
; CHECK: %innerlp
; CHECK: %outerlp
; CHECK: %outercleanup
entry:
  %0 = invoke i32 @foo()
          to label %then unwind label %outerlp

then:
  %1 = invoke i32 @bar()
          to label %exit unwind label %innerlp

exit:
  ret void

innerlp:
  %2 = landingpad { i8*, i32 }
          cleanup
  br label %innercleanup

outerlp:
  %3 = landingpad { i8*, i32 }
          cleanup
  br label %outercleanup

outercleanup:
  %4 = phi { i8*, i32 } [%2, %innercleanup], [%3, %outerlp]
  call void @clean()
  resume { i8*, i32 } %4

innercleanup:
  call void @clean()
  br label %outercleanup
}

declare void @hot_function()

define void @test_hot_branch(i32* %a) {
; Test that a hot branch that has a probability a little larger than 80% will
; break CFG constraints when doing block placement.
; CHECK-LABEL: test_hot_branch:
; CHECK: %entry
; CHECK: %then
; CHECK: %exit
; CHECK: %else

entry:
  %gep1 = getelementptr i32, i32* %a, i32 1
  %val1 = load i32, i32* %gep1
  %cond1 = icmp ugt i32 %val1, 1
  br i1 %cond1, label %then, label %else, !prof !5

then:
  call void @hot_function()
  br label %exit

else:
  call void @cold_function()
  br label %exit

exit:
  call void @hot_function()
  ret void
}

define void @test_hot_branch_profile(i32* %a) !prof !6 {
; Test that a hot branch that has a probability a little larger than 50% will
; break CFG constraints when doing block placement when profile is available.
; CHECK-LABEL: test_hot_branch_profile:
; CHECK: %entry
; CHECK: %then
; CHECK: %exit
; CHECK: %else

entry:
  %gep1 = getelementptr i32, i32* %a, i32 1
  %val1 = load i32, i32* %gep1
  %cond1 = icmp ugt i32 %val1, 1
  br i1 %cond1, label %then, label %else, !prof !7

then:
  call void @hot_function()
  br label %exit

else:
  call void @cold_function()
  br label %exit

exit:
  call void @hot_function()
  ret void
}

define void @test_hot_branch_triangle_profile(i32* %a) !prof !6 {
; Test that a hot branch that has a probability a little larger than 80% will
; break triangle shaped CFG constraints when doing block placement if profile
; is present.
; CHECK-LABEL: test_hot_branch_triangle_profile:
; CHECK: %entry
; CHECK: %exit
; CHECK: %then

entry:
  %gep1 = getelementptr i32, i32* %a, i32 1
  %val1 = load i32, i32* %gep1
  %cond1 = icmp ugt i32 %val1, 1
  br i1 %cond1, label %exit, label %then, !prof !5

then:
  call void @hot_function()
  br label %exit

exit:
  call void @hot_function()
  ret void
}

define void @test_hot_branch_triangle_profile_topology(i32* %a) !prof !6 {
; Test that a hot branch that has a probability between 50% and 66% will not
; break triangle shaped CFG constraints when doing block placement if profile
; is present.
; CHECK-LABEL: test_hot_branch_triangle_profile_topology:
; CHECK: %entry
; CHECK: %then
; CHECK: %exit

entry:
  %gep1 = getelementptr i32, i32* %a, i32 1
  %val1 = load i32, i32* %gep1
  %cond1 = icmp ugt i32 %val1, 1
  br i1 %cond1, label %exit, label %then, !prof !7

then:
  call void @hot_function()
  br label %exit

exit:
  call void @hot_function()
  ret void
}

declare void @a()
declare void @b()

define void @test_forked_hot_diamond(i32* %a) {
; Test that a hot-branch with probability > 80% followed by a 50/50 branch
; will not place the cold predecessor if the probability for the fallthrough
; remains above 80%
; CHECK-LABEL: test_forked_hot_diamond
; CHECK: %entry
; CHECK: %then
; CHECK: %fork1
; CHECK: %else
; CHECK: %fork2
; CHECK: %exit
entry:
  %gep1 = getelementptr i32, i32* %a, i32 1
  %val1 = load i32, i32* %gep1
  %cond1 = icmp ugt i32 %val1, 1
  br i1 %cond1, label %then, label %else, !prof !5

then:
  call void @hot_function()
  %gep2 = getelementptr i32, i32* %a, i32 2
  %val2 = load i32, i32* %gep2
  %cond2 = icmp ugt i32 %val2, 2
  br i1 %cond2, label %fork1, label %fork2, !prof !8

else:
  call void @cold_function()
  %gep3 = getelementptr i32, i32* %a, i32 3
  %val3 = load i32, i32* %gep3
  %cond3 = icmp ugt i32 %val3, 3
  br i1 %cond3, label %fork1, label %fork2, !prof !8

fork1:
  call void @a()
  br label %exit

fork2:
  call void @b()
  br label %exit

exit:
  call void @hot_function()
  ret void
}

define void @test_forked_hot_diamond_gets_cold(i32* %a) {
; Test that a hot-branch with probability > 80% followed by a 50/50 branch
; will place the cold predecessor if the probability for the fallthrough
; falls below 80%
; The probability for both branches is 85%. For then2 vs else1
; this results in a compounded probability of 83%.
; Neither then2->fork1 nor then2->fork2 has a large enough relative
; probability to break the CFG.
; Relative probs:
; then2 -> fork1 vs else1 -> fork1 = 71%
; then2 -> fork2 vs else2 -> fork2 = 74%
; CHECK-LABEL: test_forked_hot_diamond_gets_cold
; CHECK: %entry
; CHECK: %then1
; CHECK: %then2
; CHECK: %else1
; CHECK: %fork1
; CHECK: %else2
; CHECK: %fork2
; CHECK: %exit
entry:
  %gep1 = getelementptr i32, i32* %a, i32 1
  %val1 = load i32, i32* %gep1
  %cond1 = icmp ugt i32 %val1, 1
  br i1 %cond1, label %then1, label %else1, !prof !9

then1:
  call void @hot_function()
  %gep2 = getelementptr i32, i32* %a, i32 2
  %val2 = load i32, i32* %gep2
  %cond2 = icmp ugt i32 %val2, 2
  br i1 %cond2, label %then2, label %else2, !prof !9

else1:
  call void @cold_function()
  br label %fork1

then2:
  call void @hot_function()
  %gep3 = getelementptr i32, i32* %a, i32 3
  ; NOTE(review): loads from %gep2 and compares %val2, leaving %gep3/%val3
  ; dead — looks like a copy-paste slip from the sibling diamond tests, but
  ; the block placement being checked depends only on the !prof weights, so
  ; confirm intent before changing operands (it would perturb codegen).
  %val3 = load i32, i32* %gep2
  %cond3 = icmp ugt i32 %val2, 3
  br i1 %cond3, label %fork1, label %fork2, !prof !8

else2:
  call void @cold_function()
  br label %fork2

fork1:
  call void @a()
  br label %exit

fork2:
  call void @b()
  br label %exit

exit:
  call void @hot_function()
  ret void
}

define void @test_forked_hot_diamond_stays_hot(i32* %a) {
; Test that a hot-branch with probability > 88.88% (1:8) followed by a 50/50
; branch will not place the cold predecessor as the probability for the
; fallthrough stays above 80%
; (1:8) followed by (1:1) is still (1:4)
; Here we use 90% probability because two in a row
; have an 89% probability vs the original branch.
; CHECK-LABEL: test_forked_hot_diamond_stays_hot
; CHECK: %entry
; CHECK: %then1
; CHECK: %then2
; CHECK: %fork1
; CHECK: %else1
; CHECK: %else2
; CHECK: %fork2
; CHECK: %exit
entry:
  %gep1 = getelementptr i32, i32* %a, i32 1
  %val1 = load i32, i32* %gep1
  %cond1 = icmp ugt i32 %val1, 1
  br i1 %cond1, label %then1, label %else1, !prof !10

then1:
  call void @hot_function()
  %gep2 = getelementptr i32, i32* %a, i32 2
  %val2 = load i32, i32* %gep2
  %cond2 = icmp ugt i32 %val2, 2
  br i1 %cond2, label %then2, label %else2, !prof !10

else1:
  call void @cold_function()
  br label %fork1

then2:
  call void @hot_function()
  %gep3 = getelementptr i32, i32* %a, i32 3
  ; NOTE(review): same %gep2/%val2 reuse (dead %gep3/%val3) as in
  ; test_forked_hot_diamond_gets_cold above — confirm intent before changing.
  %val3 = load i32, i32* %gep2
  %cond3 = icmp ugt i32 %val2, 3
  br i1 %cond3, label %fork1, label %fork2, !prof !8

else2:
  call void @cold_function()
  br label %fork2

fork1:
  call void @a()
  br label %exit

fork2:
  call void @b()
  br label %exit

exit:
  call void @hot_function()
  ret void
}

; Because %endif has a higher frequency than %if, the calculations show we
; shouldn't tail-duplicate %endif so that we can place it after %if. We were
; previously undercounting the cost by ignoring execution frequency that didn't
; come from the %if->%endif path.
; CHECK-LABEL: higher_frequency_succ_tail_dup
; CHECK: %entry
; CHECK: %elseif
; CHECK: %else
; CHECK: %endif
; CHECK: %then
; CHECK: %ret
define void @higher_frequency_succ_tail_dup(i1 %a, i1 %b, i1 %c) {
entry:
  br label %if
if:                                               ; preds = %entry
  call void @effect(i32 0)
  br i1 %a, label %elseif, label %endif, !prof !11 ; even

elseif:                                           ; preds = %if
  call void @effect(i32 1)
  br i1 %b, label %else, label %endif, !prof !11 ; even

else:                                             ; preds = %elseif
  call void @effect(i32 2)
  br label %endif

endif:                                            ; preds = %if, %elseif, %else
  br i1 %c, label %then, label %ret, !prof !12 ; 5 to 3

then:                                             ; preds = %endif
  call void @effect(i32 3)
  br label %ret

ret:                                              ; preds = %endif, %then
  ret void
}

define i32 @not_rotate_if_extra_branch(i32 %count) {
; Test checks that there is no loop rotation
; if it introduces an extra branch.
; Specifically in this case because best exit is .header
; but it has fallthrough to .middle block and last block in
; loop chain .slow does not have a fallthrough to .header.
; CHECK-LABEL: not_rotate_if_extra_branch
; CHECK: %.entry
; CHECK: %.header
; CHECK: %.middle
; CHECK: %.backedge
; CHECK: %.slow
; CHECK: %.bailout
; CHECK: %.stop
.entry:
  %sum.0 = shl nsw i32 %count, 1
  br label %.header

.header:
  %i = phi i32 [ %i.1, %.backedge ], [ 0, %.entry ]
  %sum = phi i32 [ %sum.1, %.backedge ], [ %sum.0, %.entry ]
  %is_exc = icmp sgt i32 %i, 9000000
  br i1 %is_exc, label %.bailout, label %.middle, !prof !13

.bailout:
  %sum.2 = add nsw i32 %count, 1
  br label %.stop

.middle:
  %pr.1 = and i32 %i, 1023
  %pr.2 = icmp eq i32 %pr.1, 0
  br i1 %pr.2, label %.slow, label %.backedge, !prof !14

.slow:
  tail call void @effect(i32 %sum)
  br label %.backedge

.backedge:
  %sum.1 = add nsw i32 %i, %sum
  %i.1 = add nsw i32 %i, 1
  %end = icmp slt i32 %i.1, %count
  br i1 %end, label %.header, label %.stop, !prof !15

.stop:
  %sum.phi = phi i32 [ %sum.1, %.backedge ], [ %sum.2, %.bailout ]
  ret i32 %sum.phi
}

define i32 @not_rotate_if_extra_branch_regression(i32 %count, i32 %init) {
; This is a regression test against a patch that avoids loop rotation if
; it introduces an extra branch.
; CHECK-LABEL: not_rotate_if_extra_branch_regression
; CHECK: %.entry
; CHECK: %.first_backedge
; CHECK: %.slow
; CHECK: %.second_header
.entry:
  %sum.0 = shl nsw i32 %count, 1
  br label %.first_header

.first_header:
  %i = phi i32 [ %i.1, %.first_backedge ], [ 0, %.entry ]
  %is_bo1 = icmp sgt i32 %i, 9000000
  br i1 %is_bo1, label %.bailout, label %.first_backedge, !prof !14

.first_backedge:
  %i.1 = add nsw i32 %i, 1
  %end = icmp slt i32 %i.1, %count
  br i1 %end, label %.first_header, label %.second_header, !prof !13

.second_header:
  %j = phi i32 [ %j.1, %.second_backedge ], [ %init, %.first_backedge ]
  %end.2 = icmp sgt i32 %j, %count
  br i1 %end.2, label %.stop, label %.second_middle, !prof !14

.second_middle:
  %is_slow = icmp sgt i32 %j, 9000000
  br i1 %is_slow, label %.slow, label %.second_backedge, !prof !14

.slow:
  tail call void @effect(i32 %j)
  br label %.second_backedge

.second_backedge:
  %j.1 = add nsw i32 %j, 1
  %end.3 = icmp slt i32 %j, 10000000
  br i1 %end.3, label %.second_header, label %.stop, !prof !13

.stop:
  %res = add nsw i32 %j, %i.1
  ret i32 %res

.bailout:
  ret i32 0
}

declare void @effect(i32)

!5 = !{!"branch_weights", i32 84, i32 16}
!6 = !{!"function_entry_count", i32 10}
!7 = !{!"branch_weights", i32 60, i32 40}
!8 = !{!"branch_weights", i32 5001, i32 4999}
!9 = !{!"branch_weights", i32 85, i32 15}
!10 = !{!"branch_weights", i32 90, i32 10}
!11 = !{!"branch_weights", i32 1, i32 1}
!12 = !{!"branch_weights", i32 5, i32 3}
!13 = !{!"branch_weights", i32 1, i32 1}
!14 = !{!"branch_weights", i32 1, i32 1023}
!15 = !{!"branch_weights", i32 4095, i32 1}