; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=ENABLE
; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu %s -o - -enable-shrink-wrap=false | FileCheck %s --check-prefix=CHECK --check-prefix=DISABLE
;
; Note: Lots of tests use inline asm instead of regular calls.
; This gives better control over what the register allocator will do.
; Otherwise, we may have a spill right in the entry block, defeating
; shrink-wrapping. Moreover, some of the inline asm statements (nop)
; are here to ensure that the related paths do not end up as critical
; edges.


; Initial motivating example: Simple diamond with a call just on one side.
; CHECK-LABEL: foo:
;
; Compare the arguments and return
; No prologue needed.
; ENABLE: cmpw 0, 3, 4
; ENABLE-NEXT: bgelr 0
;
; Prologue code.
; At a minimum, we save/restore the link register. Other registers may be saved
; as well.
; CHECK: mflr
;
; Compare the arguments and jump to exit.
; After the prologue is set.
; DISABLE: cmpw 0, 3, 4
; DISABLE-NEXT: bge 0, .[[EXIT_LABEL:LBB[0-9_]+]]
;
; Store %a on the stack. The parentheses of the memory operand are escaped so
; that they are matched literally instead of acting as a regex group.
; CHECK: stw 3, {{[0-9]+\([0-9]+\)}}
; Set the alloca address in the second argument.
; CHECK-NEXT: addi 4, 1, {{[0-9]+}}
; Set the first argument to zero.
; CHECK-NEXT: li 3, 0
; CHECK-NEXT: bl doSomething
;
; With shrink-wrapping, epilogue is just after the call.
; Restore the link register and return.
; Note that there could be other epilog code before the link register is
; restored but we will not check for it here.
; ENABLE: mtlr
; ENABLE-NEXT: blr
;
; DISABLE: [[EXIT_LABEL]]:
;
; Without shrink-wrapping, epilogue is in the exit block.
; Epilogue code. (What we pop does not matter.)
; DISABLE: mtlr {{[0-9]+}}
; DISABLE-NEXT: blr
;

define i32 @foo(i32 %a, i32 %b) {
  %tmp = alloca i32, align 4
  %tmp2 = icmp slt i32 %a, %b
  br i1 %tmp2, label %true, label %false

true:
  store i32 %a, i32* %tmp, align 4
  %tmp4 = call i32 @doSomething(i32 0, i32* %tmp)
  br label %false

false:
  %tmp.0 = phi i32 [ %tmp4, %true ], [ %a, %0 ]
  ret i32 %tmp.0
}

; Function Attrs: optsize
declare i32 @doSomething(i32, i32*)



; Check that we do not perform the restore inside the loop whereas the save
; is outside.
; CHECK-LABEL: freqSaveAndRestoreOutsideLoop:
;
; Shrink-wrapping allows to skip the prologue in the else case.
; ENABLE: cmplwi 0, 3, 0
; ENABLE: beq 0, .[[ELSE_LABEL:LBB[0-9_]+]]
;
; Prologue code.
; Make sure we save the link register
; CHECK: mflr {{[0-9]+}}
;
; DISABLE: cmplwi 0, 3, 0
; DISABLE: beq 0, .[[ELSE_LABEL:LBB[0-9_]+]]
;
; Loop preheader
; CHECK-DAG: li [[SUM:[0-9]+]], 0
; CHECK-DAG: li [[IV:[0-9]+]], 10
;
; Loop body
; CHECK: .[[LOOP:LBB[0-9_]+]]: # %for.body
; CHECK: bl something
; CHECK-DAG: addi [[IV]], [[IV]], -1
; CHECK-DAG: add [[SUM]], 3, [[SUM]]
; CHECK-NEXT: cmplwi [[IV]], 0
; CHECK-NEXT: bne 0, .[[LOOP]]
;
; Next BB.
; CHECK: slwi 3, [[SUM]], 3
;
; Jump to epilogue.
; DISABLE: b .[[EPILOG_BB:LBB[0-9_]+]]
;
; DISABLE: .[[ELSE_LABEL]]: # %if.else
; Shift second argument by one and store into returned register.
; DISABLE: slwi 3, 4, 1
; DISABLE: .[[EPILOG_BB]]: # %if.end
;
; Epilogue code.
; CHECK: mtlr {{[0-9]+}}
; CHECK-NEXT: blr
;
; ENABLE: .[[ELSE_LABEL]]: # %if.else
; Shift second argument by one and store into returned register.
; ENABLE: slwi 3, 4, 1
; ENABLE-NEXT: blr
define i32 @freqSaveAndRestoreOutsideLoop(i32 %cond, i32 %N) {
entry:
  %tobool = icmp eq i32 %cond, 0
  br i1 %tobool, label %if.else, label %for.preheader

; The nop keeps this path from collapsing into a critical edge.
for.preheader:
  tail call void asm "nop", ""()
  br label %for.body

; Ten iterations, each calling @something and accumulating its result.
for.body:                                         ; preds = %for.preheader, %for.body
  %i.05 = phi i32 [ %inc, %for.body ], [ 0, %for.preheader ]
  %sum.04 = phi i32 [ %add, %for.body ], [ 0, %for.preheader ]
  %call = tail call i32 bitcast (i32 (...)* @something to i32 ()*)()
  %add = add nsw i32 %call, %sum.04
  %inc = add nuw nsw i32 %i.05, 1
  %exitcond = icmp eq i32 %inc, 10
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  %shl = shl i32 %add, 3
  br label %if.end

; Call-free path: no prologue is required here when shrink-wrapping is on.
if.else:                                          ; preds = %entry
  %mul = shl nsw i32 %N, 1
  br label %if.end

if.end:                                           ; preds = %if.else, %for.end
  %sum.1 = phi i32 [ %shl, %for.end ], [ %mul, %if.else ]
  ret i32 %sum.1
}

declare i32 @something(...)

; Check that we do not perform the shrink-wrapping inside the loop even
; though that would be legal. The cost model must prevent that.
; CHECK-LABEL: freqSaveAndRestoreOutsideLoop2:
; Prologue code.
; Make sure we save the link register before the call
; CHECK: mflr {{[0-9]+}}
;
; Loop preheader
; CHECK-DAG: li [[SUM:[0-9]+]], 0
; CHECK-DAG: li [[IV:[0-9]+]], 10
;
; Loop body
; CHECK: .[[LOOP:LBB[0-9_]+]]: # %for.body
; CHECK: bl something
; CHECK-DAG: addi [[IV]], [[IV]], -1
; CHECK-DAG: add [[SUM]], 3, [[SUM]]
; CHECK-NEXT: cmplwi [[IV]], 0
; CHECK-NEXT: bne 0, .[[LOOP]]
;
; Next BB
; CHECK: %for.end
; CHECK: mtlr {{[0-9]+}}
; CHECK-NEXT: blr
define i32 @freqSaveAndRestoreOutsideLoop2(i32 %cond) {
entry:
  br label %for.preheader

; The nop keeps the preheader as a distinct block ahead of the loop.
for.preheader:
  tail call void asm "nop", ""()
  br label %for.body

; Ten iterations, each calling @something and accumulating its result.
for.body:                                         ; preds = %for.body, %for.preheader
  %i.04 = phi i32 [ 0, %for.preheader ], [ %inc, %for.body ]
  %sum.03 = phi i32 [ 0, %for.preheader ], [ %add, %for.body ]
  %call = tail call i32 bitcast (i32 (...)* @something to i32 ()*)()
  %add = add nsw i32 %call, %sum.03
  %inc = add nuw nsw i32 %i.04, 1
  %exitcond = icmp eq i32 %inc, 10
  br i1 %exitcond, label %for.exit, label %for.body

; The nop keeps the loop exit as a distinct block after the loop.
for.exit:
  tail call void asm "nop", ""()
  br label %for.end

for.end:                                          ; preds = %for.exit
  ret i32 %add
}


; Check with a more complex case that we do not have save within the loop and
; restore outside.
; CHECK-LABEL: loopInfoSaveOutsideLoop:
;
; ENABLE: cmplwi 0, 3, 0
; ENABLE-NEXT: beq 0, .[[ELSE_LABEL:LBB[0-9_]+]]
;
; Prologue code.
; Make sure we save the link register
; CHECK: mflr {{[0-9]+}}
;
; DISABLE: cmplwi 0, 3, 0
; DISABLE-NEXT: beq 0, .[[ELSE_LABEL:LBB[0-9_]+]]
;
; Loop preheader
; CHECK-DAG: li [[SUM:[0-9]+]], 0
; CHECK-DAG: li [[IV:[0-9]+]], 10
;
; Loop body
; CHECK: .[[LOOP:LBB[0-9_]+]]: # %for.body
; CHECK: bl something
; CHECK-DAG: addi [[IV]], [[IV]], -1
; CHECK-DAG: add [[SUM]], 3, [[SUM]]
; CHECK-NEXT: cmplwi [[IV]], 0
; CHECK-NEXT: bne 0, .[[LOOP]]
;
; Next BB
; CHECK: bl somethingElse
; CHECK: slwi 3, [[SUM]], 3
;
; Jump to epilogue
; DISABLE: b .[[EPILOG_BB:LBB[0-9_]+]]
;
; DISABLE: .[[ELSE_LABEL]]: # %if.else
; Shift second argument by one and store into returned register.
; DISABLE: slwi 3, 4, 1
;
; DISABLE: .[[EPILOG_BB]]: # %if.end
; Epilog code
; CHECK: mtlr {{[0-9]+}}
; CHECK-NEXT: blr
;
; ENABLE: .[[ELSE_LABEL]]: # %if.else
; Shift second argument by one and store into returned register.
; ENABLE: slwi 3, 4, 1
; ENABLE-NEXT: blr
define i32 @loopInfoSaveOutsideLoop(i32 %cond, i32 %N) {
entry:
  %tobool = icmp eq i32 %cond, 0
  br i1 %tobool, label %if.else, label %for.preheader

; The nop keeps this path from collapsing into a critical edge.
for.preheader:
  tail call void asm "nop", ""()
  br label %for.body

; Ten iterations, each calling @something and accumulating its result.
for.body:                                         ; preds = %for.preheader, %for.body
  %i.05 = phi i32 [ %inc, %for.body ], [ 0, %for.preheader ]
  %sum.04 = phi i32 [ %add, %for.body ], [ 0, %for.preheader ]
  %call = tail call i32 bitcast (i32 (...)* @something to i32 ()*)()
  %add = add nsw i32 %call, %sum.04
  %inc = add nuw nsw i32 %i.05, 1
  %exitcond = icmp eq i32 %inc, 10
  br i1 %exitcond, label %for.end, label %for.body

; A second call sits after the loop, so the restore point must follow it.
for.end:                                          ; preds = %for.body
  tail call void bitcast (void (...)* @somethingElse to void ()*)()
  %shl = shl i32 %add, 3
  br label %if.end

if.else:                                          ; preds = %entry
  %mul = shl nsw i32 %N, 1
  br label %if.end

if.end:                                           ; preds = %if.else, %for.end
  %sum.1 = phi i32 [ %shl, %for.end ], [ %mul, %if.else ]
  ret i32 %sum.1
}

declare void @somethingElse(...)

; Check with a more complex case that we do not have restore within the loop and
; save outside.
; CHECK-LABEL: loopInfoRestoreOutsideLoop:
;
; ENABLE: cmplwi 0, 3, 0
; ENABLE-NEXT: beq 0, .[[ELSE_LABEL:LBB[0-9_]+]]
;
; Prologue code.
; Make sure we save the link register
; CHECK: mflr {{[0-9]+}}
;
; DISABLE: cmplwi 0, 3, 0
; DISABLE-NEXT: beq 0, .[[ELSE_LABEL:LBB[0-9_]+]]
;
; CHECK: bl somethingElse
;
; Loop preheader
; CHECK-DAG: li [[SUM:[0-9]+]], 0
; CHECK-DAG: li [[IV:[0-9]+]], 10
;
; Loop body
; CHECK: .[[LOOP:LBB[0-9_]+]]: # %for.body
; CHECK: bl something
; CHECK-DAG: addi [[IV]], [[IV]], -1
; CHECK-DAG: add [[SUM]], 3, [[SUM]]
; CHECK-NEXT: cmplwi [[IV]], 0
; CHECK-NEXT: bne 0, .[[LOOP]]
;
; Next BB.
; CHECK: slwi 3, [[SUM]], 3
;
; DISABLE: b .[[EPILOG_BB:LBB[0-9_]+]]
;
; DISABLE: .[[ELSE_LABEL]]: # %if.else
; Shift second argument by one and store into returned register.
; DISABLE: slwi 3, 4, 1
; DISABLE: .[[EPILOG_BB]]: # %if.end
;
; Epilogue code.
; CHECK: mtlr {{[0-9]+}}
; CHECK-NEXT: blr
;
; ENABLE: .[[ELSE_LABEL]]: # %if.else
; Shift second argument by one and store into returned register.
; ENABLE: slwi 3, 4, 1
; ENABLE-NEXT: blr
define i32 @loopInfoRestoreOutsideLoop(i32 %cond, i32 %N) #0 {
entry:
  %tobool = icmp eq i32 %cond, 0
  br i1 %tobool, label %if.else, label %if.then

; A call sits ahead of the loop, so the save point must precede it.
if.then:                                          ; preds = %entry
  tail call void bitcast (void (...)* @somethingElse to void ()*)()
  br label %for.body

; Ten iterations, each calling @something and accumulating its result.
for.body:                                         ; preds = %for.body, %if.then
  %i.05 = phi i32 [ 0, %if.then ], [ %inc, %for.body ]
  %sum.04 = phi i32 [ 0, %if.then ], [ %add, %for.body ]
  %call = tail call i32 bitcast (i32 (...)* @something to i32 ()*)()
  %add = add nsw i32 %call, %sum.04
  %inc = add nuw nsw i32 %i.05, 1
  %exitcond = icmp eq i32 %inc, 10
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  %shl = shl i32 %add, 3
  br label %if.end

if.else:                                          ; preds = %entry
  %mul = shl nsw i32 %N, 1
  br label %if.end

if.end:                                           ; preds = %if.else, %for.end
  %sum.1 = phi i32 [ %shl, %for.end ], [ %mul, %if.else ]
  ret i32 %sum.1
}

; Check that we handle function with no frame information correctly.
; CHECK-LABEL: emptyFrame:
; CHECK: # %entry
; CHECK-NEXT: li 3, 0
; CHECK-NEXT: blr
define i32 @emptyFrame() {
entry:
  ret i32 0
}


; Check that we handle inline asm correctly.
; CHECK-LABEL: inlineAsm:
;
; ENABLE: cmplwi 0, 3, 0
; ENABLE-NEXT: beq 0, .[[ELSE_LABEL:LBB[0-9_]+]]
;
; Prologue code.
; Make sure we save the CSR used in the inline asm: r14
; ENABLE-DAG: li [[IV:[0-9]+]], 10
; ENABLE-DAG: std 14, -[[STACK_OFFSET:[0-9]+]](1) # 8-byte Folded Spill
;
; DISABLE: std 14, -[[STACK_OFFSET:[0-9]+]](1) # 8-byte Folded Spill
; DISABLE: cmplwi 0, 3, 0
; DISABLE-NEXT: beq 0, .[[ELSE_LABEL:LBB[0-9_]+]]
; DISABLE: li [[IV:[0-9]+]], 10
;
; CHECK: nop
; CHECK: mtctr [[IV]]
;
; CHECK: .[[LOOP_LABEL:LBB[0-9_]+]]: # %for.body
; Inline asm statement.
; CHECK: addi 14, 14, 1
; CHECK: bdnz .[[LOOP_LABEL]]
;
; Epilogue code.
; CHECK: li 3, 0
; CHECK-DAG: ld 14, -[[STACK_OFFSET]](1) # 8-byte Folded Reload
; CHECK: nop
; CHECK: blr
;
; The else path returns right after the shift; without shrink-wrapping the
; r14 reload sits between the shift and the return.
; CHECK: [[ELSE_LABEL]]
; CHECK-NEXT: slwi 3, 4, 1
; DISABLE: ld 14, -[[STACK_OFFSET]](1) # 8-byte Folded Reload
; CHECK-NEXT: blr
;
define i32 @inlineAsm(i32 %cond, i32 %N) {
entry:
  %tobool = icmp eq i32 %cond, 0
  br i1 %tobool, label %if.else, label %for.preheader

; The nop keeps this path from collapsing into a critical edge.
for.preheader:
  tail call void asm "nop", ""()
  br label %for.body

; The inline asm clobbers r14, a callee-saved register that must be spilled.
for.body:                                         ; preds = %for.preheader, %for.body
  %i.03 = phi i32 [ %inc, %for.body ], [ 0, %for.preheader ]
  tail call void asm "addi 14, 14, 1", "~{r14}"()
  %inc = add nuw nsw i32 %i.03, 1
  %exitcond = icmp eq i32 %inc, 10
  br i1 %exitcond, label %for.exit, label %for.body

for.exit:
  tail call void asm "nop", ""()
  br label %if.end

if.else:                                          ; preds = %entry
  %mul = shl nsw i32 %N, 1
  br label %if.end

if.end:                                           ; preds = %for.exit, %if.else
  %sum.0 = phi i32 [ %mul, %if.else ], [ 0, %for.exit ]
  ret i32 %sum.0
}


; Check that we handle calls to variadic functions correctly.
; CHECK-LABEL: callVariadicFunc:
;
; ENABLE: cmplwi 0, 3, 0
; ENABLE-NEXT: beq 0, .[[ELSE_LABEL:LBB[0-9_]+]]
;
; Prologue code.
; CHECK: mflr {{[0-9]+}}
;
; DISABLE: cmplwi 0, 3, 0
; DISABLE-NEXT: beq 0, .[[ELSE_LABEL:LBB[0-9_]+]]
;
; Setup of the varargs.
; CHECK: mr 4, 3
; CHECK-NEXT: mr 5, 3
; CHECK-NEXT: mr 6, 3
; CHECK-NEXT: mr 7, 3
; CHECK-NEXT: mr 8, 3
; CHECK-NEXT: mr 9, 3
; CHECK-NEXT: bl someVariadicFunc
; CHECK: slwi 3, 3, 3
; DISABLE: b .[[EPILOGUE_BB:LBB[0-9_]+]]
;
; ENABLE: mtlr {{[0-9]+}}
; ENABLE-NEXT: blr
;
; CHECK: .[[ELSE_LABEL]]: # %if.else
; CHECK-NEXT: slwi 3, 4, 1
;
; DISABLE: .[[EPILOGUE_BB]]: # %if.end
; DISABLE: mtlr
; CHECK: blr
define i32 @callVariadicFunc(i32 %cond, i32 %N) {
entry:
  %tobool = icmp eq i32 %cond, 0
  br i1 %tobool, label %if.else, label %if.then

; Pass %N seven times: once as the fixed argument and six times as varargs.
if.then:                                          ; preds = %entry
  %call = tail call i32 (i32, ...) @someVariadicFunc(i32 %N, i32 %N, i32 %N, i32 %N, i32 %N, i32 %N, i32 %N)
  %shl = shl i32 %call, 3
  br label %if.end

if.else:                                          ; preds = %entry
  %mul = shl nsw i32 %N, 1
  br label %if.end

if.end:                                           ; preds = %if.else, %if.then
  %sum.0 = phi i32 [ %shl, %if.then ], [ %mul, %if.else ]
  ret i32 %sum.0
}

declare i32 @someVariadicFunc(i32, ...)



; Make sure we do not insert unreachable code after noreturn function.
; Although this is not incorrect to insert such code, it is useless
; and it hurts the binary size.
;
; CHECK-LABEL: noreturn:
; DISABLE: mflr {{[0-9]+}}
;
; CHECK: cmplwi 3, 0
; CHECK-NEXT: bne{{[-]?}} 0, .[[ABORT:LBB[0-9_]+]]
;
; CHECK: li 3, 42
;
; DISABLE: mtlr {{[0-9]+}}
;
; CHECK-NEXT: blr
;
; CHECK: .[[ABORT]]: # %if.abort
;
; ENABLE: mflr {{[0-9]+}}
;
; CHECK: bl abort
; ENABLE-NOT: mtlr {{[0-9]+}}
define i32 @noreturn(i8 signext %bad_thing) {
entry:
  %tobool = icmp eq i8 %bad_thing, 0
  br i1 %tobool, label %if.end, label %if.abort

; The call never returns, so no epilogue should follow it.
if.abort:
  tail call void @abort() #0
  unreachable

if.end:
  ret i32 42
}

declare void @abort() #0

attributes #0 = { noreturn nounwind }


; Make sure that we handle infinite loops properly. When checking that the Save
; and Restore blocks are control flow equivalent, the loop searches for the
; immediate (post) dominator for the (restore) save blocks. When either the Save
; or Restore block is located in an infinite loop the only immediate (post)
; dominator is itself. In this case, we cannot perform shrink wrapping, but we
; should return gracefully and continue compilation.
; The only condition for this test is the compilation finishes correctly.
;
; CHECK-LABEL: infiniteloop
; CHECK: blr
define void @infiniteloop() {
entry:
  br i1 undef, label %if.then, label %if.end

if.then:
  %ptr = alloca i32, i32 4
  br label %for.body

; Single-block loop with no exit edge.
for.body:                                         ; preds = %for.body, %if.then
  %sum.03 = phi i32 [ 0, %if.then ], [ %add, %for.body ]
  %call = tail call i32 bitcast (i32 (...)* @something to i32 ()*)()
  %add = add nsw i32 %call, %sum.03
  store i32 %add, i32* %ptr
  br label %for.body

if.end:
  ret void
}

; Another infinite loop test this time with a body bigger than just one block.
; CHECK-LABEL: infiniteloop2
; CHECK: blr
define void @infiniteloop2() {
entry:
  br i1 undef, label %if.then, label %if.end

if.then:
  %ptr = alloca i32, i32 4
  br label %for.body

; Loop header; both successors branch straight back here, so there is no exit.
for.body:                                         ; preds = %body2, %body1, %if.then
  %sum.03 = phi i32 [ 0, %if.then ], [ %add, %body1 ], [ 1, %body2 ]
  %call = tail call i32 asm "mftb $0, 268", "=r,~{r14}"()
  %add = add nsw i32 %call, %sum.03
  store i32 %add, i32* %ptr
  br i1 undef, label %body1, label %body2

body1:
  tail call void asm sideeffect "nop", "~{r14}"()
  br label %for.body

body2:
  tail call void asm sideeffect "nop", "~{r14}"()
  br label %for.body

if.end:
  ret void
}

; Another infinite loop test this time with two nested infinite loop.
; CHECK-LABEL: infiniteloop3
; CHECK: # %end
define void @infiniteloop3() {
entry:
  br i1 undef, label %loop2a, label %body

body:                                             ; preds = %entry
  br i1 undef, label %loop2a, label %end

loop1:                                            ; preds = %loop2a, %loop2b
  %var.phi = phi i32* [ %next.phi, %loop2b ], [ %var, %loop2a ]
  %next.phi = phi i32* [ %next.load, %loop2b ], [ %next.var, %loop2a ]
  %0 = icmp eq i32* %var, null
  %next.load = load i32*, i32** undef
  br i1 %0, label %loop2a, label %loop2b

loop2a:                                           ; preds = %loop1, %body, %entry
  %var = phi i32* [ null, %body ], [ null, %entry ], [ %next.phi, %loop1 ]
  %next.var = phi i32* [ undef, %body ], [ null, %entry ], [ %next.load, %loop1 ]
  br label %loop1

loop2b:                                           ; preds = %loop1
  %gep1 = bitcast i32* %var.phi to i32*
  %next.ptr = bitcast i32* %gep1 to i32**
  store i32* %next.phi, i32** %next.ptr
  br label %loop1

end:
  ret void
}

; Globals read and written by @transpose.
@columns = external global [0 x i32], align 4
@lock = common global i32 0, align 4
@htindex = common global i32 0, align 4
@stride = common global i32 0, align 4
@ht = common global i32* null, align 8
@he = common global i8* null, align 8

;
; Test for a bug that was caused when save point was equal to restore point.
; Function Attrs: nounwind
; CHECK-LABEL: transpose
;
; Store of callee-save register saved by shrink wrapping
; FIXME: Test disabled: Improved scheduling needs no spills/reloads any longer!
; CHECKXX: std [[CSR:[0-9]+]], -[[STACK_OFFSET:[0-9]+]](1) # 8-byte Folded Spill
;
; Reload of callee-save register
; CHECKXX: ld [[CSR]], -[[STACK_OFFSET]](1) # 8-byte Folded Reload
;
; Ensure no subsequent uses of callee-save register before end of function.
; Disabled together with the two patterns above: it relies on the register
; captured by the disabled spill pattern, which is otherwise undefined.
; CHECKXX-NOT: {{[a-z]+}} [[CSR]]
; CHECK: blr
define signext i32 @transpose() {
entry:
  %0 = load i32, i32* getelementptr inbounds ([0 x i32], [0 x i32]* @columns, i64 0, i64 1), align 4
  %shl.i = shl i32 %0, 7
  %1 = load i32, i32* getelementptr inbounds ([0 x i32], [0 x i32]* @columns, i64 0, i64 2), align 4
  %or.i = or i32 %shl.i, %1
  %shl1.i = shl i32 %or.i, 7
  %2 = load i32, i32* getelementptr inbounds ([0 x i32], [0 x i32]* @columns, i64 0, i64 3), align 4
  %or2.i = or i32 %shl1.i, %2
  %3 = load i32, i32* getelementptr inbounds ([0 x i32], [0 x i32]* @columns, i64 0, i64 7), align 4
  %shl3.i = shl i32 %3, 7
  %4 = load i32, i32* getelementptr inbounds ([0 x i32], [0 x i32]* @columns, i64 0, i64 6), align 4
  %or4.i = or i32 %shl3.i, %4
  %shl5.i = shl i32 %or4.i, 7
  %5 = load i32, i32* getelementptr inbounds ([0 x i32], [0 x i32]* @columns, i64 0, i64 5), align 4
  %or6.i = or i32 %shl5.i, %5
  %cmp.i = icmp ugt i32 %or2.i, %or6.i
  br i1 %cmp.i, label %cond.true.i, label %cond.false.i

cond.true.i:
  %shl7.i = shl i32 %or2.i, 7
  %6 = load i32, i32* getelementptr inbounds ([0 x i32], [0 x i32]* @columns, i64 0, i64 4), align 4
  %or8.i = or i32 %6, %shl7.i
  %conv.i = zext i32 %or8.i to i64
  %shl9.i = shl nuw nsw i64 %conv.i, 21
  %conv10.i = zext i32 %or6.i to i64
  %or11.i = or i64 %shl9.i, %conv10.i
  br label %hash.exit

cond.false.i:
  %shl12.i = shl i32 %or6.i, 7
  %7 = load i32, i32* getelementptr inbounds ([0 x i32], [0 x i32]* @columns, i64 0, i64 4), align 4
  %or13.i = or i32 %7, %shl12.i
  %conv14.i = zext i32 %or13.i to i64
  %shl15.i = shl nuw nsw i64 %conv14.i, 21
  %conv16.i = zext i32 %or2.i to i64
  %or17.i = or i64 %shl15.i, %conv16.i
  br label %hash.exit

hash.exit:
  %cond.i = phi i64 [ %or11.i, %cond.true.i ], [ %or17.i, %cond.false.i ]
  %shr.29.i = lshr i64 %cond.i, 17
  %conv18.i = trunc i64 %shr.29.i to i32
  store i32 %conv18.i, i32* @lock, align 4
  %rem.i = srem i64 %cond.i, 1050011
  %conv19.i = trunc i64 %rem.i to i32
  store i32 %conv19.i, i32* @htindex, align 4
  %rem20.i = urem i32 %conv18.i, 179
  %add.i = or i32 %rem20.i, 131072
  store i32 %add.i, i32* @stride, align 4
  %8 = load i32*, i32** @ht, align 8
  %arrayidx = getelementptr inbounds i32, i32* %8, i64 %rem.i
  %9 = load i32, i32* %arrayidx, align 4
  %cmp1 = icmp eq i32 %9, %conv18.i
  br i1 %cmp1, label %if.then, label %if.end

; Common hit path, reached from every probe step below.
if.then:
  %idxprom.lcssa = phi i64 [ %rem.i, %hash.exit ], [ %idxprom.1, %if.end ], [ %idxprom.2, %if.end.1 ], [ %idxprom.3, %if.end.2 ], [ %idxprom.4, %if.end.3 ], [ %idxprom.5, %if.end.4 ], [ %idxprom.6, %if.end.5 ], [ %idxprom.7, %if.end.6 ]
  %10 = load i8*, i8** @he, align 8
  %arrayidx3 = getelementptr inbounds i8, i8* %10, i64 %idxprom.lcssa
  %11 = load i8, i8* %arrayidx3, align 1
  %conv = sext i8 %11 to i32
  br label %cleanup

if.end:
  %add = add nsw i32 %add.i, %conv19.i
  %cmp4 = icmp sgt i32 %add, 1050010
  %sub = add nsw i32 %add, -1050011
  %sub.add = select i1 %cmp4, i32 %sub, i32 %add
  %idxprom.1 = sext i32 %sub.add to i64
  %arrayidx.1 = getelementptr inbounds i32, i32* %8, i64 %idxprom.1
  %12 = load i32, i32* %arrayidx.1, align 4
  %cmp1.1 = icmp eq i32 %12, %conv18.i
  br i1 %cmp1.1, label %if.then, label %if.end.1

cleanup:
  %retval.0 = phi i32 [ %conv, %if.then ], [ -128, %if.end.6 ]
  ret i32 %retval.0

if.end.1:
  %add.1 = add nsw i32 %add.i, %sub.add
  %cmp4.1 = icmp sgt i32 %add.1, 1050010
  %sub.1 = add nsw i32 %add.1, -1050011
  %sub.add.1 = select i1 %cmp4.1, i32 %sub.1, i32 %add.1
  %idxprom.2 = sext i32 %sub.add.1 to i64
  %arrayidx.2 = getelementptr inbounds i32, i32* %8, i64 %idxprom.2
  %13 = load i32, i32* %arrayidx.2, align 4
  %cmp1.2 = icmp eq i32 %13, %conv18.i
  br i1 %cmp1.2, label %if.then, label %if.end.2

if.end.2:
  %add.2 = add nsw i32 %add.i, %sub.add.1
  %cmp4.2 = icmp sgt i32 %add.2, 1050010
  %sub.2 = add nsw i32 %add.2, -1050011
  %sub.add.2 = select i1 %cmp4.2, i32 %sub.2, i32 %add.2
  %idxprom.3 = sext i32 %sub.add.2 to i64
  %arrayidx.3 = getelementptr inbounds i32, i32* %8, i64 %idxprom.3
  %14 = load i32, i32* %arrayidx.3, align 4
  %cmp1.3 = icmp eq i32 %14, %conv18.i
  br i1 %cmp1.3, label %if.then, label %if.end.3

if.end.3:
  %add.3 = add nsw i32 %add.i, %sub.add.2
  %cmp4.3 = icmp sgt i32 %add.3, 1050010
  %sub.3 = add nsw i32 %add.3, -1050011
  %sub.add.3 = select i1 %cmp4.3, i32 %sub.3, i32 %add.3
  %idxprom.4 = sext i32 %sub.add.3 to i64
  %arrayidx.4 = getelementptr inbounds i32, i32* %8, i64 %idxprom.4
  %15 = load i32, i32* %arrayidx.4, align 4
  %cmp1.4 = icmp eq i32 %15, %conv18.i
  br i1 %cmp1.4, label %if.then, label %if.end.4

if.end.4:
  %add.4 = add nsw i32 %add.i, %sub.add.3
  %cmp4.4 = icmp sgt i32 %add.4, 1050010
  %sub.4 = add nsw i32 %add.4, -1050011
  %sub.add.4 = select i1 %cmp4.4, i32 %sub.4, i32 %add.4
  %idxprom.5 = sext i32 %sub.add.4 to i64
  %arrayidx.5 = getelementptr inbounds i32, i32* %8, i64 %idxprom.5
  %16 = load i32, i32* %arrayidx.5, align 4
  %cmp1.5 = icmp eq i32 %16, %conv18.i
  br i1 %cmp1.5, label %if.then, label %if.end.5

if.end.5:
  %add.5 = add nsw i32 %add.i, %sub.add.4
  %cmp4.5 = icmp sgt i32 %add.5, 1050010
  %sub.5 = add nsw i32 %add.5, -1050011
  %sub.add.5 = select i1 %cmp4.5, i32 %sub.5, i32 %add.5
  %idxprom.6 = sext i32 %sub.add.5 to i64
  %arrayidx.6 = getelementptr inbounds i32, i32* %8, i64 %idxprom.6
  %17 = load i32, i32* %arrayidx.6, align 4
  %cmp1.6 = icmp eq i32 %17, %conv18.i
  br i1 %cmp1.6, label %if.then, label %if.end.6

if.end.6:
  %add.6 = add nsw i32 %add.i, %sub.add.5
  %cmp4.6 = icmp sgt i32 %add.6, 1050010
  %sub.6 = add nsw i32 %add.6, -1050011
  %sub.add.6 = select i1 %cmp4.6, i32 %sub.6, i32 %add.6
  %idxprom.7 = sext i32 %sub.add.6 to i64
  %arrayidx.7 = getelementptr inbounds i32, i32* %8, i64 %idxprom.7
  %18 = load i32, i32* %arrayidx.7, align 4
  %cmp1.7 = icmp eq i32 %18, %conv18.i
  br i1 %cmp1.7, label %if.then, label %cleanup
}