; RUN: llc %s -o - -enable-shrink-wrap=true -disable-post-ra -disable-fp-elim | FileCheck %s --check-prefix=CHECK --check-prefix=ENABLE
; RUN: llc %s -o - -enable-shrink-wrap=false -disable-post-ra -disable-fp-elim | FileCheck %s --check-prefix=CHECK --check-prefix=DISABLE
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
target triple = "arm64-apple-ios"


; Initial motivating example: Simple diamond with a call just on one side.
; CHECK-LABEL: foo:
;
; Compare the arguments and jump to exit.
; No prologue needed.
; ENABLE: cmp w0, w1
; ENABLE-NEXT: b.ge [[EXIT_LABEL:LBB[0-9_]+]]
;
; Prologue code.
; CHECK: sub sp, sp, #32
; CHECK-NEXT: stp [[SAVE_SP:x[0-9]+]], [[CSR:x[0-9]+]], [sp, #16]
; CHECK-NEXT: add [[SAVE_SP]], sp, #16
;
; Compare the arguments and jump to exit.
; After the prologue is set.
; DISABLE: cmp w0, w1
; DISABLE-NEXT: b.ge [[EXIT_LABEL:LBB[0-9_]+]]
;
; Store %a in the alloca.
; CHECK: stur w0, {{\[}}[[SAVE_SP]], #-4]
; Set the alloca address in the second argument.
; CHECK-NEXT: sub x1, [[SAVE_SP]], #4
; Set the first argument to zero.
; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: bl _doSomething
;
; Without shrink-wrapping, epilogue is in the exit block.
; DISABLE: [[EXIT_LABEL]]:
; Epilogue code.
; CHECK-NEXT: ldp x{{[0-9]+}}, [[CSR]], [sp, #16]
; CHECK-NEXT: add sp, sp, #32
;
; With shrink-wrapping, exit block is a simple return.
; ENABLE: [[EXIT_LABEL]]:
; CHECK-NEXT: ret
define i32 @foo(i32 %a, i32 %b) {
  %tmp = alloca i32, align 4
  %tmp2 = icmp slt i32 %a, %b
  br i1 %tmp2, label %true, label %false

true:
  store i32 %a, i32* %tmp, align 4
  %tmp4 = call i32 @doSomething(i32 0, i32* %tmp)
  br label %false

false:
  ; %0 refers to the unnamed entry block.
  %tmp.0 = phi i32 [ %tmp4, %true ], [ %a, %0 ]
  ret i32 %tmp.0
}

; Function Attrs: optsize
declare i32 @doSomething(i32, i32*)


; Check that we do not perform the restore inside the loop whereas the save
; is outside.
; CHECK-LABEL: freqSaveAndRestoreOutsideLoop:
;
; Shrink-wrapping allows us to skip the prologue in the else case.
; ENABLE: cbz w0, [[ELSE_LABEL:LBB[0-9_]+]]
;
; Prologue code.
; CHECK: stp [[CSR1:x[0-9]+]], [[CSR2:x[0-9]+]], [sp, #-32]!
; CHECK-NEXT: stp [[CSR3:x[0-9]+]], [[CSR4:x[0-9]+]], [sp, #16]
; CHECK-NEXT: add [[NEW_SP:x[0-9]+]], sp, #16
;
; DISABLE: cbz w0, [[ELSE_LABEL:LBB[0-9_]+]]
;
; CHECK: mov [[SUM:w[0-9]+]], wzr
; CHECK-NEXT: mov [[IV:w[0-9]+]], #10
;
; Next BB.
; CHECK: [[LOOP:LBB[0-9_]+]]: ; %for.body
; CHECK: bl _something
; CHECK-NEXT: add [[SUM]], w0, [[SUM]]
; CHECK-NEXT: sub [[IV]], [[IV]], #1
; CHECK-NEXT: cbnz [[IV]], [[LOOP]]
;
; Next BB.
; Copy SUM into the returned register + << 3.
; CHECK: lsl w0, [[SUM]], #3
;
; Jump to epilogue.
; DISABLE: b [[EPILOG_BB:LBB[0-9_]+]]
;
; DISABLE: [[ELSE_LABEL]]: ; %if.else
; Shift second argument by one and store into returned register.
; DISABLE: lsl w0, w1, #1
; DISABLE: [[EPILOG_BB]]: ; %if.end
;
; Epilogue code.
; CHECK: ldp [[CSR3]], [[CSR4]], [sp, #16]
; CHECK-NEXT: ldp [[CSR1]], [[CSR2]], [sp], #32
; CHECK-NEXT: ret
;
; ENABLE: [[ELSE_LABEL]]: ; %if.else
; Shift second argument by one and store into returned register.
; ENABLE: lsl w0, w1, #1
; ENABLE: ret
define i32 @freqSaveAndRestoreOutsideLoop(i32 %cond, i32 %N) {
entry:
  %tobool = icmp eq i32 %cond, 0
  br i1 %tobool, label %if.else, label %for.body

for.body:                                         ; preds = %entry, %for.body
  %i.05 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
  %sum.04 = phi i32 [ %add, %for.body ], [ 0, %entry ]
  %call = tail call i32 bitcast (i32 (...)* @something to i32 ()*)()
  %add = add nsw i32 %call, %sum.04
  %inc = add nuw nsw i32 %i.05, 1
  %exitcond = icmp eq i32 %inc, 10
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  %shl = shl i32 %add, 3
  br label %if.end

if.else:                                          ; preds = %entry
  %mul = shl nsw i32 %N, 1
  br label %if.end

if.end:                                           ; preds = %if.else, %for.end
  %sum.1 = phi i32 [ %shl, %for.end ], [ %mul, %if.else ]
  ret i32 %sum.1
}

declare i32 @something(...)

; Check that we do not perform the shrink-wrapping inside the loop even
; though that would be legal. The cost model must prevent that.
; CHECK-LABEL: freqSaveAndRestoreOutsideLoop2:
; Prologue code.
; CHECK: stp [[CSR1:x[0-9]+]], [[CSR2:x[0-9]+]], [sp, #-32]!
; CHECK-NEXT: stp [[CSR3:x[0-9]+]], [[CSR4:x[0-9]+]], [sp, #16]
; CHECK-NEXT: add [[NEW_SP:x[0-9]+]], sp, #16
; CHECK: mov [[SUM:w[0-9]+]], wzr
; CHECK-NEXT: mov [[IV:w[0-9]+]], #10
; Next BB.
; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: ; %for.body
; CHECK: bl _something
; CHECK-NEXT: add [[SUM]], w0, [[SUM]]
; CHECK-NEXT: sub [[IV]], [[IV]], #1
; CHECK-NEXT: cbnz [[IV]], [[LOOP_LABEL]]
; Next BB.
; CHECK: ; %for.end
; CHECK: mov w0, [[SUM]]
; CHECK-NEXT: ldp [[CSR3]], [[CSR4]], [sp, #16]
; CHECK-NEXT: ldp [[CSR1]], [[CSR2]], [sp], #32
; CHECK-NEXT: ret
define i32 @freqSaveAndRestoreOutsideLoop2(i32 %cond) {
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %i.04 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %sum.03 = phi i32 [ 0, %entry ], [ %add, %for.body ]
  %call = tail call i32 bitcast (i32 (...)* @something to i32 ()*)()
  %add = add nsw i32 %call, %sum.03
  %inc = add nuw nsw i32 %i.04, 1
  %exitcond = icmp eq i32 %inc, 10
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret i32 %add
}

; Check with a more complex case that we do not have save within the loop and
; restore outside.
; CHECK-LABEL: loopInfoSaveOutsideLoop:
;
; ENABLE: cbz w0, [[ELSE_LABEL:LBB[0-9_]+]]
;
; Prologue code.
; CHECK: stp [[CSR1:x[0-9]+]], [[CSR2:x[0-9]+]], [sp, #-32]!
; CHECK-NEXT: stp [[CSR3:x[0-9]+]], [[CSR4:x[0-9]+]], [sp, #16]
; CHECK-NEXT: add [[NEW_SP:x[0-9]+]], sp, #16
;
; DISABLE: cbz w0, [[ELSE_LABEL:LBB[0-9_]+]]
;
; CHECK: mov [[SUM:w[0-9]+]], wzr
; CHECK-NEXT: mov [[IV:w[0-9]+]], #10
;
; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: ; %for.body
; CHECK: bl _something
; CHECK-NEXT: add [[SUM]], w0, [[SUM]]
; CHECK-NEXT: sub [[IV]], [[IV]], #1
; CHECK-NEXT: cbnz [[IV]], [[LOOP_LABEL]]
; Next BB.
; CHECK: bl _somethingElse
; CHECK-NEXT: lsl w0, [[SUM]], #3
;
; Jump to epilogue.
; DISABLE: b [[EPILOG_BB:LBB[0-9_]+]]
;
; DISABLE: [[ELSE_LABEL]]: ; %if.else
; Shift second argument by one and store into returned register.
; DISABLE: lsl w0, w1, #1
; DISABLE: [[EPILOG_BB]]: ; %if.end
; Epilogue code.
; CHECK-NEXT: ldp [[CSR3]], [[CSR4]], [sp, #16]
; CHECK-NEXT: ldp [[CSR1]], [[CSR2]], [sp], #32
; CHECK-NEXT: ret
;
; ENABLE: [[ELSE_LABEL]]: ; %if.else
; Shift second argument by one and store into returned register.
; ENABLE: lsl w0, w1, #1
; ENABLE: ret
define i32 @loopInfoSaveOutsideLoop(i32 %cond, i32 %N) {
entry:
  %tobool = icmp eq i32 %cond, 0
  br i1 %tobool, label %if.else, label %for.body

for.body:                                         ; preds = %entry, %for.body
  %i.05 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
  %sum.04 = phi i32 [ %add, %for.body ], [ 0, %entry ]
  %call = tail call i32 bitcast (i32 (...)* @something to i32 ()*)()
  %add = add nsw i32 %call, %sum.04
  %inc = add nuw nsw i32 %i.05, 1
  %exitcond = icmp eq i32 %inc, 10
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  tail call void bitcast (void (...)* @somethingElse to void ()*)()
  %shl = shl i32 %add, 3
  br label %if.end

if.else:                                          ; preds = %entry
  %mul = shl nsw i32 %N, 1
  br label %if.end

if.end:                                           ; preds = %if.else, %for.end
  %sum.1 = phi i32 [ %shl, %for.end ], [ %mul, %if.else ]
  ret i32 %sum.1
}

declare void @somethingElse(...)

; Check with a more complex case that we do not have restore within the loop and
; save outside.
; CHECK-LABEL: loopInfoRestoreOutsideLoop:
;
; ENABLE: cbz w0, [[ELSE_LABEL:LBB[0-9_]+]]
;
; CHECK: stp [[CSR1:x[0-9]+]], [[CSR2:x[0-9]+]], [sp, #-32]!
; CHECK-NEXT: stp [[CSR3:x[0-9]+]], [[CSR4:x[0-9]+]], [sp, #16]
; CHECK-NEXT: add [[NEW_SP:x[0-9]+]], sp, #16
;
; DISABLE: cbz w0, [[ELSE_LABEL:LBB[0-9_]+]]
;
; CHECK: bl _somethingElse
; CHECK-NEXT: mov [[SUM:w[0-9]+]], wzr
; CHECK-NEXT: mov [[IV:w[0-9]+]], #10
;
; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: ; %for.body
; CHECK: bl _something
; CHECK-NEXT: add [[SUM]], w0, [[SUM]]
; CHECK-NEXT: sub [[IV]], [[IV]], #1
; CHECK-NEXT: cbnz [[IV]], [[LOOP_LABEL]]
; Next BB.
; CHECK: lsl w0, [[SUM]], #3
;
; Jump to epilogue.
; DISABLE: b [[EPILOG_BB:LBB[0-9_]+]]
;
; DISABLE: [[ELSE_LABEL]]: ; %if.else
; Shift second argument by one and store into returned register.
; DISABLE: lsl w0, w1, #1
; DISABLE: [[EPILOG_BB]]: ; %if.end
; Epilogue code.
; CHECK: ldp [[CSR3]], [[CSR4]], [sp, #16]
; CHECK-NEXT: ldp [[CSR1]], [[CSR2]], [sp], #32
; CHECK-NEXT: ret
;
; ENABLE: [[ELSE_LABEL]]: ; %if.else
; Shift second argument by one and store into returned register.
; ENABLE: lsl w0, w1, #1
; ENABLE: ret
define i32 @loopInfoRestoreOutsideLoop(i32 %cond, i32 %N) #0 {
entry:
  %tobool = icmp eq i32 %cond, 0
  br i1 %tobool, label %if.else, label %if.then

if.then:                                          ; preds = %entry
  tail call void bitcast (void (...)* @somethingElse to void ()*)()
  br label %for.body

for.body:                                         ; preds = %for.body, %if.then
  %i.05 = phi i32 [ 0, %if.then ], [ %inc, %for.body ]
  %sum.04 = phi i32 [ 0, %if.then ], [ %add, %for.body ]
  %call = tail call i32 bitcast (i32 (...)* @something to i32 ()*)()
  %add = add nsw i32 %call, %sum.04
  %inc = add nuw nsw i32 %i.05, 1
  %exitcond = icmp eq i32 %inc, 10
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  %shl = shl i32 %add, 3
  br label %if.end

if.else:                                          ; preds = %entry
  %mul = shl nsw i32 %N, 1
  br label %if.end

if.end:                                           ; preds = %if.else, %for.end
  %sum.1 = phi i32 [ %shl, %for.end ], [ %mul, %if.else ]
  ret i32 %sum.1
}

; Check that we handle function with no frame information correctly.
; CHECK-LABEL: emptyFrame:
; CHECK: ; %entry
; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: ret
define i32 @emptyFrame() {
entry:
  ret i32 0
}

; Check that we handle variadic function correctly.
; CHECK-LABEL: variadicFunc:
;
; ENABLE: cbz w0, [[ELSE_LABEL:LBB[0-9_]+]]
;
; Prologue code.
; CHECK: sub sp, sp, #16
; DISABLE: cbz w0, [[ELSE_LABEL:LBB[0-9_]+]]
;
; Sum is merged with the returned register.
; CHECK: add [[VA_BASE:x[0-9]+]], sp, #16
; CHECK-NEXT: str [[VA_BASE]], [sp, #8]
; CHECK-NEXT: cmp w1, #1
; CHECK-NEXT: b.lt [[IFEND_LABEL:LBB[0-9_]+]]
; CHECK: mov [[SUM:w0]], wzr
;
; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: ; %for.body
; CHECK: ldr [[VA_ADDR:x[0-9]+]], [sp, #8]
; CHECK-NEXT: add [[NEXT_VA_ADDR:x[0-9]+]], [[VA_ADDR]], #8
; CHECK-NEXT: str [[NEXT_VA_ADDR]], [sp, #8]
; CHECK-NEXT: ldr [[VA_VAL:w[0-9]+]], {{\[}}[[VA_ADDR]]]
; CHECK-NEXT: add [[SUM]], [[SUM]], [[VA_VAL]]
; CHECK-NEXT: sub w1, w1, #1
; CHECK-NEXT: cbnz w1, [[LOOP_LABEL]]
;
; DISABLE-NEXT: b
; DISABLE: [[ELSE_LABEL]]: ; %if.else
; DISABLE: lsl w0, w1, #1
;
; ENABLE: [[ELSE_LABEL]]: ; %if.else
; ENABLE: lsl w0, w1, #1
; ENABLE-NEXT: ret
;
; CHECK: [[IFEND_LABEL]]:
; Epilogue code.
; CHECK: add sp, sp, #16
; CHECK-NEXT: ret
define i32 @variadicFunc(i32 %cond, i32 %count, ...) #0 {
entry:
  %ap = alloca i8*, align 8
  %tobool = icmp eq i32 %cond, 0
  br i1 %tobool, label %if.else, label %if.then

if.then:                                          ; preds = %entry
  %ap1 = bitcast i8** %ap to i8*
  call void @llvm.va_start(i8* %ap1)
  %cmp6 = icmp sgt i32 %count, 0
  br i1 %cmp6, label %for.body, label %for.end

for.body:                                         ; preds = %if.then, %for.body
  %i.08 = phi i32 [ %inc, %for.body ], [ 0, %if.then ]
  %sum.07 = phi i32 [ %add, %for.body ], [ 0, %if.then ]
  %0 = va_arg i8** %ap, i32
  %add = add nsw i32 %sum.07, %0
  %inc = add nuw nsw i32 %i.08, 1
  %exitcond = icmp eq i32 %inc, %count
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body, %if.then
  %sum.0.lcssa = phi i32 [ 0, %if.then ], [ %add, %for.body ]
  call void @llvm.va_end(i8* %ap1)
  br label %if.end

if.else:                                          ; preds = %entry
  %mul = shl nsw i32 %count, 1
  br label %if.end

if.end:                                           ; preds = %if.else, %for.end
  %sum.1 = phi i32 [ %sum.0.lcssa, %for.end ], [ %mul, %if.else ]
  ret i32 %sum.1
}

declare void @llvm.va_start(i8*)

declare void @llvm.va_end(i8*)

; Check that we handle inline asm correctly.
; CHECK-LABEL: inlineAsm:
;
; ENABLE: cbz w0, [[ELSE_LABEL:LBB[0-9_]+]]
;
; Prologue code.
; Make sure we save the CSR used in the inline asm: x19.
; CHECK: stp [[CSR1:x[0-9]+]], [[CSR2:x19]], [sp, #-16]!
;
; DISABLE: cbz w0, [[ELSE_LABEL:LBB[0-9_]+]]
;
; CHECK: mov [[IV:w[0-9]+]], #10
;
; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: ; %for.body
; Inline asm statement.
; CHECK: add x19, x19, #1
; CHECK: sub [[IV]], [[IV]], #1
; CHECK-NEXT: cbnz [[IV]], [[LOOP_LABEL]]
; Next BB.
; CHECK: mov w0, wzr
; Epilogue code.
; CHECK-NEXT: ldp [[CSR1]], [[CSR2]], [sp], #16
; CHECK-NEXT: ret
; Next BB.
; CHECK: [[ELSE_LABEL]]: ; %if.else
; CHECK-NEXT: lsl w0, w1, #1
; Epilogue code.
; DISABLE-NEXT: ldp [[CSR1]], [[CSR2]], [sp], #16
; CHECK-NEXT: ret
define i32 @inlineAsm(i32 %cond, i32 %N) {
entry:
  %tobool = icmp eq i32 %cond, 0
  br i1 %tobool, label %if.else, label %for.body

for.body:                                         ; preds = %entry, %for.body
  %i.03 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
  tail call void asm sideeffect "add x19, x19, #1", "~{x19}"()
  %inc = add nuw nsw i32 %i.03, 1
  %exitcond = icmp eq i32 %inc, 10
  br i1 %exitcond, label %if.end, label %for.body

if.else:                                          ; preds = %entry
  %mul = shl nsw i32 %N, 1
  br label %if.end

if.end:                                           ; preds = %for.body, %if.else
  %sum.0 = phi i32 [ %mul, %if.else ], [ 0, %for.body ]
  ret i32 %sum.0
}

; Check that we handle calls to variadic functions correctly.
; CHECK-LABEL: callVariadicFunc:
;
; ENABLE: cbz w0, [[ELSE_LABEL:LBB[0-9_]+]]
;
; Prologue code.
; CHECK: sub sp, sp, #64
; CHECK-NEXT: stp [[CSR1:x[0-9]+]], [[CSR2:x[0-9]+]], [sp, #48]
; CHECK-NEXT: add [[NEW_SP:x[0-9]+]], sp, #48
;
; DISABLE: cbz w0, [[ELSE_LABEL:LBB[0-9_]+]]
; Setup of the varargs.
; CHECK: stp x1, x1, [sp, #32]
; CHECK-NEXT: stp x1, x1, [sp, #16]
; CHECK-NEXT: stp x1, x1, [sp]
; CHECK-NEXT: mov w0, w1
; CHECK-NEXT: bl _someVariadicFunc
; CHECK-NEXT: lsl w0, w0, #3
;
; DISABLE: b [[IFEND_LABEL:LBB[0-9_]+]]
; DISABLE: [[ELSE_LABEL]]: ; %if.else
; DISABLE-NEXT: lsl w0, w1, #1
; DISABLE: [[IFEND_LABEL]]: ; %if.end
;
; Epilogue code.
; CHECK: ldp [[CSR1]], [[CSR2]], [sp, #48]
; CHECK-NEXT: add sp, sp, #64
; CHECK-NEXT: ret
;
; ENABLE: [[ELSE_LABEL]]: ; %if.else
; ENABLE-NEXT: lsl w0, w1, #1
; ENABLE-NEXT: ret
define i32 @callVariadicFunc(i32 %cond, i32 %N) {
entry:
  %tobool = icmp eq i32 %cond, 0
  br i1 %tobool, label %if.else, label %if.then

if.then:                                          ; preds = %entry
  %call = tail call i32 (i32, ...) @someVariadicFunc(i32 %N, i32 %N, i32 %N, i32 %N, i32 %N, i32 %N, i32 %N)
  %shl = shl i32 %call, 3
  br label %if.end

if.else:                                          ; preds = %entry
  %mul = shl nsw i32 %N, 1
  br label %if.end

if.end:                                           ; preds = %if.else, %if.then
  %sum.0 = phi i32 [ %shl, %if.then ], [ %mul, %if.else ]
  ret i32 %sum.0
}

declare i32 @someVariadicFunc(i32, ...)

; Make sure we do not insert unreachable code after noreturn function.
; Although this is not incorrect to insert such code, it is useless
; and it hurts the binary size.
;
; CHECK-LABEL: noreturn:
; DISABLE: stp
;
; CHECK: and [[TEST:w[0-9]+]], w0, #0xff
; CHECK-NEXT: cbnz [[TEST]], [[ABORT:LBB[0-9_]+]]
;
; CHECK: mov w0, #42
;
; DISABLE-NEXT: ldp
;
; CHECK-NEXT: ret
;
; CHECK: [[ABORT]]: ; %if.abort
;
; ENABLE: stp
;
; CHECK: bl _abort
; ENABLE-NOT: ldp
define i32 @noreturn(i8 signext %bad_thing) {
entry:
  %tobool = icmp eq i8 %bad_thing, 0
  br i1 %tobool, label %if.end, label %if.abort

if.abort:                                         ; preds = %entry
  tail call void @abort() #0
  unreachable

if.end:                                           ; preds = %entry
  ret i32 42
}

declare void @abort() #0

attributes #0 = { noreturn nounwind }

; Make sure that we handle infinite loops properly. When checking that the Save
; and Restore blocks are control flow equivalent, the loop searches for the
; immediate (post) dominator for the (restore) save blocks. When either the Save
; or Restore block is located in an infinite loop the only immediate (post)
; dominator is itself. In this case, we cannot perform shrink wrapping, but we
; should return gracefully and continue compilation.
; The only condition for this test is the compilation finishes correctly.
;
; CHECK-LABEL: infiniteloop
; CHECK: ret
define void @infiniteloop() {
entry:
  br i1 undef, label %if.then, label %if.end

if.then:                                          ; preds = %entry
  %ptr = alloca i32, i32 4
  br label %for.body

for.body:                                         ; preds = %if.then, %for.body
  %sum.03 = phi i32 [ 0, %if.then ], [ %add, %for.body ]
  %call = tail call i32 bitcast (i32 (...)* @something to i32 ()*)()
  %add = add nsw i32 %call, %sum.03
  store i32 %add, i32* %ptr
  br label %for.body

if.end:                                           ; preds = %entry
  ret void
}

; Another infinite loop test this time with a body bigger than just one block.
; CHECK-LABEL: infiniteloop2
; CHECK: ret
define void @infiniteloop2() {
entry:
  br i1 undef, label %if.then, label %if.end

if.then:                                          ; preds = %entry
  %ptr = alloca i32, i32 4
  br label %for.body

for.body:                                         ; preds = %if.then, %body1, %body2
  %sum.03 = phi i32 [ 0, %if.then ], [ %add, %body1 ], [ 1, %body2]
  %call = tail call i32 asm "mov $0, #0", "=r,~{x19}"()
  %add = add nsw i32 %call, %sum.03
  store i32 %add, i32* %ptr
  br i1 undef, label %body1, label %body2

body1:                                            ; preds = %for.body
  tail call void asm sideeffect "nop", "~{x19}"()
  br label %for.body

body2:                                            ; preds = %for.body
  tail call void asm sideeffect "nop", "~{x19}"()
  br label %for.body

if.end:                                           ; preds = %entry
  ret void
}

; Another infinite loop test, this time with two nested infinite loops.
; CHECK-LABEL: infiniteloop3
; CHECK: ret
define void @infiniteloop3() {
entry:
  br i1 undef, label %loop2a, label %body

body:                                             ; preds = %entry
  br i1 undef, label %loop2a, label %end

loop1:                                            ; preds = %loop2a, %loop2b
  %var.phi = phi i32* [ %next.phi, %loop2b ], [ %var, %loop2a ]
  %next.phi = phi i32* [ %next.load, %loop2b ], [ %next.var, %loop2a ]
  %0 = icmp eq i32* %var, null
  %next.load = load i32*, i32** undef
  br i1 %0, label %loop2a, label %loop2b

loop2a:                                           ; preds = %loop1, %body, %entry
  %var = phi i32* [ null, %body ], [ null, %entry ], [ %next.phi, %loop1 ]
  %next.var = phi i32* [ undef, %body ], [ null, %entry ], [ %next.load, %loop1 ]
  br label %loop1

loop2b:                                           ; preds = %loop1
  %gep1 = bitcast i32* %var.phi to i32*
  %next.ptr = bitcast i32* %gep1 to i32**
  store i32* %next.phi, i32** %next.ptr
  br label %loop1

end:                                              ; preds = %body
  ret void
}

; Re-aligned stack pointer. See bug 26642. Avoid clobbering live
; values in the prologue when re-aligning the stack pointer.
; CHECK-LABEL: stack_realign:
; ENABLE-DAG: lsl w[[LSL1:[0-9]+]], w0, w1
; ENABLE-DAG: lsl w[[LSL2:[0-9]+]], w1, w0
; DISABLE-NOT: lsl w[[LSL1:[0-9]+]], w0, w1
; DISABLE-NOT: lsl w[[LSL2:[0-9]+]], w1, w0
; CHECK: stp x29, x30, [sp, #-16]!
; CHECK: mov x29, sp
; ENABLE-NOT: sub x[[LSL1]], sp, #16
; ENABLE-NOT: sub x[[LSL2]], sp, #16
; DISABLE: sub x{{[0-9]+}}, sp, #16
; DISABLE-DAG: lsl w[[LSL1:[0-9]+]], w0, w1
; DISABLE-DAG: lsl w[[LSL2:[0-9]+]], w1, w0
; CHECK-DAG: str w[[LSL1]],
; CHECK-DAG: str w[[LSL2]],

define i32 @stack_realign(i32 %a, i32 %b, i32* %ptr1, i32* %ptr2) {
  %tmp = alloca i32, align 32
  %shl1 = shl i32 %a, %b
  %shl2 = shl i32 %b, %a
  %tmp2 = icmp slt i32 %a, %b
  br i1 %tmp2, label %true, label %false

true:
  store i32 %a, i32* %tmp, align 4
  %tmp4 = load i32, i32* %tmp
  br label %false

false:
  ; %0 refers to the unnamed entry block.
  %tmp.0 = phi i32 [ %tmp4, %true ], [ %a, %0 ]
  store i32 %shl1, i32* %ptr1
  store i32 %shl2, i32* %ptr2
  ret i32 %tmp.0
}

; Re-aligned stack pointer with all caller-save regs live. See bug
; 26642. In this case we currently avoid shrink wrapping because
; ensuring we have a scratch register to re-align the stack pointer is
; too complicated. Output should be the same for both enabled and
; disabled shrink wrapping.
; CHECK-LABEL: stack_realign2:
; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #-{{[0-9]+}}]!
; CHECK: add x29, sp, #{{[0-9]+}}
; CHECK: lsl {{w[0-9]+}}, w0, w1

define void @stack_realign2(i32 %a, i32 %b, i32* %ptr1, i32* %ptr2, i32* %ptr3, i32* %ptr4, i32* %ptr5, i32* %ptr6) {
  %tmp = alloca i32, align 32
  %tmp1 = shl i32 %a, %b
  %tmp2 = shl i32 %b, %a
  %tmp3 = lshr i32 %a, %b
  %tmp4 = lshr i32 %b, %a
  %tmp5 = add i32 %b, %a
  %tmp6 = sub i32 %b, %a
  %tmp7 = add i32 %tmp1, %tmp2
  %tmp8 = sub i32 %tmp2, %tmp3
  %tmp9 = add i32 %tmp3, %tmp4
  %tmp10 = add i32 %tmp4, %tmp5
  %cmp = icmp slt i32 %a, %b
  br i1 %cmp, label %true, label %false

true:
  store i32 %a, i32* %tmp, align 4
  call void asm sideeffect "nop", "~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25},~{x26},~{x27},~{x28}"() nounwind
  br label %false

false:
  store i32 %tmp1, i32* %ptr1, align 4
  store i32 %tmp2, i32* %ptr2, align 4
  store i32 %tmp3, i32* %ptr3, align 4
  store i32 %tmp4, i32* %ptr4, align 4
  store i32 %tmp5, i32* %ptr5, align 4
  store i32 %tmp6, i32* %ptr6, align 4
  %idx1 = getelementptr inbounds i32, i32* %ptr1, i64 1
  store i32 %a, i32* %idx1, align 4
  %idx2 = getelementptr inbounds i32, i32* %ptr1, i64 2
  store i32 %b, i32* %idx2, align 4
  %idx3 = getelementptr inbounds i32, i32* %ptr1, i64 3
  store i32 %tmp7, i32* %idx3, align 4
  %idx4 = getelementptr inbounds i32, i32* %ptr1, i64 4
  store i32 %tmp8, i32* %idx4, align 4
  %idx5 = getelementptr inbounds i32, i32* %ptr1, i64 5
  store i32 %tmp9, i32* %idx5, align 4
  %idx6 = getelementptr inbounds i32, i32* %ptr1, i64 6
  store i32 %tmp10, i32* %idx6, align 4

  ret void
}