; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -debugify-and-strip-all-safe %s -o - -mtriple=arm64-apple-ios -enable-shrink-wrap=true -disable-post-ra -frame-pointer=non-leaf | FileCheck %s --check-prefix=ENABLE
; RUN: llc -debugify-and-strip-all-safe %s -o - -enable-shrink-wrap=false -disable-post-ra -frame-pointer=non-leaf | FileCheck %s --check-prefix=DISABLE
;
; The IR in this file is compiled twice: once with shrink-wrapping turned on
; (assertions under the ENABLE prefix) and once with it turned off (assertions
; under the DISABLE prefix), so the prologue/epilogue placement of both
; configurations can be compared side by side.
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
target triple = "arm64-apple-ios"


; Initial motivating example: Simple diamond with a call just on one side.
define i32 @foo(i32 %a, i32 %b) {
; ENABLE-LABEL: foo:
; ENABLE: ; %bb.0:
; ENABLE-NEXT: cmp w0, w1
; ENABLE-NEXT: b.ge LBB0_2
; ENABLE-NEXT: ; %bb.1: ; %true
; ENABLE-NEXT: sub sp, sp, #32 ; =32
; ENABLE-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill
; ENABLE-NEXT: add x29, sp, #16 ; =16
; ENABLE-NEXT: .cfi_def_cfa w29, 16
; ENABLE-NEXT: .cfi_offset w30, -8
; ENABLE-NEXT: .cfi_offset w29, -16
; ENABLE-NEXT: stur w0, [x29, #-4]
; ENABLE-NEXT: sub x1, x29, #4 ; =4
; ENABLE-NEXT: mov w0, wzr
; ENABLE-NEXT: bl _doSomething
; ENABLE-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
; ENABLE-NEXT: add sp, sp, #32 ; =32
; ENABLE-NEXT: LBB0_2: ; %false
; ENABLE-NEXT: ret
;
; DISABLE-LABEL: foo:
; DISABLE: ; %bb.0:
; DISABLE-NEXT: sub sp, sp, #32 ; =32
; DISABLE-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill
; DISABLE-NEXT: add x29, sp, #16 ; =16
; DISABLE-NEXT: .cfi_def_cfa w29, 16
; DISABLE-NEXT: .cfi_offset w30, -8
; DISABLE-NEXT: .cfi_offset w29, -16
; DISABLE-NEXT: cmp w0, w1
; DISABLE-NEXT: b.ge LBB0_2
; DISABLE-NEXT: ; %bb.1: ; %true
; DISABLE-NEXT: stur w0, [x29, #-4]
; DISABLE-NEXT: sub x1, x29, #4 ; =4
; DISABLE-NEXT: mov w0, wzr
; DISABLE-NEXT: bl _doSomething
; DISABLE-NEXT: LBB0_2: ; %false
; DISABLE-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
; DISABLE-NEXT: add sp, sp, #32 ; =32
; DISABLE-NEXT: ret
  %tmp = alloca i32, align 4
  %tmp2 = icmp slt i32 %a, %b
  br i1 %tmp2, label %true, label %false

true:
  ; Only this side of the diamond touches the stack slot and makes a call.
  store i32 %a, i32* %tmp, align 4
  %tmp4 = call i32 @doSomething(i32 0, i32* %tmp)
  br label %false

false:
  %tmp.0 = phi i32 [ %tmp4, %true ], [ %a, %0 ]
  ret i32 %tmp.0
}

; Function Attrs: optsize
declare i32 @doSomething(i32, i32*)


; Check that we do not perform the restore inside the loop whereas the save
; is outside.
define i32 @freqSaveAndRestoreOutsideLoop(i32 %cond, i32 %N) {
; ENABLE-LABEL: freqSaveAndRestoreOutsideLoop:
; ENABLE: ; %bb.0: ; %entry
; ENABLE-NEXT: cbz w0, LBB1_4
; ENABLE-NEXT: ; %bb.1: ; %for.body.preheader
; ENABLE-NEXT: stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill
; ENABLE-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill
; ENABLE-NEXT: add x29, sp, #16 ; =16
; ENABLE-NEXT: .cfi_def_cfa w29, 16
; ENABLE-NEXT: .cfi_offset w30, -8
; ENABLE-NEXT: .cfi_offset w29, -16
; ENABLE-NEXT: .cfi_offset w19, -24
; ENABLE-NEXT: .cfi_offset w20, -32
; ENABLE-NEXT: mov w19, wzr
; ENABLE-NEXT: mov w20, #10
; ENABLE-NEXT: LBB1_2: ; %for.body
; ENABLE-NEXT: ; =>This Inner Loop Header: Depth=1
; ENABLE-NEXT: bl _something
; ENABLE-NEXT: subs w20, w20, #1 ; =1
; ENABLE-NEXT: add w19, w0, w19
; ENABLE-NEXT: b.ne LBB1_2
; ENABLE-NEXT: ; %bb.3: ; %for.end
; ENABLE-NEXT: lsl w0, w19, #3
; ENABLE-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
; ENABLE-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload
; ENABLE-NEXT: ret
; ENABLE-NEXT: LBB1_4: ; %if.else
; ENABLE-NEXT: lsl w0, w1, #1
; ENABLE-NEXT: ret
;
; DISABLE-LABEL: freqSaveAndRestoreOutsideLoop:
; DISABLE: ; %bb.0: ; %entry
; DISABLE-NEXT: stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill
; DISABLE-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill
; DISABLE-NEXT: add x29, sp, #16 ; =16
; DISABLE-NEXT: .cfi_def_cfa w29, 16
; DISABLE-NEXT: .cfi_offset w30, -8
; DISABLE-NEXT: .cfi_offset w29, -16
; DISABLE-NEXT: .cfi_offset w19, -24
; DISABLE-NEXT: .cfi_offset w20, -32
; DISABLE-NEXT: cbz w0, LBB1_4
; DISABLE-NEXT: ; %bb.1: ; %for.body.preheader
; DISABLE-NEXT: mov w19, wzr
; DISABLE-NEXT: mov w20, #10
; DISABLE-NEXT: LBB1_2: ; %for.body
; DISABLE-NEXT: ; =>This Inner Loop Header: Depth=1
; DISABLE-NEXT: bl _something
; DISABLE-NEXT: subs w20, w20, #1 ; =1
; DISABLE-NEXT: add w19, w0, w19
; DISABLE-NEXT: b.ne LBB1_2
; DISABLE-NEXT: ; %bb.3: ; %for.end
; DISABLE-NEXT: lsl w0, w19, #3
; DISABLE-NEXT: b LBB1_5
; DISABLE-NEXT: LBB1_4: ; %if.else
; DISABLE-NEXT: lsl w0, w1, #1
; DISABLE-NEXT: LBB1_5: ; %if.end
; DISABLE-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
; DISABLE-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload
; DISABLE-NEXT: ret
entry:
  %tobool = icmp eq i32 %cond, 0
  br i1 %tobool, label %if.else, label %for.body

for.body:                                         ; preds = %entry, %for.body
  %i.05 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
  %sum.04 = phi i32 [ %add, %for.body ], [ 0, %entry ]
  %call = tail call i32 bitcast (i32 (...)* @something to i32 ()*)()
  %add = add nsw i32 %call, %sum.04
  %inc = add nuw nsw i32 %i.05, 1
  %exitcond = icmp eq i32 %inc, 10
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  %shl = shl i32 %add, 3
  br label %if.end

if.else:                                          ; preds = %entry
  %mul = shl nsw i32 %N, 1
  br label %if.end

if.end:                                           ; preds = %if.else, %for.end
  %sum.1 = phi i32 [ %shl, %for.end ], [ %mul, %if.else ]
  ret i32 %sum.1
}

declare i32 @something(...)

; Check that we do not perform the shrink-wrapping inside the loop even
; though that would be legal. The cost model must prevent that.
define i32 @freqSaveAndRestoreOutsideLoop2(i32 %cond) {
; ENABLE-LABEL: freqSaveAndRestoreOutsideLoop2:
; ENABLE: ; %bb.0: ; %entry
; ENABLE-NEXT: stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill
; ENABLE-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill
; ENABLE-NEXT: add x29, sp, #16 ; =16
; ENABLE-NEXT: .cfi_def_cfa w29, 16
; ENABLE-NEXT: .cfi_offset w30, -8
; ENABLE-NEXT: .cfi_offset w29, -16
; ENABLE-NEXT: .cfi_offset w19, -24
; ENABLE-NEXT: .cfi_offset w20, -32
; ENABLE-NEXT: mov w19, wzr
; ENABLE-NEXT: mov w20, #10
; ENABLE-NEXT: LBB2_1: ; %for.body
; ENABLE-NEXT: ; =>This Inner Loop Header: Depth=1
; ENABLE-NEXT: bl _something
; ENABLE-NEXT: subs w20, w20, #1 ; =1
; ENABLE-NEXT: add w19, w0, w19
; ENABLE-NEXT: b.ne LBB2_1
; ENABLE-NEXT: ; %bb.2: ; %for.end
; ENABLE-NEXT: mov w0, w19
; ENABLE-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
; ENABLE-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload
; ENABLE-NEXT: ret
;
; DISABLE-LABEL: freqSaveAndRestoreOutsideLoop2:
; DISABLE: ; %bb.0: ; %entry
; DISABLE-NEXT: stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill
; DISABLE-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill
; DISABLE-NEXT: add x29, sp, #16 ; =16
; DISABLE-NEXT: .cfi_def_cfa w29, 16
; DISABLE-NEXT: .cfi_offset w30, -8
; DISABLE-NEXT: .cfi_offset w29, -16
; DISABLE-NEXT: .cfi_offset w19, -24
; DISABLE-NEXT: .cfi_offset w20, -32
; DISABLE-NEXT: mov w19, wzr
; DISABLE-NEXT: mov w20, #10
; DISABLE-NEXT: LBB2_1: ; %for.body
; DISABLE-NEXT: ; =>This Inner Loop Header: Depth=1
; DISABLE-NEXT: bl _something
; DISABLE-NEXT: subs w20, w20, #1 ; =1
; DISABLE-NEXT: add w19, w0, w19
; DISABLE-NEXT: b.ne LBB2_1
; DISABLE-NEXT: ; %bb.2: ; %for.end
; DISABLE-NEXT: mov w0, w19
; DISABLE-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
; DISABLE-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload
; DISABLE-NEXT: ret
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %i.04 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %sum.03 = phi i32 [ 0, %entry ], [ %add, %for.body ]
  %call = tail call i32 bitcast (i32 (...)* @something to i32 ()*)()
  %add = add nsw i32 %call, %sum.03
  %inc = add nuw nsw i32 %i.04, 1
  %exitcond = icmp eq i32 %inc, 10
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret i32 %add
}

; Check with a more complex case that we do not have save within the loop and
; restore outside.
define i32 @loopInfoSaveOutsideLoop(i32 %cond, i32 %N) {
; ENABLE-LABEL: loopInfoSaveOutsideLoop:
; ENABLE: ; %bb.0: ; %entry
; ENABLE-NEXT: cbz w0, LBB3_4
; ENABLE-NEXT: ; %bb.1: ; %for.body.preheader
; ENABLE-NEXT: stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill
; ENABLE-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill
; ENABLE-NEXT: add x29, sp, #16 ; =16
; ENABLE-NEXT: .cfi_def_cfa w29, 16
; ENABLE-NEXT: .cfi_offset w30, -8
; ENABLE-NEXT: .cfi_offset w29, -16
; ENABLE-NEXT: .cfi_offset w19, -24
; ENABLE-NEXT: .cfi_offset w20, -32
; ENABLE-NEXT: mov w19, wzr
; ENABLE-NEXT: mov w20, #10
; ENABLE-NEXT: LBB3_2: ; %for.body
; ENABLE-NEXT: ; =>This Inner Loop Header: Depth=1
; ENABLE-NEXT: bl _something
; ENABLE-NEXT: subs w20, w20, #1 ; =1
; ENABLE-NEXT: add w19, w0, w19
; ENABLE-NEXT: b.ne LBB3_2
; ENABLE-NEXT: ; %bb.3: ; %for.end
; ENABLE-NEXT: bl _somethingElse
; ENABLE-NEXT: lsl w0, w19, #3
; ENABLE-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
; ENABLE-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload
; ENABLE-NEXT: ret
; ENABLE-NEXT: LBB3_4: ; %if.else
; ENABLE-NEXT: lsl w0, w1, #1
; ENABLE-NEXT: ret
;
; DISABLE-LABEL: loopInfoSaveOutsideLoop:
; DISABLE: ; %bb.0: ; %entry
; DISABLE-NEXT: stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill
; DISABLE-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill
; DISABLE-NEXT: add x29, sp, #16 ; =16
; DISABLE-NEXT: .cfi_def_cfa w29, 16
; DISABLE-NEXT: .cfi_offset w30, -8
; DISABLE-NEXT: .cfi_offset w29, -16
; DISABLE-NEXT: .cfi_offset w19, -24
; DISABLE-NEXT: .cfi_offset w20, -32
; DISABLE-NEXT: cbz w0, LBB3_4
; DISABLE-NEXT: ; %bb.1: ; %for.body.preheader
; DISABLE-NEXT: mov w19, wzr
; DISABLE-NEXT: mov w20, #10
; DISABLE-NEXT: LBB3_2: ; %for.body
; DISABLE-NEXT: ; =>This Inner Loop Header: Depth=1
; DISABLE-NEXT: bl _something
; DISABLE-NEXT: subs w20, w20, #1 ; =1
; DISABLE-NEXT: add w19, w0, w19
; DISABLE-NEXT: b.ne LBB3_2
; DISABLE-NEXT: ; %bb.3: ; %for.end
; DISABLE-NEXT: bl _somethingElse
; DISABLE-NEXT: lsl w0, w19, #3
; DISABLE-NEXT: b LBB3_5
; DISABLE-NEXT: LBB3_4: ; %if.else
; DISABLE-NEXT: lsl w0, w1, #1
; DISABLE-NEXT: LBB3_5: ; %if.end
; DISABLE-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
; DISABLE-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload
; DISABLE-NEXT: ret
entry:
  %tobool = icmp eq i32 %cond, 0
  br i1 %tobool, label %if.else, label %for.body

for.body:                                         ; preds = %entry, %for.body
  %i.05 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
  %sum.04 = phi i32 [ %add, %for.body ], [ 0, %entry ]
  %call = tail call i32 bitcast (i32 (...)* @something to i32 ()*)()
  %add = add nsw i32 %call, %sum.04
  %inc = add nuw nsw i32 %i.05, 1
  %exitcond = icmp eq i32 %inc, 10
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ; Extra call after the loop exit, outside the loop body.
  tail call void bitcast (void (...)* @somethingElse to void ()*)()
  %shl = shl i32 %add, 3
  br label %if.end

if.else:                                          ; preds = %entry
  %mul = shl nsw i32 %N, 1
  br label %if.end

if.end:                                           ; preds = %if.else, %for.end
  %sum.1 = phi i32 [ %shl, %for.end ], [ %mul, %if.else ]
  ret i32 %sum.1
}

declare void @somethingElse(...)

; Check with a more complex case that we do not have restore within the loop and
; save outside.
define i32 @loopInfoRestoreOutsideLoop(i32 %cond, i32 %N) nounwind {
; ENABLE-LABEL: loopInfoRestoreOutsideLoop:
; ENABLE: ; %bb.0: ; %entry
; ENABLE-NEXT: cbz w0, LBB4_4
; ENABLE-NEXT: ; %bb.1: ; %if.then
; ENABLE-NEXT: stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill
; ENABLE-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill
; ENABLE-NEXT: add x29, sp, #16 ; =16
; ENABLE-NEXT: bl _somethingElse
; ENABLE-NEXT: mov w19, wzr
; ENABLE-NEXT: mov w20, #10
; ENABLE-NEXT: LBB4_2: ; %for.body
; ENABLE-NEXT: ; =>This Inner Loop Header: Depth=1
; ENABLE-NEXT: bl _something
; ENABLE-NEXT: subs w20, w20, #1 ; =1
; ENABLE-NEXT: add w19, w0, w19
; ENABLE-NEXT: b.ne LBB4_2
; ENABLE-NEXT: ; %bb.3: ; %for.end
; ENABLE-NEXT: lsl w0, w19, #3
; ENABLE-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
; ENABLE-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload
; ENABLE-NEXT: ret
; ENABLE-NEXT: LBB4_4: ; %if.else
; ENABLE-NEXT: lsl w0, w1, #1
; ENABLE-NEXT: ret
;
; DISABLE-LABEL: loopInfoRestoreOutsideLoop:
; DISABLE: ; %bb.0: ; %entry
; DISABLE-NEXT: stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill
; DISABLE-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill
; DISABLE-NEXT: add x29, sp, #16 ; =16
; DISABLE-NEXT: cbz w0, LBB4_4
; DISABLE-NEXT: ; %bb.1: ; %if.then
; DISABLE-NEXT: bl _somethingElse
; DISABLE-NEXT: mov w19, wzr
; DISABLE-NEXT: mov w20, #10
; DISABLE-NEXT: LBB4_2: ; %for.body
; DISABLE-NEXT: ; =>This Inner Loop Header: Depth=1
; DISABLE-NEXT: bl _something
; DISABLE-NEXT: subs w20, w20, #1 ; =1
; DISABLE-NEXT: add w19, w0, w19
; DISABLE-NEXT: b.ne LBB4_2
; DISABLE-NEXT: ; %bb.3: ; %for.end
; DISABLE-NEXT: lsl w0, w19, #3
; DISABLE-NEXT: b LBB4_5
; DISABLE-NEXT: LBB4_4: ; %if.else
; DISABLE-NEXT: lsl w0, w1, #1
; DISABLE-NEXT: LBB4_5: ; %if.end
; DISABLE-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
; DISABLE-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload
; DISABLE-NEXT: ret
entry:
  %tobool = icmp eq i32 %cond, 0
  br i1 %tobool, label %if.else, label %if.then

if.then:                                          ; preds = %entry
  ; Extra call before entering the loop, outside the loop body.
  tail call void bitcast (void (...)* @somethingElse to void ()*)()
  br label %for.body

for.body:                                         ; preds = %for.body, %if.then
  %i.05 = phi i32 [ 0, %if.then ], [ %inc, %for.body ]
  %sum.04 = phi i32 [ 0, %if.then ], [ %add, %for.body ]
  %call = tail call i32 bitcast (i32 (...)* @something to i32 ()*)()
  %add = add nsw i32 %call, %sum.04
  %inc = add nuw nsw i32 %i.05, 1
  %exitcond = icmp eq i32 %inc, 10
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  %shl = shl i32 %add, 3
  br label %if.end

if.else:                                          ; preds = %entry
  %mul = shl nsw i32 %N, 1
  br label %if.end

if.end:                                           ; preds = %if.else, %for.end
  %sum.1 = phi i32 [ %shl, %for.end ], [ %mul, %if.else ]
  ret i32 %sum.1
}

; Check that we handle function with no frame information correctly.
define i32 @emptyFrame() {
; ENABLE-LABEL: emptyFrame:
; ENABLE: ; %bb.0: ; %entry
; ENABLE-NEXT: mov w0, wzr
; ENABLE-NEXT: ret
;
; DISABLE-LABEL: emptyFrame:
; DISABLE: ; %bb.0: ; %entry
; DISABLE-NEXT: mov w0, wzr
; DISABLE-NEXT: ret
entry:
  ret i32 0
}

; Check that we handle variadic function correctly.
define i32 @variadicFunc(i32 %cond, i32 %count, ...) nounwind {
; ENABLE-LABEL: variadicFunc:
; ENABLE: ; %bb.0: ; %entry
; ENABLE-NEXT: cbz w0, LBB6_4
; ENABLE-NEXT: ; %bb.1: ; %if.then
; ENABLE-NEXT: sub sp, sp, #16 ; =16
; ENABLE-NEXT: add x8, sp, #16 ; =16
; ENABLE-NEXT: cmp w1, #1 ; =1
; ENABLE-NEXT: str x8, [sp, #8]
; ENABLE-NEXT: mov w0, wzr
; ENABLE-NEXT: b.lt LBB6_3
; ENABLE-NEXT: LBB6_2: ; %for.body
; ENABLE-NEXT: ; =>This Inner Loop Header: Depth=1
; ENABLE-NEXT: ldr x8, [sp, #8]
; ENABLE-NEXT: add x9, x8, #8 ; =8
; ENABLE-NEXT: str x9, [sp, #8]
; ENABLE-NEXT: ldr w8, [x8]
; ENABLE-NEXT: subs w1, w1, #1 ; =1
; ENABLE-NEXT: add w0, w0, w8
; ENABLE-NEXT: b.ne LBB6_2
; ENABLE-NEXT: LBB6_3: ; %for.end
; ENABLE-NEXT: add sp, sp, #16 ; =16
; ENABLE-NEXT: ret
; ENABLE-NEXT: LBB6_4: ; %if.else
; ENABLE-NEXT: lsl w0, w1, #1
; ENABLE-NEXT: ret
;
; DISABLE-LABEL: variadicFunc:
; DISABLE: ; %bb.0: ; %entry
; DISABLE-NEXT: sub sp, sp, #16 ; =16
; DISABLE-NEXT: cbz w0, LBB6_4
; DISABLE-NEXT: ; %bb.1: ; %if.then
; DISABLE-NEXT: add x8, sp, #16 ; =16
; DISABLE-NEXT: cmp w1, #1 ; =1
; DISABLE-NEXT: str x8, [sp, #8]
; DISABLE-NEXT: mov w0, wzr
; DISABLE-NEXT: b.lt LBB6_3
; DISABLE-NEXT: LBB6_2: ; %for.body
; DISABLE-NEXT: ; =>This Inner Loop Header: Depth=1
; DISABLE-NEXT: ldr x8, [sp, #8]
; DISABLE-NEXT: add x9, x8, #8 ; =8
; DISABLE-NEXT: str x9, [sp, #8]
; DISABLE-NEXT: ldr w8, [x8]
; DISABLE-NEXT: subs w1, w1, #1 ; =1
; DISABLE-NEXT: add w0, w0, w8
; DISABLE-NEXT: b.ne LBB6_2
; DISABLE-NEXT: LBB6_3: ; %if.end
; DISABLE-NEXT: add sp, sp, #16 ; =16
; DISABLE-NEXT: ret
; DISABLE-NEXT: LBB6_4: ; %if.else
; DISABLE-NEXT: lsl w0, w1, #1
; DISABLE-NEXT: add sp, sp, #16 ; =16
; DISABLE-NEXT: ret
entry:
  %ap = alloca i8*, align 8
  %tobool = icmp eq i32 %cond, 0
  br i1 %tobool, label %if.else, label %if.then

if.then:                                          ; preds = %entry
  %ap1 = bitcast i8** %ap to i8*
  call void @llvm.va_start(i8* %ap1)
  %cmp6 = icmp sgt i32 %count, 0
  br i1 %cmp6, label %for.body, label %for.end

for.body:                                         ; preds = %if.then, %for.body
  %i.08 = phi i32 [ %inc, %for.body ], [ 0, %if.then ]
  %sum.07 = phi i32 [ %add, %for.body ], [ 0, %if.then ]
  %0 = va_arg i8** %ap, i32
  %add = add nsw i32 %sum.07, %0
  %inc = add nuw nsw i32 %i.08, 1
  %exitcond = icmp eq i32 %inc, %count
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body, %if.then
  %sum.0.lcssa = phi i32 [ 0, %if.then ], [ %add, %for.body ]
  call void @llvm.va_end(i8* %ap1)
  br label %if.end

if.else:                                          ; preds = %entry
  %mul = shl nsw i32 %count, 1
  br label %if.end

if.end:                                           ; preds = %if.else, %for.end
  %sum.1 = phi i32 [ %sum.0.lcssa, %for.end ], [ %mul, %if.else ]
  ret i32 %sum.1
}

declare void @llvm.va_start(i8*)

declare void @llvm.va_end(i8*)

; Check that we handle inline asm correctly.
define i32 @inlineAsm(i32 %cond, i32 %N) {
; ENABLE-LABEL: inlineAsm:
; ENABLE: ; %bb.0: ; %entry
; ENABLE-NEXT: cbz w0, LBB7_4
; ENABLE-NEXT: ; %bb.1: ; %for.body.preheader
; ENABLE-NEXT: stp x20, x19, [sp, #-16]! ; 16-byte Folded Spill
; ENABLE-NEXT: .cfi_def_cfa_offset 16
; ENABLE-NEXT: .cfi_offset w19, -8
; ENABLE-NEXT: .cfi_offset w20, -16
; ENABLE-NEXT: mov w8, #10
; ENABLE-NEXT: LBB7_2: ; %for.body
; ENABLE-NEXT: ; =>This Inner Loop Header: Depth=1
; ENABLE-NEXT: subs w8, w8, #1 ; =1
; ENABLE-NEXT: ; InlineAsm Start
; ENABLE-NEXT: add x19, x19, #1 ; =1
; ENABLE-NEXT: ; InlineAsm End
; ENABLE-NEXT: b.ne LBB7_2
; ENABLE-NEXT: ; %bb.3:
; ENABLE-NEXT: mov w0, wzr
; ENABLE-NEXT: ldp x20, x19, [sp], #16 ; 16-byte Folded Reload
; ENABLE-NEXT: ret
; ENABLE-NEXT: LBB7_4: ; %if.else
; ENABLE-NEXT: lsl w0, w1, #1
; ENABLE-NEXT: ret
;
; DISABLE-LABEL: inlineAsm:
; DISABLE: ; %bb.0: ; %entry
; DISABLE-NEXT: stp x20, x19, [sp, #-16]! ; 16-byte Folded Spill
; DISABLE-NEXT: .cfi_def_cfa_offset 16
; DISABLE-NEXT: .cfi_offset w19, -8
; DISABLE-NEXT: .cfi_offset w20, -16
; DISABLE-NEXT: cbz w0, LBB7_4
; DISABLE-NEXT: ; %bb.1: ; %for.body.preheader
; DISABLE-NEXT: mov w8, #10
; DISABLE-NEXT: LBB7_2: ; %for.body
; DISABLE-NEXT: ; =>This Inner Loop Header: Depth=1
; DISABLE-NEXT: subs w8, w8, #1 ; =1
; DISABLE-NEXT: ; InlineAsm Start
; DISABLE-NEXT: add x19, x19, #1 ; =1
; DISABLE-NEXT: ; InlineAsm End
; DISABLE-NEXT: b.ne LBB7_2
; DISABLE-NEXT: ; %bb.3:
; DISABLE-NEXT: mov w0, wzr
; DISABLE-NEXT: ldp x20, x19, [sp], #16 ; 16-byte Folded Reload
; DISABLE-NEXT: ret
; DISABLE-NEXT: LBB7_4: ; %if.else
; DISABLE-NEXT: lsl w0, w1, #1
; DISABLE-NEXT: ldp x20, x19, [sp], #16 ; 16-byte Folded Reload
; DISABLE-NEXT: ret
entry:
  %tobool = icmp eq i32 %cond, 0
  br i1 %tobool, label %if.else, label %for.body

for.body:                                         ; preds = %entry, %for.body
  %i.03 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
  ; The inline asm lists x19 as clobbered.
  tail call void asm sideeffect "add x19, x19, #1", "~{x19}"()
  %inc = add nuw nsw i32 %i.03, 1
  %exitcond = icmp eq i32 %inc, 10
  br i1 %exitcond, label %if.end, label %for.body

if.else:                                          ; preds = %entry
  %mul = shl nsw i32 %N, 1
  br label %if.end

if.end:                                           ; preds = %for.body, %if.else
  %sum.0 = phi i32 [ %mul, %if.else ], [ 0, %for.body ]
  ret i32 %sum.0
}

; Check that we handle calls to variadic functions correctly.
define i32 @callVariadicFunc(i32 %cond, i32 %N) {
; ENABLE-LABEL: callVariadicFunc:
; ENABLE: ; %bb.0: ; %entry
; ENABLE-NEXT: ; kill: def $w1 killed $w1 def $x1
; ENABLE-NEXT: cbz w0, LBB8_2
; ENABLE-NEXT: ; %bb.1: ; %if.then
; ENABLE-NEXT: sub sp, sp, #64 ; =64
; ENABLE-NEXT: stp x29, x30, [sp, #48] ; 16-byte Folded Spill
; ENABLE-NEXT: add x29, sp, #48 ; =48
; ENABLE-NEXT: .cfi_def_cfa w29, 16
; ENABLE-NEXT: .cfi_offset w30, -8
; ENABLE-NEXT: .cfi_offset w29, -16
; ENABLE-NEXT: stp x1, x1, [sp, #32]
; ENABLE-NEXT: stp x1, x1, [sp, #16]
; ENABLE-NEXT: stp x1, x1, [sp]
; ENABLE-NEXT: mov w0, w1
; ENABLE-NEXT: bl _someVariadicFunc
; ENABLE-NEXT: lsl w0, w0, #3
; ENABLE-NEXT: ldp x29, x30, [sp, #48] ; 16-byte Folded Reload
; ENABLE-NEXT: add sp, sp, #64 ; =64
; ENABLE-NEXT: ret
; ENABLE-NEXT: LBB8_2: ; %if.else
; ENABLE-NEXT: lsl w0, w1, #1
; ENABLE-NEXT: ret
;
; DISABLE-LABEL: callVariadicFunc:
; DISABLE: ; %bb.0: ; %entry
; DISABLE-NEXT: sub sp, sp, #64 ; =64
; DISABLE-NEXT: stp x29, x30, [sp, #48] ; 16-byte Folded Spill
; DISABLE-NEXT: add x29, sp, #48 ; =48
; DISABLE-NEXT: .cfi_def_cfa w29, 16
; DISABLE-NEXT: .cfi_offset w30, -8
; DISABLE-NEXT: .cfi_offset w29, -16
; DISABLE-NEXT: ; kill: def $w1 killed $w1 def $x1
; DISABLE-NEXT: cbz w0, LBB8_2
; DISABLE-NEXT: ; %bb.1: ; %if.then
; DISABLE-NEXT: stp x1, x1, [sp, #32]
; DISABLE-NEXT: stp x1, x1, [sp, #16]
; DISABLE-NEXT: stp x1, x1, [sp]
; DISABLE-NEXT: mov w0, w1
; DISABLE-NEXT: bl _someVariadicFunc
; DISABLE-NEXT: lsl w0, w0, #3
; DISABLE-NEXT: b LBB8_3
; DISABLE-NEXT: LBB8_2: ; %if.else
; DISABLE-NEXT: lsl w0, w1, #1
; DISABLE-NEXT: LBB8_3: ; %if.end
; DISABLE-NEXT: ldp x29, x30, [sp, #48] ; 16-byte Folded Reload
; DISABLE-NEXT: add sp, sp, #64 ; =64
; DISABLE-NEXT: ret
entry:
  %tobool = icmp eq i32 %cond, 0
  br i1 %tobool, label %if.else, label %if.then

if.then:                                          ; preds = %entry
  %call = tail call i32 (i32, ...) @someVariadicFunc(i32 %N, i32 %N, i32 %N, i32 %N, i32 %N, i32 %N, i32 %N)
  %shl = shl i32 %call, 3
  br label %if.end

if.else:                                          ; preds = %entry
  %mul = shl nsw i32 %N, 1
  br label %if.end

if.end:                                           ; preds = %if.else, %if.then
  %sum.0 = phi i32 [ %shl, %if.then ], [ %mul, %if.else ]
  ret i32 %sum.0
}

declare i32 @someVariadicFunc(i32, ...)

; Make sure we do not insert unreachable code after noreturn function.
; Although this is not incorrect to insert such code, it is useless
; and it hurts the binary size.
;
define i32 @noreturn(i8 signext %bad_thing) {
; ENABLE-LABEL: noreturn:
; ENABLE: ; %bb.0: ; %entry
; ENABLE-NEXT: cbnz w0, LBB9_2
; ENABLE-NEXT: ; %bb.1: ; %if.end
; ENABLE-NEXT: mov w0, #42
; ENABLE-NEXT: ret
; ENABLE-NEXT: LBB9_2: ; %if.abort
; ENABLE-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
; ENABLE-NEXT: mov x29, sp
; ENABLE-NEXT: .cfi_def_cfa w29, 16
; ENABLE-NEXT: .cfi_offset w30, -8
; ENABLE-NEXT: .cfi_offset w29, -16
; ENABLE-NEXT: bl _abort
;
; DISABLE-LABEL: noreturn:
; DISABLE: ; %bb.0: ; %entry
; DISABLE-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
; DISABLE-NEXT: mov x29, sp
; DISABLE-NEXT: .cfi_def_cfa w29, 16
; DISABLE-NEXT: .cfi_offset w30, -8
; DISABLE-NEXT: .cfi_offset w29, -16
; DISABLE-NEXT: cbnz w0, LBB9_2
; DISABLE-NEXT: ; %bb.1: ; %if.end
; DISABLE-NEXT: mov w0, #42
; DISABLE-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
; DISABLE-NEXT: ret
; DISABLE-NEXT: LBB9_2: ; %if.abort
; DISABLE-NEXT: bl _abort
entry:
  %tobool = icmp eq i8 %bad_thing, 0
  br i1 %tobool, label %if.end, label %if.abort

if.abort:
  tail call void @abort() #0
  unreachable

if.end:
  ret i32 42
}

declare void @abort() #0

attributes #0 = { noreturn nounwind }

; Make sure that we handle infinite loops properly. When checking that the Save
; and Restore blocks are control flow equivalent, the loop searches for the
; immediate (post) dominator for the (restore) save blocks. When either the Save
; or Restore block is located in an infinite loop the only immediate (post)
; dominator is itself. In this case, we cannot perform shrink wrapping, but we
; should return gracefully and continue compilation.
; The only condition for this test is the compilation finishes correctly.
;
define void @infiniteloop() {
; ENABLE-LABEL: infiniteloop:
; ENABLE: ; %bb.0: ; %entry
; ENABLE-NEXT: stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill
; ENABLE-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill
; ENABLE-NEXT: add x29, sp, #16 ; =16
; ENABLE-NEXT: .cfi_def_cfa w29, 16
; ENABLE-NEXT: .cfi_offset w30, -8
; ENABLE-NEXT: .cfi_offset w29, -16
; ENABLE-NEXT: .cfi_offset w19, -24
; ENABLE-NEXT: .cfi_offset w20, -32
; ENABLE-NEXT: cbnz wzr, LBB10_3
; ENABLE-NEXT: ; %bb.1: ; %if.then
; ENABLE-NEXT: sub x19, sp, #16 ; =16
; ENABLE-NEXT: mov sp, x19
; ENABLE-NEXT: mov w20, wzr
; ENABLE-NEXT: LBB10_2: ; %for.body
; ENABLE-NEXT: ; =>This Inner Loop Header: Depth=1
; ENABLE-NEXT: bl _something
; ENABLE-NEXT: add w20, w0, w20
; ENABLE-NEXT: str w20, [x19]
; ENABLE-NEXT: b LBB10_2
; ENABLE-NEXT: LBB10_3: ; %if.end
; ENABLE-NEXT: sub sp, x29, #16 ; =16
; ENABLE-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
; ENABLE-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload
; ENABLE-NEXT: ret
;
; DISABLE-LABEL: infiniteloop:
; DISABLE: ; %bb.0: ; %entry
; DISABLE-NEXT: stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill
; DISABLE-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill
; DISABLE-NEXT: add x29, sp, #16 ; =16
; DISABLE-NEXT: .cfi_def_cfa w29, 16
; DISABLE-NEXT: .cfi_offset w30, -8
; DISABLE-NEXT: .cfi_offset w29, -16
; DISABLE-NEXT: .cfi_offset w19, -24
; DISABLE-NEXT: .cfi_offset w20, -32
; DISABLE-NEXT: cbnz wzr, LBB10_3
; DISABLE-NEXT: ; %bb.1: ; %if.then
; DISABLE-NEXT: sub x19, sp, #16 ; =16
; DISABLE-NEXT: mov sp, x19
; DISABLE-NEXT: mov w20, wzr
; DISABLE-NEXT: LBB10_2: ; %for.body
; DISABLE-NEXT: ; =>This Inner Loop Header: Depth=1
; DISABLE-NEXT: bl _something
; DISABLE-NEXT: add w20, w0, w20
; DISABLE-NEXT: str w20, [x19]
; DISABLE-NEXT: b LBB10_2
; DISABLE-NEXT: LBB10_3: ; %if.end
; DISABLE-NEXT: sub sp, x29, #16 ; =16
; DISABLE-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
; DISABLE-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload
; DISABLE-NEXT: ret
entry:
  br i1 undef, label %if.then, label %if.end

if.then:
  %ptr = alloca i32, i32 4
  br label %for.body

for.body:                                         ; preds = %for.body, %if.then
  ; This block only ever branches back to itself, so the loop has no exit.
  %sum.03 = phi i32 [ 0, %if.then ], [ %add, %for.body ]
  %call = tail call i32 bitcast (i32 (...)* @something to i32 ()*)()
  %add = add nsw i32 %call, %sum.03
  store i32 %add, i32* %ptr
  br label %for.body

if.end:
  ret void
}

; Another infinite loop test this time with a body bigger than just one block.
define void @infiniteloop2() {
; ENABLE-LABEL: infiniteloop2:
; ENABLE: ; %bb.0: ; %entry
; ENABLE-NEXT: stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill
; ENABLE-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill
; ENABLE-NEXT: add x29, sp, #16 ; =16
; ENABLE-NEXT: .cfi_def_cfa w29, 16
; ENABLE-NEXT: .cfi_offset w30, -8
; ENABLE-NEXT: .cfi_offset w29, -16
; ENABLE-NEXT: .cfi_offset w19, -24
; ENABLE-NEXT: .cfi_offset w20, -32
; ENABLE-NEXT: cbnz wzr, LBB11_3
; ENABLE-NEXT: ; %bb.1: ; %if.then
; ENABLE-NEXT: sub x8, sp, #16 ; =16
; ENABLE-NEXT: mov sp, x8
; ENABLE-NEXT: mov w9, wzr
; ENABLE-NEXT: ; InlineAsm Start
; ENABLE-NEXT: mov x10, #0
; ENABLE-NEXT: ; InlineAsm End
; ENABLE-NEXT: LBB11_2: ; %for.body
; ENABLE-NEXT: ; =>This Inner Loop Header: Depth=1
; ENABLE-NEXT: add w9, w10, w9
; ENABLE-NEXT: str w9, [x8]
; ENABLE-NEXT: ; InlineAsm Start
; ENABLE-NEXT: nop
; ENABLE-NEXT: ; InlineAsm End
; ENABLE-NEXT: mov w9, #1
; ENABLE-NEXT: b LBB11_2
; ENABLE-NEXT: LBB11_3: ; %if.end
; ENABLE-NEXT: sub sp, x29, #16 ; =16
; ENABLE-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
; ENABLE-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload
; ENABLE-NEXT: ret
;
; DISABLE-LABEL: infiniteloop2:
; DISABLE: ; %bb.0: ; %entry
; DISABLE-NEXT: stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill
; DISABLE-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill
; DISABLE-NEXT: add x29, sp, #16 ; =16
; DISABLE-NEXT: .cfi_def_cfa w29, 16
; DISABLE-NEXT: .cfi_offset w30, -8
; DISABLE-NEXT: .cfi_offset w29, -16
; DISABLE-NEXT: .cfi_offset w19, -24
; DISABLE-NEXT: .cfi_offset w20, -32
; DISABLE-NEXT: cbnz wzr, LBB11_3
; DISABLE-NEXT: ; %bb.1: ; %if.then
; DISABLE-NEXT: sub x8, sp, #16 ; =16
; DISABLE-NEXT: mov sp, x8
; DISABLE-NEXT: mov w9, wzr
; DISABLE-NEXT: ; InlineAsm Start
; DISABLE-NEXT: mov x10, #0
; DISABLE-NEXT: ; InlineAsm End
; DISABLE-NEXT: LBB11_2: ; %for.body
; DISABLE-NEXT: ; =>This Inner Loop Header: Depth=1
; DISABLE-NEXT: add w9, w10, w9
; DISABLE-NEXT: str w9, [x8]
; DISABLE-NEXT: ; InlineAsm Start
; DISABLE-NEXT: nop
; DISABLE-NEXT: ; InlineAsm End
; DISABLE-NEXT: mov w9, #1
; DISABLE-NEXT: b LBB11_2
; DISABLE-NEXT: LBB11_3: ; %if.end
; DISABLE-NEXT: sub sp, x29, #16 ; =16
; DISABLE-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
; DISABLE-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload
; DISABLE-NEXT: ret
entry:
  br i1 undef, label %if.then, label %if.end

if.then:
  %ptr = alloca i32, i32 4
  br label %for.body

for.body:                                         ; preds = %body2, %body1, %if.then
  %sum.03 = phi i32 [ 0, %if.then ], [ %add, %body1 ], [ 1, %body2]
  %call = tail call i32 asm "mov $0, #0", "=r,~{x19}"()
  %add = add nsw i32 %call, %sum.03
  store i32 %add, i32* %ptr
  br i1 undef, label %body1, label %body2

body1:
  tail call void asm sideeffect "nop", "~{x19}"()
  br label %for.body

body2:
  tail call void asm sideeffect "nop", "~{x19}"()
  br label %for.body

if.end:
  ret void
}

; Another infinite loop test this time with two nested infinite loops.
define void @infiniteloop3() {
; ENABLE-LABEL: infiniteloop3:
; ENABLE: ; %bb.0: ; %entry
; ENABLE-NEXT: cbnz wzr, LBB12_5
; ENABLE-NEXT: ; %bb.1: ; %loop2a.preheader
; ENABLE-NEXT: mov x8, xzr
; ENABLE-NEXT: mov x9, xzr
; ENABLE-NEXT: mov x11, xzr
; ENABLE-NEXT: b LBB12_3
; ENABLE-NEXT: LBB12_2: ; %loop2b
; ENABLE-NEXT: ; in Loop: Header=BB12_3 Depth=1
; ENABLE-NEXT: str x10, [x11]
; ENABLE-NEXT: mov x11, x10
; ENABLE-NEXT: LBB12_3: ; %loop1
; ENABLE-NEXT: ; =>This Inner Loop Header: Depth=1
; ENABLE-NEXT: mov x10, x9
; ENABLE-NEXT: ldr x9, [x8]
; ENABLE-NEXT: cbnz x8, LBB12_2
; ENABLE-NEXT: ; %bb.4: ; in Loop: Header=BB12_3 Depth=1
; ENABLE-NEXT: mov x8, x10
; ENABLE-NEXT: mov x11, x10
; ENABLE-NEXT: b LBB12_3
; ENABLE-NEXT: LBB12_5: ; %end
; ENABLE-NEXT: ret
;
; DISABLE-LABEL: infiniteloop3:
; DISABLE: ; %bb.0: ; %entry
; DISABLE-NEXT: cbnz wzr, LBB12_5
; DISABLE-NEXT: ; %bb.1: ; %loop2a.preheader
; DISABLE-NEXT: mov x8, xzr
; DISABLE-NEXT: mov x9, xzr
; DISABLE-NEXT: mov x11, xzr
; DISABLE-NEXT: b LBB12_3
; DISABLE-NEXT: LBB12_2: ; %loop2b
; DISABLE-NEXT: ; in Loop: Header=BB12_3 Depth=1
; DISABLE-NEXT: str x10, [x11]
; DISABLE-NEXT: mov x11, x10
; DISABLE-NEXT: LBB12_3: ; %loop1
; DISABLE-NEXT: ; =>This Inner Loop Header: Depth=1
; DISABLE-NEXT: mov x10, x9
; DISABLE-NEXT: ldr x9, [x8]
; DISABLE-NEXT: cbnz x8, LBB12_2
; DISABLE-NEXT: ; %bb.4: ; in Loop: Header=BB12_3 Depth=1
; DISABLE-NEXT: mov x8, x10
; DISABLE-NEXT: mov x11, x10
; DISABLE-NEXT: b LBB12_3
; DISABLE-NEXT: LBB12_5: ; %end
; DISABLE-NEXT: ret
entry:
  br i1 undef, label %loop2a, label %body

body:                                             ; preds = %entry
  br i1 undef, label %loop2a, label %end

loop1:                                            ; preds = %loop2a, %loop2b
  %var.phi = phi i32* [ %next.phi, %loop2b ], [ %var, %loop2a ]
  %next.phi = phi i32* [ %next.load, %loop2b ], [ %next.var, %loop2a ]
  %0 = icmp eq i32* %var, null
  %next.load = load i32*, i32** undef
  br i1 %0, label %loop2a, label %loop2b

loop2a:                                           ; preds = %loop1, %body, %entry
  %var = phi i32* [ null, %body ], [ null, %entry ], [ %next.phi, %loop1 ]
  %next.var = phi i32* [ undef, %body ], [ null, %entry ], [ %next.load, %loop1 ]
  br label %loop1

loop2b:                                           ; preds = %loop1
  %gep1 = bitcast i32* %var.phi to i32*
  %next.ptr = bitcast i32* %gep1 to i32**
  store i32* %next.phi, i32** %next.ptr
  br label %loop1

end:
  ret void
}

; Re-aligned stack pointer. See bug 26642. Avoid clobbering live
; values in the prologue when re-aligning the stack pointer.
define i32 @stack_realign(i32 %a, i32 %b, i32* %ptr1, i32* %ptr2) {
; ENABLE-LABEL: stack_realign:
; ENABLE: ; %bb.0:
; ENABLE-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
; ENABLE-NEXT: mov x29, sp
; ENABLE-NEXT: sub x9, sp, #16 ; =16
; ENABLE-NEXT: and sp, x9, #0xffffffffffffffe0
; ENABLE-NEXT: .cfi_def_cfa w29, 16
; ENABLE-NEXT: .cfi_offset w30, -8
; ENABLE-NEXT: .cfi_offset w29, -16
; ENABLE-NEXT: lsl w8, w0, w1
; ENABLE-NEXT: cmp w0, w1
; ENABLE-NEXT: lsl w9, w1, w0
; ENABLE-NEXT: b.ge LBB13_2
; ENABLE-NEXT: ; %bb.1: ; %true
; ENABLE-NEXT: str w0, [sp]
; ENABLE-NEXT: LBB13_2: ; %false
; ENABLE-NEXT: str w8, [x2]
; ENABLE-NEXT: str w9, [x3]
; ENABLE-NEXT: mov sp, x29
; ENABLE-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
; ENABLE-NEXT: ret
;
; DISABLE-LABEL: stack_realign:
; DISABLE: ; %bb.0:
; DISABLE-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
; DISABLE-NEXT: mov x29, sp
; DISABLE-NEXT: sub x9, sp, #16 ; =16
; DISABLE-NEXT: and sp, x9, #0xffffffffffffffe0
; DISABLE-NEXT: .cfi_def_cfa w29, 16
; DISABLE-NEXT: .cfi_offset w30, -8
; DISABLE-NEXT: .cfi_offset w29, -16
; DISABLE-NEXT: lsl w8, w0, w1
; DISABLE-NEXT: cmp w0, w1
; DISABLE-NEXT: lsl w9, w1, w0
; DISABLE-NEXT: b.ge LBB13_2
; DISABLE-NEXT: ; %bb.1: ; %true
; DISABLE-NEXT: str w0, [sp]
; DISABLE-NEXT: LBB13_2: ; %false
; DISABLE-NEXT: str w8, [x2]
; DISABLE-NEXT: str w9, [x3]
; DISABLE-NEXT: mov sp, x29
; DISABLE-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
; DISABLE-NEXT: ret
  ; The 32-byte alignment on this slot exceeds the 16-byte stack alignment
  ; (S128 in the datalayout), which is what requires re-aligning sp.
  %tmp = alloca i32, align 32
  %shl1 = shl i32 %a, %b
  %shl2 = shl i32 %b, %a
  %tmp2 = icmp slt i32 %a, %b
  br i1 %tmp2, label %true, label %false

true:
  store i32 %a, i32* %tmp, align 4
  %tmp4 = load i32, i32* %tmp
  br label %false

false:
  %tmp.0 = phi i32 [ %tmp4, %true ], [ %a, %0 ]
  store i32 %shl1, i32* %ptr1
  store i32 %shl2, i32* %ptr2
  ret i32 %tmp.0
}

; Re-aligned stack pointer with all caller-save regs live. See bug
; 26642. In this case we currently avoid shrink wrapping because
; ensuring we have a scratch register to re-align the stack pointer is
; too complicated. Output should be the same for both enabled and
; disabled shrink wrapping.
define void @stack_realign2(i32 %a, i32 %b, i32* %ptr1, i32* %ptr2, i32* %ptr3, i32* %ptr4, i32* %ptr5, i32* %ptr6) {
; ENABLE-LABEL: stack_realign2:
; ENABLE: ; %bb.0:
; ENABLE-NEXT: stp x28, x27, [sp, #-96]!
; 16-byte Folded Spill 1016; ENABLE-NEXT: stp x26, x25, [sp, #16] ; 16-byte Folded Spill 1017; ENABLE-NEXT: stp x24, x23, [sp, #32] ; 16-byte Folded Spill 1018; ENABLE-NEXT: stp x22, x21, [sp, #48] ; 16-byte Folded Spill 1019; ENABLE-NEXT: stp x20, x19, [sp, #64] ; 16-byte Folded Spill 1020; ENABLE-NEXT: stp x29, x30, [sp, #80] ; 16-byte Folded Spill 1021; ENABLE-NEXT: add x29, sp, #80 ; =80 1022; ENABLE-NEXT: sub x9, sp, #32 ; =32 1023; ENABLE-NEXT: and sp, x9, #0xffffffffffffffe0 1024; ENABLE-NEXT: .cfi_def_cfa w29, 16 1025; ENABLE-NEXT: .cfi_offset w30, -8 1026; ENABLE-NEXT: .cfi_offset w29, -16 1027; ENABLE-NEXT: .cfi_offset w19, -24 1028; ENABLE-NEXT: .cfi_offset w20, -32 1029; ENABLE-NEXT: .cfi_offset w21, -40 1030; ENABLE-NEXT: .cfi_offset w22, -48 1031; ENABLE-NEXT: .cfi_offset w23, -56 1032; ENABLE-NEXT: .cfi_offset w24, -64 1033; ENABLE-NEXT: .cfi_offset w25, -72 1034; ENABLE-NEXT: .cfi_offset w26, -80 1035; ENABLE-NEXT: .cfi_offset w27, -88 1036; ENABLE-NEXT: .cfi_offset w28, -96 1037; ENABLE-NEXT: lsl w8, w0, w1 1038; ENABLE-NEXT: lsl w9, w1, w0 1039; ENABLE-NEXT: lsr w10, w0, w1 1040; ENABLE-NEXT: lsr w12, w1, w0 1041; ENABLE-NEXT: add w15, w1, w0 1042; ENABLE-NEXT: subs w17, w1, w0 1043; ENABLE-NEXT: sub w11, w9, w10 1044; ENABLE-NEXT: add w16, w8, w9 1045; ENABLE-NEXT: add w13, w10, w12 1046; ENABLE-NEXT: add w14, w12, w15 1047; ENABLE-NEXT: b.le LBB14_2 1048; ENABLE-NEXT: ; %bb.1: ; %true 1049; ENABLE-NEXT: str w0, [sp] 1050; ENABLE-NEXT: ; InlineAsm Start 1051; ENABLE-NEXT: nop 1052; ENABLE-NEXT: ; InlineAsm End 1053; ENABLE-NEXT: LBB14_2: ; %false 1054; ENABLE-NEXT: str w8, [x2] 1055; ENABLE-NEXT: str w9, [x3] 1056; ENABLE-NEXT: str w10, [x4] 1057; ENABLE-NEXT: str w12, [x5] 1058; ENABLE-NEXT: str w15, [x6] 1059; ENABLE-NEXT: str w17, [x7] 1060; ENABLE-NEXT: stp w0, w1, [x2, #4] 1061; ENABLE-NEXT: stp w16, w11, [x2, #12] 1062; ENABLE-NEXT: stp w13, w14, [x2, #20] 1063; ENABLE-NEXT: sub sp, x29, #80 ; =80 1064; ENABLE-NEXT: ldp x29, x30, [sp, #80] 
; 16-byte Folded Reload 1065; ENABLE-NEXT: ldp x20, x19, [sp, #64] ; 16-byte Folded Reload 1066; ENABLE-NEXT: ldp x22, x21, [sp, #48] ; 16-byte Folded Reload 1067; ENABLE-NEXT: ldp x24, x23, [sp, #32] ; 16-byte Folded Reload 1068; ENABLE-NEXT: ldp x26, x25, [sp, #16] ; 16-byte Folded Reload 1069; ENABLE-NEXT: ldp x28, x27, [sp], #96 ; 16-byte Folded Reload 1070; ENABLE-NEXT: ret 1071; 1072; DISABLE-LABEL: stack_realign2: 1073; DISABLE: ; %bb.0: 1074; DISABLE-NEXT: stp x28, x27, [sp, #-96]! ; 16-byte Folded Spill 1075; DISABLE-NEXT: stp x26, x25, [sp, #16] ; 16-byte Folded Spill 1076; DISABLE-NEXT: stp x24, x23, [sp, #32] ; 16-byte Folded Spill 1077; DISABLE-NEXT: stp x22, x21, [sp, #48] ; 16-byte Folded Spill 1078; DISABLE-NEXT: stp x20, x19, [sp, #64] ; 16-byte Folded Spill 1079; DISABLE-NEXT: stp x29, x30, [sp, #80] ; 16-byte Folded Spill 1080; DISABLE-NEXT: add x29, sp, #80 ; =80 1081; DISABLE-NEXT: sub x9, sp, #32 ; =32 1082; DISABLE-NEXT: and sp, x9, #0xffffffffffffffe0 1083; DISABLE-NEXT: .cfi_def_cfa w29, 16 1084; DISABLE-NEXT: .cfi_offset w30, -8 1085; DISABLE-NEXT: .cfi_offset w29, -16 1086; DISABLE-NEXT: .cfi_offset w19, -24 1087; DISABLE-NEXT: .cfi_offset w20, -32 1088; DISABLE-NEXT: .cfi_offset w21, -40 1089; DISABLE-NEXT: .cfi_offset w22, -48 1090; DISABLE-NEXT: .cfi_offset w23, -56 1091; DISABLE-NEXT: .cfi_offset w24, -64 1092; DISABLE-NEXT: .cfi_offset w25, -72 1093; DISABLE-NEXT: .cfi_offset w26, -80 1094; DISABLE-NEXT: .cfi_offset w27, -88 1095; DISABLE-NEXT: .cfi_offset w28, -96 1096; DISABLE-NEXT: lsl w8, w0, w1 1097; DISABLE-NEXT: lsl w9, w1, w0 1098; DISABLE-NEXT: lsr w10, w0, w1 1099; DISABLE-NEXT: lsr w12, w1, w0 1100; DISABLE-NEXT: add w15, w1, w0 1101; DISABLE-NEXT: subs w17, w1, w0 1102; DISABLE-NEXT: sub w11, w9, w10 1103; DISABLE-NEXT: add w16, w8, w9 1104; DISABLE-NEXT: add w13, w10, w12 1105; DISABLE-NEXT: add w14, w12, w15 1106; DISABLE-NEXT: b.le LBB14_2 1107; DISABLE-NEXT: ; %bb.1: ; %true 1108; DISABLE-NEXT: str w0, [sp] 1109; 
DISABLE-NEXT: ; InlineAsm Start 1110; DISABLE-NEXT: nop 1111; DISABLE-NEXT: ; InlineAsm End 1112; DISABLE-NEXT: LBB14_2: ; %false 1113; DISABLE-NEXT: str w8, [x2] 1114; DISABLE-NEXT: str w9, [x3] 1115; DISABLE-NEXT: str w10, [x4] 1116; DISABLE-NEXT: str w12, [x5] 1117; DISABLE-NEXT: str w15, [x6] 1118; DISABLE-NEXT: str w17, [x7] 1119; DISABLE-NEXT: stp w0, w1, [x2, #4] 1120; DISABLE-NEXT: stp w16, w11, [x2, #12] 1121; DISABLE-NEXT: stp w13, w14, [x2, #20] 1122; DISABLE-NEXT: sub sp, x29, #80 ; =80 1123; DISABLE-NEXT: ldp x29, x30, [sp, #80] ; 16-byte Folded Reload 1124; DISABLE-NEXT: ldp x20, x19, [sp, #64] ; 16-byte Folded Reload 1125; DISABLE-NEXT: ldp x22, x21, [sp, #48] ; 16-byte Folded Reload 1126; DISABLE-NEXT: ldp x24, x23, [sp, #32] ; 16-byte Folded Reload 1127; DISABLE-NEXT: ldp x26, x25, [sp, #16] ; 16-byte Folded Reload 1128; DISABLE-NEXT: ldp x28, x27, [sp], #96 ; 16-byte Folded Reload 1129; DISABLE-NEXT: ret 1130 %tmp = alloca i32, align 32 1131 %tmp1 = shl i32 %a, %b 1132 %tmp2 = shl i32 %b, %a 1133 %tmp3 = lshr i32 %a, %b 1134 %tmp4 = lshr i32 %b, %a 1135 %tmp5 = add i32 %b, %a 1136 %tmp6 = sub i32 %b, %a 1137 %tmp7 = add i32 %tmp1, %tmp2 1138 %tmp8 = sub i32 %tmp2, %tmp3 1139 %tmp9 = add i32 %tmp3, %tmp4 1140 %tmp10 = add i32 %tmp4, %tmp5 1141 %cmp = icmp slt i32 %a, %b 1142 br i1 %cmp, label %true, label %false 1143 1144true: 1145 store i32 %a, i32* %tmp, align 4 1146 call void asm sideeffect "nop", "~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25},~{x26},~{x27},~{x28}"() nounwind 1147 br label %false 1148 1149false: 1150 store i32 %tmp1, i32* %ptr1, align 4 1151 store i32 %tmp2, i32* %ptr2, align 4 1152 store i32 %tmp3, i32* %ptr3, align 4 1153 store i32 %tmp4, i32* %ptr4, align 4 1154 store i32 %tmp5, i32* %ptr5, align 4 1155 store i32 %tmp6, i32* %ptr6, align 4 1156 %idx1 = getelementptr inbounds i32, i32* %ptr1, i64 1 1157 store i32 %a, i32* %idx1, align 4 1158 %idx2 = getelementptr inbounds i32, i32* %ptr1, i64 2 1159 store i32 %b, i32* 
%idx2, align 4 1160 %idx3 = getelementptr inbounds i32, i32* %ptr1, i64 3 1161 store i32 %tmp7, i32* %idx3, align 4 1162 %idx4 = getelementptr inbounds i32, i32* %ptr1, i64 4 1163 store i32 %tmp8, i32* %idx4, align 4 1164 %idx5 = getelementptr inbounds i32, i32* %ptr1, i64 5 1165 store i32 %tmp9, i32* %idx5, align 4 1166 %idx6 = getelementptr inbounds i32, i32* %ptr1, i64 6 1167 store i32 %tmp10, i32* %idx6, align 4 1168 1169 ret void 1170} 1171