; RUN: llc %s -o - -enable-shrink-wrap=true -pass-remarks-output=%t | FileCheck %s --check-prefix=CHECK --check-prefix=ENABLE
; RUN: cat %t | FileCheck %s --check-prefix=REMARKS
; RUN: llc %s -o - -enable-shrink-wrap=false | FileCheck %s --check-prefix=CHECK --check-prefix=DISABLE
;
; Shrink-wrapping tests for x86-64 Darwin. Each function is compiled twice,
; once with the pass enabled and once with it disabled, and the placement of
; the prologue/epilogue is compared between the two runs.
;
; Note: Lots of tests use inline asm instead of regular calls.
; This gives better control over what the register allocation will do.
; Otherwise, we may have a spill right in the entry block, defeating
; shrink-wrapping. Moreover, some of the inline asm statements (nop)
; are here to ensure that the related paths do not end up as critical
; edges.
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
target triple = "x86_64-apple-macosx"


; Initial motivating example: Simple diamond with a call just on one side.
; CHECK-LABEL: foo:
;
; Compare the arguments and jump to exit.
; No prologue needed.
; ENABLE: movl %edi, [[ARG0CPY:%e[a-z]+]]
; ENABLE-NEXT: cmpl %esi, %edi
; ENABLE-NEXT: jge [[EXIT_LABEL:LBB[0-9_]+]]
;
; Prologue code.
; (What we push does not matter. It should be some random scratch register.)
; CHECK: pushq
;
; Compare the arguments and jump to exit.
; After the prologue is set.
; DISABLE: movl %edi, [[ARG0CPY:%e[a-z]+]]
; DISABLE-NEXT: cmpl %esi, %edi
; DISABLE-NEXT: jge [[EXIT_LABEL:LBB[0-9_]+]]
;
; Store %a in the alloca.
; CHECK: movl [[ARG0CPY]], 4(%rsp)
; Set the alloca address in the second argument.
; CHECK-NEXT: leaq 4(%rsp), %rsi
; Set the first argument to zero.
; CHECK-NEXT: xorl %edi, %edi
; CHECK-NEXT: callq _doSomething
;
; With shrink-wrapping, epilogue is just after the call.
; ENABLE-NEXT: addq $8, %rsp
;
; CHECK: [[EXIT_LABEL]]:
;
; Without shrink-wrapping, epilogue is in the exit block.
; Epilogue code. (What we pop does not matter.)
; DISABLE-NEXT: popq
;
; CHECK-NEXT: retq
define i32 @foo(i32 %a, i32 %b) {
  %tmp = alloca i32, align 4
  %tmp2 = icmp slt i32 %a, %b
  br i1 %tmp2, label %true, label %false

true:
  store i32 %a, i32* %tmp, align 4
  %tmp4 = call i32 @doSomething(i32 0, i32* %tmp)
  br label %false

false:
  %tmp.0 = phi i32 [ %tmp4, %true ], [ %a, %0 ]
  ret i32 %tmp.0
}

; Function Attrs: optsize
declare i32 @doSomething(i32, i32*)


; Check that we do not perform the restore inside the loop whereas the save
; is outside.
; CHECK-LABEL: freqSaveAndRestoreOutsideLoop:
;
; Shrink-wrapping allows skipping the prologue in the else case.
; ENABLE: testl %edi, %edi
; ENABLE: je [[ELSE_LABEL:LBB[0-9_]+]]
;
; Prologue code.
; Make sure we save the CSR used in the inline asm: rbx.
; CHECK: pushq %rbx
;
; DISABLE: testl %edi, %edi
; DISABLE: je [[ELSE_LABEL:LBB[0-9_]+]]
;
; SUM is in %esi because it is coalesced with the second
; argument on the else path.
; CHECK: xorl [[SUM:%esi]], [[SUM]]
; CHECK-NEXT: movl $10, [[IV:%e[a-z]+]]
;
; Next BB.
; CHECK: [[LOOP:LBB[0-9_]+]]: ## %for.body
; CHECK: movl $1, [[TMP:%e[a-z]+]]
; CHECK: addl [[TMP]], [[SUM]]
; CHECK-NEXT: decl [[IV]]
; CHECK-NEXT: jne [[LOOP]]
;
; Next BB.
; SUM << 3.
; CHECK: shll $3, [[SUM]]
;
; Jump to epilogue.
; DISABLE: jmp [[EPILOG_BB:LBB[0-9_]+]]
;
; DISABLE: [[ELSE_LABEL]]: ## %if.else
; Shift second argument by one and store into returned register.
; DISABLE: addl %esi, %esi
; DISABLE: [[EPILOG_BB]]: ## %if.end
;
; Epilogue code.
; CHECK-DAG: popq %rbx
; CHECK-DAG: movl %esi, %eax
; CHECK: retq
;
; ENABLE: [[ELSE_LABEL]]: ## %if.else
; Shift second argument by one and store into returned register.
; ENABLE: addl %esi, %esi
; ENABLE-NEXT: movl %esi, %eax
; ENABLE-NEXT: retq
define i32 @freqSaveAndRestoreOutsideLoop(i32 %cond, i32 %N) {
entry:
  %tobool = icmp eq i32 %cond, 0
  br i1 %tobool, label %if.else, label %for.preheader

for.preheader:
  ; The nop keeps this block non-empty (see the note at the top of the file).
  tail call void asm "nop", ""()
  br label %for.body

for.body:                                         ; preds = %for.preheader, %for.body
  %i.05 = phi i32 [ %inc, %for.body ], [ 0, %for.preheader ]
  %sum.04 = phi i32 [ %add, %for.body ], [ 0, %for.preheader ]
  ; The inline asm clobbers ebx, which forces rbx to be saved/restored.
  %call = tail call i32 asm "movl $$1, $0", "=r,~{ebx}"()
  %add = add nsw i32 %call, %sum.04
  %inc = add nuw nsw i32 %i.05, 1
  %exitcond = icmp eq i32 %inc, 10
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  %shl = shl i32 %add, 3
  br label %if.end

if.else:                                          ; preds = %entry
  %mul = shl nsw i32 %N, 1
  br label %if.end

if.end:                                           ; preds = %if.else, %for.end
  %sum.1 = phi i32 [ %shl, %for.end ], [ %mul, %if.else ]
  ret i32 %sum.1
}

declare i32 @something(...)

; Check that we do not perform the shrink-wrapping inside the loop even
; though that would be legal. The cost model must prevent that.
; CHECK-LABEL: freqSaveAndRestoreOutsideLoop2:
; Prologue code.
; Make sure we save the CSR used in the inline asm: rbx.
; CHECK: pushq %rbx
; CHECK: nop
; CHECK: xorl [[SUM:%e[a-z]+]], [[SUM]]
; CHECK-NEXT: movl $10, [[IV:%e[a-z]+]]
; Next BB.
; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: ## %for.body
; CHECK: movl $1, [[TMP:%e[a-z]+]]
; CHECK: addl [[TMP]], [[SUM]]
; CHECK-NEXT: decl [[IV]]
; CHECK-NEXT: jne [[LOOP_LABEL]]
; Next BB.
; CHECK: ## %for.exit
; CHECK: nop
; CHECK: popq %rbx
; CHECK-NEXT: retq
define i32 @freqSaveAndRestoreOutsideLoop2(i32 %cond) {
entry:
  br label %for.preheader

for.preheader:
  tail call void asm "nop", ""()
  br label %for.body

for.body:                                         ; preds = %for.body, %for.preheader
  %i.04 = phi i32 [ 0, %for.preheader ], [ %inc, %for.body ]
  %sum.03 = phi i32 [ 0, %for.preheader ], [ %add, %for.body ]
  ; The only CSR clobber (ebx) is inside the loop body.
  %call = tail call i32 asm "movl $$1, $0", "=r,~{ebx}"()
  %add = add nsw i32 %call, %sum.03
  %inc = add nuw nsw i32 %i.04, 1
  %exitcond = icmp eq i32 %inc, 10
  br i1 %exitcond, label %for.exit, label %for.body

for.exit:
  tail call void asm "nop", ""()
  br label %for.end

for.end:                                          ; preds = %for.exit
  ret i32 %add
}

; Check with a more complex case that we do not have save within the loop and
; restore outside.
; CHECK-LABEL: loopInfoSaveOutsideLoop:
;
; ENABLE: testl %edi, %edi
; ENABLE-NEXT: je [[ELSE_LABEL:LBB[0-9_]+]]
;
; Prologue code.
; Make sure we save the CSR used in the inline asm: rbx.
; CHECK: pushq %rbx
;
; DISABLE: testl %edi, %edi
; DISABLE-NEXT: je [[ELSE_LABEL:LBB[0-9_]+]]
;
; CHECK: nop
; CHECK: xorl [[SUM:%esi]], [[SUM]]
; CHECK-NEXT: movl $10, [[IV:%e[a-z]+]]
;
; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: ## %for.body
; CHECK: movl $1, [[TMP:%e[a-z]+]]
; CHECK: addl [[TMP]], [[SUM]]
; CHECK-NEXT: decl [[IV]]
; CHECK-NEXT: jne [[LOOP_LABEL]]
; Next BB.
; CHECK: nop
; CHECK: shll $3, [[SUM]]
;
; DISABLE: jmp [[EPILOG_BB:LBB[0-9_]+]]
;
; DISABLE: [[ELSE_LABEL]]: ## %if.else
; Shift second argument by one and store into returned register.
; DISABLE: addl %esi, %esi
; DISABLE: [[EPILOG_BB]]: ## %if.end
;
; Epilogue code.
; CHECK-DAG: popq %rbx
; CHECK-DAG: movl %esi, %eax
; CHECK: retq
;
; ENABLE: [[ELSE_LABEL]]: ## %if.else
; Shift second argument by one and store into returned register.
; ENABLE: addl %esi, %esi
; ENABLE-NEXT: movl %esi, %eax
; ENABLE-NEXT: retq
define i32 @loopInfoSaveOutsideLoop(i32 %cond, i32 %N) {
entry:
  %tobool = icmp eq i32 %cond, 0
  br i1 %tobool, label %if.else, label %for.preheader

for.preheader:
  tail call void asm "nop", ""()
  br label %for.body

for.body:                                         ; preds = %for.preheader, %for.body
  %i.05 = phi i32 [ %inc, %for.body ], [ 0, %for.preheader ]
  %sum.04 = phi i32 [ %add, %for.body ], [ 0, %for.preheader ]
  %call = tail call i32 asm "movl $$1, $0", "=r,~{ebx}"()
  %add = add nsw i32 %call, %sum.04
  %inc = add nuw nsw i32 %i.05, 1
  %exitcond = icmp eq i32 %inc, 10
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ; A second ebx clobber placed after the loop.
  tail call void asm "nop", "~{ebx}"()
  %shl = shl i32 %add, 3
  br label %if.end

if.else:                                          ; preds = %entry
  %mul = shl nsw i32 %N, 1
  br label %if.end

if.end:                                           ; preds = %if.else, %for.end
  %sum.1 = phi i32 [ %shl, %for.end ], [ %mul, %if.else ]
  ret i32 %sum.1
}

; Check with a more complex case that we do not have restore within the loop and
; save outside.
; CHECK-LABEL: loopInfoRestoreOutsideLoop:
;
; ENABLE: testl %edi, %edi
; ENABLE-NEXT: je [[ELSE_LABEL:LBB[0-9_]+]]
;
; Prologue code.
; Make sure we save the CSR used in the inline asm: rbx.
; CHECK: pushq %rbx
;
; DISABLE: testl %edi, %edi
; DISABLE-NEXT: je [[ELSE_LABEL:LBB[0-9_]+]]
;
; CHECK: nop
; CHECK: xorl [[SUM:%esi]], [[SUM]]
; CHECK-NEXT: movl $10, [[IV:%e[a-z]+]]
;
; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: ## %for.body
; CHECK: movl $1, [[TMP:%e[a-z]+]]
; CHECK: addl [[TMP]], [[SUM]]
; CHECK-NEXT: decl [[IV]]
; CHECK-NEXT: jne [[LOOP_LABEL]]
; Next BB.
; CHECK: shll $3, [[SUM]]
;
; DISABLE: jmp [[EPILOG_BB:LBB[0-9_]+]]
;
; DISABLE: [[ELSE_LABEL]]: ## %if.else

; Shift second argument by one and store into returned register.
; DISABLE: addl %esi, %esi
; DISABLE: [[EPILOG_BB]]: ## %if.end
;
; Epilogue code.
; CHECK-DAG: popq %rbx
; CHECK-DAG: movl %esi, %eax
; CHECK: retq
;
; ENABLE: [[ELSE_LABEL]]: ## %if.else
; Shift second argument by one and store into returned register.
; ENABLE: addl %esi, %esi
; ENABLE-NEXT: movl %esi, %eax
; ENABLE-NEXT: retq
define i32 @loopInfoRestoreOutsideLoop(i32 %cond, i32 %N) nounwind {
entry:
  %tobool = icmp eq i32 %cond, 0
  br i1 %tobool, label %if.else, label %if.then

if.then:                                          ; preds = %entry
  ; An ebx clobber placed before the loop.
  tail call void asm "nop", "~{ebx}"()
  br label %for.body

for.body:                                         ; preds = %for.body, %if.then
  %i.05 = phi i32 [ 0, %if.then ], [ %inc, %for.body ]
  %sum.04 = phi i32 [ 0, %if.then ], [ %add, %for.body ]
  %call = tail call i32 asm "movl $$1, $0", "=r,~{ebx}"()
  %add = add nsw i32 %call, %sum.04
  %inc = add nuw nsw i32 %i.05, 1
  %exitcond = icmp eq i32 %inc, 10
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  %shl = shl i32 %add, 3
  br label %if.end

if.else:                                          ; preds = %entry
  %mul = shl nsw i32 %N, 1
  br label %if.end

if.end:                                           ; preds = %if.else, %for.end
  %sum.1 = phi i32 [ %shl, %for.end ], [ %mul, %if.else ]
  ret i32 %sum.1
}

; Check that we handle functions with no frame information correctly.
; CHECK-LABEL: emptyFrame:
; CHECK: ## %entry
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: retq
define i32 @emptyFrame() {
entry:
  ret i32 0
}

; Check that we handle inline asm correctly.
; CHECK-LABEL: inlineAsm:
;
; ENABLE: testl %edi, %edi
; ENABLE-NEXT: je [[ELSE_LABEL:LBB[0-9_]+]]
;
; Prologue code.
; Make sure we save the CSR used in the inline asm: rbx.
; CHECK: pushq %rbx
;
; DISABLE: testl %edi, %edi
; DISABLE-NEXT: je [[ELSE_LABEL:LBB[0-9_]+]]
;
; CHECK: nop
; CHECK: movl $10, [[IV:%e[a-z]+]]
;
; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: ## %for.body
; Inline asm statement.
; CHECK: addl $1, %ebx
; CHECK: decl [[IV]]
; CHECK-NEXT: jne [[LOOP_LABEL]]
; Next BB.
; CHECK: nop
; CHECK: xorl %esi, %esi
;
; DISABLE: jmp [[EPILOG_BB:LBB[0-9_]+]]
;
; DISABLE: [[ELSE_LABEL]]: ## %if.else
; Shift second argument by one and store into returned register.
; DISABLE: addl %esi, %esi
; DISABLE: [[EPILOG_BB]]: ## %if.end
;
; Epilogue code.
; CHECK-DAG: popq %rbx
; CHECK-DAG: movl %esi, %eax
; CHECK: retq
;
; ENABLE: [[ELSE_LABEL]]: ## %if.else
; Shift second argument by one and store into returned register.
; ENABLE: addl %esi, %esi
; ENABLE-NEXT: movl %esi, %eax
; ENABLE-NEXT: retq
define i32 @inlineAsm(i32 %cond, i32 %N) {
entry:
  %tobool = icmp eq i32 %cond, 0
  br i1 %tobool, label %if.else, label %for.preheader

for.preheader:
  tail call void asm "nop", ""()
  br label %for.body

for.body:                                         ; preds = %for.preheader, %for.body
  %i.03 = phi i32 [ %inc, %for.body ], [ 0, %for.preheader ]
  ; The asm text names %ebx directly and lists it as a clobber.
  tail call void asm "addl $$1, %ebx", "~{ebx}"()
  %inc = add nuw nsw i32 %i.03, 1
  %exitcond = icmp eq i32 %inc, 10
  br i1 %exitcond, label %for.exit, label %for.body

for.exit:
  tail call void asm "nop", ""()
  br label %if.end

if.else:                                          ; preds = %entry
  %mul = shl nsw i32 %N, 1
  br label %if.end

if.end:                                           ; preds = %for.exit, %if.else
  %sum.0 = phi i32 [ %mul, %if.else ], [ 0, %for.exit ]
  ret i32 %sum.0
}

; Check that we handle calls to variadic functions correctly.
; CHECK-LABEL: callVariadicFunc:
;
; ENABLE: testl %edi, %edi
; ENABLE-NEXT: je [[ELSE_LABEL:LBB[0-9_]+]]
;
; Prologue code.
; CHECK: pushq
;
; DISABLE: testl %edi, %edi
; DISABLE-NEXT: je [[ELSE_LABEL:LBB[0-9_]+]]
;
; Setup of the varargs.
; CHECK: movl %esi, (%rsp)
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: %esi, %edi
; CHECK-NEXT: %esi, %edx
; CHECK-NEXT: %esi, %ecx
; CHECK-NEXT: %esi, %r8d
; CHECK-NEXT: %esi, %r9d
; CHECK-NEXT: callq _someVariadicFunc
; CHECK-NEXT: movl %eax, %esi
; CHECK-NEXT: shll $3, %esi
;
; ENABLE-NEXT: addq $8, %rsp
; ENABLE-NEXT: movl %esi, %eax
; ENABLE-NEXT: retq
;
; DISABLE: jmp [[IFEND_LABEL:LBB[0-9_]+]]
;
; CHECK: [[ELSE_LABEL]]: ## %if.else
; Shift second argument by one and store into returned register.
; CHECK: addl %esi, %esi
;
; DISABLE: [[IFEND_LABEL]]: ## %if.end
;
; Epilogue code.
; CHECK-NEXT: movl %esi, %eax
; DISABLE-NEXT: popq
; CHECK-NEXT: retq
define i32 @callVariadicFunc(i32 %cond, i32 %N) {
entry:
  %tobool = icmp eq i32 %cond, 0
  br i1 %tobool, label %if.else, label %if.then

if.then:                                          ; preds = %entry
  ; Seven i32 arguments are passed; the checks above expect one of them to be
  ; stored to the stack at (%rsp).
  %call = tail call i32 (i32, ...) @someVariadicFunc(i32 %N, i32 %N, i32 %N, i32 %N, i32 %N, i32 %N, i32 %N)
  %shl = shl i32 %call, 3
  br label %if.end

if.else:                                          ; preds = %entry
  %mul = shl nsw i32 %N, 1
  br label %if.end

if.end:                                           ; preds = %if.else, %if.then
  %sum.0 = phi i32 [ %shl, %if.then ], [ %mul, %if.else ]
  ret i32 %sum.0
}

declare i32 @someVariadicFunc(i32, ...)

; Check that we use LEA not to clobber EFLAGS.
; Types referenced by @find_temp_slot_from_address and @useLEA.
%struct.temp_slot = type { %struct.temp_slot*, %struct.rtx_def*, %struct.rtx_def*, i32, i64, %union.tree_node*, %union.tree_node*, i8, i8, i32, i32, i64, i64 }
%union.tree_node = type { %struct.tree_decl }
%struct.tree_decl = type { %struct.tree_common, i8*, i32, i32, %union.tree_node*, i48, %union.anon, %union.tree_node*, %union.tree_node*, %union.tree_node*, %union.tree_node*, %union.tree_node*, %union.tree_node*, %union.tree_node*, %union.tree_node*, %union.tree_node*, %union.tree_node*, %struct.rtx_def*, %struct.rtx_def*, %union.anon.1, %union.tree_node*, %union.tree_node*, %union.tree_node*, i64, %struct.lang_decl* }
%struct.tree_common = type { %union.tree_node*, %union.tree_node*, i32 }
%union.anon = type { i64 }
%union.anon.1 = type { %struct.function* }
%struct.function = type { %struct.eh_status*, %struct.stmt_status*, %struct.expr_status*, %struct.emit_status*, %struct.varasm_status*, i8*, %union.tree_node*, %struct.function*, i32, i32, i32, i32, %struct.rtx_def*, %struct.ix86_args, %struct.rtx_def*, %struct.rtx_def*, i8*, %struct.initial_value_struct*, i32, %union.tree_node*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %union.tree_node*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, i64, %union.tree_node*, %union.tree_node*, %struct.rtx_def*, %struct.rtx_def*, i32, %struct.rtx_def**, %struct.temp_slot*, i32, i32, i32, %struct.var_refs_queue*, i32, i32, i8*, %union.tree_node*, %struct.rtx_def*, i32, i32, %struct.machine_function*, i32, i32, %struct.language_function*, %struct.rtx_def*, i24 }
%struct.eh_status = type opaque
%struct.stmt_status = type opaque
%struct.expr_status = type { i32, i32, i32, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def* }
%struct.emit_status = type { i32, i32, %struct.rtx_def*, %struct.rtx_def*, %union.tree_node*, %struct.sequence_stack*, i32, i32, i8*, i32, i8*, %union.tree_node**, %struct.rtx_def** }
%struct.sequence_stack = type { %struct.rtx_def*, %struct.rtx_def*, %union.tree_node*, %struct.sequence_stack* }
%struct.varasm_status = type opaque
%struct.ix86_args = type { i32, i32, i32, i32, i32, i32, i32 }
%struct.initial_value_struct = type opaque
%struct.var_refs_queue = type { %struct.rtx_def*, i32, i32, %struct.var_refs_queue* }
%struct.machine_function = type opaque
%struct.language_function = type opaque
%struct.lang_decl = type opaque
%struct.rtx_def = type { i32, [1 x %union.rtunion_def] }
%union.rtunion_def = type { i64 }

declare hidden fastcc %struct.temp_slot* @find_temp_slot_from_address(%struct.rtx_def* readonly)

; CHECK-LABEL: useLEA:
; DISABLE: pushq
;
; CHECK: testq %rdi, %rdi
; CHECK-NEXT: je [[CLEANUP:LBB[0-9_]+]]
;
; CHECK: movzwl (%rdi), [[BF_LOAD:%e[a-z]+]]
; CHECK-NEXT: cmpl $66, [[BF_LOAD]]
; CHECK-NEXT: jne [[CLEANUP]]
;
; CHECK: movq 8(%rdi), %rdi
; CHECK-NEXT: movzwl (%rdi), %e[[BF_LOAD2:[a-z]+]]
; CHECK-NEXT: leal -54(%r[[BF_LOAD2]]), [[TMP:%e[a-z]+]]
; CHECK-NEXT: cmpl $14, [[TMP]]
; CHECK-NEXT: ja [[LOR_LHS_FALSE:LBB[0-9_]+]]
;
; CHECK: movl $24599, [[TMP2:%e[a-z]+]]
; CHECK-NEXT: btl [[TMP]], [[TMP2]]
; CHECK-NEXT: jae [[LOR_LHS_FALSE:LBB[0-9_]+]]
;
; CHECK: [[CLEANUP]]: ## %cleanup
; DISABLE: popq
; CHECK-NEXT: retq
;
; CHECK: [[LOR_LHS_FALSE]]: ## %lor.lhs.false
; CHECK: cmpl $134, %e[[BF_LOAD2]]
; CHECK-NEXT: je [[CLEANUP]]
;
; CHECK: cmpl $140, %e[[BF_LOAD2]]
; CHECK-NEXT: je [[CLEANUP]]
;
; ENABLE: pushq
; CHECK: callq _find_temp_slot_from_address
; CHECK-NEXT: testq %rax, %rax
;
; The adjustment must use LEA here (or be moved above the test).
; ENABLE-NEXT: leaq 8(%rsp), %rsp
;
; CHECK-NEXT: je [[CLEANUP]]
;
; CHECK: movb $1, 57(%rax)
define void @useLEA(%struct.rtx_def* readonly %x) {
entry:
  %cmp = icmp eq %struct.rtx_def* %x, null
  br i1 %cmp, label %cleanup, label %if.end

if.end:                                           ; preds = %entry
  %tmp = getelementptr inbounds %struct.rtx_def, %struct.rtx_def* %x, i64 0, i32 0
  %bf.load = load i32, i32* %tmp, align 8
  %bf.clear = and i32 %bf.load, 65535
  %cmp1 = icmp eq i32 %bf.clear, 66
  br i1 %cmp1, label %lor.lhs.false, label %cleanup

lor.lhs.false:                                    ; preds = %if.end
  %arrayidx = getelementptr inbounds %struct.rtx_def, %struct.rtx_def* %x, i64 0, i32 1, i64 0
  %rtx = bitcast %union.rtunion_def* %arrayidx to %struct.rtx_def**
  %tmp1 = load %struct.rtx_def*, %struct.rtx_def** %rtx, align 8
  %tmp2 = getelementptr inbounds %struct.rtx_def, %struct.rtx_def* %tmp1, i64 0, i32 0
  %bf.load2 = load i32, i32* %tmp2, align 8
  %bf.clear3 = and i32 %bf.load2, 65535
  switch i32 %bf.clear3, label %if.end.55 [
    i32 67, label %cleanup
    i32 68, label %cleanup
    i32 54, label %cleanup
    i32 55, label %cleanup
    i32 58, label %cleanup
    i32 134, label %cleanup
    i32 56, label %cleanup
    i32 140, label %cleanup
  ]

if.end.55:                                        ; preds = %lor.lhs.false
  ; The only call in the function; the checks above expect the ENABLE prologue
  ; right before it and the LEA stack adjustment between the test and the je.
  %call = tail call fastcc %struct.temp_slot* @find_temp_slot_from_address(%struct.rtx_def* %tmp1) #2
  %cmp59 = icmp eq %struct.temp_slot* %call, null
  br i1 %cmp59, label %cleanup, label %if.then.60

if.then.60:                                       ; preds = %if.end.55
  %addr_taken = getelementptr inbounds %struct.temp_slot, %struct.temp_slot* %call, i64 0, i32 8
  store i8 1, i8* %addr_taken, align 1
  br label %cleanup

cleanup:                                          ; preds = %if.then.60, %if.end.55, %lor.lhs.false, %lor.lhs.false, %lor.lhs.false, %lor.lhs.false, %lor.lhs.false, %lor.lhs.false, %lor.lhs.false, %lor.lhs.false, %if.end, %entry
  ret void
}

; Make sure we do not insert unreachable code after noreturn function.
; Although it is not incorrect to insert such code, it is useless
; and it hurts the binary size.
;
; CHECK-LABEL: noreturn:
; DISABLE: pushq
;
; CHECK: testb %dil, %dil
; CHECK-NEXT: jne [[ABORT:LBB[0-9_]+]]
;
; CHECK: movl $42, %eax
;
; DISABLE-NEXT: popq
;
; CHECK-NEXT: retq
;
; CHECK: [[ABORT]]: ## %if.abort
;
; ENABLE: pushq
;
; CHECK: callq _abort
; ENABLE-NOT: popq
define i32 @noreturn(i8 signext %bad_thing) {
entry:
  %tobool = icmp eq i8 %bad_thing, 0
  br i1 %tobool, label %if.end, label %if.abort

if.abort:
  tail call void @abort() #0
  unreachable

if.end:
  ret i32 42
}

declare void @abort() #0

attributes #0 = { noreturn nounwind }


; Make sure that we handle infinite loops properly. When checking that the Save
; and Restore blocks are control flow equivalent, the loop searches for the
; immediate (post) dominator for the (restore) save blocks. When either the Save
; or Restore block is located in an infinite loop the only immediate (post)
; dominator is itself. In this case, we cannot perform shrink wrapping, but we
; should return gracefully and continue compilation.
; The only condition for this test is the compilation finishes correctly.
;
; CHECK-LABEL: infiniteloop
; CHECK: retq
define void @infiniteloop() {
entry:
  br i1 undef, label %if.then, label %if.end

if.then:
  %ptr = alloca i32, i32 4
  br label %for.body

for.body:                                         ; preds = %for.body, %if.then
  %sum.03 = phi i32 [ 0, %if.then ], [ %add, %for.body ]
  %call = tail call i32 asm "movl $$1, $0", "=r,~{ebx}"()
  %add = add nsw i32 %call, %sum.03
  store i32 %add, i32* %ptr
  ; Unconditional back-edge: this loop has no exit.
  br label %for.body

if.end:
  ret void
}

; Another infinite loop test this time with a body bigger than just one block.
; CHECK-LABEL: infiniteloop2
; CHECK: retq
define void @infiniteloop2() {
entry:
  br i1 undef, label %if.then, label %if.end

if.then:
  %ptr = alloca i32, i32 4
  br label %for.body

for.body:                                         ; preds = %body2, %body1, %if.then
  %sum.03 = phi i32 [ 0, %if.then ], [ %add, %body1 ], [ 1, %body2]
  %call = tail call i32 asm "movl $$1, $0", "=r,~{ebx}"()
  %add = add nsw i32 %call, %sum.03
  store i32 %add, i32* %ptr
  br i1 undef, label %body1, label %body2

body1:
  tail call void asm sideeffect "nop", "~{ebx}"()
  br label %for.body

body2:
  tail call void asm sideeffect "nop", "~{ebx}"()
  br label %for.body

if.end:
  ret void
}

; Another infinite loop test this time with two nested infinite loops.
; CHECK-LABEL: infiniteloop3
; CHECK: retq
define void @infiniteloop3() {
entry:
  br i1 undef, label %loop2a, label %body

body:                                             ; preds = %entry
  br i1 undef, label %loop2a, label %end

loop1:                                            ; preds = %loop2a, %loop2b
  %var.phi = phi i32* [ %next.phi, %loop2b ], [ %var, %loop2a ]
  %next.phi = phi i32* [ %next.load, %loop2b ], [ %next.var, %loop2a ]
  %0 = icmp eq i32* %var, null
  %next.load = load i32*, i32** undef
  br i1 %0, label %loop2a, label %loop2b

loop2a:                                           ; preds = %loop1, %body, %entry
  %var = phi i32* [ null, %body ], [ null, %entry ], [ %next.phi, %loop1 ]
  %next.var = phi i32* [ undef, %body ], [ null, %entry ], [ %next.load, %loop1 ]
  br label %loop1

loop2b:                                           ; preds = %loop1
  %gep1 = bitcast i32* %var.phi to i32*
  %next.ptr = bitcast i32* %gep1 to i32**
  store i32* %next.phi, i32** %next.ptr
  br label %loop1

end:
  ret void
}

; Check that we just don't bail out on RegMask.
; In this case, the RegMask does not touch a CSR so we are good to go!
; CHECK-LABEL: regmask:
;
; Compare the arguments and jump to exit.
; No prologue needed.
; ENABLE: cmpl %esi, %edi
; ENABLE-NEXT: jge [[EXIT_LABEL:LBB[0-9_]+]]
;
; Prologue code.
; (What we push does not matter. It should be some random scratch register.)
; CHECK: pushq
;
; Compare the arguments and jump to exit.
; After the prologue is set.
; DISABLE: cmpl %esi, %edi
; DISABLE-NEXT: jge [[EXIT_LABEL:LBB[0-9_]+]]
;
; CHECK: nop
; Set the first argument to zero.
; CHECK: xorl %edi, %edi
; Set the second argument to addr.
; CHECK-NEXT: movq %rdx, %rsi
; CHECK-NEXT: callq _doSomething
; CHECK-NEXT: popq
; CHECK-NEXT: retq
;
; CHECK: [[EXIT_LABEL]]:
; Set the first argument to 6.
; CHECK-NEXT: movl $6, %edi
; Set the second argument to addr.
; CHECK-NEXT: movq %rdx, %rsi
;
; Without shrink-wrapping, we need to restore the stack before
; making the tail call.
; Epilogue code.
; DISABLE-NEXT: popq
;
; CHECK-NEXT: jmp _doSomething
define i32 @regmask(i32 %a, i32 %b, i32* %addr) {
  %tmp2 = icmp slt i32 %a, %b
  br i1 %tmp2, label %true, label %false

true:
  ; Clobber a CSR so that we check something on the regmask
  ; of the tail call.
  tail call void asm sideeffect "nop", "~{ebx}"()
  %tmp4 = call i32 @doSomething(i32 0, i32* %addr)
  br label %end

false:
  %tmp5 = tail call i32 @doSomething(i32 6, i32* %addr)
  br label %end

end:
  %tmp.0 = phi i32 [ %tmp4, %true ], [ %tmp5, %false ]
  ret i32 %tmp.0
}

@b = internal unnamed_addr global i1 false
@c = internal unnamed_addr global i8 0, align 1
@a = common global i32 0, align 4

; Make sure the prologue does not clobber the EFLAGS when
; it is live across.
; PR25629.
; Note: The registers may change in the following patterns, but
; because they imply register hierarchy (e.g., eax, al) this is
; tricky to write robust patterns.
;
; CHECK-LABEL: useLEAForPrologue:
;
; Prologue is at the beginning of the function when shrink-wrapping
; is disabled.
; DISABLE: pushq
; The stack adjustment can use SUB instr because we do not need to
; preserve the EFLAGS at this point.
; DISABLE-NEXT: subq $16, %rsp
;
; Load the value of b.
; Create the zero value for the select assignment.
; CHECK: xorl [[CMOVE_VAL:%eax]], [[CMOVE_VAL]]
; CHECK-NEXT: cmpb $0, _b(%rip)
; CHECK-NEXT: jne [[STOREC_LABEL:LBB[0-9_]+]]
;
; CHECK: movb $48, [[CMOVE_VAL:%al]]
;
; CHECK: [[STOREC_LABEL]]:
;
; ENABLE-NEXT: pushq
; For the stack adjustment, we need to preserve the EFLAGS.
; ENABLE-NEXT: leaq -16(%rsp), %rsp
;
; Technically, we should use CMOVE_VAL here or its subregister.
; CHECK-NEXT: movb %al, _c(%rip)
; testb set the EFLAGS read here.
; CHECK-NEXT: je [[VARFUNC_CALL:LBB[0-9_]+]]
;
; The code of the loop is not interesting.
; [...]
;
; CHECK: [[VARFUNC_CALL]]:
; Set the null parameter.
; CHECK-NEXT: xorl %edi, %edi
; CHECK-NEXT: callq _varfunc
;
; Set the return value.
; CHECK-NEXT: xorl %eax, %eax
;
; Epilogue code.
; CHECK-NEXT: addq $16, %rsp
; CHECK-NEXT: popq
; CHECK-NEXT: retq
define i32 @useLEAForPrologue(i32 %d, i32 %a, i8 %c) #3 {
entry:
  %tmp = alloca i3
  %.b = load i1, i1* @b, align 1
  %bool = select i1 %.b, i8 0, i8 48
  store i8 %bool, i8* @c, align 1
  br i1 %.b, label %for.body.lr.ph, label %for.end

for.body.lr.ph:                                   ; preds = %entry
  tail call void asm sideeffect "nop", "~{ebx}"()
  br label %for.body

for.body:                                         ; preds = %for.body.lr.ph, %for.body
  %inc6 = phi i8 [ %c, %for.body.lr.ph ], [ %inc, %for.body ]
  %cond5 = phi i32 [ %a, %for.body.lr.ph ], [ %conv3, %for.body ]
  %cmp2 = icmp slt i32 %d, %cond5
  %conv3 = zext i1 %cmp2 to i32
  %inc = add i8 %inc6, 1
  %cmp = icmp slt i8 %inc, 45
  br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge

for.cond.for.end_crit_edge:                       ; preds = %for.body
  store i32 %conv3, i32* @a, align 4
  br label %for.end

for.end:                                          ; preds = %for.cond.for.end_crit_edge, %entry
  %call = tail call i32 (i8*) @varfunc(i8* null)
  ret i32 0
}

declare i32 @varfunc(i8* nocapture readonly)

@sum1 = external hidden thread_local global i32, align 4


; Function Attrs: nounwind
; Make sure the TLS call used to access @sum1 happens after the prologue
; and before the epilogue.
; TLS calls used to be wrongly modeled and shrink-wrapping would have inserted
; the prologue and epilogue just around the call to doSomething.
; PR25820.
;
; CHECK-LABEL: tlsCall:
; CHECK: pushq
; CHECK: testb $1, %dil
; CHECK: je [[ELSE_LABEL:LBB[0-9_]+]]
;
; master bb
; CHECK: movq _sum1@TLVP(%rip), %rdi
; CHECK-NEXT: callq *(%rdi)
; CHECK: jmp [[EXIT_LABEL:LBB[0-9_]+]]
;
; [[ELSE_LABEL]]:
; CHECK: callq _doSomething
;
; [[EXIT_LABEL]]:
; CHECK: popq
; CHECK-NEXT: retq
define i32 @tlsCall(i1 %bool1, i32 %arg, i32* readonly dereferenceable(4) %sum1) #3 {
entry:
  br i1 %bool1, label %master, label %else

master:
  %tmp1 = load i32, i32* %sum1, align 4
  ; Store to the thread_local global @sum1; the checks above expect this to
  ; lower to an indirect call through _sum1@TLVP.
  store i32 %tmp1, i32* @sum1, align 4
  br label %exit

else:
  %call = call i32 @doSomething(i32 0, i32* null)
  br label %exit

exit:
  %res = phi i32 [ %arg, %master], [ %call, %else ]
  ret i32 %res
}

attributes #3 = { nounwind }

@irreducibleCFGa = common global i32 0, align 4
@irreducibleCFGf = common global i8 0, align 1
@irreducibleCFGb = common global i32 0, align 4

; Check that we do not run shrink-wrapping on irreducible CFGs until
; it is actually supported.
; At the moment, on those CFGs the loop information may be incorrect
; and since we use that information to do the placement, we may end up
; inserting the prologue/epilogue at incorrect places.
; PR25988.
;
; CHECK-LABEL: irreducibleCFG:
; CHECK: %entry
; Make sure the prologue happens in the entry block.
; CHECK-NEXT: pushq
; ...
; Make sure the epilogue happens in the exit block.
; CHECK-NOT: popq
; CHECK: popq
; CHECK-NEXT: popq
; CHECK-NEXT: retq
; Make sure we emit missed optimization remarks for this.
; REMARKS: Pass: shrink-wrap
; REMARKS-NEXT: Name: UnsupportedIrreducibleCFG
; REMARKS-NEXT: Function: irreducibleCFG
; REMARKS-NEXT: Args:
; REMARKS-NEXT: - String: Irreducible CFGs are not supported yet

define i32 @irreducibleCFG() #4 {
entry:
  %i0 = load i32, i32* @irreducibleCFGa, align 4
  %.pr = load i8, i8* @irreducibleCFGf, align 1
  %bool = icmp eq i8 %.pr, 0
  br i1 %bool, label %split, label %preheader

preheader:
  br label %preheader

split:
  %i1 = load i32, i32* @irreducibleCFGb, align 4
  %tobool1.i = icmp ne i32 %i1, 0
  br i1 %tobool1.i, label %for.body4.i, label %for.cond8.i.preheader

for.body4.i:
  %call.i = tail call i32 (...) @something(i32 %i0)
  br label %for.cond8

for.cond8:
  %p1 = phi i32 [ %inc18.i, %for.inc ], [ 0, %for.body4.i ]
  %.pr1.pr = load i32, i32* @irreducibleCFGb, align 4
  br label %for.cond8.i.preheader

; Entered both from %split (outside the cycle) and from %for.cond8 (inside it).
for.cond8.i.preheader:
  %.pr1 = phi i32 [ %.pr1.pr, %for.cond8 ], [ %i1, %split ]
  %p13 = phi i32 [ %p1, %for.cond8 ], [ 0, %split ]
  br label %for.inc

fn1.exit:
  ret i32 0

for.inc:
  %inc18.i = add nuw nsw i32 %p13, 1
  %cmp = icmp slt i32 %inc18.i, 7
  br i1 %cmp, label %for.cond8, label %fn1.exit
}

attributes #4 = { "no-frame-pointer-elim"="true" }

@x = external global i32, align 4
@y = external global i32, align 4

; The post-dominator tree does not include the branch containing the infinite
; loop, which can result in a misplacement of the restore block, if we're
; looking for the nearest common post-dominator of an "unreachable" block.

; CHECK-LABEL: infiniteLoopNoSuccessor:
; CHECK: ## %bb.0:
; Make sure the prologue happens in the entry block.
; CHECK-NEXT: pushq %rbp
; ...
; Make sure we don't shrink-wrap.
; CHECK: ## %bb.1
; CHECK-NOT: pushq %rbp
; ...
; Make sure the epilogue happens in the exit block.
; CHECK: ## %bb.5
; CHECK: popq %rbp
; CHECK-NEXT: retq
define void @infiniteLoopNoSuccessor() #5 {
  %1 = load i32, i32* @x, align 4
  %2 = icmp ne i32 %1, 0
  br i1 %2, label %3, label %4

; <label>:3:
  store i32 0, i32* @x, align 4
  br label %4

; <label>:4:
  call void (...) @somethingElse()
  %5 = load i32, i32* @y, align 4
  %6 = icmp ne i32 %5, 0
  br i1 %6, label %10, label %7

; <label>:7:
  %8 = call i32 (...) @something()
  br label %9

; <label>:9:
  ; Block %9 branches only to itself, so it never reaches the ret in %10.
  call void (...) @somethingElse()
  br label %9

; <label>:10:
  ret void
}

declare void @somethingElse(...)

attributes #5 = { nounwind "no-frame-pointer-elim-non-leaf" }
