; RUN: llc %s -o - -enable-shrink-wrap=true | FileCheck %s --check-prefix=CHECK --check-prefix=ENABLE
; RUN: llc %s -o - -enable-shrink-wrap=false | FileCheck %s --check-prefix=CHECK --check-prefix=DISABLE
;
; Note: Lots of tests use inline asm instead of regular calls.
; This allows better control over what register allocation will do.
; Otherwise, we may end up with spill code right in the entry block,
; defeating shrink-wrapping. Moreover, some of the inline asm
; statements (nop) are here to ensure that the related paths do not
; end up as critical edges.
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
target triple = "x86_64-apple-macosx"


; Initial motivating example: Simple diamond with a call just on one side.
; CHECK-LABEL: foo:
;
; Compare the arguments and jump to exit.
; No prologue needed.
; ENABLE: movl %edi, [[ARG0CPY:%e[a-z]+]]
; ENABLE-NEXT: cmpl %esi, [[ARG0CPY]]
; ENABLE-NEXT: jge [[EXIT_LABEL:LBB[0-9_]+]]
;
; Prologue code.
; (What we push does not matter. It should be some random scratch register.)
; CHECK: pushq
;
; Compare the arguments and jump to exit.
; After the prologue is set.
; DISABLE: movl %edi, [[ARG0CPY:%e[a-z]+]]
; DISABLE-NEXT: cmpl %esi, [[ARG0CPY]]
; DISABLE-NEXT: jge [[EXIT_LABEL:LBB[0-9_]+]]
;
; Store %a in the alloca.
; CHECK: movl [[ARG0CPY]], 4(%rsp)
; Set the alloca address in the second argument.
; CHECK-NEXT: leaq 4(%rsp), %rsi
; Set the first argument to zero.
; CHECK-NEXT: xorl %edi, %edi
; CHECK-NEXT: callq _doSomething
;
; With shrink-wrapping, the epilogue is just after the call.
; ENABLE-NEXT: addq $8, %rsp
;
; CHECK: [[EXIT_LABEL]]:
;
; Without shrink-wrapping, the epilogue is in the exit block.
; Epilogue code. (What we pop does not matter.)
; DISABLE-NEXT: popq
;
; CHECK-NEXT: retq
define i32 @foo(i32 %a, i32 %b) {
  %tmp = alloca i32, align 4
  %tmp2 = icmp slt i32 %a, %b
  br i1 %tmp2, label %true, label %false

true:
  store i32 %a, i32* %tmp, align 4
  %tmp4 = call i32 @doSomething(i32 0, i32* %tmp)
  br label %false

false:
  %tmp.0 = phi i32 [ %tmp4, %true ], [ %a, %0 ]
  ret i32 %tmp.0
}

; Function Attrs: optsize
declare i32 @doSomething(i32, i32*)


; Check that we do not perform the restore inside the loop when the save
; is outside.
; CHECK-LABEL: freqSaveAndRestoreOutsideLoop:
;
; Shrink-wrapping allows us to skip the prologue in the else case.
; ENABLE: testl %edi, %edi
; ENABLE: je [[ELSE_LABEL:LBB[0-9_]+]]
;
; Prologue code.
; Make sure we save the CSR used in the inline asm: rbx.
; CHECK: pushq %rbx
;
; DISABLE: testl %edi, %edi
; DISABLE: je [[ELSE_LABEL:LBB[0-9_]+]]
;
; SUM is in %esi because it is coalesced with the second
; argument on the else path.
; CHECK: xorl [[SUM:%esi]], [[SUM]]
; CHECK-NEXT: movl $10, [[IV:%e[a-z]+]]
;
; Next BB.
; CHECK: [[LOOP:LBB[0-9_]+]]: ## %for.body
; CHECK: movl $1, [[TMP:%e[a-z]+]]
; CHECK: addl [[TMP]], [[SUM]]
; CHECK-NEXT: decl [[IV]]
; CHECK-NEXT: jne [[LOOP]]
;
; Next BB.
; SUM << 3.
; CHECK: shll $3, [[SUM]]
;
; Jump to epilogue.
; DISABLE: jmp [[EPILOG_BB:LBB[0-9_]+]]
;
; DISABLE: [[ELSE_LABEL]]: ## %if.else
; Shift second argument by one and store into returned register.
; DISABLE: addl %esi, %esi
; DISABLE: [[EPILOG_BB]]: ## %if.end
;
; Epilogue code.
; CHECK-DAG: popq %rbx
; CHECK-DAG: movl %esi, %eax
; CHECK: retq
;
; ENABLE: [[ELSE_LABEL]]: ## %if.else
; Shift second argument by one and store into returned register.
; ENABLE: addl %esi, %esi
; ENABLE-NEXT: movl %esi, %eax
; ENABLE-NEXT: retq
define i32 @freqSaveAndRestoreOutsideLoop(i32 %cond, i32 %N) {
entry:
  %tobool = icmp eq i32 %cond, 0
  br i1 %tobool, label %if.else, label %for.preheader

for.preheader:
  tail call void asm "nop", ""()
  br label %for.body

for.body: ; preds = %entry, %for.body
  %i.05 = phi i32 [ %inc, %for.body ], [ 0, %for.preheader ]
  %sum.04 = phi i32 [ %add, %for.body ], [ 0, %for.preheader ]
  %call = tail call i32 asm "movl $$1, $0", "=r,~{ebx}"()
  %add = add nsw i32 %call, %sum.04
  %inc = add nuw nsw i32 %i.05, 1
  %exitcond = icmp eq i32 %inc, 10
  br i1 %exitcond, label %for.end, label %for.body

for.end: ; preds = %for.body
  %shl = shl i32 %add, 3
  br label %if.end

if.else: ; preds = %entry
  %mul = shl nsw i32 %N, 1
  br label %if.end

if.end: ; preds = %if.else, %for.end
  %sum.1 = phi i32 [ %shl, %for.end ], [ %mul, %if.else ]
  ret i32 %sum.1
}

declare i32 @something(...)

; Check that we do not perform shrink-wrapping inside the loop even
; though that would be legal. The cost model must prevent that.
; CHECK-LABEL: freqSaveAndRestoreOutsideLoop2:
; Prologue code.
; Make sure we save the CSR used in the inline asm: rbx.
; CHECK: pushq %rbx
; CHECK: nop
; CHECK: xorl [[SUM:%e[a-z]+]], [[SUM]]
; CHECK-NEXT: movl $10, [[IV:%e[a-z]+]]
; Next BB.
; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: ## %for.body
; CHECK: movl $1, [[TMP:%e[a-z]+]]
; CHECK: addl [[TMP]], [[SUM]]
; CHECK-NEXT: decl [[IV]]
; CHECK-NEXT: jne [[LOOP_LABEL]]
; Next BB.
; CHECK: ## %for.exit
; CHECK: nop
; CHECK: popq %rbx
; CHECK-NEXT: retq
define i32 @freqSaveAndRestoreOutsideLoop2(i32 %cond) {
entry:
  br label %for.preheader

for.preheader:
  tail call void asm "nop", ""()
  br label %for.body

for.body: ; preds = %for.body, %entry
  %i.04 = phi i32 [ 0, %for.preheader ], [ %inc, %for.body ]
  %sum.03 = phi i32 [ 0, %for.preheader ], [ %add, %for.body ]
  %call = tail call i32 asm "movl $$1, $0", "=r,~{ebx}"()
  %add = add nsw i32 %call, %sum.03
  %inc = add nuw nsw i32 %i.04, 1
  %exitcond = icmp eq i32 %inc, 10
  br i1 %exitcond, label %for.exit, label %for.body

for.exit:
  tail call void asm "nop", ""()
  br label %for.end

for.end: ; preds = %for.body
  ret i32 %add
}

; Check with a more complex case that we do not place the save within the loop
; and the restore outside of it.
; CHECK-LABEL: loopInfoSaveOutsideLoop:
;
; ENABLE: testl %edi, %edi
; ENABLE-NEXT: je [[ELSE_LABEL:LBB[0-9_]+]]
;
; Prologue code.
; Make sure we save the CSR used in the inline asm: rbx.
; CHECK: pushq %rbx
;
; DISABLE: testl %edi, %edi
; DISABLE-NEXT: je [[ELSE_LABEL:LBB[0-9_]+]]
;
; CHECK: nop
; CHECK: xorl [[SUM:%esi]], [[SUM]]
; CHECK-NEXT: movl $10, [[IV:%e[a-z]+]]
;
; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: ## %for.body
; CHECK: movl $1, [[TMP:%e[a-z]+]]
; CHECK: addl [[TMP]], [[SUM]]
; CHECK-NEXT: decl [[IV]]
; CHECK-NEXT: jne [[LOOP_LABEL]]
; Next BB.
; CHECK: nop
; CHECK: shll $3, [[SUM]]
;
; DISABLE: jmp [[EPILOG_BB:LBB[0-9_]+]]
;
; DISABLE: [[ELSE_LABEL]]: ## %if.else
; Shift second argument by one and store into returned register.
; DISABLE: addl %esi, %esi
; DISABLE: [[EPILOG_BB]]: ## %if.end
;
; Epilogue code.
; CHECK-DAG: popq %rbx
; CHECK-DAG: movl %esi, %eax
; CHECK: retq
;
; ENABLE: [[ELSE_LABEL]]: ## %if.else
; Shift second argument by one and store into returned register.
; ENABLE: addl %esi, %esi
; ENABLE-NEXT: movl %esi, %eax
; ENABLE-NEXT: retq
define i32 @loopInfoSaveOutsideLoop(i32 %cond, i32 %N) {
entry:
  %tobool = icmp eq i32 %cond, 0
  br i1 %tobool, label %if.else, label %for.preheader

for.preheader:
  tail call void asm "nop", ""()
  br label %for.body

for.body: ; preds = %entry, %for.body
  %i.05 = phi i32 [ %inc, %for.body ], [ 0, %for.preheader ]
  %sum.04 = phi i32 [ %add, %for.body ], [ 0, %for.preheader ]
  %call = tail call i32 asm "movl $$1, $0", "=r,~{ebx}"()
  %add = add nsw i32 %call, %sum.04
  %inc = add nuw nsw i32 %i.05, 1
  %exitcond = icmp eq i32 %inc, 10
  br i1 %exitcond, label %for.end, label %for.body

for.end: ; preds = %for.body
  tail call void asm "nop", "~{ebx}"()
  %shl = shl i32 %add, 3
  br label %if.end

if.else: ; preds = %entry
  %mul = shl nsw i32 %N, 1
  br label %if.end

if.end: ; preds = %if.else, %for.end
  %sum.1 = phi i32 [ %shl, %for.end ], [ %mul, %if.else ]
  ret i32 %sum.1
}

declare void @somethingElse(...)

; Check with a more complex case that we do not place the restore within the
; loop and the save outside of it.
; CHECK-LABEL: loopInfoRestoreOutsideLoop:
;
; ENABLE: testl %edi, %edi
; ENABLE-NEXT: je [[ELSE_LABEL:LBB[0-9_]+]]
;
; Prologue code.
; Make sure we save the CSR used in the inline asm: rbx.
; CHECK: pushq %rbx
;
; DISABLE: testl %edi, %edi
; DISABLE-NEXT: je [[ELSE_LABEL:LBB[0-9_]+]]
;
; CHECK: nop
; CHECK: xorl [[SUM:%esi]], [[SUM]]
; CHECK-NEXT: movl $10, [[IV:%e[a-z]+]]
;
; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: ## %for.body
; CHECK: movl $1, [[TMP:%e[a-z]+]]
; CHECK: addl [[TMP]], [[SUM]]
; CHECK-NEXT: decl [[IV]]
; CHECK-NEXT: jne [[LOOP_LABEL]]
; Next BB.
; CHECK: shll $3, [[SUM]]
;
; DISABLE: jmp [[EPILOG_BB:LBB[0-9_]+]]
;
; DISABLE: [[ELSE_LABEL]]: ## %if.else
; Shift second argument by one and store into returned register.
; DISABLE: addl %esi, %esi
; DISABLE: [[EPILOG_BB]]: ## %if.end
;
; Epilogue code.
; CHECK-DAG: popq %rbx
; CHECK-DAG: movl %esi, %eax
; CHECK: retq
;
; ENABLE: [[ELSE_LABEL]]: ## %if.else
; Shift second argument by one and store into returned register.
; ENABLE: addl %esi, %esi
; ENABLE-NEXT: movl %esi, %eax
; ENABLE-NEXT: retq
define i32 @loopInfoRestoreOutsideLoop(i32 %cond, i32 %N) #0 {
entry:
  %tobool = icmp eq i32 %cond, 0
  br i1 %tobool, label %if.else, label %if.then

if.then: ; preds = %entry
  tail call void asm "nop", "~{ebx}"()
  br label %for.body

for.body: ; preds = %for.body, %if.then
  %i.05 = phi i32 [ 0, %if.then ], [ %inc, %for.body ]
  %sum.04 = phi i32 [ 0, %if.then ], [ %add, %for.body ]
  %call = tail call i32 asm "movl $$1, $0", "=r,~{ebx}"()
  %add = add nsw i32 %call, %sum.04
  %inc = add nuw nsw i32 %i.05, 1
  %exitcond = icmp eq i32 %inc, 10
  br i1 %exitcond, label %for.end, label %for.body

for.end: ; preds = %for.body
  %shl = shl i32 %add, 3
  br label %if.end

if.else: ; preds = %entry
  %mul = shl nsw i32 %N, 1
  br label %if.end

if.end: ; preds = %if.else, %for.end
  %sum.1 = phi i32 [ %shl, %for.end ], [ %mul, %if.else ]
  ret i32 %sum.1
}

; Check that we handle functions with no frame information correctly.
; CHECK-LABEL: emptyFrame:
; CHECK: ## %entry
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: retq
define i32 @emptyFrame() {
entry:
  ret i32 0
}

; Check that we handle inline asm correctly.
; CHECK-LABEL: inlineAsm:
;
; ENABLE: testl %edi, %edi
; ENABLE-NEXT: je [[ELSE_LABEL:LBB[0-9_]+]]
;
; Prologue code.
; Make sure we save the CSR used in the inline asm: rbx.
; CHECK: pushq %rbx
;
; DISABLE: testl %edi, %edi
; DISABLE-NEXT: je [[ELSE_LABEL:LBB[0-9_]+]]
;
; CHECK: nop
; CHECK: movl $10, [[IV:%e[a-z]+]]
;
; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: ## %for.body
; Inline asm statement.
; CHECK: addl $1, %ebx
; CHECK: decl [[IV]]
; CHECK-NEXT: jne [[LOOP_LABEL]]
; Next BB.
; CHECK: nop
; CHECK: xorl %esi, %esi
;
; DISABLE: jmp [[EPILOG_BB:LBB[0-9_]+]]
;
; DISABLE: [[ELSE_LABEL]]: ## %if.else
; Shift second argument by one and store into returned register.
; DISABLE: addl %esi, %esi
; DISABLE: [[EPILOG_BB]]: ## %if.end
;
; Epilogue code.
; CHECK-DAG: popq %rbx
; CHECK-DAG: movl %esi, %eax
; CHECK: retq
;
; ENABLE: [[ELSE_LABEL]]: ## %if.else
; Shift second argument by one and store into returned register.
; ENABLE: addl %esi, %esi
; ENABLE-NEXT: movl %esi, %eax
; ENABLE-NEXT: retq
define i32 @inlineAsm(i32 %cond, i32 %N) {
entry:
  %tobool = icmp eq i32 %cond, 0
  br i1 %tobool, label %if.else, label %for.preheader

for.preheader:
  tail call void asm "nop", ""()
  br label %for.body

for.body: ; preds = %entry, %for.body
  %i.03 = phi i32 [ %inc, %for.body ], [ 0, %for.preheader ]
  tail call void asm "addl $$1, %ebx", "~{ebx}"()
  %inc = add nuw nsw i32 %i.03, 1
  %exitcond = icmp eq i32 %inc, 10
  br i1 %exitcond, label %for.exit, label %for.body

for.exit:
  tail call void asm "nop", ""()
  br label %if.end

if.else: ; preds = %entry
  %mul = shl nsw i32 %N, 1
  br label %if.end

if.end: ; preds = %for.body, %if.else
  %sum.0 = phi i32 [ %mul, %if.else ], [ 0, %for.exit ]
  ret i32 %sum.0
}

; Check that we handle calls to variadic functions correctly.
; CHECK-LABEL: callVariadicFunc:
;
; ENABLE: testl %edi, %edi
; ENABLE-NEXT: je [[ELSE_LABEL:LBB[0-9_]+]]
;
; Prologue code.
; CHECK: pushq
;
; DISABLE: testl %edi, %edi
; DISABLE-NEXT: je [[ELSE_LABEL:LBB[0-9_]+]]
;
; Setup of the varargs.
; CHECK: movl %esi, (%rsp)
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: %esi, %edi
; CHECK-NEXT: %esi, %edx
; CHECK-NEXT: %esi, %ecx
; CHECK-NEXT: %esi, %r8d
; CHECK-NEXT: %esi, %r9d
; CHECK-NEXT: callq _someVariadicFunc
; CHECK-NEXT: movl %eax, %esi
; CHECK-NEXT: shll $3, %esi
;
; ENABLE-NEXT: addq $8, %rsp
; ENABLE-NEXT: movl %esi, %eax
; ENABLE-NEXT: retq
;
; DISABLE: jmp [[IFEND_LABEL:LBB[0-9_]+]]
;
; CHECK: [[ELSE_LABEL]]: ## %if.else
; Shift second argument by one and store into returned register.
; CHECK: addl %esi, %esi
;
; DISABLE: [[IFEND_LABEL]]: ## %if.end
;
; Epilogue code.
; CHECK-NEXT: movl %esi, %eax
; DISABLE-NEXT: popq
; CHECK-NEXT: retq
define i32 @callVariadicFunc(i32 %cond, i32 %N) {
entry:
  %tobool = icmp eq i32 %cond, 0
  br i1 %tobool, label %if.else, label %if.then

if.then: ; preds = %entry
  %call = tail call i32 (i32, ...) @someVariadicFunc(i32 %N, i32 %N, i32 %N, i32 %N, i32 %N, i32 %N, i32 %N)
  %shl = shl i32 %call, 3
  br label %if.end

if.else: ; preds = %entry
  %mul = shl nsw i32 %N, 1
  br label %if.end

if.end: ; preds = %if.else, %if.then
  %sum.0 = phi i32 [ %shl, %if.then ], [ %mul, %if.else ]
  ret i32 %sum.0
}

declare i32 @someVariadicFunc(i32, ...)

; Check that we use LEA so as not to clobber EFLAGS.
%struct.temp_slot = type { %struct.temp_slot*, %struct.rtx_def*, %struct.rtx_def*, i32, i64, %union.tree_node*, %union.tree_node*, i8, i8, i32, i32, i64, i64 }
%union.tree_node = type { %struct.tree_decl }
%struct.tree_decl = type { %struct.tree_common, i8*, i32, i32, %union.tree_node*, i48, %union.anon, %union.tree_node*, %union.tree_node*, %union.tree_node*, %union.tree_node*, %union.tree_node*, %union.tree_node*, %union.tree_node*, %union.tree_node*, %union.tree_node*, %union.tree_node*, %struct.rtx_def*, %struct.rtx_def*, %union.anon.1, %union.tree_node*, %union.tree_node*, %union.tree_node*, i64, %struct.lang_decl* }
%struct.tree_common = type { %union.tree_node*, %union.tree_node*, i32 }
%union.anon = type { i64 }
%union.anon.1 = type { %struct.function* }
%struct.function = type { %struct.eh_status*, %struct.stmt_status*, %struct.expr_status*, %struct.emit_status*, %struct.varasm_status*, i8*, %union.tree_node*, %struct.function*, i32, i32, i32, i32, %struct.rtx_def*, %struct.ix86_args, %struct.rtx_def*, %struct.rtx_def*, i8*, %struct.initial_value_struct*, i32, %union.tree_node*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %union.tree_node*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, i64, %union.tree_node*, %union.tree_node*, %struct.rtx_def*, %struct.rtx_def*, i32, %struct.rtx_def**, %struct.temp_slot*, i32, i32, i32, %struct.var_refs_queue*, i32, i32, i8*, %union.tree_node*, %struct.rtx_def*, i32, i32, %struct.machine_function*, i32, i32, %struct.language_function*, %struct.rtx_def*, i24 }
%struct.eh_status = type opaque
%struct.stmt_status = type opaque
%struct.expr_status = type { i32, i32, i32, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def* }
%struct.emit_status = type { i32, i32, %struct.rtx_def*, %struct.rtx_def*, %union.tree_node*, %struct.sequence_stack*, i32, i32, i8*, i32, i8*, %union.tree_node**, %struct.rtx_def** }
%struct.sequence_stack = type { %struct.rtx_def*, %struct.rtx_def*, %union.tree_node*, %struct.sequence_stack* }
%struct.varasm_status = type opaque
%struct.ix86_args = type { i32, i32, i32, i32, i32, i32, i32 }
%struct.initial_value_struct = type opaque
%struct.var_refs_queue = type { %struct.rtx_def*, i32, i32, %struct.var_refs_queue* }
%struct.machine_function = type opaque
%struct.language_function = type opaque
%struct.lang_decl = type opaque
%struct.rtx_def = type { i32, [1 x %union.rtunion_def] }
%union.rtunion_def = type { i64 }

declare hidden fastcc %struct.temp_slot* @find_temp_slot_from_address(%struct.rtx_def* readonly)

; CHECK-LABEL: useLEA:
; DISABLE: pushq
;
; CHECK: testq %rdi, %rdi
; CHECK-NEXT: je [[CLEANUP:LBB[0-9_]+]]
;
; CHECK: movzwl (%rdi), [[BF_LOAD:%e[a-z]+]]
; CHECK-NEXT: cmpl $66, [[BF_LOAD]]
; CHECK-NEXT: jne [[CLEANUP]]
;
; CHECK: movq 8(%rdi), %rdi
; CHECK-NEXT: movzwl (%rdi), %e[[BF_LOAD2:[a-z]+]]
; CHECK-NEXT: leal -54(%r[[BF_LOAD2]]), [[TMP:%e[a-z]+]]
; CHECK-NEXT: cmpl $14, [[TMP]]
; CHECK-NEXT: ja [[LOR_LHS_FALSE:LBB[0-9_]+]]
;
; CHECK: movl $24599, [[TMP2:%e[a-z]+]]
; CHECK-NEXT: btl [[TMP]], [[TMP2]]
; CHECK-NEXT: jae [[LOR_LHS_FALSE:LBB[0-9_]+]]
;
; CHECK: [[CLEANUP]]: ## %cleanup
; DISABLE: popq
; CHECK-NEXT: retq
;
; CHECK: [[LOR_LHS_FALSE]]: ## %lor.lhs.false
; CHECK: cmpl $134, %e[[BF_LOAD2]]
; CHECK-NEXT: je [[CLEANUP]]
;
; CHECK: cmpl $140, %e[[BF_LOAD2]]
; CHECK-NEXT: je [[CLEANUP]]
;
; ENABLE: pushq
; CHECK: callq _find_temp_slot_from_address
; CHECK-NEXT: testq %rax, %rax
;
; The adjustment must use LEA here (or be moved above the test).
; ENABLE-NEXT: leaq 8(%rsp), %rsp
;
; CHECK-NEXT: je [[CLEANUP]]
;
; CHECK: movb $1, 57(%rax)
define void @useLEA(%struct.rtx_def* readonly %x) {
entry:
  %cmp = icmp eq %struct.rtx_def* %x, null
  br i1 %cmp, label %cleanup, label %if.end

if.end: ; preds = %entry
  %tmp = getelementptr inbounds %struct.rtx_def, %struct.rtx_def* %x, i64 0, i32 0
  %bf.load = load i32, i32* %tmp, align 8
  %bf.clear = and i32 %bf.load, 65535
  %cmp1 = icmp eq i32 %bf.clear, 66
  br i1 %cmp1, label %lor.lhs.false, label %cleanup

lor.lhs.false: ; preds = %if.end
  %arrayidx = getelementptr inbounds %struct.rtx_def, %struct.rtx_def* %x, i64 0, i32 1, i64 0
  %rtx = bitcast %union.rtunion_def* %arrayidx to %struct.rtx_def**
  %tmp1 = load %struct.rtx_def*, %struct.rtx_def** %rtx, align 8
  %tmp2 = getelementptr inbounds %struct.rtx_def, %struct.rtx_def* %tmp1, i64 0, i32 0
  %bf.load2 = load i32, i32* %tmp2, align 8
  %bf.clear3 = and i32 %bf.load2, 65535
  switch i32 %bf.clear3, label %if.end.55 [
    i32 67, label %cleanup
    i32 68, label %cleanup
    i32 54, label %cleanup
    i32 55, label %cleanup
    i32 58, label %cleanup
    i32 134, label %cleanup
    i32 56, label %cleanup
    i32 140, label %cleanup
  ]

if.end.55: ; preds = %lor.lhs.false
  %call = tail call fastcc %struct.temp_slot* @find_temp_slot_from_address(%struct.rtx_def* %tmp1) #2
  %cmp59 = icmp eq %struct.temp_slot* %call, null
  br i1 %cmp59, label %cleanup, label %if.then.60

if.then.60: ; preds = %if.end.55
  %addr_taken = getelementptr inbounds %struct.temp_slot, %struct.temp_slot* %call, i64 0, i32 8
  store i8 1, i8* %addr_taken, align 1
  br label %cleanup

cleanup: ; preds = %if.then.60, %if.end.55, %lor.lhs.false, %lor.lhs.false, %lor.lhs.false, %lor.lhs.false, %lor.lhs.false, %lor.lhs.false, %lor.lhs.false, %lor.lhs.false, %if.end, %entry
  ret void
}

; Make sure we do not insert unreachable code after a noreturn function.
; Although it is not incorrect to insert such code, it is useless
; and it hurts the binary size.
;
; CHECK-LABEL: noreturn:
; DISABLE: pushq
;
; CHECK: testb %dil, %dil
; CHECK-NEXT: jne [[ABORT:LBB[0-9_]+]]
;
; CHECK: movl $42, %eax
;
; DISABLE-NEXT: popq
;
; CHECK-NEXT: retq
;
; CHECK: [[ABORT]]: ## %if.abort
;
; ENABLE: pushq
;
; CHECK: callq _abort
; ENABLE-NOT: popq
define i32 @noreturn(i8 signext %bad_thing) {
entry:
  %tobool = icmp eq i8 %bad_thing, 0
  br i1 %tobool, label %if.end, label %if.abort

if.abort:
  tail call void @abort() #0
  unreachable

if.end:
  ret i32 42
}

declare void @abort() #0

attributes #0 = { noreturn nounwind }


; Make sure that we handle infinite loops properly. When checking that the Save
; and Restore blocks are control flow equivalent, the loop searches for the
; immediate (post) dominator for the (restore) save blocks. When either the Save
; or Restore block is located in an infinite loop, the only immediate (post)
; dominator is itself. In this case, we cannot perform shrink-wrapping, but we
; should return gracefully and continue compilation.
; The only requirement for this test is that compilation finishes correctly.
;
; CHECK-LABEL: infiniteloop
; CHECK: retq
define void @infiniteloop() {
entry:
  br i1 undef, label %if.then, label %if.end

if.then:
  %ptr = alloca i32, i32 4
  br label %for.body

for.body: ; preds = %for.body, %entry
  %sum.03 = phi i32 [ 0, %if.then ], [ %add, %for.body ]
  %call = tail call i32 asm "movl $$1, $0", "=r,~{ebx}"()
  %add = add nsw i32 %call, %sum.03
  store i32 %add, i32* %ptr
  br label %for.body

if.end:
  ret void
}

; Another infinite loop test, this time with a body bigger than just one block.
; CHECK-LABEL: infiniteloop2
; CHECK: retq
define void @infiniteloop2() {
entry:
  br i1 undef, label %if.then, label %if.end

if.then:
  %ptr = alloca i32, i32 4
  br label %for.body

for.body: ; preds = %for.body, %entry
  %sum.03 = phi i32 [ 0, %if.then ], [ %add, %body1 ], [ 1, %body2 ]
  %call = tail call i32 asm "movl $$1, $0", "=r,~{ebx}"()
  %add = add nsw i32 %call, %sum.03
  store i32 %add, i32* %ptr
  br i1 undef, label %body1, label %body2

body1:
  tail call void asm sideeffect "nop", "~{ebx}"()
  br label %for.body

body2:
  tail call void asm sideeffect "nop", "~{ebx}"()
  br label %for.body

if.end:
  ret void
}

; Another infinite loop test, this time with two nested infinite loops.
; CHECK-LABEL: infiniteloop3
; CHECK: retq
define void @infiniteloop3() {
entry:
  br i1 undef, label %loop2a, label %body

body: ; preds = %entry
  br i1 undef, label %loop2a, label %end

loop1: ; preds = %loop2a, %loop2b
  %var.phi = phi i32* [ %next.phi, %loop2b ], [ %var, %loop2a ]
  %next.phi = phi i32* [ %next.load, %loop2b ], [ %next.var, %loop2a ]
  %0 = icmp eq i32* %var, null
  %next.load = load i32*, i32** undef
  br i1 %0, label %loop2a, label %loop2b

loop2a: ; preds = %loop1, %body, %entry
  %var = phi i32* [ null, %body ], [ null, %entry ], [ %next.phi, %loop1 ]
  %next.var = phi i32* [ undef, %body ], [ null, %entry ], [ %next.load, %loop1 ]
  br label %loop1

loop2b: ; preds = %loop1
  %gep1 = bitcast i32* %var.phi to i32*
  %next.ptr = bitcast i32* %gep1 to i32**
  store i32* %next.phi, i32** %next.ptr
  br label %loop1

end:
  ret void
}

; Check that we don't just bail out on RegMask.
; In this case, the RegMask does not touch a CSR so we are good to go!
; CHECK-LABEL: regmask:
;
; Compare the arguments and jump to exit.
; No prologue needed.
; ENABLE: cmpl %esi, %edi
; ENABLE-NEXT: jge [[EXIT_LABEL:LBB[0-9_]+]]
;
; Prologue code.
; (What we push does not matter. It should be some random scratch register.)
; CHECK: pushq
;
; Compare the arguments and jump to exit.
; After the prologue is set.
; DISABLE: cmpl %esi, %edi
; DISABLE-NEXT: jge [[EXIT_LABEL:LBB[0-9_]+]]
;
; CHECK: nop
; Set the first argument to zero.
; CHECK: xorl %edi, %edi
; Set the second argument to addr.
; CHECK-NEXT: movq %rdx, %rsi
; CHECK-NEXT: callq _doSomething
; CHECK-NEXT: popq
; CHECK-NEXT: retq
;
; CHECK: [[EXIT_LABEL]]:
; Set the first argument to 6.
; CHECK-NEXT: movl $6, %edi
; Set the second argument to addr.
; CHECK-NEXT: movq %rdx, %rsi
;
; Without shrink-wrapping, we need to restore the stack before
; making the tail call.
; Epilogue code.
; DISABLE-NEXT: popq
;
; CHECK-NEXT: jmp _doSomething
define i32 @regmask(i32 %a, i32 %b, i32* %addr) {
  %tmp2 = icmp slt i32 %a, %b
  br i1 %tmp2, label %true, label %false

true:
  ; Clobber a CSR so that we check something on the regmask
  ; of the tail call.
  tail call void asm sideeffect "nop", "~{ebx}"()
  %tmp4 = call i32 @doSomething(i32 0, i32* %addr)
  br label %end

false:
  %tmp5 = tail call i32 @doSomething(i32 6, i32* %addr)
  br label %end

end:
  %tmp.0 = phi i32 [ %tmp4, %true ], [ %tmp5, %false ]
  ret i32 %tmp.0
}

@b = internal unnamed_addr global i1 false
@c = internal unnamed_addr global i8 0, align 1
@a = common global i32 0, align 4

; Make sure the prologue does not clobber the EFLAGS when
; they are live across it.
; PR25629.
; Note: The registers in the following patterns may change, but
; because the patterns rely on the register hierarchy (e.g., eax, al),
; it is tricky to write them robustly.
;
; CHECK-LABEL: useLEAForPrologue:
;
; Prologue is at the beginning of the function when shrink-wrapping
; is disabled.
; DISABLE: pushq
; The stack adjustment can use a SUB instruction because we do not
; need to preserve the EFLAGS at this point.
; DISABLE-NEXT: subq $16, %rsp
;
; Load the value of b.
; CHECK: movb _b(%rip), [[BOOL:%cl]]
; Create the zero value for the select assignment.
; CHECK-NEXT: xorl [[CMOVE_VAL:%eax]], [[CMOVE_VAL]]
; CHECK-NEXT: testb [[BOOL]], [[BOOL]]
; CHECK-NEXT: jne [[STOREC_LABEL:LBB[0-9_]+]]
;
; CHECK: movb $48, [[CMOVE_VAL:%al]]
;
; CHECK: [[STOREC_LABEL]]:
;
; ENABLE-NEXT: pushq
; For the stack adjustment, we need to preserve the EFLAGS.
; ENABLE-NEXT: leaq -16(%rsp), %rsp
;
; Technically, we should use CMOVE_VAL here or its subregister.
; CHECK-NEXT: movb %al, _c(%rip)
; The testb above set the EFLAGS that are read here.
; CHECK-NEXT: je [[VARFUNC_CALL:LBB[0-9_]+]]
;
; The code of the loop is not interesting.
; [...]
;
; CHECK: [[VARFUNC_CALL]]:
; Set the null parameter.
; CHECK-NEXT: xorl %edi, %edi
; CHECK-NEXT: callq _varfunc
;
; Set the return value.
; CHECK-NEXT: xorl %eax, %eax
;
; Epilogue code.
; CHECK-NEXT: addq $16, %rsp
; CHECK-NEXT: popq
; CHECK-NEXT: retq
define i32 @useLEAForPrologue(i32 %d, i32 %a, i8 %c) #3 {
entry:
  %tmp = alloca i3
  %.b = load i1, i1* @b, align 1
  %bool = select i1 %.b, i8 0, i8 48
  store i8 %bool, i8* @c, align 1
  br i1 %.b, label %for.body.lr.ph, label %for.end

for.body.lr.ph: ; preds = %entry
  tail call void asm sideeffect "nop", "~{ebx}"()
  br label %for.body

for.body: ; preds = %for.body.lr.ph, %for.body
  %inc6 = phi i8 [ %c, %for.body.lr.ph ], [ %inc, %for.body ]
  %cond5 = phi i32 [ %a, %for.body.lr.ph ], [ %conv3, %for.body ]
  %cmp2 = icmp slt i32 %d, %cond5
  %conv3 = zext i1 %cmp2 to i32
  %inc = add i8 %inc6, 1
  %cmp = icmp slt i8 %inc, 45
  br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge

for.cond.for.end_crit_edge: ; preds = %for.body
  store i32 %conv3, i32* @a, align 4
  br label %for.end

for.end: ; preds = %for.cond.for.end_crit_edge, %entry
  %call = tail call i32 (i8*) @varfunc(i8* null)
  ret i32 0
}

declare i32 @varfunc(i8* nocapture readonly)

@sum1 = external hidden thread_local global i32, align 4


; Function Attrs: nounwind
; Make sure the TLS call used to access @sum1 happens after the prologue
; and before the epilogue.
; TLS calls used to be wrongly modeled and shrink-wrapping would have inserted
; the prologue and epilogue just around the call to doSomething.
; PR25820.
;
; CHECK-LABEL: tlsCall:
; CHECK: pushq
; CHECK: testb $1, %dil
; CHECK: je [[ELSE_LABEL:LBB[0-9_]+]]
;
; master bb
; CHECK: movq _sum1@TLVP(%rip), %rdi
; CHECK-NEXT: callq *(%rdi)
; CHECK: jmp [[EXIT_LABEL:LBB[0-9_]+]]
;
; [[ELSE_LABEL]]:
; CHECK: callq _doSomething
;
; [[EXIT_LABEL]]:
; CHECK: popq
; CHECK-NEXT: retq
define i32 @tlsCall(i1 %bool1, i32 %arg, i32* readonly dereferenceable(4) %sum1) #3 {
entry:
  br i1 %bool1, label %master, label %else

master:
  %tmp1 = load i32, i32* %sum1, align 4
  store i32 %tmp1, i32* @sum1, align 4
  br label %exit

else:
  %call = call i32 @doSomething(i32 0, i32* null)
  br label %exit

exit:
  %res = phi i32 [ %arg, %master ], [ %call, %else ]
  ret i32 %res
}

attributes #3 = { nounwind }

@irreducibleCFGa = common global i32 0, align 4
@irreducibleCFGf = common global i8 0, align 1
@irreducibleCFGb = common global i32 0, align 4

; Check that we do not run shrink-wrapping on irreducible CFGs until
; it is actually supported.
; At the moment, on those CFGs the loop information may be incorrect
; and since we use that information to do the placement, we may end up
; inserting the prologue/epilogue at incorrect places.
; PR25988.
;
; CHECK-LABEL: irreducibleCFG:
; CHECK: %entry
; Make sure the prologue happens in the entry block.
; CHECK-NEXT: pushq
; ...
; Make sure the epilogue happens in the exit block.
; CHECK-NOT: popq
; CHECK: popq
; CHECK-NEXT: popq
; CHECK-NEXT: retq
define i32 @irreducibleCFG() #4 {
entry:
  %i0 = load i32, i32* @irreducibleCFGa, align 4
  %.pr = load i8, i8* @irreducibleCFGf, align 1
  %bool = icmp eq i8 %.pr, 0
  br i1 %bool, label %split, label %preheader

preheader:
  br label %preheader

split:
  %i1 = load i32, i32* @irreducibleCFGb, align 4
  %tobool1.i = icmp ne i32 %i1, 0
  br i1 %tobool1.i, label %for.body4.i, label %for.cond8.i.preheader

for.body4.i:
  %call.i = tail call i32 (...) @something(i32 %i0)
  br label %for.cond8

for.cond8:
  %p1 = phi i32 [ %inc18.i, %for.inc ], [ 0, %for.body4.i ]
  %.pr1.pr = load i32, i32* @irreducibleCFGb, align 4
  br label %for.cond8.i.preheader

for.cond8.i.preheader:
  %.pr1 = phi i32 [ %.pr1.pr, %for.cond8 ], [ %i1, %split ]
  %p13 = phi i32 [ %p1, %for.cond8 ], [ 0, %split ]
  br label %for.inc

fn1.exit:
  ret i32 0

for.inc:
  %inc18.i = add nuw nsw i32 %p13, 1
  %cmp = icmp slt i32 %inc18.i, 7
  br i1 %cmp, label %for.cond8, label %fn1.exit
}

attributes #4 = { "no-frame-pointer-elim"="true" }