; RUN: opt < %s -basic-aa -gvn -enable-load-pre -S | FileCheck %s
; RUN: opt < %s -aa-pipeline=basic-aa -passes="gvn<load-pre>" -enable-load-pre=false -S | FileCheck %s
; Both invocations are verified against the same set of checks. The second one
; requests load PRE through the pass parameter while the command-line flag is
; set to false (presumably to show that the pass parameter wins -- confirm
; against the GVN option handling).
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"

; Diamond where only the block3 arm stores to %p. The load in block4 is
; expected to be replaced by a phi, with a load of %p placed in block2.
define i32 @test1(i32* %p, i1 %C) {
; CHECK-LABEL: @test1(
block1:
  br i1 %C, label %block2, label %block3

block2:
  br label %block4
; CHECK: block2:
; CHECK-NEXT: load i32, i32* %p

block3:
  store i32 0, i32* %p
  br label %block4

block4:
  %PRE = load i32, i32* %p
  ret i32 %PRE
; CHECK: block4:
; CHECK-NEXT: phi i32
; CHECK-NEXT: ret i32
}

; Simple phi translation: the loaded pointer is itself a phi of %p and %q, so
; the load placed in block2 has to use %q.
define i32 @test2(i32* %p, i32* %q, i1 %C) {
; CHECK-LABEL: @test2(
block1:
  br i1 %C, label %block2, label %block3

block2:
  br label %block4
; CHECK: block2:
; CHECK-NEXT: load i32, i32* %q

block3:
  store i32 0, i32* %p
  br label %block4

block4:
  %P2 = phi i32* [%p, %block3], [%q, %block2]
  %PRE = load i32, i32* %P2
  ret i32 %PRE
; CHECK: block4:
; CHECK-NEXT: phi i32 [
; CHECK-NOT: load
; CHECK: ret i32
}

; This is a PRE case that requires phi translation through a GEP.
define i32 @test3(i32* %p, i32* %q, i32** %Hack, i1 %C) {
; CHECK-LABEL: @test3(
block1:
  ; %B (= %q + 1) is computed here, ahead of the branch, and stored through
  ; %Hack; the load expected in block2 uses %B directly.
  %B = getelementptr i32, i32* %q, i32 1
  store i32* %B, i32** %Hack
  br i1 %C, label %block2, label %block3

block2:
  br label %block4
; CHECK: block2:
; CHECK-NEXT: load i32, i32* %B

block3:
  %A = getelementptr i32, i32* %p, i32 1
  store i32 0, i32* %A
  br label %block4

block4:
  %P2 = phi i32* [%p, %block3], [%q, %block2]
  %P3 = getelementptr i32, i32* %P2, i32 1
  %PRE = load i32, i32* %P3
  ret i32 %PRE
; CHECK: block4:
; CHECK: phi i32 [
; CHECK-NOT: load
; CHECK: ret i32
}

;; The address to load from is computed in 'block3' only, and 'block3' does
;; not dominate 'block2', so that computation cannot be reused in 'block2'.
define i32 @test4(i32* %p, i32* %q, i32** %Hack, i1 %C) {
; CHECK-LABEL: @test4(
block1:
  br i1 %C, label %block2, label %block3

block2:
  br label %block4
; CHECK: block2:
; CHECK:   load i32, i32*
; CHECK:   br label %block4

block3:
  %B = getelementptr i32, i32* %q, i32 1
  store i32* %B, i32** %Hack

  %A = getelementptr i32, i32* %p, i32 1
  store i32 0, i32* %A
  br label %block4

block4:
  %P2 = phi i32* [%p, %block3], [%q, %block2]
  %P3 = getelementptr i32, i32* %P2, i32 1
  %PRE = load i32, i32* %P3
  ret i32 %PRE
; CHECK: block4:
; CHECK: phi i32 [
; CHECK-NOT: load
; CHECK: ret i32
}

; C source this function corresponds to:
;void test5(int N, double *G) {
;  int j;
;  for (j = 0; j < N - 1; j++)
;    G[j] = G[j] + G[j+1];
;}

define void @test5(i32 %N, double* nocapture %G) nounwind ssp {
; CHECK-LABEL: @test5(
entry:
  %0 = add i32 %N, -1
  %1 = icmp sgt i32 %0, 0
  br i1 %1, label %bb.nph, label %return

bb.nph:
  %tmp = zext i32 %0 to i64
  br label %bb

; A load is expected in the loop preheader.
; CHECK: bb.nph:
; CHECK: load double, double*
; CHECK: br label %bb

bb:
  %indvar = phi i64 [ 0, %bb.nph ], [ %tmp6, %bb ]
  %tmp6 = add i64 %indvar, 1
  %scevgep = getelementptr double, double* %G, i64 %tmp6
  %scevgep7 = getelementptr double, double* %G, i64 %indvar
  %2 = load double, double* %scevgep7, align 8
  %3 = load double, double* %scevgep, align 8
  %4 = fadd double %2, %3
  store double %4, double* %scevgep7, align 8
  %exitcond = icmp eq i64 %tmp6, %tmp
  br i1 %exitcond, label %return, label %bb

; Exactly one load may remain inside the loop body.
; CHECK: bb:
; CHECK: load double, double*
; CHECK-NOT: load double, double*
; CHECK: br i1 %exitcond

return:
  ret void
}

; C source this function corresponds to (stores to G[j+1] rather than G[j]):
;void test6(int N, double *G) {
;  int j;
;  for (j = 0; j < N - 1; j++)
;    G[j+1] = G[j] + G[j+1];
;}

define void @test6(i32 %N, double* nocapture %G) nounwind ssp {
; CHECK-LABEL: @test6(
entry:
  %0 = add i32 %N, -1
  %1 = icmp sgt i32 %0, 0
  br i1 %1, label %bb.nph, label %return

bb.nph:
  %tmp = zext i32 %0 to i64
  br label %bb

; A load is expected in the loop preheader.
; CHECK: bb.nph:
; CHECK: load double, double*
; CHECK: br label %bb

bb:
  %indvar = phi i64 [ 0, %bb.nph ], [ %tmp6, %bb ]
  %tmp6 = add i64 %indvar, 1
  %scevgep = getelementptr double, double* %G, i64 %tmp6
  %scevgep7 = getelementptr double, double* %G, i64 %indvar
  %2 = load double, double* %scevgep7, align 8
  %3 = load double, double* %scevgep, align 8
  %4 = fadd double %2, %3
  store double %4, double* %scevgep, align 8
  %exitcond = icmp eq i64 %tmp6, %tmp
  br i1 %exitcond, label %return, label %bb

; Exactly one load may remain inside the loop body.
; CHECK: bb:
; CHECK: load double, double*
; CHECK-NOT: load double, double*
; CHECK: br i1 %exitcond

return:
  ret void
}

;void test7(int N, double* G) {
;  long j;
;  G[1] = 1;
;  for (j = 1; j < N - 1; j++)
;    G[j+1] = G[j] + G[j+1];
;}

; This requires phi translation of the adds.
define void @test7(i32 %N, double* nocapture %G) nounwind ssp {
; Function label added so the loop checks below are matched only inside this
; function, consistent with the other tests in this file.
; CHECK-LABEL: @test7(
entry:
  ; G[1] is written before the loop is entered.
  %0 = getelementptr inbounds double, double* %G, i64 1
  store double 1.000000e+00, double* %0, align 8
  %1 = add i32 %N, -1
  %2 = icmp sgt i32 %1, 1
  br i1 %2, label %bb.nph, label %return

bb.nph:
  %tmp = sext i32 %1 to i64
  %tmp7 = add i64 %tmp, -1
  br label %bb

bb:
  ; Both addresses are formed from adds on %indvar (+2 and +1).
  %indvar = phi i64 [ 0, %bb.nph ], [ %tmp9, %bb ]
  %tmp8 = add i64 %indvar, 2
  %scevgep = getelementptr double, double* %G, i64 %tmp8
  %tmp9 = add i64 %indvar, 1
  %scevgep10 = getelementptr double, double* %G, i64 %tmp9
  %3 = load double, double* %scevgep10, align 8
  %4 = load double, double* %scevgep, align 8
  %5 = fadd double %3, %4
  store double %5, double* %scevgep, align 8
  %exitcond = icmp eq i64 %tmp9, %tmp7
  br i1 %exitcond, label %return, label %bb

; Should only be one load in the loop.
; CHECK: bb:
; CHECK: load double, double*
; CHECK-NOT: load double, double*
; CHECK: br i1 %exitcond

return:
  ret void
}

;; Here the loaded address isn't available in 'block2' at all, requiring a new
;; GEP to be inserted into it.
define i32 @test8(i32* %p, i32* %q, i32** %Hack, i1 %C) {
; CHECK-LABEL: @test8(
block1:
  br i1 %C, label %block2, label %block3

block2:
  br label %block4
; CHECK: block2:
; CHECK:   load i32, i32*
; CHECK:   br label %block4

block3:
  %A = getelementptr i32, i32* %p, i32 1
  store i32 0, i32* %A
  br label %block4

block4:
  %P2 = phi i32* [%p, %block3], [%q, %block2]
  %P3 = getelementptr i32, i32* %P2, i32 1
  %PRE = load i32, i32* %P3
  ret i32 %PRE
; CHECK: block4:
; CHECK: phi i32 [
; CHECK-NOT: load
; CHECK: ret i32
}

;void test9(int N, double* G) {
;  long j;
;  for (j = 1; j < N - 1; j++)
;    G[j+1] = G[j] + G[j+1];
;}

; This requires phi translation of the adds.
define void @test9(i32 %N, double* nocapture %G) nounwind ssp {
; Function label added so the checks below are matched only inside this
; function, consistent with the other tests in this file.
; CHECK-LABEL: @test9(
entry:
  ; Unnamed no-op add: it takes the implicit value number %0, which keeps the
  ; explicitly numbered %1, %2, ... below in sequence.
  add i32 0, 0
  %1 = add i32 %N, -1
  %2 = icmp sgt i32 %1, 1
  br i1 %2, label %bb.nph, label %return

bb.nph:
  %tmp = sext i32 %1 to i64
  %tmp7 = add i64 %tmp, -1
  br label %bb

; CHECK: bb.nph:
; CHECK:   load double, double*
; CHECK:   br label %bb

bb:
  %indvar = phi i64 [ 0, %bb.nph ], [ %tmp9, %bb ]
  %tmp8 = add i64 %indvar, 2
  %scevgep = getelementptr double, double* %G, i64 %tmp8
  %tmp9 = add i64 %indvar, 1
  %scevgep10 = getelementptr double, double* %G, i64 %tmp9
  %3 = load double, double* %scevgep10, align 8
  %4 = load double, double* %scevgep, align 8
  %5 = fadd double %3, %4
  store double %5, double* %scevgep, align 8
  %exitcond = icmp eq i64 %tmp9, %tmp7
  br i1 %exitcond, label %return, label %bb

; Should only be one load in the loop.
; CHECK: bb:
; CHECK: load double, double*
; CHECK-NOT: load double, double*
; CHECK: br i1 %exitcond

return:
  ret void
}

;void test10(int N, double* G) {
;  long j;
;  for (j = 1; j < N - 1; j++)
;      G[j] = G[j] + G[j+1] + G[j-1];
;}

; PR5501
define void @test10(i32 %N, double* nocapture %G) nounwind ssp {
; Function label added so the checks below are matched only inside this
; function, consistent with the other tests in this file.
; CHECK-LABEL: @test10(
entry:
  %0 = add i32 %N, -1
  %1 = icmp sgt i32 %0, 1
  br i1 %1, label %bb.nph, label %return

bb.nph:
  %tmp = sext i32 %0 to i64
  %tmp8 = add i64 %tmp, -1
  br label %bb
; Two loads are expected in the preheader.
; CHECK: bb.nph:
; CHECK:   load double, double*
; CHECK:   load double, double*
; CHECK:   br label %bb


bb:
  ; Three addresses per iteration: indices %indvar, %indvar + 2, %indvar + 1.
  %indvar = phi i64 [ 0, %bb.nph ], [ %tmp11, %bb ]
  %scevgep = getelementptr double, double* %G, i64 %indvar
  %tmp9 = add i64 %indvar, 2
  %scevgep10 = getelementptr double, double* %G, i64 %tmp9
  %tmp11 = add i64 %indvar, 1
  %scevgep12 = getelementptr double, double* %G, i64 %tmp11
  %2 = load double, double* %scevgep12, align 8
  %3 = load double, double* %scevgep10, align 8
  %4 = fadd double %2, %3
  %5 = load double, double* %scevgep, align 8
  %6 = fadd double %4, %5
  store double %6, double* %scevgep12, align 8
  %exitcond = icmp eq i64 %tmp11, %tmp8
  br i1 %exitcond, label %return, label %bb

; Should only be one load in the loop.
; CHECK: bb:
; CHECK: load double, double*
; CHECK-NOT: load double, double*
; CHECK: br i1 %exitcond

return:
  ret void
}

; Test critical edge splitting.
define i32 @test11(i32* %p, i1 %C, i32 %N) {
; CHECK-LABEL: @test11(
block1:
  br i1 %C, label %block2, label %block3

block2:
  ; block2 has two successors and block4 has two predecessors, so the
  ; block2 -> block4 edge is critical. The checks expect the new load to be
  ; followed directly by an unconditional branch to block4.
  %cond = icmp sgt i32 %N, 1
  br i1 %cond, label %block4, label %block5
; CHECK: load i32, i32* %p
; CHECK-NEXT: br label %block4

block3:
  store i32 0, i32* %p
  br label %block4

block4:
  %PRE = load i32, i32* %p
  br label %block5

block5:
  %ret = phi i32 [ 0, %block2 ], [ %PRE, %block4 ]
  ret i32 %ret
; CHECK: block4:
; CHECK-NEXT: phi i32
}

; External declarations used by the tests that follow.
declare void @f()
declare void @g(i32)
declare i32 @__CxxFrameHandler3(...)

; Test that loads aren't PRE'd into EH pads.
define void @test12(i32* %p) personality i32 (...)* @__CxxFrameHandler3 {
; CHECK-LABEL: @test12(
block1:
  invoke void @f()
          to label %block2 unwind label %catch.dispatch

block2:
  invoke void @f()
          to label %block3 unwind label %cleanup

block3:
  ret void

catch.dispatch:
  %cs1 = catchswitch within none [label %catch] unwind label %cleanup2

catch:
  %c = catchpad within %cs1 []
  catchret from %c to label %block2

cleanup:
  ; The only store to %p in this function; this pad unwinds to cleanup2.
  %c1 = cleanuppad within none []
  store i32 0, i32* %p
  cleanupret from %c1 unwind label %cleanup2

; cleanup2 must keep its own load of %p and must not gain a phi.
; CHECK: cleanup2:
; CHECK-NOT: phi
; CHECK-NEXT: %c2 = cleanuppad within none []
; CHECK-NEXT: %NOTPRE = load i32, i32* %p
cleanup2:
  %c2 = cleanuppad within none []
  %NOTPRE = load i32, i32* %p
  call void @g(i32 %NOTPRE)
  cleanupret from %c2 unwind to caller
}

; Loads must not be PRE'd across calls that may throw.

define i32 @test13(i32* noalias nocapture readonly %x, i32* noalias nocapture %r, i32 %a) {

; CHECK-LABEL: @test13(
; CHECK: entry:
; CHECK-NEXT: icmp eq
; CHECK-NEXT: br i1

entry:
  %tobool = icmp eq i32 %a, 0
  br i1 %tobool, label %if.end, label %if.then

; CHECK: if.then:
; CHECK-NEXT: load i32
; CHECK-NEXT: store i32

if.then:
  %uu = load i32, i32* %x, align 4
  store i32 %uu, i32* %r, align 4
  br label %if.end

; The load has to remain after the call to @f.
; CHECK: if.end:
; CHECK-NEXT: call void @f()
; CHECK-NEXT: load i32

if.end:
  call void @f()
  %vv = load i32, i32* %x, align 4
  ret i32 %vv
}

; Same as test13, but now the blocking function is not immediately in load's
; block.

define i32 @test14(i32* noalias nocapture readonly %x, i32* noalias nocapture %r, i32 %a) {

; CHECK-LABEL: @test14(
; CHECK: entry:
; CHECK-NEXT: icmp eq
; CHECK-NEXT: br i1

entry:
  %tobool = icmp eq i32 %a, 0
  br i1 %tobool, label %if.end, label %if.then

; CHECK: if.then:
; CHECK-NEXT: load i32
; CHECK-NEXT: store i32

if.then:
  %uu = load i32, i32* %x, align 4
  store i32 %uu, i32* %r, align 4
  br label %if.end

; The checks expect the load to directly follow the call inside if.end, even
; though in the input it sits two blocks further down (follow_2).
; CHECK: if.end:
; CHECK-NEXT: call void @f()
; CHECK-NEXT: load i32

if.end:
  call void @f()
  br label %follow_1

follow_1:
  br label %follow_2

follow_2:
  %vv = load i32, i32* %x, align 4
  ret i32 %vv
}

; Same as test13, but %x here is dereferenceable. A pointer that is
; dereferenceable can be loaded from speculatively without a risk of trapping.
; Since it is OK to speculate, PRE is allowed.

define i32 @test15(i32* noalias nocapture readonly dereferenceable(8) align 4 %x, i32* noalias nocapture %r, i32 %a) {

; The label below now ends in '(' like every other function label in this file.
; CHECK-LABEL: @test15(
; CHECK: entry:
; CHECK-NEXT: icmp eq
; CHECK-NEXT: br i1

entry:
  %tobool = icmp eq i32 %a, 0
  br i1 %tobool, label %if.end, label %if.then

; CHECK: entry.if.end_crit_edge:
; CHECK-NEXT: %vv.pre = load i32, i32* %x, align 4
; CHECK-NEXT: br label %if.end

if.then:
  %uu = load i32, i32* %x, align 4
  store i32 %uu, i32* %r, align 4
  br label %if.end

; CHECK: if.then:
; CHECK-NEXT: %uu = load i32, i32* %x, align 4
; CHECK-NEXT: store i32 %uu, i32* %r, align 4
; CHECK-NEXT: br label %if.end

if.end:
  call void @f()
  %vv = load i32, i32* %x, align 4
  ret i32 %vv

; CHECK: if.end:
; CHECK-NEXT: %vv = phi i32 [ %vv.pre, %entry.if.end_crit_edge ], [ %uu, %if.then ]
; CHECK-NEXT: call void @f()
; CHECK-NEXT: ret i32 %vv

}

; Same as test14, but %x here is dereferenceable. A pointer that is
; dereferenceable can be loaded from speculatively without a risk of trapping.
; Since it is OK to speculate, PRE is allowed.

define i32 @test16(i32* noalias nocapture readonly dereferenceable(8) align 4 %x, i32* noalias nocapture %r, i32 %a) {

; CHECK-LABEL: @test16(
; CHECK: entry:
; CHECK-NEXT: icmp eq
; CHECK-NEXT: br i1

entry:
  %tobool = icmp eq i32 %a, 0
  br i1 %tobool, label %if.end, label %if.then

; CHECK: entry.if.end_crit_edge:
; CHECK-NEXT: %vv.pre = load i32, i32* %x, align 4
; CHECK-NEXT: br label %if.end

if.then:
  %uu = load i32, i32* %x, align 4
  store i32 %uu, i32* %r, align 4
  br label %if.end

; CHECK: if.then:
; CHECK-NEXT: %uu = load i32, i32* %x, align 4
; CHECK-NEXT: store i32 %uu, i32* %r, align 4
; CHECK-NEXT: br label %if.end

if.end:
  call void @f()
  br label %follow_1

; The checks expect the return to directly follow the call inside if.end, with
; the loaded value replaced by a phi at the top of that block.
; CHECK: if.end:
; CHECK-NEXT: %vv = phi i32 [ %vv.pre, %entry.if.end_crit_edge ], [ %uu, %if.then ]
; CHECK-NEXT: call void @f()
; CHECK-NEXT: ret i32 %vv

follow_1:
  br label %follow_2

follow_2:
  %vv = load i32, i32* %x, align 4
  ret i32 %vv
}