; RUN: opt < %s -basicaa -gvn -enable-load-pre -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"

; Diamond CFG: %p is stored on the block3 path only and reloaded in block4.
; The checks expect a load of %p to appear in block2 and block4 to open with
; a phi followed directly by the return (no load left in block4).
define i32 @test1(i32* %p, i1 %C) {
; CHECK: @test1
block1:
  br i1 %C, label %block2, label %block3

block2:
  br label %block4
; CHECK: block2:
; CHECK-NEXT: load i32* %p

block3:
  store i32 0, i32* %p
  br label %block4

block4:
  %PRE = load i32* %p
  ret i32 %PRE
; CHECK: block4:
; CHECK-NEXT: phi i32
; CHECK-NEXT: ret i32
}

; This is a simple phi translation case.
; The loaded pointer %P2 is a phi of %p (from block3) and %q (from block2), so
; the checks expect the load inserted in block2 to use %q.
define i32 @test2(i32* %p, i32* %q, i1 %C) {
; CHECK: @test2
block1:
  br i1 %C, label %block2, label %block3

block2:
  br label %block4
; CHECK: block2:
; CHECK-NEXT: load i32* %q

block3:
  store i32 0, i32* %p
  br label %block4

block4:
  %P2 = phi i32* [%p, %block3], [%q, %block2]
  %PRE = load i32* %P2
  ret i32 %PRE
; CHECK: block4:
; CHECK-NEXT: phi i32 [
; CHECK-NOT: load
; CHECK: ret i32
}

; This is a PRE case that requires phi translation through a GEP.
; %B (the GEP of %q by 1) is computed in block1, which precedes block2; the
; checks expect the load inserted in block2 to go through %B.
; NOTE(review): the store of %B to %Hack presumably exists only to keep %B
; from being trivially dead -- confirm.
define i32 @test3(i32* %p, i32* %q, i32** %Hack, i1 %C) {
; CHECK: @test3
block1:
  %B = getelementptr i32* %q, i32 1
  store i32* %B, i32** %Hack
  br i1 %C, label %block2, label %block3

block2:
  br label %block4
; CHECK: block2:
; CHECK-NEXT: load i32* %B

block3:
  %A = getelementptr i32* %p, i32 1
  store i32 0, i32* %A
  br label %block4

block4:
  %P2 = phi i32* [%p, %block3], [%q, %block2]
  %P3 = getelementptr i32* %P2, i32 1
  %PRE = load i32* %P3
  ret i32 %PRE
; CHECK: block4:
; CHECK-NEXT: phi i32 [
; CHECK-NOT: load
; CHECK: ret i32
}

;; Here the loaded address is available, but the computation is in 'block3'
;; which does not dominate 'block2'.
define i32 @test4(i32* %p, i32* %q, i32** %Hack, i1 %C) {
; CHECK: @test4
; Unlike test3, the GEP %B of %q lives in block3, so block2 cannot reuse it.
; The checks only require that some i32 load shows up in block2 before its
; branch, and that block4 starts with a phi and contains no load.
block1:
  br i1 %C, label %block2, label %block3

block2:
  br label %block4
; CHECK: block2:
; CHECK: load i32*
; CHECK: br label %block4

block3:
  %B = getelementptr i32* %q, i32 1
  store i32* %B, i32** %Hack

  %A = getelementptr i32* %p, i32 1
  store i32 0, i32* %A
  br label %block4

block4:
  %P2 = phi i32* [%p, %block3], [%q, %block2]
  %P3 = getelementptr i32* %P2, i32 1
  %PRE = load i32* %P3
  ret i32 %PRE
; CHECK: block4:
; CHECK-NEXT: phi i32 [
; CHECK-NOT: load
; CHECK: ret i32
}

;void test5(int N, double *G) {
;  int j;
;  for (j = 0; j < N - 1; j++)
;    G[j] = G[j] + G[j+1];
;}

; Loop form of the C code above: each iteration loads G[indvar] and
; G[indvar+1] and stores the sum back to G[indvar]. The checks expect a load
; in the preheader (bb.nph) and exactly one load left inside the loop body.
define void @test5(i32 %N, double* nocapture %G) nounwind ssp {
; CHECK: @test5
entry:
  %0 = add i32 %N, -1
  %1 = icmp sgt i32 %0, 0
  br i1 %1, label %bb.nph, label %return

bb.nph:
  %tmp = zext i32 %0 to i64
  br label %bb

; CHECK: bb.nph:
; CHECK: load double*
; CHECK: br label %bb

bb:
  %indvar = phi i64 [ 0, %bb.nph ], [ %tmp6, %bb ]
  %tmp6 = add i64 %indvar, 1
  %scevgep = getelementptr double* %G, i64 %tmp6
  %scevgep7 = getelementptr double* %G, i64 %indvar
  %2 = load double* %scevgep7, align 8
  %3 = load double* %scevgep, align 8
  %4 = fadd double %2, %3
  store double %4, double* %scevgep7, align 8
  %exitcond = icmp eq i64 %tmp6, %tmp
  br i1 %exitcond, label %return, label %bb

; Should only be one load in the loop.
; CHECK: bb:
; CHECK: load double*
; CHECK-NOT: load double*
; CHECK: br i1 %exitcond

return:
  ret void
}

;void test6(int N, double *G) {
;  int j;
;  for (j = 0; j < N - 1; j++)
;    G[j+1] = G[j] + G[j+1];
;}

; Same loop shape as test5, but the sum is stored to G[indvar+1] (%scevgep)
; instead of G[indvar]. The checks are the same: one load in the preheader
; and exactly one load inside the loop body.
define void @test6(i32 %N, double* nocapture %G) nounwind ssp {
; CHECK: @test6
entry:
  %0 = add i32 %N, -1
  %1 = icmp sgt i32 %0, 0
  br i1 %1, label %bb.nph, label %return

bb.nph:
  %tmp = zext i32 %0 to i64
  br label %bb

; CHECK: bb.nph:
; CHECK: load double*
; CHECK: br label %bb

bb:
  %indvar = phi i64 [ 0, %bb.nph ], [ %tmp6, %bb ]
  %tmp6 = add i64 %indvar, 1
  %scevgep = getelementptr double* %G, i64 %tmp6
  %scevgep7 = getelementptr double* %G, i64 %indvar
  %2 = load double* %scevgep7, align 8
  %3 = load double* %scevgep, align 8
  %4 = fadd double %2, %3
  store double %4, double* %scevgep, align 8
  %exitcond = icmp eq i64 %tmp6, %tmp
  br i1 %exitcond, label %return, label %bb

; Should only be one load in the loop.
; CHECK: bb:
; CHECK: load double*
; CHECK-NOT: load double*
; CHECK: br i1 %exitcond

return:
  ret void
}

;void test7(int N, double* G) {
;  long j;
;  G[1] = 1;
;  for (j = 1; j < N - 1; j++)
;    G[j+1] = G[j] + G[j+1];
;}

; This requires phi translation of the adds.
define void @test7(i32 %N, double* nocapture %G) nounwind ssp {
; Anchor the checks below to this function's output, as the other tests do.
; CHECK: @test7
entry:
  ; G[1] is written before the loop is entered.
  %0 = getelementptr inbounds double* %G, i64 1
  store double 1.000000e+00, double* %0, align 8
  %1 = add i32 %N, -1
  %2 = icmp sgt i32 %1, 1
  br i1 %2, label %bb.nph, label %return

bb.nph:
  %tmp = sext i32 %1 to i64
  %tmp7 = add i64 %tmp, -1
  br label %bb

bb:
  ; %indvar starts at 0, so the first iteration reads G[1] and G[2].
  %indvar = phi i64 [ 0, %bb.nph ], [ %tmp9, %bb ]
  %tmp8 = add i64 %indvar, 2
  %scevgep = getelementptr double* %G, i64 %tmp8
  %tmp9 = add i64 %indvar, 1
  %scevgep10 = getelementptr double* %G, i64 %tmp9
  %3 = load double* %scevgep10, align 8
  %4 = load double* %scevgep, align 8
  %5 = fadd double %3, %4
  store double %5, double* %scevgep, align 8
  %exitcond = icmp eq i64 %tmp9, %tmp7
  br i1 %exitcond, label %return, label %bb

; Should only be one load in the loop.
; CHECK: bb:
; CHECK: load double*
; CHECK-NOT: load double*
; CHECK: br i1 %exitcond

return:
  ret void
}

;; Here the loaded address isn't available in 'block2' at all, requiring a new
;; GEP to be inserted into it.
define i32 @test8(i32* %p, i32* %q, i32** %Hack, i1 %C) {
; CHECK: @test8
; No GEP of %q exists anywhere in the input. The checks require an i32 load
; in block2 before its branch, and a phi with no load in block4.
block1:
  br i1 %C, label %block2, label %block3

block2:
  br label %block4
; CHECK: block2:
; CHECK: load i32*
; CHECK: br label %block4

block3:
  %A = getelementptr i32* %p, i32 1
  store i32 0, i32* %A
  br label %block4

block4:
  %P2 = phi i32* [%p, %block3], [%q, %block2]
  %P3 = getelementptr i32* %P2, i32 1
  %PRE = load i32* %P3
  ret i32 %PRE
; CHECK: block4:
; CHECK-NEXT: phi i32 [
; CHECK-NOT: load
; CHECK: ret i32
}

;void test9(int N, double* G) {
;  long j;
;  for (j = 1; j < N - 1; j++)
;    G[j+1] = G[j] + G[j+1];
;}

; This requires phi translation of the adds.
define void @test9(i32 %N, double* nocapture %G) nounwind ssp {
; Anchor the checks below to this function's output, as the other tests do.
; CHECK: @test9
entry:
  ; Unnamed placeholder instruction: it takes value slot %0 so the numbered
  ; values below can start at %1.
  add i32 0, 0
  %1 = add i32 %N, -1
  %2 = icmp sgt i32 %1, 1
  br i1 %2, label %bb.nph, label %return

bb.nph:
  %tmp = sext i32 %1 to i64
  %tmp7 = add i64 %tmp, -1
  br label %bb

; CHECK: bb.nph:
; CHECK: load double*
; CHECK: br label %bb

bb:
  ; Each iteration loads G[indvar+1] and G[indvar+2] and stores the sum to
  ; G[indvar+2].
  %indvar = phi i64 [ 0, %bb.nph ], [ %tmp9, %bb ]
  %tmp8 = add i64 %indvar, 2
  %scevgep = getelementptr double* %G, i64 %tmp8
  %tmp9 = add i64 %indvar, 1
  %scevgep10 = getelementptr double* %G, i64 %tmp9
  %3 = load double* %scevgep10, align 8
  %4 = load double* %scevgep, align 8
  %5 = fadd double %3, %4
  store double %5, double* %scevgep, align 8
  %exitcond = icmp eq i64 %tmp9, %tmp7
  br i1 %exitcond, label %return, label %bb

; Should only be one load in the loop.
; CHECK: bb:
; CHECK: load double*
; CHECK-NOT: load double*
; CHECK: br i1 %exitcond

return:
  ret void
}

;void test10(int N, double* G) {
;  long j;
;  for (j = 1; j < N - 1; j++)
;    G[j] = G[j] + G[j+1] + G[j-1];
;}

; PR5501
define void @test10(i32 %N, double* nocapture %G) nounwind ssp {
; Anchor the checks below to this function's output, as the other tests do.
; CHECK: @test10
entry:
  %0 = add i32 %N, -1
  %1 = icmp sgt i32 %0, 1
  br i1 %1, label %bb.nph, label %return

bb.nph:
  %tmp = sext i32 %0 to i64
  %tmp8 = add i64 %tmp, -1
  br label %bb
; The loop reads three neighbouring elements per iteration; the checks expect
; two loads in the preheader and a single load left in the loop.
; CHECK: bb.nph:
; CHECK: load double*
; CHECK: load double*
; CHECK: br label %bb


bb:
  %indvar = phi i64 [ 0, %bb.nph ], [ %tmp11, %bb ]
  %scevgep = getelementptr double* %G, i64 %indvar
  %tmp9 = add i64 %indvar, 2
  %scevgep10 = getelementptr double* %G, i64 %tmp9
  %tmp11 = add i64 %indvar, 1
  %scevgep12 = getelementptr double* %G, i64 %tmp11
  %2 = load double* %scevgep12, align 8
  %3 = load double* %scevgep10, align 8
  %4 = fadd double %2, %3
  %5 = load double* %scevgep, align 8
  %6 = fadd double %4, %5
  store double %6, double* %scevgep12, align 8
  %exitcond = icmp eq i64 %tmp11, %tmp8
  br i1 %exitcond, label %return, label %bb

; Should only be one load in the loop.
; CHECK: bb:
; CHECK: load double*
; CHECK-NOT: load double*
; CHECK: br i1 %exitcond

return:
  ret void
}

; Test critical edge splitting.
; block2 has two successors (block4, block5) and block4 has two predecessors
; (block2, block3), so the block2 -> block4 edge is critical. The checks expect
; a load of %p immediately followed by an unconditional branch to block4, and
; block4 to open with a phi.
define i32 @test11(i32* %p, i1 %C, i32 %N) {
; CHECK: @test11
block1:
  br i1 %C, label %block2, label %block3

block2:
  %cond = icmp sgt i32 %N, 1
  br i1 %cond, label %block4, label %block5
; CHECK: load i32* %p
; CHECK-NEXT: br label %block4

block3:
  store i32 0, i32* %p
  br label %block4

block4:
  %PRE = load i32* %p
  br label %block5

block5:
  %ret = phi i32 [ 0, %block2 ], [ %PRE, %block4 ]
  ret i32 %ret
; CHECK: block4:
; CHECK-NEXT: phi i32
}