1; RUN: opt -basicaa -loop-idiom < %s -S | FileCheck %s 2target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" 3 4; For @test11_pattern 5; CHECK: @.memset_pattern = private unnamed_addr constant [4 x i32] [i32 1, i32 1, i32 1, i32 1] 6 7; For @test13_pattern 8; CHECK: @.memset_pattern.1 = private unnamed_addr constant [2 x i32*] [i32* @G, i32* @G] 9 10target triple = "x86_64-apple-darwin10.0.0" 11 12define void @test1(i8* %Base, i64 %Size) nounwind ssp { 13bb.nph: ; preds = %entry 14 br label %for.body 15 16for.body: ; preds = %bb.nph, %for.body 17 %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ] 18 %I.0.014 = getelementptr i8, i8* %Base, i64 %indvar 19 store i8 0, i8* %I.0.014, align 1 20 %indvar.next = add i64 %indvar, 1 21 %exitcond = icmp eq i64 %indvar.next, %Size 22 br i1 %exitcond, label %for.end, label %for.body 23 24for.end: ; preds = %for.body, %entry 25 ret void 26; CHECK-LABEL: @test1( 27; CHECK: call void @llvm.memset.p0i8.i64(i8* %Base, i8 0, i64 %Size, i32 1, i1 false) 28; CHECK-NOT: store 29} 30 31; This is a loop that was rotated but where the blocks weren't merged. This 32; shouldn't perturb us. 33define void @test1a(i8* %Base, i64 %Size) nounwind ssp { 34bb.nph: ; preds = %entry 35 br label %for.body 36 37for.body: ; preds = %bb.nph, %for.body 38 %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body.cont ] 39 %I.0.014 = getelementptr i8, i8* %Base, i64 %indvar 40 store i8 0, i8* %I.0.014, align 1 41 %indvar.next = add i64 %indvar, 1 42 br label %for.body.cont 43for.body.cont: 44 %exitcond = icmp eq i64 %indvar.next, %Size 45 br i1 %exitcond, label %for.end, label %for.body 46 47for.end: ; preds = %for.body, %entry 48 ret void 49; CHECK-LABEL: @test1a( 50; CHECK: call void @llvm.memset.p0i8.i64(i8* %Base, i8 0, i64 %Size, i32 1, i1 false) 51; CHECK-NOT: store 52} 53 54 55define void @test2(i32* %Base, i64 %Size) nounwind ssp { 56entry: 57 %cmp10 = icmp eq i64 %Size, 0 58 br i1 %cmp10, label %for.end, label %for.body 59 60for.body: ; preds = %entry, %for.body 61 %i.011 = phi i64 [ %inc, %for.body ], [ 0, %entry ] 62 %add.ptr.i = getelementptr i32, i32* %Base, i64 %i.011 63 store i32 16843009, i32* %add.ptr.i, align 4 64 %inc = add nsw i64 %i.011, 1 65 %exitcond = icmp eq i64 %inc, %Size 66 br i1 %exitcond, label %for.end, label %for.body 67 68for.end: ; preds = %for.body, %entry 69 ret void 70; CHECK-LABEL: @test2( 71; CHECK: br i1 %cmp10, 72; CHECK: %0 = shl i64 %Size, 2 73; CHECK: call void @llvm.memset.p0i8.i64(i8* %Base1, i8 1, i64 %0, i32 4, i1 false) 74; CHECK-NOT: store 75} 76 77; This is a case where there is an extra may-aliased store in the loop, we can't 78; promote the memset. 79define void @test3(i32* %Base, i64 %Size, i8 *%MayAlias) nounwind ssp { 80entry: 81 br label %for.body 82 83for.body: ; preds = %entry, %for.body 84 %i.011 = phi i64 [ %inc, %for.body ], [ 0, %entry ] 85 %add.ptr.i = getelementptr i32, i32* %Base, i64 %i.011 86 store i32 16843009, i32* %add.ptr.i, align 4 87 88 store i8 42, i8* %MayAlias 89 %inc = add nsw i64 %i.011, 1 90 %exitcond = icmp eq i64 %inc, %Size 91 br i1 %exitcond, label %for.end, label %for.body 92 93for.end: ; preds = %entry 94 ret void 95; CHECK-LABEL: @test3( 96; CHECK-NOT: memset 97; CHECK: ret void 98} 99 100 101;; TODO: We should be able to promote this memset. Not yet though. 102define void @test4(i8* %Base) nounwind ssp { 103bb.nph: ; preds = %entry 104 %Base100 = getelementptr i8, i8* %Base, i64 1000 105 br label %for.body 106 107for.body: ; preds = %bb.nph, %for.body 108 %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ] 109 %I.0.014 = getelementptr i8, i8* %Base, i64 %indvar 110 store i8 0, i8* %I.0.014, align 1 111 112 ;; Store beyond the range memset, should be safe to promote. 113 store i8 42, i8* %Base100 114 115 %indvar.next = add i64 %indvar, 1 116 %exitcond = icmp eq i64 %indvar.next, 100 117 br i1 %exitcond, label %for.end, label %for.body 118 119for.end: ; preds = %for.body, %entry 120 ret void 121; CHECK-TODO-LABEL: @test4( 122; CHECK-TODO: call void @llvm.memset.p0i8.i64(i8* %Base, i8 0, i64 100, i32 1, i1 false) 123; CHECK-TODO-NOT: store 124} 125 126; This can't be promoted: the memset is a store of a loop variant value. 127define void @test5(i8* %Base, i64 %Size) nounwind ssp { 128bb.nph: ; preds = %entry 129 br label %for.body 130 131for.body: ; preds = %bb.nph, %for.body 132 %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ] 133 %I.0.014 = getelementptr i8, i8* %Base, i64 %indvar 134 135 %V = trunc i64 %indvar to i8 136 store i8 %V, i8* %I.0.014, align 1 137 %indvar.next = add i64 %indvar, 1 138 %exitcond = icmp eq i64 %indvar.next, %Size 139 br i1 %exitcond, label %for.end, label %for.body 140 141for.end: ; preds = %for.body, %entry 142 ret void 143; CHECK-LABEL: @test5( 144; CHECK-NOT: memset 145; CHECK: ret void 146} 147 148 149;; memcpy formation 150define void @test6(i64 %Size) nounwind ssp { 151bb.nph: 152 %Base = alloca i8, i32 10000 153 %Dest = alloca i8, i32 10000 154 br label %for.body 155 156for.body: ; preds = %bb.nph, %for.body 157 %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ] 158 %I.0.014 = getelementptr i8, i8* %Base, i64 %indvar 159 %DestI = getelementptr i8, i8* %Dest, i64 %indvar 160 %V = load i8, i8* %I.0.014, align 1 161 store i8 %V, i8* %DestI, align 1 162 %indvar.next = add i64 %indvar, 1 163 %exitcond = icmp eq i64 %indvar.next, %Size 164 br i1 %exitcond, label %for.end, label %for.body 165 166for.end: ; preds = %for.body, %entry 167 ret void 168; CHECK-LABEL: @test6( 169; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %Dest, i8* %Base, i64 %Size, i32 1, i1 false) 170; CHECK-NOT: store 171; CHECK: ret void 172} 173 174 175; This is a loop that was rotated but where the blocks weren't merged. This 176; shouldn't perturb us. 177define void @test7(i8* %Base, i64 %Size) nounwind ssp { 178bb.nph: ; preds = %entry 179 br label %for.body 180 181for.body: ; preds = %bb.nph, %for.body 182 %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body.cont ] 183 br label %for.body.cont 184for.body.cont: 185 %I.0.014 = getelementptr i8, i8* %Base, i64 %indvar 186 store i8 0, i8* %I.0.014, align 1 187 %indvar.next = add i64 %indvar, 1 188 %exitcond = icmp eq i64 %indvar.next, %Size 189 br i1 %exitcond, label %for.end, label %for.body 190 191for.end: ; preds = %for.body, %entry 192 ret void 193; CHECK-LABEL: @test7( 194; CHECK: call void @llvm.memset.p0i8.i64(i8* %Base, i8 0, i64 %Size, i32 1, i1 false) 195; CHECK-NOT: store 196} 197 198; This is a loop should not be transformed, it only executes one iteration. 199define void @test8(i64* %Ptr, i64 %Size) nounwind ssp { 200bb.nph: ; preds = %entry 201 br label %for.body 202 203for.body: ; preds = %bb.nph, %for.body 204 %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ] 205 %PI = getelementptr i64, i64* %Ptr, i64 %indvar 206 store i64 0, i64 *%PI 207 %indvar.next = add i64 %indvar, 1 208 %exitcond = icmp eq i64 %indvar.next, 1 209 br i1 %exitcond, label %for.end, label %for.body 210 211for.end: ; preds = %for.body, %entry 212 ret void 213; CHECK-LABEL: @test8( 214; CHECK: store i64 0, i64* %PI 215} 216 217declare i8* @external(i8*) 218 219;; This cannot be transformed into a memcpy, because the read-from location is 220;; mutated by the loop. 221define void @test9(i64 %Size) nounwind ssp { 222bb.nph: 223 %Base = alloca i8, i32 10000 224 %Dest = alloca i8, i32 10000 225 226 %BaseAlias = call i8* @external(i8* %Base) 227 br label %for.body 228 229for.body: ; preds = %bb.nph, %for.body 230 %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ] 231 %I.0.014 = getelementptr i8, i8* %Base, i64 %indvar 232 %DestI = getelementptr i8, i8* %Dest, i64 %indvar 233 %V = load i8, i8* %I.0.014, align 1 234 store i8 %V, i8* %DestI, align 1 235 236 ;; This store can clobber the input. 237 store i8 4, i8* %BaseAlias 238 239 %indvar.next = add i64 %indvar, 1 240 %exitcond = icmp eq i64 %indvar.next, %Size 241 br i1 %exitcond, label %for.end, label %for.body 242 243for.end: ; preds = %for.body, %entry 244 ret void 245; CHECK-LABEL: @test9( 246; CHECK-NOT: llvm.memcpy 247; CHECK: ret void 248} 249 250; Two dimensional nested loop should be promoted to one big memset. 251define void @test10(i8* %X) nounwind ssp { 252entry: 253 br label %bb.nph 254 255bb.nph: ; preds = %entry, %for.inc10 256 %i.04 = phi i32 [ 0, %entry ], [ %inc12, %for.inc10 ] 257 br label %for.body5 258 259for.body5: ; preds = %for.body5, %bb.nph 260 %j.02 = phi i32 [ 0, %bb.nph ], [ %inc, %for.body5 ] 261 %mul = mul nsw i32 %i.04, 100 262 %add = add nsw i32 %j.02, %mul 263 %idxprom = sext i32 %add to i64 264 %arrayidx = getelementptr inbounds i8, i8* %X, i64 %idxprom 265 store i8 0, i8* %arrayidx, align 1 266 %inc = add nsw i32 %j.02, 1 267 %cmp4 = icmp eq i32 %inc, 100 268 br i1 %cmp4, label %for.inc10, label %for.body5 269 270for.inc10: ; preds = %for.body5 271 %inc12 = add nsw i32 %i.04, 1 272 %cmp = icmp eq i32 %inc12, 100 273 br i1 %cmp, label %for.end13, label %bb.nph 274 275for.end13: ; preds = %for.inc10 276 ret void 277; CHECK-LABEL: @test10( 278; CHECK: entry: 279; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %X, i8 0, i64 10000, i32 1, i1 false) 280; CHECK-NOT: store 281; CHECK: ret void 282} 283 284; On darwin10 (which is the triple in this .ll file) this loop can be turned 285; into a memset_pattern call. 286; rdar://9009151 287define void @test11_pattern(i32* nocapture %P) nounwind ssp { 288entry: 289 br label %for.body 290 291for.body: ; preds = %entry, %for.body 292 %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %for.body ] 293 %arrayidx = getelementptr i32, i32* %P, i64 %indvar 294 store i32 1, i32* %arrayidx, align 4 295 %indvar.next = add i64 %indvar, 1 296 %exitcond = icmp eq i64 %indvar.next, 10000 297 br i1 %exitcond, label %for.end, label %for.body 298 299for.end: ; preds = %for.body 300 ret void 301; CHECK-LABEL: @test11_pattern( 302; CHECK-NEXT: entry: 303; CHECK-NEXT: bitcast 304; CHECK-NEXT: memset_pattern 305; CHECK-NOT: store 306; CHECK: ret void 307} 308 309; Store of null should turn into memset of zero. 310define void @test12(i32** nocapture %P) nounwind ssp { 311entry: 312 br label %for.body 313 314for.body: ; preds = %entry, %for.body 315 %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %for.body ] 316 %arrayidx = getelementptr i32*, i32** %P, i64 %indvar 317 store i32* null, i32** %arrayidx, align 4 318 %indvar.next = add i64 %indvar, 1 319 %exitcond = icmp eq i64 %indvar.next, 10000 320 br i1 %exitcond, label %for.end, label %for.body 321 322for.end: ; preds = %for.body 323 ret void 324; CHECK-LABEL: @test12( 325; CHECK-NEXT: entry: 326; CHECK-NEXT: bitcast 327; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %P1, i8 0, i64 80000, i32 4, i1 false) 328; CHECK-NOT: store 329; CHECK: ret void 330} 331 332@G = global i32 5 333 334; This store-of-address loop can be turned into a memset_pattern call. 335; rdar://9009151 336define void @test13_pattern(i32** nocapture %P) nounwind ssp { 337entry: 338 br label %for.body 339 340for.body: ; preds = %entry, %for.body 341 %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %for.body ] 342 %arrayidx = getelementptr i32*, i32** %P, i64 %indvar 343 store i32* @G, i32** %arrayidx, align 4 344 %indvar.next = add i64 %indvar, 1 345 %exitcond = icmp eq i64 %indvar.next, 10000 346 br i1 %exitcond, label %for.end, label %for.body 347 348for.end: ; preds = %for.body 349 ret void 350; CHECK-LABEL: @test13_pattern( 351; CHECK-NEXT: entry: 352; CHECK-NEXT: bitcast 353; CHECK-NEXT: memset_pattern 354; CHECK-NOT: store 355; CHECK: ret void 356} 357 358 359 360; PR9815 - This is a partial overlap case that cannot be safely transformed 361; into a memcpy. 362@g_50 = global [7 x i32] [i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 0], align 16 363 364define i32 @test14() nounwind { 365entry: 366 br label %for.body 367 368for.body: ; preds = %for.inc, %for.body.lr.ph 369 %tmp5 = phi i32 [ %inc, %for.body ], [ 0, %entry ] 370 %add = add nsw i32 %tmp5, 4 371 %idxprom = sext i32 %add to i64 372 %arrayidx = getelementptr inbounds [7 x i32], [7 x i32]* @g_50, i32 0, i64 %idxprom 373 %tmp2 = load i32, i32* %arrayidx, align 4 374 %add4 = add nsw i32 %tmp5, 5 375 %idxprom5 = sext i32 %add4 to i64 376 %arrayidx6 = getelementptr inbounds [7 x i32], [7 x i32]* @g_50, i32 0, i64 %idxprom5 377 store i32 %tmp2, i32* %arrayidx6, align 4 378 %inc = add nsw i32 %tmp5, 1 379 %cmp = icmp slt i32 %inc, 2 380 br i1 %cmp, label %for.body, label %for.end 381 382for.end: ; preds = %for.inc 383 %tmp8 = load i32, i32* getelementptr inbounds ([7 x i32], [7 x i32]* @g_50, i32 0, i64 6), align 4 384 ret i32 %tmp8 385; CHECK-LABEL: @test14( 386; CHECK: for.body: 387; CHECK: load i32 388; CHECK: store i32 389; CHECK: br i1 %cmp 390 391} 392 393define void @PR14241(i32* %s, i64 %size) { 394; Ensure that we don't form a memcpy for strided loops. Briefly, when we taught 395; LoopIdiom about memmove and strided loops, this got miscompiled into a memcpy 396; instead of a memmove. If we get the memmove transform back, this will catch 397; regressions. 398; 399; CHECK-LABEL: @PR14241( 400 401entry: 402 %end.idx = add i64 %size, -1 403 %end.ptr = getelementptr inbounds i32, i32* %s, i64 %end.idx 404 br label %while.body 405; CHECK-NOT: memcpy 406; 407; FIXME: When we regain the ability to form a memmove here, this test should be 408; reversed and turned into a positive assertion. 409; CHECK-NOT: memmove 410 411while.body: 412 %phi.ptr = phi i32* [ %s, %entry ], [ %next.ptr, %while.body ] 413 %src.ptr = getelementptr inbounds i32, i32* %phi.ptr, i64 1 414 %val = load i32, i32* %src.ptr, align 4 415; CHECK: load 416 %dst.ptr = getelementptr inbounds i32, i32* %phi.ptr, i64 0 417 store i32 %val, i32* %dst.ptr, align 4 418; CHECK: store 419 %next.ptr = getelementptr inbounds i32, i32* %phi.ptr, i64 1 420 %cmp = icmp eq i32* %next.ptr, %end.ptr 421 br i1 %cmp, label %exit, label %while.body 422 423exit: 424 ret void 425; CHECK: ret void 426} 427 428; Recognize loops with a negative stride. 429define void @test15(i32* nocapture %f) { 430entry: 431 br label %for.body 432 433for.body: 434 %indvars.iv = phi i64 [ 65536, %entry ], [ %indvars.iv.next, %for.body ] 435 %arrayidx = getelementptr inbounds i32, i32* %f, i64 %indvars.iv 436 store i32 0, i32* %arrayidx, align 4 437 %indvars.iv.next = add nsw i64 %indvars.iv, -1 438 %cmp = icmp sgt i64 %indvars.iv, 0 439 br i1 %cmp, label %for.body, label %for.cond.cleanup 440 441for.cond.cleanup: 442 ret void 443; CHECK-LABEL: @test15( 444; CHECK: call void @llvm.memset.p0i8.i64(i8* %f1, i8 0, i64 262148, i32 4, i1 false) 445; CHECK-NOT: store 446; CHECK: ret void 447} 448 449; Loop with a negative stride. Verify an aliasing write to f[65536] prevents 450; the creation of a memset. 451define void @test16(i32* nocapture %f) { 452entry: 453 %arrayidx1 = getelementptr inbounds i32, i32* %f, i64 65536 454 br label %for.body 455 456for.body: ; preds = %entry, %for.body 457 %indvars.iv = phi i64 [ 65536, %entry ], [ %indvars.iv.next, %for.body ] 458 %arrayidx = getelementptr inbounds i32, i32* %f, i64 %indvars.iv 459 store i32 0, i32* %arrayidx, align 4 460 store i32 1, i32* %arrayidx1, align 4 461 %indvars.iv.next = add nsw i64 %indvars.iv, -1 462 %cmp = icmp sgt i64 %indvars.iv, 0 463 br i1 %cmp, label %for.body, label %for.cond.cleanup 464 465for.cond.cleanup: ; preds = %for.body 466 ret void 467; CHECK-LABEL: @test16( 468; CHECK-NOT: call void @llvm.memset.p0i8.i64 469; CHECK: ret void 470} 471 472; Handle memcpy-able loops with negative stride. 473define noalias i32* @test17(i32* nocapture readonly %a, i32 %c) { 474entry: 475 %conv = sext i32 %c to i64 476 %mul = shl nsw i64 %conv, 2 477 %call = tail call noalias i8* @malloc(i64 %mul) 478 %0 = bitcast i8* %call to i32* 479 %tobool.9 = icmp eq i32 %c, 0 480 br i1 %tobool.9, label %while.end, label %while.body.preheader 481 482while.body.preheader: ; preds = %entry 483 br label %while.body 484 485while.body: ; preds = %while.body.preheader, %while.body 486 %dec10.in = phi i32 [ %dec10, %while.body ], [ %c, %while.body.preheader ] 487 %dec10 = add nsw i32 %dec10.in, -1 488 %idxprom = sext i32 %dec10 to i64 489 %arrayidx = getelementptr inbounds i32, i32* %a, i64 %idxprom 490 %1 = load i32, i32* %arrayidx, align 4 491 %arrayidx2 = getelementptr inbounds i32, i32* %0, i64 %idxprom 492 store i32 %1, i32* %arrayidx2, align 4 493 %tobool = icmp eq i32 %dec10, 0 494 br i1 %tobool, label %while.end.loopexit, label %while.body 495 496while.end.loopexit: ; preds = %while.body 497 br label %while.end 498 499while.end: ; preds = %while.end.loopexit, %entry 500 ret i32* %0 501; CHECK-LABEL: @test17( 502; CHECK: call void @llvm.memcpy 503; CHECK: ret i32* 504} 505 506declare noalias i8* @malloc(i64) 507 508; Handle memcpy-able loops with negative stride. 509; void test18(unsigned *__restrict__ a, unsigned *__restrict__ b) { 510; for (int i = 2047; i >= 0; --i) { 511; a[i] = b[i]; 512; } 513; } 514define void @test18(i32* noalias nocapture %a, i32* noalias nocapture readonly %b) #0 { 515entry: 516 br label %for.body 517 518for.body: ; preds = %entry, %for.body 519 %indvars.iv = phi i64 [ 2047, %entry ], [ %indvars.iv.next, %for.body ] 520 %arrayidx = getelementptr inbounds i32, i32* %b, i64 %indvars.iv 521 %0 = load i32, i32* %arrayidx, align 4 522 %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv 523 store i32 %0, i32* %arrayidx2, align 4 524 %indvars.iv.next = add nsw i64 %indvars.iv, -1 525 %cmp = icmp sgt i64 %indvars.iv, 0 526 br i1 %cmp, label %for.body, label %for.cond.cleanup 527 528for.cond.cleanup: ; preds = %for.body 529 ret void 530; CHECK-LABEL: @test18( 531; CHECK: call void @llvm.memcpy 532; CHECK: ret 533} 534