; RUN: opt -basicaa -loop-idiom < %s -S | FileCheck %s
; Exercises the loop-idiom pass: each function below contains a loop, and the
; CHECK lines state whether it is expected to be rewritten into a
; memset / memset_pattern / memcpy call or left alone.
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"

; For @test11_pattern
; CHECK: @.memset_pattern = private unnamed_addr constant [4 x i32] [i32 1, i32 1, i32 1, i32 1]

; For @test13_pattern
; CHECK: @.memset_pattern.1 = private unnamed_addr constant [2 x i32*] [i32* @G, i32* @G]

target triple = "x86_64-apple-darwin10.0.0"

; Simplest case: a unit-stride i8 store of zero with a runtime trip count.
; The CHECK lines expect a single memset of %Size bytes and no remaining store.
define void @test1(i8* %Base, i64 %Size) nounwind ssp {
bb.nph:
  br label %for.body

for.body:  ; preds = %bb.nph, %for.body
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
  %I.0.014 = getelementptr i8, i8* %Base, i64 %indvar
  store i8 0, i8* %I.0.014, align 1
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:  ; preds = %for.body
  ret void
; CHECK-LABEL: @test1(
; CHECK: call void @llvm.memset.p0i8.i64(i8* align 1 %Base, i8 0, i64 %Size, i1 false)
; CHECK-NOT: store
}

; Make sure a memset is formed for stores larger than 1 byte, and that the
; alignment of the store is preserved.
define void @test1_i16(i16* align 2 %Base, i64 %Size) nounwind ssp {
bb.nph:
  br label %for.body

for.body:  ; preds = %bb.nph, %for.body
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
  %I.0.014 = getelementptr i16, i16* %Base, i64 %indvar
  store i16 0, i16* %I.0.014, align 2
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:  ; preds = %for.body
  ret void
; The byte count is the trip count scaled by the 2-byte element size (shl 1).
; CHECK-LABEL: @test1_i16(
; CHECK: %[[BaseBC:.*]] = bitcast i16* %Base to i8*
; CHECK: %[[Sz:[0-9]+]] = shl i64 %Size, 1
; CHECK: call void @llvm.memset.p0i8.i64(i8* align 2 %[[BaseBC]], i8 0, i64 %[[Sz]], i1 false)
; CHECK-NOT: store
}

; This is a loop that was rotated but where the blocks weren't merged. This
; shouldn't perturb us.
define void @test1a(i8* %Base, i64 %Size) nounwind ssp {
bb.nph:
  br label %for.body

for.body:  ; preds = %bb.nph, %for.body.cont
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body.cont ]
  %I.0.014 = getelementptr i8, i8* %Base, i64 %indvar
  store i8 0, i8* %I.0.014, align 1
  %indvar.next = add i64 %indvar, 1
  br label %for.body.cont
for.body.cont:  ; preds = %for.body
  ; The exit test lives in its own block, after the store.
  %exitcond = icmp eq i64 %indvar.next, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:  ; preds = %for.body.cont
  ret void
; CHECK-LABEL: @test1a(
; CHECK: call void @llvm.memset.p0i8.i64(i8* align 1 %Base, i8 0, i64 %Size, i1 false)
; CHECK-NOT: store
}


; Guarded loop storing the i32 constant 16843009 (0x01010101). Every byte of
; that value is 1, so the CHECK lines expect a memset of byte value 1 covering
; 4 * %Size bytes, emitted after the guard branch on %cmp10.
define void @test2(i32* %Base, i64 %Size) nounwind ssp {
entry:
  %cmp10 = icmp eq i64 %Size, 0
  br i1 %cmp10, label %for.end, label %for.body

for.body:  ; preds = %entry, %for.body
  %i.011 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
  %add.ptr.i = getelementptr i32, i32* %Base, i64 %i.011
  store i32 16843009, i32* %add.ptr.i, align 4
  %inc = add nsw i64 %i.011, 1
  %exitcond = icmp eq i64 %inc, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:  ; preds = %for.body, %entry
  ret void
; CHECK-LABEL: @test2(
; CHECK: br i1 %cmp10,
; CHECK: %0 = shl i64 %Size, 2
; CHECK: call void @llvm.memset.p0i8.i64(i8* align 4 %Base1, i8 1, i64 %0, i1 false)
; CHECK-NOT: store
}

; This is a case where there is an extra may-aliased store in the loop, so we
; can't promote the loop to a memset.
define void @test3(i32* %Base, i64 %Size, i8 *%MayAlias) nounwind ssp {
entry:
  br label %for.body

for.body:  ; preds = %entry, %for.body
  %i.011 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
  %add.ptr.i = getelementptr i32, i32* %Base, i64 %i.011
  store i32 16843009, i32* %add.ptr.i, align 4

  ; Second store through an unrelated pointer argument; nothing in this
  ; function says it cannot alias %Base, so no memset may be formed.
  store i8 42, i8* %MayAlias
  %inc = add nsw i64 %i.011, 1
  %exitcond = icmp eq i64 %inc, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:  ; preds = %for.body
  ret void
; CHECK-LABEL: @test3(
; CHECK-NOT: memset
; CHECK: ret void
}

; Make sure the first store in the loop is turned into a memset.
define void @test4(i8* %Base) nounwind ssp {
bb.nph:
  ; Note: despite its name, %Base100 points at byte offset 1000 from %Base,
  ; past the 100 bytes (trip count 100, i8 stride) written by the loop.
  %Base100 = getelementptr i8, i8* %Base, i64 1000
  br label %for.body

for.body:  ; preds = %bb.nph, %for.body
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
  %I.0.014 = getelementptr i8, i8* %Base, i64 %indvar
  store i8 0, i8* %I.0.014, align 1

  ;; Store beyond the range of the memset, should be safe to promote.
  store i8 42, i8* %Base100

  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, 100
  br i1 %exitcond, label %for.end, label %for.body

for.end:  ; preds = %for.body
  ret void
; CHECK-LABEL: @test4(
; CHECK: call void @llvm.memset.p0i8.i64(i8* align 1 %Base, i8 0, i64 100, i1 false)
}

; This can't be promoted: the stored value is loop-variant, so it is not a
; memset.
define void @test5(i8* %Base, i64 %Size) nounwind ssp {
bb.nph:
  br label %for.body

for.body:  ; preds = %bb.nph, %for.body
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
  %I.0.014 = getelementptr i8, i8* %Base, i64 %indvar

  ; The stored byte is derived from the induction variable, so it changes on
  ; every iteration.
  %V = trunc i64 %indvar to i8
  store i8 %V, i8* %I.0.014, align 1
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:  ; preds = %for.body
  ret void
; CHECK-LABEL: @test5(
; CHECK-NOT: memset
; CHECK: ret void
}


;; memcpy formation
;; Byte-wise copy between two distinct allocas; expected to become one memcpy
;; of %Size bytes.
define void @test6(i64 %Size) nounwind ssp {
bb.nph:
  %Base = alloca i8, i32 10000
  %Dest = alloca i8, i32 10000
  br label %for.body

for.body:  ; preds = %bb.nph, %for.body
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
  %I.0.014 = getelementptr i8, i8* %Base, i64 %indvar
  %DestI = getelementptr i8, i8* %Dest, i64 %indvar
  %V = load i8, i8* %I.0.014, align 1
  store i8 %V, i8* %DestI, align 1
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:  ; preds = %for.body
  ret void
; CHECK-LABEL: @test6(
; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %Dest, i8* align 1 %Base, i64 %Size, i1 false)
; CHECK-NOT: store
; CHECK: ret void
}

;; memcpy formation, check alignment
;; The load is align 1 and the store is align 4; the memcpy must carry
;; "align 4" on the destination and "align 1" on the source.
define void @test6_dest_align(i32* noalias align 1 %Base, i32* noalias align 4 %Dest, i64 %Size) nounwind ssp {
bb.nph:
  br label %for.body

for.body:  ; preds = %bb.nph, %for.body
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
  %I.0.014 = getelementptr i32, i32* %Base, i64 %indvar
  %DestI = getelementptr i32, i32* %Dest, i64 %indvar
  %V = load i32, i32* %I.0.014, align 1
  store i32 %V, i32* %DestI, align 4
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:  ; preds = %for.body
  ret void
; CHECK-LABEL: @test6_dest_align(
; CHECK: %[[Dst:.*]] = bitcast i32* %Dest to i8*
; CHECK: %[[Src:.*]] = bitcast i32* %Base to i8*
; CHECK: %[[Sz:[0-9]+]] = shl i64 %Size, 2
; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %[[Dst]], i8* align 1 %[[Src]], i64 %[[Sz]], i1 false)
; CHECK-NOT: store
; CHECK: ret void
}

;; memcpy formation, check alignment
;; Mirror image of @test6_dest_align: the load is align 4 and the store is
;; align 1, so the memcpy must carry "align 1" on the destination and
;; "align 4" on the source.
define void @test6_src_align(i32* noalias align 4 %Base, i32* noalias align 1 %Dest, i64 %Size) nounwind ssp {
bb.nph:
  br label %for.body

for.body:  ; preds = %bb.nph, %for.body
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
  %I.0.014 = getelementptr i32, i32* %Base, i64 %indvar
  %DestI = getelementptr i32, i32* %Dest, i64 %indvar
  %V = load i32, i32* %I.0.014, align 4
  store i32 %V, i32* %DestI, align 1
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:  ; preds = %for.body
  ret void
; Dst and Src are (re)defined here rather than reused from the previous
; function, so these checks do not depend on both functions producing the same
; bitcast names and keep working under FileCheck's --enable-var-scope.
; CHECK-LABEL: @test6_src_align(
; CHECK: %[[Dst:.*]] = bitcast i32* %Dest to i8*
; CHECK: %[[Src:.*]] = bitcast i32* %Base to i8*
; CHECK: %[[Sz:[0-9]+]] = shl i64 %Size, 2
; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %[[Dst]], i8* align 4 %[[Src]], i64 %[[Sz]], i1 false)
; CHECK-NOT: store
; CHECK: ret void
}


; This is a loop that was rotated but where the blocks weren't merged. This
; shouldn't perturb us.
define void @test7(i8* %Base, i64 %Size) nounwind ssp {
bb.nph:
  br label %for.body

for.body:  ; preds = %bb.nph, %for.body.cont
  ; The header holds only the induction phi; the store and the exit test are
  ; in the following block.
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body.cont ]
  br label %for.body.cont
for.body.cont:  ; preds = %for.body
  %I.0.014 = getelementptr i8, i8* %Base, i64 %indvar
  store i8 0, i8* %I.0.014, align 1
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:  ; preds = %for.body.cont
  ret void
; CHECK-LABEL: @test7(
; CHECK: call void @llvm.memset.p0i8.i64(i8* align 1 %Base, i8 0, i64 %Size, i1 false)
; CHECK-NOT: store
}

; This loop should not be transformed: it only executes one iteration.
define void @test8(i64* %Ptr, i64 %Size) nounwind ssp {
bb.nph:
  br label %for.body

for.body:  ; preds = %bb.nph, %for.body
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
  %PI = getelementptr i64, i64* %Ptr, i64 %indvar
  store i64 0, i64 *%PI
  %indvar.next = add i64 %indvar, 1
  ; Exit as soon as the incremented counter equals 1, i.e. after the first
  ; iteration.
  %exitcond = icmp eq i64 %indvar.next, 1
  br i1 %exitcond, label %for.end, label %for.body

for.end:  ; preds = %for.body
  ret void
; CHECK-LABEL: @test8(
; CHECK: store i64 0, i64* %PI
}

; Opaque external function; @test9 uses its result as a pointer that may alias
; its argument.
declare i8* @external(i8*)

;; This cannot be transformed into a memcpy, because the read-from location is
;; mutated by the loop.
define void @test9(i64 %Size) nounwind ssp {
bb.nph:
  %Base = alloca i8, i32 10000
  %Dest = alloca i8, i32 10000

  ; %Base is passed to an opaque call, so the returned pointer may alias it.
  %BaseAlias = call i8* @external(i8* %Base)
  br label %for.body

for.body:  ; preds = %bb.nph, %for.body
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
  %I.0.014 = getelementptr i8, i8* %Base, i64 %indvar
  %DestI = getelementptr i8, i8* %Dest, i64 %indvar
  %V = load i8, i8* %I.0.014, align 1
  store i8 %V, i8* %DestI, align 1

  ;; This store can clobber the input.
  store i8 4, i8* %BaseAlias

  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:  ; preds = %for.body
  ret void
; CHECK-LABEL: @test9(
; CHECK-NOT: llvm.memcpy
; CHECK: ret void
}

; Two dimensional nested loop should be promoted to one big memset.
; The index is i * 100 + j with both i and j running over [0, 100), hence the
; expected 10000-byte memset in the entry block.
define void @test10(i8* %X) nounwind ssp {
entry:
  br label %bb.nph

bb.nph:  ; preds = %entry, %for.inc10
  %i.04 = phi i32 [ 0, %entry ], [ %inc12, %for.inc10 ]
  br label %for.body5

for.body5:  ; preds = %for.body5, %bb.nph
  %j.02 = phi i32 [ 0, %bb.nph ], [ %inc, %for.body5 ]
  %mul = mul nsw i32 %i.04, 100
  %add = add nsw i32 %j.02, %mul
  %idxprom = sext i32 %add to i64
  %arrayidx = getelementptr inbounds i8, i8* %X, i64 %idxprom
  store i8 0, i8* %arrayidx, align 1
  %inc = add nsw i32 %j.02, 1
  %cmp4 = icmp eq i32 %inc, 100
  br i1 %cmp4, label %for.inc10, label %for.body5

for.inc10:  ; preds = %for.body5
  %inc12 = add nsw i32 %i.04, 1
  %cmp = icmp eq i32 %inc12, 100
  br i1 %cmp, label %for.end13, label %bb.nph

for.end13:  ; preds = %for.inc10
  ret void
; CHECK-LABEL: @test10(
; CHECK: entry:
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 1 %X, i8 0, i64 10000, i1 false)
; CHECK-NOT: store
; CHECK: ret void
}

; On darwin10 (which is the triple in this .ll file) this loop can be turned
; into a memset_pattern call.
; rdar://9009151
define void @test11_pattern(i32* nocapture %P) nounwind ssp {
entry:
  br label %for.body

for.body:  ; preds = %entry, %for.body
  %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %for.body ]
  %arrayidx = getelementptr i32, i32* %P, i64 %indvar
  ; The i32 value 1 is not a single repeated byte, so this cannot be a plain
  ; memset.
  store i32 1, i32* %arrayidx, align 4
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, 10000
  br i1 %exitcond, label %for.end, label %for.body

for.end:  ; preds = %for.body
  ret void
; CHECK-LABEL: @test11_pattern(
; CHECK-NEXT: entry:
; CHECK-NEXT: bitcast
; CHECK-NEXT: memset_pattern
; CHECK-NOT: store
; CHECK: ret void
}

; Store of null should turn into memset of zero.
; Pointers are 8 bytes under this file's datalayout (p:64:64:64), so 10000
; iterations cover the 80000 bytes expected below.
define void @test12(i32** nocapture %P) nounwind ssp {
entry:
  br label %for.body

for.body:  ; preds = %entry, %for.body
  %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %for.body ]
  %arrayidx = getelementptr i32*, i32** %P, i64 %indvar
  store i32* null, i32** %arrayidx, align 4
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, 10000
  br i1 %exitcond, label %for.end, label %for.body

for.end:  ; preds = %for.body
  ret void
; CHECK-LABEL: @test12(
; CHECK-NEXT: entry:
; CHECK-NEXT: bitcast
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 %P1, i8 0, i64 80000, i1 false)
; CHECK-NOT: store
; CHECK: ret void
}

; Global whose address is the stored value in @test13_pattern.
@G = global i32 5

; This store-of-address loop can be turned into a memset_pattern call.
; rdar://9009151
define void @test13_pattern(i32** nocapture %P) nounwind ssp {
entry:
  br label %for.body

for.body:  ; preds = %entry, %for.body
  %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %for.body ]
  %arrayidx = getelementptr i32*, i32** %P, i64 %indvar
  ; The stored value is the loop-invariant address of the global @G.
  store i32* @G, i32** %arrayidx, align 4
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, 10000
  br i1 %exitcond, label %for.end, label %for.body

for.end:  ; preds = %for.body
  ret void
; CHECK-LABEL: @test13_pattern(
; CHECK-NEXT: entry:
; CHECK-NEXT: bitcast
; CHECK-NEXT: memset_pattern
; CHECK-NOT: store
; CHECK: ret void
}



; PR9815 - This is a partial overlap case that cannot be safely transformed
; into a memcpy.
@g_50 = global [7 x i32] [i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 0], align 16

; Each iteration loads @g_50[i + 4] and stores it to @g_50[i + 5], so the
; second iteration reads the element the first iteration just wrote. The loop
; must therefore keep its load and store.
define i32 @test14() nounwind {
entry:
  br label %for.body

for.body:  ; preds = %for.body, %entry
  %tmp5 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
  %add = add nsw i32 %tmp5, 4
  %idxprom = sext i32 %add to i64
  %arrayidx = getelementptr inbounds [7 x i32], [7 x i32]* @g_50, i32 0, i64 %idxprom
  %tmp2 = load i32, i32* %arrayidx, align 4
  %add4 = add nsw i32 %tmp5, 5
  %idxprom5 = sext i32 %add4 to i64
  %arrayidx6 = getelementptr inbounds [7 x i32], [7 x i32]* @g_50, i32 0, i64 %idxprom5
  store i32 %tmp2, i32* %arrayidx6, align 4
  %inc = add nsw i32 %tmp5, 1
  %cmp = icmp slt i32 %inc, 2
  br i1 %cmp, label %for.body, label %for.end

for.end:  ; preds = %for.body
  %tmp8 = load i32, i32* getelementptr inbounds ([7 x i32], [7 x i32]* @g_50, i32 0, i64 6), align 4
  ret i32 %tmp8
; CHECK-LABEL: @test14(
; CHECK: for.body:
; CHECK: load i32
; CHECK: store i32
; CHECK: br i1 %cmp

}

define void @PR14241(i32* %s, i64 %size) {
; Ensure that we don't form a memcpy for strided loops. Briefly, when we taught
; LoopIdiom about memmove and strided loops, this got miscompiled into a memcpy
; instead of a memmove. If we get the memmove transform back, this will catch
; regressions.
;
; CHECK-LABEL: @PR14241(

entry:
  %end.idx = add i64 %size, -1
  %end.ptr = getelementptr inbounds i32, i32* %s, i64 %end.idx
  br label %while.body
; CHECK-NOT: memcpy
;
; FIXME: When we regain the ability to form a memmove here, this test should be
; reversed and turned into a positive assertion.
; CHECK-NOT: memmove

while.body:  ; preds = %entry, %while.body
  ; Copies element [p + 1] down to [p], then advances p by one element, so
  ; the source and destination ranges overlap.
  %phi.ptr = phi i32* [ %s, %entry ], [ %next.ptr, %while.body ]
  %src.ptr = getelementptr inbounds i32, i32* %phi.ptr, i64 1
  %val = load i32, i32* %src.ptr, align 4
; CHECK: load
  %dst.ptr = getelementptr inbounds i32, i32* %phi.ptr, i64 0
  store i32 %val, i32* %dst.ptr, align 4
; CHECK: store
  %next.ptr = getelementptr inbounds i32, i32* %phi.ptr, i64 1
  %cmp = icmp eq i32* %next.ptr, %end.ptr
  br i1 %cmp, label %exit, label %while.body

exit:  ; preds = %while.body
  ret void
; CHECK: ret void
}

; Recognize loops with a negative stride.
; The index runs from 65536 down to 0 inclusive: 65537 i32 elements, i.e. the
; 262148 bytes expected below.
define void @test15(i32* nocapture %f) {
entry:
  br label %for.body

for.body:  ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ 65536, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i32, i32* %f, i64 %indvars.iv
  store i32 0, i32* %arrayidx, align 4
  %indvars.iv.next = add nsw i64 %indvars.iv, -1
  %cmp = icmp sgt i64 %indvars.iv, 0
  br i1 %cmp, label %for.body, label %for.cond.cleanup

for.cond.cleanup:  ; preds = %for.body
  ret void
; CHECK-LABEL: @test15(
; CHECK: call void @llvm.memset.p0i8.i64(i8* align 4 %f1, i8 0, i64 262148, i1 false)
; CHECK-NOT: store
; CHECK: ret void
}

; Loop with a negative stride. Verify an aliasing write to f[65536] prevents
; the creation of a memset.
define void @test16(i32* nocapture %f) {
entry:
  ; Address of f[65536], the first element the loop below writes.
  %arrayidx1 = getelementptr inbounds i32, i32* %f, i64 65536
  br label %for.body

for.body:  ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ 65536, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i32, i32* %f, i64 %indvars.iv
  store i32 0, i32* %arrayidx, align 4
  ; Second store on every iteration, to a location inside the zeroed range.
  store i32 1, i32* %arrayidx1, align 4
  %indvars.iv.next = add nsw i64 %indvars.iv, -1
  %cmp = icmp sgt i64 %indvars.iv, 0
  br i1 %cmp, label %for.body, label %for.cond.cleanup

for.cond.cleanup:  ; preds = %for.body
  ret void
; CHECK-LABEL: @test16(
; CHECK-NOT: call void @llvm.memset.p0i8.i64
; CHECK: ret void
}

; Handle memcpy-able loops with negative stride.
; Copies %c i32 elements from %a into a freshly malloc'ed buffer, walking the
; index from %c - 1 down to 0; the copy is skipped when %c is 0.
define noalias i32* @test17(i32* nocapture readonly %a, i32 %c) {
entry:
  %conv = sext i32 %c to i64
  %mul = shl nsw i64 %conv, 2
  %call = tail call noalias i8* @malloc(i64 %mul)
  %0 = bitcast i8* %call to i32*
  %tobool.9 = icmp eq i32 %c, 0
  br i1 %tobool.9, label %while.end, label %while.body.preheader

while.body.preheader:  ; preds = %entry
  br label %while.body

while.body:  ; preds = %while.body.preheader, %while.body
  %dec10.in = phi i32 [ %dec10, %while.body ], [ %c, %while.body.preheader ]
  %dec10 = add nsw i32 %dec10.in, -1
  %idxprom = sext i32 %dec10 to i64
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %idxprom
  %1 = load i32, i32* %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds i32, i32* %0, i64 %idxprom
  store i32 %1, i32* %arrayidx2, align 4
  %tobool = icmp eq i32 %dec10, 0
  br i1 %tobool, label %while.end.loopexit, label %while.body

while.end.loopexit:  ; preds = %while.body
  br label %while.end

while.end:  ; preds = %while.end.loopexit, %entry
  ret i32* %0
; CHECK-LABEL: @test17(
; CHECK: call void @llvm.memcpy
; CHECK: ret i32*
}

declare noalias i8* @malloc(i64)

; Handle memcpy-able loops with negative stride.
; void test18(unsigned *__restrict__ a, unsigned *__restrict__ b) {
;   for (int i = 2047; i >= 0; --i) {
;     a[i] = b[i];
;   }
; }
; NOTE(review): attribute group #0 is referenced below, but no
; "attributes #0 = { ... }" definition is visible in this file - confirm that
; is intentional.
define void @test18(i32* noalias nocapture %a, i32* noalias nocapture readonly %b) #0 {
entry:
  br label %for.body

for.body:  ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ 2047, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
  store i32 %0, i32* %arrayidx2, align 4
  %indvars.iv.next = add nsw i64 %indvars.iv, -1
  %cmp = icmp sgt i64 %indvars.iv, 0
  br i1 %cmp, label %for.body, label %for.cond.cleanup

for.cond.cleanup:  ; preds = %for.body
  ret void
; CHECK-LABEL: @test18(
; CHECK: call void @llvm.memcpy
; CHECK: ret
}

; Two dimensional nested loop with negative stride should be promoted to one big memset.
; Both loops count down from 99 to 0 and the index is i * 100 + j, so the nest
; covers bytes [0, 10000) of %X. The CHECK lines expect a single 10000-byte
; memset in the entry block and, like the positive-stride sibling @test10, no
; store left anywhere after it.
define void @test19(i8* nocapture %X) {
entry:
  br label %for.cond1.preheader

for.cond1.preheader:  ; preds = %entry, %for.inc4
  %i.06 = phi i32 [ 99, %entry ], [ %dec5, %for.inc4 ]
  %mul = mul nsw i32 %i.06, 100
  br label %for.body3

for.body3:  ; preds = %for.cond1.preheader, %for.body3
  %j.05 = phi i32 [ 99, %for.cond1.preheader ], [ %dec, %for.body3 ]
  %add = add nsw i32 %j.05, %mul
  %idxprom = sext i32 %add to i64
  %arrayidx = getelementptr inbounds i8, i8* %X, i64 %idxprom
  store i8 0, i8* %arrayidx, align 1
  %dec = add nsw i32 %j.05, -1
  %cmp2 = icmp sgt i32 %j.05, 0
  br i1 %cmp2, label %for.body3, label %for.inc4

for.inc4:  ; preds = %for.body3
  %dec5 = add nsw i32 %i.06, -1
  %cmp = icmp sgt i32 %i.06, 0
  br i1 %cmp, label %for.cond1.preheader, label %for.end6

for.end6:  ; preds = %for.inc4
  ret void
; CHECK-LABEL: @test19(
; CHECK: entry:
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 1 %X, i8 0, i64 10000, i1 false)
; CHECK-NOT: store
; CHECK: ret void
}

; Handle loops where the trip count is a narrow integer that needs to be
; extended.
define void @form_memset_narrow_size(i64* %ptr, i32 %size) {
; CHECK-LABEL: @form_memset_narrow_size(
entry:
  ; Guard: the loop is entered only when %size is positive.
  %cmp1 = icmp sgt i32 %size, 0
  br i1 %cmp1, label %loop.ph, label %exit
; CHECK: entry:
; CHECK: %[[C1:.*]] = icmp sgt i32 %size, 0
; CHECK-NEXT: br i1 %[[C1]], label %loop.ph, label %exit

loop.ph:  ; preds = %entry
  br label %loop.body
; The i32 trip count is zero-extended to i64 and scaled by the 8-byte element
; size (shl 3) to form the memset length.
; CHECK: loop.ph:
; CHECK-NEXT: %[[ZEXT_SIZE:.*]] = zext i32 %size to i64
; CHECK-NEXT: %[[SCALED_SIZE:.*]] = shl i64 %[[ZEXT_SIZE]], 3
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 %{{.*}}, i8 0, i64 %[[SCALED_SIZE]], i1 false)

loop.body:  ; preds = %loop.ph, %loop.body
  %storemerge4 = phi i32 [ 0, %loop.ph ], [ %inc, %loop.body ]
  %idxprom = sext i32 %storemerge4 to i64
  %arrayidx = getelementptr inbounds i64, i64* %ptr, i64 %idxprom
  store i64 0, i64* %arrayidx, align 8
  %inc = add nsw i32 %storemerge4, 1
  %cmp2 = icmp slt i32 %inc, %size
  br i1 %cmp2, label %loop.body, label %loop.exit

loop.exit:  ; preds = %loop.body
  br label %exit

exit:  ; preds = %loop.exit, %entry
  ret void
}

; memcpy counterpart of @form_memset_narrow_size: the loop counter is i32, and
; the CHECK lines expect it to be zero-extended to i64 and scaled by the
; 8-byte element size to form the memcpy length.
define void @form_memcpy_narrow_size(i64* noalias %dst, i64* noalias %src, i32 %size) {
; CHECK-LABEL: @form_memcpy_narrow_size(
entry:
  ; Guard: the loop is entered only when %size is positive.
  %cmp1 = icmp sgt i32 %size, 0
  br i1 %cmp1, label %loop.ph, label %exit
; CHECK: entry:
; CHECK: %[[C1:.*]] = icmp sgt i32 %size, 0
; CHECK-NEXT: br i1 %[[C1]], label %loop.ph, label %exit

loop.ph:  ; preds = %entry
  br label %loop.body
; CHECK: loop.ph:
; CHECK-NEXT: %[[ZEXT_SIZE:.*]] = zext i32 %size to i64
; CHECK-NEXT: %[[SCALED_SIZE:.*]] = shl i64 %[[ZEXT_SIZE]], 3
; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %{{.*}}, i8* align 8 %{{.*}}, i64 %[[SCALED_SIZE]], i1 false)

loop.body:  ; preds = %loop.ph, %loop.body
  %storemerge4 = phi i32 [ 0, %loop.ph ], [ %inc, %loop.body ]
  %idxprom1 = sext i32 %storemerge4 to i64
  %arrayidx1 = getelementptr inbounds i64, i64* %src, i64 %idxprom1
  %v = load i64, i64* %arrayidx1, align 8
  %idxprom2 = sext i32 %storemerge4 to i64
  %arrayidx2 = getelementptr inbounds i64, i64* %dst, i64 %idxprom2
  store i64 %v, i64* %arrayidx2, align 8
  %inc = add nsw i32 %storemerge4, 1
  %cmp2 = icmp slt i32 %inc, %size
  br i1 %cmp2, label %loop.body, label %loop.exit

loop.exit:  ; preds = %loop.body
  br label %exit

exit:  ; preds = %loop.exit, %entry
  ret void
}

; Validate that "memset_pattern" has the proper attributes.
; CHECK: declare void @memset_pattern16(i8* nocapture, i8* nocapture readonly, i64) [[ATTRS:#[0-9]+]]
; CHECK: [[ATTRS]] = { argmemonly }