; RUN: opt < %s -basicaa -gvn -S -die | FileCheck %s

; 32-bit little endian target.
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"

;; Trivial RLE test: the load reads exactly the value just stored to %P,
;; so GVN forwards %V and the load disappears.
define i32 @test0(i32 %V, i32* %P) {
  store i32 %V, i32* %P

  %A = load i32* %P
  ret i32 %A
; CHECK: @test0
; CHECK: ret i32 %V
}


;;===----------------------------------------------------------------------===;;
;; Tests for crashers
;;===----------------------------------------------------------------------===;;

;; PR5016: first-class aggregate store followed by a narrow load through a
;; bitcast used to crash GVN; this only needs to not crash, so no CHECKs.
define i8 @crash0({i32, i32} %A, {i32, i32}* %P) {
  store {i32, i32} %A, {i32, i32}* %P
  %X = bitcast {i32, i32}* %P to i8*
  %Y = load i8* %X
  ret i8 %Y
}


;;===----------------------------------------------------------------------===;;
;; Store -> Load and Load -> Load forwarding where src and dst are different
;; types, but where the base pointer is a must alias.
;;===----------------------------------------------------------------------===;;

;; i32 -> f32 forwarding.
define float @coerce_mustalias1(i32 %V, i32* %P) {
  store i32 %V, i32* %P

  %P2 = bitcast i32* %P to float*

  %A = load float* %P2
  ret float %A
; CHECK: @coerce_mustalias1
; CHECK-NOT: load
; CHECK: ret float
}

;; i32* -> float forwarding.
define float @coerce_mustalias2(i32* %V, i32** %P) {
  store i32* %V, i32** %P

  %P2 = bitcast i32** %P to float*

  %A = load float* %P2
  ret float %A
; CHECK: @coerce_mustalias2
; CHECK-NOT: load
; CHECK: ret float
}

;; float -> i32* forwarding.
define i32* @coerce_mustalias3(float %V, float* %P) {
  store float %V, float* %P

  %P2 = bitcast float* %P to i32**

  %A = load i32** %P2
  ret i32* %A
; CHECK: @coerce_mustalias3
; CHECK-NOT: load
; CHECK: ret i32*
}

;; i32 -> f32 load forwarding.
define float @coerce_mustalias4(i32* %P, i1 %cond) {
  %A = load i32* %P

  %P2 = bitcast i32* %P to float*
  %B = load float* %P2
  br i1 %cond, label %T, label %F
T:
  ret float %B

F:
  %X = bitcast i32 %A to float
  ret float %X

; CHECK: @coerce_mustalias4
; CHECK: %A = load i32* %P
; CHECK-NOT: load
; CHECK: ret float
; CHECK: F:
}

;; i32 -> i8 forwarding
define i8 @coerce_mustalias5(i32 %V, i32* %P) {
  store i32 %V, i32* %P

  %P2 = bitcast i32* %P to i8*

  %A = load i8* %P2
  ret i8 %A
; CHECK: @coerce_mustalias5
; CHECK-NOT: load
; CHECK: ret i8
}

;; i64 -> float forwarding
define float @coerce_mustalias6(i64 %V, i64* %P) {
  store i64 %V, i64* %P

  %P2 = bitcast i64* %P to float*

  %A = load float* %P2
  ret float %A
; CHECK: @coerce_mustalias6
; CHECK-NOT: load
; CHECK: ret float
}

;; i64 -> i8* (32-bit) forwarding
define i8* @coerce_mustalias7(i64 %V, i64* %P) {
  store i64 %V, i64* %P

  %P2 = bitcast i64* %P to i8**

  %A = load i8** %P2
  ret i8* %A
; CHECK: @coerce_mustalias7
; CHECK-NOT: load
; CHECK: ret i8*
}

; memset -> i16 forwarding: every byte is 1, so P[42] as i16 is 0x0101 = 257.
define signext i16 @memset_to_i16_local(i16* %A) nounwind ssp {
entry:
  %conv = bitcast i16* %A to i8*
  tail call void @llvm.memset.p0i8.i64(i8* %conv, i8 1, i64 200, i32 1, i1 false)
  %arrayidx = getelementptr inbounds i16* %A, i64 42
  %tmp2 = load i16* %arrayidx
  ret i16 %tmp2
; CHECK: @memset_to_i16_local
; CHECK-NOT: load
; CHECK: ret i16 257
}

; memset -> float forwarding.
; The memset value is not a constant here, so the forwarded float must be
; synthesized from %Val by byte replication (zext/shl/or) plus a bitcast.
define float @memset_to_float_local(float* %A, i8 %Val) nounwind ssp {
entry:
  %conv = bitcast float* %A to i8* ; <i8*> [#uses=1]
  tail call void @llvm.memset.p0i8.i64(i8* %conv, i8 %Val, i64 400, i32 1, i1 false)
  %arrayidx = getelementptr inbounds float* %A, i64 42 ; <float*> [#uses=1]
  %tmp2 = load float* %arrayidx ; <float> [#uses=1]
  ret float %tmp2
; CHECK: @memset_to_float_local
; CHECK-NOT: load
; CHECK: zext
; CHECK-NEXT: shl
; CHECK-NEXT: or
; CHECK-NEXT: shl
; CHECK-NEXT: or
; CHECK-NEXT: bitcast
; CHECK-NEXT: ret float
}

;; non-local memset -> i16 load forwarding: each predecessor writes a
;; different fill byte, so the load becomes a phi of the two i16 patterns.
define i16 @memset_to_i16_nonlocal0(i16* %P, i1 %cond) {
  %P3 = bitcast i16* %P to i8*
  br i1 %cond, label %T, label %F
T:
  tail call void @llvm.memset.p0i8.i64(i8* %P3, i8 1, i64 400, i32 1, i1 false)
  br label %Cont

F:
  tail call void @llvm.memset.p0i8.i64(i8* %P3, i8 2, i64 400, i32 1, i1 false)
  br label %Cont

Cont:
  %P2 = getelementptr i16* %P, i32 4
  %A = load i16* %P2
  ret i16 %A

; CHECK: @memset_to_i16_nonlocal0
; CHECK: Cont:
; CHECK-NEXT: %A = phi i16 [ 514, %F ], [ 257, %T ]
; CHECK-NOT: load
; CHECK: ret i16 %A
}

@GCst = constant {i32, float, i32 } { i32 42, float 14., i32 97 }

; memcpy -> float forwarding: element 1 of @GCst is the float 14.0.
define float @memcpy_to_float_local(float* %A) nounwind ssp {
entry:
  %conv = bitcast float* %A to i8* ; <i8*> [#uses=1]
  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %conv, i8* bitcast ({i32, float, i32 }* @GCst to i8*), i64 12, i32 1, i1 false)
  %arrayidx = getelementptr inbounds float* %A, i64 1 ; <float*> [#uses=1]
  %tmp2 = load float* %arrayidx ; <float> [#uses=1]
  ret float %tmp2
; CHECK: @memcpy_to_float_local
; CHECK-NOT: load
; CHECK: ret float 1.400000e+01
}



;; non-local i32/float -> i8 load forwarding.
define i8 @coerce_mustalias_nonlocal0(i32* %P, i1 %cond) {
  %P2 = bitcast i32* %P to float*
  %P3 = bitcast i32* %P to i8*
  br i1 %cond, label %T, label %F
T:
  store i32 42, i32* %P
  br label %Cont

F:
  store float 1.0, float* %P2
  br label %Cont

Cont:
  %A = load i8* %P3
  ret i8 %A

; CHECK: @coerce_mustalias_nonlocal0
; CHECK: Cont:
; CHECK: %A = phi i8 [
; CHECK-NOT: load
; CHECK: ret i8 %A
}


;; non-local i32/float -> i8 load forwarding. This also tests that the "P3"
;; bitcast equivalence can be properly phi translated.
define i8 @coerce_mustalias_nonlocal1(i32* %P, i1 %cond) {
  %P2 = bitcast i32* %P to float*
  br i1 %cond, label %T, label %F
T:
  store i32 42, i32* %P
  br label %Cont

F:
  store float 1.0, float* %P2
  br label %Cont

Cont:
  %P3 = bitcast i32* %P to i8*
  %A = load i8* %P3
  ret i8 %A

;; FIXME: This is disabled because this caused a miscompile in the llvm-gcc
;; bootstrap, see r82411
;
; HECK: @coerce_mustalias_nonlocal1
; HECK: Cont:
; HECK: %A = phi i8 [
; HECK-NOT: load
; HECK: ret i8 %A
}


;; non-local i32 -> i8 partial redundancy load forwarding: the load is
;; available only on the %T path, so PRE inserts a copy in %F.
define i8 @coerce_mustalias_pre0(i32* %P, i1 %cond) {
  %P3 = bitcast i32* %P to i8*
  br i1 %cond, label %T, label %F
T:
  store i32 42, i32* %P
  br label %Cont

F:
  br label %Cont

Cont:
  %A = load i8* %P3
  ret i8 %A

; CHECK: @coerce_mustalias_pre0
; CHECK: F:
; CHECK: load i8* %P3
; CHECK: Cont:
; CHECK: %A = phi i8 [
; CHECK-NOT: load
; CHECK: ret i8 %A
}

;;===----------------------------------------------------------------------===;;
;; Store -> Load and Load -> Load forwarding where src and dst are different
;; types, and the reload is an offset from the store pointer.
288;;===----------------------------------------------------------------------===;; 289 290;; i32 -> i8 forwarding. 291;; PR4216 292define i8 @coerce_offset0(i32 %V, i32* %P) { 293 store i32 %V, i32* %P 294 295 %P2 = bitcast i32* %P to i8* 296 %P3 = getelementptr i8* %P2, i32 2 297 298 %A = load i8* %P3 299 ret i8 %A 300; CHECK: @coerce_offset0 301; CHECK-NOT: load 302; CHECK: ret i8 303} 304 305;; non-local i32/float -> i8 load forwarding. 306define i8 @coerce_offset_nonlocal0(i32* %P, i1 %cond) { 307 %P2 = bitcast i32* %P to float* 308 %P3 = bitcast i32* %P to i8* 309 %P4 = getelementptr i8* %P3, i32 2 310 br i1 %cond, label %T, label %F 311T: 312 store i32 42, i32* %P 313 br label %Cont 314 315F: 316 store float 1.0, float* %P2 317 br label %Cont 318 319Cont: 320 %A = load i8* %P4 321 ret i8 %A 322 323; CHECK: @coerce_offset_nonlocal0 324; CHECK: Cont: 325; CHECK: %A = phi i8 [ 326; CHECK-NOT: load 327; CHECK: ret i8 %A 328} 329 330 331;; non-local i32 -> i8 partial redundancy load forwarding. 
define i8 @coerce_offset_pre0(i32* %P, i1 %cond) {
  %P3 = bitcast i32* %P to i8*
  %P4 = getelementptr i8* %P3, i32 2
  br i1 %cond, label %T, label %F
T:
  store i32 42, i32* %P
  br label %Cont

F:
  br label %Cont

Cont:
  %A = load i8* %P4
  ret i8 %A

; CHECK: @coerce_offset_pre0
; CHECK: F:
; CHECK: load i8* %P4
; CHECK: Cont:
; CHECK: %A = phi i8 [
; CHECK-NOT: load
; CHECK: ret i8 %A
}

; Every load of %p sees the same value, so all of them collapse onto %z and
; the final dereference goes straight through it.
define i32 @chained_load(i32** %p) {
block1:
  %A = alloca i32*

  %z = load i32** %p
  store i32* %z, i32** %A
  br i1 true, label %block2, label %block3

block2:
  %a = load i32** %p
  br label %block4

block3:
  %b = load i32** %p
  br label %block4

block4:
  %c = load i32** %p
  %d = load i32* %c
  ret i32 %d

; CHECK: @chained_load
; CHECK: %z = load i32** %p
; CHECK-NOT: load
; CHECK: %d = load i32* %z
; CHECK-NEXT: ret i32 %d
}


declare i1 @cond() readonly
declare i1 @cond2() readonly

define i32 @phi_trans2() {
; CHECK: @phi_trans2
entry:
  %P = alloca i32, i32 400
  br label %F1

F1:
  %A = phi i32 [1, %entry], [2, %F]
  %cond2 = call i1 @cond()
  br i1 %cond2, label %T1, label %TY

T1:
  %P2 = getelementptr i32* %P, i32 %A
  %x = load i32* %P2
  %cond = call i1 @cond2()
  br i1 %cond, label %TX, label %F

F:
  %P3 = getelementptr i32* %P, i32 2
  store i32 17, i32* %P3

  store i32 42, i32* %P2 ; Provides "P[A]".
  br label %F1

TX:
  ; This load should not be compiled to 'ret i32 42'. An overly clever
  ; implementation of GVN would see that we're returning 17 if the loop
  ; executes once or 42 if it executes more than that, but we'd have to do
  ; loop restructuring to expose this, and GVN shouldn't do this sort of CFG
  ; transformation.

; CHECK: TX:
; CHECK: ret i32 %x
  ret i32 %x
TY:
  ret i32 0
}

define i32 @phi_trans3(i32* %p) {
; CHECK: @phi_trans3
block1:
  br i1 true, label %block2, label %block3

block2:
  store i32 87, i32* %p
  br label %block4

block3:
  %p2 = getelementptr i32* %p, i32 43
  store i32 97, i32* %p2
  br label %block4

block4:
  %A = phi i32 [-1, %block2], [42, %block3]
  br i1 true, label %block5, label %exit

; CHECK: block4:
; CHECK-NEXT: %D = phi i32 [ 87, %block2 ], [ 97, %block3 ]
; CHECK-NOT: load

block5:
  %B = add i32 %A, 1
  br i1 true, label %block6, label %exit

block6:
  %C = getelementptr i32* %p, i32 %B
  br i1 true, label %block7, label %exit

block7:
  %D = load i32* %C
  ret i32 %D

; CHECK: block7:
; CHECK-NEXT: ret i32 %D

exit:
  ret i32 -1
}

define i8 @phi_trans4(i8* %p) {
; CHECK: @phi_trans4
entry:
  %X3 = getelementptr i8* %p, i32 192
  store i8 192, i8* %X3

  %X = getelementptr i8* %p, i32 4
  %Y = load i8* %X
  br label %loop

loop:
  %i = phi i32 [4, %entry], [192, %loop]
  %X2 = getelementptr i8* %p, i32 %i
  %Y2 = load i8* %X2

; CHECK: loop:
; CHECK-NEXT: %Y2 = phi i8 [ %Y, %entry ], [ 0, %loop ]
; CHECK-NOT: load i8

  %cond = call i1 @cond2()

  %Z = bitcast i8 *%X3 to i32*
  store i32 0, i32* %Z
  br i1 %cond, label %loop, label %out

out:
  %R = add i8 %Y, %Y2
  ret i8 %R
}

define i8 @phi_trans5(i8* %p) {
; CHECK: @phi_trans5
entry:

  %X4 = getelementptr i8* %p, i32 2
  store i8 19, i8* %X4

  %X = getelementptr i8* %p, i32 4
  %Y = load i8* %X
  br label %loop

loop:
  %i = phi i32 [4, %entry], [3, %cont]
  %X2 = getelementptr i8* %p, i32 %i
  %Y2 = load i8* %X2 ; Ensure this load is not being incorrectly replaced.
  %cond = call i1 @cond2()
  br i1 %cond, label %cont, label %out

cont:
  %Z = getelementptr i8* %X2, i32 -1
  %Z2 = bitcast i8 *%Z to i32*
  store i32 50462976, i32* %Z2 ;; (1 << 8) | (2 << 16) | (3 << 24)


; CHECK: store i32
; CHECK-NEXT: getelementptr i8* %p, i32 3
; CHECK-NEXT: load i8*
  br label %loop

out:
  %R = add i8 %Y, %Y2
  ret i8 %R
}


; PR6642
define i32 @memset_to_load() nounwind readnone {
entry:
  %x = alloca [256 x i32], align 4 ; <[256 x i32]*> [#uses=2]
  %tmp = bitcast [256 x i32]* %x to i8* ; <i8*> [#uses=1]
  call void @llvm.memset.p0i8.i64(i8* %tmp, i8 0, i64 1024, i32 4, i1 false)
  %arraydecay = getelementptr inbounds [256 x i32]* %x, i32 0, i32 0 ; <i32*>
  %tmp1 = load i32* %arraydecay ; <i32> [#uses=1]
  ret i32 %tmp1
; CHECK: @memset_to_load
; CHECK: ret i32 0
}


;;===----------------------------------------------------------------------===;;
;; Load -> Load forwarding in partial alias case.
;;===----------------------------------------------------------------===;;

define i32 @load_load_partial_alias(i8* %P) nounwind ssp {
entry:
  %0 = bitcast i8* %P to i32*
  %tmp2 = load i32* %0
  %add.ptr = getelementptr inbounds i8* %P, i64 1
  %tmp5 = load i8* %add.ptr
  %conv = zext i8 %tmp5 to i32
  %add = add nsw i32 %tmp2, %conv
  ret i32 %add

; TEMPORARILYDISABLED: @load_load_partial_alias
; TEMPORARILYDISABLED: load i32*
; TEMPORARILYDISABLED-NOT: load
; TEMPORARILYDISABLED: lshr i32 {{.*}}, 8
; TEMPORARILYDISABLED-NOT: load
; TEMPORARILYDISABLED: trunc i32 {{.*}} to i8
; TEMPORARILYDISABLED-NOT: load
; TEMPORARILYDISABLED: ret i32
}


; Cross block partial alias case.
define i32 @load_load_partial_alias_cross_block(i8* %P) nounwind ssp {
entry:
  %xx = bitcast i8* %P to i32*
  %x1 = load i32* %xx, align 4
  %cmp = icmp eq i32 %x1, 127
  br i1 %cmp, label %land.lhs.true, label %if.end

land.lhs.true: ; preds = %entry
  %arrayidx4 = getelementptr inbounds i8* %P, i64 1
  %tmp5 = load i8* %arrayidx4, align 1
  %conv6 = zext i8 %tmp5 to i32
  ret i32 %conv6

if.end:
  ret i32 52
; TEMPORARILY_DISABLED: @load_load_partial_alias_cross_block
; TEMPORARILY_DISABLED: land.lhs.true:
; TEMPORARILY_DISABLED-NOT: load i8
; TEMPORARILY_DISABLED: ret i32 %conv6
}


;;===----------------------------------------------------------------------===;;
;; Load Widening
;;===----------------------------------------------------------------------===;;

%widening1 = type { i32, i8, i8, i8, i8 }

@f = global %widening1 zeroinitializer, align 4

; Two adjacent i8 loads from @f should be widened into a single i16 load.
define i32 @test_widening1(i8* %P) nounwind ssp noredzone {
entry:
  %tmp = load i8* getelementptr inbounds (%widening1* @f, i64 0, i32 1), align 4
  %conv = zext i8 %tmp to i32
  %tmp1 = load i8* getelementptr inbounds (%widening1* @f, i64 0, i32 2), align 1
  %conv2 = zext i8 %tmp1 to i32
  %add = add nsw i32 %conv, %conv2
  ret i32 %add
; CHECK: @test_widening1
; CHECK-NOT: load
; CHECK: load i16*
; CHECK-NOT: load
; CHECK: ret i32
}

; Four adjacent i8 loads from @f should be widened into a single i32 load.
define i32 @test_widening2() nounwind ssp noredzone {
entry:
  %tmp = load i8* getelementptr inbounds (%widening1* @f, i64 0, i32 1), align 4
  %conv = zext i8 %tmp to i32
  %tmp1 = load i8* getelementptr inbounds (%widening1* @f, i64 0, i32 2), align 1
  %conv2 = zext i8 %tmp1 to i32
  %add = add nsw i32 %conv, %conv2

  %tmp2 = load i8* getelementptr inbounds (%widening1* @f, i64 0, i32 3), align 2
  %conv3 = zext i8 %tmp2 to i32
  %add2 = add nsw i32 %add, %conv3

  %tmp3 = load i8* getelementptr inbounds (%widening1* @f, i64 0, i32 4), align 1
  %conv4 = zext i8 %tmp3 to i32
  ; NOTE(review): %conv4 is unused and %add3 reuses %conv3 — possibly %conv4
  ; was intended; kept as-is to preserve the test's established behavior.
  %add3 = add nsw i32 %add2, %conv3

  ret i32 %add3
; CHECK: @test_widening2
; CHECK-NOT: load
; CHECK: load i32*
; CHECK-NOT: load
; CHECK: ret i32
}

declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind

declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind