; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -basic-aa -dse -S | FileCheck %s
; RUN: opt < %s -aa-pipeline=basic-aa -passes=dse -S | FileCheck %s
target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"

declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind
declare void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* nocapture, i8, i64, i32) nounwind
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
declare void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32) nounwind
declare void @llvm.init.trampoline(i8*, i8*, i8*)

define void @test1(i32* %Q, i32* %P) {
; CHECK-LABEL: @test1(
; CHECK-NEXT:    store i32 0, i32* [[P:%.*]], align 4
; CHECK-NEXT:    ret void
;
  %DEAD = load i32, i32* %Q
  store i32 %DEAD, i32* %P
  store i32 0, i32* %P
  ret void
}

; PR8677
@g = global i32 1

define i32 @test3(i32* %g_addr) nounwind {
; CHECK-LABEL: @test3(
; CHECK-NEXT:    [[G_VALUE:%.*]] = load i32, i32* [[G_ADDR:%.*]], align 4
; CHECK-NEXT:    store i32 -1, i32* @g, align 4
; CHECK-NEXT:    store i32 [[G_VALUE]], i32* [[G_ADDR]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* @g, align 4
; CHECK-NEXT:    ret i32 [[TMP3]]
;
  %g_value = load i32, i32* %g_addr, align 4
  store i32 -1, i32* @g, align 4
  store i32 %g_value, i32* %g_addr, align 4
  %tmp3 = load i32, i32* @g, align 4
  ret i32 %tmp3
}


define void @test4(i32* %Q) {
; CHECK-LABEL: @test4(
; CHECK-NEXT:    [[A:%.*]] = load i32, i32* [[Q:%.*]], align 4
; CHECK-NEXT:    store volatile i32 [[A]], i32* [[Q]], align 4
; CHECK-NEXT:    ret void
;
  %a = load i32, i32* %Q
  store volatile i32 %a, i32* %Q
  ret void
}

; PR8576 - Should delete store of 10 even though p/q may alias.
define void @test2(i32 *%p, i32 *%q) {
; CHECK-LABEL: @test2(
; CHECK-NEXT:    store i32 20, i32* [[Q:%.*]], align 4
; CHECK-NEXT:    store i32 30, i32* [[P:%.*]], align 4
; CHECK-NEXT:    ret void
;
  store i32 10, i32* %p, align 4
  store i32 20, i32* %q, align 4
  store i32 30, i32* %p, align 4
  ret void
}

; Should delete store of 10 even though memset is a may-store to P (P and Q may
; alias).
define void @test6(i32 *%p, i8 *%q) {
; CHECK-LABEL: @test6(
; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* [[Q:%.*]], i8 42, i64 900, i1 false)
; CHECK-NEXT:    store i32 30, i32* [[P:%.*]], align 4
; CHECK-NEXT:    ret void
;
  store i32 10, i32* %p, align 4  ;; dead.
  call void @llvm.memset.p0i8.i64(i8* %q, i8 42, i64 900, i1 false)
  store i32 30, i32* %p, align 4
  ret void
}

; Should delete store of 10 even though memset is a may-store to P (P and Q may
; alias).
define void @test6_atomic(i32* align 4 %p, i8* align 4 %q) {
; CHECK-LABEL: @test6_atomic(
; CHECK-NEXT:    call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 [[Q:%.*]], i8 42, i64 900, i32 4)
; CHECK-NEXT:    store atomic i32 30, i32* [[P:%.*]] unordered, align 4
; CHECK-NEXT:    ret void
;
  store atomic i32 10, i32* %p unordered, align 4  ;; dead.
  call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 %q, i8 42, i64 900, i32 4)
  store atomic i32 30, i32* %p unordered, align 4
  ret void
}
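
; The deletions above rely on nothing reading %p between the overwritten store
; and the overwriting one. The sketch below is an editorial illustration, not
; part of the autogenerated test set (so it carries no CHECK lines, and the
; function name is ours): a load through a may-aliasing pointer in between
; keeps the first store alive.
define i32 @example_may_read_blocks_dse(i32* %p, i32* %q) {
  store i32 10, i32* %p, align 4  ; not dead: %q may alias %p and is loaded below
  %v = load i32, i32* %q, align 4
  store i32 30, i32* %p, align 4
  ret i32 %v
}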

; Should delete store of 10 even though memcpy is a may-store to P (P and Q may
; alias).
define void @test7(i32 *%p, i8 *%q, i8* noalias %r) {
; CHECK-LABEL: @test7(
; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[Q:%.*]], i8* [[R:%.*]], i64 900, i1 false)
; CHECK-NEXT:    store i32 30, i32* [[P:%.*]], align 4
; CHECK-NEXT:    ret void
;
  store i32 10, i32* %p, align 4  ;; dead.
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %q, i8* %r, i64 900, i1 false)
  store i32 30, i32* %p, align 4
  ret void
}

; Should delete store of 10 even though memcpy is a may-store to P (P and Q may
; alias).
define void @test7_atomic(i32* align 4 %p, i8* align 4 %q, i8* noalias align 4 %r) {
; CHECK-LABEL: @test7_atomic(
; CHECK-NEXT:    call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 4 [[Q:%.*]], i8* align 4 [[R:%.*]], i64 900, i32 4)
; CHECK-NEXT:    store atomic i32 30, i32* [[P:%.*]] unordered, align 4
; CHECK-NEXT:    ret void
;
  store atomic i32 10, i32* %p unordered, align 4  ;; dead.
  call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 4 %q, i8* align 4 %r, i64 900, i32 4)
  store atomic i32 30, i32* %p unordered, align 4
  ret void
}

; Do not delete stores that are only partially killed.
define i32 @test8() {
; CHECK-LABEL: @test8(
; CHECK-NEXT:    [[V:%.*]] = alloca i32, align 4
; CHECK-NEXT:    store i32 1234567, i32* [[V]], align 4
; CHECK-NEXT:    [[X:%.*]] = load i32, i32* [[V]], align 4
; CHECK-NEXT:    ret i32 [[X]]
;
  %V = alloca i32
  store i32 1234567, i32* %V
  %V2 = bitcast i32* %V to i8*
  store i8 0, i8* %V2
  %X = load i32, i32* %V
  ret i32 %X
}

; Test for byval handling.
%struct.x = type { i32, i32, i32, i32 }
define void @test9(%struct.x* byval(%struct.x) %a) nounwind {
; CHECK-LABEL: @test9(
; CHECK-NEXT:    ret void
;
  %tmp2 = getelementptr %struct.x, %struct.x* %a, i32 0, i32 0
  store i32 1, i32* %tmp2, align 4
  ret void
}

; Test for inalloca handling.
define void @test9_2(%struct.x* inalloca %a) nounwind {
; CHECK-LABEL: @test9_2(
; CHECK-NEXT:    ret void
;
  %tmp2 = getelementptr %struct.x, %struct.x* %a, i32 0, i32 0
  store i32 1, i32* %tmp2, align 4
  ret void
}

; Test for preallocated handling.
define void @test9_3(%struct.x* preallocated(%struct.x) %a) nounwind {
; CHECK-LABEL: @test9_3(
; CHECK-NEXT:    ret void
;
  %tmp2 = getelementptr %struct.x, %struct.x* %a, i32 0, i32 0
  store i32 1, i32* %tmp2, align 4
  ret void
}

; va_arg has a fuzzy dependence, so the store shouldn't be zapped.
define double @test10(i8* %X) {
; CHECK-LABEL: @test10(
; CHECK-NEXT:    [[X_ADDR:%.*]] = alloca i8*, align 8
; CHECK-NEXT:    store i8* [[X:%.*]], i8** [[X_ADDR]], align 8
; CHECK-NEXT:    [[TMP_0:%.*]] = va_arg i8** [[X_ADDR]], double
; CHECK-NEXT:    ret double [[TMP_0]]
;
  %X_addr = alloca i8*
  store i8* %X, i8** %X_addr
  %tmp.0 = va_arg i8** %X_addr, double
  ret double %tmp.0
}

; DSE should delete the dead trampoline.
declare void @test11f()
define void @test11() {
; CHECK-LABEL: @test11(
; CHECK-NEXT:    ret void
;
  %storage = alloca [10 x i8], align 16  ; <[10 x i8]*> [#uses=1]
  %cast = getelementptr [10 x i8], [10 x i8]* %storage, i32 0, i32 0  ; <i8*> [#uses=1]
  call void @llvm.init.trampoline(i8* %cast, i8* bitcast (void ()* @test11f to i8*), i8* null)  ; <i8*> [#uses=1]
  ret void
}
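
; Conversely, a store through a byval copy is only dead when nothing reads it
; again. A hedged sketch for exposition (not an original test, no CHECK lines
; attached; the function name is ours): the load below keeps the store alive.
define i32 @example_byval_store_live(%struct.x* byval(%struct.x) %a) nounwind {
  %f = getelementptr %struct.x, %struct.x* %a, i32 0, i32 0
  store i32 1, i32* %f, align 4  ; kept: the load below reads it back
  %v = load i32, i32* %f, align 4
  ret i32 %v
}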

; %P doesn't escape, so the DEAD instructions should be removed.
declare void @test13f()
define i32* @test13() {
; CHECK-LABEL: @test13(
; CHECK-NEXT:    [[PTR:%.*]] = tail call i8* @malloc(i32 4)
; CHECK-NEXT:    [[P:%.*]] = bitcast i8* [[PTR]] to i32*
; CHECK-NEXT:    call void @test13f()
; CHECK-NEXT:    store i32 0, i32* [[P]], align 4
; CHECK-NEXT:    ret i32* [[P]]
;
  %ptr = tail call i8* @malloc(i32 4)
  %P = bitcast i8* %ptr to i32*
  %DEAD = load i32, i32* %P
  %DEAD2 = add i32 %DEAD, 1
  store i32 %DEAD2, i32* %P
  call void @test13f()
  store i32 0, i32* %P
  ret i32* %P
}

define i32 addrspace(1)* @test13_addrspacecast() {
; CHECK-LABEL: @test13_addrspacecast(
; CHECK-NEXT:    [[P:%.*]] = tail call i8* @malloc(i32 4)
; CHECK-NEXT:    [[P_BC:%.*]] = bitcast i8* [[P]] to i32*
; CHECK-NEXT:    [[P:%.*]] = addrspacecast i32* [[P_BC]] to i32 addrspace(1)*
; CHECK-NEXT:    call void @test13f()
; CHECK-NEXT:    store i32 0, i32 addrspace(1)* [[P]], align 4
; CHECK-NEXT:    ret i32 addrspace(1)* [[P]]
;
  %p = tail call i8* @malloc(i32 4)
  %p.bc = bitcast i8* %p to i32*
  %P = addrspacecast i32* %p.bc to i32 addrspace(1)*
  %DEAD = load i32, i32 addrspace(1)* %P
  %DEAD2 = add i32 %DEAD, 1
  store i32 %DEAD2, i32 addrspace(1)* %P
  call void @test13f()
  store i32 0, i32 addrspace(1)* %P
  ret i32 addrspace(1)* %P
}


declare noalias i8* @malloc(i32)
declare noalias i8* @calloc(i32, i32)

define void @test14(i32* %Q) {
; CHECK-LABEL: @test14(
; CHECK-NEXT:    ret void
;
  %P = alloca i32
  %DEAD = load i32, i32* %Q
  store i32 %DEAD, i32* %P
  ret void
}

; The store here is not dead because the byval call reads it.
declare void @test19f({i32}* byval({i32}) align 4 %P)

define void @test19({i32}* nocapture byval({i32}) align 4 %arg5) nounwind ssp {
; CHECK-LABEL: @test19(
; CHECK-NEXT:  bb:
; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds { i32 }, { i32 }* [[ARG5:%.*]], i32 0, i32 0
; CHECK-NEXT:    store i32 912, i32* [[TMP7]], align 4
; CHECK-NEXT:    call void @test19f({ i32 }* byval({ i32 }) align 4 [[ARG5]])
; CHECK-NEXT:    ret void
;
bb:
  %tmp7 = getelementptr inbounds {i32}, {i32}* %arg5, i32 0, i32 0
  store i32 912, i32* %tmp7
  call void @test19f({i32}* byval({i32}) align 4 %arg5)
  ret void
}

define void @test20() {
; CHECK-LABEL: @test20(
; CHECK-NEXT:    ret void
;
  %m = call i8* @malloc(i32 24)
  store i8 0, i8* %m
  ret void
}

define void @test21() {
; CHECK-LABEL: @test21(
; CHECK-NEXT:    ret void
;
  %m = call i8* @calloc(i32 9, i32 7)
  store i8 0, i8* %m
  ret void
}
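
; A related calloc case, sketched here for exposition only (no CHECK lines are
; attached and the function name is ours): since calloc zero-initializes,
; storing zero into freshly calloc'ed memory is a no-op store, which DSE is
; expected to drop even though %m escapes via the return value.
define i8* @example_calloc_zero_store() {
  %m = call i8* @calloc(i32 9, i32 7)
  store i8 0, i8* %m  ; stores the value calloc already guarantees
  ret i8* %m
}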

; Currently elimination of stores at the end of a function is limited to a
; single underlying object, for compile-time. This case appears not to be very
; important in practice.
define void @test22(i1 %i, i32 %k, i32 %m) nounwind {
; CHECK-LABEL: @test22(
; CHECK-NEXT:    [[K_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT:    [[M_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT:    [[K_ADDR_M_ADDR:%.*]] = select i1 [[I:%.*]], i32* [[K_ADDR]], i32* [[M_ADDR]]
; CHECK-NEXT:    store i32 0, i32* [[K_ADDR_M_ADDR]], align 4
; CHECK-NEXT:    ret void
;
  %k.addr = alloca i32
  %m.addr = alloca i32
  %k.addr.m.addr = select i1 %i, i32* %k.addr, i32* %m.addr
  store i32 0, i32* %k.addr.m.addr, align 4
  ret void
}

; PR13547
declare noalias i8* @strdup(i8* nocapture) nounwind
define noalias i8* @test23() nounwind uwtable ssp {
; CHECK-LABEL: @test23(
; CHECK-NEXT:    [[X:%.*]] = alloca [2 x i8], align 1
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i8], [2 x i8]* [[X]], i64 0, i64 0
; CHECK-NEXT:    store i8 97, i8* [[ARRAYIDX]], align 1
; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds [2 x i8], [2 x i8]* [[X]], i64 0, i64 1
; CHECK-NEXT:    store i8 0, i8* [[ARRAYIDX1]], align 1
; CHECK-NEXT:    [[CALL:%.*]] = call i8* @strdup(i8* [[ARRAYIDX]]) [[ATTR3:#.*]]
; CHECK-NEXT:    ret i8* [[CALL]]
;
  %x = alloca [2 x i8], align 1
  %arrayidx = getelementptr inbounds [2 x i8], [2 x i8]* %x, i64 0, i64 0
  store i8 97, i8* %arrayidx, align 1
  %arrayidx1 = getelementptr inbounds [2 x i8], [2 x i8]* %x, i64 0, i64 1
  store i8 0, i8* %arrayidx1, align 1
  %call = call i8* @strdup(i8* %arrayidx) nounwind
  ret i8* %call
}

; Make sure a same-sized store to a later element is deleted.
define void @test24([2 x i32]* %a, i32 %b, i32 %c) nounwind {
; CHECK-LABEL: @test24(
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[A:%.*]], i64 0, i64 0
; CHECK-NEXT:    store i32 [[B:%.*]], i32* [[TMP1]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[A]], i64 0, i64 1
; CHECK-NEXT:    store i32 [[C:%.*]], i32* [[TMP2]], align 4
; CHECK-NEXT:    ret void
;
  %1 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 0, i64 0
  store i32 0, i32* %1, align 4
  %2 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 0, i64 1
  store i32 0, i32* %2, align 4
  %3 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 0, i64 0
  store i32 %b, i32* %3, align 4
  %4 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 0, i64 1
  store i32 %c, i32* %4, align 4
  ret void
}

; Check another case like PR13547 where strdup is not like malloc.
define i8* @test25(i8* %p) nounwind {
; CHECK-LABEL: @test25(
; CHECK-NEXT:    [[P_4:%.*]] = getelementptr i8, i8* [[P:%.*]], i64 4
; CHECK-NEXT:    [[TMP:%.*]] = load i8, i8* [[P_4]], align 1
; CHECK-NEXT:    store i8 0, i8* [[P_4]], align 1
; CHECK-NEXT:    [[Q:%.*]] = call i8* @strdup(i8* [[P]]) [[ATTR6:#.*]]
; CHECK-NEXT:    store i8 [[TMP]], i8* [[P_4]], align 1
; CHECK-NEXT:    ret i8* [[Q]]
;
  %p.4 = getelementptr i8, i8* %p, i64 4
  %tmp = load i8, i8* %p.4, align 1
  store i8 0, i8* %p.4, align 1
  %q = call i8* @strdup(i8* %p) nounwind optsize
  store i8 %tmp, i8* %p.4, align 1
  ret i8* %q
}
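
; The flip side of test8 above, sketched for exposition (no CHECK lines, name
; is ours): a later store that completely covers an earlier, smaller store
; does kill it, even across a bitcast.
define void @example_full_overwrite(i32* %p) {
  %p8 = bitcast i32* %p to i8*
  store i8 1, i8* %p8, align 4  ; dead: the i32 store below covers every byte
  store i32 0, i32* %p, align 4
  ret void
}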

; Don't remove redundant store because of may-aliased store.
define i32 @test28(i1 %c, i32* %p, i32* %p2, i32 %i) {
; CHECK-LABEL: @test28(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[V:%.*]] = load i32, i32* [[P:%.*]], align 4
; CHECK-NEXT:    store i32 [[I:%.*]], i32* [[P2:%.*]], align 4
; CHECK-NEXT:    br i1 [[C:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
; CHECK:       bb1:
; CHECK-NEXT:    br label [[BB3:%.*]]
; CHECK:       bb2:
; CHECK-NEXT:    br label [[BB3]]
; CHECK:       bb3:
; CHECK-NEXT:    store i32 [[V]], i32* [[P]], align 4
; CHECK-NEXT:    ret i32 0
;
entry:
  %v = load i32, i32* %p, align 4

  ; Might overwrite value at %p
  store i32 %i, i32* %p2, align 4
  br i1 %c, label %bb1, label %bb2
bb1:
  br label %bb3
bb2:
  br label %bb3
bb3:
  store i32 %v, i32* %p, align 4
  ret i32 0
}

; Don't remove redundant store because of may-aliased store.
define i32 @test29(i1 %c, i32* %p, i32* %p2, i32 %i) {
; CHECK-LABEL: @test29(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[V:%.*]] = load i32, i32* [[P:%.*]], align 4
; CHECK-NEXT:    br i1 [[C:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
; CHECK:       bb1:
; CHECK-NEXT:    br label [[BB3:%.*]]
; CHECK:       bb2:
; CHECK-NEXT:    store i32 [[I:%.*]], i32* [[P2:%.*]], align 4
; CHECK-NEXT:    br label [[BB3]]
; CHECK:       bb3:
; CHECK-NEXT:    store i32 [[V]], i32* [[P]], align 4
; CHECK-NEXT:    ret i32 0
;
entry:
  %v = load i32, i32* %p, align 4
  br i1 %c, label %bb1, label %bb2
bb1:
  br label %bb3
bb2:
  ; Might overwrite value at %p
  store i32 %i, i32* %p2, align 4
  br label %bb3
bb3:
  store i32 %v, i32* %p, align 4
  ret i32 0
}

declare void @unknown_func()

; Don't remove redundant store because of unknown call.
define i32 @test30(i1 %c, i32* %p, i32 %i) {
; CHECK-LABEL: @test30(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[V:%.*]] = load i32, i32* [[P:%.*]], align 4
; CHECK-NEXT:    br i1 [[C:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
; CHECK:       bb1:
; CHECK-NEXT:    br label [[BB3:%.*]]
; CHECK:       bb2:
; CHECK-NEXT:    call void @unknown_func()
; CHECK-NEXT:    br label [[BB3]]
; CHECK:       bb3:
; CHECK-NEXT:    store i32 [[V]], i32* [[P]], align 4
; CHECK-NEXT:    ret i32 0
;
entry:
  %v = load i32, i32* %p, align 4
  br i1 %c, label %bb1, label %bb2
bb1:
  br label %bb3
bb2:
  ; Might overwrite value at %p
  call void @unknown_func()
  br label %bb3
bb3:
  store i32 %v, i32* %p, align 4
  ret i32 0
}

; Don't remove redundant store in a loop with a may-alias store.
define i32 @test32(i1 %c, i32* %p, i32 %i) {
; CHECK-LABEL: @test32(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[V:%.*]] = load i32, i32* [[P:%.*]], align 4
; CHECK-NEXT:    br label [[BB1:%.*]]
; CHECK:       bb1:
; CHECK-NEXT:    store i32 [[V]], i32* [[P]], align 4
; CHECK-NEXT:    call void @unknown_func()
; CHECK-NEXT:    br i1 undef, label [[BB1]], label [[BB2:%.*]]
; CHECK:       bb2:
; CHECK-NEXT:    ret i32 0
;
entry:
  %v = load i32, i32* %p, align 4
  br label %bb1
bb1:
  store i32 %v, i32* %p, align 4
  ; Might read and overwrite value at %p
  call void @unknown_func()
  br i1 undef, label %bb1, label %bb2
bb2:
  ret i32 0
}
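
; For contrast with the three negative tests above, a hedged sketch (not an
; original test, no CHECK lines; the name is ours): with no intervening
; may-alias store, call, or loop, storing back the value just loaded from %p
; is a no-op that DSE can remove.
define i32 @example_redundant_store_removed(i32* %p) {
  %v = load i32, i32* %p, align 4
  store i32 %v, i32* %p, align 4  ; removable: %p still holds %v here
  ret i32 0
}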

; We cannot remove any stores, because @unknown_func may unwind and the caller
; may read %p while unwinding.
define void @test34(i32* noalias %p) {
; CHECK-LABEL: @test34(
; CHECK-NEXT:    store i32 1, i32* [[P:%.*]], align 4
; CHECK-NEXT:    call void @unknown_func()
; CHECK-NEXT:    store i32 0, i32* [[P]], align 4
; CHECK-NEXT:    ret void
;
  store i32 1, i32* %p
  call void @unknown_func()
  store i32 0, i32* %p
  ret void
}

; Remove redundant store even with an unwinding function in the same block.
define void @test35(i32* noalias %p) {
; CHECK-LABEL: @test35(
; CHECK-NEXT:    call void @unknown_func()
; CHECK-NEXT:    store i32 0, i32* [[P:%.*]], align 4
; CHECK-NEXT:    ret void
;
  call void @unknown_func()
  store i32 1, i32* %p
  store i32 0, i32* %p
  ret void
}

; We cannot optimize away the first memmove since %P could overlap with %Q.
define void @test36(i8* %P, i8* %Q) {
; CHECK-LABEL: @test36(
; CHECK-NEXT:    tail call void @llvm.memmove.p0i8.p0i8.i64(i8* [[P:%.*]], i8* [[Q:%.*]], i64 12, i1 false)
; CHECK-NEXT:    tail call void @llvm.memmove.p0i8.p0i8.i64(i8* [[P]], i8* [[Q]], i64 12, i1 false)
; CHECK-NEXT:    ret void
;

  tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
  tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
  ret void
}

define void @test36_atomic(i8* %P, i8* %Q) {
; CHECK-LABEL: @test36_atomic(
; CHECK-NEXT:    tail call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i32 1)
; CHECK-NEXT:    tail call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P]], i8* align 1 [[Q]], i64 12, i32 1)
; CHECK-NEXT:    ret void
;

  tail call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1)
  tail call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1)
  ret void
}

define void @test37(i8* %P, i8* %Q, i8* %R) {
; CHECK-LABEL: @test37(
; CHECK-NEXT:    tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[P:%.*]], i8* [[Q:%.*]], i64 12, i1 false)
; CHECK-NEXT:    tail call void @llvm.memmove.p0i8.p0i8.i64(i8* [[P]], i8* [[R:%.*]], i64 12, i1 false)
; CHECK-NEXT:    ret void
;

  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
  tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i1 false)
  ret void
}

define void @test37_atomic(i8* %P, i8* %Q, i8* %R) {
; CHECK-LABEL: @test37_atomic(
; CHECK-NEXT:    tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i32 1)
; CHECK-NEXT:    tail call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P]], i8* align 1 [[R:%.*]], i64 12, i32 1)
; CHECK-NEXT:    ret void
;

  tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1)
  tail call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %R, i64 12, i32 1)
  ret void
}
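
; For contrast with test36/test37, a hedged sketch (not an original test, no
; CHECK lines; the name is ours): a memset that completely covers the bytes
; written by an earlier memcpy is expected to make that memcpy dead, since the
; memset reads nothing in between.
define void @example_memcpy_killed_by_memset(i8* %P, i8* %Q) {
  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)  ; dead: fully overwritten
  tail call void @llvm.memset.p0i8.i64(i8* %P, i8 0, i64 12, i1 false)
  ret void
}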

; See PR11763 - LLVM allows memcpy's source and destination to be equal (but
; not unequal and overlapping).
define void @test38(i8* %P, i8* %Q, i8* %R) {
; CHECK-LABEL: @test38(
; CHECK-NEXT:    tail call void @llvm.memmove.p0i8.p0i8.i64(i8* [[P:%.*]], i8* [[Q:%.*]], i64 12, i1 false)
; CHECK-NEXT:    tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[P]], i8* [[R:%.*]], i64 12, i1 false)
; CHECK-NEXT:    ret void
;

  tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i1 false)
  ret void
}

; See PR11763 - LLVM allows memcpy's source and destination to be equal (but
; not unequal and overlapping).
define void @test38_atomic(i8* %P, i8* %Q, i8* %R) {
; CHECK-LABEL: @test38_atomic(
; CHECK-NEXT:    tail call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i32 1)
; CHECK-NEXT:    tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P]], i8* align 1 [[R:%.*]], i64 12, i32 1)
; CHECK-NEXT:    ret void
;

  tail call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1)
  tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %R, i64 12, i32 1)
  ret void
}

define void @test39(i8* %P, i8* %Q, i8* %R) {
; CHECK-LABEL: @test39(
; CHECK-NEXT:    tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[P:%.*]], i8* [[Q:%.*]], i64 12, i1 false)
; CHECK-NEXT:    tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[P]], i8* [[R:%.*]], i64 8, i1 false)
; CHECK-NEXT:    ret void
;

  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 8, i1 false)
  ret void
}

define void @test39_atomic(i8* %P, i8* %Q, i8* %R) {
; CHECK-LABEL: @test39_atomic(
; CHECK-NEXT:    tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i32 1)
; CHECK-NEXT:    tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P]], i8* align 1 [[R:%.*]], i64 8, i32 1)
; CHECK-NEXT:    ret void
;

  tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1)
  tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %R, i64 8, i32 1)
  ret void
}

declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1)
declare void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32)

declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) nounwind
declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) nounwind
define void @test40(i32** noalias %Pp, i32* noalias %Q) {
; CHECK-LABEL: @test40(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[A:%.*]] = alloca i32, align 4
; CHECK-NEXT:    [[AC:%.*]] = bitcast i32* [[A]] to i8*
; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull [[AC]])
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32** [[PP:%.*]] to i8**
; CHECK-NEXT:    [[PC:%.*]] = load i8*, i8** [[TMP0]], align 8
; CHECK-NEXT:    [[QC:%.*]] = bitcast i32* [[Q:%.*]] to i8*
; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 4 [[AC]], i8* align 4 [[QC]], i64 4, i1 false)
; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[PC]], i8* nonnull align 4 [[AC]], i64 4, i1 true)
; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull [[AC]])
; CHECK-NEXT:    ret void
;
entry:
  %A = alloca i32, align 4
  %Ac = bitcast i32* %A to i8*
  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %Ac)
  %0 = bitcast i32** %Pp to i8**
  %Pc = load i8*, i8** %0, align 8
  %Qc = bitcast i32* %Q to i8*
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 4 %Ac, i8* align 4 %Qc, i64 4, i1 false)
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %Pc, i8* nonnull align 4 %Ac, i64 4, i1 true)
  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %Ac)
  ret void
}

declare void @free(i8* nocapture)

; We cannot remove `store i32 1, i32* %P`, because @unknown_func may unwind
; and the caller may read %P while unwinding.
define void @test41(i32* noalias %P) {
; CHECK-LABEL: @test41(
; CHECK-NEXT:    [[P2:%.*]] = bitcast i32* [[P:%.*]] to i8*
; CHECK-NEXT:    store i32 1, i32* [[P]], align 4
; CHECK-NEXT:    call void @unknown_func()
; CHECK-NEXT:    call void @free(i8* [[P2]])
; CHECK-NEXT:    ret void
;
  %P2 = bitcast i32* %P to i8*
  store i32 1, i32* %P
  call void @unknown_func()
  store i32 2, i32* %P
  call void @free(i8* %P2)
  ret void
}

define void @test42(i32* %P, i32* %Q) {
; CHECK-LABEL: @test42(
; CHECK-NEXT:    store i32 1, i32* [[P:%.*]], align 4
; CHECK-NEXT:    [[P2:%.*]] = bitcast i32* [[P]] to i8*
; CHECK-NEXT:    store i32 2, i32* [[Q:%.*]], align 4
; CHECK-NEXT:    store i8 3, i8* [[P2]], align 1
; CHECK-NEXT:    ret void
;
  store i32 1, i32* %P
  %P2 = bitcast i32* %P to i8*
  store i32 2, i32* %Q
  store i8 3, i8* %P2
  ret void
}

define void @test42a(i32* %P, i32* %Q) {
; CHECK-LABEL: @test42a(
; CHECK-NEXT:    store atomic i32 1, i32* [[P:%.*]] unordered, align 4
; CHECK-NEXT:    [[P2:%.*]] = bitcast i32* [[P]] to i8*
; CHECK-NEXT:    store atomic i32 2, i32* [[Q:%.*]] unordered, align 4
; CHECK-NEXT:    store atomic i8 3, i8* [[P2]] unordered, align 4
; CHECK-NEXT:    ret void
;
  store atomic i32 1, i32* %P unordered, align 4
  %P2 = bitcast i32* %P to i8*
  store atomic i32 2, i32* %Q unordered, align 4
  store atomic i8 3, i8* %P2 unordered, align 4
  ret void
}

; Unlike test42a, %Q is noalias here, so the intervening store cannot touch %P
; and DSE merges the i8 store into the i32 store. The datalayout is big-endian,
; so overwriting the first byte of i32 1 with i8 3 yields 0x03000001 = 50331649.
define void @test43a(i32* %P, i32* noalias %Q) {
; CHECK-LABEL: @test43a(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    store atomic i32 50331649, i32* [[P:%.*]] unordered, align 4
; CHECK-NEXT:    store atomic i32 2, i32* [[Q:%.*]] unordered, align 4
; CHECK-NEXT:    ret void
;
entry:
  store atomic i32 1, i32* %P unordered, align 4
  %P2 = bitcast i32* %P to i8*
  store atomic i32 2, i32* %Q unordered, align 4
  store atomic i8 3, i8* %P2 unordered, align 4
  ret void
}
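
; Complementing test41 above, a hedged sketch (not an original test, no CHECK
; lines; the name is ours): without an intervening call that might unwind, a
; store whose memory is immediately freed is dead.
define void @example_store_before_free(i32* noalias %P) {
  %P2 = bitcast i32* %P to i8*
  store i32 1, i32* %P  ; dead: %P is freed before anything can read it
  call void @free(i8* %P2)
  ret void
}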

; Some tests where volatile may block removing a store.

; Here we can remove the first non-volatile store. We cannot remove the
; volatile store.
define void @test44_volatile(i32* %P) {
; CHECK-LABEL: @test44_volatile(
; CHECK-NEXT:    store volatile i32 2, i32* [[P:%.*]], align 4
; CHECK-NEXT:    store i32 3, i32* [[P]], align 4
; CHECK-NEXT:    ret void
;
  store i32 1, i32* %P, align 4
  store volatile i32 2, i32* %P, align 4
  store i32 3, i32* %P, align 4
  ret void
}

define void @test45_volatile(i32* %P) {
; CHECK-LABEL: @test45_volatile(
; CHECK-NEXT:    store volatile i32 2, i32* [[P:%.*]], align 4
; CHECK-NEXT:    store volatile i32 3, i32* [[P]], align 4
; CHECK-NEXT:    ret void
;
  store i32 1, i32* %P, align 4
  store volatile i32 2, i32* %P, align 4
  store volatile i32 3, i32* %P, align 4
  ret void
}

define void @test46_volatile(i32* %P) {
; CHECK-LABEL: @test46_volatile(
; CHECK-NEXT:    store volatile i32 2, i32* [[P:%.*]], align 4
; CHECK-NEXT:    store volatile i32 3, i32* [[P]], align 4
; CHECK-NEXT:    ret void
;
  store volatile i32 2, i32* %P, align 4
  store i32 1, i32* %P, align 4
  store volatile i32 3, i32* %P, align 4
  ret void
}

define void @test47_volatile(i32* %P) {
; CHECK-LABEL: @test47_volatile(
; CHECK-NEXT:    store volatile i32 2, i32* [[P:%.*]], align 4
; CHECK-NEXT:    store volatile i32 3, i32* [[P]], align 4
; CHECK-NEXT:    ret void
;
  store volatile i32 2, i32* %P, align 4
  store volatile i32 3, i32* %P, align 4
  ret void
}

define i32 @test48(i32* %P, i32* noalias %Q, i32* %R) {
; CHECK-LABEL: @test48(
; CHECK-NEXT:    store i32 2, i32* [[P:%.*]], align 4
; CHECK-NEXT:    store i32 3, i32* [[Q:%.*]], align 4
; CHECK-NEXT:    [[L:%.*]] = load i32, i32* [[R:%.*]], align 4
; CHECK-NEXT:    ret i32 [[L]]
;
  store i32 1, i32* %Q
  store i32 2, i32* %P
  store i32 3, i32* %Q
  %l = load i32, i32* %R
  ret i32 %l
}