; RUN: opt -basicaa -gvn -S < %s | FileCheck %s

target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-apple-macosx10.7.0"

@x = common global i32 0, align 4
@y = common global i32 0, align 4

; GVN across unordered store (allowed)
define i32 @test1() nounwind uwtable ssp {
; CHECK-LABEL: test1
; CHECK: add i32 %x, %x
entry:
  %x = load i32, i32* @y
  store atomic i32 %x, i32* @x unordered, align 4
  %y = load i32, i32* @y
  %z = add i32 %x, %y
  ret i32 %z
}

; GVN across unordered load (allowed)
define i32 @test3() nounwind uwtable ssp {
; CHECK-LABEL: test3
; CHECK: add i32 %x, %x
entry:
  %x = load i32, i32* @y
  %y = load atomic i32, i32* @x unordered, align 4
  %z = load i32, i32* @y
  %a = add i32 %x, %z
  %b = add i32 %y, %a
  ret i32 %b
}

; GVN load to unordered load (allowed)
define i32 @test5() nounwind uwtable ssp {
; CHECK-LABEL: test5
; CHECK: add i32 %x, %x
entry:
  %x = load atomic i32, i32* @x unordered, align 4
  %y = load i32, i32* @x
  %z = add i32 %x, %y
  ret i32 %z
}

; GVN unordered load to load (unordered load must not be removed)
define i32 @test6() nounwind uwtable ssp {
; CHECK-LABEL: test6
; CHECK: load atomic i32, i32* @x unordered
entry:
  %x = load i32, i32* @x
  %x2 = load atomic i32, i32* @x unordered, align 4
  %x3 = add i32 %x, %x2
  ret i32 %x3
}

; GVN across release-acquire pair (forbidden)
define i32 @test7() nounwind uwtable ssp {
; CHECK-LABEL: test7
; CHECK: add i32 %x, %y
entry:
  %x = load i32, i32* @y
  store atomic i32 %x, i32* @x release, align 4
  %w = load atomic i32, i32* @x acquire, align 4
  %y = load i32, i32* @y
  %z = add i32 %x, %y
  ret i32 %z
}

; GVN across monotonic store (allowed)
define i32 @test9() nounwind uwtable ssp {
; CHECK-LABEL: test9
; CHECK: add i32 %x, %x
entry:
  %x = load i32, i32* @y
  store atomic i32 %x, i32* @x monotonic, align 4
  %y = load i32, i32* @y
  %z = add i32 %x, %y
  ret i32 %z
}

; GVN of an unordered load across a monotonic load (not allowed)
define i32 @test10() nounwind uwtable ssp {
; CHECK-LABEL: test10
; CHECK: add i32 %x, %y
entry:
  %x = load atomic i32, i32* @y unordered, align 4
  %clobber = load atomic i32, i32* @x monotonic, align 4
  %y = load atomic i32, i32* @y monotonic, align 4
  %z = add i32 %x, %y
  ret i32 %z
}

; PR22708: both loads must remain; the stored value must not be
; forwarded to %load across the acquire load.
define i32 @PR22708(i1 %flag) {
; CHECK-LABEL: PR22708
entry:
  br i1 %flag, label %if.then, label %if.end

if.then:
  store i32 43, i32* @y, align 4
; CHECK: store i32 43, i32* @y, align 4
  br label %if.end

if.end:
  load atomic i32, i32* @x acquire, align 4
  %load = load i32, i32* @y, align 4
; CHECK: load atomic i32, i32* @x acquire, align 4
; CHECK: load i32, i32* @y, align 4
  ret i32 %load
}

; CHECK-LABEL: @test12(
; Can't remove a load over an ordering barrier
define i32 @test12(i1 %B, i32* %P1, i32* %P2) {
  %load0 = load i32, i32* %P1
  %1 = load atomic i32, i32* %P2 seq_cst, align 4
  %load1 = load i32, i32* %P1
  %sel = select i1 %B, i32 %load0, i32 %load1
  ret i32 %sel
  ; CHECK: load i32, i32* %P1
  ; CHECK: load i32, i32* %P1
}

; CHECK-LABEL: @test13(
; atomic to non-atomic forwarding is legal
define i32 @test13(i32* %P1) {
  %a = load atomic i32, i32* %P1 seq_cst, align 4
  %b = load i32, i32* %P1
  %res = sub i32 %a, %b
  ret i32 %res
  ; CHECK: load atomic i32, i32* %P1
  ; CHECK: ret i32 0
}

; CHECK-LABEL: @test13b(
; likewise, store-to-load forwarding from an unordered atomic store is legal
define i32 @test13b(i32* %P1) {
  store atomic i32 0, i32* %P1 unordered, align 4
  %b = load i32, i32* %P1
  ret i32 %b
  ; CHECK: ret i32 0
}

; CHECK-LABEL: @test14(
; atomic to unordered atomic forwarding is legal
define i32 @test14(i32* %P1) {
  %a = load atomic i32, i32* %P1 seq_cst, align 4
  %b = load atomic i32, i32* %P1 unordered, align 4
  %res = sub i32 %a, %b
  ret i32 %res
  ; CHECK: load atomic i32, i32* %P1 seq_cst
  ; CHECK-NEXT: ret i32 0
}

; CHECK-LABEL: @test15(
; implementation restriction: can't forward to stronger
; than unordered
define i32 @test15(i32* %P1, i32* %P2) {
  %a = load atomic i32, i32* %P1 seq_cst, align 4
  %b = load atomic i32, i32* %P1 seq_cst, align 4
  %res = sub i32 %a, %b
  ret i32 %res
  ; CHECK: load atomic i32, i32* %P1
  ; CHECK: load atomic i32, i32* %P1
}

; CHECK-LABEL: @test16(
; forwarding non-atomic to atomic is wrong! (However,
; it would be legal to use the later value in place of the
; former in this particular example. We just don't
; do that right now.)
define i32 @test16(i32* %P1, i32* %P2) {
  %a = load i32, i32* %P1, align 4
  %b = load atomic i32, i32* %P1 unordered, align 4
  %res = sub i32 %a, %b
  ret i32 %res
  ; CHECK: load i32, i32* %P1
  ; CHECK: load atomic i32, i32* %P1
}

; CHECK-LABEL: @test16b(
; likewise, a non-atomic store must not be forwarded to an atomic load
define i32 @test16b(i32* %P1) {
  store i32 0, i32* %P1
  %b = load atomic i32, i32* %P1 unordered, align 4
  ret i32 %b
  ; CHECK: load atomic i32, i32* %P1
}

; Can't DSE across a full fence
define void @fence_seq_cst_store(i32* %P1, i32* %P2) {
; CHECK-LABEL: @fence_seq_cst_store(
; CHECK: store
; CHECK: store atomic
; CHECK: store
  store i32 0, i32* %P1, align 4
  store atomic i32 0, i32* %P2 seq_cst, align 4
  store i32 0, i32* %P1, align 4
  ret void
}

; Can't DSE across a full fence
define void @fence_seq_cst(i32* %P1, i32* %P2) {
; CHECK-LABEL: @fence_seq_cst(
; CHECK: store
; CHECK: fence seq_cst
; CHECK: store
  store i32 0, i32* %P1, align 4
  fence seq_cst
  store i32 0, i32* %P1, align 4
  ret void
}

; Can't DSE across a full singlethread fence
define void @fence_seq_cst_st(i32* %P1, i32* %P2) {
; CHECK-LABEL: @fence_seq_cst_st(
; CHECK: store
; CHECK: fence singlethread seq_cst
; CHECK: store
  store i32 0, i32* %P1, align 4
  fence singlethread seq_cst
  store i32 0, i32* %P1, align 4
  ret void
}

; Can't DSE across a full fence
define void @fence_asm_sideeffect(i32* %P1, i32* %P2) {
; CHECK-LABEL: @fence_asm_sideeffect(
; CHECK: store
; CHECK: call void asm sideeffect
; CHECK: store
  store i32 0, i32* %P1, align 4
  call void asm sideeffect "", ""()
  store i32 0, i32* %P1, align 4
  ret void
}

; Can't DSE across a full fence
define void @fence_asm_memory(i32* %P1, i32* %P2) {
; CHECK-LABEL: @fence_asm_memory(
; CHECK: store
; CHECK: call void asm
; CHECK: store
  store i32 0, i32* %P1, align 4
  call void asm "", "~{memory}"()
  store i32 0, i32* %P1, align 4
  ret void
}

; Can't remove a volatile load
define i32 @volatile_load(i32* %P1, i32* %P2) {
  %a = load i32, i32* %P1, align 4
  %b = load volatile i32, i32* %P1, align 4
  %res = sub i32 %a, %b
  ret i32 %res
  ; CHECK-LABEL: @volatile_load(
  ; CHECK: load i32, i32* %P1
  ; CHECK: load volatile i32, i32* %P1
}

; Can't remove redundant volatile loads
define i32 @redundant_volatile_load(i32* %P1, i32* %P2) {
  %a = load volatile i32, i32* %P1, align 4
  %b = load volatile i32, i32* %P1, align 4
  %res = sub i32 %a, %b
  ret i32 %res
  ; CHECK-LABEL: @redundant_volatile_load(
  ; CHECK: load volatile i32, i32* %P1
  ; CHECK: load volatile i32, i32* %P1
  ; CHECK: sub
}

; Can't DSE a volatile store
define void @volatile_store(i32* %P1, i32* %P2) {
; CHECK-LABEL: @volatile_store(
; CHECK: store volatile
; CHECK: store
  store volatile i32 0, i32* %P1, align 4
  store i32 3, i32* %P1, align 4
  ret void
}

; Can't DSE a redundant volatile store
define void @redundant_volatile_store(i32* %P1, i32* %P2) {
; CHECK-LABEL: @redundant_volatile_store(
; CHECK: store volatile
; CHECK: store volatile
  store volatile i32 0, i32* %P1, align 4
  store volatile i32 0, i32* %P1, align 4
  ret void
}

; Can value forward from volatile loads
define i32 @test20(i32* %P1, i32* %P2) {
  %a = load volatile i32, i32* %P1, align 4
  %b = load i32, i32* %P1, align 4
  %res = sub i32 %a, %b
  ret i32 %res
  ; CHECK-LABEL: @test20(
  ; CHECK: load volatile i32, i32* %P1
  ; CHECK: ret i32 0
}

; We're currently conservative about widening
define i64 @widen1(i32* %P1) {
  ; CHECK-LABEL: @widen1(
  ; CHECK: load atomic i32, i32* %P1
  ; CHECK: load atomic i64, i64* %p2
  %p2 = bitcast i32* %P1 to i64*
  %a = load atomic i32, i32* %P1 unordered, align 4
  %b = load atomic i64, i64* %p2 unordered, align 4
  %a64 = sext i32 %a to i64
  %res = sub i64 %a64, %b
  ret i64 %res
}

; narrowing does work
define i64 @narrow(i32* %P1) {
  ; CHECK-LABEL: @narrow(
  ; CHECK: load atomic i64, i64* %p2
  ; CHECK-NOT: load atomic i32, i32* %P1
  %p2 = bitcast i32* %P1 to i64*
  %a64 = load atomic i64, i64* %p2 unordered, align 4
  %b = load atomic i32, i32* %P1 unordered, align 4
  %b64 = sext i32 %b to i64
  %res = sub i64 %a64, %b64
  ret i64 %res
}

; Missed optimization: we don't yet optimize ordered loads
define i64 @narrow2(i32* %P1) {
  ; CHECK-LABEL: @narrow2(
  ; CHECK: load atomic i64, i64* %p2
  ; CHECK: load atomic i32, i32* %P1
  %p2 = bitcast i32* %P1 to i64*
  %a64 = load atomic i64, i64* %p2 acquire, align 4
  %b = load atomic i32, i32* %P1 acquire, align 4
  %b64 = sext i32 %b to i64
  %res = sub i64 %a64, %b64
  ret i64 %res
}

; Note: The cross-block FRE testing is deliberately light. All of the tricky
; bits of legality are shared code with the block-local FRE above. These
; are here only to show that we haven't obviously broken anything.

; unordered atomic to unordered atomic
define i32 @non_local_fre(i32* %P1) {
; CHECK-LABEL: @non_local_fre(
; CHECK: load atomic i32, i32* %P1
; CHECK: ret i32 0
; CHECK: ret i32 0
  %a = load atomic i32, i32* %P1 unordered, align 4
  %cmp = icmp eq i32 %a, 0
  br i1 %cmp, label %early, label %next
early:
  ret i32 %a
next:
  %b = load atomic i32, i32* %P1 unordered, align 4
  %res = sub i32 %a, %b
  ret i32 %res
}

; unordered atomic to non-atomic
define i32 @non_local_fre2(i32* %P1) {
; CHECK-LABEL: @non_local_fre2(
; CHECK: load atomic i32, i32* %P1
; CHECK: ret i32 0
; CHECK: ret i32 0
  %a = load atomic i32, i32* %P1 unordered, align 4
  %cmp = icmp eq i32 %a, 0
  br i1 %cmp, label %early, label %next
early:
  ret i32 %a
next:
  %b = load i32, i32* %P1
  %res = sub i32 %a, %b
  ret i32 %res
}

; Can't forward ordered atomics.
define i32 @non_local_fre3(i32* %P1) {
; CHECK-LABEL: @non_local_fre3(
; CHECK: load atomic i32, i32* %P1 acquire
; CHECK: ret i32 0
; CHECK: load atomic i32, i32* %P1 acquire
; CHECK: ret i32 %res
  %a = load atomic i32, i32* %P1 acquire, align 4
  %cmp = icmp eq i32 %a, 0
  br i1 %cmp, label %early, label %next
early:
  ret i32 %a
next:
  %b = load atomic i32, i32* %P1 acquire, align 4
  %res = sub i32 %a, %b
  ret i32 %res
}

declare void @clobber()

; unordered atomic to unordered atomic
define i32 @non_local_pre(i32* %P1) {
; CHECK-LABEL: @non_local_pre(
; CHECK: load atomic i32, i32* %P1 unordered
; CHECK: load atomic i32, i32* %P1 unordered
; CHECK: %b = phi i32 [ %b.pre, %early ], [ %a, %0 ]
; CHECK: ret i32 %b
  %a = load atomic i32, i32* %P1 unordered, align 4
  %cmp = icmp eq i32 %a, 0
  br i1 %cmp, label %early, label %next
early:
  call void @clobber()
  br label %next
next:
  %b = load atomic i32, i32* %P1 unordered, align 4
  ret i32 %b
}

; unordered atomic to non-atomic
define i32 @non_local_pre2(i32* %P1) {
; CHECK-LABEL: @non_local_pre2(
; CHECK: load atomic i32, i32* %P1 unordered
; CHECK: load i32, i32* %P1
; CHECK: %b = phi i32 [ %b.pre, %early ], [ %a, %0 ]
; CHECK: ret i32 %b
  %a = load atomic i32, i32* %P1 unordered, align 4
  %cmp = icmp eq i32 %a, 0
  br i1 %cmp, label %early, label %next
early:
  call void @clobber()
  br label %next
next:
  %b = load i32, i32* %P1
  ret i32 %b
}

; non-atomic to unordered atomic - can't forward!
define i32 @non_local_pre3(i32* %P1) {
; CHECK-LABEL: @non_local_pre3(
; CHECK: %a = load i32, i32* %P1
; CHECK: %b = load atomic i32, i32* %P1 unordered
; CHECK: ret i32 %b
  %a = load i32, i32* %P1
  %cmp = icmp eq i32 %a, 0
  br i1 %cmp, label %early, label %next
early:
  call void @clobber()
  br label %next
next:
  %b = load atomic i32, i32* %P1 unordered, align 4
  ret i32 %b
}

; ordered atomic to ordered atomic - can't forward
define i32 @non_local_pre4(i32* %P1) {
; CHECK-LABEL: @non_local_pre4(
; CHECK: %a = load atomic i32, i32* %P1 seq_cst
; CHECK: %b = load atomic i32, i32* %P1 seq_cst
; CHECK: ret i32 %b
  %a = load atomic i32, i32* %P1 seq_cst, align 4
  %cmp = icmp eq i32 %a, 0
  br i1 %cmp, label %early, label %next
early:
  call void @clobber()
  br label %next
next:
  %b = load atomic i32, i32* %P1 seq_cst, align 4
  ret i32 %b
}

; can't remove volatile on any path
define i32 @non_local_pre5(i32* %P1) {
; CHECK-LABEL: @non_local_pre5(
; CHECK: %a = load atomic i32, i32* %P1 seq_cst
; CHECK: %b = load volatile i32, i32* %P1
; CHECK: ret i32 %b
  %a = load atomic i32, i32* %P1 seq_cst, align 4
  %cmp = icmp eq i32 %a, 0
  br i1 %cmp, label %early, label %next
early:
  call void @clobber()
  br label %next
next:
  %b = load volatile i32, i32* %P1
  ret i32 %b
}

; ordered atomic to unordered atomic
define i32 @non_local_pre6(i32* %P1) {
; CHECK-LABEL: @non_local_pre6(
; CHECK: load atomic i32, i32* %P1 seq_cst
; CHECK: load atomic i32, i32* %P1 unordered
; CHECK: %b = phi i32 [ %b.pre, %early ], [ %a, %0 ]
; CHECK: ret i32 %b
  %a = load atomic i32, i32* %P1 seq_cst, align 4
  %cmp = icmp eq i32 %a, 0
  br i1 %cmp, label %early, label %next
early:
  call void @clobber()
  br label %next
next:
  %b = load atomic i32, i32* %P1 unordered, align 4
  ret i32 %b
}