1; RUN: opt < %s -licm -S | FileCheck %s 2; RUN: opt < %s -aa-pipeline=basic-aa -passes='require<opt-remark-emit>,loop(licm)' -S | FileCheck %s 3; RUN: opt < %s -licm -enable-mssa-loop-dependency=true -verify-memoryssa -S | FileCheck %s 4 5@X = global i32 0 ; <i32*> [#uses=1] 6 7declare void @foo() 8 9declare i32 @llvm.bitreverse.i32(i32) 10 11; This testcase tests for a problem where LICM hoists 12; potentially trapping instructions when they are not guaranteed to execute. 13define i32 @test1(i1 %c) { 14; CHECK-LABEL: @test1( 15 %A = load i32, i32* @X ; <i32> [#uses=2] 16 br label %Loop 17Loop: ; preds = %LoopTail, %0 18 call void @foo( ) 19 br i1 %c, label %LoopTail, label %IfUnEqual 20 21IfUnEqual: ; preds = %Loop 22; CHECK: IfUnEqual: 23; CHECK-NEXT: sdiv i32 4, %A 24 %B1 = sdiv i32 4, %A ; <i32> [#uses=1] 25 br label %LoopTail 26 27LoopTail: ; preds = %IfUnEqual, %Loop 28 %B = phi i32 [ 0, %Loop ], [ %B1, %IfUnEqual ] ; <i32> [#uses=1] 29 br i1 %c, label %Loop, label %Out 30Out: ; preds = %LoopTail 31 %C = sub i32 %A, %B ; <i32> [#uses=1] 32 ret i32 %C 33} 34 35 36declare void @foo2(i32) nounwind 37 38 39;; It is ok and desirable to hoist this potentially trapping instruction. 40define i32 @test2(i1 %c) { 41; CHECK-LABEL: @test2( 42; CHECK-NEXT: load i32, i32* @X 43; CHECK-NEXT: %B = sdiv i32 4, %A 44 %A = load i32, i32* @X 45 br label %Loop 46 47Loop: 48 ;; Should have hoisted this div! 49 %B = sdiv i32 4, %A 50 br label %loop2 51 52loop2: 53 call void @foo2( i32 %B ) 54 br i1 %c, label %Loop, label %Out 55 56Out: 57 %C = sub i32 %A, %B 58 ret i32 %C 59} 60 61 62; This loop invariant instruction should be constant folded, not hoisted. 63define i32 @test3(i1 %c) { 64; CHECK-LABEL: define i32 @test3( 65; CHECK: call void @foo2(i32 6) 66 %A = load i32, i32* @X ; <i32> [#uses=2] 67 br label %Loop 68Loop: 69 %B = add i32 4, 2 ; <i32> [#uses=2] 70 call void @foo2( i32 %B ) 71 br i1 %c, label %Loop, label %Out 72Out: ; preds = %Loop 73 %C = sub i32 %A, %B ; <i32> [#uses=1] 74 ret i32 %C 75} 76 77; CHECK-LABEL: @test4( 78; CHECK: call 79; CHECK: sdiv 80; CHECK: ret 81define i32 @test4(i32 %x, i32 %y) nounwind uwtable ssp { 82entry: 83 br label %for.body 84 85for.body: ; preds = %entry, %for.body 86 %i.02 = phi i32 [ 0, %entry ], [ %inc, %for.body ] 87 %n.01 = phi i32 [ 0, %entry ], [ %add, %for.body ] 88 call void @foo_may_call_exit(i32 0) 89 %div = sdiv i32 %x, %y 90 %add = add nsw i32 %n.01, %div 91 %inc = add nsw i32 %i.02, 1 92 %cmp = icmp slt i32 %inc, 10000 93 br i1 %cmp, label %for.body, label %for.end 94 95for.end: ; preds = %for.body 96 %n.0.lcssa = phi i32 [ %add, %for.body ] 97 ret i32 %n.0.lcssa 98} 99 100declare void @foo_may_call_exit(i32) 101 102; PR14854 103; CHECK-LABEL: @test5( 104; CHECK: extractvalue 105; CHECK: br label %tailrecurse 106; CHECK: tailrecurse: 107; CHECK: ifend: 108; CHECK: insertvalue 109define { i32*, i32 } @test5(i32 %i, { i32*, i32 } %e) { 110entry: 111 br label %tailrecurse 112 113tailrecurse: ; preds = %then, %entry 114 %i.tr = phi i32 [ %i, %entry ], [ %cmp2, %then ] 115 %out = extractvalue { i32*, i32 } %e, 1 116 %d = insertvalue { i32*, i32 } %e, i32* null, 0 117 %cmp1 = icmp sgt i32 %out, %i.tr 118 br i1 %cmp1, label %then, label %ifend 119 120then: ; preds = %tailrecurse 121 call void @foo() 122 %cmp2 = add i32 %i.tr, 1 123 br label %tailrecurse 124 125ifend: ; preds = %tailrecurse 126 ret { i32*, i32 } %d 127} 128 129; CHECK: define void @test6(float %f) 130; CHECK: fneg 131; CHECK: br label %for.body 132define void @test6(float %f) #2 { 133entry: 134 br label %for.body 135 136for.body: ; preds = %for.body, %entry 137 %i = phi i32 [ 0, %entry ], [ %inc, %for.body ] 138 call void @foo_may_call_exit(i32 0) 139 %neg = fneg float %f 140 call void @use(float %neg) 141 %inc = add nsw i32 %i, 1 142 %cmp = icmp slt i32 %inc, 10000 143 br i1 %cmp, label %for.body, label %for.end 144 145for.end: ; preds = %for.body 146 ret void 147} 148 149declare void @use(float) 150 151; CHECK: define i32 @hoist_bitreverse(i32 %0) 152; CHECK: bitreverse 153; CHECK: br label %header 154define i32 @hoist_bitreverse(i32 %0) { 155 br label %header 156 157header: 158 %sum = phi i32 [ 0, %1 ], [ %5, %latch ] 159 %2 = phi i32 [ 0, %1 ], [ %6, %latch ] 160 %3 = icmp slt i32 %2, 1024 161 br i1 %3, label %body, label %return 162 163body: 164 %4 = call i32 @llvm.bitreverse.i32(i32 %0) 165 %5 = add i32 %sum, %4 166 br label %latch 167 168latch: 169 %6 = add nsw i32 %2, 1 170 br label %header 171 172return: 173 ret i32 %sum 174} 175 176; Can neither sink nor hoist 177define i32 @test_volatile(i1 %c) { 178; CHECK-LABEL: @test_volatile( 179; CHECK-LABEL: Loop: 180; CHECK: load volatile i32, i32* @X 181; CHECK-LABEL: Out: 182 br label %Loop 183 184Loop: 185 %A = load volatile i32, i32* @X 186 br i1 %c, label %Loop, label %Out 187 188Out: 189 ret i32 %A 190} 191 192 193declare {}* @llvm.invariant.start.p0i8(i64, i8* nocapture) nounwind readonly 194declare void @llvm.invariant.end.p0i8({}*, i64, i8* nocapture) nounwind 195declare void @escaping.invariant.start({}*) nounwind 196; invariant.start dominates the load, and in this scope, the 197; load is invariant. So, we can hoist the `addrld` load out of the loop. 198define i32 @test_fence(i8* %addr, i32 %n, i8* %volatile) { 199; CHECK-LABEL: @test_fence 200; CHECK-LABEL: entry 201; CHECK: invariant.start 202; CHECK: %addrld = load atomic i32, i32* %addr.i unordered, align 8 203; CHECK: br label %loop 204entry: 205 %gep = getelementptr inbounds i8, i8* %addr, i64 8 206 %addr.i = bitcast i8* %gep to i32 * 207 store atomic i32 5, i32 * %addr.i unordered, align 8 208 fence release 209 %invst = call {}* @llvm.invariant.start.p0i8(i64 4, i8* %gep) 210 br label %loop 211 212loop: 213 %indvar = phi i32 [ %indvar.next, %loop ], [ 0, %entry ] 214 %sum = phi i32 [ %sum.next, %loop ], [ 0, %entry ] 215 %volload = load atomic i8, i8* %volatile unordered, align 8 216 fence acquire 217 %volchk = icmp eq i8 %volload, 0 218 %addrld = load atomic i32, i32* %addr.i unordered, align 8 219 %sel = select i1 %volchk, i32 0, i32 %addrld 220 %sum.next = add i32 %sel, %sum 221 %indvar.next = add i32 %indvar, 1 222 %cond = icmp slt i32 %indvar.next, %n 223 br i1 %cond, label %loop, label %loopexit 224 225loopexit: 226 ret i32 %sum 227} 228 229 230 231; Same as test above, but the load is no longer invariant (presence of 232; invariant.end). We cannot hoist the addrld out of loop. 233define i32 @test_fence1(i8* %addr, i32 %n, i8* %volatile) { 234; CHECK-LABEL: @test_fence1 235; CHECK-LABEL: entry 236; CHECK: invariant.start 237; CHECK-NEXT: invariant.end 238; CHECK-NEXT: br label %loop 239entry: 240 %gep = getelementptr inbounds i8, i8* %addr, i64 8 241 %addr.i = bitcast i8* %gep to i32 * 242 store atomic i32 5, i32 * %addr.i unordered, align 8 243 fence release 244 %invst = call {}* @llvm.invariant.start.p0i8(i64 4, i8* %gep) 245 call void @llvm.invariant.end.p0i8({}* %invst, i64 4, i8* %gep) 246 br label %loop 247 248loop: 249 %indvar = phi i32 [ %indvar.next, %loop ], [ 0, %entry ] 250 %sum = phi i32 [ %sum.next, %loop ], [ 0, %entry ] 251 %volload = load atomic i8, i8* %volatile unordered, align 8 252 fence acquire 253 %volchk = icmp eq i8 %volload, 0 254 %addrld = load atomic i32, i32* %addr.i unordered, align 8 255 %sel = select i1 %volchk, i32 0, i32 %addrld 256 %sum.next = add i32 %sel, %sum 257 %indvar.next = add i32 %indvar, 1 258 %cond = icmp slt i32 %indvar.next, %n 259 br i1 %cond, label %loop, label %loopexit 260 261loopexit: 262 ret i32 %sum 263} 264 265; same as test above, but instead of invariant.end, we have the result of 266; invariant.start escaping through a call. We cannot hoist the load. 267define i32 @test_fence2(i8* %addr, i32 %n, i8* %volatile) { 268; CHECK-LABEL: @test_fence2 269; CHECK-LABEL: entry 270; CHECK-NOT: load 271; CHECK: br label %loop 272entry: 273 %gep = getelementptr inbounds i8, i8* %addr, i64 8 274 %addr.i = bitcast i8* %gep to i32 * 275 store atomic i32 5, i32 * %addr.i unordered, align 8 276 fence release 277 %invst = call {}* @llvm.invariant.start.p0i8(i64 4, i8* %gep) 278 call void @escaping.invariant.start({}* %invst) 279 br label %loop 280 281loop: 282 %indvar = phi i32 [ %indvar.next, %loop ], [ 0, %entry ] 283 %sum = phi i32 [ %sum.next, %loop ], [ 0, %entry ] 284 %volload = load atomic i8, i8* %volatile unordered, align 8 285 fence acquire 286 %volchk = icmp eq i8 %volload, 0 287 %addrld = load atomic i32, i32* %addr.i unordered, align 8 288 %sel = select i1 %volchk, i32 0, i32 %addrld 289 %sum.next = add i32 %sel, %sum 290 %indvar.next = add i32 %indvar, 1 291 %cond = icmp slt i32 %indvar.next, %n 292 br i1 %cond, label %loop, label %loopexit 293 294loopexit: 295 ret i32 %sum 296} 297 298; FIXME: invariant.start dominates the load, and in this scope, the 299; load is invariant. So, we can hoist the `addrld` load out of the loop. 300; Consider the loadoperand addr.i bitcasted before being passed to 301; invariant.start 302define i32 @test_fence3(i32* %addr, i32 %n, i8* %volatile) { 303; CHECK-LABEL: @test_fence3 304; CHECK-LABEL: entry 305; CHECK: invariant.start 306; CHECK-NOT: %addrld = load atomic i32, i32* %addr.i unordered, align 8 307; CHECK: br label %loop 308entry: 309 %addr.i = getelementptr inbounds i32, i32* %addr, i64 8 310 %gep = bitcast i32* %addr.i to i8 * 311 store atomic i32 5, i32 * %addr.i unordered, align 8 312 fence release 313 %invst = call {}* @llvm.invariant.start.p0i8(i64 4, i8* %gep) 314 br label %loop 315 316loop: 317 %indvar = phi i32 [ %indvar.next, %loop ], [ 0, %entry ] 318 %sum = phi i32 [ %sum.next, %loop ], [ 0, %entry ] 319 %volload = load atomic i8, i8* %volatile unordered, align 8 320 fence acquire 321 %volchk = icmp eq i8 %volload, 0 322 %addrld = load atomic i32, i32* %addr.i unordered, align 8 323 %sel = select i1 %volchk, i32 0, i32 %addrld 324 %sum.next = add i32 %sel, %sum 325 %indvar.next = add i32 %indvar, 1 326 %cond = icmp slt i32 %indvar.next, %n 327 br i1 %cond, label %loop, label %loopexit 328 329loopexit: 330 ret i32 %sum 331} 332 333; We should not hoist the addrld out of the loop. 334define i32 @test_fence4(i32* %addr, i32 %n, i8* %volatile) { 335; CHECK-LABEL: @test_fence4 336; CHECK-LABEL: entry 337; CHECK-NOT: %addrld = load atomic i32, i32* %addr.i unordered, align 8 338; CHECK: br label %loop 339entry: 340 %addr.i = getelementptr inbounds i32, i32* %addr, i64 8 341 %gep = bitcast i32* %addr.i to i8 * 342 br label %loop 343 344loop: 345 %indvar = phi i32 [ %indvar.next, %loop ], [ 0, %entry ] 346 %sum = phi i32 [ %sum.next, %loop ], [ 0, %entry ] 347 store atomic i32 5, i32 * %addr.i unordered, align 8 348 fence release 349 %invst = call {}* @llvm.invariant.start.p0i8(i64 4, i8* %gep) 350 %volload = load atomic i8, i8* %volatile unordered, align 8 351 fence acquire 352 %volchk = icmp eq i8 %volload, 0 353 %addrld = load atomic i32, i32* %addr.i unordered, align 8 354 %sel = select i1 %volchk, i32 0, i32 %addrld 355 %sum.next = add i32 %sel, %sum 356 %indvar.next = add i32 %indvar, 1 357 %cond = icmp slt i32 %indvar.next, %n 358 br i1 %cond, label %loop, label %loopexit 359 360loopexit: 361 ret i32 %sum 362} 363 364; We can't hoist the invariant load out of the loop because 365; the marker is given a variable size (-1). 366define i32 @test_fence5(i8* %addr, i32 %n, i8* %volatile) { 367; CHECK-LABEL: @test_fence5 368; CHECK-LABEL: entry 369; CHECK: invariant.start 370; CHECK-NOT: %addrld = load atomic i32, i32* %addr.i unordered, align 8 371; CHECK: br label %loop 372entry: 373 %gep = getelementptr inbounds i8, i8* %addr, i64 8 374 %addr.i = bitcast i8* %gep to i32 * 375 store atomic i32 5, i32 * %addr.i unordered, align 8 376 fence release 377 %invst = call {}* @llvm.invariant.start.p0i8(i64 -1, i8* %gep) 378 br label %loop 379 380loop: 381 %indvar = phi i32 [ %indvar.next, %loop ], [ 0, %entry ] 382 %sum = phi i32 [ %sum.next, %loop ], [ 0, %entry ] 383 %volload = load atomic i8, i8* %volatile unordered, align 8 384 fence acquire 385 %volchk = icmp eq i8 %volload, 0 386 %addrld = load atomic i32, i32* %addr.i unordered, align 8 387 %sel = select i1 %volchk, i32 0, i32 %addrld 388 %sum.next = add i32 %sel, %sum 389 %indvar.next = add i32 %indvar, 1 390 %cond = icmp slt i32 %indvar.next, %n 391 br i1 %cond, label %loop, label %loopexit 392 393loopexit: 394 ret i32 %sum 395} 396