; RUN: opt < %s -licm -S | FileCheck %s
; RUN: opt < %s -aa-pipeline=basic-aa -passes='require<opt-remark-emit>,loop(licm)' -S | FileCheck %s

@X = global i32 0 ; <i32*> [#uses=1]

declare void @foo()

declare i32 @llvm.bitreverse.i32(i32)

; This testcase tests for a problem where LICM hoists
; potentially trapping instructions when they are not guaranteed to execute.
; The sdiv below is only reached via %IfUnEqual, so hoisting it to the
; preheader could introduce a trap that the original program never executes.
define i32 @test1(i1 %c) {
; CHECK-LABEL: @test1(
  %A = load i32, i32* @X ; <i32> [#uses=2]
  br label %Loop
Loop: ; preds = %LoopTail, %0
  call void @foo( )
  br i1 %c, label %LoopTail, label %IfUnEqual

IfUnEqual: ; preds = %Loop
; CHECK: IfUnEqual:
; CHECK-NEXT: sdiv i32 4, %A
  %B1 = sdiv i32 4, %A ; <i32> [#uses=1]
  br label %LoopTail

LoopTail: ; preds = %IfUnEqual, %Loop
  %B = phi i32 [ 0, %Loop ], [ %B1, %IfUnEqual ] ; <i32> [#uses=1]
  br i1 %c, label %Loop, label %Out
Out: ; preds = %LoopTail
  %C = sub i32 %A, %B ; <i32> [#uses=1]
  ret i32 %C
}


declare void @foo2(i32) nounwind


;; It is ok and desirable to hoist this potentially trapping instruction.
;; Here the sdiv is in a block that executes on every iteration, so it is
;; guaranteed to execute and may be hoisted to the preheader.
define i32 @test2(i1 %c) {
; CHECK-LABEL: @test2(
; CHECK-NEXT: load i32, i32* @X
; CHECK-NEXT: %B = sdiv i32 4, %A
  %A = load i32, i32* @X
  br label %Loop

Loop:
  ;; Should have hoisted this div!
  %B = sdiv i32 4, %A
  br label %loop2

loop2:
  call void @foo2( i32 %B )
  br i1 %c, label %Loop, label %Out

Out:
  %C = sub i32 %A, %B
  ret i32 %C
}


; This loop invariant instruction should be constant folded, not hoisted.
; The add of two constants folds to 6, which appears directly in the call.
define i32 @test3(i1 %c) {
; CHECK-LABEL: define i32 @test3(
; CHECK: call void @foo2(i32 6)
  %A = load i32, i32* @X ; <i32> [#uses=2]
  br label %Loop
Loop:
  %B = add i32 4, 2 ; <i32> [#uses=2]
  call void @foo2( i32 %B )
  br i1 %c, label %Loop, label %Out
Out: ; preds = %Loop
  %C = sub i32 %A, %B ; <i32> [#uses=1]
  ret i32 %C
}

; The sdiv is loop-invariant, but it is preceded by a call
; (@foo_may_call_exit — presumably one that may not return), so the
; CHECK lines require that the call still comes before the sdiv.
; CHECK-LABEL: @test4(
; CHECK: call
; CHECK: sdiv
; CHECK: ret
define i32 @test4(i32 %x, i32 %y) nounwind uwtable ssp {
entry:
  br label %for.body

for.body: ; preds = %entry, %for.body
  %i.02 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %n.01 = phi i32 [ 0, %entry ], [ %add, %for.body ]
  call void @foo_may_call_exit(i32 0)
  %div = sdiv i32 %x, %y
  %add = add nsw i32 %n.01, %div
  %inc = add nsw i32 %i.02, 1
  %cmp = icmp slt i32 %inc, 10000
  br i1 %cmp, label %for.body, label %for.end

for.end: ; preds = %for.body
  %n.0.lcssa = phi i32 [ %add, %for.body ]
  ret i32 %n.0.lcssa
}

declare void @foo_may_call_exit(i32)

; PR14854
; The CHECK ordering requires the loop-invariant extractvalue to appear
; before the loop (hoisted) and the insertvalue to appear after %ifend.
; CHECK-LABEL: @test5(
; CHECK: extractvalue
; CHECK: br label %tailrecurse
; CHECK: tailrecurse:
; CHECK: ifend:
; CHECK: insertvalue
define { i32*, i32 } @test5(i32 %i, { i32*, i32 } %e) {
entry:
  br label %tailrecurse

tailrecurse: ; preds = %then, %entry
  %i.tr = phi i32 [ %i, %entry ], [ %cmp2, %then ]
  %out = extractvalue { i32*, i32 } %e, 1
  %d = insertvalue { i32*, i32 } %e, i32* null, 0
  %cmp1 = icmp sgt i32 %out, %i.tr
  br i1 %cmp1, label %then, label %ifend

then: ; preds = %tailrecurse
  call void @foo()
  %cmp2 = add i32 %i.tr, 1
  br label %tailrecurse

ifend: ; preds = %tailrecurse
  ret { i32*, i32 } %d
}

; A call to a readnone intrinsic on a loop-invariant operand should be
; hoisted: the CHECK lines require bitreverse to appear before %header.
; CHECK: define i32 @hoist_bitreverse(i32)
; CHECK: bitreverse
; CHECK: br label %header
define i32 @hoist_bitreverse(i32) {
  br label %header

header:
  %sum = phi i32 [ 0, %1 ], [ %5, %latch ]
  %2 = phi i32 [ 0, %1 ], [ %6, %latch ]
  %3 = icmp slt i32 %2, 1024
  br i1 %3, label %body, label %return

body:
  %4 = call i32 @llvm.bitreverse.i32(i32 %0)
  %5 = add i32 %sum, %4
  br label %latch

latch:
  %6 = add nsw i32 %2, 1
  br label %header

return:
  ret i32 %sum
}

declare {}* @llvm.invariant.start.p0i8(i64, i8* nocapture) nounwind readonly
declare void @llvm.invariant.end.p0i8({}*, i64, i8* nocapture) nounwind
declare void @escaping.invariant.start({}*) nounwind
; invariant.start dominates the load, and in this scope, the
; load is invariant. So, we can hoist the `addrld` load out of the loop.
define i32 @test_fence(i8* %addr, i32 %n, i8* %volatile) {
; CHECK-LABEL: @test_fence
; CHECK-LABEL: entry
; CHECK: invariant.start
; CHECK: %addrld = load atomic i32, i32* %addr.i unordered, align 8
; CHECK: br label %loop
entry:
  %gep = getelementptr inbounds i8, i8* %addr, i64 8
  %addr.i = bitcast i8* %gep to i32 *
  store atomic i32 5, i32 * %addr.i unordered, align 8
  fence release
  %invst = call {}* @llvm.invariant.start.p0i8(i64 4, i8* %gep)
  br label %loop

loop:
  %indvar = phi i32 [ %indvar.next, %loop ], [ 0, %entry ]
  %sum = phi i32 [ %sum.next, %loop ], [ 0, %entry ]
  %volload = load atomic i8, i8* %volatile unordered, align 8
  fence acquire
  %volchk = icmp eq i8 %volload, 0
  %addrld = load atomic i32, i32* %addr.i unordered, align 8
  %sel = select i1 %volchk, i32 0, i32 %addrld
  %sum.next = add i32 %sel, %sum
  %indvar.next = add i32 %indvar, 1
  %cond = icmp slt i32 %indvar.next, %n
  br i1 %cond, label %loop, label %loopexit

loopexit:
  ret i32 %sum
}



; Same as test above, but the load is no longer invariant (presence of
; invariant.end). We cannot hoist the addrld out of loop.
define i32 @test_fence1(i8* %addr, i32 %n, i8* %volatile) {
; CHECK-LABEL: @test_fence1
; CHECK-LABEL: entry
; CHECK: invariant.start
; CHECK: invariant.end
; CHECK: br label %loop
entry:
  %gep = getelementptr inbounds i8, i8* %addr, i64 8
  %addr.i = bitcast i8* %gep to i32 *
  store atomic i32 5, i32 * %addr.i unordered, align 8
  fence release
  %invst = call {}* @llvm.invariant.start.p0i8(i64 4, i8* %gep)
  ; The matching invariant.end closes the invariance scope before the loop
  ; begins, so %addrld below may observe a changed value and must not be
  ; hoisted.
  call void @llvm.invariant.end.p0i8({}* %invst, i64 4, i8* %gep)
  br label %loop

loop:
  %indvar = phi i32 [ %indvar.next, %loop ], [ 0, %entry ]
  %sum = phi i32 [ %sum.next, %loop ], [ 0, %entry ]
  %volload = load atomic i8, i8* %volatile unordered, align 8
  fence acquire
  %volchk = icmp eq i8 %volload, 0
  %addrld = load atomic i32, i32* %addr.i unordered, align 8
  %sel = select i1 %volchk, i32 0, i32 %addrld
  %sum.next = add i32 %sel, %sum
  %indvar.next = add i32 %indvar, 1
  %cond = icmp slt i32 %indvar.next, %n
  br i1 %cond, label %loop, label %loopexit

loopexit:
  ret i32 %sum
}

; same as test above, but instead of invariant.end, we have the result of
; invariant.start escaping through a call. We cannot hoist the load.
define i32 @test_fence2(i8* %addr, i32 %n, i8* %volatile) {
; CHECK-LABEL: @test_fence2
; CHECK-LABEL: entry
; CHECK-NOT: load
; CHECK: br label %loop
entry:
  %gep = getelementptr inbounds i8, i8* %addr, i64 8
  %addr.i = bitcast i8* %gep to i32 *
  store atomic i32 5, i32 * %addr.i unordered, align 8
  fence release
  %invst = call {}* @llvm.invariant.start.p0i8(i64 4, i8* %gep)
  ; The invariant.start token escapes into an unknown callee, which could
  ; end the invariance scope; %addrld must therefore stay in the loop.
  call void @escaping.invariant.start({}* %invst)
  br label %loop

loop:
  %indvar = phi i32 [ %indvar.next, %loop ], [ 0, %entry ]
  %sum = phi i32 [ %sum.next, %loop ], [ 0, %entry ]
  %volload = load atomic i8, i8* %volatile unordered, align 8
  fence acquire
  %volchk = icmp eq i8 %volload, 0
  %addrld = load atomic i32, i32* %addr.i unordered, align 8
  %sel = select i1 %volchk, i32 0, i32 %addrld
  %sum.next = add i32 %sel, %sum
  %indvar.next = add i32 %indvar, 1
  %cond = icmp slt i32 %indvar.next, %n
  br i1 %cond, label %loop, label %loopexit

loopexit:
  ret i32 %sum
}

; FIXME: invariant.start dominates the load, and in this scope, the
; load is invariant. So, we can hoist the `addrld` load out of the loop.
; Consider the loadoperand addr.i bitcasted before being passed to
; invariant.start
define i32 @test_fence3(i32* %addr, i32 %n, i8* %volatile) {
; CHECK-LABEL: @test_fence3
; CHECK-LABEL: entry
; CHECK: invariant.start
; CHECK-NOT: %addrld = load atomic i32, i32* %addr.i unordered, align 8
; CHECK: br label %loop
entry:
  ; Unlike @test_fence, the GEP is done on the i32* and then bitcast to i8*
  ; for invariant.start; the CHECK-NOT documents that hoisting is (FIXME)
  ; currently missed in this form.
  %addr.i = getelementptr inbounds i32, i32* %addr, i64 8
  %gep = bitcast i32* %addr.i to i8 *
  store atomic i32 5, i32 * %addr.i unordered, align 8
  fence release
  %invst = call {}* @llvm.invariant.start.p0i8(i64 4, i8* %gep)
  br label %loop

loop:
  %indvar = phi i32 [ %indvar.next, %loop ], [ 0, %entry ]
  %sum = phi i32 [ %sum.next, %loop ], [ 0, %entry ]
  %volload = load atomic i8, i8* %volatile unordered, align 8
  fence acquire
  %volchk = icmp eq i8 %volload, 0
  %addrld = load atomic i32, i32* %addr.i unordered, align 8
  %sel = select i1 %volchk, i32 0, i32 %addrld
  %sum.next = add i32 %sel, %sum
  %indvar.next = add i32 %indvar, 1
  %cond = icmp slt i32 %indvar.next, %n
  br i1 %cond, label %loop, label %loopexit

loopexit:
  ret i32 %sum
}

; We should not hoist the addrld out of the loop.
define i32 @test_fence4(i32* %addr, i32 %n, i8* %volatile) {
; CHECK-LABEL: @test_fence4
; CHECK-LABEL: entry
; CHECK-NOT: %addrld = load atomic i32, i32* %addr.i unordered, align 8
; CHECK: br label %loop
entry:
  %addr.i = getelementptr inbounds i32, i32* %addr, i64 8
  %gep = bitcast i32* %addr.i to i8 *
  br label %loop

loop:
  %indvar = phi i32 [ %indvar.next, %loop ], [ 0, %entry ]
  %sum = phi i32 [ %sum.next, %loop ], [ 0, %entry ]
  ; Here the store and invariant.start are inside the loop body rather than
  ; the preheader, so the invariance scope does not cover the preheader and
  ; %addrld must stay in the loop.
  store atomic i32 5, i32 * %addr.i unordered, align 8
  fence release
  %invst = call {}* @llvm.invariant.start.p0i8(i64 4, i8* %gep)
  %volload = load atomic i8, i8* %volatile unordered, align 8
  fence acquire
  %volchk = icmp eq i8 %volload, 0
  %addrld = load atomic i32, i32* %addr.i unordered, align 8
  %sel = select i1 %volchk, i32 0, i32 %addrld
  %sum.next = add i32 %sel, %sum
  %indvar.next = add i32 %indvar, 1
  %cond = icmp slt i32 %indvar.next, %n
  br i1 %cond, label %loop, label %loopexit

loopexit:
  ret i32 %sum
}