• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; RUN: opt < %s -licm -S | FileCheck %s
2; RUN: opt < %s -aa-pipeline=basic-aa -passes='require<opt-remark-emit>,loop(licm)' -S | FileCheck %s
3
; Global whose value is loaded in the entry blocks of @test1, @test2 and @test3.
4@X = global i32 0		; <i32*> [#uses=1]
5
; External callee declared without attributes; called inside the loops of
; @test1 and @test5.
6declare void @foo()
7
; Intrinsic called from the loop body of @hoist_bitreverse.
8declare i32 @llvm.bitreverse.i32(i32)
9
10; This testcase tests for a problem where LICM hoists
11; potentially trapping instructions when they are not guaranteed to execute.
; The sdiv divides by %A (loaded from @X before the loop) and lives in
; IfUnEqual, which is only reached when %c is false. The FileCheck lines pin
; the sdiv directly under the IfUnEqual label in the output.
12define i32 @test1(i1 %c) {
13; CHECK-LABEL: @test1(
14	%A = load i32, i32* @X		; <i32> [#uses=2]
15	br label %Loop
16Loop:		; preds = %LoopTail, %0
17	call void @foo( )
18	br i1 %c, label %LoopTail, label %IfUnEqual
19
20IfUnEqual:		; preds = %Loop
21; CHECK: IfUnEqual:
22; CHECK-NEXT: sdiv i32 4, %A
; Conditionally executed division; must stay in this block.
23	%B1 = sdiv i32 4, %A		; <i32> [#uses=1]
24	br label %LoopTail
25
26LoopTail:		; preds = %IfUnEqual, %Loop
; %B is 0 when the division was skipped, otherwise the quotient.
27	%B = phi i32 [ 0, %Loop ], [ %B1, %IfUnEqual ]		; <i32> [#uses=1]
28	br i1 %c, label %Loop, label %Out
29Out:		; preds = %LoopTail
30	%C = sub i32 %A, %B		; <i32> [#uses=1]
31	ret i32 %C
32}
33
34
; nounwind callee that receives the value computed in the loops of @test2
; and @test3.
35declare void @foo2(i32) nounwind
36
37
38;; It is ok and desirable to hoist this potentially trapping instruction.
; The sdiv sits in the loop header block (Loop), which the entry block
; branches to unconditionally. The expected output places the sdiv on the
; line right after the load of @X, i.e. ahead of the loop.
39define i32 @test2(i1 %c) {
40; CHECK-LABEL: @test2(
41; CHECK-NEXT: load i32, i32* @X
42; CHECK-NEXT: %B = sdiv i32 4, %A
43  %A = load i32, i32* @X
44  br label %Loop
45
46Loop:
47  ;; Should have hoisted this div!
48  %B = sdiv i32 4, %A
49  br label %loop2
50
51loop2:
  ; The only call in the loop; @foo2 is declared nounwind.
52  call void @foo2( i32 %B )
53  br i1 %c, label %Loop, label %Out
54
55Out:
56  %C = sub i32 %A, %B
57  ret i32 %C
58}
59
60
61; This loop invariant instruction should be constant folded, not hoisted.
; Both operands of the add are literals (4 and 2); the expected output passes
; the folded constant 6 directly to @foo2.
62define i32 @test3(i1 %c) {
63; CHECK-LABEL: define i32 @test3(
64; CHECK: call void @foo2(i32 6)
65	%A = load i32, i32* @X		; <i32> [#uses=2]
66	br label %Loop
67Loop:
; Constant expression in disguise: no loop-variant or runtime operand.
68	%B = add i32 4, 2		; <i32> [#uses=2]
69	call void @foo2( i32 %B )
70	br i1 %c, label %Loop, label %Out
71Out:		; preds = %Loop
72	%C = sub i32 %A, %B		; <i32> [#uses=1]
73	ret i32 %C
74}
75
; The sdiv of the loop-invariant arguments %x and %y follows a call to
; @foo_may_call_exit on every iteration. The directives below require a call
; to appear before the sdiv in the output, so the division must not be moved
; ahead of the call.
; NOTE(review): presumably the reason is that the callee (declared without
; attributes) might never return, so the division is not guaranteed to
; execute -- confirm.
76; CHECK-LABEL: @test4(
77; CHECK: call
78; CHECK: sdiv
79; CHECK: ret
80define i32 @test4(i32 %x, i32 %y) nounwind uwtable ssp {
81entry:
82  br label %for.body
83
84for.body:                                         ; preds = %entry, %for.body
  ; %i.02 is the iteration counter, %n.01 the running sum of %div.
85  %i.02 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
86  %n.01 = phi i32 [ 0, %entry ], [ %add, %for.body ]
87  call void @foo_may_call_exit(i32 0)
  ; Loop-invariant operands, but expected to remain after the call above.
88  %div = sdiv i32 %x, %y
89  %add = add nsw i32 %n.01, %div
90  %inc = add nsw i32 %i.02, 1
91  %cmp = icmp slt i32 %inc, 10000
92  br i1 %cmp, label %for.body, label %for.end
93
94for.end:                                          ; preds = %for.body
95  %n.0.lcssa = phi i32 [ %add, %for.body ]
96  ret i32 %n.0.lcssa
97}
98
; Callee used by @test4; declared with no attributes.
; NOTE(review): the name suggests it may terminate the program instead of
; returning -- nothing in this file states that explicitly.
99declare void @foo_may_call_exit(i32)
100
101; PR14854
; Both aggregate operations in the loop header use only the loop-invariant
; argument %e. The directives below expect the extractvalue ahead of the
; branch into tailrecurse and the insertvalue after the ifend label.
102; CHECK-LABEL: @test5(
103; CHECK: extractvalue
104; CHECK: br label %tailrecurse
105; CHECK: tailrecurse:
106; CHECK: ifend:
107; CHECK: insertvalue
108define { i32*, i32 } @test5(i32 %i, { i32*, i32 } %e) {
109entry:
110  br label %tailrecurse
111
112tailrecurse:                                      ; preds = %then, %entry
113  %i.tr = phi i32 [ %i, %entry ], [ %cmp2, %then ]
  ; %out feeds the loop condition below.
114  %out = extractvalue { i32*, i32 } %e, 1
  ; %d is only used by the ret in ifend.
115  %d = insertvalue { i32*, i32 } %e, i32* null, 0
116  %cmp1 = icmp sgt i32 %out, %i.tr
117  br i1 %cmp1, label %then, label %ifend
118
119then:                                             ; preds = %tailrecurse
120  call void @foo()
  ; Despite its name, %cmp2 is the incremented counter, not a comparison.
121  %cmp2 = add i32 %i.tr, 1
122  br label %tailrecurse
123
124ifend:                                            ; preds = %tailrecurse
125  ret { i32*, i32 } %d
126}
127
; The bitreverse call takes only the function argument %0, which does not
; change inside the loop, and sits in `body`, which runs only while %3 is
; true. The directives below expect the call before a `br label %header`.
; NOTE(review): the latch block also ends in `br label %header`, so these
; directives look satisfiable even if the call stayed in `body`; consider
; tightening them -- confirm against FileCheck matching rules.
128; CHECK: define i32 @hoist_bitreverse(i32)
129; CHECK: bitreverse
130; CHECK: br label %header
131define i32 @hoist_bitreverse(i32)  {
  ; The unnamed entry block is %1 (the unnamed argument is %0).
132  br label %header
133
134header:
135  %sum = phi i32 [ 0, %1 ], [ %5, %latch ]
136  %2 = phi i32 [ 0, %1 ], [ %6, %latch ]
137  %3 = icmp slt i32 %2, 1024
138  br i1 %3, label %body, label %return
139
140body:
141  %4 = call i32 @llvm.bitreverse.i32(i32 %0)
142  %5 = add i32 %sum, %4
143  br label %latch
144
145latch:
146  %6 = add nsw i32 %2, 1
147  br label %header
148
149return:
150  ret i32 %sum
151}
152
; Invariant-region intrinsics used by the @test_fence* functions.
; invariant.start returns a {}* token; invariant.end takes that token as its
; first argument.
153declare {}* @llvm.invariant.start.p0i8(i64, i8* nocapture) nounwind readonly
154declare void @llvm.invariant.end.p0i8({}*, i64, i8* nocapture) nounwind
; External nounwind callee that @test_fence2 passes the invariant.start token to.
155declare void @escaping.invariant.start({}*) nounwind
156; invariant.start dominates the load, and in this scope, the
157; load is invariant. So, we can hoist the `addrld` load out of the loop.
; The expected output has the `addrld` load in the entry block, after the
; invariant.start call and before the branch into the loop.
158define i32 @test_fence(i8* %addr, i32 %n, i8* %volatile) {
159; CHECK-LABEL: @test_fence
160; CHECK-LABEL: entry
161; CHECK: invariant.start
162; CHECK: %addrld = load atomic i32, i32* %addr.i unordered, align 8
163; CHECK: br label %loop
164entry:
165  %gep = getelementptr inbounds i8, i8* %addr, i64 8
166  %addr.i = bitcast i8* %gep to i32 *
167  store atomic i32 5, i32 * %addr.i unordered, align 8
168  fence release
  ; Covers 4 bytes starting at %gep, the same address as %addr.i stored above.
169  %invst = call {}* @llvm.invariant.start.p0i8(i64 4, i8* %gep)
170  br label %loop
171
172loop:
173  %indvar = phi i32 [ %indvar.next, %loop ], [ 0, %entry ]
174  %sum = phi i32 [ %sum.next, %loop ], [ 0, %entry ]
175  %volload = load atomic i8, i8* %volatile unordered, align 8
176  fence acquire
177  %volchk = icmp eq i8 %volload, 0
  ; Load from a loop-invariant address, placed after the acquire fence; this
  ; is the instruction the test expects to be hoisted.
178  %addrld = load atomic i32, i32* %addr.i unordered, align 8
179  %sel = select i1 %volchk, i32 0, i32 %addrld
180  %sum.next = add i32 %sel, %sum
181  %indvar.next = add i32 %indvar, 1
182  %cond = icmp slt i32 %indvar.next, %n
183  br i1 %cond, label %loop, label %loopexit
184
185loopexit:
186  ret i32 %sum
187}
188
189
190
191; Same as test above, but the load is no longer invariant (presence of
192; invariant.end). We cannot hoist the addrld out of loop.
; The expected output has invariant.start, invariant.end and the branch into
; the loop on three consecutive lines, leaving no room for a hoisted load.
193define i32 @test_fence1(i8* %addr, i32 %n, i8* %volatile) {
194; CHECK-LABEL: @test_fence1
195; CHECK-LABEL: entry
196; CHECK: invariant.start
197; CHECK-NEXT: invariant.end
198; CHECK-NEXT: br label %loop
199entry:
200  %gep = getelementptr inbounds i8, i8* %addr, i64 8
201  %addr.i = bitcast i8* %gep to i32 *
202  store atomic i32 5, i32 * %addr.i unordered, align 8
203  fence release
204  %invst = call {}* @llvm.invariant.start.p0i8(i64 4, i8* %gep)
  ; Ends the region opened by %invst before the loop is entered.
205  call void @llvm.invariant.end.p0i8({}* %invst, i64 4, i8* %gep)
206  br label %loop
207
208loop:
209  %indvar = phi i32 [ %indvar.next, %loop ], [ 0, %entry ]
210  %sum = phi i32 [ %sum.next, %loop ], [ 0, %entry ]
211  %volload = load atomic i8, i8* %volatile unordered, align 8
212  fence acquire
213  %volchk = icmp eq i8 %volload, 0
  ; Expected to stay in the loop, below the acquire fence.
214  %addrld = load atomic i32, i32* %addr.i unordered, align 8
215  %sel = select i1 %volchk, i32 0, i32 %addrld
216  %sum.next = add i32 %sel, %sum
217  %indvar.next = add i32 %indvar, 1
218  %cond = icmp slt i32 %indvar.next, %n
219  br i1 %cond, label %loop, label %loopexit
220
221loopexit:
222  ret i32 %sum
223}
224
225; same as test above, but instead of invariant.end, we have the result of
226; invariant.start escaping through a call. We cannot hoist the load.
; The expected output has no load anywhere in the entry block before the
; branch into the loop.
227define i32 @test_fence2(i8* %addr, i32 %n, i8* %volatile) {
228; CHECK-LABEL: @test_fence2
229; CHECK-LABEL: entry
230; CHECK-NOT: load
231; CHECK: br label %loop
232entry:
233  %gep = getelementptr inbounds i8, i8* %addr, i64 8
234  %addr.i = bitcast i8* %gep to i32 *
235  store atomic i32 5, i32 * %addr.i unordered, align 8
236  fence release
237  %invst = call {}* @llvm.invariant.start.p0i8(i64 4, i8* %gep)
  ; The token is handed to an external function, so its uses are no longer
  ; all visible in this function.
238  call void @escaping.invariant.start({}* %invst)
239  br label %loop
240
241loop:
242  %indvar = phi i32 [ %indvar.next, %loop ], [ 0, %entry ]
243  %sum = phi i32 [ %sum.next, %loop ], [ 0, %entry ]
244  %volload = load atomic i8, i8* %volatile unordered, align 8
245  fence acquire
246  %volchk = icmp eq i8 %volload, 0
  ; Expected to stay in the loop, below the acquire fence.
247  %addrld = load atomic i32, i32* %addr.i unordered, align 8
248  %sel = select i1 %volchk, i32 0, i32 %addrld
249  %sum.next = add i32 %sel, %sum
250  %indvar.next = add i32 %indvar, 1
251  %cond = icmp slt i32 %indvar.next, %n
252  br i1 %cond, label %loop, label %loopexit
253
254loopexit:
255  ret i32 %sum
256}
257
258; FIXME: invariant.start dominates the load, and in this scope, the
259; load is invariant. So, we can hoist the `addrld` load out of the loop.
260; Consider the load operand addr.i, which is bitcast before being passed to
261; invariant.start
; Unlike @test_fence, the i32* pointer here is the GEP result and the i8*
; given to invariant.start is a bitcast of it. The directives below record
; the current behaviour: the load does not appear in the entry block.
262define i32 @test_fence3(i32* %addr, i32 %n, i8* %volatile) {
263; CHECK-LABEL: @test_fence3
264; CHECK-LABEL: entry
265; CHECK: invariant.start
266; CHECK-NOT: %addrld = load atomic i32, i32* %addr.i unordered, align 8
267; CHECK: br label %loop
268entry:
  ; Indexing is in i32 units here, so this is element 8 of %addr.
269  %addr.i = getelementptr inbounds i32, i32* %addr, i64 8
270  %gep = bitcast i32* %addr.i to i8 *
271  store atomic i32 5, i32 * %addr.i unordered, align 8
272  fence release
273  %invst = call {}* @llvm.invariant.start.p0i8(i64 4, i8* %gep)
274  br label %loop
275
276loop:
277  %indvar = phi i32 [ %indvar.next, %loop ], [ 0, %entry ]
278  %sum = phi i32 [ %sum.next, %loop ], [ 0, %entry ]
279  %volload = load atomic i8, i8* %volatile unordered, align 8
280  fence acquire
281  %volchk = icmp eq i8 %volload, 0
  ; Loads through %addr.i directly, not through the bitcast given to
  ; invariant.start.
282  %addrld = load atomic i32, i32* %addr.i unordered, align 8
283  %sel = select i1 %volchk, i32 0, i32 %addrld
284  %sum.next = add i32 %sel, %sum
285  %indvar.next = add i32 %indvar, 1
286  %cond = icmp slt i32 %indvar.next, %n
287  br i1 %cond, label %loop, label %loopexit
288
289loopexit:
290  ret i32 %sum
291}
292
293; We should not hoist the addrld out of the loop.
; Here the store, the release fence and the invariant.start call are all
; inside the loop, so the location is written on every iteration before the
; load. The directives below require that the load not appear in the entry
; block.
294define i32 @test_fence4(i32* %addr, i32 %n, i8* %volatile) {
295; CHECK-LABEL: @test_fence4
296; CHECK-LABEL: entry
297; CHECK-NOT: %addrld = load atomic i32, i32* %addr.i unordered, align 8
298; CHECK: br label %loop
299entry:
300  %addr.i = getelementptr inbounds i32, i32* %addr, i64 8
301  %gep = bitcast i32* %addr.i to i8 *
302  br label %loop
303
304loop:
305  %indvar = phi i32 [ %indvar.next, %loop ], [ 0, %entry ]
306  %sum = phi i32 [ %sum.next, %loop ], [ 0, %entry ]
  ; Per-iteration write to the same address the load below reads.
307  store atomic i32 5, i32 * %addr.i unordered, align 8
308  fence release
309  %invst = call {}* @llvm.invariant.start.p0i8(i64 4, i8* %gep)
310  %volload = load atomic i8, i8* %volatile unordered, align 8
311  fence acquire
312  %volchk = icmp eq i8 %volload, 0
313  %addrld = load atomic i32, i32* %addr.i unordered, align 8
314  %sel = select i1 %volchk, i32 0, i32 %addrld
315  %sum.next = add i32 %sel, %sum
316  %indvar.next = add i32 %indvar, 1
317  %cond = icmp slt i32 %indvar.next, %n
318  br i1 %cond, label %loop, label %loopexit
319
320loopexit:
321  ret i32 %sum
322}
323