; RUN: opt < %s -loop-unroll -pragma-unroll-threshold=1024 -S | FileCheck %s
; RUN: opt < %s -loop-unroll -loop-unroll -pragma-unroll-threshold=1024 -S | FileCheck %s
;
; Run loop unrolling twice to verify that loop unrolling metadata is properly
; removed and further unrolling is disabled after the pass is run once.

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; loop4 contains a small loop which should be completely unrolled by
; the default unrolling heuristics.  It serves as a control for the
; unroll(disable) pragma test loop4_with_disable.
;
; The loop increments a[0] through a[3] and carries no loop metadata, so
; only the default heuristics apply.  Complete unrolling leaves no
; conditional branch behind.
;
; CHECK-LABEL: @loop4(
; CHECK-NOT: br i1
define void @loop4(i32* nocapture %a) {
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  %inc = add nsw i32 %0, 1
  store i32 %inc, i32* %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  ; Constant trip count of 4.
  %exitcond = icmp eq i64 %indvars.iv.next, 4
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

; #pragma clang loop unroll(disable)
;
; Same 4-iteration loop as loop4, but the latch branch carries
; llvm.loop.unroll.disable, so exactly one store and the conditional
; back-edge branch must survive.
;
; CHECK-LABEL: @loop4_with_disable(
; CHECK: store i32
; CHECK-NOT: store i32
; CHECK: br i1
define void @loop4_with_disable(i32* nocapture %a) {
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  %inc = add nsw i32 %0, 1
  store i32 %inc, i32* %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 4
  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !1

for.end:                                          ; preds = %for.body
  ret void
}
; !1 is the self-referential loop ID; !2 is the unroll(disable) hint.
!1 = !{!1, !2}
!2 = !{!"llvm.loop.unroll.disable"}

; loop64 has a high enough count that it should *not* be unrolled by
; the default unrolling heuristic.  It serves as the control for the
; pragma-driven loop64_with_* tests below.
;
; The loop increments a[0] through a[63] and carries no loop metadata.
;
; CHECK-LABEL: @loop64(
; CHECK: store i32
; CHECK-NOT: store i32
; CHECK: br i1
define void @loop64(i32* nocapture %a) {
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  %inc = add nsw i32 %0, 1
  store i32 %inc, i32* %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  ; Constant trip count of 64.
  %exitcond = icmp eq i64 %indvars.iv.next, 64
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

; #pragma clang loop unroll(full)
; Loop should be fully unrolled.
;
; Same 64-iteration loop as loop64, with llvm.loop.unroll.full on the latch.
;
; CHECK-LABEL: @loop64_with_full(
; CHECK-NOT: br i1
define void @loop64_with_full(i32* nocapture %a) {
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  %inc = add nsw i32 %0, 1
  store i32 %inc, i32* %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 64
  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !3

for.end:                                          ; preds = %for.body
  ret void
}
; !3 is the self-referential loop ID; !4 is the unroll(full) hint, which is
; also referenced by loop IDs of later functions in this file.
!3 = !{!3, !4}
!4 = !{!"llvm.loop.unroll.full"}

; #pragma clang loop unroll(full)
; Loop should be fully unrolled, even for optsize.
;
; Identical to loop64_with_full (it reuses loop ID !3) except that the
; function carries the optsize attribute.
;
; CHECK-LABEL: @loop64_with_full_optsize(
; CHECK-NOT: br i1
define void @loop64_with_full_optsize(i32* nocapture %a) optsize {
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  %inc = add nsw i32 %0, 1
  store i32 %inc, i32* %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 64
  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !3

for.end:                                          ; preds = %for.body
  ret void
}

; #pragma clang loop unroll_count(4)
; Loop should be unrolled 4 times.
;
; Same 64-iteration loop as loop64, with llvm.loop.unroll.count = 4 on the
; latch: exactly four stores must precede the remaining conditional branch.
;
; CHECK-LABEL: @loop64_with_count4(
; CHECK: store i32
; CHECK: store i32
; CHECK: store i32
; CHECK: store i32
; CHECK-NOT: store i32
; CHECK: br i1
define void @loop64_with_count4(i32* nocapture %a) {
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  %inc = add nsw i32 %0, 1
  store i32 %inc, i32* %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 64
  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !5

for.end:                                          ; preds = %for.body
  ret void
}
; !5 is the self-referential loop ID; !6 is the unroll_count(4) hint, which is
; also referenced by the loop ID of runtime_loop_with_count4.
!5 = !{!5, !6}
!6 = !{!"llvm.loop.unroll.count", i32 4}

; #pragma clang loop unroll(full)
; Full unrolling is requested, but loop has a runtime trip count so
; no unrolling should occur.
;
; The loop increments a[0] through a[b-1]; the entry block guards against
; b <= 0.  Exactly one store must remain in the function.
;
; CHECK-LABEL: @runtime_loop_with_full(
; CHECK: store i32
; CHECK-NOT: store i32
define void @runtime_loop_with_full(i32* nocapture %a, i32 %b) {
entry:
  ; Guard branch; it carries the same loop ID as the latch branch below.
  %cmp3 = icmp sgt i32 %b, 0
  br i1 %cmp3, label %for.body, label %for.end, !llvm.loop !8

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  %inc = add nsw i32 %0, 1
  store i32 %inc, i32* %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  ; The exit test compares against the runtime value %b.
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %b
  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !8

for.end:                                          ; preds = %for.body, %entry
  ret void
}
; Loop ID that reuses the unroll(full) hint !4.
!8 = !{!8, !4}

; #pragma clang loop unroll_count(4)
; Loop has a runtime trip count.  Runtime unrolling should occur and loop
; should be duplicated (original and 4x unrolled).
;
; The checks expect four stores in the unrolled main body, followed by a
; single store in the epilogue copy of the loop.
;
; CHECK-LABEL: @runtime_loop_with_count4(
; CHECK: for.body:
; CHECK: store
; CHECK: store
; CHECK: store
; CHECK: store
; CHECK-NOT: store
; CHECK: br i1
; CHECK: for.body.epil:
; CHECK: store
; CHECK-NOT: store
; CHECK: br i1
define void @runtime_loop_with_count4(i32* nocapture %a, i32 %b) {
entry:
  ; Guard branch; it carries the same loop ID as the latch branch below.
  %cmp3 = icmp sgt i32 %b, 0
  br i1 %cmp3, label %for.body, label %for.end, !llvm.loop !9

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  %inc = add nsw i32 %0, 1
  store i32 %inc, i32* %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  ; The exit test compares against the runtime value %b.
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %b
  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !9

for.end:                                          ; preds = %for.body, %entry
  ret void
}
; Loop ID that reuses the unroll_count(4) hint !6.
!9 = !{!9, !6}

; #pragma clang loop unroll_count(1)
; Loop should not be unrolled
;
; Same 4-iteration loop as loop4, but with llvm.loop.unroll.count = 1 on the
; latch: exactly one store and the conditional back-edge branch must survive.
;
; CHECK-LABEL: @unroll_1(
; CHECK: store i32
; CHECK-NOT: store i32
; CHECK: br i1
define void @unroll_1(i32* nocapture %a, i32 %b) {
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  %inc = add nsw i32 %0, 1
  store i32 %inc, i32* %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 4
  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !10

for.end:                                          ; preds = %for.body
  ret void
}
; !10 is the self-referential loop ID; !11 is the unroll_count(1) hint.
!10 = !{!10, !11}
!11 = !{!"llvm.loop.unroll.count", i32 1}

; #pragma clang loop unroll(full)
; Loop has very high loop count (1 million) and full unrolling was requested.
; Loop should be unrolled up to the pragma threshold, but not completely.
;
; The checks require at least two stores (some unrolling happened) and a
; remaining conditional branch (the loop was not fully unrolled).
;
; CHECK-LABEL: @unroll_1M(
; CHECK: store i32
; CHECK: store i32
; CHECK: br i1
define void @unroll_1M(i32* nocapture %a, i32 %b) {
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  %inc = add nsw i32 %0, 1
  store i32 %inc, i32* %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  ; Constant trip count of 1000000.
  %exitcond = icmp eq i64 %indvars.iv.next, 1000000
  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !12

for.end:                                          ; preds = %for.body
  ret void
}
; Loop ID that reuses the unroll(full) hint !4.
!12 = !{!12, !4}

; #pragma clang loop unroll(enable)
; Loop should be fully unrolled.
;
; Same 64-iteration loop as loop64, with llvm.loop.unroll.enable on the latch.
;
; CHECK-LABEL: @loop64_with_enable(
; CHECK-NOT: br i1
define void @loop64_with_enable(i32* nocapture %a) {
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  %inc = add nsw i32 %0, 1
  store i32 %inc, i32* %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 64
  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !13

for.end:                                          ; preds = %for.body
  ret void
}
; !13 is the self-referential loop ID; !14 is the unroll(enable) hint, which
; is also referenced by the loop ID of runtime_loop_with_enable.
!13 = !{!13, !14}
!14 = !{!"llvm.loop.unroll.enable"}

; #pragma clang loop unroll(enable)
; Loop has a runtime trip count and should be runtime unrolled and duplicated
; (original and 8x).
;
; The checks expect eight stores in the unrolled main body, followed by a
; single store in the epilogue copy of the loop.
;
; CHECK-LABEL: @runtime_loop_with_enable(
; CHECK: for.body:
; CHECK: store i32
; CHECK: store i32
; CHECK: store i32
; CHECK: store i32
; CHECK: store i32
; CHECK: store i32
; CHECK: store i32
; CHECK: store i32
; CHECK-NOT: store i32
; CHECK: br i1
; CHECK: for.body.epil:
; CHECK: store
; CHECK-NOT: store
; CHECK: br i1
define void @runtime_loop_with_enable(i32* nocapture %a, i32 %b) {
entry:
  ; Guard branch.  It uses this loop's own ID (!15), matching the latch
  ; branch below, rather than the unroll(full) loop ID of another function.
  %cmp3 = icmp sgt i32 %b, 0
  br i1 %cmp3, label %for.body, label %for.end, !llvm.loop !15

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  %inc = add nsw i32 %0, 1
  store i32 %inc, i32* %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  ; The exit test compares against the runtime value %b.
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %b
  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !15

for.end:                                          ; preds = %for.body, %entry
  ret void
}
; Loop ID that reuses the unroll(enable) hint !14.
!15 = !{!15, !14}

; #pragma clang loop unroll_count(3)
; Loop has a runtime trip count.  Runtime unrolling should occur and loop
; should be duplicated (original and 3x unrolled).
;
; The checks expect three stores in the unrolled main body, followed by a
; single store in the epilogue copy of the loop.
;
; CHECK-LABEL: @runtime_loop_with_count3(
; CHECK: for.body:
; CHECK: store
; CHECK: store
; CHECK: store
; CHECK-NOT: store
; CHECK: br i1
; CHECK: for.body.epil:
; CHECK: store
; CHECK-NOT: store
; CHECK: br i1
define void @runtime_loop_with_count3(i32* nocapture %a, i32 %b) {
entry:
  ; Guard branch; it carries the same loop ID as the latch branch below.
  %cmp3 = icmp sgt i32 %b, 0
  br i1 %cmp3, label %for.body, label %for.end, !llvm.loop !16

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  %inc = add nsw i32 %0, 1
  store i32 %inc, i32* %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  ; The exit test compares against the runtime value %b.
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %b
  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !16

for.end:                                          ; preds = %for.body, %entry
  ret void
}
; !16 is the self-referential loop ID; !17 is the unroll_count(3) hint.
!16 = !{!16, !17}
!17 = !{!"llvm.loop.unroll.count", i32 3}
