• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -hardware-loops %s -S -o - | \
2; RUN:     FileCheck %s
3; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi %s -o - | \
4; RUN:     FileCheck %s --check-prefix=CHECK-LLC
5; RUN: opt -mtriple=thumbv8.1m.main -loop-unroll -unroll-remainder=false -S < %s | \
6; RUN:     llc -mtriple=thumbv8.1m.main | FileCheck %s --check-prefix=CHECK-UNROLL
7; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -hardware-loops \
8; RUN:     -pass-remarks-analysis=hardware-loops  %s -S -o - 2>&1 | \
9; RUN:     FileCheck %s --check-prefix=CHECK-REMARKS
10
11
12; CHECK-REMARKS: remark: <unknown>:0:0: hardware-loop not created: it's not profitable to create a hardware-loop
13; CHECK-REMARKS: remark: <unknown>:0:0: hardware-loop not created: nested hardware-loops not supported
14; CHECK-REMARKS: remark: <unknown>:0:0: hardware-loop not created: it's not profitable to create a hardware-loop
15; CHECK-REMARKS: remark: <unknown>:0:0: hardware-loop not created: it's not profitable to create a hardware-loop
16; CHECK-REMARKS: remark: <unknown>:0:0: hardware-loop not created: it's not profitable to create a hardware-loop
17; CHECK-REMARKS: remark: <unknown>:0:0: hardware-loop not created: it's not profitable to create a hardware-loop
18; CHECK-REMARKS: remark: <unknown>:0:0: hardware-loop not created: loop is not a candidate
19; CHECK-REMARKS: remark: <unknown>:0:0: hardware-loop not created: nested hardware-loops not supported
20; CHECK-REMARKS: remark: <unknown>:0:0: hardware-loop not created: it's not profitable to create a hardware-loop
21; CHECK-REMARKS: remark: <unknown>:0:0: hardware-loop not created: it's not profitable to create a hardware-loop
22
23
; early_exit: a do-while loop whose header has a second, data-dependent exit.
; The expectations below require that neither llvm.set.loop.iterations nor
; llvm.loop.decrement shows up in the transformed function.
; CHECK-LABEL: early_exit
; CHECK-NOT: llvm.set.loop.iterations
; CHECK-NOT: llvm.loop.decrement
define i32 @early_exit(i32* nocapture readonly %a, i32 %max, i32 %n) {
entry:
  br label %do.body

; Header: leave straight away when a[idx] is greater than max.
do.body:
  %idx = phi i32 [ 0, %entry ], [ %idx.next, %if.end ]
  %elem.ptr = getelementptr inbounds i32, i32* %a, i32 %idx
  %elem = load i32, i32* %elem.ptr, align 4
  %over.max = icmp sgt i32 %elem, %max
  br i1 %over.max, label %do.end, label %if.end

; Latch: keep iterating while idx.next is (unsigned) below n.
if.end:
  %idx.next = add nuw i32 %idx, 1
  %more = icmp ult i32 %idx.next, %n
  br i1 %more, label %do.body, label %if.end.do.end_crit_edge

; Counted exit: reload the element at the final index before returning it.
if.end.do.end_crit_edge:
  %last.ptr = getelementptr inbounds i32, i32* %a, i32 %idx.next
  %last = load i32, i32* %last.ptr, align 4
  br label %do.end

do.end:
  %result = phi i32 [ %last, %if.end.do.end_crit_edge ], [ %elem, %do.body ]
  ret i32 %result
}
52
; nested: a two-deep loop nest. The expectations below look for the
; start/decrement intrinsics on the inner loop (%while.body3.us) only, and
; for a dls/le pair in the generated code.
; CHECK-LABEL: nested
; CHECK-NOT: call i32 @llvm.start.loop.iterations.i32(i32 %N)
; CHECK: br i1 %cmp20, label %while.end7, label %while.cond1.preheader.us

; CHECK: [[START:%[^ ]+]] = call i32 @llvm.start.loop.iterations.i32(i32 %N)
; CHECK: br label %while.body3.us

; CHECK: [[REM:%[^ ]+]] = phi i32 [ [[START]], %while.cond1.preheader.us ], [ [[LOOP_DEC:%[^ ]+]], %while.body3.us ]
; CHECK: [[LOOP_DEC]] = call i32 @llvm.loop.decrement.reg.i32(i32 [[REM]], i32 1)
; CHECK: [[CMP:%[^ ]+]] = icmp ne i32 [[LOOP_DEC]], 0
; CHECK: br i1 [[CMP]], label %while.body3.us, label %while.cond1.while.end_crit_edge.us

; CHECK-NOT: [[LOOP_DEC1:%[^ ]+]] = call i1 @llvm.loop.decrement.i32(i32 1)
; CHECK-NOT: br i1 [[LOOP_DEC1]], label %while.cond1.preheader.us, label %while.end7

; CHECK-LLC:      nested:
; CHECK-LLC-NOT:    mov lr, r1
; CHECK-LLC:        dls lr, r1
; CHECK-LLC-NOT:    mov lr, r1
; CHECK-LLC:      [[LOOP_HEADER:\.LBB[0-9._]+]]:
; CHECK-LLC:        le lr, [[LOOP_HEADER]]
; CHECK-LLC-NOT:    b [[LOOP_EXIT:\.LBB[0-9._]+]]
; CHECK-LLC:      [[LOOP_EXIT:\.LBB[0-9._]+]]:

define void @nested(i32* nocapture %A, i32 %N) {
entry:
  ; Skip both loops when N is zero.
  %cmp20 = icmp eq i32 %N, 0
  br i1 %cmp20, label %while.end7, label %while.cond1.preheader.us

; Outer loop header, which is also the preheader of the inner loop.
while.cond1.preheader.us:
  %row = phi i32 [ %row.next, %while.cond1.while.end_crit_edge.us ], [ 0, %entry ]
  %row.base = mul i32 %row, %N
  br label %while.body3.us

; Inner loop: A[row * N + col] = row * N + col.
while.body3.us:
  %col = phi i32 [ 0, %while.cond1.preheader.us ], [ %col.next, %while.body3.us ]
  %flat = add i32 %col, %row.base
  %slot = getelementptr inbounds i32, i32* %A, i32 %flat
  store i32 %flat, i32* %slot, align 4
  %col.next = add nuw i32 %col, 1
  %row.done = icmp eq i32 %col.next, %N
  br i1 %row.done, label %while.cond1.while.end_crit_edge.us, label %while.body3.us

; Outer loop latch.
while.cond1.while.end_crit_edge.us:
  %row.next = add nuw i32 %row, 1
  %all.done = icmp eq i32 %row.next, %N
  br i1 %all.done, label %while.end7, label %while.cond1.preheader.us

while.end7:
  ret void
}
104
; pre_existing: the input loop already uses llvm.start.loop.iterations and
; llvm.loop.decrement.reg. The expectations below accept the existing calls
; and reject a second copy of either one.
; CHECK-LABEL: pre_existing
; CHECK: llvm.start.loop.iterations
; CHECK-NOT: llvm.start.loop.iterations
; CHECK: call i32 @llvm.loop.decrement.reg.i32(i32 %0, i32 1)
; CHECK-NOT: call i32 @llvm.loop.decrement.reg
define i32 @pre_existing(i32 %n, i32* nocapture %p, i32* nocapture readonly %q) {
entry:
  %count.init = call i32 @llvm.start.loop.iterations.i32(i32 %n)
  br label %while.body

; Copies one i32 from the source cursor to the destination cursor per trip.
; %0..%3 stay unnamed because the expectations above match "%0" literally.
while.body:                                       ; preds = %while.body, %entry
  %src = phi i32* [ %src.next, %while.body ], [ %q, %entry ]
  %dst = phi i32* [ %dst.next, %while.body ], [ %p, %entry ]
  %0 = phi i32 [ %count.init, %entry ], [ %2, %while.body ]
  %src.next = getelementptr inbounds i32, i32* %src, i32 1
  %1 = load i32, i32* %src, align 4
  %dst.next = getelementptr inbounds i32, i32* %dst, i32 1
  store i32 %1, i32* %dst, align 4
  %2 = call i32 @llvm.loop.decrement.reg.i32(i32 %0, i32 1)
  %3 = icmp ne i32 %2, 0
  br i1 %3, label %while.body, label %while.end

while.end:                                        ; preds = %while.body
  ret i32 0
}
130
; pre_existing_test_set: the input already guards the loop with
; llvm.test.set.loop.iterations and counts down with llvm.loop.decrement.reg.
; The expectations below accept those calls, reject any
; llvm.set*.loop.iterations between them, and reject a further decrement.
; CHECK-LABEL: pre_existing_test_set
; CHECK: call i1 @llvm.test.set.loop.iterations
; CHECK-NOT: llvm.set{{.*}}.loop.iterations
; CHECK: call i32 @llvm.loop.decrement.reg.i32(i32 %0, i32 1)
; CHECK-NOT: call i32 @llvm.loop.decrement.reg
define i32 @pre_existing_test_set(i32 %n, i32* nocapture %p, i32* nocapture readonly %q) {
entry:
  %enter.loop = call i1 @llvm.test.set.loop.iterations.i32(i32 %n)
  br i1 %enter.loop, label %while.preheader, label %while.end

while.preheader:                                  ; preds = %entry
  br label %while.body

; Copies one i32 from the source cursor to the destination cursor per trip.
; %0..%3 stay unnamed because the expectations above match "%0" literally.
while.body:                                       ; preds = %while.body, %while.preheader
  %src = phi i32* [ %src.next, %while.body ], [ %q, %while.preheader ]
  %dst = phi i32* [ %dst.next, %while.body ], [ %p, %while.preheader ]
  %0 = phi i32 [ %n, %while.preheader ], [ %2, %while.body ]
  %src.next = getelementptr inbounds i32, i32* %src, i32 1
  %1 = load i32, i32* %src, align 4
  %dst.next = getelementptr inbounds i32, i32* %dst, i32 1
  store i32 %1, i32* %dst, align 4
  %2 = call i32 @llvm.loop.decrement.reg.i32(i32 %0, i32 1)
  %3 = icmp ne i32 %2, 0
  br i1 %3, label %while.body, label %while.end

while.end:                                        ; preds = %while.body, %entry
  ret i32 0
}
159
; pre_existing_inner: a two-deep nest whose inner loop already carries the
; start/decrement intrinsics. The expectations below look for exactly that
; pair and reject any additional llvm.loop.decrement call.
; CHECK-LABEL: pre_existing_inner
; CHECK-NOT: llvm.start.loop.iterations
; CHECK: while.cond1.preheader.us:
; CHECK: call i32 @llvm.start.loop.iterations.i32(i32 %N)
; CHECK: call i32 @llvm.loop.decrement.reg.i32(i32 %0, i32 1)
; CHECK: br i1
; CHECK-NOT: call i32 @llvm.loop.decrement
define void @pre_existing_inner(i32* nocapture %A, i32 %N) {
entry:
  ; Skip both loops when N is zero.
  %is.empty = icmp eq i32 %N, 0
  br i1 %is.empty, label %while.end7, label %while.cond1.preheader.us

; Outer loop header; it also sets up the inner loop's iteration count.
while.cond1.preheader.us:
  %row = phi i32 [ %row.next, %while.cond1.while.end_crit_edge.us ], [ 0, %entry ]
  %row.base = mul i32 %row, %N
  %count.init = call i32 @llvm.start.loop.iterations.i32(i32 %N)
  br label %while.body3.us

; Inner loop: A[row * N + col] = row * N + col.
; %0..%2 stay unnamed because the expectations above match "%0" literally.
while.body3.us:
  %col = phi i32 [ 0, %while.cond1.preheader.us ], [ %col.next, %while.body3.us ]
  %0 = phi i32 [ %count.init, %while.cond1.preheader.us ], [ %1, %while.body3.us ]
  %flat = add i32 %col, %row.base
  %slot = getelementptr inbounds i32, i32* %A, i32 %flat
  store i32 %flat, i32* %slot, align 4
  %col.next = add nuw i32 %col, 1
  %1 = call i32 @llvm.loop.decrement.reg.i32(i32 %0, i32 1)
  %2 = icmp ne i32 %1, 0
  br i1 %2, label %while.body3.us, label %while.cond1.while.end_crit_edge.us

; Outer loop latch.
while.cond1.while.end_crit_edge.us:
  %row.next = add nuw i32 %row, 1
  %all.done = icmp eq i32 %row.next, %N
  br i1 %all.done, label %while.end7, label %while.cond1.preheader.us

while.end7:
  ret void
}
197
; not_rotated: a two-deep nest in which both loops test their exit condition
; in the header and reach the latch through an unconditional branch. The
; expectations below require that no start/decrement intrinsic is emitted.
; The arguments are left unnamed, so they are referred to as %0, %1 and %2.
; CHECK-LABEL: not_rotated
; CHECK-NOT: call i32 @llvm.start.loop.iterations
; CHECK-NOT: call i32 @llvm.loop.decrement.i32
define void @not_rotated(i32, i16* nocapture, i16 signext) {
entry:
  br label %outer.header

; Outer loop: exit once the row counter equals %0.
outer.header:
  %row = phi i32 [ 0, %entry ], [ %row.next, %outer.latch ]
  %rows.done = icmp eq i32 %row, %0
  br i1 %rows.done, label %exit, label %inner.preheader

inner.preheader:
  %row.base = mul i32 %row, %0
  br label %inner.header

; Inner loop: exit once the column counter equals %0.
inner.header:
  %col = phi i32 [ %col.next, %inner.body ], [ 0, %inner.preheader ]
  %cols.done = icmp eq i32 %col, %0
  br i1 %cols.done, label %outer.latch, label %inner.body

; Adds %2 to the i16 element at index row * %0 + col of the array at %1.
inner.body:
  %flat = add i32 %col, %row.base
  %elem.ptr = getelementptr inbounds i16, i16* %1, i32 %flat
  %elem = load i16, i16* %elem.ptr, align 2
  %sum = add i16 %elem, %2
  store i16 %sum, i16* %elem.ptr, align 2
  %col.next = add i32 %col, 1
  br label %inner.header

outer.latch:
  %row.next = add i32 %row, 1
  br label %outer.header

exit:
  ret void
}
234
; multi_latch: a single loop whose header is reached from two different
; latches (latch.0 and latch.1). The expectations below require that no
; start/decrement intrinsic is emitted for it.
; CHECK-LABEL: multi_latch
; CHECK-NOT: call i32 @llvm.start.loop.iterations
; CHECK-NOT: call i32 @llvm.loop.decrement
define void @multi_latch(i32* %a, i32* %b, i32 %N) {
entry:
  %half = lshr i32 %N, 1
  br label %header

; Header: the phi has one incoming value per latch plus the one from entry.
header:
  %iv = phi i32 [ 0, %entry ], [ %count.next, %latch.0 ], [ %count.next, %latch.1 ]
  %cmp = icmp ult i32 %iv, %half
  %addr.a = getelementptr i32, i32* %a, i32 %iv
  %addr.b = getelementptr i32, i32* %b, i32 %iv
  br i1 %cmp, label %if.then, label %if.else

if.then:
  store i32 %iv, i32* %addr.a
  br label %latch.0

if.else:
  store i32 %iv, i32* %addr.b
  br label %latch.0

; First latch: back to the header while the next index is below N / 2.
latch.0:
  %count.next = add nuw i32 %iv, 1
  %cmp.1 = icmp ult i32 %count.next, %half
  br i1 %cmp.1, label %header, label %latch.1

; Second latch: back to the header while the next index is below N,
; otherwise leave the loop. The false edge must target %exit: branching back
; to %latch.1 would leave %exit unreachable and turn this block into a
; self-loop instead of a second latch of the same loop.
latch.1:
  %ld = load i32, i32* %addr.a
  store i32 %ld, i32* %addr.b
  %cmp.2 = icmp ult i32 %count.next, %N
  br i1 %cmp.2, label %header, label %exit

exit:
  ret void
}
272
; search: a counted loop whose body is a switch that rejoins in the latch
; (for.inc). The expectations below look for a test.set guard in the entry
; block and a decrement/compare/branch sequence in for.inc.
; CHECK-LABEL: search
; CHECK: entry:
; CHECK:   [[TEST:%[^ ]+]] = call i1 @llvm.test.set.loop.iterations.i32(i32 %N)
; CHECK:   br i1 [[TEST]], label %for.body.preheader, label %for.cond.cleanup
; CHECK: for.body.preheader:
; CHECK:   br label %for.body
; CHECK: for.body:
; CHECK: for.inc:
; CHECK:   [[LOOP_DEC:%[^ ]+]] = call i32 @llvm.loop.decrement.reg.i32(
; CHECK:   [[CMP:%[^ ]+]] = icmp ne i32 [[LOOP_DEC]], 0
; CHECK:   br i1 [[CMP]], label %for.body, label %for.cond.cleanup
define i32 @search(i8* nocapture readonly %c, i32 %N) {
entry:
  %is.empty = icmp eq i32 %N, 0
  br i1 %is.empty, label %for.cond.cleanup, label %for.body

; Returns (matches found) - (spaces seen); both are 0 when N is 0.
for.cond.cleanup:
  %found.final = phi i32 [ 0, %entry ], [ %found.next, %for.inc ]
  %spaces.final = phi i32 [ 0, %entry ], [ %spaces.next, %for.inc ]
  %diff = sub nsw i32 %found.final, %spaces.final
  ret i32 %diff

; Classify c[pos]: 108 ('l'), 111 ('o') and 112 ('p') count as found,
; 32 (' ') counts as a space, anything else goes straight to the latch.
for.body:
  %pos = phi i32 [ %pos.next, %for.inc ], [ 0, %entry ]
  %spaces = phi i32 [ %spaces.next, %for.inc ], [ 0, %entry ]
  %found = phi i32 [ %found.next, %for.inc ], [ 0, %entry ]
  %ch.ptr = getelementptr inbounds i8, i8* %c, i32 %pos
  %ch = load i8, i8* %ch.ptr, align 1
  switch i8 %ch, label %for.inc [
    i8 108, label %sw.bb
    i8 111, label %sw.bb
    i8 112, label %sw.bb
    i8 32, label %sw.bb1
  ]

sw.bb:                                            ; preds = %for.body, %for.body, %for.body
  %found.inc = add nsw i32 %found, 1
  br label %for.inc

sw.bb1:                                           ; preds = %for.body
  %spaces.inc = add nsw i32 %spaces, 1
  br label %for.inc

; Latch: merge the two counters and advance to the next character.
for.inc:                                          ; preds = %sw.bb, %sw.bb1, %for.body
  %found.next = phi i32 [ %found, %for.body ], [ %found, %sw.bb1 ], [ %found.inc, %sw.bb ]
  %spaces.next = phi i32 [ %spaces, %for.body ], [ %spaces.inc, %sw.bb1 ], [ %spaces, %sw.bb ]
  %pos.next = add nuw i32 %pos, 1
  %done = icmp eq i32 %pos.next, %N
  br i1 %done, label %for.cond.cleanup, label %for.body
}
323
; unroll_inc_int: an up-counting loop guarded by a signed "N > 0" test.
; Without unrolling, the expectations look for the start/decrement pair.
; With runtime unrolling, they expect the unrolled body to end in an
; ordinary bne and only the epilogue to become a wls/le loop.
; CHECK-LABEL: unroll_inc_int
; CHECK: call i32 @llvm.start.loop.iterations.i32(i32 %N)
; CHECK: call i32 @llvm.loop.decrement.reg.i32(

; TODO: We should be able to support the unrolled loop body.
; CHECK-UNROLL-LABEL: unroll_inc_int
; CHECK-UNROLL:     [[PREHEADER:.LBB[0-9_]+]]: @ %for.body.preheader
; CHECK-UNROLL-NOT: dls
; CHECK-UNROLL:     [[LOOP:.LBB[0-9_]+]]: @ %for.body
; CHECK-UNROLL-NOT: le lr, [[LOOP]]
; CHECK-UNROLL:     bne [[LOOP]]
; CHECK-UNROLL:     wls lr, lr, [[EXIT:.LBB[0-9_]+]]
; CHECK-UNROLL:     [[EPIL:.LBB[0-9_]+]]:
; CHECK-UNROLL:     le lr, [[EPIL]]
; CHECK-UNROLL-NEXT: [[EXIT]]

define void @unroll_inc_int(i32* nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %c, i32 %N) {
entry:
  %has.work = icmp sgt i32 %N, 0
  br i1 %has.work, label %for.body, label %for.cond.cleanup

for.cond.cleanup:
  ret void

; a[i] = c[i] * b[i] for i = 0 .. N-1.
for.body:
  %i = phi i32 [ %i.next, %for.body ], [ 0, %entry ]
  %b.ptr = getelementptr inbounds i32, i32* %b, i32 %i
  %b.val = load i32, i32* %b.ptr, align 4
  %c.ptr = getelementptr inbounds i32, i32* %c, i32 %i
  %c.val = load i32, i32* %c.ptr, align 4
  %prod = mul nsw i32 %c.val, %b.val
  %a.ptr = getelementptr inbounds i32, i32* %a, i32 %i
  store i32 %prod, i32* %a.ptr, align 4
  %i.next = add nuw nsw i32 %i, 1
  %done = icmp eq i32 %i.next, %N
  br i1 %done, label %for.cond.cleanup, label %for.body
}
361
; unroll_inc_unsigned: an up-counting loop guarded by an "N == 0" test.
; Without unrolling, the expectations look for a test.set guard plus a
; decrement, and for a wls/le pair in the generated code. With runtime
; unrolling, they expect the unrolled body to end in an ordinary bne and
; only the epilogue to become a wls/le loop.
; CHECK-LABEL: unroll_inc_unsigned
; CHECK: call i1 @llvm.test.set.loop.iterations.i32(i32 %N)
; CHECK: call i32 @llvm.loop.decrement.reg.i32(

; CHECK-LLC-LABEL: unroll_inc_unsigned:
; CHECK-LLC: wls lr, r3, [[EXIT:.LBB[0-9_]+]]
; CHECK-LLC: [[HEADER:.LBB[0-9_]+]]:
; CHECK-LLC: le lr, [[HEADER]]
; CHECK-LLC-NEXT: [[EXIT]]:

; TODO: We should be able to support the unrolled loop body.
; CHECK-UNROLL-LABEL: unroll_inc_unsigned
; CHECK-UNROLL:     [[PREHEADER:.LBB[0-9_]+]]: @ %for.body.preheader
; CHECK-UNROLL-NOT: dls
; CHECK-UNROLL:     [[LOOP:.LBB[0-9_]+]]: @ %for.body
; CHECK-UNROLL-NOT: le lr, [[LOOP]]
; CHECK-UNROLL:     bne [[LOOP]]
; CHECK-UNROLL:     wls lr, lr, [[EPIL_EXIT:.LBB[0-9_]+]]
; CHECK-UNROLL: [[EPIL:.LBB[0-9_]+]]:
; CHECK-UNROLL:     le lr, [[EPIL]]
; CHECK-UNROLL: [[EPIL_EXIT]]:
; CHECK-UNROLL:     pop
define void @unroll_inc_unsigned(i32* nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %c, i32 %N) {
entry:
  %is.empty = icmp eq i32 %N, 0
  br i1 %is.empty, label %for.cond.cleanup, label %for.body

for.cond.cleanup:
  ret void

; a[i] = c[i] * b[i] for i = 0 .. N-1.
for.body:
  %i = phi i32 [ %i.next, %for.body ], [ 0, %entry ]
  %b.ptr = getelementptr inbounds i32, i32* %b, i32 %i
  %b.val = load i32, i32* %b.ptr, align 4
  %c.ptr = getelementptr inbounds i32, i32* %c, i32 %i
  %c.val = load i32, i32* %c.ptr, align 4
  %prod = mul nsw i32 %c.val, %b.val
  %a.ptr = getelementptr inbounds i32, i32* %a, i32 %i
  store i32 %prod, i32* %a.ptr, align 4
  %i.next = add nuw i32 %i, 1
  %done = icmp eq i32 %i.next, %N
  br i1 %done, label %for.cond.cleanup, label %for.body
}
405
; unroll_dec_int: a down-counting loop (the index starts at N and runs while
; the decremented index is still > 0). Without unrolling, the expectations
; look for the start/decrement pair and a dls/le pair in the generated code.
; With runtime unrolling, they expect a wls/le prologue loop followed by a
; dls/le main loop.
; CHECK-LABEL: unroll_dec_int
; CHECK: call i32 @llvm.start.loop.iterations.i32(i32 %N)
; CHECK: call i32 @llvm.loop.decrement.reg.i32(

; TODO: An unnecessary register is being used to hold COUNT; lr should just
; be used instead.
; CHECK-LLC-LABEL: unroll_dec_int:
; CHECK-LLC: dls lr, r3
; CHECK-LLC-NOT: mov lr, r3
; CHECK-LLC: [[HEADER:.LBB[0-9_]+]]:
; CHECK-LLC: le lr, [[HEADER]]

; NOTE(review): the second PROLOGUE_EXIT below is captured again rather than
; referenced, so that line does not tie the label to the wls target. Confirm
; whether this is intentional before tightening it.
; CHECK-UNROLL-LABEL: unroll_dec_int:
; CHECK-UNROLL:         wls lr, {{.*}}, [[PROLOGUE_EXIT:.LBB[0-9_]+]]
; CHECK-UNROLL-NEXT: [[PROLOGUE:.LBB[0-9_]+]]:
; CHECK-UNROLL:         le lr, [[PROLOGUE]]
; CHECK-UNROLL-NEXT: [[PROLOGUE_EXIT:.LBB[0-9_]+]]:
; CHECK-UNROLL:         dls lr, lr
; CHECK-UNROLL:      [[BODY:.LBB[0-9_]+]]:
; CHECK-UNROLL:         le lr, [[BODY]]
; CHECK-UNROLL-NOT:     b
; CHECK-UNROLL:         pop
define void @unroll_dec_int(i32* nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %c, i32 %N) {
entry:
  %has.work = icmp sgt i32 %N, 0
  br i1 %has.work, label %for.body, label %for.cond.cleanup

for.cond.cleanup:
  ret void

; a[i] = c[i] * b[i], with i starting at N and stepping down by one.
for.body:
  %i = phi i32 [ %i.next, %for.body ], [ %N, %entry ]
  %b.ptr = getelementptr inbounds i32, i32* %b, i32 %i
  %b.val = load i32, i32* %b.ptr, align 4
  %c.ptr = getelementptr inbounds i32, i32* %c, i32 %i
  %c.val = load i32, i32* %c.ptr, align 4
  %prod = mul nsw i32 %c.val, %b.val
  %a.ptr = getelementptr inbounds i32, i32* %a, i32 %i
  store i32 %prod, i32* %a.ptr, align 4
  %i.next = add nsw i32 %i, -1
  %more = icmp sgt i32 %i.next, 0
  br i1 %more, label %for.body, label %for.cond.cleanup
}
449
450declare i32 @llvm.start.loop.iterations.i32(i32) #0
451declare i1 @llvm.test.set.loop.iterations.i32(i32) #0
452declare i32 @llvm.loop.decrement.reg.i32(i32, i32) #0
453
454