; Load PRE (partial redundancy elimination of loads) tests for the GVN pass.
; RUN: opt < %s -basic-aa -gvn -enable-load-pre -S | FileCheck %s
; RUN: opt < %s -aa-pipeline=basic-aa -passes="gvn<load-pre>" -enable-load-pre=false -S | FileCheck %s
; Both invocations are verified against the same set of checks.
; NOTE(review): the second invocation pairs the gvn<load-pre> pass parameter
; with -enable-load-pre=false, presumably to confirm that the pass parameter
; takes precedence over the command-line flag -- confirm.
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"

; Basic load PRE. The value at %p is known on the %block3 edge (the store of 0)
; but not on the %block2 edge, so the checks expect a load to appear in %block2
; and a phi to start %block4, directly followed by the ret.
define i32 @test1(i32* %p, i1 %C) {
; CHECK-LABEL: @test1(
block1:
  br i1 %C, label %block2, label %block3

block2:
  br label %block4
; CHECK: block2:
; CHECK-NEXT: load i32, i32* %p

block3:
  store i32 0, i32* %p
  br label %block4

block4:
  %PRE = load i32, i32* %p
  ret i32 %PRE
; CHECK: block4:
; CHECK-NEXT: phi i32
; CHECK-NEXT: ret i32
}

; This is a simple phi translation case: the loaded pointer %P2 is itself a
; phi of %p and %q, and the checks expect the inserted load in %block2 to use
; %q directly.
define i32 @test2(i32* %p, i32* %q, i1 %C) {
; CHECK-LABEL: @test2(
block1:
  br i1 %C, label %block2, label %block3

block2:
  br label %block4
; CHECK: block2:
; CHECK-NEXT: load i32, i32* %q

block3:
  store i32 0, i32* %p
  br label %block4

block4:
  %P2 = phi i32* [%p, %block3], [%q, %block2]
  %PRE = load i32, i32* %P2
  ret i32 %PRE
; CHECK: block4:
; CHECK-NEXT: phi i32 [
; CHECK-NOT: load
; CHECK: ret i32
}

; This is a PRE case that requires phi translation through a GEP.
; %B (computed in block1) is the translated address for the %block2 edge, and
; the checks expect the inserted load in %block2 to use it.
define i32 @test3(i32* %p, i32* %q, i32** %Hack, i1 %C) {
; CHECK-LABEL: @test3(
block1:
  %B = getelementptr i32, i32* %q, i32 1
  store i32* %B, i32** %Hack
  br i1 %C, label %block2, label %block3

block2:
  br label %block4
; CHECK: block2:
; CHECK-NEXT: load i32, i32* %B

block3:
  %A = getelementptr i32, i32* %p, i32 1
  store i32 0, i32* %A
  br label %block4

block4:
  %P2 = phi i32* [%p, %block3], [%q, %block2]
  %P3 = getelementptr i32, i32* %P2, i32 1
  %PRE = load i32, i32* %P3
  ret i32 %PRE
; CHECK: block4:
; CHECK: phi i32 [
; CHECK-NOT: load
; CHECK: ret i32
}

;; Here the loaded address is available, but the computation is in 'block3'
;; which does not dominate 'block2'.
define i32 @test4(i32* %p, i32* %q, i32** %Hack, i1 %C) {
; CHECK-LABEL: @test4(
block1:
  br i1 %C, label %block2, label %block3

block2:
  br label %block4
; CHECK: block2:
; CHECK:   load i32, i32*
; CHECK:   br label %block4

block3:
  %B = getelementptr i32, i32* %q, i32 1
  store i32* %B, i32** %Hack

  %A = getelementptr i32, i32* %p, i32 1
  store i32 0, i32* %A
  br label %block4

block4:
  %P2 = phi i32* [%p, %block3], [%q, %block2]
  %P3 = getelementptr i32, i32* %P2, i32 1
  %PRE = load i32, i32* %P3
  ret i32 %PRE
; CHECK: block4:
; CHECK: phi i32 [
; CHECK-NOT: load
; CHECK: ret i32
}

;void test5(int N, double *G) {
;  int j;
;  for (j = 0; j < N - 1; j++)
;    G[j] = G[j] + G[j+1];
;}

; The loop loads G[indvar] and G[indvar+1] and stores to G[indvar]. The checks
; expect one load hoisted into the preheader and a single load left in the loop.
define void @test5(i32 %N, double* nocapture %G) nounwind ssp {
; CHECK-LABEL: @test5(
entry:
  %0 = add i32 %N, -1
  %1 = icmp sgt i32 %0, 0
  br i1 %1, label %bb.nph, label %return

bb.nph:
  %tmp = zext i32 %0 to i64
  br label %bb

; CHECK: bb.nph:
; CHECK: load double, double*
; CHECK: br label %bb

bb:
  %indvar = phi i64 [ 0, %bb.nph ], [ %tmp6, %bb ]
  %tmp6 = add i64 %indvar, 1
  %scevgep = getelementptr double, double* %G, i64 %tmp6
  %scevgep7 = getelementptr double, double* %G, i64 %indvar
  %2 = load double, double* %scevgep7, align 8
  %3 = load double, double* %scevgep, align 8
  %4 = fadd double %2, %3
  store double %4, double* %scevgep7, align 8
  %exitcond = icmp eq i64 %tmp6, %tmp
  br i1 %exitcond, label %return, label %bb

; Should only be one load in the loop.
; CHECK: bb:
; CHECK: load double, double*
; CHECK-NOT: load double, double*
; CHECK: br i1 %exitcond

return:
  ret void
}

;void test6(int N, double *G) {
;  int j;
;  for (j = 0; j < N - 1; j++)
;    G[j+1] = G[j] + G[j+1];
;}

; Same loop shape as test5, but the store goes to G[indvar+1], which is the
; address the next iteration reads as G[indvar].
define void @test6(i32 %N, double* nocapture %G) nounwind ssp {
; CHECK-LABEL: @test6(
entry:
  %0 = add i32 %N, -1
  %1 = icmp sgt i32 %0, 0
  br i1 %1, label %bb.nph, label %return

bb.nph:
  %tmp = zext i32 %0 to i64
  br label %bb

; CHECK: bb.nph:
; CHECK: load double, double*
; CHECK: br label %bb

bb:
  %indvar = phi i64 [ 0, %bb.nph ], [ %tmp6, %bb ]
  %tmp6 = add i64 %indvar, 1
  %scevgep = getelementptr double, double* %G, i64 %tmp6
  %scevgep7 = getelementptr double, double* %G, i64 %indvar
  %2 = load double, double* %scevgep7, align 8
  %3 = load double, double* %scevgep, align 8
  %4 = fadd double %2, %3
  store double %4, double* %scevgep, align 8
  %exitcond = icmp eq i64 %tmp6, %tmp
  br i1 %exitcond, label %return, label %bb

; Should only be one load in the loop.
; CHECK: bb:
; CHECK: load double, double*
; CHECK-NOT: load double, double*
; CHECK: br i1 %exitcond

return:
  ret void
}

;void test7(int N, double* G) {
;  long j;
;  G[1] = 1;
;  for (j = 1; j < N - 1; j++)
;      G[j+1] = G[j] + G[j+1];
;}

; This requires phi translation of the adds.
define void @test7(i32 %N, double* nocapture %G) nounwind ssp {
; CHECK-LABEL: @test7(
entry:
  %0 = getelementptr inbounds double, double* %G, i64 1
  store double 1.000000e+00, double* %0, align 8
  %1 = add i32 %N, -1
  %2 = icmp sgt i32 %1, 1
  br i1 %2, label %bb.nph, label %return

bb.nph:
  %tmp = sext i32 %1 to i64
  %tmp7 = add i64 %tmp, -1
  br label %bb

bb:
  %indvar = phi i64 [ 0, %bb.nph ], [ %tmp9, %bb ]
  %tmp8 = add i64 %indvar, 2
  %scevgep = getelementptr double, double* %G, i64 %tmp8
  %tmp9 = add i64 %indvar, 1
  %scevgep10 = getelementptr double, double* %G, i64 %tmp9
  %3 = load double, double* %scevgep10, align 8
  %4 = load double, double* %scevgep, align 8
  %5 = fadd double %3, %4
  store double %5, double* %scevgep, align 8
  %exitcond = icmp eq i64 %tmp9, %tmp7
  br i1 %exitcond, label %return, label %bb

; Should only be one load in the loop.
; CHECK: bb:
; CHECK: load double, double*
; CHECK-NOT: load double, double*
; CHECK: br i1 %exitcond

return:
  ret void
}

;; Here the loaded address isn't available in 'block2' at all, requiring a new
;; GEP to be inserted into it.
define i32 @test8(i32* %p, i32* %q, i32** %Hack, i1 %C) {
; CHECK-LABEL: @test8(
block1:
  br i1 %C, label %block2, label %block3

block2:
  br label %block4
; CHECK: block2:
; CHECK:   load i32, i32*
; CHECK:   br label %block4

block3:
  %A = getelementptr i32, i32* %p, i32 1
  store i32 0, i32* %A
  br label %block4

block4:
  %P2 = phi i32* [%p, %block3], [%q, %block2]
  %P3 = getelementptr i32, i32* %P2, i32 1
  %PRE = load i32, i32* %P3
  ret i32 %PRE
; CHECK: block4:
; CHECK: phi i32 [
; CHECK-NOT: load
; CHECK: ret i32
}

;void test9(int N, double* G) {
;  long j;
;  for (j = 1; j < N - 1; j++)
;      G[j+1] = G[j] + G[j+1];
;}

; This requires phi translation of the adds.
define void @test9(i32 %N, double* nocapture %G) nounwind ssp {
; CHECK-LABEL: @test9(
entry:
  ; Unnamed placeholder: it takes value number %0, so the instructions below
  ; can keep the explicit numbers %1 and %2.
  add i32 0, 0
  %1 = add i32 %N, -1
  %2 = icmp sgt i32 %1, 1
  br i1 %2, label %bb.nph, label %return

bb.nph:
  %tmp = sext i32 %1 to i64
  %tmp7 = add i64 %tmp, -1
  br label %bb

; CHECK: bb.nph:
; CHECK:   load double, double*
; CHECK:   br label %bb

bb:
  %indvar = phi i64 [ 0, %bb.nph ], [ %tmp9, %bb ]
  %tmp8 = add i64 %indvar, 2
  %scevgep = getelementptr double, double* %G, i64 %tmp8
  %tmp9 = add i64 %indvar, 1
  %scevgep10 = getelementptr double, double* %G, i64 %tmp9
  %3 = load double, double* %scevgep10, align 8
  %4 = load double, double* %scevgep, align 8
  %5 = fadd double %3, %4
  store double %5, double* %scevgep, align 8
  %exitcond = icmp eq i64 %tmp9, %tmp7
  br i1 %exitcond, label %return, label %bb

; Should only be one load in the loop.
; CHECK: bb:
; CHECK: load double, double*
; CHECK-NOT: load double, double*
; CHECK: br i1 %exitcond

return:
  ret void
}

;void test10(int N, double* G) {
;  long j;
;  for (j = 1; j < N - 1; j++)
;      G[j] = G[j] + G[j+1] + G[j-1];
;}

; PR5501
; The loop reads three neighbouring elements and stores to the middle one. The
; checks expect two loads in the preheader and a single load left in the loop.
define void @test10(i32 %N, double* nocapture %G) nounwind ssp {
; CHECK-LABEL: @test10(
entry:
  %0 = add i32 %N, -1
  %1 = icmp sgt i32 %0, 1
  br i1 %1, label %bb.nph, label %return

bb.nph:
  %tmp = sext i32 %0 to i64
  %tmp8 = add i64 %tmp, -1
  br label %bb
; CHECK: bb.nph:
; CHECK:   load double, double*
; CHECK:   load double, double*
; CHECK:   br label %bb

bb:
  %indvar = phi i64 [ 0, %bb.nph ], [ %tmp11, %bb ]
  %scevgep = getelementptr double, double* %G, i64 %indvar
  %tmp9 = add i64 %indvar, 2
  %scevgep10 = getelementptr double, double* %G, i64 %tmp9
  %tmp11 = add i64 %indvar, 1
  %scevgep12 = getelementptr double, double* %G, i64 %tmp11
  %2 = load double, double* %scevgep12, align 8
  %3 = load double, double* %scevgep10, align 8
  %4 = fadd double %2, %3
  %5 = load double, double* %scevgep, align 8
  %6 = fadd double %4, %5
  store double %6, double* %scevgep12, align 8
  %exitcond = icmp eq i64 %tmp11, %tmp8
  br i1 %exitcond, label %return, label %bb

; Should only be one load in the loop.
; CHECK: bb:
; CHECK: load double, double*
; CHECK-NOT: load double, double*
; CHECK: br i1 %exitcond

return:
  ret void
}

; Test critical edge splitting.
; %block2 branches to both %block4 and %block5, so the %block2 -> %block4 edge
; is critical. The checks expect the inserted load to sit in a block that ends
; with an unconditional branch to %block4.
define i32 @test11(i32* %p, i1 %C, i32 %N) {
; CHECK-LABEL: @test11(
block1:
  br i1 %C, label %block2, label %block3

block2:
  %cond = icmp sgt i32 %N, 1
  br i1 %cond, label %block4, label %block5
; CHECK: load i32, i32* %p
; CHECK-NEXT: br label %block4

block3:
  store i32 0, i32* %p
  br label %block4

block4:
  %PRE = load i32, i32* %p
  br label %block5

block5:
  %ret = phi i32 [ 0, %block2 ], [ %PRE, %block4 ]
  ret i32 %ret
; CHECK: block4:
; CHECK-NEXT: phi i32
}

; External declarations shared by the tests below.
; @f is the opaque callee invoked/called in test12 through test16.
declare void @f()
; @g consumes the loaded value in test12.
declare void @g(i32)
; Personality routine referenced by test12.
declare i32 @__CxxFrameHandler3(...)

; Test that loads aren't PRE'd into EH pads.
; %cleanup2 is reached from both %catch.dispatch and %cleanup (which stores to
; %p). The checks require the load to stay right after the cleanuppad, with no
; phi introduced in %cleanup2.
define void @test12(i32* %p) personality i32 (...)* @__CxxFrameHandler3 {
; CHECK-LABEL: @test12(
block1:
  invoke void @f()
          to label %block2 unwind label %catch.dispatch

block2:
  invoke void @f()
          to label %block3 unwind label %cleanup

block3:
  ret void

catch.dispatch:
  %cs1 = catchswitch within none [label %catch] unwind label %cleanup2

catch:
  %c = catchpad within %cs1 []
  catchret from %c to label %block2

cleanup:
  %c1 = cleanuppad within none []
  store i32 0, i32* %p
  cleanupret from %c1 unwind label %cleanup2

; CHECK: cleanup2:
; CHECK-NOT: phi
; CHECK-NEXT: %c2 = cleanuppad within none []
; CHECK-NEXT: %NOTPRE = load i32, i32* %p
cleanup2:
  %c2 = cleanuppad within none []
  %NOTPRE = load i32, i32* %p
  call void @g(i32 %NOTPRE)
  cleanupret from %c2 unwind to caller
}

; Don't PRE load across potentially throwing calls.
; The checks expect the entry block to stay free of loads and the load of %x
; in if.end to remain directly after the call to @f.

define i32 @test13(i32* noalias nocapture readonly %x, i32* noalias nocapture %r, i32 %a) {

; CHECK-LABEL: @test13(
; CHECK: entry:
; CHECK-NEXT: icmp eq
; CHECK-NEXT: br i1

entry:
  %tobool = icmp eq i32 %a, 0
  br i1 %tobool, label %if.end, label %if.then

; CHECK: if.then:
; CHECK-NEXT: load i32
; CHECK-NEXT: store i32

if.then:
  %uu = load i32, i32* %x, align 4
  store i32 %uu, i32* %r, align 4
  br label %if.end

; CHECK: if.end:
; CHECK-NEXT: call void @f()
; CHECK-NEXT: load i32

if.end:
  call void @f()
  %vv = load i32, i32* %x, align 4
  ret i32 %vv
}

; Same as test13, but now the blocking function is not immediately in load's
; block.
; The checks expect the load to directly follow the call inside if.end in the
; output, even though the input places it two blocks later.

define i32 @test14(i32* noalias nocapture readonly %x, i32* noalias nocapture %r, i32 %a) {

; CHECK-LABEL: @test14(
; CHECK: entry:
; CHECK-NEXT: icmp eq
; CHECK-NEXT: br i1

entry:
  %tobool = icmp eq i32 %a, 0
  br i1 %tobool, label %if.end, label %if.then

; CHECK: if.then:
; CHECK-NEXT: load i32
; CHECK-NEXT: store i32

if.then:
  %uu = load i32, i32* %x, align 4
  store i32 %uu, i32* %r, align 4
  br label %if.end

; CHECK: if.end:
; CHECK-NEXT: call void @f()
; CHECK-NEXT: load i32

if.end:
  call void @f()
  br label %follow_1

follow_1:
  br label %follow_2

follow_2:
  %vv = load i32, i32* %x, align 4
  ret i32 %vv
}

; Same as test13, but %x here is dereferenceable. A pointer that is
; dereferenceable can be loaded from speculatively without a risk of trapping.
; Since it is OK to speculate, PRE is allowed.
; The checks expect the load to move into a new entry.if.end_crit_edge block
; and %vv to become a phi of that load and %uu.

define i32 @test15(i32* noalias nocapture readonly dereferenceable(8) align 4 %x, i32* noalias nocapture %r, i32 %a) {

; CHECK-LABEL: @test15(
; CHECK: entry:
; CHECK-NEXT: icmp eq
; CHECK-NEXT: br i1

entry:
  %tobool = icmp eq i32 %a, 0
  br i1 %tobool, label %if.end, label %if.then

; CHECK: entry.if.end_crit_edge:
; CHECK-NEXT: %vv.pre = load i32, i32* %x, align 4
; CHECK-NEXT: br label %if.end

if.then:
  %uu = load i32, i32* %x, align 4
  store i32 %uu, i32* %r, align 4
  br label %if.end

; CHECK: if.then:
; CHECK-NEXT: %uu = load i32, i32* %x, align 4
; CHECK-NEXT: store i32 %uu, i32* %r, align 4
; CHECK-NEXT: br label %if.end

if.end:
  call void @f()
  %vv = load i32, i32* %x, align 4
  ret i32 %vv

; CHECK: if.end:
; CHECK-NEXT: %vv = phi i32 [ %vv.pre, %entry.if.end_crit_edge ], [ %uu, %if.then ]
; CHECK-NEXT: call void @f()
; CHECK-NEXT: ret i32 %vv

}

; Same as test14, but %x here is dereferenceable. A pointer that is
; dereferenceable can be loaded from speculatively without a risk of trapping.
; Since it is OK to speculate, PRE is allowed.
; The checks expect if.end in the output to hold the phi, the call and the ret
; back to back, i.e. without separate follow_1/follow_2 blocks in between.

define i32 @test16(i32* noalias nocapture readonly dereferenceable(8) align 4 %x, i32* noalias nocapture %r, i32 %a) {

; CHECK-LABEL: @test16(
; CHECK: entry:
; CHECK-NEXT: icmp eq
; CHECK-NEXT: br i1

entry:
  %tobool = icmp eq i32 %a, 0
  br i1 %tobool, label %if.end, label %if.then

; CHECK: entry.if.end_crit_edge:
; CHECK-NEXT: %vv.pre = load i32, i32* %x, align 4
; CHECK-NEXT: br label %if.end

if.then:
  %uu = load i32, i32* %x, align 4
  store i32 %uu, i32* %r, align 4
  br label %if.end

; CHECK: if.then:
; CHECK-NEXT: %uu = load i32, i32* %x, align 4
; CHECK-NEXT: store i32 %uu, i32* %r, align 4
; CHECK-NEXT: br label %if.end

if.end:
  call void @f()
  br label %follow_1

; CHECK: if.end:
; CHECK-NEXT: %vv = phi i32 [ %vv.pre, %entry.if.end_crit_edge ], [ %uu, %if.then ]
; CHECK-NEXT: call void @f()
; CHECK-NEXT: ret i32 %vv

follow_1:
  br label %follow_2

follow_2:
  %vv = load i32, i32* %x, align 4
  ret i32 %vv
}
