• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt -O3 -rotation-max-header-size=0 -S -enable-new-pm=0 < %s   | FileCheck %s --check-prefix=HOIST
3; RUN: opt -passes='default<O3>' -rotation-max-header-size=0 -S < %s  | FileCheck %s --check-prefix=HOIST
4
5; RUN: opt -O3 -rotation-max-header-size=1 -S -enable-new-pm=0 < %s   | FileCheck %s --check-prefix=HOIST
6; RUN: opt -passes='default<O3>' -rotation-max-header-size=1 -S < %s  | FileCheck %s --check-prefix=HOIST
7
8; RUN: opt -O3 -rotation-max-header-size=2 -S -enable-new-pm=0 < %s   | FileCheck %s --check-prefix=ROTATED_LATER_OLDPM
9; RUN: opt -passes='default<O3>' -rotation-max-header-size=2 -S < %s  | FileCheck %s --check-prefix=ROTATED_LATER_NEWPM
10
11; RUN: opt -O3 -rotation-max-header-size=3 -S -enable-new-pm=0 < %s   | FileCheck %s --check-prefix=ROTATE_OLDPM
12; RUN: opt -passes='default<O3>' -rotation-max-header-size=3 -S < %s  | FileCheck %s --check-prefix=ROTATE_NEWPM
13
14; This example is produced from a very basic C code:
15;
16;   void f0();
17;   void f1();
18;   void f2();
19;
20;   void loop(int width) {
21;       if(width < 1)
22;           return;
23;       for(int i = 0; i < width - 1; ++i) {
24;           f0();
25;           f1();
26;       }
27;       f0();
28;       f2();
29;   }
30
31; We have a choice here. We can either
32; * hoist the f0() call into loop header,
33;   * which potentially makes loop rotation unprofitable since the loop header might
34;     have grown above a certain threshold, and such unrotated loops will be
35;     ignored by LoopVectorizer, preventing vectorization
36;   * or loop rotation will succeed, resulting in some weird PHIs that will also
37;     harm vectorization
38; * or not hoist f0() call before performing loop rotation,
39;   at the cost of potential code bloat and/or potentially successfully rotating
40;   the loops, vectorizing them at the cost of compile time.
41
42target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
43
; External callees used by @_Z4loopi. They are only declared in this file,
; not defined.
44declare void @f0()
45declare void @f1()
46declare void @f2()
47
; Lifetime markers that @_Z4loopi places around the stack slot of the C loop
; variable `i`.
48declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture)
49declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture)
50
; Unoptimized IR for the C function `loop(int width)` shown in the header
; comment. The check lines below are grouped by FileCheck prefix, as selected
; by the -rotation-max-header-size value on each opt invocation at the top of
; the file:
;   * HOIST (sizes 0 and 1) expects a single call to @f0 in the loop header
;     block for.cond, with the exit test also in for.cond.
;   * ROTATED_LATER_OLDPM / ROTATED_LATER_NEWPM (size 2) and
;     ROTATE_OLDPM / ROTATE_NEWPM (size 3) expect the exit test at the end of
;     for.body, with @f0 called both in for.body and in for.cond.cleanup.
51define void @_Z4loopi(i32 %width) {
52; HOIST-LABEL: @_Z4loopi(
53; HOIST-NEXT:  entry:
54; HOIST-NEXT:    [[CMP:%.*]] = icmp slt i32 [[WIDTH:%.*]], 1
55; HOIST-NEXT:    br i1 [[CMP]], label [[RETURN:%.*]], label [[FOR_COND_PREHEADER:%.*]]
56; HOIST:       for.cond.preheader:
57; HOIST-NEXT:    [[TMP0:%.*]] = add nsw i32 [[WIDTH]], -1
58; HOIST-NEXT:    br label [[FOR_COND:%.*]]
59; HOIST:       for.cond:
60; HOIST-NEXT:    [[I_0:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY:%.*]] ], [ 0, [[FOR_COND_PREHEADER]] ]
61; HOIST-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i32 [[I_0]], [[TMP0]]
62; HOIST-NEXT:    tail call void @f0()
63; HOIST-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
64; HOIST:       for.cond.cleanup:
65; HOIST-NEXT:    tail call void @f2()
66; HOIST-NEXT:    br label [[RETURN]]
67; HOIST:       for.body:
68; HOIST-NEXT:    tail call void @f1()
69; HOIST-NEXT:    [[INC]] = add nuw i32 [[I_0]], 1
70; HOIST-NEXT:    br label [[FOR_COND]]
71; HOIST:       return:
72; HOIST-NEXT:    ret void
73;
74; ROTATED_LATER_OLDPM-LABEL: @_Z4loopi(
75; ROTATED_LATER_OLDPM-NEXT:  entry:
76; ROTATED_LATER_OLDPM-NEXT:    [[CMP:%.*]] = icmp slt i32 [[WIDTH:%.*]], 1
77; ROTATED_LATER_OLDPM-NEXT:    br i1 [[CMP]], label [[RETURN:%.*]], label [[FOR_COND_PREHEADER:%.*]]
78; ROTATED_LATER_OLDPM:       for.cond.preheader:
79; ROTATED_LATER_OLDPM-NEXT:    [[TMP0:%.*]] = add nsw i32 [[WIDTH]], -1
80; ROTATED_LATER_OLDPM-NEXT:    [[EXITCOND_NOT3:%.*]] = icmp eq i32 [[TMP0]], 0
81; ROTATED_LATER_OLDPM-NEXT:    br i1 [[EXITCOND_NOT3]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY:%.*]]
82; ROTATED_LATER_OLDPM:       for.cond.cleanup:
83; ROTATED_LATER_OLDPM-NEXT:    tail call void @f0()
84; ROTATED_LATER_OLDPM-NEXT:    tail call void @f2()
85; ROTATED_LATER_OLDPM-NEXT:    br label [[RETURN]]
86; ROTATED_LATER_OLDPM:       for.body:
87; ROTATED_LATER_OLDPM-NEXT:    [[I_04:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_COND_PREHEADER]] ]
88; ROTATED_LATER_OLDPM-NEXT:    tail call void @f0()
89; ROTATED_LATER_OLDPM-NEXT:    tail call void @f1()
90; ROTATED_LATER_OLDPM-NEXT:    [[INC]] = add nuw i32 [[I_04]], 1
91; ROTATED_LATER_OLDPM-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], [[TMP0]]
92; ROTATED_LATER_OLDPM-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]]
93; ROTATED_LATER_OLDPM:       return:
94; ROTATED_LATER_OLDPM-NEXT:    ret void
95;
96; ROTATED_LATER_NEWPM-LABEL: @_Z4loopi(
97; ROTATED_LATER_NEWPM-NEXT:  entry:
98; ROTATED_LATER_NEWPM-NEXT:    [[CMP:%.*]] = icmp slt i32 [[WIDTH:%.*]], 1
99; ROTATED_LATER_NEWPM-NEXT:    br i1 [[CMP]], label [[RETURN:%.*]], label [[FOR_COND_PREHEADER:%.*]]
100; ROTATED_LATER_NEWPM:       for.cond.preheader:
101; ROTATED_LATER_NEWPM-NEXT:    [[TMP0:%.*]] = add nsw i32 [[WIDTH]], -1
102; ROTATED_LATER_NEWPM-NEXT:    [[EXITCOND_NOT3:%.*]] = icmp eq i32 [[TMP0]], 0
103; ROTATED_LATER_NEWPM-NEXT:    br i1 [[EXITCOND_NOT3]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_COND_PREHEADER_FOR_BODY_CRIT_EDGE:%.*]]
104; ROTATED_LATER_NEWPM:       for.cond.preheader.for.body_crit_edge:
105; ROTATED_LATER_NEWPM-NEXT:    [[INC_1:%.*]] = add nuw i32 0, 1
106; ROTATED_LATER_NEWPM-NEXT:    br label [[FOR_BODY:%.*]]
107; ROTATED_LATER_NEWPM:       for.cond.cleanup:
108; ROTATED_LATER_NEWPM-NEXT:    tail call void @f0()
109; ROTATED_LATER_NEWPM-NEXT:    tail call void @f2()
110; ROTATED_LATER_NEWPM-NEXT:    br label [[RETURN]]
111; ROTATED_LATER_NEWPM:       for.body:
112; ROTATED_LATER_NEWPM-NEXT:    [[INC_PHI:%.*]] = phi i32 [ [[INC_0:%.*]], [[FOR_BODY_FOR_BODY_CRIT_EDGE:%.*]] ], [ [[INC_1]], [[FOR_COND_PREHEADER_FOR_BODY_CRIT_EDGE]] ]
113; ROTATED_LATER_NEWPM-NEXT:    tail call void @f0()
114; ROTATED_LATER_NEWPM-NEXT:    tail call void @f1()
115; ROTATED_LATER_NEWPM-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC_PHI]], [[TMP0]]
116; ROTATED_LATER_NEWPM-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY_FOR_BODY_CRIT_EDGE]]
117; ROTATED_LATER_NEWPM:       for.body.for.body_crit_edge:
118; ROTATED_LATER_NEWPM-NEXT:    [[INC_0]] = add nuw i32 [[INC_PHI]], 1
119; ROTATED_LATER_NEWPM-NEXT:    br label [[FOR_BODY]]
120; ROTATED_LATER_NEWPM:       return:
121; ROTATED_LATER_NEWPM-NEXT:    ret void
122;
123; ROTATE_OLDPM-LABEL: @_Z4loopi(
124; ROTATE_OLDPM-NEXT:  entry:
125; ROTATE_OLDPM-NEXT:    [[CMP:%.*]] = icmp slt i32 [[WIDTH:%.*]], 1
126; ROTATE_OLDPM-NEXT:    br i1 [[CMP]], label [[RETURN:%.*]], label [[FOR_COND_PREHEADER:%.*]]
127; ROTATE_OLDPM:       for.cond.preheader:
128; ROTATE_OLDPM-NEXT:    [[CMP13_NOT:%.*]] = icmp eq i32 [[WIDTH]], 1
129; ROTATE_OLDPM-NEXT:    br i1 [[CMP13_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_PREHEADER:%.*]]
130; ROTATE_OLDPM:       for.body.preheader:
131; ROTATE_OLDPM-NEXT:    [[TMP0:%.*]] = add nsw i32 [[WIDTH]], -1
132; ROTATE_OLDPM-NEXT:    br label [[FOR_BODY:%.*]]
133; ROTATE_OLDPM:       for.cond.cleanup:
134; ROTATE_OLDPM-NEXT:    tail call void @f0()
135; ROTATE_OLDPM-NEXT:    tail call void @f2()
136; ROTATE_OLDPM-NEXT:    br label [[RETURN]]
137; ROTATE_OLDPM:       for.body:
138; ROTATE_OLDPM-NEXT:    [[I_04:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
139; ROTATE_OLDPM-NEXT:    tail call void @f0()
140; ROTATE_OLDPM-NEXT:    tail call void @f1()
141; ROTATE_OLDPM-NEXT:    [[INC]] = add nuw nsw i32 [[I_04]], 1
142; ROTATE_OLDPM-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], [[TMP0]]
143; ROTATE_OLDPM-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]]
144; ROTATE_OLDPM:       return:
145; ROTATE_OLDPM-NEXT:    ret void
146;
147; ROTATE_NEWPM-LABEL: @_Z4loopi(
148; ROTATE_NEWPM-NEXT:  entry:
149; ROTATE_NEWPM-NEXT:    [[CMP:%.*]] = icmp slt i32 [[WIDTH:%.*]], 1
150; ROTATE_NEWPM-NEXT:    br i1 [[CMP]], label [[RETURN:%.*]], label [[FOR_COND_PREHEADER:%.*]]
151; ROTATE_NEWPM:       for.cond.preheader:
152; ROTATE_NEWPM-NEXT:    [[CMP13_NOT:%.*]] = icmp eq i32 [[WIDTH]], 1
153; ROTATE_NEWPM-NEXT:    br i1 [[CMP13_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_PREHEADER:%.*]]
154; ROTATE_NEWPM:       for.body.preheader:
155; ROTATE_NEWPM-NEXT:    [[TMP0:%.*]] = add nsw i32 [[WIDTH]], -1
156; ROTATE_NEWPM-NEXT:    [[INC_1:%.*]] = add nuw nsw i32 0, 1
157; ROTATE_NEWPM-NEXT:    br label [[FOR_BODY:%.*]]
158; ROTATE_NEWPM:       for.cond.cleanup:
159; ROTATE_NEWPM-NEXT:    tail call void @f0()
160; ROTATE_NEWPM-NEXT:    tail call void @f2()
161; ROTATE_NEWPM-NEXT:    br label [[RETURN]]
162; ROTATE_NEWPM:       for.body:
163; ROTATE_NEWPM-NEXT:    [[INC_PHI:%.*]] = phi i32 [ [[INC_0:%.*]], [[FOR_BODY_FOR_BODY_CRIT_EDGE:%.*]] ], [ [[INC_1]], [[FOR_BODY_PREHEADER]] ]
164; ROTATE_NEWPM-NEXT:    tail call void @f0()
165; ROTATE_NEWPM-NEXT:    tail call void @f1()
166; ROTATE_NEWPM-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC_PHI]], [[TMP0]]
167; ROTATE_NEWPM-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY_FOR_BODY_CRIT_EDGE]]
168; ROTATE_NEWPM:       for.body.for.body_crit_edge:
169; ROTATE_NEWPM-NEXT:    [[INC_0]] = add nuw nsw i32 [[INC_PHI]], 1
170; ROTATE_NEWPM-NEXT:    br label [[FOR_BODY]]
171; ROTATE_NEWPM:       return:
172; ROTATE_NEWPM-NEXT:    ret void
173;
174entry:
  ; Give `width` and the loop variable `i` their own stack slots, then take
  ; the early-return path when width < 1 (signed compare).
175  %width.addr = alloca i32, align 4
176  %i = alloca i32, align 4
177  store i32 %width, i32* %width.addr, align 4
178  %i1 = load i32, i32* %width.addr, align 4
179  %cmp = icmp slt i32 %i1, 1
180  br i1 %cmp, label %if.then, label %if.end
181
182if.then:
  ; width < 1, so return without calling anything.
183  br label %return
184
185if.end:
  ; Open the lifetime of the 4-byte slot for `i` and initialise i = 0.
186  %i2 = bitcast i32* %i to i8*
187  call void @llvm.lifetime.start.p0i8(i64 4, i8* %i2)
188  store i32 0, i32* %i, align 4
189  br label %for.cond
190
191for.cond:
  ; Loop header. Reload i and width from their slots and keep iterating while
  ; i < width - 1 (signed compare).
192  %i3 = load i32, i32* %i, align 4
193  %i4 = load i32, i32* %width.addr, align 4
194  %sub = sub nsw i32 %i4, 1
195  %cmp1 = icmp slt i32 %i3, %sub
196  br i1 %cmp1, label %for.body, label %for.cond.cleanup
197
198for.cond.cleanup:
  ; Loop exit. Close the lifetime of `i` before running the code after the
  ; loop.
199  %i5 = bitcast i32* %i to i8*
200  call void @llvm.lifetime.end.p0i8(i64 4, i8* %i5)
201  br label %for.end
202
203for.body:
  ; Loop body, calling f0() and then f1().
204  call void @f0()
205  call void @f1()
206  br label %for.inc
207
208for.inc:
  ; ++i, written back to the stack slot, then back to the header.
209  %i6 = load i32, i32* %i, align 4
210  %inc = add nsw i32 %i6, 1
211  store i32 %inc, i32* %i, align 4
212  br label %for.cond
213
214for.end:
  ; Code after the loop, the trailing f0() and f2() calls from the C source.
215  call void @f0()
216  call void @f2()
217  br label %return
218
219return:
220  ret void
221}
222