; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -O3 -rotation-max-header-size=0 -S -enable-new-pm=0 < %s | FileCheck %s --check-prefix=HOIST
; RUN: opt -passes='default<O3>' -rotation-max-header-size=0 -S < %s | FileCheck %s --check-prefix=HOIST

; RUN: opt -O3 -rotation-max-header-size=1 -S -enable-new-pm=0 < %s | FileCheck %s --check-prefix=HOIST
; RUN: opt -passes='default<O3>' -rotation-max-header-size=1 -S < %s | FileCheck %s --check-prefix=HOIST

; RUN: opt -O3 -rotation-max-header-size=2 -S -enable-new-pm=0 < %s | FileCheck %s --check-prefix=ROTATED_LATER_OLDPM
; RUN: opt -passes='default<O3>' -rotation-max-header-size=2 -S < %s | FileCheck %s --check-prefix=ROTATED_LATER_NEWPM

; RUN: opt -O3 -rotation-max-header-size=3 -S -enable-new-pm=0 < %s | FileCheck %s --check-prefix=ROTATE_OLDPM
; RUN: opt -passes='default<O3>' -rotation-max-header-size=3 -S < %s | FileCheck %s --check-prefix=ROTATE_NEWPM

; This example is produced from very basic C code:
;
;   void f0();
;   void f1();
;   void f2();
;
;   void loop(int width) {
;       if(width < 1)
;           return;
;       for(int i = 0; i < width - 1; ++i) {
;           f0();
;           f1();
;       }
;       f0();
;       f2();
;   }

; We have a choice here. We can either
; * hoist the f0() call into the loop header,
;   * which potentially makes loop rotation unprofitable, since the loop header
;     might have grown above a certain threshold, and such unrotated loops will
;     be ignored by LoopVectorizer, preventing vectorization
;   * or loop rotation will succeed, resulting in some weird PHIs that will also
;     harm vectorization
; * or not hoist the f0() call before performing loop rotation,
;   at the cost of potential code bloat and/or potentially successfully rotating
;   the loops, vectorizing them at the cost of compile time.
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"

; External callees: declarations only, so their bodies are not visible to the
; optimizer in this test.
declare void @f0()
declare void @f1()
declare void @f2()

; Lifetime markers; used below to bracket the live range of the stack slot %i.
declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture)
declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture)

; Test input: IR for loop(int width) in which every local still lives in an
; alloca. @f0 is called at the top of each loop iteration (for.body) and once
; more after the loop (for.end).
;
; The assertions pin where those @f0 calls end up after the pipeline:
;   * under the HOIST prefix there is a single call to @f0, placed in the loop
;     header (for.cond), and the exit test stays in that header;
;   * under the four ROTATED_LATER_* / ROTATE_* prefixes the exit test sits at
;     the end of for.body, and @f0 is called both in for.body and in
;     for.cond.cleanup.
; Which prefix applies to which -rotation-max-header-size value and pass
; manager is determined by the run lines at the top of the file.
define void @_Z4loopi(i32 %width) {
; HOIST-LABEL: @_Z4loopi(
; HOIST-NEXT:  entry:
; HOIST-NEXT:    [[CMP:%.*]] = icmp slt i32 [[WIDTH:%.*]], 1
; HOIST-NEXT:    br i1 [[CMP]], label [[RETURN:%.*]], label [[FOR_COND_PREHEADER:%.*]]
; HOIST:       for.cond.preheader:
; HOIST-NEXT:    [[TMP0:%.*]] = add nsw i32 [[WIDTH]], -1
; HOIST-NEXT:    br label [[FOR_COND:%.*]]
; HOIST:       for.cond:
; HOIST-NEXT:    [[I_0:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY:%.*]] ], [ 0, [[FOR_COND_PREHEADER]] ]
; HOIST-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i32 [[I_0]], [[TMP0]]
; HOIST-NEXT:    tail call void @f0()
; HOIST-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
; HOIST:       for.cond.cleanup:
; HOIST-NEXT:    tail call void @f2()
; HOIST-NEXT:    br label [[RETURN]]
; HOIST:       for.body:
; HOIST-NEXT:    tail call void @f1()
; HOIST-NEXT:    [[INC]] = add nuw i32 [[I_0]], 1
; HOIST-NEXT:    br label [[FOR_COND]]
; HOIST:       return:
; HOIST-NEXT:    ret void
;
; ROTATED_LATER_OLDPM-LABEL: @_Z4loopi(
; ROTATED_LATER_OLDPM-NEXT:  entry:
; ROTATED_LATER_OLDPM-NEXT:    [[CMP:%.*]] = icmp slt i32 [[WIDTH:%.*]], 1
; ROTATED_LATER_OLDPM-NEXT:    br i1 [[CMP]], label [[RETURN:%.*]], label [[FOR_COND_PREHEADER:%.*]]
; ROTATED_LATER_OLDPM:       for.cond.preheader:
; ROTATED_LATER_OLDPM-NEXT:    [[TMP0:%.*]] = add nsw i32 [[WIDTH]], -1
; ROTATED_LATER_OLDPM-NEXT:    [[EXITCOND_NOT3:%.*]] = icmp eq i32 [[TMP0]], 0
; ROTATED_LATER_OLDPM-NEXT:    br i1 [[EXITCOND_NOT3]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY:%.*]]
; ROTATED_LATER_OLDPM:       for.cond.cleanup:
; ROTATED_LATER_OLDPM-NEXT:    tail call void @f0()
; ROTATED_LATER_OLDPM-NEXT:    tail call void @f2()
; ROTATED_LATER_OLDPM-NEXT:    br label [[RETURN]]
; ROTATED_LATER_OLDPM:       for.body:
; ROTATED_LATER_OLDPM-NEXT:    [[I_04:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_COND_PREHEADER]] ]
; ROTATED_LATER_OLDPM-NEXT:    tail call void @f0()
; ROTATED_LATER_OLDPM-NEXT:    tail call void @f1()
; ROTATED_LATER_OLDPM-NEXT:    [[INC]] = add nuw i32 [[I_04]], 1
; ROTATED_LATER_OLDPM-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], [[TMP0]]
; ROTATED_LATER_OLDPM-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]]
; ROTATED_LATER_OLDPM:       return:
; ROTATED_LATER_OLDPM-NEXT:    ret void
;
; ROTATED_LATER_NEWPM-LABEL: @_Z4loopi(
; ROTATED_LATER_NEWPM-NEXT:  entry:
; ROTATED_LATER_NEWPM-NEXT:    [[CMP:%.*]] = icmp slt i32 [[WIDTH:%.*]], 1
; ROTATED_LATER_NEWPM-NEXT:    br i1 [[CMP]], label [[RETURN:%.*]], label [[FOR_COND_PREHEADER:%.*]]
; ROTATED_LATER_NEWPM:       for.cond.preheader:
; ROTATED_LATER_NEWPM-NEXT:    [[TMP0:%.*]] = add nsw i32 [[WIDTH]], -1
; ROTATED_LATER_NEWPM-NEXT:    [[EXITCOND_NOT3:%.*]] = icmp eq i32 [[TMP0]], 0
; ROTATED_LATER_NEWPM-NEXT:    br i1 [[EXITCOND_NOT3]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_COND_PREHEADER_FOR_BODY_CRIT_EDGE:%.*]]
; ROTATED_LATER_NEWPM:       for.cond.preheader.for.body_crit_edge:
; ROTATED_LATER_NEWPM-NEXT:    [[INC_1:%.*]] = add nuw i32 0, 1
; ROTATED_LATER_NEWPM-NEXT:    br label [[FOR_BODY:%.*]]
; ROTATED_LATER_NEWPM:       for.cond.cleanup:
; ROTATED_LATER_NEWPM-NEXT:    tail call void @f0()
; ROTATED_LATER_NEWPM-NEXT:    tail call void @f2()
; ROTATED_LATER_NEWPM-NEXT:    br label [[RETURN]]
; ROTATED_LATER_NEWPM:       for.body:
; ROTATED_LATER_NEWPM-NEXT:    [[INC_PHI:%.*]] = phi i32 [ [[INC_0:%.*]], [[FOR_BODY_FOR_BODY_CRIT_EDGE:%.*]] ], [ [[INC_1]], [[FOR_COND_PREHEADER_FOR_BODY_CRIT_EDGE]] ]
; ROTATED_LATER_NEWPM-NEXT:    tail call void @f0()
; ROTATED_LATER_NEWPM-NEXT:    tail call void @f1()
; ROTATED_LATER_NEWPM-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC_PHI]], [[TMP0]]
; ROTATED_LATER_NEWPM-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY_FOR_BODY_CRIT_EDGE]]
; ROTATED_LATER_NEWPM:       for.body.for.body_crit_edge:
; ROTATED_LATER_NEWPM-NEXT:    [[INC_0]] = add nuw i32 [[INC_PHI]], 1
; ROTATED_LATER_NEWPM-NEXT:    br label [[FOR_BODY]]
; ROTATED_LATER_NEWPM:       return:
; ROTATED_LATER_NEWPM-NEXT:    ret void
;
; ROTATE_OLDPM-LABEL: @_Z4loopi(
; ROTATE_OLDPM-NEXT:  entry:
; ROTATE_OLDPM-NEXT:    [[CMP:%.*]] = icmp slt i32 [[WIDTH:%.*]], 1
; ROTATE_OLDPM-NEXT:    br i1 [[CMP]], label [[RETURN:%.*]], label [[FOR_COND_PREHEADER:%.*]]
; ROTATE_OLDPM:       for.cond.preheader:
; ROTATE_OLDPM-NEXT:    [[CMP13_NOT:%.*]] = icmp eq i32 [[WIDTH]], 1
; ROTATE_OLDPM-NEXT:    br i1 [[CMP13_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_PREHEADER:%.*]]
; ROTATE_OLDPM:       for.body.preheader:
; ROTATE_OLDPM-NEXT:    [[TMP0:%.*]] = add nsw i32 [[WIDTH]], -1
; ROTATE_OLDPM-NEXT:    br label [[FOR_BODY:%.*]]
; ROTATE_OLDPM:       for.cond.cleanup:
; ROTATE_OLDPM-NEXT:    tail call void @f0()
; ROTATE_OLDPM-NEXT:    tail call void @f2()
; ROTATE_OLDPM-NEXT:    br label [[RETURN]]
; ROTATE_OLDPM:       for.body:
; ROTATE_OLDPM-NEXT:    [[I_04:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
; ROTATE_OLDPM-NEXT:    tail call void @f0()
; ROTATE_OLDPM-NEXT:    tail call void @f1()
; ROTATE_OLDPM-NEXT:    [[INC]] = add nuw nsw i32 [[I_04]], 1
; ROTATE_OLDPM-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], [[TMP0]]
; ROTATE_OLDPM-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]]
; ROTATE_OLDPM:       return:
; ROTATE_OLDPM-NEXT:    ret void
;
; ROTATE_NEWPM-LABEL: @_Z4loopi(
; ROTATE_NEWPM-NEXT:  entry:
; ROTATE_NEWPM-NEXT:    [[CMP:%.*]] = icmp slt i32 [[WIDTH:%.*]], 1
; ROTATE_NEWPM-NEXT:    br i1 [[CMP]], label [[RETURN:%.*]], label [[FOR_COND_PREHEADER:%.*]]
; ROTATE_NEWPM:       for.cond.preheader:
; ROTATE_NEWPM-NEXT:    [[CMP13_NOT:%.*]] = icmp eq i32 [[WIDTH]], 1
; ROTATE_NEWPM-NEXT:    br i1 [[CMP13_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_PREHEADER:%.*]]
; ROTATE_NEWPM:       for.body.preheader:
; ROTATE_NEWPM-NEXT:    [[TMP0:%.*]] = add nsw i32 [[WIDTH]], -1
; ROTATE_NEWPM-NEXT:    [[INC_1:%.*]] = add nuw nsw i32 0, 1
; ROTATE_NEWPM-NEXT:    br label [[FOR_BODY:%.*]]
; ROTATE_NEWPM:       for.cond.cleanup:
; ROTATE_NEWPM-NEXT:    tail call void @f0()
; ROTATE_NEWPM-NEXT:    tail call void @f2()
; ROTATE_NEWPM-NEXT:    br label [[RETURN]]
; ROTATE_NEWPM:       for.body:
; ROTATE_NEWPM-NEXT:    [[INC_PHI:%.*]] = phi i32 [ [[INC_0:%.*]], [[FOR_BODY_FOR_BODY_CRIT_EDGE:%.*]] ], [ [[INC_1]], [[FOR_BODY_PREHEADER]] ]
; ROTATE_NEWPM-NEXT:    tail call void @f0()
; ROTATE_NEWPM-NEXT:    tail call void @f1()
; ROTATE_NEWPM-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC_PHI]], [[TMP0]]
; ROTATE_NEWPM-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY_FOR_BODY_CRIT_EDGE]]
; ROTATE_NEWPM:       for.body.for.body_crit_edge:
; ROTATE_NEWPM-NEXT:    [[INC_0]] = add nuw nsw i32 [[INC_PHI]], 1
; ROTATE_NEWPM-NEXT:    br label [[FOR_BODY]]
; ROTATE_NEWPM:       return:
; ROTATE_NEWPM-NEXT:    ret void
;
entry:
  ; Stack slots for the %width argument and the loop counter i.
  %width.addr = alloca i32, align 4
  %i = alloca i32, align 4
  store i32 %width, i32* %width.addr, align 4
  ; Guard: take the early-return path when width < 1.
  %i1 = load i32, i32* %width.addr, align 4
  %cmp = icmp slt i32 %i1, 1
  br i1 %cmp, label %if.then, label %if.end

if.then:
  ; width < 1, nothing to do.
  br label %return

if.end:
  ; Begin the lifetime of the 4-byte counter slot and initialise i = 0.
  %i2 = bitcast i32* %i to i8*
  call void @llvm.lifetime.start.p0i8(i64 4, i8* %i2)
  store i32 0, i32* %i, align 4
  br label %for.cond

for.cond:
  ; Loop header: reload i and width from memory, stay in the loop while
  ; i < width - 1.
  %i3 = load i32, i32* %i, align 4
  %i4 = load i32, i32* %width.addr, align 4
  %sub = sub nsw i32 %i4, 1
  %cmp1 = icmp slt i32 %i3, %sub
  br i1 %cmp1, label %for.body, label %for.cond.cleanup

for.cond.cleanup:
  ; Loop exit: end the lifetime of the counter slot.
  %i5 = bitcast i32* %i to i8*
  call void @llvm.lifetime.end.p0i8(i64 4, i8* %i5)
  br label %for.end

for.body:
  ; Loop body. This @f0 call and the one in for.end are the code common to
  ; both successors of the loop header.
  call void @f0()
  call void @f1()
  br label %for.inc

for.inc:
  ; ++i, performed through the stack slot.
  %i6 = load i32, i32* %i, align 4
  %inc = add nsw i32 %i6, 1
  store i32 %inc, i32* %i, align 4
  br label %for.cond

for.end:
  ; Code after the loop: f0() once more, then f2().
  call void @f0()
  call void @f2()
  br label %return

return:
  ret void
}