; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -O3 -rotation-max-header-size=0 -S -enable-new-pm=0 < %s | FileCheck %s --check-prefix=HOIST
; RUN: opt -passes='default<O3>' -rotation-max-header-size=0 -S < %s | FileCheck %s --check-prefix=HOIST
; RUN: opt -O3 -rotation-max-header-size=1 -S -enable-new-pm=0 < %s | FileCheck %s --check-prefix=HOIST
; RUN: opt -passes='default<O3>' -rotation-max-header-size=1 -S < %s | FileCheck %s --check-prefix=HOIST
; RUN: opt -O3 -rotation-max-header-size=2 -S -enable-new-pm=0 < %s | FileCheck %s --check-prefix=ROTATED_LATER_OLDPM
; RUN: opt -passes='default<O3>' -rotation-max-header-size=2 -S < %s | FileCheck %s --check-prefix=ROTATED_LATER_NEWPM
; RUN: opt -O3 -rotation-max-header-size=3 -S -enable-new-pm=0 < %s | FileCheck %s --check-prefix=ROTATE_OLDPM
; RUN: opt -passes='default<O3>' -rotation-max-header-size=3 -S < %s | FileCheck %s --check-prefix=ROTATE_NEWPM

; This example is produced from very basic C code:
;
;   void f0();
;   void f1();
;   void f2();
;
;   void loop(int width) {
;       if(width < 1)
;           return;
;       for(int i = 0; i < width - 1; ++i) {
;           f0();
;           f1();
;       }
;       f0();
;       f2();
;   }

; We have a choice here. We can either
; * hoist the f0() call into the loop header,
;   * which potentially makes loop rotation unprofitable, since the loop header
;     might have grown above a certain threshold, and such unrotated loops will
;     be ignored by LoopVectorizer, preventing vectorization
;   * or loop rotation will succeed, resulting in some weird PHIs that will also
;     harm vectorization
; * or not hoist the f0() call before performing loop rotation,
;   at the cost of potential code bloat and/or potentially successfully rotating
;   the loops, vectorizing them at the cost of compile time.
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"

; External functions called by the test function; only declarations are
; provided, so their bodies are not visible in this module.
declare void @f0()
declare void @f1()
declare void @f2()

; Lifetime intrinsics; the function below uses them to bracket the stack slot
; that holds the loop counter %i.
declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture)
declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture)

; Test function. The body below is written in an unoptimized style (values are
; kept in allocas and reloaded). It matches the shape of the C function `loop`
; quoted in the file header: an early return when width < 1, a loop calling
; f0() and f1() while i < width - 1, then a trailing f0(); f2().
; The CHECK lines inside the function are autogenerated; regenerate them with
; utils/update_test_checks.py rather than editing them by hand.
define void @_Z4loopi(i32 %width) {
; HOIST-LABEL: @_Z4loopi(
; HOIST-NEXT:  entry:
; HOIST-NEXT:    [[CMP:%.*]] = icmp slt i32 [[WIDTH:%.*]], 1
; HOIST-NEXT:    br i1 [[CMP]], label [[RETURN:%.*]], label [[FOR_COND_PREHEADER:%.*]]
; HOIST:       for.cond.preheader:
; HOIST-NEXT:    [[TMP0:%.*]] = add nsw i32 [[WIDTH]], -1
; HOIST-NEXT:    br label [[FOR_COND:%.*]]
; HOIST:       for.cond:
; HOIST-NEXT:    [[I_0:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY:%.*]] ], [ 0, [[FOR_COND_PREHEADER]] ]
; HOIST-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i32 [[I_0]], [[TMP0]]
; HOIST-NEXT:    tail call void @f0()
; HOIST-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
; HOIST:       for.cond.cleanup:
; HOIST-NEXT:    tail call void @f2()
; HOIST-NEXT:    br label [[RETURN]]
; HOIST:       for.body:
; HOIST-NEXT:    tail call void @f1()
; HOIST-NEXT:    [[INC]] = add nuw i32 [[I_0]], 1
; HOIST-NEXT:    br label [[FOR_COND]]
; HOIST:       return:
; HOIST-NEXT:    ret void
;
; ROTATED_LATER_OLDPM-LABEL: @_Z4loopi(
; ROTATED_LATER_OLDPM-NEXT:  entry:
; ROTATED_LATER_OLDPM-NEXT:    [[CMP:%.*]] = icmp slt i32 [[WIDTH:%.*]], 1
; ROTATED_LATER_OLDPM-NEXT:    br i1 [[CMP]], label [[RETURN:%.*]], label [[FOR_COND_PREHEADER:%.*]]
; ROTATED_LATER_OLDPM:       for.cond.preheader:
; ROTATED_LATER_OLDPM-NEXT:    [[TMP0:%.*]] = add nsw i32 [[WIDTH]], -1
; ROTATED_LATER_OLDPM-NEXT:    [[EXITCOND_NOT3:%.*]] = icmp eq i32 [[TMP0]], 0
; ROTATED_LATER_OLDPM-NEXT:    br i1 [[EXITCOND_NOT3]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY:%.*]]
; ROTATED_LATER_OLDPM:       for.cond.cleanup:
; ROTATED_LATER_OLDPM-NEXT:    tail call void @f0()
; ROTATED_LATER_OLDPM-NEXT:    tail call void @f2()
; ROTATED_LATER_OLDPM-NEXT:    br label [[RETURN]]
; ROTATED_LATER_OLDPM:       for.body:
; ROTATED_LATER_OLDPM-NEXT:    [[I_04:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_COND_PREHEADER]] ]
; ROTATED_LATER_OLDPM-NEXT:    tail call void @f0()
; ROTATED_LATER_OLDPM-NEXT:    tail call void @f1()
; ROTATED_LATER_OLDPM-NEXT:    [[INC]] = add nuw i32 [[I_04]], 1
; ROTATED_LATER_OLDPM-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], [[TMP0]]
; ROTATED_LATER_OLDPM-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]]
; ROTATED_LATER_OLDPM:       return:
; ROTATED_LATER_OLDPM-NEXT:    ret void
;
; ROTATED_LATER_NEWPM-LABEL: @_Z4loopi(
; ROTATED_LATER_NEWPM-NEXT:  entry:
; ROTATED_LATER_NEWPM-NEXT:    [[CMP:%.*]] = icmp slt i32 [[WIDTH:%.*]], 1
; ROTATED_LATER_NEWPM-NEXT:    br i1 [[CMP]], label [[RETURN:%.*]], label [[FOR_COND_PREHEADER:%.*]]
; ROTATED_LATER_NEWPM:       for.cond.preheader:
; ROTATED_LATER_NEWPM-NEXT:    [[TMP0:%.*]] = add nsw i32 [[WIDTH]], -1
; ROTATED_LATER_NEWPM-NEXT:    [[EXITCOND_NOT3:%.*]] = icmp eq i32 [[TMP0]], 0
; ROTATED_LATER_NEWPM-NEXT:    br i1 [[EXITCOND_NOT3]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_COND_PREHEADER_FOR_BODY_CRIT_EDGE:%.*]]
; ROTATED_LATER_NEWPM:       for.cond.preheader.for.body_crit_edge:
; ROTATED_LATER_NEWPM-NEXT:    [[INC_1:%.*]] = add nuw i32 0, 1
; ROTATED_LATER_NEWPM-NEXT:    br label [[FOR_BODY:%.*]]
; ROTATED_LATER_NEWPM:       for.cond.cleanup:
; ROTATED_LATER_NEWPM-NEXT:    tail call void @f0()
; ROTATED_LATER_NEWPM-NEXT:    tail call void @f2()
; ROTATED_LATER_NEWPM-NEXT:    br label [[RETURN]]
; ROTATED_LATER_NEWPM:       for.body:
; ROTATED_LATER_NEWPM-NEXT:    [[INC_PHI:%.*]] = phi i32 [ [[INC_0:%.*]], [[FOR_BODY_FOR_BODY_CRIT_EDGE:%.*]] ], [ [[INC_1]], [[FOR_COND_PREHEADER_FOR_BODY_CRIT_EDGE]] ]
; ROTATED_LATER_NEWPM-NEXT:    tail call void @f0()
; ROTATED_LATER_NEWPM-NEXT:    tail call void @f1()
; ROTATED_LATER_NEWPM-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC_PHI]], [[TMP0]]
; ROTATED_LATER_NEWPM-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY_FOR_BODY_CRIT_EDGE]]
; ROTATED_LATER_NEWPM:       for.body.for.body_crit_edge:
; ROTATED_LATER_NEWPM-NEXT:    [[INC_0]] = add nuw i32 [[INC_PHI]], 1
; ROTATED_LATER_NEWPM-NEXT:    br label [[FOR_BODY]]
; ROTATED_LATER_NEWPM:       return:
; ROTATED_LATER_NEWPM-NEXT:    ret void
;
; ROTATE_OLDPM-LABEL: @_Z4loopi(
; ROTATE_OLDPM-NEXT:  entry:
; ROTATE_OLDPM-NEXT:    [[CMP:%.*]] = icmp slt i32 [[WIDTH:%.*]], 1
; ROTATE_OLDPM-NEXT:    br i1 [[CMP]], label [[RETURN:%.*]], label [[FOR_COND_PREHEADER:%.*]]
; ROTATE_OLDPM:       for.cond.preheader:
; ROTATE_OLDPM-NEXT:    [[CMP13_NOT:%.*]] = icmp eq i32 [[WIDTH]], 1
; ROTATE_OLDPM-NEXT:    br i1 [[CMP13_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_PREHEADER:%.*]]
; ROTATE_OLDPM:       for.body.preheader:
; ROTATE_OLDPM-NEXT:    [[TMP0:%.*]] = add nsw i32 [[WIDTH]], -1
; ROTATE_OLDPM-NEXT:    br label [[FOR_BODY:%.*]]
; ROTATE_OLDPM:       for.cond.cleanup:
; ROTATE_OLDPM-NEXT:    tail call void @f0()
; ROTATE_OLDPM-NEXT:    tail call void @f2()
; ROTATE_OLDPM-NEXT:    br label [[RETURN]]
; ROTATE_OLDPM:       for.body:
; ROTATE_OLDPM-NEXT:    [[I_04:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
; ROTATE_OLDPM-NEXT:    tail call void @f0()
; ROTATE_OLDPM-NEXT:    tail call void @f1()
; ROTATE_OLDPM-NEXT:    [[INC]] = add nuw nsw i32 [[I_04]], 1
; ROTATE_OLDPM-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], [[TMP0]]
; ROTATE_OLDPM-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]]
; ROTATE_OLDPM:       return:
; ROTATE_OLDPM-NEXT:    ret void
;
; ROTATE_NEWPM-LABEL: @_Z4loopi(
; ROTATE_NEWPM-NEXT:  entry:
; ROTATE_NEWPM-NEXT:    [[CMP:%.*]] = icmp slt i32 [[WIDTH:%.*]], 1
; ROTATE_NEWPM-NEXT:    br i1 [[CMP]], label [[RETURN:%.*]], label [[FOR_COND_PREHEADER:%.*]]
; ROTATE_NEWPM:       for.cond.preheader:
; ROTATE_NEWPM-NEXT:    [[CMP13_NOT:%.*]] = icmp eq i32 [[WIDTH]], 1
; ROTATE_NEWPM-NEXT:    br i1 [[CMP13_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_PREHEADER:%.*]]
; ROTATE_NEWPM:       for.body.preheader:
; ROTATE_NEWPM-NEXT:    [[TMP0:%.*]] = add nsw i32 [[WIDTH]], -1
; ROTATE_NEWPM-NEXT:    [[INC_1:%.*]] = add nuw nsw i32 0, 1
; ROTATE_NEWPM-NEXT:    br label [[FOR_BODY:%.*]]
; ROTATE_NEWPM:       for.cond.cleanup:
; ROTATE_NEWPM-NEXT:    tail call void @f0()
; ROTATE_NEWPM-NEXT:    tail call void @f2()
; ROTATE_NEWPM-NEXT:    br label [[RETURN]]
; ROTATE_NEWPM:       for.body:
; ROTATE_NEWPM-NEXT:    [[INC_PHI:%.*]] = phi i32 [ [[INC_0:%.*]], [[FOR_BODY_FOR_BODY_CRIT_EDGE:%.*]] ], [ [[INC_1]], [[FOR_BODY_PREHEADER]] ]
; ROTATE_NEWPM-NEXT:    tail call void @f0()
; ROTATE_NEWPM-NEXT:    tail call void @f1()
; ROTATE_NEWPM-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC_PHI]], [[TMP0]]
; ROTATE_NEWPM-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY_FOR_BODY_CRIT_EDGE]]
; ROTATE_NEWPM:       for.body.for.body_crit_edge:
; ROTATE_NEWPM-NEXT:    [[INC_0]] = add nuw nsw i32 [[INC_PHI]], 1
; ROTATE_NEWPM-NEXT:    br label [[FOR_BODY]]
; ROTATE_NEWPM:       return:
; ROTATE_NEWPM-NEXT:    ret void
;
entry:
  ; Stack slots for the argument and the loop counter.
  %width.addr = alloca i32, align 4
  %i = alloca i32, align 4
  store i32 %width, i32* %width.addr, align 4
  ; Early exit: branch to the return path when width < 1.
  %i1 = load i32, i32* %width.addr, align 4
  %cmp = icmp slt i32 %i1, 1
  br i1 %cmp, label %if.then, label %if.end

if.then:
  br label %return

if.end:
  ; Begin the lifetime of the 4-byte counter slot and initialise it to 0.
  %i2 = bitcast i32* %i to i8*
  call void @llvm.lifetime.start.p0i8(i64 4, i8* %i2)
  store i32 0, i32* %i, align 4
  br label %for.cond

for.cond:
  ; Loop header: reload the counter and width, continue while i < width - 1.
  %i3 = load i32, i32* %i, align 4
  %i4 = load i32, i32* %width.addr, align 4
  %sub = sub nsw i32 %i4, 1
  %cmp1 = icmp slt i32 %i3, %sub
  br i1 %cmp1, label %for.body, label %for.cond.cleanup

for.cond.cleanup:
  ; Loop exit: end the lifetime of the counter slot.
  %i5 = bitcast i32* %i to i8*
  call void @llvm.lifetime.end.p0i8(i64 4, i8* %i5)
  br label %for.end

for.body:
  ; Loop body: f0() followed by f1().
  call void @f0()
  call void @f1()
  br label %for.inc

for.inc:
  ; Increment the counter in its stack slot and go back to the header.
  %i6 = load i32, i32* %i, align 4
  %inc = add nsw i32 %i6, 1
  store i32 %inc, i32* %i, align 4
  br label %for.cond

for.end:
  ; After the loop: f0() again, then f2(). This f0() call and the one in
  ; for.body are the common code whose hoisting the file header discusses.
  call void @f0()
  call void @f2()
  br label %return

return:
  ret void
}