; RUN: opt < %s -basic-aa -loop-interchange -pass-remarks-missed='loop-interchange' -pass-remarks-output=%t -S \
; RUN:     -verify-dom-info -verify-loop-info -verify-loop-lcssa -stats 2>&1 | FileCheck %s
; RUN: FileCheck --input-file=%t --check-prefix=REMARKS %s

; Loop-interchange tests for loop nests that carry a reduction through both the
; outer loop (for1) and the inner loop (for2). The first command line verifies
; the transformed IR; the second verifies the optimization remarks written to
; the remarks output file.

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; In test1 the inner reduction PHI %sum.inner is seeded directly by the outer
; reduction PHI %sum.outer, and the expectations below require the nest to be
; reported as interchanged.
; REMARKS: --- !Passed
; REMARKS-NEXT: Pass:            loop-interchange
; REMARKS-NEXT: Name:            Interchanged
; REMARKS-NEXT: Function:        test1

define i64 @test1([100 x [100 x i64]]* %Arr) {
; CHECK-LABEL: @test1(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label [[FOR2_PREHEADER:%.*]]
; CHECK:       for1.header.preheader:
; CHECK-NEXT:    br label [[FOR1_HEADER:%.*]]
; CHECK:       for1.header:
; CHECK-NEXT:    [[INDVARS_IV23:%.*]] = phi i64 [ [[INDVARS_IV_NEXT24:%.*]], [[FOR1_INC:%.*]] ], [ 0, [[FOR1_HEADER_PREHEADER:%.*]] ]
; CHECK-NEXT:    [[SUM_INNER:%.*]] = phi i64 [ [[SUM_INC:%.*]], [[FOR1_INC]] ], [ [[SUM_OUTER:%.*]], [[FOR1_HEADER_PREHEADER]] ]
; CHECK-NEXT:    br label [[FOR2_SPLIT1:%.*]]
; CHECK:       for2.preheader:
; CHECK-NEXT:    br label [[FOR2:%.*]]
; CHECK:       for2:
; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_3:%.*]], [[FOR2_SPLIT:%.*]] ], [ 0, [[FOR2_PREHEADER]] ]
; CHECK-NEXT:    [[SUM_OUTER]] = phi i64 [ [[SUM_INC_LCSSA:%.*]], [[FOR2_SPLIT]] ], [ 0, [[FOR2_PREHEADER]] ]
; CHECK-NEXT:    br label [[FOR1_HEADER_PREHEADER]]
; CHECK:       for2.split1:
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x [100 x i64]], [100 x [100 x i64]]* [[ARR:%.*]], i64 0, i64 [[INDVARS_IV]], i64 [[INDVARS_IV23]]
; CHECK-NEXT:    [[LV:%.*]] = load i64, i64* [[ARRAYIDX]], align 4
; CHECK-NEXT:    [[SUM_INC]] = add i64 [[SUM_INNER]], [[LV]]
; CHECK-NEXT:    [[IV_ORIGINAL:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT:    [[EXIT1_ORIGINAL:%.*]] = icmp eq i64 [[IV_ORIGINAL]], 100
; CHECK-NEXT:    br label [[FOR1_INC]]
; CHECK:       for2.split:
; CHECK-NEXT:    [[SUM_INC_LCSSA]] = phi i64 [ [[SUM_INC]], [[FOR1_INC]] ]
; CHECK-NEXT:    [[INDVARS_IV_NEXT_3]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT:    [[EXIT1:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_3]], 100
; CHECK-NEXT:    br i1 [[EXIT1]], label [[FOR1_LOOPEXIT:%.*]], label [[FOR2]]
; CHECK:       for1.inc:
; CHECK-NEXT:    [[INDVARS_IV_NEXT24]] = add nuw nsw i64 [[INDVARS_IV23]], 1
; CHECK-NEXT:    [[EXIT2:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT24]], 100
; CHECK-NEXT:    br i1 [[EXIT2]], label [[FOR2_SPLIT]], label [[FOR1_HEADER]]
; CHECK:       for1.loopexit:
; CHECK-NEXT:    [[SUM_INC_LCSSA2:%.*]] = phi i64 [ [[SUM_INC_LCSSA]], [[FOR2_SPLIT]] ]
; CHECK-NEXT:    ret i64 [[SUM_INC_LCSSA2]]
;
entry:
  br label %for1.header

for1.header:                                      ; preds = %for1.inc, %entry
  %indvars.iv23 = phi i64 [ 0, %entry ], [ %indvars.iv.next24, %for1.inc ]
  %sum.outer = phi i64 [ 0, %entry ], [ %sum.inc.lcssa, %for1.inc ]
  br label %for2

for2:                                             ; preds = %for2, %for1.header
  %indvars.iv = phi i64 [ 0, %for1.header ], [ %indvars.iv.next.3, %for2 ]
  ; The inner reduction starts from the running value of the outer reduction.
  %sum.inner = phi i64 [ %sum.outer, %for1.header ], [ %sum.inc, %for2 ]
  ; The inner induction variable indexes the first array dimension and the
  ; outer induction variable indexes the second.
  %arrayidx = getelementptr inbounds [100 x [100 x i64]], [100 x [100 x i64]]* %Arr, i64 0, i64 %indvars.iv, i64 %indvars.iv23
  %lv = load i64, i64* %arrayidx, align 4
  %sum.inc = add i64 %sum.inner, %lv
  %indvars.iv.next.3 = add nuw nsw i64 %indvars.iv, 1
  %exit1 = icmp eq i64 %indvars.iv.next.3, 100
  br i1 %exit1, label %for1.inc, label %for2

for1.inc:                                         ; preds = %for2
  %sum.inc.lcssa = phi i64 [ %sum.inc, %for2 ]
  %indvars.iv.next24 = add nuw nsw i64 %indvars.iv23, 1
  %exit2 = icmp eq i64 %indvars.iv.next24, 100
  br i1 %exit2, label %for1.loopexit, label %for1.header

for1.loopexit:                                    ; preds = %for1.inc
  %sum.inc.lcssa2 = phi i64 [ %sum.inc.lcssa, %for1.inc ]
  ret i64 %sum.inc.lcssa2
}

; In this test case, the inner reduction PHI %inner does not involve the outer
; reduction PHI %sum.outer, so the loops must not be interchanged.
; REMARKS: --- !Missed
; REMARKS-NEXT: Pass:            loop-interchange
; REMARKS-NEXT: Name:            UnsupportedPHIOuter
; REMARKS-NEXT: Function:        test2

define i64 @test2([100 x [100 x i64]]* %Arr) {
entry:
  br label %for1.header

for1.header:                                      ; preds = %for1.inc, %entry
  %indvars.iv23 = phi i64 [ 0, %entry ], [ %indvars.iv.next24, %for1.inc ]
  ; %sum.outer is still updated from the inner loop's result, but nothing in
  ; the inner loop reads it.
  %sum.outer = phi i64 [ 0, %entry ], [ %sum.inc.lcssa, %for1.inc ]
  br label %for2

for2:                                             ; preds = %for2, %for1.header
  %indvars.iv = phi i64 [ 0, %for1.header ], [ %indvars.iv.next.3, %for2 ]
  ; The inner PHI is seeded with the outer induction variable %indvars.iv23
  ; instead of %sum.outer, so the two PHIs do not form one reduction chain.
  %inner = phi i64 [ %indvars.iv23, %for1.header ], [ %sum.inc, %for2 ]
  %arrayidx = getelementptr inbounds [100 x [100 x i64]], [100 x [100 x i64]]* %Arr, i64 0, i64 %indvars.iv, i64 %indvars.iv23
  %lv = load i64, i64* %arrayidx, align 4
  %sum.inc = add i64 %inner, %lv
  %indvars.iv.next.3 = add nuw nsw i64 %indvars.iv, 1
  %exit1 = icmp eq i64 %indvars.iv.next.3, 100
  br i1 %exit1, label %for1.inc, label %for2

for1.inc:                                         ; preds = %for2
  %sum.inc.lcssa = phi i64 [ %sum.inc, %for2 ]
  %indvars.iv.next24 = add nuw nsw i64 %indvars.iv23, 1
  %exit2 = icmp eq i64 %indvars.iv.next24, 100
  br i1 %exit2, label %for1.loopexit, label %for1.header

for1.loopexit:                                    ; preds = %for1.inc
  %sum.inc.lcssa2 = phi i64 [ %sum.inc.lcssa, %for1.inc ]
  ret i64 %sum.inc.lcssa2
}

; Check that we do not interchange if there is an additional instruction
; between the outer and inner reduction PHIs.
; REMARKS: --- !Missed
; REMARKS-NEXT: Pass:            loop-interchange
; REMARKS-NEXT: Name:            UnsupportedPHIOuter
; REMARKS-NEXT: Function:        test3

define i64 @test3([100 x [100 x i64]]* %Arr) {
entry:
  br label %for1.header

for1.header:                                      ; preds = %for1.inc, %entry
  %indvars.iv23 = phi i64 [ 0, %entry ], [ %indvars.iv.next24, %for1.inc ]
  %sum.outer = phi i64 [ 0, %entry ], [ %sum.inc.lcssa, %for1.inc ]
  ; Extra instruction sitting between the outer PHI and the inner PHI: the
  ; inner reduction is seeded with %so rather than with %sum.outer itself.
  %so = add i64 %sum.outer, 10
  br label %for2

for2:                                             ; preds = %for2, %for1.header
  %indvars.iv = phi i64 [ 0, %for1.header ], [ %indvars.iv.next.3, %for2 ]
  %sum.inner = phi i64 [ %so, %for1.header ], [ %sum.inc, %for2 ]
  %arrayidx = getelementptr inbounds [100 x [100 x i64]], [100 x [100 x i64]]* %Arr, i64 0, i64 %indvars.iv, i64 %indvars.iv23
  %lv = load i64, i64* %arrayidx, align 4
  %sum.inc = add i64 %sum.inner, %lv
  %indvars.iv.next.3 = add nuw nsw i64 %indvars.iv, 1
  %exit1 = icmp eq i64 %indvars.iv.next.3, 100
  br i1 %exit1, label %for1.inc, label %for2

for1.inc:                                         ; preds = %for2
  %sum.inc.lcssa = phi i64 [ %sum.inc, %for2 ]
  %indvars.iv.next24 = add nuw nsw i64 %indvars.iv23, 1
  %exit2 = icmp eq i64 %indvars.iv.next24, 100
  br i1 %exit2, label %for1.loopexit, label %for1.header

for1.loopexit:                                    ; preds = %for1.inc
  %sum.inc.lcssa2 = phi i64 [ %sum.inc.lcssa, %for1.inc ]
  ret i64 %sum.inc.lcssa2
}