1; REQUIRES: asserts 2; RUN: opt < %s -basicaa -loop-interchange -verify-dom-info -verify-loop-info -S -debug 2>&1 | FileCheck %s 3 4@A = common global [500 x [500 x i32]] zeroinitializer 5@X = common global i32 0 6@B = common global [500 x [500 x i32]] zeroinitializer 7@Y = common global i32 0 8 9;; for( int i=1;i<N;i++) 10;; for( int j=1;j<N;j++) 11;; X+=A[j][i]; 12 13;; Loop is interchanged check that the phi nodes are split and the promoted value is used instead of the reduction phi. 14; CHECK: Loops interchanged. 15 16define void @reduction_01(i32 %N) { 17entry: 18 %cmp16 = icmp sgt i32 %N, 1 19 br i1 %cmp16, label %for.body3.lr.ph, label %for.end8 20 21for.body3.lr.ph: ; preds = %entry, %for.cond1.for.inc6_crit_edge 22 %indvars.iv18 = phi i64 [ %indvars.iv.next19, %for.cond1.for.inc6_crit_edge ], [ 1, %entry ] 23 %X.promoted = load i32, i32* @X 24 br label %for.body3 25 26for.body3: ; preds = %for.body3, %for.body3.lr.ph 27 %indvars.iv = phi i64 [ 1, %for.body3.lr.ph ], [ %indvars.iv.next, %for.body3 ] 28 %add15 = phi i32 [ %X.promoted, %for.body3.lr.ph ], [ %add, %for.body3 ] 29 %arrayidx5 = getelementptr inbounds [500 x [500 x i32]], [500 x [500 x i32]]* @A, i64 0, i64 %indvars.iv, i64 %indvars.iv18 30 %0 = load i32, i32* %arrayidx5 31 %add = add nsw i32 %add15, %0 32 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 33 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 34 %exitcond = icmp eq i32 %lftr.wideiv, %N 35 br i1 %exitcond, label %for.cond1.for.inc6_crit_edge, label %for.body3 36 37for.cond1.for.inc6_crit_edge: ; preds = %for.body3 38 store i32 %add, i32* @X 39 %indvars.iv.next19 = add nuw nsw i64 %indvars.iv18, 1 40 %lftr.wideiv20 = trunc i64 %indvars.iv.next19 to i32 41 %exitcond21 = icmp eq i32 %lftr.wideiv20, %N 42 br i1 %exitcond21, label %for.end8, label %for.body3.lr.ph 43 44for.end8: ; preds = %for.cond1.for.inc6_crit_edge, %entry 45 ret void 46} 47 48;; Test for more than 1 reductions inside a loop. 49;; for( int i=1;i<N;i++) 50;; for( int j=1;j<N;j++) 51;; for( int k=1;k<N;k++) { 52;; X+=A[k][j]; 53;; Y+=B[k][i]; 54;; } 55 56;; Loop is interchanged check that the phi nodes are split and the promoted value is used instead of the reduction phi. 57; CHECK: Loops interchanged. 58 59define void @reduction_02(i32 %N) { 60entry: 61 %cmp34 = icmp sgt i32 %N, 1 62 br i1 %cmp34, label %for.cond4.preheader.preheader, label %for.end19 63 64for.cond4.preheader.preheader: ; preds = %entry, %for.inc17 65 %indvars.iv40 = phi i64 [ %indvars.iv.next41, %for.inc17 ], [ 1, %entry ] 66 br label %for.body6.lr.ph 67 68for.body6.lr.ph: ; preds = %for.cond4.for.inc14_crit_edge, %for.cond4.preheader.preheader 69 %indvars.iv36 = phi i64 [ %indvars.iv.next37, %for.cond4.for.inc14_crit_edge ], [ 1, %for.cond4.preheader.preheader ] 70 %X.promoted = load i32, i32* @X 71 %Y.promoted = load i32, i32* @Y 72 br label %for.body6 73 74for.body6: ; preds = %for.body6, %for.body6.lr.ph 75 %indvars.iv = phi i64 [ 1, %for.body6.lr.ph ], [ %indvars.iv.next, %for.body6 ] 76 %add1331 = phi i32 [ %Y.promoted, %for.body6.lr.ph ], [ %add13, %for.body6 ] 77 %add30 = phi i32 [ %X.promoted, %for.body6.lr.ph ], [ %add, %for.body6 ] 78 %arrayidx8 = getelementptr inbounds [500 x [500 x i32]], [500 x [500 x i32]]* @A, i64 0, i64 %indvars.iv, i64 %indvars.iv36 79 %0 = load i32, i32* %arrayidx8 80 %add = add nsw i32 %add30, %0 81 %arrayidx12 = getelementptr inbounds [500 x [500 x i32]], [500 x [500 x i32]]* @B, i64 0, i64 %indvars.iv, i64 %indvars.iv40 82 %1 = load i32, i32* %arrayidx12 83 %add13 = add nsw i32 %add1331, %1 84 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 85 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 86 %exitcond = icmp eq i32 %lftr.wideiv, %N 87 br i1 %exitcond, label %for.cond4.for.inc14_crit_edge, label %for.body6 88 89for.cond4.for.inc14_crit_edge: ; preds = %for.body6 90 store i32 %add, i32* @X 91 store i32 %add13, i32* @Y 92 %indvars.iv.next37 = add nuw nsw i64 %indvars.iv36, 1 93 %lftr.wideiv38 = trunc i64 %indvars.iv.next37 to i32 94 %exitcond39 = icmp eq i32 %lftr.wideiv38, %N 95 br i1 %exitcond39, label %for.inc17, label %for.body6.lr.ph 96 97for.inc17: ; preds = %for.cond4.for.inc14_crit_edge 98 %indvars.iv.next41 = add nuw nsw i64 %indvars.iv40, 1 99 %lftr.wideiv42 = trunc i64 %indvars.iv.next41 to i32 100 %exitcond43 = icmp eq i32 %lftr.wideiv42, %N 101 br i1 %exitcond43, label %for.end19, label %for.cond4.preheader.preheader 102 103for.end19: ; preds = %for.inc17, %entry 104 ret void 105} 106 107;; Not tightly nested. Do not interchange. 108;; for( int i=1;i<N;i++) 109;; for( int j=1;j<N;j++) { 110;; for( int k=1;k<N;k++) { 111;; X+=A[k][j]; 112;; } 113;; Y+=B[j][i]; 114;; } 115 116;; Not tightly nested. Do not interchange. 117;; Not interchanged hence the phi's in the inner loop will not be split. 118; CHECK: Outer loops with reductions are not supported currently. 119 120define void @reduction_03(i32 %N) { 121entry: 122 %cmp35 = icmp sgt i32 %N, 1 123 br i1 %cmp35, label %for.cond4.preheader.lr.ph, label %for.end19 124 125for.cond4.preheader.lr.ph: ; preds = %entry, %for.cond1.for.inc17_crit_edge 126 %indvars.iv41 = phi i64 [ %indvars.iv.next42, %for.cond1.for.inc17_crit_edge ], [ 1, %entry ] 127 %Y.promoted = load i32, i32* @Y 128 br label %for.body6.lr.ph 129 130for.body6.lr.ph: ; preds = %for.cond4.preheader.lr.ph, %for.cond4.for.end_crit_edge 131 %indvars.iv37 = phi i64 [ 1, %for.cond4.preheader.lr.ph ], [ %indvars.iv.next38, %for.cond4.for.end_crit_edge ] 132 %add1334 = phi i32 [ %Y.promoted, %for.cond4.preheader.lr.ph ], [ %add13, %for.cond4.for.end_crit_edge ] 133 %X.promoted = load i32, i32* @X 134 br label %for.body6 135 136for.body6: ; preds = %for.body6, %for.body6.lr.ph 137 %indvars.iv = phi i64 [ 1, %for.body6.lr.ph ], [ %indvars.iv.next, %for.body6 ] 138 %add31 = phi i32 [ %X.promoted, %for.body6.lr.ph ], [ %add, %for.body6 ] 139 %arrayidx8 = getelementptr inbounds [500 x [500 x i32]], [500 x [500 x i32]]* @A, i64 0, i64 %indvars.iv, i64 %indvars.iv37 140 %0 = load i32, i32* %arrayidx8 141 %add = add nsw i32 %add31, %0 142 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 143 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 144 %exitcond = icmp eq i32 %lftr.wideiv, %N 145 br i1 %exitcond, label %for.cond4.for.end_crit_edge, label %for.body6 146 147for.cond4.for.end_crit_edge: ; preds = %for.body6 148 store i32 %add, i32* @X 149 %arrayidx12 = getelementptr inbounds [500 x [500 x i32]], [500 x [500 x i32]]* @B, i64 0, i64 %indvars.iv37, i64 %indvars.iv41 150 %1 = load i32, i32* %arrayidx12 151 %add13 = add nsw i32 %add1334, %1 152 %indvars.iv.next38 = add nuw nsw i64 %indvars.iv37, 1 153 %lftr.wideiv39 = trunc i64 %indvars.iv.next38 to i32 154 %exitcond40 = icmp eq i32 %lftr.wideiv39, %N 155 br i1 %exitcond40, label %for.cond1.for.inc17_crit_edge, label %for.body6.lr.ph 156 157for.cond1.for.inc17_crit_edge: ; preds = %for.cond4.for.end_crit_edge 158 store i32 %add13, i32* @Y 159 %indvars.iv.next42 = add nuw nsw i64 %indvars.iv41, 1 160 %lftr.wideiv43 = trunc i64 %indvars.iv.next42 to i32 161 %exitcond44 = icmp eq i32 %lftr.wideiv43, %N 162 br i1 %exitcond44, label %for.end19, label %for.cond4.preheader.lr.ph 163 164for.end19: ; preds = %for.cond1.for.inc17_crit_edge, %entry 165 ret void 166} 167 168;; Multiple use of reduction not safe. Do not interchange. 169;; for( int i=1;i<N;i++) 170;; for( int j=1;j<N;j++) 171;; for( int k=1;k<N;k++) { 172;; X+=A[k][j]; 173;; Y+=X; 174;; } 175 176;; Not interchanged hence the phi's in the inner loop will not be split. 177; CHECK: Only inner loops with induction or reduction PHI nodes are supported currently. 178 179define void @reduction_04(i32 %N) { 180entry: 181 %cmp28 = icmp sgt i32 %N, 1 182 br i1 %cmp28, label %for.cond4.preheader.preheader, label %for.end15 183 184for.cond4.preheader.preheader: ; preds = %entry, %for.inc13 185 %i.029 = phi i32 [ %inc14, %for.inc13 ], [ 1, %entry ] 186 br label %for.body6.lr.ph 187 188for.body6.lr.ph: ; preds = %for.cond4.for.inc10_crit_edge, %for.cond4.preheader.preheader 189 %indvars.iv30 = phi i64 [ %indvars.iv.next31, %for.cond4.for.inc10_crit_edge ], [ 1, %for.cond4.preheader.preheader ] 190 %X.promoted = load i32, i32* @X 191 %Y.promoted = load i32, i32* @Y 192 br label %for.body6 193 194for.body6: ; preds = %for.body6, %for.body6.lr.ph 195 %indvars.iv = phi i64 [ 1, %for.body6.lr.ph ], [ %indvars.iv.next, %for.body6 ] 196 %add925 = phi i32 [ %Y.promoted, %for.body6.lr.ph ], [ %add9, %for.body6 ] 197 %add24 = phi i32 [ %X.promoted, %for.body6.lr.ph ], [ %add, %for.body6 ] 198 %arrayidx8 = getelementptr inbounds [500 x [500 x i32]], [500 x [500 x i32]]* @A, i64 0, i64 %indvars.iv, i64 %indvars.iv30 199 %0 = load i32, i32* %arrayidx8 200 %add = add nsw i32 %add24, %0 201 %add9 = add nsw i32 %add925, %add 202 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 203 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 204 %exitcond = icmp eq i32 %lftr.wideiv, %N 205 br i1 %exitcond, label %for.cond4.for.inc10_crit_edge, label %for.body6 206 207for.cond4.for.inc10_crit_edge: ; preds = %for.body6 208 store i32 %add, i32* @X 209 store i32 %add9, i32* @Y 210 %indvars.iv.next31 = add nuw nsw i64 %indvars.iv30, 1 211 %lftr.wideiv32 = trunc i64 %indvars.iv.next31 to i32 212 %exitcond33 = icmp eq i32 %lftr.wideiv32, %N 213 br i1 %exitcond33, label %for.inc13, label %for.body6.lr.ph 214 215for.inc13: ; preds = %for.cond4.for.inc10_crit_edge 216 %inc14 = add nuw nsw i32 %i.029, 1 217 %exitcond34 = icmp eq i32 %inc14, %N 218 br i1 %exitcond34, label %for.end15, label %for.cond4.preheader.preheader 219 220for.end15: ; preds = %for.inc13, %entry 221 ret void 222} 223 224;; for( int i=1;i<N;i++) 225;; for( int j=1;j<N;j++) 226;; X+=A[j][i]; 227;; Y = X 228; CHECK: Loops interchanged. 229define void @reduction_05(i32 %N) { 230entry: 231 %cmp16 = icmp sgt i32 %N, 1 232 br i1 %cmp16, label %for.body7.lr.ph, label %for.end8 233 234for.body7.lr.ph: ; preds = %entry, %for.cond1.for.inc6_crit_edge 235 %indvars.iv18 = phi i64 [ %indvars.iv.next19, %for.cond1.for.inc6_crit_edge ], [ 1, %entry ] 236 %X.promoted = load i32, i32* @X 237 br label %for.body7 238 239for.body7: ; preds = %for.body7, %for.body7.lr.ph 240 %indvars.iv = phi i64 [ 1, %for.body7.lr.ph ], [ %indvars.iv.next, %for.body7 ] 241 %add15 = phi i32 [ %X.promoted, %for.body7.lr.ph ], [ %add, %for.body7 ] 242 %arrayidx5 = getelementptr inbounds [500 x [500 x i32]], [500 x [500 x i32]]* @A, i64 0, i64 %indvars.iv, i64 %indvars.iv18 243 %0 = load i32, i32* %arrayidx5 244 %add = add nsw i32 %add15, %0 245 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 246 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 247 %exitcond = icmp eq i32 %lftr.wideiv, %N 248 br i1 %exitcond, label %for.cond1.for.inc6_crit_edge, label %for.body7 249 250for.cond1.for.inc6_crit_edge: ; preds = %for.body7 251 store i32 %add, i32* @X 252 %indvars.iv.next19 = add nuw nsw i64 %indvars.iv18, 1 253 %lftr.wideiv20 = trunc i64 %indvars.iv.next19 to i32 254 %exitcond21 = icmp eq i32 %lftr.wideiv20, %N 255 br i1 %exitcond21, label %for.end8, label %for.body7.lr.ph 256 257for.end8: ; preds = %for.cond1.for.inc6_crit_edge, %entry 258 %add.res = phi i32 [ %add, %for.cond1.for.inc6_crit_edge], [ 0, %entry ] 259 store i32 %add.res, i32* @Y 260 261 ret void 262} 263