1; RUN: opt < %s -passes='print<loopnest>' -disable-output 2>&1 | FileCheck %s 2 3; Test a perfect 2-dim loop nest of the form: 4; for(i=0; i<nx; ++i) 5; for(j=0; j<nx; ++j) 6; y[i][j] = x[i][j]; 7 8define void @perf_nest_2D_1(i32** %y, i32** %x, i64 signext %nx, i64 signext %ny) { 9; CHECK-LABEL: IsPerfect=true, Depth=1, OutermostLoop: perf_nest_2D_1_loop_j, Loops: ( perf_nest_2D_1_loop_j ) 10; CHECK-LABEL: IsPerfect=true, Depth=2, OutermostLoop: perf_nest_2D_1_loop_i, Loops: ( perf_nest_2D_1_loop_i perf_nest_2D_1_loop_j ) 11entry: 12 br label %perf_nest_2D_1_loop_i 13 14perf_nest_2D_1_loop_i: 15 %i = phi i64 [ 0, %entry ], [ %inc13, %inc_i ] 16 %cmp21 = icmp slt i64 0, %ny 17 br i1 %cmp21, label %perf_nest_2D_1_loop_j, label %inc_i 18 19perf_nest_2D_1_loop_j: 20 %j = phi i64 [ 0, %perf_nest_2D_1_loop_i ], [ %inc, %inc_j ] 21 %arrayidx = getelementptr inbounds i32*, i32** %x, i64 %j 22 %0 = load i32*, i32** %arrayidx, align 8 23 %arrayidx6 = getelementptr inbounds i32, i32* %0, i64 %j 24 %1 = load i32, i32* %arrayidx6, align 4 25 %arrayidx8 = getelementptr inbounds i32*, i32** %y, i64 %j 26 %2 = load i32*, i32** %arrayidx8, align 8 27 %arrayidx11 = getelementptr inbounds i32, i32* %2, i64 %i 28 store i32 %1, i32* %arrayidx11, align 4 29 br label %inc_j 30 31inc_j: 32 %inc = add nsw i64 %j, 1 33 %cmp2 = icmp slt i64 %inc, %ny 34 br i1 %cmp2, label %perf_nest_2D_1_loop_j, label %inc_i 35 36inc_i: 37 %inc13 = add nsw i64 %i, 1 38 %cmp = icmp slt i64 %inc13, %nx 39 br i1 %cmp, label %perf_nest_2D_1_loop_i, label %perf_nest_2D_1_loop_i_end 40 41perf_nest_2D_1_loop_i_end: 42 ret void 43} 44 45; Test a perfect 2-dim loop nest of the form: 46; for (i=0; i<100; ++i) 47; for (j=0; j<100; ++j) 48; y[i][j] = x[i][j]; 49define void @perf_nest_2D_2(i32** %y, i32** %x) { 50; CHECK-LABEL: IsPerfect=true, Depth=1, OutermostLoop: perf_nest_2D_2_loop_j, Loops: ( perf_nest_2D_2_loop_j ) 51; CHECK-LABEL: IsPerfect=true, Depth=2, OutermostLoop: perf_nest_2D_2_loop_i, Loops: ( perf_nest_2D_2_loop_i perf_nest_2D_2_loop_j ) 52entry: 53 br label %perf_nest_2D_2_loop_i 54 55perf_nest_2D_2_loop_i: 56 %i = phi i64 [ 0, %entry ], [ %inc13, %inc_i ] 57 br label %perf_nest_2D_2_loop_j 58 59perf_nest_2D_2_loop_j: 60 %j = phi i64 [ 0, %perf_nest_2D_2_loop_i ], [ %inc, %inc_j ] 61 %arrayidx = getelementptr inbounds i32*, i32** %x, i64 %j 62 %0 = load i32*, i32** %arrayidx, align 8 63 %arrayidx6 = getelementptr inbounds i32, i32* %0, i64 %j 64 %1 = load i32, i32* %arrayidx6, align 4 65 %arrayidx8 = getelementptr inbounds i32*, i32** %y, i64 %j 66 %2 = load i32*, i32** %arrayidx8, align 8 67 %arrayidx11 = getelementptr inbounds i32, i32* %2, i64 %i 68 store i32 %1, i32* %arrayidx11, align 4 69 br label %inc_j 70 71inc_j: 72 %inc = add nsw i64 %j, 1 73 %cmp2 = icmp slt i64 %inc, 100 74 br i1 %cmp2, label %perf_nest_2D_2_loop_j, label %loop_j_end 75 76loop_j_end: 77 br label %inc_i 78 79inc_i: 80 %inc13 = add nsw i64 %i, 1 81 %cmp = icmp slt i64 %inc13, 100 82 br i1 %cmp, label %perf_nest_2D_2_loop_i, label %perf_nest_2D_2_loop_i_end 83 84perf_nest_2D_2_loop_i_end: 85 ret void 86} 87 88; Test a perfect 3-dim loop nest of the form: 89; for (i=0; i<nx; ++i) 90; for (j=0; j<ny; ++j) 91; for (k=0; j<nk; ++k) 92; y[j][j][k] = x[i][j][k]; 93; 94 95define void @perf_nest_3D_1(i32*** %y, i32*** %x, i32 signext %nx, i32 signext %ny, i32 signext %nk) { 96; CHECK-LABEL: IsPerfect=true, Depth=1, OutermostLoop: perf_nest_3D_1_loop_k, Loops: ( perf_nest_3D_1_loop_k ) 97; CHECK-NEXT: IsPerfect=true, Depth=2, OutermostLoop: perf_nest_3D_1_loop_j, Loops: ( perf_nest_3D_1_loop_j perf_nest_3D_1_loop_k ) 98; CHECK-NEXT: IsPerfect=true, Depth=3, OutermostLoop: perf_nest_3D_1_loop_i, Loops: ( perf_nest_3D_1_loop_i perf_nest_3D_1_loop_j perf_nest_3D_1_loop_k ) 99entry: 100 br label %perf_nest_3D_1_loop_i 101 102perf_nest_3D_1_loop_i: 103 %i = phi i32 [ 0, %entry ], [ %inci, %for.inci ] 104 %cmp21 = icmp slt i32 0, %ny 105 br i1 %cmp21, label %perf_nest_3D_1_loop_j, label %for.inci 106 107perf_nest_3D_1_loop_j: 108 %j = phi i32 [ 0, %perf_nest_3D_1_loop_i ], [ %incj, %for.incj ] 109 %cmp22 = icmp slt i32 0, %nk 110 br i1 %cmp22, label %perf_nest_3D_1_loop_k, label %for.incj 111 112perf_nest_3D_1_loop_k: 113 %k = phi i32 [ 0, %perf_nest_3D_1_loop_j ], [ %inck, %for.inck ] 114 %idxprom = sext i32 %i to i64 115 %arrayidx = getelementptr inbounds i32**, i32*** %x, i64 %idxprom 116 %0 = load i32**, i32*** %arrayidx, align 8 117 %idxprom7 = sext i32 %j to i64 118 %arrayidx8 = getelementptr inbounds i32*, i32** %0, i64 %idxprom7 119 %1 = load i32*, i32** %arrayidx8, align 8 120 %idxprom9 = sext i32 %k to i64 121 %arrayidx10 = getelementptr inbounds i32, i32* %1, i64 %idxprom9 122 %2 = load i32, i32* %arrayidx10, align 4 123 %idxprom11 = sext i32 %j to i64 124 %arrayidx12 = getelementptr inbounds i32**, i32*** %y, i64 %idxprom11 125 %3 = load i32**, i32*** %arrayidx12, align 8 126 %idxprom13 = sext i32 %j to i64 127 %arrayidx14 = getelementptr inbounds i32*, i32** %3, i64 %idxprom13 128 %4 = load i32*, i32** %arrayidx14, align 8 129 %idxprom15 = sext i32 %k to i64 130 %arrayidx16 = getelementptr inbounds i32, i32* %4, i64 %idxprom15 131 store i32 %2, i32* %arrayidx16, align 4 132 br label %for.inck 133 134for.inck: 135 %inck = add nsw i32 %k, 1 136 %cmp5 = icmp slt i32 %inck, %nk 137 br i1 %cmp5, label %perf_nest_3D_1_loop_k, label %for.incj 138 139for.incj: 140 %incj = add nsw i32 %j, 1 141 %cmp2 = icmp slt i32 %incj, %ny 142 br i1 %cmp2, label %perf_nest_3D_1_loop_j, label %for.inci 143 144for.inci: 145 %inci = add nsw i32 %i, 1 146 %cmp = icmp slt i32 %inci, %nx 147 br i1 %cmp, label %perf_nest_3D_1_loop_i, label %perf_nest_3D_1_loop_i_end 148 149perf_nest_3D_1_loop_i_end: 150 ret void 151} 152 153; Test a perfect 3-dim loop nest of the form: 154; for (i=0; i<100; ++i) 155; for (j=0; j<100; ++j) 156; for (k=0; j<100; ++k) 157; y[j][j][k] = x[i][j][k]; 158; 159 160define void @perf_nest_3D_2(i32*** %y, i32*** %x) { 161; CHECK-LABEL: IsPerfect=true, Depth=1, OutermostLoop: perf_nest_3D_2_loop_k, Loops: ( perf_nest_3D_2_loop_k ) 162; CHECK-NEXT: IsPerfect=true, Depth=2, OutermostLoop: perf_nest_3D_2_loop_j, Loops: ( perf_nest_3D_2_loop_j perf_nest_3D_2_loop_k ) 163; CHECK-NEXT: IsPerfect=true, Depth=3, OutermostLoop: perf_nest_3D_2_loop_i, Loops: ( perf_nest_3D_2_loop_i perf_nest_3D_2_loop_j perf_nest_3D_2_loop_k ) 164entry: 165 br label %perf_nest_3D_2_loop_i 166 167perf_nest_3D_2_loop_i: 168 %i = phi i32 [ 0, %entry ], [ %inci, %for.inci ] 169 br label %perf_nest_3D_2_loop_j 170 171perf_nest_3D_2_loop_j: 172 %j = phi i32 [ 0, %perf_nest_3D_2_loop_i ], [ %incj, %for.incj ] 173 br label %perf_nest_3D_2_loop_k 174 175perf_nest_3D_2_loop_k: 176 %k = phi i32 [ 0, %perf_nest_3D_2_loop_j ], [ %inck, %for.inck ] 177 %idxprom = sext i32 %i to i64 178 %arrayidx = getelementptr inbounds i32**, i32*** %x, i64 %idxprom 179 %0 = load i32**, i32*** %arrayidx, align 8 180 %idxprom7 = sext i32 %j to i64 181 %arrayidx8 = getelementptr inbounds i32*, i32** %0, i64 %idxprom7 182 %1 = load i32*, i32** %arrayidx8, align 8 183 %idxprom9 = sext i32 %k to i64 184 %arrayidx10 = getelementptr inbounds i32, i32* %1, i64 %idxprom9 185 %2 = load i32, i32* %arrayidx10, align 4 186 %idxprom11 = sext i32 %j to i64 187 %arrayidx12 = getelementptr inbounds i32**, i32*** %y, i64 %idxprom11 188 %3 = load i32**, i32*** %arrayidx12, align 8 189 %idxprom13 = sext i32 %j to i64 190 %arrayidx14 = getelementptr inbounds i32*, i32** %3, i64 %idxprom13 191 %4 = load i32*, i32** %arrayidx14, align 8 192 %idxprom15 = sext i32 %k to i64 193 %arrayidx16 = getelementptr inbounds i32, i32* %4, i64 %idxprom15 194 store i32 %2, i32* %arrayidx16, align 4 195 br label %for.inck 196 197for.inck: 198 %inck = add nsw i32 %k, 1 199 %cmp5 = icmp slt i32 %inck, 100 200 br i1 %cmp5, label %perf_nest_3D_2_loop_k, label %loop_k_end 201 202loop_k_end: 203 br label %for.incj 204 205for.incj: 206 %incj = add nsw i32 %j, 1 207 %cmp2 = icmp slt i32 %incj, 100 208 br i1 %cmp2, label %perf_nest_3D_2_loop_j, label %loop_j_end 209 210loop_j_end: 211 br label %for.inci 212 213for.inci: 214 %inci = add nsw i32 %i, 1 215 %cmp = icmp slt i32 %inci, 100 216 br i1 %cmp, label %perf_nest_3D_2_loop_i, label %perf_nest_3D_2_loop_i_end 217 218perf_nest_3D_2_loop_i_end: 219 ret void 220} 221 222; Test a perfect loop nest with a live out reduction: 223; for (i = 0; i<ni; ++i) 224; if (0<nj) { // guard branch for the j-loop 225; for (j=0; j<nj; j+=1) 226; x+=(i+j); 227; } 228; return x; 229 230define signext i32 @perf_nest_live_out(i32 signext %x, i32 signext %ni, i32 signext %nj) { 231; CHECK-LABEL: IsPerfect=true, Depth=1, OutermostLoop: perf_nest_live_out_loop_j, Loops: ( perf_nest_live_out_loop_j ) 232; CHECK-LABEL: IsPerfect=true, Depth=2, OutermostLoop: perf_nest_live_out_loop_i, Loops: ( perf_nest_live_out_loop_i perf_nest_live_out_loop_j ) 233entry: 234 %cmp4 = icmp slt i32 0, %ni 235 br i1 %cmp4, label %perf_nest_live_out_loop_i.lr.ph, label %for.end7 236 237perf_nest_live_out_loop_i.lr.ph: 238 br label %perf_nest_live_out_loop_i 239 240perf_nest_live_out_loop_i: 241 %x.addr.06 = phi i32 [ %x, %perf_nest_live_out_loop_i.lr.ph ], [ %x.addr.1.lcssa, %for.inc5 ] 242 %i.05 = phi i32 [ 0, %perf_nest_live_out_loop_i.lr.ph ], [ %inc6, %for.inc5 ] 243 %cmp21 = icmp slt i32 0, %nj 244 br i1 %cmp21, label %perf_nest_live_out_loop_j.lr.ph, label %for.inc5 245 246perf_nest_live_out_loop_j.lr.ph: 247 br label %perf_nest_live_out_loop_j 248 249perf_nest_live_out_loop_j: 250 %x.addr.13 = phi i32 [ %x.addr.06, %perf_nest_live_out_loop_j.lr.ph ], [ %add4, %perf_nest_live_out_loop_j ] 251 %j.02 = phi i32 [ 0, %perf_nest_live_out_loop_j.lr.ph ], [ %inc, %perf_nest_live_out_loop_j ] 252 %add = add nsw i32 %i.05, %j.02 253 %add4 = add nsw i32 %x.addr.13, %add 254 %inc = add nsw i32 %j.02, 1 255 %cmp2 = icmp slt i32 %inc, %nj 256 br i1 %cmp2, label %perf_nest_live_out_loop_j, label %for.cond1.for.inc5_crit_edge 257 258for.cond1.for.inc5_crit_edge: 259 %split = phi i32 [ %add4, %perf_nest_live_out_loop_j ] 260 br label %for.inc5 261 262for.inc5: 263 %x.addr.1.lcssa = phi i32 [ %split, %for.cond1.for.inc5_crit_edge ], [ %x.addr.06, %perf_nest_live_out_loop_i ] 264 %inc6 = add nsw i32 %i.05, 1 265 %cmp = icmp slt i32 %inc6, %ni 266 br i1 %cmp, label %perf_nest_live_out_loop_i, label %for.cond.for.end7_crit_edge 267 268for.cond.for.end7_crit_edge: 269 %split7 = phi i32 [ %x.addr.1.lcssa, %for.inc5 ] 270 br label %for.end7 271 272for.end7: 273 %x.addr.0.lcssa = phi i32 [ %split7, %for.cond.for.end7_crit_edge ], [ %x, %entry ] 274 ret i32 %x.addr.0.lcssa 275} 276