; RUN: opt < %s -loop-unroll -pragma-unroll-threshold=1024 -S | FileCheck %s
; RUN: opt < %s -loop-unroll -loop-unroll -pragma-unroll-threshold=1024 -S | FileCheck %s
;
; Run loop unrolling twice to verify that loop unrolling metadata is properly
; removed and further unrolling is disabled after the pass is run once.

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; loop4 contains a small loop which should be completely unrolled by
; the default unrolling heuristics.  It serves as a control for the
; unroll(disable) pragma test loop4_with_disable.
;
; CHECK-LABEL: @loop4(
; CHECK-NOT: br i1
define void @loop4(i32* nocapture %a) {
entry:
  br label %for.body

; Increments a[0..3] in place; the trip count is the constant 4.
for.body:                                         ; preds = %for.body, %entry
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %elt.ptr = getelementptr inbounds i32, i32* %a, i64 %iv
  %elt = load i32, i32* %elt.ptr, align 4
  %elt.inc = add nsw i32 %elt, 1
  store i32 %elt.inc, i32* %elt.ptr, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %done = icmp eq i64 %iv.next, 4
  br i1 %done, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

; #pragma clang loop unroll(disable)
;
; Same loop body as loop4, but the latch branch carries loop metadata !1.
; The directives below expect exactly one store followed by a conditional
; branch, i.e. the loop is left rolled.
;
; CHECK-LABEL: @loop4_with_disable(
; CHECK: store i32
; CHECK-NOT: store i32
; CHECK: br i1
define void @loop4_with_disable(i32* nocapture %a) {
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %elt.ptr = getelementptr inbounds i32, i32* %a, i64 %iv
  %elt = load i32, i32* %elt.ptr, align 4
  %elt.inc = add nsw i32 %elt, 1
  store i32 %elt.inc, i32* %elt.ptr, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %done = icmp eq i64 %iv.next, 4
  br i1 %done, label %for.end, label %for.body, !llvm.loop !1

for.end:                                          ; preds = %for.body
  ret void
}
!1 =
!{!1, !2}
!2 = !{!"llvm.loop.unroll.disable"}

; loop64 has a high enough count that it should *not* be unrolled by
; the default unrolling heuristic.  It serves as the control for the
; unroll(full) pragma tests loop64_with_.* below.
;
; CHECK-LABEL: @loop64(
; CHECK: store i32
; CHECK-NOT: store i32
; CHECK: br i1
define void @loop64(i32* nocapture %a) {
entry:
  br label %for.body

; Increments a[0..63] in place; the trip count is the constant 64.
for.body:                                         ; preds = %for.body, %entry
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %elt.ptr = getelementptr inbounds i32, i32* %a, i64 %iv
  %elt = load i32, i32* %elt.ptr, align 4
  %elt.inc = add nsw i32 %elt, 1
  store i32 %elt.inc, i32* %elt.ptr, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %done = icmp eq i64 %iv.next, 64
  br i1 %done, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

; #pragma clang loop unroll(full)
; Loop should be fully unrolled.
;
; CHECK-LABEL: @loop64_with_full(
; CHECK-NOT: br i1
define void @loop64_with_full(i32* nocapture %a) {
entry:
  br label %for.body

; Same body as loop64; the latch branch carries loop metadata !3.
for.body:                                         ; preds = %for.body, %entry
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %elt.ptr = getelementptr inbounds i32, i32* %a, i64 %iv
  %elt = load i32, i32* %elt.ptr, align 4
  %elt.inc = add nsw i32 %elt, 1
  store i32 %elt.inc, i32* %elt.ptr, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %done = icmp eq i64 %iv.next, 64
  br i1 %done, label %for.end, label %for.body, !llvm.loop !3

for.end:                                          ; preds = %for.body
  ret void
}
!3 = !{!3, !4}
!4 = !{!"llvm.loop.unroll.full"}

; #pragma clang loop unroll(full)
; Loop should be fully unrolled, even for optsize.
;
; CHECK-LABEL: @loop64_with_full_optsize(
; CHECK-NOT: br i1
define void @loop64_with_full_optsize(i32* nocapture %a) optsize {
entry:
  br label %for.body

; 64-iteration increment loop in an optsize function; the latch branch
; references loop metadata !3.
for.body:                                         ; preds = %for.body, %entry
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %elt.ptr = getelementptr inbounds i32, i32* %a, i64 %iv
  %elt = load i32, i32* %elt.ptr, align 4
  %elt.inc = add nsw i32 %elt, 1
  store i32 %elt.inc, i32* %elt.ptr, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %done = icmp eq i64 %iv.next, 64
  br i1 %done, label %for.end, label %for.body, !llvm.loop !3

for.end:                                          ; preds = %for.body
  ret void
}

; #pragma clang loop unroll_count(4)
; Loop should be unrolled 4 times.
;
; The directives below expect exactly four stores ahead of the remaining
; conditional branch.
;
; CHECK-LABEL: @loop64_with_count4(
; CHECK: store i32
; CHECK: store i32
; CHECK: store i32
; CHECK: store i32
; CHECK-NOT: store i32
; CHECK: br i1
define void @loop64_with_count4(i32* nocapture %a) {
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %elt.ptr = getelementptr inbounds i32, i32* %a, i64 %iv
  %elt = load i32, i32* %elt.ptr, align 4
  %elt.inc = add nsw i32 %elt, 1
  store i32 %elt.inc, i32* %elt.ptr, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %done = icmp eq i64 %iv.next, 64
  br i1 %done, label %for.end, label %for.body, !llvm.loop !5

for.end:                                          ; preds = %for.body
  ret void
}
!5 = !{!5, !6}
!6 = !{!"llvm.loop.unroll.count", i32 4}

; #pragma clang loop unroll(full)
; Full unrolling is requested, but loop has a runtime trip count so
; no unrolling should occur.
;
; CHECK-LABEL: @runtime_loop_with_full(
; CHECK: store i32
; CHECK-NOT: store i32
define void @runtime_loop_with_full(i32* nocapture %a, i32 %b) {
entry:
  ; Guard: the loop is entered only when %b > 0.
  %has.iters = icmp sgt i32 %b, 0
  br i1 %has.iters, label %for.body, label %for.end, !llvm.loop !8

; Increments a[0..%b-1] in place; the exit test compares against %b, so the
; trip count is not a compile-time constant.
for.body:                                         ; preds = %entry, %for.body
  %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ]
  %elt.ptr = getelementptr inbounds i32, i32* %a, i64 %iv
  %elt = load i32, i32* %elt.ptr, align 4
  %elt.inc = add nsw i32 %elt, 1
  store i32 %elt.inc, i32* %elt.ptr, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %iv.next.i32 = trunc i64 %iv.next to i32
  %done = icmp eq i32 %iv.next.i32, %b
  br i1 %done, label %for.end, label %for.body, !llvm.loop !8

for.end:                                          ; preds = %for.body, %entry
  ret void
}
!8 = !{!8, !4}

; #pragma clang loop unroll_count(4)
; Loop has a runtime trip count.  Runtime unrolling should occur and loop
; should be duplicated (original and 4x unrolled).
;
; CHECK-LABEL: @runtime_loop_with_count4(
; CHECK: for.body
; CHECK: store
; CHECK: store
; CHECK: store
; CHECK: store
; CHECK-NOT: store
; CHECK: br i1
; CHECK: for.body.epil:
; CHECK: store
; CHECK-NOT: store
; CHECK: br i1
define void @runtime_loop_with_count4(i32* nocapture %a, i32 %b) {
entry:
  ; Guard: the loop is entered only when %b > 0.
  %has.iters = icmp sgt i32 %b, 0
  br i1 %has.iters, label %for.body, label %for.end, !llvm.loop !9

; Increments a[0..%b-1] in place; the exit test compares against %b.
for.body:                                         ; preds = %entry, %for.body
  %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ]
  %elt.ptr = getelementptr inbounds i32, i32* %a, i64 %iv
  %elt = load i32, i32* %elt.ptr, align 4
  %elt.inc = add nsw i32 %elt, 1
  store i32 %elt.inc, i32* %elt.ptr, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %iv.next.i32 = trunc i64 %iv.next to i32
  %done = icmp eq i32 %iv.next.i32, %b
  br i1 %done, label %for.end, label %for.body, !llvm.loop !9

for.end:                                          ; preds = %for.body, %entry
  ret void
}
!9 = !{!9, !6}

; #pragma clang loop unroll_count(1)
; Loop should not be unrolled
;
; CHECK-LABEL: @unroll_1(
; CHECK: store i32
; CHECK-NOT: store i32
; CHECK: br i1
define void @unroll_1(i32* nocapture %a, i32 %b) {
entry:
  br label %for.body

; Constant trip count of 4; %b is not referenced by the body.
for.body:                                         ; preds = %for.body, %entry
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %elt.ptr = getelementptr inbounds i32, i32* %a, i64 %iv
  %elt = load i32, i32* %elt.ptr, align 4
  %elt.inc = add nsw i32 %elt, 1
  store i32 %elt.inc, i32* %elt.ptr, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %done = icmp eq i64 %iv.next, 4
  br i1 %done, label %for.end, label %for.body, !llvm.loop !10

for.end:                                          ; preds = %for.body
  ret void
}
!10 = !{!10, !11}
!11 = !{!"llvm.loop.unroll.count", i32 1}

; #pragma clang loop unroll(full)
; Loop has very high loop count (1 million) and
; full unrolling was requested.
; Loop should be unrolled up to the pragma threshold, but not completely.
;
; CHECK-LABEL: @unroll_1M(
; CHECK: store i32
; CHECK: store i32
; CHECK: br i1
define void @unroll_1M(i32* nocapture %a, i32 %b) {
entry:
  br label %for.body

; Constant trip count of 1000000; %b is not referenced by the body.
for.body:                                         ; preds = %for.body, %entry
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %elt.ptr = getelementptr inbounds i32, i32* %a, i64 %iv
  %elt = load i32, i32* %elt.ptr, align 4
  %elt.inc = add nsw i32 %elt, 1
  store i32 %elt.inc, i32* %elt.ptr, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %done = icmp eq i64 %iv.next, 1000000
  br i1 %done, label %for.end, label %for.body, !llvm.loop !12

for.end:                                          ; preds = %for.body
  ret void
}
!12 = !{!12, !4}

; #pragma clang loop unroll(enable)
; Loop should be fully unrolled.
;
; CHECK-LABEL: @loop64_with_enable(
; CHECK-NOT: br i1
define void @loop64_with_enable(i32* nocapture %a) {
entry:
  br label %for.body

; 64-iteration increment loop; the latch branch carries loop metadata !13.
for.body:                                         ; preds = %for.body, %entry
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %elt.ptr = getelementptr inbounds i32, i32* %a, i64 %iv
  %elt = load i32, i32* %elt.ptr, align 4
  %elt.inc = add nsw i32 %elt, 1
  store i32 %elt.inc, i32* %elt.ptr, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %done = icmp eq i64 %iv.next, 64
  br i1 %done, label %for.end, label %for.body, !llvm.loop !13

for.end:                                          ; preds = %for.body
  ret void
}
!13 = !{!13, !14}
!14 = !{!"llvm.loop.unroll.enable"}

; #pragma clang loop unroll(enable)
; Loop has a runtime trip count and should be runtime unrolled and duplicated
; (original and 8x).
;
; CHECK-LABEL: @runtime_loop_with_enable(
; CHECK: for.body:
; CHECK: store i32
; CHECK: store i32
; CHECK: store i32
; CHECK: store i32
; CHECK: store i32
; CHECK: store i32
; CHECK: store i32
; CHECK: store i32
; CHECK-NOT: store i32
; CHECK: br i1
; CHECK: for.body.epil:
; CHECK: store
; CHECK-NOT: store
; CHECK: br i1
define void @runtime_loop_with_enable(i32* nocapture %a, i32 %b) {
entry:
  ; Guard: the loop is entered only when %b > 0.  The guard branch names the
  ; same loop ID (!15) as the latch below, matching the other runtime
  ; trip-count tests in this file, which use one ID on both branches.
  %has.iters = icmp sgt i32 %b, 0
  br i1 %has.iters, label %for.body, label %for.end, !llvm.loop !15

; Increments a[0..%b-1] in place; the exit test compares against %b, so the
; trip count is not a compile-time constant.
for.body:                                         ; preds = %entry, %for.body
  %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ]
  %elt.ptr = getelementptr inbounds i32, i32* %a, i64 %iv
  %elt = load i32, i32* %elt.ptr, align 4
  %elt.inc = add nsw i32 %elt, 1
  store i32 %elt.inc, i32* %elt.ptr, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %iv.next.i32 = trunc i64 %iv.next to i32
  %done = icmp eq i32 %iv.next.i32, %b
  br i1 %done, label %for.end, label %for.body, !llvm.loop !15

for.end:                                          ; preds = %for.body, %entry
  ret void
}
!15 = !{!15, !14}

; #pragma clang loop unroll_count(3)
; Loop has a runtime trip count.  Runtime unrolling should occur and loop
; should be duplicated (original and 3x unrolled).
;
; CHECK-LABEL: @runtime_loop_with_count3(
; CHECK: for.body
; CHECK: store
; CHECK: store
; CHECK: store
; CHECK-NOT: store
; CHECK: br i1
; CHECK: for.body.epil:
; CHECK: store
; CHECK-NOT: store
; CHECK: br i1
define void @runtime_loop_with_count3(i32* nocapture %a, i32 %b) {
entry:
  ; Guard: the loop is entered only when %b > 0.
  %has.iters = icmp sgt i32 %b, 0
  br i1 %has.iters, label %for.body, label %for.end, !llvm.loop !16

; Increments a[0..%b-1] in place; the exit test compares against %b.
for.body:                                         ; preds = %entry, %for.body
  %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ]
  %elt.ptr = getelementptr inbounds i32, i32* %a, i64 %iv
  %elt = load i32, i32* %elt.ptr, align 4
  %elt.inc = add nsw i32 %elt, 1
  store i32 %elt.inc, i32* %elt.ptr, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %iv.next.i32 = trunc i64 %iv.next to i32
  %done = icmp eq i32 %iv.next.i32, %b
  br i1 %done, label %for.end, label %for.body, !llvm.loop !16

for.end:                                          ; preds = %for.body, %entry
  ret void
}
!16 = !{!16, !17}
!17 = !{!"llvm.loop.unroll.count", i32 3}