; RUN: opt < %s -loop-unroll -pragma-unroll-threshold=1024 -S | FileCheck %s
; RUN: opt < %s -loop-unroll -loop-unroll -pragma-unroll-threshold=1024 -S | FileCheck %s
;
; The second invocation schedules the unroller twice.  It verifies that the
; loop unrolling metadata is properly removed and that further unrolling is
; disabled after the pass has run once.

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; Control for loop4_with_disable: a small loop (trip count 4) that carries no
; unroll metadata.  The default unrolling heuristics should unroll it
; completely, so no conditional branch may remain in the function.
;
; CHECK-LABEL: @loop4(
; CHECK-NOT: br i1
define void @loop4(i32* nocapture %a) {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %slot = getelementptr inbounds i32, i32* %a, i64 %i
  %old = load i32, i32* %slot, align 4
  %bumped = add nsw i32 %old, 1
  store i32 %bumped, i32* %slot, align 4
  %i.next = add nuw nsw i64 %i, 1
  %done = icmp eq i64 %i.next, 4
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void
}

; #pragma clang loop unroll(disable)
;
; Same loop as loop4, but its latch branch carries the unroll.disable hint.
; The loop must stay rolled: exactly one store, followed by the loop branch.
;
; CHECK-LABEL: @loop4_with_disable(
; CHECK: store i32
; CHECK-NOT: store i32
; CHECK: br i1
define void @loop4_with_disable(i32* nocapture %a) {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %slot = getelementptr inbounds i32, i32* %a, i64 %i
  %old = load i32, i32* %slot, align 4
  %bumped = add nsw i32 %old, 1
  store i32 %bumped, i32* %slot, align 4
  %i.next = add nuw nsw i64 %i, 1
  %done = icmp eq i64 %i.next, 4
  br i1 %done, label %exit, label %loop, !llvm.loop !1

exit:                                             ; preds = %loop
  ret void
}
; Loop ID (self-referential first operand) carrying the disable hint.
!1 = !{!1, !2}
!2 = !{!"llvm.loop.unroll.disable"}

; Control for the loop64_with_* pragma tests below: the trip count (64) is
; high enough that the default unrolling heuristic should *not* unroll the
; loop, so a single store and the loop branch must remain.
;
; CHECK-LABEL: @loop64(
; CHECK: store i32
; CHECK-NOT: store i32
; CHECK: br i1
define void @loop64(i32* nocapture %a) {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %slot = getelementptr inbounds i32, i32* %a, i64 %i
  %old = load i32, i32* %slot, align 4
  %bumped = add nsw i32 %old, 1
  store i32 %bumped, i32* %slot, align 4
  %i.next = add nuw nsw i64 %i, 1
  %done = icmp eq i64 %i.next, 64
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void
}

; #pragma clang loop unroll(full)
;
; With the unroll.full hint on the latch branch, the 64-iteration loop should
; be fully unrolled, leaving no conditional branch.
;
; CHECK-LABEL: @loop64_with_enable(
; CHECK-NOT: br i1
define void @loop64_with_enable(i32* nocapture %a) {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %slot = getelementptr inbounds i32, i32* %a, i64 %i
  %old = load i32, i32* %slot, align 4
  %bumped = add nsw i32 %old, 1
  store i32 %bumped, i32* %slot, align 4
  %i.next = add nuw nsw i64 %i, 1
  %done = icmp eq i64 %i.next, 64
  br i1 %done, label %exit, label %loop, !llvm.loop !3

exit:                                             ; preds = %loop
  ret void
}
; Loop ID carrying the full-unroll hint.  !4 is also referenced by the loop
; IDs of later tests in this file.
!3 = !{!3, !4}
!4 = !{!"llvm.loop.unroll.full"}

; #pragma clang loop unroll_count(4)
; Loop should be unrolled 4 times.
;
; CHECK-LABEL: @loop64_with_count4(
; CHECK: store i32
; CHECK: store i32
; CHECK: store i32
; CHECK: store i32
; CHECK-NOT: store i32
; CHECK: br i1
define void @loop64_with_count4(i32* nocapture %a) {
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  %inc = add nsw i32 %0, 1
  store i32 %inc, i32* %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 64
  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !5

for.end:                                          ; preds = %for.body
  ret void
}
; Loop ID (self-referential first operand) requesting an unroll count of 4.
!5 = !{!5, !6}
!6 = !{!"llvm.loop.unroll.count", i32 4}

; #pragma clang loop unroll(full)
; Full unrolling is requested, but loop has a dynamic trip count so
; no unrolling should occur.
;
; The trip count is the runtime value %b.  The loop ID is attached to the
; latch branch in for.body only: the guard branch in entry is outside the
; loop, so a loop ID there would not describe any loop.
;
; CHECK-LABEL: @dynamic_loop_with_enable(
; CHECK: store i32
; CHECK-NOT: store i32
; CHECK: br i1
define void @dynamic_loop_with_enable(i32* nocapture %a, i32 %b) {
entry:
  ; Guard: skip the loop entirely when %b <= 0.
  %cmp3 = icmp sgt i32 %b, 0
  br i1 %cmp3, label %for.body, label %for.end

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  %inc = add nsw i32 %0, 1
  store i32 %inc, i32* %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  ; The exit test compares the truncated induction variable against %b.
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %b
  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !8

for.end:                                          ; preds = %for.body, %entry
  ret void
}
; Loop ID reusing the llvm.loop.unroll.full hint node !4.
!8 = !{!8, !4}

; #pragma clang loop unroll_count(4)
; Loop has a dynamic trip count.
; Unrolling should occur, but no
; conditional branches can be removed.
;
; The checks expect the guard branch first, then four store/branch pairs,
; and no further conditional branch.  The loop ID is attached to the latch
; branch in for.body only: the guard branch in entry is outside the loop, so
; a loop ID there would not describe any loop.
;
; CHECK-LABEL: @dynamic_loop_with_count4(
; CHECK-NOT: store
; CHECK: br i1
; CHECK: store
; CHECK: br i1
; CHECK: store
; CHECK: br i1
; CHECK: store
; CHECK: br i1
; CHECK: store
; CHECK: br i1
; CHECK-NOT: br i1
define void @dynamic_loop_with_count4(i32* nocapture %a, i32 %b) {
entry:
  ; Guard: skip the loop entirely when %b <= 0.
  %cmp3 = icmp sgt i32 %b, 0
  br i1 %cmp3, label %for.body, label %for.end

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  %inc = add nsw i32 %0, 1
  store i32 %inc, i32* %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  ; The exit test compares the truncated induction variable against %b.
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %b
  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !9

for.end:                                          ; preds = %for.body, %entry
  ret void
}
; Loop ID reusing the llvm.loop.unroll.count 4 hint node !6.
!9 = !{!9, !6}

; #pragma clang loop unroll_count(1)
; Loop should not be unrolled
;
; The loop has a constant trip count of 4, but the pragma requests a count of
; 1, so a single store and the loop branch must remain.  %b is unused.
;
; CHECK-LABEL: @unroll_1(
; CHECK: store i32
; CHECK-NOT: store i32
; CHECK: br i1
define void @unroll_1(i32* nocapture %a, i32 %b) {
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  %inc = add nsw i32 %0, 1
  store i32 %inc, i32* %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 4
  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !10

for.end:                                          ; preds = %for.body
  ret void
}
; Loop ID (self-referential first operand) requesting an unroll count of 1.
!10 = !{!10, !11}
!11 = !{!"llvm.loop.unroll.count", i32 1}

; #pragma clang loop unroll(full)
; Loop has very high loop count (1 million) and full unrolling was requested.
; Loop should be unrolled up to the pragma threshold, but not completely.
;
; The checks require at least two stores and a remaining loop branch.
; %b is unused.
;
; CHECK-LABEL: @unroll_1M(
; CHECK: store i32
; CHECK: store i32
; CHECK: br i1
define void @unroll_1M(i32* nocapture %a, i32 %b) {
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  %inc = add nsw i32 %0, 1
  store i32 %inc, i32* %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 1000000
  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !12

for.end:                                          ; preds = %for.body
  ret void
}
; Loop ID reusing the llvm.loop.unroll.full hint node !4.
!12 = !{!12, !4}