; This test verifies that the loop vectorizer does not vectorize low-trip-count
; loops that require runtime checks (the trip count is computed from profile
; info), and that loops with a high trip count are still vectorized.
; REQUIRES: asserts
; RUN: opt < %s -loop-vectorize -loop-vectorize-with-block-frequency -S | FileCheck %s

target datalayout = "E-m:e-p:32:32-i64:32-f64:32:64-a:0:32-n32-S128"

; Byte table that every loop below loads from and stores back to.
@tab = common global [32 x i8] zeroinitializer, align 1

define i32 @foo_low_trip_count1(i32 %bound) {
; Simple loop with a low trip count. Should not be vectorized.
; The latch branch carries !prof !1 (branch_weights 100, 0): weight 100 on the
; exit edge to %for.end and weight 0 on the back edge to %for.body.

; CHECK-LABEL: @foo_low_trip_count1(
; CHECK-NOT: <{{[0-9]+}} x i8>

entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %arrayidx = getelementptr inbounds [32 x i8], [32 x i8]* @tab, i32 0, i32 %i.08
  %0 = load i8, i8* %arrayidx, align 1
  ; Rewrite the element in place: 0 becomes 2, anything else becomes 1.
  %cmp1 = icmp eq i8 %0, 0
  %. = select i1 %cmp1, i8 2, i8 1
  store i8 %., i8* %arrayidx, align 1
  %inc = add nsw i32 %i.08, 1
  ; The exit test uses the pre-increment induction value.
  %exitcond = icmp eq i32 %i.08, %bound
  br i1 %exitcond, label %for.end, label %for.body, !prof !1

for.end:                                          ; preds = %for.body
  ret i32 0
}

define i32 @foo_low_trip_count2(i32 %bound) !prof !0 {
; The loop has the same invocation count as the function (the function carries
; !prof !0, function_entry_count 100), but it has a low trip count per
; invocation and is not worth vectorizing.

; CHECK-LABEL: @foo_low_trip_count2(
; CHECK-NOT: <{{[0-9]+}} x i8>

entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %arrayidx = getelementptr inbounds [32 x i8], [32 x i8]* @tab, i32 0, i32 %i.08
  %0 = load i8, i8* %arrayidx, align 1
  ; Rewrite the element in place: 0 becomes 2, anything else becomes 1.
  %cmp1 = icmp eq i8 %0, 0
  %. = select i1 %cmp1, i8 2, i8 1
  store i8 %., i8* %arrayidx, align 1
  %inc = add nsw i32 %i.08, 1
  ; The exit test uses the pre-increment induction value.
  %exitcond = icmp eq i32 %i.08, %bound
  ; !prof !1: weight 100 on the exit edge, weight 0 on the back edge.
  br i1 %exitcond, label %for.end, label %for.body, !prof !1

for.end:                                          ; preds = %for.body
  ret i32 0
}

define i32 @foo_low_trip_count3(i1 %cond, i32 %bound) !prof !0 {
; The loop has a low invocation count compared to the function invocation
; count, but has a high trip count per invocation. Vectorize it.
; The branch weights expected on the vector and remainder loops are checked via
; the [[LP3]] and [[LP6]] patterns near the end of this file.

; CHECK-LABEL: @foo_low_trip_count3(
; CHECK: [[VECTOR_BODY:vector\.body]]:
; CHECK: br i1 [[TMP9:%.*]], label [[MIDDLE_BLOCK:%.*]], label %[[VECTOR_BODY]], !prof [[LP3:\!.*]],
; CHECK: [[FOR_BODY:for\.body]]:
; CHECK: br i1 [[EXITCOND:%.*]], label [[FOR_END_LOOPEXIT:%.*]], label %[[FOR_BODY]], !prof [[LP6:\!.*]],
entry:
  ; !prof !2: weight 10 on the edge into the loop, weight 90 on the edge that
  ; skips it.
  br i1 %cond, label %for.preheader, label %for.end, !prof !2

for.preheader:                                    ; preds = %entry
  br label %for.body

for.body:                                         ; preds = %for.body, %for.preheader
  %i.08 = phi i32 [ 0, %for.preheader ], [ %inc, %for.body ]
  %arrayidx = getelementptr inbounds [32 x i8], [32 x i8]* @tab, i32 0, i32 %i.08
  %0 = load i8, i8* %arrayidx, align 1
  ; Rewrite the element in place: 0 becomes 2, anything else becomes 1.
  %cmp1 = icmp eq i8 %0, 0
  %. = select i1 %cmp1, i8 2, i8 1
  store i8 %., i8* %arrayidx, align 1
  %inc = add nsw i32 %i.08, 1
  %exitcond = icmp eq i32 %i.08, %bound
  ; !prof !3: weight 10 on the exit edge, weight 10000 on the back edge.
  br i1 %exitcond, label %for.end, label %for.body, !prof !3

for.end:                                          ; preds = %for.body, %entry
  ret i32 0
}

define i32 @foo_low_trip_count_icmp_sgt(i32 %bound) {
; Simple loop with a low trip count and an inequality test for the exit.
; Should not be vectorized.

; CHECK-LABEL: @foo_low_trip_count_icmp_sgt(
; CHECK-NOT: <{{[0-9]+}} x i8>

entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %arrayidx = getelementptr inbounds [32 x i8], [32 x i8]* @tab, i32 0, i32 %i.08
  %0 = load i8, i8* %arrayidx, align 1
  ; Rewrite the element in place: 0 becomes 2, anything else becomes 1.
  %cmp1 = icmp eq i8 %0, 0
  %. = select i1 %cmp1, i8 2, i8 1
  store i8 %., i8* %arrayidx, align 1
  %inc = add nsw i32 %i.08, 1
  ; Exit once the pre-increment induction value is (signed) greater than %bound.
  %exitcond = icmp sgt i32 %i.08, %bound
  ; !prof !1: weight 100 on the exit edge, weight 0 on the back edge.
  br i1 %exitcond, label %for.end, label %for.body, !prof !1

for.end:                                          ; preds = %for.body
  ret i32 0
}

define i32 @const_low_trip_count() {
; Simple loop with a constant, small trip count and no profiling info.
; The body runs for %i.08 = 0, 1, 2 (the exit test reads the pre-increment
; value), and no vector type is expected in the output.

; CHECK-LABEL: @const_low_trip_count
; CHECK-NOT: <{{[0-9]+}} x i8>

entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %arrayidx = getelementptr inbounds [32 x i8], [32 x i8]* @tab, i32 0, i32 %i.08
  %0 = load i8, i8* %arrayidx, align 1
  ; Rewrite the element in place: 0 becomes 2, anything else becomes 1.
  %cmp1 = icmp eq i8 %0, 0
  %. = select i1 %cmp1, i8 2, i8 1
  store i8 %., i8* %arrayidx, align 1
  %inc = add nsw i32 %i.08, 1
  ; Note the inverted successor order: true continues the loop, false exits.
  %exitcond = icmp slt i32 %i.08, 2
  br i1 %exitcond, label %for.body, label %for.end

for.end:                                          ; preds = %for.body
  ret i32 0
}

define i32 @const_large_trip_count() {
; Simple loop with a constant, large trip count and no profiling info.
; A vector type is expected in the output.
; NOTE(review): the induction variable runs up to 1000 while @tab has only 32
; elements; presumably irrelevant to what the test checks -- confirm.

; CHECK-LABEL: @const_large_trip_count
; CHECK: <{{[0-9]+}} x i8>

entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %arrayidx = getelementptr inbounds [32 x i8], [32 x i8]* @tab, i32 0, i32 %i.08
  %0 = load i8, i8* %arrayidx, align 1
  ; Rewrite the element in place: 0 becomes 2, anything else becomes 1.
  %cmp1 = icmp eq i8 %0, 0
  %. = select i1 %cmp1, i8 2, i8 1
  store i8 %., i8* %arrayidx, align 1
  %inc = add nsw i32 %i.08, 1
  ; True continues the loop, false exits.
  %exitcond = icmp slt i32 %i.08, 1000
  br i1 %exitcond, label %for.body, label %for.end

for.end:                                          ; preds = %for.body
  ret i32 0
}

define i32 @const_small_trip_count_step() {
; Simple loop with a static, small trip count and no profiling info.
; The induction variable advances by 5, so the body runs for %i.08 = 0, 5, 10,
; and no vector type is expected in the output.

; CHECK-LABEL: @const_small_trip_count_step
; CHECK-NOT: <{{[0-9]+}} x i8>

entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %arrayidx = getelementptr inbounds [32 x i8], [32 x i8]* @tab, i32 0, i32 %i.08
  %0 = load i8, i8* %arrayidx, align 1
  ; Rewrite the element in place: 0 becomes 2, anything else becomes 1.
  %cmp1 = icmp eq i8 %0, 0
  %. = select i1 %cmp1, i8 2, i8 1
  store i8 %., i8* %arrayidx, align 1
  %inc = add nsw i32 %i.08, 5
  ; True continues the loop, false exits.
  %exitcond = icmp slt i32 %i.08, 10
  br i1 %exitcond, label %for.body, label %for.end

for.end:                                          ; preds = %for.body
  ret i32 0
}

define i32 @const_trip_over_profile() {
; A constant trip count takes precedence over profile data: the latch branch
; carries !prof !1, yet a vector type is still expected in the output.
; NOTE(review): the induction variable runs up to 1000 while @tab has only 32
; elements; presumably irrelevant to what the test checks -- confirm.

; CHECK-LABEL: @const_trip_over_profile
; CHECK: <{{[0-9]+}} x i8>

entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %arrayidx = getelementptr inbounds [32 x i8], [32 x i8]* @tab, i32 0, i32 %i.08
  %0 = load i8, i8* %arrayidx, align 1
  ; Rewrite the element in place: 0 becomes 2, anything else becomes 1.
  %cmp1 = icmp eq i8 %0, 0
  %. = select i1 %cmp1, i8 2, i8 1
  store i8 %., i8* %arrayidx, align 1
  %inc = add nsw i32 %i.08, 1
  ; True continues the loop, false exits; with this successor order the
  ; !prof !1 weights are 100 for %for.body and 0 for %for.end.
  %exitcond = icmp slt i32 %i.08, 1000
  br i1 %exitcond, label %for.body, label %for.end, !prof !1

for.end:                                          ; preds = %for.body
  ret i32 0
}

; Expected branch weights for @foo_low_trip_count3 ([[LP3]] is captured on the
; vector.body latch, [[LP6]] on the for.body latch):
; CHECK: [[LP3]] = !{!"branch_weights", i32 10, i32 2490}
; CHECK: [[LP6]] = !{!"branch_weights", i32 10, i32 0}
; The original loop has latchExitWeight=10 and backedgeTakenWeight=10,000,
; therefore estimatedBackedgeTakenCount=1,000 and estimatedTripCount=1,001.
; Vectorizing by 4 produces estimatedTripCounts of 1,001/4=250 and 1,001%4=1
; for the vectorized and remainder loops, respectively. Their
; estimatedBackedgeTakenCounts are therefore 249 and 0, and so the weights
; recorded with a loop invocation weight of 10 are the above {10, 2490} and
; {10, 0}.

; !0: function entry count, attached to the functions marked `!prof !0`.
; !1-!3: branch weights, listed in the same order as the successors of the
;        branch they are attached to.
!0 = !{!"function_entry_count", i64 100}
!1 = !{!"branch_weights", i32 100, i32 0}
!2 = !{!"branch_weights", i32 10, i32 90}
!3 = !{!"branch_weights", i32 10, i32 10000}