; RUN: opt -S -loop-vectorize -force-vector-width=8 -force-vector-interleave=1 < %s | FileCheck %s -check-prefix=VF8
; RUN: opt -S -loop-vectorize -force-vector-width=1 -force-vector-interleave=4 < %s | FileCheck %s -check-prefix=VF1
;
; The first invocation forces a vector width of 8 with no interleaving; the
; second forces a width of 1 with an interleave count of 4, so the induction
; stays scalar in that configuration.

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

; Given a loop with an induction variable which is being
; truncated/extended using casts that had been proven to
; be redundant under a runtime test, we want to make sure
; that these casts do not get vectorized/scalarized/widened.
; This is the case for inductions whose SCEV expression is
; of the form "ExtTrunc(%phi) + %step", where "ExtTrunc"
; can be a result of the IR sequences we check below.
;
; See also pr30654.
;

; Case1: Check the following induction pattern:
;
;  %p.09 = phi i32 [ 0, %for.body.lr.ph ], [ %add, %for.body ]
;  %sext = shl i32 %p.09, 24
;  %conv = ashr exact i32 %sext, 24
;  %add = add nsw i32 %conv, %step
;
; This is the case in the following code:
;
; void doit1(int n, int step) {
;   int i;
;   char p = 0;
;   for (i = 0; i < n; i++) {
;     a[i] = p;
;     p = p + step;
;   }
; }
;
; The "ExtTrunc" IR sequence here is:
;  "%sext = shl i32 %p.09, 24"
;  "%conv = ashr exact i32 %sext, 24"
; We check that it does not appear in the vector loop body, whether
; we vectorize or scalarize the induction.
; In the case of widened induction, this means that the induction phi
; is directly used, without shl/ashr on the way.
;
; Expected output for Case1. With a vector width of 8 the widened induction
; phi must be stored directly; with a width of 1 no i32 shl may appear
; between vector.body and middle.block.

; VF8-LABEL: @doit1
; VF8: vector.body:
; VF8: %vec.ind = phi <8 x i32>
; VF8: store <8 x i32> %vec.ind
; VF8: middle.block:

; VF1-LABEL: @doit1
; VF1: vector.body:
; VF1-NOT: %{{.*}} = shl i32
; VF1: middle.block:

; Destination array written by @doit1 and @doit3.
@a = common local_unnamed_addr global [250 x i32] zeroinitializer, align 16

; Stores a sign-extended 8-bit induction into @a on every iteration and then
; advances it by %step.
define void @doit1(i32 %n, i32 %step) {
entry:
  ; Skip the loop entirely unless %n is strictly positive.
  %cmp7 = icmp sgt i32 %n, 0
  br i1 %cmp7, label %for.body.lr.ph, label %for.end

for.body.lr.ph:
  ; The loop counter is 64 bits wide, so widen the trip count to match.
  %wide.trip.count = zext i32 %n to i64
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
  ; Induction under test: starts at 0 and is fed back from %add.
  %p.09 = phi i32 [ 0, %for.body.lr.ph ], [ %add, %for.body ]
  ; shl 24 followed by ashr 24 sign-extends the low 8 bits of %p.09; this is
  ; the "ExtTrunc" sequence that must not survive in the vector loop.
  %sext = shl i32 %p.09, 24
  %conv = ashr exact i32 %sext, 24
  %arrayidx = getelementptr inbounds [250 x i32], [250 x i32]* @a, i64 0, i64 %indvars.iv
  store i32 %conv, i32* %arrayidx, align 4
  ; The step is applied to the extended value, not to the raw phi.
  %add = add nsw i32 %conv, %step
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
  br i1 %exitcond, label %for.end.loopexit, label %for.body

for.end.loopexit:
  br label %for.end

for.end:
  ret void
}


; Case2: Another variant of the above pattern is where the induction variable
; is used only for address computation (i.e. it is a GEP index) and therefore
; the induction is not vectorized but rather only the step is widened.
;
; This is the case in the following code, where the induction variable 'w_ix'
; is only used to access the array 'in':
;
; void doit2(int *in, int *out, size_t size, size_t step)
; {
;   int w_ix = 0;
;   for (size_t offset = 0; offset < size; ++offset)
;   {
;     int w = in[w_ix];
;     out[offset] = w;
;     w_ix += step;
;   }
; }
;
; The "ExtTrunc" IR sequence here is similar to the previous case:
;  "%sext = shl i64 %w_ix.011, 32
;   %idxprom = ashr exact i64 %sext, 32"
; We check that it does not appear in the vector loop body, whether
; we widen or scalarize the induction.
; In the case of widened induction, this means that the induction phi
; is directly used, without shl/ashr on the way.
;
; Expected output for Case2. With a vector width of 8, elements are extracted
; straight from the widened induction phi; with a width of 1 no i64 shl may
; appear between vector.body and middle.block.

; VF8-LABEL: @doit2
; VF8: vector.body:
; VF8: %vec.ind = phi <8 x i64>
; VF8: %{{.*}} = extractelement <8 x i64> %vec.ind
; VF8: middle.block:

; VF1-LABEL: @doit2
; VF1: vector.body:
; VF1-NOT: %{{.*}} = shl i64
; VF1: middle.block:
;

; Copies in[w_ix] to out[offset] for each offset in [0, %size), where w_ix is
; a sign-extended 32-bit induction advanced by %step.
define void @doit2(i32* nocapture readonly %in, i32* nocapture %out, i64 %size, i64 %step) {
entry:
  ; Nothing to do for an empty range.
  %cmp9 = icmp eq i64 %size, 0
  br i1 %cmp9, label %for.cond.cleanup, label %for.body.lr.ph

for.body.lr.ph:
  br label %for.body

for.cond.cleanup.loopexit:
  br label %for.cond.cleanup

for.cond.cleanup:
  ret void

for.body:
  ; Induction under test: starts at 0 and is fed back from %add.
  %w_ix.011 = phi i64 [ 0, %for.body.lr.ph ], [ %add, %for.body ]
  %offset.010 = phi i64 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
  ; shl 32 followed by ashr 32 sign-extends the low 32 bits of %w_ix.011; the
  ; result is used only as the load index and as the input to the step.
  %sext = shl i64 %w_ix.011, 32
  %idxprom = ashr exact i64 %sext, 32
  %arrayidx = getelementptr inbounds i32, i32* %in, i64 %idxprom
  %0 = load i32, i32* %arrayidx, align 4
  %arrayidx1 = getelementptr inbounds i32, i32* %out, i64 %offset.010
  store i32 %0, i32* %arrayidx1, align 4
  %add = add i64 %idxprom, %step
  %inc = add nuw i64 %offset.010, 1
  %exitcond = icmp eq i64 %inc, %size
  br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
}

; Case3: Lastly, check also the following induction pattern:
;
;  %p.09 = phi i32 [ 0, %for.body.lr.ph ], [ %add, %for.body ]
;  %conv = and i32 %p.09, 255
;  %add = add nsw i32 %conv, %step
;
; This is the case in the following code:
;
; int a[N];
; void doit3(int n, int step) {
;   int i;
;   unsigned char p = 0;
;   for (i = 0; i < n; i++) {
;     a[i] = p;
;     p = p + step;
;   }
; }
;
; The "ExtTrunc" IR sequence here is:
;  "%conv = and i32 %p.09, 255".
; We check that it does not appear in the vector loop body, whether
; we vectorize or scalarize the induction.
;
; Expected output for Case3. With a vector width of 8 the widened induction
; phi must be stored directly; with a width of 1 no i32 and may appear
; between vector.body and middle.block.

; VF8-LABEL: @doit3
; VF8: vector.body:
; VF8: %vec.ind = phi <8 x i32>
; VF8: store <8 x i32> %vec.ind
; VF8: middle.block:

; VF1-LABEL: @doit3
; VF1: vector.body:
; VF1-NOT: %{{.*}} = and i32
; VF1: middle.block:

; Same loop shape as @doit1, but the induction is zero-extended from 8 bits
; with a mask instead of being sign-extended with a shift pair.
define void @doit3(i32 %n, i32 %step) {
entry:
  ; Skip the loop entirely unless %n is strictly positive.
  %cmp7 = icmp sgt i32 %n, 0
  br i1 %cmp7, label %for.body.lr.ph, label %for.end

for.body.lr.ph:
  ; The loop counter is 64 bits wide, so widen the trip count to match.
  %wide.trip.count = zext i32 %n to i64
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
  ; Induction under test: starts at 0 and is fed back from %add.
  %p.09 = phi i32 [ 0, %for.body.lr.ph ], [ %add, %for.body ]
  ; Masking with 255 keeps only the low 8 bits of %p.09; this is the
  ; "ExtTrunc" sequence that must not survive in the vector loop.
  %conv = and i32 %p.09, 255
  %arrayidx = getelementptr inbounds [250 x i32], [250 x i32]* @a, i64 0, i64 %indvars.iv
  store i32 %conv, i32* %arrayidx, align 4
  ; The step is applied to the masked value, not to the raw phi.
  %add = add nsw i32 %conv, %step
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
  br i1 %exitcond, label %for.end.loopexit, label %for.body

for.end.loopexit:
  br label %for.end

for.end:
  ret void
}