; RUN: opt -S -loop-vectorize -instcombine -force-vector-interleave=1 < %s | FileCheck %s --check-prefix=CHECKUF1
; RUN: opt -S -loop-vectorize -instcombine -force-vector-interleave=2 < %s | FileCheck %s --check-prefix=CHECKUF2

; CHECKUF1: for.body.preheader:
; CHECKUF1-DAG: %wide.trip.count = zext i32 %N to i64
; CHECKUF1-DAG: %[[VSCALE:.*]] = call i64 @llvm.vscale.i64()
; CHECKUF1-DAG: %[[VSCALEX4:.*]] = shl i64 %[[VSCALE]], 2
; CHECKUF1-DAG: %min.iters.check = icmp ugt i64 %[[VSCALEX4]], %wide.trip.count

; CHECKUF1: vector.ph:
; CHECKUF1-DAG: %[[VSCALE:.*]] = call i64 @llvm.vscale.i64()
; CHECKUF1-DAG: %[[VSCALEX4:.*]] = shl i64 %[[VSCALE]], 2
; CHECKUF1-DAG: %n.mod.vf = urem i64 %wide.trip.count, %[[VSCALEX4]]
; CHECKUF1: %n.vec = sub nsw i64 %wide.trip.count, %n.mod.vf

; CHECKUF1: vector.body:
; CHECKUF1: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
; CHECKUF1: %[[IDXB:.*]] = getelementptr inbounds double, double* %b, i64 %index
; CHECKUF1: %[[IDXB_CAST:.*]] = bitcast double* %[[IDXB]] to <vscale x 4 x double>*
; CHECKUF1: %wide.load = load <vscale x 4 x double>, <vscale x 4 x double>* %[[IDXB_CAST]], align 8, !alias.scope !0
; CHECKUF1: %[[FADD:.*]] = fadd <vscale x 4 x double> %wide.load, shufflevector (<vscale x 4 x double> insertelement (<vscale x 4 x double> undef, double 1.000000e+00, i32 0), <vscale x 4 x double> undef, <vscale x 4 x i32> zeroinitializer)
; CHECKUF1: %[[IDXA:.*]] = getelementptr inbounds double, double* %a, i64 %index
; CHECKUF1: %[[IDXA_CAST:.*]] = bitcast double* %[[IDXA]] to <vscale x 4 x double>*
; CHECKUF1: store <vscale x 4 x double> %[[FADD]], <vscale x 4 x double>* %[[IDXA_CAST]], align 8, !alias.scope !3, !noalias !0
; CHECKUF1: %[[VSCALE:.*]] = call i64 @llvm.vscale.i64()
; CHECKUF1: %[[VSCALEX4:.*]] = shl i64 %[[VSCALE]], 2
; CHECKUF1: %index.next = add i64 %index, %[[VSCALEX4]]
; CHECKUF1: %[[CMP:.*]] = icmp eq i64 %index.next, %n.vec
; CHECKUF1: br i1 %[[CMP]], label %middle.block, label %vector.body, !llvm.loop !5


; For an interleave factor of 2, vscale is scaled by 8 instead of 4 (and thus shifted left by 3 instead of 2).
; There is also an increment for the second part of the interleaved pair, e.g. the second load does not index at IDXB, but at IDXB + vscale * 4.
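;
; A small worked example of that arithmetic (illustrative only; vscale is a
; runtime quantity, and vscale = 2 below is just an assumed value): with
; VF = <vscale x 4 x double> and UF = 2, each vector iteration covers
; 2 * (vscale * 4) = vscale * 8 elements, which the IR computes as
; "shl i64 %vscale, 3". For vscale = 2 that is 16 doubles per iteration,
; with the second load/store of each pair offset by vscale * 4 = 8 doubles
; from the first.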

; CHECKUF2: for.body.preheader:
; CHECKUF2-DAG: %wide.trip.count = zext i32 %N to i64
; CHECKUF2-DAG: %[[VSCALE:.*]] = call i64 @llvm.vscale.i64()
; CHECKUF2-DAG: %[[VSCALEX8:.*]] = shl i64 %[[VSCALE]], 3
; CHECKUF2-DAG: %min.iters.check = icmp ugt i64 %[[VSCALEX8]], %wide.trip.count

; CHECKUF2: vector.ph:
; CHECKUF2-DAG: %[[VSCALE:.*]] = call i64 @llvm.vscale.i64()
; CHECKUF2-DAG: %[[VSCALEX8:.*]] = shl i64 %[[VSCALE]], 3
; CHECKUF2-DAG: %n.mod.vf = urem i64 %wide.trip.count, %[[VSCALEX8]]
; CHECKUF2: %n.vec = sub nsw i64 %wide.trip.count, %n.mod.vf

; CHECKUF2: vector.body:
; CHECKUF2: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
; CHECKUF2: %[[IDXB:.*]] = getelementptr inbounds double, double* %b, i64 %index
; CHECKUF2: %[[IDXB_CAST:.*]] = bitcast double* %[[IDXB]] to <vscale x 4 x double>*
; CHECKUF2: %wide.load = load <vscale x 4 x double>, <vscale x 4 x double>* %[[IDXB_CAST]], align 8, !alias.scope !0
; CHECKUF2: %[[VSCALE:.*]] = call i32 @llvm.vscale.i32()
; CHECKUF2: %[[VSCALE2:.*]] = shl i32 %[[VSCALE]], 2
; CHECKUF2: %[[VSCALE2_EXT:.*]] = sext i32 %[[VSCALE2]] to i64
; CHECKUF2: %[[IDXB_NEXT:.*]] = getelementptr inbounds double, double* %[[IDXB]], i64 %[[VSCALE2_EXT]]
; CHECKUF2: %[[IDXB_NEXT_CAST:.*]] = bitcast double* %[[IDXB_NEXT]] to <vscale x 4 x double>*
; CHECKUF2: %wide.load{{[0-9]+}} = load <vscale x 4 x double>, <vscale x 4 x double>* %[[IDXB_NEXT_CAST]], align 8, !alias.scope !0
; CHECKUF2: %[[FADD:.*]] = fadd <vscale x 4 x double> %wide.load, shufflevector (<vscale x 4 x double> insertelement (<vscale x 4 x double> undef, double 1.000000e+00, i32 0), <vscale x 4 x double> undef, <vscale x 4 x i32> zeroinitializer)
; CHECKUF2: %[[FADD_NEXT:.*]] = fadd <vscale x 4 x double> %wide.load{{[0-9]+}}, shufflevector (<vscale x 4 x double> insertelement (<vscale x 4 x double> undef, double 1.000000e+00, i32 0), <vscale x 4 x double> undef, <vscale x 4 x i32> zeroinitializer)
; CHECKUF2: %[[IDXA:.*]] = getelementptr inbounds double, double* %a, i64 %index
; CHECKUF2: %[[IDXA_CAST:.*]] = bitcast double* %[[IDXA]] to <vscale x 4 x double>*
; CHECKUF2: store <vscale x 4 x double> %[[FADD]], <vscale x 4 x double>* %[[IDXA_CAST]], align 8, !alias.scope !3, !noalias !0
; CHECKUF2: %[[VSCALE:.*]] = call i32 @llvm.vscale.i32()
; CHECKUF2: %[[VSCALE2:.*]] = shl i32 %[[VSCALE]], 2
; CHECKUF2: %[[VSCALE2_EXT:.*]] = sext i32 %[[VSCALE2]] to i64
; CHECKUF2: %[[IDXA_NEXT:.*]] = getelementptr inbounds double, double* %[[IDXA]], i64 %[[VSCALE2_EXT]]
; CHECKUF2: %[[IDXA_NEXT_CAST:.*]] = bitcast double* %[[IDXA_NEXT]] to <vscale x 4 x double>*
; CHECKUF2: store <vscale x 4 x double> %[[FADD_NEXT]], <vscale x 4 x double>* %[[IDXA_NEXT_CAST]], align 8, !alias.scope !3, !noalias !0
; CHECKUF2: %[[VSCALE:.*]] = call i64 @llvm.vscale.i64()
; CHECKUF2: %[[VSCALEX8:.*]] = shl i64 %[[VSCALE]], 3
; CHECKUF2: %index.next = add i64 %index, %[[VSCALEX8]]
; CHECKUF2: %[[CMP:.*]] = icmp eq i64 %index.next, %n.vec
; CHECKUF2: br i1 %[[CMP]], label %middle.block, label %vector.body, !llvm.loop !5

define void @loop(i32 %N, double* nocapture %a, double* nocapture readonly %b) {
entry:
  %cmp7 = icmp sgt i32 %N, 0
  br i1 %cmp7, label %for.body.preheader, label %for.cond.cleanup

for.body.preheader:                               ; preds = %entry
  %wide.trip.count = zext i32 %N to i64
  br label %for.body

for.cond.cleanup:                                 ; preds = %for.body, %entry
  ret void

for.body:                                         ; preds = %for.body.preheader, %for.body
  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds double, double* %b, i64 %indvars.iv
  %0 = load double, double* %arrayidx, align 8
  %add = fadd double %0, 1.000000e+00
  %arrayidx2 = getelementptr inbounds double, double* %a, i64 %indvars.iv
  store double %add, double* %arrayidx2, align 8
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !1
}

!1 = distinct !{!1, !2, !3}
!2 = !{!"llvm.loop.vectorize.width", i32 4}
!3 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}