; RUN: opt < %s -loop-vectorize -mtriple=x86_64-unknown-linux -S -mcpu=slm -debug 2>&1 | FileCheck -check-prefix=MSG %s
; REQUIRES: asserts
;
; This loop should not be vectorized on the X86 SLM architecture.
; Vectorizing the 64-bit multiply is the wrong choice here, because the
; multiply can be done in a narrower bit width (note that both sources are
; 16-bit values that are sign-extended to i64).
; In addition, addq/subq (quad-word) have a high cost on SLM.
; Vectorizing this test on SLM caused a bad performance regression (-70%).
;
; The test passes when the vectorizer's debug output reports that it selected
; a vectorization factor of 1, i.e. the loop is left scalar.

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; Rough C equivalent of the loop below:
;   int64_t acc = 0;
;   for (i = 0; i < LastIndex; ++i)
;     acc += ((int64_t)InputData[i + lag] * (int64_t)InputData[i]) >> shift;
;   return (int32_t)acc;
; where shift is Scale sign-extended to 64 bits and masked to its low 32 bits.
define i32 @no_vec(i32 %LastIndex, i16* nocapture readonly %InputData, i16 signext %lag, i16 signext %Scale) {
entry:
; MSG: LV: Selecting VF: 1.
  ; Skip the loop entirely (and return 0) when LastIndex <= 0.
  %cmp17 = icmp sgt i32 %LastIndex, 0
  br i1 %cmp17, label %for.body.lr.ph, label %for.cond.cleanup

for.body.lr.ph:                                   ; preds = %entry
  ; Loop-invariant values: shift amount, lag offset and trip count, all as i64.
  %conv5 = sext i16 %Scale to i64
  %sh_prom = and i64 %conv5, 4294967295
  %0 = sext i16 %lag to i64
  %wide.trip.count = zext i32 %LastIndex to i64
  br label %for.body

for.cond.cleanup.loopexit:                        ; preds = %for.body
  ; The 64-bit accumulator is truncated to the i32 return type.
  %conv8 = trunc i64 %add7 to i32
  br label %for.cond.cleanup

for.cond.cleanup:                                 ; preds = %for.cond.cleanup.loopexit, %entry
  %Accumulator.0.lcssa = phi i32 [ 0, %entry ], [ %conv8, %for.cond.cleanup.loopexit ]
  ret i32 %Accumulator.0.lcssa

for.body:                                         ; preds = %for.body, %for.body.lr.ph
  %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
  %Accumulator.018 = phi i64 [ 0, %for.body.lr.ph ], [ %add7, %for.body ]
  ; Load InputData[i] and widen it from i16 to i64.
  %arrayidx = getelementptr inbounds i16, i16* %InputData, i64 %indvars.iv
  %1 = load i16, i16* %arrayidx, align 2
  %conv = sext i16 %1 to i64
  ; Load InputData[i + lag] and widen it from i16 to i64.
  %2 = add nsw i64 %indvars.iv, %0
  %arrayidx3 = getelementptr inbounds i16, i16* %InputData, i64 %2
  %3 = load i16, i16* %arrayidx3, align 2
  %conv4 = sext i16 %3 to i64
  ; 64-bit multiply of two sign-extended 16-bit values, then arithmetic shift
  ; right and a 64-bit accumulate: the operations the header comment refers to.
  %mul = mul nsw i64 %conv4, %conv
  %shr = ashr i64 %mul, %sh_prom
  %add7 = add i64 %shr, %Accumulator.018
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
  br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
}