; RUN: opt < %s -vector-library=Accelerate -loop-vectorize -S | FileCheck %s

target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; Every function in this file has the same shape: a guarded counted loop that
; loads y[i], passes it through a scalar float call, and stores the result to
; x[i]. The directives above each function state which vector routine (if any)
; is expected to appear with a <4 x float> type in the optimized output.

; sqrtf: expect a reference to vsqrtf on <4 x float>.
;CHECK-LABEL: @sqrt_f32(
;CHECK: vsqrtf{{.*}}<4 x float>
;CHECK: ret void
declare float @sqrtf(float) nounwind readnone
define void @sqrt_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
guard:
  ; Only enter the loop when n is strictly positive.
  %nonempty = icmp sgt i32 %n, 0
  br i1 %nonempty, label %loop, label %exit

loop:                                             ; preds = %guard, %loop
  %i = phi i64 [ %i.next, %loop ], [ 0, %guard ]
  ; x[i] = sqrtf(y[i])
  %src.addr = getelementptr inbounds float, float* %y, i64 %i
  %elt = load float, float* %src.addr, align 4
  %res = tail call float @sqrtf(float %elt) nounwind readnone
  %dst.addr = getelementptr inbounds float, float* %x, i64 %i
  store float %res, float* %dst.addr, align 4
  ; Advance the 64-bit counter; the exit test compares its low 32 bits with n.
  %i.next = add i64 %i, 1
  %i.next.lo = trunc i64 %i.next to i32
  %done = icmp eq i32 %i.next.lo, %n
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop, %guard
  ret void
}

; expf: expect a reference to vexpf on <4 x float>.
;CHECK-LABEL: @exp_f32(
;CHECK: vexpf{{.*}}<4 x float>
;CHECK: ret void
declare float @expf(float) nounwind readnone
define void @exp_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
guard:
  ; Only enter the loop when n is strictly positive.
  %nonempty = icmp sgt i32 %n, 0
  br i1 %nonempty, label %loop, label %exit

loop:                                             ; preds = %guard, %loop
  %i = phi i64 [ %i.next, %loop ], [ 0, %guard ]
  ; x[i] = expf(y[i])
  %src.addr = getelementptr inbounds float, float* %y, i64 %i
  %elt = load float, float* %src.addr, align 4
  %res = tail call float @expf(float %elt) nounwind readnone
  %dst.addr = getelementptr inbounds float, float* %x, i64 %i
  store float %res, float* %dst.addr, align 4
  ; Advance the 64-bit counter; the exit test compares its low 32 bits with n.
  %i.next = add i64 %i, 1
  %i.next.lo = trunc i64 %i.next to i32
  %done = icmp eq i32 %i.next.lo, %n
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop, %guard
  ret void
}

; logf: expect a reference to vlogf on <4 x float>.
;CHECK-LABEL: @log_f32(
;CHECK: vlogf{{.*}}<4 x float>
;CHECK: ret void
declare float @logf(float) nounwind readnone
define void @log_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
guard:
  ; Only enter the loop when n is strictly positive.
  %nonempty = icmp sgt i32 %n, 0
  br i1 %nonempty, label %loop, label %exit

loop:                                             ; preds = %guard, %loop
  %i = phi i64 [ %i.next, %loop ], [ 0, %guard ]
  ; x[i] = logf(y[i])
  %src.addr = getelementptr inbounds float, float* %y, i64 %i
  %elt = load float, float* %src.addr, align 4
  %res = tail call float @logf(float %elt) nounwind readnone
  %dst.addr = getelementptr inbounds float, float* %x, i64 %i
  store float %res, float* %dst.addr, align 4
  ; Advance the 64-bit counter; the exit test compares its low 32 bits with n.
  %i.next = add i64 %i, 1
  %i.next.lo = trunc i64 %i.next to i32
  %done = icmp eq i32 %i.next.lo, %n
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop, %guard
  ret void
}

; For fabs we expect the vector intrinsic to be generated, as it is cheaper
; than a library call.
;CHECK-LABEL: @fabs_f32(
;CHECK: fabs{{.*}}<4 x float>
;CHECK: ret void
declare float @fabsf(float) nounwind readnone
define void @fabs_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
guard:
  ; Only enter the loop when n is strictly positive.
  %nonempty = icmp sgt i32 %n, 0
  br i1 %nonempty, label %loop, label %exit

loop:                                             ; preds = %guard, %loop
  %i = phi i64 [ %i.next, %loop ], [ 0, %guard ]
  ; x[i] = fabsf(y[i])
  %src.addr = getelementptr inbounds float, float* %y, i64 %i
  %elt = load float, float* %src.addr, align 4
  %res = tail call float @fabsf(float %elt) nounwind readnone
  %dst.addr = getelementptr inbounds float, float* %x, i64 %i
  store float %res, float* %dst.addr, align 4
  ; Advance the 64-bit counter; the exit test compares its low 32 bits with n.
  %i.next = add i64 %i, 1
  %i.next.lo = trunc i64 %i.next to i32
  %done = icmp eq i32 %i.next.lo, %n
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop, %guard
  ret void
}

; Test that we can vectorize an intrinsic into a vector call.
;CHECK-LABEL: @exp_f32_intrin(
;CHECK: vexpf{{.*}}<4 x float>
;CHECK: ret void
declare float @llvm.exp.f32(float) nounwind readnone
define void @exp_f32_intrin(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
guard:
  ; Only enter the loop when n is strictly positive.
  %nonempty = icmp sgt i32 %n, 0
  br i1 %nonempty, label %loop, label %exit

loop:                                             ; preds = %guard, %loop
  %i = phi i64 [ %i.next, %loop ], [ 0, %guard ]
  ; x[i] = llvm.exp.f32(y[i]) -- the scalar callee is the intrinsic here,
  ; not the expf library function.
  %src.addr = getelementptr inbounds float, float* %y, i64 %i
  %elt = load float, float* %src.addr, align 4
  %res = tail call float @llvm.exp.f32(float %elt) nounwind readnone
  %dst.addr = getelementptr inbounds float, float* %x, i64 %i
  store float %res, float* %dst.addr, align 4
  ; Advance the 64-bit counter; the exit test compares its low 32 bits with n.
  %i.next = add i64 %i, 1
  %i.next.lo = trunc i64 %i.next to i32
  %done = icmp eq i32 %i.next.lo, %n
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop, %guard
  ret void
}

; Test that we don't vectorize arbitrary functions.
;CHECK-LABEL: @foo_f32(
;CHECK-NOT: foo{{.*}}<4 x float>
;CHECK: ret void
declare float @foo(float) nounwind readnone
define void @foo_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
guard:
  ; Only enter the loop when n is strictly positive.
  %nonempty = icmp sgt i32 %n, 0
  br i1 %nonempty, label %loop, label %exit

loop:                                             ; preds = %guard, %loop
  %i = phi i64 [ %i.next, %loop ], [ 0, %guard ]
  ; x[i] = callee(y[i]), where the callee is an ordinary external function.
  %src.addr = getelementptr inbounds float, float* %y, i64 %i
  %elt = load float, float* %src.addr, align 4
  %res = tail call float @foo(float %elt) nounwind readnone
  %dst.addr = getelementptr inbounds float, float* %x, i64 %i
  store float %res, float* %dst.addr, align 4
  ; Advance the 64-bit counter; the exit test compares its low 32 bits with n.
  %i.next = add i64 %i, 1
  %i.next.lo = trunc i64 %i.next to i32
  %done = icmp eq i32 %i.next.lo, %n
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop, %guard
  ret void
}

; Test that we don't vectorize calls with nobuiltin attribute.
;CHECK-LABEL: @sqrt_f32_nobuiltin(
;CHECK-NOT: vsqrtf{{.*}}<4 x float>
;CHECK: ret void
define void @sqrt_f32_nobuiltin(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
guard:
  ; Only enter the loop when n is strictly positive.
  %nonempty = icmp sgt i32 %n, 0
  br i1 %nonempty, label %loop, label %exit

loop:                                             ; preds = %guard, %loop
  %i = phi i64 [ %i.next, %loop ], [ 0, %guard ]
  ; x[i] = sqrtf(y[i]); the call site carries the nobuiltin attribute.
  ; @sqrtf itself is declared elsewhere in this file.
  %src.addr = getelementptr inbounds float, float* %y, i64 %i
  %elt = load float, float* %src.addr, align 4
  %res = tail call float @sqrtf(float %elt) nounwind readnone nobuiltin
  %dst.addr = getelementptr inbounds float, float* %x, i64 %i
  store float %res, float* %dst.addr, align 4
  ; Advance the 64-bit counter; the exit test compares its low 32 bits with n.
  %i.next = add i64 %i, 1
  %i.next.lo = trunc i64 %i.next to i32
  %done = icmp eq i32 %i.next.lo, %n
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop, %guard
  ret void
}