; SLP-vectorizer regression tests for "tiny trees": pairs/quads of scalar
; load+store chains that the SLP vectorizer should (or should not) turn into
; a single vector load/store. The first two functions have consecutive
; src/dst offsets and must vectorize; the next two have a mismatched load
; offset and must stay scalar; the last two check that splat and constant
; stores are still vectorized.
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"
; RUN: opt < %s -basicaa -slp-vectorizer -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7 | FileCheck %s


; Two consecutive double loads/stores (offsets 0 and 1 from both %src and
; %dst) form a fully vectorizable tiny tree -> expect one <2 x double>
; load/store pair.
; CHECK: tiny_tree_fully_vectorizable
; CHECK: load <2 x double>
; CHECK: store <2 x double>
; CHECK: ret

; NOTE(review): attribute group #0 is referenced here but its
; "attributes #0 = { ... }" definition is not visible in this chunk —
; confirm it exists elsewhere in the file.
define void @tiny_tree_fully_vectorizable(double* noalias nocapture %dst, double* noalias nocapture readonly %src, i64 %count) #0 {
entry:
  %cmp12 = icmp eq i64 %count, 0
  br i1 %cmp12, label %for.end, label %for.body

for.body:                                         ; preds = %entry, %for.body
  %i.015 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
  %dst.addr.014 = phi double* [ %add.ptr4, %for.body ], [ %dst, %entry ]
  %src.addr.013 = phi double* [ %add.ptr, %for.body ], [ %src, %entry ]
  ; Copy elements 0 and 1 from src to dst — adjacent accesses, same offsets
  ; on the load and store side, so the pair is SLP-vectorizable.
  %0 = load double, double* %src.addr.013, align 8
  store double %0, double* %dst.addr.014, align 8
  %arrayidx2 = getelementptr inbounds double, double* %src.addr.013, i64 1
  %1 = load double, double* %arrayidx2, align 8
  %arrayidx3 = getelementptr inbounds double, double* %dst.addr.014, i64 1
  store double %1, double* %arrayidx3, align 8
  ; Advance both pointers by the (variable) trip index for the next iteration.
  %add.ptr = getelementptr inbounds double, double* %src.addr.013, i64 %i.015
  %add.ptr4 = getelementptr inbounds double, double* %dst.addr.014, i64 %i.015
  %inc = add i64 %i.015, 1
  %exitcond = icmp eq i64 %inc, %count
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body, %entry
  ret void
}

; Four consecutive float loads/stores (offsets 0..3 on both sides) ->
; expect one <4 x float> load/store pair.
; CHECK: tiny_tree_fully_vectorizable2
; CHECK: load <4 x float>
; CHECK: store <4 x float>
; CHECK: ret

define void @tiny_tree_fully_vectorizable2(float* noalias nocapture %dst, float* noalias nocapture readonly %src, i64 %count) #0 {
entry:
  %cmp20 = icmp eq i64 %count, 0
  br i1 %cmp20, label %for.end, label %for.body

for.body:                                         ; preds = %entry, %for.body
  %i.023 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
  %dst.addr.022 = phi float* [ %add.ptr8, %for.body ], [ %dst, %entry ]
  %src.addr.021 = phi float* [ %add.ptr, %for.body ], [ %src, %entry ]
  ; Copy elements 0..3 from src to dst — matching consecutive offsets.
  %0 = load float, float* %src.addr.021, align 4
  store float %0, float* %dst.addr.022, align 4
  %arrayidx2 = getelementptr inbounds float, float* %src.addr.021, i64 1
  %1 = load float, float* %arrayidx2, align 4
  %arrayidx3 = getelementptr inbounds float, float* %dst.addr.022, i64 1
  store float %1, float* %arrayidx3, align 4
  %arrayidx4 = getelementptr inbounds float, float* %src.addr.021, i64 2
  %2 = load float, float* %arrayidx4, align 4
  %arrayidx5 = getelementptr inbounds float, float* %dst.addr.022, i64 2
  store float %2, float* %arrayidx5, align 4
  %arrayidx6 = getelementptr inbounds float, float* %src.addr.021, i64 3
  %3 = load float, float* %arrayidx6, align 4
  %arrayidx7 = getelementptr inbounds float, float* %dst.addr.022, i64 3
  store float %3, float* %arrayidx7, align 4
  %add.ptr = getelementptr inbounds float, float* %src.addr.021, i64 %i.023
  %add.ptr8 = getelementptr inbounds float, float* %dst.addr.022, i64 %i.023
  %inc = add i64 %i.023, 1
  %exitcond = icmp eq i64 %inc, %count
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body, %entry
  ret void
}

; We do not vectorize the tiny tree which is not fully vectorizable.
; Here the second load reads src[2] while the second store writes dst[1]
; (non-consecutive loads), so no <2 x double> may appear in the output.
; CHECK: tiny_tree_not_fully_vectorizable
; CHECK-NOT: <2 x double>
; CHECK: ret

define void @tiny_tree_not_fully_vectorizable(double* noalias nocapture %dst, double* noalias nocapture readonly %src, i64 %count) #0 {
entry:
  %cmp12 = icmp eq i64 %count, 0
  br i1 %cmp12, label %for.end, label %for.body

for.body:                                         ; preds = %entry, %for.body
  %i.015 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
  %dst.addr.014 = phi double* [ %add.ptr4, %for.body ], [ %dst, %entry ]
  %src.addr.013 = phi double* [ %add.ptr, %for.body ], [ %src, %entry ]
  %0 = load double, double* %src.addr.013, align 8
  store double %0, double* %dst.addr.014, align 8
  ; Deliberate gap: load from offset 2 but store to offset 1, so the two
  ; loads are not consecutive and the tree is not fully vectorizable.
  %arrayidx2 = getelementptr inbounds double, double* %src.addr.013, i64 2
  %1 = load double, double* %arrayidx2, align 8
  %arrayidx3 = getelementptr inbounds double, double* %dst.addr.014, i64 1
  store double %1, double* %arrayidx3, align 8
  %add.ptr = getelementptr inbounds double, double* %src.addr.013, i64 %i.015
  %add.ptr4 = getelementptr inbounds double, double* %dst.addr.014, i64 %i.015
  %inc = add i64 %i.015, 1
  %exitcond = icmp eq i64 %inc, %count
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body, %entry
  ret void
}


; Same idea with floats: the second load reads src[4] (not src[1]), so the
; four loads are not consecutive and the tree must stay scalar.
; CHECK: tiny_tree_not_fully_vectorizable2
; CHECK-NOT: <2 x double>
; CHECK: ret

define void @tiny_tree_not_fully_vectorizable2(float* noalias nocapture %dst, float* noalias nocapture readonly %src, i64 %count) #0 {
entry:
  %cmp20 = icmp eq i64 %count, 0
  br i1 %cmp20, label %for.end, label %for.body

for.body:                                         ; preds = %entry, %for.body
  %i.023 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
  %dst.addr.022 = phi float* [ %add.ptr8, %for.body ], [ %dst, %entry ]
  %src.addr.021 = phi float* [ %add.ptr, %for.body ], [ %src, %entry ]
  %0 = load float, float* %src.addr.021, align 4
  store float %0, float* %dst.addr.022, align 4
  ; Deliberate gap: offset 4 instead of 1 breaks load consecutiveness.
  %arrayidx2 = getelementptr inbounds float, float* %src.addr.021, i64 4
  %1 = load float, float* %arrayidx2, align 4
  %arrayidx3 = getelementptr inbounds float, float* %dst.addr.022, i64 1
  store float %1, float* %arrayidx3, align 4
  %arrayidx4 = getelementptr inbounds float, float* %src.addr.021, i64 2
  %2 = load float, float* %arrayidx4, align 4
  %arrayidx5 = getelementptr inbounds float, float* %dst.addr.022, i64 2
  store float %2, float* %arrayidx5, align 4
  %arrayidx6 = getelementptr inbounds float, float* %src.addr.021, i64 3
  %3 = load float, float* %arrayidx6, align 4
  %arrayidx7 = getelementptr inbounds float, float* %dst.addr.022, i64 3
  store float %3, float* %arrayidx7, align 4
  %add.ptr = getelementptr inbounds float, float* %src.addr.021, i64 %i.023
  %add.ptr8 = getelementptr inbounds float, float* %dst.addr.022, i64 %i.023
  %inc = add i64 %i.023, 1
  %exitcond = icmp eq i64 %inc, %count
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body, %entry
  ret void
}


; Four stores of the same scalar %1 to consecutive slots -> a splat store
; should still be vectorized into one <4 x float> store.
; CHECK-LABEL: store_splat
; CHECK: store <4 x float>
define void @store_splat(float*, float) {
  %3 = getelementptr inbounds float, float* %0, i64 0
  store float %1, float* %3, align 4
  %4 = getelementptr inbounds float, float* %0, i64 1
  store float %1, float* %4, align 4
  %5 = getelementptr inbounds float, float* %0, i64 2
  store float %1, float* %5, align 4
  %6 = getelementptr inbounds float, float* %0, i64 3
  store float %1, float* %6, align 4
  ret void
}


; Four constant i32 stores to consecutive slots (values intentionally not
; sorted: 10, 30, 20, 40) -> expect one <4 x i32> constant-vector store.
; CHECK-LABEL: store_const
; CHECK: store <4 x i32>
define void @store_const(i32* %a) {
entry:
  %ptr0 = getelementptr inbounds i32, i32* %a, i64 0
  store i32 10, i32* %ptr0, align 4
  %ptr1 = getelementptr inbounds i32, i32* %a, i64 1
  store i32 30, i32* %ptr1, align 4
  %ptr2 = getelementptr inbounds i32, i32* %a, i64 2
  store i32 20, i32* %ptr2, align 4
  %ptr3 = getelementptr inbounds i32, i32* %a, i64 3
  store i32 40, i32* %ptr3, align 4
  ret void
}