; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue -prefer-predicated-reduction-select -force-reduction-intrinsics -dce -instcombine -S | FileCheck %s

; Reduction loops vectorized with a forced vector width of 4 and an interleave
; count of 1. Every loop below exits once the incremented induction variable
; equals 257, i.e. it runs 257 iterations, which is not a multiple of 4.
; For each function the check lines pin three things:
;   * the reduction phi in the vector body and its start vector,
;   * the select on the lane mask that guards the reduction update,
;   * the llvm.vector.reduce.* call in the middle block.

target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"

; Integer sum over a single array: sum += A[i].
; The expected output selects zero for masked-off lanes *before* the add.
define i32 @reduction_sum_single(i32* noalias nocapture %A) {
; CHECK-LABEL: @reduction_sum_single(
; CHECK:       vector.body:
; CHECK:         [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ [[TMP25:%.*]], %pred.load.continue6 ]
; CHECK:         [[TMP24:%.*]] = select <4 x i1> [[TMP0:%.*]], <4 x i32> [[TMP23:%.*]], <4 x i32> zeroinitializer
; CHECK:         [[TMP25]] = add <4 x i32> [[VEC_PHI]], [[TMP24]]
; CHECK:       middle.block:
; CHECK:         [[TMP27:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP25]])
;
entry:
  br label %.lr.ph

.lr.ph:                                           ; preds = %entry, %.lr.ph
  %indvars.iv = phi i32 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ]
  %sum.02 = phi i32 [ %l7, %.lr.ph ], [ 0, %entry ]
  %l2 = getelementptr inbounds i32, i32* %A, i32 %indvars.iv
  %l3 = load i32, i32* %l2, align 4
  %l7 = add i32 %sum.02, %l3
  %indvars.iv.next = add i32 %indvars.iv, 1
  %exitcond = icmp eq i32 %indvars.iv.next, 257
  br i1 %exitcond, label %._crit_edge, label %.lr.ph

._crit_edge:                                      ; preds = %.lr.ph
  %sum.0.lcssa = phi i32 [ %l7, %.lr.ph ]
  ret i32 %sum.0.lcssa
}

; Integer sum with a three-add chain: sum += i + A[i] + B[i].
; The expected output keeps the add chain on the phi and selects between the
; updated value and the phi *after* the last add.
define i32 @reduction_sum(i32* noalias nocapture %A, i32* noalias nocapture %B) {
; CHECK-LABEL: @reduction_sum(
; CHECK:       vector.body:
; CHECK:         [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ [[TMP47:%.*]], %pred.load.continue14 ]
; CHECK:         [[TMP44:%.*]] = add <4 x i32> [[VEC_PHI]], [[VEC_IND:%.*]]
; CHECK:         [[TMP45:%.*]] = add <4 x i32> [[TMP44]], [[TMP23:%.*]]
; CHECK:         [[TMP46:%.*]] = add <4 x i32> [[TMP45]], [[TMP43:%.*]]
; CHECK:         [[TMP47]] = select <4 x i1> [[TMP3:%.*]], <4 x i32> [[TMP46]], <4 x i32> [[VEC_PHI]]
; CHECK:       middle.block:
; CHECK:         [[TMP49:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP47]])
;
entry:
  br label %.lr.ph

.lr.ph:                                           ; preds = %entry, %.lr.ph
  %indvars.iv = phi i32 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ]
  %sum.02 = phi i32 [ %l9, %.lr.ph ], [ 0, %entry ]
  %l2 = getelementptr inbounds i32, i32* %A, i32 %indvars.iv
  %l3 = load i32, i32* %l2, align 4
  %l4 = getelementptr inbounds i32, i32* %B, i32 %indvars.iv
  %l5 = load i32, i32* %l4, align 4
  %l7 = add i32 %sum.02, %indvars.iv
  %l8 = add i32 %l7, %l3
  %l9 = add i32 %l8, %l5
  %indvars.iv.next = add i32 %indvars.iv, 1
  %exitcond = icmp eq i32 %indvars.iv.next, 257
  br i1 %exitcond, label %._crit_edge, label %.lr.ph

._crit_edge:                                      ; preds = %.lr.ph
  %sum.0.lcssa = phi i32 [ %l9, %.lr.ph ]
  ret i32 %sum.0.lcssa
}

; Integer product: prod = prod * A[i] * B[i], starting from 1.
; The expected vector phi starts as a splat of 1.
define i32 @reduction_prod(i32* noalias nocapture %A, i32* noalias nocapture %B) {
; CHECK-LABEL: @reduction_prod(
; CHECK:       vector.body:
; CHECK:         [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 1, i32 1, i32 1, i32 1>, %vector.ph ], [ [[TMP46:%.*]], %pred.load.continue14 ]
; CHECK:         [[TMP44:%.*]] = mul <4 x i32> [[VEC_PHI]], [[TMP23:%.*]]
; CHECK:         [[TMP45:%.*]] = mul <4 x i32> [[TMP44]], [[TMP43:%.*]]
; CHECK:         [[TMP46]] = select <4 x i1> [[TMP3:%.*]], <4 x i32> [[TMP45]], <4 x i32> [[VEC_PHI]]
; CHECK:       middle.block:
; CHECK:         [[TMP48:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP46]])
;
entry:
  br label %.lr.ph

.lr.ph:                                           ; preds = %entry, %.lr.ph
  %indvars.iv = phi i32 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ]
  %prod.02 = phi i32 [ %l9, %.lr.ph ], [ 1, %entry ]
  %l2 = getelementptr inbounds i32, i32* %A, i32 %indvars.iv
  %l3 = load i32, i32* %l2, align 4
  %l4 = getelementptr inbounds i32, i32* %B, i32 %indvars.iv
  %l5 = load i32, i32* %l4, align 4
  %l8 = mul i32 %prod.02, %l3
  %l9 = mul i32 %l8, %l5
  %indvars.iv.next = add i32 %indvars.iv, 1
  %exitcond = icmp eq i32 %indvars.iv.next, 257
  br i1 %exitcond, label %._crit_edge, label %.lr.ph

._crit_edge:                                      ; preds = %.lr.ph
  %prod.0.lcssa = phi i32 [ %l9, %.lr.ph ]
  ret i32 %prod.0.lcssa
}

; Bitwise and: result = result & A[i] & B[i], starting from -1.
; The expected vector phi starts as a splat of -1.
define i32 @reduction_and(i32* nocapture %A, i32* nocapture %B) {
; CHECK-LABEL: @reduction_and(
; CHECK:       vector.body:
; CHECK:         [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 -1, i32 -1, i32 -1, i32 -1>, %vector.ph ], [ [[TMP46:%.*]], %pred.load.continue14 ]
; CHECK:         [[TMP44:%.*]] = and <4 x i32> [[VEC_PHI]], [[TMP23:%.*]]
; CHECK:         [[TMP45:%.*]] = and <4 x i32> [[TMP44]], [[TMP43:%.*]]
; CHECK:         [[TMP46]] = select <4 x i1> [[TMP3:%.*]], <4 x i32> [[TMP45]], <4 x i32> [[VEC_PHI]]
; CHECK:       middle.block:
; CHECK:         [[TMP48:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[TMP46]])
;
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i32 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %result.08 = phi i32 [ %and, %for.body ], [ -1, %entry ]
  %arrayidx = getelementptr inbounds i32, i32* %A, i32 %indvars.iv
  %l0 = load i32, i32* %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds i32, i32* %B, i32 %indvars.iv
  %l1 = load i32, i32* %arrayidx2, align 4
  %and0 = and i32 %result.08, %l0
  %and = and i32 %and0, %l1
  %indvars.iv.next = add i32 %indvars.iv, 1
  %exitcond = icmp eq i32 %indvars.iv.next, 257
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  %result.0.lcssa = phi i32 [ %and, %for.body ]
  ret i32 %result.0.lcssa
}

; Bitwise or: result |= (A[i] + B[i]), starting from 0.
; The expected output selects zero for masked-off lanes before the or.
define i32 @reduction_or(i32* nocapture %A, i32* nocapture %B) {
; CHECK-LABEL: @reduction_or(
; CHECK:       vector.body:
; CHECK:         [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ [[TMP46:%.*]], %pred.load.continue14 ]
; CHECK:         [[TMP45:%.*]] = select <4 x i1> [[TMP3:%.*]], <4 x i32> [[TMP44:%.*]], <4 x i32> zeroinitializer
; CHECK:         [[TMP46]] = or <4 x i32> [[VEC_PHI]], [[TMP45]]
; CHECK:       middle.block:
; CHECK:         [[TMP48:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP46]])
;
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i32 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %result.08 = phi i32 [ %or, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i32, i32* %A, i32 %indvars.iv
  %l0 = load i32, i32* %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds i32, i32* %B, i32 %indvars.iv
  %l1 = load i32, i32* %arrayidx2, align 4
  %add = add nsw i32 %l1, %l0
  %or = or i32 %add, %result.08
  %indvars.iv.next = add i32 %indvars.iv, 1
  %exitcond = icmp eq i32 %indvars.iv.next, 257
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  %result.0.lcssa = phi i32 [ %or, %for.body ]
  ret i32 %result.0.lcssa
}

; Bitwise xor: result ^= (A[i] + B[i]), starting from 0.
; The expected output selects zero for masked-off lanes before the xor.
define i32 @reduction_xor(i32* nocapture %A, i32* nocapture %B) {
; CHECK-LABEL: @reduction_xor(
; CHECK:       vector.body:
; CHECK:         [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ [[TMP46:%.*]], %pred.load.continue14 ]
; CHECK:         [[TMP45:%.*]] = select <4 x i1> [[TMP3:%.*]], <4 x i32> [[TMP44:%.*]], <4 x i32> zeroinitializer
; CHECK:         [[TMP46]] = xor <4 x i32> [[VEC_PHI]], [[TMP45]]
; CHECK:       middle.block:
; CHECK:         [[TMP48:%.*]] = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> [[TMP46]])
;
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i32 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %result.08 = phi i32 [ %xor, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i32, i32* %A, i32 %indvars.iv
  %l0 = load i32, i32* %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds i32, i32* %B, i32 %indvars.iv
  %l1 = load i32, i32* %arrayidx2, align 4
  %add = add nsw i32 %l1, %l0
  %xor = xor i32 %add, %result.08
  %indvars.iv.next = add i32 %indvars.iv, 1
  %exitcond = icmp eq i32 %indvars.iv.next, 257
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  %result.0.lcssa = phi i32 [ %xor, %for.body ]
  ret i32 %result.0.lcssa
}

; Fast-math float sum: result = result + A[i] + B[i], starting from 0.0.
; The expected reduce call carries -0.0 as its scalar start operand.
define float @reduction_fadd(float* nocapture %A, float* nocapture %B) {
; CHECK-LABEL: @reduction_fadd(
; CHECK:       vector.body:
; CHECK:         [[VEC_PHI:%.*]] = phi <4 x float> [ zeroinitializer, %vector.ph ], [ [[TMP46:%.*]], %pred.load.continue14 ]
; CHECK:         [[TMP44:%.*]] = fadd fast <4 x float> [[VEC_PHI]], [[TMP23:%.*]]
; CHECK:         [[TMP45:%.*]] = fadd fast <4 x float> [[TMP44]], [[TMP43:%.*]]
; CHECK:         [[TMP46]] = select <4 x i1> [[TMP3:%.*]], <4 x float> [[TMP45]], <4 x float> [[VEC_PHI]]
; CHECK:       middle.block:
; CHECK:         [[TMP48:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP46]])
;
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i32 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %result.08 = phi float [ %fadd, %for.body ], [ 0.0, %entry ]
  %arrayidx = getelementptr inbounds float, float* %A, i32 %indvars.iv
  %l0 = load float, float* %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds float, float* %B, i32 %indvars.iv
  %l1 = load float, float* %arrayidx2, align 4
  %add = fadd fast float %result.08, %l0
  %fadd = fadd fast float %add, %l1
  %indvars.iv.next = add i32 %indvars.iv, 1
  %exitcond = icmp eq i32 %indvars.iv.next, 257
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  %result.0.lcssa = phi float [ %fadd, %for.body ]
  ret float %result.0.lcssa
}

; Fast-math float product: result = result * A[i] * B[i].
; The scalar start value is 0.0 (not 1.0), so the expected vector phi starts
; with 0.0 in lane 0 and 1.0 in the remaining lanes.
define float @reduction_fmul(float* nocapture %A, float* nocapture %B) {
; CHECK-LABEL: @reduction_fmul(
; CHECK:       vector.body:
; CHECK:         [[VEC_PHI:%.*]] = phi <4 x float> [ <float 0.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %vector.ph ], [ [[TMP46:%.*]], %pred.load.continue14 ]
; CHECK:         [[TMP44:%.*]] = fmul fast <4 x float> [[VEC_PHI]], [[TMP23:%.*]]
; CHECK:         [[TMP45:%.*]] = fmul fast <4 x float> [[TMP44]], [[TMP43:%.*]]
; CHECK:         [[TMP46]] = select <4 x i1> [[TMP3:%.*]], <4 x float> [[TMP45]], <4 x float> [[VEC_PHI]]
; CHECK:       middle.block:
; CHECK:         [[TMP48:%.*]] = call fast float @llvm.vector.reduce.fmul.v4f32(float 1.000000e+00, <4 x float> [[TMP46]])
;
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i32 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %result.08 = phi float [ %fmul, %for.body ], [ 0.0, %entry ]
  %arrayidx = getelementptr inbounds float, float* %A, i32 %indvars.iv
  %l0 = load float, float* %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds float, float* %B, i32 %indvars.iv
  %l1 = load float, float* %arrayidx2, align 4
  %fmul0 = fmul fast float %result.08, %l0
  %fmul = fmul fast float %fmul0, %l1
  %indvars.iv.next = add i32 %indvars.iv, 1
  %exitcond = icmp eq i32 %indvars.iv.next, 257
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  %result.0.lcssa = phi float [ %fmul, %for.body ]
  ret float %result.0.lcssa
}

; Signed minimum of A[i] (icmp slt + select), starting from 1000.
; %B is not read by this function.
define i32 @reduction_min(i32* nocapture %A, i32* nocapture %B) {
; CHECK-LABEL: @reduction_min(
; CHECK:       vector.body:
; CHECK:         [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 1000, i32 1000, i32 1000, i32 1000>, %vector.ph ], [ [[TMP26:%.*]], %pred.load.continue6 ]
; CHECK:         [[TMP24:%.*]] = icmp slt <4 x i32> [[VEC_PHI]], [[TMP23:%.*]]
; CHECK:         [[TMP25:%.*]] = select <4 x i1> [[TMP24]], <4 x i32> [[VEC_PHI]], <4 x i32> [[TMP23]]
; CHECK:         [[TMP26]] = select <4 x i1> [[TMP0:%.*]], <4 x i32> [[TMP25]], <4 x i32> [[VEC_PHI]]
; CHECK:       middle.block:
; CHECK:         [[TMP28:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP26]])
;
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i32 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %result.08 = phi i32 [ %v0, %for.body ], [ 1000, %entry ]
  %arrayidx = getelementptr inbounds i32, i32* %A, i32 %indvars.iv
  %l0 = load i32, i32* %arrayidx, align 4
  %c0 = icmp slt i32 %result.08, %l0
  %v0 = select i1 %c0, i32 %result.08, i32 %l0
  %indvars.iv.next = add i32 %indvars.iv, 1
  %exitcond = icmp eq i32 %indvars.iv.next, 257
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  %result.0.lcssa = phi i32 [ %v0, %for.body ]
  ret i32 %result.0.lcssa
}

; Unsigned maximum of A[i] (icmp ugt + select), starting from 1000.
; %B is not read by this function.
define i32 @reduction_max(i32* nocapture %A, i32* nocapture %B) {
; CHECK-LABEL: @reduction_max(
; CHECK:       vector.body:
; CHECK:         [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 1000, i32 1000, i32 1000, i32 1000>, %vector.ph ], [ [[TMP26:%.*]], %pred.load.continue6 ]
; CHECK:         [[TMP24:%.*]] = icmp ugt <4 x i32> [[VEC_PHI]], [[TMP23:%.*]]
; CHECK:         [[TMP25:%.*]] = select <4 x i1> [[TMP24]], <4 x i32> [[VEC_PHI]], <4 x i32> [[TMP23]]
; CHECK:         [[TMP26]] = select <4 x i1> [[TMP0:%.*]], <4 x i32> [[TMP25]], <4 x i32> [[VEC_PHI]]
; CHECK:       middle.block:
; CHECK:         [[TMP28:%.*]] = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> [[TMP26]])
;
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i32 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %result.08 = phi i32 [ %v0, %for.body ], [ 1000, %entry ]
  %arrayidx = getelementptr inbounds i32, i32* %A, i32 %indvars.iv
  %l0 = load i32, i32* %arrayidx, align 4
  %c0 = icmp ugt i32 %result.08, %l0
  %v0 = select i1 %c0, i32 %result.08, i32 %l0
  %indvars.iv.next = add i32 %indvars.iv, 1
  %exitcond = icmp eq i32 %indvars.iv.next, 257
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  %result.0.lcssa = phi i32 [ %v0, %for.body ]
  ret i32 %result.0.lcssa
}