; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -slp-vectorizer -S < %s -mtriple=x86_64-apple-macosx -mcpu=corei7-avx | FileCheck %s
; RUN: opt -slp-vectorizer -slp-vectorize-hor -slp-vectorize-hor-store -S < %s -mtriple=x86_64-apple-macosx -mcpu=corei7-avx | FileCheck %s --check-prefix=STORE

; Tests that the SLP vectorizer turns in-loop horizontal fadd reductions over
; consecutive loads into a vector load + fmul + llvm.vector.reduce.fadd call.
; The STORE prefix runs the same checks with horizontal-reduction-to-store
; matching enabled (-slp-vectorize-hor / -slp-vectorize-hor-store).

; #include <stdint.h>
;
; int foo(float *A, int n) {
;   float sum = 0;
;   for (intptr_t i=0; i < n; ++i) {
;     sum += 7*A[i*4  ] +
;            7*A[i*4+1] +
;            7*A[i*4+2] +
;            7*A[i*4+3];
;   }
;   return sum;
; }

; Four consecutive loads of A[4i..4i+3], each multiplied by 7.0 and summed:
; expect a single <4 x float> load, a vector fmul by a splat of 7.0, and a
; call to @llvm.vector.reduce.fadd.v4f32 feeding the loop-carried fadd.
define i32 @add_red(float* %A, i32 %n) {
; CHECK-LABEL: @add_red(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[CMP31:%.*]] = icmp sgt i32 [[N:%.*]], 0
; CHECK-NEXT:    br i1 [[CMP31]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]]
; CHECK:       for.body.lr.ph:
; CHECK-NEXT:    [[TMP0:%.*]] = sext i32 [[N]] to i64
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[I_033:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[SUM_032:%.*]] = phi float [ 0.000000e+00, [[FOR_BODY_LR_PH]] ], [ [[ADD17:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[MUL:%.*]] = shl nsw i64 [[I_033]], 2
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[MUL]]
; CHECK-NEXT:    [[ADD28:%.*]] = or i64 [[MUL]], 1
; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD28]]
; CHECK-NEXT:    [[ADD829:%.*]] = or i64 [[MUL]], 2
; CHECK-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD829]]
; CHECK-NEXT:    [[ADD1330:%.*]] = or i64 [[MUL]], 3
; CHECK-NEXT:    [[ARRAYIDX14:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD1330]]
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[ARRAYIDX]] to <4 x float>*
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = fmul <4 x float> [[TMP2]], <float 7.000000e+00, float 7.000000e+00, float 7.000000e+00, float 7.000000e+00>
; CHECK-NEXT:    [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP3]])
; CHECK-NEXT:    [[ADD17]] = fadd fast float [[SUM_032]], [[TMP4]]
; CHECK-NEXT:    [[INC]] = add nsw i64 [[I_033]], 1
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP0]]
; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY]]
; CHECK:       for.cond.for.end_crit_edge:
; CHECK-NEXT:    [[PHITMP:%.*]] = fptosi float [[ADD17]] to i32
; CHECK-NEXT:    br label [[FOR_END]]
; CHECK:       for.end:
; CHECK-NEXT:    [[SUM_0_LCSSA:%.*]] = phi i32 [ [[PHITMP]], [[FOR_COND_FOR_END_CRIT_EDGE]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    ret i32 [[SUM_0_LCSSA]]
;
; STORE-LABEL: @add_red(
; STORE-NEXT:  entry:
; STORE-NEXT:    [[CMP31:%.*]] = icmp sgt i32 [[N:%.*]], 0
; STORE-NEXT:    br i1 [[CMP31]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]]
; STORE:       for.body.lr.ph:
; STORE-NEXT:    [[TMP0:%.*]] = sext i32 [[N]] to i64
; STORE-NEXT:    br label [[FOR_BODY:%.*]]
; STORE:       for.body:
; STORE-NEXT:    [[I_033:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
; STORE-NEXT:    [[SUM_032:%.*]] = phi float [ 0.000000e+00, [[FOR_BODY_LR_PH]] ], [ [[ADD17:%.*]], [[FOR_BODY]] ]
; STORE-NEXT:    [[MUL:%.*]] = shl nsw i64 [[I_033]], 2
; STORE-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[MUL]]
; STORE-NEXT:    [[ADD28:%.*]] = or i64 [[MUL]], 1
; STORE-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD28]]
; STORE-NEXT:    [[ADD829:%.*]] = or i64 [[MUL]], 2
; STORE-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD829]]
; STORE-NEXT:    [[ADD1330:%.*]] = or i64 [[MUL]], 3
; STORE-NEXT:    [[ARRAYIDX14:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD1330]]
; STORE-NEXT:    [[TMP1:%.*]] = bitcast float* [[ARRAYIDX]] to <4 x float>*
; STORE-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
; STORE-NEXT:    [[TMP3:%.*]] = fmul <4 x float> [[TMP2]], <float 7.000000e+00, float 7.000000e+00, float 7.000000e+00, float 7.000000e+00>
; STORE-NEXT:    [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP3]])
; STORE-NEXT:    [[ADD17]] = fadd fast float [[SUM_032]], [[TMP4]]
; STORE-NEXT:    [[INC]] = add nsw i64 [[I_033]], 1
; STORE-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP0]]
; STORE-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY]]
; STORE:       for.cond.for.end_crit_edge:
; STORE-NEXT:    [[PHITMP:%.*]] = fptosi float [[ADD17]] to i32
; STORE-NEXT:    br label [[FOR_END]]
; STORE:       for.end:
; STORE-NEXT:    [[SUM_0_LCSSA:%.*]] = phi i32 [ [[PHITMP]], [[FOR_COND_FOR_END_CRIT_EDGE]] ], [ 0, [[ENTRY:%.*]] ]
; STORE-NEXT:    ret i32 [[SUM_0_LCSSA]]
;
entry:
  %cmp31 = icmp sgt i32 %n, 0
  br i1 %cmp31, label %for.body.lr.ph, label %for.end

for.body.lr.ph:
  %0 = sext i32 %n to i64
  br label %for.body

for.body:
  %i.033 = phi i64 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
  %sum.032 = phi float [ 0.000000e+00, %for.body.lr.ph ], [ %add17, %for.body ]
  %mul = shl nsw i64 %i.033, 2
  %arrayidx = getelementptr inbounds float, float* %A, i64 %mul
  %1 = load float, float* %arrayidx, align 4
  %mul2 = fmul float %1, 7.000000e+00
  %add28 = or i64 %mul, 1
  %arrayidx4 = getelementptr inbounds float, float* %A, i64 %add28
  %2 = load float, float* %arrayidx4, align 4
  %mul5 = fmul float %2, 7.000000e+00
  %add6 = fadd fast float %mul2, %mul5
  %add829 = or i64 %mul, 2
  %arrayidx9 = getelementptr inbounds float, float* %A, i64 %add829
  %3 = load float, float* %arrayidx9, align 4
  %mul10 = fmul float %3, 7.000000e+00
  %add11 = fadd fast float %add6, %mul10
  %add1330 = or i64 %mul, 3
  %arrayidx14 = getelementptr inbounds float, float* %A, i64 %add1330
  %4 = load float, float* %arrayidx14, align 4
  %mul15 = fmul float %4, 7.000000e+00
  %add16 = fadd fast float %add11, %mul15
  %add17 = fadd fast float %sum.032, %add16
  %inc = add nsw i64 %i.033, 1
  %exitcond = icmp eq i64 %inc, %0
  br i1 %exitcond, label %for.cond.for.end_crit_edge, label %for.body

for.cond.for.end_crit_edge:
  %phitmp = fptosi float %add17 to i32
  br label %for.end

for.end:
  %sum.0.lcssa = phi i32 [ %phitmp, %for.cond.for.end_crit_edge ], [ 0, %entry ]
  ret i32 %sum.0.lcssa
}

; int foo(float * restrict A, float * restrict B, int n) {
;   float sum = 0;
;   for (intptr_t i=0; i < n; ++i) {
;     sum *= B[0]*A[i*4  ] +
;            B[1]*A[i*4+1] +
;            B[2]*A[i*4+2] +
;            B[3]*A[i*4+3];
;   }
;   return sum;
; }

; Dot product of loop-invariant B[0..3] with A[4i..4i+3], consumed by an fmul
; into the accumulator: expect the B loads hoisted to a single <4 x float>
; load in the preheader and a v4f32 fadd reduction of the element products.
define i32 @mul_red(float* noalias %A, float* noalias %B, i32 %n) {
; CHECK-LABEL: @mul_red(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[CMP38:%.*]] = icmp sgt i32 [[N:%.*]], 0
; CHECK-NEXT:    br i1 [[CMP38]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]]
; CHECK:       for.body.lr.ph:
; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 1
; CHECK-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[B]], i64 2
; CHECK-NEXT:    [[ARRAYIDX15:%.*]] = getelementptr inbounds float, float* [[B]], i64 3
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[B]] to <4 x float>*
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = sext i32 [[N]] to i64
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[I_040:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[SUM_039:%.*]] = phi float [ 0.000000e+00, [[FOR_BODY_LR_PH]] ], [ [[MUL21:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[MUL:%.*]] = shl nsw i64 [[I_040]], 2
; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[MUL]]
; CHECK-NEXT:    [[ADD35:%.*]] = or i64 [[MUL]], 1
; CHECK-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD35]]
; CHECK-NEXT:    [[ADD1136:%.*]] = or i64 [[MUL]], 2
; CHECK-NEXT:    [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD1136]]
; CHECK-NEXT:    [[ADD1737:%.*]] = or i64 [[MUL]], 3
; CHECK-NEXT:    [[ARRAYIDX18:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD1737]]
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast float* [[ARRAYIDX2]] to <4 x float>*
; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4
; CHECK-NEXT:    [[TMP5:%.*]] = fmul <4 x float> [[TMP1]], [[TMP4]]
; CHECK-NEXT:    [[TMP6:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP5]])
; CHECK-NEXT:    [[MUL21]] = fmul float [[SUM_039]], [[TMP6]]
; CHECK-NEXT:    [[INC]] = add nsw i64 [[I_040]], 1
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP2]]
; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY]]
; CHECK:       for.cond.for.end_crit_edge:
; CHECK-NEXT:    [[PHITMP:%.*]] = fptosi float [[MUL21]] to i32
; CHECK-NEXT:    br label [[FOR_END]]
; CHECK:       for.end:
; CHECK-NEXT:    [[SUM_0_LCSSA:%.*]] = phi i32 [ [[PHITMP]], [[FOR_COND_FOR_END_CRIT_EDGE]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    ret i32 [[SUM_0_LCSSA]]
;
; STORE-LABEL: @mul_red(
; STORE-NEXT:  entry:
; STORE-NEXT:    [[CMP38:%.*]] = icmp sgt i32 [[N:%.*]], 0
; STORE-NEXT:    br i1 [[CMP38]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]]
; STORE:       for.body.lr.ph:
; STORE-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 1
; STORE-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[B]], i64 2
; STORE-NEXT:    [[ARRAYIDX15:%.*]] = getelementptr inbounds float, float* [[B]], i64 3
; STORE-NEXT:    [[TMP0:%.*]] = bitcast float* [[B]] to <4 x float>*
; STORE-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4
; STORE-NEXT:    [[TMP2:%.*]] = sext i32 [[N]] to i64
; STORE-NEXT:    br label [[FOR_BODY:%.*]]
; STORE:       for.body:
; STORE-NEXT:    [[I_040:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
; STORE-NEXT:    [[SUM_039:%.*]] = phi float [ 0.000000e+00, [[FOR_BODY_LR_PH]] ], [ [[MUL21:%.*]], [[FOR_BODY]] ]
; STORE-NEXT:    [[MUL:%.*]] = shl nsw i64 [[I_040]], 2
; STORE-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[MUL]]
; STORE-NEXT:    [[ADD35:%.*]] = or i64 [[MUL]], 1
; STORE-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD35]]
; STORE-NEXT:    [[ADD1136:%.*]] = or i64 [[MUL]], 2
; STORE-NEXT:    [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD1136]]
; STORE-NEXT:    [[ADD1737:%.*]] = or i64 [[MUL]], 3
; STORE-NEXT:    [[ARRAYIDX18:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD1737]]
; STORE-NEXT:    [[TMP3:%.*]] = bitcast float* [[ARRAYIDX2]] to <4 x float>*
; STORE-NEXT:    [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4
; STORE-NEXT:    [[TMP5:%.*]] = fmul <4 x float> [[TMP1]], [[TMP4]]
; STORE-NEXT:    [[TMP6:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP5]])
; STORE-NEXT:    [[MUL21]] = fmul float [[SUM_039]], [[TMP6]]
; STORE-NEXT:    [[INC]] = add nsw i64 [[I_040]], 1
; STORE-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP2]]
; STORE-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY]]
; STORE:       for.cond.for.end_crit_edge:
; STORE-NEXT:    [[PHITMP:%.*]] = fptosi float [[MUL21]] to i32
; STORE-NEXT:    br label [[FOR_END]]
; STORE:       for.end:
; STORE-NEXT:    [[SUM_0_LCSSA:%.*]] = phi i32 [ [[PHITMP]], [[FOR_COND_FOR_END_CRIT_EDGE]] ], [ 0, [[ENTRY:%.*]] ]
; STORE-NEXT:    ret i32 [[SUM_0_LCSSA]]
;
entry:
  %cmp38 = icmp sgt i32 %n, 0
  br i1 %cmp38, label %for.body.lr.ph, label %for.end

for.body.lr.ph:
  %0 = load float, float* %B, align 4
  %arrayidx4 = getelementptr inbounds float, float* %B, i64 1
  %1 = load float, float* %arrayidx4, align 4
  %arrayidx9 = getelementptr inbounds float, float* %B, i64 2
  %2 = load float, float* %arrayidx9, align 4
  %arrayidx15 = getelementptr inbounds float, float* %B, i64 3
  %3 = load float, float* %arrayidx15, align 4
  %4 = sext i32 %n to i64
  br label %for.body

for.body:
  %i.040 = phi i64 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
  %sum.039 = phi float [ 0.000000e+00, %for.body.lr.ph ], [ %mul21, %for.body ]
  %mul = shl nsw i64 %i.040, 2
  %arrayidx2 = getelementptr inbounds float, float* %A, i64 %mul
  %5 = load float, float* %arrayidx2, align 4
  %mul3 = fmul float %0, %5
  %add35 = or i64 %mul, 1
  %arrayidx6 = getelementptr inbounds float, float* %A, i64 %add35
  %6 = load float, float* %arrayidx6, align 4
  %mul7 = fmul float %1, %6
  %add8 = fadd fast float %mul3, %mul7
  %add1136 = or i64 %mul, 2
  %arrayidx12 = getelementptr inbounds float, float* %A, i64 %add1136
  %7 = load float, float* %arrayidx12, align 4
  %mul13 = fmul float %2, %7
  %add14 = fadd fast float %add8, %mul13
  %add1737 = or i64 %mul, 3
  %arrayidx18 = getelementptr inbounds float, float* %A, i64 %add1737
  %8 = load float, float* %arrayidx18, align 4
  %mul19 = fmul float %3, %8
  %add20 = fadd fast float %add14, %mul19
  %mul21 = fmul float %sum.039, %add20
  %inc = add nsw i64 %i.040, 1
  %exitcond = icmp eq i64 %inc, %4
  br i1 %exitcond, label %for.cond.for.end_crit_edge, label %for.body

for.cond.for.end_crit_edge:
  %phitmp = fptosi float %mul21 to i32
  br label %for.end

for.end:
  %sum.0.lcssa = phi i32 [ %phitmp, %for.cond.for.end_crit_edge ], [ 0, %entry ]
  ret i32 %sum.0.lcssa
}

; int foo(float * restrict A, float * restrict B, int n) {
;   float sum = 0;
;   for (intptr_t i=0; i < n; ++i) {
;     sum += B[0]*A[i*6  ] +
;            B[1]*A[i*6+1] +
;            B[2]*A[i*6+2] +
;            B[3]*A[i*6+3] +
;            B[4]*A[i*6+4] +
;            B[5]*A[i*6+5] +
;            B[6]*A[i*6+6] +
;            B[7]*A[i*6+7] +
;            B[8]*A[i*6+8];
;   }
;   return sum;
; }

; Nine-element dot product (not a power of two): expect the first eight lanes
; vectorized as a <8 x float> load/fmul plus a v8f32 fadd reduction, with the
; ninth product (B[8]*A[6i+8]) kept scalar and added to the reduction result.
define i32 @long_red(float* noalias %A, float* noalias %B, i32 %n) {
; CHECK-LABEL: @long_red(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[CMP81:%.*]] = icmp sgt i32 [[N:%.*]], 0
; CHECK-NEXT:    br i1 [[CMP81]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]]
; CHECK:       for.body.lr.ph:
; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 1
; CHECK-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[B]], i64 2
; CHECK-NEXT:    [[ARRAYIDX15:%.*]] = getelementptr inbounds float, float* [[B]], i64 3
; CHECK-NEXT:    [[ARRAYIDX21:%.*]] = getelementptr inbounds float, float* [[B]], i64 4
; CHECK-NEXT:    [[ARRAYIDX27:%.*]] = getelementptr inbounds float, float* [[B]], i64 5
; CHECK-NEXT:    [[ARRAYIDX33:%.*]] = getelementptr inbounds float, float* [[B]], i64 6
; CHECK-NEXT:    [[ARRAYIDX39:%.*]] = getelementptr inbounds float, float* [[B]], i64 7
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[B]] to <8 x float>*
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4
; CHECK-NEXT:    [[ARRAYIDX45:%.*]] = getelementptr inbounds float, float* [[B]], i64 8
; CHECK-NEXT:    [[TMP2:%.*]] = load float, float* [[ARRAYIDX45]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = sext i32 [[N]] to i64
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[I_083:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[SUM_082:%.*]] = phi float [ 0.000000e+00, [[FOR_BODY_LR_PH]] ], [ [[ADD51:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i64 [[I_083]], 6
; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[MUL]]
; CHECK-NEXT:    [[ADD80:%.*]] = or i64 [[MUL]], 1
; CHECK-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD80]]
; CHECK-NEXT:    [[ADD11:%.*]] = add nsw i64 [[MUL]], 2
; CHECK-NEXT:    [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD11]]
; CHECK-NEXT:    [[ADD17:%.*]] = add nsw i64 [[MUL]], 3
; CHECK-NEXT:    [[ARRAYIDX18:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD17]]
; CHECK-NEXT:    [[ADD23:%.*]] = add nsw i64 [[MUL]], 4
; CHECK-NEXT:    [[ARRAYIDX24:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD23]]
; CHECK-NEXT:    [[ADD29:%.*]] = add nsw i64 [[MUL]], 5
; CHECK-NEXT:    [[ARRAYIDX30:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD29]]
; CHECK-NEXT:    [[ADD35:%.*]] = add nsw i64 [[MUL]], 6
; CHECK-NEXT:    [[ARRAYIDX36:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD35]]
; CHECK-NEXT:    [[ADD41:%.*]] = add nsw i64 [[MUL]], 7
; CHECK-NEXT:    [[ARRAYIDX42:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD41]]
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast float* [[ARRAYIDX2]] to <8 x float>*
; CHECK-NEXT:    [[TMP5:%.*]] = load <8 x float>, <8 x float>* [[TMP4]], align 4
; CHECK-NEXT:    [[TMP6:%.*]] = fmul fast <8 x float> [[TMP1]], [[TMP5]]
; CHECK-NEXT:    [[ADD47:%.*]] = add nsw i64 [[MUL]], 8
; CHECK-NEXT:    [[ARRAYIDX48:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD47]]
; CHECK-NEXT:    [[TMP7:%.*]] = load float, float* [[ARRAYIDX48]], align 4
; CHECK-NEXT:    [[MUL49:%.*]] = fmul fast float [[TMP2]], [[TMP7]]
; CHECK-NEXT:    [[TMP8:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[TMP6]])
; CHECK-NEXT:    [[TMP9:%.*]] = fadd fast float [[TMP8]], [[MUL49]]
; CHECK-NEXT:    [[ADD51]] = fadd fast float [[SUM_082]], [[TMP9]]
; CHECK-NEXT:    [[INC]] = add nsw i64 [[I_083]], 1
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP3]]
; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY]]
; CHECK:       for.cond.for.end_crit_edge:
; CHECK-NEXT:    [[PHITMP:%.*]] = fptosi float [[ADD51]] to i32
; CHECK-NEXT:    br label [[FOR_END]]
; CHECK:       for.end:
; CHECK-NEXT:    [[SUM_0_LCSSA:%.*]] = phi i32 [ [[PHITMP]], [[FOR_COND_FOR_END_CRIT_EDGE]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    ret i32 [[SUM_0_LCSSA]]
;
; STORE-LABEL: @long_red(
; STORE-NEXT:  entry:
; STORE-NEXT:    [[CMP81:%.*]] = icmp sgt i32 [[N:%.*]], 0
; STORE-NEXT:    br i1 [[CMP81]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]]
; STORE:       for.body.lr.ph:
; STORE-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 1
; STORE-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[B]], i64 2
; STORE-NEXT:    [[ARRAYIDX15:%.*]] = getelementptr inbounds float, float* [[B]], i64 3
; STORE-NEXT:    [[ARRAYIDX21:%.*]] = getelementptr inbounds float, float* [[B]], i64 4
; STORE-NEXT:    [[ARRAYIDX27:%.*]] = getelementptr inbounds float, float* [[B]], i64 5
; STORE-NEXT:    [[ARRAYIDX33:%.*]] = getelementptr inbounds float, float* [[B]], i64 6
; STORE-NEXT:    [[ARRAYIDX39:%.*]] = getelementptr inbounds float, float* [[B]], i64 7
; STORE-NEXT:    [[TMP0:%.*]] = bitcast float* [[B]] to <8 x float>*
; STORE-NEXT:    [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4
; STORE-NEXT:    [[ARRAYIDX45:%.*]] = getelementptr inbounds float, float* [[B]], i64 8
; STORE-NEXT:    [[TMP2:%.*]] = load float, float* [[ARRAYIDX45]], align 4
; STORE-NEXT:    [[TMP3:%.*]] = sext i32 [[N]] to i64
; STORE-NEXT:    br label [[FOR_BODY:%.*]]
; STORE:       for.body:
; STORE-NEXT:    [[I_083:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
; STORE-NEXT:    [[SUM_082:%.*]] = phi float [ 0.000000e+00, [[FOR_BODY_LR_PH]] ], [ [[ADD51:%.*]], [[FOR_BODY]] ]
; STORE-NEXT:    [[MUL:%.*]] = mul nsw i64 [[I_083]], 6
; STORE-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[MUL]]
; STORE-NEXT:    [[ADD80:%.*]] = or i64 [[MUL]], 1
; STORE-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD80]]
; STORE-NEXT:    [[ADD11:%.*]] = add nsw i64 [[MUL]], 2
; STORE-NEXT:    [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD11]]
; STORE-NEXT:    [[ADD17:%.*]] = add nsw i64 [[MUL]], 3
; STORE-NEXT:    [[ARRAYIDX18:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD17]]
; STORE-NEXT:    [[ADD23:%.*]] = add nsw i64 [[MUL]], 4
; STORE-NEXT:    [[ARRAYIDX24:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD23]]
; STORE-NEXT:    [[ADD29:%.*]] = add nsw i64 [[MUL]], 5
; STORE-NEXT:    [[ARRAYIDX30:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD29]]
; STORE-NEXT:    [[ADD35:%.*]] = add nsw i64 [[MUL]], 6
; STORE-NEXT:    [[ARRAYIDX36:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD35]]
; STORE-NEXT:    [[ADD41:%.*]] = add nsw i64 [[MUL]], 7
; STORE-NEXT:    [[ARRAYIDX42:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD41]]
; STORE-NEXT:    [[TMP4:%.*]] = bitcast float* [[ARRAYIDX2]] to <8 x float>*
; STORE-NEXT:    [[TMP5:%.*]] = load <8 x float>, <8 x float>* [[TMP4]], align 4
; STORE-NEXT:    [[TMP6:%.*]] = fmul fast <8 x float> [[TMP1]], [[TMP5]]
; STORE-NEXT:    [[ADD47:%.*]] = add nsw i64 [[MUL]], 8
; STORE-NEXT:    [[ARRAYIDX48:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD47]]
; STORE-NEXT:    [[TMP7:%.*]] = load float, float* [[ARRAYIDX48]], align 4
; STORE-NEXT:    [[MUL49:%.*]] = fmul fast float [[TMP2]], [[TMP7]]
; STORE-NEXT:    [[TMP8:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[TMP6]])
; STORE-NEXT:    [[TMP9:%.*]] = fadd fast float [[TMP8]], [[MUL49]]
; STORE-NEXT:    [[ADD51]] = fadd fast float [[SUM_082]], [[TMP9]]
; STORE-NEXT:    [[INC]] = add nsw i64 [[I_083]], 1
; STORE-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP3]]
; STORE-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY]]
; STORE:       for.cond.for.end_crit_edge:
; STORE-NEXT:    [[PHITMP:%.*]] = fptosi float [[ADD51]] to i32
; STORE-NEXT:    br label [[FOR_END]]
; STORE:       for.end:
; STORE-NEXT:    [[SUM_0_LCSSA:%.*]] = phi i32 [ [[PHITMP]], [[FOR_COND_FOR_END_CRIT_EDGE]] ], [ 0, [[ENTRY:%.*]] ]
; STORE-NEXT:    ret i32 [[SUM_0_LCSSA]]
;
entry:
  %cmp81 = icmp sgt i32 %n, 0
  br i1 %cmp81, label %for.body.lr.ph, label %for.end

for.body.lr.ph:
  %0 = load float, float* %B, align 4
  %arrayidx4 = getelementptr inbounds float, float* %B, i64 1
  %1 = load float, float* %arrayidx4, align 4
  %arrayidx9 = getelementptr inbounds float, float* %B, i64 2
  %2 = load float, float* %arrayidx9, align 4
  %arrayidx15 = getelementptr inbounds float, float* %B, i64 3
  %3 = load float, float* %arrayidx15, align 4
  %arrayidx21 = getelementptr inbounds float, float* %B, i64 4
  %4 = load float, float* %arrayidx21, align 4
  %arrayidx27 = getelementptr inbounds float, float* %B, i64 5
  %5 = load float, float* %arrayidx27, align 4
  %arrayidx33 = getelementptr inbounds float, float* %B, i64 6
  %6 = load float, float* %arrayidx33, align 4
  %arrayidx39 = getelementptr inbounds float, float* %B, i64 7
  %7 = load float, float* %arrayidx39, align 4
  %arrayidx45 = getelementptr inbounds float, float* %B, i64 8
  %8 = load float, float* %arrayidx45, align 4
  %9 = sext i32 %n to i64
  br label %for.body

for.body:
  %i.083 = phi i64 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
  %sum.082 = phi float [ 0.000000e+00, %for.body.lr.ph ], [ %add51, %for.body ]
  %mul = mul nsw i64 %i.083, 6
  %arrayidx2 = getelementptr inbounds float, float* %A, i64 %mul
  %10 = load float, float* %arrayidx2, align 4
  %mul3 = fmul fast float %0, %10
  %add80 = or i64 %mul, 1
  %arrayidx6 = getelementptr inbounds float, float* %A, i64 %add80
  %11 = load float, float* %arrayidx6, align 4
  %mul7 = fmul fast float %1, %11
  %add8 = fadd fast float %mul3, %mul7
  %add11 = add nsw i64 %mul, 2
  %arrayidx12 = getelementptr inbounds float, float* %A, i64 %add11
  %12 = load float, float* %arrayidx12, align 4
  %mul13 = fmul fast float %2, %12
  %add14 = fadd fast float %add8, %mul13
  %add17 = add nsw i64 %mul, 3
  %arrayidx18 = getelementptr inbounds float, float* %A, i64 %add17
  %13 = load float, float* %arrayidx18, align 4
  %mul19 = fmul fast float %3, %13
  %add20 = fadd fast float %add14, %mul19
  %add23 = add nsw i64 %mul, 4
  %arrayidx24 = getelementptr inbounds float, float* %A, i64 %add23
  %14 = load float, float* %arrayidx24, align 4
  %mul25 = fmul fast float %4, %14
  %add26 = fadd fast float %add20, %mul25
  %add29 = add nsw i64 %mul, 5
  %arrayidx30 = getelementptr inbounds float, float* %A, i64 %add29
  %15 = load float, float* %arrayidx30, align 4
  %mul31 = fmul fast float %5, %15
  %add32 = fadd fast float %add26, %mul31
  %add35 = add nsw i64 %mul, 6
  %arrayidx36 = getelementptr inbounds float, float* %A, i64 %add35
  %16 = load float, float* %arrayidx36, align 4
  %mul37 = fmul fast float %6, %16
  %add38 = fadd fast float %add32, %mul37
  %add41 = add nsw i64 %mul, 7
  %arrayidx42 = getelementptr inbounds float, float* %A, i64 %add41
  %17 = load float, float* %arrayidx42, align 4
  %mul43 = fmul fast float %7, %17
  %add44 = fadd fast float %add38, %mul43
  %add47 = add nsw i64 %mul, 8
  %arrayidx48 = getelementptr inbounds float, float* %A, i64 %add47
  %18 = load float, float* %arrayidx48, align 4
  %mul49 = fmul fast float %8, %18
  %add50 = fadd fast float %add44, %mul49
  %add51 = fadd fast float %sum.082, %add50
  %inc = add nsw i64 %i.083, 1
  %exitcond = icmp eq i64 %inc, %9
  br i1 %exitcond, label %for.cond.for.end_crit_edge, label %for.body

for.cond.for.end_crit_edge:
  %phitmp = fptosi float %add51 to i32
  br label %for.end

for.end:
  %sum.0.lcssa = phi i32 [ %phitmp, %for.cond.for.end_crit_edge ], [ 0, %entry ]
  ret i32 %sum.0.lcssa
}

; int foo(float * restrict A, float * restrict B, int n) {
;   float sum = 0;
;   for (intptr_t i=0; i < n; ++i) {
;     sum += B[0]*A[i*4  ];
;     sum += B[1]*A[i*4+1];
;     sum += B[2]*A[i*4+2];
;     sum += B[3]*A[i*4+3];
;   }
;   return sum;
; }

; Same dot product but written as a chain of accumulator updates through the
; loop-carried phi: expect the chain recognized as a reduction, with the phi
; value added once after the v4f32 fadd reduction ([[OP_EXTRA]]).
define i32 @chain_red(float* noalias %A, float* noalias %B, i32 %n) {
; CHECK-LABEL: @chain_red(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[CMP41:%.*]] = icmp sgt i32 [[N:%.*]], 0
; CHECK-NEXT:    br i1 [[CMP41]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]]
; CHECK:       for.body.lr.ph:
; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 1
; CHECK-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[B]], i64 2
; CHECK-NEXT:    [[ARRAYIDX16:%.*]] = getelementptr inbounds float, float* [[B]], i64 3
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[B]] to <4 x float>*
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = sext i32 [[N]] to i64
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[I_043:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[SUM_042:%.*]] = phi float [ 0.000000e+00, [[FOR_BODY_LR_PH]] ], [ [[OP_EXTRA:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[MUL:%.*]] = shl nsw i64 [[I_043]], 2
; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[MUL]]
; CHECK-NEXT:    [[ADD638:%.*]] = or i64 [[MUL]], 1
; CHECK-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD638]]
; CHECK-NEXT:    [[ADD1239:%.*]] = or i64 [[MUL]], 2
; CHECK-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD1239]]
; CHECK-NEXT:    [[ADD1840:%.*]] = or i64 [[MUL]], 3
; CHECK-NEXT:    [[ARRAYIDX19:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD1840]]
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast float* [[ARRAYIDX2]] to <4 x float>*
; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4
; CHECK-NEXT:    [[TMP5:%.*]] = fmul fast <4 x float> [[TMP1]], [[TMP4]]
; CHECK-NEXT:    [[TMP6:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP5]])
; CHECK-NEXT:    [[OP_EXTRA]] = fadd fast float [[TMP6]], [[SUM_042]]
; CHECK-NEXT:    [[INC]] = add nsw i64 [[I_043]], 1
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP2]]
; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY]]
; CHECK:       for.cond.for.end_crit_edge:
; CHECK-NEXT:    [[PHITMP:%.*]] = fptosi float [[OP_EXTRA]] to i32
; CHECK-NEXT:    br label [[FOR_END]]
; CHECK:       for.end:
; CHECK-NEXT:    [[SUM_0_LCSSA:%.*]] = phi i32 [ [[PHITMP]], [[FOR_COND_FOR_END_CRIT_EDGE]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    ret i32 [[SUM_0_LCSSA]]
;
; STORE-LABEL: @chain_red(
; STORE-NEXT:  entry:
; STORE-NEXT:    [[CMP41:%.*]] = icmp sgt i32 [[N:%.*]], 0
; STORE-NEXT:    br i1 [[CMP41]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]]
; STORE:       for.body.lr.ph:
; STORE-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 1
; STORE-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[B]], i64 2
; STORE-NEXT:    [[ARRAYIDX16:%.*]] = getelementptr inbounds float, float* [[B]], i64 3
; STORE-NEXT:    [[TMP0:%.*]] = bitcast float* [[B]] to <4 x float>*
; STORE-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4
; STORE-NEXT:    [[TMP2:%.*]] = sext i32 [[N]] to i64
; STORE-NEXT:    br label [[FOR_BODY:%.*]]
; STORE:       for.body:
; STORE-NEXT:    [[I_043:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
; STORE-NEXT:    [[SUM_042:%.*]] = phi float [ 0.000000e+00, [[FOR_BODY_LR_PH]] ], [ [[OP_EXTRA:%.*]], [[FOR_BODY]] ]
; STORE-NEXT:    [[MUL:%.*]] = shl nsw i64 [[I_043]], 2
; STORE-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[MUL]]
; STORE-NEXT:    [[ADD638:%.*]] = or i64 [[MUL]], 1
; STORE-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD638]]
; STORE-NEXT:    [[ADD1239:%.*]] = or i64 [[MUL]], 2
; STORE-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD1239]]
; STORE-NEXT:    [[ADD1840:%.*]] = or i64 [[MUL]], 3
; STORE-NEXT:    [[ARRAYIDX19:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD1840]]
; STORE-NEXT:    [[TMP3:%.*]] = bitcast float* [[ARRAYIDX2]] to <4 x float>*
; STORE-NEXT:    [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4
; STORE-NEXT:    [[TMP5:%.*]] = fmul fast <4 x float> [[TMP1]], [[TMP4]]
; STORE-NEXT:    [[TMP6:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP5]])
; STORE-NEXT:    [[OP_EXTRA]] = fadd fast float [[TMP6]], [[SUM_042]]
; STORE-NEXT:    [[INC]] = add nsw i64 [[I_043]], 1
; STORE-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP2]]
; STORE-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY]]
; STORE:       for.cond.for.end_crit_edge:
; STORE-NEXT:    [[PHITMP:%.*]] = fptosi float [[OP_EXTRA]] to i32
; STORE-NEXT:    br label [[FOR_END]]
; STORE:       for.end:
; STORE-NEXT:    [[SUM_0_LCSSA:%.*]] = phi i32 [ [[PHITMP]], [[FOR_COND_FOR_END_CRIT_EDGE]] ], [ 0, [[ENTRY:%.*]] ]
; STORE-NEXT:    ret i32 [[SUM_0_LCSSA]]
;
entry:
  %cmp41 = icmp sgt i32 %n, 0
  br i1 %cmp41, label %for.body.lr.ph, label %for.end

for.body.lr.ph:
  %0 = load float, float* %B, align 4
  %arrayidx4 = getelementptr inbounds float, float* %B, i64 1
  %1 = load float, float* %arrayidx4, align 4
  %arrayidx10 = getelementptr inbounds float, float* %B, i64 2
  %2 = load float, float* %arrayidx10, align 4
  %arrayidx16 = getelementptr inbounds float, float* %B, i64 3
  %3 = load float, float* %arrayidx16, align 4
  %4 = sext i32 %n to i64
  br label %for.body

for.body:
  %i.043 = phi i64 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
  %sum.042 = phi float [ 0.000000e+00, %for.body.lr.ph ], [ %add21, %for.body ]
  %mul = shl nsw i64 %i.043, 2
  %arrayidx2 = getelementptr inbounds float, float* %A, i64 %mul
  %5 = load float, float* %arrayidx2, align 4
  %mul3 = fmul fast float %0, %5
  %add = fadd fast float %sum.042, %mul3
  %add638 = or i64 %mul, 1
  %arrayidx7 = getelementptr inbounds float, float* %A, i64 %add638
  %6 = load float, float* %arrayidx7, align 4
  %mul8 = fmul fast float %1, %6
  %add9 = fadd fast float %add, %mul8
  %add1239 = or i64 %mul, 2
  %arrayidx13 = getelementptr inbounds float, float* %A, i64 %add1239
  %7 = load float, float* %arrayidx13, align 4
  %mul14 = fmul fast float %2, %7
  %add15 = fadd fast float %add9, %mul14
  %add1840 = or i64 %mul, 3
  %arrayidx19 = getelementptr inbounds float, float* %A, i64 %add1840
  %8 = load float, float* %arrayidx19, align 4
  %mul20 = fmul fast float %3, %8
  %add21 = fadd fast float %add15, %mul20
  %inc = add nsw i64 %i.043, 1
  %exitcond = icmp eq i64 %inc, %4
  br i1 %exitcond, label %for.cond.for.end_crit_edge, label %for.body

for.cond.for.end_crit_edge:
  %phitmp = fptosi float %add21 to i32
  br label %for.end

for.end:
  %sum.0.lcssa = phi i32 [ %phitmp, %for.cond.for.end_crit_edge ], [ 0, %entry ]
  ret i32 %sum.0.lcssa
}

; void foo(const float *arg_A, unsigned arg_B, float *array) {
;   for (uint32_t i = 0; i < 6; ++i) {
;     const float *ptr = arg_A + i;
;     float w0 = array[i * 4 + 0];
;     float w1 = array[i * 4 + 1];
;     float w2 = array[i * 4 + 2];
;     float w3 = array[i * 4 + 3];
;
;     for (unsigned j = 0; j < arg_B; ++j) {
;       const float x1 = *ptr - (-1.1f * w0) - (1.2f * w1);
;       const float x2 = (2.1f * x1) + (-2.2f * w0) +
(2.3f * w1); 635; const float x3 = x2 - (-3.1f * w2) - (3.2f * w3); 636; const float x4 = x3 + (-4.0f * w2) + w3; 637; w1 = w0; 638; w0 = x1; 639; w3 = w2; 640; w2 = x3; 641; } 642; 643; array[i * 4 + 0] = w0; 644; array[i * 4 + 1] = w1; 645; array[i * 4 + 2] = w2; 646; array[i * 4 + 3] = w3; 647; } 648; } 649 650define void @foo(float* nocapture readonly %arg_A, i32 %arg_B, float* nocapture %array) { 651; CHECK-LABEL: @foo( 652; CHECK-NEXT: entry: 653; CHECK-NEXT: [[CMP1495:%.*]] = icmp eq i32 [[ARG_B:%.*]], 0 654; CHECK-NEXT: br label [[FOR_BODY:%.*]] 655; CHECK: for.cond.cleanup: 656; CHECK-NEXT: ret void 657; CHECK: for.body: 658; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_COND_CLEANUP15:%.*]] ] 659; CHECK-NEXT: [[TMP0:%.*]] = shl i64 [[INDVARS_IV]], 2 660; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[ARRAY:%.*]], i64 [[TMP0]] 661; CHECK-NEXT: [[TMP1:%.*]] = load float, float* [[ARRAYIDX]], align 4 662; CHECK-NEXT: [[TMP2:%.*]] = or i64 [[TMP0]], 1 663; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[ARRAY]], i64 [[TMP2]] 664; CHECK-NEXT: [[TMP3:%.*]] = load float, float* [[ARRAYIDX4]], align 4 665; CHECK-NEXT: [[TMP4:%.*]] = or i64 [[TMP0]], 2 666; CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds float, float* [[ARRAY]], i64 [[TMP4]] 667; CHECK-NEXT: [[TMP5:%.*]] = load float, float* [[ARRAYIDX8]], align 4 668; CHECK-NEXT: [[TMP6:%.*]] = or i64 [[TMP0]], 3 669; CHECK-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[ARRAY]], i64 [[TMP6]] 670; CHECK-NEXT: [[TMP7:%.*]] = load float, float* [[ARRAYIDX12]], align 4 671; CHECK-NEXT: br i1 [[CMP1495]], label [[FOR_COND_CLEANUP15]], label [[FOR_BODY16_LR_PH:%.*]] 672; CHECK: for.body16.lr.ph: 673; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds float, float* [[ARG_A:%.*]], i64 [[INDVARS_IV]] 674; CHECK-NEXT: [[TMP8:%.*]] = load float, float* [[ADD_PTR]], align 4 675; CHECK-NEXT: br label 
[[FOR_BODY16:%.*]] 676; CHECK: for.cond.cleanup15: 677; CHECK-NEXT: [[W2_0_LCSSA:%.*]] = phi float [ [[TMP5]], [[FOR_BODY]] ], [ [[SUB28:%.*]], [[FOR_BODY16]] ] 678; CHECK-NEXT: [[W3_0_LCSSA:%.*]] = phi float [ [[TMP7]], [[FOR_BODY]] ], [ [[W2_096:%.*]], [[FOR_BODY16]] ] 679; CHECK-NEXT: [[W1_0_LCSSA:%.*]] = phi float [ [[TMP3]], [[FOR_BODY]] ], [ [[W0_0100:%.*]], [[FOR_BODY16]] ] 680; CHECK-NEXT: [[W0_0_LCSSA:%.*]] = phi float [ [[TMP1]], [[FOR_BODY]] ], [ [[SUB19:%.*]], [[FOR_BODY16]] ] 681; CHECK-NEXT: store float [[W0_0_LCSSA]], float* [[ARRAYIDX]], align 4 682; CHECK-NEXT: store float [[W1_0_LCSSA]], float* [[ARRAYIDX4]], align 4 683; CHECK-NEXT: store float [[W2_0_LCSSA]], float* [[ARRAYIDX8]], align 4 684; CHECK-NEXT: store float [[W3_0_LCSSA]], float* [[ARRAYIDX12]], align 4 685; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 686; CHECK-NEXT: [[EXITCOND109:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 6 687; CHECK-NEXT: br i1 [[EXITCOND109]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]] 688; CHECK: for.body16: 689; CHECK-NEXT: [[W0_0100]] = phi float [ [[TMP1]], [[FOR_BODY16_LR_PH]] ], [ [[SUB19]], [[FOR_BODY16]] ] 690; CHECK-NEXT: [[W1_099:%.*]] = phi float [ [[TMP3]], [[FOR_BODY16_LR_PH]] ], [ [[W0_0100]], [[FOR_BODY16]] ] 691; CHECK-NEXT: [[J_098:%.*]] = phi i32 [ 0, [[FOR_BODY16_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY16]] ] 692; CHECK-NEXT: [[W3_097:%.*]] = phi float [ [[TMP7]], [[FOR_BODY16_LR_PH]] ], [ [[W2_096]], [[FOR_BODY16]] ] 693; CHECK-NEXT: [[W2_096]] = phi float [ [[TMP5]], [[FOR_BODY16_LR_PH]] ], [ [[SUB28]], [[FOR_BODY16]] ] 694; CHECK-NEXT: [[MUL17:%.*]] = fmul fast float [[W0_0100]], 0x3FF19999A0000000 695; CHECK-NEXT: [[MUL18_NEG:%.*]] = fmul fast float [[W1_099]], 0xBFF3333340000000 696; CHECK-NEXT: [[SUB92:%.*]] = fadd fast float [[MUL17]], [[MUL18_NEG]] 697; CHECK-NEXT: [[SUB19]] = fadd fast float [[SUB92]], [[TMP8]] 698; CHECK-NEXT: [[MUL20:%.*]] = fmul fast float [[SUB19]], 0x4000CCCCC0000000 699; CHECK-NEXT: 
[[MUL21_NEG:%.*]] = fmul fast float [[W0_0100]], 0xC0019999A0000000 700; CHECK-NEXT: [[MUL23:%.*]] = fmul fast float [[W1_099]], 0x4002666660000000 701; CHECK-NEXT: [[MUL25:%.*]] = fmul fast float [[W2_096]], 0x4008CCCCC0000000 702; CHECK-NEXT: [[MUL27_NEG:%.*]] = fmul fast float [[W3_097]], 0xC0099999A0000000 703; CHECK-NEXT: [[ADD2293:%.*]] = fadd fast float [[MUL27_NEG]], [[MUL25]] 704; CHECK-NEXT: [[ADD24:%.*]] = fadd fast float [[ADD2293]], [[MUL23]] 705; CHECK-NEXT: [[SUB2694:%.*]] = fadd fast float [[ADD24]], [[MUL21_NEG]] 706; CHECK-NEXT: [[SUB28]] = fadd fast float [[SUB2694]], [[MUL20]] 707; CHECK-NEXT: [[INC]] = add nuw i32 [[J_098]], 1 708; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[ARG_B]] 709; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP15]], label [[FOR_BODY16]] 710; 711; STORE-LABEL: @foo( 712; STORE-NEXT: entry: 713; STORE-NEXT: [[CMP1495:%.*]] = icmp eq i32 [[ARG_B:%.*]], 0 714; STORE-NEXT: br label [[FOR_BODY:%.*]] 715; STORE: for.cond.cleanup: 716; STORE-NEXT: ret void 717; STORE: for.body: 718; STORE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_COND_CLEANUP15:%.*]] ] 719; STORE-NEXT: [[TMP0:%.*]] = shl i64 [[INDVARS_IV]], 2 720; STORE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[ARRAY:%.*]], i64 [[TMP0]] 721; STORE-NEXT: [[TMP1:%.*]] = load float, float* [[ARRAYIDX]], align 4 722; STORE-NEXT: [[TMP2:%.*]] = or i64 [[TMP0]], 1 723; STORE-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[ARRAY]], i64 [[TMP2]] 724; STORE-NEXT: [[TMP3:%.*]] = load float, float* [[ARRAYIDX4]], align 4 725; STORE-NEXT: [[TMP4:%.*]] = or i64 [[TMP0]], 2 726; STORE-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds float, float* [[ARRAY]], i64 [[TMP4]] 727; STORE-NEXT: [[TMP5:%.*]] = load float, float* [[ARRAYIDX8]], align 4 728; STORE-NEXT: [[TMP6:%.*]] = or i64 [[TMP0]], 3 729; STORE-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[ARRAY]], i64 
[[TMP6]] 730; STORE-NEXT: [[TMP7:%.*]] = load float, float* [[ARRAYIDX12]], align 4 731; STORE-NEXT: br i1 [[CMP1495]], label [[FOR_COND_CLEANUP15]], label [[FOR_BODY16_LR_PH:%.*]] 732; STORE: for.body16.lr.ph: 733; STORE-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds float, float* [[ARG_A:%.*]], i64 [[INDVARS_IV]] 734; STORE-NEXT: [[TMP8:%.*]] = load float, float* [[ADD_PTR]], align 4 735; STORE-NEXT: br label [[FOR_BODY16:%.*]] 736; STORE: for.cond.cleanup15: 737; STORE-NEXT: [[W2_0_LCSSA:%.*]] = phi float [ [[TMP5]], [[FOR_BODY]] ], [ [[SUB28:%.*]], [[FOR_BODY16]] ] 738; STORE-NEXT: [[W3_0_LCSSA:%.*]] = phi float [ [[TMP7]], [[FOR_BODY]] ], [ [[W2_096:%.*]], [[FOR_BODY16]] ] 739; STORE-NEXT: [[W1_0_LCSSA:%.*]] = phi float [ [[TMP3]], [[FOR_BODY]] ], [ [[W0_0100:%.*]], [[FOR_BODY16]] ] 740; STORE-NEXT: [[W0_0_LCSSA:%.*]] = phi float [ [[TMP1]], [[FOR_BODY]] ], [ [[SUB19:%.*]], [[FOR_BODY16]] ] 741; STORE-NEXT: store float [[W0_0_LCSSA]], float* [[ARRAYIDX]], align 4 742; STORE-NEXT: store float [[W1_0_LCSSA]], float* [[ARRAYIDX4]], align 4 743; STORE-NEXT: store float [[W2_0_LCSSA]], float* [[ARRAYIDX8]], align 4 744; STORE-NEXT: store float [[W3_0_LCSSA]], float* [[ARRAYIDX12]], align 4 745; STORE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 746; STORE-NEXT: [[EXITCOND109:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 6 747; STORE-NEXT: br i1 [[EXITCOND109]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]] 748; STORE: for.body16: 749; STORE-NEXT: [[W0_0100]] = phi float [ [[TMP1]], [[FOR_BODY16_LR_PH]] ], [ [[SUB19]], [[FOR_BODY16]] ] 750; STORE-NEXT: [[W1_099:%.*]] = phi float [ [[TMP3]], [[FOR_BODY16_LR_PH]] ], [ [[W0_0100]], [[FOR_BODY16]] ] 751; STORE-NEXT: [[J_098:%.*]] = phi i32 [ 0, [[FOR_BODY16_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY16]] ] 752; STORE-NEXT: [[W3_097:%.*]] = phi float [ [[TMP7]], [[FOR_BODY16_LR_PH]] ], [ [[W2_096]], [[FOR_BODY16]] ] 753; STORE-NEXT: [[W2_096]] = phi float [ [[TMP5]], [[FOR_BODY16_LR_PH]] ], [ [[SUB28]], 
[[FOR_BODY16]] ] 754; STORE-NEXT: [[MUL17:%.*]] = fmul fast float [[W0_0100]], 0x3FF19999A0000000 755; STORE-NEXT: [[MUL18_NEG:%.*]] = fmul fast float [[W1_099]], 0xBFF3333340000000 756; STORE-NEXT: [[SUB92:%.*]] = fadd fast float [[MUL17]], [[MUL18_NEG]] 757; STORE-NEXT: [[SUB19]] = fadd fast float [[SUB92]], [[TMP8]] 758; STORE-NEXT: [[MUL20:%.*]] = fmul fast float [[SUB19]], 0x4000CCCCC0000000 759; STORE-NEXT: [[MUL21_NEG:%.*]] = fmul fast float [[W0_0100]], 0xC0019999A0000000 760; STORE-NEXT: [[MUL23:%.*]] = fmul fast float [[W1_099]], 0x4002666660000000 761; STORE-NEXT: [[MUL25:%.*]] = fmul fast float [[W2_096]], 0x4008CCCCC0000000 762; STORE-NEXT: [[MUL27_NEG:%.*]] = fmul fast float [[W3_097]], 0xC0099999A0000000 763; STORE-NEXT: [[ADD2293:%.*]] = fadd fast float [[MUL27_NEG]], [[MUL25]] 764; STORE-NEXT: [[ADD24:%.*]] = fadd fast float [[ADD2293]], [[MUL23]] 765; STORE-NEXT: [[SUB2694:%.*]] = fadd fast float [[ADD24]], [[MUL21_NEG]] 766; STORE-NEXT: [[SUB28]] = fadd fast float [[SUB2694]], [[MUL20]] 767; STORE-NEXT: [[INC]] = add nuw i32 [[J_098]], 1 768; STORE-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[ARG_B]] 769; STORE-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP15]], label [[FOR_BODY16]] 770; 771entry: 772 %cmp1495 = icmp eq i32 %arg_B, 0 773 br label %for.body 774 775for.cond.cleanup: ; preds = %for.cond.cleanup15 776 ret void 777 778for.body: ; preds = %for.cond.cleanup15, %entry 779 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.cond.cleanup15 ] 780 %0 = shl i64 %indvars.iv, 2 781 %arrayidx = getelementptr inbounds float, float* %array, i64 %0 782 %1 = load float, float* %arrayidx, align 4 783 %2 = or i64 %0, 1 784 %arrayidx4 = getelementptr inbounds float, float* %array, i64 %2 785 %3 = load float, float* %arrayidx4, align 4 786 %4 = or i64 %0, 2 787 %arrayidx8 = getelementptr inbounds float, float* %array, i64 %4 788 %5 = load float, float* %arrayidx8, align 4 789 %6 = or i64 %0, 3 790 %arrayidx12 = getelementptr inbounds 
float, float* %array, i64 %6 791 %7 = load float, float* %arrayidx12, align 4 792 br i1 %cmp1495, label %for.cond.cleanup15, label %for.body16.lr.ph 793 794for.body16.lr.ph: ; preds = %for.body 795 %add.ptr = getelementptr inbounds float, float* %arg_A, i64 %indvars.iv 796 %8 = load float, float* %add.ptr, align 4 797 br label %for.body16 798 799for.cond.cleanup15: ; preds = %for.body16, %for.body 800 %w2.0.lcssa = phi float [ %5, %for.body ], [ %sub28, %for.body16 ] 801 %w3.0.lcssa = phi float [ %7, %for.body ], [ %w2.096, %for.body16 ] 802 %w1.0.lcssa = phi float [ %3, %for.body ], [ %w0.0100, %for.body16 ] 803 %w0.0.lcssa = phi float [ %1, %for.body ], [ %sub19, %for.body16 ] 804 store float %w0.0.lcssa, float* %arrayidx, align 4 805 store float %w1.0.lcssa, float* %arrayidx4, align 4 806 store float %w2.0.lcssa, float* %arrayidx8, align 4 807 store float %w3.0.lcssa, float* %arrayidx12, align 4 808 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 809 %exitcond109 = icmp eq i64 %indvars.iv.next, 6 810 br i1 %exitcond109, label %for.cond.cleanup, label %for.body 811 812for.body16: ; preds = %for.body16, %for.body16.lr.ph 813 %w0.0100 = phi float [ %1, %for.body16.lr.ph ], [ %sub19, %for.body16 ] 814 %w1.099 = phi float [ %3, %for.body16.lr.ph ], [ %w0.0100, %for.body16 ] 815 %j.098 = phi i32 [ 0, %for.body16.lr.ph ], [ %inc, %for.body16 ] 816 %w3.097 = phi float [ %7, %for.body16.lr.ph ], [ %w2.096, %for.body16 ] 817 %w2.096 = phi float [ %5, %for.body16.lr.ph ], [ %sub28, %for.body16 ] 818 %mul17 = fmul fast float %w0.0100, 0x3FF19999A0000000 819 %mul18.neg = fmul fast float %w1.099, 0xBFF3333340000000 820 %sub92 = fadd fast float %mul17, %mul18.neg 821 %sub19 = fadd fast float %sub92, %8 822 %mul20 = fmul fast float %sub19, 0x4000CCCCC0000000 823 %mul21.neg = fmul fast float %w0.0100, 0xC0019999A0000000 824 %mul23 = fmul fast float %w1.099, 0x4002666660000000 825 %mul25 = fmul fast float %w2.096, 0x4008CCCCC0000000 826 %mul27.neg = fmul fast float %w3.097, 
0xC0099999A0000000 827 %add2293 = fadd fast float %mul27.neg, %mul25 828 %add24 = fadd fast float %add2293, %mul23 829 %sub2694 = fadd fast float %add24, %mul21.neg 830 %sub28 = fadd fast float %sub2694, %mul20 831 %inc = add nuw i32 %j.098, 1 832 %exitcond = icmp eq i32 %inc, %arg_B 833 br i1 %exitcond, label %for.cond.cleanup15, label %for.body16 834} 835 836 837; void foo(double * restrict A, double * restrict B, double * restrict C, 838; int n) { 839; for (intptr_t i=0; i < n; ++i) { 840; C[i] = B[0] *A[i*4 ] + B[1] *A[i*4+1]; 841; } 842; } 843 844define void @store_red_double(double* noalias %A, double* noalias %B, double* noalias %C, i32 %n) { 845; CHECK-LABEL: @store_red_double( 846; CHECK-NEXT: entry: 847; CHECK-NEXT: [[CMP17:%.*]] = icmp sgt i32 [[N:%.*]], 0 848; CHECK-NEXT: br i1 [[CMP17]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]] 849; CHECK: for.body.lr.ph: 850; CHECK-NEXT: [[TMP0:%.*]] = load double, double* [[B:%.*]], align 8 851; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds double, double* [[B]], i64 1 852; CHECK-NEXT: [[TMP1:%.*]] = load double, double* [[ARRAYIDX4]], align 8 853; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[N]] to i64 854; CHECK-NEXT: br label [[FOR_BODY:%.*]] 855; CHECK: for.body: 856; CHECK-NEXT: [[I_018:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ] 857; CHECK-NEXT: [[MUL:%.*]] = shl nsw i64 [[I_018]], 2 858; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[A:%.*]], i64 [[MUL]] 859; CHECK-NEXT: [[TMP3:%.*]] = load double, double* [[ARRAYIDX2]], align 8 860; CHECK-NEXT: [[MUL3:%.*]] = fmul fast double [[TMP0]], [[TMP3]] 861; CHECK-NEXT: [[ADD16:%.*]] = or i64 [[MUL]], 1 862; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, double* [[A]], i64 [[ADD16]] 863; CHECK-NEXT: [[TMP4:%.*]] = load double, double* [[ARRAYIDX6]], align 8 864; CHECK-NEXT: [[MUL7:%.*]] = fmul fast double [[TMP1]], [[TMP4]] 865; CHECK-NEXT: [[ADD8:%.*]] = fadd fast double [[MUL3]], 
[[MUL7]] 866; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds double, double* [[C:%.*]], i64 [[I_018]] 867; CHECK-NEXT: store double [[ADD8]], double* [[ARRAYIDX9]], align 8 868; CHECK-NEXT: [[INC]] = add nsw i64 [[I_018]], 1 869; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP2]] 870; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]] 871; CHECK: for.end: 872; CHECK-NEXT: ret void 873; 874; STORE-LABEL: @store_red_double( 875; STORE-NEXT: entry: 876; STORE-NEXT: [[CMP17:%.*]] = icmp sgt i32 [[N:%.*]], 0 877; STORE-NEXT: br i1 [[CMP17]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]] 878; STORE: for.body.lr.ph: 879; STORE-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds double, double* [[B:%.*]], i64 1 880; STORE-NEXT: [[TMP0:%.*]] = bitcast double* [[B]] to <2 x double>* 881; STORE-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8 882; STORE-NEXT: [[TMP2:%.*]] = sext i32 [[N]] to i64 883; STORE-NEXT: br label [[FOR_BODY:%.*]] 884; STORE: for.body: 885; STORE-NEXT: [[I_018:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ] 886; STORE-NEXT: [[MUL:%.*]] = shl nsw i64 [[I_018]], 2 887; STORE-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[A:%.*]], i64 [[MUL]] 888; STORE-NEXT: [[ADD16:%.*]] = or i64 [[MUL]], 1 889; STORE-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, double* [[A]], i64 [[ADD16]] 890; STORE-NEXT: [[TMP3:%.*]] = bitcast double* [[ARRAYIDX2]] to <2 x double>* 891; STORE-NEXT: [[TMP4:%.*]] = load <2 x double>, <2 x double>* [[TMP3]], align 8 892; STORE-NEXT: [[TMP5:%.*]] = fmul fast <2 x double> [[TMP1]], [[TMP4]] 893; STORE-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[TMP5]], i32 0 894; STORE-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[TMP5]], i32 1 895; STORE-NEXT: [[ADD8:%.*]] = fadd fast double [[TMP6]], [[TMP7]] 896; STORE-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds double, double* [[C:%.*]], i64 [[I_018]] 897; 
STORE-NEXT: store double [[ADD8]], double* [[ARRAYIDX9]], align 8 898; STORE-NEXT: [[INC]] = add nsw i64 [[I_018]], 1 899; STORE-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP2]] 900; STORE-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]] 901; STORE: for.end: 902; STORE-NEXT: ret void 903; 904entry: 905 %cmp17 = icmp sgt i32 %n, 0 906 br i1 %cmp17, label %for.body.lr.ph, label %for.end 907 908for.body.lr.ph: 909 %0 = load double, double* %B, align 8 910 %arrayidx4 = getelementptr inbounds double, double* %B, i64 1 911 %1 = load double, double* %arrayidx4, align 8 912 %2 = sext i32 %n to i64 913 br label %for.body 914 915for.body: 916 %i.018 = phi i64 [ 0, %for.body.lr.ph ], [ %inc, %for.body ] 917 %mul = shl nsw i64 %i.018, 2 918 %arrayidx2 = getelementptr inbounds double, double* %A, i64 %mul 919 %3 = load double, double* %arrayidx2, align 8 920 %mul3 = fmul fast double %0, %3 921 %add16 = or i64 %mul, 1 922 %arrayidx6 = getelementptr inbounds double, double* %A, i64 %add16 923 %4 = load double, double* %arrayidx6, align 8 924 %mul7 = fmul fast double %1, %4 925 %add8 = fadd fast double %mul3, %mul7 926 %arrayidx9 = getelementptr inbounds double, double* %C, i64 %i.018 927 store double %add8, double* %arrayidx9, align 8 928 %inc = add nsw i64 %i.018, 1 929 %exitcond = icmp eq i64 %inc, %2 930 br i1 %exitcond, label %for.end, label %for.body 931 932for.end: 933 ret void 934} 935 936; int foo(float * restrict A, float * restrict B, float * restrict C, int n) { 937; float sum = 0; 938; for (intptr_t i=0; i < n; ++i) { 939; C[i] = B[0] *A[i*4 ] + 940; B[1] *A[i*4+1] + 941; B[2] *A[i*4+2] + 942; B[3] *A[i*4+3]; 943; } 944; return sum; 945; } 946 947define i32 @store_red(float* noalias %A, float* noalias %B, float* noalias %C, i32 %n) { 948; CHECK-LABEL: @store_red( 949; CHECK-NEXT: entry: 950; CHECK-NEXT: [[CMP37:%.*]] = icmp sgt i32 [[N:%.*]], 0 951; CHECK-NEXT: br i1 [[CMP37]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]] 952; CHECK: 
for.body.lr.ph: 953; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 1 954; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[B]], i64 2 955; CHECK-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds float, float* [[B]], i64 3 956; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[N]] to i64 957; CHECK-NEXT: br label [[FOR_BODY:%.*]] 958; CHECK: for.body: 959; CHECK-NEXT: [[I_039:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ] 960; CHECK-NEXT: [[C_ADDR_038:%.*]] = phi float* [ [[C:%.*]], [[FOR_BODY_LR_PH]] ], [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ] 961; CHECK-NEXT: [[TMP1:%.*]] = load float, float* [[B]], align 4 962; CHECK-NEXT: [[MUL:%.*]] = shl nsw i64 [[I_039]], 2 963; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[MUL]] 964; CHECK-NEXT: [[TMP2:%.*]] = load float, float* [[ARRAYIDX2]], align 4 965; CHECK-NEXT: [[MUL3:%.*]] = fmul fast float [[TMP1]], [[TMP2]] 966; CHECK-NEXT: [[TMP3:%.*]] = load float, float* [[ARRAYIDX4]], align 4 967; CHECK-NEXT: [[ADD34:%.*]] = or i64 [[MUL]], 1 968; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD34]] 969; CHECK-NEXT: [[TMP4:%.*]] = load float, float* [[ARRAYIDX6]], align 4 970; CHECK-NEXT: [[MUL7:%.*]] = fmul fast float [[TMP3]], [[TMP4]] 971; CHECK-NEXT: [[ADD8:%.*]] = fadd fast float [[MUL3]], [[MUL7]] 972; CHECK-NEXT: [[TMP5:%.*]] = load float, float* [[ARRAYIDX9]], align 4 973; CHECK-NEXT: [[ADD1135:%.*]] = or i64 [[MUL]], 2 974; CHECK-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD1135]] 975; CHECK-NEXT: [[TMP6:%.*]] = load float, float* [[ARRAYIDX12]], align 4 976; CHECK-NEXT: [[MUL13:%.*]] = fmul fast float [[TMP5]], [[TMP6]] 977; CHECK-NEXT: [[ADD14:%.*]] = fadd fast float [[ADD8]], [[MUL13]] 978; CHECK-NEXT: [[TMP7:%.*]] = load float, float* [[ARRAYIDX15]], align 4 979; CHECK-NEXT: [[ADD1736:%.*]] = or i64 [[MUL]], 3 980; CHECK-NEXT: 
[[ARRAYIDX18:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD1736]] 981; CHECK-NEXT: [[TMP8:%.*]] = load float, float* [[ARRAYIDX18]], align 4 982; CHECK-NEXT: [[MUL19:%.*]] = fmul fast float [[TMP7]], [[TMP8]] 983; CHECK-NEXT: [[ADD20:%.*]] = fadd fast float [[ADD14]], [[MUL19]] 984; CHECK-NEXT: store float [[ADD20]], float* [[C_ADDR_038]], align 4 985; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds float, float* [[C_ADDR_038]], i64 1 986; CHECK-NEXT: [[INC]] = add nsw i64 [[I_039]], 1 987; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP0]] 988; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]] 989; CHECK: for.end: 990; CHECK-NEXT: ret i32 0 991; 992; STORE-LABEL: @store_red( 993; STORE-NEXT: entry: 994; STORE-NEXT: [[CMP37:%.*]] = icmp sgt i32 [[N:%.*]], 0 995; STORE-NEXT: br i1 [[CMP37]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]] 996; STORE: for.body.lr.ph: 997; STORE-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 1 998; STORE-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[B]], i64 2 999; STORE-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds float, float* [[B]], i64 3 1000; STORE-NEXT: [[TMP0:%.*]] = sext i32 [[N]] to i64 1001; STORE-NEXT: br label [[FOR_BODY:%.*]] 1002; STORE: for.body: 1003; STORE-NEXT: [[I_039:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ] 1004; STORE-NEXT: [[C_ADDR_038:%.*]] = phi float* [ [[C:%.*]], [[FOR_BODY_LR_PH]] ], [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ] 1005; STORE-NEXT: [[MUL:%.*]] = shl nsw i64 [[I_039]], 2 1006; STORE-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[MUL]] 1007; STORE-NEXT: [[ADD34:%.*]] = or i64 [[MUL]], 1 1008; STORE-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD34]] 1009; STORE-NEXT: [[ADD1135:%.*]] = or i64 [[MUL]], 2 1010; STORE-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD1135]] 
1011; STORE-NEXT: [[TMP1:%.*]] = bitcast float* [[B]] to <4 x float>* 1012; STORE-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4 1013; STORE-NEXT: [[ADD1736:%.*]] = or i64 [[MUL]], 3 1014; STORE-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD1736]] 1015; STORE-NEXT: [[TMP3:%.*]] = bitcast float* [[ARRAYIDX2]] to <4 x float>* 1016; STORE-NEXT: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4 1017; STORE-NEXT: [[TMP5:%.*]] = fmul fast <4 x float> [[TMP2]], [[TMP4]] 1018; STORE-NEXT: [[TMP6:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP5]]) 1019; STORE-NEXT: store float [[TMP6]], float* [[C_ADDR_038]], align 4 1020; STORE-NEXT: [[INCDEC_PTR]] = getelementptr inbounds float, float* [[C_ADDR_038]], i64 1 1021; STORE-NEXT: [[INC]] = add nsw i64 [[I_039]], 1 1022; STORE-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP0]] 1023; STORE-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]] 1024; STORE: for.end: 1025; STORE-NEXT: ret i32 0 1026; 1027entry: 1028 %cmp37 = icmp sgt i32 %n, 0 1029 br i1 %cmp37, label %for.body.lr.ph, label %for.end 1030 1031for.body.lr.ph: 1032 %arrayidx4 = getelementptr inbounds float, float* %B, i64 1 1033 %arrayidx9 = getelementptr inbounds float, float* %B, i64 2 1034 %arrayidx15 = getelementptr inbounds float, float* %B, i64 3 1035 %0 = sext i32 %n to i64 1036 br label %for.body 1037 1038for.body: 1039 %i.039 = phi i64 [ 0, %for.body.lr.ph ], [ %inc, %for.body ] 1040 %C.addr.038 = phi float* [ %C, %for.body.lr.ph ], [ %incdec.ptr, %for.body ] 1041 %1 = load float, float* %B, align 4 1042 %mul = shl nsw i64 %i.039, 2 1043 %arrayidx2 = getelementptr inbounds float, float* %A, i64 %mul 1044 %2 = load float, float* %arrayidx2, align 4 1045 %mul3 = fmul fast float %1, %2 1046 %3 = load float, float* %arrayidx4, align 4 1047 %add34 = or i64 %mul, 1 1048 %arrayidx6 = getelementptr inbounds float, float* %A, i64 %add34 
; (continuation of @store_red's for.body: the remaining scalar B[k]*A[4i+k]
; products are accumulated and the sum stored through the C cursor)
  %4 = load float, float* %arrayidx6, align 4
  %mul7 = fmul fast float %3, %4
  %add8 = fadd fast float %mul3, %mul7
  %5 = load float, float* %arrayidx9, align 4
  %add1135 = or i64 %mul, 2
  %arrayidx12 = getelementptr inbounds float, float* %A, i64 %add1135
  %6 = load float, float* %arrayidx12, align 4
  %mul13 = fmul fast float %5, %6
  %add14 = fadd fast float %add8, %mul13
  %7 = load float, float* %arrayidx15, align 4
  %add1736 = or i64 %mul, 3
  %arrayidx18 = getelementptr inbounds float, float* %A, i64 %add1736
  %8 = load float, float* %arrayidx18, align 4
  %mul19 = fmul fast float %7, %8
  %add20 = fadd fast float %add14, %mul19
  store float %add20, float* %C.addr.038, align 4
  %incdec.ptr = getelementptr inbounds float, float* %C.addr.038, i64 1
  %inc = add nsw i64 %i.039, 1
  %exitcond = icmp eq i64 %inc, %0
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret i32 0
}

; Globals shared by the reduction tests below. The 16-byte alignment is what
; lets the STORE-mode checks expect "align 16" on the vector loads.
@arr_i32 = global [32 x i32] zeroinitializer, align 16
@arr_float = global [32 x float] zeroinitializer, align 16

; Horizontal reduction fed by a store: sum of @arr_float[0..3] written to
; *%res. Under the plain CHECK run the scalar fadd chain survives; with
; -slp-vectorize-hor-store (STORE) it becomes one <4 x float> load feeding
; @llvm.vector.reduce.fadd.v4f32.
define void @float_red_example4(float* %res) {
; CHECK-LABEL: @float_red_example4(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 0), align 16
; CHECK-NEXT: [[TMP1:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 1), align 4
; CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[TMP1]], [[TMP0]]
; CHECK-NEXT: [[TMP2:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 2), align 8
; CHECK-NEXT: [[ADD_1:%.*]] = fadd fast float [[TMP2]], [[ADD]]
; CHECK-NEXT: [[TMP3:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 3), align 4
; CHECK-NEXT: [[ADD_2:%.*]] = fadd fast float [[TMP3]], [[ADD_1]]
; CHECK-NEXT: store float [[ADD_2]], float* [[RES:%.*]], align 16
; CHECK-NEXT: ret void
;
; STORE-LABEL: @float_red_example4(
; STORE-NEXT: entry:
; STORE-NEXT: [[TMP0:%.*]] = load <4 x float>, <4 x float>* bitcast ([32 x float]* @arr_float to <4 x float>*), align 16
; STORE-NEXT: [[TMP1:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP0]])
; STORE-NEXT: store float [[TMP1]], float* [[RES:%.*]], align 16
; STORE-NEXT: ret void
;
entry:
  %0 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 0), align 16
  %1 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 1), align 4
  %add = fadd fast float %1, %0
  %2 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 2), align 8
  %add.1 = fadd fast float %2, %add
  %3 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 3), align 4
  %add.2 = fadd fast float %3, %add.1
  store float %add.2, float* %res, align 16
  ret void
}

; Same store-fed reduction over 8 elements; STORE mode is expected to use
; @llvm.vector.reduce.fadd.v8f32 on a single <8 x float> load.
define void @float_red_example8(float* %res) {
; CHECK-LABEL: @float_red_example8(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 0), align 16
; CHECK-NEXT: [[TMP1:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 1), align 4
; CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[TMP1]], [[TMP0]]
; CHECK-NEXT: [[TMP2:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 2), align 8
; CHECK-NEXT: [[ADD_1:%.*]] = fadd fast float [[TMP2]], [[ADD]]
; CHECK-NEXT: [[TMP3:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 3), align 4
;
CHECK-NEXT: [[ADD_2:%.*]] = fadd fast float [[TMP3]], [[ADD_1]] 1119; CHECK-NEXT: [[TMP4:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 4), align 16 1120; CHECK-NEXT: [[ADD_3:%.*]] = fadd fast float [[TMP4]], [[ADD_2]] 1121; CHECK-NEXT: [[TMP5:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 5), align 4 1122; CHECK-NEXT: [[ADD_4:%.*]] = fadd fast float [[TMP5]], [[ADD_3]] 1123; CHECK-NEXT: [[TMP6:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 6), align 8 1124; CHECK-NEXT: [[ADD_5:%.*]] = fadd fast float [[TMP6]], [[ADD_4]] 1125; CHECK-NEXT: [[TMP7:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 7), align 4 1126; CHECK-NEXT: [[ADD_6:%.*]] = fadd fast float [[TMP7]], [[ADD_5]] 1127; CHECK-NEXT: store float [[ADD_6]], float* [[RES:%.*]], align 16 1128; CHECK-NEXT: ret void 1129; 1130; STORE-LABEL: @float_red_example8( 1131; STORE-NEXT: entry: 1132; STORE-NEXT: [[TMP0:%.*]] = load <8 x float>, <8 x float>* bitcast ([32 x float]* @arr_float to <8 x float>*), align 16 1133; STORE-NEXT: [[TMP1:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[TMP0]]) 1134; STORE-NEXT: store float [[TMP1]], float* [[RES:%.*]], align 16 1135; STORE-NEXT: ret void 1136; 1137entry: 1138 %0 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 0), align 16 1139 %1 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 1), align 4 1140 %add = fadd fast float %1, %0 1141 %2 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 2), align 8 1142 %add.1 = fadd fast float %2, %add 1143 %3 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 3), align 4 1144 %add.2 = fadd fast float 
%3, %add.1 1145 %4 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 4), align 16 1146 %add.3 = fadd fast float %4, %add.2 1147 %5 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 5), align 4 1148 %add.4 = fadd fast float %5, %add.3 1149 %6 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 6), align 8 1150 %add.5 = fadd fast float %6, %add.4 1151 %7 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 7), align 4 1152 %add.6 = fadd fast float %7, %add.5 1153 store float %add.6, float* %res, align 16 1154 ret void 1155} 1156 1157define void @float_red_example16(float* %res) { 1158; CHECK-LABEL: @float_red_example16( 1159; CHECK-NEXT: entry: 1160; CHECK-NEXT: [[TMP0:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 0), align 16 1161; CHECK-NEXT: [[TMP1:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 1), align 4 1162; CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[TMP1]], [[TMP0]] 1163; CHECK-NEXT: [[TMP2:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 2), align 8 1164; CHECK-NEXT: [[ADD_1:%.*]] = fadd fast float [[TMP2]], [[ADD]] 1165; CHECK-NEXT: [[TMP3:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 3), align 4 1166; CHECK-NEXT: [[ADD_2:%.*]] = fadd fast float [[TMP3]], [[ADD_1]] 1167; CHECK-NEXT: [[TMP4:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 4), align 16 1168; CHECK-NEXT: [[ADD_3:%.*]] = fadd fast float [[TMP4]], [[ADD_2]] 1169; CHECK-NEXT: [[TMP5:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 5), align 4 1170; CHECK-NEXT: [[ADD_4:%.*]] = fadd fast float 
[[TMP5]], [[ADD_3]] 1171; CHECK-NEXT: [[TMP6:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 6), align 8 1172; CHECK-NEXT: [[ADD_5:%.*]] = fadd fast float [[TMP6]], [[ADD_4]] 1173; CHECK-NEXT: [[TMP7:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 7), align 4 1174; CHECK-NEXT: [[ADD_6:%.*]] = fadd fast float [[TMP7]], [[ADD_5]] 1175; CHECK-NEXT: [[TMP8:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 8), align 16 1176; CHECK-NEXT: [[ADD_7:%.*]] = fadd fast float [[TMP8]], [[ADD_6]] 1177; CHECK-NEXT: [[TMP9:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 9), align 4 1178; CHECK-NEXT: [[ADD_8:%.*]] = fadd fast float [[TMP9]], [[ADD_7]] 1179; CHECK-NEXT: [[TMP10:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 10), align 8 1180; CHECK-NEXT: [[ADD_9:%.*]] = fadd fast float [[TMP10]], [[ADD_8]] 1181; CHECK-NEXT: [[TMP11:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 11), align 4 1182; CHECK-NEXT: [[ADD_10:%.*]] = fadd fast float [[TMP11]], [[ADD_9]] 1183; CHECK-NEXT: [[TMP12:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 12), align 16 1184; CHECK-NEXT: [[ADD_11:%.*]] = fadd fast float [[TMP12]], [[ADD_10]] 1185; CHECK-NEXT: [[TMP13:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 13), align 4 1186; CHECK-NEXT: [[ADD_12:%.*]] = fadd fast float [[TMP13]], [[ADD_11]] 1187; CHECK-NEXT: [[TMP14:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 14), align 8 1188; CHECK-NEXT: [[ADD_13:%.*]] = fadd fast float [[TMP14]], [[ADD_12]] 1189; CHECK-NEXT: [[TMP15:%.*]] = load float, float* getelementptr 
inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 15), align 4 1190; CHECK-NEXT: [[ADD_14:%.*]] = fadd fast float [[TMP15]], [[ADD_13]] 1191; CHECK-NEXT: store float [[ADD_14]], float* [[RES:%.*]], align 16 1192; CHECK-NEXT: ret void 1193; 1194; STORE-LABEL: @float_red_example16( 1195; STORE-NEXT: entry: 1196; STORE-NEXT: [[TMP0:%.*]] = load <16 x float>, <16 x float>* bitcast ([32 x float]* @arr_float to <16 x float>*), align 16 1197; STORE-NEXT: [[TMP1:%.*]] = call fast float @llvm.vector.reduce.fadd.v16f32(float -0.000000e+00, <16 x float> [[TMP0]]) 1198; STORE-NEXT: store float [[TMP1]], float* [[RES:%.*]], align 16 1199; STORE-NEXT: ret void 1200; 1201entry: 1202 %0 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 0), align 16 1203 %1 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 1), align 4 1204 %add = fadd fast float %1, %0 1205 %2 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 2), align 8 1206 %add.1 = fadd fast float %2, %add 1207 %3 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 3), align 4 1208 %add.2 = fadd fast float %3, %add.1 1209 %4 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 4), align 16 1210 %add.3 = fadd fast float %4, %add.2 1211 %5 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 5), align 4 1212 %add.4 = fadd fast float %5, %add.3 1213 %6 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 6), align 8 1214 %add.5 = fadd fast float %6, %add.4 1215 %7 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 7), align 4 1216 %add.6 = fadd fast float %7, %add.5 1217 %8 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 8), 
align 16 1218 %add.7 = fadd fast float %8, %add.6 1219 %9 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 9), align 4 1220 %add.8 = fadd fast float %9, %add.7 1221 %10 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 10), align 8 1222 %add.9 = fadd fast float %10, %add.8 1223 %11 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 11), align 4 1224 %add.10 = fadd fast float %11, %add.9 1225 %12 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 12), align 16 1226 %add.11 = fadd fast float %12, %add.10 1227 %13 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 13), align 4 1228 %add.12 = fadd fast float %13, %add.11 1229 %14 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 14), align 8 1230 %add.13 = fadd fast float %14, %add.12 1231 %15 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 15), align 4 1232 %add.14 = fadd fast float %15, %add.13 1233 store float %add.14, float* %res, align 16 1234 ret void 1235} 1236 1237define void @i32_red_example4(i32* %res) { 1238; CHECK-LABEL: @i32_red_example4( 1239; CHECK-NEXT: entry: 1240; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 0), align 16 1241; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 1), align 4 1242; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP0]] 1243; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 2), align 8 1244; CHECK-NEXT: [[ADD_1:%.*]] = add nsw i32 [[TMP2]], [[ADD]] 1245; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 3), 
align 4 1246; CHECK-NEXT: [[ADD_2:%.*]] = add nsw i32 [[TMP3]], [[ADD_1]] 1247; CHECK-NEXT: store i32 [[ADD_2]], i32* [[RES:%.*]], align 16 1248; CHECK-NEXT: ret void 1249; 1250; STORE-LABEL: @i32_red_example4( 1251; STORE-NEXT: entry: 1252; STORE-NEXT: [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([32 x i32]* @arr_i32 to <4 x i32>*), align 16 1253; STORE-NEXT: [[TMP1:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP0]]) 1254; STORE-NEXT: store i32 [[TMP1]], i32* [[RES:%.*]], align 16 1255; STORE-NEXT: ret void 1256; 1257entry: 1258 %0 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 0), align 16 1259 %1 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 1), align 4 1260 %add = add nsw i32 %1, %0 1261 %2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 2), align 8 1262 %add.1 = add nsw i32 %2, %add 1263 %3 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 3), align 4 1264 %add.2 = add nsw i32 %3, %add.1 1265 store i32 %add.2, i32* %res, align 16 1266 ret void 1267} 1268 1269define void @i32_red_example8(i32* %res) { 1270; CHECK-LABEL: @i32_red_example8( 1271; CHECK-NEXT: entry: 1272; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 0), align 16 1273; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 1), align 4 1274; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP0]] 1275; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 2), align 8 1276; CHECK-NEXT: [[ADD_1:%.*]] = add nsw i32 [[TMP2]], [[ADD]] 1277; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 3), align 4 1278; CHECK-NEXT: [[ADD_2:%.*]] = add nsw i32 [[TMP3]], [[ADD_1]] 1279; CHECK-NEXT: [[TMP4:%.*]] 
= load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 4), align 16 1280; CHECK-NEXT: [[ADD_3:%.*]] = add nsw i32 [[TMP4]], [[ADD_2]] 1281; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 5), align 4 1282; CHECK-NEXT: [[ADD_4:%.*]] = add nsw i32 [[TMP5]], [[ADD_3]] 1283; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 6), align 8 1284; CHECK-NEXT: [[ADD_5:%.*]] = add nsw i32 [[TMP6]], [[ADD_4]] 1285; CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 7), align 4 1286; CHECK-NEXT: [[ADD_6:%.*]] = add nsw i32 [[TMP7]], [[ADD_5]] 1287; CHECK-NEXT: store i32 [[ADD_6]], i32* [[RES:%.*]], align 16 1288; CHECK-NEXT: ret void 1289; 1290; STORE-LABEL: @i32_red_example8( 1291; STORE-NEXT: entry: 1292; STORE-NEXT: [[TMP0:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32 x i32]* @arr_i32 to <8 x i32>*), align 16 1293; STORE-NEXT: [[TMP1:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP0]]) 1294; STORE-NEXT: store i32 [[TMP1]], i32* [[RES:%.*]], align 16 1295; STORE-NEXT: ret void 1296; 1297entry: 1298 %0 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 0), align 16 1299 %1 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 1), align 4 1300 %add = add nsw i32 %1, %0 1301 %2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 2), align 8 1302 %add.1 = add nsw i32 %2, %add 1303 %3 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 3), align 4 1304 %add.2 = add nsw i32 %3, %add.1 1305 %4 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 4), align 16 1306 %add.3 = add nsw i32 %4, %add.2 1307 %5 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 5), 
align 4 1308 %add.4 = add nsw i32 %5, %add.3 1309 %6 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 6), align 8 1310 %add.5 = add nsw i32 %6, %add.4 1311 %7 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 7), align 4 1312 %add.6 = add nsw i32 %7, %add.5 1313 store i32 %add.6, i32* %res, align 16 1314 ret void 1315} 1316 1317define void @i32_red_example16(i32* %res) { 1318; CHECK-LABEL: @i32_red_example16( 1319; CHECK-NEXT: entry: 1320; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 0), align 16 1321; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 1), align 4 1322; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP0]] 1323; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 2), align 8 1324; CHECK-NEXT: [[ADD_1:%.*]] = add nsw i32 [[TMP2]], [[ADD]] 1325; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 3), align 4 1326; CHECK-NEXT: [[ADD_2:%.*]] = add nsw i32 [[TMP3]], [[ADD_1]] 1327; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 4), align 16 1328; CHECK-NEXT: [[ADD_3:%.*]] = add nsw i32 [[TMP4]], [[ADD_2]] 1329; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 5), align 4 1330; CHECK-NEXT: [[ADD_4:%.*]] = add nsw i32 [[TMP5]], [[ADD_3]] 1331; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 6), align 8 1332; CHECK-NEXT: [[ADD_5:%.*]] = add nsw i32 [[TMP6]], [[ADD_4]] 1333; CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 7), align 4 1334; CHECK-NEXT: [[ADD_6:%.*]] = add nsw i32 [[TMP7]], 
[[ADD_5]] 1335; CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 8), align 16 1336; CHECK-NEXT: [[ADD_7:%.*]] = add nsw i32 [[TMP8]], [[ADD_6]] 1337; CHECK-NEXT: [[TMP9:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 9), align 4 1338; CHECK-NEXT: [[ADD_8:%.*]] = add nsw i32 [[TMP9]], [[ADD_7]] 1339; CHECK-NEXT: [[TMP10:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 10), align 8 1340; CHECK-NEXT: [[ADD_9:%.*]] = add nsw i32 [[TMP10]], [[ADD_8]] 1341; CHECK-NEXT: [[TMP11:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 11), align 4 1342; CHECK-NEXT: [[ADD_10:%.*]] = add nsw i32 [[TMP11]], [[ADD_9]] 1343; CHECK-NEXT: [[TMP12:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 12), align 16 1344; CHECK-NEXT: [[ADD_11:%.*]] = add nsw i32 [[TMP12]], [[ADD_10]] 1345; CHECK-NEXT: [[TMP13:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 13), align 4 1346; CHECK-NEXT: [[ADD_12:%.*]] = add nsw i32 [[TMP13]], [[ADD_11]] 1347; CHECK-NEXT: [[TMP14:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 14), align 8 1348; CHECK-NEXT: [[ADD_13:%.*]] = add nsw i32 [[TMP14]], [[ADD_12]] 1349; CHECK-NEXT: [[TMP15:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 15), align 4 1350; CHECK-NEXT: [[ADD_14:%.*]] = add nsw i32 [[TMP15]], [[ADD_13]] 1351; CHECK-NEXT: store i32 [[ADD_14]], i32* [[RES:%.*]], align 16 1352; CHECK-NEXT: ret void 1353; 1354; STORE-LABEL: @i32_red_example16( 1355; STORE-NEXT: entry: 1356; STORE-NEXT: [[TMP0:%.*]] = load <16 x i32>, <16 x i32>* bitcast ([32 x i32]* @arr_i32 to <16 x i32>*), align 16 1357; STORE-NEXT: [[TMP1:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP0]]) 1358; STORE-NEXT: store 
i32 [[TMP1]], i32* [[RES:%.*]], align 16 1359; STORE-NEXT: ret void 1360; 1361entry: 1362 %0 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 0), align 16 1363 %1 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 1), align 4 1364 %add = add nsw i32 %1, %0 1365 %2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 2), align 8 1366 %add.1 = add nsw i32 %2, %add 1367 %3 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 3), align 4 1368 %add.2 = add nsw i32 %3, %add.1 1369 %4 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 4), align 16 1370 %add.3 = add nsw i32 %4, %add.2 1371 %5 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 5), align 4 1372 %add.4 = add nsw i32 %5, %add.3 1373 %6 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 6), align 8 1374 %add.5 = add nsw i32 %6, %add.4 1375 %7 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 7), align 4 1376 %add.6 = add nsw i32 %7, %add.5 1377 %8 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 8), align 16 1378 %add.7 = add nsw i32 %8, %add.6 1379 %9 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 9), align 4 1380 %add.8 = add nsw i32 %9, %add.7 1381 %10 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 10), align 8 1382 %add.9 = add nsw i32 %10, %add.8 1383 %11 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 11), align 4 1384 %add.10 = add nsw i32 %11, %add.9 1385 %12 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 12), align 16 1386 %add.11 = add nsw i32 %12, %add.10 1387 %13 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, 
i64 0, i64 13), align 4 1388 %add.12 = add nsw i32 %13, %add.11 1389 %14 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 14), align 8 1390 %add.13 = add nsw i32 %14, %add.12 1391 %15 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 15), align 4 1392 %add.14 = add nsw i32 %15, %add.13 1393 store i32 %add.14, i32* %res, align 16 1394 ret void 1395} 1396 1397define void @i32_red_example32(i32* %res) { 1398; CHECK-LABEL: @i32_red_example32( 1399; CHECK-NEXT: entry: 1400; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 0), align 16 1401; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 1), align 4 1402; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP0]] 1403; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 2), align 8 1404; CHECK-NEXT: [[ADD_1:%.*]] = add nsw i32 [[TMP2]], [[ADD]] 1405; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 3), align 4 1406; CHECK-NEXT: [[ADD_2:%.*]] = add nsw i32 [[TMP3]], [[ADD_1]] 1407; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 4), align 16 1408; CHECK-NEXT: [[ADD_3:%.*]] = add nsw i32 [[TMP4]], [[ADD_2]] 1409; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 5), align 4 1410; CHECK-NEXT: [[ADD_4:%.*]] = add nsw i32 [[TMP5]], [[ADD_3]] 1411; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 6), align 8 1412; CHECK-NEXT: [[ADD_5:%.*]] = add nsw i32 [[TMP6]], [[ADD_4]] 1413; CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 7), align 4 1414; CHECK-NEXT: [[ADD_6:%.*]] = 
add nsw i32 [[TMP7]], [[ADD_5]] 1415; CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 8), align 16 1416; CHECK-NEXT: [[ADD_7:%.*]] = add nsw i32 [[TMP8]], [[ADD_6]] 1417; CHECK-NEXT: [[TMP9:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 9), align 4 1418; CHECK-NEXT: [[ADD_8:%.*]] = add nsw i32 [[TMP9]], [[ADD_7]] 1419; CHECK-NEXT: [[TMP10:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 10), align 8 1420; CHECK-NEXT: [[ADD_9:%.*]] = add nsw i32 [[TMP10]], [[ADD_8]] 1421; CHECK-NEXT: [[TMP11:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 11), align 4 1422; CHECK-NEXT: [[ADD_10:%.*]] = add nsw i32 [[TMP11]], [[ADD_9]] 1423; CHECK-NEXT: [[TMP12:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 12), align 16 1424; CHECK-NEXT: [[ADD_11:%.*]] = add nsw i32 [[TMP12]], [[ADD_10]] 1425; CHECK-NEXT: [[TMP13:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 13), align 4 1426; CHECK-NEXT: [[ADD_12:%.*]] = add nsw i32 [[TMP13]], [[ADD_11]] 1427; CHECK-NEXT: [[TMP14:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 14), align 8 1428; CHECK-NEXT: [[ADD_13:%.*]] = add nsw i32 [[TMP14]], [[ADD_12]] 1429; CHECK-NEXT: [[TMP15:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 15), align 4 1430; CHECK-NEXT: [[ADD_14:%.*]] = add nsw i32 [[TMP15]], [[ADD_13]] 1431; CHECK-NEXT: [[TMP16:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 16), align 16 1432; CHECK-NEXT: [[ADD_15:%.*]] = add nsw i32 [[TMP16]], [[ADD_14]] 1433; CHECK-NEXT: [[TMP17:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 17), align 4 1434; CHECK-NEXT: [[ADD_16:%.*]] = add nsw i32 
[[TMP17]], [[ADD_15]] 1435; CHECK-NEXT: [[TMP18:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 18), align 8 1436; CHECK-NEXT: [[ADD_17:%.*]] = add nsw i32 [[TMP18]], [[ADD_16]] 1437; CHECK-NEXT: [[TMP19:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 19), align 4 1438; CHECK-NEXT: [[ADD_18:%.*]] = add nsw i32 [[TMP19]], [[ADD_17]] 1439; CHECK-NEXT: [[TMP20:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 20), align 16 1440; CHECK-NEXT: [[ADD_19:%.*]] = add nsw i32 [[TMP20]], [[ADD_18]] 1441; CHECK-NEXT: [[TMP21:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 21), align 4 1442; CHECK-NEXT: [[ADD_20:%.*]] = add nsw i32 [[TMP21]], [[ADD_19]] 1443; CHECK-NEXT: [[TMP22:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 22), align 8 1444; CHECK-NEXT: [[ADD_21:%.*]] = add nsw i32 [[TMP22]], [[ADD_20]] 1445; CHECK-NEXT: [[TMP23:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 23), align 4 1446; CHECK-NEXT: [[ADD_22:%.*]] = add nsw i32 [[TMP23]], [[ADD_21]] 1447; CHECK-NEXT: [[TMP24:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 24), align 16 1448; CHECK-NEXT: [[ADD_23:%.*]] = add nsw i32 [[TMP24]], [[ADD_22]] 1449; CHECK-NEXT: [[TMP25:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 25), align 4 1450; CHECK-NEXT: [[ADD_24:%.*]] = add nsw i32 [[TMP25]], [[ADD_23]] 1451; CHECK-NEXT: [[TMP26:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 26), align 8 1452; CHECK-NEXT: [[ADD_25:%.*]] = add nsw i32 [[TMP26]], [[ADD_24]] 1453; CHECK-NEXT: [[TMP27:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 27), align 4 1454; CHECK-NEXT: [[ADD_26:%.*]] = add nsw 
i32 [[TMP27]], [[ADD_25]] 1455; CHECK-NEXT: [[TMP28:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 28), align 16 1456; CHECK-NEXT: [[ADD_27:%.*]] = add nsw i32 [[TMP28]], [[ADD_26]] 1457; CHECK-NEXT: [[TMP29:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 29), align 4 1458; CHECK-NEXT: [[ADD_28:%.*]] = add nsw i32 [[TMP29]], [[ADD_27]] 1459; CHECK-NEXT: [[TMP30:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 30), align 8 1460; CHECK-NEXT: [[ADD_29:%.*]] = add nsw i32 [[TMP30]], [[ADD_28]] 1461; CHECK-NEXT: [[TMP31:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 31), align 4 1462; CHECK-NEXT: [[ADD_30:%.*]] = add nsw i32 [[TMP31]], [[ADD_29]] 1463; CHECK-NEXT: store i32 [[ADD_30]], i32* [[RES:%.*]], align 16 1464; CHECK-NEXT: ret void 1465; 1466; STORE-LABEL: @i32_red_example32( 1467; STORE-NEXT: entry: 1468; STORE-NEXT: [[TMP0:%.*]] = load <32 x i32>, <32 x i32>* bitcast ([32 x i32]* @arr_i32 to <32 x i32>*), align 16 1469; STORE-NEXT: [[TMP1:%.*]] = call i32 @llvm.vector.reduce.add.v32i32(<32 x i32> [[TMP0]]) 1470; STORE-NEXT: store i32 [[TMP1]], i32* [[RES:%.*]], align 16 1471; STORE-NEXT: ret void 1472; 1473entry: 1474 %0 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 0), align 16 1475 %1 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 1), align 4 1476 %add = add nsw i32 %1, %0 1477 %2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 2), align 8 1478 %add.1 = add nsw i32 %2, %add 1479 %3 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 3), align 4 1480 %add.2 = add nsw i32 %3, %add.1 1481 %4 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 4), align 16 1482 %add.3 = add nsw i32 %4, %add.2 1483 %5 = load 
i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 5), align 4 1484 %add.4 = add nsw i32 %5, %add.3 1485 %6 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 6), align 8 1486 %add.5 = add nsw i32 %6, %add.4 1487 %7 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 7), align 4 1488 %add.6 = add nsw i32 %7, %add.5 1489 %8 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 8), align 16 1490 %add.7 = add nsw i32 %8, %add.6 1491 %9 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 9), align 4 1492 %add.8 = add nsw i32 %9, %add.7 1493 %10 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 10), align 8 1494 %add.9 = add nsw i32 %10, %add.8 1495 %11 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 11), align 4 1496 %add.10 = add nsw i32 %11, %add.9 1497 %12 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 12), align 16 1498 %add.11 = add nsw i32 %12, %add.10 1499 %13 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 13), align 4 1500 %add.12 = add nsw i32 %13, %add.11 1501 %14 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 14), align 8 1502 %add.13 = add nsw i32 %14, %add.12 1503 %15 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 15), align 4 1504 %add.14 = add nsw i32 %15, %add.13 1505 %16 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 16), align 16 1506 %add.15 = add nsw i32 %16, %add.14 1507 %17 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 17), align 4 1508 %add.16 = add nsw i32 %17, %add.15 1509 %18 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 18), align 8 1510 
%add.17 = add nsw i32 %18, %add.16 1511 %19 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 19), align 4 1512 %add.18 = add nsw i32 %19, %add.17 1513 %20 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 20), align 16 1514 %add.19 = add nsw i32 %20, %add.18 1515 %21 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 21), align 4 1516 %add.20 = add nsw i32 %21, %add.19 1517 %22 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 22), align 8 1518 %add.21 = add nsw i32 %22, %add.20 1519 %23 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 23), align 4 1520 %add.22 = add nsw i32 %23, %add.21 1521 %24 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 24), align 16 1522 %add.23 = add nsw i32 %24, %add.22 1523 %25 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 25), align 4 1524 %add.24 = add nsw i32 %25, %add.23 1525 %26 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 26), align 8 1526 %add.25 = add nsw i32 %26, %add.24 1527 %27 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 27), align 4 1528 %add.26 = add nsw i32 %27, %add.25 1529 %28 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 28), align 16 1530 %add.27 = add nsw i32 %28, %add.26 1531 %29 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 29), align 4 1532 %add.28 = add nsw i32 %29, %add.27 1533 %30 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 30), align 8 1534 %add.29 = add nsw i32 %30, %add.28 1535 %31 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 31), align 4 1536 %add.30 = add nsw i32 %31, %add.29 1537 store i32 %add.30, i32* %res, 
align 16 1538 ret void 1539} 1540 1541declare i32 @foobar(i32) 1542 1543define void @i32_red_call(i32 %val) { 1544; CHECK-LABEL: @i32_red_call( 1545; CHECK-NEXT: entry: 1546; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32 x i32]* @arr_i32 to <8 x i32>*), align 16 1547; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP0]]) 1548; CHECK-NEXT: [[RES:%.*]] = call i32 @foobar(i32 [[TMP1]]) 1549; CHECK-NEXT: ret void 1550; 1551; STORE-LABEL: @i32_red_call( 1552; STORE-NEXT: entry: 1553; STORE-NEXT: [[TMP0:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32 x i32]* @arr_i32 to <8 x i32>*), align 16 1554; STORE-NEXT: [[TMP1:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP0]]) 1555; STORE-NEXT: [[RES:%.*]] = call i32 @foobar(i32 [[TMP1]]) 1556; STORE-NEXT: ret void 1557; 1558entry: 1559 %0 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 0), align 16 1560 %1 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 1), align 4 1561 %add = add nsw i32 %1, %0 1562 %2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 2), align 8 1563 %add.1 = add nsw i32 %2, %add 1564 %3 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 3), align 4 1565 %add.2 = add nsw i32 %3, %add.1 1566 %4 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 4), align 16 1567 %add.3 = add nsw i32 %4, %add.2 1568 %5 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 5), align 4 1569 %add.4 = add nsw i32 %5, %add.3 1570 %6 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 6), align 8 1571 %add.5 = add nsw i32 %6, %add.4 1572 %7 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 7), align 4 1573 %add.6 = add nsw i32 %7, %add.5 1574 %res = call i32 @foobar(i32 %add.6) 1575 ret 
void 1576} 1577 1578define void @i32_red_invoke(i32 %val) personality i32 (...)* @__gxx_personality_v0 { 1579; CHECK-LABEL: @i32_red_invoke( 1580; CHECK-NEXT: entry: 1581; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32 x i32]* @arr_i32 to <8 x i32>*), align 16 1582; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP0]]) 1583; CHECK-NEXT: [[RES:%.*]] = invoke i32 @foobar(i32 [[TMP1]]) 1584; CHECK-NEXT: to label [[NORMAL:%.*]] unwind label [[EXCEPTION:%.*]] 1585; CHECK: exception: 1586; CHECK-NEXT: [[CLEANUP:%.*]] = landingpad i8 1587; CHECK-NEXT: cleanup 1588; CHECK-NEXT: br label [[NORMAL]] 1589; CHECK: normal: 1590; CHECK-NEXT: ret void 1591; 1592; STORE-LABEL: @i32_red_invoke( 1593; STORE-NEXT: entry: 1594; STORE-NEXT: [[TMP0:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32 x i32]* @arr_i32 to <8 x i32>*), align 16 1595; STORE-NEXT: [[TMP1:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP0]]) 1596; STORE-NEXT: [[RES:%.*]] = invoke i32 @foobar(i32 [[TMP1]]) 1597; STORE-NEXT: to label [[NORMAL:%.*]] unwind label [[EXCEPTION:%.*]] 1598; STORE: exception: 1599; STORE-NEXT: [[CLEANUP:%.*]] = landingpad i8 1600; STORE-NEXT: cleanup 1601; STORE-NEXT: br label [[NORMAL]] 1602; STORE: normal: 1603; STORE-NEXT: ret void 1604; 1605entry: 1606 %0 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 0), align 16 1607 %1 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 1), align 4 1608 %add = add nsw i32 %1, %0 1609 %2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 2), align 8 1610 %add.1 = add nsw i32 %2, %add 1611 %3 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 3), align 4 1612 %add.2 = add nsw i32 %3, %add.1 1613 %4 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 4), align 16 1614 %add.3 = add nsw i32 %4, %add.2 1615 %5 = load 
i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 5), align 4 1616 %add.4 = add nsw i32 %5, %add.3 1617 %6 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 6), align 8 1618 %add.5 = add nsw i32 %6, %add.4 1619 %7 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 7), align 4 1620 %add.6 = add nsw i32 %7, %add.5 1621 %res = invoke i32 @foobar(i32 %add.6) to label %normal unwind label %exception 1622exception: 1623 %cleanup = landingpad i8 cleanup 1624 br label %normal 1625normal: 1626 ret void 1627} 1628 1629; Test case from PR47670. Reduction result is used as incoming value in phi. 1630define i32 @reduction_result_used_in_phi(i32* nocapture readonly %data, i1 zeroext %b) { 1631; CHECK-LABEL: @reduction_result_used_in_phi( 1632; CHECK-NEXT: entry: 1633; CHECK-NEXT: br i1 [[B:%.*]], label [[BB:%.*]], label [[EXIT:%.*]] 1634; CHECK: bb: 1635; CHECK-NEXT: [[IDX_1:%.*]] = getelementptr inbounds i32, i32* [[DATA:%.*]], i64 1 1636; CHECK-NEXT: [[IDX_2:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i64 2 1637; CHECK-NEXT: [[IDX_3:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i64 3 1638; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[DATA]] to <4 x i32>* 1639; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 1640; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP1]]) 1641; CHECK-NEXT: br label [[EXIT]] 1642; CHECK: exit: 1643; CHECK-NEXT: [[SUM_1:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP2]], [[BB]] ] 1644; CHECK-NEXT: ret i32 [[SUM_1]] 1645; 1646; STORE-LABEL: @reduction_result_used_in_phi( 1647; STORE-NEXT: entry: 1648; STORE-NEXT: br i1 [[B:%.*]], label [[BB:%.*]], label [[EXIT:%.*]] 1649; STORE: bb: 1650; STORE-NEXT: [[IDX_1:%.*]] = getelementptr inbounds i32, i32* [[DATA:%.*]], i64 1 1651; STORE-NEXT: [[IDX_2:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i64 2 1652; STORE-NEXT: [[IDX_3:%.*]] = 
getelementptr inbounds i32, i32* [[DATA]], i64 3 1653; STORE-NEXT: [[TMP0:%.*]] = bitcast i32* [[DATA]] to <4 x i32>* 1654; STORE-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 1655; STORE-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP1]]) 1656; STORE-NEXT: br label [[EXIT]] 1657; STORE: exit: 1658; STORE-NEXT: [[SUM_1:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP2]], [[BB]] ] 1659; STORE-NEXT: ret i32 [[SUM_1]] 1660; 1661entry: 1662 br i1 %b, label %bb, label %exit 1663 1664bb: 1665 %l.0 = load i32, i32* %data, align 4 1666 %idx.1 = getelementptr inbounds i32, i32* %data, i64 1 1667 %l.1 = load i32, i32* %idx.1, align 4 1668 %add.1 = add i32 %l.1, %l.0 1669 %idx.2 = getelementptr inbounds i32, i32* %data, i64 2 1670 %l.2 = load i32, i32* %idx.2, align 4 1671 %add.2 = add i32 %l.2, %add.1 1672 %idx.3 = getelementptr inbounds i32, i32* %data, i64 3 1673 %l.3 = load i32, i32* %idx.3, align 4 1674 %add.3 = add i32 %l.3, %add.2 1675 br label %exit 1676 1677exit: 1678 %sum.1 = phi i32 [ 0, %entry ], [ %add.3, %bb] 1679 ret i32 %sum.1 1680} 1681 1682define i32 @reduction_result_used_in_phi_loop(i32* nocapture readonly %data, i1 zeroext %b) { 1683; CHECK-LABEL: @reduction_result_used_in_phi_loop( 1684; CHECK-NEXT: entry: 1685; CHECK-NEXT: br i1 [[B:%.*]], label [[BB:%.*]], label [[EXIT:%.*]] 1686; CHECK: bb: 1687; CHECK-NEXT: [[IDX_1:%.*]] = getelementptr inbounds i32, i32* [[DATA:%.*]], i64 1 1688; CHECK-NEXT: [[IDX_2:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i64 2 1689; CHECK-NEXT: [[IDX_3:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i64 3 1690; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[DATA]] to <4 x i32>* 1691; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 1692; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP1]]) 1693; CHECK-NEXT: br label [[EXIT]] 1694; CHECK: exit: 1695; CHECK-NEXT: [[SUM_1:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP2]], 
[[BB]] ] 1696; CHECK-NEXT: ret i32 [[SUM_1]] 1697; 1698; STORE-LABEL: @reduction_result_used_in_phi_loop( 1699; STORE-NEXT: entry: 1700; STORE-NEXT: br i1 [[B:%.*]], label [[BB:%.*]], label [[EXIT:%.*]] 1701; STORE: bb: 1702; STORE-NEXT: [[IDX_1:%.*]] = getelementptr inbounds i32, i32* [[DATA:%.*]], i64 1 1703; STORE-NEXT: [[IDX_2:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i64 2 1704; STORE-NEXT: [[IDX_3:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i64 3 1705; STORE-NEXT: [[TMP0:%.*]] = bitcast i32* [[DATA]] to <4 x i32>* 1706; STORE-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 1707; STORE-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP1]]) 1708; STORE-NEXT: br label [[EXIT]] 1709; STORE: exit: 1710; STORE-NEXT: [[SUM_1:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP2]], [[BB]] ] 1711; STORE-NEXT: ret i32 [[SUM_1]] 1712; 1713entry: 1714 br i1 %b, label %bb, label %exit 1715 1716bb: 1717 %l.0 = load i32, i32* %data, align 4 1718 %idx.1 = getelementptr inbounds i32, i32* %data, i64 1 1719 %l.1 = load i32, i32* %idx.1, align 4 1720 %add.1 = add i32 %l.1, %l.0 1721 %idx.2 = getelementptr inbounds i32, i32* %data, i64 2 1722 %l.2 = load i32, i32* %idx.2, align 4 1723 %add.2 = add i32 %l.2, %add.1 1724 %idx.3 = getelementptr inbounds i32, i32* %data, i64 3 1725 %l.3 = load i32, i32* %idx.3, align 4 1726 %add.3 = add i32 %l.3, %add.2 1727 br label %exit 1728 1729exit: 1730 %sum.1 = phi i32 [ 0, %entry ], [ %add.3, %bb] 1731 ret i32 %sum.1 1732} 1733 1734; Make sure we do not crash or infinite loop on ill-formed IR. 

; Regression test: the 'dead' block has no predecessors, and its instruction
; %t0 uses its own result, which is only legal because the block is
; unreachable. SLP must tolerate this without crashing or looping forever.
define void @unreachable_block() {
; CHECK-LABEL: @unreachable_block(
; CHECK-NEXT:  bb.0:
; CHECK-NEXT:    br label [[BB_1:%.*]]
; CHECK:       dead:
; CHECK-NEXT:    [[T0:%.*]] = add i16 [[T0]], undef
; CHECK-NEXT:    br label [[BB_1]]
; CHECK:       bb.1:
; CHECK-NEXT:    [[T1:%.*]] = phi i16 [ undef, [[BB_0:%.*]] ], [ [[T0]], [[DEAD:%.*]] ]
; CHECK-NEXT:    ret void
;
; STORE-LABEL: @unreachable_block(
; STORE-NEXT:  bb.0:
; STORE-NEXT:    br label [[BB_1:%.*]]
; STORE:       dead:
; STORE-NEXT:    [[T0:%.*]] = add i16 [[T0]], undef
; STORE-NEXT:    br label [[BB_1]]
; STORE:       bb.1:
; STORE-NEXT:    [[T1:%.*]] = phi i16 [ undef, [[BB_0:%.*]] ], [ [[T0]], [[DEAD:%.*]] ]
; STORE-NEXT:    ret void
;
bb.0:
  br label %bb.1

dead:
  %t0 = add i16 %t0, undef ; unreachable IR may depend on itself
  br label %bb.1

bb.1:
  %t1 = phi i16 [ undef, %bb.0 ], [ %t0, %dead ]
  ret void
}

; Personality routine referenced by @i32_red_invoke's landingpad; declared
; only, since the test never executes the unwind path.
declare i32 @__gxx_personality_v0(...)
