; RUN: llc < %s -mtriple=powerpc-apple-darwin -mcpu=g4 -disable-ppc-ilp-pref | FileCheck %s
; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=g4 -disable-ppc-ilp-pref | FileCheck %s

; Codegen test: compiles @s122 with llc for two PowerPC triples (see the run
; lines above) and verifies that the emitted assembly contains an mfcr
; followed later by an mtcr.
; NOTE(review): presumably the mfcr/mtcr pair is the save/restore of a
; callee-saved condition-register field in the prologue/epilogue of @s122 --
; confirm against the change that introduced this test.

; ModuleID = 'tsc.c'
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
target triple = "powerpc64-unknown-linux-gnu"

; Arrays read and written by the loops in @s122 and passed to @dummy.
@a = common global [32000 x float] zeroinitializer, align 16
@b = common global [32000 x float] zeroinitializer, align 16
@c = common global [32000 x float] zeroinitializer, align 16
@d = common global [32000 x float] zeroinitializer, align 16
@e = common global [32000 x float] zeroinitializer, align 16
@aa = common global [256 x [256 x float]] zeroinitializer, align 16
@bb = common global [256 x [256 x float]] zeroinitializer, align 16
@cc = common global [256 x [256 x float]] zeroinitializer, align 16

; Name passed to @init and format string passed to @printf.
@.str11 = private unnamed_addr constant [6 x i8] c"s122 \00", align 1
@.str152 = private unnamed_addr constant [14 x i8] c"S122\09 %.2f \09\09\00", align 1

; External routines called by @s122; their bodies are not part of this module.
declare i32 @printf(i8* nocapture, ...) nounwind
declare i32 @init(i8* %name) nounwind
declare i64 @clock() nounwind
declare i32 @dummy(float*, float*, float*, float*, float*, [256 x float]*, [256 x float]*, [256 x float]*, float)
declare void @check(i32 %name) nounwind

; CHECK: mfcr
; CHECK: mtcr

; @s122(%n1, %n3):
;   * calls @init and records a start time from @clock;
;   * if %n1 - 1 < 32000, runs an outer loop whose counter advances by 5 per
;     trip until it equals 200000. Each trip holds five copies of the same
;     inner loop, each copy followed by a call to @dummy. The inner loop does
;     a[j] += b[31999 - i], with i counting up from 0 by 1 and j starting at
;     %n1 - 1 and advancing by %n3, while the low 32 bits of j are < 32000;
;   * otherwise runs a loop that only calls @dummy eight times per trip, with
;     its counter advancing by 8 until it equals 200000;
;   * finally prints (clock delta / 1e6) via @printf, calls @check(1) and
;     returns 0.
define i32 @s122(i32 %n1, i32 %n3) nounwind {
entry:
  %call = tail call i32 @init(i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str11, i64 0, i64 0))
  %call1 = tail call i64 @clock() nounwind
  %sub = add nsw i32 %n1, -1
  %cmp316 = icmp slt i32 %sub, 32000
  br i1 %cmp316, label %entry.split.us, label %for.end.7

entry.split.us:                                   ; preds = %entry
  ; Loop-invariant 64-bit copies of the start index (%n1 - 1) and stride (%n3).
  %0 = sext i32 %sub to i64
  %1 = sext i32 %n3 to i64
  br label %for.body4.lr.ph.us

; Inner loop, copy 0. Copies .1 through .4 below repeat the same computation
; with renamed values.
for.body4.us:                                     ; preds = %for.body4.lr.ph.us, %for.body4.us
  %indvars.iv20 = phi i64 [ 0, %for.body4.lr.ph.us ], [ %indvars.iv.next21, %for.body4.us ]
  %indvars.iv = phi i64 [ %0, %for.body4.lr.ph.us ], [ %indvars.iv.next, %for.body4.us ]
  %indvars.iv.next21 = add i64 %indvars.iv20, 1
  %sub5.us = sub i64 31999, %indvars.iv20
  ; shl 32 followed by ashr 32 sign-extends the low 32 bits of the index.
  %sext = shl i64 %sub5.us, 32
  %idxprom.us = ashr exact i64 %sext, 32
  %arrayidx.us = getelementptr inbounds [32000 x float], [32000 x float]* @b, i64 0, i64 %idxprom.us
  %2 = load float, float* %arrayidx.us, align 4
  %arrayidx7.us = getelementptr inbounds [32000 x float], [32000 x float]* @a, i64 0, i64 %indvars.iv
  %3 = load float, float* %arrayidx7.us, align 4
  %add8.us = fadd float %3, %2
  store float %add8.us, float* %arrayidx7.us, align 4
  %indvars.iv.next = add i64 %indvars.iv, %1
  ; The exit test is performed on the 32-bit truncation of the 64-bit index.
  %4 = trunc i64 %indvars.iv.next to i32
  %cmp3.us = icmp slt i32 %4, 32000
  br i1 %cmp3.us, label %for.body4.us, label %for.body4.lr.ph.us.1

; Outer-loop header: %nl.019.us is the outer counter (advanced by 5 per trip).
for.body4.lr.ph.us:                               ; preds = %entry.split.us, %for.end.us.4
  %nl.019.us = phi i32 [ 0, %entry.split.us ], [ %inc.us.4, %for.end.us.4 ]
  br label %for.body4.us

; Common exit: report the elapsed clock value and finish.
for.end12:                                        ; preds = %for.end.7, %for.end.us.4
  %call13 = tail call i64 @clock() nounwind
  %sub14 = sub nsw i64 %call13, %call1
  %conv = sitofp i64 %sub14 to double
  %div = fdiv double %conv, 1.000000e+06
  %call15 = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str152, i64 0, i64 0), double %div) nounwind
  tail call void @check(i32 1)
  ret i32 0

for.body4.lr.ph.us.1:                             ; preds = %for.body4.us
  %call10.us = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float], [32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]], [256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]], [256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]], [256 x [256 x float]]* @cc, i64 0, i64 0), float 0.000000e+00) nounwind
  br label %for.body4.us.1

for.body4.us.1:                                   ; preds = %for.body4.us.1, %for.body4.lr.ph.us.1
  %indvars.iv20.1 = phi i64 [ 0, %for.body4.lr.ph.us.1 ], [ %indvars.iv.next21.1, %for.body4.us.1 ]
  %indvars.iv.1 = phi i64 [ %0, %for.body4.lr.ph.us.1 ], [ %indvars.iv.next.1, %for.body4.us.1 ]
  %indvars.iv.next21.1 = add i64 %indvars.iv20.1, 1
  %sub5.us.1 = sub i64 31999, %indvars.iv20.1
  %sext23 = shl i64 %sub5.us.1, 32
  %idxprom.us.1 = ashr exact i64 %sext23, 32
  %arrayidx.us.1 = getelementptr inbounds [32000 x float], [32000 x float]* @b, i64 0, i64 %idxprom.us.1
  %5 = load float, float* %arrayidx.us.1, align 4
  %arrayidx7.us.1 = getelementptr inbounds [32000 x float], [32000 x float]* @a, i64 0, i64 %indvars.iv.1
  %6 = load float, float* %arrayidx7.us.1, align 4
  %add8.us.1 = fadd float %6, %5
  store float %add8.us.1, float* %arrayidx7.us.1, align 4
  %indvars.iv.next.1 = add i64 %indvars.iv.1, %1
  %7 = trunc i64 %indvars.iv.next.1 to i32
  %cmp3.us.1 = icmp slt i32 %7, 32000
  br i1 %cmp3.us.1, label %for.body4.us.1, label %for.body4.lr.ph.us.2

for.body4.lr.ph.us.2:                             ; preds = %for.body4.us.1
  %call10.us.1 = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float], [32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]], [256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]], [256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]], [256 x [256 x float]]* @cc, i64 0, i64 0), float 0.000000e+00) nounwind
  br label %for.body4.us.2

for.body4.us.2:                                   ; preds = %for.body4.us.2, %for.body4.lr.ph.us.2
  %indvars.iv20.2 = phi i64 [ 0, %for.body4.lr.ph.us.2 ], [ %indvars.iv.next21.2, %for.body4.us.2 ]
  %indvars.iv.2 = phi i64 [ %0, %for.body4.lr.ph.us.2 ], [ %indvars.iv.next.2, %for.body4.us.2 ]
  %indvars.iv.next21.2 = add i64 %indvars.iv20.2, 1
  %sub5.us.2 = sub i64 31999, %indvars.iv20.2
  %sext24 = shl i64 %sub5.us.2, 32
  %idxprom.us.2 = ashr exact i64 %sext24, 32
  %arrayidx.us.2 = getelementptr inbounds [32000 x float], [32000 x float]* @b, i64 0, i64 %idxprom.us.2
  %8 = load float, float* %arrayidx.us.2, align 4
  %arrayidx7.us.2 = getelementptr inbounds [32000 x float], [32000 x float]* @a, i64 0, i64 %indvars.iv.2
  %9 = load float, float* %arrayidx7.us.2, align 4
  %add8.us.2 = fadd float %9, %8
  store float %add8.us.2, float* %arrayidx7.us.2, align 4
  %indvars.iv.next.2 = add i64 %indvars.iv.2, %1
  %10 = trunc i64 %indvars.iv.next.2 to i32
  %cmp3.us.2 = icmp slt i32 %10, 32000
  br i1 %cmp3.us.2, label %for.body4.us.2, label %for.body4.lr.ph.us.3

for.body4.lr.ph.us.3:                             ; preds = %for.body4.us.2
  %call10.us.2 = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float], [32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]], [256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]], [256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]], [256 x [256 x float]]* @cc, i64 0, i64 0), float 0.000000e+00) nounwind
  br label %for.body4.us.3

for.body4.us.3:                                   ; preds = %for.body4.us.3, %for.body4.lr.ph.us.3
  %indvars.iv20.3 = phi i64 [ 0, %for.body4.lr.ph.us.3 ], [ %indvars.iv.next21.3, %for.body4.us.3 ]
  %indvars.iv.3 = phi i64 [ %0, %for.body4.lr.ph.us.3 ], [ %indvars.iv.next.3, %for.body4.us.3 ]
  %indvars.iv.next21.3 = add i64 %indvars.iv20.3, 1
  %sub5.us.3 = sub i64 31999, %indvars.iv20.3
  %sext25 = shl i64 %sub5.us.3, 32
  %idxprom.us.3 = ashr exact i64 %sext25, 32
  %arrayidx.us.3 = getelementptr inbounds [32000 x float], [32000 x float]* @b, i64 0, i64 %idxprom.us.3
  %11 = load float, float* %arrayidx.us.3, align 4
  %arrayidx7.us.3 = getelementptr inbounds [32000 x float], [32000 x float]* @a, i64 0, i64 %indvars.iv.3
  %12 = load float, float* %arrayidx7.us.3, align 4
  %add8.us.3 = fadd float %12, %11
  store float %add8.us.3, float* %arrayidx7.us.3, align 4
  %indvars.iv.next.3 = add i64 %indvars.iv.3, %1
  %13 = trunc i64 %indvars.iv.next.3 to i32
  %cmp3.us.3 = icmp slt i32 %13, 32000
  br i1 %cmp3.us.3, label %for.body4.us.3, label %for.body4.lr.ph.us.4

for.body4.lr.ph.us.4:                             ; preds = %for.body4.us.3
  %call10.us.3 = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float], [32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]], [256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]], [256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]], [256 x [256 x float]]* @cc, i64 0, i64 0), float 0.000000e+00) nounwind
  br label %for.body4.us.4

for.body4.us.4:                                   ; preds = %for.body4.us.4, %for.body4.lr.ph.us.4
  %indvars.iv20.4 = phi i64 [ 0, %for.body4.lr.ph.us.4 ], [ %indvars.iv.next21.4, %for.body4.us.4 ]
  %indvars.iv.4 = phi i64 [ %0, %for.body4.lr.ph.us.4 ], [ %indvars.iv.next.4, %for.body4.us.4 ]
  %indvars.iv.next21.4 = add i64 %indvars.iv20.4, 1
  %sub5.us.4 = sub i64 31999, %indvars.iv20.4
  %sext26 = shl i64 %sub5.us.4, 32
  %idxprom.us.4 = ashr exact i64 %sext26, 32
  %arrayidx.us.4 = getelementptr inbounds [32000 x float], [32000 x float]* @b, i64 0, i64 %idxprom.us.4
  %14 = load float, float* %arrayidx.us.4, align 4
  %arrayidx7.us.4 = getelementptr inbounds [32000 x float], [32000 x float]* @a, i64 0, i64 %indvars.iv.4
  %15 = load float, float* %arrayidx7.us.4, align 4
  %add8.us.4 = fadd float %15, %14
  store float %add8.us.4, float* %arrayidx7.us.4, align 4
  %indvars.iv.next.4 = add i64 %indvars.iv.4, %1
  %16 = trunc i64 %indvars.iv.next.4 to i32
  %cmp3.us.4 = icmp slt i32 %16, 32000
  br i1 %cmp3.us.4, label %for.body4.us.4, label %for.end.us.4

; Outer-loop latch: fifth @dummy call, then advance the outer counter by 5.
for.end.us.4:                                     ; preds = %for.body4.us.4
  %call10.us.4 = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float], [32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]], [256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]], [256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]], [256 x [256 x float]]* @cc, i64 0, i64 0), float 0.000000e+00) nounwind
  %inc.us.4 = add nsw i32 %nl.019.us, 5
  %exitcond.4 = icmp eq i32 %inc.us.4, 200000
  br i1 %exitcond.4, label %for.end12, label %for.body4.lr.ph.us

; Path taken when %n1 - 1 >= 32000: no array updates, only eight @dummy calls
; per trip, with the counter advanced by 8.
for.end.7:                                        ; preds = %entry, %for.end.7
  %nl.019 = phi i32 [ %inc.7, %for.end.7 ], [ 0, %entry ]
  %call10 = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float], [32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]], [256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]], [256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]], [256 x [256 x float]]* @cc, i64 0, i64 0), float 0.000000e+00) nounwind
  %call10.1 = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float], [32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]], [256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]], [256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]], [256 x [256 x float]]* @cc, i64 0, i64 0), float 0.000000e+00) nounwind
  %call10.2 = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float], [32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]], [256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]], [256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]], [256 x [256 x float]]* @cc, i64 0, i64 0), float 0.000000e+00) nounwind
  %call10.3 = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float], [32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]], [256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]], [256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]], [256 x [256 x float]]* @cc, i64 0, i64 0), float 0.000000e+00) nounwind
  %call10.4 = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float], [32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]], [256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]], [256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]], [256 x [256 x float]]* @cc, i64 0, i64 0), float 0.000000e+00) nounwind
  %call10.5 = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float], [32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]], [256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]], [256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]], [256 x [256 x float]]* @cc, i64 0, i64 0), float 0.000000e+00) nounwind
  %call10.6 = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float], [32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]], [256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]], [256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]], [256 x [256 x float]]* @cc, i64 0, i64 0), float 0.000000e+00) nounwind
  %call10.7 = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float], [32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]], [256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]], [256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]], [256 x [256 x float]]* @cc, i64 0, i64 0), float 0.000000e+00) nounwind
  %inc.7 = add nsw i32 %nl.019, 8
  %exitcond.7 = icmp eq i32 %inc.7, 200000
  br i1 %exitcond.7, label %for.end12, label %for.end.7
}

; The two declarations and the metadata node below are not referenced by any
; instruction in @s122 as shown here; they are retained unchanged.
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind

declare i32 @puts(i8* nocapture) nounwind

!3 = !{!"branch_weights", i32 64, i32 4}