target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
target triple = "powerpc64-bgq-linux"
; RUN: llc < %s -enable-misched -march=ppc64 -mcpu=a2 | FileCheck %s
; RUN: llc < %s -enable-misched -enable-aa-sched-mi -march=ppc64 -mcpu=a2 | FileCheck %s

; Scheduling test for the unrolled loop in @s000 below. It is compiled twice
; for the A2 CPU with the machine scheduler enabled: once as-is and once with
; -enable-aa-sched-mi added. Both outputs are verified against the same
; FileCheck directives embedded in this file.

; External data referenced by @s000. Only @X and @Y are accessed directly;
; the remaining arrays are merely passed by address to @dummy.
@aa = external global [256 x [256 x double]], align 32
@bb = external global [256 x [256 x double]], align 32
@cc = external global [256 x [256 x double]], align 32
@.str1 = external hidden unnamed_addr constant [6 x i8], align 1
@X = external global [16000 x double], align 32
@Y = external global [16000 x double], align 32
@Z = external global [16000 x double], align 32
@U = external global [16000 x double], align 32
@V = external global [16000 x double], align 32
@.str137 = external hidden unnamed_addr constant [14 x i8], align 1

declare void @check(i32 signext) nounwind

declare signext i32 @printf(i8* nocapture, ...) nounwind

declare signext i32 @init(i8*) nounwind

; @s000: for each of 400000 repetitions, walk indices 0, 16, 32, ... and at
; each step handle elements idx, idx|4, idx|8 and idx|12: load from @Y,
; add 1.0, store to @X. @dummy is called after every repetition. The @clock
; delta around the whole nest is divided by 1.0e+06 and handed to @printf,
; then @check(1) is called and 0 is returned.
define signext i32 @s000() nounwind {
entry:
  %init.res = tail call signext i32 @init(i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str1, i64 0, i64 0))
  %t.start = tail call i64 @clock() nounwind
  br label %outer.header

; CHECK: @s000

outer.header:                                     ; preds = %outer.latch, %entry
  ; Repetition counter for the outer loop.
  %rep = phi i32 [ 0, %entry ], [ %rep.next, %outer.latch ]
  br label %inner.body

inner.body:                                       ; preds = %inner.body, %outer.header
  ; %idx starts at 0 and advances by 16, so or-ing in 4, 8 or 12 below
  ; yields the same index as adding that constant would.
  %idx = phi i64 [ 0, %outer.header ], [ %idx.next, %inner.body ]

  ; Element at %idx.
  %src.0 = getelementptr inbounds [16000 x double], [16000 x double]* @Y, i64 0, i64 %idx
  %dst.0 = getelementptr inbounds [16000 x double], [16000 x double]* @X, i64 0, i64 %idx
  %src.0.vec = bitcast double* %src.0 to <1 x double>*
  %val.0 = load <1 x double>, <1 x double>* %src.0.vec, align 32
  %sum.0 = fadd <1 x double> %val.0, <double 1.000000e+00>
  %dst.0.vec = bitcast double* %dst.0 to <1 x double>*
  store <1 x double> %sum.0, <1 x double>* %dst.0.vec, align 32

  ; Element at %idx | 4.
  %idx.4 = or i64 %idx, 4
  %src.4 = getelementptr inbounds [16000 x double], [16000 x double]* @Y, i64 0, i64 %idx.4
  %dst.4 = getelementptr inbounds [16000 x double], [16000 x double]* @X, i64 0, i64 %idx.4
  %src.4.vec = bitcast double* %src.4 to <1 x double>*
  %val.4 = load <1 x double>, <1 x double>* %src.4.vec, align 32
  %sum.4 = fadd <1 x double> %val.4, <double 1.000000e+00>
  %dst.4.vec = bitcast double* %dst.4 to <1 x double>*
  store <1 x double> %sum.4, <1 x double>* %dst.4.vec, align 32

  ; Element at %idx | 8.
  %idx.8 = or i64 %idx, 8
  %src.8 = getelementptr inbounds [16000 x double], [16000 x double]* @Y, i64 0, i64 %idx.8
  %dst.8 = getelementptr inbounds [16000 x double], [16000 x double]* @X, i64 0, i64 %idx.8
  %src.8.vec = bitcast double* %src.8 to <1 x double>*
  %val.8 = load <1 x double>, <1 x double>* %src.8.vec, align 32
  %sum.8 = fadd <1 x double> %val.8, <double 1.000000e+00>
  %dst.8.vec = bitcast double* %dst.8 to <1 x double>*
  store <1 x double> %sum.8, <1 x double>* %dst.8.vec, align 32

  ; Element at %idx | 12.
  %idx.12 = or i64 %idx, 12
  %src.12 = getelementptr inbounds [16000 x double], [16000 x double]* @Y, i64 0, i64 %idx.12
  %dst.12 = getelementptr inbounds [16000 x double], [16000 x double]* @X, i64 0, i64 %idx.12
  %src.12.vec = bitcast double* %src.12 to <1 x double>*
  %val.12 = load <1 x double>, <1 x double>* %src.12.vec, align 32
  %sum.12 = fadd <1 x double> %val.12, <double 1.000000e+00>
  %dst.12.vec = bitcast double* %dst.12 to <1 x double>*
  store <1 x double> %sum.12, <1 x double>* %dst.12.vec, align 32

  ; Advance by 16 and leave the loop once the truncated index equals 16000.
  %idx.next = add i64 %idx, 16
  %idx.next.i32 = trunc i64 %idx.next to i32
  %inner.done = icmp eq i32 %idx.next.i32, 16000
  br i1 %inner.done, label %outer.latch, label %inner.body

; Every load must be scheduled ahead of every store in the loop: once the
; first stfd has been matched after mtctr, no lfd may show up before bdnz.
; CHECK: mtctr
; CHECK: stfd
; CHECK-NOT: lfd
; CHECK: bdnz

outer.latch:                                      ; preds = %inner.body
  ; Pass the base address of every array to the external @dummy.
  %dummy.res = tail call signext i32 @dummy(
      double* getelementptr inbounds ([16000 x double], [16000 x double]* @X, i64 0, i64 0),
      double* getelementptr inbounds ([16000 x double], [16000 x double]* @Y, i64 0, i64 0),
      double* getelementptr inbounds ([16000 x double], [16000 x double]* @Z, i64 0, i64 0),
      double* getelementptr inbounds ([16000 x double], [16000 x double]* @U, i64 0, i64 0),
      double* getelementptr inbounds ([16000 x double], [16000 x double]* @V, i64 0, i64 0),
      [256 x double]* getelementptr inbounds ([256 x [256 x double]], [256 x [256 x double]]* @aa, i64 0, i64 0),
      [256 x double]* getelementptr inbounds ([256 x [256 x double]], [256 x [256 x double]]* @bb, i64 0, i64 0),
      [256 x double]* getelementptr inbounds ([256 x [256 x double]], [256 x [256 x double]]* @cc, i64 0, i64 0),
      double 0.000000e+00) nounwind
  %rep.next = add nsw i32 %rep, 1
  %outer.done = icmp eq i32 %rep.next, 400000
  br i1 %outer.done, label %done, label %outer.header

done:                                             ; preds = %outer.latch
  ; Report the @clock delta, scaled by 1.0e+06, through @printf.
  %t.end = tail call i64 @clock() nounwind
  %elapsed = sub nsw i64 %t.end, %t.start
  %elapsed.fp = sitofp i64 %elapsed to double
  %elapsed.scaled = fdiv double %elapsed.fp, 1.000000e+06
  %printf.res = tail call signext i32 (i8*, ...) @printf(i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str137, i64 0, i64 0), double %elapsed.scaled) nounwind
  tail call void @check(i32 signext 1)
  ret i32 0
}

declare i64 @clock() nounwind

declare signext i32 @dummy(double*, double*, double*, double*, double*, [256 x double]*, [256 x double]*, [256 x double]*, double)