; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=core2 -pre-RA-sched=source -enable-misched -verify-machineinstrs | FileCheck %s
;
; Verify that the misched resource/latency balance heuristics are sane.

define void @unrolled_mmult1(i32* %tmp55, i32* %tmp56, i32* %pre, i32* %pre94,
  i32* %pre95, i32* %pre96, i32* %pre97, i32* %pre98, i32* %pre99,
  i32* %pre100, i32* %pre101, i32* %pre102, i32* %pre103, i32* %pre104)
  nounwind uwtable ssp {
entry:
  br label %for.body

; imull folded loads should be in order and interleaved with addl, never
; adjacent. Also check that we have no spilling.
;
; Since the mmult1 IR is already in good order, this effectively ensures
; that the scheduler maintains source order.
;
; CHECK-LABEL: %for.body
; CHECK-NOT: %rsp
; CHECK: imull 4
; CHECK-NOT: {{imull|rsp}}
; CHECK: addl
; CHECK: imull 8
; CHECK-NOT: {{imull|rsp}}
; CHECK: addl
; CHECK: imull 12
; CHECK-NOT: {{imull|rsp}}
; CHECK: addl
; CHECK: imull 16
; CHECK-NOT: {{imull|rsp}}
; CHECK: addl
; CHECK: imull 20
; CHECK-NOT: {{imull|rsp}}
; CHECK: addl
; CHECK: imull 24
; CHECK-NOT: {{imull|rsp}}
; CHECK: addl
; CHECK: imull 28
; CHECK-NOT: {{imull|rsp}}
; CHECK: addl
; CHECK: imull 32
; CHECK-NOT: {{imull|rsp}}
; CHECK: addl
; CHECK: imull 36
; CHECK-NOT: {{imull|rsp}}
; CHECK: addl
; CHECK-NOT: {{imull|rsp}}
; CHECK-LABEL: %end
for.body:
  %indvars.iv42.i = phi i64 [ %indvars.iv.next43.i, %for.body ], [ 0, %entry ]
  %tmp57 = load i32, i32* %tmp56, align 4
  %arrayidx12.us.i61 = getelementptr inbounds i32, i32* %pre, i64 %indvars.iv42.i
  %tmp58 = load i32, i32* %arrayidx12.us.i61, align 4
  %mul.us.i = mul nsw i32 %tmp58, %tmp57
  %arrayidx8.us.i.1 = getelementptr inbounds i32, i32* %tmp56, i64 1
  %tmp59 = load i32, i32* %arrayidx8.us.i.1, align 4
  %arrayidx12.us.i61.1 = getelementptr inbounds i32, i32* %pre94, i64 %indvars.iv42.i
  %tmp60 = load i32, i32* %arrayidx12.us.i61.1, align 4
  %mul.us.i.1 = mul nsw i32 %tmp60, %tmp59
  %add.us.i.1 = add nsw i32 %mul.us.i.1, %mul.us.i
  %arrayidx8.us.i.2 = getelementptr inbounds i32, i32* %tmp56, i64 2
  %tmp61 = load i32, i32* %arrayidx8.us.i.2, align 4
  %arrayidx12.us.i61.2 = getelementptr inbounds i32, i32* %pre95, i64 %indvars.iv42.i
  %tmp62 = load i32, i32* %arrayidx12.us.i61.2, align 4
  %mul.us.i.2 = mul nsw i32 %tmp62, %tmp61
  %add.us.i.2 = add nsw i32 %mul.us.i.2, %add.us.i.1
  %arrayidx8.us.i.3 = getelementptr inbounds i32, i32* %tmp56, i64 3
  %tmp63 = load i32, i32* %arrayidx8.us.i.3, align 4
  %arrayidx12.us.i61.3 = getelementptr inbounds i32, i32* %pre96, i64 %indvars.iv42.i
  %tmp64 = load i32, i32* %arrayidx12.us.i61.3, align 4
  %mul.us.i.3 = mul nsw i32 %tmp64, %tmp63
  %add.us.i.3 = add nsw i32 %mul.us.i.3, %add.us.i.2
  %arrayidx8.us.i.4 = getelementptr inbounds i32, i32* %tmp56, i64 4
  %tmp65 = load i32, i32* %arrayidx8.us.i.4, align 4
  %arrayidx12.us.i61.4 = getelementptr inbounds i32, i32* %pre97, i64 %indvars.iv42.i
  %tmp66 = load i32, i32* %arrayidx12.us.i61.4, align 4
  %mul.us.i.4 = mul nsw i32 %tmp66, %tmp65
  %add.us.i.4 = add nsw i32 %mul.us.i.4, %add.us.i.3
  %arrayidx8.us.i.5 = getelementptr inbounds i32, i32* %tmp56, i64 5
  %tmp67 = load i32, i32* %arrayidx8.us.i.5, align 4
  %arrayidx12.us.i61.5 = getelementptr inbounds i32, i32* %pre98, i64 %indvars.iv42.i
  %tmp68 = load i32, i32* %arrayidx12.us.i61.5, align 4
  %mul.us.i.5 = mul nsw i32 %tmp68, %tmp67
  %add.us.i.5 = add nsw i32 %mul.us.i.5, %add.us.i.4
  %arrayidx8.us.i.6 = getelementptr inbounds i32, i32* %tmp56, i64 6
  %tmp69 = load i32, i32* %arrayidx8.us.i.6, align 4
  %arrayidx12.us.i61.6 = getelementptr inbounds i32, i32* %pre99, i64 %indvars.iv42.i
  %tmp70 = load i32, i32* %arrayidx12.us.i61.6, align 4
  %mul.us.i.6 = mul nsw i32 %tmp70, %tmp69
  %add.us.i.6 = add nsw i32 %mul.us.i.6, %add.us.i.5
  %arrayidx8.us.i.7 = getelementptr inbounds i32, i32* %tmp56, i64 7
  %tmp71 = load i32, i32* %arrayidx8.us.i.7, align 4
  %arrayidx12.us.i61.7 = getelementptr inbounds i32, i32* %pre100, i64 %indvars.iv42.i
  %tmp72 = load i32, i32* %arrayidx12.us.i61.7, align 4
  %mul.us.i.7 = mul nsw i32 %tmp72, %tmp71
  %add.us.i.7 = add nsw i32 %mul.us.i.7, %add.us.i.6
  %arrayidx8.us.i.8 = getelementptr inbounds i32, i32* %tmp56, i64 8
  %tmp73 = load i32, i32* %arrayidx8.us.i.8, align 4
  %arrayidx12.us.i61.8 = getelementptr inbounds i32, i32* %pre101, i64 %indvars.iv42.i
  %tmp74 = load i32, i32* %arrayidx12.us.i61.8, align 4
  %mul.us.i.8 = mul nsw i32 %tmp74, %tmp73
  %add.us.i.8 = add nsw i32 %mul.us.i.8, %add.us.i.7
  %arrayidx8.us.i.9 = getelementptr inbounds i32, i32* %tmp56, i64 9
  %tmp75 = load i32, i32* %arrayidx8.us.i.9, align 4
  %arrayidx12.us.i61.9 = getelementptr inbounds i32, i32* %pre102, i64 %indvars.iv42.i
  %tmp76 = load i32, i32* %arrayidx12.us.i61.9, align 4
  %mul.us.i.9 = mul nsw i32 %tmp76, %tmp75
  %add.us.i.9 = add nsw i32 %mul.us.i.9, %add.us.i.8
  %arrayidx16.us.i = getelementptr inbounds i32, i32* %tmp55, i64 %indvars.iv42.i
  store i32 %add.us.i.9, i32* %arrayidx16.us.i, align 4
  %indvars.iv.next43.i = add i64 %indvars.iv42.i, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next43.i to i32
  %exitcond = icmp eq i32 %lftr.wideiv, 10
  br i1 %exitcond, label %end, label %for.body

end:
  ret void
}

; Unlike the above loop, this IR starts out badly ordered and must be
; rescheduled.
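;
; For either loop, the scheduled body should look roughly like the
; sketch below. The registers are hypothetical; the CHECK lines only
; enforce the imull/addl interleaving and the folded-load offsets:
;   imull 4(%rsi), %ecx
;   addl  %ecx, %eax
;   imull 8(%rsi), %edx
;   addl  %edx, %eax
;   ...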
;
; CHECK-LABEL: %for.body
; CHECK-NOT: %rsp
; CHECK: imull 4
; CHECK-NOT: {{imull|rsp}}
; CHECK: addl
; CHECK: imull 8
; CHECK-NOT: {{imull|rsp}}
; CHECK: addl
; CHECK: imull 12
; CHECK-NOT: {{imull|rsp}}
; CHECK: addl
; CHECK: imull 16
; CHECK-NOT: {{imull|rsp}}
; CHECK: addl
; CHECK: imull 20
; CHECK-NOT: {{imull|rsp}}
; CHECK: addl
; CHECK: imull 24
; CHECK-NOT: {{imull|rsp}}
; CHECK: addl
; CHECK: imull 28
; CHECK-NOT: {{imull|rsp}}
; CHECK: addl
; CHECK: imull 32
; CHECK-NOT: {{imull|rsp}}
; CHECK: addl
; CHECK: imull 36
; CHECK-NOT: {{imull|rsp}}
; CHECK: addl
; CHECK-NOT: {{imull|rsp}}
; CHECK-LABEL: %end
define void @unrolled_mmult2(i32* %tmp55, i32* %tmp56, i32* %pre, i32* %pre94,
  i32* %pre95, i32* %pre96, i32* %pre97, i32* %pre98, i32* %pre99,
  i32* %pre100, i32* %pre101, i32* %pre102, i32* %pre103, i32* %pre104)
  nounwind uwtable ssp {
entry:
  br label %for.body
for.body:
  %indvars.iv42.i = phi i64 [ %indvars.iv.next43.i, %for.body ], [ 0, %entry ]
  %tmp57 = load i32, i32* %tmp56, align 4
  %arrayidx12.us.i61 = getelementptr inbounds i32, i32* %pre, i64 %indvars.iv42.i
  %tmp58 = load i32, i32* %arrayidx12.us.i61, align 4
  %arrayidx8.us.i.1 = getelementptr inbounds i32, i32* %tmp56, i64 1
  %tmp59 = load i32, i32* %arrayidx8.us.i.1, align 4
  %arrayidx12.us.i61.1 = getelementptr inbounds i32, i32* %pre94, i64 %indvars.iv42.i
  %tmp60 = load i32, i32* %arrayidx12.us.i61.1, align 4
  %arrayidx8.us.i.2 = getelementptr inbounds i32, i32* %tmp56, i64 2
  %tmp61 = load i32, i32* %arrayidx8.us.i.2, align 4
  %arrayidx12.us.i61.2 = getelementptr inbounds i32, i32* %pre95, i64 %indvars.iv42.i
  %tmp62 = load i32, i32* %arrayidx12.us.i61.2, align 4
  %arrayidx8.us.i.3 = getelementptr inbounds i32, i32* %tmp56, i64 3
  %tmp63 = load i32, i32* %arrayidx8.us.i.3, align 4
  %arrayidx12.us.i61.3 = getelementptr inbounds i32, i32* %pre96, i64 %indvars.iv42.i
  %tmp64 = load i32, i32* %arrayidx12.us.i61.3, align 4
  %arrayidx8.us.i.4 = getelementptr inbounds i32, i32* %tmp56, i64 4
  %tmp65 = load i32, i32* %arrayidx8.us.i.4, align 4
  %arrayidx12.us.i61.4 = getelementptr inbounds i32, i32* %pre97, i64 %indvars.iv42.i
  %tmp66 = load i32, i32* %arrayidx12.us.i61.4, align 4
  %arrayidx8.us.i.5 = getelementptr inbounds i32, i32* %tmp56, i64 5
  %tmp67 = load i32, i32* %arrayidx8.us.i.5, align 4
  %arrayidx12.us.i61.5 = getelementptr inbounds i32, i32* %pre98, i64 %indvars.iv42.i
  %tmp68 = load i32, i32* %arrayidx12.us.i61.5, align 4
  %arrayidx8.us.i.6 = getelementptr inbounds i32, i32* %tmp56, i64 6
  %tmp69 = load i32, i32* %arrayidx8.us.i.6, align 4
  %arrayidx12.us.i61.6 = getelementptr inbounds i32, i32* %pre99, i64 %indvars.iv42.i
  %tmp70 = load i32, i32* %arrayidx12.us.i61.6, align 4
  %mul.us.i = mul nsw i32 %tmp58, %tmp57
  %arrayidx8.us.i.7 = getelementptr inbounds i32, i32* %tmp56, i64 7
  %tmp71 = load i32, i32* %arrayidx8.us.i.7, align 4
  %arrayidx12.us.i61.7 = getelementptr inbounds i32, i32* %pre100, i64 %indvars.iv42.i
  %tmp72 = load i32, i32* %arrayidx12.us.i61.7, align 4
  %arrayidx8.us.i.8 = getelementptr inbounds i32, i32* %tmp56, i64 8
  %tmp73 = load i32, i32* %arrayidx8.us.i.8, align 4
  %arrayidx12.us.i61.8 = getelementptr inbounds i32, i32* %pre101, i64 %indvars.iv42.i
  %tmp74 = load i32, i32* %arrayidx12.us.i61.8, align 4
  %arrayidx8.us.i.9 = getelementptr inbounds i32, i32* %tmp56, i64 9
  %tmp75 = load i32, i32* %arrayidx8.us.i.9, align 4
  %arrayidx12.us.i61.9 = getelementptr inbounds i32, i32* %pre102, i64 %indvars.iv42.i
  %tmp76 = load i32, i32* %arrayidx12.us.i61.9, align 4
  %mul.us.i.1 = mul nsw i32 %tmp60, %tmp59
  %add.us.i.1 = add nsw i32 %mul.us.i.1, %mul.us.i
  %mul.us.i.2 = mul nsw i32 %tmp62, %tmp61
  %add.us.i.2 = add nsw i32 %mul.us.i.2, %add.us.i.1
  %mul.us.i.3 = mul nsw i32 %tmp64, %tmp63
  %add.us.i.3 = add nsw i32 %mul.us.i.3, %add.us.i.2
  %mul.us.i.4 = mul nsw i32 %tmp66, %tmp65
  %add.us.i.4 = add nsw i32 %mul.us.i.4, %add.us.i.3
  %mul.us.i.5 = mul nsw i32 %tmp68, %tmp67
  %add.us.i.5 = add nsw i32 %mul.us.i.5, %add.us.i.4
  %mul.us.i.6 = mul nsw i32 %tmp70, %tmp69
  %add.us.i.6 = add nsw i32 %mul.us.i.6, %add.us.i.5
  %mul.us.i.7 = mul nsw i32 %tmp72, %tmp71
  %add.us.i.7 = add nsw i32 %mul.us.i.7, %add.us.i.6
  %mul.us.i.8 = mul nsw i32 %tmp74, %tmp73
  %add.us.i.8 = add nsw i32 %mul.us.i.8, %add.us.i.7
  %mul.us.i.9 = mul nsw i32 %tmp76, %tmp75
  %add.us.i.9 = add nsw i32 %mul.us.i.9, %add.us.i.8
  %arrayidx16.us.i = getelementptr inbounds i32, i32* %tmp55, i64 %indvars.iv42.i
  store i32 %add.us.i.9, i32* %arrayidx16.us.i, align 4
  %indvars.iv.next43.i = add i64 %indvars.iv42.i, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next43.i to i32
  %exitcond = icmp eq i32 %lftr.wideiv, 10
  br i1 %exitcond, label %end, label %for.body

end:
  ret void
}

; A mildly interesting little block extracted from a cipher. The
; balancing heuristics are interesting here because resource, latency,
; and register limits all apply at once. For now, simply check that we
; don't use any callee-saved registers (which would show up as push/pop
; in the prologue and epilogue).
; CHECK-LABEL: @encpc1
; CHECK-LABEL: %entry
; CHECK-NOT: push
; CHECK-NOT: pop
; CHECK: ret
@a = external global i32, align 4
@b = external global i32, align 4
@c = external global i32, align 4
@d = external global i32, align 4
define i32 @encpc1() nounwind {
entry:
  %l1 = load i32, i32* @a, align 16
  %conv = shl i32 %l1, 8
  %s5 = lshr i32 %l1, 8
  %add = or i32 %conv, %s5
  store i32 %add, i32* @b
  %l6 = load i32, i32* @a
  %l7 = load i32, i32* @c
  %add.i = add i32 %l7, %l6
  %idxprom.i = zext i32 %l7 to i64
  %arrayidx.i = getelementptr inbounds i32, i32* @d, i64 %idxprom.i
  %l8 = load i32, i32* %arrayidx.i
  store i32 346, i32* @c
  store i32 20021, i32* @d
  %l9 = load i32, i32* @a
  store i32 %l8, i32* @a
  store i32 %l9, i32* @b
  store i32 %add.i, i32* @c
  store i32 %l9, i32* @d
  %cmp.i = icmp eq i32 %add.i, 0
  %s10 = lshr i32 %l1, 16
  %s12 = lshr i32 %l1, 24
  %s14 = lshr i32 %l1, 30
  br i1 %cmp.i, label %if, label %return
if:
  %sa = add i32 %s5, %s10
  %sb = add i32 %sa, %s12
  %sc = add i32 %sb, %s14
  br label %return
return:
  %result = phi i32 [0, %entry], [%sc, %if]
  ret i32 %result
}
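
; For reference, a callee-saved register save/restore that the CHECK-NOT
; lines above would reject looks roughly like the following (the choice
; of %rbx is hypothetical):
;   pushq %rbx
;   ...
;   popq  %rbx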