1; RUN: llc -march=x86-64 < %s -block-placement-exit-block-bias=20 -no-phi-elim-live-out-early-exit | FileCheck %s 2target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" 3target triple = "x86_64-apple-macosx10.9.0" 4 5; This is longest_match, the hot function from zlib's deflate implementation. 6 7%struct.internal_state = type { %struct.z_stream_s*, i32, i8*, i64, i8*, i32, i32, %struct.gz_header_s*, i32, i8, i32, i32, i32, i32, i8*, i64, i16*, i16*, i32, i32, i32, i32, i32, i64, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [573 x %struct.ct_data_s], [61 x %struct.ct_data_s], [39 x %struct.ct_data_s], %struct.tree_desc_s, %struct.tree_desc_s, %struct.tree_desc_s, [16 x i16], [573 x i32], i32, i32, [573 x i8], i8*, i32, i32, i16*, i64, i64, i32, i32, i16, i32, i64 } 8%struct.z_stream_s = type { i8*, i32, i64, i8*, i32, i64, i8*, %struct.internal_state*, i8* (i8*, i32, i32)*, void (i8*, i8*)*, i8*, i32, i64, i64 } 9%struct.gz_header_s = type { i32, i64, i32, i32, i8*, i32, i32, i8*, i32, i8*, i32, i32, i32 } 10%struct.ct_data_s = type { %union.anon, %union.anon.0 } 11%union.anon = type { i16 } 12%union.anon.0 = type { i16 } 13%struct.tree_desc_s = type { %struct.ct_data_s*, i32, %struct.static_tree_desc_s* } 14%struct.static_tree_desc_s = type { i32 } 15 16; CHECK-LABEL: longest_match: 17 18; Verify that there are no spills or reloads in the loop exit block. This loop 19; is mostly cold, only %do.cond125 and %land.rhs131 are hot. 20; CHECK: %do.cond125 21; CHECK-NOT: {{Spill|Reload}} 22; CHECK: jbe 23 24; Verify that block placement doesn't destroy source order. It's important that 25; the two hot blocks are laid out close to each other. 26; CHECK-NEXT: %land.rhs131 27; CHECK: jne 28define i32 @longest_match(%struct.internal_state* nocapture %s, i32 %cur_match) nounwind { 29entry: 30 %max_chain_length = getelementptr inbounds %struct.internal_state, %struct.internal_state* %s, i64 0, i32 31 31 %0 = load i32, i32* %max_chain_length, align 4 32 %window = getelementptr inbounds %struct.internal_state, %struct.internal_state* %s, i64 0, i32 14 33 %1 = load i8*, i8** %window, align 8 34 %strstart = getelementptr inbounds %struct.internal_state, %struct.internal_state* %s, i64 0, i32 27 35 %2 = load i32, i32* %strstart, align 4 36 %idx.ext = zext i32 %2 to i64 37 %add.ptr = getelementptr inbounds i8, i8* %1, i64 %idx.ext 38 %prev_length = getelementptr inbounds %struct.internal_state, %struct.internal_state* %s, i64 0, i32 30 39 %3 = load i32, i32* %prev_length, align 4 40 %nice_match1 = getelementptr inbounds %struct.internal_state, %struct.internal_state* %s, i64 0, i32 36 41 %4 = load i32, i32* %nice_match1, align 4 42 %w_size = getelementptr inbounds %struct.internal_state, %struct.internal_state* %s, i64 0, i32 11 43 %5 = load i32, i32* %w_size, align 4 44 %sub = add i32 %5, -262 45 %cmp = icmp ugt i32 %2, %sub 46 %sub6 = sub i32 %2, %sub 47 %sub6. = select i1 %cmp, i32 %sub6, i32 0 48 %prev7 = getelementptr inbounds %struct.internal_state, %struct.internal_state* %s, i64 0, i32 16 49 %6 = load i16*, i16** %prev7, align 8 50 %w_mask = getelementptr inbounds %struct.internal_state, %struct.internal_state* %s, i64 0, i32 13 51 %7 = load i32, i32* %w_mask, align 4 52 %add.ptr11.sum = add i64 %idx.ext, 258 53 %add.ptr12 = getelementptr inbounds i8, i8* %1, i64 %add.ptr11.sum 54 %sub13 = add nsw i32 %3, -1 55 %idxprom = sext i32 %sub13 to i64 56 %add.ptr.sum = add i64 %idxprom, %idx.ext 57 %arrayidx = getelementptr inbounds i8, i8* %1, i64 %add.ptr.sum 58 %8 = load i8, i8* %arrayidx, align 1 59 %idxprom14 = sext i32 %3 to i64 60 %add.ptr.sum213 = add i64 %idxprom14, %idx.ext 61 %arrayidx15 = getelementptr inbounds i8, i8* %1, i64 %add.ptr.sum213 62 %9 = load i8, i8* %arrayidx15, align 1 63 %good_match = getelementptr inbounds %struct.internal_state, %struct.internal_state* %s, i64 0, i32 35 64 %10 = load i32, i32* %good_match, align 4 65 %cmp17 = icmp ult i32 %3, %10 66 %shr = lshr i32 %0, 2 67 %chain_length.0 = select i1 %cmp17, i32 %0, i32 %shr 68 %lookahead = getelementptr inbounds %struct.internal_state, %struct.internal_state* %s, i64 0, i32 29 69 %11 = load i32, i32* %lookahead, align 4 70 %cmp18 = icmp ugt i32 %4, %11 71 %. = select i1 %cmp18, i32 %11, i32 %4 72 %match_start = getelementptr inbounds %struct.internal_state, %struct.internal_state* %s, i64 0, i32 28 73 %add.ptr.sum217 = add i64 %idx.ext, 1 74 %arrayidx44 = getelementptr inbounds i8, i8* %1, i64 %add.ptr.sum217 75 %add.ptr.sum218 = add i64 %idx.ext, 2 76 %add.ptr50 = getelementptr inbounds i8, i8* %1, i64 %add.ptr.sum218 77 %sub.ptr.lhs.cast = ptrtoint i8* %add.ptr12 to i64 78 br label %do.body 79 80do.body: ; preds = %land.rhs131, %entry 81 %best_len.0 = phi i32 [ %best_len.1, %land.rhs131 ], [ %3, %entry ] 82 %chain_length.1 = phi i32 [ %dec, %land.rhs131 ], [ %chain_length.0, %entry ] 83 %cur_match.addr.0 = phi i32 [ %conv128, %land.rhs131 ], [ %cur_match, %entry ] 84 %scan_end1.0 = phi i8 [ %scan_end1.1, %land.rhs131 ], [ %8, %entry ] 85 %scan_end.0 = phi i8 [ %scan_end.1, %land.rhs131 ], [ %9, %entry ] 86 %idx.ext23 = zext i32 %cur_match.addr.0 to i64 87 %add.ptr24 = getelementptr inbounds i8, i8* %1, i64 %idx.ext23 88 %idxprom25 = sext i32 %best_len.0 to i64 89 %add.ptr24.sum = add i64 %idx.ext23, %idxprom25 90 %arrayidx26 = getelementptr inbounds i8, i8* %1, i64 %add.ptr24.sum 91 %12 = load i8, i8* %arrayidx26, align 1 92 %cmp28 = icmp eq i8 %12, %scan_end.0 93 br i1 %cmp28, label %lor.lhs.false, label %do.cond125 94 95lor.lhs.false: ; preds = %do.body 96 %sub30 = add nsw i32 %best_len.0, -1 97 %idxprom31 = sext i32 %sub30 to i64 98 %add.ptr24.sum214 = add i64 %idx.ext23, %idxprom31 99 %arrayidx32 = getelementptr inbounds i8, i8* %1, i64 %add.ptr24.sum214 100 %13 = load i8, i8* %arrayidx32, align 1 101 %cmp35 = icmp eq i8 %13, %scan_end1.0 102 br i1 %cmp35, label %lor.lhs.false37, label %do.cond125 103 104lor.lhs.false37: ; preds = %lor.lhs.false 105 %14 = load i8, i8* %add.ptr24, align 1 106 %15 = load i8, i8* %add.ptr, align 1 107 %cmp40 = icmp eq i8 %14, %15 108 br i1 %cmp40, label %lor.lhs.false42, label %do.cond125 109 110lor.lhs.false42: ; preds = %lor.lhs.false37 111 %add.ptr24.sum215 = add i64 %idx.ext23, 1 112 %incdec.ptr = getelementptr inbounds i8, i8* %1, i64 %add.ptr24.sum215 113 %16 = load i8, i8* %incdec.ptr, align 1 114 %17 = load i8, i8* %arrayidx44, align 1 115 %cmp46 = icmp eq i8 %16, %17 116 br i1 %cmp46, label %if.end49, label %do.cond125 117 118if.end49: ; preds = %lor.lhs.false42 119 %incdec.ptr.sum = add i64 %idx.ext23, 2 120 %incdec.ptr51 = getelementptr inbounds i8, i8* %1, i64 %incdec.ptr.sum 121 br label %do.cond 122 123do.cond: ; preds = %land.lhs.true100, %if.end49 124 %match.0 = phi i8* [ %incdec.ptr51, %if.end49 ], [ %incdec.ptr103, %land.lhs.true100 ] 125 %scan.1 = phi i8* [ %add.ptr50, %if.end49 ], [ %incdec.ptr101, %land.lhs.true100 ] 126 %incdec.ptr53 = getelementptr inbounds i8, i8* %scan.1, i64 1 127 %18 = load i8, i8* %incdec.ptr53, align 1 128 %incdec.ptr55 = getelementptr inbounds i8, i8* %match.0, i64 1 129 %19 = load i8, i8* %incdec.ptr55, align 1 130 %cmp57 = icmp eq i8 %18, %19 131 br i1 %cmp57, label %land.lhs.true, label %do.end 132 133land.lhs.true: ; preds = %do.cond 134 %incdec.ptr59 = getelementptr inbounds i8, i8* %scan.1, i64 2 135 %20 = load i8, i8* %incdec.ptr59, align 1 136 %incdec.ptr61 = getelementptr inbounds i8, i8* %match.0, i64 2 137 %21 = load i8, i8* %incdec.ptr61, align 1 138 %cmp63 = icmp eq i8 %20, %21 139 br i1 %cmp63, label %land.lhs.true65, label %do.end 140 141land.lhs.true65: ; preds = %land.lhs.true 142 %incdec.ptr66 = getelementptr inbounds i8, i8* %scan.1, i64 3 143 %22 = load i8, i8* %incdec.ptr66, align 1 144 %incdec.ptr68 = getelementptr inbounds i8, i8* %match.0, i64 3 145 %23 = load i8, i8* %incdec.ptr68, align 1 146 %cmp70 = icmp eq i8 %22, %23 147 br i1 %cmp70, label %land.lhs.true72, label %do.end 148 149land.lhs.true72: ; preds = %land.lhs.true65 150 %incdec.ptr73 = getelementptr inbounds i8, i8* %scan.1, i64 4 151 %24 = load i8, i8* %incdec.ptr73, align 1 152 %incdec.ptr75 = getelementptr inbounds i8, i8* %match.0, i64 4 153 %25 = load i8, i8* %incdec.ptr75, align 1 154 %cmp77 = icmp eq i8 %24, %25 155 br i1 %cmp77, label %land.lhs.true79, label %do.end 156 157land.lhs.true79: ; preds = %land.lhs.true72 158 %incdec.ptr80 = getelementptr inbounds i8, i8* %scan.1, i64 5 159 %26 = load i8, i8* %incdec.ptr80, align 1 160 %incdec.ptr82 = getelementptr inbounds i8, i8* %match.0, i64 5 161 %27 = load i8, i8* %incdec.ptr82, align 1 162 %cmp84 = icmp eq i8 %26, %27 163 br i1 %cmp84, label %land.lhs.true86, label %do.end 164 165land.lhs.true86: ; preds = %land.lhs.true79 166 %incdec.ptr87 = getelementptr inbounds i8, i8* %scan.1, i64 6 167 %28 = load i8, i8* %incdec.ptr87, align 1 168 %incdec.ptr89 = getelementptr inbounds i8, i8* %match.0, i64 6 169 %29 = load i8, i8* %incdec.ptr89, align 1 170 %cmp91 = icmp eq i8 %28, %29 171 br i1 %cmp91, label %land.lhs.true93, label %do.end 172 173land.lhs.true93: ; preds = %land.lhs.true86 174 %incdec.ptr94 = getelementptr inbounds i8, i8* %scan.1, i64 7 175 %30 = load i8, i8* %incdec.ptr94, align 1 176 %incdec.ptr96 = getelementptr inbounds i8, i8* %match.0, i64 7 177 %31 = load i8, i8* %incdec.ptr96, align 1 178 %cmp98 = icmp eq i8 %30, %31 179 br i1 %cmp98, label %land.lhs.true100, label %do.end 180 181land.lhs.true100: ; preds = %land.lhs.true93 182 %incdec.ptr101 = getelementptr inbounds i8, i8* %scan.1, i64 8 183 %32 = load i8, i8* %incdec.ptr101, align 1 184 %incdec.ptr103 = getelementptr inbounds i8, i8* %match.0, i64 8 185 %33 = load i8, i8* %incdec.ptr103, align 1 186 %cmp105 = icmp eq i8 %32, %33 187 %cmp107 = icmp ult i8* %incdec.ptr101, %add.ptr12 188 %or.cond = and i1 %cmp105, %cmp107 189 br i1 %or.cond, label %do.cond, label %do.end 190 191do.end: ; preds = %land.lhs.true100, %land.lhs.true93, %land.lhs.true86, %land.lhs.true79, %land.lhs.true72, %land.lhs.true65, %land.lhs.true, %do.cond 192 %scan.2 = phi i8* [ %incdec.ptr101, %land.lhs.true100 ], [ %incdec.ptr94, %land.lhs.true93 ], [ %incdec.ptr87, %land.lhs.true86 ], [ %incdec.ptr80, %land.lhs.true79 ], [ %incdec.ptr73, %land.lhs.true72 ], [ %incdec.ptr66, %land.lhs.true65 ], [ %incdec.ptr59, %land.lhs.true ], [ %incdec.ptr53, %do.cond ] 193 %sub.ptr.rhs.cast = ptrtoint i8* %scan.2 to i64 194 %sub.ptr.sub = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast 195 %conv109 = trunc i64 %sub.ptr.sub to i32 196 %sub110 = sub nsw i32 258, %conv109 197 %cmp112 = icmp sgt i32 %sub110, %best_len.0 198 br i1 %cmp112, label %if.then114, label %do.cond125 199 200if.then114: ; preds = %do.end 201 store i32 %cur_match.addr.0, i32* %match_start, align 4 202 %cmp115 = icmp slt i32 %sub110, %. 203 br i1 %cmp115, label %if.end118, label %do.end135 204 205if.end118: ; preds = %if.then114 206 %sub119 = add nsw i32 %sub110, -1 207 %idxprom120 = sext i32 %sub119 to i64 208 %add.ptr111.sum = add i64 %idxprom120, %idx.ext 209 %arrayidx121 = getelementptr inbounds i8, i8* %1, i64 %add.ptr111.sum 210 %34 = load i8, i8* %arrayidx121, align 1 211 %idxprom122 = sext i32 %sub110 to i64 212 %add.ptr111.sum216 = add i64 %idxprom122, %idx.ext 213 %arrayidx123 = getelementptr inbounds i8, i8* %1, i64 %add.ptr111.sum216 214 %35 = load i8, i8* %arrayidx123, align 1 215 br label %do.cond125 216 217do.cond125: ; preds = %if.end118, %do.end, %lor.lhs.false42, %lor.lhs.false37, %lor.lhs.false, %do.body 218 %best_len.1 = phi i32 [ %best_len.0, %do.body ], [ %best_len.0, %lor.lhs.false ], [ %best_len.0, %lor.lhs.false37 ], [ %best_len.0, %lor.lhs.false42 ], [ %sub110, %if.end118 ], [ %best_len.0, %do.end ] 219 %scan_end1.1 = phi i8 [ %scan_end1.0, %do.body ], [ %scan_end1.0, %lor.lhs.false ], [ %scan_end1.0, %lor.lhs.false37 ], [ %scan_end1.0, %lor.lhs.false42 ], [ %34, %if.end118 ], [ %scan_end1.0, %do.end ] 220 %scan_end.1 = phi i8 [ %scan_end.0, %do.body ], [ %scan_end.0, %lor.lhs.false ], [ %scan_end.0, %lor.lhs.false37 ], [ %scan_end.0, %lor.lhs.false42 ], [ %35, %if.end118 ], [ %scan_end.0, %do.end ] 221 %and = and i32 %cur_match.addr.0, %7 222 %idxprom126 = zext i32 %and to i64 223 %arrayidx127 = getelementptr inbounds i16, i16* %6, i64 %idxprom126 224 %36 = load i16, i16* %arrayidx127, align 2 225 %conv128 = zext i16 %36 to i32 226 %cmp129 = icmp ugt i32 %conv128, %sub6. 227 br i1 %cmp129, label %land.rhs131, label %do.end135 228 229land.rhs131: ; preds = %do.cond125 230 %dec = add i32 %chain_length.1, -1 231 %cmp132 = icmp eq i32 %dec, 0 232 br i1 %cmp132, label %do.end135, label %do.body 233 234do.end135: ; preds = %land.rhs131, %do.cond125, %if.then114 235 %best_len.2 = phi i32 [ %best_len.1, %land.rhs131 ], [ %best_len.1, %do.cond125 ], [ %sub110, %if.then114 ] 236 %cmp137 = icmp ugt i32 %best_len.2, %11 237 %.best_len.2 = select i1 %cmp137, i32 %11, i32 %best_len.2 238 ret i32 %.best_len.2 239} 240