1 //===- subzero/src/IceRegAlloc.cpp - Linear-scan implementation -----------===//
2 //
3 // The Subzero Code Generator
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief Implements the LinearScan class, which performs the linear-scan
12 /// register allocation after liveness analysis has been performed.
13 ///
14 //===----------------------------------------------------------------------===//
15
16 #include "IceRegAlloc.h"
17
18 #include "IceBitVector.h"
19 #include "IceCfg.h"
20 #include "IceCfgNode.h"
21 #include "IceInst.h"
22 #include "IceInstVarIter.h"
23 #include "IceOperand.h"
24 #include "IceTargetLowering.h"
25
26 #include "llvm/Support/Format.h"
27
28 namespace Ice {
29
30 namespace {
31
32 // Returns true if Var has any definitions within Item's live range.
33 // TODO(stichnot): Consider trimming the Definitions list similar to how the
34 // live ranges are trimmed, since all the overlapsDefs() tests are whether some
35 // variable's definitions overlap Cur, and trimming is with respect Cur.start.
36 // Initial tests show no measurable performance difference, so we'll keep the
37 // code simple for now.
overlapsDefs(const Cfg * Func,const Variable * Item,const Variable * Var)38 bool overlapsDefs(const Cfg *Func, const Variable *Item, const Variable *Var) {
39 constexpr bool UseTrimmed = true;
40 VariablesMetadata *VMetadata = Func->getVMetadata();
41 if (const Inst *FirstDef = VMetadata->getFirstDefinition(Var))
42 if (Item->getLiveRange().overlapsInst(FirstDef->getNumber(), UseTrimmed))
43 return true;
44 for (const Inst *Def : VMetadata->getLatterDefinitions(Var)) {
45 if (Item->getLiveRange().overlapsInst(Def->getNumber(), UseTrimmed))
46 return true;
47 }
48 return false;
49 }
50
dumpDisableOverlap(const Cfg * Func,const Variable * Var,const char * Reason)51 void dumpDisableOverlap(const Cfg *Func, const Variable *Var,
52 const char *Reason) {
53 if (!BuildDefs::dump())
54 return;
55 if (!Func->isVerbose(IceV_LinearScan))
56 return;
57
58 VariablesMetadata *VMetadata = Func->getVMetadata();
59 Ostream &Str = Func->getContext()->getStrDump();
60 Str << "Disabling Overlap due to " << Reason << " " << *Var
61 << " LIVE=" << Var->getLiveRange() << " Defs=";
62 if (const Inst *FirstDef = VMetadata->getFirstDefinition(Var))
63 Str << FirstDef->getNumber();
64 const InstDefList &Defs = VMetadata->getLatterDefinitions(Var);
65 for (size_t i = 0; i < Defs.size(); ++i) {
66 Str << "," << Defs[i]->getNumber();
67 }
68 Str << "\n";
69 }
70
dumpLiveRange(const Variable * Var,const Cfg * Func)71 void dumpLiveRange(const Variable *Var, const Cfg *Func) {
72 if (!BuildDefs::dump())
73 return;
74 Ostream &Str = Func->getContext()->getStrDump();
75 Str << "R=";
76 if (Var->hasRegTmp()) {
77 Str << llvm::format("%2d", int(Var->getRegNumTmp()));
78 } else {
79 Str << "NA";
80 }
81 Str << " V=";
82 Var->dump(Func);
83 Str << " Range=" << Var->getLiveRange();
84 }
85
findMinWeightIndex(const SmallBitVector & RegMask,const llvm::SmallVector<RegWeight,LinearScan::REGS_SIZE> & Weights)86 int32_t findMinWeightIndex(
87 const SmallBitVector &RegMask,
88 const llvm::SmallVector<RegWeight, LinearScan::REGS_SIZE> &Weights) {
89 int MinWeightIndex = -1;
90 for (RegNumT i : RegNumBVIter(RegMask)) {
91 if (MinWeightIndex < 0 || Weights[i] < Weights[MinWeightIndex])
92 MinWeightIndex = i;
93 }
94 assert(getFlags().getRegAllocReserve() || MinWeightIndex >= 0);
95 return MinWeightIndex;
96 }
97
98 } // end of anonymous namespace
99
LinearScan(Cfg * Func)100 LinearScan::LinearScan(Cfg *Func)
101 : Func(Func), Ctx(Func->getContext()), Target(Func->getTarget()),
102 Verbose(BuildDefs::dump() && Func->isVerbose(IceV_LinearScan)),
103 UseReserve(getFlags().getRegAllocReserve()) {}
104
105 // Prepare for full register allocation of all variables. We depend on liveness
106 // analysis to have calculated live ranges.
initForGlobal()107 void LinearScan::initForGlobal() {
108 TimerMarker T(TimerStack::TT_initUnhandled, Func);
109 FindPreference = true;
110 // For full register allocation, normally we want to enable FindOverlap
111 // (meaning we look for opportunities for two overlapping live ranges to
112 // safely share the same register). However, we disable it for phi-lowering
113 // register allocation since no overlap opportunities should be available and
114 // it's more expensive to look for opportunities.
115 FindOverlap = (Kind != RAK_Phi);
116 Unhandled.reserve(Vars.size());
117 UnhandledPrecolored.reserve(Vars.size());
118 // Gather the live ranges of all variables and add them to the Unhandled set.
119 for (Variable *Var : Vars) {
120 // Don't consider rematerializable variables.
121 if (Var->isRematerializable())
122 continue;
123 // Explicitly don't consider zero-weight variables, which are meant to be
124 // spill slots.
125 if (Var->mustNotHaveReg())
126 continue;
127 // Don't bother if the variable has a null live range, which means it was
128 // never referenced.
129 if (Var->getLiveRange().isEmpty())
130 continue;
131 Var->untrimLiveRange();
132 Unhandled.push_back(Var);
133 if (Var->hasReg()) {
134 Var->setRegNumTmp(Var->getRegNum());
135 Var->setMustHaveReg();
136 UnhandledPrecolored.push_back(Var);
137 }
138 }
139
140 // Build the (ordered) list of FakeKill instruction numbers.
141 Kills.clear();
142 // Phi lowering should not be creating new call instructions, so there should
143 // be no infinite-weight not-yet-colored live ranges that span a call
144 // instruction, hence no need to construct the Kills list.
145 if (Kind == RAK_Phi)
146 return;
147 for (CfgNode *Node : Func->getNodes()) {
148 for (Inst &I : Node->getInsts()) {
149 if (auto *Kill = llvm::dyn_cast<InstFakeKill>(&I)) {
150 if (!Kill->isDeleted() && !Kill->getLinked()->isDeleted())
151 Kills.push_back(I.getNumber());
152 }
153 }
154 }
155 }
156
157 // Validate the integrity of the live ranges. If there are any errors, it
158 // prints details and returns false. On success, it returns true.
livenessValidateIntervals(const DefUseErrorList & DefsWithoutUses,const DefUseErrorList & UsesBeforeDefs,const CfgVector<InstNumberT> & LRBegin,const CfgVector<InstNumberT> & LREnd) const159 bool LinearScan::livenessValidateIntervals(
160 const DefUseErrorList &DefsWithoutUses,
161 const DefUseErrorList &UsesBeforeDefs,
162 const CfgVector<InstNumberT> &LRBegin,
163 const CfgVector<InstNumberT> &LREnd) const {
164 if (DefsWithoutUses.empty() && UsesBeforeDefs.empty())
165 return true;
166
167 if (!BuildDefs::dump())
168 return false;
169
170 OstreamLocker L(Ctx);
171 Ostream &Str = Ctx->getStrDump();
172 for (SizeT VarNum : DefsWithoutUses) {
173 Variable *Var = Vars[VarNum];
174 Str << "LR def without use, instruction " << LRBegin[VarNum]
175 << ", variable " << Var->getName() << "\n";
176 }
177 for (SizeT VarNum : UsesBeforeDefs) {
178 Variable *Var = Vars[VarNum];
179 Str << "LR use before def, instruction " << LREnd[VarNum] << ", variable "
180 << Var->getName() << "\n";
181 }
182 return false;
183 }
184
185 // Prepare for very simple register allocation of only infinite-weight Variables
186 // while respecting pre-colored Variables. Some properties we take advantage of:
187 //
188 // * Live ranges of interest consist of a single segment.
189 //
190 // * Live ranges of interest never span a call instruction.
191 //
192 // * Phi instructions are not considered because either phis have already been
193 // lowered, or they don't contain any pre-colored or infinite-weight
194 // Variables.
195 //
196 // * We don't need to renumber instructions before computing live ranges because
197 // all the high-level ICE instructions are deleted prior to lowering, and the
198 // low-level instructions are added in monotonically increasing order.
199 //
200 // * There are no opportunities for register preference or allowing overlap.
201 //
202 // Some properties we aren't (yet) taking advantage of:
203 //
204 // * Because live ranges are a single segment, the Inactive set will always be
205 // empty, and the live range trimming operation is unnecessary.
206 //
207 // * Calculating overlap of single-segment live ranges could be optimized a bit.
initForInfOnly()208 void LinearScan::initForInfOnly() {
209 TimerMarker T(TimerStack::TT_initUnhandled, Func);
210 FindPreference = false;
211 FindOverlap = false;
212 SizeT NumVars = 0;
213
214 // Iterate across all instructions and record the begin and end of the live
215 // range for each variable that is pre-colored or infinite weight.
216 CfgVector<InstNumberT> LRBegin(Vars.size(), Inst::NumberSentinel);
217 CfgVector<InstNumberT> LREnd(Vars.size(), Inst::NumberSentinel);
218 DefUseErrorList DefsWithoutUses, UsesBeforeDefs;
219 for (CfgNode *Node : Func->getNodes()) {
220 for (Inst &Instr : Node->getInsts()) {
221 if (Instr.isDeleted())
222 continue;
223 FOREACH_VAR_IN_INST(Var, Instr) {
224 if (Var->getIgnoreLiveness())
225 continue;
226 if (Var->hasReg() || Var->mustHaveReg()) {
227 SizeT VarNum = Var->getIndex();
228 LREnd[VarNum] = Instr.getNumber();
229 if (!Var->getIsArg() && LRBegin[VarNum] == Inst::NumberSentinel)
230 UsesBeforeDefs.push_back(VarNum);
231 }
232 }
233 if (const Variable *Var = Instr.getDest()) {
234 if (!Var->getIgnoreLiveness() &&
235 (Var->hasReg() || Var->mustHaveReg())) {
236 if (LRBegin[Var->getIndex()] == Inst::NumberSentinel) {
237 LRBegin[Var->getIndex()] = Instr.getNumber();
238 ++NumVars;
239 }
240 }
241 }
242 }
243 }
244
245 Unhandled.reserve(NumVars);
246 UnhandledPrecolored.reserve(NumVars);
247 for (SizeT i = 0; i < Vars.size(); ++i) {
248 Variable *Var = Vars[i];
249 if (Var->isRematerializable())
250 continue;
251 if (LRBegin[i] != Inst::NumberSentinel) {
252 if (LREnd[i] == Inst::NumberSentinel) {
253 DefsWithoutUses.push_back(i);
254 continue;
255 }
256 Unhandled.push_back(Var);
257 Var->resetLiveRange();
258 Var->addLiveRange(LRBegin[i], LREnd[i]);
259 Var->untrimLiveRange();
260 if (Var->hasReg()) {
261 Var->setRegNumTmp(Var->getRegNum());
262 Var->setMustHaveReg();
263 UnhandledPrecolored.push_back(Var);
264 }
265 --NumVars;
266 }
267 }
268
269 if (!livenessValidateIntervals(DefsWithoutUses, UsesBeforeDefs, LRBegin,
270 LREnd)) {
271 llvm::report_fatal_error("initForInfOnly: Liveness error");
272 return;
273 }
274
275 if (!DefsWithoutUses.empty() || !UsesBeforeDefs.empty()) {
276 if (BuildDefs::dump()) {
277 OstreamLocker L(Ctx);
278 Ostream &Str = Ctx->getStrDump();
279 for (SizeT VarNum : DefsWithoutUses) {
280 Variable *Var = Vars[VarNum];
281 Str << "LR def without use, instruction " << LRBegin[VarNum]
282 << ", variable " << Var->getName() << "\n";
283 }
284 for (SizeT VarNum : UsesBeforeDefs) {
285 Variable *Var = Vars[VarNum];
286 Str << "LR use before def, instruction " << LREnd[VarNum]
287 << ", variable " << Var->getName() << "\n";
288 }
289 }
290 llvm::report_fatal_error("initForInfOnly: Liveness error");
291 }
292 // This isn't actually a fatal condition, but it would be nice to know if we
293 // somehow pre-calculated Unhandled's size wrong.
294 assert(NumVars == 0);
295
296 // Don't build up the list of Kills because we know that no infinite-weight
297 // Variable has a live range spanning a call.
298 Kills.clear();
299 }
300
initForSecondChance()301 void LinearScan::initForSecondChance() {
302 TimerMarker T(TimerStack::TT_initUnhandled, Func);
303 FindPreference = true;
304 FindOverlap = true;
305 const VarList &Vars = Func->getVariables();
306 Unhandled.reserve(Vars.size());
307 UnhandledPrecolored.reserve(Vars.size());
308 for (Variable *Var : Vars) {
309 if (Var->isRematerializable())
310 continue;
311 if (Var->hasReg()) {
312 Var->untrimLiveRange();
313 Var->setRegNumTmp(Var->getRegNum());
314 Var->setMustHaveReg();
315 UnhandledPrecolored.push_back(Var);
316 Unhandled.push_back(Var);
317 }
318 }
319 for (Variable *Var : Evicted) {
320 Var->untrimLiveRange();
321 Unhandled.push_back(Var);
322 }
323 }
324
init(RegAllocKind Kind,CfgSet<Variable * > ExcludeVars)325 void LinearScan::init(RegAllocKind Kind, CfgSet<Variable *> ExcludeVars) {
326 this->Kind = Kind;
327 Unhandled.clear();
328 UnhandledPrecolored.clear();
329 Handled.clear();
330 Inactive.clear();
331 Active.clear();
332 Vars.clear();
333 Vars.reserve(Func->getVariables().size() - ExcludeVars.size());
334 for (auto *Var : Func->getVariables()) {
335 if (ExcludeVars.find(Var) == ExcludeVars.end())
336 Vars.emplace_back(Var);
337 }
338
339 SizeT NumRegs = Target->getNumRegisters();
340 RegAliases.resize(NumRegs);
341 for (SizeT Reg = 0; Reg < NumRegs; ++Reg) {
342 RegAliases[Reg] = &Target->getAliasesForRegister(RegNumT::fromInt(Reg));
343 }
344
345 switch (Kind) {
346 case RAK_Unknown:
347 llvm::report_fatal_error("Invalid RAK_Unknown");
348 break;
349 case RAK_Global:
350 case RAK_Phi:
351 initForGlobal();
352 break;
353 case RAK_InfOnly:
354 initForInfOnly();
355 break;
356 case RAK_SecondChance:
357 initForSecondChance();
358 break;
359 }
360
361 Evicted.clear();
362
363 auto CompareRanges = [](const Variable *L, const Variable *R) {
364 InstNumberT Lstart = L->getLiveRange().getStart();
365 InstNumberT Rstart = R->getLiveRange().getStart();
366 if (Lstart == Rstart)
367 return L->getIndex() < R->getIndex();
368 return Lstart < Rstart;
369 };
370 // Do a reverse sort so that erasing elements (from the end) is fast.
371 std::sort(Unhandled.rbegin(), Unhandled.rend(), CompareRanges);
372 std::sort(UnhandledPrecolored.rbegin(), UnhandledPrecolored.rend(),
373 CompareRanges);
374
375 Handled.reserve(Unhandled.size());
376 Inactive.reserve(Unhandled.size());
377 Active.reserve(Unhandled.size());
378 Evicted.reserve(Unhandled.size());
379 }
380
381 // This is called when Cur must be allocated a register but no registers are
382 // available across Cur's live range. To handle this, we find a register that is
383 // not explicitly used during Cur's live range, spill that register to a stack
384 // location right before Cur's live range begins, and fill (reload) the register
385 // from the stack location right after Cur's live range ends.
addSpillFill(IterationState & Iter)386 void LinearScan::addSpillFill(IterationState &Iter) {
387 // Identify the actual instructions that begin and end Iter.Cur's live range.
388 // Iterate through Iter.Cur's node's instruction list until we find the actual
389 // instructions with instruction numbers corresponding to Iter.Cur's recorded
390 // live range endpoints. This sounds inefficient but shouldn't be a problem
391 // in practice because:
392 // (1) This function is almost never called in practice.
393 // (2) Since this register over-subscription problem happens only for
394 // phi-lowered instructions, the number of instructions in the node is
395 // proportional to the number of phi instructions in the original node,
396 // which is never very large in practice.
397 // (3) We still have to iterate through all instructions of Iter.Cur's live
398 // range to find all explicitly used registers (though the live range is
399 // usually only 2-3 instructions), so the main cost that could be avoided
400 // would be finding the instruction that begin's Iter.Cur's live range.
401 assert(!Iter.Cur->getLiveRange().isEmpty());
402 InstNumberT Start = Iter.Cur->getLiveRange().getStart();
403 InstNumberT End = Iter.Cur->getLiveRange().getEnd();
404 CfgNode *Node = Func->getVMetadata()->getLocalUseNode(Iter.Cur);
405 assert(Node);
406 InstList &Insts = Node->getInsts();
407 InstList::iterator SpillPoint = Insts.end();
408 InstList::iterator FillPoint = Insts.end();
409 // Stop searching after we have found both the SpillPoint and the FillPoint.
410 for (auto I = Insts.begin(), E = Insts.end();
411 I != E && (SpillPoint == E || FillPoint == E); ++I) {
412 if (I->getNumber() == Start)
413 SpillPoint = I;
414 if (I->getNumber() == End)
415 FillPoint = I;
416 if (SpillPoint != E) {
417 // Remove from RegMask any physical registers referenced during Cur's live
418 // range. Start looking after SpillPoint gets set, i.e. once Cur's live
419 // range begins.
420 FOREACH_VAR_IN_INST(Var, *I) {
421 if (!Var->hasRegTmp())
422 continue;
423 const auto &Aliases = *RegAliases[Var->getRegNumTmp()];
424 for (RegNumT RegAlias : RegNumBVIter(Aliases)) {
425 Iter.RegMask[RegAlias] = false;
426 }
427 }
428 }
429 }
430 assert(SpillPoint != Insts.end());
431 assert(FillPoint != Insts.end());
432 ++FillPoint;
433 // TODO(stichnot): Randomize instead of *.begin() which maps to find_first().
434 const RegNumT RegNum = *RegNumBVIter(Iter.RegMask).begin();
435 Iter.Cur->setRegNumTmp(RegNum);
436 Variable *Preg = Target->getPhysicalRegister(RegNum, Iter.Cur->getType());
437 // TODO(stichnot): Add SpillLoc to VariablesMetadata tracking so that SpillLoc
438 // is correctly identified as !isMultiBlock(), reducing stack frame size.
439 Variable *SpillLoc = Func->makeVariable(Iter.Cur->getType());
440 // Add "reg=FakeDef;spill=reg" before SpillPoint
441 Target->lowerInst(Node, SpillPoint, InstFakeDef::create(Func, Preg));
442 Target->lowerInst(Node, SpillPoint, InstAssign::create(Func, SpillLoc, Preg));
443 // add "reg=spill;FakeUse(reg)" before FillPoint
444 Target->lowerInst(Node, FillPoint, InstAssign::create(Func, Preg, SpillLoc));
445 Target->lowerInst(Node, FillPoint, InstFakeUse::create(Func, Preg));
446 }
447
handleActiveRangeExpiredOrInactive(const Variable * Cur)448 void LinearScan::handleActiveRangeExpiredOrInactive(const Variable *Cur) {
449 for (SizeT I = Active.size(); I > 0; --I) {
450 const SizeT Index = I - 1;
451 Variable *Item = Active[Index];
452 Item->trimLiveRange(Cur->getLiveRange().getStart());
453 bool Moved = false;
454 if (Item->rangeEndsBefore(Cur)) {
455 // Move Item from Active to Handled list.
456 dumpLiveRangeTrace("Expiring ", Item);
457 moveItem(Active, Index, Handled);
458 Moved = true;
459 } else if (!Item->rangeOverlapsStart(Cur)) {
460 // Move Item from Active to Inactive list.
461 dumpLiveRangeTrace("Inactivating ", Item);
462 moveItem(Active, Index, Inactive);
463 Moved = true;
464 }
465 if (Moved) {
466 // Decrement Item from RegUses[].
467 assert(Item->hasRegTmp());
468 const auto &Aliases = *RegAliases[Item->getRegNumTmp()];
469 for (RegNumT RegAlias : RegNumBVIter(Aliases)) {
470 --RegUses[RegAlias];
471 assert(RegUses[RegAlias] >= 0);
472 }
473 }
474 }
475 }
476
handleInactiveRangeExpiredOrReactivated(const Variable * Cur)477 void LinearScan::handleInactiveRangeExpiredOrReactivated(const Variable *Cur) {
478 for (SizeT I = Inactive.size(); I > 0; --I) {
479 const SizeT Index = I - 1;
480 Variable *Item = Inactive[Index];
481 Item->trimLiveRange(Cur->getLiveRange().getStart());
482 if (Item->rangeEndsBefore(Cur)) {
483 // Move Item from Inactive to Handled list.
484 dumpLiveRangeTrace("Expiring ", Item);
485 moveItem(Inactive, Index, Handled);
486 } else if (Item->rangeOverlapsStart(Cur)) {
487 // Move Item from Inactive to Active list.
488 dumpLiveRangeTrace("Reactivating ", Item);
489 moveItem(Inactive, Index, Active);
490 // Increment Item in RegUses[].
491 assert(Item->hasRegTmp());
492 const auto &Aliases = *RegAliases[Item->getRegNumTmp()];
493 for (RegNumT RegAlias : RegNumBVIter(Aliases)) {
494 assert(RegUses[RegAlias] >= 0);
495 ++RegUses[RegAlias];
496 }
497 }
498 }
499 }
500
501 // Infer register preference and allowable overlap. Only form a preference when
502 // the current Variable has an unambiguous "first" definition. The preference is
503 // some source Variable of the defining instruction that either is assigned a
504 // register that is currently free, or that is assigned a register that is not
505 // free but overlap is allowed. Overlap is allowed when the Variable under
506 // consideration is single-definition, and its definition is a simple assignment
507 // - i.e., the register gets copied/aliased but is never modified. Furthermore,
508 // overlap is only allowed when preferred Variable definition instructions do
509 // not appear within the current Variable's live range.
findRegisterPreference(IterationState & Iter)510 void LinearScan::findRegisterPreference(IterationState &Iter) {
511 Iter.Prefer = nullptr;
512 Iter.PreferReg = RegNumT();
513 Iter.AllowOverlap = false;
514
515 if (!FindPreference)
516 return;
517
518 VariablesMetadata *VMetadata = Func->getVMetadata();
519 const Inst *DefInst = VMetadata->getFirstDefinitionSingleBlock(Iter.Cur);
520 if (DefInst == nullptr)
521 return;
522
523 assert(DefInst->getDest() == Iter.Cur);
524 const bool IsSingleDefAssign =
525 DefInst->isVarAssign() && !VMetadata->isMultiDef(Iter.Cur);
526 FOREACH_VAR_IN_INST(SrcVar, *DefInst) {
527 // Only consider source variables that have (so far) been assigned a
528 // register.
529 if (!SrcVar->hasRegTmp())
530 continue;
531
532 // That register must be one in the RegMask set, e.g. don't try to prefer
533 // the stack pointer as a result of the stacksave intrinsic.
534 const auto &Aliases = *RegAliases[SrcVar->getRegNumTmp()];
535 const int SrcReg = (Iter.RegMask & Aliases).find_first();
536 if (SrcReg == -1)
537 continue;
538
539 if (FindOverlap && IsSingleDefAssign && !Iter.Free[SrcReg]) {
540 // Don't bother trying to enable AllowOverlap if the register is already
541 // free (hence the test on Iter.Free[SrcReg]).
542 Iter.AllowOverlap = !overlapsDefs(Func, Iter.Cur, SrcVar);
543 }
544 if (Iter.AllowOverlap || Iter.Free[SrcReg]) {
545 Iter.Prefer = SrcVar;
546 Iter.PreferReg = RegNumT::fromInt(SrcReg);
547 // Stop looking for a preference after the first valid preference is
548 // found. One might think that we should look at all instruction
549 // variables to find the best <Prefer,AllowOverlap> combination, but note
550 // that AllowOverlap can only be true for a simple assignment statement
551 // which can have only one source operand, so it's not possible for
552 // AllowOverlap to be true beyond the first source operand.
553 FOREACH_VAR_IN_INST_BREAK;
554 }
555 }
556 if (BuildDefs::dump() && Verbose && Iter.Prefer) {
557 Ostream &Str = Ctx->getStrDump();
558 Str << "Initial Iter.Prefer=";
559 Iter.Prefer->dump(Func);
560 Str << " R=" << Iter.PreferReg << " LIVE=" << Iter.Prefer->getLiveRange()
561 << " Overlap=" << Iter.AllowOverlap << "\n";
562 }
563 }
564
565 // Remove registers from the Iter.Free[] list where an Inactive range overlaps
566 // with the current range.
filterFreeWithInactiveRanges(IterationState & Iter)567 void LinearScan::filterFreeWithInactiveRanges(IterationState &Iter) {
568 for (const Variable *Item : Inactive) {
569 if (!Item->rangeOverlaps(Iter.Cur))
570 continue;
571 const auto &Aliases = *RegAliases[Item->getRegNumTmp()];
572 for (RegNumT RegAlias : RegNumBVIter(Aliases)) {
573 // Don't assert(Iter.Free[RegAlias]) because in theory (though probably
574 // never in practice) there could be two inactive variables that were
575 // marked with AllowOverlap.
576 Iter.Free[RegAlias] = false;
577 Iter.FreeUnfiltered[RegAlias] = false;
578 // Disable AllowOverlap if an Inactive variable, which is not Prefer,
579 // shares Prefer's register, and has a definition within Cur's live range.
580 if (Iter.AllowOverlap && Item != Iter.Prefer &&
581 RegAlias == Iter.PreferReg && overlapsDefs(Func, Iter.Cur, Item)) {
582 Iter.AllowOverlap = false;
583 dumpDisableOverlap(Func, Item, "Inactive");
584 }
585 }
586 }
587 }
588
589 // Remove registers from the Iter.Free[] list where an Unhandled pre-colored
590 // range overlaps with the current range, and set those registers to infinite
591 // weight so that they aren't candidates for eviction.
592 // Cur->rangeEndsBefore(Item) is an early exit check that turns a guaranteed
593 // O(N^2) algorithm into expected linear complexity.
filterFreeWithPrecoloredRanges(IterationState & Iter)594 void LinearScan::filterFreeWithPrecoloredRanges(IterationState &Iter) {
595 // TODO(stichnot): Partition UnhandledPrecolored according to register class,
596 // to restrict the number of overlap comparisons needed.
597 for (Variable *Item : reverse_range(UnhandledPrecolored)) {
598 assert(Item->hasReg());
599 if (Iter.Cur->rangeEndsBefore(Item))
600 break;
601 if (!Item->rangeOverlaps(Iter.Cur))
602 continue;
603 const auto &Aliases =
604 *RegAliases[Item->getRegNum()]; // Note: not getRegNumTmp()
605 for (RegNumT RegAlias : RegNumBVIter(Aliases)) {
606 Iter.Weights[RegAlias].setWeight(RegWeight::Inf);
607 Iter.Free[RegAlias] = false;
608 Iter.FreeUnfiltered[RegAlias] = false;
609 Iter.PrecoloredUnhandledMask[RegAlias] = true;
610 // Disable Iter.AllowOverlap if the preferred register is one of these
611 // pre-colored unhandled overlapping ranges.
612 if (Iter.AllowOverlap && RegAlias == Iter.PreferReg) {
613 Iter.AllowOverlap = false;
614 dumpDisableOverlap(Func, Item, "PrecoloredUnhandled");
615 }
616 }
617 }
618 }
619
allocatePrecoloredRegister(Variable * Cur)620 void LinearScan::allocatePrecoloredRegister(Variable *Cur) {
621 const auto RegNum = Cur->getRegNum();
622 // RegNumTmp should have already been set above.
623 assert(Cur->getRegNumTmp() == RegNum);
624 dumpLiveRangeTrace("Precoloring ", Cur);
625 Active.push_back(Cur);
626 const auto &Aliases = *RegAliases[RegNum];
627 for (RegNumT RegAlias : RegNumBVIter(Aliases)) {
628 assert(RegUses[RegAlias] >= 0);
629 ++RegUses[RegAlias];
630 }
631 assert(!UnhandledPrecolored.empty());
632 assert(UnhandledPrecolored.back() == Cur);
633 UnhandledPrecolored.pop_back();
634 }
635
allocatePreferredRegister(IterationState & Iter)636 void LinearScan::allocatePreferredRegister(IterationState &Iter) {
637 Iter.Cur->setRegNumTmp(Iter.PreferReg);
638 dumpLiveRangeTrace("Preferring ", Iter.Cur);
639 const auto &Aliases = *RegAliases[Iter.PreferReg];
640 for (RegNumT RegAlias : RegNumBVIter(Aliases)) {
641 assert(RegUses[RegAlias] >= 0);
642 ++RegUses[RegAlias];
643 }
644 Active.push_back(Iter.Cur);
645 }
646
allocateFreeRegister(IterationState & Iter,bool Filtered)647 void LinearScan::allocateFreeRegister(IterationState &Iter, bool Filtered) {
648 const RegNumT RegNum =
649 *RegNumBVIter(Filtered ? Iter.Free : Iter.FreeUnfiltered).begin();
650 Iter.Cur->setRegNumTmp(RegNum);
651 if (Filtered)
652 dumpLiveRangeTrace("Allocating Y ", Iter.Cur);
653 else
654 dumpLiveRangeTrace("Allocating X ", Iter.Cur);
655 const auto &Aliases = *RegAliases[RegNum];
656 for (RegNumT RegAlias : RegNumBVIter(Aliases)) {
657 assert(RegUses[RegAlias] >= 0);
658 ++RegUses[RegAlias];
659 }
660 Active.push_back(Iter.Cur);
661 }
662
handleNoFreeRegisters(IterationState & Iter)663 void LinearScan::handleNoFreeRegisters(IterationState &Iter) {
664 // Check Active ranges.
665 for (const Variable *Item : Active) {
666 assert(Item->rangeOverlaps(Iter.Cur));
667 assert(Item->hasRegTmp());
668 const auto &Aliases = *RegAliases[Item->getRegNumTmp()];
669 // We add the Item's weight to each alias/subregister to represent that,
670 // should we decide to pick any of them, then we would incur that many
671 // memory accesses.
672 RegWeight W = Item->getWeight(Func);
673 for (RegNumT RegAlias : RegNumBVIter(Aliases)) {
674 Iter.Weights[RegAlias].addWeight(W);
675 }
676 }
677 // Same as above, but check Inactive ranges instead of Active.
678 for (const Variable *Item : Inactive) {
679 if (!Item->rangeOverlaps(Iter.Cur))
680 continue;
681 assert(Item->hasRegTmp());
682 const auto &Aliases = *RegAliases[Item->getRegNumTmp()];
683 RegWeight W = Item->getWeight(Func);
684 for (RegNumT RegAlias : RegNumBVIter(Aliases)) {
685 Iter.Weights[RegAlias].addWeight(W);
686 }
687 }
688
689 // All the weights are now calculated. Find the register with smallest weight.
690 int32_t MinWeightIndex = findMinWeightIndex(Iter.RegMask, Iter.Weights);
691
692 if (MinWeightIndex < 0 ||
693 Iter.Cur->getWeight(Func) <= Iter.Weights[MinWeightIndex]) {
694 if (!Iter.Cur->mustHaveReg()) {
695 // Iter.Cur doesn't have priority over any other live ranges, so don't
696 // allocate any register to it, and move it to the Handled state.
697 Handled.push_back(Iter.Cur);
698 return;
699 }
700 if (Kind == RAK_Phi) {
701 // Iter.Cur is infinite-weight but all physical registers are already
702 // taken, so we need to force one to be temporarily available.
703 addSpillFill(Iter);
704 Handled.push_back(Iter.Cur);
705 return;
706 }
707 // The remaining portion of the enclosing "if" block should only be
708 // reachable if we are manually limiting physical registers for testing.
709 if (UseReserve) {
710 if (Iter.FreeUnfiltered.any()) {
711 // There is some available physical register held in reserve, so use it.
712 constexpr bool NotFiltered = false;
713 allocateFreeRegister(Iter, NotFiltered);
714 // Iter.Cur is now on the Active list.
715 return;
716 }
717 // At this point, we need to find some reserve register that is already
718 // assigned to a non-infinite-weight variable. This could happen if some
719 // variable was previously assigned an alias of such a register.
720 MinWeightIndex = findMinWeightIndex(Iter.RegMaskUnfiltered, Iter.Weights);
721 }
722 if (Iter.Cur->getWeight(Func) <= Iter.Weights[MinWeightIndex]) {
723 dumpLiveRangeTrace("Failing ", Iter.Cur);
724 Func->setError("Unable to find a physical register for an "
725 "infinite-weight live range "
726 "(consider using -reg-reserve): " +
727 Iter.Cur->getName());
728 Handled.push_back(Iter.Cur);
729 return;
730 }
731 // At this point, MinWeightIndex points to a valid reserve register to
732 // reallocate to Iter.Cur, so drop into the eviction code.
733 }
734
735 // Evict all live ranges in Active that register number MinWeightIndex is
736 // assigned to.
737 const auto &Aliases = *RegAliases[MinWeightIndex];
738 for (SizeT I = Active.size(); I > 0; --I) {
739 const SizeT Index = I - 1;
740 Variable *Item = Active[Index];
741 const auto RegNum = Item->getRegNumTmp();
742 if (Aliases[RegNum]) {
743 dumpLiveRangeTrace("Evicting A ", Item);
744 const auto &Aliases = *RegAliases[RegNum];
745 for (RegNumT RegAlias : RegNumBVIter(Aliases)) {
746 --RegUses[RegAlias];
747 assert(RegUses[RegAlias] >= 0);
748 }
749 Item->setRegNumTmp(RegNumT());
750 moveItem(Active, Index, Handled);
751 Evicted.push_back(Item);
752 }
753 }
754 // Do the same for Inactive.
755 for (SizeT I = Inactive.size(); I > 0; --I) {
756 const SizeT Index = I - 1;
757 Variable *Item = Inactive[Index];
758 // Note: The Item->rangeOverlaps(Cur) clause is not part of the description
759 // of AssignMemLoc() in the original paper. But there doesn't seem to be any
760 // need to evict an inactive live range that doesn't overlap with the live
761 // range currently being considered. It's especially bad if we would end up
762 // evicting an infinite-weight but currently-inactive live range. The most
763 // common situation for this would be a scratch register kill set for call
764 // instructions.
765 if (Aliases[Item->getRegNumTmp()] && Item->rangeOverlaps(Iter.Cur)) {
766 dumpLiveRangeTrace("Evicting I ", Item);
767 Item->setRegNumTmp(RegNumT());
768 moveItem(Inactive, Index, Handled);
769 Evicted.push_back(Item);
770 }
771 }
772 // Assign the register to Cur.
773 Iter.Cur->setRegNumTmp(RegNumT::fromInt(MinWeightIndex));
774 for (RegNumT RegAlias : RegNumBVIter(Aliases)) {
775 assert(RegUses[RegAlias] >= 0);
776 ++RegUses[RegAlias];
777 }
778 Active.push_back(Iter.Cur);
779 dumpLiveRangeTrace("Allocating Z ", Iter.Cur);
780 }
781
assignFinalRegisters(const SmallBitVector & RegMaskFull,const SmallBitVector & PreDefinedRegisters,bool Randomized)782 void LinearScan::assignFinalRegisters(const SmallBitVector &RegMaskFull,
783 const SmallBitVector &PreDefinedRegisters,
784 bool Randomized) {
785 const size_t NumRegisters = RegMaskFull.size();
786 llvm::SmallVector<RegNumT, REGS_SIZE> Permutation(NumRegisters);
787 if (Randomized) {
788 // Create a random number generator for regalloc randomization. Merge
789 // function's sequence and Kind value as the Salt. Because regAlloc() is
790 // called twice under O2, the second time with RAK_Phi, we check Kind ==
791 // RAK_Phi to determine the lowest-order bit to make sure the Salt is
792 // different.
793 uint64_t Salt =
794 (Func->getSequenceNumber() << 1) ^ (Kind == RAK_Phi ? 0u : 1u);
795 Target->makeRandomRegisterPermutation(
796 Permutation, PreDefinedRegisters | ~RegMaskFull, Salt);
797 }
798
799 // Finish up by setting RegNum = RegNumTmp (or a random permutation thereof)
800 // for each Variable.
801 for (Variable *Item : Handled) {
802 const auto RegNum = Item->getRegNumTmp();
803 auto AssignedRegNum = RegNum;
804
805 if (Randomized && Item->hasRegTmp() && !Item->hasReg()) {
806 AssignedRegNum = Permutation[RegNum];
807 }
808 if (BuildDefs::dump() && Verbose) {
809 Ostream &Str = Ctx->getStrDump();
810 if (!Item->hasRegTmp()) {
811 Str << "Not assigning ";
812 Item->dump(Func);
813 Str << "\n";
814 } else {
815 Str << (AssignedRegNum == Item->getRegNum() ? "Reassigning "
816 : "Assigning ")
817 << Target->getRegName(AssignedRegNum, Item->getType()) << "(r"
818 << AssignedRegNum << ") to ";
819 Item->dump(Func);
820 Str << "\n";
821 }
822 }
823 Item->setRegNum(AssignedRegNum);
824 }
825 }
826
827 // Implements the linear-scan algorithm. Based on "Linear Scan Register
828 // Allocation in the Context of SSA Form and Register Constraints" by Hanspeter
829 // Mössenböck and Michael Pfeiffer,
830 // ftp://ftp.ssw.uni-linz.ac.at/pub/Papers/Moe02.PDF. This implementation is
831 // modified to take affinity into account and allow two interfering variables to
832 // share the same register in certain cases.
833 //
834 // Requires running Cfg::liveness(Liveness_Intervals) in preparation. Results
835 // are assigned to Variable::RegNum for each Variable.
scan(const SmallBitVector & RegMaskFull,bool Randomized)836 void LinearScan::scan(const SmallBitVector &RegMaskFull, bool Randomized) {
837 TimerMarker T(TimerStack::TT_linearScan, Func);
838 assert(RegMaskFull.any()); // Sanity check
839 if (Verbose)
840 Ctx->lockStr();
841 Func->resetCurrentNode();
842 const size_t NumRegisters = RegMaskFull.size();
843 SmallBitVector PreDefinedRegisters(NumRegisters);
844 if (Randomized) {
845 for (Variable *Var : UnhandledPrecolored) {
846 PreDefinedRegisters[Var->getRegNum()] = true;
847 }
848 }
849
850 // Build a LiveRange representing the Kills list.
851 LiveRange KillsRange(Kills);
852 KillsRange.untrim();
853
854 // Reset the register use count.
855 RegUses.resize(NumRegisters);
856 std::fill(RegUses.begin(), RegUses.end(), 0);
857
858 // Unhandled is already set to all ranges in increasing order of start points.
859 assert(Active.empty());
860 assert(Inactive.empty());
861 assert(Handled.empty());
862 const TargetLowering::RegSetMask RegsInclude =
863 TargetLowering::RegSet_CallerSave;
864 const TargetLowering::RegSetMask RegsExclude = TargetLowering::RegSet_None;
865 const SmallBitVector KillsMask =
866 Target->getRegisterSet(RegsInclude, RegsExclude);
867
868 // Allocate memory once outside the loop.
869 IterationState Iter;
870 Iter.Weights.reserve(NumRegisters);
871 Iter.PrecoloredUnhandledMask.reserve(NumRegisters);
872
873 while (!Unhandled.empty()) {
874 Iter.Cur = Unhandled.back();
875 Unhandled.pop_back();
876 dumpLiveRangeTrace("\nConsidering ", Iter.Cur);
877 if (UseReserve)
878 assert(Target->getAllRegistersForVariable(Iter.Cur).any());
879 else
880 assert(Target->getRegistersForVariable(Iter.Cur).any());
881 Iter.RegMask = RegMaskFull & Target->getRegistersForVariable(Iter.Cur);
882 Iter.RegMaskUnfiltered =
883 RegMaskFull & Target->getAllRegistersForVariable(Iter.Cur);
884 KillsRange.trim(Iter.Cur->getLiveRange().getStart());
885
886 // Check for pre-colored ranges. If Cur is pre-colored, it definitely gets
887 // that register. Previously processed live ranges would have avoided that
888 // register due to it being pre-colored. Future processed live ranges won't
889 // evict that register because the live range has infinite weight.
890 if (Iter.Cur->hasReg()) {
891 allocatePrecoloredRegister(Iter.Cur);
892 continue;
893 }
894
895 handleActiveRangeExpiredOrInactive(Iter.Cur);
896 handleInactiveRangeExpiredOrReactivated(Iter.Cur);
897
898 // Calculate available registers into Iter.Free[] and Iter.FreeUnfiltered[].
899 Iter.Free = Iter.RegMask;
900 Iter.FreeUnfiltered = Iter.RegMaskUnfiltered;
901 for (SizeT i = 0; i < Iter.RegMask.size(); ++i) {
902 if (RegUses[i] > 0) {
903 Iter.Free[i] = false;
904 Iter.FreeUnfiltered[i] = false;
905 }
906 }
907
908 findRegisterPreference(Iter);
909 filterFreeWithInactiveRanges(Iter);
910
911 // Disable AllowOverlap if an Active variable, which is not Prefer, shares
912 // Prefer's register, and has a definition within Cur's live range.
913 if (Iter.AllowOverlap) {
914 const auto &Aliases = *RegAliases[Iter.PreferReg];
915 for (const Variable *Item : Active) {
916 const RegNumT RegNum = Item->getRegNumTmp();
917 if (Item != Iter.Prefer && Aliases[RegNum] &&
918 overlapsDefs(Func, Iter.Cur, Item)) {
919 Iter.AllowOverlap = false;
920 dumpDisableOverlap(Func, Item, "Active");
921 }
922 }
923 }
924
925 Iter.Weights.resize(Iter.RegMask.size());
926 std::fill(Iter.Weights.begin(), Iter.Weights.end(), RegWeight());
927
928 Iter.PrecoloredUnhandledMask.resize(Iter.RegMask.size());
929 Iter.PrecoloredUnhandledMask.reset();
930
931 filterFreeWithPrecoloredRanges(Iter);
932
933 // Remove scratch registers from the Iter.Free[] list, and mark their
934 // Iter.Weights[] as infinite, if KillsRange overlaps Cur's live range.
935 constexpr bool UseTrimmed = true;
936 if (Iter.Cur->getLiveRange().overlaps(KillsRange, UseTrimmed)) {
937 Iter.Free.reset(KillsMask);
938 Iter.FreeUnfiltered.reset(KillsMask);
939 for (RegNumT i : RegNumBVIter(KillsMask)) {
940 Iter.Weights[i].setWeight(RegWeight::Inf);
941 if (Iter.PreferReg == i)
942 Iter.AllowOverlap = false;
943 }
944 }
945
946 // Print info about physical register availability.
947 if (BuildDefs::dump() && Verbose) {
948 Ostream &Str = Ctx->getStrDump();
949 for (RegNumT i : RegNumBVIter(Iter.RegMaskUnfiltered)) {
950 Str << Target->getRegName(i, Iter.Cur->getType()) << "(U=" << RegUses[i]
951 << ",F=" << Iter.Free[i] << ",P=" << Iter.PrecoloredUnhandledMask[i]
952 << ") ";
953 }
954 Str << "\n";
955 }
956
957 if (Iter.Prefer && (Iter.AllowOverlap || Iter.Free[Iter.PreferReg])) {
958 // First choice: a preferred register that is either free or is allowed to
959 // overlap with its linked variable.
960 allocatePreferredRegister(Iter);
961 } else if (Iter.Free.any()) {
962 // Second choice: any free register.
963 constexpr bool Filtered = true;
964 allocateFreeRegister(Iter, Filtered);
965 } else {
966 // Fallback: there are no free registers, so we look for the lowest-weight
967 // register and see if Cur has higher weight.
968 handleNoFreeRegisters(Iter);
969 }
970 dump(Func);
971 }
972
973 // Move anything Active or Inactive to Handled for easier handling.
974 Handled.insert(Handled.end(), Active.begin(), Active.end());
975 Active.clear();
976 Handled.insert(Handled.end(), Inactive.begin(), Inactive.end());
977 Inactive.clear();
978 dump(Func);
979
980 assignFinalRegisters(RegMaskFull, PreDefinedRegisters, Randomized);
981
982 if (Verbose)
983 Ctx->unlockStr();
984 }
985
986 // ======================== Dump routines ======================== //
987
dumpLiveRangeTrace(const char * Label,const Variable * Item)988 void LinearScan::dumpLiveRangeTrace(const char *Label, const Variable *Item) {
989 if (!BuildDefs::dump())
990 return;
991
992 if (Verbose) {
993 Ostream &Str = Ctx->getStrDump();
994 Str << Label;
995 dumpLiveRange(Item, Func);
996 Str << "\n";
997 }
998 }
999
dump(Cfg * Func) const1000 void LinearScan::dump(Cfg *Func) const {
1001 if (!BuildDefs::dump())
1002 return;
1003 if (!Verbose)
1004 return;
1005 Ostream &Str = Func->getContext()->getStrDump();
1006 Func->resetCurrentNode();
1007 Str << "**** Current regalloc state:\n";
1008 Str << "++++++ Handled:\n";
1009 for (const Variable *Item : Handled) {
1010 dumpLiveRange(Item, Func);
1011 Str << "\n";
1012 }
1013 Str << "++++++ Unhandled:\n";
1014 for (const Variable *Item : reverse_range(Unhandled)) {
1015 dumpLiveRange(Item, Func);
1016 Str << "\n";
1017 }
1018 Str << "++++++ Active:\n";
1019 for (const Variable *Item : Active) {
1020 dumpLiveRange(Item, Func);
1021 Str << "\n";
1022 }
1023 Str << "++++++ Inactive:\n";
1024 for (const Variable *Item : Inactive) {
1025 dumpLiveRange(Item, Func);
1026 Str << "\n";
1027 }
1028 }
1029
1030 } // end of namespace Ice
1031