//===- subzero/src/IceRegAlloc.cpp - Linear-scan implementation -----------===// // // The Subzero Code Generator // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// /// /// \file /// \brief Implements the LinearScan class, which performs the linear-scan /// register allocation after liveness analysis has been performed. /// //===----------------------------------------------------------------------===// #include "IceRegAlloc.h" #include "IceBitVector.h" #include "IceCfg.h" #include "IceCfgNode.h" #include "IceInst.h" #include "IceInstVarIter.h" #include "IceOperand.h" #include "IceTargetLowering.h" #include "llvm/Support/Format.h" namespace Ice { namespace { // Returns true if Var has any definitions within Item's live range. // TODO(stichnot): Consider trimming the Definitions list similar to how the // live ranges are trimmed, since all the overlapsDefs() tests are whether some // variable's definitions overlap Cur, and trimming is with respect Cur.start. // Initial tests show no measurable performance difference, so we'll keep the // code simple for now. 
bool overlapsDefs(const Cfg *Func, const Variable *Item, const Variable *Var) { constexpr bool UseTrimmed = true; VariablesMetadata *VMetadata = Func->getVMetadata(); if (const Inst *FirstDef = VMetadata->getFirstDefinition(Var)) if (Item->getLiveRange().overlapsInst(FirstDef->getNumber(), UseTrimmed)) return true; for (const Inst *Def : VMetadata->getLatterDefinitions(Var)) { if (Item->getLiveRange().overlapsInst(Def->getNumber(), UseTrimmed)) return true; } return false; } void dumpDisableOverlap(const Cfg *Func, const Variable *Var, const char *Reason) { if (!BuildDefs::dump()) return; if (!Func->isVerbose(IceV_LinearScan)) return; VariablesMetadata *VMetadata = Func->getVMetadata(); Ostream &Str = Func->getContext()->getStrDump(); Str << "Disabling Overlap due to " << Reason << " " << *Var << " LIVE=" << Var->getLiveRange() << " Defs="; if (const Inst *FirstDef = VMetadata->getFirstDefinition(Var)) Str << FirstDef->getNumber(); const InstDefList &Defs = VMetadata->getLatterDefinitions(Var); for (size_t i = 0; i < Defs.size(); ++i) { Str << "," << Defs[i]->getNumber(); } Str << "\n"; } void dumpLiveRange(const Variable *Var, const Cfg *Func) { if (!BuildDefs::dump()) return; Ostream &Str = Func->getContext()->getStrDump(); Str << "R="; if (Var->hasRegTmp()) { Str << llvm::format("%2d", int(Var->getRegNumTmp())); } else { Str << "NA"; } Str << " V="; Var->dump(Func); Str << " Range=" << Var->getLiveRange(); } int32_t findMinWeightIndex( const SmallBitVector &RegMask, const llvm::SmallVector &Weights) { int MinWeightIndex = -1; for (RegNumT i : RegNumBVIter(RegMask)) { if (MinWeightIndex < 0 || Weights[i] < Weights[MinWeightIndex]) MinWeightIndex = i; } assert(getFlags().getRegAllocReserve() || MinWeightIndex >= 0); return MinWeightIndex; } } // end of anonymous namespace LinearScan::LinearScan(Cfg *Func) : Func(Func), Ctx(Func->getContext()), Target(Func->getTarget()), Verbose(BuildDefs::dump() && Func->isVerbose(IceV_LinearScan)), 
UseReserve(getFlags().getRegAllocReserve()) {} // Prepare for full register allocation of all variables. We depend on liveness // analysis to have calculated live ranges. void LinearScan::initForGlobal() { TimerMarker T(TimerStack::TT_initUnhandled, Func); FindPreference = true; // For full register allocation, normally we want to enable FindOverlap // (meaning we look for opportunities for two overlapping live ranges to // safely share the same register). However, we disable it for phi-lowering // register allocation since no overlap opportunities should be available and // it's more expensive to look for opportunities. FindOverlap = (Kind != RAK_Phi); Unhandled.reserve(Vars.size()); UnhandledPrecolored.reserve(Vars.size()); // Gather the live ranges of all variables and add them to the Unhandled set. for (Variable *Var : Vars) { // Don't consider rematerializable variables. if (Var->isRematerializable()) continue; // Explicitly don't consider zero-weight variables, which are meant to be // spill slots. if (Var->mustNotHaveReg()) continue; // Don't bother if the variable has a null live range, which means it was // never referenced. if (Var->getLiveRange().isEmpty()) continue; Var->untrimLiveRange(); Unhandled.push_back(Var); if (Var->hasReg()) { Var->setRegNumTmp(Var->getRegNum()); Var->setMustHaveReg(); UnhandledPrecolored.push_back(Var); } } // Build the (ordered) list of FakeKill instruction numbers. Kills.clear(); // Phi lowering should not be creating new call instructions, so there should // be no infinite-weight not-yet-colored live ranges that span a call // instruction, hence no need to construct the Kills list. if (Kind == RAK_Phi) return; for (CfgNode *Node : Func->getNodes()) { for (Inst &I : Node->getInsts()) { if (auto *Kill = llvm::dyn_cast(&I)) { if (!Kill->isDeleted() && !Kill->getLinked()->isDeleted()) Kills.push_back(I.getNumber()); } } } } // Validate the integrity of the live ranges. 
If there are any errors, it // prints details and returns false. On success, it returns true. bool LinearScan::livenessValidateIntervals( const DefUseErrorList &DefsWithoutUses, const DefUseErrorList &UsesBeforeDefs, const CfgVector &LRBegin, const CfgVector &LREnd) const { if (DefsWithoutUses.empty() && UsesBeforeDefs.empty()) return true; if (!BuildDefs::dump()) return false; OstreamLocker L(Ctx); Ostream &Str = Ctx->getStrDump(); for (SizeT VarNum : DefsWithoutUses) { Variable *Var = Vars[VarNum]; Str << "LR def without use, instruction " << LRBegin[VarNum] << ", variable " << Var->getName() << "\n"; } for (SizeT VarNum : UsesBeforeDefs) { Variable *Var = Vars[VarNum]; Str << "LR use before def, instruction " << LREnd[VarNum] << ", variable " << Var->getName() << "\n"; } return false; } // Prepare for very simple register allocation of only infinite-weight Variables // while respecting pre-colored Variables. Some properties we take advantage of: // // * Live ranges of interest consist of a single segment. // // * Live ranges of interest never span a call instruction. // // * Phi instructions are not considered because either phis have already been // lowered, or they don't contain any pre-colored or infinite-weight // Variables. // // * We don't need to renumber instructions before computing live ranges because // all the high-level ICE instructions are deleted prior to lowering, and the // low-level instructions are added in monotonically increasing order. // // * There are no opportunities for register preference or allowing overlap. // // Some properties we aren't (yet) taking advantage of: // // * Because live ranges are a single segment, the Inactive set will always be // empty, and the live range trimming operation is unnecessary. // // * Calculating overlap of single-segment live ranges could be optimized a bit. 
void LinearScan::initForInfOnly() { TimerMarker T(TimerStack::TT_initUnhandled, Func); FindPreference = false; FindOverlap = false; SizeT NumVars = 0; // Iterate across all instructions and record the begin and end of the live // range for each variable that is pre-colored or infinite weight. CfgVector LRBegin(Vars.size(), Inst::NumberSentinel); CfgVector LREnd(Vars.size(), Inst::NumberSentinel); DefUseErrorList DefsWithoutUses, UsesBeforeDefs; for (CfgNode *Node : Func->getNodes()) { for (Inst &Instr : Node->getInsts()) { if (Instr.isDeleted()) continue; FOREACH_VAR_IN_INST(Var, Instr) { if (Var->getIgnoreLiveness()) continue; if (Var->hasReg() || Var->mustHaveReg()) { SizeT VarNum = Var->getIndex(); LREnd[VarNum] = Instr.getNumber(); if (!Var->getIsArg() && LRBegin[VarNum] == Inst::NumberSentinel) UsesBeforeDefs.push_back(VarNum); } } if (const Variable *Var = Instr.getDest()) { if (!Var->getIgnoreLiveness() && (Var->hasReg() || Var->mustHaveReg())) { if (LRBegin[Var->getIndex()] == Inst::NumberSentinel) { LRBegin[Var->getIndex()] = Instr.getNumber(); ++NumVars; } } } } } Unhandled.reserve(NumVars); UnhandledPrecolored.reserve(NumVars); for (SizeT i = 0; i < Vars.size(); ++i) { Variable *Var = Vars[i]; if (Var->isRematerializable()) continue; if (LRBegin[i] != Inst::NumberSentinel) { if (LREnd[i] == Inst::NumberSentinel) { DefsWithoutUses.push_back(i); continue; } Unhandled.push_back(Var); Var->resetLiveRange(); Var->addLiveRange(LRBegin[i], LREnd[i]); Var->untrimLiveRange(); if (Var->hasReg()) { Var->setRegNumTmp(Var->getRegNum()); Var->setMustHaveReg(); UnhandledPrecolored.push_back(Var); } --NumVars; } } if (!livenessValidateIntervals(DefsWithoutUses, UsesBeforeDefs, LRBegin, LREnd)) { llvm::report_fatal_error("initForInfOnly: Liveness error"); return; } if (!DefsWithoutUses.empty() || !UsesBeforeDefs.empty()) { if (BuildDefs::dump()) { OstreamLocker L(Ctx); Ostream &Str = Ctx->getStrDump(); for (SizeT VarNum : DefsWithoutUses) { Variable *Var = Vars[VarNum]; 
Str << "LR def without use, instruction " << LRBegin[VarNum] << ", variable " << Var->getName() << "\n"; } for (SizeT VarNum : UsesBeforeDefs) { Variable *Var = Vars[VarNum]; Str << "LR use before def, instruction " << LREnd[VarNum] << ", variable " << Var->getName() << "\n"; } } llvm::report_fatal_error("initForInfOnly: Liveness error"); } // This isn't actually a fatal condition, but it would be nice to know if we // somehow pre-calculated Unhandled's size wrong. assert(NumVars == 0); // Don't build up the list of Kills because we know that no infinite-weight // Variable has a live range spanning a call. Kills.clear(); } void LinearScan::initForSecondChance() { TimerMarker T(TimerStack::TT_initUnhandled, Func); FindPreference = true; FindOverlap = true; const VarList &Vars = Func->getVariables(); Unhandled.reserve(Vars.size()); UnhandledPrecolored.reserve(Vars.size()); for (Variable *Var : Vars) { if (Var->isRematerializable()) continue; if (Var->hasReg()) { Var->untrimLiveRange(); Var->setRegNumTmp(Var->getRegNum()); Var->setMustHaveReg(); UnhandledPrecolored.push_back(Var); Unhandled.push_back(Var); } } for (Variable *Var : Evicted) { Var->untrimLiveRange(); Unhandled.push_back(Var); } } void LinearScan::init(RegAllocKind Kind, CfgSet ExcludeVars) { this->Kind = Kind; Unhandled.clear(); UnhandledPrecolored.clear(); Handled.clear(); Inactive.clear(); Active.clear(); Vars.clear(); Vars.reserve(Func->getVariables().size() - ExcludeVars.size()); for (auto *Var : Func->getVariables()) { if (ExcludeVars.find(Var) == ExcludeVars.end()) Vars.emplace_back(Var); } SizeT NumRegs = Target->getNumRegisters(); RegAliases.resize(NumRegs); for (SizeT Reg = 0; Reg < NumRegs; ++Reg) { RegAliases[Reg] = &Target->getAliasesForRegister(RegNumT::fromInt(Reg)); } switch (Kind) { case RAK_Unknown: llvm::report_fatal_error("Invalid RAK_Unknown"); break; case RAK_Global: case RAK_Phi: initForGlobal(); break; case RAK_InfOnly: initForInfOnly(); break; case RAK_SecondChance: 
initForSecondChance(); break; } Evicted.clear(); auto CompareRanges = [](const Variable *L, const Variable *R) { InstNumberT Lstart = L->getLiveRange().getStart(); InstNumberT Rstart = R->getLiveRange().getStart(); if (Lstart == Rstart) return L->getIndex() < R->getIndex(); return Lstart < Rstart; }; // Do a reverse sort so that erasing elements (from the end) is fast. std::sort(Unhandled.rbegin(), Unhandled.rend(), CompareRanges); std::sort(UnhandledPrecolored.rbegin(), UnhandledPrecolored.rend(), CompareRanges); Handled.reserve(Unhandled.size()); Inactive.reserve(Unhandled.size()); Active.reserve(Unhandled.size()); Evicted.reserve(Unhandled.size()); } // This is called when Cur must be allocated a register but no registers are // available across Cur's live range. To handle this, we find a register that is // not explicitly used during Cur's live range, spill that register to a stack // location right before Cur's live range begins, and fill (reload) the register // from the stack location right after Cur's live range ends. void LinearScan::addSpillFill(IterationState &Iter) { // Identify the actual instructions that begin and end Iter.Cur's live range. // Iterate through Iter.Cur's node's instruction list until we find the actual // instructions with instruction numbers corresponding to Iter.Cur's recorded // live range endpoints. This sounds inefficient but shouldn't be a problem // in practice because: // (1) This function is almost never called in practice. // (2) Since this register over-subscription problem happens only for // phi-lowered instructions, the number of instructions in the node is // proportional to the number of phi instructions in the original node, // which is never very large in practice. 
// (3) We still have to iterate through all instructions of Iter.Cur's live // range to find all explicitly used registers (though the live range is // usually only 2-3 instructions), so the main cost that could be avoided // would be finding the instruction that begin's Iter.Cur's live range. assert(!Iter.Cur->getLiveRange().isEmpty()); InstNumberT Start = Iter.Cur->getLiveRange().getStart(); InstNumberT End = Iter.Cur->getLiveRange().getEnd(); CfgNode *Node = Func->getVMetadata()->getLocalUseNode(Iter.Cur); assert(Node); InstList &Insts = Node->getInsts(); InstList::iterator SpillPoint = Insts.end(); InstList::iterator FillPoint = Insts.end(); // Stop searching after we have found both the SpillPoint and the FillPoint. for (auto I = Insts.begin(), E = Insts.end(); I != E && (SpillPoint == E || FillPoint == E); ++I) { if (I->getNumber() == Start) SpillPoint = I; if (I->getNumber() == End) FillPoint = I; if (SpillPoint != E) { // Remove from RegMask any physical registers referenced during Cur's live // range. Start looking after SpillPoint gets set, i.e. once Cur's live // range begins. FOREACH_VAR_IN_INST(Var, *I) { if (!Var->hasRegTmp()) continue; const auto &Aliases = *RegAliases[Var->getRegNumTmp()]; for (RegNumT RegAlias : RegNumBVIter(Aliases)) { Iter.RegMask[RegAlias] = false; } } } } assert(SpillPoint != Insts.end()); assert(FillPoint != Insts.end()); ++FillPoint; // TODO(stichnot): Randomize instead of *.begin() which maps to find_first(). const RegNumT RegNum = *RegNumBVIter(Iter.RegMask).begin(); Iter.Cur->setRegNumTmp(RegNum); Variable *Preg = Target->getPhysicalRegister(RegNum, Iter.Cur->getType()); // TODO(stichnot): Add SpillLoc to VariablesMetadata tracking so that SpillLoc // is correctly identified as !isMultiBlock(), reducing stack frame size. 
Variable *SpillLoc = Func->makeVariable(Iter.Cur->getType()); // Add "reg=FakeDef;spill=reg" before SpillPoint Target->lowerInst(Node, SpillPoint, InstFakeDef::create(Func, Preg)); Target->lowerInst(Node, SpillPoint, InstAssign::create(Func, SpillLoc, Preg)); // add "reg=spill;FakeUse(reg)" before FillPoint Target->lowerInst(Node, FillPoint, InstAssign::create(Func, Preg, SpillLoc)); Target->lowerInst(Node, FillPoint, InstFakeUse::create(Func, Preg)); } void LinearScan::handleActiveRangeExpiredOrInactive(const Variable *Cur) { for (SizeT I = Active.size(); I > 0; --I) { const SizeT Index = I - 1; Variable *Item = Active[Index]; Item->trimLiveRange(Cur->getLiveRange().getStart()); bool Moved = false; if (Item->rangeEndsBefore(Cur)) { // Move Item from Active to Handled list. dumpLiveRangeTrace("Expiring ", Item); moveItem(Active, Index, Handled); Moved = true; } else if (!Item->rangeOverlapsStart(Cur)) { // Move Item from Active to Inactive list. dumpLiveRangeTrace("Inactivating ", Item); moveItem(Active, Index, Inactive); Moved = true; } if (Moved) { // Decrement Item from RegUses[]. assert(Item->hasRegTmp()); const auto &Aliases = *RegAliases[Item->getRegNumTmp()]; for (RegNumT RegAlias : RegNumBVIter(Aliases)) { --RegUses[RegAlias]; assert(RegUses[RegAlias] >= 0); } } } } void LinearScan::handleInactiveRangeExpiredOrReactivated(const Variable *Cur) { for (SizeT I = Inactive.size(); I > 0; --I) { const SizeT Index = I - 1; Variable *Item = Inactive[Index]; Item->trimLiveRange(Cur->getLiveRange().getStart()); if (Item->rangeEndsBefore(Cur)) { // Move Item from Inactive to Handled list. dumpLiveRangeTrace("Expiring ", Item); moveItem(Inactive, Index, Handled); } else if (Item->rangeOverlapsStart(Cur)) { // Move Item from Inactive to Active list. dumpLiveRangeTrace("Reactivating ", Item); moveItem(Inactive, Index, Active); // Increment Item in RegUses[]. 
assert(Item->hasRegTmp()); const auto &Aliases = *RegAliases[Item->getRegNumTmp()]; for (RegNumT RegAlias : RegNumBVIter(Aliases)) { assert(RegUses[RegAlias] >= 0); ++RegUses[RegAlias]; } } } } // Infer register preference and allowable overlap. Only form a preference when // the current Variable has an unambiguous "first" definition. The preference is // some source Variable of the defining instruction that either is assigned a // register that is currently free, or that is assigned a register that is not // free but overlap is allowed. Overlap is allowed when the Variable under // consideration is single-definition, and its definition is a simple assignment // - i.e., the register gets copied/aliased but is never modified. Furthermore, // overlap is only allowed when preferred Variable definition instructions do // not appear within the current Variable's live range. void LinearScan::findRegisterPreference(IterationState &Iter) { Iter.Prefer = nullptr; Iter.PreferReg = RegNumT(); Iter.AllowOverlap = false; if (!FindPreference) return; VariablesMetadata *VMetadata = Func->getVMetadata(); const Inst *DefInst = VMetadata->getFirstDefinitionSingleBlock(Iter.Cur); if (DefInst == nullptr) return; assert(DefInst->getDest() == Iter.Cur); const bool IsSingleDefAssign = DefInst->isVarAssign() && !VMetadata->isMultiDef(Iter.Cur); FOREACH_VAR_IN_INST(SrcVar, *DefInst) { // Only consider source variables that have (so far) been assigned a // register. if (!SrcVar->hasRegTmp()) continue; // That register must be one in the RegMask set, e.g. don't try to prefer // the stack pointer as a result of the stacksave intrinsic. const auto &Aliases = *RegAliases[SrcVar->getRegNumTmp()]; const int SrcReg = (Iter.RegMask & Aliases).find_first(); if (SrcReg == -1) continue; if (FindOverlap && IsSingleDefAssign && !Iter.Free[SrcReg]) { // Don't bother trying to enable AllowOverlap if the register is already // free (hence the test on Iter.Free[SrcReg]). 
Iter.AllowOverlap = !overlapsDefs(Func, Iter.Cur, SrcVar); } if (Iter.AllowOverlap || Iter.Free[SrcReg]) { Iter.Prefer = SrcVar; Iter.PreferReg = RegNumT::fromInt(SrcReg); // Stop looking for a preference after the first valid preference is // found. One might think that we should look at all instruction // variables to find the best combination, but note // that AllowOverlap can only be true for a simple assignment statement // which can have only one source operand, so it's not possible for // AllowOverlap to be true beyond the first source operand. FOREACH_VAR_IN_INST_BREAK; } } if (BuildDefs::dump() && Verbose && Iter.Prefer) { Ostream &Str = Ctx->getStrDump(); Str << "Initial Iter.Prefer="; Iter.Prefer->dump(Func); Str << " R=" << Iter.PreferReg << " LIVE=" << Iter.Prefer->getLiveRange() << " Overlap=" << Iter.AllowOverlap << "\n"; } } // Remove registers from the Iter.Free[] list where an Inactive range overlaps // with the current range. void LinearScan::filterFreeWithInactiveRanges(IterationState &Iter) { for (const Variable *Item : Inactive) { if (!Item->rangeOverlaps(Iter.Cur)) continue; const auto &Aliases = *RegAliases[Item->getRegNumTmp()]; for (RegNumT RegAlias : RegNumBVIter(Aliases)) { // Don't assert(Iter.Free[RegAlias]) because in theory (though probably // never in practice) there could be two inactive variables that were // marked with AllowOverlap. Iter.Free[RegAlias] = false; Iter.FreeUnfiltered[RegAlias] = false; // Disable AllowOverlap if an Inactive variable, which is not Prefer, // shares Prefer's register, and has a definition within Cur's live range. 
if (Iter.AllowOverlap && Item != Iter.Prefer && RegAlias == Iter.PreferReg && overlapsDefs(Func, Iter.Cur, Item)) { Iter.AllowOverlap = false; dumpDisableOverlap(Func, Item, "Inactive"); } } } } // Remove registers from the Iter.Free[] list where an Unhandled pre-colored // range overlaps with the current range, and set those registers to infinite // weight so that they aren't candidates for eviction. // Cur->rangeEndsBefore(Item) is an early exit check that turns a guaranteed // O(N^2) algorithm into expected linear complexity. void LinearScan::filterFreeWithPrecoloredRanges(IterationState &Iter) { // TODO(stichnot): Partition UnhandledPrecolored according to register class, // to restrict the number of overlap comparisons needed. for (Variable *Item : reverse_range(UnhandledPrecolored)) { assert(Item->hasReg()); if (Iter.Cur->rangeEndsBefore(Item)) break; if (!Item->rangeOverlaps(Iter.Cur)) continue; const auto &Aliases = *RegAliases[Item->getRegNum()]; // Note: not getRegNumTmp() for (RegNumT RegAlias : RegNumBVIter(Aliases)) { Iter.Weights[RegAlias].setWeight(RegWeight::Inf); Iter.Free[RegAlias] = false; Iter.FreeUnfiltered[RegAlias] = false; Iter.PrecoloredUnhandledMask[RegAlias] = true; // Disable Iter.AllowOverlap if the preferred register is one of these // pre-colored unhandled overlapping ranges. if (Iter.AllowOverlap && RegAlias == Iter.PreferReg) { Iter.AllowOverlap = false; dumpDisableOverlap(Func, Item, "PrecoloredUnhandled"); } } } } void LinearScan::allocatePrecoloredRegister(Variable *Cur) { const auto RegNum = Cur->getRegNum(); // RegNumTmp should have already been set above. 
assert(Cur->getRegNumTmp() == RegNum); dumpLiveRangeTrace("Precoloring ", Cur); Active.push_back(Cur); const auto &Aliases = *RegAliases[RegNum]; for (RegNumT RegAlias : RegNumBVIter(Aliases)) { assert(RegUses[RegAlias] >= 0); ++RegUses[RegAlias]; } assert(!UnhandledPrecolored.empty()); assert(UnhandledPrecolored.back() == Cur); UnhandledPrecolored.pop_back(); } void LinearScan::allocatePreferredRegister(IterationState &Iter) { Iter.Cur->setRegNumTmp(Iter.PreferReg); dumpLiveRangeTrace("Preferring ", Iter.Cur); const auto &Aliases = *RegAliases[Iter.PreferReg]; for (RegNumT RegAlias : RegNumBVIter(Aliases)) { assert(RegUses[RegAlias] >= 0); ++RegUses[RegAlias]; } Active.push_back(Iter.Cur); } void LinearScan::allocateFreeRegister(IterationState &Iter, bool Filtered) { const RegNumT RegNum = *RegNumBVIter(Filtered ? Iter.Free : Iter.FreeUnfiltered).begin(); Iter.Cur->setRegNumTmp(RegNum); if (Filtered) dumpLiveRangeTrace("Allocating Y ", Iter.Cur); else dumpLiveRangeTrace("Allocating X ", Iter.Cur); const auto &Aliases = *RegAliases[RegNum]; for (RegNumT RegAlias : RegNumBVIter(Aliases)) { assert(RegUses[RegAlias] >= 0); ++RegUses[RegAlias]; } Active.push_back(Iter.Cur); } void LinearScan::handleNoFreeRegisters(IterationState &Iter) { // Check Active ranges. for (const Variable *Item : Active) { assert(Item->rangeOverlaps(Iter.Cur)); assert(Item->hasRegTmp()); const auto &Aliases = *RegAliases[Item->getRegNumTmp()]; // We add the Item's weight to each alias/subregister to represent that, // should we decide to pick any of them, then we would incur that many // memory accesses. RegWeight W = Item->getWeight(Func); for (RegNumT RegAlias : RegNumBVIter(Aliases)) { Iter.Weights[RegAlias].addWeight(W); } } // Same as above, but check Inactive ranges instead of Active. 
for (const Variable *Item : Inactive) { if (!Item->rangeOverlaps(Iter.Cur)) continue; assert(Item->hasRegTmp()); const auto &Aliases = *RegAliases[Item->getRegNumTmp()]; RegWeight W = Item->getWeight(Func); for (RegNumT RegAlias : RegNumBVIter(Aliases)) { Iter.Weights[RegAlias].addWeight(W); } } // All the weights are now calculated. Find the register with smallest weight. int32_t MinWeightIndex = findMinWeightIndex(Iter.RegMask, Iter.Weights); if (MinWeightIndex < 0 || Iter.Cur->getWeight(Func) <= Iter.Weights[MinWeightIndex]) { if (!Iter.Cur->mustHaveReg()) { // Iter.Cur doesn't have priority over any other live ranges, so don't // allocate any register to it, and move it to the Handled state. Handled.push_back(Iter.Cur); return; } if (Kind == RAK_Phi) { // Iter.Cur is infinite-weight but all physical registers are already // taken, so we need to force one to be temporarily available. addSpillFill(Iter); Handled.push_back(Iter.Cur); return; } // The remaining portion of the enclosing "if" block should only be // reachable if we are manually limiting physical registers for testing. if (UseReserve) { if (Iter.FreeUnfiltered.any()) { // There is some available physical register held in reserve, so use it. constexpr bool NotFiltered = false; allocateFreeRegister(Iter, NotFiltered); // Iter.Cur is now on the Active list. return; } // At this point, we need to find some reserve register that is already // assigned to a non-infinite-weight variable. This could happen if some // variable was previously assigned an alias of such a register. 
MinWeightIndex = findMinWeightIndex(Iter.RegMaskUnfiltered, Iter.Weights); } if (Iter.Cur->getWeight(Func) <= Iter.Weights[MinWeightIndex]) { dumpLiveRangeTrace("Failing ", Iter.Cur); Func->setError("Unable to find a physical register for an " "infinite-weight live range " "(consider using -reg-reserve): " + Iter.Cur->getName()); Handled.push_back(Iter.Cur); return; } // At this point, MinWeightIndex points to a valid reserve register to // reallocate to Iter.Cur, so drop into the eviction code. } // Evict all live ranges in Active that register number MinWeightIndex is // assigned to. const auto &Aliases = *RegAliases[MinWeightIndex]; for (SizeT I = Active.size(); I > 0; --I) { const SizeT Index = I - 1; Variable *Item = Active[Index]; const auto RegNum = Item->getRegNumTmp(); if (Aliases[RegNum]) { dumpLiveRangeTrace("Evicting A ", Item); const auto &Aliases = *RegAliases[RegNum]; for (RegNumT RegAlias : RegNumBVIter(Aliases)) { --RegUses[RegAlias]; assert(RegUses[RegAlias] >= 0); } Item->setRegNumTmp(RegNumT()); moveItem(Active, Index, Handled); Evicted.push_back(Item); } } // Do the same for Inactive. for (SizeT I = Inactive.size(); I > 0; --I) { const SizeT Index = I - 1; Variable *Item = Inactive[Index]; // Note: The Item->rangeOverlaps(Cur) clause is not part of the description // of AssignMemLoc() in the original paper. But there doesn't seem to be any // need to evict an inactive live range that doesn't overlap with the live // range currently being considered. It's especially bad if we would end up // evicting an infinite-weight but currently-inactive live range. The most // common situation for this would be a scratch register kill set for call // instructions. if (Aliases[Item->getRegNumTmp()] && Item->rangeOverlaps(Iter.Cur)) { dumpLiveRangeTrace("Evicting I ", Item); Item->setRegNumTmp(RegNumT()); moveItem(Inactive, Index, Handled); Evicted.push_back(Item); } } // Assign the register to Cur. 
Iter.Cur->setRegNumTmp(RegNumT::fromInt(MinWeightIndex)); for (RegNumT RegAlias : RegNumBVIter(Aliases)) { assert(RegUses[RegAlias] >= 0); ++RegUses[RegAlias]; } Active.push_back(Iter.Cur); dumpLiveRangeTrace("Allocating Z ", Iter.Cur); } void LinearScan::assignFinalRegisters(const SmallBitVector &RegMaskFull, const SmallBitVector &PreDefinedRegisters, bool Randomized) { const size_t NumRegisters = RegMaskFull.size(); llvm::SmallVector Permutation(NumRegisters); if (Randomized) { // Create a random number generator for regalloc randomization. Merge // function's sequence and Kind value as the Salt. Because regAlloc() is // called twice under O2, the second time with RAK_Phi, we check Kind == // RAK_Phi to determine the lowest-order bit to make sure the Salt is // different. uint64_t Salt = (Func->getSequenceNumber() << 1) ^ (Kind == RAK_Phi ? 0u : 1u); Target->makeRandomRegisterPermutation( Permutation, PreDefinedRegisters | ~RegMaskFull, Salt); } // Finish up by setting RegNum = RegNumTmp (or a random permutation thereof) // for each Variable. for (Variable *Item : Handled) { const auto RegNum = Item->getRegNumTmp(); auto AssignedRegNum = RegNum; if (Randomized && Item->hasRegTmp() && !Item->hasReg()) { AssignedRegNum = Permutation[RegNum]; } if (BuildDefs::dump() && Verbose) { Ostream &Str = Ctx->getStrDump(); if (!Item->hasRegTmp()) { Str << "Not assigning "; Item->dump(Func); Str << "\n"; } else { Str << (AssignedRegNum == Item->getRegNum() ? "Reassigning " : "Assigning ") << Target->getRegName(AssignedRegNum, Item->getType()) << "(r" << AssignedRegNum << ") to "; Item->dump(Func); Str << "\n"; } } Item->setRegNum(AssignedRegNum); } } // Implements the linear-scan algorithm. Based on "Linear Scan Register // Allocation in the Context of SSA Form and Register Constraints" by Hanspeter // Mössenböck and Michael Pfeiffer, // ftp://ftp.ssw.uni-linz.ac.at/pub/Papers/Moe02.PDF. 
// This implementation is modified to take affinity into account and allow two
// interfering variables to share the same register in certain cases.
//
// Requires running Cfg::liveness(Liveness_Intervals) in preparation. Results
// are assigned to Variable::RegNum for each Variable.
void LinearScan::scan(const SmallBitVector &RegMaskFull, bool Randomized) {
  TimerMarker T(TimerStack::TT_linearScan, Func);
  assert(RegMaskFull.any()); // Sanity check
  if (Verbose)
    Ctx->lockStr();
  Func->resetCurrentNode();
  const size_t NumRegisters = RegMaskFull.size();
  SmallBitVector PreDefinedRegisters(NumRegisters);
  if (Randomized) {
    // Remember which registers are pre-colored, so the randomized permutation
    // can leave them fixed.
    for (Variable *Var : UnhandledPrecolored) {
      PreDefinedRegisters[Var->getRegNum()] = true;
    }
  }

  // Build a LiveRange representing the Kills list.
  LiveRange KillsRange(Kills);
  KillsRange.untrim();

  // Reset the register use count.
  RegUses.resize(NumRegisters);
  std::fill(RegUses.begin(), RegUses.end(), 0);

  // Unhandled is already set to all ranges in increasing order of start
  // points.
  assert(Active.empty());
  assert(Inactive.empty());
  assert(Handled.empty());
  const TargetLowering::RegSetMask RegsInclude =
      TargetLowering::RegSet_CallerSave;
  const TargetLowering::RegSetMask RegsExclude = TargetLowering::RegSet_None;
  // Caller-save registers are clobbered across calls (modeled by FakeKill).
  const SmallBitVector KillsMask =
      Target->getRegisterSet(RegsInclude, RegsExclude);

  // Allocate memory once outside the loop.
  IterationState Iter;
  Iter.Weights.reserve(NumRegisters);
  Iter.PrecoloredUnhandledMask.reserve(NumRegisters);

  // Main loop: process live ranges in increasing order of start point.
  while (!Unhandled.empty()) {
    Iter.Cur = Unhandled.back();
    Unhandled.pop_back();
    dumpLiveRangeTrace("\nConsidering ", Iter.Cur);
    if (UseReserve)
      assert(Target->getAllRegistersForVariable(Iter.Cur).any());
    else
      assert(Target->getRegistersForVariable(Iter.Cur).any());
    // Restrict the candidate registers to Cur's register class.
    Iter.RegMask = RegMaskFull & Target->getRegistersForVariable(Iter.Cur);
    Iter.RegMaskUnfiltered =
        RegMaskFull & Target->getAllRegistersForVariable(Iter.Cur);
    KillsRange.trim(Iter.Cur->getLiveRange().getStart());

    // Check for pre-colored ranges. If Cur is pre-colored, it definitely gets
    // that register. Previously processed live ranges would have avoided that
    // register due to it being pre-colored. Future processed live ranges won't
    // evict that register because the live range has infinite weight.
    if (Iter.Cur->hasReg()) {
      allocatePrecoloredRegister(Iter.Cur);
      continue;
    }

    handleActiveRangeExpiredOrInactive(Iter.Cur);
    handleInactiveRangeExpiredOrReactivated(Iter.Cur);

    // Calculate available registers into Iter.Free[] and
    // Iter.FreeUnfiltered[].
    Iter.Free = Iter.RegMask;
    Iter.FreeUnfiltered = Iter.RegMaskUnfiltered;
    for (SizeT i = 0; i < Iter.RegMask.size(); ++i) {
      if (RegUses[i] > 0) {
        Iter.Free[i] = false;
        Iter.FreeUnfiltered[i] = false;
      }
    }

    findRegisterPreference(Iter);
    filterFreeWithInactiveRanges(Iter);

    // Disable AllowOverlap if an Active variable, which is not Prefer, shares
    // Prefer's register, and has a definition within Cur's live range.
    if (Iter.AllowOverlap) {
      const auto &Aliases = *RegAliases[Iter.PreferReg];
      for (const Variable *Item : Active) {
        const RegNumT RegNum = Item->getRegNumTmp();
        if (Item != Iter.Prefer && Aliases[RegNum] &&
            overlapsDefs(Func, Iter.Cur, Item)) {
          Iter.AllowOverlap = false;
          dumpDisableOverlap(Func, Item, "Active");
        }
      }
    }

    Iter.Weights.resize(Iter.RegMask.size());
    std::fill(Iter.Weights.begin(), Iter.Weights.end(), RegWeight());

    Iter.PrecoloredUnhandledMask.resize(Iter.RegMask.size());
    Iter.PrecoloredUnhandledMask.reset();

    filterFreeWithPrecoloredRanges(Iter);

    // Remove scratch registers from the Iter.Free[] list, and mark their
    // Iter.Weights[] as infinite, if KillsRange overlaps Cur's live range.
    constexpr bool UseTrimmed = true;
    if (Iter.Cur->getLiveRange().overlaps(KillsRange, UseTrimmed)) {
      Iter.Free.reset(KillsMask);
      Iter.FreeUnfiltered.reset(KillsMask);
      for (RegNumT i : RegNumBVIter(KillsMask)) {
        Iter.Weights[i].setWeight(RegWeight::Inf);
        if (Iter.PreferReg == i)
          Iter.AllowOverlap = false;
      }
    }

    // Print info about physical register availability.
    if (BuildDefs::dump() && Verbose) {
      Ostream &Str = Ctx->getStrDump();
      for (RegNumT i : RegNumBVIter(Iter.RegMaskUnfiltered)) {
        Str << Target->getRegName(i, Iter.Cur->getType())
            << "(U=" << RegUses[i] << ",F=" << Iter.Free[i]
            << ",P=" << Iter.PrecoloredUnhandledMask[i] << ") ";
      }
      Str << "\n";
    }

    if (Iter.Prefer && (Iter.AllowOverlap || Iter.Free[Iter.PreferReg])) {
      // First choice: a preferred register that is either free or is allowed
      // to overlap with its linked variable.
      allocatePreferredRegister(Iter);
    } else if (Iter.Free.any()) {
      // Second choice: any free register.
      constexpr bool Filtered = true;
      allocateFreeRegister(Iter, Filtered);
    } else {
      // Fallback: there are no free registers, so we look for the
      // lowest-weight register and see if Cur has higher weight.
      handleNoFreeRegisters(Iter);
    }
    dump(Func);
  }

  // Move anything Active or Inactive to Handled for easier handling.
  Handled.insert(Handled.end(), Active.begin(), Active.end());
  Active.clear();
  Handled.insert(Handled.end(), Inactive.begin(), Inactive.end());
  Inactive.clear();
  dump(Func);

  assignFinalRegisters(RegMaskFull, PreDefinedRegisters, Randomized);

  if (Verbose)
    Ctx->unlockStr();
}

// ======================== Dump routines ======================== //

// Prints Label followed by Item's live-range summary, but only in verbose
// (-verbose=regalloc) dump builds.
void LinearScan::dumpLiveRangeTrace(const char *Label, const Variable *Item) {
  if (!BuildDefs::dump())
    return;

  if (Verbose) {
    Ostream &Str = Ctx->getStrDump();
    Str << Label;
    dumpLiveRange(Item, Func);
    Str << "\n";
  }
}

// Dumps the full allocator state (Handled/Unhandled/Active/Inactive lists).
// Note: the Func parameter intentionally shadows the Func member here.
void LinearScan::dump(Cfg *Func) const {
  if (!BuildDefs::dump())
    return;
  if (!Verbose)
    return;
  Ostream &Str = Func->getContext()->getStrDump();
  Func->resetCurrentNode();
  Str << "**** Current regalloc state:\n";
  Str << "++++++ Handled:\n";
  for (const Variable *Item : Handled) {
    dumpLiveRange(Item, Func);
    Str << "\n";
  }
  Str << "++++++ Unhandled:\n";
  // Unhandled is reverse-sorted, so iterate it in reverse for readability.
  for (const Variable *Item : reverse_range(Unhandled)) {
    dumpLiveRange(Item, Func);
    Str << "\n";
  }
  Str << "++++++ Active:\n";
  for (const Variable *Item : Active) {
    dumpLiveRange(Item, Func);
    Str << "\n";
  }
  Str << "++++++ Inactive:\n";
  for (const Variable *Item : Inactive) {
    dumpLiveRange(Item, Func);
    Str << "\n";
  }
}

} // end of namespace Ice