1 /*
2  * Copyright (C) 2016 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "register_allocator_graph_color.h"
18 
19 #include "code_generator.h"
20 #include "linear_order.h"
21 #include "register_allocation_resolver.h"
22 #include "ssa_liveness_analysis.h"
23 #include "thread-current-inl.h"
24 
25 namespace art {
26 
27 // Highest number of registers that we support for any platform. This can be used for std::bitset,
28 // for example, which needs to know its size at compile time.
29 static constexpr size_t kMaxNumRegs = 32;
30 
31 // The maximum number of graph coloring attempts before triggering a DCHECK.
32 // This is meant to catch changes to the graph coloring algorithm that undermine its forward
33 // progress guarantees. Forward progress for the algorithm means splitting live intervals on
34 // every graph coloring attempt so that eventually the interference graph will be sparse enough
35 // to color. The main threat to forward progress is trying to split short intervals which cannot be
36 // split further; this could cause infinite looping because the interference graph would never
37 // change. This is avoided by prioritizing short intervals before long ones, so that long
38 // intervals are split when coloring fails.
39 static constexpr size_t kMaxGraphColoringAttemptsDebug = 100;
40 
41 // We always want to avoid spilling inside loops.
42 static constexpr size_t kLoopSpillWeightMultiplier = 10;
43 
44 // If we avoid moves in single jump blocks, we can avoid jumps to jumps.
45 static constexpr size_t kSingleJumpBlockWeightMultiplier = 2;
46 
47 // We avoid moves in blocks that dominate the exit block, since these blocks will
48 // be executed on every path through the method.
49 static constexpr size_t kDominatesExitBlockWeightMultiplier = 2;
50 
51 enum class CoalesceKind {
52   kAdjacentSibling,       // Prevents moves at interval split points.
53   kFixedOutputSibling,    // Prevents moves from a fixed output location.
54   kFixedInput,            // Prevents moves into a fixed input location.
55   kNonlinearControlFlow,  // Prevents moves between blocks.
56   kPhi,                   // Prevents phi resolution moves.
57   kFirstInput,            // Prevents a single input move.
58   kAnyInput,              // May lead to better instruction selection / smaller encodings.
59 };
60 
61 std::ostream& operator<<(std::ostream& os, const CoalesceKind& kind) {
62   return os << static_cast<typename std::underlying_type<CoalesceKind>::type>(kind);
63 }
64 
65 static size_t LoopDepthAt(HBasicBlock* block) {
66   HLoopInformation* loop_info = block->GetLoopInformation();
67   size_t depth = 0;
68   while (loop_info != nullptr) {
69     ++depth;
70     loop_info = loop_info->GetPreHeader()->GetLoopInformation();
71   }
72   return depth;
73 }
74 
75 // Return the runtime cost of inserting a move instruction at the specified location.
76 static size_t CostForMoveAt(size_t position, const SsaLivenessAnalysis& liveness) {
77   HBasicBlock* block = liveness.GetBlockFromPosition(position / 2);
78   DCHECK(block != nullptr);
79   size_t cost = 1;
80   if (block->IsSingleJump()) {
81     cost *= kSingleJumpBlockWeightMultiplier;
82   }
83   if (block->Dominates(block->GetGraph()->GetExitBlock())) {
84     cost *= kDominatesExitBlockWeightMultiplier;
85   }
86   for (size_t loop_depth = LoopDepthAt(block); loop_depth > 0; --loop_depth) {
87     cost *= kLoopSpillWeightMultiplier;
88   }
89   return cost;
90 }
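// Editor's illustrative note (not part of the original source): with the multipliers defined
// above, a move placed in a single-jump block that dominates the exit block and sits inside two
// nested loops costs roughly 1 * 2 * 2 * 10 * 10 = 400, while a move in an ordinary block
// outside any loop costs 1. The loop multiplier dominates, which is what "avoid spilling inside
// loops" amounts to in practice.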
91 
92 // In general, we estimate coalesce priority by whether it will definitely avoid a move,
93 // and by how likely it is to create an interference graph that's harder to color.
94 static size_t ComputeCoalescePriority(CoalesceKind kind,
95                                       size_t position,
96                                       const SsaLivenessAnalysis& liveness) {
97   if (kind == CoalesceKind::kAnyInput) {
98     // This type of coalescing can affect instruction selection, but not moves, so we
99     // give it the lowest priority.
100     return 0;
101   } else {
102     return CostForMoveAt(position, liveness);
103   }
104 }
105 
106 enum class CoalesceStage {
107   kWorklist,  // Currently in the iterative coalescing worklist.
108   kActive,    // Not in a worklist, but could be considered again during iterative coalescing.
109   kInactive,  // No longer considered until last-chance coalescing.
110   kDefunct,   // Either the two nodes interfere, or have already been coalesced.
111 };
112 
113 std::ostream& operator<<(std::ostream& os, const CoalesceStage& stage) {
114   return os << static_cast<typename std::underlying_type<CoalesceStage>::type>(stage);
115 }
116 
117 // Represents a coalesce opportunity between two nodes.
118 struct CoalesceOpportunity : public ArenaObject<kArenaAllocRegisterAllocator> {
119   CoalesceOpportunity(InterferenceNode* a,
120                       InterferenceNode* b,
121                       CoalesceKind kind,
122                       size_t position,
123                       const SsaLivenessAnalysis& liveness)
124         : node_a(a),
125           node_b(b),
126           stage(CoalesceStage::kWorklist),
127           priority(ComputeCoalescePriority(kind, position, liveness)) {}
128 
129   // Compare two coalesce opportunities based on their priority.
130   // Return true if lhs has a lower priority than that of rhs.
131   static bool CmpPriority(const CoalesceOpportunity* lhs,
132                           const CoalesceOpportunity* rhs) {
133     return lhs->priority < rhs->priority;
134   }
135 
136   InterferenceNode* const node_a;
137   InterferenceNode* const node_b;
138 
139   // The current stage of this coalesce opportunity, indicating whether it is in a worklist,
140   // and whether it should still be considered.
141   CoalesceStage stage;
142 
143   // The priority of this coalesce opportunity, based on heuristics.
144   const size_t priority;
145 };
146 
147 enum class NodeStage {
148   kInitial,           // Uninitialized.
149   kPrecolored,        // Marks fixed nodes.
150   kSafepoint,         // Marks safepoint nodes.
151   kPrunable,          // Marks uncolored nodes in the interference graph.
152   kSimplifyWorklist,  // Marks non-move-related nodes with degree less than the number of registers.
153   kFreezeWorklist,    // Marks move-related nodes with degree less than the number of registers.
154   kSpillWorklist,     // Marks nodes with degree greater or equal to the number of registers.
155   kPruned             // Marks nodes already pruned from the interference graph.
156 };
157 
158 std::ostream& operator<<(std::ostream& os, const NodeStage& stage) {
159   return os << static_cast<typename std::underlying_type<NodeStage>::type>(stage);
160 }
161 
162 // Returns the estimated cost of spilling a particular live interval.
163 static float ComputeSpillWeight(LiveInterval* interval, const SsaLivenessAnalysis& liveness) {
164   if (interval->HasRegister()) {
165     // Intervals with a fixed register cannot be spilled.
166     return std::numeric_limits<float>::min();
167   }
168 
169   size_t length = interval->GetLength();
170   if (length == 1) {
171     // Tiny intervals should have maximum priority, since they cannot be split any further.
172     return std::numeric_limits<float>::max();
173   }
174 
175   size_t use_weight = 0;
176   if (interval->GetDefinedBy() != nullptr && interval->DefinitionRequiresRegister()) {
177     // Cost for spilling at a register definition point.
178     use_weight += CostForMoveAt(interval->GetStart() + 1, liveness);
179   }
180 
181   // Process uses in the range (interval->GetStart(), interval->GetEnd()], i.e.
182   // [interval->GetStart() + 1, interval->GetEnd() + 1)
183   auto matching_use_range = FindMatchingUseRange(interval->GetUses().begin(),
184                                                  interval->GetUses().end(),
185                                                  interval->GetStart() + 1u,
186                                                  interval->GetEnd() + 1u);
187   for (const UsePosition& use : matching_use_range) {
188     if (use.GetUser() != nullptr && use.RequiresRegister()) {
189       // Cost for spilling at a register use point.
190       use_weight += CostForMoveAt(use.GetUser()->GetLifetimePosition() - 1, liveness);
191     }
192   }
193 
194   // We divide by the length of the interval because we want to prioritize
195   // short intervals; we do not benefit much if we split them further.
196   return static_cast<float>(use_weight) / static_cast<float>(length);
197 }
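// Editor's illustrative example (assumes the blocks involved are not single-jump blocks and do
// not dominate the exit block): an interval of length 40 with two register uses outside of any
// loop gets spill weight (1 + 1) / 40 = 0.05, whereas an interval of length 4 with a single
// register use at loop depth 1 gets 10 / 4 = 2.5. The short, loop-resident interval is therefore
// far more likely to keep its register.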
198 
199 // Interference nodes make up the interference graph, which is the primary data structure in
200 // graph coloring register allocation. Each node represents a single live interval, and contains
201 // a set of adjacent nodes corresponding to intervals overlapping with its own. To save memory,
202 // pre-colored nodes never contain outgoing edges (only incoming ones).
203 //
204 // As nodes are pruned from the interference graph, incoming edges of the pruned node are removed,
205 // but outgoing edges remain in order to later color the node based on the colors of its neighbors.
206 //
207 // Note that a pair interval is represented by a single node in the interference graph, which
208 // essentially requires two colors. One consequence of this is that the degree of a node is not
209 // necessarily equal to the number of adjacent nodes--instead, the degree reflects the maximum
210 // number of colors with which a node could interfere. We model this by giving edges different
211 // weights (1 or 2) to control how much each edge increases the degree of adjacent nodes.
212 // For example, the edge between two single nodes will have weight 1. On the other hand,
213 // the edge between a single node and a pair node will have weight 2. This is because the pair
214 // node could block up to two colors for the single node, and because the single node could
215 // block an entire two-register aligned slot for the pair node.
216 // The degree is defined this way because we use it to decide whether a node is guaranteed a color,
217 // and thus whether it is safe to prune it from the interference graph early on.
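// Editor's illustrative example (not part of the original comment): if node A represents a pair
// interval and node B a single interval, the A-B edge has weight 2 (see EdgeWeightWith below),
// so adding that one edge raises B's degree by 2. With, say, 4 available registers, B remains
// guaranteed a color only while its degree stays below 4, even though it may have fewer than
// 4 actual neighbors.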
218 class InterferenceNode : public ArenaObject<kArenaAllocRegisterAllocator> {
219  public:
220   InterferenceNode(LiveInterval* interval,
221                    const SsaLivenessAnalysis& liveness)
222         : stage(NodeStage::kInitial),
223           interval_(interval),
224           adjacent_nodes_(nullptr),
225           coalesce_opportunities_(nullptr),
226           out_degree_(interval->HasRegister() ? std::numeric_limits<size_t>::max() : 0),
227           alias_(this),
228           spill_weight_(ComputeSpillWeight(interval, liveness)),
229           requires_color_(interval->RequiresRegister()),
230           needs_spill_slot_(false) {
231     DCHECK(!interval->IsHighInterval()) << "Pair nodes should be represented by the low interval";
232   }
233 
234   void AddInterference(InterferenceNode* other,
235                        bool guaranteed_not_interfering_yet,
236                        ScopedArenaDeque<ScopedArenaVector<InterferenceNode*>>* storage) {
237     DCHECK(!IsPrecolored()) << "To save memory, fixed nodes should not have outgoing interferences";
238     DCHECK_NE(this, other) << "Should not create self loops in the interference graph";
239     DCHECK_EQ(this, alias_) << "Should not add interferences to a node that aliases another";
240     DCHECK_NE(stage, NodeStage::kPruned);
241     DCHECK_NE(other->stage, NodeStage::kPruned);
242     if (adjacent_nodes_ == nullptr) {
243       ScopedArenaVector<InterferenceNode*>::allocator_type adapter(storage->get_allocator());
244       storage->emplace_back(adapter);
245       adjacent_nodes_ = &storage->back();
246     }
247     if (guaranteed_not_interfering_yet) {
248       DCHECK(!ContainsElement(GetAdjacentNodes(), other));
249       adjacent_nodes_->push_back(other);
250       out_degree_ += EdgeWeightWith(other);
251     } else {
252       if (!ContainsElement(GetAdjacentNodes(), other)) {
253         adjacent_nodes_->push_back(other);
254         out_degree_ += EdgeWeightWith(other);
255       }
256     }
257   }
258 
259   void RemoveInterference(InterferenceNode* other) {
260     DCHECK_EQ(this, alias_) << "Should not remove interferences from a coalesced node";
261     DCHECK_EQ(other->stage, NodeStage::kPruned) << "Should only remove interferences when pruning";
262     if (adjacent_nodes_ != nullptr) {
263       auto it = std::find(adjacent_nodes_->begin(), adjacent_nodes_->end(), other);
264       if (it != adjacent_nodes_->end()) {
265         adjacent_nodes_->erase(it);
266         out_degree_ -= EdgeWeightWith(other);
267       }
268     }
269   }
270 
271   bool ContainsInterference(InterferenceNode* other) const {
272     DCHECK(!IsPrecolored()) << "Should not query fixed nodes for interferences";
273     DCHECK_EQ(this, alias_) << "Should not query a coalesced node for interferences";
274     return ContainsElement(GetAdjacentNodes(), other);
275   }
276 
277   LiveInterval* GetInterval() const {
278     return interval_;
279   }
280 
281   ArrayRef<InterferenceNode*> GetAdjacentNodes() const {
282     return adjacent_nodes_ != nullptr
283         ? ArrayRef<InterferenceNode*>(*adjacent_nodes_)
284         : ArrayRef<InterferenceNode*>();
285   }
286 
287   size_t GetOutDegree() const {
288     // Pre-colored nodes have infinite degree.
289     DCHECK_IMPLIES(IsPrecolored(), out_degree_ == std::numeric_limits<size_t>::max());
290     return out_degree_;
291   }
292 
293   void AddCoalesceOpportunity(CoalesceOpportunity* opportunity,
294                               ScopedArenaDeque<ScopedArenaVector<CoalesceOpportunity*>>* storage) {
295     if (coalesce_opportunities_ == nullptr) {
296       ScopedArenaVector<CoalesceOpportunity*>::allocator_type adapter(storage->get_allocator());
297       storage->emplace_back(adapter);
298       coalesce_opportunities_ = &storage->back();
299     }
300     coalesce_opportunities_->push_back(opportunity);
301   }
302 
303   void ClearCoalesceOpportunities() {
304     coalesce_opportunities_ = nullptr;
305   }
306 
307   bool IsMoveRelated() const {
308     for (CoalesceOpportunity* opportunity : GetCoalesceOpportunities()) {
309       if (opportunity->stage == CoalesceStage::kWorklist ||
310           opportunity->stage == CoalesceStage::kActive) {
311         return true;
312       }
313     }
314     return false;
315   }
316 
317   // Return whether this node already has a color.
318   // Used to find fixed nodes in the interference graph before coloring.
319   bool IsPrecolored() const {
320     return interval_->HasRegister();
321   }
322 
323   bool IsPair() const {
324     return interval_->HasHighInterval();
325   }
326 
327   void SetAlias(InterferenceNode* rep) {
328     DCHECK_NE(rep->stage, NodeStage::kPruned);
329     DCHECK_EQ(this, alias_) << "Should only set a node's alias once";
330     alias_ = rep;
331   }
332 
333   InterferenceNode* GetAlias() {
334     if (alias_ != this) {
335       // Recurse in order to flatten tree of alias pointers.
336       alias_ = alias_->GetAlias();
337     }
338     return alias_;
339   }
340 
341   ArrayRef<CoalesceOpportunity*> GetCoalesceOpportunities() const {
342     return coalesce_opportunities_ != nullptr
343         ? ArrayRef<CoalesceOpportunity*>(*coalesce_opportunities_)
344         : ArrayRef<CoalesceOpportunity*>();
345   }
346 
347   float GetSpillWeight() const {
348     return spill_weight_;
349   }
350 
351   bool RequiresColor() const {
352     return requires_color_;
353   }
354 
355   // We give extra weight to edges adjacent to pair nodes. See the general comment on the
356   // interference graph above.
357   size_t EdgeWeightWith(const InterferenceNode* other) const {
358     return (IsPair() || other->IsPair()) ? 2 : 1;
359   }
360 
361   bool NeedsSpillSlot() const {
362     return needs_spill_slot_;
363   }
364 
365   void SetNeedsSpillSlot() {
366     needs_spill_slot_ = true;
367   }
368 
369   // The current stage of this node, indicating which worklist it belongs to.
370   NodeStage stage;
371 
372  private:
373   // The live interval that this node represents.
374   LiveInterval* const interval_;
375 
376   // All nodes interfering with this one.
377   // We use an unsorted vector as a set, since a tree or hash set is too heavy for the
378   // set sizes that we encounter. Using a vector leads to much better performance.
379   ScopedArenaVector<InterferenceNode*>* adjacent_nodes_;  // Owned by ColoringIteration.
380 
381   // Interference nodes that this node should be coalesced with to reduce moves.
382   ScopedArenaVector<CoalesceOpportunity*>* coalesce_opportunities_;  // Owned by ColoringIteration.
383 
384   // The maximum number of colors with which this node could interfere. This could be more than
385   // the number of adjacent nodes if this is a pair node, or if some adjacent nodes are pair nodes.
386   // We use "out" degree because incoming edges come from nodes already pruned from the graph,
387   // and do not affect the coloring of this node.
388   // Pre-colored nodes are treated as having infinite degree.
389   size_t out_degree_;
390 
391   // The node representing this node in the interference graph.
392   // Initially set to `this`, and only changed if this node is coalesced into another.
393   InterferenceNode* alias_;
394 
395   // The cost of splitting and spilling this interval to the stack.
396   // Nodes with a higher spill weight should be prioritized when assigning registers.
397   // This is essentially based on use density and location; short intervals with many uses inside
398   // deeply nested loops have a high spill weight.
399   const float spill_weight_;
400 
401   const bool requires_color_;
402 
403   bool needs_spill_slot_;
404 
405   DISALLOW_COPY_AND_ASSIGN(InterferenceNode);
406 };
407 
408 // The order in which we color nodes is important. To guarantee forward progress,
409 // we prioritize intervals that require registers, and after that we prioritize
410 // short intervals. That way, if we fail to color a node, it either won't require a
411 // register, or it will be a long interval that can be split in order to make the
412 // interference graph sparser.
413 // To improve code quality, we prioritize intervals used frequently in deeply nested loops.
414 // (This metric is secondary to the forward progress requirements above.)
415 // TODO: May also want to consider:
416 // - Constants (since they can be rematerialized)
417 // - Allocated spill slots
418 static bool HasGreaterNodePriority(const InterferenceNode* lhs,
419                                    const InterferenceNode* rhs) {
420   // (1) Prioritize the node that requires a color.
421   if (lhs->RequiresColor() != rhs->RequiresColor()) {
422     return lhs->RequiresColor();
423   }
424 
425   // (2) Prioritize the interval that has a higher spill weight.
426   return lhs->GetSpillWeight() > rhs->GetSpillWeight();
427 }
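// Editor's note on the ordering above: a node that requires a color always sorts ahead of one
// that does not, regardless of spill weight; e.g., a color-requiring node with spill weight 0.1
// outranks a non-requiring node with spill weight 5.0. Spill weight only breaks ties between
// nodes whose RequiresColor() values match.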
428 
429 // A ColoringIteration holds the many data structures needed for a single graph coloring attempt,
430 // and provides methods for each phase of the attempt.
431 class ColoringIteration {
432  public:
433   ColoringIteration(RegisterAllocatorGraphColor* register_allocator,
434                     ScopedArenaAllocator* allocator,
435                     bool processing_core_regs,
436                     size_t num_regs)
437         : register_allocator_(register_allocator),
438           allocator_(allocator),
439           processing_core_regs_(processing_core_regs),
440           num_regs_(num_regs),
441           interval_node_map_(allocator->Adapter(kArenaAllocRegisterAllocator)),
442           prunable_nodes_(allocator->Adapter(kArenaAllocRegisterAllocator)),
443           pruned_nodes_(allocator->Adapter(kArenaAllocRegisterAllocator)),
444           simplify_worklist_(allocator->Adapter(kArenaAllocRegisterAllocator)),
445           freeze_worklist_(allocator->Adapter(kArenaAllocRegisterAllocator)),
446           spill_worklist_(HasGreaterNodePriority, allocator->Adapter(kArenaAllocRegisterAllocator)),
447           coalesce_worklist_(CoalesceOpportunity::CmpPriority,
448                              allocator->Adapter(kArenaAllocRegisterAllocator)),
449           adjacent_nodes_links_(allocator->Adapter(kArenaAllocRegisterAllocator)),
450           coalesce_opportunities_links_(allocator->Adapter(kArenaAllocRegisterAllocator)) {}
451 
452   // Use the intervals collected from instructions to construct an
453   // interference graph mapping intervals to adjacency lists.
454   // Also, collect synthesized safepoint nodes, used to keep
455   // track of live intervals across safepoints.
456   // TODO: Should build safepoints elsewhere.
457   void BuildInterferenceGraph(const ScopedArenaVector<LiveInterval*>& intervals,
458                               const ScopedArenaVector<InterferenceNode*>& physical_nodes);
459 
460   // Add coalesce opportunities to interference nodes.
461   void FindCoalesceOpportunities();
462 
463   // Prune nodes from the interference graph to be colored later. Build
464   // a stack (pruned_nodes) containing these intervals in an order determined
465   // by various heuristics.
466   void PruneInterferenceGraph();
467 
468   // Process pruned_nodes_ to color the interference graph, spilling when
469   // necessary. Returns true if successful. Else, some intervals have been
470   // split, and the interference graph should be rebuilt for another attempt.
471   bool ColorInterferenceGraph();
472 
473   // Return prunable nodes.
474   // The register allocator will need to access prunable nodes after coloring
475   // in order to tell the code generator which registers have been assigned.
476   ArrayRef<InterferenceNode* const> GetPrunableNodes() const {
477     return ArrayRef<InterferenceNode* const>(prunable_nodes_);
478   }
479 
480  private:
481   // Create a coalesce opportunity between two nodes.
482   void CreateCoalesceOpportunity(InterferenceNode* a,
483                                  InterferenceNode* b,
484                                  CoalesceKind kind,
485                                  size_t position);
486 
487   // Add an edge in the interference graph, if valid.
488   // Note that `guaranteed_not_interfering_yet` is used to optimize adjacency set insertion
489   // when possible.
490   void AddPotentialInterference(InterferenceNode* from,
491                                 InterferenceNode* to,
492                                 bool guaranteed_not_interfering_yet,
493                                 bool both_directions = true);
494 
495   // Invalidate all coalesce opportunities this node has, so that it (and possibly its neighbors)
496   // may be pruned from the interference graph.
497   void FreezeMoves(InterferenceNode* node);
498 
499   // Prune a node from the interference graph, updating worklists if necessary.
500   void PruneNode(InterferenceNode* node);
501 
502   // Add coalesce opportunities associated with this node to the coalesce worklist.
503   void EnableCoalesceOpportunities(InterferenceNode* node);
504 
505   // If needed, move `node` from the freeze worklist to the simplify worklist.
506   void CheckTransitionFromFreezeWorklist(InterferenceNode* node);
507 
508   // Return true if `into` is colored, and `from` can be coalesced with `into` conservatively.
509   bool PrecoloredHeuristic(InterferenceNode* from, InterferenceNode* into);
510 
511   // Return true if `from` and `into` are uncolored, and can be coalesced conservatively.
512   bool UncoloredHeuristic(InterferenceNode* from, InterferenceNode* into);
513 
514   void Coalesce(CoalesceOpportunity* opportunity);
515 
516   // Merge `from` into `into` in the interference graph.
517   void Combine(InterferenceNode* from, InterferenceNode* into);
518 
519   // A reference to the register allocator instance,
520   // needed to split intervals and assign spill slots.
521   RegisterAllocatorGraphColor* register_allocator_;
522 
523   // A scoped arena allocator used for a single graph coloring attempt.
524   ScopedArenaAllocator* allocator_;
525 
526   const bool processing_core_regs_;
527 
528   const size_t num_regs_;
529 
530   // A map from live intervals to interference nodes.
531   ScopedArenaHashMap<LiveInterval*, InterferenceNode*> interval_node_map_;
532 
533   // Uncolored nodes that should be pruned from the interference graph.
534   ScopedArenaVector<InterferenceNode*> prunable_nodes_;
535 
536   // A stack of nodes pruned from the interference graph, waiting to be colored.
537   ScopedArenaStdStack<InterferenceNode*> pruned_nodes_;
538 
539   // A queue containing low degree, non-move-related nodes that can be pruned immediately.
540   ScopedArenaDeque<InterferenceNode*> simplify_worklist_;
541 
542   // A queue containing low degree, move-related nodes.
543   ScopedArenaDeque<InterferenceNode*> freeze_worklist_;
544 
545   // A queue containing high degree nodes.
546   // If we have to prune from the spill worklist, we cannot guarantee
547   // the pruned node a color, so we order the worklist by priority.
548   ScopedArenaPriorityQueue<InterferenceNode*, decltype(&HasGreaterNodePriority)> spill_worklist_;
549 
550   // A queue containing coalesce opportunities.
551   // We order the coalesce worklist by priority, since some coalesce opportunities (e.g., those
552   // inside of loops) are more important than others.
553   ScopedArenaPriorityQueue<CoalesceOpportunity*,
554                            decltype(&CoalesceOpportunity::CmpPriority)> coalesce_worklist_;
555 
556   // Storage for links to adjacent nodes for interference nodes.
557   // Using std::deque so that elements do not move when adding new ones.
558   ScopedArenaDeque<ScopedArenaVector<InterferenceNode*>> adjacent_nodes_links_;
559 
560   // Storage for links to coalesce opportunities for interference nodes.
561   // Using std::deque so that elements do not move when adding new ones.
562   ScopedArenaDeque<ScopedArenaVector<CoalesceOpportunity*>> coalesce_opportunities_links_;
563 
564   DISALLOW_COPY_AND_ASSIGN(ColoringIteration);
565 };
566 
567 static bool IsCoreInterval(LiveInterval* interval) {
568   return !DataType::IsFloatingPointType(interval->GetType());
569 }
570 
571 static size_t ComputeReservedArtMethodSlots(const CodeGenerator& codegen) {
572   return static_cast<size_t>(InstructionSetPointerSize(codegen.GetInstructionSet())) / kVRegSize;
573 }
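// Editor's illustrative note (assuming the usual 4-byte vreg slot size): a 64-bit target with an
// 8-byte ArtMethod pointer reserves 8 / 4 = 2 slots, while a 32-bit target reserves 4 / 4 = 1.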
574 
575 RegisterAllocatorGraphColor::RegisterAllocatorGraphColor(ScopedArenaAllocator* allocator,
576                                                          CodeGenerator* codegen,
577                                                          const SsaLivenessAnalysis& liveness,
578                                                          bool iterative_move_coalescing)
579       : RegisterAllocator(allocator, codegen, liveness),
580         iterative_move_coalescing_(iterative_move_coalescing),
581         core_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)),
582         fp_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)),
583         temp_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)),
584         safepoints_(allocator->Adapter(kArenaAllocRegisterAllocator)),
585         physical_core_nodes_(allocator->Adapter(kArenaAllocRegisterAllocator)),
586         physical_fp_nodes_(allocator->Adapter(kArenaAllocRegisterAllocator)),
587         num_int_spill_slots_(0),
588         num_double_spill_slots_(0),
589         num_float_spill_slots_(0),
590         num_long_spill_slots_(0),
591         catch_phi_spill_slot_counter_(0),
592         reserved_art_method_slots_(ComputeReservedArtMethodSlots(*codegen)),
593         reserved_out_slots_(codegen->GetGraph()->GetMaximumNumberOfOutVRegs()) {
594   // Before we ask for blocked registers, set them up in the code generator.
595   codegen->SetupBlockedRegisters();
596 
597   // Initialize physical core register live intervals and blocked registers.
598   // This includes globally blocked registers, such as the stack pointer.
599   physical_core_nodes_.resize(codegen_->GetNumberOfCoreRegisters(), nullptr);
600   for (size_t i = 0; i < codegen_->GetNumberOfCoreRegisters(); ++i) {
601     LiveInterval* interval = LiveInterval::MakeFixedInterval(allocator_, i, DataType::Type::kInt32);
602     physical_core_nodes_[i] = new (allocator_) InterferenceNode(interval, liveness);
603     physical_core_nodes_[i]->stage = NodeStage::kPrecolored;
604     core_intervals_.push_back(interval);
605     if (codegen_->IsBlockedCoreRegister(i)) {
606       interval->AddRange(0, liveness.GetMaxLifetimePosition());
607     }
608   }
609   // Initialize physical floating point register live intervals and blocked registers.
610   physical_fp_nodes_.resize(codegen_->GetNumberOfFloatingPointRegisters(), nullptr);
611   for (size_t i = 0; i < codegen_->GetNumberOfFloatingPointRegisters(); ++i) {
612     LiveInterval* interval =
613         LiveInterval::MakeFixedInterval(allocator_, i, DataType::Type::kFloat32);
614     physical_fp_nodes_[i] = new (allocator_) InterferenceNode(interval, liveness);
615     physical_fp_nodes_[i]->stage = NodeStage::kPrecolored;
616     fp_intervals_.push_back(interval);
617     if (codegen_->IsBlockedFloatingPointRegister(i)) {
618       interval->AddRange(0, liveness.GetMaxLifetimePosition());
619     }
620   }
621 }
622 
623 RegisterAllocatorGraphColor::~RegisterAllocatorGraphColor() {}
624 
625 void RegisterAllocatorGraphColor::AllocateRegisters() {
626   // (1) Collect and prepare live intervals.
627   ProcessInstructions();
628 
629   for (bool processing_core_regs : {true, false}) {
630     ScopedArenaVector<LiveInterval*>& intervals = processing_core_regs
631         ? core_intervals_
632         : fp_intervals_;
633     size_t num_registers = processing_core_regs
634         ? codegen_->GetNumberOfCoreRegisters()
635         : codegen_->GetNumberOfFloatingPointRegisters();
636 
637     size_t attempt = 0;
638     while (true) {
639       ++attempt;
640       DCHECK(attempt <= kMaxGraphColoringAttemptsDebug)
641           << "Exceeded debug max graph coloring register allocation attempts. "
642           << "This could indicate that the register allocator is not making forward progress, "
643           << "which could be caused by prioritizing the wrong live intervals. (Short intervals "
644           << "should be prioritized over long ones, because they cannot be split further.)";
645 
646       // Many data structures are cleared between graph coloring attempts, so we reduce
647       // total memory usage by using a new scoped arena allocator for each attempt.
648       ScopedArenaAllocator coloring_attempt_allocator(allocator_->GetArenaStack());
649       ColoringIteration iteration(this,
650                                   &coloring_attempt_allocator,
651                                   processing_core_regs,
652                                   num_registers);
653 
654       // (2) Build the interference graph.
655       ScopedArenaVector<InterferenceNode*>& physical_nodes = processing_core_regs
656           ? physical_core_nodes_
657           : physical_fp_nodes_;
658       iteration.BuildInterferenceGraph(intervals, physical_nodes);
659 
660       // (3) Add coalesce opportunities.
661       //     If we have tried coloring the graph a suspiciously high number of times, give
662       //     up on move coalescing, just in case the coalescing heuristics are not conservative.
663       //     (This situation will be caught if DCHECKs are turned on.)
664       if (iterative_move_coalescing_ && attempt <= kMaxGraphColoringAttemptsDebug) {
665         iteration.FindCoalesceOpportunities();
666       }
667 
668       // (4) Prune all uncolored nodes from interference graph.
669       iteration.PruneInterferenceGraph();
670 
671       // (5) Color pruned nodes based on interferences.
672       bool successful = iteration.ColorInterferenceGraph();
673 
674       // We manually clear coalesce opportunities for physical nodes,
675       // since they persist across coloring attempts.
676       for (InterferenceNode* node : physical_core_nodes_) {
677         node->ClearCoalesceOpportunities();
678       }
679       for (InterferenceNode* node : physical_fp_nodes_) {
680         node->ClearCoalesceOpportunities();
681       }
682 
683       if (successful) {
684         // Assign spill slots.
685         AllocateSpillSlots(iteration.GetPrunableNodes());
686 
687         // Tell the code generator which registers were allocated.
688         // We only look at prunable_nodes because we already told the code generator about
689         // fixed intervals while processing instructions. We also ignore the fixed intervals
690         // placed at the top of catch blocks.
691         for (InterferenceNode* node : iteration.GetPrunableNodes()) {
692           LiveInterval* interval = node->GetInterval();
693           if (interval->HasRegister()) {
694             Location low_reg = processing_core_regs
695                 ? Location::RegisterLocation(interval->GetRegister())
696                 : Location::FpuRegisterLocation(interval->GetRegister());
697             codegen_->AddAllocatedRegister(low_reg);
698             if (interval->HasHighInterval()) {
699               LiveInterval* high = interval->GetHighInterval();
700               DCHECK(high->HasRegister());
701               Location high_reg = processing_core_regs
702                   ? Location::RegisterLocation(high->GetRegister())
703                   : Location::FpuRegisterLocation(high->GetRegister());
704               codegen_->AddAllocatedRegister(high_reg);
705             }
706           } else {
707             DCHECK_IMPLIES(interval->HasHighInterval(),
708                            !interval->GetHighInterval()->HasRegister());
709           }
710         }
711 
712         break;
713       }
714     }  // while unsuccessful
715   }  // for processing_core_regs
716 
717   // (6) Resolve locations and deconstruct SSA form.
718   RegisterAllocationResolver(codegen_, liveness_)
719       .Resolve(ArrayRef<HInstruction* const>(safepoints_),
720                reserved_art_method_slots_ + reserved_out_slots_,
721                num_int_spill_slots_,
722                num_long_spill_slots_,
723                num_float_spill_slots_,
724                num_double_spill_slots_,
725                catch_phi_spill_slot_counter_,
726                ArrayRef<LiveInterval* const>(temp_intervals_));
727 
728   if (kIsDebugBuild) {
729     Validate(/*log_fatal_on_failure*/ true);
730   }
731 }
732 
733 bool RegisterAllocatorGraphColor::Validate(bool log_fatal_on_failure) {
734   for (bool processing_core_regs : {true, false}) {
735     ScopedArenaAllocator allocator(allocator_->GetArenaStack());
736     ScopedArenaVector<LiveInterval*> intervals(
737         allocator.Adapter(kArenaAllocRegisterAllocatorValidate));
738     for (size_t i = 0; i < liveness_.GetNumberOfSsaValues(); ++i) {
739       HInstruction* instruction = liveness_.GetInstructionFromSsaIndex(i);
740       LiveInterval* interval = instruction->GetLiveInterval();
741       if (interval != nullptr && IsCoreInterval(interval) == processing_core_regs) {
742         intervals.push_back(instruction->GetLiveInterval());
743       }
744     }
745 
746     ScopedArenaVector<InterferenceNode*>& physical_nodes = processing_core_regs
747         ? physical_core_nodes_
748         : physical_fp_nodes_;
749     for (InterferenceNode* fixed : physical_nodes) {
750       LiveInterval* interval = fixed->GetInterval();
751       if (interval->GetFirstRange() != nullptr) {
752         // Ideally we would check fixed ranges as well, but currently there are times when
753         // two fixed intervals for the same register will overlap. For example, a fixed input
754         // and a fixed output may sometimes share the same register, in which case there will be two
755         // fixed intervals for the same place.
756       }
757     }
758 
759     for (LiveInterval* temp : temp_intervals_) {
760       if (IsCoreInterval(temp) == processing_core_regs) {
761         intervals.push_back(temp);
762       }
763     }
764 
765     size_t spill_slots = num_int_spill_slots_
766                        + num_long_spill_slots_
767                        + num_float_spill_slots_
768                        + num_double_spill_slots_
769                        + catch_phi_spill_slot_counter_;
770     bool ok = ValidateIntervals(ArrayRef<LiveInterval* const>(intervals),
771                                 spill_slots,
772                                 reserved_art_method_slots_ + reserved_out_slots_,
773                                 *codegen_,
774                                 processing_core_regs,
775                                 log_fatal_on_failure);
776     if (!ok) {
777       return false;
778     }
779   }  // for processing_core_regs
780 
781   return true;
782 }
783 
784 void RegisterAllocatorGraphColor::ProcessInstructions() {
785   for (HBasicBlock* block : codegen_->GetGraph()->GetLinearPostOrder()) {
786     // Note that we currently depend on this ordering, since some helper
787     // code is designed for linear scan register allocation.
788     for (HBackwardInstructionIterator instr_it(block->GetInstructions());
789           !instr_it.Done();
790           instr_it.Advance()) {
791       ProcessInstruction(instr_it.Current());
792     }
793 
794     for (HInstructionIterator phi_it(block->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
795       ProcessInstruction(phi_it.Current());
796     }
797 
798     if (block->IsCatchBlock()
799         || (block->IsLoopHeader() && block->GetLoopInformation()->IsIrreducible())) {
800       // By blocking all registers at the top of each catch block or irreducible loop, we force
801       // intervals belonging to the live-in set of the catch/header block to be spilled.
802       // TODO(ngeoffray): Phis in this block could be allocated in registers.
803       size_t position = block->GetLifetimeStart();
804       BlockRegisters(position, position + 1);
805     }
806   }
807 }
808 
809 bool RegisterAllocatorGraphColor::TryRemoveSuspendCheckEntry(HInstruction* instruction) {
810   LocationSummary* locations = instruction->GetLocations();
811   if (instruction->IsSuspendCheckEntry() && !codegen_->NeedsSuspendCheckEntry()) {
812     // TODO: We do this here because we do not want the suspend check to artificially
813     // create live registers. We should find another place, but this is currently the
814     // simplest.
815     DCHECK_EQ(locations->GetTempCount(), 0u);
816     instruction->GetBlock()->RemoveInstruction(instruction);
817     return true;
818   }
819   return false;
820 }
821 
822 void RegisterAllocatorGraphColor::ProcessInstruction(HInstruction* instruction) {
823   LocationSummary* locations = instruction->GetLocations();
824   if (locations == nullptr) {
825     return;
826   }
827   if (TryRemoveSuspendCheckEntry(instruction)) {
828     return;
829   }
830 
831   CheckForTempLiveIntervals(instruction);
832   CheckForSafepoint(instruction);
833   if (locations->WillCall()) {
834     // If a call will happen, create fixed intervals for caller-save registers.
835     // TODO: Note that it may be beneficial to later split intervals at this point,
836     //       so that we allow last-minute moves from a caller-save register
837     //       to a callee-save register.
838     BlockRegisters(instruction->GetLifetimePosition(),
839                    instruction->GetLifetimePosition() + 1,
840                    /*caller_save_only*/ true);
841   }
842   CheckForFixedInputs(instruction);
843 
844   LiveInterval* interval = instruction->GetLiveInterval();
845   if (interval == nullptr) {
846     // Instructions lacking a valid output location do not have a live interval.
847     DCHECK(!locations->Out().IsValid());
848     return;
849   }
850 
851   // Low intervals act as representatives for their corresponding high interval.
852   DCHECK(!interval->IsHighInterval());
853   if (codegen_->NeedsTwoRegisters(interval->GetType())) {
854     interval->AddHighInterval();
855   }
856   AddSafepointsFor(instruction);
857   CheckForFixedOutput(instruction);
858   AllocateSpillSlotForCatchPhi(instruction);
859 
860   ScopedArenaVector<LiveInterval*>& intervals = IsCoreInterval(interval)
861       ? core_intervals_
862       : fp_intervals_;
863   if (interval->HasSpillSlot() || instruction->IsConstant()) {
864     // Note that if an interval already has a spill slot, then its value currently resides
865     // in the stack (e.g., parameters). Thus we do not have to allocate a register until its first
866     // register use. This is also true for constants, which can be materialized at any point.
867     size_t first_register_use = interval->FirstRegisterUse();
868     if (first_register_use != kNoLifetime) {
869       LiveInterval* split = SplitBetween(interval, interval->GetStart(), first_register_use - 1);
870       intervals.push_back(split);
871     } else {
872       // We won't allocate a register for this value.
873     }
874   } else {
875     intervals.push_back(interval);
876   }
877 }
878 
879 void RegisterAllocatorGraphColor::CheckForFixedInputs(HInstruction* instruction) {
880   // We simply block physical registers where necessary.
881   // TODO: Ideally we would coalesce the physical register with the register
882   //       allocated to the input value, but this can be tricky if, e.g., there
883   //       could be multiple physical register uses of the same value at the
884   //       same instruction. Furthermore, there's currently no distinction between
885   //       fixed inputs to a call (which will be clobbered) and other fixed inputs (which
886   //       may not be clobbered).
887   LocationSummary* locations = instruction->GetLocations();
888   size_t position = instruction->GetLifetimePosition();
889   for (size_t i = 0; i < locations->GetInputCount(); ++i) {
890     Location input = locations->InAt(i);
891     if (input.IsRegister() || input.IsFpuRegister()) {
892       BlockRegister(input, position, position + 1);
893       codegen_->AddAllocatedRegister(input);
894     } else if (input.IsPair()) {
895       BlockRegister(input.ToLow(), position, position + 1);
896       BlockRegister(input.ToHigh(), position, position + 1);
897       codegen_->AddAllocatedRegister(input.ToLow());
898       codegen_->AddAllocatedRegister(input.ToHigh());
899     }
900   }
901 }
902 
903 void RegisterAllocatorGraphColor::CheckForFixedOutput(HInstruction* instruction) {
904   // If an instruction has a fixed output location, we give the live interval a register and then
905   // proactively split it just after the definition point to avoid creating too many interferences
906   // with a fixed node.
907   LiveInterval* interval = instruction->GetLiveInterval();
908   Location out = interval->GetDefinedBy()->GetLocations()->Out();
909   size_t position = instruction->GetLifetimePosition();
910   DCHECK_GE(interval->GetEnd() - position, 2u);
911 
912   if (out.IsUnallocated() && out.GetPolicy() == Location::kSameAsFirstInput) {
913     out = instruction->GetLocations()->InAt(0);
914   }
915 
916   if (out.IsRegister() || out.IsFpuRegister()) {
917     interval->SetRegister(out.reg());
918     codegen_->AddAllocatedRegister(out);
919     Split(interval, position + 1);
920   } else if (out.IsPair()) {
921     interval->SetRegister(out.low());
922     interval->GetHighInterval()->SetRegister(out.high());
923     codegen_->AddAllocatedRegister(out.ToLow());
924     codegen_->AddAllocatedRegister(out.ToHigh());
925     Split(interval, position + 1);
926   } else if (out.IsStackSlot() || out.IsDoubleStackSlot()) {
927     interval->SetSpillSlot(out.GetStackIndex());
928   } else {
929     DCHECK(out.IsUnallocated() || out.IsConstant());
930   }
931 }
932 
933 void RegisterAllocatorGraphColor::AddSafepointsFor(HInstruction* instruction) {
934   LiveInterval* interval = instruction->GetLiveInterval();
935   for (size_t safepoint_index = safepoints_.size(); safepoint_index > 0; --safepoint_index) {
936     HInstruction* safepoint = safepoints_[safepoint_index - 1u];
937     size_t safepoint_position = safepoint->GetLifetimePosition();
938 
939     // Test that safepoints_ are ordered in the optimal way.
940     DCHECK(safepoint_index == safepoints_.size() ||
941            safepoints_[safepoint_index]->GetLifetimePosition() < safepoint_position);
942 
943     if (safepoint_position == interval->GetStart()) {
944       // The safepoint is for this instruction, so the location of the instruction
945       // does not need to be saved.
946       DCHECK_EQ(safepoint_index, safepoints_.size());
947       DCHECK_EQ(safepoint, instruction);
948       continue;
949     } else if (interval->IsDeadAt(safepoint_position)) {
950       break;
951     } else if (!interval->Covers(safepoint_position)) {
952       // Hole in the interval.
953       continue;
954     }
955     interval->AddSafepoint(safepoint);
956   }
957 }
958 
959 void RegisterAllocatorGraphColor::CheckForTempLiveIntervals(HInstruction* instruction) {
960   LocationSummary* locations = instruction->GetLocations();
961   size_t position = instruction->GetLifetimePosition();
962   for (size_t i = 0; i < locations->GetTempCount(); ++i) {
963     Location temp = locations->GetTemp(i);
964     if (temp.IsRegister() || temp.IsFpuRegister()) {
965       BlockRegister(temp, position, position + 1);
966       codegen_->AddAllocatedRegister(temp);
967     } else {
968       DCHECK(temp.IsUnallocated());
969       switch (temp.GetPolicy()) {
970         case Location::kRequiresRegister: {
971           LiveInterval* interval =
972               LiveInterval::MakeTempInterval(allocator_, DataType::Type::kInt32);
973           interval->AddTempUse(instruction, i);
974           core_intervals_.push_back(interval);
975           temp_intervals_.push_back(interval);
976           break;
977         }
978 
979         case Location::kRequiresFpuRegister: {
980           LiveInterval* interval =
981               LiveInterval::MakeTempInterval(allocator_, DataType::Type::kFloat64);
982           interval->AddTempUse(instruction, i);
983           fp_intervals_.push_back(interval);
984           temp_intervals_.push_back(interval);
985           if (codegen_->NeedsTwoRegisters(DataType::Type::kFloat64)) {
986             interval->AddHighInterval(/*is_temp*/ true);
987             temp_intervals_.push_back(interval->GetHighInterval());
988           }
989           break;
990         }
991 
992         default:
993           LOG(FATAL) << "Unexpected policy for temporary location "
994                      << temp.GetPolicy();
995       }
996     }
997   }
998 }
999 
1000 void RegisterAllocatorGraphColor::CheckForSafepoint(HInstruction* instruction) {
1001   LocationSummary* locations = instruction->GetLocations();
1002 
1003   if (locations->NeedsSafepoint()) {
1004     safepoints_.push_back(instruction);
1005   }
1006 }
1007 
1008 LiveInterval* RegisterAllocatorGraphColor::TrySplit(LiveInterval* interval, size_t position) {
1009   if (interval->GetStart() < position && position < interval->GetEnd()) {
1010     return Split(interval, position);
1011   } else {
1012     return interval;
1013   }
1014 }
1015 
1016 void RegisterAllocatorGraphColor::SplitAtRegisterUses(LiveInterval* interval) {
1017   DCHECK(!interval->IsHighInterval());
1018 
1019   // Split just after a register definition.
1020   if (interval->IsParent() && interval->DefinitionRequiresRegister()) {
1021     interval = TrySplit(interval, interval->GetStart() + 1);
1022   }
1023 
1024   // Process uses in the range [interval->GetStart(), interval->GetEnd()], i.e.
1025   // [interval->GetStart(), interval->GetEnd() + 1)
1026   auto matching_use_range = FindMatchingUseRange(interval->GetUses().begin(),
1027                                                  interval->GetUses().end(),
1028                                                  interval->GetStart(),
1029                                                  interval->GetEnd() + 1u);
1030   // Split around register uses.
1031   for (const UsePosition& use : matching_use_range) {
1032     if (use.RequiresRegister()) {
1033       size_t position = use.GetPosition();
1034       interval = TrySplit(interval, position - 1);
1035       if (liveness_.GetInstructionFromPosition(position / 2)->IsControlFlow()) {
1036         // If we are at the very end of a basic block, we cannot split right
1037         // at the use. Split just after instead.
1038         interval = TrySplit(interval, position + 1);
1039       } else {
1040         interval = TrySplit(interval, position);
1041       }
1042     }
1043   }
1044 }
1045 
1046 void RegisterAllocatorGraphColor::AllocateSpillSlotForCatchPhi(HInstruction* instruction) {
1047   if (instruction->IsPhi() && instruction->AsPhi()->IsCatchPhi()) {
1048     HPhi* phi = instruction->AsPhi();
1049     LiveInterval* interval = phi->GetLiveInterval();
1050 
1051     HInstruction* previous_phi = phi->GetPrevious();
1052     DCHECK(previous_phi == nullptr ||
1053            previous_phi->AsPhi()->GetRegNumber() <= phi->GetRegNumber())
1054         << "Phis expected to be sorted by vreg number, "
1055         << "so that equivalent phis are adjacent.";
1056 
1057     if (phi->IsVRegEquivalentOf(previous_phi)) {
1058       // Assign the same spill slot.
1059       DCHECK(previous_phi->GetLiveInterval()->HasSpillSlot());
1060       interval->SetSpillSlot(previous_phi->GetLiveInterval()->GetSpillSlot());
1061     } else {
1062       interval->SetSpillSlot(catch_phi_spill_slot_counter_);
1063       catch_phi_spill_slot_counter_ += interval->NumberOfSpillSlotsNeeded();
1064     }
1065   }
1066 }
1067 
1068 void RegisterAllocatorGraphColor::BlockRegister(Location location,
1069                                                 size_t start,
1070                                                 size_t end) {
1071   DCHECK(location.IsRegister() || location.IsFpuRegister());
1072   int reg = location.reg();
1073   LiveInterval* interval = location.IsRegister()
1074       ? physical_core_nodes_[reg]->GetInterval()
1075       : physical_fp_nodes_[reg]->GetInterval();
1076   DCHECK(interval->GetRegister() == reg);
1077   bool blocked_by_codegen = location.IsRegister()
1078       ? codegen_->IsBlockedCoreRegister(reg)
1079       : codegen_->IsBlockedFloatingPointRegister(reg);
1080   if (blocked_by_codegen) {
1081     // We've already blocked this register for the entire method. (And adding a
1082     // range inside another range violates the preconditions of AddRange).
1083   } else {
1084     interval->AddRange(start, end);
1085   }
1086 }
1087 
1088 void RegisterAllocatorGraphColor::BlockRegisters(size_t start, size_t end, bool caller_save_only) {
1089   for (size_t i = 0; i < codegen_->GetNumberOfCoreRegisters(); ++i) {
1090     if (!caller_save_only || !codegen_->IsCoreCalleeSaveRegister(i)) {
1091       BlockRegister(Location::RegisterLocation(i), start, end);
1092     }
1093   }
1094   for (size_t i = 0; i < codegen_->GetNumberOfFloatingPointRegisters(); ++i) {
1095     if (!caller_save_only || !codegen_->IsFloatingPointCalleeSaveRegister(i)) {
1096       BlockRegister(Location::FpuRegisterLocation(i), start, end);
1097     }
1098   }
1099 }
1100 
1101 void ColoringIteration::AddPotentialInterference(InterferenceNode* from,
1102                                                  InterferenceNode* to,
1103                                                  bool guaranteed_not_interfering_yet,
1104                                                  bool both_directions) {
1105   if (from->IsPrecolored()) {
1106     // We save space by ignoring outgoing edges from fixed nodes.
1107   } else if (to->IsPrecolored()) {
1108     // It is important that only a single node represents a given fixed register in the
1109     // interference graph. We retrieve that node here.
1110     const ScopedArenaVector<InterferenceNode*>& physical_nodes =
1111         to->GetInterval()->IsFloatingPoint() ? register_allocator_->physical_fp_nodes_
1112                                              : register_allocator_->physical_core_nodes_;
1113     InterferenceNode* physical_node = physical_nodes[to->GetInterval()->GetRegister()];
1114     from->AddInterference(
1115         physical_node, /*guaranteed_not_interfering_yet*/ false, &adjacent_nodes_links_);
1116     DCHECK_EQ(to->GetInterval()->GetRegister(), physical_node->GetInterval()->GetRegister());
1117     DCHECK_EQ(to->GetAlias(), physical_node) << "Fixed nodes should alias the canonical fixed node";
1118 
1119     // If a node interferes with a fixed pair node, the weight of the edge may
1120     // be inaccurate after using the alias of the pair node, because the alias of the pair node
1121     // is a singular node.
1122     // We could make special pair fixed nodes, but that ends up being too conservative because
1123     // a node could then interfere with both {r1} and {r1,r2}, leading to a degree of
1124     // three rather than two.
1125     // Instead, we explicitly add an interference with the high node of the fixed pair node.
1126     // TODO: This is too conservative at times for pair nodes, but the fact that fixed pair intervals
1127     //       can be unaligned on x86 complicates things.
1128     if (to->IsPair()) {
1129       InterferenceNode* high_node =
1130           physical_nodes[to->GetInterval()->GetHighInterval()->GetRegister()];
1131       DCHECK_EQ(to->GetInterval()->GetHighInterval()->GetRegister(),
1132                 high_node->GetInterval()->GetRegister());
1133       from->AddInterference(
1134           high_node, /*guaranteed_not_interfering_yet*/ false, &adjacent_nodes_links_);
1135     }
1136   } else {
1137     // Standard interference between two uncolored nodes.
1138     from->AddInterference(to, guaranteed_not_interfering_yet, &adjacent_nodes_links_);
1139   }
1140 
1141   if (both_directions) {
1142     AddPotentialInterference(to, from, guaranteed_not_interfering_yet, /*both_directions*/ false);
1143   }
1144 }
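
// Worked example (illustrative): suppose `to` is fixed to the register pair {r2, r3}.
// Its alias is the canonical fixed node for r2, so `from` first receives an edge to the
// canonical r2 node, and the IsPair() branch above adds a second edge to the canonical
// r3 node. `from` therefore interferes with {r2} and {r3} individually rather than with
// a dedicated {r2, r3} pair node.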
1145 
1146 // Returns true if `in_node` represents an input interval of `out_node`, and the output interval
1147 // is allowed to have the same register as the input interval.
1148 // TODO: Ideally we should just produce correct intervals in liveness analysis.
1149 //       We would need to refactor the current live interval layout to do so, which is
1150 //       no small task.
1151 static bool CheckInputOutputCanOverlap(InterferenceNode* in_node, InterferenceNode* out_node) {
1152   LiveInterval* output_interval = out_node->GetInterval();
1153   HInstruction* defined_by = output_interval->GetDefinedBy();
1154   if (defined_by == nullptr) {
1155     // This must not be a definition point.
1156     return false;
1157   }
1158 
1159   LocationSummary* locations = defined_by->GetLocations();
1160   if (locations->OutputCanOverlapWithInputs()) {
1161     // This instruction does not allow the output to reuse a register from an input.
1162     return false;
1163   }
1164 
1165   LiveInterval* input_interval = in_node->GetInterval();
1166   LiveInterval* next_sibling = input_interval->GetNextSibling();
1167   size_t def_position = defined_by->GetLifetimePosition();
1168   size_t use_position = def_position + 1;
1169   if (next_sibling != nullptr && next_sibling->GetStart() == use_position) {
1170     // The next sibling starts at the use position, so reusing the input register in the output
1171     // would clobber the input before it's moved into the sibling interval location.
1172     return false;
1173   }
1174 
1175   if (!input_interval->IsDeadAt(use_position) && input_interval->CoversSlow(use_position)) {
1176     // The input interval is live after the use position.
1177     return false;
1178   }
1179 
1180   HInputsRef inputs = defined_by->GetInputs();
1181   for (size_t i = 0; i < inputs.size(); ++i) {
1182     if (inputs[i]->GetLiveInterval()->GetSiblingAt(def_position) == input_interval) {
1183       DCHECK(input_interval->SameRegisterKind(*output_interval));
1184       return true;
1185     }
1186   }
1187 
1188   // The input interval was not an input for this instruction.
1189   return false;
1190 }
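
// Worked example (illustrative): consider `d = Add(a, b)` whose LocationSummary was
// built with Location::kNoOutputOverlap. If `a`'s interval ends at the use position
// (the definition position of `d` plus one) and is not immediately continued by a split
// sibling, the check above returns true for (a, d): no interference edge is added, so
// `d` may be colored with `a`'s register.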
1191 
1192 void ColoringIteration::BuildInterferenceGraph(
1193     const ScopedArenaVector<LiveInterval*>& intervals,
1194     const ScopedArenaVector<InterferenceNode*>& physical_nodes) {
1195   DCHECK(interval_node_map_.empty() && prunable_nodes_.empty());
1196   // Build the interference graph efficiently by ordering range endpoints
1197   // by position and doing a linear sweep to find interferences. (That is, we
1198   // jump from endpoint to endpoint, maintaining a set of intervals live at each
1199   // point. If two nodes are ever in the live set at the same time, then they
1200   // interfere with each other.) An illustrative sketch of this sweep follows this function.
1201   //
1202   // We order by both position and (secondarily) by whether the endpoint
1203   // begins or ends a range; we want to process range endings before range
1204   // beginnings at the same position because they should not conflict.
1205   //
1206   // For simplicity, we create a tuple for each endpoint, and then sort the tuples.
1207   // Tuple contents: (position, is_range_beginning, node).
1208   ScopedArenaVector<std::tuple<size_t, bool, InterferenceNode*>> range_endpoints(
1209       allocator_->Adapter(kArenaAllocRegisterAllocator));
1210 
1211   // We reserve plenty of space to avoid excessive copying.
1212   range_endpoints.reserve(4 * prunable_nodes_.size());
1213 
1214   for (LiveInterval* parent : intervals) {
1215     for (LiveInterval* sibling = parent; sibling != nullptr; sibling = sibling->GetNextSibling()) {
1216       LiveRange* range = sibling->GetFirstRange();
1217       if (range != nullptr) {
1218         InterferenceNode* node =
1219             new (allocator_) InterferenceNode(sibling, register_allocator_->liveness_);
1220         interval_node_map_.insert(std::make_pair(sibling, node));
1221 
1222         if (sibling->HasRegister()) {
1223           // Fixed nodes should alias the canonical node for the corresponding register.
1224           node->stage = NodeStage::kPrecolored;
1225           InterferenceNode* physical_node = physical_nodes[sibling->GetRegister()];
1226           node->SetAlias(physical_node);
1227           DCHECK_EQ(node->GetInterval()->GetRegister(),
1228                     physical_node->GetInterval()->GetRegister());
1229         } else {
1230           node->stage = NodeStage::kPrunable;
1231           prunable_nodes_.push_back(node);
1232         }
1233 
1234         while (range != nullptr) {
1235           range_endpoints.push_back(std::make_tuple(range->GetStart(), true, node));
1236           range_endpoints.push_back(std::make_tuple(range->GetEnd(), false, node));
1237           range = range->GetNext();
1238         }
1239       }
1240     }
1241   }
1242 
1243   // Sort the endpoints.
1244   // We explicitly ignore the third entry of each tuple (the node pointer) in order
1245   // to maintain determinism.
1246   std::sort(range_endpoints.begin(), range_endpoints.end(),
1247             [] (const std::tuple<size_t, bool, InterferenceNode*>& lhs,
1248                 const std::tuple<size_t, bool, InterferenceNode*>& rhs) {
1249     return std::tie(std::get<0>(lhs), std::get<1>(lhs))
1250          < std::tie(std::get<0>(rhs), std::get<1>(rhs));
1251   });
1252 
1253   // Nodes live at the current position in the linear sweep.
1254   ScopedArenaVector<InterferenceNode*> live(allocator_->Adapter(kArenaAllocRegisterAllocator));
1255 
1256   // Linear sweep. When we encounter the beginning of a range, we add the corresponding node to the
1257   // live set. When we encounter the end of a range, we remove the corresponding node
1258   // from the live set. Nodes interfere if they are in the live set at the same time.
1259   for (auto it = range_endpoints.begin(); it != range_endpoints.end(); ++it) {
1260     bool is_range_beginning;
1261     InterferenceNode* node;
1262     size_t position;
1263     // Extract information from the tuple, including the node this tuple represents.
1264     std::tie(position, is_range_beginning, node) = *it;
1265 
1266     if (is_range_beginning) {
1267       bool guaranteed_not_interfering_yet = position == node->GetInterval()->GetStart();
1268       for (InterferenceNode* conflicting : live) {
1269         DCHECK_NE(node, conflicting);
1270         if (CheckInputOutputCanOverlap(conflicting, node)) {
1271           // We do not add an interference, because the instruction represented by `node` allows
1272           // its output to share a register with an input, represented here by `conflicting`.
1273         } else {
1274           AddPotentialInterference(node, conflicting, guaranteed_not_interfering_yet);
1275         }
1276       }
1277       DCHECK(std::find(live.begin(), live.end(), node) == live.end());
1278       live.push_back(node);
1279     } else {
1280       // End of range.
1281       auto live_it = std::find(live.begin(), live.end(), node);
1282       DCHECK(live_it != live.end());
1283       live.erase(live_it);
1284     }
1285   }
1286   DCHECK(live.empty());
1287 }
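
// Illustrative sketch (not part of the allocator, kept out of the build with #if 0):
// the same endpoint sweep on simplified data. Intervals are single [start, end) ranges
// identified by an id; two intervals interfere iff both are in the live set at some
// sweep position. SimpleInterval and FindInterferences are hypothetical names used only
// for this example.
#if 0
#include <algorithm>
#include <cstddef>
#include <tuple>
#include <utility>
#include <vector>

struct SimpleInterval {
  size_t start;
  size_t end;
  size_t id;
};

std::vector<std::pair<size_t, size_t>> FindInterferences(
    const std::vector<SimpleInterval>& intervals) {
  // (position, is_beginning, id): with tuple ordering, an end (false) sorts before a
  // beginning (true) at the same position, so back-to-back intervals do not interfere.
  std::vector<std::tuple<size_t, bool, size_t>> endpoints;
  for (const SimpleInterval& interval : intervals) {
    endpoints.emplace_back(interval.start, true, interval.id);
    endpoints.emplace_back(interval.end, false, interval.id);
  }
  std::sort(endpoints.begin(), endpoints.end());

  std::vector<size_t> live;                              // Ids live at the sweep point.
  std::vector<std::pair<size_t, size_t>> interferences;  // Pairs of interfering ids.
  for (const auto& endpoint : endpoints) {
    bool is_beginning = std::get<1>(endpoint);
    size_t id = std::get<2>(endpoint);
    if (is_beginning) {
      for (size_t other : live) {
        interferences.emplace_back(other, id);  // Both live here: they interfere.
      }
      live.push_back(id);
    } else {
      live.erase(std::find(live.begin(), live.end(), id));
    }
  }
  return interferences;
}
#endif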
1288 
1289 void ColoringIteration::CreateCoalesceOpportunity(InterferenceNode* a,
1290                                                   InterferenceNode* b,
1291                                                   CoalesceKind kind,
1292                                                   size_t position) {
1293   DCHECK_EQ(a->IsPair(), b->IsPair())
1294       << "Nodes of different memory widths should never be coalesced";
1295   CoalesceOpportunity* opportunity =
1296       new (allocator_) CoalesceOpportunity(a, b, kind, position, register_allocator_->liveness_);
1297   a->AddCoalesceOpportunity(opportunity, &coalesce_opportunities_links_);
1298   b->AddCoalesceOpportunity(opportunity, &coalesce_opportunities_links_);
1299   coalesce_worklist_.push(opportunity);
1300 }
1301 
1302 // When looking for coalesce opportunities, we use the interval_node_map_ to find the node
1303 // corresponding to an interval. Note that not all intervals are in this map, notably the parents
1304 // of constants and stack arguments. (However, these intervals should not be involved in coalesce
1305 // opportunities anyway, because they're not going to be in registers.)
1306 void ColoringIteration::FindCoalesceOpportunities() {
1307   DCHECK(coalesce_worklist_.empty());
1308 
1309   for (InterferenceNode* node : prunable_nodes_) {
1310     LiveInterval* interval = node->GetInterval();
1311 
1312     // Coalesce siblings.
1313     LiveInterval* next_sibling = interval->GetNextSibling();
1314     if (next_sibling != nullptr && interval->GetEnd() == next_sibling->GetStart()) {
1315       auto it = interval_node_map_.find(next_sibling);
1316       if (it != interval_node_map_.end()) {
1317         InterferenceNode* sibling_node = it->second;
1318         CreateCoalesceOpportunity(node,
1319                                   sibling_node,
1320                                   CoalesceKind::kAdjacentSibling,
1321                                   interval->GetEnd());
1322       }
1323     }
1324 
1325     // Coalesce fixed outputs with this interval if this interval is an adjacent sibling.
1326     LiveInterval* parent = interval->GetParent();
1327     if (parent->HasRegister()
1328         && parent->GetNextSibling() == interval
1329         && parent->GetEnd() == interval->GetStart()) {
1330       auto it = interval_node_map_.find(parent);
1331       if (it != interval_node_map_.end()) {
1332         InterferenceNode* parent_node = it->second;
1333         CreateCoalesceOpportunity(node,
1334                                   parent_node,
1335                                   CoalesceKind::kFixedOutputSibling,
1336                                   parent->GetEnd());
1337       }
1338     }
1339 
1340     // Try to prevent moves across blocks.
1341     // Note that this does not lead to many successful coalesce attempts, so it could be removed
1342     // if found to add to compile time.
1343     const SsaLivenessAnalysis& liveness = register_allocator_->liveness_;
1344     if (interval->IsSplit() && liveness.IsAtBlockBoundary(interval->GetStart() / 2)) {
1345       // If the start of this interval is at a block boundary, we look at the
1346       // location of the interval in blocks preceding the block this interval
1347       // starts at. This can avoid a move between the two blocks.
1348       HBasicBlock* block = liveness.GetBlockFromPosition(interval->GetStart() / 2);
1349       for (HBasicBlock* predecessor : block->GetPredecessors()) {
1350         size_t position = predecessor->GetLifetimeEnd() - 1;
1351         LiveInterval* existing = interval->GetParent()->GetSiblingAt(position);
1352         if (existing != nullptr) {
1353           auto it = interval_node_map_.find(existing);
1354           if (it != interval_node_map_.end()) {
1355             InterferenceNode* existing_node = it->second;
1356             CreateCoalesceOpportunity(node,
1357                                       existing_node,
1358                                       CoalesceKind::kNonlinearControlFlow,
1359                                       position);
1360           }
1361         }
1362       }
1363     }
1364 
1365     // Coalesce phi inputs with the corresponding output.
1366     HInstruction* defined_by = interval->GetDefinedBy();
1367     if (defined_by != nullptr && defined_by->IsPhi()) {
1368       ArrayRef<HBasicBlock* const> predecessors(defined_by->GetBlock()->GetPredecessors());
1369       HInputsRef inputs = defined_by->GetInputs();
1370 
1371       for (size_t i = 0, e = inputs.size(); i < e; ++i) {
1372         // We want the sibling at the end of the appropriate predecessor block.
1373         size_t position = predecessors[i]->GetLifetimeEnd() - 1;
1374         LiveInterval* input_interval = inputs[i]->GetLiveInterval()->GetSiblingAt(position);
1375 
1376         auto it = interval_node_map_.find(input_interval);
1377         if (it != interval_node_map_.end()) {
1378           InterferenceNode* input_node = it->second;
1379           CreateCoalesceOpportunity(node, input_node, CoalesceKind::kPhi, position);
1380         }
1381       }
1382     }
1383 
1384     // Coalesce output with first input when policy is kSameAsFirstInput.
1385     if (defined_by != nullptr) {
1386       Location out = defined_by->GetLocations()->Out();
1387       if (out.IsUnallocated() && out.GetPolicy() == Location::kSameAsFirstInput) {
1388         LiveInterval* input_interval
1389             = defined_by->InputAt(0)->GetLiveInterval()->GetSiblingAt(interval->GetStart() - 1);
1390         // TODO: Could we consider lifetime holes here?
1391         if (input_interval->GetEnd() == interval->GetStart()) {
1392           auto it = interval_node_map_.find(input_interval);
1393           if (it != interval_node_map_.end()) {
1394             InterferenceNode* input_node = it->second;
1395             CreateCoalesceOpportunity(node,
1396                                       input_node,
1397                                       CoalesceKind::kFirstInput,
1398                                       interval->GetStart());
1399           }
1400         }
1401       }
1402     }
1403 
1404     // An interval that starts at its defining instruction (that is, it is not split) may
1405     // re-use the registers used by the inputs of that instruction, based on the
1406     // location summary.
1407     if (defined_by != nullptr) {
1408       DCHECK(!interval->IsSplit());
1409       LocationSummary* locations = defined_by->GetLocations();
1410       if (!locations->OutputCanOverlapWithInputs()) {
1411         HInputsRef inputs = defined_by->GetInputs();
1412         for (size_t i = 0; i < inputs.size(); ++i) {
1413           size_t def_point = defined_by->GetLifetimePosition();
1414           // TODO: Getting the sibling at the def_point might not be quite what we want
1415           //       for fixed inputs, since the use will be *at* the def_point rather than after.
1416           LiveInterval* input_interval = inputs[i]->GetLiveInterval()->GetSiblingAt(def_point);
1417           if (input_interval != nullptr &&
1418               input_interval->HasHighInterval() == interval->HasHighInterval()) {
1419             auto it = interval_node_map_.find(input_interval);
1420             if (it != interval_node_map_.end()) {
1421               InterferenceNode* input_node = it->second;
1422               CreateCoalesceOpportunity(node,
1423                                         input_node,
1424                                         CoalesceKind::kAnyInput,
1425                                         interval->GetStart());
1426             }
1427           }
1428         }
1429       }
1430     }
1431 
1432     // Try to prevent moves into fixed input locations.
1433     // Process uses in the range (interval->GetStart(), interval->GetEnd()], i.e.
1434     // [interval->GetStart() + 1, interval->GetEnd() + 1)
1435     auto matching_use_range = FindMatchingUseRange(interval->GetUses().begin(),
1436                                                    interval->GetUses().end(),
1437                                                    interval->GetStart() + 1u,
1438                                                    interval->GetEnd() + 1u);
1439     for (const UsePosition& use : matching_use_range) {
1440       HInstruction* user = use.GetUser();
1441       if (user == nullptr) {
1442         // User may be null for certain intervals, such as temp intervals.
1443         continue;
1444       }
1445       LocationSummary* locations = user->GetLocations();
1446       Location input = locations->InAt(use.GetInputIndex());
1447       if (input.IsRegister() || input.IsFpuRegister()) {
1448         // TODO: Could try to handle pair interval too, but coalescing with fixed pair nodes
1449         //       is currently not supported.
1450         InterferenceNode* fixed_node = input.IsRegister()
1451             ? register_allocator_->physical_core_nodes_[input.reg()]
1452             : register_allocator_->physical_fp_nodes_[input.reg()];
1453         CreateCoalesceOpportunity(node,
1454                                   fixed_node,
1455                                   CoalesceKind::kFixedInput,
1456                                   user->GetLifetimePosition());
1457       }
1458     }
1459   }  // for node in prunable_nodes
1460 }
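
// Worked example (illustrative) for the CoalesceKind::kPhi case above: for a phi
// `p = Phi(a, b)` in a block with predecessors P0 and P1, p's node is given coalesce
// opportunities with the sibling of `a` live at the end of P0 and the sibling of `b`
// live at the end of P1. When such a coalesce succeeds, the corresponding phi
// resolution move at the end of that predecessor becomes a move to the same register,
// i.e. a no-op.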
1461 
1462 static bool IsLowDegreeNode(InterferenceNode* node, size_t num_regs) {
1463   return node->GetOutDegree() < num_regs;
1464 }
1465 
1466 static bool IsHighDegreeNode(InterferenceNode* node, size_t num_regs) {
1467   return !IsLowDegreeNode(node, num_regs);
1468 }
1469 
1470 void ColoringIteration::PruneInterferenceGraph() {
1471   DCHECK(pruned_nodes_.empty()
1472       && simplify_worklist_.empty()
1473       && freeze_worklist_.empty()
1474       && spill_worklist_.empty());
1475   // When pruning the graph, we refer to nodes with degree less than num_regs as low degree nodes,
1476   // and all others as high degree nodes. The distinction is important: low degree nodes are
1477   // guaranteed a color (their neighbors can fill at most num_regs - 1 registers), while high degree nodes are not.
1478 
1479   // Build worklists. Note that the coalesce worklist has already been
1480   // filled by FindCoalesceOpportunities().
1481   for (InterferenceNode* node : prunable_nodes_) {
1482     DCHECK(!node->IsPrecolored()) << "Fixed nodes should never be pruned";
1483     if (IsLowDegreeNode(node, num_regs_)) {
1484       if (node->GetCoalesceOpportunities().empty()) {
1485         // Simplify Worklist.
1486         node->stage = NodeStage::kSimplifyWorklist;
1487         simplify_worklist_.push_back(node);
1488       } else {
1489         // Freeze Worklist.
1490         node->stage = NodeStage::kFreezeWorklist;
1491         freeze_worklist_.push_back(node);
1492       }
1493     } else {
1494       // Spill worklist.
1495       node->stage = NodeStage::kSpillWorklist;
1496       spill_worklist_.push(node);
1497     }
1498   }
1499 
1500   // Prune graph.
1501   // Note that we do not remove a node from its current worklist if it moves to another, so it may
1502   // be in multiple worklists at once; the node's `stage` says which worklist it is really in.
1503   while (true) {
1504     if (!simplify_worklist_.empty()) {
1505       // Prune low-degree nodes.
1506       // TODO: pop_back() should work as well, but it didn't; we get a
1507       //       failed check while pruning. We should look into this.
1508       InterferenceNode* node = simplify_worklist_.front();
1509       simplify_worklist_.pop_front();
1510       DCHECK_EQ(node->stage, NodeStage::kSimplifyWorklist) << "Cannot move from simplify list";
1511       DCHECK_LT(node->GetOutDegree(), num_regs_) << "Nodes in simplify list should be low degree";
1512       DCHECK(!node->IsMoveRelated()) << "Nodes in simplify list should not be move related";
1513       PruneNode(node);
1514     } else if (!coalesce_worklist_.empty()) {
1515       // Coalesce.
1516       CoalesceOpportunity* opportunity = coalesce_worklist_.top();
1517       coalesce_worklist_.pop();
1518       if (opportunity->stage == CoalesceStage::kWorklist) {
1519         Coalesce(opportunity);
1520       }
1521     } else if (!freeze_worklist_.empty()) {
1522       // Freeze moves and prune a low-degree move-related node.
1523       InterferenceNode* node = freeze_worklist_.front();
1524       freeze_worklist_.pop_front();
1525       if (node->stage == NodeStage::kFreezeWorklist) {
1526         DCHECK_LT(node->GetOutDegree(), num_regs_) << "Nodes in freeze list should be low degree";
1527         DCHECK(node->IsMoveRelated()) << "Nodes in freeze list should be move related";
1528         FreezeMoves(node);
1529         PruneNode(node);
1530       }
1531     } else if (!spill_worklist_.empty()) {
1532       // We spill the lowest-priority node, because pruning a node earlier
1533       // gives it a higher chance of being spilled.
1534       InterferenceNode* node = spill_worklist_.top();
1535       spill_worklist_.pop();
1536       if (node->stage == NodeStage::kSpillWorklist) {
1537         DCHECK_GE(node->GetOutDegree(), num_regs_) << "Nodes in spill list should be high degree";
1538         FreezeMoves(node);
1539         PruneNode(node);
1540       }
1541     } else {
1542       // Pruning complete.
1543       break;
1544     }
1545   }
1546   DCHECK_EQ(prunable_nodes_.size(), pruned_nodes_.size());
1547 }
1548 
1549 void ColoringIteration::EnableCoalesceOpportunities(InterferenceNode* node) {
1550   for (CoalesceOpportunity* opportunity : node->GetCoalesceOpportunities()) {
1551     if (opportunity->stage == CoalesceStage::kActive) {
1552       opportunity->stage = CoalesceStage::kWorklist;
1553       coalesce_worklist_.push(opportunity);
1554     }
1555   }
1556 }
1557 
1558 void ColoringIteration::PruneNode(InterferenceNode* node) {
1559   DCHECK_NE(node->stage, NodeStage::kPruned);
1560   DCHECK(!node->IsPrecolored());
1561   node->stage = NodeStage::kPruned;
1562   pruned_nodes_.push(node);
1563 
1564   for (InterferenceNode* adj : node->GetAdjacentNodes()) {
1565     DCHECK_NE(adj->stage, NodeStage::kPruned) << "Should be no interferences with pruned nodes";
1566 
1567     if (adj->IsPrecolored()) {
1568       // No effect on pre-colored nodes; they're never pruned.
1569     } else {
1570       // Remove the interference.
1571       bool was_high_degree = IsHighDegreeNode(adj, num_regs_);
1572       DCHECK(adj->ContainsInterference(node))
1573           << "Missing reflexive interference from non-fixed node";
1574       adj->RemoveInterference(node);
1575 
1576       // Handle transitions from high degree to low degree.
1577       if (was_high_degree && IsLowDegreeNode(adj, num_regs_)) {
1578         EnableCoalesceOpportunities(adj);
1579         for (InterferenceNode* adj_adj : adj->GetAdjacentNodes()) {
1580           EnableCoalesceOpportunities(adj_adj);
1581         }
1582 
1583         DCHECK_EQ(adj->stage, NodeStage::kSpillWorklist);
1584         if (adj->IsMoveRelated()) {
1585           adj->stage = NodeStage::kFreezeWorklist;
1586           freeze_worklist_.push_back(adj);
1587         } else {
1588           adj->stage = NodeStage::kSimplifyWorklist;
1589           simplify_worklist_.push_back(adj);
1590         }
1591       }
1592     }
1593   }
1594 }
1595 
1596 void ColoringIteration::CheckTransitionFromFreezeWorklist(InterferenceNode* node) {
1597   if (IsLowDegreeNode(node, num_regs_) && !node->IsMoveRelated()) {
1598     DCHECK_EQ(node->stage, NodeStage::kFreezeWorklist);
1599     node->stage = NodeStage::kSimplifyWorklist;
1600     simplify_worklist_.push_back(node);
1601   }
1602 }
1603 
1604 void ColoringIteration::FreezeMoves(InterferenceNode* node) {
1605   for (CoalesceOpportunity* opportunity : node->GetCoalesceOpportunities()) {
1606     if (opportunity->stage == CoalesceStage::kDefunct) {
1607       // Constrained moves should remain constrained, since they will not be considered
1608       // during last-chance coalescing.
1609     } else {
1610       opportunity->stage = CoalesceStage::kInactive;
1611     }
1612     InterferenceNode* other = opportunity->node_a->GetAlias() == node
1613         ? opportunity->node_b->GetAlias()
1614         : opportunity->node_a->GetAlias();
1615     if (other != node && other->stage == NodeStage::kFreezeWorklist) {
1616       DCHECK(IsLowDegreeNode(node, num_regs_));
1617       CheckTransitionFromFreezeWorklist(other);
1618     }
1619   }
1620 }
1621 
1622 bool ColoringIteration::PrecoloredHeuristic(InterferenceNode* from,
1623                                             InterferenceNode* into) {
1624   if (!into->IsPrecolored()) {
1625     // The uncolored heuristic will cover this case.
1626     return false;
1627   }
1628   if (from->IsPair() || into->IsPair()) {
1629     // TODO: Merging from a pair node is currently not supported, since fixed pair nodes
1630     //       are currently represented as two single fixed nodes in the graph, and `into` is
1631     //       only one of them. (We may lose the implicit connections to the second one in a merge.)
1632     return false;
1633   }
1634 
1635   // If all adjacent nodes of `from` are "ok", then we can conservatively merge with `into`.
1636   // Reasons an adjacent node `adj` can be "ok":
1637   // (1) If `adj` is low degree, interference with `into` will not affect its existing
1638   //     colorable guarantee. (Notice that coalescing cannot increase its degree.)
1639   // (2) If `adj` is pre-colored, it already interferes with `into`. See (3).
1640   // (3) If there's already an interference with `into`, coalescing will not add interferences.
1641   for (InterferenceNode* adj : from->GetAdjacentNodes()) {
1642     if (IsLowDegreeNode(adj, num_regs_) || adj->IsPrecolored() || adj->ContainsInterference(into)) {
1643       // Ok.
1644     } else {
1645       return false;
1646     }
1647   }
1648   return true;
1649 }
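
// This is the George coalescing test from iterated register coalescing, restricted to
// merges into pre-colored nodes: merging is conservative when every neighbor of `from`
// is low degree, is pre-colored, or already interferes with `into`.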
1650 
1651 bool ColoringIteration::UncoloredHeuristic(InterferenceNode* from,
1652                                            InterferenceNode* into) {
1653   if (into->IsPrecolored()) {
1654     // The pre-colored heuristic will handle this case.
1655     return false;
1656   }
1657 
1658   // Arbitrary cap to improve compile time. Tests show that this has negligible effect
1659   // on generated code.
1660   if (from->GetOutDegree() + into->GetOutDegree() > 2 * num_regs_) {
1661     return false;
1662   }
1663 
1664   // It's safe to coalesce two nodes if the resulting node has fewer than `num_regs` neighbors
1665   // of high degree. (Low degree neighbors can be ignored, because they will eventually be
1666   // pruned from the interference graph in the simplify stage.) A sketch of the textbook Briggs test follows this function.
1667   size_t high_degree_interferences = 0;
1668   for (InterferenceNode* adj : from->GetAdjacentNodes()) {
1669     if (IsHighDegreeNode(adj, num_regs_)) {
1670       high_degree_interferences += from->EdgeWeightWith(adj);
1671     }
1672   }
1673   for (InterferenceNode* adj : into->GetAdjacentNodes()) {
1674     if (IsHighDegreeNode(adj, num_regs_)) {
1675       if (from->ContainsInterference(adj)) {
1676         // We've already counted this adjacent node.
1677         // Furthermore, its degree will decrease if coalescing succeeds. Thus, it's possible that
1678         // we should not have counted it at all. (This extends the textbook Briggs coalescing test,
1679         // but remains conservative.)
1680         if (adj->GetOutDegree() - into->EdgeWeightWith(adj) < num_regs_) {
1681           high_degree_interferences -= from->EdgeWeightWith(adj);
1682         }
1683       } else {
1684         high_degree_interferences += into->EdgeWeightWith(adj);
1685       }
1686     }
1687   }
1688 
1689   return high_degree_interferences < num_regs_;
1690 }
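
// Illustrative sketch (not part of the allocator, kept out of the build with #if 0):
// the textbook Briggs test that the heuristic above extends. With unit edge weights and
// non-interfering candidates, merging is conservative when the combined node would have
// fewer than `k` neighbors of significant (>= k) degree. SimpleNode and
// BriggsCanCoalesce are hypothetical names used only for this example.
#if 0
#include <cstddef>
#include <set>

struct SimpleNode {
  std::set<SimpleNode*> adjacent;
  size_t Degree() const { return adjacent.size(); }
};

// Assumes `a` and `b` do not interfere (interfering nodes are never coalesced).
bool BriggsCanCoalesce(const SimpleNode& a, const SimpleNode& b, size_t k) {
  std::set<SimpleNode*> combined_neighbors = a.adjacent;
  combined_neighbors.insert(b.adjacent.begin(), b.adjacent.end());
  size_t significant_degree_neighbors = 0;
  for (SimpleNode* neighbor : combined_neighbors) {
    if (neighbor->Degree() >= k) {
      ++significant_degree_neighbors;
    }
  }
  return significant_degree_neighbors < k;
}
#endif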
1691 
1692 void ColoringIteration::Combine(InterferenceNode* from,
1693                                 InterferenceNode* into) {
1694   from->SetAlias(into);
1695 
1696   // Add interferences.
1697   for (InterferenceNode* adj : from->GetAdjacentNodes()) {
1698     bool was_low_degree = IsLowDegreeNode(adj, num_regs_);
1699     AddPotentialInterference(adj, into, /*guaranteed_not_interfering_yet*/ false);
1700     if (was_low_degree && IsHighDegreeNode(adj, num_regs_)) {
1701       // This is a (temporary) transition to a high degree node. Its degree will decrease again
1702       // when we prune `from`, but it's best to be consistent about the current worklist.
1703       adj->stage = NodeStage::kSpillWorklist;
1704       spill_worklist_.push(adj);
1705     }
1706   }
1707 
1708   // Add coalesce opportunities.
1709   for (CoalesceOpportunity* opportunity : from->GetCoalesceOpportunities()) {
1710     if (opportunity->stage != CoalesceStage::kDefunct) {
1711       into->AddCoalesceOpportunity(opportunity, &coalesce_opportunities_links_);
1712     }
1713   }
1714   EnableCoalesceOpportunities(from);
1715 
1716   // Prune and update worklists.
1717   PruneNode(from);
1718   if (IsLowDegreeNode(into, num_regs_)) {
1719     // Coalesce(...) takes care of checking for a transition to the simplify worklist.
1720     DCHECK_EQ(into->stage, NodeStage::kFreezeWorklist);
1721   } else if (into->stage == NodeStage::kFreezeWorklist) {
1722     // This is a transition to a high degree node.
1723     into->stage = NodeStage::kSpillWorklist;
1724     spill_worklist_.push(into);
1725   } else {
1726     DCHECK(into->stage == NodeStage::kSpillWorklist || into->stage == NodeStage::kPrecolored);
1727   }
1728 }
1729 
1730 void ColoringIteration::Coalesce(CoalesceOpportunity* opportunity) {
1731   InterferenceNode* from = opportunity->node_a->GetAlias();
1732   InterferenceNode* into = opportunity->node_b->GetAlias();
1733   DCHECK_NE(from->stage, NodeStage::kPruned);
1734   DCHECK_NE(into->stage, NodeStage::kPruned);
1735 
1736   if (from->IsPrecolored()) {
1737     // If we have one pre-colored node, make sure it's the `into` node.
1738     std::swap(from, into);
1739   }
1740 
1741   if (from == into) {
1742     // These nodes have already been coalesced.
1743     opportunity->stage = CoalesceStage::kDefunct;
1744     CheckTransitionFromFreezeWorklist(from);
1745   } else if (from->IsPrecolored() || from->ContainsInterference(into)) {
1746     // These nodes interfere.
1747     opportunity->stage = CoalesceStage::kDefunct;
1748     CheckTransitionFromFreezeWorklist(from);
1749     CheckTransitionFromFreezeWorklist(into);
1750   } else if (PrecoloredHeuristic(from, into)
1751           || UncoloredHeuristic(from, into)) {
1752     // We can coalesce these nodes.
1753     opportunity->stage = CoalesceStage::kDefunct;
1754     Combine(from, into);
1755     CheckTransitionFromFreezeWorklist(into);
1756   } else {
1757     // We cannot coalesce, but we may be able to later.
1758     opportunity->stage = CoalesceStage::kActive;
1759   }
1760 }
1761 
1762 // Build a mask with a bit set for each register assigned to some
1763 // interval in `intervals`.
1764 template <typename Container>
1765 static std::bitset<kMaxNumRegs> BuildConflictMask(const Container& intervals) {
1766   std::bitset<kMaxNumRegs> conflict_mask;
1767   for (InterferenceNode* adjacent : intervals) {
1768     LiveInterval* conflicting = adjacent->GetInterval();
1769     if (conflicting->HasRegister()) {
1770       conflict_mask.set(conflicting->GetRegister());
1771       if (conflicting->HasHighInterval()) {
1772         DCHECK(conflicting->GetHighInterval()->HasRegister());
1773         conflict_mask.set(conflicting->GetHighInterval()->GetRegister());
1774       }
1775     } else {
1776       DCHECK(!conflicting->HasHighInterval()
1777           || !conflicting->GetHighInterval()->HasRegister());
1778     }
1779   }
1780   return conflict_mask;
1781 }
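
// Worked example (illustrative): if the adjacent nodes hold intervals assigned r1 and
// the pair {r2, r3}, the returned mask has bits 1, 2 and 3 set, so any other register
// remains available for the node being colored.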
1782 
1783 bool RegisterAllocatorGraphColor::IsCallerSave(size_t reg, bool processing_core_regs) {
1784   return processing_core_regs
1785       ? !codegen_->IsCoreCalleeSaveRegister(reg)
1786       : !codegen_->IsFloatingPointCalleeSaveRegister(reg);
1787 }
1788 
1789 static bool RegisterIsAligned(size_t reg) {
1790   return reg % 2 == 0;
1791 }
1792 
1793 static size_t FindFirstZeroInConflictMask(std::bitset<kMaxNumRegs> conflict_mask) {
1794   // We use CTZ (count trailing zeros) to quickly find the lowest 0 bit.
1795   // Note that CTZ is undefined if all bits are 0, so we special-case it.
1796   return conflict_mask.all() ? conflict_mask.size() : CTZ(~conflict_mask.to_ulong());
1797 }
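
// Worked example (illustrative): for conflict_mask == 0b0111 (registers 0, 1 and 2
// taken), ~conflict_mask has bit 3 as its lowest set bit, so CTZ returns 3 and register
// 3 is the first free one. If every bit is set, the special case returns
// conflict_mask.size(), which callers treat as "no free register".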
1798 
1799 bool ColoringIteration::ColorInterferenceGraph() {
1800   DCHECK_LE(num_regs_, kMaxNumRegs) << "kMaxNumRegs is too small";
1801   ScopedArenaVector<LiveInterval*> colored_intervals(
1802       allocator_->Adapter(kArenaAllocRegisterAllocator));
1803   bool successful = true;
1804 
1805   while (!pruned_nodes_.empty()) {
1806     InterferenceNode* node = pruned_nodes_.top();
1807     pruned_nodes_.pop();
1808     LiveInterval* interval = node->GetInterval();
1809     size_t reg = 0;
1810 
1811     InterferenceNode* alias = node->GetAlias();
1812     if (alias != node) {
1813       // This node was coalesced with another.
1814       LiveInterval* alias_interval = alias->GetInterval();
1815       if (alias_interval->HasRegister()) {
1816         reg = alias_interval->GetRegister();
1817         DCHECK(!BuildConflictMask(node->GetAdjacentNodes())[reg])
1818             << "This node conflicts with the register it was coalesced with";
1819       } else {
1820         DCHECK(false) << node->GetOutDegree() << " " << alias->GetOutDegree() << " "
1821             << "Move coalescing was not conservative, causing a node to be coalesced "
1822             << "with another node that could not be colored";
1823         if (interval->RequiresRegister()) {
1824           successful = false;
1825         }
1826       }
1827     } else {
1828       // Search for free register(s).
1829       std::bitset<kMaxNumRegs> conflict_mask = BuildConflictMask(node->GetAdjacentNodes());
1830       if (interval->HasHighInterval()) {
1831         // Note that the graph coloring allocator assumes that pair intervals are aligned here,
1832         // excluding pre-colored pair intervals (which can currently be unaligned on x86). If we
1833         // change the alignment requirements here, we will have to update the algorithm (e.g.,
1834         // be more conservative about the weight of edges adjacent to pair nodes.)
1835         while (reg < num_regs_ - 1 && (conflict_mask[reg] || conflict_mask[reg + 1])) {
1836           reg += 2;
1837         }
1838 
1839         // Try to use a caller-save register first.
1840         for (size_t i = 0; i < num_regs_ - 1; i += 2) {
1841           bool low_caller_save  = register_allocator_->IsCallerSave(i, processing_core_regs_);
1842           bool high_caller_save = register_allocator_->IsCallerSave(i + 1, processing_core_regs_);
1843           if (!conflict_mask[i] && !conflict_mask[i + 1]) {
1844             if (low_caller_save && high_caller_save) {
1845               reg = i;
1846               break;
1847             } else if (low_caller_save || high_caller_save) {
1848               reg = i;
1849               // Keep looking to try to get both parts in caller-save registers.
1850             }
1851           }
1852         }
1853       } else {
1854         // Not a pair interval.
1855         reg = FindFirstZeroInConflictMask(conflict_mask);
1856 
1857         // Try to use caller-save registers first.
1858         for (size_t i = 0; i < num_regs_; ++i) {
1859           if (!conflict_mask[i] && register_allocator_->IsCallerSave(i, processing_core_regs_)) {
1860             reg = i;
1861             break;
1862           }
1863         }
1864       }
1865 
1866       // Last-chance coalescing.
1867       for (CoalesceOpportunity* opportunity : node->GetCoalesceOpportunities()) {
1868         if (opportunity->stage == CoalesceStage::kDefunct) {
1869           continue;
1870         }
1871         LiveInterval* other_interval = opportunity->node_a->GetAlias() == node
1872             ? opportunity->node_b->GetAlias()->GetInterval()
1873             : opportunity->node_a->GetAlias()->GetInterval();
1874         if (other_interval->HasRegister()) {
1875           size_t coalesce_register = other_interval->GetRegister();
1876           if (interval->HasHighInterval()) {
1877             if (!conflict_mask[coalesce_register] &&
1878                 !conflict_mask[coalesce_register + 1] &&
1879                 RegisterIsAligned(coalesce_register)) {
1880               reg = coalesce_register;
1881               break;
1882             }
1883           } else if (!conflict_mask[coalesce_register]) {
1884             reg = coalesce_register;
1885             break;
1886           }
1887         }
1888       }
1889     }
1890 
1891     if (reg < (interval->HasHighInterval() ? num_regs_ - 1 : num_regs_)) {
1892       // Assign register.
1893       DCHECK(!interval->HasRegister());
1894       interval->SetRegister(reg);
1895       colored_intervals.push_back(interval);
1896       if (interval->HasHighInterval()) {
1897         DCHECK(!interval->GetHighInterval()->HasRegister());
1898         interval->GetHighInterval()->SetRegister(reg + 1);
1899         colored_intervals.push_back(interval->GetHighInterval());
1900       }
1901     } else if (interval->RequiresRegister()) {
1902       // The interference graph is too dense to color. Make it sparser by
1903       // splitting this live interval.
1904       successful = false;
1905       register_allocator_->SplitAtRegisterUses(interval);
1906       // We continue coloring, because there may be additional intervals that cannot
1907       // be colored, and that we should split.
1908     } else {
1909       // Spill.
1910       node->SetNeedsSpillSlot();
1911     }
1912   }
1913 
1914   // If unsuccessful, reset all register assignments.
1915   if (!successful) {
1916     for (LiveInterval* interval : colored_intervals) {
1917       interval->ClearRegister();
1918     }
1919   }
1920 
1921   return successful;
1922 }
1923 
1924 void RegisterAllocatorGraphColor::AllocateSpillSlots(ArrayRef<InterferenceNode* const> nodes) {
1925   // The register allocation resolver will organize the stack based on value type,
1926   // so we assign stack slots for each value type separately.
1927   ScopedArenaAllocator allocator(allocator_->GetArenaStack());
1928   ScopedArenaAllocatorAdapter<void> adapter = allocator.Adapter(kArenaAllocRegisterAllocator);
1929   ScopedArenaVector<LiveInterval*> double_intervals(adapter);
1930   ScopedArenaVector<LiveInterval*> long_intervals(adapter);
1931   ScopedArenaVector<LiveInterval*> float_intervals(adapter);
1932   ScopedArenaVector<LiveInterval*> int_intervals(adapter);
1933 
1934   // The set of parent intervals already handled.
1935   ScopedArenaSet<LiveInterval*> seen(adapter);
1936 
1937   // Find nodes that need spill slots.
1938   for (InterferenceNode* node : nodes) {
1939     if (!node->NeedsSpillSlot()) {
1940       continue;
1941     }
1942 
1943     LiveInterval* parent = node->GetInterval()->GetParent();
1944     if (seen.find(parent) != seen.end()) {
1945       // We've already handled this interval.
1946       // This can happen if multiple siblings of the same interval request a stack slot.
1947       continue;
1948     }
1949     seen.insert(parent);
1950 
1951     HInstruction* defined_by = parent->GetDefinedBy();
1952     if (parent->HasSpillSlot()) {
1953       // We already have a spill slot for this value that we can reuse.
1954     } else if (defined_by->IsParameterValue()) {
1955       // Parameters already have a stack slot.
1956       parent->SetSpillSlot(codegen_->GetStackSlotOfParameter(defined_by->AsParameterValue()));
1957     } else if (defined_by->IsCurrentMethod()) {
1958       // The current method is always at stack slot 0.
1959       parent->SetSpillSlot(0);
1960     } else if (defined_by->IsConstant()) {
1961       // Constants don't need a spill slot.
1962     } else {
1963       // We need to find a spill slot for this interval. Place it in the correct
1964       // worklist to be processed later.
1965       switch (node->GetInterval()->GetType()) {
1966         case DataType::Type::kFloat64:
1967           double_intervals.push_back(parent);
1968           break;
1969         case DataType::Type::kInt64:
1970           long_intervals.push_back(parent);
1971           break;
1972         case DataType::Type::kFloat32:
1973           float_intervals.push_back(parent);
1974           break;
1975         case DataType::Type::kReference:
1976         case DataType::Type::kInt32:
1977         case DataType::Type::kUint16:
1978         case DataType::Type::kUint8:
1979         case DataType::Type::kInt8:
1980         case DataType::Type::kBool:
1981         case DataType::Type::kInt16:
1982           int_intervals.push_back(parent);
1983           break;
1984         case DataType::Type::kUint32:
1985         case DataType::Type::kUint64:
1986         case DataType::Type::kVoid:
1987           LOG(FATAL) << "Unexpected type for interval " << node->GetInterval()->GetType();
1988           UNREACHABLE();
1989       }
1990     }
1991   }
1992 
1993   // Color spill slots for each value type.
1994   ColorSpillSlots(ArrayRef<LiveInterval* const>(double_intervals), &num_double_spill_slots_);
1995   ColorSpillSlots(ArrayRef<LiveInterval* const>(long_intervals), &num_long_spill_slots_);
1996   ColorSpillSlots(ArrayRef<LiveInterval* const>(float_intervals), &num_float_spill_slots_);
1997   ColorSpillSlots(ArrayRef<LiveInterval* const>(int_intervals), &num_int_spill_slots_);
1998 }
1999 
2000 void RegisterAllocatorGraphColor::ColorSpillSlots(ArrayRef<LiveInterval* const> intervals,
2001                                                   /* out */ size_t* num_stack_slots_used) {
2002   // We cannot use the original interference graph here because spill slots are assigned to
2003   // all of the siblings of an interval, whereas an interference node represents only a single
2004   // sibling. So, we assign spill slots linear-scan-style by sorting all the interval endpoints
2005   // by position, and assigning the lowest spill slot available when we encounter an interval
2006   // beginning. We ignore lifetime holes for simplicity.
2007   ScopedArenaAllocator allocator(allocator_->GetArenaStack());
2008   ScopedArenaVector<std::tuple<size_t, bool, LiveInterval*>> interval_endpoints(
2009       allocator.Adapter(kArenaAllocRegisterAllocator));
2010 
2011   for (LiveInterval* parent_interval : intervals) {
2012     DCHECK(parent_interval->IsParent());
2013     DCHECK(!parent_interval->HasSpillSlot());
2014     size_t start = parent_interval->GetStart();
2015     size_t end = parent_interval->GetLastSibling()->GetEnd();
2016     DCHECK_LT(start, end);
2017     interval_endpoints.push_back(std::make_tuple(start, true, parent_interval));
2018     interval_endpoints.push_back(std::make_tuple(end, false, parent_interval));
2019   }
2020 
2021   // Sort by position.
2022   // We explicitly ignore the third entry of each tuple (the interval pointer) in order
2023   // to maintain determinism.
2024   std::sort(interval_endpoints.begin(), interval_endpoints.end(),
2025             [] (const std::tuple<size_t, bool, LiveInterval*>& lhs,
2026                 const std::tuple<size_t, bool, LiveInterval*>& rhs) {
2027     return std::tie(std::get<0>(lhs), std::get<1>(lhs))
2028          < std::tie(std::get<0>(rhs), std::get<1>(rhs));
2029   });
2030 
2031   ArenaBitVector taken(&allocator, 0, true, kArenaAllocRegisterAllocator);
2032   for (auto it = interval_endpoints.begin(), end = interval_endpoints.end(); it != end; ++it) {
2033     // Extract information from the current tuple.
2034     LiveInterval* parent_interval;
2035     bool is_interval_beginning;
2036     size_t position;
2037     std::tie(position, is_interval_beginning, parent_interval) = *it;
2038     size_t number_of_spill_slots_needed = parent_interval->NumberOfSpillSlotsNeeded();
2039 
2040     if (is_interval_beginning) {
2041       DCHECK(!parent_interval->HasSpillSlot());
2042       DCHECK_EQ(position, parent_interval->GetStart());
2043 
2044       // Find first available free stack slot(s).
2045       size_t slot = 0;
2046       for (; ; ++slot) {
2047         bool found = true;
2048         for (size_t s = slot, u = slot + number_of_spill_slots_needed; s < u; s++) {
2049           if (taken.IsBitSet(s)) {
2050             found = false;
2051             break;  // failure
2052           }
2053         }
2054         if (found) {
2055           break;  // success
2056         }
2057       }
2058 
2059       parent_interval->SetSpillSlot(slot);
2060 
2061       *num_stack_slots_used = std::max(*num_stack_slots_used, slot + number_of_spill_slots_needed);
2062       if (number_of_spill_slots_needed > 1 && *num_stack_slots_used % 2 != 0) {
2063         // The parallel move resolver requires that there be an even number of spill slots
2064         // allocated for pair value types.
2065         ++(*num_stack_slots_used);
2066       }
2067 
2068       for (size_t s = slot, u = slot + number_of_spill_slots_needed; s < u; s++) {
2069         taken.SetBit(s);
2070       }
2071     } else {
2072       DCHECK_EQ(position, parent_interval->GetLastSibling()->GetEnd());
2073       DCHECK(parent_interval->HasSpillSlot());
2074 
2075       // Free up the stack slot(s) used by this interval.
2076       size_t slot = parent_interval->GetSpillSlot();
2077       for (size_t s = slot, u = slot + number_of_spill_slots_needed; s < u; s++) {
2078         DCHECK(taken.IsBitSet(s));
2079         taken.ClearBit(s);
2080       }
2081     }
2082   }
2083   DCHECK_EQ(taken.NumSetBits(), 0u);
2084 }
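
// Worked example (illustrative): three single-slot parent intervals A:[2,10), B:[4,8)
// and C:[12,20) produce the sorted events (2,begin,A), (4,begin,B), (8,end,B),
// (10,end,A), (12,begin,C), (20,end,C). The sweep assigns slot 0 to A, slot 1 to B
// (slot 0 is taken), and slot 0 again to C once the earlier intervals have ended, so
// only two stack slots are used in total.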
2085 
2086 }  // namespace art
2087