1 /*
2  * Copyright (C) 2016 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "register_allocator_graph_color.h"
18 
19 #include "code_generator.h"
20 #include "linear_order.h"
21 #include "register_allocation_resolver.h"
22 #include "ssa_liveness_analysis.h"
23 #include "thread-inl.h"
24 
25 namespace art {
26 
27 // Highest number of registers that we support for any platform. This can be used for std::bitset,
28 // for example, which needs to know its size at compile time.
29 static constexpr size_t kMaxNumRegs = 32;
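// (Illustrative use only, not a declaration from this file: something like
// `std::bitset<kMaxNumRegs> blocked_regs;` relies on this compile-time bound.)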
30 
31 // The maximum number of graph coloring attempts before triggering a DCHECK.
32 // This is meant to catch changes to the graph coloring algorithm that undermine its forward
33 // progress guarantees. Forward progress for the algorithm means splitting live intervals on
34 // every graph coloring attempt so that eventually the interference graph will be sparse enough
35 // to color. The main threat to forward progress is trying to split short intervals which cannot be
36 // split further; this could cause infinite looping because the interference graph would never
37 // change. This is avoided by prioritizing short intervals before long ones, so that long
38 // intervals are split when coloring fails.
39 static constexpr size_t kMaxGraphColoringAttemptsDebug = 100;
40 
41 // We always want to avoid spilling inside loops.
42 static constexpr size_t kLoopSpillWeightMultiplier = 10;
43 
44 // If we avoid moves in single jump blocks, we can avoid jumps to jumps.
45 static constexpr size_t kSingleJumpBlockWeightMultiplier = 2;
46 
47 // We avoid moves in blocks that dominate the exit block, since these blocks will
48 // be executed on every path through the method.
49 static constexpr size_t kDominatesExitBlockWeightMultiplier = 2;
50 
51 enum class CoalesceKind {
52   kAdjacentSibling,       // Prevents moves at interval split points.
53   kFixedOutputSibling,    // Prevents moves from a fixed output location.
54   kFixedInput,            // Prevents moves into a fixed input location.
55   kNonlinearControlFlow,  // Prevents moves between blocks.
56   kPhi,                   // Prevents phi resolution moves.
57   kFirstInput,            // Prevents a single input move.
58   kAnyInput,              // May lead to better instruction selection / smaller encodings.
59 };
60 
61 std::ostream& operator<<(std::ostream& os, const CoalesceKind& kind) {
62   return os << static_cast<typename std::underlying_type<CoalesceKind>::type>(kind);
63 }
64 
65 static size_t LoopDepthAt(HBasicBlock* block) {
66   HLoopInformation* loop_info = block->GetLoopInformation();
67   size_t depth = 0;
68   while (loop_info != nullptr) {
69     ++depth;
70     loop_info = loop_info->GetPreHeader()->GetLoopInformation();
71   }
72   return depth;
73 }
74 
75 // Return the runtime cost of inserting a move instruction at the specified location.
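// Illustrative example with hypothetical inputs: a move placed in a block at loop depth 2 that
// also dominates the exit block costs 1 * 2 * 10 * 10 = 200 (kDominatesExitBlockWeightMultiplier
// once, kLoopSpillWeightMultiplier per loop level), while a move in a plain top-level block
// costs 1.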
76 static size_t CostForMoveAt(size_t position, const SsaLivenessAnalysis& liveness) {
77   HBasicBlock* block = liveness.GetBlockFromPosition(position / 2);
78   DCHECK(block != nullptr);
79   size_t cost = 1;
80   if (block->IsSingleJump()) {
81     cost *= kSingleJumpBlockWeightMultiplier;
82   }
83   if (block->Dominates(block->GetGraph()->GetExitBlock())) {
84     cost *= kDominatesExitBlockWeightMultiplier;
85   }
86   for (size_t loop_depth = LoopDepthAt(block); loop_depth > 0; --loop_depth) {
87     cost *= kLoopSpillWeightMultiplier;
88   }
89   return cost;
90 }
91 
92 // In general, we estimate coalesce priority by whether it will definitely avoid a move,
93 // and by how likely it is to create an interference graph that's harder to color.
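// For example (hypothetical positions): a kFirstInput opportunity inside a depth-1 loop gets the
// move cost at that position (10 with the multipliers above), whereas a kAnyInput opportunity
// always gets priority 0 because it cannot remove a move on its own.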
94 static size_t ComputeCoalescePriority(CoalesceKind kind,
95                                       size_t position,
96                                       const SsaLivenessAnalysis& liveness) {
97   if (kind == CoalesceKind::kAnyInput) {
98     // This type of coalescing can affect instruction selection, but not moves, so we
99     // give it the lowest priority.
100     return 0;
101   } else {
102     return CostForMoveAt(position, liveness);
103   }
104 }
105 
106 enum class CoalesceStage {
107   kWorklist,  // Currently in the iterative coalescing worklist.
108   kActive,    // Not in a worklist, but could be considered again during iterative coalescing.
109   kInactive,  // No longer considered until last-chance coalescing.
110   kDefunct,   // Either the two nodes interfere, or have already been coalesced.
111 };
112 
113 std::ostream& operator<<(std::ostream& os, const CoalesceStage& stage) {
114   return os << static_cast<typename std::underlying_type<CoalesceStage>::type>(stage);
115 }
116 
117 // Represents a coalesce opportunity between two nodes.
118 struct CoalesceOpportunity : public ArenaObject<kArenaAllocRegisterAllocator> {
119   CoalesceOpportunity(InterferenceNode* a,
120                       InterferenceNode* b,
121                       CoalesceKind kind,
122                       size_t position,
123                       const SsaLivenessAnalysis& liveness)
124         : node_a(a),
125           node_b(b),
126           stage(CoalesceStage::kWorklist),
127           priority(ComputeCoalescePriority(kind, position, liveness)) {}
128 
129   // Compare two coalesce opportunities based on their priority.
130   // Return true if lhs has a lower priority than that of rhs.
131   static bool CmpPriority(const CoalesceOpportunity* lhs,
132                           const CoalesceOpportunity* rhs) {
133     return lhs->priority < rhs->priority;
134   }
135 
136   InterferenceNode* const node_a;
137   InterferenceNode* const node_b;
138 
139   // The current stage of this coalesce opportunity, indicating whether it is in a worklist,
140   // and whether it should still be considered.
141   CoalesceStage stage;
142 
143   // The priority of this coalesce opportunity, based on heuristics.
144   const size_t priority;
145 };
146 
147 enum class NodeStage {
148   kInitial,           // Uninitialized.
149   kPrecolored,        // Marks fixed nodes.
150   kSafepoint,         // Marks safepoint nodes.
151   kPrunable,          // Marks uncolored nodes in the interference graph.
152   kSimplifyWorklist,  // Marks non-move-related nodes with degree less than the number of registers.
153   kFreezeWorklist,    // Marks move-related nodes with degree less than the number of registers.
154   kSpillWorklist,     // Marks nodes with degree greater or equal to the number of registers.
155   kPruned             // Marks nodes already pruned from the interference graph.
156 };
157 
158 std::ostream& operator<<(std::ostream& os, const NodeStage& stage) {
159   return os << static_cast<typename std::underlying_type<NodeStage>::type>(stage);
160 }
161 
162 // Returns the estimated cost of spilling a particular live interval.
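// Worked example with hypothetical numbers for the computation below: an interval of length 20
// whose definition requires a register (move cost 10) and which has one register use (move cost
// 10) gets weight (10 + 10) / 20 = 1.0. Length-1 intervals return the maximum float value and
// fixed-register intervals the minimum, per the early returns below.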
163 static float ComputeSpillWeight(LiveInterval* interval, const SsaLivenessAnalysis& liveness) {
164   if (interval->HasRegister()) {
165     // Intervals with a fixed register cannot be spilled.
166     return std::numeric_limits<float>::min();
167   }
168 
169   size_t length = interval->GetLength();
170   if (length == 1) {
171     // Tiny intervals should have maximum priority, since they cannot be split any further.
172     return std::numeric_limits<float>::max();
173   }
174 
175   size_t use_weight = 0;
176   if (interval->GetDefinedBy() != nullptr && interval->DefinitionRequiresRegister()) {
177     // Cost for spilling at a register definition point.
178     use_weight += CostForMoveAt(interval->GetStart() + 1, liveness);
179   }
180 
181   UsePosition* use = interval->GetFirstUse();
182   while (use != nullptr && use->GetPosition() <= interval->GetStart()) {
183     // Skip uses before the start of this live interval.
184     use = use->GetNext();
185   }
186 
187   while (use != nullptr && use->GetPosition() <= interval->GetEnd()) {
188     if (use->GetUser() != nullptr && use->RequiresRegister()) {
189       // Cost for spilling at a register use point.
190       use_weight += CostForMoveAt(use->GetUser()->GetLifetimePosition() - 1, liveness);
191     }
192     use = use->GetNext();
193   }
194 
195   // We divide by the length of the interval because we want to prioritize
196   // short intervals; we do not benefit much if we split them further.
197   return static_cast<float>(use_weight) / static_cast<float>(length);
198 }
199 
200 // Interference nodes make up the interference graph, which is the primary data structure in
201 // graph coloring register allocation. Each node represents a single live interval, and contains
202 // a set of adjacent nodes corresponding to intervals overlapping with its own. To save memory,
203 // pre-colored nodes never contain outgoing edges (only incoming ones).
204 //
205 // As nodes are pruned from the interference graph, incoming edges of the pruned node are removed,
206 // but outgoing edges remain in order to later color the node based on the colors of its neighbors.
207 //
208 // Note that a pair interval is represented by a single node in the interference graph, which
209 // essentially requires two colors. One consequence of this is that the degree of a node is not
210 // necessarily equal to the number of adjacent nodes--instead, the degree reflects the maximum
211 // number of colors with which a node could interfere. We model this by giving edges different
212 // weights (1 or 2) to control how much it increases the degree of adjacent nodes.
213 // For example, the edge between two single nodes will have weight 1. On the other hand,
214 // the edge between a single node and a pair node will have weight 2. This is because the pair
215 // node could block up to two colors for the single node, and because the single node could
216 // block an entire two-register aligned slot for the pair node.
217 // The degree is defined this way because we use it to decide whether a node is guaranteed a color,
218 // and thus whether it is safe to prune it from the interference graph early on.
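// Degree example with hypothetical nodes: if single-register node S is adjacent to pair node P
// and to another single node T, then S's degree is EdgeWeightWith(P) + EdgeWeightWith(T)
// = 2 + 1 = 3 even though S has only two neighbors; likewise the S-P edge contributes 2 to P's
// degree.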
219 class InterferenceNode : public ArenaObject<kArenaAllocRegisterAllocator> {
220  public:
221   InterferenceNode(ArenaAllocator* allocator,
222                    LiveInterval* interval,
223                    const SsaLivenessAnalysis& liveness)
224         : stage(NodeStage::kInitial),
225           interval_(interval),
226           adjacent_nodes_(allocator->Adapter(kArenaAllocRegisterAllocator)),
227           coalesce_opportunities_(allocator->Adapter(kArenaAllocRegisterAllocator)),
228           out_degree_(interval->HasRegister() ? std::numeric_limits<size_t>::max() : 0),
229           alias_(this),
230           spill_weight_(ComputeSpillWeight(interval, liveness)),
231           requires_color_(interval->RequiresRegister()),
232           needs_spill_slot_(false) {
233     DCHECK(!interval->IsHighInterval()) << "Pair nodes should be represented by the low interval";
234   }
235 
236   void AddInterference(InterferenceNode* other, bool guaranteed_not_interfering_yet) {
237     DCHECK(!IsPrecolored()) << "To save memory, fixed nodes should not have outgoing interferences";
238     DCHECK_NE(this, other) << "Should not create self loops in the interference graph";
239     DCHECK_EQ(this, alias_) << "Should not add interferences to a node that aliases another";
240     DCHECK_NE(stage, NodeStage::kPruned);
241     DCHECK_NE(other->stage, NodeStage::kPruned);
242     if (guaranteed_not_interfering_yet) {
243       DCHECK(std::find(adjacent_nodes_.begin(), adjacent_nodes_.end(), other)
244              == adjacent_nodes_.end());
245       adjacent_nodes_.push_back(other);
246       out_degree_ += EdgeWeightWith(other);
247     } else {
248       auto it = std::find(adjacent_nodes_.begin(), adjacent_nodes_.end(), other);
249       if (it == adjacent_nodes_.end()) {
250         adjacent_nodes_.push_back(other);
251         out_degree_ += EdgeWeightWith(other);
252       }
253     }
254   }
255 
256   void RemoveInterference(InterferenceNode* other) {
257     DCHECK_EQ(this, alias_) << "Should not remove interferences from a coalesced node";
258     DCHECK_EQ(other->stage, NodeStage::kPruned) << "Should only remove interferences when pruning";
259     auto it = std::find(adjacent_nodes_.begin(), adjacent_nodes_.end(), other);
260     if (it != adjacent_nodes_.end()) {
261       adjacent_nodes_.erase(it);
262       out_degree_ -= EdgeWeightWith(other);
263     }
264   }
265 
266   bool ContainsInterference(InterferenceNode* other) const {
267     DCHECK(!IsPrecolored()) << "Should not query fixed nodes for interferences";
268     DCHECK_EQ(this, alias_) << "Should not query a coalesced node for interferences";
269     auto it = std::find(adjacent_nodes_.begin(), adjacent_nodes_.end(), other);
270     return it != adjacent_nodes_.end();
271   }
272 
273   LiveInterval* GetInterval() const {
274     return interval_;
275   }
276 
277   const ArenaVector<InterferenceNode*>& GetAdjacentNodes() const {
278     return adjacent_nodes_;
279   }
280 
281   size_t GetOutDegree() const {
282     // Pre-colored nodes have infinite degree.
283     DCHECK(!IsPrecolored() || out_degree_ == std::numeric_limits<size_t>::max());
284     return out_degree_;
285   }
286 
287   void AddCoalesceOpportunity(CoalesceOpportunity* opportunity) {
288     coalesce_opportunities_.push_back(opportunity);
289   }
290 
291   void ClearCoalesceOpportunities() {
292     coalesce_opportunities_.clear();
293   }
294 
295   bool IsMoveRelated() const {
296     for (CoalesceOpportunity* opportunity : coalesce_opportunities_) {
297       if (opportunity->stage == CoalesceStage::kWorklist ||
298           opportunity->stage == CoalesceStage::kActive) {
299         return true;
300       }
301     }
302     return false;
303   }
304 
305   // Return whether this node already has a color.
306   // Used to find fixed nodes in the interference graph before coloring.
307   bool IsPrecolored() const {
308     return interval_->HasRegister();
309   }
310 
311   bool IsPair() const {
312     return interval_->HasHighInterval();
313   }
314 
315   void SetAlias(InterferenceNode* rep) {
316     DCHECK_NE(rep->stage, NodeStage::kPruned);
317     DCHECK_EQ(this, alias_) << "Should only set a node's alias once";
318     alias_ = rep;
319   }
320 
321   InterferenceNode* GetAlias() {
322     if (alias_ != this) {
323       // Recurse in order to flatten tree of alias pointers.
324       alias_ = alias_->GetAlias();
325     }
326     return alias_;
327   }
328 
329   const ArenaVector<CoalesceOpportunity*>& GetCoalesceOpportunities() const {
330     return coalesce_opportunities_;
331   }
332 
333   float GetSpillWeight() const {
334     return spill_weight_;
335   }
336 
337   bool RequiresColor() const {
338     return requires_color_;
339   }
340 
341   // We give extra weight to edges adjacent to pair nodes. See the general comment on the
342   // interference graph above.
343   size_t EdgeWeightWith(const InterferenceNode* other) const {
344     return (IsPair() || other->IsPair()) ? 2 : 1;
345   }
346 
347   bool NeedsSpillSlot() const {
348     return needs_spill_slot_;
349   }
350 
351   void SetNeedsSpillSlot() {
352     needs_spill_slot_ = true;
353   }
354 
355   // The current stage of this node, indicating which worklist it belongs to.
356   NodeStage stage;
357 
358  private:
359   // The live interval that this node represents.
360   LiveInterval* const interval_;
361 
362   // All nodes interfering with this one.
363   // We use an unsorted vector as a set, since a tree or hash set is too heavy for the
364   // set sizes that we encounter. Using a vector leads to much better performance.
365   ArenaVector<InterferenceNode*> adjacent_nodes_;
366 
367   // Interference nodes that this node should be coalesced with to reduce moves.
368   ArenaVector<CoalesceOpportunity*> coalesce_opportunities_;
369 
370   // The maximum number of colors with which this node could interfere. This could be more than
371   // the number of adjacent nodes if this is a pair node, or if some adjacent nodes are pair nodes.
372   // We use "out" degree because incoming edges come from nodes already pruned from the graph,
373   // and do not affect the coloring of this node.
374   // Pre-colored nodes are treated as having infinite degree.
375   size_t out_degree_;
376 
377   // The node representing this node in the interference graph.
378   // Initially set to `this`, and only changed if this node is coalesced into another.
379   InterferenceNode* alias_;
380 
381   // The cost of splitting and spilling this interval to the stack.
382   // Nodes with a higher spill weight should be prioritized when assigning registers.
383   // This is essentially based on use density and location; short intervals with many uses inside
384   // deeply nested loops have a high spill weight.
385   const float spill_weight_;
386 
387   const bool requires_color_;
388 
389   bool needs_spill_slot_;
390 
391   DISALLOW_COPY_AND_ASSIGN(InterferenceNode);
392 };
393 
394 // The order in which we color nodes is important. To guarantee forward progress,
395 // we prioritize intervals that require registers, and after that we prioritize
396 // short intervals. That way, if we fail to color a node, it either won't require a
397 // register, or it will be a long interval that can be split in order to make the
398 // interference graph sparser.
399 // To improve code quality, we prioritize intervals used frequently in deeply nested loops.
400 // (This metric is secondary to the forward progress requirements above.)
401 // TODO: May also want to consider:
402 // - Constants (since they can be rematerialized)
403 // - Allocated spill slots
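// Example of the resulting order (hypothetical nodes): a node that requires a color always sorts
// ahead of one that does not; between two color-requiring nodes, spill weight 3.0 sorts ahead of
// spill weight 0.5.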
404 static bool HasGreaterNodePriority(const InterferenceNode* lhs,
405                                    const InterferenceNode* rhs) {
406   // (1) Prioritize the node that requires a color.
407   if (lhs->RequiresColor() != rhs->RequiresColor()) {
408     return lhs->RequiresColor();
409   }
410 
411   // (2) Prioritize the interval that has a higher spill weight.
412   return lhs->GetSpillWeight() > rhs->GetSpillWeight();
413 }
414 
415 // A ColoringIteration holds the many data structures needed for a single graph coloring attempt,
416 // and provides methods for each phase of the attempt.
417 class ColoringIteration {
418  public:
419   ColoringIteration(RegisterAllocatorGraphColor* register_allocator,
420                     ArenaAllocator* allocator,
421                     bool processing_core_regs,
422                     size_t num_regs)
423         : register_allocator_(register_allocator),
424           allocator_(allocator),
425           processing_core_regs_(processing_core_regs),
426           num_regs_(num_regs),
427           interval_node_map_(allocator->Adapter(kArenaAllocRegisterAllocator)),
428           prunable_nodes_(allocator->Adapter(kArenaAllocRegisterAllocator)),
429           pruned_nodes_(allocator->Adapter(kArenaAllocRegisterAllocator)),
430           simplify_worklist_(allocator->Adapter(kArenaAllocRegisterAllocator)),
431           freeze_worklist_(allocator->Adapter(kArenaAllocRegisterAllocator)),
432           spill_worklist_(HasGreaterNodePriority, allocator->Adapter(kArenaAllocRegisterAllocator)),
433           coalesce_worklist_(CoalesceOpportunity::CmpPriority,
434                              allocator->Adapter(kArenaAllocRegisterAllocator)) {}
435 
436   // Use the intervals collected from instructions to construct an
437   // interference graph mapping intervals to adjacency lists.
438   // Also, collect synthesized safepoint nodes, used to keep
439   // track of live intervals across safepoints.
440   // TODO: Should build safepoints elsewhere.
441   void BuildInterferenceGraph(const ArenaVector<LiveInterval*>& intervals,
442                               const ArenaVector<InterferenceNode*>& physical_nodes);
443 
444   // Add coalesce opportunities to interference nodes.
445   void FindCoalesceOpportunities();
446 
447   // Prune nodes from the interference graph to be colored later. Build
448   // a stack (pruned_nodes) containing these intervals in an order determined
449   // by various heuristics.
450   void PruneInterferenceGraph();
451 
452   // Process pruned_intervals_ to color the interference graph, spilling when
453   // necessary. Returns true if successful. Else, some intervals have been
454   // split, and the interference graph should be rebuilt for another attempt.
455   bool ColorInterferenceGraph();
456 
457   // Return prunable nodes.
458   // The register allocator will need to access prunable nodes after coloring
459   // in order to tell the code generator which registers have been assigned.
460   const ArenaVector<InterferenceNode*>& GetPrunableNodes() const {
461     return prunable_nodes_;
462   }
463 
464  private:
465   // Create a coalesce opportunity between two nodes.
466   void CreateCoalesceOpportunity(InterferenceNode* a,
467                                  InterferenceNode* b,
468                                  CoalesceKind kind,
469                                  size_t position);
470 
471   // Add an edge in the interference graph, if valid.
472   // Note that `guaranteed_not_interfering_yet` is used to optimize adjacency set insertion
473   // when possible.
474   void AddPotentialInterference(InterferenceNode* from,
475                                 InterferenceNode* to,
476                                 bool guaranteed_not_interfering_yet,
477                                 bool both_directions = true);
478 
479   // Invalidate all coalesce opportunities this node has, so that it (and possibly its neighbors)
480   // may be pruned from the interference graph.
481   void FreezeMoves(InterferenceNode* node);
482 
483   // Prune a node from the interference graph, updating worklists if necessary.
484   void PruneNode(InterferenceNode* node);
485 
486   // Add coalesce opportunities associated with this node to the coalesce worklist.
487   void EnableCoalesceOpportunities(InterferenceNode* node);
488 
489   // If needed, move `node` from the freeze worklist to the simplify worklist.
490   void CheckTransitionFromFreezeWorklist(InterferenceNode* node);
491 
492   // Return true if `into` is colored, and `from` can be coalesced with `into` conservatively.
493   bool PrecoloredHeuristic(InterferenceNode* from, InterferenceNode* into);
494 
495   // Return true if `from` and `into` are uncolored, and can be coalesced conservatively.
496   bool UncoloredHeuristic(InterferenceNode* from, InterferenceNode* into);
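  // (These two checks are in the spirit of the conservative coalescing tests usually called the
  // George and Briggs heuristics, respectively, in the graph coloring literature.)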
497 
498   void Coalesce(CoalesceOpportunity* opportunity);
499 
500   // Merge `from` into `into` in the interference graph.
501   void Combine(InterferenceNode* from, InterferenceNode* into);
502 
503   // A reference to the register allocator instance,
504   // needed to split intervals and assign spill slots.
505   RegisterAllocatorGraphColor* register_allocator_;
506 
507   // An arena allocator used for a single graph coloring attempt.
508   ArenaAllocator* allocator_;
509 
510   const bool processing_core_regs_;
511 
512   const size_t num_regs_;
513 
514   // A map from live intervals to interference nodes.
515   ArenaHashMap<LiveInterval*, InterferenceNode*> interval_node_map_;
516 
517   // Uncolored nodes that should be pruned from the interference graph.
518   ArenaVector<InterferenceNode*> prunable_nodes_;
519 
520   // A stack of nodes pruned from the interference graph, waiting to be colored.
521   ArenaStdStack<InterferenceNode*> pruned_nodes_;
522 
523   // A queue containing low degree, non-move-related nodes that can be pruned immediately.
524   ArenaDeque<InterferenceNode*> simplify_worklist_;
525 
526   // A queue containing low degree, move-related nodes.
527   ArenaDeque<InterferenceNode*> freeze_worklist_;
528 
529   // A queue containing high degree nodes.
530   // If we have to prune from the spill worklist, we cannot guarantee
531   // the pruned node a color, so we order the worklist by priority.
532   ArenaPriorityQueue<InterferenceNode*, decltype(&HasGreaterNodePriority)> spill_worklist_;
533 
534   // A queue containing coalesce opportunities.
535   // We order the coalesce worklist by priority, since some coalesce opportunities (e.g., those
536   // inside of loops) are more important than others.
537   ArenaPriorityQueue<CoalesceOpportunity*,
538                      decltype(&CoalesceOpportunity::CmpPriority)> coalesce_worklist_;
539 
540   DISALLOW_COPY_AND_ASSIGN(ColoringIteration);
541 };
542 
543 static bool IsCoreInterval(LiveInterval* interval) {
544   return !Primitive::IsFloatingPointType(interval->GetType());
545 }
546 
547 static size_t ComputeReservedArtMethodSlots(const CodeGenerator& codegen) {
548   return static_cast<size_t>(InstructionSetPointerSize(codegen.GetInstructionSet())) / kVRegSize;
549 }
550 
551 RegisterAllocatorGraphColor::RegisterAllocatorGraphColor(ArenaAllocator* allocator,
552                                                          CodeGenerator* codegen,
553                                                          const SsaLivenessAnalysis& liveness,
554                                                          bool iterative_move_coalescing)
555       : RegisterAllocator(allocator, codegen, liveness),
556         iterative_move_coalescing_(iterative_move_coalescing),
557         core_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)),
558         fp_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)),
559         temp_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)),
560         safepoints_(allocator->Adapter(kArenaAllocRegisterAllocator)),
561         physical_core_nodes_(allocator->Adapter(kArenaAllocRegisterAllocator)),
562         physical_fp_nodes_(allocator->Adapter(kArenaAllocRegisterAllocator)),
563         num_int_spill_slots_(0),
564         num_double_spill_slots_(0),
565         num_float_spill_slots_(0),
566         num_long_spill_slots_(0),
567         catch_phi_spill_slot_counter_(0),
568         reserved_art_method_slots_(ComputeReservedArtMethodSlots(*codegen)),
569         reserved_out_slots_(codegen->GetGraph()->GetMaximumNumberOfOutVRegs()) {
570   // Before we ask for blocked registers, set them up in the code generator.
571   codegen->SetupBlockedRegisters();
572 
573   // Initialize physical core register live intervals and blocked registers.
574   // This includes globally blocked registers, such as the stack pointer.
575   physical_core_nodes_.resize(codegen_->GetNumberOfCoreRegisters(), nullptr);
576   for (size_t i = 0; i < codegen_->GetNumberOfCoreRegisters(); ++i) {
577     LiveInterval* interval = LiveInterval::MakeFixedInterval(allocator_, i, Primitive::kPrimInt);
578     physical_core_nodes_[i] =
579         new (allocator_) InterferenceNode(allocator_, interval, liveness);
580     physical_core_nodes_[i]->stage = NodeStage::kPrecolored;
581     core_intervals_.push_back(interval);
582     if (codegen_->IsBlockedCoreRegister(i)) {
583       interval->AddRange(0, liveness.GetMaxLifetimePosition());
584     }
585   }
586   // Initialize physical floating point register live intervals and blocked registers.
587   physical_fp_nodes_.resize(codegen_->GetNumberOfFloatingPointRegisters(), nullptr);
588   for (size_t i = 0; i < codegen_->GetNumberOfFloatingPointRegisters(); ++i) {
589     LiveInterval* interval = LiveInterval::MakeFixedInterval(allocator_, i, Primitive::kPrimFloat);
590     physical_fp_nodes_[i] =
591         new (allocator_) InterferenceNode(allocator_, interval, liveness);
592     physical_fp_nodes_[i]->stage = NodeStage::kPrecolored;
593     fp_intervals_.push_back(interval);
594     if (codegen_->IsBlockedFloatingPointRegister(i)) {
595       interval->AddRange(0, liveness.GetMaxLifetimePosition());
596     }
597   }
598 }
599 
600 void RegisterAllocatorGraphColor::AllocateRegisters() {
601   // (1) Collect and prepare live intervals.
602   ProcessInstructions();
603 
604   for (bool processing_core_regs : {true, false}) {
605     ArenaVector<LiveInterval*>& intervals = processing_core_regs
606         ? core_intervals_
607         : fp_intervals_;
608     size_t num_registers = processing_core_regs
609         ? codegen_->GetNumberOfCoreRegisters()
610         : codegen_->GetNumberOfFloatingPointRegisters();
611 
612     size_t attempt = 0;
613     while (true) {
614       ++attempt;
615       DCHECK(attempt <= kMaxGraphColoringAttemptsDebug)
616           << "Exceeded debug max graph coloring register allocation attempts. "
617           << "This could indicate that the register allocator is not making forward progress, "
618           << "which could be caused by prioritizing the wrong live intervals. (Short intervals "
619           << "should be prioritized over long ones, because they cannot be split further.)";
620 
621       // Many data structures are cleared between graph coloring attempts, so we reduce
622       // total memory usage by using a new arena allocator for each attempt.
623       ArenaAllocator coloring_attempt_allocator(allocator_->GetArenaPool());
624       ColoringIteration iteration(this,
625                                   &coloring_attempt_allocator,
626                                   processing_core_regs,
627                                   num_registers);
628 
629       // (2) Build the interference graph. Also gather safepoints.
630       ArenaVector<InterferenceNode*> safepoints(
631           coloring_attempt_allocator.Adapter(kArenaAllocRegisterAllocator));
632       ArenaVector<InterferenceNode*>& physical_nodes = processing_core_regs
633           ? physical_core_nodes_
634           : physical_fp_nodes_;
635       iteration.BuildInterferenceGraph(intervals, physical_nodes);
636 
637       // (3) Add coalesce opportunities.
638       //     If we have tried coloring the graph a suspiciously high number of times, give
639       //     up on move coalescing, just in case the coalescing heuristics are not conservative.
640       //     (This situation will be caught if DCHECKs are turned on.)
641       if (iterative_move_coalescing_ && attempt <= kMaxGraphColoringAttemptsDebug) {
642         iteration.FindCoalesceOpportunities();
643       }
644 
645       // (4) Prune all uncolored nodes from interference graph.
646       iteration.PruneInterferenceGraph();
647 
648       // (5) Color pruned nodes based on interferences.
649       bool successful = iteration.ColorInterferenceGraph();
650 
651       // We manually clear coalesce opportunities for physical nodes,
652       // since they persist across coloring attempts.
653       for (InterferenceNode* node : physical_core_nodes_) {
654         node->ClearCoalesceOpportunities();
655       }
656       for (InterferenceNode* node : physical_fp_nodes_) {
657         node->ClearCoalesceOpportunities();
658       }
659 
660       if (successful) {
661         // Assign spill slots.
662         AllocateSpillSlots(iteration.GetPrunableNodes());
663 
664         // Tell the code generator which registers were allocated.
665         // We only look at prunable_nodes because we already told the code generator about
666         // fixed intervals while processing instructions. We also ignore the fixed intervals
667         // placed at the top of catch blocks.
668         for (InterferenceNode* node : iteration.GetPrunableNodes()) {
669           LiveInterval* interval = node->GetInterval();
670           if (interval->HasRegister()) {
671             Location low_reg = processing_core_regs
672                 ? Location::RegisterLocation(interval->GetRegister())
673                 : Location::FpuRegisterLocation(interval->GetRegister());
674             codegen_->AddAllocatedRegister(low_reg);
675             if (interval->HasHighInterval()) {
676               LiveInterval* high = interval->GetHighInterval();
677               DCHECK(high->HasRegister());
678               Location high_reg = processing_core_regs
679                   ? Location::RegisterLocation(high->GetRegister())
680                   : Location::FpuRegisterLocation(high->GetRegister());
681               codegen_->AddAllocatedRegister(high_reg);
682             }
683           } else {
684             DCHECK(!interval->HasHighInterval() || !interval->GetHighInterval()->HasRegister());
685           }
686         }
687 
688         break;
689       }
690     }  // while unsuccessful
691   }  // for processing_core_regs
692 
693   // (6) Resolve locations and deconstruct SSA form.
694   RegisterAllocationResolver(allocator_, codegen_, liveness_)
695       .Resolve(ArrayRef<HInstruction* const>(safepoints_),
696                reserved_art_method_slots_ + reserved_out_slots_,
697                num_int_spill_slots_,
698                num_long_spill_slots_,
699                num_float_spill_slots_,
700                num_double_spill_slots_,
701                catch_phi_spill_slot_counter_,
702                temp_intervals_);
703 
704   if (kIsDebugBuild) {
705     Validate(/*log_fatal_on_failure*/ true);
706   }
707 }
708 
709 bool RegisterAllocatorGraphColor::Validate(bool log_fatal_on_failure) {
710   for (bool processing_core_regs : {true, false}) {
711     ArenaVector<LiveInterval*> intervals(
712         allocator_->Adapter(kArenaAllocRegisterAllocatorValidate));
713     for (size_t i = 0; i < liveness_.GetNumberOfSsaValues(); ++i) {
714       HInstruction* instruction = liveness_.GetInstructionFromSsaIndex(i);
715       LiveInterval* interval = instruction->GetLiveInterval();
716       if (interval != nullptr && IsCoreInterval(interval) == processing_core_regs) {
717         intervals.push_back(instruction->GetLiveInterval());
718       }
719     }
720 
721     ArenaVector<InterferenceNode*>& physical_nodes = processing_core_regs
722         ? physical_core_nodes_
723         : physical_fp_nodes_;
724     for (InterferenceNode* fixed : physical_nodes) {
725       LiveInterval* interval = fixed->GetInterval();
726       if (interval->GetFirstRange() != nullptr) {
727         // Ideally we would check fixed ranges as well, but currently there are times when
728         // two fixed intervals for the same register will overlap. For example, a fixed input
729         // and a fixed output may sometimes share the same register, in which case there will be two
730         // fixed intervals for the same place.
731       }
732     }
733 
734     for (LiveInterval* temp : temp_intervals_) {
735       if (IsCoreInterval(temp) == processing_core_regs) {
736         intervals.push_back(temp);
737       }
738     }
739 
740     size_t spill_slots = num_int_spill_slots_
741                        + num_long_spill_slots_
742                        + num_float_spill_slots_
743                        + num_double_spill_slots_
744                        + catch_phi_spill_slot_counter_;
745     bool ok = ValidateIntervals(intervals,
746                                 spill_slots,
747                                 reserved_art_method_slots_ + reserved_out_slots_,
748                                 *codegen_,
749                                 allocator_,
750                                 processing_core_regs,
751                                 log_fatal_on_failure);
752     if (!ok) {
753       return false;
754     }
755   }  // for processing_core_regs
756 
757   return true;
758 }
759 
760 void RegisterAllocatorGraphColor::ProcessInstructions() {
761   for (HBasicBlock* block : codegen_->GetGraph()->GetLinearPostOrder()) {
762     // Note that we currently depend on this ordering, since some helper
763     // code is designed for linear scan register allocation.
764     for (HBackwardInstructionIterator instr_it(block->GetInstructions());
765           !instr_it.Done();
766           instr_it.Advance()) {
767       ProcessInstruction(instr_it.Current());
768     }
769 
770     for (HInstructionIterator phi_it(block->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
771       ProcessInstruction(phi_it.Current());
772     }
773 
774     if (block->IsCatchBlock()
775         || (block->IsLoopHeader() && block->GetLoopInformation()->IsIrreducible())) {
776       // By blocking all registers at the top of each catch block or irreducible loop, we force
777       // intervals belonging to the live-in set of the catch/header block to be spilled.
778       // TODO(ngeoffray): Phis in this block could be allocated in registers.
779       size_t position = block->GetLifetimeStart();
780       BlockRegisters(position, position + 1);
781     }
782   }
783 }
784 
785 void RegisterAllocatorGraphColor::ProcessInstruction(HInstruction* instruction) {
786   LocationSummary* locations = instruction->GetLocations();
787   if (locations == nullptr) {
788     return;
789   }
790   if (locations->NeedsSafepoint() && codegen_->IsLeafMethod()) {
791     // We do this here because we do not want the suspend check to artificially
792     // create live registers.
793     DCHECK(instruction->IsSuspendCheckEntry());
794     DCHECK_EQ(locations->GetTempCount(), 0u);
795     instruction->GetBlock()->RemoveInstruction(instruction);
796     return;
797   }
798 
799   CheckForTempLiveIntervals(instruction);
800   CheckForSafepoint(instruction);
801   if (instruction->GetLocations()->WillCall()) {
802     // If a call will happen, create fixed intervals for caller-save registers.
803     // TODO: Note that it may be beneficial to later split intervals at this point,
804     //       so that we allow last-minute moves from a caller-save register
805     //       to a callee-save register.
806     BlockRegisters(instruction->GetLifetimePosition(),
807                    instruction->GetLifetimePosition() + 1,
808                    /*caller_save_only*/ true);
809   }
810   CheckForFixedInputs(instruction);
811 
812   LiveInterval* interval = instruction->GetLiveInterval();
813   if (interval == nullptr) {
814     // Instructions lacking a valid output location do not have a live interval.
815     DCHECK(!locations->Out().IsValid());
816     return;
817   }
818 
819   // Low intervals act as representatives for their corresponding high interval.
820   DCHECK(!interval->IsHighInterval());
821   if (codegen_->NeedsTwoRegisters(interval->GetType())) {
822     interval->AddHighInterval();
823   }
824   AddSafepointsFor(instruction);
825   CheckForFixedOutput(instruction);
826   AllocateSpillSlotForCatchPhi(instruction);
827 
828   ArenaVector<LiveInterval*>& intervals = IsCoreInterval(interval)
829       ? core_intervals_
830       : fp_intervals_;
831   if (interval->HasSpillSlot() || instruction->IsConstant()) {
832     // Note that if an interval already has a spill slot, then its value currently resides
833     // in the stack (e.g., parameters). Thus we do not have to allocate a register until its first
834     // register use. This is also true for constants, which can be materialized at any point.
835     size_t first_register_use = interval->FirstRegisterUse();
836     if (first_register_use != kNoLifetime) {
837       LiveInterval* split = SplitBetween(interval, interval->GetStart(), first_register_use - 1);
838       intervals.push_back(split);
839     } else {
840       // We won't allocate a register for this value.
841     }
842   } else {
843     intervals.push_back(interval);
844   }
845 }
846 
847 void RegisterAllocatorGraphColor::CheckForFixedInputs(HInstruction* instruction) {
848   // We simply block physical registers where necessary.
849   // TODO: Ideally we would coalesce the physical register with the register
850   //       allocated to the input value, but this can be tricky if, e.g., there
851   //       could be multiple physical register uses of the same value at the
852   //       same instruction. Furthermore, there's currently no distinction between
853   //       fixed inputs to a call (which will be clobbered) and other fixed inputs (which
854   //       may not be clobbered).
855   LocationSummary* locations = instruction->GetLocations();
856   size_t position = instruction->GetLifetimePosition();
857   for (size_t i = 0; i < locations->GetInputCount(); ++i) {
858     Location input = locations->InAt(i);
859     if (input.IsRegister() || input.IsFpuRegister()) {
860       BlockRegister(input, position, position + 1);
861       codegen_->AddAllocatedRegister(input);
862     } else if (input.IsPair()) {
863       BlockRegister(input.ToLow(), position, position + 1);
864       BlockRegister(input.ToHigh(), position, position + 1);
865       codegen_->AddAllocatedRegister(input.ToLow());
866       codegen_->AddAllocatedRegister(input.ToHigh());
867     }
868   }
869 }
870 
871 void RegisterAllocatorGraphColor::CheckForFixedOutput(HInstruction* instruction) {
872   // If an instruction has a fixed output location, we give the live interval a register and then
873   // proactively split it just after the definition point to avoid creating too many interferences
874   // with a fixed node.
875   LiveInterval* interval = instruction->GetLiveInterval();
876   Location out = interval->GetDefinedBy()->GetLocations()->Out();
877   size_t position = instruction->GetLifetimePosition();
878   DCHECK_GE(interval->GetEnd() - position, 2u);
879 
880   if (out.IsUnallocated() && out.GetPolicy() == Location::kSameAsFirstInput) {
881     out = instruction->GetLocations()->InAt(0);
882   }
883 
884   if (out.IsRegister() || out.IsFpuRegister()) {
885     interval->SetRegister(out.reg());
886     codegen_->AddAllocatedRegister(out);
887     Split(interval, position + 1);
888   } else if (out.IsPair()) {
889     interval->SetRegister(out.low());
890     interval->GetHighInterval()->SetRegister(out.high());
891     codegen_->AddAllocatedRegister(out.ToLow());
892     codegen_->AddAllocatedRegister(out.ToHigh());
893     Split(interval, position + 1);
894   } else if (out.IsStackSlot() || out.IsDoubleStackSlot()) {
895     interval->SetSpillSlot(out.GetStackIndex());
896   } else {
897     DCHECK(out.IsUnallocated() || out.IsConstant());
898   }
899 }
900 
901 void RegisterAllocatorGraphColor::AddSafepointsFor(HInstruction* instruction) {
902   LiveInterval* interval = instruction->GetLiveInterval();
903   for (size_t safepoint_index = safepoints_.size(); safepoint_index > 0; --safepoint_index) {
904     HInstruction* safepoint = safepoints_[safepoint_index - 1u];
905     size_t safepoint_position = safepoint->GetLifetimePosition();
906 
907     // Test that safepoints_ are ordered in the optimal way.
908     DCHECK(safepoint_index == safepoints_.size() ||
909            safepoints_[safepoint_index]->GetLifetimePosition() < safepoint_position);
910 
911     if (safepoint_position == interval->GetStart()) {
912       // The safepoint is for this instruction, so the location of the instruction
913       // does not need to be saved.
914       DCHECK_EQ(safepoint_index, safepoints_.size());
915       DCHECK_EQ(safepoint, instruction);
916       continue;
917     } else if (interval->IsDeadAt(safepoint_position)) {
918       break;
919     } else if (!interval->Covers(safepoint_position)) {
920       // Hole in the interval.
921       continue;
922     }
923     interval->AddSafepoint(safepoint);
924   }
925 }
926 
927 void RegisterAllocatorGraphColor::CheckForTempLiveIntervals(HInstruction* instruction) {
928   LocationSummary* locations = instruction->GetLocations();
929   size_t position = instruction->GetLifetimePosition();
930   for (size_t i = 0; i < locations->GetTempCount(); ++i) {
931     Location temp = locations->GetTemp(i);
932     if (temp.IsRegister() || temp.IsFpuRegister()) {
933       BlockRegister(temp, position, position + 1);
934       codegen_->AddAllocatedRegister(temp);
935     } else {
936       DCHECK(temp.IsUnallocated());
937       switch (temp.GetPolicy()) {
938         case Location::kRequiresRegister: {
939           LiveInterval* interval =
940               LiveInterval::MakeTempInterval(allocator_, Primitive::kPrimInt);
941           interval->AddTempUse(instruction, i);
942           core_intervals_.push_back(interval);
943           temp_intervals_.push_back(interval);
944           break;
945         }
946 
947         case Location::kRequiresFpuRegister: {
948           LiveInterval* interval =
949               LiveInterval::MakeTempInterval(allocator_, Primitive::kPrimDouble);
950           interval->AddTempUse(instruction, i);
951           fp_intervals_.push_back(interval);
952           temp_intervals_.push_back(interval);
953           if (codegen_->NeedsTwoRegisters(Primitive::kPrimDouble)) {
954             interval->AddHighInterval(/*is_temp*/ true);
955             temp_intervals_.push_back(interval->GetHighInterval());
956           }
957           break;
958         }
959 
960         default:
961           LOG(FATAL) << "Unexpected policy for temporary location "
962                      << temp.GetPolicy();
963       }
964     }
965   }
966 }
967 
968 void RegisterAllocatorGraphColor::CheckForSafepoint(HInstruction* instruction) {
969   LocationSummary* locations = instruction->GetLocations();
970 
971   if (locations->NeedsSafepoint()) {
972     safepoints_.push_back(instruction);
973   }
974 }
975 
976 LiveInterval* RegisterAllocatorGraphColor::TrySplit(LiveInterval* interval, size_t position) {
977   if (interval->GetStart() < position && position < interval->GetEnd()) {
978     return Split(interval, position);
979   } else {
980     return interval;
981   }
982 }
983 
984 void RegisterAllocatorGraphColor::SplitAtRegisterUses(LiveInterval* interval) {
985   DCHECK(!interval->IsHighInterval());
986 
987   // Split just after a register definition.
988   if (interval->IsParent() && interval->DefinitionRequiresRegister()) {
989     interval = TrySplit(interval, interval->GetStart() + 1);
990   }
991 
992   UsePosition* use = interval->GetFirstUse();
993   while (use != nullptr && use->GetPosition() < interval->GetStart()) {
994     use = use->GetNext();
995   }
996 
997   // Split around register uses.
998   size_t end = interval->GetEnd();
999   while (use != nullptr && use->GetPosition() <= end) {
1000     if (use->RequiresRegister()) {
1001       size_t position = use->GetPosition();
1002       interval = TrySplit(interval, position - 1);
1003       if (liveness_.GetInstructionFromPosition(position / 2)->IsControlFlow()) {
1004         // If we are at the very end of a basic block, we cannot split right
1005         // at the use. Split just after instead.
1006         interval = TrySplit(interval, position + 1);
1007       } else {
1008         interval = TrySplit(interval, position);
1009       }
1010     }
1011     use = use->GetNext();
1012   }
1013 }
1014 
1015 void RegisterAllocatorGraphColor::AllocateSpillSlotForCatchPhi(HInstruction* instruction) {
1016   if (instruction->IsPhi() && instruction->AsPhi()->IsCatchPhi()) {
1017     HPhi* phi = instruction->AsPhi();
1018     LiveInterval* interval = phi->GetLiveInterval();
1019 
1020     HInstruction* previous_phi = phi->GetPrevious();
1021     DCHECK(previous_phi == nullptr ||
1022            previous_phi->AsPhi()->GetRegNumber() <= phi->GetRegNumber())
1023         << "Phis expected to be sorted by vreg number, "
1024         << "so that equivalent phis are adjacent.";
1025 
1026     if (phi->IsVRegEquivalentOf(previous_phi)) {
1027       // Assign the same spill slot.
1028       DCHECK(previous_phi->GetLiveInterval()->HasSpillSlot());
1029       interval->SetSpillSlot(previous_phi->GetLiveInterval()->GetSpillSlot());
1030     } else {
1031       interval->SetSpillSlot(catch_phi_spill_slot_counter_);
1032       catch_phi_spill_slot_counter_ += interval->NumberOfSpillSlotsNeeded();
1033     }
1034   }
1035 }
1036 
1037 void RegisterAllocatorGraphColor::BlockRegister(Location location,
1038                                                 size_t start,
1039                                                 size_t end) {
1040   DCHECK(location.IsRegister() || location.IsFpuRegister());
1041   int reg = location.reg();
1042   LiveInterval* interval = location.IsRegister()
1043       ? physical_core_nodes_[reg]->GetInterval()
1044       : physical_fp_nodes_[reg]->GetInterval();
1045   DCHECK(interval->GetRegister() == reg);
1046   bool blocked_by_codegen = location.IsRegister()
1047       ? codegen_->IsBlockedCoreRegister(reg)
1048       : codegen_->IsBlockedFloatingPointRegister(reg);
1049   if (blocked_by_codegen) {
1050     // We've already blocked this register for the entire method. (And adding a
1051     // range inside another range violates the preconditions of AddRange).
1052   } else {
1053     interval->AddRange(start, end);
1054   }
1055 }
1056 
1057 void RegisterAllocatorGraphColor::BlockRegisters(size_t start, size_t end, bool caller_save_only) {
1058   for (size_t i = 0; i < codegen_->GetNumberOfCoreRegisters(); ++i) {
1059     if (!caller_save_only || !codegen_->IsCoreCalleeSaveRegister(i)) {
1060       BlockRegister(Location::RegisterLocation(i), start, end);
1061     }
1062   }
1063   for (size_t i = 0; i < codegen_->GetNumberOfFloatingPointRegisters(); ++i) {
1064     if (!caller_save_only || !codegen_->IsFloatingPointCalleeSaveRegister(i)) {
1065       BlockRegister(Location::FpuRegisterLocation(i), start, end);
1066     }
1067   }
1068 }
1069 
1070 void ColoringIteration::AddPotentialInterference(InterferenceNode* from,
1071                                                  InterferenceNode* to,
1072                                                  bool guaranteed_not_interfering_yet,
1073                                                  bool both_directions) {
1074   if (from->IsPrecolored()) {
1075     // We save space by ignoring outgoing edges from fixed nodes.
1076   } else if (to->IsPrecolored()) {
1077     // It is important that only a single node represents a given fixed register in the
1078     // interference graph. We retrieve that node here.
1079     const ArenaVector<InterferenceNode*>& physical_nodes = to->GetInterval()->IsFloatingPoint()
1080         ? register_allocator_->physical_fp_nodes_
1081         : register_allocator_->physical_core_nodes_;
1082     InterferenceNode* physical_node = physical_nodes[to->GetInterval()->GetRegister()];
1083     from->AddInterference(physical_node, /*guaranteed_not_interfering_yet*/ false);
1084     DCHECK_EQ(to->GetInterval()->GetRegister(), physical_node->GetInterval()->GetRegister());
1085     DCHECK_EQ(to->GetAlias(), physical_node) << "Fixed nodes should alias the canonical fixed node";
1086 
1087     // If a node interferes with a fixed pair node, the weight of the edge may
1088     // be inaccurate after using the alias of the pair node, because the alias of the pair node
1089     // is a singular node.
1090     // We could make special pair fixed nodes, but that ends up being too conservative because
1091     // a node could then interfere with both {r1} and {r1,r2}, leading to a degree of
1092     // three rather than two.
1093     // Instead, we explicitly add an interference with the high node of the fixed pair node.
1094     // TODO: This is too conservative at times for pair nodes, but the fact that fixed pair intervals
1095     //       can be unaligned on x86 complicates things.
1096     if (to->IsPair()) {
1097       InterferenceNode* high_node =
1098           physical_nodes[to->GetInterval()->GetHighInterval()->GetRegister()];
1099       DCHECK_EQ(to->GetInterval()->GetHighInterval()->GetRegister(),
1100                 high_node->GetInterval()->GetRegister());
1101       from->AddInterference(high_node, /*guaranteed_not_interfering_yet*/ false);
1102     }
1103   } else {
1104     // Standard interference between two uncolored nodes.
1105     from->AddInterference(to, guaranteed_not_interfering_yet);
1106   }
1107 
1108   if (both_directions) {
1109     AddPotentialInterference(to, from, guaranteed_not_interfering_yet, /*both_directions*/ false);
1110   }
1111 }
1112 
1113 // Returns true if `in_node` represents an input interval of `out_node`, and the output interval
1114 // is allowed to have the same register as the input interval.
1115 // TODO: Ideally we should just produce correct intervals in liveness analysis.
1116 //       We would need to refactor the current live interval layout to do so, which is
1117 //       no small task.
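// Illustrative case (hypothetical HIR): for an `HAdd(x, y)` whose locations report that the
// output need not overlap its inputs, if x's interval dies at the add's use position and no new
// sibling starts there, this returns true and the add's output is allowed to share x's register.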
1118 static bool CheckInputOutputCanOverlap(InterferenceNode* in_node, InterferenceNode* out_node) {
1119   LiveInterval* output_interval = out_node->GetInterval();
1120   HInstruction* defined_by = output_interval->GetDefinedBy();
1121   if (defined_by == nullptr) {
1122     // This must not be a definition point.
1123     return false;
1124   }
1125 
1126   LocationSummary* locations = defined_by->GetLocations();
1127   if (locations->OutputCanOverlapWithInputs()) {
1128     // This instruction does not allow the output to reuse a register from an input.
1129     return false;
1130   }
1131 
1132   LiveInterval* input_interval = in_node->GetInterval();
1133   LiveInterval* next_sibling = input_interval->GetNextSibling();
1134   size_t def_position = defined_by->GetLifetimePosition();
1135   size_t use_position = def_position + 1;
1136   if (next_sibling != nullptr && next_sibling->GetStart() == use_position) {
1137     // The next sibling starts at the use position, so reusing the input register in the output
1138     // would clobber the input before it's moved into the sibling interval location.
1139     return false;
1140   }
1141 
1142   if (!input_interval->IsDeadAt(use_position) && input_interval->CoversSlow(use_position)) {
1143     // The input interval is live after the use position.
1144     return false;
1145   }
1146 
1147   HInputsRef inputs = defined_by->GetInputs();
1148   for (size_t i = 0; i < inputs.size(); ++i) {
1149     if (inputs[i]->GetLiveInterval()->GetSiblingAt(def_position) == input_interval) {
1150       DCHECK(input_interval->SameRegisterKind(*output_interval));
1151       return true;
1152     }
1153   }
1154 
1155   // The input interval was not an input for this instruction.
1156   return false;
1157 }
1158 
1159 void ColoringIteration::BuildInterferenceGraph(
1160     const ArenaVector<LiveInterval*>& intervals,
1161     const ArenaVector<InterferenceNode*>& physical_nodes) {
1162   DCHECK(interval_node_map_.Empty() && prunable_nodes_.empty());
1163   // Build the interference graph efficiently by ordering range endpoints
1164   // by position and doing a linear sweep to find interferences. (That is, we
1165   // jump from endpoint to endpoint, maintaining a set of intervals live at each
1166   // point. If two nodes are ever in the live set at the same time, then they
1167   // interfere with each other.)
1168   //
1169   // We order by both position and (secondarily) by whether the endpoint
1170   // begins or ends a range; we want to process range endings before range
1171   // beginnings at the same position because they should not conflict.
1172   //
1173   // For simplicity, we create a tuple for each endpoint, and then sort the tuples.
1174   // Tuple contents: (position, is_range_beginning, node).
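  // Illustrative sketch (hypothetical nodes, not part of the algorithm): siblings A with the
  // single range [2, 6) and B with [6, 10) yield the tuples
  //   (2, true, A), (6, false, A), (6, true, B), (10, false, B).
  // Since false sorts before true, A's range ending is processed before B's range beginning at
  // position 6, so A and B are never simultaneously live and no interference is recorded.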
1175   ArenaVector<std::tuple<size_t, bool, InterferenceNode*>> range_endpoints(
1176       allocator_->Adapter(kArenaAllocRegisterAllocator));
1177 
1178   // We reserve plenty of space to avoid excessive copying.
1179   range_endpoints.reserve(4 * intervals.size());
1180 
1181   for (LiveInterval* parent : intervals) {
1182     for (LiveInterval* sibling = parent; sibling != nullptr; sibling = sibling->GetNextSibling()) {
1183       LiveRange* range = sibling->GetFirstRange();
1184       if (range != nullptr) {
1185         InterferenceNode* node = new (allocator_) InterferenceNode(
1186             allocator_, sibling, register_allocator_->liveness_);
1187         interval_node_map_.Insert(std::make_pair(sibling, node));
1188 
1189         if (sibling->HasRegister()) {
1190           // Fixed nodes should alias the canonical node for the corresponding register.
1191           node->stage = NodeStage::kPrecolored;
1192           InterferenceNode* physical_node = physical_nodes[sibling->GetRegister()];
1193           node->SetAlias(physical_node);
1194           DCHECK_EQ(node->GetInterval()->GetRegister(),
1195                     physical_node->GetInterval()->GetRegister());
1196         } else {
1197           node->stage = NodeStage::kPrunable;
1198           prunable_nodes_.push_back(node);
1199         }
1200 
1201         while (range != nullptr) {
1202           range_endpoints.push_back(std::make_tuple(range->GetStart(), true, node));
1203           range_endpoints.push_back(std::make_tuple(range->GetEnd(), false, node));
1204           range = range->GetNext();
1205         }
1206       }
1207     }
1208   }
1209 
1210   // Sort the endpoints.
1211   // We explicitly ignore the third entry of each tuple (the node pointer) in order
1212   // to maintain determinism.
1213   std::sort(range_endpoints.begin(), range_endpoints.end(),
1214             [] (const std::tuple<size_t, bool, InterferenceNode*>& lhs,
1215                 const std::tuple<size_t, bool, InterferenceNode*>& rhs) {
1216     return std::tie(std::get<0>(lhs), std::get<1>(lhs))
1217          < std::tie(std::get<0>(rhs), std::get<1>(rhs));
1218   });
1219 
1220   // Nodes live at the current position in the linear sweep.
1221   ArenaVector<InterferenceNode*> live(
1222       allocator_->Adapter(kArenaAllocRegisterAllocator));
1223 
1224   // Linear sweep. When we encounter the beginning of a range, we add the corresponding node to the
1225   // live set. When we encounter the end of a range, we remove the corresponding node
1226   // from the live set. Nodes interfere if they are in the live set at the same time.
1227   for (auto it = range_endpoints.begin(); it != range_endpoints.end(); ++it) {
1228     bool is_range_beginning;
1229     InterferenceNode* node;
1230     size_t position;
1231     // Extract information from the tuple, including the node this tuple represents.
1232     std::tie(position, is_range_beginning, node) = *it;
1233 
1234     if (is_range_beginning) {
1235       bool guaranteed_not_interfering_yet = position == node->GetInterval()->GetStart();
1236       for (InterferenceNode* conflicting : live) {
1237         DCHECK_NE(node, conflicting);
1238         if (CheckInputOutputCanOverlap(conflicting, node)) {
1239           // We do not add an interference, because the instruction represented by `node` allows
1240           // its output to share a register with an input, represented here by `conflicting`.
1241         } else {
1242           AddPotentialInterference(node, conflicting, guaranteed_not_interfering_yet);
1243         }
1244       }
1245       DCHECK(std::find(live.begin(), live.end(), node) == live.end());
1246       live.push_back(node);
1247     } else {
1248       // End of range.
1249       auto live_it = std::find(live.begin(), live.end(), node);
1250       DCHECK(live_it != live.end());
1251       live.erase(live_it);
1252     }
1253   }
1254   DCHECK(live.empty());
1255 }
1256 
1257 void ColoringIteration::CreateCoalesceOpportunity(InterferenceNode* a,
1258                                                   InterferenceNode* b,
1259                                                   CoalesceKind kind,
1260                                                   size_t position) {
1261   DCHECK_EQ(a->IsPair(), b->IsPair())
1262       << "Nodes of different memory widths should never be coalesced";
1263   CoalesceOpportunity* opportunity =
1264       new (allocator_) CoalesceOpportunity(a, b, kind, position, register_allocator_->liveness_);
1265   a->AddCoalesceOpportunity(opportunity);
1266   b->AddCoalesceOpportunity(opportunity);
1267   coalesce_worklist_.push(opportunity);
1268 }
1269 
1270 // When looking for coalesce opportunities, we use the interval_node_map_ to find the node
1271 // corresponding to an interval. Note that not all intervals are in this map, notably the parents
1272 // of constants and stack arguments. (However, these intervals should not be involved in coalesce
1273 // opportunities anyway, because they're not going to be in registers.)
1274 void ColoringIteration::FindCoalesceOpportunities() {
1275   DCHECK(coalesce_worklist_.empty());
1276 
1277   for (InterferenceNode* node : prunable_nodes_) {
1278     LiveInterval* interval = node->GetInterval();
1279 
1280     // Coalesce siblings.
1281     LiveInterval* next_sibling = interval->GetNextSibling();
1282     if (next_sibling != nullptr && interval->GetEnd() == next_sibling->GetStart()) {
1283       auto it = interval_node_map_.Find(next_sibling);
1284       if (it != interval_node_map_.end()) {
1285         InterferenceNode* sibling_node = it->second;
1286         CreateCoalesceOpportunity(node,
1287                                   sibling_node,
1288                                   CoalesceKind::kAdjacentSibling,
1289                                   interval->GetEnd());
1290       }
1291     }
1292 
1293     // Coalesce fixed outputs with this interval if this interval is an adjacent sibling.
1294     LiveInterval* parent = interval->GetParent();
1295     if (parent->HasRegister()
1296         && parent->GetNextSibling() == interval
1297         && parent->GetEnd() == interval->GetStart()) {
1298       auto it = interval_node_map_.Find(parent);
1299       if (it != interval_node_map_.end()) {
1300         InterferenceNode* parent_node = it->second;
1301         CreateCoalesceOpportunity(node,
1302                                   parent_node,
1303                                   CoalesceKind::kFixedOutputSibling,
1304                                   parent->GetEnd());
1305       }
1306     }
1307 
1308     // Try to prevent moves across blocks.
1309     // Note that this does not lead to many successful coalesce attempts, so it could be removed
1310     // if found to add to compile time.
1311     const SsaLivenessAnalysis& liveness = register_allocator_->liveness_;
1312     if (interval->IsSplit() && liveness.IsAtBlockBoundary(interval->GetStart() / 2)) {
1313       // If the start of this interval is at a block boundary, we look at the
1314       // location of the interval in blocks preceding the block this interval
1315       // starts at. This can avoid a move between the two blocks.
1316       HBasicBlock* block = liveness.GetBlockFromPosition(interval->GetStart() / 2);
1317       for (HBasicBlock* predecessor : block->GetPredecessors()) {
1318         size_t position = predecessor->GetLifetimeEnd() - 1;
1319         LiveInterval* existing = interval->GetParent()->GetSiblingAt(position);
1320         if (existing != nullptr) {
1321           auto it = interval_node_map_.Find(existing);
1322           if (it != interval_node_map_.end()) {
1323             InterferenceNode* existing_node = it->second;
1324             CreateCoalesceOpportunity(node,
1325                                       existing_node,
1326                                       CoalesceKind::kNonlinearControlFlow,
1327                                       position);
1328           }
1329         }
1330       }
1331     }
1332 
1333     // Coalesce phi inputs with the corresponding output.
1334     HInstruction* defined_by = interval->GetDefinedBy();
1335     if (defined_by != nullptr && defined_by->IsPhi()) {
1336       const ArenaVector<HBasicBlock*>& predecessors = defined_by->GetBlock()->GetPredecessors();
1337       HInputsRef inputs = defined_by->GetInputs();
1338 
1339       for (size_t i = 0, e = inputs.size(); i < e; ++i) {
1340         // We want the sibling at the end of the appropriate predecessor block.
1341         size_t position = predecessors[i]->GetLifetimeEnd() - 1;
1342         LiveInterval* input_interval = inputs[i]->GetLiveInterval()->GetSiblingAt(position);
1343 
1344         auto it = interval_node_map_.Find(input_interval);
1345         if (it != interval_node_map_.end()) {
1346           InterferenceNode* input_node = it->second;
1347           CreateCoalesceOpportunity(node, input_node, CoalesceKind::kPhi, position);
1348         }
1349       }
1350     }
1351 
1352     // Coalesce output with first input when policy is kSameAsFirstInput.
1353     if (defined_by != nullptr) {
1354       Location out = defined_by->GetLocations()->Out();
1355       if (out.IsUnallocated() && out.GetPolicy() == Location::kSameAsFirstInput) {
1356         LiveInterval* input_interval
1357             = defined_by->InputAt(0)->GetLiveInterval()->GetSiblingAt(interval->GetStart() - 1);
1358         // TODO: Could we consider lifetime holes here?
1359         if (input_interval->GetEnd() == interval->GetStart()) {
1360           auto it = interval_node_map_.Find(input_interval);
1361           if (it != interval_node_map_.end()) {
1362             InterferenceNode* input_node = it->second;
1363             CreateCoalesceOpportunity(node,
1364                                       input_node,
1365                                       CoalesceKind::kFirstInput,
1366                                       interval->GetStart());
1367           }
1368         }
1369       }
1370     }
1371 
1372     // An interval that starts at an instruction (that is, it is not split) may
1373     // re-use the registers used by the inputs of that instruction, based on the
1374     // location summary.
1375     if (defined_by != nullptr) {
1376       DCHECK(!interval->IsSplit());
1377       LocationSummary* locations = defined_by->GetLocations();
1378       if (!locations->OutputCanOverlapWithInputs()) {
1379         HInputsRef inputs = defined_by->GetInputs();
1380         for (size_t i = 0; i < inputs.size(); ++i) {
1381           size_t def_point = defined_by->GetLifetimePosition();
1382           // TODO: Getting the sibling at the def_point might not be quite what we want
1383           //       for fixed inputs, since the use will be *at* the def_point rather than after.
1384           LiveInterval* input_interval = inputs[i]->GetLiveInterval()->GetSiblingAt(def_point);
1385           if (input_interval != nullptr &&
1386               input_interval->HasHighInterval() == interval->HasHighInterval()) {
1387             auto it = interval_node_map_.Find(input_interval);
1388             if (it != interval_node_map_.end()) {
1389               InterferenceNode* input_node = it->second;
1390               CreateCoalesceOpportunity(node,
1391                                         input_node,
1392                                         CoalesceKind::kAnyInput,
1393                                         interval->GetStart());
1394             }
1395           }
1396         }
1397       }
1398     }
1399 
1400     // Try to prevent moves into fixed input locations.
1401     UsePosition* use = interval->GetFirstUse();
1402     for (; use != nullptr && use->GetPosition() <= interval->GetStart(); use = use->GetNext()) {
1403       // Skip past uses at or before the start of this interval.
1404     }
1405     for (; use != nullptr && use->GetPosition() <= interval->GetEnd(); use = use->GetNext()) {
1406       HInstruction* user = use->GetUser();
1407       if (user == nullptr) {
1408         // User may be null for certain intervals, such as temp intervals.
1409         continue;
1410       }
1411       LocationSummary* locations = user->GetLocations();
1412       Location input = locations->InAt(use->GetInputIndex());
1413       if (input.IsRegister() || input.IsFpuRegister()) {
1414         // TODO: Could try to handle pair interval too, but coalescing with fixed pair nodes
1415         //       is currently not supported.
1416         InterferenceNode* fixed_node = input.IsRegister()
1417             ? register_allocator_->physical_core_nodes_[input.reg()]
1418             : register_allocator_->physical_fp_nodes_[input.reg()];
1419         CreateCoalesceOpportunity(node,
1420                                   fixed_node,
1421                                   CoalesceKind::kFixedInput,
1422                                   user->GetLifetimePosition());
1423       }
1424     }
1425   }  // for node in prunable_nodes
1426 }
1427 
1428 static bool IsLowDegreeNode(InterferenceNode* node, size_t num_regs) {
1429   return node->GetOutDegree() < num_regs;
1430 }
1431 
1432 static bool IsHighDegreeNode(InterferenceNode* node, size_t num_regs) {
1433   return !IsLowDegreeNode(node, num_regs);
1434 }
1435 
1436 void ColoringIteration::PruneInterferenceGraph() {
1437   DCHECK(pruned_nodes_.empty()
1438       && simplify_worklist_.empty()
1439       && freeze_worklist_.empty()
1440       && spill_worklist_.empty());
1441   // When pruning the graph, we refer to nodes with degree less than num_regs as low degree nodes,
1442   // and all others as high degree nodes. The distinction is important: low degree nodes are
1443   // guaranteed a color, while high degree nodes are not.
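  // (A brief sketch of why low degree nodes are safe: a node with fewer than num_regs neighbors
  //  can be colored after all of them, since its neighbors occupy at most num_regs - 1 distinct
  //  registers, always leaving at least one register free for it.)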
1444 
1445   // Build worklists. Note that the coalesce worklist has already been
1446   // filled by FindCoalesceOpportunities().
1447   for (InterferenceNode* node : prunable_nodes_) {
1448     DCHECK(!node->IsPrecolored()) << "Fixed nodes should never be pruned";
1449     if (IsLowDegreeNode(node, num_regs_)) {
1450       if (node->GetCoalesceOpportunities().empty()) {
1451         // Simplify Worklist.
1452         node->stage = NodeStage::kSimplifyWorklist;
1453         simplify_worklist_.push_back(node);
1454       } else {
1455         // Freeze Worklist.
1456         node->stage = NodeStage::kFreezeWorklist;
1457         freeze_worklist_.push_back(node);
1458       }
1459     } else {
1460       // Spill worklist.
1461       node->stage = NodeStage::kSpillWorklist;
1462       spill_worklist_.push(node);
1463     }
1464   }
1465 
1466   // Prune graph.
1467   // Note that we do not remove a node from its current worklist if it moves to another, so it may
1468   // be in multiple worklists at once; the node's `stage` says which worklist it is really in.
1469   while (true) {
1470     if (!simplify_worklist_.empty()) {
1471       // Prune low-degree nodes.
1472       // TODO: pop_back() should work as well, but it didn't; we get a
1473       //       failed check while pruning. We should look into this.
1474       InterferenceNode* node = simplify_worklist_.front();
1475       simplify_worklist_.pop_front();
1476       DCHECK_EQ(node->stage, NodeStage::kSimplifyWorklist) << "Cannot move from simplify list";
1477       DCHECK_LT(node->GetOutDegree(), num_regs_) << "Nodes in simplify list should be low degree";
1478       DCHECK(!node->IsMoveRelated()) << "Nodes in simplify list should not be move related";
1479       PruneNode(node);
1480     } else if (!coalesce_worklist_.empty()) {
1481       // Coalesce.
1482       CoalesceOpportunity* opportunity = coalesce_worklist_.top();
1483       coalesce_worklist_.pop();
1484       if (opportunity->stage == CoalesceStage::kWorklist) {
1485         Coalesce(opportunity);
1486       }
1487     } else if (!freeze_worklist_.empty()) {
1488       // Freeze moves and prune a low-degree move-related node.
1489       InterferenceNode* node = freeze_worklist_.front();
1490       freeze_worklist_.pop_front();
1491       if (node->stage == NodeStage::kFreezeWorklist) {
1492         DCHECK_LT(node->GetOutDegree(), num_regs_) << "Nodes in freeze list should be low degree";
1493         DCHECK(node->IsMoveRelated()) << "Nodes in freeze list should be move related";
1494         FreezeMoves(node);
1495         PruneNode(node);
1496       }
1497     } else if (!spill_worklist_.empty()) {
1498       // We spill the lowest-priority node, because pruning a node earlier
1499       // gives it a higher chance of being spilled.
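      // (Pruned nodes are colored in reverse pruning order when popped off pruned_nodes_, so a
      //  node pruned earlier is colored later, when more registers have already been handed out.)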
1500       InterferenceNode* node = spill_worklist_.top();
1501       spill_worklist_.pop();
1502       if (node->stage == NodeStage::kSpillWorklist) {
1503         DCHECK_GE(node->GetOutDegree(), num_regs_) << "Nodes in spill list should be high degree";
1504         FreezeMoves(node);
1505         PruneNode(node);
1506       }
1507     } else {
1508       // Pruning complete.
1509       break;
1510     }
1511   }
1512   DCHECK_EQ(prunable_nodes_.size(), pruned_nodes_.size());
1513 }
1514 
1515 void ColoringIteration::EnableCoalesceOpportunities(InterferenceNode* node) {
1516   for (CoalesceOpportunity* opportunity : node->GetCoalesceOpportunities()) {
1517     if (opportunity->stage == CoalesceStage::kActive) {
1518       opportunity->stage = CoalesceStage::kWorklist;
1519       coalesce_worklist_.push(opportunity);
1520     }
1521   }
1522 }
1523 
1524 void ColoringIteration::PruneNode(InterferenceNode* node) {
1525   DCHECK_NE(node->stage, NodeStage::kPruned);
1526   DCHECK(!node->IsPrecolored());
1527   node->stage = NodeStage::kPruned;
1528   pruned_nodes_.push(node);
1529 
1530   for (InterferenceNode* adj : node->GetAdjacentNodes()) {
1531     DCHECK_NE(adj->stage, NodeStage::kPruned) << "Should be no interferences with pruned nodes";
1532 
1533     if (adj->IsPrecolored()) {
1534       // No effect on pre-colored nodes; they're never pruned.
1535     } else {
1536       // Remove the interference.
1537       bool was_high_degree = IsHighDegreeNode(adj, num_regs_);
1538       DCHECK(adj->ContainsInterference(node))
1539           << "Missing symmetric interference from non-fixed node";
1540       adj->RemoveInterference(node);
1541 
1542       // Handle transitions from high degree to low degree.
1543       if (was_high_degree && IsLowDegreeNode(adj, num_regs_)) {
1544         EnableCoalesceOpportunities(adj);
1545         for (InterferenceNode* adj_adj : adj->GetAdjacentNodes()) {
1546           EnableCoalesceOpportunities(adj_adj);
1547         }
1548 
1549         DCHECK_EQ(adj->stage, NodeStage::kSpillWorklist);
1550         if (adj->IsMoveRelated()) {
1551           adj->stage = NodeStage::kFreezeWorklist;
1552           freeze_worklist_.push_back(adj);
1553         } else {
1554           adj->stage = NodeStage::kSimplifyWorklist;
1555           simplify_worklist_.push_back(adj);
1556         }
1557       }
1558     }
1559   }
1560 }
1561 
1562 void ColoringIteration::CheckTransitionFromFreezeWorklist(InterferenceNode* node) {
1563   if (IsLowDegreeNode(node, num_regs_) && !node->IsMoveRelated()) {
1564     DCHECK_EQ(node->stage, NodeStage::kFreezeWorklist);
1565     node->stage = NodeStage::kSimplifyWorklist;
1566     simplify_worklist_.push_back(node);
1567   }
1568 }
1569 
1570 void ColoringIteration::FreezeMoves(InterferenceNode* node) {
1571   for (CoalesceOpportunity* opportunity : node->GetCoalesceOpportunities()) {
1572     if (opportunity->stage == CoalesceStage::kDefunct) {
1573       // Constrained moves should remain constrained, since they will not be considered
1574       // during last-chance coalescing.
1575     } else {
1576       opportunity->stage = CoalesceStage::kInactive;
1577     }
1578     InterferenceNode* other = opportunity->node_a->GetAlias() == node
1579         ? opportunity->node_b->GetAlias()
1580         : opportunity->node_a->GetAlias();
1581     if (other != node && other->stage == NodeStage::kFreezeWorklist) {
1582       DCHECK(IsLowDegreeNode(node, num_regs_));
1583       CheckTransitionFromFreezeWorklist(other);
1584     }
1585   }
1586 }
1587 
1588 bool ColoringIteration::PrecoloredHeuristic(InterferenceNode* from,
1589                                             InterferenceNode* into) {
1590   if (!into->IsPrecolored()) {
1591     // The uncolored heuristic will cover this case.
1592     return false;
1593   }
1594   if (from->IsPair() || into->IsPair()) {
1595     // TODO: Merging from a pair node is currently not supported, since fixed pair nodes
1596     //       are currently represented as two single fixed nodes in the graph, and `into` is
1597     //       only one of them. (We may lose the implicit connections to the second one in a merge.)
1598     return false;
1599   }
1600 
1601   // If all adjacent nodes of `from` are "ok", then we can conservatively merge with `into`.
1602   // Reasons an adjacent node `adj` can be "ok":
1603   // (1) If `adj` is low degree, interference with `into` will not affect its existing
1604   //     colorable guarantee. (Notice that coalescing cannot increase its degree.)
1605   // (2) If `adj` is pre-colored, it already interferes with `into`. See (3).
1606   // (3) If there's already an interference with `into`, coalescing will not add interferences.
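  // Illustrative sketch (hypothetical nodes): if `from` has neighbors {n1, n2}, where n1 is low
  // degree (case 1) and n2 already interferes with `into` (case 3), then merging `from` into
  // `into` adds no constraint that could make a previously colorable neighbor uncolorable, so the
  // merge is conservative.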
1607   for (InterferenceNode* adj : from->GetAdjacentNodes()) {
1608     if (IsLowDegreeNode(adj, num_regs_) || adj->IsPrecolored() || adj->ContainsInterference(into)) {
1609       // Ok.
1610     } else {
1611       return false;
1612     }
1613   }
1614   return true;
1615 }
1616 
1617 bool ColoringIteration::UncoloredHeuristic(InterferenceNode* from,
1618                                            InterferenceNode* into) {
1619   if (into->IsPrecolored()) {
1620     // The pre-colored heuristic will handle this case.
1621     return false;
1622   }
1623 
1624   // Arbitrary cap to improve compile time. Tests show that this has negligible effect
1625   // on generated code.
1626   if (from->GetOutDegree() + into->GetOutDegree() > 2 * num_regs_) {
1627     return false;
1628   }
1629 
1630   // It's safe to coalesce two nodes if the resulting node has fewer than `num_regs` neighbors
1631   // of high degree. (Low degree neighbors can be ignored, because they will eventually be
1632   // pruned from the interference graph in the simplify stage.)
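  // Illustrative sketch (hypothetical nodes, with num_regs_ = 4): if `from` and `into` together
  // see three high-degree neighbors {h1, h2, h3}, counted once each via edge weights, the merged
  // node has 3 < 4 significant neighbors and coalescing is allowed; a fourth distinct high-degree
  // neighbor would block it.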
1633   size_t high_degree_interferences = 0;
1634   for (InterferenceNode* adj : from->GetAdjacentNodes()) {
1635     if (IsHighDegreeNode(adj, num_regs_)) {
1636       high_degree_interferences += from->EdgeWeightWith(adj);
1637     }
1638   }
1639   for (InterferenceNode* adj : into->GetAdjacentNodes()) {
1640     if (IsHighDegreeNode(adj, num_regs_)) {
1641       if (from->ContainsInterference(adj)) {
1642         // We've already counted this adjacent node.
1643         // Furthermore, its degree will decrease if coalescing succeeds. Thus, it's possible that
1644         // we should not have counted it at all. (This extends the textbook Briggs coalescing test,
1645         // but remains conservative.)
1646         if (adj->GetOutDegree() - into->EdgeWeightWith(adj) < num_regs_) {
1647           high_degree_interferences -= from->EdgeWeightWith(adj);
1648         }
1649       } else {
1650         high_degree_interferences += into->EdgeWeightWith(adj);
1651       }
1652     }
1653   }
1654 
1655   return high_degree_interferences < num_regs_;
1656 }
1657 
1658 void ColoringIteration::Combine(InterferenceNode* from,
1659                                 InterferenceNode* into) {
1660   from->SetAlias(into);
1661 
1662   // Add interferences.
1663   for (InterferenceNode* adj : from->GetAdjacentNodes()) {
1664     bool was_low_degree = IsLowDegreeNode(adj, num_regs_);
1665     AddPotentialInterference(adj, into, /*guaranteed_not_interfering_yet*/ false);
1666     if (was_low_degree && IsHighDegreeNode(adj, num_regs_)) {
1667       // This is a (temporary) transition to a high degree node. Its degree will decrease again
1668       // when we prune `from`, but it's best to be consistent about the current worklist.
1669       adj->stage = NodeStage::kSpillWorklist;
1670       spill_worklist_.push(adj);
1671     }
1672   }
1673 
1674   // Add coalesce opportunities.
1675   for (CoalesceOpportunity* opportunity : from->GetCoalesceOpportunities()) {
1676     if (opportunity->stage != CoalesceStage::kDefunct) {
1677       into->AddCoalesceOpportunity(opportunity);
1678     }
1679   }
1680   EnableCoalesceOpportunities(from);
1681 
1682   // Prune and update worklists.
1683   PruneNode(from);
1684   if (IsLowDegreeNode(into, num_regs_)) {
1685     // Coalesce(...) takes care of checking for a transition to the simplify worklist.
1686     DCHECK_EQ(into->stage, NodeStage::kFreezeWorklist);
1687   } else if (into->stage == NodeStage::kFreezeWorklist) {
1688     // This is a transition to a high degree node.
1689     into->stage = NodeStage::kSpillWorklist;
1690     spill_worklist_.push(into);
1691   } else {
1692     DCHECK(into->stage == NodeStage::kSpillWorklist || into->stage == NodeStage::kPrecolored);
1693   }
1694 }
1695 
1696 void ColoringIteration::Coalesce(CoalesceOpportunity* opportunity) {
1697   InterferenceNode* from = opportunity->node_a->GetAlias();
1698   InterferenceNode* into = opportunity->node_b->GetAlias();
1699   DCHECK_NE(from->stage, NodeStage::kPruned);
1700   DCHECK_NE(into->stage, NodeStage::kPruned);
1701 
1702   if (from->IsPrecolored()) {
1703     // If we have one pre-colored node, make sure it's the `into` node.
1704     std::swap(from, into);
1705   }
1706 
1707   if (from == into) {
1708     // These nodes have already been coalesced.
1709     opportunity->stage = CoalesceStage::kDefunct;
1710     CheckTransitionFromFreezeWorklist(from);
1711   } else if (from->IsPrecolored() || from->ContainsInterference(into)) {
1712     // These nodes interfere.
1713     opportunity->stage = CoalesceStage::kDefunct;
1714     CheckTransitionFromFreezeWorklist(from);
1715     CheckTransitionFromFreezeWorklist(into);
1716   } else if (PrecoloredHeuristic(from, into)
1717           || UncoloredHeuristic(from, into)) {
1718     // We can coalesce these nodes.
1719     opportunity->stage = CoalesceStage::kDefunct;
1720     Combine(from, into);
1721     CheckTransitionFromFreezeWorklist(into);
1722   } else {
1723     // We cannot coalesce, but we may be able to later.
1724     opportunity->stage = CoalesceStage::kActive;
1725   }
1726 }
1727 
1728 // Build a mask with a bit set for each register assigned to some
1729 // interval in `intervals`.
1730 template <typename Container>
1731 static std::bitset<kMaxNumRegs> BuildConflictMask(Container& intervals) {
1732   std::bitset<kMaxNumRegs> conflict_mask;
1733   for (InterferenceNode* adjacent : intervals) {
1734     LiveInterval* conflicting = adjacent->GetInterval();
1735     if (conflicting->HasRegister()) {
1736       conflict_mask.set(conflicting->GetRegister());
1737       if (conflicting->HasHighInterval()) {
1738         DCHECK(conflicting->GetHighInterval()->HasRegister());
1739         conflict_mask.set(conflicting->GetHighInterval()->GetRegister());
1740       }
1741     } else {
1742       DCHECK(!conflicting->HasHighInterval()
1743           || !conflicting->GetHighInterval()->HasRegister());
1744     }
1745   }
1746   return conflict_mask;
1747 }
1748 
1749 bool RegisterAllocatorGraphColor::IsCallerSave(size_t reg, bool processing_core_regs) {
1750   return processing_core_regs
1751       ? !codegen_->IsCoreCalleeSaveRegister(reg)
1752       : !codegen_->IsFloatingPointCalleeSaveRegister(reg);
1753 }
1754 
1755 static bool RegisterIsAligned(size_t reg) {
1756   return reg % 2 == 0;
1757 }
1758 
1759 static size_t FindFirstZeroInConflictMask(std::bitset<kMaxNumRegs> conflict_mask) {
1760   // We use CTZ (count trailing zeros) to quickly find the lowest 0 bit.
1761   // Note that CTZ is undefined for a zero input (i.e. when every register conflicts), so we special-case it.
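  // Illustrative sketch (hypothetical mask): if conflict_mask has bits {0, 1, 3} set (0b1011),
  // then ~conflict_mask has bit 2 as its lowest set bit and CTZ returns 2, the first free register.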
1762   return conflict_mask.all() ? conflict_mask.size() : CTZ(~conflict_mask.to_ulong());
1763 }
1764 
1765 bool ColoringIteration::ColorInterferenceGraph() {
1766   DCHECK_LE(num_regs_, kMaxNumRegs) << "kMaxNumRegs is too small";
1767   ArenaVector<LiveInterval*> colored_intervals(
1768       allocator_->Adapter(kArenaAllocRegisterAllocator));
1769   bool successful = true;
1770 
1771   while (!pruned_nodes_.empty()) {
1772     InterferenceNode* node = pruned_nodes_.top();
1773     pruned_nodes_.pop();
1774     LiveInterval* interval = node->GetInterval();
1775     size_t reg = 0;
1776 
1777     InterferenceNode* alias = node->GetAlias();
1778     if (alias != node) {
1779       // This node was coalesced with another.
1780       LiveInterval* alias_interval = alias->GetInterval();
1781       if (alias_interval->HasRegister()) {
1782         reg = alias_interval->GetRegister();
1783         DCHECK(!BuildConflictMask(node->GetAdjacentNodes())[reg])
1784             << "This node conflicts with the register it was coalesced with";
1785       } else {
1786         DCHECK(false) << node->GetOutDegree() << " " << alias->GetOutDegree() << " "
1787             << "Move coalescing was not conservative, causing a node to be coalesced "
1788             << "with another node that could not be colored";
1789         if (interval->RequiresRegister()) {
1790           successful = false;
1791         }
1792       }
1793     } else {
1794       // Search for free register(s).
1795       std::bitset<kMaxNumRegs> conflict_mask = BuildConflictMask(node->GetAdjacentNodes());
1796       if (interval->HasHighInterval()) {
1797         // Note that the graph coloring allocator assumes that pair intervals are aligned here,
1798         // excluding pre-colored pair intervals (which can currently be unaligned on x86). If we
1799         // change the alignment requirements here, we will have to update the algorithm (e.g.,
1800         // be more conservative about the weight of edges adjacent to pair nodes.)
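        // Illustrative sketch (hypothetical mask): if conflict_mask has bits {1, 2} set, the loop
        // below skips the aligned pairs (0, 1) and (2, 3) and settles on reg = 4, i.e. pair (4, 5).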
1801         while (reg < num_regs_ - 1 && (conflict_mask[reg] || conflict_mask[reg + 1])) {
1802           reg += 2;
1803         }
1804 
1805         // Try to use a caller-save register first.
1806         for (size_t i = 0; i < num_regs_ - 1; i += 2) {
1807           bool low_caller_save  = register_allocator_->IsCallerSave(i, processing_core_regs_);
1808           bool high_caller_save = register_allocator_->IsCallerSave(i + 1, processing_core_regs_);
1809           if (!conflict_mask[i] && !conflict_mask[i + 1]) {
1810             if (low_caller_save && high_caller_save) {
1811               reg = i;
1812               break;
1813             } else if (low_caller_save || high_caller_save) {
1814               reg = i;
1815               // Keep looking to try to get both parts in caller-save registers.
1816             }
1817           }
1818         }
1819       } else {
1820         // Not a pair interval.
1821         reg = FindFirstZeroInConflictMask(conflict_mask);
1822 
1823         // Try to use caller-save registers first.
1824         for (size_t i = 0; i < num_regs_; ++i) {
1825           if (!conflict_mask[i] && register_allocator_->IsCallerSave(i, processing_core_regs_)) {
1826             reg = i;
1827             break;
1828           }
1829         }
1830       }
1831 
1832       // Last-chance coalescing.
1833       for (CoalesceOpportunity* opportunity : node->GetCoalesceOpportunities()) {
1834         if (opportunity->stage == CoalesceStage::kDefunct) {
1835           continue;
1836         }
1837         LiveInterval* other_interval = opportunity->node_a->GetAlias() == node
1838             ? opportunity->node_b->GetAlias()->GetInterval()
1839             : opportunity->node_a->GetAlias()->GetInterval();
1840         if (other_interval->HasRegister()) {
1841           size_t coalesce_register = other_interval->GetRegister();
1842           if (interval->HasHighInterval()) {
1843             if (!conflict_mask[coalesce_register] &&
1844                 !conflict_mask[coalesce_register + 1] &&
1845                 RegisterIsAligned(coalesce_register)) {
1846               reg = coalesce_register;
1847               break;
1848             }
1849           } else if (!conflict_mask[coalesce_register]) {
1850             reg = coalesce_register;
1851             break;
1852           }
1853         }
1854       }
1855     }
1856 
1857     if (reg < (interval->HasHighInterval() ? num_regs_ - 1 : num_regs_)) {
1858       // Assign register.
1859       DCHECK(!interval->HasRegister());
1860       interval->SetRegister(reg);
1861       colored_intervals.push_back(interval);
1862       if (interval->HasHighInterval()) {
1863         DCHECK(!interval->GetHighInterval()->HasRegister());
1864         interval->GetHighInterval()->SetRegister(reg + 1);
1865         colored_intervals.push_back(interval->GetHighInterval());
1866       }
1867     } else if (interval->RequiresRegister()) {
1868       // The interference graph is too dense to color. Make it sparser by
1869       // splitting this live interval.
1870       successful = false;
1871       register_allocator_->SplitAtRegisterUses(interval);
1872       // We continue coloring, because there may be additional intervals that cannot
1873       // be colored, and that we should split.
1874     } else {
1875       // Spill.
1876       node->SetNeedsSpillSlot();
1877     }
1878   }
1879 
1880   // If unsuccessful, reset all register assignments.
1881   if (!successful) {
1882     for (LiveInterval* interval : colored_intervals) {
1883       interval->ClearRegister();
1884     }
1885   }
1886 
1887   return successful;
1888 }
1889 
1890 void RegisterAllocatorGraphColor::AllocateSpillSlots(const ArenaVector<InterferenceNode*>& nodes) {
1891   // The register allocation resolver will organize the stack based on value type,
1892   // so we assign stack slots for each value type separately.
1893   ArenaVector<LiveInterval*> double_intervals(allocator_->Adapter(kArenaAllocRegisterAllocator));
1894   ArenaVector<LiveInterval*> long_intervals(allocator_->Adapter(kArenaAllocRegisterAllocator));
1895   ArenaVector<LiveInterval*> float_intervals(allocator_->Adapter(kArenaAllocRegisterAllocator));
1896   ArenaVector<LiveInterval*> int_intervals(allocator_->Adapter(kArenaAllocRegisterAllocator));
1897 
1898   // The set of parent intervals already handled.
1899   ArenaSet<LiveInterval*> seen(allocator_->Adapter(kArenaAllocRegisterAllocator));
1900 
1901   // Find nodes that need spill slots.
1902   for (InterferenceNode* node : nodes) {
1903     if (!node->NeedsSpillSlot()) {
1904       continue;
1905     }
1906 
1907     LiveInterval* parent = node->GetInterval()->GetParent();
1908     if (seen.find(parent) != seen.end()) {
1909       // We've already handled this interval.
1910       // This can happen if multiple siblings of the same interval request a stack slot.
1911       continue;
1912     }
1913     seen.insert(parent);
1914 
1915     HInstruction* defined_by = parent->GetDefinedBy();
1916     if (parent->HasSpillSlot()) {
1917       // We already have a spill slot for this value that we can reuse.
1918     } else if (defined_by->IsParameterValue()) {
1919       // Parameters already have a stack slot.
1920       parent->SetSpillSlot(codegen_->GetStackSlotOfParameter(defined_by->AsParameterValue()));
1921     } else if (defined_by->IsCurrentMethod()) {
1922       // The current method is always at stack slot 0.
1923       parent->SetSpillSlot(0);
1924     } else if (defined_by->IsConstant()) {
1925       // Constants don't need a spill slot.
1926     } else {
1927       // We need to find a spill slot for this interval. Place it in the correct
1928       // worklist to be processed later.
1929       switch (node->GetInterval()->GetType()) {
1930         case Primitive::kPrimDouble:
1931           double_intervals.push_back(parent);
1932           break;
1933         case Primitive::kPrimLong:
1934           long_intervals.push_back(parent);
1935           break;
1936         case Primitive::kPrimFloat:
1937           float_intervals.push_back(parent);
1938           break;
1939         case Primitive::kPrimNot:
1940         case Primitive::kPrimInt:
1941         case Primitive::kPrimChar:
1942         case Primitive::kPrimByte:
1943         case Primitive::kPrimBoolean:
1944         case Primitive::kPrimShort:
1945           int_intervals.push_back(parent);
1946           break;
1947         case Primitive::kPrimVoid:
1948           LOG(FATAL) << "Unexpected type for interval " << node->GetInterval()->GetType();
1949           UNREACHABLE();
1950       }
1951     }
1952   }
1953 
1954   // Color spill slots for each value type.
1955   ColorSpillSlots(&double_intervals, &num_double_spill_slots_);
1956   ColorSpillSlots(&long_intervals, &num_long_spill_slots_);
1957   ColorSpillSlots(&float_intervals, &num_float_spill_slots_);
1958   ColorSpillSlots(&int_intervals, &num_int_spill_slots_);
1959 }
1960 
1961 void RegisterAllocatorGraphColor::ColorSpillSlots(ArenaVector<LiveInterval*>* intervals,
1962                                                   size_t* num_stack_slots_used) {
1963   // We cannot use the original interference graph here because spill slots are assigned to
1964   // all of the siblings of an interval, whereas an interference node represents only a single
1965   // sibling. So, we assign spill slots linear-scan-style by sorting all the interval endpoints
1966   // by position, and assigning the lowest spill slot available when we encounter an interval
1967   // beginning. We ignore lifetime holes for simplicity.
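  // Illustrative sketch (hypothetical intervals): if parent P1 spans [0, 10) and P2 spans [4, 20),
  // each needing one slot, the sweep gives P1 slot 0 at position 0, gives P2 slot 1 at position 4
  // (slot 0 is still taken), and frees slot 0 again at position 10.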
1968   ArenaVector<std::tuple<size_t, bool, LiveInterval*>> interval_endpoints(
1969       allocator_->Adapter(kArenaAllocRegisterAllocator));
1970 
1971   for (auto it = intervals->begin(), e = intervals->end(); it != e; ++it) {
1972     LiveInterval* parent_interval = *it;
1973     DCHECK(parent_interval->IsParent());
1974     DCHECK(!parent_interval->HasSpillSlot());
1975     size_t start = parent_interval->GetStart();
1976     size_t end = parent_interval->GetLastSibling()->GetEnd();
1977     DCHECK_LT(start, end);
1978     interval_endpoints.push_back(std::make_tuple(start, true, parent_interval));
1979     interval_endpoints.push_back(std::make_tuple(end, false, parent_interval));
1980   }
1981 
1982   // Sort by position.
1983   // We explicitly ignore the third entry of each tuple (the interval pointer) in order
1984   // to maintain determinism.
1985   std::sort(interval_endpoints.begin(), interval_endpoints.end(),
1986             [] (const std::tuple<size_t, bool, LiveInterval*>& lhs,
1987                 const std::tuple<size_t, bool, LiveInterval*>& rhs) {
1988     return std::tie(std::get<0>(lhs), std::get<1>(lhs))
1989          < std::tie(std::get<0>(rhs), std::get<1>(rhs));
1990   });
1991 
1992   ArenaBitVector taken(allocator_, 0, true);
1993   for (auto it = interval_endpoints.begin(), end = interval_endpoints.end(); it != end; ++it) {
1994     // Extract information from the current tuple.
1995     LiveInterval* parent_interval;
1996     bool is_interval_beginning;
1997     size_t position;
1998     std::tie(position, is_interval_beginning, parent_interval) = *it;
1999     size_t number_of_spill_slots_needed = parent_interval->NumberOfSpillSlotsNeeded();
2000 
2001     if (is_interval_beginning) {
2002       DCHECK(!parent_interval->HasSpillSlot());
2003       DCHECK_EQ(position, parent_interval->GetStart());
2004 
2005       // Find first available free stack slot(s).
2006       size_t slot = 0;
2007       for (; ; ++slot) {
2008         bool found = true;
2009         for (size_t s = slot, u = slot + number_of_spill_slots_needed; s < u; s++) {
2010           if (taken.IsBitSet(s)) {
2011             found = false;
2012             break;  // failure
2013           }
2014         }
2015         if (found) {
2016           break;  // success
2017         }
2018       }
2019 
2020       parent_interval->SetSpillSlot(slot);
2021 
2022       *num_stack_slots_used = std::max(*num_stack_slots_used, slot + number_of_spill_slots_needed);
2023       if (number_of_spill_slots_needed > 1 && *num_stack_slots_used % 2 != 0) {
2024         // The parallel move resolver requires that there be an even number of spill slots
2025         // allocated for pair value types.
2026         ++(*num_stack_slots_used);
2027       }
2028 
2029       for (size_t s = slot, u = slot + number_of_spill_slots_needed; s < u; s++) {
2030         taken.SetBit(s);
2031       }
2032     } else {
2033       DCHECK_EQ(position, parent_interval->GetLastSibling()->GetEnd());
2034       DCHECK(parent_interval->HasSpillSlot());
2035 
2036       // Free up the stack slot(s) used by this interval.
2037       size_t slot = parent_interval->GetSpillSlot();
2038       for (size_t s = slot, u = slot + number_of_spill_slots_needed; s < u; s++) {
2039         DCHECK(taken.IsBitSet(s));
2040         taken.ClearBit(s);
2041       }
2042     }
2043   }
2044   DCHECK_EQ(taken.NumSetBits(), 0u);
2045 }
2046 
2047 }  // namespace art
2048