• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2017 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef ART_COMPILER_OPTIMIZING_SUPERBLOCK_CLONER_H_
18 #define ART_COMPILER_OPTIMIZING_SUPERBLOCK_CLONER_H_
19 
20 #include "base/arena_bit_vector.h"
21 #include "base/arena_containers.h"
22 #include "base/bit_vector-inl.h"
23 #include "base/macros.h"
24 #include "nodes.h"
25 
26 namespace art HIDDEN {
27 
28 class InductionVarRange;
29 
30 static const bool kSuperblockClonerLogging = false;
31 static const bool kSuperblockClonerVerify = false;
32 
33 // Represents an edge between two HBasicBlocks.
34 //
35 // Note: objects of this class are small - pass them by value.
36 class HEdge : public ArenaObject<kArenaAllocSuperblockCloner> {
37  public:
HEdge(HBasicBlock * from,HBasicBlock * to)38   HEdge(HBasicBlock* from, HBasicBlock* to) : from_(from->GetBlockId()), to_(to->GetBlockId()) {
39     DCHECK_NE(to_, kInvalidBlockId);
40     DCHECK_NE(from_, kInvalidBlockId);
41   }
HEdge(uint32_t from,uint32_t to)42   HEdge(uint32_t from, uint32_t to) : from_(from), to_(to) {
43     DCHECK_NE(to_, kInvalidBlockId);
44     DCHECK_NE(from_, kInvalidBlockId);
45   }
HEdge()46   HEdge() : from_(kInvalidBlockId), to_(kInvalidBlockId) {}
47 
GetFrom()48   uint32_t GetFrom() const { return from_; }
GetTo()49   uint32_t GetTo() const { return to_; }
50 
51   bool operator==(const HEdge& other) const {
52     return this->from_ == other.from_ && this->to_ == other.to_;
53   }
54 
55   bool operator!=(const HEdge& other) const { return !operator==(other); }
56   void Dump(std::ostream& stream) const;
57 
58   // Returns whether an edge represents a valid edge in CF graph: whether the from_ block
59   // has to_ block as a successor.
IsValid()60   bool IsValid() const { return from_ != kInvalidBlockId && to_ != kInvalidBlockId; }
61 
62  private:
63   // Predecessor block id.
64   uint32_t from_;
65   // Successor block id.
66   uint32_t to_;
67 };
68 
69 // Returns whether a HEdge edge corresponds to an existing edge in the graph.
IsEdgeValid(HEdge edge,HGraph * graph)70 inline bool IsEdgeValid(HEdge edge, HGraph* graph) {
71   if (!edge.IsValid()) {
72     return false;
73   }
74   uint32_t from = edge.GetFrom();
75   uint32_t to = edge.GetTo();
76   if (from >= graph->GetBlocks().size() || to >= graph->GetBlocks().size()) {
77     return false;
78   }
79 
80   HBasicBlock* block_from = graph->GetBlocks()[from];
81   HBasicBlock* block_to = graph->GetBlocks()[to];
82   if (block_from == nullptr || block_to == nullptr) {
83     return false;
84   }
85 
86   return block_from->HasSuccessor(block_to, 0);
87 }
88 
89 // SuperblockCloner provides a feature of cloning subgraphs in a smart, high level way without
90 // fine grain manipulation with IR; data flow and graph properties are resolved/adjusted
91 // automatically. The clone transformation is defined by specifying a set of basic blocks to copy
92 // and a set of rules how to treat edges, remap their successors. By using this approach such
93 // optimizations as Branch Target Expansion, Loop Peeling, Loop Unrolling can be implemented.
94 //
95 // The idea of the transformation is based on "Superblock cloning" technique described in the book
96 // "Engineering a Compiler. Second Edition", Keith D. Cooper, Linda Torczon, Rice University
97 // Houston, Texas. 2nd edition, Morgan Kaufmann. The original paper is "The Superblock: An Efective
98 // Technique for VLIW and Superscalar Compilation" by Hwu, W.M.W., Mahlke, S.A., Chen, W.Y. et al.
99 // J Supercomput (1993) 7: 229. doi:10.1007/BF01205185.
100 //
101 // There are two states of the IR graph: original graph (before the transformation) and
102 // copy graph (after).
103 //
104 // Before the transformation:
105 // Defining a set of basic block to copy (orig_bb_set) partitions all of the edges in the original
106 // graph into 4 categories/sets (use the following notation for edges: "(pred, succ)",
107 // where pred, succ - basic blocks):
108 //  - internal - pred, succ are members of ‘orig_bb_set’.
109 //  - outside  - pred, succ are not members of ‘orig_bb_set’.
110 //  - incoming - pred is not a member of ‘orig_bb_set’, succ is.
111 //  - outgoing - pred is a member of ‘orig_bb_set’, succ is not.
112 //
113 // Transformation:
114 //
115 // 1. Initial cloning:
116 //   1.1. For each ‘orig_block’ in orig_bb_set create a copy ‘copy_block’; these new blocks
117 //        form ‘copy_bb_set’.
118 //   1.2. For each edge (X, Y) from internal set create an edge (X_1, Y_1) where X_1, Y_1 are the
119 //        copies of X, Y basic blocks correspondingly; these new edges form ‘copy_internal’ edge
120 //        set.
121 //   1.3. For each edge (X, Y) from outgoing set create an edge (X_1, Y_1) where X_1, Y_1 are the
122 //        copies of X, Y basic blocks correspondingly; these new edges form ‘copy_outgoing’ edge
123 //        set.
124 // 2. Successors remapping.
125 //   2.1. 'remap_orig_internal’ - set of edges (X, Y) from ‘orig_bb_set’ whose successors should
126 //        be remapped to copy nodes: ((X, Y) will be transformed into (X, Y_1)).
127 //   2.2. ‘remap_copy_internal’ - set of edges (X_1, Y_1) from ‘copy_bb_set’ whose successors
128 //        should be remapped to copy nodes: (X_1, Y_1) will be transformed into (X_1, Y)).
129 //   2.3. 'remap_incoming’ - set of edges (X, Y) from the ‘incoming’ edge set in the original graph
130 //        whose successors should be remapped to copies nodes: ((X, Y) will be transformed into
131 //        (X, Y_1)).
132 // 3. Adjust control flow structures and relations (dominance, reverse post order, loops, etc).
133 // 4. Fix/resolve data flow.
134 // 5. Do cleanups e.g. critical edges splitting.
135 //
136 class SuperblockCloner : public ValueObject {
137  public:
138   // TODO: Investigate optimal types for the containers.
139   using HBasicBlockMap = ArenaSafeMap<HBasicBlock*, HBasicBlock*>;
140   using HInstructionMap = ArenaSafeMap<HInstruction*, HInstruction*>;
141   using HBasicBlockSet = ArenaBitVector;
142   using HEdgeSet = ArenaHashSet<HEdge>;
143 
144   SuperblockCloner(HGraph* graph,
145                    const HBasicBlockSet* orig_bb_set,
146                    HBasicBlockMap* bb_map,
147                    HInstructionMap* hir_map,
148                    InductionVarRange* induction_range);
149 
150   // Sets edge successor remapping info specified by corresponding edge sets.
151   void SetSuccessorRemappingInfo(const HEdgeSet* remap_orig_internal,
152                                  const HEdgeSet* remap_copy_internal,
153                                  const HEdgeSet* remap_incoming);
154 
155   // Returns whether the specified subgraph is copyable.
156   // TODO: Start from small range of graph patterns then extend it.
157   bool IsSubgraphClonable() const;
158 
159   // Returns whether selected subgraph satisfies the criteria for fast data flow resolution
160   // when iterative DF algorithm is not required and dominators/instructions inputs can be
161   // trivially adjusted.
162   //
163   // TODO: formally describe the criteria.
164   //
165   // Loop peeling and unrolling satisfy the criteria.
166   bool IsFastCase() const;
167 
168   // Runs the copy algorithm according to the description.
169   void Run();
170 
171   // Cleans up the graph after transformation: splits critical edges, recalculates control flow
172   // information (back-edges, dominators, loop info, etc), eliminates redundant phis.
173   void CleanUp();
174 
175   // Returns a clone of a basic block (orig_block).
176   //
177   //  - The copy block will have no successors/predecessors; they should be set up manually.
178   //  - For each instruction in the orig_block a copy is created and inserted into the copy block;
179   //    this correspondence is recorded in the map (old instruction, new instruction).
180   //  - Graph HIR is not valid after this transformation: all of the HIRs have their inputs the
181   //    same, as in the original block, PHIs do not reflect a correct correspondence between the
182   //    value and predecessors (as the copy block has no predecessors by now), etc.
183   HBasicBlock* CloneBasicBlock(const HBasicBlock* orig_block);
184 
185   // Creates a clone for each basic blocks in orig_bb_set adding corresponding entries into bb_map_
186   // and hir_map_.
187   void CloneBasicBlocks();
188 
GetInstrCopy(HInstruction * orig_instr)189   HInstruction* GetInstrCopy(HInstruction* orig_instr) const {
190     auto copy_input_iter = hir_map_->find(orig_instr);
191     DCHECK(copy_input_iter != hir_map_->end());
192     return copy_input_iter->second;
193   }
194 
GetBlockCopy(HBasicBlock * orig_block)195   HBasicBlock* GetBlockCopy(HBasicBlock* orig_block) const {
196     HBasicBlock* block = bb_map_->Get(orig_block);
197     DCHECK(block != nullptr);
198     return block;
199   }
200 
GetInstrOrig(HInstruction * copy_instr)201   HInstruction* GetInstrOrig(HInstruction* copy_instr) const {
202     for (auto it : *hir_map_) {
203       if (it.second == copy_instr) {
204         return it.first;
205       }
206     }
207     return nullptr;
208   }
209 
IsInOrigBBSet(uint32_t block_id)210   bool IsInOrigBBSet(uint32_t block_id) const {
211     return orig_bb_set_.IsBitSet(block_id);
212   }
213 
IsInOrigBBSet(const HBasicBlock * block)214   bool IsInOrigBBSet(const HBasicBlock* block) const {
215     return IsInOrigBBSet(block->GetBlockId());
216   }
217 
218   // Returns the area (the most outer loop) in the graph for which control flow (back edges, loops,
219   // dominators) needs to be adjusted.
GetRegionToBeAdjusted()220   HLoopInformation* GetRegionToBeAdjusted() const {
221     return outer_loop_;
222   }
223 
224  private:
225   // Fills the 'exits' vector with the subgraph exits.
226   void SearchForSubgraphExits(ArenaVector<HBasicBlock*>* exits) const;
227 
228   // Finds and records information about the area in the graph for which control flow (back edges,
229   // loops, dominators) needs to be adjusted.
230   void FindAndSetLocalAreaForAdjustments();
231 
232   // Remaps edges' successors according to the info specified in the edges sets.
233   //
234   // Only edge successors/predecessors and phis' input records (to have a correspondence between
235   // a phi input record (not value) and a block's predecessor) are adjusted at this stage: neither
236   // phis' nor instructions' inputs values are resolved.
237   void RemapEdgesSuccessors();
238 
239   // Adjusts control flow (back edges, loops, dominators) for the local area defined by
240   // FindAndSetLocalAreaForAdjustments.
241   void AdjustControlFlowInfo();
242 
243   // Resolves Data Flow - adjusts phis' and instructions' inputs in order to have a valid graph in
244   // the SSA form.
245   void ResolveDataFlow();
246 
247   //
248   // Helpers for live-outs processing and Subgraph-closed SSA.
249   //
250   //  - live-outs - values which are defined inside the subgraph and have uses outside.
251   //  - Subgraph-closed SSA - SSA form for which all the values defined inside the subgraph
252   //    have no outside uses except for the phi-nodes in the subgraph exits.
253   //
254   // Note: now if the subgraph has live-outs it is only clonable if it has a single exit; this
255   // makes the subgraph-closed SSA form construction much easier.
256   //
257   // TODO: Support subgraphs with live-outs and multiple exits.
258   //
259 
260   // For each live-out value 'val' in the region puts a record <val, val> into the map.
261   // Returns whether all of the instructions in the subgraph are clonable.
262   bool CollectLiveOutsAndCheckClonable(HInstructionMap* live_outs_) const;
263 
264   // Constructs Subgraph-closed SSA; precondition - a subgraph has a single exit.
265   //
266   // For each live-out 'val' in 'live_outs_' map inserts a HPhi 'phi' into the exit node, updates
267   // the record in the map to <val, phi> and replaces all outside uses with this phi.
268   void ConstructSubgraphClosedSSA();
269 
270   // Fixes the data flow for the live-out 'val' by adding a 'copy_val' input to the corresponding
271   // (<val, phi>) phi after the cloning is done.
272   void FixSubgraphClosedSSAAfterCloning();
273 
274   //
275   // Helpers for CloneBasicBlock.
276   //
277 
278   // Adjusts copy instruction's inputs: if the input of the original instruction is defined in the
279   // orig_bb_set, replaces it with a corresponding copy otherwise leaves it the same as original.
280   void ReplaceInputsWithCopies(HInstruction* copy_instr);
281 
282   // Recursively clones the environment for the copy instruction. If the input of the original
283   // environment is defined in the orig_bb_set, replaces it with a corresponding copy otherwise
284   // leaves it the same as original.
285   void DeepCloneEnvironmentWithRemapping(HInstruction* copy_instr, const HEnvironment* orig_env);
286 
287   //
288   // Helpers for RemapEdgesSuccessors.
289   //
290 
291   // Remaps incoming or original internal edge to its copy, adjusts the phi inputs in orig_succ and
292   // copy_succ.
293   void RemapOrigInternalOrIncomingEdge(HBasicBlock* orig_block, HBasicBlock* orig_succ);
294 
295   // Adds copy internal edge (from copy_block to copy_succ), updates phis in the copy_succ.
296   void AddCopyInternalEdge(HBasicBlock* orig_block, HBasicBlock* orig_succ);
297 
298   // Remaps copy internal edge to its origin, adjusts the phi inputs in orig_succ.
299   void RemapCopyInternalEdge(HBasicBlock* orig_block, HBasicBlock* orig_succ);
300 
301   //
302   // Local versions of control flow calculation/adjustment routines.
303   //
304 
305   void FindBackEdgesLocal(HBasicBlock* entry_block, ArenaBitVector* local_set);
306   void RecalculateBackEdgesInfo(ArenaBitVector* outer_loop_bb_set);
307   GraphAnalysisResult AnalyzeLoopsLocally(ArenaBitVector* outer_loop_bb_set);
308   void CleanUpControlFlow();
309 
310   //
311   // Helpers for ResolveDataFlow
312   //
313 
314   // Resolves the inputs of the phi.
315   void ResolvePhi(HPhi* phi);
316 
317   // Update induction range after when fixing SSA.
318   void UpdateInductionRangeInfoOf(
319       HInstruction* user, HInstruction* old_instruction, HInstruction* replacement);
320 
321   //
322   // Debug and logging methods.
323   //
324   void CheckInstructionInputsRemapping(HInstruction* orig_instr);
325   bool CheckRemappingInfoIsValid();
326   void VerifyGraph();
327   void DumpInputSets();
328 
GetBlockById(uint32_t block_id)329   HBasicBlock* GetBlockById(uint32_t block_id) const {
330     DCHECK(block_id < graph_->GetBlocks().size());
331     HBasicBlock* block = graph_->GetBlocks()[block_id];
332     DCHECK(block != nullptr);
333     return block;
334   }
335 
336   HGraph* const graph_;
337   ArenaAllocator* const arena_;
338 
339   // Set of basic block in the original graph to be copied.
340   HBasicBlockSet orig_bb_set_;
341 
342   // Sets of edges which require successors remapping.
343   const HEdgeSet* remap_orig_internal_;
344   const HEdgeSet* remap_copy_internal_;
345   const HEdgeSet* remap_incoming_;
346 
347   // Correspondence map for blocks: (original block, copy block).
348   HBasicBlockMap* bb_map_;
349   // Correspondence map for instructions: (original HInstruction, copy HInstruction).
350   HInstructionMap* hir_map_;
351   // As a result of cloning, the induction range analysis information can be invalidated
352   // and must be updated. If not null, the cloner updates it for changed instructions.
353   InductionVarRange* induction_range_;
354   // Area in the graph for which control flow (back edges, loops, dominators) needs to be adjusted.
355   HLoopInformation* outer_loop_;
356   HBasicBlockSet outer_loop_bb_set_;
357 
358   HInstructionMap live_outs_;
359 
360   ART_FRIEND_TEST(SuperblockClonerTest, AdjustControlFlowInfo);
361   ART_FRIEND_TEST(SuperblockClonerTest, IsGraphConnected);
362 
363   DISALLOW_COPY_AND_ASSIGN(SuperblockCloner);
364 };
365 
366 // Helper class to perform loop peeling/unrolling.
367 //
368 // This helper should be used when correspondence map between original and copied
369 // basic blocks/instructions are demanded.
370 class LoopClonerHelper : public ValueObject {
371  public:
LoopClonerHelper(HLoopInformation * info,SuperblockCloner::HBasicBlockMap * bb_map,SuperblockCloner::HInstructionMap * hir_map,InductionVarRange * induction_range)372   LoopClonerHelper(HLoopInformation* info,
373                    SuperblockCloner::HBasicBlockMap* bb_map,
374                    SuperblockCloner::HInstructionMap* hir_map,
375                    InductionVarRange* induction_range) :
376       loop_info_(info),
377       cloner_(info->GetHeader()->GetGraph(), &info->GetBlocks(), bb_map, hir_map, induction_range) {
378     // For now do transformations only for natural loops.
379     DCHECK(!info->IsIrreducible());
380   }
381 
382   // Returns whether the loop can be peeled/unrolled (static function).
383   static bool IsLoopClonable(HLoopInformation* loop_info);
384 
385   // Returns whether the loop can be peeled/unrolled.
IsLoopClonable()386   bool IsLoopClonable() const { return cloner_.IsSubgraphClonable(); }
387 
388   // Perform loop peeling.
389   //
390   // Control flow of an example (ignoring critical edges splitting).
391   //
392   //       Before                    After
393   //
394   //         |B|                      |B|
395   //          |                        |
396   //          v                        v
397   //         |1|                      |1|
398   //          |                        |
399   //          v                        v
400   //         |2|<-\                  |2A|
401   //         / \  /                   / \
402   //        v   v/                   /   v
403   //       |4|  |3|                 /   |3A|
404   //        |                      /     /
405   //        v                     |     v
406   //       |E|                     \   |2|<-\
407   //                                \ / \   /
408   //                                 v   v /
409   //                                |4|  |3|
410   //                                 |
411   //                                 v
412   //                                |E|
DoPeeling()413   HBasicBlock* DoPeeling() {
414     return DoLoopTransformationImpl(TransformationKind::kPeeling);
415   }
416 
417   // Perform loop unrolling.
418   //
419   // Control flow of an example (ignoring critical edges splitting).
420   //
421   //       Before                    After
422   //
423   //         |B|                      |B|
424   //          |                        |
425   //          v                        v
426   //         |1|                      |1|
427   //          |                        |
428   //          v                        v
429   //         |2|<-\                   |2A|<-\
430   //         / \  /                   / \    \
431   //        v   v/                   /   v    \
432   //       |4|  |3|                 /   |3A|   |
433   //        |                      /     /    /
434   //        v                     |     v    /
435   //       |E|                     \   |2|  /
436   //                                \ / \  /
437   //                                 v   v/
438   //                                |4| |3|
439   //                                 |
440   //                                 v
441   //                                |E|
DoUnrolling()442   HBasicBlock* DoUnrolling() {
443     return DoLoopTransformationImpl(TransformationKind::kUnrolling);
444   }
445 
GetRegionToBeAdjusted()446   HLoopInformation* GetRegionToBeAdjusted() const { return cloner_.GetRegionToBeAdjusted(); }
447 
448  protected:
449   enum class TransformationKind {
450     kPeeling,
451     kUnrolling,
452   };
453 
454   // Applies a specific loop transformation to the loop.
455   HBasicBlock* DoLoopTransformationImpl(TransformationKind transformation);
456 
457  private:
458   HLoopInformation* loop_info_;
459   SuperblockCloner cloner_;
460 
461   DISALLOW_COPY_AND_ASSIGN(LoopClonerHelper);
462 };
463 
464 // Helper class to perform loop peeling/unrolling.
465 //
466 // This helper should be used when there is no need to get correspondence information between
467 // original and copied basic blocks/instructions.
468 class LoopClonerSimpleHelper : public ValueObject {
469  public:
470   LoopClonerSimpleHelper(HLoopInformation* info, InductionVarRange* induction_range);
IsLoopClonable()471   bool IsLoopClonable() const { return helper_.IsLoopClonable(); }
DoPeeling()472   HBasicBlock* DoPeeling() { return helper_.DoPeeling(); }
DoUnrolling()473   HBasicBlock* DoUnrolling() { return helper_.DoUnrolling(); }
GetRegionToBeAdjusted()474   HLoopInformation* GetRegionToBeAdjusted() const { return helper_.GetRegionToBeAdjusted(); }
475 
GetBasicBlockMap()476   const SuperblockCloner::HBasicBlockMap* GetBasicBlockMap() const { return &bb_map_; }
GetInstructionMap()477   const SuperblockCloner::HInstructionMap* GetInstructionMap() const { return &hir_map_; }
478 
479  private:
480   SuperblockCloner::HBasicBlockMap bb_map_;
481   SuperblockCloner::HInstructionMap hir_map_;
482   LoopClonerHelper helper_;
483 
484   DISALLOW_COPY_AND_ASSIGN(LoopClonerSimpleHelper);
485 };
486 
487 // Collects edge remapping info for loop peeling/unrolling for the loop specified by loop info.
488 void CollectRemappingInfoForPeelUnroll(bool to_unroll,
489                                        HLoopInformation* loop_info,
490                                        SuperblockCloner::HEdgeSet* remap_orig_internal,
491                                        SuperblockCloner::HEdgeSet* remap_copy_internal,
492                                        SuperblockCloner::HEdgeSet* remap_incoming);
493 
494 // Returns whether blocks from 'work_set' are reachable from the rest of the graph.
495 //
496 // Returns whether such a set 'outer_entries' of basic blocks exists that:
497 // - each block from 'outer_entries' is not from 'work_set'.
498 // - each block from 'work_set' is reachable from at least one block from 'outer_entries'.
499 //
500 // After the function returns work_set contains only blocks from the original 'work_set'
501 // which are unreachable from the rest of the graph.
502 bool IsSubgraphConnected(SuperblockCloner::HBasicBlockSet* work_set, HGraph* graph);
503 
504 // Returns a common predecessor of loop1 and loop2 in the loop tree or nullptr if it is the whole
505 // graph.
506 HLoopInformation* FindCommonLoop(HLoopInformation* loop1, HLoopInformation* loop2);
507 }  // namespace art
508 
509 namespace std {
510 
511 template <>
512 struct hash<art::HEdge> {
513   size_t operator()(art::HEdge const& x) const noexcept  {
514     // Use Cantor pairing function as the hash function.
515     size_t a = x.GetFrom();
516     size_t b = x.GetTo();
517     return (a + b) * (a + b + 1) / 2 + b;
518   }
519 };
520 ostream& operator<<(ostream& os, const art::HEdge& e);
521 
522 }  // namespace std
523 
524 #endif  // ART_COMPILER_OPTIMIZING_SUPERBLOCK_CLONER_H_
525