1 /*
2 * Copyright (C) 2017 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #ifndef ART_COMPILER_OPTIMIZING_SUPERBLOCK_CLONER_H_
18 #define ART_COMPILER_OPTIMIZING_SUPERBLOCK_CLONER_H_
19
20 #include "base/arena_bit_vector.h"
21 #include "base/arena_containers.h"
22 #include "base/bit_vector-inl.h"
23 #include "nodes.h"
24
25 namespace art {
26
27 static const bool kSuperblockClonerLogging = false;
28 static const bool kSuperblockClonerVerify = false;
29
30 // Represents an edge between two HBasicBlocks.
31 //
32 // Note: objects of this class are small - pass them by value.
33 class HEdge : public ArenaObject<kArenaAllocSuperblockCloner> {
34 public:
HEdge(HBasicBlock * from,HBasicBlock * to)35 HEdge(HBasicBlock* from, HBasicBlock* to) : from_(from->GetBlockId()), to_(to->GetBlockId()) {
36 DCHECK_NE(to_, kInvalidBlockId);
37 DCHECK_NE(from_, kInvalidBlockId);
38 }
HEdge(uint32_t from,uint32_t to)39 HEdge(uint32_t from, uint32_t to) : from_(from), to_(to) {
40 DCHECK_NE(to_, kInvalidBlockId);
41 DCHECK_NE(from_, kInvalidBlockId);
42 }
HEdge()43 HEdge() : from_(kInvalidBlockId), to_(kInvalidBlockId) {}
44
GetFrom()45 uint32_t GetFrom() const { return from_; }
GetTo()46 uint32_t GetTo() const { return to_; }
47
48 bool operator==(const HEdge& other) const {
49 return this->from_ == other.from_ && this->to_ == other.to_;
50 }
51
52 bool operator!=(const HEdge& other) const { return !operator==(other); }
53 void Dump(std::ostream& stream) const;
54
55 // Returns whether an edge represents a valid edge in CF graph: whether the from_ block
56 // has to_ block as a successor.
IsValid()57 bool IsValid() const { return from_ != kInvalidBlockId && to_ != kInvalidBlockId; }
58
59 private:
60 // Predecessor block id.
61 uint32_t from_;
62 // Successor block id.
63 uint32_t to_;
64 };
65
66 // Returns whether a HEdge edge corresponds to an existing edge in the graph.
IsEdgeValid(HEdge edge,HGraph * graph)67 inline bool IsEdgeValid(HEdge edge, HGraph* graph) {
68 if (!edge.IsValid()) {
69 return false;
70 }
71 uint32_t from = edge.GetFrom();
72 uint32_t to = edge.GetTo();
73 if (from >= graph->GetBlocks().size() || to >= graph->GetBlocks().size()) {
74 return false;
75 }
76
77 HBasicBlock* block_from = graph->GetBlocks()[from];
78 HBasicBlock* block_to = graph->GetBlocks()[to];
79 if (block_from == nullptr || block_to == nullptr) {
80 return false;
81 }
82
83 return block_from->HasSuccessor(block_to, 0);
84 }
85
86 // SuperblockCloner provides a feature of cloning subgraphs in a smart, high level way without
87 // fine grain manipulation with IR; data flow and graph properties are resolved/adjusted
88 // automatically. The clone transformation is defined by specifying a set of basic blocks to copy
89 // and a set of rules how to treat edges, remap their successors. By using this approach such
90 // optimizations as Branch Target Expansion, Loop Peeling, Loop Unrolling can be implemented.
91 //
92 // The idea of the transformation is based on "Superblock cloning" technique described in the book
93 // "Engineering a Compiler. Second Edition", Keith D. Cooper, Linda Torczon, Rice University
94 // Houston, Texas. 2nd edition, Morgan Kaufmann. The original paper is "The Superblock: An Efective
95 // Technique for VLIW and Superscalar Compilation" by Hwu, W.M.W., Mahlke, S.A., Chen, W.Y. et al.
96 // J Supercomput (1993) 7: 229. doi:10.1007/BF01205185.
97 //
98 // There are two states of the IR graph: original graph (before the transformation) and
99 // copy graph (after).
100 //
101 // Before the transformation:
102 // Defining a set of basic block to copy (orig_bb_set) partitions all of the edges in the original
103 // graph into 4 categories/sets (use the following notation for edges: "(pred, succ)",
104 // where pred, succ - basic blocks):
105 // - internal - pred, succ are members of ‘orig_bb_set’.
106 // - outside - pred, succ are not members of ‘orig_bb_set’.
107 // - incoming - pred is not a member of ‘orig_bb_set’, succ is.
108 // - outgoing - pred is a member of ‘orig_bb_set’, succ is not.
109 //
110 // Transformation:
111 //
112 // 1. Initial cloning:
113 // 1.1. For each ‘orig_block’ in orig_bb_set create a copy ‘copy_block’; these new blocks
114 // form ‘copy_bb_set’.
115 // 1.2. For each edge (X, Y) from internal set create an edge (X_1, Y_1) where X_1, Y_1 are the
116 // copies of X, Y basic blocks correspondingly; these new edges form ‘copy_internal’ edge
117 // set.
118 // 1.3. For each edge (X, Y) from outgoing set create an edge (X_1, Y_1) where X_1, Y_1 are the
119 // copies of X, Y basic blocks correspondingly; these new edges form ‘copy_outgoing’ edge
120 // set.
121 // 2. Successors remapping.
122 // 2.1. 'remap_orig_internal’ - set of edges (X, Y) from ‘orig_bb_set’ whose successors should
123 // be remapped to copy nodes: ((X, Y) will be transformed into (X, Y_1)).
124 // 2.2. ‘remap_copy_internal’ - set of edges (X_1, Y_1) from ‘copy_bb_set’ whose successors
125 // should be remapped to copy nodes: (X_1, Y_1) will be transformed into (X_1, Y)).
126 // 2.3. 'remap_incoming’ - set of edges (X, Y) from the ‘incoming’ edge set in the original graph
127 // whose successors should be remapped to copies nodes: ((X, Y) will be transformed into
128 // (X, Y_1)).
129 // 3. Adjust control flow structures and relations (dominance, reverse post order, loops, etc).
130 // 4. Fix/resolve data flow.
131 // 5. Do cleanups (DCE, critical edges splitting, etc).
132 //
133 class SuperblockCloner : public ValueObject {
134 public:
135 // TODO: Investigate optimal types for the containers.
136 using HBasicBlockMap = ArenaSafeMap<HBasicBlock*, HBasicBlock*>;
137 using HInstructionMap = ArenaSafeMap<HInstruction*, HInstruction*>;
138 using HBasicBlockSet = ArenaBitVector;
139 using HEdgeSet = ArenaHashSet<HEdge>;
140
141 SuperblockCloner(HGraph* graph,
142 const HBasicBlockSet* orig_bb_set,
143 HBasicBlockMap* bb_map,
144 HInstructionMap* hir_map);
145
146 // Sets edge successor remapping info specified by corresponding edge sets.
147 void SetSuccessorRemappingInfo(const HEdgeSet* remap_orig_internal,
148 const HEdgeSet* remap_copy_internal,
149 const HEdgeSet* remap_incoming);
150
151 // Returns whether the specified subgraph is copyable.
152 // TODO: Start from small range of graph patterns then extend it.
153 bool IsSubgraphClonable() const;
154
155 // Runs the copy algorithm according to the description.
156 void Run();
157
158 // Cleans up the graph after transformation: splits critical edges, recalculates control flow
159 // information (back-edges, dominators, loop info, etc), eliminates redundant phis.
160 void CleanUp();
161
162 // Returns a clone of a basic block (orig_block).
163 //
164 // - The copy block will have no successors/predecessors; they should be set up manually.
165 // - For each instruction in the orig_block a copy is created and inserted into the copy block;
166 // this correspondence is recorded in the map (old instruction, new instruction).
167 // - Graph HIR is not valid after this transformation: all of the HIRs have their inputs the
168 // same, as in the original block, PHIs do not reflect a correct correspondence between the
169 // value and predecessors (as the copy block has no predecessors by now), etc.
170 HBasicBlock* CloneBasicBlock(const HBasicBlock* orig_block);
171
172 // Creates a clone for each basic blocks in orig_bb_set adding corresponding entries into bb_map_
173 // and hir_map_.
174 void CloneBasicBlocks();
175
GetInstrCopy(HInstruction * orig_instr)176 HInstruction* GetInstrCopy(HInstruction* orig_instr) const {
177 auto copy_input_iter = hir_map_->find(orig_instr);
178 DCHECK(copy_input_iter != hir_map_->end());
179 return copy_input_iter->second;
180 }
181
GetBlockCopy(HBasicBlock * orig_block)182 HBasicBlock* GetBlockCopy(HBasicBlock* orig_block) const {
183 HBasicBlock* block = bb_map_->Get(orig_block);
184 DCHECK(block != nullptr);
185 return block;
186 }
187
GetInstrOrig(HInstruction * copy_instr)188 HInstruction* GetInstrOrig(HInstruction* copy_instr) const {
189 for (auto it : *hir_map_) {
190 if (it.second == copy_instr) {
191 return it.first;
192 }
193 }
194 return nullptr;
195 }
196
IsInOrigBBSet(uint32_t block_id)197 bool IsInOrigBBSet(uint32_t block_id) const {
198 return orig_bb_set_.IsBitSet(block_id);
199 }
200
IsInOrigBBSet(const HBasicBlock * block)201 bool IsInOrigBBSet(const HBasicBlock* block) const {
202 return IsInOrigBBSet(block->GetBlockId());
203 }
204
205 private:
206 // Fills the 'exits' vector with the subgraph exits.
207 void SearchForSubgraphExits(ArenaVector<HBasicBlock*>* exits);
208
209 // Finds and records information about the area in the graph for which control-flow (back edges,
210 // loops, dominators) needs to be adjusted.
211 void FindAndSetLocalAreaForAdjustments();
212
213 // Remaps edges' successors according to the info specified in the edges sets.
214 //
215 // Only edge successors/predecessors and phis' input records (to have a correspondence between
216 // a phi input record (not value) and a block's predecessor) are adjusted at this stage: neither
217 // phis' nor instructions' inputs values are resolved.
218 void RemapEdgesSuccessors();
219
220 // Adjusts control-flow (back edges, loops, dominators) for the local area defined by
221 // FindAndSetLocalAreaForAdjustments.
222 void AdjustControlFlowInfo();
223
224 // Resolves Data Flow - adjusts phis' and instructions' inputs in order to have a valid graph in
225 // the SSA form.
226 void ResolveDataFlow();
227
228 //
229 // Helpers for CloneBasicBlock.
230 //
231
232 // Adjusts copy instruction's inputs: if the input of the original instruction is defined in the
233 // orig_bb_set, replaces it with a corresponding copy otherwise leaves it the same as original.
234 void ReplaceInputsWithCopies(HInstruction* copy_instr);
235
236 // Recursively clones the environment for the copy instruction. If the input of the original
237 // environment is defined in the orig_bb_set, replaces it with a corresponding copy otherwise
238 // leaves it the same as original.
239 void DeepCloneEnvironmentWithRemapping(HInstruction* copy_instr, const HEnvironment* orig_env);
240
241 //
242 // Helpers for RemapEdgesSuccessors.
243 //
244
245 // Remaps incoming or original internal edge to its copy, adjusts the phi inputs in orig_succ and
246 // copy_succ.
247 void RemapOrigInternalOrIncomingEdge(HBasicBlock* orig_block, HBasicBlock* orig_succ);
248
249 // Adds copy internal edge (from copy_block to copy_succ), updates phis in the copy_succ.
250 void AddCopyInternalEdge(HBasicBlock* orig_block, HBasicBlock* orig_succ);
251
252 // Remaps copy internal edge to its origin, adjusts the phi inputs in orig_succ.
253 void RemapCopyInternalEdge(HBasicBlock* orig_block, HBasicBlock* orig_succ);
254
255 //
256 // Local versions of control flow calculation/adjustment routines.
257 //
258
259 void FindBackEdgesLocal(HBasicBlock* entry_block, ArenaBitVector* local_set);
260 void RecalculateBackEdgesInfo(ArenaBitVector* outer_loop_bb_set);
261 GraphAnalysisResult AnalyzeLoopsLocally(ArenaBitVector* outer_loop_bb_set);
262 void CleanUpControlFlow();
263
264 //
265 // Helpers for ResolveDataFlow
266 //
267
268 // Resolves the inputs of the phi.
269 void ResolvePhi(HPhi* phi);
270
271 //
272 // Debug and logging methods.
273 //
274 void CheckInstructionInputsRemapping(HInstruction* orig_instr);
275
GetBlockById(uint32_t block_id)276 HBasicBlock* GetBlockById(uint32_t block_id) const {
277 DCHECK(block_id < graph_->GetBlocks().size());
278 HBasicBlock* block = graph_->GetBlocks()[block_id];
279 DCHECK(block != nullptr);
280 return block;
281 }
282
283 HGraph* const graph_;
284 ArenaAllocator* const arena_;
285
286 // Set of basic block in the original graph to be copied.
287 HBasicBlockSet orig_bb_set_;
288
289 // Sets of edges which require successors remapping.
290 const HEdgeSet* remap_orig_internal_;
291 const HEdgeSet* remap_copy_internal_;
292 const HEdgeSet* remap_incoming_;
293
294 // Correspondence map for blocks: (original block, copy block).
295 HBasicBlockMap* bb_map_;
296 // Correspondence map for instructions: (original HInstruction, copy HInstruction).
297 HInstructionMap* hir_map_;
298 // Area in the graph for which control-flow (back edges, loops, dominators) needs to be adjusted.
299 HLoopInformation* outer_loop_;
300 HBasicBlockSet outer_loop_bb_set_;
301
302 ART_FRIEND_TEST(SuperblockClonerTest, AdjustControlFlowInfo);
303
304 DISALLOW_COPY_AND_ASSIGN(SuperblockCloner);
305 };
306
307 } // namespace art
308
309 namespace std {
310
311 template <>
312 struct hash<art::HEdge> {
313 size_t operator()(art::HEdge const& x) const noexcept {
314 // Use Cantor pairing function as the hash function.
315 uint32_t a = x.GetFrom();
316 uint32_t b = x.GetTo();
317 return (a + b) * (a + b + 1) / 2 + b;
318 }
319 };
320
321 } // namespace std
322
323 #endif // ART_COMPILER_OPTIMIZING_SUPERBLOCK_CLONER_H_
324