1 // Copyright (c) 2015-2016 The Khronos Group Inc.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #ifndef SOURCE_CFA_H_
16 #define SOURCE_CFA_H_
17
18 #include <algorithm>
19 #include <cassert>
20 #include <cstdint>
21 #include <functional>
22 #include <map>
23 #include <unordered_map>
24 #include <unordered_set>
25 #include <utility>
26 #include <vector>
27
28 namespace spvtools {
29
// Control Flow Analysis over control flow graphs whose nodes are basic blocks
// of type |BB|. Every operation is a static member function; the class has no
// data members. The algorithms call |BB::id()| to identify blocks.
template <class BB>
class CFA {
  using bb_ptr = BB*;
  using cbb_ptr = const BB*;
  using bb_iter = typename std::vector<BB*>::const_iterator;
  using get_blocks_func = std::function<const std::vector<BB*>*(const BB*)>;

  /// One entry of the explicit stack used by the depth first traversal.
  struct block_info {
    cbb_ptr block;  ///< The block this stack entry refers to
    bb_iter iter;   ///< Position of the next child of |block| to examine
  };

 public:
  /// @brief Depth first traversal starting from the \p entry BasicBlock
  ///
  /// Walks the graph depth first from \p entry, invoking \p preorder the
  /// first time a block is reached and \p postorder once a block is finished.
  /// This overload forwards to the six-argument overload with an empty
  /// back-edge callback.
  ///
  /// @param[in] entry          The root BasicBlock of a CFG
  /// @param[in] successor_func Returns a pointer to the successors of a block
  /// @param[in] preorder       Called for every visited block, in preorder
  /// @param[in] postorder      Called for every visited block, in postorder
  /// @param[in] terminal       Called to decide whether the search should
  ///                           stop at a given block; when it returns true
  ///                           the block's remaining successors are not
  ///                           examined
  ///
  /// NOTE: \p successor_func must return a pointer to a collection whose
  /// iterators remain valid for the lifetime of the algorithm, because the
  /// traversal stores iterators into it between calls.
  static void DepthFirstTraversal(const BB* entry,
                                  get_blocks_func successor_func,
                                  std::function<void(cbb_ptr)> preorder,
                                  std::function<void(cbb_ptr)> postorder,
                                  std::function<bool(cbb_ptr)> terminal);

  /// @brief Depth first traversal starting from the \p entry BasicBlock
  ///
  /// Walks the graph depth first from \p entry, invoking \p preorder the
  /// first time a block is reached and \p postorder once a block is finished.
  /// When \p backedge is non-empty it is additionally invoked for every edge
  /// whose target is currently on the traversal stack. Detecting such edges
  /// requires a scan of the stack per examined edge, so leaving \p backedge
  /// empty makes the traversal cheaper.
  ///
  /// @param[in] entry          The root BasicBlock of a CFG
  /// @param[in] successor_func Returns a pointer to the successors of a block
  /// @param[in] preorder       Called for every visited block, in preorder
  /// @param[in] postorder      Called for every visited block, in postorder
  /// @param[in] backedge       Called as backedge(from, to) when a back edge
  ///                           is encountered; may be empty
  /// @param[in] terminal       Called to decide whether the search should
  ///                           stop at a given block; when it returns true
  ///                           the block's remaining successors are not
  ///                           examined
  ///
  /// NOTE: \p successor_func must return a pointer to a collection whose
  /// iterators remain valid for the lifetime of the algorithm, because the
  /// traversal stores iterators into it between calls.
  static void DepthFirstTraversal(
      const BB* entry, get_blocks_func successor_func,
      std::function<void(cbb_ptr)> preorder,
      std::function<void(cbb_ptr)> postorder,
      std::function<void(cbb_ptr, cbb_ptr)> backedge,
      std::function<bool(cbb_ptr)> terminal);

  /// @brief Calculates dominator edges for a set of blocks
  ///
  /// Computes dominators using the algorithm of Cooper, Harvey, and Kennedy
  /// "A Simple, Fast Dominance Algorithm", 2001.
  ///
  /// The algorithm assumes there is a unique root node (a node without
  /// predecessors), and that it is therefore the last element of the
  /// postorder vector.
  ///
  /// @param[in] postorder        The blocks of a CFG in postorder traversal
  ///                             order
  /// @param[in] predecessor_func Returns a pointer to the predecessors of a
  ///                             block
  ///
  /// @return the dominator tree of the graph, as a vector of pairs of nodes
  /// sorted by postorder index. The first node of each pair is a node of the
  /// graph and the second is its immediate dominator in the sense of Cooper
  /// et al.; the root (the last element of \p postorder) is paired with
  /// itself.
  static std::vector<std::pair<BB*, BB*>> CalculateDominators(
      const std::vector<cbb_ptr>& postorder, get_blocks_func predecessor_func);

  // Computes a minimal set of root nodes required to traverse, in the forward
  // direction, the CFG represented by the given vector of blocks, and
  // successor and predecessor functions. Blocks without predecessors are
  // always roots. Among the remaining blocks (which all have predecessors),
  // the one appearing earlier in |blocks| is favoured as a root.
  static std::vector<BB*> TraversalRoots(const std::vector<BB*>& blocks,
                                         get_blocks_func succ_func,
                                         get_blocks_func pred_func);

  // Connects a pseudo entry block and a pseudo exit block to the CFG formed
  // by |ordered_blocks|, recording the extra edges in the two output maps.
  //
  // - The forward traversal roots of |ordered_blocks| become the successors
  //   of |pseudo_entry_block|, and each of them gets an augmented predecessor
  //   list consisting of |pseudo_entry_block| followed by its real
  //   predecessors.
  // - The backward traversal roots (computed over the reversed block list)
  //   become the predecessors of |pseudo_exit_block|, and each of them gets
  //   an augmented successor list consisting of |pseudo_exit_block| followed
  //   by its real successors.
  //
  // Only the entries for the pseudo blocks and for those roots are written;
  // other blocks are left out of the maps. Both map pointers are
  // dereferenced unconditionally and must not be null.
  static void ComputeAugmentedCFG(
      std::vector<BB*>& ordered_blocks, BB* pseudo_entry_block,
      BB* pseudo_exit_block,
      std::unordered_map<const BB*, std::vector<BB*>>* augmented_successors_map,
      std::unordered_map<const BB*, std::vector<BB*>>*
          augmented_predecessors_map,
      get_blocks_func succ_func, get_blocks_func pred_func);

 private:
  /// Returns true if a block with @p id is found in the @p work_list vector
  ///
  /// @param[in] work_list  The blocks on the current depth first traversal
  ///                       path, from the root to the block being processed
  /// @param[in] id         The ID of the block being checked
  ///
  /// @return true if some entry of @p work_list refers to a block with that
  /// id, i.e. the edge work_list.back().block->id() => id is a back-edge
  static bool FindInWorkList(const std::vector<block_info>& work_list,
                             uint32_t id);
};
153
154 template <class BB>
FindInWorkList(const std::vector<block_info> & work_list,uint32_t id)155 bool CFA<BB>::FindInWorkList(const std::vector<block_info>& work_list,
156 uint32_t id) {
157 for (const auto& b : work_list) {
158 if (b.block->id() == id) return true;
159 }
160 return false;
161 }
162
163 template <class BB>
DepthFirstTraversal(const BB * entry,get_blocks_func successor_func,std::function<void (cbb_ptr)> preorder,std::function<void (cbb_ptr)> postorder,std::function<bool (cbb_ptr)> terminal)164 void CFA<BB>::DepthFirstTraversal(const BB* entry,
165 get_blocks_func successor_func,
166 std::function<void(cbb_ptr)> preorder,
167 std::function<void(cbb_ptr)> postorder,
168 std::function<bool(cbb_ptr)> terminal) {
169 DepthFirstTraversal(entry, successor_func, preorder, postorder,
170 /* backedge = */ {}, terminal);
171 }
172
173 template <class BB>
DepthFirstTraversal(const BB * entry,get_blocks_func successor_func,std::function<void (cbb_ptr)> preorder,std::function<void (cbb_ptr)> postorder,std::function<void (cbb_ptr,cbb_ptr)> backedge,std::function<bool (cbb_ptr)> terminal)174 void CFA<BB>::DepthFirstTraversal(
175 const BB* entry, get_blocks_func successor_func,
176 std::function<void(cbb_ptr)> preorder,
177 std::function<void(cbb_ptr)> postorder,
178 std::function<void(cbb_ptr, cbb_ptr)> backedge,
179 std::function<bool(cbb_ptr)> terminal) {
180 assert(successor_func && "The successor function cannot be empty.");
181 assert(preorder && "The preorder function cannot be empty.");
182 assert(postorder && "The postorder function cannot be empty.");
183 assert(terminal && "The terminal function cannot be empty.");
184
185 std::unordered_set<uint32_t> processed;
186
187 /// NOTE: work_list is the sequence of nodes from the root node to the node
188 /// being processed in the traversal
189 std::vector<block_info> work_list;
190 work_list.reserve(10);
191
192 work_list.push_back({entry, std::begin(*successor_func(entry))});
193 preorder(entry);
194 processed.insert(entry->id());
195
196 while (!work_list.empty()) {
197 block_info& top = work_list.back();
198 if (terminal(top.block) || top.iter == end(*successor_func(top.block))) {
199 postorder(top.block);
200 work_list.pop_back();
201 } else {
202 BB* child = *top.iter;
203 top.iter++;
204 if (backedge && FindInWorkList(work_list, child->id())) {
205 backedge(top.block, child);
206 }
207 if (processed.count(child->id()) == 0) {
208 preorder(child);
209 work_list.emplace_back(
210 block_info{child, std::begin(*successor_func(child))});
211 processed.insert(child->id());
212 }
213 }
214 }
215 }
216
217 template <class BB>
CalculateDominators(const std::vector<cbb_ptr> & postorder,get_blocks_func predecessor_func)218 std::vector<std::pair<BB*, BB*>> CFA<BB>::CalculateDominators(
219 const std::vector<cbb_ptr>& postorder, get_blocks_func predecessor_func) {
220 struct block_detail {
221 size_t dominator; ///< The index of blocks's dominator in post order array
222 size_t postorder_index; ///< The index of the block in the post order array
223 };
224 const size_t undefined_dom = postorder.size();
225
226 std::unordered_map<cbb_ptr, block_detail> idoms;
227 for (size_t i = 0; i < postorder.size(); i++) {
228 idoms[postorder[i]] = {undefined_dom, i};
229 }
230 idoms[postorder.back()].dominator = idoms[postorder.back()].postorder_index;
231
232 bool changed = true;
233 while (changed) {
234 changed = false;
235 for (auto b = postorder.rbegin() + 1; b != postorder.rend(); ++b) {
236 const std::vector<BB*>& predecessors = *predecessor_func(*b);
237 // Find the first processed/reachable predecessor that is reachable
238 // in the forward traversal.
239 auto res = std::find_if(std::begin(predecessors), std::end(predecessors),
240 [&idoms, undefined_dom](BB* pred) {
241 return idoms.count(pred) &&
242 idoms[pred].dominator != undefined_dom;
243 });
244 if (res == end(predecessors)) continue;
245 const BB* idom = *res;
246 size_t idom_idx = idoms[idom].postorder_index;
247
248 // all other predecessors
249 for (const auto* p : predecessors) {
250 if (idom == p) continue;
251 // Only consider nodes reachable in the forward traversal.
252 // Otherwise the intersection doesn't make sense and will never
253 // terminate.
254 if (!idoms.count(p)) continue;
255 if (idoms[p].dominator != undefined_dom) {
256 size_t finger1 = idoms[p].postorder_index;
257 size_t finger2 = idom_idx;
258 while (finger1 != finger2) {
259 while (finger1 < finger2) {
260 finger1 = idoms[postorder[finger1]].dominator;
261 }
262 while (finger2 < finger1) {
263 finger2 = idoms[postorder[finger2]].dominator;
264 }
265 }
266 idom_idx = finger1;
267 }
268 }
269 if (idoms[*b].dominator != idom_idx) {
270 idoms[*b].dominator = idom_idx;
271 changed = true;
272 }
273 }
274 }
275
276 std::vector<std::pair<bb_ptr, bb_ptr>> out;
277 for (auto idom : idoms) {
278 // NOTE: performing a const cast for convenient usage with
279 // UpdateImmediateDominators
280 out.push_back({const_cast<BB*>(std::get<0>(idom)),
281 const_cast<BB*>(postorder[std::get<1>(idom).dominator])});
282 }
283
284 // Sort by postorder index to generate a deterministic ordering of edges.
285 std::sort(
286 out.begin(), out.end(),
287 [&idoms](const std::pair<bb_ptr, bb_ptr>& lhs,
288 const std::pair<bb_ptr, bb_ptr>& rhs) {
289 assert(lhs.first);
290 assert(lhs.second);
291 assert(rhs.first);
292 assert(rhs.second);
293 auto lhs_indices = std::make_pair(idoms[lhs.first].postorder_index,
294 idoms[lhs.second].postorder_index);
295 auto rhs_indices = std::make_pair(idoms[rhs.first].postorder_index,
296 idoms[rhs.second].postorder_index);
297 return lhs_indices < rhs_indices;
298 });
299 return out;
300 }
301
302 template <class BB>
TraversalRoots(const std::vector<BB * > & blocks,get_blocks_func succ_func,get_blocks_func pred_func)303 std::vector<BB*> CFA<BB>::TraversalRoots(const std::vector<BB*>& blocks,
304 get_blocks_func succ_func,
305 get_blocks_func pred_func) {
306 // The set of nodes which have been visited from any of the roots so far.
307 std::unordered_set<const BB*> visited;
308
309 auto mark_visited = [&visited](const BB* b) { visited.insert(b); };
310 auto ignore_block = [](const BB*) {};
311 auto no_terminal_blocks = [](const BB*) { return false; };
312
313 auto traverse_from_root = [&mark_visited, &succ_func, &ignore_block,
314 &no_terminal_blocks](const BB* entry) {
315 DepthFirstTraversal(entry, succ_func, mark_visited, ignore_block,
316 no_terminal_blocks);
317 };
318
319 std::vector<BB*> result;
320
321 // First collect nodes without predecessors.
322 for (auto block : blocks) {
323 if (pred_func(block)->empty()) {
324 assert(visited.count(block) == 0 && "Malformed graph!");
325 result.push_back(block);
326 traverse_from_root(block);
327 }
328 }
329
330 // Now collect other stranded nodes. These must be in unreachable cycles.
331 for (auto block : blocks) {
332 if (visited.count(block) == 0) {
333 result.push_back(block);
334 traverse_from_root(block);
335 }
336 }
337
338 return result;
339 }
340
341 template <class BB>
ComputeAugmentedCFG(std::vector<BB * > & ordered_blocks,BB * pseudo_entry_block,BB * pseudo_exit_block,std::unordered_map<const BB *,std::vector<BB * >> * augmented_successors_map,std::unordered_map<const BB *,std::vector<BB * >> * augmented_predecessors_map,get_blocks_func succ_func,get_blocks_func pred_func)342 void CFA<BB>::ComputeAugmentedCFG(
343 std::vector<BB*>& ordered_blocks, BB* pseudo_entry_block,
344 BB* pseudo_exit_block,
345 std::unordered_map<const BB*, std::vector<BB*>>* augmented_successors_map,
346 std::unordered_map<const BB*, std::vector<BB*>>* augmented_predecessors_map,
347 get_blocks_func succ_func, get_blocks_func pred_func) {
348 // Compute the successors of the pseudo-entry block, and
349 // the predecessors of the pseudo exit block.
350 auto sources = TraversalRoots(ordered_blocks, succ_func, pred_func);
351
352 // For the predecessor traversals, reverse the order of blocks. This
353 // will affect the post-dominance calculation as follows:
354 // - Suppose you have blocks A and B, with A appearing before B in
355 // the list of blocks.
356 // - Also, A branches only to B, and B branches only to A.
357 // - We want to compute A as dominating B, and B as post-dominating B.
358 // By using reversed blocks for predecessor traversal roots discovery,
359 // we'll add an edge from B to the pseudo-exit node, rather than from A.
360 // All this is needed to correctly process the dominance/post-dominance
361 // constraint when A is a loop header that points to itself as its
362 // own continue target, and B is the latch block for the loop.
363 std::vector<BB*> reversed_blocks(ordered_blocks.rbegin(),
364 ordered_blocks.rend());
365 auto sinks = TraversalRoots(reversed_blocks, pred_func, succ_func);
366
367 // Wire up the pseudo entry block.
368 (*augmented_successors_map)[pseudo_entry_block] = sources;
369 for (auto block : sources) {
370 auto& augmented_preds = (*augmented_predecessors_map)[block];
371 const auto preds = pred_func(block);
372 augmented_preds.reserve(1 + preds->size());
373 augmented_preds.push_back(pseudo_entry_block);
374 augmented_preds.insert(augmented_preds.end(), preds->begin(), preds->end());
375 }
376
377 // Wire up the pseudo exit block.
378 (*augmented_predecessors_map)[pseudo_exit_block] = sinks;
379 for (auto block : sinks) {
380 auto& augmented_succ = (*augmented_successors_map)[block];
381 const auto succ = succ_func(block);
382 augmented_succ.reserve(1 + succ->size());
383 augmented_succ.push_back(pseudo_exit_block);
384 augmented_succ.insert(augmented_succ.end(), succ->begin(), succ->end());
385 }
386 }
387
388 } // namespace spvtools
389
390 #endif // SOURCE_CFA_H_
391