1 //===- subzero/src/IceLoopAnalyzer.cpp - Loop Analysis --------------------===//
2 //
3 // The Subzero Code Generator
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief Implements the loop analysis on the CFG.
12 ///
13 //===----------------------------------------------------------------------===//
14 #include "IceLoopAnalyzer.h"
15
16 #include "IceCfg.h"
17 #include "IceCfgNode.h"
18
19 #include <algorithm>
20
21 namespace Ice {
22 class LoopAnalyzer {
23 public:
24 explicit LoopAnalyzer(Cfg *Func);
25
26 /// Use Tarjan's strongly connected components algorithm to identify outermost
27 /// to innermost loops. By deleting the head of the loop from the graph, inner
28 /// loops can be found. This assumes that the head node is not shared between
29 /// loops but instead all paths to the head come from 'continue' constructs.
30 ///
31 /// This only computes the loop nest depth within the function and does not
32 /// take into account whether the function was called from within a loop.
33 // TODO(ascull): this currently uses a extension of Tarjan's algorithm with
34 // is bounded linear. ncbray suggests another algorithm which is linear in
35 // practice but not bounded linear. I think it also finds dominators.
36 // http://lenx.100871.net/papers/loop-SAS.pdf
37
getLoopBodies()38 CfgVector<CfgUnorderedSet<SizeT>> getLoopBodies() { return Loops; }
39
40 private:
41 LoopAnalyzer() = delete;
42 LoopAnalyzer(const LoopAnalyzer &) = delete;
43 LoopAnalyzer &operator=(const LoopAnalyzer &) = delete;
44 void computeLoopNestDepth();
45
46 using IndexT = uint32_t;
47 static constexpr IndexT UndefinedIndex = 0;
48 static constexpr IndexT FirstDefinedIndex = 1;
49
50 // TODO(ascull): classify the other fields
51 class LoopNode {
52 LoopNode() = delete;
53 LoopNode operator=(const LoopNode &) = delete;
54
55 public:
LoopNode(CfgNode * BB)56 explicit LoopNode(CfgNode *BB) : BB(BB) { reset(); }
57 LoopNode(const LoopNode &) = default;
58
59 void reset();
60
61 NodeList::const_iterator successorsEnd() const;
currentSuccessor() const62 NodeList::const_iterator currentSuccessor() const { return Succ; }
nextSuccessor()63 void nextSuccessor() { ++Succ; }
64
visit(IndexT VisitIndex)65 void visit(IndexT VisitIndex) { Index = LowLink = VisitIndex; }
isVisited() const66 bool isVisited() const { return Index != UndefinedIndex; }
getIndex() const67 IndexT getIndex() const { return Index; }
68
tryLink(IndexT NewLink)69 void tryLink(IndexT NewLink) {
70 if (NewLink < LowLink)
71 LowLink = NewLink;
72 }
getLowLink() const73 IndexT getLowLink() const { return LowLink; }
74
setOnStack(bool NewValue=true)75 void setOnStack(bool NewValue = true) { OnStack = NewValue; }
isOnStack() const76 bool isOnStack() const { return OnStack; }
77
setDeleted()78 void setDeleted() { Deleted = true; }
isDeleted() const79 bool isDeleted() const { return Deleted; }
80
81 void incrementLoopNestDepth();
82 bool hasSelfEdge() const;
83
getNode()84 CfgNode *getNode() { return BB; }
85
86 private:
87 CfgNode *BB;
88 NodeList::const_iterator Succ;
89 IndexT Index;
90 IndexT LowLink;
91 bool OnStack;
92 bool Deleted = false;
93 };
94
95 using LoopNodeList = CfgVector<LoopNode>;
96 using LoopNodePtrList = CfgVector<LoopNode *>;
97
98 /// Process the node as part as part of Tarjan's algorithm and return either a
99 /// node to recurse into or nullptr when the node has been fully processed.
100 LoopNode *processNode(LoopNode &Node);
101
102 /// The function to analyze for loops.
103 Cfg *const Func;
104 /// A list of decorated nodes in the same order as Func->getNodes() which
105 /// means the node's index will also be valid in this list.
106 LoopNodeList AllNodes;
107 /// This is used as a replacement for the call stack.
108 LoopNodePtrList WorkStack;
109 /// Track which loop a node belongs to.
110 LoopNodePtrList LoopStack;
111 /// The index to assign to the next visited node.
112 IndexT NextIndex = FirstDefinedIndex;
113 /// The number of nodes which have been marked deleted. This is used to track
114 /// when the iteration should end.
115 LoopNodePtrList::size_type NumDeletedNodes = 0;
116
117 /// All the Loops, in descending order of size
118 CfgVector<CfgUnorderedSet<SizeT>> Loops;
119 };
reset()120 void LoopAnalyzer::LoopNode::reset() {
121 if (Deleted)
122 return;
123 Succ = BB->getOutEdges().begin();
124 Index = LowLink = UndefinedIndex;
125 OnStack = false;
126 }
127
successorsEnd() const128 NodeList::const_iterator LoopAnalyzer::LoopNode::successorsEnd() const {
129 return BB->getOutEdges().end();
130 }
131
incrementLoopNestDepth()132 void LoopAnalyzer::LoopNode::incrementLoopNestDepth() {
133 BB->incrementLoopNestDepth();
134 }
135
hasSelfEdge() const136 bool LoopAnalyzer::LoopNode::hasSelfEdge() const {
137 for (CfgNode *Succ : BB->getOutEdges()) {
138 if (Succ == BB)
139 return true;
140 }
141 return false;
142 }
143
LoopAnalyzer(Cfg * Fn)144 LoopAnalyzer::LoopAnalyzer(Cfg *Fn) : Func(Fn) {
145 const NodeList &Nodes = Func->getNodes();
146
147 // Allocate memory ahead of time. This is why a vector is used instead of a
148 // stack which doesn't support reserving (or bulk erasure used below).
149 AllNodes.reserve(Nodes.size());
150 WorkStack.reserve(Nodes.size());
151 LoopStack.reserve(Nodes.size());
152
153 // Create the LoopNodes from the function's CFG
154 for (CfgNode *Node : Nodes)
155 AllNodes.emplace_back(Node);
156 computeLoopNestDepth();
157 }
158
computeLoopNestDepth()159 void LoopAnalyzer::computeLoopNestDepth() {
160 assert(AllNodes.size() == Func->getNodes().size());
161 assert(NextIndex == FirstDefinedIndex);
162 assert(NumDeletedNodes == 0);
163
164 while (NumDeletedNodes < AllNodes.size()) {
165 // Prepare to run Tarjan's
166 for (LoopNode &Node : AllNodes)
167 Node.reset();
168
169 assert(WorkStack.empty());
170 assert(LoopStack.empty());
171
172 for (LoopNode &Node : AllNodes) {
173 if (Node.isDeleted() || Node.isVisited())
174 continue;
175
176 WorkStack.push_back(&Node);
177
178 while (!WorkStack.empty()) {
179 LoopNode &WorkNode = *WorkStack.back();
180 if (LoopNode *Succ = processNode(WorkNode))
181 WorkStack.push_back(Succ);
182 else
183 WorkStack.pop_back();
184 }
185 }
186 }
187 }
188
189 LoopAnalyzer::LoopNode *
processNode(LoopAnalyzer::LoopNode & Node)190 LoopAnalyzer::processNode(LoopAnalyzer::LoopNode &Node) {
191 if (!Node.isVisited()) {
192 Node.visit(NextIndex++);
193 LoopStack.push_back(&Node);
194 Node.setOnStack();
195 } else {
196 // Returning to a node after having recursed into Succ so continue
197 // iterating through successors after using the Succ.LowLink value that was
198 // computed in the recursion.
199 LoopNode &Succ = AllNodes[(*Node.currentSuccessor())->getIndex()];
200 Node.tryLink(Succ.getLowLink());
201 Node.nextSuccessor();
202 }
203
204 // Visit the successors and recurse into unvisited nodes. The recursion could
205 // cause the iteration to be suspended but it will resume as the stack is
206 // unwound.
207 auto SuccEnd = Node.successorsEnd();
208 for (; Node.currentSuccessor() != SuccEnd; Node.nextSuccessor()) {
209 LoopNode &Succ = AllNodes[(*Node.currentSuccessor())->getIndex()];
210
211 if (Succ.isDeleted())
212 continue;
213
214 if (!Succ.isVisited())
215 return &Succ;
216 else if (Succ.isOnStack())
217 Node.tryLink(Succ.getIndex());
218 }
219
220 if (Node.getLowLink() != Node.getIndex())
221 return nullptr;
222
223 // Single node means no loop in the CFG
224 if (LoopStack.back() == &Node) {
225 LoopStack.back()->setOnStack(false);
226 if (Node.hasSelfEdge())
227 LoopStack.back()->incrementLoopNestDepth();
228 LoopStack.back()->setDeleted();
229 ++NumDeletedNodes;
230 LoopStack.pop_back();
231 return nullptr;
232 }
233
234 // Reaching here means a loop has been found! It consists of the nodes on the
235 // top of the stack, down until the current node being processed, Node, is
236 // found.
237 for (auto It = LoopStack.rbegin(); It != LoopStack.rend(); ++It) {
238 (*It)->setOnStack(false);
239 (*It)->incrementLoopNestDepth();
240 // Remove the loop from the stack and delete the head node
241 if (*It == &Node) {
242 (*It)->setDeleted();
243 ++NumDeletedNodes;
244 CfgUnorderedSet<SizeT> LoopNodes;
245 for (auto LoopIter = It.base() - 1; LoopIter != LoopStack.end();
246 ++LoopIter) {
247 LoopNodes.insert((*LoopIter)->getNode()->getIndex());
248 }
249 Loops.push_back(LoopNodes);
250 LoopStack.erase(It.base() - 1, LoopStack.end());
251 break;
252 }
253 }
254
255 return nullptr;
256 }
ComputeLoopInfo(Cfg * Func)257 CfgVector<Loop> ComputeLoopInfo(Cfg *Func) {
258 auto LoopBodies = LoopAnalyzer(Func).getLoopBodies();
259
260 CfgVector<Loop> Loops;
261 Loops.reserve(LoopBodies.size());
262 std::sort(
263 LoopBodies.begin(), LoopBodies.end(),
264 [](const CfgUnorderedSet<SizeT> &A, const CfgUnorderedSet<SizeT> &B) {
265 return A.size() > B.size();
266 });
267 for (auto &LoopBody : LoopBodies) {
268 CfgNode *Header = nullptr;
269 bool IsSimpleLoop = true;
270 for (auto NodeIndex : LoopBody) {
271 CfgNode *Cur = Func->getNodes()[NodeIndex];
272 for (auto *Prev : Cur->getInEdges()) {
273 if (LoopBody.find(Prev->getIndex()) ==
274 LoopBody.end()) { // coming from outside
275 if (Header == nullptr) {
276 Header = Cur;
277 } else {
278 Header = nullptr;
279 IsSimpleLoop = false;
280 break;
281 }
282 }
283 }
284 if (!IsSimpleLoop) {
285 break;
286 }
287 }
288 if (!IsSimpleLoop)
289 continue; // To next potential loop
290
291 CfgNode *PreHeader = nullptr;
292 for (auto *Prev : Header->getInEdges()) {
293 if (LoopBody.find(Prev->getIndex()) == LoopBody.end()) {
294 if (PreHeader == nullptr) {
295 PreHeader = Prev;
296 } else {
297 PreHeader = nullptr;
298 break;
299 }
300 }
301 }
302
303 Loops.emplace_back(Header, PreHeader, LoopBody);
304 }
305 return Loops;
306 }
307
308 } // end of namespace Ice
309