1 /**
2 * Copyright (c) 2021-2022 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "optimizer/ir/basicblock.h"
17 #include "optimizer/ir/graph.h"
18 #include "compiler_logger.h"
19 #include "optimizer/analysis/alias_analysis.h"
20
21 /**
22 * See "Efficient Field-sensitive pointer analysis for C" by "David J. Pearce
23 * and Paul H. J. Kelly and Chris Hankin
24 *
25 * We treat each IR Inst as a constraint that may be applied to a set of
26 * aliases of some virtual register. Virtual registers are used as constraint
27 * variables as well.
28 *
29 * In order to solve the system of set constraints, the following is done:
30 *
31 * 1. Each constraint variable x has a solution set associated with it, Sol(x).
32 * Implemented through AliasAnalysis::points_to_ that contains mapping of
33 * virtual register to possible aliases.
34 *
35 * 2. Constraints are separated into direct, copy.
36 *
37 * - Direct constraints are constraints that require no extra processing, such
38 * as P = &Q.
39 *
40 * - Copy constraints are those of the form P = Q. Such semantic can be
41 * obtained through NullCheck, Mov, and Phi instructions.
42 *
43 * 3. All direct constraints of the form P = &Q are processed, such that Q is
44 * added to Sol(P).
45 *
46 * 4. A directed graph is built out of the copy constraints. Each constraint
47 * variable is a node in the graph, and an edge from Q to P is added for each
48 * copy constraint of the form P = Q.
49 *
50 * 5. The graph is then walked, and solution sets are propagated along the copy
51 * edges, such that an edge from Q to P causes Sol(P) <- Sol(P) union Sol(Q).
52 *
53 * 6. The process of walking the graph is iterated until no solution sets
54 * change.
55 *
56 * To add new instructions to alias analysis please consider following:
57 * - AliasAnalysis class: to add a visitor for a new instruction that should be analyzed
58 *
59 * TODO(Evgenii Kudriashov): Prior to walking the graph in steps 5 and 6, We
60 * need to perform static cycle elimination on the constraint graph, as well as
61 * off-line variable substitution.
62 *
63 * TODO(Evgenii Kudriashov): To add flow-sensitivity the "Flow-sensitive
64 * pointer analysis for millions of lines of code" by Ben Hardekopf and Calvin
65 * Lin may be considered.
66 *
67 * TODO(Evgenii Kudriashov): After implementing VRP and SCEV the "Loop-Oriented
68 * Array- and Field-Sensitive Pointer Analysis for Automatic SIMD
69 * Vectorization" by Yulei Sui, Xiaokang Fan, Hao Zhou, and Jingling Xue may be
70 * considered to add advanced analysis of array indices.
71 */
72
73 namespace panda::compiler {
74
AliasAnalysis(Graph * graph)75 AliasAnalysis::AliasAnalysis(Graph *graph) : Analysis(graph), points_to_(graph->GetAllocator()->Adapter()) {}
76
GetBlocksToVisit() const77 const ArenaVector<BasicBlock *> &AliasAnalysis::GetBlocksToVisit() const
78 {
79 return GetGraph()->GetBlocksRPO();
80 }
81
RunImpl()82 bool AliasAnalysis::RunImpl()
83 {
84 Init();
85
86 VisitGraph();
87
88 // Initialize solution sets
89 for (auto pair : *direct_) {
90 auto it = points_to_.try_emplace(pair.first, GetGraph()->GetAllocator()->Adapter());
91 ASSERT(pair.first.GetBase() == nullptr || pair.first.GetBase()->GetOpcode() != Opcode::NullCheck);
92 ASSERT(pair.second.GetBase() == nullptr || pair.second.GetBase()->GetOpcode() != Opcode::NullCheck);
93 it.first->second.insert(pair.second);
94 }
95
96 SolveConstraints();
97
98 #ifndef NDEBUG
99 if (CompilerLogger::IsComponentEnabled(CompilerLoggerComponents::ALIAS_ANALYSIS)) {
100 std::ostringstream out;
101 DumpChains(&out);
102 Dump(&out);
103 COMPILER_LOG(DEBUG, ALIAS_ANALYSIS) << out.str();
104 }
105 #endif
106 return true;
107 }
108
Init()109 void AliasAnalysis::Init()
110 {
111 auto allocator = GetGraph()->GetLocalAllocator();
112 chains_ = allocator->New<PointerMap<ArenaVector<Pointer>>>(allocator->Adapter());
113 direct_ = allocator->New<PointerPairVector>(allocator->Adapter());
114 inputs_set_ = allocator->New<ArenaSet<Inst *>>(allocator->Adapter());
115 ASSERT(chains_ != nullptr);
116 ASSERT(direct_ != nullptr);
117 ASSERT(inputs_set_ != nullptr);
118 points_to_.clear();
119 }
120
Dump(std::ostream * out) const121 void Pointer::Dump(std::ostream *out) const
122 {
123 switch (type_) {
124 case OBJECT:
125 (*out) << "v" << base_->GetId();
126 break;
127 case STATIC_FIELD:
128 (*out) << "SF #" << imm_;
129 break;
130 case POOL_CONSTANT:
131 (*out) << "PC #" << imm_;
132 break;
133 case OBJECT_FIELD:
134 (*out) << "v" << base_->GetId() << " #" << imm_;
135 break;
136 case ARRAY_ELEMENT:
137 (*out) << "v" << base_->GetId() << "[";
138 if (idx_ != nullptr) {
139 (*out) << "v" << idx_->GetId();
140 if (imm_ != 0) {
141 (*out) << "+" << imm_;
142 }
143 } else {
144 (*out) << imm_;
145 }
146 (*out) << "]";
147 break;
148 default:
149 UNREACHABLE();
150 }
151 if (local_) {
152 (*out) << "(local)";
153 }
154 if (volatile_) {
155 (*out) << "(v)";
156 }
157 }
158
PointerLess(const Pointer & lhs,const Pointer & rhs)159 static bool PointerLess(const Pointer &lhs, const Pointer &rhs)
160 {
161 if (lhs.GetBase() == rhs.GetBase()) {
162 return lhs.GetImm() < rhs.GetImm();
163 }
164 if (lhs.GetBase() == nullptr) {
165 return true;
166 }
167 if (rhs.GetBase() == nullptr) {
168 return false;
169 }
170 return lhs.GetBase()->GetId() < rhs.GetBase()->GetId();
171 }
172
DumpChains(std::ostream * out) const173 void AliasAnalysis::DumpChains(std::ostream *out) const
174 {
175 ArenaVector<Pointer> sorted_keys(GetGraph()->GetLocalAllocator()->Adapter());
176 for (auto &pair : *chains_) {
177 sorted_keys.push_back(pair.first);
178 }
179 std::sort(sorted_keys.begin(), sorted_keys.end(), PointerLess);
180
181 (*out) << "The chains are the following:" << std::endl;
182 for (auto &p : sorted_keys) {
183 (*out) << "\t";
184 p.Dump(out);
185 (*out) << ": {";
186
187 // Sort by instruction ID to add more readability to logs
188 ArenaVector<Pointer> sorted(chains_->at(p), GetGraph()->GetLocalAllocator()->Adapter());
189 std::sort(sorted.begin(), sorted.end(), PointerLess);
190 auto edge = sorted.begin();
191 if (edge != sorted.end()) {
192 edge->Dump(out);
193 while (++edge != sorted.end()) {
194 (*out) << ", ";
195 edge->Dump(out);
196 }
197 }
198 (*out) << "}" << std::endl;
199 }
200 }
201
Dump(std::ostream * out) const202 void AliasAnalysis::Dump(std::ostream *out) const
203 {
204 ArenaVector<Pointer> sorted_keys(GetGraph()->GetLocalAllocator()->Adapter());
205 for (auto &pair : points_to_) {
206 sorted_keys.push_back(pair.first);
207 }
208 std::sort(sorted_keys.begin(), sorted_keys.end(), PointerLess);
209
210 (*out) << "The solution set is the following:" << std::endl;
211 for (auto &p : sorted_keys) {
212 (*out) << "\t";
213 p.Dump(out);
214 (*out) << ": {";
215
216 // Sort by instruction ID to add more readability to logs
217 auto values = points_to_.at(p);
218 ArenaVector<Pointer> sorted(values.begin(), values.end(), GetGraph()->GetLocalAllocator()->Adapter());
219 std::sort(sorted.begin(), sorted.end(), PointerLess);
220 auto iter = sorted.begin();
221 if (iter != sorted.end()) {
222 iter->Dump(out);
223 while (++iter != sorted.end()) {
224 (*out) << ", ";
225 iter->Dump(out);
226 }
227 }
228 (*out) << "}" << std::endl;
229 }
230 }
231
CheckInstAlias(Inst * mem1,Inst * mem2) const232 AliasType AliasAnalysis::CheckInstAlias(Inst *mem1, Inst *mem2) const
233 {
234 return MAY_ALIAS;
235 }
236
237 /**
238 * Here we propagate solutions obtained from direct constraints through copy
239 * constraints e.g: we have a node A with solution {a} and the node A was
240 * copied to B and C (this->chains_ maintains these links), and C was copied to
241 * D.
242 *
243 * A{a} -> B
244 * \-> C -> D
245 *
246 * After first iteration (iterating A node) we will obtain
247 *
248 * A{a} -> B{a}
249 * \-> C{a} -> D
250 *
251 * After second iteration (iterating B node) nothing changes
252 *
253 * After third iteration (iterating C node):
254 *
255 * A{a} -> B{a}
256 * \-> C{a} -> D{a}
257 *
258 * For complex nodes (OBJECT_FIELD, ARRAY_ELEMENT) we create auxiliary nodes e.g.
259 * if a field F was accessed from object A then we have two nodes:
260 *
261 * A{a} -> A.F
262 *
263 * And solutions from A would be propagated as following:
264 *
265 * A{a} -> A.F{a.F}
266 *
267 * The function works using worklist to process only updated nodes.
268 */
SolveConstraints()269 void AliasAnalysis::SolveConstraints()
270 {
271 ArenaQueue<Pointer> worklist(GetGraph()->GetLocalAllocator()->Adapter());
272 for (auto &pair : *direct_) {
273 if (chains_->find(pair.first) != chains_->end()) {
274 worklist.push(pair.first);
275 }
276 }
277
278 while (!worklist.empty()) {
279 Pointer &ref = worklist.front();
280 ASSERT(ref.GetBase() == nullptr || ref.GetBase()->GetOpcode() != Opcode::NullCheck);
281 for (auto &edge : chains_->at(ref)) {
282 // POOL_CONSTANT cannot be assignee
283 ASSERT(edge.GetType() != POOL_CONSTANT);
284 auto &sols = points_to_.try_emplace(edge, GetGraph()->GetAllocator()->Adapter()).first->second;
285 bool added = false;
286 for (auto &alias : points_to_.at(ref)) {
287 ASSERT(alias.GetBase() == nullptr || alias.GetBase()->GetOpcode() != Opcode::NullCheck);
288 if (edge.GetType() == OBJECT_FIELD && ref.GetBase() == edge.GetBase()) {
289 // Propagating from object to fields: A{a} -> A.F{a.f}
290 if (alias.GetType() == OBJECT) {
291 Pointer p = Pointer::CreateObjectField(alias.GetBase(), edge.GetImm(), edge.GetTypePtr());
292 p.SetLocalVolatile(alias.IsLocal(), edge.IsVolatile());
293
294 added |= sols.insert(p).second;
295 continue;
296 }
297 // In case A{a.g} -> A.F we propagate symbolic name: A{a.g} -> A.F{A.F}
298 Pointer p = Pointer::CreateObjectField(ref.GetBase(), edge.GetImm(), edge.GetTypePtr());
299 p.SetLocalVolatile(alias.IsLocal(), edge.IsVolatile());
300
301 added |= sols.insert(p).second;
302 continue;
303 }
304 if (edge.GetType() == ARRAY_ELEMENT && ref.GetBase() == edge.GetBase()) {
305 // Propagating from object to elements: A{a} -> A[i]{a[i]}
306 if (alias.GetType() == OBJECT) {
307 Pointer p = Pointer::CreateArrayElement(alias.GetBase(), edge.GetIdx(), edge.GetImm());
308 p.SetLocalVolatile(alias.IsLocal(), edge.IsVolatile());
309
310 added |= sols.insert(p).second;
311 continue;
312 }
313 // In case A{a[j]} -> A[i] we propagate symbolic name: A{a[j]} -> A[i]{A[i]}
314 Pointer p = Pointer::CreateArrayElement(ref.GetBase(), edge.GetIdx(), edge.GetImm());
315 p.SetLocalVolatile(alias.IsLocal(), edge.IsVolatile());
316
317 added |= sols.insert(p).second;
318 continue;
319 }
320 added |= sols.insert(alias).second;
321 }
322 if (added && chains_->find(edge) != chains_->end()) {
323 worklist.push(edge);
324 }
325 ASSERT(!sols.empty());
326 }
327 worklist.pop();
328 }
329 }
330
331 /**
332 * Instructions that introduce aliases.
333 */
334
VisitCastAnyTypeValue(GraphVisitor * v,Inst * inst)335 void AliasAnalysis::VisitCastAnyTypeValue(GraphVisitor *v, Inst *inst)
336 {
337 if (inst->GetType() == DataType::REFERENCE) {
338 static_cast<AliasAnalysis *>(v)->AddDirectEdge(Pointer::CreateObject(inst));
339 }
340 }
341
342 } // namespace panda::compiler
343