• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //===- CFLAndersAliasAnalysis.cpp - Inclusion-based Alias Analysis --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a CFL-based, summary-based alias analysis algorithm. It
10 // differs from CFLSteensAliasAnalysis in its inclusion-based nature while
11 // CFLSteensAliasAnalysis is unification-based. This pass has worse performance
12 // than CFLSteensAliasAnalysis (the worst case complexity of
13 // CFLAndersAliasAnalysis is cubic, while the worst case complexity of
14 // CFLSteensAliasAnalysis is almost linear), but it is able to yield more
15 // precise analysis result. The precision of this analysis is roughly the same
16 // as that of a one-level context-sensitive Andersen's algorithm.
17 //
18 // The algorithm used here is based on recursive state machine matching scheme
19 // proposed in "Demand-driven alias analysis for C" by Xin Zheng and Radu
20 // Rugina. The general idea is to extend the traditional transitive closure
21 // algorithm to perform CFL matching along the way: instead of recording
22 // "whether X is reachable from Y", we keep track of "whether X is reachable
23 // from Y at state Z", where the "state" field indicates where we are in the CFL
24 // matching process. To understand the matching better, it is advisable to have
25 // the state machine shown in Figure 3 of the paper available when reading the
26 // codes: all we do here is to selectively expand the transitive closure by
27 // discarding edges that are not recognized by the state machine.
28 //
29 // There are two differences between our current implementation and the one
30 // described in the paper:
31 // - Our algorithm eagerly computes all alias pairs after the CFLGraph is built,
32 // while in the paper the authors did the computation in a demand-driven
33 // fashion. We did not implement the demand-driven algorithm due to the
34 // additional coding complexity and higher memory profile, but if we found it
35 // necessary we may switch to it eventually.
36 // - In the paper the authors use a state machine that does not distinguish
37 // value reads from value writes. For example, if Y is reachable from X at state
38 // S3, it may be the case that X is written into Y, or it may be the case that
39 // there's a third value Z that writes into both X and Y. To make that
40 // distinction (which is crucial in building function summary as well as
41 // retrieving mod-ref info), we choose to duplicate some of the states in the
42 // paper's proposed state machine. The duplication does not change the set the
43 // machine accepts. Given a pair of reachable values, it only provides more
44 // detailed information on which value is being written into and which is being
45 // read from.
46 //
47 //===----------------------------------------------------------------------===//
48 
49 // N.B. AliasAnalysis as a whole is phrased as a FunctionPass at the moment, and
50 // CFLAndersAA is interprocedural. This is *technically* A Bad Thing, because
51 // FunctionPasses are only allowed to inspect the Function that they're being
52 // run on. Realistically, this likely isn't a problem until we allow
53 // FunctionPasses to run concurrently.
54 
55 #include "llvm/Analysis/CFLAndersAliasAnalysis.h"
56 #include "AliasAnalysisSummary.h"
57 #include "CFLGraph.h"
58 #include "llvm/ADT/DenseMap.h"
59 #include "llvm/ADT/DenseMapInfo.h"
60 #include "llvm/ADT/DenseSet.h"
61 #include "llvm/ADT/None.h"
62 #include "llvm/ADT/Optional.h"
63 #include "llvm/ADT/STLExtras.h"
64 #include "llvm/ADT/SmallVector.h"
65 #include "llvm/ADT/iterator_range.h"
66 #include "llvm/Analysis/AliasAnalysis.h"
67 #include "llvm/Analysis/MemoryLocation.h"
68 #include "llvm/IR/Argument.h"
69 #include "llvm/IR/Function.h"
70 #include "llvm/IR/PassManager.h"
71 #include "llvm/IR/Type.h"
72 #include "llvm/InitializePasses.h"
73 #include "llvm/Pass.h"
74 #include "llvm/Support/Casting.h"
75 #include "llvm/Support/Compiler.h"
76 #include "llvm/Support/Debug.h"
77 #include "llvm/Support/raw_ostream.h"
78 #include <algorithm>
79 #include <bitset>
80 #include <cassert>
81 #include <cstddef>
82 #include <cstdint>
83 #include <functional>
84 #include <utility>
85 #include <vector>
86 
87 using namespace llvm;
88 using namespace llvm::cflaa;
89 
90 #define DEBUG_TYPE "cfl-anders-aa"
91 
CFLAndersAAResult(std::function<const TargetLibraryInfo & (Function & F)> GetTLI)92 CFLAndersAAResult::CFLAndersAAResult(
93     std::function<const TargetLibraryInfo &(Function &F)> GetTLI)
94     : GetTLI(std::move(GetTLI)) {}
CFLAndersAAResult(CFLAndersAAResult && RHS)95 CFLAndersAAResult::CFLAndersAAResult(CFLAndersAAResult &&RHS)
96     : AAResultBase(std::move(RHS)), GetTLI(std::move(RHS.GetTLI)) {}
97 CFLAndersAAResult::~CFLAndersAAResult() = default;
98 
99 namespace {
100 
/// States of the CFL matching automaton. Each enumerator's numeric value is
/// used as a bit index into a StateSet, so the values must stay dense and
/// start at zero.
enum class MatchState : uint8_t {
  // S1 in the paper.
  FlowFromReadOnly = 0,

  // S2 in the paper, split in two:
  // - 'NoReadWrite': there exists an alias path that contains neither
  //   assignment nor reverse assignment edges.
  // - 'ReadOnly': there exists an alias path that contains reverse assignment
  //   edges only.
  FlowFromMemAliasNoReadWrite = 1,
  FlowFromMemAliasReadOnly = 2,

  // S3 in the paper, split in two:
  // - 'WriteOnly': there exists an alias path that contains assignment edges
  //   only.
  // - 'ReadWrite': there exists an alias path that contains both assignment
  //   and reverse assignment edges. Note that X and Y being reachable at the
  //   'ReadWrite' state does NOT mean X is both read from and written to Y; it
  //   means that a third value Z is written to both X and Y.
  FlowToWriteOnly = 3,
  FlowToReadWrite = 4,

  // S4 in the paper, split in two.
  FlowToMemAliasWriteOnly = 5,
  FlowToMemAliasReadWrite = 6,
};

/// One bit per MatchState enumerator.
using StateSet = std::bitset<7>;

/// Bit mask selecting the two 'ReadOnly' states.
const unsigned ReadOnlyStateMask =
    (1U << static_cast<unsigned>(MatchState::FlowFromReadOnly)) |
    (1U << static_cast<unsigned>(MatchState::FlowFromMemAliasReadOnly));

/// Bit mask selecting the two 'WriteOnly' states.
const unsigned WriteOnlyStateMask =
    (1U << static_cast<unsigned>(MatchState::FlowToWriteOnly)) |
    (1U << static_cast<unsigned>(MatchState::FlowToMemAliasWriteOnly));
134 
135 // A pair that consists of a value and an offset
136 struct OffsetValue {
137   const Value *Val;
138   int64_t Offset;
139 };
140 
operator ==(OffsetValue LHS,OffsetValue RHS)141 bool operator==(OffsetValue LHS, OffsetValue RHS) {
142   return LHS.Val == RHS.Val && LHS.Offset == RHS.Offset;
143 }
operator <(OffsetValue LHS,OffsetValue RHS)144 bool operator<(OffsetValue LHS, OffsetValue RHS) {
145   return std::less<const Value *>()(LHS.Val, RHS.Val) ||
146          (LHS.Val == RHS.Val && LHS.Offset < RHS.Offset);
147 }
148 
149 // A pair that consists of an InstantiatedValue and an offset
150 struct OffsetInstantiatedValue {
151   InstantiatedValue IVal;
152   int64_t Offset;
153 };
154 
operator ==(OffsetInstantiatedValue LHS,OffsetInstantiatedValue RHS)155 bool operator==(OffsetInstantiatedValue LHS, OffsetInstantiatedValue RHS) {
156   return LHS.IVal == RHS.IVal && LHS.Offset == RHS.Offset;
157 }
158 
159 // We use ReachabilitySet to keep track of value aliases (The nonterminal "V" in
160 // the paper) during the analysis.
161 class ReachabilitySet {
162   using ValueStateMap = DenseMap<InstantiatedValue, StateSet>;
163   using ValueReachMap = DenseMap<InstantiatedValue, ValueStateMap>;
164 
165   ValueReachMap ReachMap;
166 
167 public:
168   using const_valuestate_iterator = ValueStateMap::const_iterator;
169   using const_value_iterator = ValueReachMap::const_iterator;
170 
171   // Insert edge 'From->To' at state 'State'
insert(InstantiatedValue From,InstantiatedValue To,MatchState State)172   bool insert(InstantiatedValue From, InstantiatedValue To, MatchState State) {
173     assert(From != To);
174     auto &States = ReachMap[To][From];
175     auto Idx = static_cast<size_t>(State);
176     if (!States.test(Idx)) {
177       States.set(Idx);
178       return true;
179     }
180     return false;
181   }
182 
183   // Return the set of all ('From', 'State') pair for a given node 'To'
184   iterator_range<const_valuestate_iterator>
reachableValueAliases(InstantiatedValue V) const185   reachableValueAliases(InstantiatedValue V) const {
186     auto Itr = ReachMap.find(V);
187     if (Itr == ReachMap.end())
188       return make_range<const_valuestate_iterator>(const_valuestate_iterator(),
189                                                    const_valuestate_iterator());
190     return make_range<const_valuestate_iterator>(Itr->second.begin(),
191                                                  Itr->second.end());
192   }
193 
value_mappings() const194   iterator_range<const_value_iterator> value_mappings() const {
195     return make_range<const_value_iterator>(ReachMap.begin(), ReachMap.end());
196   }
197 };
198 
199 // We use AliasMemSet to keep track of all memory aliases (the nonterminal "M"
200 // in the paper) during the analysis.
201 class AliasMemSet {
202   using MemSet = DenseSet<InstantiatedValue>;
203   using MemMapType = DenseMap<InstantiatedValue, MemSet>;
204 
205   MemMapType MemMap;
206 
207 public:
208   using const_mem_iterator = MemSet::const_iterator;
209 
insert(InstantiatedValue LHS,InstantiatedValue RHS)210   bool insert(InstantiatedValue LHS, InstantiatedValue RHS) {
211     // Top-level values can never be memory aliases because one cannot take the
212     // addresses of them
213     assert(LHS.DerefLevel > 0 && RHS.DerefLevel > 0);
214     return MemMap[LHS].insert(RHS).second;
215   }
216 
getMemoryAliases(InstantiatedValue V) const217   const MemSet *getMemoryAliases(InstantiatedValue V) const {
218     auto Itr = MemMap.find(V);
219     if (Itr == MemMap.end())
220       return nullptr;
221     return &Itr->second;
222   }
223 };
224 
225 // We use AliasAttrMap to keep track of the AliasAttr of each node.
226 class AliasAttrMap {
227   using MapType = DenseMap<InstantiatedValue, AliasAttrs>;
228 
229   MapType AttrMap;
230 
231 public:
232   using const_iterator = MapType::const_iterator;
233 
add(InstantiatedValue V,AliasAttrs Attr)234   bool add(InstantiatedValue V, AliasAttrs Attr) {
235     auto &OldAttr = AttrMap[V];
236     auto NewAttr = OldAttr | Attr;
237     if (OldAttr == NewAttr)
238       return false;
239     OldAttr = NewAttr;
240     return true;
241   }
242 
getAttrs(InstantiatedValue V) const243   AliasAttrs getAttrs(InstantiatedValue V) const {
244     AliasAttrs Attr;
245     auto Itr = AttrMap.find(V);
246     if (Itr != AttrMap.end())
247       Attr = Itr->second;
248     return Attr;
249   }
250 
mappings() const251   iterator_range<const_iterator> mappings() const {
252     return make_range<const_iterator>(AttrMap.begin(), AttrMap.end());
253   }
254 };
255 
256 struct WorkListItem {
257   InstantiatedValue From;
258   InstantiatedValue To;
259   MatchState State;
260 };
261 
262 struct ValueSummary {
263   struct Record {
264     InterfaceValue IValue;
265     unsigned DerefLevel;
266   };
267   SmallVector<Record, 4> FromRecords, ToRecords;
268 };
269 
270 } // end anonymous namespace
271 
272 namespace llvm {
273 
274 // Specialize DenseMapInfo for OffsetValue.
275 template <> struct DenseMapInfo<OffsetValue> {
getEmptyKeyllvm::DenseMapInfo276   static OffsetValue getEmptyKey() {
277     return OffsetValue{DenseMapInfo<const Value *>::getEmptyKey(),
278                        DenseMapInfo<int64_t>::getEmptyKey()};
279   }
280 
getTombstoneKeyllvm::DenseMapInfo281   static OffsetValue getTombstoneKey() {
282     return OffsetValue{DenseMapInfo<const Value *>::getTombstoneKey(),
283                        DenseMapInfo<int64_t>::getEmptyKey()};
284   }
285 
getHashValuellvm::DenseMapInfo286   static unsigned getHashValue(const OffsetValue &OVal) {
287     return DenseMapInfo<std::pair<const Value *, int64_t>>::getHashValue(
288         std::make_pair(OVal.Val, OVal.Offset));
289   }
290 
isEqualllvm::DenseMapInfo291   static bool isEqual(const OffsetValue &LHS, const OffsetValue &RHS) {
292     return LHS == RHS;
293   }
294 };
295 
296 // Specialize DenseMapInfo for OffsetInstantiatedValue.
297 template <> struct DenseMapInfo<OffsetInstantiatedValue> {
getEmptyKeyllvm::DenseMapInfo298   static OffsetInstantiatedValue getEmptyKey() {
299     return OffsetInstantiatedValue{
300         DenseMapInfo<InstantiatedValue>::getEmptyKey(),
301         DenseMapInfo<int64_t>::getEmptyKey()};
302   }
303 
getTombstoneKeyllvm::DenseMapInfo304   static OffsetInstantiatedValue getTombstoneKey() {
305     return OffsetInstantiatedValue{
306         DenseMapInfo<InstantiatedValue>::getTombstoneKey(),
307         DenseMapInfo<int64_t>::getEmptyKey()};
308   }
309 
getHashValuellvm::DenseMapInfo310   static unsigned getHashValue(const OffsetInstantiatedValue &OVal) {
311     return DenseMapInfo<std::pair<InstantiatedValue, int64_t>>::getHashValue(
312         std::make_pair(OVal.IVal, OVal.Offset));
313   }
314 
isEqualllvm::DenseMapInfo315   static bool isEqual(const OffsetInstantiatedValue &LHS,
316                       const OffsetInstantiatedValue &RHS) {
317     return LHS == RHS;
318   }
319 };
320 
321 } // end namespace llvm
322 
323 class CFLAndersAAResult::FunctionInfo {
324   /// Map a value to other values that may alias it
325   /// Since the alias relation is symmetric, to save some space we assume values
326   /// are properly ordered: if a and b alias each other, and a < b, then b is in
327   /// AliasMap[a] but not vice versa.
328   DenseMap<const Value *, std::vector<OffsetValue>> AliasMap;
329 
330   /// Map a value to its corresponding AliasAttrs
331   DenseMap<const Value *, AliasAttrs> AttrMap;
332 
333   /// Summary of externally visible effects.
334   AliasSummary Summary;
335 
336   Optional<AliasAttrs> getAttrs(const Value *) const;
337 
338 public:
339   FunctionInfo(const Function &, const SmallVectorImpl<Value *> &,
340                const ReachabilitySet &, const AliasAttrMap &);
341 
342   bool mayAlias(const Value *, LocationSize, const Value *, LocationSize) const;
getAliasSummary() const343   const AliasSummary &getAliasSummary() const { return Summary; }
344 };
345 
hasReadOnlyState(StateSet Set)346 static bool hasReadOnlyState(StateSet Set) {
347   return (Set & StateSet(ReadOnlyStateMask)).any();
348 }
349 
hasWriteOnlyState(StateSet Set)350 static bool hasWriteOnlyState(StateSet Set) {
351   return (Set & StateSet(WriteOnlyStateMask)).any();
352 }
353 
354 static Optional<InterfaceValue>
getInterfaceValue(InstantiatedValue IValue,const SmallVectorImpl<Value * > & RetVals)355 getInterfaceValue(InstantiatedValue IValue,
356                   const SmallVectorImpl<Value *> &RetVals) {
357   auto Val = IValue.Val;
358 
359   Optional<unsigned> Index;
360   if (auto Arg = dyn_cast<Argument>(Val))
361     Index = Arg->getArgNo() + 1;
362   else if (is_contained(RetVals, Val))
363     Index = 0;
364 
365   if (Index)
366     return InterfaceValue{*Index, IValue.DerefLevel};
367   return None;
368 }
369 
populateAttrMap(DenseMap<const Value *,AliasAttrs> & AttrMap,const AliasAttrMap & AMap)370 static void populateAttrMap(DenseMap<const Value *, AliasAttrs> &AttrMap,
371                             const AliasAttrMap &AMap) {
372   for (const auto &Mapping : AMap.mappings()) {
373     auto IVal = Mapping.first;
374 
375     // Insert IVal into the map
376     auto &Attr = AttrMap[IVal.Val];
377     // AttrMap only cares about top-level values
378     if (IVal.DerefLevel == 0)
379       Attr |= Mapping.second;
380   }
381 }
382 
383 static void
populateAliasMap(DenseMap<const Value *,std::vector<OffsetValue>> & AliasMap,const ReachabilitySet & ReachSet)384 populateAliasMap(DenseMap<const Value *, std::vector<OffsetValue>> &AliasMap,
385                  const ReachabilitySet &ReachSet) {
386   for (const auto &OuterMapping : ReachSet.value_mappings()) {
387     // AliasMap only cares about top-level values
388     if (OuterMapping.first.DerefLevel > 0)
389       continue;
390 
391     auto Val = OuterMapping.first.Val;
392     auto &AliasList = AliasMap[Val];
393     for (const auto &InnerMapping : OuterMapping.second) {
394       // Again, AliasMap only cares about top-level values
395       if (InnerMapping.first.DerefLevel == 0)
396         AliasList.push_back(OffsetValue{InnerMapping.first.Val, UnknownOffset});
397     }
398 
399     // Sort AliasList for faster lookup
400     llvm::sort(AliasList);
401   }
402 }
403 
populateExternalRelations(SmallVectorImpl<ExternalRelation> & ExtRelations,const Function & Fn,const SmallVectorImpl<Value * > & RetVals,const ReachabilitySet & ReachSet)404 static void populateExternalRelations(
405     SmallVectorImpl<ExternalRelation> &ExtRelations, const Function &Fn,
406     const SmallVectorImpl<Value *> &RetVals, const ReachabilitySet &ReachSet) {
407   // If a function only returns one of its argument X, then X will be both an
408   // argument and a return value at the same time. This is an edge case that
409   // needs special handling here.
410   for (const auto &Arg : Fn.args()) {
411     if (is_contained(RetVals, &Arg)) {
412       auto ArgVal = InterfaceValue{Arg.getArgNo() + 1, 0};
413       auto RetVal = InterfaceValue{0, 0};
414       ExtRelations.push_back(ExternalRelation{ArgVal, RetVal, 0});
415     }
416   }
417 
418   // Below is the core summary construction logic.
419   // A naive solution of adding only the value aliases that are parameters or
420   // return values in ReachSet to the summary won't work: It is possible that a
421   // parameter P is written into an intermediate value I, and the function
422   // subsequently returns *I. In that case, *I is does not value alias anything
423   // in ReachSet, and the naive solution will miss a summary edge from (P, 1) to
424   // (I, 1).
425   // To account for the aforementioned case, we need to check each non-parameter
426   // and non-return value for the possibility of acting as an intermediate.
427   // 'ValueMap' here records, for each value, which InterfaceValues read from or
428   // write into it. If both the read list and the write list of a given value
429   // are non-empty, we know that a particular value is an intermidate and we
430   // need to add summary edges from the writes to the reads.
431   DenseMap<Value *, ValueSummary> ValueMap;
432   for (const auto &OuterMapping : ReachSet.value_mappings()) {
433     if (auto Dst = getInterfaceValue(OuterMapping.first, RetVals)) {
434       for (const auto &InnerMapping : OuterMapping.second) {
435         // If Src is a param/return value, we get a same-level assignment.
436         if (auto Src = getInterfaceValue(InnerMapping.first, RetVals)) {
437           // This may happen if both Dst and Src are return values
438           if (*Dst == *Src)
439             continue;
440 
441           if (hasReadOnlyState(InnerMapping.second))
442             ExtRelations.push_back(ExternalRelation{*Dst, *Src, UnknownOffset});
443           // No need to check for WriteOnly state, since ReachSet is symmetric
444         } else {
445           // If Src is not a param/return, add it to ValueMap
446           auto SrcIVal = InnerMapping.first;
447           if (hasReadOnlyState(InnerMapping.second))
448             ValueMap[SrcIVal.Val].FromRecords.push_back(
449                 ValueSummary::Record{*Dst, SrcIVal.DerefLevel});
450           if (hasWriteOnlyState(InnerMapping.second))
451             ValueMap[SrcIVal.Val].ToRecords.push_back(
452                 ValueSummary::Record{*Dst, SrcIVal.DerefLevel});
453         }
454       }
455     }
456   }
457 
458   for (const auto &Mapping : ValueMap) {
459     for (const auto &FromRecord : Mapping.second.FromRecords) {
460       for (const auto &ToRecord : Mapping.second.ToRecords) {
461         auto ToLevel = ToRecord.DerefLevel;
462         auto FromLevel = FromRecord.DerefLevel;
463         // Same-level assignments should have already been processed by now
464         if (ToLevel == FromLevel)
465           continue;
466 
467         auto SrcIndex = FromRecord.IValue.Index;
468         auto SrcLevel = FromRecord.IValue.DerefLevel;
469         auto DstIndex = ToRecord.IValue.Index;
470         auto DstLevel = ToRecord.IValue.DerefLevel;
471         if (ToLevel > FromLevel)
472           SrcLevel += ToLevel - FromLevel;
473         else
474           DstLevel += FromLevel - ToLevel;
475 
476         ExtRelations.push_back(ExternalRelation{
477             InterfaceValue{SrcIndex, SrcLevel},
478             InterfaceValue{DstIndex, DstLevel}, UnknownOffset});
479       }
480     }
481   }
482 
483   // Remove duplicates in ExtRelations
484   llvm::sort(ExtRelations);
485   ExtRelations.erase(std::unique(ExtRelations.begin(), ExtRelations.end()),
486                      ExtRelations.end());
487 }
488 
populateExternalAttributes(SmallVectorImpl<ExternalAttribute> & ExtAttributes,const Function & Fn,const SmallVectorImpl<Value * > & RetVals,const AliasAttrMap & AMap)489 static void populateExternalAttributes(
490     SmallVectorImpl<ExternalAttribute> &ExtAttributes, const Function &Fn,
491     const SmallVectorImpl<Value *> &RetVals, const AliasAttrMap &AMap) {
492   for (const auto &Mapping : AMap.mappings()) {
493     if (auto IVal = getInterfaceValue(Mapping.first, RetVals)) {
494       auto Attr = getExternallyVisibleAttrs(Mapping.second);
495       if (Attr.any())
496         ExtAttributes.push_back(ExternalAttribute{*IVal, Attr});
497     }
498   }
499 }
500 
FunctionInfo(const Function & Fn,const SmallVectorImpl<Value * > & RetVals,const ReachabilitySet & ReachSet,const AliasAttrMap & AMap)501 CFLAndersAAResult::FunctionInfo::FunctionInfo(
502     const Function &Fn, const SmallVectorImpl<Value *> &RetVals,
503     const ReachabilitySet &ReachSet, const AliasAttrMap &AMap) {
504   populateAttrMap(AttrMap, AMap);
505   populateExternalAttributes(Summary.RetParamAttributes, Fn, RetVals, AMap);
506   populateAliasMap(AliasMap, ReachSet);
507   populateExternalRelations(Summary.RetParamRelations, Fn, RetVals, ReachSet);
508 }
509 
510 Optional<AliasAttrs>
getAttrs(const Value * V) const511 CFLAndersAAResult::FunctionInfo::getAttrs(const Value *V) const {
512   assert(V != nullptr);
513 
514   auto Itr = AttrMap.find(V);
515   if (Itr != AttrMap.end())
516     return Itr->second;
517   return None;
518 }
519 
mayAlias(const Value * LHS,LocationSize MaybeLHSSize,const Value * RHS,LocationSize MaybeRHSSize) const520 bool CFLAndersAAResult::FunctionInfo::mayAlias(
521     const Value *LHS, LocationSize MaybeLHSSize, const Value *RHS,
522     LocationSize MaybeRHSSize) const {
523   assert(LHS && RHS);
524 
525   // Check if we've seen LHS and RHS before. Sometimes LHS or RHS can be created
526   // after the analysis gets executed, and we want to be conservative in those
527   // cases.
528   auto MaybeAttrsA = getAttrs(LHS);
529   auto MaybeAttrsB = getAttrs(RHS);
530   if (!MaybeAttrsA || !MaybeAttrsB)
531     return true;
532 
533   // Check AliasAttrs before AliasMap lookup since it's cheaper
534   auto AttrsA = *MaybeAttrsA;
535   auto AttrsB = *MaybeAttrsB;
536   if (hasUnknownOrCallerAttr(AttrsA))
537     return AttrsB.any();
538   if (hasUnknownOrCallerAttr(AttrsB))
539     return AttrsA.any();
540   if (isGlobalOrArgAttr(AttrsA))
541     return isGlobalOrArgAttr(AttrsB);
542   if (isGlobalOrArgAttr(AttrsB))
543     return isGlobalOrArgAttr(AttrsA);
544 
545   // At this point both LHS and RHS should point to locally allocated objects
546 
547   auto Itr = AliasMap.find(LHS);
548   if (Itr != AliasMap.end()) {
549 
550     // Find out all (X, Offset) where X == RHS
551     auto Comparator = [](OffsetValue LHS, OffsetValue RHS) {
552       return std::less<const Value *>()(LHS.Val, RHS.Val);
553     };
554 #ifdef EXPENSIVE_CHECKS
555     assert(std::is_sorted(Itr->second.begin(), Itr->second.end(), Comparator));
556 #endif
557     auto RangePair = std::equal_range(Itr->second.begin(), Itr->second.end(),
558                                       OffsetValue{RHS, 0}, Comparator);
559 
560     if (RangePair.first != RangePair.second) {
561       // Be conservative about unknown sizes
562       if (MaybeLHSSize == LocationSize::unknown() ||
563           MaybeRHSSize == LocationSize::unknown())
564         return true;
565 
566       const uint64_t LHSSize = MaybeLHSSize.getValue();
567       const uint64_t RHSSize = MaybeRHSSize.getValue();
568 
569       for (const auto &OVal : make_range(RangePair)) {
570         // Be conservative about UnknownOffset
571         if (OVal.Offset == UnknownOffset)
572           return true;
573 
574         // We know that LHS aliases (RHS + OVal.Offset) if the control flow
575         // reaches here. The may-alias query essentially becomes integer
576         // range-overlap queries over two ranges [OVal.Offset, OVal.Offset +
577         // LHSSize) and [0, RHSSize).
578 
579         // Try to be conservative on super large offsets
580         if (LLVM_UNLIKELY(LHSSize > INT64_MAX || RHSSize > INT64_MAX))
581           return true;
582 
583         auto LHSStart = OVal.Offset;
584         // FIXME: Do we need to guard against integer overflow?
585         auto LHSEnd = OVal.Offset + static_cast<int64_t>(LHSSize);
586         auto RHSStart = 0;
587         auto RHSEnd = static_cast<int64_t>(RHSSize);
588         if (LHSEnd > RHSStart && LHSStart < RHSEnd)
589           return true;
590       }
591     }
592   }
593 
594   return false;
595 }
596 
propagate(InstantiatedValue From,InstantiatedValue To,MatchState State,ReachabilitySet & ReachSet,std::vector<WorkListItem> & WorkList)597 static void propagate(InstantiatedValue From, InstantiatedValue To,
598                       MatchState State, ReachabilitySet &ReachSet,
599                       std::vector<WorkListItem> &WorkList) {
600   if (From == To)
601     return;
602   if (ReachSet.insert(From, To, State))
603     WorkList.push_back(WorkListItem{From, To, State});
604 }
605 
initializeWorkList(std::vector<WorkListItem> & WorkList,ReachabilitySet & ReachSet,const CFLGraph & Graph)606 static void initializeWorkList(std::vector<WorkListItem> &WorkList,
607                                ReachabilitySet &ReachSet,
608                                const CFLGraph &Graph) {
609   for (const auto &Mapping : Graph.value_mappings()) {
610     auto Val = Mapping.first;
611     auto &ValueInfo = Mapping.second;
612     assert(ValueInfo.getNumLevels() > 0);
613 
614     // Insert all immediate assignment neighbors to the worklist
615     for (unsigned I = 0, E = ValueInfo.getNumLevels(); I < E; ++I) {
616       auto Src = InstantiatedValue{Val, I};
617       // If there's an assignment edge from X to Y, it means Y is reachable from
618       // X at S3 and X is reachable from Y at S1
619       for (auto &Edge : ValueInfo.getNodeInfoAtLevel(I).Edges) {
620         propagate(Edge.Other, Src, MatchState::FlowFromReadOnly, ReachSet,
621                   WorkList);
622         propagate(Src, Edge.Other, MatchState::FlowToWriteOnly, ReachSet,
623                   WorkList);
624       }
625     }
626   }
627 }
628 
getNodeBelow(const CFLGraph & Graph,InstantiatedValue V)629 static Optional<InstantiatedValue> getNodeBelow(const CFLGraph &Graph,
630                                                 InstantiatedValue V) {
631   auto NodeBelow = InstantiatedValue{V.Val, V.DerefLevel + 1};
632   if (Graph.getNode(NodeBelow))
633     return NodeBelow;
634   return None;
635 }
636 
processWorkListItem(const WorkListItem & Item,const CFLGraph & Graph,ReachabilitySet & ReachSet,AliasMemSet & MemSet,std::vector<WorkListItem> & WorkList)637 static void processWorkListItem(const WorkListItem &Item, const CFLGraph &Graph,
638                                 ReachabilitySet &ReachSet, AliasMemSet &MemSet,
639                                 std::vector<WorkListItem> &WorkList) {
640   auto FromNode = Item.From;
641   auto ToNode = Item.To;
642 
643   auto NodeInfo = Graph.getNode(ToNode);
644   assert(NodeInfo != nullptr);
645 
646   // TODO: propagate field offsets
647 
648   // FIXME: Here is a neat trick we can do: since both ReachSet and MemSet holds
649   // relations that are symmetric, we could actually cut the storage by half by
650   // sorting FromNode and ToNode before insertion happens.
651 
652   // The newly added value alias pair may potentially generate more memory
653   // alias pairs. Check for them here.
654   auto FromNodeBelow = getNodeBelow(Graph, FromNode);
655   auto ToNodeBelow = getNodeBelow(Graph, ToNode);
656   if (FromNodeBelow && ToNodeBelow &&
657       MemSet.insert(*FromNodeBelow, *ToNodeBelow)) {
658     propagate(*FromNodeBelow, *ToNodeBelow,
659               MatchState::FlowFromMemAliasNoReadWrite, ReachSet, WorkList);
660     for (const auto &Mapping : ReachSet.reachableValueAliases(*FromNodeBelow)) {
661       auto Src = Mapping.first;
662       auto MemAliasPropagate = [&](MatchState FromState, MatchState ToState) {
663         if (Mapping.second.test(static_cast<size_t>(FromState)))
664           propagate(Src, *ToNodeBelow, ToState, ReachSet, WorkList);
665       };
666 
667       MemAliasPropagate(MatchState::FlowFromReadOnly,
668                         MatchState::FlowFromMemAliasReadOnly);
669       MemAliasPropagate(MatchState::FlowToWriteOnly,
670                         MatchState::FlowToMemAliasWriteOnly);
671       MemAliasPropagate(MatchState::FlowToReadWrite,
672                         MatchState::FlowToMemAliasReadWrite);
673     }
674   }
675 
676   // This is the core of the state machine walking algorithm. We expand ReachSet
677   // based on which state we are at (which in turn dictates what edges we
678   // should examine)
679   // From a high-level point of view, the state machine here guarantees two
680   // properties:
681   // - If *X and *Y are memory aliases, then X and Y are value aliases
682   // - If Y is an alias of X, then reverse assignment edges (if there is any)
683   // should precede any assignment edges on the path from X to Y.
684   auto NextAssignState = [&](MatchState State) {
685     for (const auto &AssignEdge : NodeInfo->Edges)
686       propagate(FromNode, AssignEdge.Other, State, ReachSet, WorkList);
687   };
688   auto NextRevAssignState = [&](MatchState State) {
689     for (const auto &RevAssignEdge : NodeInfo->ReverseEdges)
690       propagate(FromNode, RevAssignEdge.Other, State, ReachSet, WorkList);
691   };
692   auto NextMemState = [&](MatchState State) {
693     if (auto AliasSet = MemSet.getMemoryAliases(ToNode)) {
694       for (const auto &MemAlias : *AliasSet)
695         propagate(FromNode, MemAlias, State, ReachSet, WorkList);
696     }
697   };
698 
699   switch (Item.State) {
700   case MatchState::FlowFromReadOnly:
701     NextRevAssignState(MatchState::FlowFromReadOnly);
702     NextAssignState(MatchState::FlowToReadWrite);
703     NextMemState(MatchState::FlowFromMemAliasReadOnly);
704     break;
705 
706   case MatchState::FlowFromMemAliasNoReadWrite:
707     NextRevAssignState(MatchState::FlowFromReadOnly);
708     NextAssignState(MatchState::FlowToWriteOnly);
709     break;
710 
711   case MatchState::FlowFromMemAliasReadOnly:
712     NextRevAssignState(MatchState::FlowFromReadOnly);
713     NextAssignState(MatchState::FlowToReadWrite);
714     break;
715 
716   case MatchState::FlowToWriteOnly:
717     NextAssignState(MatchState::FlowToWriteOnly);
718     NextMemState(MatchState::FlowToMemAliasWriteOnly);
719     break;
720 
721   case MatchState::FlowToReadWrite:
722     NextAssignState(MatchState::FlowToReadWrite);
723     NextMemState(MatchState::FlowToMemAliasReadWrite);
724     break;
725 
726   case MatchState::FlowToMemAliasWriteOnly:
727     NextAssignState(MatchState::FlowToWriteOnly);
728     break;
729 
730   case MatchState::FlowToMemAliasReadWrite:
731     NextAssignState(MatchState::FlowToReadWrite);
732     break;
733   }
734 }
735 
buildAttrMap(const CFLGraph & Graph,const ReachabilitySet & ReachSet)736 static AliasAttrMap buildAttrMap(const CFLGraph &Graph,
737                                  const ReachabilitySet &ReachSet) {
738   AliasAttrMap AttrMap;
739   std::vector<InstantiatedValue> WorkList, NextList;
740 
741   // Initialize each node with its original AliasAttrs in CFLGraph
742   for (const auto &Mapping : Graph.value_mappings()) {
743     auto Val = Mapping.first;
744     auto &ValueInfo = Mapping.second;
745     for (unsigned I = 0, E = ValueInfo.getNumLevels(); I < E; ++I) {
746       auto Node = InstantiatedValue{Val, I};
747       AttrMap.add(Node, ValueInfo.getNodeInfoAtLevel(I).Attr);
748       WorkList.push_back(Node);
749     }
750   }
751 
752   while (!WorkList.empty()) {
753     for (const auto &Dst : WorkList) {
754       auto DstAttr = AttrMap.getAttrs(Dst);
755       if (DstAttr.none())
756         continue;
757 
758       // Propagate attr on the same level
759       for (const auto &Mapping : ReachSet.reachableValueAliases(Dst)) {
760         auto Src = Mapping.first;
761         if (AttrMap.add(Src, DstAttr))
762           NextList.push_back(Src);
763       }
764 
765       // Propagate attr to the levels below
766       auto DstBelow = getNodeBelow(Graph, Dst);
767       while (DstBelow) {
768         if (AttrMap.add(*DstBelow, DstAttr)) {
769           NextList.push_back(*DstBelow);
770           break;
771         }
772         DstBelow = getNodeBelow(Graph, *DstBelow);
773       }
774     }
775     WorkList.swap(NextList);
776     NextList.clear();
777   }
778 
779   return AttrMap;
780 }
781 
782 CFLAndersAAResult::FunctionInfo
buildInfoFrom(const Function & Fn)783 CFLAndersAAResult::buildInfoFrom(const Function &Fn) {
784   CFLGraphBuilder<CFLAndersAAResult> GraphBuilder(
785       *this, GetTLI(const_cast<Function &>(Fn)),
786       // Cast away the constness here due to GraphBuilder's API requirement
787       const_cast<Function &>(Fn));
788   auto &Graph = GraphBuilder.getCFLGraph();
789 
790   ReachabilitySet ReachSet;
791   AliasMemSet MemSet;
792 
793   std::vector<WorkListItem> WorkList, NextList;
794   initializeWorkList(WorkList, ReachSet, Graph);
795   // TODO: make sure we don't stop before the fix point is reached
796   while (!WorkList.empty()) {
797     for (const auto &Item : WorkList)
798       processWorkListItem(Item, Graph, ReachSet, MemSet, NextList);
799 
800     NextList.swap(WorkList);
801     NextList.clear();
802   }
803 
804   // Now that we have all the reachability info, propagate AliasAttrs according
805   // to it
806   auto IValueAttrMap = buildAttrMap(Graph, ReachSet);
807 
808   return FunctionInfo(Fn, GraphBuilder.getReturnValues(), ReachSet,
809                       std::move(IValueAttrMap));
810 }
811 
scan(const Function & Fn)812 void CFLAndersAAResult::scan(const Function &Fn) {
813   auto InsertPair = Cache.insert(std::make_pair(&Fn, Optional<FunctionInfo>()));
814   (void)InsertPair;
815   assert(InsertPair.second &&
816          "Trying to scan a function that has already been cached");
817 
818   // Note that we can't do Cache[Fn] = buildSetsFrom(Fn) here: the function call
819   // may get evaluated after operator[], potentially triggering a DenseMap
820   // resize and invalidating the reference returned by operator[]
821   auto FunInfo = buildInfoFrom(Fn);
822   Cache[&Fn] = std::move(FunInfo);
823   Handles.emplace_front(const_cast<Function *>(&Fn), this);
824 }
825 
evict(const Function * Fn)826 void CFLAndersAAResult::evict(const Function *Fn) { Cache.erase(Fn); }
827 
828 const Optional<CFLAndersAAResult::FunctionInfo> &
ensureCached(const Function & Fn)829 CFLAndersAAResult::ensureCached(const Function &Fn) {
830   auto Iter = Cache.find(&Fn);
831   if (Iter == Cache.end()) {
832     scan(Fn);
833     Iter = Cache.find(&Fn);
834     assert(Iter != Cache.end());
835     assert(Iter->second.hasValue());
836   }
837   return Iter->second;
838 }
839 
getAliasSummary(const Function & Fn)840 const AliasSummary *CFLAndersAAResult::getAliasSummary(const Function &Fn) {
841   auto &FunInfo = ensureCached(Fn);
842   if (FunInfo.hasValue())
843     return &FunInfo->getAliasSummary();
844   else
845     return nullptr;
846 }
847 
query(const MemoryLocation & LocA,const MemoryLocation & LocB)848 AliasResult CFLAndersAAResult::query(const MemoryLocation &LocA,
849                                      const MemoryLocation &LocB) {
850   auto *ValA = LocA.Ptr;
851   auto *ValB = LocB.Ptr;
852 
853   if (!ValA->getType()->isPointerTy() || !ValB->getType()->isPointerTy())
854     return NoAlias;
855 
856   auto *Fn = parentFunctionOfValue(ValA);
857   if (!Fn) {
858     Fn = parentFunctionOfValue(ValB);
859     if (!Fn) {
860       // The only times this is known to happen are when globals + InlineAsm are
861       // involved
862       LLVM_DEBUG(
863           dbgs()
864           << "CFLAndersAA: could not extract parent function information.\n");
865       return MayAlias;
866     }
867   } else {
868     assert(!parentFunctionOfValue(ValB) || parentFunctionOfValue(ValB) == Fn);
869   }
870 
871   assert(Fn != nullptr);
872   auto &FunInfo = ensureCached(*Fn);
873 
874   // AliasMap lookup
875   if (FunInfo->mayAlias(ValA, LocA.Size, ValB, LocB.Size))
876     return MayAlias;
877   return NoAlias;
878 }
879 
alias(const MemoryLocation & LocA,const MemoryLocation & LocB,AAQueryInfo & AAQI)880 AliasResult CFLAndersAAResult::alias(const MemoryLocation &LocA,
881                                      const MemoryLocation &LocB,
882                                      AAQueryInfo &AAQI) {
883   if (LocA.Ptr == LocB.Ptr)
884     return MustAlias;
885 
886   // Comparisons between global variables and other constants should be
887   // handled by BasicAA.
888   // CFLAndersAA may report NoAlias when comparing a GlobalValue and
889   // ConstantExpr, but every query needs to have at least one Value tied to a
890   // Function, and neither GlobalValues nor ConstantExprs are.
891   if (isa<Constant>(LocA.Ptr) && isa<Constant>(LocB.Ptr))
892     return AAResultBase::alias(LocA, LocB, AAQI);
893 
894   AliasResult QueryResult = query(LocA, LocB);
895   if (QueryResult == MayAlias)
896     return AAResultBase::alias(LocA, LocB, AAQI);
897 
898   return QueryResult;
899 }
900 
901 AnalysisKey CFLAndersAA::Key;
902 
run(Function & F,FunctionAnalysisManager & AM)903 CFLAndersAAResult CFLAndersAA::run(Function &F, FunctionAnalysisManager &AM) {
904   auto GetTLI = [&AM](Function &F) -> TargetLibraryInfo & {
905     return AM.getResult<TargetLibraryAnalysis>(F);
906   };
907   return CFLAndersAAResult(GetTLI);
908 }
909 
910 char CFLAndersAAWrapperPass::ID = 0;
911 INITIALIZE_PASS(CFLAndersAAWrapperPass, "cfl-anders-aa",
912                 "Inclusion-Based CFL Alias Analysis", false, true)
913 
createCFLAndersAAWrapperPass()914 ImmutablePass *llvm::createCFLAndersAAWrapperPass() {
915   return new CFLAndersAAWrapperPass();
916 }
917 
CFLAndersAAWrapperPass()918 CFLAndersAAWrapperPass::CFLAndersAAWrapperPass() : ImmutablePass(ID) {
919   initializeCFLAndersAAWrapperPassPass(*PassRegistry::getPassRegistry());
920 }
921 
initializePass()922 void CFLAndersAAWrapperPass::initializePass() {
923   auto GetTLI = [this](Function &F) -> TargetLibraryInfo & {
924     return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
925   };
926   Result.reset(new CFLAndersAAResult(GetTLI));
927 }
928 
getAnalysisUsage(AnalysisUsage & AU) const929 void CFLAndersAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
930   AU.setPreservesAll();
931   AU.addRequired<TargetLibraryInfoWrapperPass>();
932 }
933