1 //======- CFLGraph.h - Abstract stratified sets implementation. --------======// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 /// \file 10 /// This file defines CFLGraph, an auxiliary data structure used by CFL-based 11 /// alias analysis. 12 /// 13 //===----------------------------------------------------------------------===// 14 15 #ifndef LLVM_ANALYSIS_CFLGRAPH_H 16 #define LLVM_ANALYSIS_CFLGRAPH_H 17 18 #include "AliasAnalysisSummary.h" 19 #include "llvm/ADT/SmallPtrSet.h" 20 #include "llvm/Analysis/MemoryBuiltins.h" 21 #include "llvm/IR/InstVisitor.h" 22 #include "llvm/IR/Instructions.h" 23 24 namespace llvm { 25 namespace cflaa { 26 27 /// \brief The Program Expression Graph (PEG) of CFL analysis 28 /// CFLGraph is auxiliary data structure used by CFL-based alias analysis to 29 /// describe flow-insensitive pointer-related behaviors. Given an LLVM function, 30 /// the main purpose of this graph is to abstract away unrelated facts and 31 /// translate the rest into a form that can be easily digested by CFL analyses. 32 /// Each Node in the graph is an InstantiatedValue, and each edge represent a 33 /// pointer assignment between InstantiatedValue. Pointer 34 /// references/dereferences are not explicitly stored in the graph: we 35 /// implicitly assume that for each node (X, I) it has a dereference edge to (X, 36 /// I+1) and a reference edge to (X, I-1). 37 class CFLGraph { 38 public: 39 typedef InstantiatedValue Node; 40 41 struct Edge { 42 Node Other; 43 }; 44 45 typedef std::vector<Edge> EdgeList; 46 47 struct NodeInfo { 48 EdgeList Edges; 49 AliasAttrs Attr; 50 }; 51 52 class ValueInfo { 53 std::vector<NodeInfo> Levels; 54 55 public: addNodeToLevel(unsigned Level)56 bool addNodeToLevel(unsigned Level) { 57 auto NumLevels = Levels.size(); 58 if (NumLevels > Level) 59 return false; 60 Levels.resize(Level + 1); 61 return true; 62 } 63 getNodeInfoAtLevel(unsigned Level)64 NodeInfo &getNodeInfoAtLevel(unsigned Level) { 65 assert(Level < Levels.size()); 66 return Levels[Level]; 67 } getNodeInfoAtLevel(unsigned Level)68 const NodeInfo &getNodeInfoAtLevel(unsigned Level) const { 69 assert(Level < Levels.size()); 70 return Levels[Level]; 71 } 72 getNumLevels()73 unsigned getNumLevels() const { return Levels.size(); } 74 }; 75 76 private: 77 typedef DenseMap<Value *, ValueInfo> ValueMap; 78 ValueMap ValueImpls; 79 getNode(Node N)80 const NodeInfo *getNode(Node N) const { 81 auto Itr = ValueImpls.find(N.Val); 82 if (Itr == ValueImpls.end() || Itr->second.getNumLevels() <= N.DerefLevel) 83 return nullptr; 84 return &Itr->second.getNodeInfoAtLevel(N.DerefLevel); 85 } getNode(Node N)86 NodeInfo *getNode(Node N) { 87 auto Itr = ValueImpls.find(N.Val); 88 if (Itr == ValueImpls.end() || Itr->second.getNumLevels() <= N.DerefLevel) 89 return nullptr; 90 return &Itr->second.getNodeInfoAtLevel(N.DerefLevel); 91 } 92 93 public: 94 typedef ValueMap::const_iterator const_value_iterator; 95 96 bool addNode(Node N, AliasAttrs Attr = AliasAttrs()) { 97 assert(N.Val != nullptr); 98 auto &ValInfo = ValueImpls[N.Val]; 99 auto Changed = ValInfo.addNodeToLevel(N.DerefLevel); 100 ValInfo.getNodeInfoAtLevel(N.DerefLevel).Attr |= Attr; 101 return Changed; 102 } 103 addAttr(Node N,AliasAttrs Attr)104 void addAttr(Node N, AliasAttrs Attr) { 105 auto *Info = getNode(N); 106 assert(Info != nullptr); 107 Info->Attr |= Attr; 108 } 109 110 void addEdge(Node From, Node To, int64_t Offset = 0) { 111 assert(getNode(To) != nullptr); 112 113 auto *FromInfo = getNode(From); 114 assert(FromInfo != nullptr); 115 FromInfo->Edges.push_back(Edge{To}); 116 } 117 attrFor(Node N)118 AliasAttrs attrFor(Node N) const { 119 auto *Info = getNode(N); 120 assert(Info != nullptr); 121 return Info->Attr; 122 } 123 value_mappings()124 iterator_range<const_value_iterator> value_mappings() const { 125 return make_range<const_value_iterator>(ValueImpls.begin(), 126 ValueImpls.end()); 127 } 128 }; 129 130 ///\brief A builder class used to create CFLGraph instance from a given function 131 /// The CFL-AA that uses this builder must provide its own type as a template 132 /// argument. This is necessary for interprocedural processing: CFLGraphBuilder 133 /// needs a way of obtaining the summary of other functions when callinsts are 134 /// encountered. 135 /// As a result, we expect the said CFL-AA to expose a getAliasSummary() public 136 /// member function that takes a Function& and returns the corresponding summary 137 /// as a const AliasSummary*. 138 template <typename CFLAA> class CFLGraphBuilder { 139 // Input of the builder 140 CFLAA &Analysis; 141 const TargetLibraryInfo &TLI; 142 143 // Output of the builder 144 CFLGraph Graph; 145 SmallVector<Value *, 4> ReturnedValues; 146 147 // Helper class 148 /// Gets the edges our graph should have, based on an Instruction* 149 class GetEdgesVisitor : public InstVisitor<GetEdgesVisitor, void> { 150 CFLAA &AA; 151 const TargetLibraryInfo &TLI; 152 153 CFLGraph &Graph; 154 SmallVectorImpl<Value *> &ReturnValues; 155 hasUsefulEdges(ConstantExpr * CE)156 static bool hasUsefulEdges(ConstantExpr *CE) { 157 // ConstantExpr doesn't have terminators, invokes, or fences, so only 158 // needs 159 // to check for compares. 160 return CE->getOpcode() != Instruction::ICmp && 161 CE->getOpcode() != Instruction::FCmp; 162 } 163 164 // Returns possible functions called by CS into the given SmallVectorImpl. 165 // Returns true if targets found, false otherwise. getPossibleTargets(CallSite CS,SmallVectorImpl<Function * > & Output)166 static bool getPossibleTargets(CallSite CS, 167 SmallVectorImpl<Function *> &Output) { 168 if (auto *Fn = CS.getCalledFunction()) { 169 Output.push_back(Fn); 170 return true; 171 } 172 173 // TODO: If the call is indirect, we might be able to enumerate all 174 // potential 175 // targets of the call and return them, rather than just failing. 176 return false; 177 } 178 179 void addNode(Value *Val, AliasAttrs Attr = AliasAttrs()) { 180 assert(Val != nullptr && Val->getType()->isPointerTy()); 181 if (auto GVal = dyn_cast<GlobalValue>(Val)) { 182 if (Graph.addNode(InstantiatedValue{GVal, 0}, 183 getGlobalOrArgAttrFromValue(*GVal))) 184 Graph.addNode(InstantiatedValue{GVal, 1}, getAttrUnknown()); 185 } else if (auto CExpr = dyn_cast<ConstantExpr>(Val)) { 186 if (hasUsefulEdges(CExpr)) { 187 if (Graph.addNode(InstantiatedValue{CExpr, 0})) 188 visitConstantExpr(CExpr); 189 } 190 } else 191 Graph.addNode(InstantiatedValue{Val, 0}, Attr); 192 } 193 194 void addAssignEdge(Value *From, Value *To, int64_t Offset = 0) { 195 assert(From != nullptr && To != nullptr); 196 if (!From->getType()->isPointerTy() || !To->getType()->isPointerTy()) 197 return; 198 addNode(From); 199 if (To != From) { 200 addNode(To); 201 Graph.addEdge(InstantiatedValue{From, 0}, InstantiatedValue{To, 0}, 202 Offset); 203 } 204 } 205 addDerefEdge(Value * From,Value * To)206 void addDerefEdge(Value *From, Value *To) { 207 assert(From != nullptr && To != nullptr); 208 if (!From->getType()->isPointerTy() || !To->getType()->isPointerTy()) 209 return; 210 addNode(From); 211 addNode(To); 212 Graph.addNode(InstantiatedValue{From, 1}); 213 Graph.addEdge(InstantiatedValue{From, 1}, InstantiatedValue{To, 0}); 214 } 215 216 public: GetEdgesVisitor(CFLGraphBuilder & Builder)217 GetEdgesVisitor(CFLGraphBuilder &Builder) 218 : AA(Builder.Analysis), TLI(Builder.TLI), Graph(Builder.Graph), 219 ReturnValues(Builder.ReturnedValues) {} 220 visitInstruction(Instruction &)221 void visitInstruction(Instruction &) { 222 llvm_unreachable("Unsupported instruction encountered"); 223 } 224 visitReturnInst(ReturnInst & Inst)225 void visitReturnInst(ReturnInst &Inst) { 226 if (auto RetVal = Inst.getReturnValue()) { 227 if (RetVal->getType()->isPointerTy()) { 228 addNode(RetVal); 229 ReturnValues.push_back(RetVal); 230 } 231 } 232 } 233 visitPtrToIntInst(PtrToIntInst & Inst)234 void visitPtrToIntInst(PtrToIntInst &Inst) { 235 auto *Ptr = Inst.getOperand(0); 236 addNode(Ptr, getAttrEscaped()); 237 } 238 visitIntToPtrInst(IntToPtrInst & Inst)239 void visitIntToPtrInst(IntToPtrInst &Inst) { 240 auto *Ptr = &Inst; 241 addNode(Ptr, getAttrUnknown()); 242 } 243 visitCastInst(CastInst & Inst)244 void visitCastInst(CastInst &Inst) { 245 auto *Src = Inst.getOperand(0); 246 addAssignEdge(Src, &Inst); 247 } 248 visitBinaryOperator(BinaryOperator & Inst)249 void visitBinaryOperator(BinaryOperator &Inst) { 250 auto *Op1 = Inst.getOperand(0); 251 auto *Op2 = Inst.getOperand(1); 252 addAssignEdge(Op1, &Inst); 253 addAssignEdge(Op2, &Inst); 254 } 255 visitAtomicCmpXchgInst(AtomicCmpXchgInst & Inst)256 void visitAtomicCmpXchgInst(AtomicCmpXchgInst &Inst) { 257 auto *Ptr = Inst.getPointerOperand(); 258 auto *Val = Inst.getNewValOperand(); 259 addDerefEdge(Ptr, Val); 260 } 261 visitAtomicRMWInst(AtomicRMWInst & Inst)262 void visitAtomicRMWInst(AtomicRMWInst &Inst) { 263 auto *Ptr = Inst.getPointerOperand(); 264 auto *Val = Inst.getValOperand(); 265 addDerefEdge(Ptr, Val); 266 } 267 visitPHINode(PHINode & Inst)268 void visitPHINode(PHINode &Inst) { 269 for (Value *Val : Inst.incoming_values()) 270 addAssignEdge(Val, &Inst); 271 } 272 visitGetElementPtrInst(GetElementPtrInst & Inst)273 void visitGetElementPtrInst(GetElementPtrInst &Inst) { 274 auto *Op = Inst.getPointerOperand(); 275 addAssignEdge(Op, &Inst); 276 } 277 visitSelectInst(SelectInst & Inst)278 void visitSelectInst(SelectInst &Inst) { 279 // Condition is not processed here (The actual statement producing 280 // the condition result is processed elsewhere). For select, the 281 // condition is evaluated, but not loaded, stored, or assigned 282 // simply as a result of being the condition of a select. 283 284 auto *TrueVal = Inst.getTrueValue(); 285 auto *FalseVal = Inst.getFalseValue(); 286 addAssignEdge(TrueVal, &Inst); 287 addAssignEdge(FalseVal, &Inst); 288 } 289 visitAllocaInst(AllocaInst & Inst)290 void visitAllocaInst(AllocaInst &Inst) { addNode(&Inst); } 291 visitLoadInst(LoadInst & Inst)292 void visitLoadInst(LoadInst &Inst) { 293 auto *Ptr = Inst.getPointerOperand(); 294 auto *Val = &Inst; 295 addDerefEdge(Ptr, Val); 296 } 297 visitStoreInst(StoreInst & Inst)298 void visitStoreInst(StoreInst &Inst) { 299 auto *Ptr = Inst.getPointerOperand(); 300 auto *Val = Inst.getValueOperand(); 301 addDerefEdge(Ptr, Val); 302 } 303 visitVAArgInst(VAArgInst & Inst)304 void visitVAArgInst(VAArgInst &Inst) { 305 // We can't fully model va_arg here. For *Ptr = Inst.getOperand(0), it 306 // does 307 // two things: 308 // 1. Loads a value from *((T*)*Ptr). 309 // 2. Increments (stores to) *Ptr by some target-specific amount. 310 // For now, we'll handle this like a landingpad instruction (by placing 311 // the 312 // result in its own group, and having that group alias externals). 313 addNode(&Inst, getAttrUnknown()); 314 } 315 isFunctionExternal(Function * Fn)316 static bool isFunctionExternal(Function *Fn) { 317 return !Fn->hasExactDefinition(); 318 } 319 tryInterproceduralAnalysis(CallSite CS,const SmallVectorImpl<Function * > & Fns)320 bool tryInterproceduralAnalysis(CallSite CS, 321 const SmallVectorImpl<Function *> &Fns) { 322 assert(Fns.size() > 0); 323 324 if (CS.arg_size() > MaxSupportedArgsInSummary) 325 return false; 326 327 // Exit early if we'll fail anyway 328 for (auto *Fn : Fns) { 329 if (isFunctionExternal(Fn) || Fn->isVarArg()) 330 return false; 331 // Fail if the caller does not provide enough arguments 332 assert(Fn->arg_size() <= CS.arg_size()); 333 if (!AA.getAliasSummary(*Fn)) 334 return false; 335 } 336 337 for (auto *Fn : Fns) { 338 auto Summary = AA.getAliasSummary(*Fn); 339 assert(Summary != nullptr); 340 341 auto &RetParamRelations = Summary->RetParamRelations; 342 for (auto &Relation : RetParamRelations) { 343 auto IRelation = instantiateExternalRelation(Relation, CS); 344 if (IRelation.hasValue()) { 345 Graph.addNode(IRelation->From); 346 Graph.addNode(IRelation->To); 347 Graph.addEdge(IRelation->From, IRelation->To); 348 } 349 } 350 351 auto &RetParamAttributes = Summary->RetParamAttributes; 352 for (auto &Attribute : RetParamAttributes) { 353 auto IAttr = instantiateExternalAttribute(Attribute, CS); 354 if (IAttr.hasValue()) 355 Graph.addNode(IAttr->IValue, IAttr->Attr); 356 } 357 } 358 359 return true; 360 } 361 visitCallSite(CallSite CS)362 void visitCallSite(CallSite CS) { 363 auto Inst = CS.getInstruction(); 364 365 // Make sure all arguments and return value are added to the graph first 366 for (Value *V : CS.args()) 367 if (V->getType()->isPointerTy()) 368 addNode(V); 369 if (Inst->getType()->isPointerTy()) 370 addNode(Inst); 371 372 // Check if Inst is a call to a library function that 373 // allocates/deallocates 374 // on the heap. Those kinds of functions do not introduce any aliases. 375 // TODO: address other common library functions such as realloc(), 376 // strdup(), 377 // etc. 378 if (isMallocLikeFn(Inst, &TLI) || isCallocLikeFn(Inst, &TLI) || 379 isFreeCall(Inst, &TLI)) 380 return; 381 382 // TODO: Add support for noalias args/all the other fun function 383 // attributes 384 // that we can tack on. 385 SmallVector<Function *, 4> Targets; 386 if (getPossibleTargets(CS, Targets)) 387 if (tryInterproceduralAnalysis(CS, Targets)) 388 return; 389 390 // Because the function is opaque, we need to note that anything 391 // could have happened to the arguments (unless the function is marked 392 // readonly or readnone), and that the result could alias just about 393 // anything, too (unless the result is marked noalias). 394 if (!CS.onlyReadsMemory()) 395 for (Value *V : CS.args()) { 396 if (V->getType()->isPointerTy()) { 397 // The argument itself escapes. 398 Graph.addAttr(InstantiatedValue{V, 0}, getAttrEscaped()); 399 // The fate of argument memory is unknown. Note that since 400 // AliasAttrs is transitive with respect to dereference, we only 401 // need to specify it for the first-level memory. 402 Graph.addNode(InstantiatedValue{V, 1}, getAttrUnknown()); 403 } 404 } 405 406 if (Inst->getType()->isPointerTy()) { 407 auto *Fn = CS.getCalledFunction(); 408 if (Fn == nullptr || !Fn->doesNotAlias(0)) 409 // No need to call addNode() since we've added Inst at the 410 // beginning of this function and we know it is not a global. 411 Graph.addAttr(InstantiatedValue{Inst, 0}, getAttrUnknown()); 412 } 413 } 414 415 /// Because vectors/aggregates are immutable and unaddressable, there's 416 /// nothing we can do to coax a value out of them, other than calling 417 /// Extract{Element,Value}. We can effectively treat them as pointers to 418 /// arbitrary memory locations we can store in and load from. visitExtractElementInst(ExtractElementInst & Inst)419 void visitExtractElementInst(ExtractElementInst &Inst) { 420 auto *Ptr = Inst.getVectorOperand(); 421 auto *Val = &Inst; 422 addDerefEdge(Ptr, Val); 423 } 424 visitInsertElementInst(InsertElementInst & Inst)425 void visitInsertElementInst(InsertElementInst &Inst) { 426 auto *Vec = Inst.getOperand(0); 427 auto *Val = Inst.getOperand(1); 428 addAssignEdge(Vec, &Inst); 429 addDerefEdge(&Inst, Val); 430 } 431 visitLandingPadInst(LandingPadInst & Inst)432 void visitLandingPadInst(LandingPadInst &Inst) { 433 // Exceptions come from "nowhere", from our analysis' perspective. 434 // So we place the instruction its own group, noting that said group may 435 // alias externals 436 addNode(&Inst, getAttrUnknown()); 437 } 438 visitInsertValueInst(InsertValueInst & Inst)439 void visitInsertValueInst(InsertValueInst &Inst) { 440 auto *Agg = Inst.getOperand(0); 441 auto *Val = Inst.getOperand(1); 442 addAssignEdge(Agg, &Inst); 443 addDerefEdge(&Inst, Val); 444 } 445 visitExtractValueInst(ExtractValueInst & Inst)446 void visitExtractValueInst(ExtractValueInst &Inst) { 447 auto *Ptr = Inst.getAggregateOperand(); 448 addDerefEdge(Ptr, &Inst); 449 } 450 visitShuffleVectorInst(ShuffleVectorInst & Inst)451 void visitShuffleVectorInst(ShuffleVectorInst &Inst) { 452 auto *From1 = Inst.getOperand(0); 453 auto *From2 = Inst.getOperand(1); 454 addAssignEdge(From1, &Inst); 455 addAssignEdge(From2, &Inst); 456 } 457 visitConstantExpr(ConstantExpr * CE)458 void visitConstantExpr(ConstantExpr *CE) { 459 switch (CE->getOpcode()) { 460 default: 461 llvm_unreachable("Unknown instruction type encountered!"); 462 // Build the switch statement using the Instruction.def file. 463 #define HANDLE_INST(NUM, OPCODE, CLASS) \ 464 case Instruction::OPCODE: \ 465 this->visit##OPCODE(*(CLASS *)CE); \ 466 break; 467 #include "llvm/IR/Instruction.def" 468 } 469 } 470 }; 471 472 // Helper functions 473 474 // Determines whether or not we an instruction is useless to us (e.g. 475 // FenceInst) hasUsefulEdges(Instruction * Inst)476 static bool hasUsefulEdges(Instruction *Inst) { 477 bool IsNonInvokeRetTerminator = isa<TerminatorInst>(Inst) && 478 !isa<InvokeInst>(Inst) && 479 !isa<ReturnInst>(Inst); 480 return !isa<CmpInst>(Inst) && !isa<FenceInst>(Inst) && 481 !IsNonInvokeRetTerminator; 482 } 483 addArgumentToGraph(Argument & Arg)484 void addArgumentToGraph(Argument &Arg) { 485 if (Arg.getType()->isPointerTy()) { 486 Graph.addNode(InstantiatedValue{&Arg, 0}, 487 getGlobalOrArgAttrFromValue(Arg)); 488 // Pointees of a formal parameter is known to the caller 489 Graph.addNode(InstantiatedValue{&Arg, 1}, getAttrCaller()); 490 } 491 } 492 493 // Given an Instruction, this will add it to the graph, along with any 494 // Instructions that are potentially only available from said Instruction 495 // For example, given the following line: 496 // %0 = load i16* getelementptr ([1 x i16]* @a, 0, 0), align 2 497 // addInstructionToGraph would add both the `load` and `getelementptr` 498 // instructions to the graph appropriately. addInstructionToGraph(GetEdgesVisitor & Visitor,Instruction & Inst)499 void addInstructionToGraph(GetEdgesVisitor &Visitor, Instruction &Inst) { 500 if (!hasUsefulEdges(&Inst)) 501 return; 502 503 Visitor.visit(Inst); 504 } 505 506 // Builds the graph needed for constructing the StratifiedSets for the given 507 // function buildGraphFrom(Function & Fn)508 void buildGraphFrom(Function &Fn) { 509 GetEdgesVisitor Visitor(*this); 510 511 for (auto &Bb : Fn.getBasicBlockList()) 512 for (auto &Inst : Bb.getInstList()) 513 addInstructionToGraph(Visitor, Inst); 514 515 for (auto &Arg : Fn.args()) 516 addArgumentToGraph(Arg); 517 } 518 519 public: CFLGraphBuilder(CFLAA & Analysis,const TargetLibraryInfo & TLI,Function & Fn)520 CFLGraphBuilder(CFLAA &Analysis, const TargetLibraryInfo &TLI, Function &Fn) 521 : Analysis(Analysis), TLI(TLI) { 522 buildGraphFrom(Fn); 523 } 524 getCFLGraph()525 const CFLGraph &getCFLGraph() const { return Graph; } getReturnValues()526 const SmallVector<Value *, 4> &getReturnValues() const { 527 return ReturnedValues; 528 } 529 }; 530 } 531 } 532 533 #endif 534