1 //===- DataflowAnalysis.h ---------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines base types and functions for building dataflow analyses 10 // that run over Control-Flow Graphs (CFGs). 11 // 12 //===----------------------------------------------------------------------===// 13 14 #ifndef LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWANALYSIS_H 15 #define LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWANALYSIS_H 16 17 #include <iterator> 18 #include <optional> 19 #include <type_traits> 20 #include <utility> 21 #include <vector> 22 23 #include "clang/AST/ASTContext.h" 24 #include "clang/Analysis/CFG.h" 25 #include "clang/Analysis/FlowSensitive/AdornedCFG.h" 26 #include "clang/Analysis/FlowSensitive/DataflowEnvironment.h" 27 #include "clang/Analysis/FlowSensitive/DataflowLattice.h" 28 #include "clang/Analysis/FlowSensitive/MatchSwitch.h" 29 #include "clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h" 30 #include "clang/Analysis/FlowSensitive/WatchedLiteralsSolver.h" 31 #include "llvm/ADT/STLExtras.h" 32 #include "llvm/ADT/STLFunctionalExtras.h" 33 #include "llvm/ADT/SmallVector.h" 34 #include "llvm/Support/Errc.h" 35 #include "llvm/Support/Error.h" 36 37 namespace clang { 38 namespace dataflow { 39 40 /// Base class template for dataflow analyses built on a single lattice type. 41 /// 42 /// Requirements: 43 /// 44 /// `Derived` must be derived from a specialization of this class template and 45 /// must provide the following public members: 46 /// * `LatticeT initialElement()` - returns a lattice element that models the 47 /// initial state of a basic block; 48 /// * `void transfer(const CFGElement &, LatticeT &, Environment &)` - applies 49 /// the analysis transfer function for a given CFG element and lattice 50 /// element. 51 /// 52 /// `Derived` can optionally provide the following members: 53 /// * `void transferBranch(bool Branch, const Stmt *Stmt, TypeErasedLattice &E, 54 /// Environment &Env)` - applies the analysis transfer 55 /// function for a given edge from a CFG block of a conditional statement. 56 /// 57 /// `Derived` can optionally override the virtual functions in the 58 /// `Environment::ValueModel` interface (which is an indirect base class of 59 /// this class). 60 /// 61 /// `LatticeT` is a bounded join-semilattice that is used by `Derived` and must 62 /// provide the following public members: 63 /// * `LatticeJoinEffect join(const LatticeT &)` - joins the object and the 64 /// argument by computing their least upper bound, modifies the object if 65 /// necessary, and returns an effect indicating whether any changes were 66 /// made to it; 67 /// FIXME: make it `static LatticeT join(const LatticeT&, const LatticeT&)` 68 /// * `bool operator==(const LatticeT &) const` - returns true if and only if 69 /// the object is equal to the argument. 70 /// 71 /// `LatticeT` can optionally provide the following members: 72 /// * `LatticeJoinEffect widen(const LatticeT &Previous)` - replaces the 73 /// lattice element with an approximation that can reach a fixed point more 74 /// quickly than iterated application of the transfer function alone. The 75 /// previous value is provided to inform the choice of widened value. The 76 /// function must also serve as a comparison operation, by indicating whether 77 /// the widened value is equivalent to the previous value with the returned 78 /// `LatticeJoinEffect`. 79 template <typename Derived, typename LatticeT> 80 class DataflowAnalysis : public TypeErasedDataflowAnalysis { 81 public: 82 /// Bounded join-semilattice that is used in the analysis. 83 using Lattice = LatticeT; 84 DataflowAnalysis(ASTContext & Context)85 explicit DataflowAnalysis(ASTContext &Context) : Context(Context) {} 86 DataflowAnalysis(ASTContext & Context,DataflowAnalysisOptions Options)87 explicit DataflowAnalysis(ASTContext &Context, 88 DataflowAnalysisOptions Options) 89 : TypeErasedDataflowAnalysis(Options), Context(Context) {} 90 getASTContext()91 ASTContext &getASTContext() final { return Context; } 92 typeErasedInitialElement()93 TypeErasedLattice typeErasedInitialElement() final { 94 return {static_cast<Derived *>(this)->initialElement()}; 95 } 96 joinTypeErased(const TypeErasedLattice & E1,const TypeErasedLattice & E2)97 TypeErasedLattice joinTypeErased(const TypeErasedLattice &E1, 98 const TypeErasedLattice &E2) final { 99 // FIXME: change the signature of join() to avoid copying here. 100 Lattice L1 = llvm::any_cast<const Lattice &>(E1.Value); 101 const Lattice &L2 = llvm::any_cast<const Lattice &>(E2.Value); 102 L1.join(L2); 103 return {std::move(L1)}; 104 } 105 widenTypeErased(TypeErasedLattice & Current,const TypeErasedLattice & Previous)106 LatticeJoinEffect widenTypeErased(TypeErasedLattice &Current, 107 const TypeErasedLattice &Previous) final { 108 Lattice &C = llvm::any_cast<Lattice &>(Current.Value); 109 const Lattice &P = llvm::any_cast<const Lattice &>(Previous.Value); 110 return widenInternal(Rank0{}, C, P); 111 } 112 isEqualTypeErased(const TypeErasedLattice & E1,const TypeErasedLattice & E2)113 bool isEqualTypeErased(const TypeErasedLattice &E1, 114 const TypeErasedLattice &E2) final { 115 const Lattice &L1 = llvm::any_cast<const Lattice &>(E1.Value); 116 const Lattice &L2 = llvm::any_cast<const Lattice &>(E2.Value); 117 return L1 == L2; 118 } 119 transferTypeErased(const CFGElement & Element,TypeErasedLattice & E,Environment & Env)120 void transferTypeErased(const CFGElement &Element, TypeErasedLattice &E, 121 Environment &Env) final { 122 Lattice &L = llvm::any_cast<Lattice &>(E.Value); 123 static_cast<Derived *>(this)->transfer(Element, L, Env); 124 } 125 transferBranchTypeErased(bool Branch,const Stmt * Stmt,TypeErasedLattice & E,Environment & Env)126 void transferBranchTypeErased(bool Branch, const Stmt *Stmt, 127 TypeErasedLattice &E, Environment &Env) final { 128 transferBranchInternal(Rank0{}, *static_cast<Derived *>(this), Branch, Stmt, 129 E, Env); 130 } 131 132 private: 133 // These `Rank` structs are used for template metaprogramming to choose 134 // between overloads. 135 struct Rank1 {}; 136 struct Rank0 : Rank1 {}; 137 138 // The first-choice implementation: use `widen` when it is available. 139 template <typename T> 140 static auto widenInternal(Rank0, T &Current, const T &Prev) 141 -> decltype(Current.widen(Prev)) { 142 return Current.widen(Prev); 143 } 144 145 // The second-choice implementation: `widen` is unavailable. Widening is 146 // merged with equality checking, so when widening is unimplemented, we 147 // default to equality checking. widenInternal(Rank1,const Lattice & Current,const Lattice & Prev)148 static LatticeJoinEffect widenInternal(Rank1, const Lattice &Current, 149 const Lattice &Prev) { 150 return Prev == Current ? LatticeJoinEffect::Unchanged 151 : LatticeJoinEffect::Changed; 152 } 153 154 // The first-choice implementation: `transferBranch` is implemented. 155 template <typename Analysis> 156 static auto transferBranchInternal(Rank0, Analysis &A, bool Branch, 157 const Stmt *Stmt, TypeErasedLattice &L, 158 Environment &Env) 159 -> std::void_t<decltype(A.transferBranch( 160 Branch, Stmt, std::declval<LatticeT &>(), Env))> { 161 A.transferBranch(Branch, Stmt, llvm::any_cast<Lattice &>(L.Value), Env); 162 } 163 164 // The second-choice implementation: `transferBranch` is unimplemented. No-op. 165 template <typename Analysis> transferBranchInternal(Rank1,Analysis & A,bool,const Stmt *,TypeErasedLattice &,Environment &)166 static void transferBranchInternal(Rank1, Analysis &A, bool, const Stmt *, 167 TypeErasedLattice &, Environment &) {} 168 169 ASTContext &Context; 170 }; 171 172 // Model of the program at a given program point. 173 template <typename LatticeT> struct DataflowAnalysisState { 174 // Model of a program property. 175 LatticeT Lattice; 176 177 // Model of the state of the program (store and heap). 178 Environment Env; 179 }; 180 181 /// Performs dataflow analysis and returns a mapping from basic block IDs to 182 /// dataflow analysis states that model the respective basic blocks. The 183 /// returned vector, if any, will have the same size as the number of CFG 184 /// blocks, with indices corresponding to basic block IDs. Returns an error if 185 /// the dataflow analysis cannot be performed successfully. Otherwise, calls 186 /// `PostVisitCFG` on each CFG element with the final analysis results at that 187 /// program point. 188 /// 189 /// `MaxBlockVisits` caps the number of block visits during analysis. See 190 /// `runTypeErasedDataflowAnalysis` for a full description. The default value is 191 /// essentially arbitrary -- large enough to accommodate what seems like any 192 /// reasonable CFG, but still small enough to limit the cost of hitting the 193 /// limit. 194 template <typename AnalysisT> 195 llvm::Expected<std::vector< 196 std::optional<DataflowAnalysisState<typename AnalysisT::Lattice>>>> 197 runDataflowAnalysis( 198 const AdornedCFG &ACFG, AnalysisT &Analysis, const Environment &InitEnv, 199 std::function<void(const CFGElement &, const DataflowAnalysisState< 200 typename AnalysisT::Lattice> &)> 201 PostVisitCFG = nullptr, 202 std::int32_t MaxBlockVisits = 20'000) { 203 std::function<void(const CFGElement &, 204 const TypeErasedDataflowAnalysisState &)> 205 PostVisitCFGClosure = nullptr; 206 if (PostVisitCFG) { 207 PostVisitCFGClosure = [&PostVisitCFG]( 208 const CFGElement &Element, 209 const TypeErasedDataflowAnalysisState &State) { 210 auto *Lattice = 211 llvm::any_cast<typename AnalysisT::Lattice>(&State.Lattice.Value); 212 // FIXME: we should not be copying the environment here! 213 // Ultimately the PostVisitCFG only gets a const reference anyway. 214 PostVisitCFG(Element, DataflowAnalysisState<typename AnalysisT::Lattice>{ 215 *Lattice, State.Env.fork()}); 216 }; 217 } 218 219 auto TypeErasedBlockStates = runTypeErasedDataflowAnalysis( 220 ACFG, Analysis, InitEnv, PostVisitCFGClosure, MaxBlockVisits); 221 if (!TypeErasedBlockStates) 222 return TypeErasedBlockStates.takeError(); 223 224 std::vector<std::optional<DataflowAnalysisState<typename AnalysisT::Lattice>>> 225 BlockStates; 226 BlockStates.reserve(TypeErasedBlockStates->size()); 227 228 llvm::transform( 229 std::move(*TypeErasedBlockStates), std::back_inserter(BlockStates), 230 [](auto &OptState) { 231 return llvm::transformOptional( 232 std::move(OptState), [](TypeErasedDataflowAnalysisState &&State) { 233 return DataflowAnalysisState<typename AnalysisT::Lattice>{ 234 llvm::any_cast<typename AnalysisT::Lattice>( 235 std::move(State.Lattice.Value)), 236 std::move(State.Env)}; 237 }); 238 }); 239 return std::move(BlockStates); 240 } 241 242 // Create an analysis class that is derived from `DataflowAnalysis`. This is an 243 // SFINAE adapter that allows us to call two different variants of constructor 244 // (either with or without the optional `Environment` parameter). 245 // FIXME: Make all classes derived from `DataflowAnalysis` take an `Environment` 246 // parameter in their constructor so that we can get rid of this abomination. 247 template <typename AnalysisT> 248 auto createAnalysis(ASTContext &ASTCtx, Environment &Env) 249 -> decltype(AnalysisT(ASTCtx, Env)) { 250 return AnalysisT(ASTCtx, Env); 251 } 252 template <typename AnalysisT> 253 auto createAnalysis(ASTContext &ASTCtx, Environment &Env) 254 -> decltype(AnalysisT(ASTCtx)) { 255 return AnalysisT(ASTCtx); 256 } 257 258 /// Runs a dataflow analysis over the given function and then runs `Diagnoser` 259 /// over the results. Returns a list of diagnostics for `FuncDecl` or an 260 /// error. Currently, errors can occur (at least) because the analysis requires 261 /// too many iterations over the CFG or the SAT solver times out. 262 /// 263 /// The default value of `MaxSATIterations` was chosen based on the following 264 /// observations: 265 /// - Non-pathological calls to the solver typically require only a few hundred 266 /// iterations. 267 /// - This limit is still low enough to keep runtimes acceptable (on typical 268 /// machines) in cases where we hit the limit. 269 /// 270 /// `MaxBlockVisits` caps the number of block visits during analysis. See 271 /// `runDataflowAnalysis` for a full description and explanation of the default 272 /// value. 273 template <typename AnalysisT, typename Diagnostic> 274 llvm::Expected<llvm::SmallVector<Diagnostic>> diagnoseFunction( 275 const FunctionDecl &FuncDecl, ASTContext &ASTCtx, 276 llvm::function_ref<llvm::SmallVector<Diagnostic>( 277 const CFGElement &, ASTContext &, 278 const TransferStateForDiagnostics<typename AnalysisT::Lattice> &)> 279 Diagnoser, 280 std::int64_t MaxSATIterations = 1'000'000'000, 281 std::int32_t MaxBlockVisits = 20'000) { 282 llvm::Expected<AdornedCFG> Context = AdornedCFG::build(FuncDecl); 283 if (!Context) 284 return Context.takeError(); 285 286 auto Solver = std::make_unique<WatchedLiteralsSolver>(MaxSATIterations); 287 DataflowAnalysisContext AnalysisContext(*Solver); 288 Environment Env(AnalysisContext, FuncDecl); 289 AnalysisT Analysis = createAnalysis<AnalysisT>(ASTCtx, Env); 290 llvm::SmallVector<Diagnostic> Diagnostics; 291 if (llvm::Error Err = 292 runTypeErasedDataflowAnalysis( 293 *Context, Analysis, Env, 294 [&ASTCtx, &Diagnoser, &Diagnostics]( 295 const CFGElement &Elt, 296 const TypeErasedDataflowAnalysisState &State) mutable { 297 auto EltDiagnostics = Diagnoser( 298 Elt, ASTCtx, 299 TransferStateForDiagnostics<typename AnalysisT::Lattice>( 300 llvm::any_cast<const typename AnalysisT::Lattice &>( 301 State.Lattice.Value), 302 State.Env)); 303 llvm::move(EltDiagnostics, std::back_inserter(Diagnostics)); 304 }, 305 MaxBlockVisits) 306 .takeError()) 307 return std::move(Err); 308 309 if (Solver->reachedLimit()) 310 return llvm::createStringError(llvm::errc::interrupted, 311 "SAT solver timed out"); 312 313 return Diagnostics; 314 } 315 316 /// Abstract base class for dataflow "models": reusable analysis components that 317 /// model a particular aspect of program semantics in the `Environment`. For 318 /// example, a model may capture a type and its related functions. 319 class DataflowModel : public Environment::ValueModel { 320 public: 321 /// Return value indicates whether the model processed the `Element`. 322 virtual bool transfer(const CFGElement &Element, Environment &Env) = 0; 323 }; 324 325 } // namespace dataflow 326 } // namespace clang 327 328 #endif // LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWANALYSIS_H 329