1 //===--- LoopConvertUtils.h - clang-tidy ------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_LOOP_CONVERT_UTILS_H 10 #define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_LOOP_CONVERT_UTILS_H 11 12 #include "clang/AST/ASTContext.h" 13 #include "clang/AST/RecursiveASTVisitor.h" 14 #include "clang/ASTMatchers/ASTMatchFinder.h" 15 #include "clang/Basic/SourceLocation.h" 16 #include "llvm/ADT/DenseMap.h" 17 #include "llvm/ADT/SmallSet.h" 18 #include "llvm/ADT/SmallVector.h" 19 #include "llvm/ADT/StringRef.h" 20 #include <algorithm> 21 #include <memory> 22 #include <string> 23 #include <utility> 24 25 namespace clang { 26 namespace tidy { 27 namespace modernize { 28 29 enum LoopFixerKind { 30 LFK_Array, 31 LFK_Iterator, 32 LFK_ReverseIterator, 33 LFK_PseudoArray 34 }; 35 36 /// A map used to walk the AST in reverse: maps child Stmt to parent Stmt. 37 typedef llvm::DenseMap<const clang::Stmt *, const clang::Stmt *> StmtParentMap; 38 39 /// A map used to walk the AST in reverse: 40 /// maps VarDecl to the to parent DeclStmt. 41 typedef llvm::DenseMap<const clang::VarDecl *, const clang::DeclStmt *> 42 DeclParentMap; 43 44 /// A map used to track which variables have been removed by a refactoring pass. 45 /// It maps the parent ForStmt to the removed index variable's VarDecl. 46 typedef llvm::DenseMap<const clang::ForStmt *, const clang::VarDecl *> 47 ReplacedVarsMap; 48 49 /// A map used to remember the variable names generated in a Stmt 50 typedef llvm::DenseMap<const clang::Stmt *, std::string> 51 StmtGeneratedVarNameMap; 52 53 /// A vector used to store the AST subtrees of an Expr. 54 typedef llvm::SmallVector<const clang::Expr *, 16> ComponentVector; 55 56 /// Class used build the reverse AST properties needed to detect 57 /// name conflicts and free variables. 58 class StmtAncestorASTVisitor 59 : public clang::RecursiveASTVisitor<StmtAncestorASTVisitor> { 60 public: StmtAncestorASTVisitor()61 StmtAncestorASTVisitor() { StmtStack.push_back(nullptr); } 62 63 /// Run the analysis on the AST. 64 /// 65 /// In case we're running this analysis multiple times, don't repeat the work. gatherAncestors(ASTContext & Ctx)66 void gatherAncestors(ASTContext &Ctx) { 67 if (StmtAncestors.empty()) 68 TraverseAST(Ctx); 69 } 70 71 /// Accessor for StmtAncestors. getStmtToParentStmtMap()72 const StmtParentMap &getStmtToParentStmtMap() { return StmtAncestors; } 73 74 /// Accessor for DeclParents. getDeclToParentStmtMap()75 const DeclParentMap &getDeclToParentStmtMap() { return DeclParents; } 76 77 friend class clang::RecursiveASTVisitor<StmtAncestorASTVisitor>; 78 79 private: 80 StmtParentMap StmtAncestors; 81 DeclParentMap DeclParents; 82 llvm::SmallVector<const clang::Stmt *, 16> StmtStack; 83 84 bool TraverseStmt(clang::Stmt *Statement); 85 bool VisitDeclStmt(clang::DeclStmt *Statement); 86 }; 87 88 /// Class used to find the variables and member expressions on which an 89 /// arbitrary expression depends. 90 class ComponentFinderASTVisitor 91 : public clang::RecursiveASTVisitor<ComponentFinderASTVisitor> { 92 public: 93 ComponentFinderASTVisitor() = default; 94 95 /// Find the components of an expression and place them in a ComponentVector. findExprComponents(const clang::Expr * SourceExpr)96 void findExprComponents(const clang::Expr *SourceExpr) { 97 TraverseStmt(const_cast<clang::Expr *>(SourceExpr)); 98 } 99 100 /// Accessor for Components. getComponents()101 const ComponentVector &getComponents() { return Components; } 102 103 friend class clang::RecursiveASTVisitor<ComponentFinderASTVisitor>; 104 105 private: 106 ComponentVector Components; 107 108 bool VisitDeclRefExpr(clang::DeclRefExpr *E); 109 bool VisitMemberExpr(clang::MemberExpr *Member); 110 }; 111 112 /// Class used to determine if an expression is dependent on a variable declared 113 /// inside of the loop where it would be used. 114 class DependencyFinderASTVisitor 115 : public clang::RecursiveASTVisitor<DependencyFinderASTVisitor> { 116 public: DependencyFinderASTVisitor(const StmtParentMap * StmtParents,const DeclParentMap * DeclParents,const ReplacedVarsMap * ReplacedVars,const clang::Stmt * ContainingStmt)117 DependencyFinderASTVisitor(const StmtParentMap *StmtParents, 118 const DeclParentMap *DeclParents, 119 const ReplacedVarsMap *ReplacedVars, 120 const clang::Stmt *ContainingStmt) 121 : StmtParents(StmtParents), DeclParents(DeclParents), 122 ContainingStmt(ContainingStmt), ReplacedVars(ReplacedVars) {} 123 124 /// Run the analysis on Body, and return true iff the expression 125 /// depends on some variable declared within ContainingStmt. 126 /// 127 /// This is intended to protect against hoisting the container expression 128 /// outside of an inner context if part of that expression is declared in that 129 /// inner context. 130 /// 131 /// For example, 132 /// \code 133 /// const int N = 10, M = 20; 134 /// int arr[N][M]; 135 /// int getRow(); 136 /// 137 /// for (int i = 0; i < M; ++i) { 138 /// int k = getRow(); 139 /// printf("%d:", arr[k][i]); 140 /// } 141 /// \endcode 142 /// At first glance, this loop looks like it could be changed to 143 /// \code 144 /// for (int elem : arr[k]) { 145 /// int k = getIndex(); 146 /// printf("%d:", elem); 147 /// } 148 /// \endcode 149 /// But this is malformed, since `k` is used before it is defined! 150 /// 151 /// In order to avoid this, this class looks at the container expression 152 /// `arr[k]` and decides whether or not it contains a sub-expression declared 153 /// within the loop body. dependsOnInsideVariable(const clang::Stmt * Body)154 bool dependsOnInsideVariable(const clang::Stmt *Body) { 155 DependsOnInsideVariable = false; 156 TraverseStmt(const_cast<clang::Stmt *>(Body)); 157 return DependsOnInsideVariable; 158 } 159 160 friend class clang::RecursiveASTVisitor<DependencyFinderASTVisitor>; 161 162 private: 163 const StmtParentMap *StmtParents; 164 const DeclParentMap *DeclParents; 165 const clang::Stmt *ContainingStmt; 166 const ReplacedVarsMap *ReplacedVars; 167 bool DependsOnInsideVariable; 168 169 bool VisitVarDecl(clang::VarDecl *V); 170 bool VisitDeclRefExpr(clang::DeclRefExpr *D); 171 }; 172 173 /// Class used to determine if any declarations used in a Stmt would conflict 174 /// with a particular identifier. This search includes the names that don't 175 /// actually appear in the AST (i.e. created by a refactoring tool) by including 176 /// a map from Stmts to generated names associated with those stmts. 177 class DeclFinderASTVisitor 178 : public clang::RecursiveASTVisitor<DeclFinderASTVisitor> { 179 public: DeclFinderASTVisitor(const std::string & Name,const StmtGeneratedVarNameMap * GeneratedDecls)180 DeclFinderASTVisitor(const std::string &Name, 181 const StmtGeneratedVarNameMap *GeneratedDecls) 182 : Name(Name), GeneratedDecls(GeneratedDecls), Found(false) {} 183 184 /// Attempts to find any usages of variables name Name in Body, returning 185 /// true when it is used in Body. This includes the generated loop variables 186 /// of ForStmts which have already been transformed. findUsages(const clang::Stmt * Body)187 bool findUsages(const clang::Stmt *Body) { 188 Found = false; 189 TraverseStmt(const_cast<clang::Stmt *>(Body)); 190 return Found; 191 } 192 193 friend class clang::RecursiveASTVisitor<DeclFinderASTVisitor>; 194 195 private: 196 std::string Name; 197 /// GeneratedDecls keeps track of ForStmts which have been transformed, 198 /// mapping each modified ForStmt to the variable generated in the loop. 199 const StmtGeneratedVarNameMap *GeneratedDecls; 200 bool Found; 201 202 bool VisitForStmt(clang::ForStmt *F); 203 bool VisitNamedDecl(clang::NamedDecl *D); 204 bool VisitDeclRefExpr(clang::DeclRefExpr *D); 205 bool VisitTypeLoc(clang::TypeLoc TL); 206 }; 207 208 /// The information needed to describe a valid convertible usage 209 /// of an array index or iterator. 210 struct Usage { 211 enum UsageKind { 212 // Regular usages of the loop index (the ones not specified below). Some 213 // examples: 214 // \code 215 // int X = 8 * Arr[i]; 216 // ^~~~~~ 217 // f(param1, param2, *It); 218 // ^~~ 219 // if (Vec[i].SomeBool) {} 220 // ^~~~~~ 221 // \endcode 222 UK_Default, 223 // Indicates whether this is an access to a member through the arrow 224 // operator on pointers or iterators. 225 UK_MemberThroughArrow, 226 // If the variable is being captured by a lambda, indicates whether the 227 // capture was done by value or by reference. 228 UK_CaptureByCopy, 229 UK_CaptureByRef 230 }; 231 // The expression that is going to be converted. Null in case of lambda 232 // captures. 233 const Expr *Expression; 234 235 UsageKind Kind; 236 237 // Range that covers this usage. 238 SourceRange Range; 239 UsageUsage240 explicit Usage(const Expr *E) 241 : Expression(E), Kind(UK_Default), Range(Expression->getSourceRange()) {} UsageUsage242 Usage(const Expr *E, UsageKind Kind, SourceRange Range) 243 : Expression(E), Kind(Kind), Range(std::move(Range)) {} 244 }; 245 246 /// A class to encapsulate lowering of the tool's confidence level. 247 class Confidence { 248 public: 249 enum Level { 250 // Transformations that are likely to change semantics. 251 CL_Risky, 252 253 // Transformations that might change semantics. 254 CL_Reasonable, 255 256 // Transformations that will not change semantics. 257 CL_Safe 258 }; 259 /// Initialize confidence level. Confidence(Confidence::Level Level)260 explicit Confidence(Confidence::Level Level) : CurrentLevel(Level) {} 261 262 /// Lower the internal confidence level to Level, but do not raise it. lowerTo(Confidence::Level Level)263 void lowerTo(Confidence::Level Level) { 264 CurrentLevel = std::min(Level, CurrentLevel); 265 } 266 267 /// Return the internal confidence level. getLevel()268 Level getLevel() const { return CurrentLevel; } 269 270 private: 271 Level CurrentLevel; 272 }; 273 274 // The main computational result of ForLoopIndexVisitor. 275 typedef llvm::SmallVector<Usage, 8> UsageResult; 276 277 // General functions used by ForLoopIndexUseVisitor and LoopConvertCheck. 278 const Expr *digThroughConstructors(const Expr *E); 279 bool areSameExpr(ASTContext *Context, const Expr *First, const Expr *Second); 280 const DeclRefExpr *getDeclRef(const Expr *E); 281 bool areSameVariable(const ValueDecl *First, const ValueDecl *Second); 282 283 /// Discover usages of expressions consisting of index or iterator 284 /// access. 285 /// 286 /// Given an index variable, recursively crawls a for loop to discover if the 287 /// index variable is used in a way consistent with range-based for loop access. 288 class ForLoopIndexUseVisitor 289 : public RecursiveASTVisitor<ForLoopIndexUseVisitor> { 290 public: 291 ForLoopIndexUseVisitor(ASTContext *Context, const VarDecl *IndexVar, 292 const VarDecl *EndVar, const Expr *ContainerExpr, 293 const Expr *ArrayBoundExpr, 294 bool ContainerNeedsDereference); 295 296 /// Finds all uses of IndexVar in Body, placing all usages in Usages, 297 /// and returns true if IndexVar was only used in a way consistent with a 298 /// range-based for loop. 299 /// 300 /// The general strategy is to reject any DeclRefExprs referencing IndexVar, 301 /// with the exception of certain acceptable patterns. 302 /// For arrays, the DeclRefExpr for IndexVar must appear as the index of an 303 /// ArraySubscriptExpression. Iterator-based loops may dereference 304 /// IndexVar or call methods through operator-> (builtin or overloaded). 305 /// Array-like containers may use IndexVar as a parameter to the at() member 306 /// function and in overloaded operator[]. 307 bool findAndVerifyUsages(const Stmt *Body); 308 309 /// Add a set of components that we should consider relevant to the 310 /// container. 311 void addComponents(const ComponentVector &Components); 312 313 /// Accessor for Usages. getUsages()314 const UsageResult &getUsages() const { return Usages; } 315 316 /// Adds the Usage if it was not added before. 317 void addUsage(const Usage &U); 318 319 /// Get the container indexed by IndexVar, if any. getContainerIndexed()320 const Expr *getContainerIndexed() const { return ContainerExpr; } 321 322 /// Returns the statement declaring the variable created as an alias 323 /// for the loop element, if any. getAliasDecl()324 const DeclStmt *getAliasDecl() const { return AliasDecl; } 325 326 /// Accessor for ConfidenceLevel. getConfidenceLevel()327 Confidence::Level getConfidenceLevel() const { 328 return ConfidenceLevel.getLevel(); 329 } 330 331 /// Indicates if the alias declaration was in a place where it cannot 332 /// simply be removed but rather replaced with a use of the alias variable. 333 /// For example, variables declared in the condition of an if, switch, or for 334 /// stmt. aliasUseRequired()335 bool aliasUseRequired() const { return ReplaceWithAliasUse; } 336 337 /// Indicates if the alias declaration came from the init clause of a 338 /// nested for loop. SourceRanges provided by Clang for DeclStmts in this 339 /// case need to be adjusted. aliasFromForInit()340 bool aliasFromForInit() const { return AliasFromForInit; } 341 342 private: 343 /// Typedef used in CRTP functions. 344 typedef RecursiveASTVisitor<ForLoopIndexUseVisitor> VisitorBase; 345 friend class RecursiveASTVisitor<ForLoopIndexUseVisitor>; 346 347 /// Overriden methods for RecursiveASTVisitor's traversal. 348 bool TraverseArraySubscriptExpr(ArraySubscriptExpr *E); 349 bool TraverseCXXMemberCallExpr(CXXMemberCallExpr *MemberCall); 350 bool TraverseCXXOperatorCallExpr(CXXOperatorCallExpr *OpCall); 351 bool TraverseLambdaCapture(LambdaExpr *LE, const LambdaCapture *C, 352 Expr *Init); 353 bool TraverseMemberExpr(MemberExpr *Member); 354 bool TraverseUnaryOperator(UnaryOperator *Uop); 355 bool VisitDeclRefExpr(DeclRefExpr *E); 356 bool VisitDeclStmt(DeclStmt *S); 357 bool TraverseStmt(Stmt *S); 358 359 /// Add an expression to the list of expressions on which the container 360 /// expression depends. 361 void addComponent(const Expr *E); 362 363 // Input member variables: 364 ASTContext *Context; 365 /// The index variable's VarDecl. 366 const VarDecl *IndexVar; 367 /// The loop's 'end' variable, which cannot be mentioned at all. 368 const VarDecl *EndVar; 369 /// The Expr which refers to the container. 370 const Expr *ContainerExpr; 371 /// The Expr which refers to the terminating condition for array-based loops. 372 const Expr *ArrayBoundExpr; 373 bool ContainerNeedsDereference; 374 375 // Output member variables: 376 /// A container which holds all usages of IndexVar as the index of 377 /// ArraySubscriptExpressions. 378 UsageResult Usages; 379 llvm::SmallSet<SourceLocation, 8> UsageLocations; 380 bool OnlyUsedAsIndex; 381 /// The DeclStmt for an alias to the container element. 382 const DeclStmt *AliasDecl; 383 Confidence ConfidenceLevel; 384 /// A list of expressions on which ContainerExpr depends. 385 /// 386 /// If any of these expressions are encountered outside of an acceptable usage 387 /// of the loop element, lower our confidence level. 388 llvm::SmallVector<std::pair<const Expr *, llvm::FoldingSetNodeID>, 16> 389 DependentExprs; 390 391 /// The parent-in-waiting. Will become the real parent once we traverse down 392 /// one level in the AST. 393 const Stmt *NextStmtParent; 394 /// The actual parent of a node when Visit*() calls are made. Only the 395 /// parentage of DeclStmt's to possible iteration/selection statements is of 396 /// importance. 397 const Stmt *CurrStmtParent; 398 399 /// \see aliasUseRequired(). 400 bool ReplaceWithAliasUse; 401 /// \see aliasFromForInit(). 402 bool AliasFromForInit; 403 }; 404 405 struct TUTrackingInfo { 406 /// Reset and initialize per-TU tracking information. 407 /// 408 /// Must be called before using container accessors. TUTrackingInfoTUTrackingInfo409 TUTrackingInfo() : ParentFinder(new StmtAncestorASTVisitor) {} 410 getParentFinderTUTrackingInfo411 StmtAncestorASTVisitor &getParentFinder() { return *ParentFinder; } getGeneratedDeclsTUTrackingInfo412 StmtGeneratedVarNameMap &getGeneratedDecls() { return GeneratedDecls; } getReplacedVarsTUTrackingInfo413 ReplacedVarsMap &getReplacedVars() { return ReplacedVars; } 414 415 private: 416 std::unique_ptr<StmtAncestorASTVisitor> ParentFinder; 417 StmtGeneratedVarNameMap GeneratedDecls; 418 ReplacedVarsMap ReplacedVars; 419 }; 420 421 /// Create names for generated variables within a particular statement. 422 /// 423 /// VariableNamer uses a DeclContext as a reference point, checking for any 424 /// conflicting declarations higher up in the context or within SourceStmt. 425 /// It creates a variable name using hints from a source container and the old 426 /// index, if they exist. 427 class VariableNamer { 428 public: 429 // Supported naming styles. 430 enum NamingStyle { 431 NS_CamelBack, 432 NS_CamelCase, 433 NS_LowerCase, 434 NS_UpperCase, 435 }; 436 VariableNamer(StmtGeneratedVarNameMap * GeneratedDecls,const StmtParentMap * ReverseAST,const clang::Stmt * SourceStmt,const clang::VarDecl * OldIndex,const clang::ValueDecl * TheContainer,const clang::ASTContext * Context,NamingStyle Style)437 VariableNamer(StmtGeneratedVarNameMap *GeneratedDecls, 438 const StmtParentMap *ReverseAST, const clang::Stmt *SourceStmt, 439 const clang::VarDecl *OldIndex, 440 const clang::ValueDecl *TheContainer, 441 const clang::ASTContext *Context, NamingStyle Style) 442 : GeneratedDecls(GeneratedDecls), ReverseAST(ReverseAST), 443 SourceStmt(SourceStmt), OldIndex(OldIndex), TheContainer(TheContainer), 444 Context(Context), Style(Style) {} 445 446 /// Generate a new index name. 447 /// 448 /// Generates the name to be used for an inserted iterator. It relies on 449 /// declarationExists() to determine that there are no naming conflicts, and 450 /// tries to use some hints from the container name and the old index name. 451 std::string createIndexName(); 452 453 private: 454 StmtGeneratedVarNameMap *GeneratedDecls; 455 const StmtParentMap *ReverseAST; 456 const clang::Stmt *SourceStmt; 457 const clang::VarDecl *OldIndex; 458 const clang::ValueDecl *TheContainer; 459 const clang::ASTContext *Context; 460 const NamingStyle Style; 461 462 // Determine whether or not a declaration that would conflict with Symbol 463 // exists in an outer context or in any statement contained in SourceStmt. 464 bool declarationExists(llvm::StringRef Symbol); 465 }; 466 467 } // namespace modernize 468 } // namespace tidy 469 } // namespace clang 470 471 #endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_LOOP_CONVERT_UTILS_H 472