• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 //
5 // This implements a Clang tool to rewrite all instances of
6 // scoped_refptr<T>'s implicit cast to T (operator T*) to an explicit call to
7 // the .get() method.
8 
9 #include <assert.h>
10 #include <algorithm>
11 #include <memory>
12 #include <string>
13 
14 #include "clang/AST/ASTContext.h"
15 #include "clang/ASTMatchers/ASTMatchers.h"
16 #include "clang/ASTMatchers/ASTMatchersMacros.h"
17 #include "clang/ASTMatchers/ASTMatchFinder.h"
18 #include "clang/Basic/SourceManager.h"
19 #include "clang/Frontend/FrontendActions.h"
20 #include "clang/Lex/Lexer.h"
21 #include "clang/Tooling/CommonOptionsParser.h"
22 #include "clang/Tooling/Refactoring.h"
23 #include "clang/Tooling/Tooling.h"
24 #include "llvm/Support/CommandLine.h"
25 #include "llvm/Support/TargetSelect.h"
26 
27 using namespace clang::ast_matchers;
28 using clang::tooling::CommonOptionsParser;
29 using clang::tooling::Replacement;
30 using clang::tooling::Replacements;
31 using llvm::StringRef;
32 
33 namespace clang {
34 namespace ast_matchers {
35 
36 const internal::VariadicDynCastAllOfMatcher<Decl, CXXConversionDecl>
37     conversionDecl;
38 
AST_MATCHER(QualType,isBoolean)39 AST_MATCHER(QualType, isBoolean) {
40   return Node->isBooleanType();
41 }
42 
43 }  // namespace ast_matchers
44 }  // namespace clang
45 
46 namespace {
47 
48 // Returns true if expr needs to be put in parens (eg: when it is an operator
49 // syntactically).
NeedsParens(const clang::Expr * expr)50 bool NeedsParens(const clang::Expr* expr) {
51   if (llvm::dyn_cast<clang::UnaryOperator>(expr) ||
52       llvm::dyn_cast<clang::BinaryOperator>(expr) ||
53       llvm::dyn_cast<clang::ConditionalOperator>(expr)) {
54     return true;
55   }
56   // Calls to an overloaded operator also need parens, except for foo(...) and
57   // foo[...] expressions.
58   if (const clang::CXXOperatorCallExpr* op =
59           llvm::dyn_cast<clang::CXXOperatorCallExpr>(expr)) {
60     return op->getOperator() != clang::OO_Call &&
61            op->getOperator() != clang::OO_Subscript;
62   }
63   return false;
64 }
65 
RewriteImplicitToExplicitConversion(const MatchFinder::MatchResult & result,const clang::Expr * expr)66 Replacement RewriteImplicitToExplicitConversion(
67     const MatchFinder::MatchResult& result,
68     const clang::Expr* expr) {
69   clang::CharSourceRange range = clang::CharSourceRange::getTokenRange(
70       result.SourceManager->getSpellingLoc(expr->getLocStart()),
71       result.SourceManager->getSpellingLoc(expr->getLocEnd()));
72   assert(range.isValid() && "Invalid range!");
73 
74   // Handle cases where an implicit cast is being done by dereferencing a
75   // pointer to a scoped_refptr<> (sadly, it happens...)
76   //
77   // This rewrites both "*foo" and "*(foo)" as "foo->get()".
78   if (const clang::UnaryOperator* op =
79           llvm::dyn_cast<clang::UnaryOperator>(expr)) {
80     if (op->getOpcode() == clang::UO_Deref) {
81       const clang::Expr* const sub_expr =
82           op->getSubExpr()->IgnoreParenImpCasts();
83       clang::CharSourceRange sub_expr_range =
84           clang::CharSourceRange::getTokenRange(
85               result.SourceManager->getSpellingLoc(sub_expr->getLocStart()),
86               result.SourceManager->getSpellingLoc(sub_expr->getLocEnd()));
87       assert(sub_expr_range.isValid() && "Invalid subexpression range!");
88 
89       std::string inner_text = clang::Lexer::getSourceText(
90           sub_expr_range, *result.SourceManager, result.Context->getLangOpts());
91       assert(!inner_text.empty() && "No text for subexpression!");
92       if (NeedsParens(sub_expr)) {
93         inner_text.insert(0, "(");
94         inner_text.append(")");
95       }
96       inner_text.append("->get()");
97       return Replacement(*result.SourceManager, range, inner_text);
98     }
99   }
100 
101   std::string text = clang::Lexer::getSourceText(
102       range, *result.SourceManager, result.Context->getLangOpts());
103   assert(!text.empty() && "No text for expression!");
104 
105   // Unwrap any temporaries - for example, custom iterators that return
106   // scoped_refptr<T> as part of operator*. Any such iterators should also
107   // be declaring a scoped_refptr<T>* operator->, per C++03 24.4.1.1 (Table 72)
108   if (const clang::CXXBindTemporaryExpr* op =
109           llvm::dyn_cast<clang::CXXBindTemporaryExpr>(expr)) {
110     expr = op->getSubExpr();
111   }
112 
113   // Handle iterators (which are operator* calls, followed by implicit
114   // conversions) by rewriting *it as it->get()
115   if (const clang::CXXOperatorCallExpr* op =
116           llvm::dyn_cast<clang::CXXOperatorCallExpr>(expr)) {
117     if (op->getOperator() == clang::OO_Star) {
118       // Note that this doesn't rewrite **it correctly, since it should be
119       // rewritten using parens, e.g. (*it)->get(). However, this shouldn't
120       // happen frequently, if at all, since it would likely indicate code is
121       // storing pointers to a scoped_refptr in a container.
122       text.erase(0, 1);
123       text.append("->get()");
124       return Replacement(*result.SourceManager, range, text);
125     }
126   }
127 
128   // The only remaining calls should be non-dereferencing calls (eg: member
129   // calls), so a simple ".get()" appending should suffice.
130   if (NeedsParens(expr)) {
131     text.insert(0, "(");
132     text.append(")");
133   }
134   text.append(".get()");
135   return Replacement(*result.SourceManager, range, text);
136 }
137 
RewriteRawPtrToScopedRefptr(const MatchFinder::MatchResult & result,clang::SourceLocation begin,clang::SourceLocation end)138 Replacement RewriteRawPtrToScopedRefptr(const MatchFinder::MatchResult& result,
139                                         clang::SourceLocation begin,
140                                         clang::SourceLocation end) {
141   clang::CharSourceRange range = clang::CharSourceRange::getTokenRange(
142       result.SourceManager->getSpellingLoc(begin),
143       result.SourceManager->getSpellingLoc(end));
144   assert(range.isValid() && "Invalid range!");
145 
146   std::string text = clang::Lexer::getSourceText(
147       range, *result.SourceManager, result.Context->getLangOpts());
148   text.erase(text.rfind('*'));
149 
150   std::string replacement_text("scoped_refptr<");
151   replacement_text += text;
152   replacement_text += ">";
153 
154   return Replacement(*result.SourceManager, range, replacement_text);
155 }
156 
157 class GetRewriterCallback : public MatchFinder::MatchCallback {
158  public:
GetRewriterCallback(Replacements * replacements)159   explicit GetRewriterCallback(Replacements* replacements)
160       : replacements_(replacements) {}
161   virtual void run(const MatchFinder::MatchResult& result) override;
162 
163  private:
164   Replacements* const replacements_;
165 };
166 
run(const MatchFinder::MatchResult & result)167 void GetRewriterCallback::run(const MatchFinder::MatchResult& result) {
168   const clang::Expr* arg = result.Nodes.getNodeAs<clang::Expr>("arg");
169   assert(arg && "Unexpected match! No Expr captured!");
170   replacements_->insert(RewriteImplicitToExplicitConversion(result, arg));
171 }
172 
173 class VarRewriterCallback : public MatchFinder::MatchCallback {
174  public:
VarRewriterCallback(Replacements * replacements)175   explicit VarRewriterCallback(Replacements* replacements)
176       : replacements_(replacements) {}
177   virtual void run(const MatchFinder::MatchResult& result) override;
178 
179  private:
180   Replacements* const replacements_;
181 };
182 
run(const MatchFinder::MatchResult & result)183 void VarRewriterCallback::run(const MatchFinder::MatchResult& result) {
184   const clang::DeclaratorDecl* const var_decl =
185       result.Nodes.getNodeAs<clang::DeclaratorDecl>("var");
186   assert(var_decl && "Unexpected match! No VarDecl captured!");
187 
188   const clang::TypeSourceInfo* tsi = var_decl->getTypeSourceInfo();
189 
190   // TODO(dcheng): This mishandles a case where a variable has multiple
191   // declarations, e.g.:
192   //
193   // in .h:
194   // Foo* my_global_magical_foo;
195   //
196   // in .cc:
197   // Foo* my_global_magical_foo = CreateFoo();
198   //
199   // In this case, it will only rewrite the .cc definition. Oh well. This should
200   // be rare enough that these cases can be manually handled, since the style
201   // guide prohibits globals of non-POD type.
202   replacements_->insert(RewriteRawPtrToScopedRefptr(
203       result, tsi->getTypeLoc().getBeginLoc(), tsi->getTypeLoc().getEndLoc()));
204 }
205 
206 class FunctionRewriterCallback : public MatchFinder::MatchCallback {
207  public:
FunctionRewriterCallback(Replacements * replacements)208   explicit FunctionRewriterCallback(Replacements* replacements)
209       : replacements_(replacements) {}
210   virtual void run(const MatchFinder::MatchResult& result) override;
211 
212  private:
213   Replacements* const replacements_;
214 };
215 
run(const MatchFinder::MatchResult & result)216 void FunctionRewriterCallback::run(const MatchFinder::MatchResult& result) {
217   const clang::FunctionDecl* const function_decl =
218       result.Nodes.getNodeAs<clang::FunctionDecl>("fn");
219   assert(function_decl && "Unexpected match! No FunctionDecl captured!");
220 
221   // If matched against an implicit conversion to a DeclRefExpr, make sure the
222   // referenced declaration is of class type, e.g. the tool skips trying to
223   // chase pointers/references to determine if the pointee is a scoped_refptr<T>
224   // with local storage. Instead, let a human manually handle those cases.
225   const clang::VarDecl* const var_decl =
226       result.Nodes.getNodeAs<clang::VarDecl>("var");
227   if (var_decl && !var_decl->getTypeSourceInfo()->getType()->isClassType()) {
228     return;
229   }
230 
231   for (clang::FunctionDecl* f : function_decl->redecls()) {
232     clang::SourceRange range = f->getReturnTypeSourceRange();
233     replacements_->insert(
234         RewriteRawPtrToScopedRefptr(result, range.getBegin(), range.getEnd()));
235   }
236 }
237 
238 class MacroRewriterCallback : public MatchFinder::MatchCallback {
239  public:
MacroRewriterCallback(Replacements * replacements)240   explicit MacroRewriterCallback(Replacements* replacements)
241       : replacements_(replacements) {}
242   virtual void run(const MatchFinder::MatchResult& result) override;
243 
244  private:
245   Replacements* const replacements_;
246 };
247 
run(const MatchFinder::MatchResult & result)248 void MacroRewriterCallback::run(const MatchFinder::MatchResult& result) {
249   const clang::Expr* const expr = result.Nodes.getNodeAs<clang::Expr>("expr");
250   assert(expr && "Unexpected match! No Expr captured!");
251   replacements_->insert(RewriteImplicitToExplicitConversion(result, expr));
252 }
253 
254 }  // namespace
255 
256 static llvm::cl::extrahelp common_help(CommonOptionsParser::HelpMessage);
257 
main(int argc,const char * argv[])258 int main(int argc, const char* argv[]) {
259   // TODO(dcheng): Clang tooling should do this itself.
260   // http://llvm.org/bugs/show_bug.cgi?id=21627
261   llvm::InitializeNativeTarget();
262   llvm::InitializeNativeTargetAsmParser();
263   llvm::cl::OptionCategory category("Remove scoped_refptr conversions");
264   CommonOptionsParser options(argc, argv, category);
265   clang::tooling::ClangTool tool(options.getCompilations(),
266                                  options.getSourcePathList());
267 
268   MatchFinder match_finder;
269   Replacements replacements;
270 
271   auto is_scoped_refptr = cxxRecordDecl(isSameOrDerivedFrom("::scoped_refptr"),
272                                         isTemplateInstantiation());
273 
274   // Finds all calls to conversion operator member function. This catches calls
275   // to "operator T*", "operator Testable", and "operator bool" equally.
276   auto base_matcher =
277       cxxMemberCallExpr(thisPointerType(is_scoped_refptr),
278                         callee(conversionDecl()), on(id("arg", expr())));
279 
280   // The heuristic for whether or not converting a temporary is 'unsafe'. An
281   // unsafe conversion is one where a temporary scoped_refptr<T> is converted to
282   // another type. The matcher provides an exception for a temporary
283   // scoped_refptr that is the result of an operator call. In this case, assume
284   // that it's the result of an iterator dereference, and the container itself
285   // retains the necessary reference, since this is a common idiom to see in
286   // loop bodies.
287   auto is_unsafe_temporary_conversion =
288       on(cxxBindTemporaryExpr(unless(has(cxxOperatorCallExpr()))));
289 
290   // Returning a scoped_refptr<T> as a T* is considered unsafe if either are
291   // true:
292   // - The scoped_refptr<T> is a temporary.
293   // - The scoped_refptr<T> has local lifetime.
294   auto returned_as_raw_ptr = hasParent(
295       returnStmt(hasAncestor(id("fn", functionDecl(returns(pointerType()))))));
296   // This matcher intentionally matches more than it should. For example, this
297   // will match:
298   //   scoped_refptr<Foo>& foo = some_other_foo;
299   //   return foo;
300   // The matcher callback filters out VarDecls that aren't a scoped_refptr<T>,
301   // so those cases can be manually handled.
302   auto is_local_variable =
303       on(declRefExpr(to(id("var", varDecl(hasLocalStorage())))));
304   auto is_unsafe_return =
305       anyOf(allOf(hasParent(implicitCastExpr(returned_as_raw_ptr)),
306                   is_local_variable),
307             allOf(hasParent(implicitCastExpr(
308                       hasParent(exprWithCleanups(returned_as_raw_ptr)))),
309                   is_unsafe_temporary_conversion));
310 
311   // This catches both user-defined conversions (eg: "operator bool") and
312   // standard conversion sequence (C++03 13.3.3.1.1), such as converting a
313   // pointer to a bool.
314   auto implicit_to_bool =
315       implicitCastExpr(hasImplicitDestinationType(isBoolean()));
316 
317   // Avoid converting calls to of "operator Testable" -> "bool" and calls of
318   // "operator T*" -> "bool".
319   auto bool_conversion_matcher = hasParent(
320       expr(anyOf(implicit_to_bool, expr(hasParent(implicit_to_bool)))));
321 
322   auto is_logging_helper =
323       functionDecl(anyOf(hasName("CheckEQImpl"), hasName("CheckNEImpl")));
324   auto is_gtest_helper = functionDecl(
325       anyOf(cxxMethodDecl(ofClass(cxxRecordDecl(isSameOrDerivedFrom(
326                               hasName("::testing::internal::EqHelper")))),
327                           hasName("Compare")),
328             hasName("::testing::internal::CmpHelperNE")));
329   auto is_gtest_assertion_result_ctor =
330       cxxConstructorDecl(ofClass(cxxRecordDecl(
331           isSameOrDerivedFrom(hasName("::testing::AssertionResult")))));
332 
333   // Find all calls to an operator overload that are 'safe'.
334   //
335   // All bool conversions will be handled with the Testable trick, but that
336   // can only be used once "operator T*" is removed, since otherwise it leaves
337   // the call ambiguous.
338   GetRewriterCallback get_callback(&replacements);
339   match_finder.addMatcher(
340       cxxMemberCallExpr(
341           base_matcher,
342           // Excluded since the conversion may be unsafe.
343           unless(anyOf(is_unsafe_temporary_conversion, is_unsafe_return)),
344           // Excluded since the conversion occurs inside a helper function that
345           // the macro wraps. Letting this callback handle the rewrite would
346           // result in an incorrect replacement that changes the helper function
347           // itself. Instead, the right replacement is to rewrite the macro's
348           // arguments.
349           unless(hasAncestor(decl(anyOf(is_logging_helper, is_gtest_helper,
350                                         is_gtest_assertion_result_ctor))))),
351       &get_callback);
352 
353   // Find temporary scoped_refptr<T>'s being unsafely assigned to a T*.
354   VarRewriterCallback var_callback(&replacements);
355   auto initialized_with_temporary = ignoringImpCasts(exprWithCleanups(
356       has(cxxMemberCallExpr(base_matcher, is_unsafe_temporary_conversion))));
357   match_finder.addMatcher(id("var",
358                              varDecl(hasInitializer(initialized_with_temporary),
359                                      hasType(pointerType()))),
360                           &var_callback);
361   match_finder.addMatcher(
362       cxxConstructorDecl(forEachConstructorInitializer(
363           allOf(withInitializer(initialized_with_temporary),
364                 forField(id("var", fieldDecl(hasType(pointerType()))))))),
365       &var_callback);
366 
367   // Rewrite functions that unsafely turn a scoped_refptr<T> into a T* when
368   // returning a value.
369   FunctionRewriterCallback fn_callback(&replacements);
370   match_finder.addMatcher(cxxMemberCallExpr(base_matcher, is_unsafe_return),
371                           &fn_callback);
372 
373   // Rewrite logging / gtest expressions that result in an implicit conversion.
374   // Luckily, the matchers don't need to handle the case where one of the macro
375   // arguments is NULL, such as:
376   // CHECK_EQ(my_scoped_refptr, NULL)
377   // because it simply doesn't compile--since NULL is actually of integral type,
378   // this doesn't trigger scoped_refptr<T>'s implicit conversion. Since there is
379   // no comparison overload for scoped_refptr<T> and int, this fails to compile.
380   MacroRewriterCallback macro_callback(&replacements);
381   // CHECK_EQ/CHECK_NE helpers.
382   match_finder.addMatcher(
383       callExpr(callee(is_logging_helper),
384                argumentCountIs(3),
385                hasAnyArgument(id("expr", expr(hasType(is_scoped_refptr)))),
386                hasAnyArgument(hasType(pointerType())),
387                hasArgument(2, stringLiteral())),
388       &macro_callback);
389   // ASSERT_EQ/ASSERT_NE/EXPECT_EQ/EXPECT_EQ, which use the same underlying
390   // helper functions. Even though gtest has special handling for pointer to
391   // NULL comparisons, it doesn't trigger in this case, so no special handling
392   // is needed for the replacements.
393   match_finder.addMatcher(
394       callExpr(callee(is_gtest_helper),
395                argumentCountIs(4),
396                hasArgument(0, stringLiteral()),
397                hasArgument(1, stringLiteral()),
398                hasAnyArgument(id("expr", expr(hasType(is_scoped_refptr)))),
399                hasAnyArgument(hasType(pointerType()))),
400       &macro_callback);
401   // ASSERT_TRUE/EXPECT_TRUE helpers. Note that this matcher doesn't need to
402   // handle ASSERT_FALSE/EXPECT_FALSE, because it gets coerced to bool before
403   // being passed as an argument to AssertionResult's constructor. As a result,
404   // GetRewriterCallback handles this case properly since the conversion isn't
405   // hidden inside AssertionResult, and the generated replacement properly
406   // rewrites the macro argument.
407   // However, the tool does need to handle the _TRUE counterparts, since the
408   // conversion occurs inside the constructor in those cases.
409   match_finder.addMatcher(
410       cxxConstructExpr(
411           argumentCountIs(2),
412           hasArgument(0, id("expr", expr(hasType(is_scoped_refptr)))),
413           hasDeclaration(is_gtest_assertion_result_ctor)),
414       &macro_callback);
415 
416   std::unique_ptr<clang::tooling::FrontendActionFactory> factory =
417       clang::tooling::newFrontendActionFactory(&match_finder);
418   int result = tool.run(factory.get());
419   if (result != 0)
420     return result;
421 
422   // Serialization format is documented in tools/clang/scripts/run_tool.py
423   llvm::outs() << "==== BEGIN EDITS ====\n";
424   for (const auto& r : replacements) {
425     std::string replacement_text = r.getReplacementText().str();
426     std::replace(replacement_text.begin(), replacement_text.end(), '\n', '\0');
427     llvm::outs() << "r:::" << r.getFilePath() << ":::" << r.getOffset() << ":::"
428                  << r.getLength() << ":::" << replacement_text << "\n";
429   }
430   llvm::outs() << "==== END EDITS ====\n";
431 
432   return 0;
433 }
434