1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 //
5 // This implements a Clang tool to rewrite all instances of
6 // scoped_refptr<T>'s implicit cast to T (operator T*) to an explicit call to
7 // the .get() method.
8
9 #include <assert.h>
10 #include <algorithm>
11 #include <memory>
12 #include <string>
13
14 #include "clang/AST/ASTContext.h"
15 #include "clang/ASTMatchers/ASTMatchers.h"
16 #include "clang/ASTMatchers/ASTMatchersMacros.h"
17 #include "clang/ASTMatchers/ASTMatchFinder.h"
18 #include "clang/Basic/SourceManager.h"
19 #include "clang/Frontend/FrontendActions.h"
20 #include "clang/Lex/Lexer.h"
21 #include "clang/Tooling/CommonOptionsParser.h"
22 #include "clang/Tooling/Refactoring.h"
23 #include "clang/Tooling/Tooling.h"
24 #include "llvm/Support/CommandLine.h"
25 #include "llvm/Support/TargetSelect.h"
26
27 using namespace clang::ast_matchers;
28 using clang::tooling::CommonOptionsParser;
29 using clang::tooling::Replacement;
30 using clang::tooling::Replacements;
31 using llvm::StringRef;
32
33 namespace clang {
34 namespace ast_matchers {
35
36 const internal::VariadicDynCastAllOfMatcher<Decl, CXXConversionDecl>
37 conversionDecl;
38
AST_MATCHER(QualType,isBoolean)39 AST_MATCHER(QualType, isBoolean) {
40 return Node->isBooleanType();
41 }
42
43 } // namespace ast_matchers
44 } // namespace clang
45
46 namespace {
47
48 // Returns true if expr needs to be put in parens (eg: when it is an operator
49 // syntactically).
NeedsParens(const clang::Expr * expr)50 bool NeedsParens(const clang::Expr* expr) {
51 if (llvm::dyn_cast<clang::UnaryOperator>(expr) ||
52 llvm::dyn_cast<clang::BinaryOperator>(expr) ||
53 llvm::dyn_cast<clang::ConditionalOperator>(expr)) {
54 return true;
55 }
56 // Calls to an overloaded operator also need parens, except for foo(...) and
57 // foo[...] expressions.
58 if (const clang::CXXOperatorCallExpr* op =
59 llvm::dyn_cast<clang::CXXOperatorCallExpr>(expr)) {
60 return op->getOperator() != clang::OO_Call &&
61 op->getOperator() != clang::OO_Subscript;
62 }
63 return false;
64 }
65
RewriteImplicitToExplicitConversion(const MatchFinder::MatchResult & result,const clang::Expr * expr)66 Replacement RewriteImplicitToExplicitConversion(
67 const MatchFinder::MatchResult& result,
68 const clang::Expr* expr) {
69 clang::CharSourceRange range = clang::CharSourceRange::getTokenRange(
70 result.SourceManager->getSpellingLoc(expr->getLocStart()),
71 result.SourceManager->getSpellingLoc(expr->getLocEnd()));
72 assert(range.isValid() && "Invalid range!");
73
74 // Handle cases where an implicit cast is being done by dereferencing a
75 // pointer to a scoped_refptr<> (sadly, it happens...)
76 //
77 // This rewrites both "*foo" and "*(foo)" as "foo->get()".
78 if (const clang::UnaryOperator* op =
79 llvm::dyn_cast<clang::UnaryOperator>(expr)) {
80 if (op->getOpcode() == clang::UO_Deref) {
81 const clang::Expr* const sub_expr =
82 op->getSubExpr()->IgnoreParenImpCasts();
83 clang::CharSourceRange sub_expr_range =
84 clang::CharSourceRange::getTokenRange(
85 result.SourceManager->getSpellingLoc(sub_expr->getLocStart()),
86 result.SourceManager->getSpellingLoc(sub_expr->getLocEnd()));
87 assert(sub_expr_range.isValid() && "Invalid subexpression range!");
88
89 std::string inner_text = clang::Lexer::getSourceText(
90 sub_expr_range, *result.SourceManager, result.Context->getLangOpts());
91 assert(!inner_text.empty() && "No text for subexpression!");
92 if (NeedsParens(sub_expr)) {
93 inner_text.insert(0, "(");
94 inner_text.append(")");
95 }
96 inner_text.append("->get()");
97 return Replacement(*result.SourceManager, range, inner_text);
98 }
99 }
100
101 std::string text = clang::Lexer::getSourceText(
102 range, *result.SourceManager, result.Context->getLangOpts());
103 assert(!text.empty() && "No text for expression!");
104
105 // Unwrap any temporaries - for example, custom iterators that return
106 // scoped_refptr<T> as part of operator*. Any such iterators should also
107 // be declaring a scoped_refptr<T>* operator->, per C++03 24.4.1.1 (Table 72)
108 if (const clang::CXXBindTemporaryExpr* op =
109 llvm::dyn_cast<clang::CXXBindTemporaryExpr>(expr)) {
110 expr = op->getSubExpr();
111 }
112
113 // Handle iterators (which are operator* calls, followed by implicit
114 // conversions) by rewriting *it as it->get()
115 if (const clang::CXXOperatorCallExpr* op =
116 llvm::dyn_cast<clang::CXXOperatorCallExpr>(expr)) {
117 if (op->getOperator() == clang::OO_Star) {
118 // Note that this doesn't rewrite **it correctly, since it should be
119 // rewritten using parens, e.g. (*it)->get(). However, this shouldn't
120 // happen frequently, if at all, since it would likely indicate code is
121 // storing pointers to a scoped_refptr in a container.
122 text.erase(0, 1);
123 text.append("->get()");
124 return Replacement(*result.SourceManager, range, text);
125 }
126 }
127
128 // The only remaining calls should be non-dereferencing calls (eg: member
129 // calls), so a simple ".get()" appending should suffice.
130 if (NeedsParens(expr)) {
131 text.insert(0, "(");
132 text.append(")");
133 }
134 text.append(".get()");
135 return Replacement(*result.SourceManager, range, text);
136 }
137
RewriteRawPtrToScopedRefptr(const MatchFinder::MatchResult & result,clang::SourceLocation begin,clang::SourceLocation end)138 Replacement RewriteRawPtrToScopedRefptr(const MatchFinder::MatchResult& result,
139 clang::SourceLocation begin,
140 clang::SourceLocation end) {
141 clang::CharSourceRange range = clang::CharSourceRange::getTokenRange(
142 result.SourceManager->getSpellingLoc(begin),
143 result.SourceManager->getSpellingLoc(end));
144 assert(range.isValid() && "Invalid range!");
145
146 std::string text = clang::Lexer::getSourceText(
147 range, *result.SourceManager, result.Context->getLangOpts());
148 text.erase(text.rfind('*'));
149
150 std::string replacement_text("scoped_refptr<");
151 replacement_text += text;
152 replacement_text += ">";
153
154 return Replacement(*result.SourceManager, range, replacement_text);
155 }
156
157 class GetRewriterCallback : public MatchFinder::MatchCallback {
158 public:
GetRewriterCallback(Replacements * replacements)159 explicit GetRewriterCallback(Replacements* replacements)
160 : replacements_(replacements) {}
161 virtual void run(const MatchFinder::MatchResult& result) override;
162
163 private:
164 Replacements* const replacements_;
165 };
166
run(const MatchFinder::MatchResult & result)167 void GetRewriterCallback::run(const MatchFinder::MatchResult& result) {
168 const clang::Expr* arg = result.Nodes.getNodeAs<clang::Expr>("arg");
169 assert(arg && "Unexpected match! No Expr captured!");
170 auto err =
171 replacements_->add(RewriteImplicitToExplicitConversion(result, arg));
172 assert(!err);
173 }
174
175 class VarRewriterCallback : public MatchFinder::MatchCallback {
176 public:
VarRewriterCallback(Replacements * replacements)177 explicit VarRewriterCallback(Replacements* replacements)
178 : replacements_(replacements) {}
179 virtual void run(const MatchFinder::MatchResult& result) override;
180
181 private:
182 Replacements* const replacements_;
183 };
184
run(const MatchFinder::MatchResult & result)185 void VarRewriterCallback::run(const MatchFinder::MatchResult& result) {
186 const clang::DeclaratorDecl* const var_decl =
187 result.Nodes.getNodeAs<clang::DeclaratorDecl>("var");
188 assert(var_decl && "Unexpected match! No VarDecl captured!");
189
190 const clang::TypeSourceInfo* tsi = var_decl->getTypeSourceInfo();
191
192 // TODO(dcheng): This mishandles a case where a variable has multiple
193 // declarations, e.g.:
194 //
195 // in .h:
196 // Foo* my_global_magical_foo;
197 //
198 // in .cc:
199 // Foo* my_global_magical_foo = CreateFoo();
200 //
201 // In this case, it will only rewrite the .cc definition. Oh well. This should
202 // be rare enough that these cases can be manually handled, since the style
203 // guide prohibits globals of non-POD type.
204 auto err = replacements_->add(RewriteRawPtrToScopedRefptr(
205 result, tsi->getTypeLoc().getBeginLoc(), tsi->getTypeLoc().getEndLoc()));
206 assert(!err);
207 }
208
209 class FunctionRewriterCallback : public MatchFinder::MatchCallback {
210 public:
FunctionRewriterCallback(Replacements * replacements)211 explicit FunctionRewriterCallback(Replacements* replacements)
212 : replacements_(replacements) {}
213 virtual void run(const MatchFinder::MatchResult& result) override;
214
215 private:
216 Replacements* const replacements_;
217 };
218
run(const MatchFinder::MatchResult & result)219 void FunctionRewriterCallback::run(const MatchFinder::MatchResult& result) {
220 const clang::FunctionDecl* const function_decl =
221 result.Nodes.getNodeAs<clang::FunctionDecl>("fn");
222 assert(function_decl && "Unexpected match! No FunctionDecl captured!");
223
224 // If matched against an implicit conversion to a DeclRefExpr, make sure the
225 // referenced declaration is of class type, e.g. the tool skips trying to
226 // chase pointers/references to determine if the pointee is a scoped_refptr<T>
227 // with local storage. Instead, let a human manually handle those cases.
228 const clang::VarDecl* const var_decl =
229 result.Nodes.getNodeAs<clang::VarDecl>("var");
230 if (var_decl && !var_decl->getTypeSourceInfo()->getType()->isClassType()) {
231 return;
232 }
233
234 for (clang::FunctionDecl* f : function_decl->redecls()) {
235 clang::SourceRange range = f->getReturnTypeSourceRange();
236 auto err = replacements_->add(
237 RewriteRawPtrToScopedRefptr(result, range.getBegin(), range.getEnd()));
238 assert(!err);
239 }
240 }
241
242 class MacroRewriterCallback : public MatchFinder::MatchCallback {
243 public:
MacroRewriterCallback(Replacements * replacements)244 explicit MacroRewriterCallback(Replacements* replacements)
245 : replacements_(replacements) {}
246 virtual void run(const MatchFinder::MatchResult& result) override;
247
248 private:
249 Replacements* const replacements_;
250 };
251
run(const MatchFinder::MatchResult & result)252 void MacroRewriterCallback::run(const MatchFinder::MatchResult& result) {
253 const clang::Expr* const expr = result.Nodes.getNodeAs<clang::Expr>("expr");
254 assert(expr && "Unexpected match! No Expr captured!");
255 auto err =
256 replacements_->add(RewriteImplicitToExplicitConversion(result, expr));
257 assert(!err);
258 }
259
260 } // namespace
261
262 static llvm::cl::extrahelp common_help(CommonOptionsParser::HelpMessage);
263
main(int argc,const char * argv[])264 int main(int argc, const char* argv[]) {
265 // TODO(dcheng): Clang tooling should do this itself.
266 // http://llvm.org/bugs/show_bug.cgi?id=21627
267 llvm::InitializeNativeTarget();
268 llvm::InitializeNativeTargetAsmParser();
269 llvm::cl::OptionCategory category("Remove scoped_refptr conversions");
270 CommonOptionsParser options(argc, argv, category);
271 clang::tooling::ClangTool tool(options.getCompilations(),
272 options.getSourcePathList());
273
274 MatchFinder match_finder;
275 Replacements replacements;
276
277 auto is_scoped_refptr = cxxRecordDecl(isSameOrDerivedFrom("::scoped_refptr"),
278 isTemplateInstantiation());
279
280 // Finds all calls to conversion operator member function. This catches calls
281 // to "operator T*", "operator Testable", and "operator bool" equally.
282 auto base_matcher =
283 cxxMemberCallExpr(thisPointerType(is_scoped_refptr),
284 callee(conversionDecl()), on(id("arg", expr())));
285
286 // The heuristic for whether or not converting a temporary is 'unsafe'. An
287 // unsafe conversion is one where a temporary scoped_refptr<T> is converted to
288 // another type. The matcher provides an exception for a temporary
289 // scoped_refptr that is the result of an operator call. In this case, assume
290 // that it's the result of an iterator dereference, and the container itself
291 // retains the necessary reference, since this is a common idiom to see in
292 // loop bodies.
293 auto is_unsafe_temporary_conversion =
294 on(cxxBindTemporaryExpr(unless(has(cxxOperatorCallExpr()))));
295
296 // Returning a scoped_refptr<T> as a T* is considered unsafe if either are
297 // true:
298 // - The scoped_refptr<T> is a temporary.
299 // - The scoped_refptr<T> has local lifetime.
300 auto returned_as_raw_ptr = hasParent(
301 returnStmt(hasAncestor(id("fn", functionDecl(returns(pointerType()))))));
302 // This matcher intentionally matches more than it should. For example, this
303 // will match:
304 // scoped_refptr<Foo>& foo = some_other_foo;
305 // return foo;
306 // The matcher callback filters out VarDecls that aren't a scoped_refptr<T>,
307 // so those cases can be manually handled.
308 auto is_local_variable =
309 on(declRefExpr(to(id("var", varDecl(hasLocalStorage())))));
310 auto is_unsafe_return =
311 anyOf(allOf(hasParent(implicitCastExpr(returned_as_raw_ptr)),
312 is_local_variable),
313 allOf(hasParent(implicitCastExpr(
314 hasParent(exprWithCleanups(returned_as_raw_ptr)))),
315 is_unsafe_temporary_conversion));
316
317 // This catches both user-defined conversions (eg: "operator bool") and
318 // standard conversion sequence (C++03 13.3.3.1.1), such as converting a
319 // pointer to a bool.
320 auto implicit_to_bool =
321 implicitCastExpr(hasImplicitDestinationType(isBoolean()));
322
323 // Avoid converting calls to of "operator Testable" -> "bool" and calls of
324 // "operator T*" -> "bool".
325 auto bool_conversion_matcher = hasParent(
326 expr(anyOf(implicit_to_bool, expr(hasParent(implicit_to_bool)))));
327
328 auto is_logging_helper =
329 functionDecl(anyOf(hasName("CheckEQImpl"), hasName("CheckNEImpl")));
330 auto is_gtest_helper = functionDecl(
331 anyOf(cxxMethodDecl(ofClass(cxxRecordDecl(isSameOrDerivedFrom(
332 hasName("::testing::internal::EqHelper")))),
333 hasName("Compare")),
334 hasName("::testing::internal::CmpHelperNE")));
335 auto is_gtest_assertion_result_ctor =
336 cxxConstructorDecl(ofClass(cxxRecordDecl(
337 isSameOrDerivedFrom(hasName("::testing::AssertionResult")))));
338
339 // Find all calls to an operator overload that are 'safe'.
340 //
341 // All bool conversions will be handled with the Testable trick, but that
342 // can only be used once "operator T*" is removed, since otherwise it leaves
343 // the call ambiguous.
344 GetRewriterCallback get_callback(&replacements);
345 match_finder.addMatcher(
346 cxxMemberCallExpr(
347 base_matcher,
348 // Excluded since the conversion may be unsafe.
349 unless(anyOf(is_unsafe_temporary_conversion, is_unsafe_return)),
350 // Excluded since the conversion occurs inside a helper function that
351 // the macro wraps. Letting this callback handle the rewrite would
352 // result in an incorrect replacement that changes the helper function
353 // itself. Instead, the right replacement is to rewrite the macro's
354 // arguments.
355 unless(hasAncestor(decl(anyOf(is_logging_helper, is_gtest_helper,
356 is_gtest_assertion_result_ctor))))),
357 &get_callback);
358
359 // Find temporary scoped_refptr<T>'s being unsafely assigned to a T*.
360 VarRewriterCallback var_callback(&replacements);
361 auto initialized_with_temporary = has(ignoringImpCasts(
362 cxxMemberCallExpr(base_matcher, is_unsafe_temporary_conversion)));
363 match_finder.addMatcher(
364 id("var", varDecl(hasInitializer(initialized_with_temporary),
365 hasType(pointerType()))),
366 &var_callback);
367 match_finder.addMatcher(
368 cxxConstructorDecl(forEachConstructorInitializer(
369 allOf(withInitializer(initialized_with_temporary),
370 forField(id("var", fieldDecl(hasType(pointerType()))))))),
371 &var_callback);
372
373 // Rewrite functions that unsafely turn a scoped_refptr<T> into a T* when
374 // returning a value.
375 FunctionRewriterCallback fn_callback(&replacements);
376 match_finder.addMatcher(cxxMemberCallExpr(base_matcher, is_unsafe_return),
377 &fn_callback);
378
379 // Rewrite logging / gtest expressions that result in an implicit conversion.
380 // Luckily, the matchers don't need to handle the case where one of the macro
381 // arguments is NULL, such as:
382 // CHECK_EQ(my_scoped_refptr, NULL)
383 // because it simply doesn't compile--since NULL is actually of integral type,
384 // this doesn't trigger scoped_refptr<T>'s implicit conversion. Since there is
385 // no comparison overload for scoped_refptr<T> and int, this fails to compile.
386 MacroRewriterCallback macro_callback(&replacements);
387 // CHECK_EQ/CHECK_NE helpers.
388 match_finder.addMatcher(
389 callExpr(callee(is_logging_helper), argumentCountIs(3),
390 hasAnyArgument(ignoringParenImpCasts(
391 id("expr", expr(hasType(is_scoped_refptr))))),
392 hasAnyArgument(ignoringParenImpCasts(hasType(pointerType()))),
393 hasArgument(2, stringLiteral())),
394 ¯o_callback);
395 // ASSERT_EQ/ASSERT_NE/EXPECT_EQ/EXPECT_EQ, which use the same underlying
396 // helper functions. Even though gtest has special handling for pointer to
397 // NULL comparisons, it doesn't trigger in this case, so no special handling
398 // is needed for the replacements.
399 match_finder.addMatcher(
400 callExpr(callee(is_gtest_helper),
401 argumentCountIs(4),
402 hasArgument(0, stringLiteral()),
403 hasArgument(1, stringLiteral()),
404 hasAnyArgument(id("expr", expr(hasType(is_scoped_refptr)))),
405 hasAnyArgument(hasType(pointerType()))),
406 ¯o_callback);
407 // ASSERT_TRUE/EXPECT_TRUE helpers. Note that this matcher doesn't need to
408 // handle ASSERT_FALSE/EXPECT_FALSE, because it gets coerced to bool before
409 // being passed as an argument to AssertionResult's constructor. As a result,
410 // GetRewriterCallback handles this case properly since the conversion isn't
411 // hidden inside AssertionResult, and the generated replacement properly
412 // rewrites the macro argument.
413 // However, the tool does need to handle the _TRUE counterparts, since the
414 // conversion occurs inside the constructor in those cases.
415 match_finder.addMatcher(
416 cxxConstructExpr(
417 argumentCountIs(2),
418 hasArgument(0, id("expr", expr(hasType(is_scoped_refptr)))),
419 hasDeclaration(is_gtest_assertion_result_ctor)),
420 ¯o_callback);
421
422 std::unique_ptr<clang::tooling::FrontendActionFactory> factory =
423 clang::tooling::newFrontendActionFactory(&match_finder);
424 int result = tool.run(factory.get());
425 if (result != 0)
426 return result;
427
428 // Serialization format is documented in tools/clang/scripts/run_tool.py
429 llvm::outs() << "==== BEGIN EDITS ====\n";
430 for (const auto& r : replacements) {
431 std::string replacement_text = r.getReplacementText().str();
432 std::replace(replacement_text.begin(), replacement_text.end(), '\n', '\0');
433 llvm::outs() << "r:::" << r.getFilePath() << ":::" << r.getOffset() << ":::"
434 << r.getLength() << ":::" << replacement_text << "\n";
435 }
436 llvm::outs() << "==== END EDITS ====\n";
437
438 return 0;
439 }
440