1 //===--- SuspiciousMissingCommaCheck.cpp - clang-tidy----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "SuspiciousMissingCommaCheck.h"
10 #include "clang/AST/ASTContext.h"
11 #include "clang/ASTMatchers/ASTMatchFinder.h"
12
13 using namespace clang::ast_matchers;
14
15 namespace clang {
16 namespace tidy {
17 namespace bugprone {
18
19 namespace {
20
isConcatenatedLiteralsOnPurpose(ASTContext * Ctx,const StringLiteral * Lit)21 bool isConcatenatedLiteralsOnPurpose(ASTContext *Ctx,
22 const StringLiteral *Lit) {
23 // String literals surrounded by parentheses are assumed to be on purpose.
24 // i.e.: const char* Array[] = { ("a" "b" "c"), "d", [...] };
25
26 TraversalKindScope RAII(*Ctx, ast_type_traits::TK_AsIs);
27 auto Parents = Ctx->getParents(*Lit);
28 if (Parents.size() == 1 && Parents[0].get<ParenExpr>() != nullptr)
29 return true;
30
31 // Appropriately indented string literals are assumed to be on purpose.
32 // The following frequent indentation is accepted:
33 // const char* Array[] = {
34 // "first literal"
35 // "indented literal"
36 // "indented literal",
37 // "second literal",
38 // [...]
39 // };
40 const SourceManager &SM = Ctx->getSourceManager();
41 bool IndentedCorrectly = true;
42 SourceLocation FirstToken = Lit->getStrTokenLoc(0);
43 FileID BaseFID = SM.getFileID(FirstToken);
44 unsigned int BaseIndent = SM.getSpellingColumnNumber(FirstToken);
45 unsigned int BaseLine = SM.getSpellingLineNumber(FirstToken);
46 for (unsigned int TokNum = 1; TokNum < Lit->getNumConcatenated(); ++TokNum) {
47 SourceLocation Token = Lit->getStrTokenLoc(TokNum);
48 FileID FID = SM.getFileID(Token);
49 unsigned int Indent = SM.getSpellingColumnNumber(Token);
50 unsigned int Line = SM.getSpellingLineNumber(Token);
51 if (FID != BaseFID || Line != BaseLine + TokNum || Indent <= BaseIndent) {
52 IndentedCorrectly = false;
53 break;
54 }
55 }
56 if (IndentedCorrectly)
57 return true;
58
59 // There is no pattern recognized by the checker, assume it's not on purpose.
60 return false;
61 }
62
/// Matches a string literal built from more than one but fewer than
/// \c MaxConcatenatedTokens adjacent tokens, provided the concatenation is
/// not recognized as intentional by isConcatenatedLiteralsOnPurpose().
AST_MATCHER_P(StringLiteral, isConcatenatedLiteral, unsigned,
              MaxConcatenatedTokens) {
  const unsigned NumTokens = Node.getNumConcatenated();
  // Reject plain (single-token) literals and ones at or above the token cap
  // before running the more expensive layout heuristics.
  if (NumTokens <= 1 || NumTokens >= MaxConcatenatedTokens)
    return false;
  return !isConcatenatedLiteralsOnPurpose(&Finder->getASTContext(), &Node);
}
69
70 } // namespace
71
SuspiciousMissingCommaCheck(StringRef Name,ClangTidyContext * Context)72 SuspiciousMissingCommaCheck::SuspiciousMissingCommaCheck(
73 StringRef Name, ClangTidyContext *Context)
74 : ClangTidyCheck(Name, Context),
75 SizeThreshold(Options.get("SizeThreshold", 5U)),
76 RatioThreshold(std::stod(Options.get("RatioThreshold", ".2"))),
77 MaxConcatenatedTokens(Options.get("MaxConcatenatedTokens", 5U)) {}
78
storeOptions(ClangTidyOptions::OptionMap & Opts)79 void SuspiciousMissingCommaCheck::storeOptions(
80 ClangTidyOptions::OptionMap &Opts) {
81 Options.store(Opts, "SizeThreshold", SizeThreshold);
82 Options.store(Opts, "RatioThreshold", std::to_string(RatioThreshold));
83 Options.store(Opts, "MaxConcatenatedTokens", MaxConcatenatedTokens);
84 }
85
registerMatchers(MatchFinder * Finder)86 void SuspiciousMissingCommaCheck::registerMatchers(MatchFinder *Finder) {
87 const auto ConcatenatedStringLiteral =
88 stringLiteral(isConcatenatedLiteral(MaxConcatenatedTokens)).bind("str");
89
90 const auto StringsInitializerList =
91 initListExpr(hasType(constantArrayType()),
92 has(ignoringParenImpCasts(expr(ConcatenatedStringLiteral))));
93
94 Finder->addMatcher(StringsInitializerList.bind("list"), this);
95 }
96
check(const MatchFinder::MatchResult & Result)97 void SuspiciousMissingCommaCheck::check(
98 const MatchFinder::MatchResult &Result) {
99 const auto *InitializerList = Result.Nodes.getNodeAs<InitListExpr>("list");
100 const auto *ConcatenatedLiteral =
101 Result.Nodes.getNodeAs<StringLiteral>("str");
102 assert(InitializerList && ConcatenatedLiteral);
103
104 // Skip small arrays as they often generate false-positive.
105 unsigned int Size = InitializerList->getNumInits();
106 if (Size < SizeThreshold)
107 return;
108
109 // Count the number of occurrence of concatenated string literal.
110 unsigned int Count = 0;
111 for (unsigned int i = 0; i < Size; ++i) {
112 const Expr *Child = InitializerList->getInit(i)->IgnoreImpCasts();
113 if (const auto *Literal = dyn_cast<StringLiteral>(Child)) {
114 if (Literal->getNumConcatenated() > 1)
115 ++Count;
116 }
117 }
118
119 // Warn only when concatenation is not common in this initializer list.
120 // The current threshold is set to less than 1/5 of the string literals.
121 if (double(Count) / Size > RatioThreshold)
122 return;
123
124 diag(ConcatenatedLiteral->getBeginLoc(),
125 "suspicious string literal, probably missing a comma");
126 }
127
128 } // namespace bugprone
129 } // namespace tidy
130 } // namespace clang
131