//===--- RangeSelector.cpp - RangeSelector implementations ------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
8
9 #include "clang/Tooling/Transformer/RangeSelector.h"
10 #include "clang/AST/Expr.h"
11 #include "clang/ASTMatchers/ASTMatchFinder.h"
12 #include "clang/Basic/SourceLocation.h"
13 #include "clang/Lex/Lexer.h"
14 #include "clang/Tooling/Transformer/SourceCode.h"
15 #include "llvm/ADT/StringRef.h"
16 #include "llvm/Support/Errc.h"
17 #include "llvm/Support/Error.h"
18 #include <string>
19 #include <utility>
20 #include <vector>
21
22 using namespace clang;
23 using namespace transformer;
24
25 using ast_matchers::MatchFinder;
26 using llvm::Error;
27 using llvm::StringError;
28
29 using MatchResult = MatchFinder::MatchResult;
30
invalidArgumentError(Twine Message)31 static Error invalidArgumentError(Twine Message) {
32 return llvm::make_error<StringError>(llvm::errc::invalid_argument, Message);
33 }
34
typeError(StringRef ID,const ASTNodeKind & Kind)35 static Error typeError(StringRef ID, const ASTNodeKind &Kind) {
36 return invalidArgumentError("mismatched type (node id=" + ID +
37 " kind=" + Kind.asStringRef() + ")");
38 }
39
typeError(StringRef ID,const ASTNodeKind & Kind,Twine ExpectedType)40 static Error typeError(StringRef ID, const ASTNodeKind &Kind,
41 Twine ExpectedType) {
42 return invalidArgumentError("mismatched type: expected one of " +
43 ExpectedType + " (node id=" + ID +
44 " kind=" + Kind.asStringRef() + ")");
45 }
46
missingPropertyError(StringRef ID,Twine Description,StringRef Property)47 static Error missingPropertyError(StringRef ID, Twine Description,
48 StringRef Property) {
49 return invalidArgumentError(Description + " requires property '" + Property +
50 "' (node id=" + ID + ")");
51 }
52
getNode(const ast_matchers::BoundNodes & Nodes,StringRef ID)53 static Expected<DynTypedNode> getNode(const ast_matchers::BoundNodes &Nodes,
54 StringRef ID) {
55 auto &NodesMap = Nodes.getMap();
56 auto It = NodesMap.find(ID);
57 if (It == NodesMap.end())
58 return invalidArgumentError("ID not bound: " + ID);
59 return It->second;
60 }
61
62 // FIXME: handling of macros should be configurable.
findPreviousTokenStart(SourceLocation Start,const SourceManager & SM,const LangOptions & LangOpts)63 static SourceLocation findPreviousTokenStart(SourceLocation Start,
64 const SourceManager &SM,
65 const LangOptions &LangOpts) {
66 if (Start.isInvalid() || Start.isMacroID())
67 return SourceLocation();
68
69 SourceLocation BeforeStart = Start.getLocWithOffset(-1);
70 if (BeforeStart.isInvalid() || BeforeStart.isMacroID())
71 return SourceLocation();
72
73 return Lexer::GetBeginningOfToken(BeforeStart, SM, LangOpts);
74 }
75
76 // Finds the start location of the previous token of kind \p TK.
77 // FIXME: handling of macros should be configurable.
findPreviousTokenKind(SourceLocation Start,const SourceManager & SM,const LangOptions & LangOpts,tok::TokenKind TK)78 static SourceLocation findPreviousTokenKind(SourceLocation Start,
79 const SourceManager &SM,
80 const LangOptions &LangOpts,
81 tok::TokenKind TK) {
82 while (true) {
83 SourceLocation L = findPreviousTokenStart(Start, SM, LangOpts);
84 if (L.isInvalid() || L.isMacroID())
85 return SourceLocation();
86
87 Token T;
88 if (Lexer::getRawToken(L, T, SM, LangOpts, /*IgnoreWhiteSpace=*/true))
89 return SourceLocation();
90
91 if (T.is(TK))
92 return T.getLocation();
93
94 Start = L;
95 }
96 }
97
findOpenParen(const CallExpr & E,const SourceManager & SM,const LangOptions & LangOpts)98 static SourceLocation findOpenParen(const CallExpr &E, const SourceManager &SM,
99 const LangOptions &LangOpts) {
100 SourceLocation EndLoc =
101 E.getNumArgs() == 0 ? E.getRParenLoc() : E.getArg(0)->getBeginLoc();
102 return findPreviousTokenKind(EndLoc, SM, LangOpts, tok::TokenKind::l_paren);
103 }
104
before(RangeSelector Selector)105 RangeSelector transformer::before(RangeSelector Selector) {
106 return [Selector](const MatchResult &Result) -> Expected<CharSourceRange> {
107 Expected<CharSourceRange> SelectedRange = Selector(Result);
108 if (!SelectedRange)
109 return SelectedRange.takeError();
110 return CharSourceRange::getCharRange(SelectedRange->getBegin());
111 };
112 }
113
after(RangeSelector Selector)114 RangeSelector transformer::after(RangeSelector Selector) {
115 return [Selector](const MatchResult &Result) -> Expected<CharSourceRange> {
116 Expected<CharSourceRange> SelectedRange = Selector(Result);
117 if (!SelectedRange)
118 return SelectedRange.takeError();
119 SourceLocation End = SelectedRange->getEnd();
120 if (SelectedRange->isTokenRange()) {
121 // We need to find the actual (exclusive) end location from which to
122 // create a new source range. However, that's not guaranteed to be valid,
123 // even if the token location itself is valid. So, we create a token range
124 // consisting only of the last token, then map that range back to the
125 // source file. If that succeeds, we have a valid location for the end of
126 // the generated range.
127 CharSourceRange Range = Lexer::makeFileCharRange(
128 CharSourceRange::getTokenRange(SelectedRange->getEnd()),
129 *Result.SourceManager, Result.Context->getLangOpts());
130 if (Range.isInvalid())
131 return invalidArgumentError(
132 "after: can't resolve sub-range to valid source range");
133 End = Range.getEnd();
134 }
135
136 return CharSourceRange::getCharRange(End);
137 };
138 }
139
node(std::string ID)140 RangeSelector transformer::node(std::string ID) {
141 return [ID](const MatchResult &Result) -> Expected<CharSourceRange> {
142 Expected<DynTypedNode> Node = getNode(Result.Nodes, ID);
143 if (!Node)
144 return Node.takeError();
145 return (Node->get<Decl>() != nullptr ||
146 (Node->get<Stmt>() != nullptr && Node->get<Expr>() == nullptr))
147 ? tooling::getExtendedRange(*Node, tok::TokenKind::semi,
148 *Result.Context)
149 : CharSourceRange::getTokenRange(Node->getSourceRange());
150 };
151 }
152
statement(std::string ID)153 RangeSelector transformer::statement(std::string ID) {
154 return [ID](const MatchResult &Result) -> Expected<CharSourceRange> {
155 Expected<DynTypedNode> Node = getNode(Result.Nodes, ID);
156 if (!Node)
157 return Node.takeError();
158 return tooling::getExtendedRange(*Node, tok::TokenKind::semi,
159 *Result.Context);
160 };
161 }
162
enclose(RangeSelector Begin,RangeSelector End)163 RangeSelector transformer::enclose(RangeSelector Begin, RangeSelector End) {
164 return [Begin, End](const MatchResult &Result) -> Expected<CharSourceRange> {
165 Expected<CharSourceRange> BeginRange = Begin(Result);
166 if (!BeginRange)
167 return BeginRange.takeError();
168 Expected<CharSourceRange> EndRange = End(Result);
169 if (!EndRange)
170 return EndRange.takeError();
171 SourceLocation B = BeginRange->getBegin();
172 SourceLocation E = EndRange->getEnd();
173 // Note: we are precluding the possibility of sub-token ranges in the case
174 // that EndRange is a token range.
175 if (Result.SourceManager->isBeforeInTranslationUnit(E, B)) {
176 return invalidArgumentError("Bad range: out of order");
177 }
178 return CharSourceRange(SourceRange(B, E), EndRange->isTokenRange());
179 };
180 }
181
encloseNodes(std::string BeginID,std::string EndID)182 RangeSelector transformer::encloseNodes(std::string BeginID,
183 std::string EndID) {
184 return transformer::enclose(node(std::move(BeginID)), node(std::move(EndID)));
185 }
186
member(std::string ID)187 RangeSelector transformer::member(std::string ID) {
188 return [ID](const MatchResult &Result) -> Expected<CharSourceRange> {
189 Expected<DynTypedNode> Node = getNode(Result.Nodes, ID);
190 if (!Node)
191 return Node.takeError();
192 if (auto *M = Node->get<clang::MemberExpr>())
193 return CharSourceRange::getTokenRange(
194 M->getMemberNameInfo().getSourceRange());
195 return typeError(ID, Node->getNodeKind(), "MemberExpr");
196 };
197 }
198
name(std::string ID)199 RangeSelector transformer::name(std::string ID) {
200 return [ID](const MatchResult &Result) -> Expected<CharSourceRange> {
201 Expected<DynTypedNode> N = getNode(Result.Nodes, ID);
202 if (!N)
203 return N.takeError();
204 auto &Node = *N;
205 if (const auto *D = Node.get<NamedDecl>()) {
206 if (!D->getDeclName().isIdentifier())
207 return missingPropertyError(ID, "name", "identifier");
208 SourceLocation L = D->getLocation();
209 auto R = CharSourceRange::getTokenRange(L, L);
210 // Verify that the range covers exactly the name.
211 // FIXME: extend this code to support cases like `operator +` or
212 // `foo<int>` for which this range will be too short. Doing so will
213 // require subcasing `NamedDecl`, because it doesn't provide virtual
214 // access to the \c DeclarationNameInfo.
215 if (tooling::getText(R, *Result.Context) != D->getName())
216 return CharSourceRange();
217 return R;
218 }
219 if (const auto *E = Node.get<DeclRefExpr>()) {
220 if (!E->getNameInfo().getName().isIdentifier())
221 return missingPropertyError(ID, "name", "identifier");
222 SourceLocation L = E->getLocation();
223 return CharSourceRange::getTokenRange(L, L);
224 }
225 if (const auto *I = Node.get<CXXCtorInitializer>()) {
226 if (!I->isMemberInitializer() && I->isWritten())
227 return missingPropertyError(ID, "name", "explicit member initializer");
228 SourceLocation L = I->getMemberLocation();
229 return CharSourceRange::getTokenRange(L, L);
230 }
231 return typeError(ID, Node.getNodeKind(),
232 "DeclRefExpr, NamedDecl, CXXCtorInitializer");
233 };
234 }
235
236 namespace {
237 // FIXME: make this available in the public API for users to easily create their
238 // own selectors.
239
240 // Creates a selector from a range-selection function \p Func, which selects a
241 // range that is relative to a bound node id. \c T is the node type expected by
242 // \p Func.
243 template <typename T, CharSourceRange (*Func)(const MatchResult &, const T &)>
244 class RelativeSelector {
245 std::string ID;
246
247 public:
RelativeSelector(std::string ID)248 RelativeSelector(std::string ID) : ID(std::move(ID)) {}
249
operator ()(const MatchResult & Result)250 Expected<CharSourceRange> operator()(const MatchResult &Result) {
251 Expected<DynTypedNode> N = getNode(Result.Nodes, ID);
252 if (!N)
253 return N.takeError();
254 if (const auto *Arg = N->get<T>())
255 return Func(Result, *Arg);
256 return typeError(ID, N->getNodeKind());
257 }
258 };
259 } // namespace
260
261 // FIXME: Change the following functions from being in an anonymous namespace
262 // to static functions, after the minimum Visual C++ has _MSC_VER >= 1915
263 // (equivalent to Visual Studio 2017 v15.8 or higher). Using the anonymous
264 // namespace works around a bug in earlier versions.
265 namespace {
266 // Returns the range of the statements (all source between the braces).
getStatementsRange(const MatchResult &,const CompoundStmt & CS)267 CharSourceRange getStatementsRange(const MatchResult &,
268 const CompoundStmt &CS) {
269 return CharSourceRange::getCharRange(CS.getLBracLoc().getLocWithOffset(1),
270 CS.getRBracLoc());
271 }
272 } // namespace
273
statements(std::string ID)274 RangeSelector transformer::statements(std::string ID) {
275 return RelativeSelector<CompoundStmt, getStatementsRange>(std::move(ID));
276 }
277
278 namespace {
279 // Returns the range of the source between the call's parentheses.
getCallArgumentsRange(const MatchResult & Result,const CallExpr & CE)280 CharSourceRange getCallArgumentsRange(const MatchResult &Result,
281 const CallExpr &CE) {
282 return CharSourceRange::getCharRange(
283 findOpenParen(CE, *Result.SourceManager, Result.Context->getLangOpts())
284 .getLocWithOffset(1),
285 CE.getRParenLoc());
286 }
287 } // namespace
288
callArgs(std::string ID)289 RangeSelector transformer::callArgs(std::string ID) {
290 return RelativeSelector<CallExpr, getCallArgumentsRange>(std::move(ID));
291 }
292
293 namespace {
294 // Returns the range of the elements of the initializer list. Includes all
295 // source between the braces.
getElementsRange(const MatchResult &,const InitListExpr & E)296 CharSourceRange getElementsRange(const MatchResult &,
297 const InitListExpr &E) {
298 return CharSourceRange::getCharRange(E.getLBraceLoc().getLocWithOffset(1),
299 E.getRBraceLoc());
300 }
301 } // namespace
302
initListElements(std::string ID)303 RangeSelector transformer::initListElements(std::string ID) {
304 return RelativeSelector<InitListExpr, getElementsRange>(std::move(ID));
305 }
306
307 namespace {
308 // Returns the range of the else branch, including the `else` keyword.
getElseRange(const MatchResult & Result,const IfStmt & S)309 CharSourceRange getElseRange(const MatchResult &Result, const IfStmt &S) {
310 return tooling::maybeExtendRange(
311 CharSourceRange::getTokenRange(S.getElseLoc(), S.getEndLoc()),
312 tok::TokenKind::semi, *Result.Context);
313 }
314 } // namespace
315
elseBranch(std::string ID)316 RangeSelector transformer::elseBranch(std::string ID) {
317 return RelativeSelector<IfStmt, getElseRange>(std::move(ID));
318 }
319
expansion(RangeSelector S)320 RangeSelector transformer::expansion(RangeSelector S) {
321 return [S](const MatchResult &Result) -> Expected<CharSourceRange> {
322 Expected<CharSourceRange> SRange = S(Result);
323 if (!SRange)
324 return SRange.takeError();
325 return Result.SourceManager->getExpansionRange(*SRange);
326 };
327 }
328