1 //===--- Parser.cpp - Matcher expression parser -----*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief Recursive parser implementation for the matcher expression grammar.
12 ///
13 //===----------------------------------------------------------------------===//
14
15 #include "clang/ASTMatchers/Dynamic/Parser.h"
16 #include "clang/ASTMatchers/Dynamic/Registry.h"
17 #include "clang/Basic/CharInfo.h"
18 #include "llvm/ADT/Optional.h"
19 #include "llvm/ADT/Twine.h"
20 #include <string>
21 #include <vector>
22
23 namespace clang {
24 namespace ast_matchers {
25 namespace dynamic {
26
27 /// \brief Simple structure to hold information for one token from the parser.
28 struct Parser::TokenInfo {
29 /// \brief Different possible tokens.
30 enum TokenKind {
31 TK_Eof,
32 TK_OpenParen,
33 TK_CloseParen,
34 TK_Comma,
35 TK_Period,
36 TK_Literal,
37 TK_Ident,
38 TK_InvalidChar,
39 TK_Error,
40 TK_CodeCompletion
41 };
42
43 /// \brief Some known identifiers.
44 static const char* const ID_Bind;
45
TokenInfoclang::ast_matchers::dynamic::Parser::TokenInfo46 TokenInfo() : Text(), Kind(TK_Eof), Range(), Value() {}
47
48 StringRef Text;
49 TokenKind Kind;
50 SourceRange Range;
51 VariantValue Value;
52 };
53
54 const char* const Parser::TokenInfo::ID_Bind = "bind";
55
56 /// \brief Simple tokenizer for the parser.
57 class Parser::CodeTokenizer {
58 public:
CodeTokenizer(StringRef MatcherCode,Diagnostics * Error)59 explicit CodeTokenizer(StringRef MatcherCode, Diagnostics *Error)
60 : Code(MatcherCode), StartOfLine(MatcherCode), Line(1), Error(Error),
61 CodeCompletionLocation(nullptr) {
62 NextToken = getNextToken();
63 }
64
CodeTokenizer(StringRef MatcherCode,Diagnostics * Error,unsigned CodeCompletionOffset)65 CodeTokenizer(StringRef MatcherCode, Diagnostics *Error,
66 unsigned CodeCompletionOffset)
67 : Code(MatcherCode), StartOfLine(MatcherCode), Line(1), Error(Error),
68 CodeCompletionLocation(MatcherCode.data() + CodeCompletionOffset) {
69 NextToken = getNextToken();
70 }
71
72 /// \brief Returns but doesn't consume the next token.
peekNextToken() const73 const TokenInfo &peekNextToken() const { return NextToken; }
74
75 /// \brief Consumes and returns the next token.
consumeNextToken()76 TokenInfo consumeNextToken() {
77 TokenInfo ThisToken = NextToken;
78 NextToken = getNextToken();
79 return ThisToken;
80 }
81
nextTokenKind() const82 TokenInfo::TokenKind nextTokenKind() const { return NextToken.Kind; }
83
84 private:
getNextToken()85 TokenInfo getNextToken() {
86 consumeWhitespace();
87 TokenInfo Result;
88 Result.Range.Start = currentLocation();
89
90 if (CodeCompletionLocation && CodeCompletionLocation <= Code.data()) {
91 Result.Kind = TokenInfo::TK_CodeCompletion;
92 Result.Text = StringRef(CodeCompletionLocation, 0);
93 CodeCompletionLocation = nullptr;
94 return Result;
95 }
96
97 if (Code.empty()) {
98 Result.Kind = TokenInfo::TK_Eof;
99 Result.Text = "";
100 return Result;
101 }
102
103 switch (Code[0]) {
104 case ',':
105 Result.Kind = TokenInfo::TK_Comma;
106 Result.Text = Code.substr(0, 1);
107 Code = Code.drop_front();
108 break;
109 case '.':
110 Result.Kind = TokenInfo::TK_Period;
111 Result.Text = Code.substr(0, 1);
112 Code = Code.drop_front();
113 break;
114 case '(':
115 Result.Kind = TokenInfo::TK_OpenParen;
116 Result.Text = Code.substr(0, 1);
117 Code = Code.drop_front();
118 break;
119 case ')':
120 Result.Kind = TokenInfo::TK_CloseParen;
121 Result.Text = Code.substr(0, 1);
122 Code = Code.drop_front();
123 break;
124
125 case '"':
126 case '\'':
127 // Parse a string literal.
128 consumeStringLiteral(&Result);
129 break;
130
131 case '0': case '1': case '2': case '3': case '4':
132 case '5': case '6': case '7': case '8': case '9':
133 // Parse an unsigned literal.
134 consumeUnsignedLiteral(&Result);
135 break;
136
137 default:
138 if (isAlphanumeric(Code[0])) {
139 // Parse an identifier
140 size_t TokenLength = 1;
141 while (1) {
142 // A code completion location in/immediately after an identifier will
143 // cause the portion of the identifier before the code completion
144 // location to become a code completion token.
145 if (CodeCompletionLocation == Code.data() + TokenLength) {
146 CodeCompletionLocation = nullptr;
147 Result.Kind = TokenInfo::TK_CodeCompletion;
148 Result.Text = Code.substr(0, TokenLength);
149 Code = Code.drop_front(TokenLength);
150 return Result;
151 }
152 if (TokenLength == Code.size() || !isAlphanumeric(Code[TokenLength]))
153 break;
154 ++TokenLength;
155 }
156 Result.Kind = TokenInfo::TK_Ident;
157 Result.Text = Code.substr(0, TokenLength);
158 Code = Code.drop_front(TokenLength);
159 } else {
160 Result.Kind = TokenInfo::TK_InvalidChar;
161 Result.Text = Code.substr(0, 1);
162 Code = Code.drop_front(1);
163 }
164 break;
165 }
166
167 Result.Range.End = currentLocation();
168 return Result;
169 }
170
171 /// \brief Consume an unsigned literal.
consumeUnsignedLiteral(TokenInfo * Result)172 void consumeUnsignedLiteral(TokenInfo *Result) {
173 unsigned Length = 1;
174 if (Code.size() > 1) {
175 // Consume the 'x' or 'b' radix modifier, if present.
176 switch (toLowercase(Code[1])) {
177 case 'x': case 'b': Length = 2;
178 }
179 }
180 while (Length < Code.size() && isHexDigit(Code[Length]))
181 ++Length;
182
183 Result->Text = Code.substr(0, Length);
184 Code = Code.drop_front(Length);
185
186 unsigned Value;
187 if (!Result->Text.getAsInteger(0, Value)) {
188 Result->Kind = TokenInfo::TK_Literal;
189 Result->Value = Value;
190 } else {
191 SourceRange Range;
192 Range.Start = Result->Range.Start;
193 Range.End = currentLocation();
194 Error->addError(Range, Error->ET_ParserUnsignedError) << Result->Text;
195 Result->Kind = TokenInfo::TK_Error;
196 }
197 }
198
199 /// \brief Consume a string literal.
200 ///
201 /// \c Code must be positioned at the start of the literal (the opening
202 /// quote). Consumed until it finds the same closing quote character.
consumeStringLiteral(TokenInfo * Result)203 void consumeStringLiteral(TokenInfo *Result) {
204 bool InEscape = false;
205 const char Marker = Code[0];
206 for (size_t Length = 1, Size = Code.size(); Length != Size; ++Length) {
207 if (InEscape) {
208 InEscape = false;
209 continue;
210 }
211 if (Code[Length] == '\\') {
212 InEscape = true;
213 continue;
214 }
215 if (Code[Length] == Marker) {
216 Result->Kind = TokenInfo::TK_Literal;
217 Result->Text = Code.substr(0, Length + 1);
218 Result->Value = Code.substr(1, Length - 1).str();
219 Code = Code.drop_front(Length + 1);
220 return;
221 }
222 }
223
224 StringRef ErrorText = Code;
225 Code = Code.drop_front(Code.size());
226 SourceRange Range;
227 Range.Start = Result->Range.Start;
228 Range.End = currentLocation();
229 Error->addError(Range, Error->ET_ParserStringError) << ErrorText;
230 Result->Kind = TokenInfo::TK_Error;
231 }
232
233 /// \brief Consume all leading whitespace from \c Code.
consumeWhitespace()234 void consumeWhitespace() {
235 while (!Code.empty() && isWhitespace(Code[0])) {
236 if (Code[0] == '\n') {
237 ++Line;
238 StartOfLine = Code.drop_front();
239 }
240 Code = Code.drop_front();
241 }
242 }
243
currentLocation()244 SourceLocation currentLocation() {
245 SourceLocation Location;
246 Location.Line = Line;
247 Location.Column = Code.data() - StartOfLine.data() + 1;
248 return Location;
249 }
250
251 StringRef Code;
252 StringRef StartOfLine;
253 unsigned Line;
254 Diagnostics *Error;
255 TokenInfo NextToken;
256 const char *CodeCompletionLocation;
257 };
258
~Sema()259 Parser::Sema::~Sema() {}
260
getNamedValue(StringRef Name)261 VariantValue Parser::Sema::getNamedValue(StringRef Name) {
262 return VariantValue();
263 }
264
265 struct Parser::ScopedContextEntry {
266 Parser *P;
267
ScopedContextEntryclang::ast_matchers::dynamic::Parser::ScopedContextEntry268 ScopedContextEntry(Parser *P, MatcherCtor C) : P(P) {
269 P->ContextStack.push_back(std::make_pair(C, 0u));
270 }
271
~ScopedContextEntryclang::ast_matchers::dynamic::Parser::ScopedContextEntry272 ~ScopedContextEntry() {
273 P->ContextStack.pop_back();
274 }
275
nextArgclang::ast_matchers::dynamic::Parser::ScopedContextEntry276 void nextArg() {
277 ++P->ContextStack.back().second;
278 }
279 };
280
281 /// \brief Parse expressions that start with an identifier.
282 ///
283 /// This function can parse named values and matchers.
284 /// In case of failure it will try to determine the user's intent to give
285 /// an appropriate error message.
parseIdentifierPrefixImpl(VariantValue * Value)286 bool Parser::parseIdentifierPrefixImpl(VariantValue *Value) {
287 const TokenInfo NameToken = Tokenizer->consumeNextToken();
288
289 if (Tokenizer->nextTokenKind() != TokenInfo::TK_OpenParen) {
290 // Parse as a named value.
291 if (const VariantValue NamedValue = S->getNamedValue(NameToken.Text)) {
292 *Value = NamedValue;
293 return true;
294 }
295 // If the syntax is correct and the name is not a matcher either, report
296 // unknown named value.
297 if ((Tokenizer->nextTokenKind() == TokenInfo::TK_Comma ||
298 Tokenizer->nextTokenKind() == TokenInfo::TK_CloseParen ||
299 Tokenizer->nextTokenKind() == TokenInfo::TK_Eof) &&
300 !S->lookupMatcherCtor(NameToken.Text)) {
301 Error->addError(NameToken.Range, Error->ET_RegistryValueNotFound)
302 << NameToken.Text;
303 return false;
304 }
305 // Otherwise, fallback to the matcher parser.
306 }
307
308 // Parse as a matcher expression.
309 return parseMatcherExpressionImpl(NameToken, Value);
310 }
311
312 /// \brief Parse and validate a matcher expression.
313 /// \return \c true on success, in which case \c Value has the matcher parsed.
314 /// If the input is malformed, or some argument has an error, it
315 /// returns \c false.
parseMatcherExpressionImpl(const TokenInfo & NameToken,VariantValue * Value)316 bool Parser::parseMatcherExpressionImpl(const TokenInfo &NameToken,
317 VariantValue *Value) {
318 assert(NameToken.Kind == TokenInfo::TK_Ident);
319 const TokenInfo OpenToken = Tokenizer->consumeNextToken();
320 if (OpenToken.Kind != TokenInfo::TK_OpenParen) {
321 Error->addError(OpenToken.Range, Error->ET_ParserNoOpenParen)
322 << OpenToken.Text;
323 return false;
324 }
325
326 llvm::Optional<MatcherCtor> Ctor = S->lookupMatcherCtor(NameToken.Text);
327
328 if (!Ctor) {
329 Error->addError(NameToken.Range, Error->ET_RegistryMatcherNotFound)
330 << NameToken.Text;
331 // Do not return here. We need to continue to give completion suggestions.
332 }
333
334 std::vector<ParserValue> Args;
335 TokenInfo EndToken;
336
337 {
338 ScopedContextEntry SCE(this, Ctor ? *Ctor : nullptr);
339
340 while (Tokenizer->nextTokenKind() != TokenInfo::TK_Eof) {
341 if (Tokenizer->nextTokenKind() == TokenInfo::TK_CloseParen) {
342 // End of args.
343 EndToken = Tokenizer->consumeNextToken();
344 break;
345 }
346 if (Args.size() > 0) {
347 // We must find a , token to continue.
348 const TokenInfo CommaToken = Tokenizer->consumeNextToken();
349 if (CommaToken.Kind != TokenInfo::TK_Comma) {
350 Error->addError(CommaToken.Range, Error->ET_ParserNoComma)
351 << CommaToken.Text;
352 return false;
353 }
354 }
355
356 Diagnostics::Context Ctx(Diagnostics::Context::MatcherArg, Error,
357 NameToken.Text, NameToken.Range,
358 Args.size() + 1);
359 ParserValue ArgValue;
360 ArgValue.Text = Tokenizer->peekNextToken().Text;
361 ArgValue.Range = Tokenizer->peekNextToken().Range;
362 if (!parseExpressionImpl(&ArgValue.Value)) {
363 return false;
364 }
365
366 Args.push_back(ArgValue);
367 SCE.nextArg();
368 }
369 }
370
371 if (EndToken.Kind == TokenInfo::TK_Eof) {
372 Error->addError(OpenToken.Range, Error->ET_ParserNoCloseParen);
373 return false;
374 }
375
376 std::string BindID;
377 if (Tokenizer->peekNextToken().Kind == TokenInfo::TK_Period) {
378 // Parse .bind("foo")
379 Tokenizer->consumeNextToken(); // consume the period.
380 const TokenInfo BindToken = Tokenizer->consumeNextToken();
381 if (BindToken.Kind == TokenInfo::TK_CodeCompletion) {
382 addCompletion(BindToken, "bind(\"", "bind");
383 return false;
384 }
385
386 const TokenInfo OpenToken = Tokenizer->consumeNextToken();
387 const TokenInfo IDToken = Tokenizer->consumeNextToken();
388 const TokenInfo CloseToken = Tokenizer->consumeNextToken();
389
390 // TODO: We could use different error codes for each/some to be more
391 // explicit about the syntax error.
392 if (BindToken.Kind != TokenInfo::TK_Ident ||
393 BindToken.Text != TokenInfo::ID_Bind) {
394 Error->addError(BindToken.Range, Error->ET_ParserMalformedBindExpr);
395 return false;
396 }
397 if (OpenToken.Kind != TokenInfo::TK_OpenParen) {
398 Error->addError(OpenToken.Range, Error->ET_ParserMalformedBindExpr);
399 return false;
400 }
401 if (IDToken.Kind != TokenInfo::TK_Literal || !IDToken.Value.isString()) {
402 Error->addError(IDToken.Range, Error->ET_ParserMalformedBindExpr);
403 return false;
404 }
405 if (CloseToken.Kind != TokenInfo::TK_CloseParen) {
406 Error->addError(CloseToken.Range, Error->ET_ParserMalformedBindExpr);
407 return false;
408 }
409 BindID = IDToken.Value.getString();
410 }
411
412 if (!Ctor)
413 return false;
414
415 // Merge the start and end infos.
416 Diagnostics::Context Ctx(Diagnostics::Context::ConstructMatcher, Error,
417 NameToken.Text, NameToken.Range);
418 SourceRange MatcherRange = NameToken.Range;
419 MatcherRange.End = EndToken.Range.End;
420 VariantMatcher Result = S->actOnMatcherExpression(
421 *Ctor, MatcherRange, BindID, Args, Error);
422 if (Result.isNull()) return false;
423
424 *Value = Result;
425 return true;
426 }
427
428 // If the prefix of this completion matches the completion token, add it to
429 // Completions minus the prefix.
addCompletion(const TokenInfo & CompToken,StringRef TypedText,StringRef Decl)430 void Parser::addCompletion(const TokenInfo &CompToken, StringRef TypedText,
431 StringRef Decl) {
432 if (TypedText.size() >= CompToken.Text.size() &&
433 TypedText.substr(0, CompToken.Text.size()) == CompToken.Text) {
434 Completions.push_back(
435 MatcherCompletion(TypedText.substr(CompToken.Text.size()), Decl));
436 }
437 }
438
addExpressionCompletions()439 void Parser::addExpressionCompletions() {
440 const TokenInfo CompToken = Tokenizer->consumeNextToken();
441 assert(CompToken.Kind == TokenInfo::TK_CodeCompletion);
442
443 // We cannot complete code if there is an invalid element on the context
444 // stack.
445 for (ContextStackTy::iterator I = ContextStack.begin(),
446 E = ContextStack.end();
447 I != E; ++I) {
448 if (!I->first)
449 return;
450 }
451
452 std::vector<MatcherCompletion> RegCompletions =
453 Registry::getCompletions(ContextStack);
454 for (std::vector<MatcherCompletion>::iterator I = RegCompletions.begin(),
455 E = RegCompletions.end();
456 I != E; ++I) {
457 addCompletion(CompToken, I->TypedText, I->MatcherDecl);
458 }
459 }
460
461 /// \brief Parse an <Expresssion>
parseExpressionImpl(VariantValue * Value)462 bool Parser::parseExpressionImpl(VariantValue *Value) {
463 switch (Tokenizer->nextTokenKind()) {
464 case TokenInfo::TK_Literal:
465 *Value = Tokenizer->consumeNextToken().Value;
466 return true;
467
468 case TokenInfo::TK_Ident:
469 return parseIdentifierPrefixImpl(Value);
470
471 case TokenInfo::TK_CodeCompletion:
472 addExpressionCompletions();
473 return false;
474
475 case TokenInfo::TK_Eof:
476 Error->addError(Tokenizer->consumeNextToken().Range,
477 Error->ET_ParserNoCode);
478 return false;
479
480 case TokenInfo::TK_Error:
481 // This error was already reported by the tokenizer.
482 return false;
483
484 case TokenInfo::TK_OpenParen:
485 case TokenInfo::TK_CloseParen:
486 case TokenInfo::TK_Comma:
487 case TokenInfo::TK_Period:
488 case TokenInfo::TK_InvalidChar:
489 const TokenInfo Token = Tokenizer->consumeNextToken();
490 Error->addError(Token.Range, Error->ET_ParserInvalidToken) << Token.Text;
491 return false;
492 }
493
494 llvm_unreachable("Unknown token kind.");
495 }
496
Parser(CodeTokenizer * Tokenizer,Sema * S,Diagnostics * Error)497 Parser::Parser(CodeTokenizer *Tokenizer, Sema *S,
498 Diagnostics *Error)
499 : Tokenizer(Tokenizer), S(S), Error(Error) {}
500
~RegistrySema()501 Parser::RegistrySema::~RegistrySema() {}
502
503 llvm::Optional<MatcherCtor>
lookupMatcherCtor(StringRef MatcherName)504 Parser::RegistrySema::lookupMatcherCtor(StringRef MatcherName) {
505 return Registry::lookupMatcherCtor(MatcherName);
506 }
507
actOnMatcherExpression(MatcherCtor Ctor,const SourceRange & NameRange,StringRef BindID,ArrayRef<ParserValue> Args,Diagnostics * Error)508 VariantMatcher Parser::RegistrySema::actOnMatcherExpression(
509 MatcherCtor Ctor, const SourceRange &NameRange, StringRef BindID,
510 ArrayRef<ParserValue> Args, Diagnostics *Error) {
511 if (BindID.empty()) {
512 return Registry::constructMatcher(Ctor, NameRange, Args, Error);
513 } else {
514 return Registry::constructBoundMatcher(Ctor, NameRange, BindID, Args,
515 Error);
516 }
517 }
518
parseExpression(StringRef Code,VariantValue * Value,Diagnostics * Error)519 bool Parser::parseExpression(StringRef Code, VariantValue *Value,
520 Diagnostics *Error) {
521 RegistrySema S;
522 return parseExpression(Code, &S, Value, Error);
523 }
524
parseExpression(StringRef Code,Sema * S,VariantValue * Value,Diagnostics * Error)525 bool Parser::parseExpression(StringRef Code, Sema *S,
526 VariantValue *Value, Diagnostics *Error) {
527 CodeTokenizer Tokenizer(Code, Error);
528 if (!Parser(&Tokenizer, S, Error).parseExpressionImpl(Value)) return false;
529 if (Tokenizer.peekNextToken().Kind != TokenInfo::TK_Eof) {
530 Error->addError(Tokenizer.peekNextToken().Range,
531 Error->ET_ParserTrailingCode);
532 return false;
533 }
534 return true;
535 }
536
537 std::vector<MatcherCompletion>
completeExpression(StringRef Code,unsigned CompletionOffset)538 Parser::completeExpression(StringRef Code, unsigned CompletionOffset) {
539 Diagnostics Error;
540 CodeTokenizer Tokenizer(Code, &Error, CompletionOffset);
541 RegistrySema S;
542 Parser P(&Tokenizer, &S, &Error);
543 VariantValue Dummy;
544 P.parseExpressionImpl(&Dummy);
545
546 return P.Completions;
547 }
548
549 llvm::Optional<DynTypedMatcher>
parseMatcherExpression(StringRef Code,Diagnostics * Error)550 Parser::parseMatcherExpression(StringRef Code, Diagnostics *Error) {
551 RegistrySema S;
552 return parseMatcherExpression(Code, &S, Error);
553 }
554
555 llvm::Optional<DynTypedMatcher>
parseMatcherExpression(StringRef Code,Parser::Sema * S,Diagnostics * Error)556 Parser::parseMatcherExpression(StringRef Code, Parser::Sema *S,
557 Diagnostics *Error) {
558 VariantValue Value;
559 if (!parseExpression(Code, S, &Value, Error))
560 return llvm::Optional<DynTypedMatcher>();
561 if (!Value.isMatcher()) {
562 Error->addError(SourceRange(), Error->ET_ParserNotAMatcher);
563 return llvm::Optional<DynTypedMatcher>();
564 }
565 llvm::Optional<DynTypedMatcher> Result =
566 Value.getMatcher().getSingleMatcher();
567 if (!Result.hasValue()) {
568 Error->addError(SourceRange(), Error->ET_ParserOverloadedType)
569 << Value.getTypeAsString();
570 }
571 return Result;
572 }
573
574 } // namespace dynamic
575 } // namespace ast_matchers
576 } // namespace clang
577