1 //===-- CPlusPlusNameParser.cpp -------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "CPlusPlusNameParser.h"
10
11 #include "clang/Basic/IdentifierTable.h"
12 #include "llvm/ADT/StringMap.h"
13 #include "llvm/Support/Threading.h"
14
15 using namespace lldb;
16 using namespace lldb_private;
17 using llvm::Optional;
18 using llvm::None;
19 using ParsedFunction = lldb_private::CPlusPlusNameParser::ParsedFunction;
20 using ParsedName = lldb_private::CPlusPlusNameParser::ParsedName;
21 namespace tok = clang::tok;
22
ParseAsFunctionDefinition()23 Optional<ParsedFunction> CPlusPlusNameParser::ParseAsFunctionDefinition() {
24 m_next_token_index = 0;
25 Optional<ParsedFunction> result(None);
26
27 // Try to parse the name as function without a return type specified e.g.
28 // main(int, char*[])
29 {
30 Bookmark start_position = SetBookmark();
31 result = ParseFunctionImpl(false);
32 if (result && !HasMoreTokens())
33 return result;
34 }
35
36 // Try to parse the name as function with function pointer return type e.g.
37 // void (*get_func(const char*))()
38 result = ParseFuncPtr(true);
39 if (result)
40 return result;
41
42 // Finally try to parse the name as a function with non-function return type
43 // e.g. int main(int, char*[])
44 result = ParseFunctionImpl(true);
45 if (HasMoreTokens())
46 return None;
47 return result;
48 }
49
ParseAsFullName()50 Optional<ParsedName> CPlusPlusNameParser::ParseAsFullName() {
51 m_next_token_index = 0;
52 Optional<ParsedNameRanges> name_ranges = ParseFullNameImpl();
53 if (!name_ranges)
54 return None;
55 if (HasMoreTokens())
56 return None;
57 ParsedName result;
58 result.basename = GetTextForRange(name_ranges.getValue().basename_range);
59 result.context = GetTextForRange(name_ranges.getValue().context_range);
60 return result;
61 }
62
HasMoreTokens()63 bool CPlusPlusNameParser::HasMoreTokens() {
64 return m_next_token_index < m_tokens.size();
65 }
66
Advance()67 void CPlusPlusNameParser::Advance() { ++m_next_token_index; }
68
TakeBack()69 void CPlusPlusNameParser::TakeBack() { --m_next_token_index; }
70
ConsumeToken(tok::TokenKind kind)71 bool CPlusPlusNameParser::ConsumeToken(tok::TokenKind kind) {
72 if (!HasMoreTokens())
73 return false;
74
75 if (!Peek().is(kind))
76 return false;
77
78 Advance();
79 return true;
80 }
81
ConsumeToken(Ts...kinds)82 template <typename... Ts> bool CPlusPlusNameParser::ConsumeToken(Ts... kinds) {
83 if (!HasMoreTokens())
84 return false;
85
86 if (!Peek().isOneOf(kinds...))
87 return false;
88
89 Advance();
90 return true;
91 }
92
SetBookmark()93 CPlusPlusNameParser::Bookmark CPlusPlusNameParser::SetBookmark() {
94 return Bookmark(m_next_token_index);
95 }
96
GetCurrentPosition()97 size_t CPlusPlusNameParser::GetCurrentPosition() { return m_next_token_index; }
98
Peek()99 clang::Token &CPlusPlusNameParser::Peek() {
100 assert(HasMoreTokens());
101 return m_tokens[m_next_token_index];
102 }
103
104 Optional<ParsedFunction>
ParseFunctionImpl(bool expect_return_type)105 CPlusPlusNameParser::ParseFunctionImpl(bool expect_return_type) {
106 Bookmark start_position = SetBookmark();
107 if (expect_return_type) {
108 // Consume return type if it's expected.
109 if (!ConsumeTypename())
110 return None;
111 }
112
113 auto maybe_name = ParseFullNameImpl();
114 if (!maybe_name) {
115 return None;
116 }
117
118 size_t argument_start = GetCurrentPosition();
119 if (!ConsumeArguments()) {
120 return None;
121 }
122
123 size_t qualifiers_start = GetCurrentPosition();
124 SkipFunctionQualifiers();
125 size_t end_position = GetCurrentPosition();
126
127 ParsedFunction result;
128 result.name.basename = GetTextForRange(maybe_name.getValue().basename_range);
129 result.name.context = GetTextForRange(maybe_name.getValue().context_range);
130 result.arguments = GetTextForRange(Range(argument_start, qualifiers_start));
131 result.qualifiers = GetTextForRange(Range(qualifiers_start, end_position));
132 start_position.Remove();
133 return result;
134 }
135
136 Optional<ParsedFunction>
ParseFuncPtr(bool expect_return_type)137 CPlusPlusNameParser::ParseFuncPtr(bool expect_return_type) {
138 Bookmark start_position = SetBookmark();
139 if (expect_return_type) {
140 // Consume return type.
141 if (!ConsumeTypename())
142 return None;
143 }
144
145 if (!ConsumeToken(tok::l_paren))
146 return None;
147 if (!ConsumePtrsAndRefs())
148 return None;
149
150 {
151 Bookmark before_inner_function_pos = SetBookmark();
152 auto maybe_inner_function_name = ParseFunctionImpl(false);
153 if (maybe_inner_function_name)
154 if (ConsumeToken(tok::r_paren))
155 if (ConsumeArguments()) {
156 SkipFunctionQualifiers();
157 start_position.Remove();
158 before_inner_function_pos.Remove();
159 return maybe_inner_function_name;
160 }
161 }
162
163 auto maybe_inner_function_ptr_name = ParseFuncPtr(false);
164 if (maybe_inner_function_ptr_name)
165 if (ConsumeToken(tok::r_paren))
166 if (ConsumeArguments()) {
167 SkipFunctionQualifiers();
168 start_position.Remove();
169 return maybe_inner_function_ptr_name;
170 }
171 return None;
172 }
173
ConsumeArguments()174 bool CPlusPlusNameParser::ConsumeArguments() {
175 return ConsumeBrackets(tok::l_paren, tok::r_paren);
176 }
177
ConsumeTemplateArgs()178 bool CPlusPlusNameParser::ConsumeTemplateArgs() {
179 Bookmark start_position = SetBookmark();
180 if (!HasMoreTokens() || Peek().getKind() != tok::less)
181 return false;
182 Advance();
183
184 // Consuming template arguments is a bit trickier than consuming function
185 // arguments, because '<' '>' brackets are not always trivially balanced. In
186 // some rare cases tokens '<' and '>' can appear inside template arguments as
187 // arithmetic or shift operators not as template brackets. Examples:
188 // std::enable_if<(10u)<(64), bool>
189 // f<A<operator<(X,Y)::Subclass>>
190 // Good thing that compiler makes sure that really ambiguous cases of '>'
191 // usage should be enclosed within '()' brackets.
192 int template_counter = 1;
193 bool can_open_template = false;
194 while (HasMoreTokens() && template_counter > 0) {
195 tok::TokenKind kind = Peek().getKind();
196 switch (kind) {
197 case tok::greatergreater:
198 template_counter -= 2;
199 can_open_template = false;
200 Advance();
201 break;
202 case tok::greater:
203 --template_counter;
204 can_open_template = false;
205 Advance();
206 break;
207 case tok::less:
208 // '<' is an attempt to open a subteamplte
209 // check if parser is at the point where it's actually possible,
210 // otherwise it's just a part of an expression like 'sizeof(T)<(10)'. No
211 // need to do the same for '>' because compiler actually makes sure that
212 // '>' always surrounded by brackets to avoid ambiguity.
213 if (can_open_template)
214 ++template_counter;
215 can_open_template = false;
216 Advance();
217 break;
218 case tok::kw_operator: // C++ operator overloading.
219 if (!ConsumeOperator())
220 return false;
221 can_open_template = true;
222 break;
223 case tok::raw_identifier:
224 can_open_template = true;
225 Advance();
226 break;
227 case tok::l_square:
228 if (!ConsumeBrackets(tok::l_square, tok::r_square))
229 return false;
230 can_open_template = false;
231 break;
232 case tok::l_paren:
233 if (!ConsumeArguments())
234 return false;
235 can_open_template = false;
236 break;
237 default:
238 can_open_template = false;
239 Advance();
240 break;
241 }
242 }
243
244 if (template_counter != 0) {
245 return false;
246 }
247 start_position.Remove();
248 return true;
249 }
250
ConsumeAnonymousNamespace()251 bool CPlusPlusNameParser::ConsumeAnonymousNamespace() {
252 Bookmark start_position = SetBookmark();
253 if (!ConsumeToken(tok::l_paren)) {
254 return false;
255 }
256 constexpr llvm::StringLiteral g_anonymous("anonymous");
257 if (HasMoreTokens() && Peek().is(tok::raw_identifier) &&
258 Peek().getRawIdentifier() == g_anonymous) {
259 Advance();
260 } else {
261 return false;
262 }
263
264 if (!ConsumeToken(tok::kw_namespace)) {
265 return false;
266 }
267
268 if (!ConsumeToken(tok::r_paren)) {
269 return false;
270 }
271 start_position.Remove();
272 return true;
273 }
274
ConsumeLambda()275 bool CPlusPlusNameParser::ConsumeLambda() {
276 Bookmark start_position = SetBookmark();
277 if (!ConsumeToken(tok::l_brace)) {
278 return false;
279 }
280 constexpr llvm::StringLiteral g_lambda("lambda");
281 if (HasMoreTokens() && Peek().is(tok::raw_identifier) &&
282 Peek().getRawIdentifier() == g_lambda) {
283 // Put the matched brace back so we can use ConsumeBrackets
284 TakeBack();
285 } else {
286 return false;
287 }
288
289 if (!ConsumeBrackets(tok::l_brace, tok::r_brace)) {
290 return false;
291 }
292
293 start_position.Remove();
294 return true;
295 }
296
ConsumeBrackets(tok::TokenKind left,tok::TokenKind right)297 bool CPlusPlusNameParser::ConsumeBrackets(tok::TokenKind left,
298 tok::TokenKind right) {
299 Bookmark start_position = SetBookmark();
300 if (!HasMoreTokens() || Peek().getKind() != left)
301 return false;
302 Advance();
303
304 int counter = 1;
305 while (HasMoreTokens() && counter > 0) {
306 tok::TokenKind kind = Peek().getKind();
307 if (kind == right)
308 --counter;
309 else if (kind == left)
310 ++counter;
311 Advance();
312 }
313
314 assert(counter >= 0);
315 if (counter > 0) {
316 return false;
317 }
318 start_position.Remove();
319 return true;
320 }
321
ConsumeOperator()322 bool CPlusPlusNameParser::ConsumeOperator() {
323 Bookmark start_position = SetBookmark();
324 if (!ConsumeToken(tok::kw_operator))
325 return false;
326
327 if (!HasMoreTokens()) {
328 return false;
329 }
330
331 const auto &token = Peek();
332
333 // When clang generates debug info it adds template parameters to names.
334 // Since clang doesn't add a space between the name and the template parameter
335 // in some cases we are not generating valid C++ names e.g.:
336 //
337 // operator<<A::B>
338 //
339 // In some of these cases we will not parse them correctly. This fixes the
340 // issue by detecting this case and inserting tok::less in place of
341 // tok::lessless and returning successfully that we consumed the operator.
342 if (token.getKind() == tok::lessless) {
343 // Make sure we have more tokens before attempting to look ahead one more.
344 if (m_next_token_index + 1 < m_tokens.size()) {
345 // Look ahead two tokens.
346 clang::Token n_token = m_tokens[m_next_token_index + 1];
347 // If we find ( or < then this is indeed operator<< no need for fix.
348 if (n_token.getKind() != tok::l_paren && n_token.getKind() != tok::less) {
349 clang::Token tmp_tok;
350 tmp_tok.startToken();
351 tmp_tok.setLength(1);
352 tmp_tok.setLocation(token.getLocation().getLocWithOffset(1));
353 tmp_tok.setKind(tok::less);
354
355 m_tokens[m_next_token_index] = tmp_tok;
356
357 start_position.Remove();
358 return true;
359 }
360 }
361 }
362
363 switch (token.getKind()) {
364 case tok::kw_new:
365 case tok::kw_delete:
366 // This is 'new' or 'delete' operators.
367 Advance();
368 // Check for array new/delete.
369 if (HasMoreTokens() && Peek().is(tok::l_square)) {
370 // Consume the '[' and ']'.
371 if (!ConsumeBrackets(tok::l_square, tok::r_square))
372 return false;
373 }
374 break;
375
376 #define OVERLOADED_OPERATOR(Name, Spelling, Token, Unary, Binary, MemberOnly) \
377 case tok::Token: \
378 Advance(); \
379 break;
380 #define OVERLOADED_OPERATOR_MULTI(Name, Spelling, Unary, Binary, MemberOnly)
381 #include "clang/Basic/OperatorKinds.def"
382 #undef OVERLOADED_OPERATOR
383 #undef OVERLOADED_OPERATOR_MULTI
384
385 case tok::l_paren:
386 // Call operator consume '(' ... ')'.
387 if (ConsumeBrackets(tok::l_paren, tok::r_paren))
388 break;
389 return false;
390
391 case tok::l_square:
392 // This is a [] operator.
393 // Consume the '[' and ']'.
394 if (ConsumeBrackets(tok::l_square, tok::r_square))
395 break;
396 return false;
397
398 default:
399 // This might be a cast operator.
400 if (ConsumeTypename())
401 break;
402 return false;
403 }
404 start_position.Remove();
405 return true;
406 }
407
SkipTypeQualifiers()408 void CPlusPlusNameParser::SkipTypeQualifiers() {
409 while (ConsumeToken(tok::kw_const, tok::kw_volatile))
410 ;
411 }
412
SkipFunctionQualifiers()413 void CPlusPlusNameParser::SkipFunctionQualifiers() {
414 while (ConsumeToken(tok::kw_const, tok::kw_volatile, tok::amp, tok::ampamp))
415 ;
416 }
417
ConsumeBuiltinType()418 bool CPlusPlusNameParser::ConsumeBuiltinType() {
419 bool result = false;
420 bool continue_parsing = true;
421 // Built-in types can be made of a few keywords like 'unsigned long long
422 // int'. This function consumes all built-in type keywords without checking
423 // if they make sense like 'unsigned char void'.
424 while (continue_parsing && HasMoreTokens()) {
425 switch (Peek().getKind()) {
426 case tok::kw_short:
427 case tok::kw_long:
428 case tok::kw___int64:
429 case tok::kw___int128:
430 case tok::kw_signed:
431 case tok::kw_unsigned:
432 case tok::kw_void:
433 case tok::kw_char:
434 case tok::kw_int:
435 case tok::kw_half:
436 case tok::kw_float:
437 case tok::kw_double:
438 case tok::kw___float128:
439 case tok::kw_wchar_t:
440 case tok::kw_bool:
441 case tok::kw_char16_t:
442 case tok::kw_char32_t:
443 result = true;
444 Advance();
445 break;
446 default:
447 continue_parsing = false;
448 break;
449 }
450 }
451 return result;
452 }
453
SkipPtrsAndRefs()454 void CPlusPlusNameParser::SkipPtrsAndRefs() {
455 // Ignoring result.
456 ConsumePtrsAndRefs();
457 }
458
ConsumePtrsAndRefs()459 bool CPlusPlusNameParser::ConsumePtrsAndRefs() {
460 bool found = false;
461 SkipTypeQualifiers();
462 while (ConsumeToken(tok::star, tok::amp, tok::ampamp, tok::kw_const,
463 tok::kw_volatile)) {
464 found = true;
465 SkipTypeQualifiers();
466 }
467 return found;
468 }
469
ConsumeDecltype()470 bool CPlusPlusNameParser::ConsumeDecltype() {
471 Bookmark start_position = SetBookmark();
472 if (!ConsumeToken(tok::kw_decltype))
473 return false;
474
475 if (!ConsumeArguments())
476 return false;
477
478 start_position.Remove();
479 return true;
480 }
481
ConsumeTypename()482 bool CPlusPlusNameParser::ConsumeTypename() {
483 Bookmark start_position = SetBookmark();
484 SkipTypeQualifiers();
485 if (!ConsumeBuiltinType() && !ConsumeDecltype()) {
486 if (!ParseFullNameImpl())
487 return false;
488 }
489 SkipPtrsAndRefs();
490 start_position.Remove();
491 return true;
492 }
493
494 Optional<CPlusPlusNameParser::ParsedNameRanges>
ParseFullNameImpl()495 CPlusPlusNameParser::ParseFullNameImpl() {
496 // Name parsing state machine.
497 enum class State {
498 Beginning, // start of the name
499 AfterTwoColons, // right after ::
500 AfterIdentifier, // right after alphanumerical identifier ([a-z0-9_]+)
501 AfterTemplate, // right after template brackets (<something>)
502 AfterOperator, // right after name of C++ operator
503 };
504
505 Bookmark start_position = SetBookmark();
506 State state = State::Beginning;
507 bool continue_parsing = true;
508 Optional<size_t> last_coloncolon_position = None;
509
510 while (continue_parsing && HasMoreTokens()) {
511 const auto &token = Peek();
512 switch (token.getKind()) {
513 case tok::raw_identifier: // Just a name.
514 if (state != State::Beginning && state != State::AfterTwoColons) {
515 continue_parsing = false;
516 break;
517 }
518 Advance();
519 state = State::AfterIdentifier;
520 break;
521 case tok::l_paren: {
522 if (state == State::Beginning || state == State::AfterTwoColons) {
523 // (anonymous namespace)
524 if (ConsumeAnonymousNamespace()) {
525 state = State::AfterIdentifier;
526 break;
527 }
528 }
529
530 // Type declared inside a function 'func()::Type'
531 if (state != State::AfterIdentifier && state != State::AfterTemplate &&
532 state != State::AfterOperator) {
533 continue_parsing = false;
534 break;
535 }
536 Bookmark l_paren_position = SetBookmark();
537 // Consume the '(' ... ') [const]'.
538 if (!ConsumeArguments()) {
539 continue_parsing = false;
540 break;
541 }
542 SkipFunctionQualifiers();
543
544 // Consume '::'
545 size_t coloncolon_position = GetCurrentPosition();
546 if (!ConsumeToken(tok::coloncolon)) {
547 continue_parsing = false;
548 break;
549 }
550 l_paren_position.Remove();
551 last_coloncolon_position = coloncolon_position;
552 state = State::AfterTwoColons;
553 break;
554 }
555 case tok::l_brace:
556 if (state == State::Beginning || state == State::AfterTwoColons) {
557 if (ConsumeLambda()) {
558 state = State::AfterIdentifier;
559 break;
560 }
561 }
562 continue_parsing = false;
563 break;
564 case tok::coloncolon: // Type nesting delimiter.
565 if (state != State::Beginning && state != State::AfterIdentifier &&
566 state != State::AfterTemplate) {
567 continue_parsing = false;
568 break;
569 }
570 last_coloncolon_position = GetCurrentPosition();
571 Advance();
572 state = State::AfterTwoColons;
573 break;
574 case tok::less: // Template brackets.
575 if (state != State::AfterIdentifier && state != State::AfterOperator) {
576 continue_parsing = false;
577 break;
578 }
579 if (!ConsumeTemplateArgs()) {
580 continue_parsing = false;
581 break;
582 }
583 state = State::AfterTemplate;
584 break;
585 case tok::kw_operator: // C++ operator overloading.
586 if (state != State::Beginning && state != State::AfterTwoColons) {
587 continue_parsing = false;
588 break;
589 }
590 if (!ConsumeOperator()) {
591 continue_parsing = false;
592 break;
593 }
594 state = State::AfterOperator;
595 break;
596 case tok::tilde: // Destructor.
597 if (state != State::Beginning && state != State::AfterTwoColons) {
598 continue_parsing = false;
599 break;
600 }
601 Advance();
602 if (ConsumeToken(tok::raw_identifier)) {
603 state = State::AfterIdentifier;
604 } else {
605 TakeBack();
606 continue_parsing = false;
607 }
608 break;
609 default:
610 continue_parsing = false;
611 break;
612 }
613 }
614
615 if (state == State::AfterIdentifier || state == State::AfterOperator ||
616 state == State::AfterTemplate) {
617 ParsedNameRanges result;
618 if (last_coloncolon_position) {
619 result.context_range = Range(start_position.GetSavedPosition(),
620 last_coloncolon_position.getValue());
621 result.basename_range =
622 Range(last_coloncolon_position.getValue() + 1, GetCurrentPosition());
623 } else {
624 result.basename_range =
625 Range(start_position.GetSavedPosition(), GetCurrentPosition());
626 }
627 start_position.Remove();
628 return result;
629 } else {
630 return None;
631 }
632 }
633
GetTextForRange(const Range & range)634 llvm::StringRef CPlusPlusNameParser::GetTextForRange(const Range &range) {
635 if (range.empty())
636 return llvm::StringRef();
637 assert(range.begin_index < range.end_index);
638 assert(range.begin_index < m_tokens.size());
639 assert(range.end_index <= m_tokens.size());
640 clang::Token &first_token = m_tokens[range.begin_index];
641 clang::Token &last_token = m_tokens[range.end_index - 1];
642 clang::SourceLocation start_loc = first_token.getLocation();
643 clang::SourceLocation end_loc = last_token.getLocation();
644 unsigned start_pos = start_loc.getRawEncoding();
645 unsigned end_pos = end_loc.getRawEncoding() + last_token.getLength();
646 return m_text.take_front(end_pos).drop_front(start_pos);
647 }
648
GetLangOptions()649 static const clang::LangOptions &GetLangOptions() {
650 static clang::LangOptions g_options;
651 static llvm::once_flag g_once_flag;
652 llvm::call_once(g_once_flag, []() {
653 g_options.LineComment = true;
654 g_options.C99 = true;
655 g_options.C11 = true;
656 g_options.CPlusPlus = true;
657 g_options.CPlusPlus11 = true;
658 g_options.CPlusPlus14 = true;
659 g_options.CPlusPlus17 = true;
660 });
661 return g_options;
662 }
663
GetKeywordsMap()664 static const llvm::StringMap<tok::TokenKind> &GetKeywordsMap() {
665 static llvm::StringMap<tok::TokenKind> g_map{
666 #define KEYWORD(Name, Flags) {llvm::StringRef(#Name), tok::kw_##Name},
667 #include "clang/Basic/TokenKinds.def"
668 #undef KEYWORD
669 };
670 return g_map;
671 }
672
ExtractTokens()673 void CPlusPlusNameParser::ExtractTokens() {
674 if (m_text.empty())
675 return;
676 clang::Lexer lexer(clang::SourceLocation(), GetLangOptions(), m_text.data(),
677 m_text.data(), m_text.data() + m_text.size());
678 const auto &kw_map = GetKeywordsMap();
679 clang::Token token;
680 for (lexer.LexFromRawLexer(token); !token.is(clang::tok::eof);
681 lexer.LexFromRawLexer(token)) {
682 if (token.is(clang::tok::raw_identifier)) {
683 auto it = kw_map.find(token.getRawIdentifier());
684 if (it != kw_map.end()) {
685 token.setKind(it->getValue());
686 }
687 }
688
689 m_tokens.push_back(token);
690 }
691 }
692