• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2018 The Abseil Authors.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 // For reference check out:
16 // https://itanium-cxx-abi.github.io/cxx-abi/abi.html#mangling
17 //
18 // Note that we only have partial C++11 support yet.
19 
20 #include "absl/debugging/internal/demangle.h"
21 
22 #include <cstdint>
23 #include <cstdio>
24 #include <limits>
25 
26 namespace absl {
27 ABSL_NAMESPACE_BEGIN
28 namespace debugging_internal {
29 
30 typedef struct {
31   const char *abbrev;
32   const char *real_name;
33   // Number of arguments in <expression> context, or 0 if disallowed.
34   int arity;
35 } AbbrevPair;
36 
37 // List of operators from Itanium C++ ABI.
38 static const AbbrevPair kOperatorList[] = {
39     // New has special syntax (not currently supported).
40     {"nw", "new", 0},
41     {"na", "new[]", 0},
42 
43     // Works except that the 'gs' prefix is not supported.
44     {"dl", "delete", 1},
45     {"da", "delete[]", 1},
46 
47     {"ps", "+", 1},  // "positive"
48     {"ng", "-", 1},  // "negative"
49     {"ad", "&", 1},  // "address-of"
50     {"de", "*", 1},  // "dereference"
51     {"co", "~", 1},
52 
53     {"pl", "+", 2},
54     {"mi", "-", 2},
55     {"ml", "*", 2},
56     {"dv", "/", 2},
57     {"rm", "%", 2},
58     {"an", "&", 2},
59     {"or", "|", 2},
60     {"eo", "^", 2},
61     {"aS", "=", 2},
62     {"pL", "+=", 2},
63     {"mI", "-=", 2},
64     {"mL", "*=", 2},
65     {"dV", "/=", 2},
66     {"rM", "%=", 2},
67     {"aN", "&=", 2},
68     {"oR", "|=", 2},
69     {"eO", "^=", 2},
70     {"ls", "<<", 2},
71     {"rs", ">>", 2},
72     {"lS", "<<=", 2},
73     {"rS", ">>=", 2},
74     {"eq", "==", 2},
75     {"ne", "!=", 2},
76     {"lt", "<", 2},
77     {"gt", ">", 2},
78     {"le", "<=", 2},
79     {"ge", ">=", 2},
80     {"nt", "!", 1},
81     {"aa", "&&", 2},
82     {"oo", "||", 2},
83     {"pp", "++", 1},
84     {"mm", "--", 1},
85     {"cm", ",", 2},
86     {"pm", "->*", 2},
87     {"pt", "->", 0},  // Special syntax
88     {"cl", "()", 0},  // Special syntax
89     {"ix", "[]", 2},
90     {"qu", "?", 3},
91     {"st", "sizeof", 0},  // Special syntax
92     {"sz", "sizeof", 1},  // Not a real operator name, but used in expressions.
93     {nullptr, nullptr, 0},
94 };
95 
96 // List of builtin types from Itanium C++ ABI.
97 //
98 // Invariant: only one- or two-character type abbreviations here.
99 static const AbbrevPair kBuiltinTypeList[] = {
100     {"v", "void", 0},
101     {"w", "wchar_t", 0},
102     {"b", "bool", 0},
103     {"c", "char", 0},
104     {"a", "signed char", 0},
105     {"h", "unsigned char", 0},
106     {"s", "short", 0},
107     {"t", "unsigned short", 0},
108     {"i", "int", 0},
109     {"j", "unsigned int", 0},
110     {"l", "long", 0},
111     {"m", "unsigned long", 0},
112     {"x", "long long", 0},
113     {"y", "unsigned long long", 0},
114     {"n", "__int128", 0},
115     {"o", "unsigned __int128", 0},
116     {"f", "float", 0},
117     {"d", "double", 0},
118     {"e", "long double", 0},
119     {"g", "__float128", 0},
120     {"z", "ellipsis", 0},
121 
122     {"De", "decimal128", 0},      // IEEE 754r decimal floating point (128 bits)
123     {"Dd", "decimal64", 0},       // IEEE 754r decimal floating point (64 bits)
124     {"Dc", "decltype(auto)", 0},
125     {"Da", "auto", 0},
126     {"Dn", "std::nullptr_t", 0},  // i.e., decltype(nullptr)
127     {"Df", "decimal32", 0},       // IEEE 754r decimal floating point (32 bits)
128     {"Di", "char32_t", 0},
129     {"Ds", "char16_t", 0},
130     {"Dh", "float16", 0},         // IEEE 754r half-precision float (16 bits)
131     {nullptr, nullptr, 0},
132 };
133 
134 // List of substitutions Itanium C++ ABI.
135 static const AbbrevPair kSubstitutionList[] = {
136     {"St", "", 0},
137     {"Sa", "allocator", 0},
138     {"Sb", "basic_string", 0},
139     // std::basic_string<char, std::char_traits<char>,std::allocator<char> >
140     {"Ss", "string", 0},
141     // std::basic_istream<char, std::char_traits<char> >
142     {"Si", "istream", 0},
143     // std::basic_ostream<char, std::char_traits<char> >
144     {"So", "ostream", 0},
145     // std::basic_iostream<char, std::char_traits<char> >
146     {"Sd", "iostream", 0},
147     {nullptr, nullptr, 0},
148 };
149 
150 // State needed for demangling.  This struct is copied in almost every stack
151 // frame, so every byte counts.
152 typedef struct {
153   int mangled_idx;                   // Cursor of mangled name.
154   int out_cur_idx;                   // Cursor of output std::string.
155   int prev_name_idx;                 // For constructors/destructors.
156   signed int prev_name_length : 16;  // For constructors/destructors.
157   signed int nest_level : 15;        // For nested names.
158   unsigned int append : 1;           // Append flag.
159   // Note: for some reason MSVC can't pack "bool append : 1" into the same int
160   // with the above two fields, so we use an int instead.  Amusingly it can pack
161   // "signed bool" as expected, but relying on that to continue to be a legal
162   // type seems ill-advised (as it's illegal in at least clang).
163 } ParseState;
164 
165 static_assert(sizeof(ParseState) == 4 * sizeof(int),
166               "unexpected size of ParseState");
167 
168 // One-off state for demangling that's not subject to backtracking -- either
169 // constant data, data that's intentionally immune to backtracking (steps), or
170 // data that would never be changed by backtracking anyway (recursion_depth).
171 //
172 // Only one copy of this exists for each call to Demangle, so the size of this
173 // struct is nearly inconsequential.
174 typedef struct {
175   const char *mangled_begin;  // Beginning of input std::string.
176   char *out;                  // Beginning of output std::string.
177   int out_end_idx;            // One past last allowed output character.
178   int recursion_depth;        // For stack exhaustion prevention.
179   int steps;               // Cap how much work we'll do, regardless of depth.
180   ParseState parse_state;  // Backtrackable state copied for most frames.
181 } State;
182 
183 namespace {
184 // Prevent deep recursion / stack exhaustion.
185 // Also prevent unbounded handling of complex inputs.
186 class ComplexityGuard {
187  public:
ComplexityGuard(State * state)188   explicit ComplexityGuard(State *state) : state_(state) {
189     ++state->recursion_depth;
190     ++state->steps;
191   }
~ComplexityGuard()192   ~ComplexityGuard() { --state_->recursion_depth; }
193 
194   // 256 levels of recursion seems like a reasonable upper limit on depth.
195   // 128 is not enough to demagle synthetic tests from demangle_unittest.txt:
196   // "_ZaaZZZZ..." and "_ZaaZcvZcvZ..."
197   static constexpr int kRecursionDepthLimit = 256;
198 
199   // We're trying to pick a charitable upper-limit on how many parse steps are
200   // necessary to handle something that a human could actually make use of.
201   // This is mostly in place as a bound on how much work we'll do if we are
202   // asked to demangle an mangled name from an untrusted source, so it should be
203   // much larger than the largest expected symbol, but much smaller than the
204   // amount of work we can do in, e.g., a second.
205   //
206   // Some real-world symbols from an arbitrary binary started failing between
207   // 2^12 and 2^13, so we multiply the latter by an extra factor of 16 to set
208   // the limit.
209   //
210   // Spending one second on 2^17 parse steps would require each step to take
211   // 7.6us, or ~30000 clock cycles, so it's safe to say this can be done in
212   // under a second.
213   static constexpr int kParseStepsLimit = 1 << 17;
214 
IsTooComplex() const215   bool IsTooComplex() const {
216     return state_->recursion_depth > kRecursionDepthLimit ||
217            state_->steps > kParseStepsLimit;
218   }
219 
220  private:
221   State *state_;
222 };
223 }  // namespace
224 
225 // We don't use strlen() in libc since it's not guaranteed to be async
226 // signal safe.
StrLen(const char * str)227 static size_t StrLen(const char *str) {
228   size_t len = 0;
229   while (*str != '\0') {
230     ++str;
231     ++len;
232   }
233   return len;
234 }
235 
236 // Returns true if "str" has at least "n" characters remaining.
AtLeastNumCharsRemaining(const char * str,int n)237 static bool AtLeastNumCharsRemaining(const char *str, int n) {
238   for (int i = 0; i < n; ++i) {
239     if (str[i] == '\0') {
240       return false;
241     }
242   }
243   return true;
244 }
245 
246 // Returns true if "str" has "prefix" as a prefix.
StrPrefix(const char * str,const char * prefix)247 static bool StrPrefix(const char *str, const char *prefix) {
248   size_t i = 0;
249   while (str[i] != '\0' && prefix[i] != '\0' && str[i] == prefix[i]) {
250     ++i;
251   }
252   return prefix[i] == '\0';  // Consumed everything in "prefix".
253 }
254 
InitState(State * state,const char * mangled,char * out,int out_size)255 static void InitState(State *state, const char *mangled, char *out,
256                       int out_size) {
257   state->mangled_begin = mangled;
258   state->out = out;
259   state->out_end_idx = out_size;
260   state->recursion_depth = 0;
261   state->steps = 0;
262 
263   state->parse_state.mangled_idx = 0;
264   state->parse_state.out_cur_idx = 0;
265   state->parse_state.prev_name_idx = 0;
266   state->parse_state.prev_name_length = -1;
267   state->parse_state.nest_level = -1;
268   state->parse_state.append = true;
269 }
270 
RemainingInput(State * state)271 static inline const char *RemainingInput(State *state) {
272   return &state->mangled_begin[state->parse_state.mangled_idx];
273 }
274 
275 // Returns true and advances "mangled_idx" if we find "one_char_token"
276 // at "mangled_idx" position.  It is assumed that "one_char_token" does
277 // not contain '\0'.
ParseOneCharToken(State * state,const char one_char_token)278 static bool ParseOneCharToken(State *state, const char one_char_token) {
279   ComplexityGuard guard(state);
280   if (guard.IsTooComplex()) return false;
281   if (RemainingInput(state)[0] == one_char_token) {
282     ++state->parse_state.mangled_idx;
283     return true;
284   }
285   return false;
286 }
287 
288 // Returns true and advances "mangled_cur" if we find "two_char_token"
289 // at "mangled_cur" position.  It is assumed that "two_char_token" does
290 // not contain '\0'.
ParseTwoCharToken(State * state,const char * two_char_token)291 static bool ParseTwoCharToken(State *state, const char *two_char_token) {
292   ComplexityGuard guard(state);
293   if (guard.IsTooComplex()) return false;
294   if (RemainingInput(state)[0] == two_char_token[0] &&
295       RemainingInput(state)[1] == two_char_token[1]) {
296     state->parse_state.mangled_idx += 2;
297     return true;
298   }
299   return false;
300 }
301 
302 // Returns true and advances "mangled_cur" if we find any character in
303 // "char_class" at "mangled_cur" position.
ParseCharClass(State * state,const char * char_class)304 static bool ParseCharClass(State *state, const char *char_class) {
305   ComplexityGuard guard(state);
306   if (guard.IsTooComplex()) return false;
307   if (RemainingInput(state)[0] == '\0') {
308     return false;
309   }
310   const char *p = char_class;
311   for (; *p != '\0'; ++p) {
312     if (RemainingInput(state)[0] == *p) {
313       ++state->parse_state.mangled_idx;
314       return true;
315     }
316   }
317   return false;
318 }
319 
ParseDigit(State * state,int * digit)320 static bool ParseDigit(State *state, int *digit) {
321   char c = RemainingInput(state)[0];
322   if (ParseCharClass(state, "0123456789")) {
323     if (digit != nullptr) {
324       *digit = c - '0';
325     }
326     return true;
327   }
328   return false;
329 }
330 
331 // This function is used for handling an optional non-terminal.
Optional(bool)332 static bool Optional(bool /*status*/) { return true; }
333 
334 // This function is used for handling <non-terminal>+ syntax.
335 typedef bool (*ParseFunc)(State *);
OneOrMore(ParseFunc parse_func,State * state)336 static bool OneOrMore(ParseFunc parse_func, State *state) {
337   if (parse_func(state)) {
338     while (parse_func(state)) {
339     }
340     return true;
341   }
342   return false;
343 }
344 
345 // This function is used for handling <non-terminal>* syntax. The function
346 // always returns true and must be followed by a termination token or a
347 // terminating sequence not handled by parse_func (e.g.
348 // ParseOneCharToken(state, 'E')).
ZeroOrMore(ParseFunc parse_func,State * state)349 static bool ZeroOrMore(ParseFunc parse_func, State *state) {
350   while (parse_func(state)) {
351   }
352   return true;
353 }
354 
355 // Append "str" at "out_cur_idx".  If there is an overflow, out_cur_idx is
356 // set to out_end_idx+1.  The output string is ensured to
357 // always terminate with '\0' as long as there is no overflow.
Append(State * state,const char * const str,const int length)358 static void Append(State *state, const char *const str, const int length) {
359   for (int i = 0; i < length; ++i) {
360     if (state->parse_state.out_cur_idx + 1 <
361         state->out_end_idx) {  // +1 for '\0'
362       state->out[state->parse_state.out_cur_idx++] = str[i];
363     } else {
364       // signal overflow
365       state->parse_state.out_cur_idx = state->out_end_idx + 1;
366       break;
367     }
368   }
369   if (state->parse_state.out_cur_idx < state->out_end_idx) {
370     state->out[state->parse_state.out_cur_idx] =
371         '\0';  // Terminate it with '\0'
372   }
373 }
374 
375 // We don't use equivalents in libc to avoid locale issues.
IsLower(char c)376 static bool IsLower(char c) { return c >= 'a' && c <= 'z'; }
377 
IsAlpha(char c)378 static bool IsAlpha(char c) {
379   return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
380 }
381 
IsDigit(char c)382 static bool IsDigit(char c) { return c >= '0' && c <= '9'; }
383 
384 // Returns true if "str" is a function clone suffix.  These suffixes are used
385 // by GCC 4.5.x and later versions (and our locally-modified version of GCC
386 // 4.4.x) to indicate functions which have been cloned during optimization.
387 // We treat any sequence (.<alpha>+.<digit>+)+ as a function clone suffix.
IsFunctionCloneSuffix(const char * str)388 static bool IsFunctionCloneSuffix(const char *str) {
389   size_t i = 0;
390   while (str[i] != '\0') {
391     // Consume a single .<alpha>+.<digit>+ sequence.
392     if (str[i] != '.' || !IsAlpha(str[i + 1])) {
393       return false;
394     }
395     i += 2;
396     while (IsAlpha(str[i])) {
397       ++i;
398     }
399     if (str[i] != '.' || !IsDigit(str[i + 1])) {
400       return false;
401     }
402     i += 2;
403     while (IsDigit(str[i])) {
404       ++i;
405     }
406   }
407   return true;  // Consumed everything in "str".
408 }
409 
EndsWith(State * state,const char chr)410 static bool EndsWith(State *state, const char chr) {
411   return state->parse_state.out_cur_idx > 0 &&
412          chr == state->out[state->parse_state.out_cur_idx - 1];
413 }
414 
415 // Append "str" with some tweaks, iff "append" state is true.
MaybeAppendWithLength(State * state,const char * const str,const int length)416 static void MaybeAppendWithLength(State *state, const char *const str,
417                                   const int length) {
418   if (state->parse_state.append && length > 0) {
419     // Append a space if the output buffer ends with '<' and "str"
420     // starts with '<' to avoid <<<.
421     if (str[0] == '<' && EndsWith(state, '<')) {
422       Append(state, " ", 1);
423     }
424     // Remember the last identifier name for ctors/dtors.
425     if (IsAlpha(str[0]) || str[0] == '_') {
426       state->parse_state.prev_name_idx = state->parse_state.out_cur_idx;
427       state->parse_state.prev_name_length = length;
428     }
429     Append(state, str, length);
430   }
431 }
432 
433 // Appends a positive decimal number to the output if appending is enabled.
MaybeAppendDecimal(State * state,unsigned int val)434 static bool MaybeAppendDecimal(State *state, unsigned int val) {
435   // Max {32-64}-bit unsigned int is 20 digits.
436   constexpr size_t kMaxLength = 20;
437   char buf[kMaxLength];
438 
439   // We can't use itoa or sprintf as neither is specified to be
440   // async-signal-safe.
441   if (state->parse_state.append) {
442     // We can't have a one-before-the-beginning pointer, so instead start with
443     // one-past-the-end and manipulate one character before the pointer.
444     char *p = &buf[kMaxLength];
445     do {  // val=0 is the only input that should write a leading zero digit.
446       *--p = (val % 10) + '0';
447       val /= 10;
448     } while (p > buf && val != 0);
449 
450     // 'p' landed on the last character we set.  How convenient.
451     Append(state, p, kMaxLength - (p - buf));
452   }
453 
454   return true;
455 }
456 
457 // A convenient wrapper around MaybeAppendWithLength().
458 // Returns true so that it can be placed in "if" conditions.
MaybeAppend(State * state,const char * const str)459 static bool MaybeAppend(State *state, const char *const str) {
460   if (state->parse_state.append) {
461     int length = StrLen(str);
462     MaybeAppendWithLength(state, str, length);
463   }
464   return true;
465 }
466 
467 // This function is used for handling nested names.
EnterNestedName(State * state)468 static bool EnterNestedName(State *state) {
469   state->parse_state.nest_level = 0;
470   return true;
471 }
472 
473 // This function is used for handling nested names.
LeaveNestedName(State * state,int16_t prev_value)474 static bool LeaveNestedName(State *state, int16_t prev_value) {
475   state->parse_state.nest_level = prev_value;
476   return true;
477 }
478 
479 // Disable the append mode not to print function parameters, etc.
DisableAppend(State * state)480 static bool DisableAppend(State *state) {
481   state->parse_state.append = false;
482   return true;
483 }
484 
485 // Restore the append mode to the previous state.
RestoreAppend(State * state,bool prev_value)486 static bool RestoreAppend(State *state, bool prev_value) {
487   state->parse_state.append = prev_value;
488   return true;
489 }
490 
491 // Increase the nest level for nested names.
MaybeIncreaseNestLevel(State * state)492 static void MaybeIncreaseNestLevel(State *state) {
493   if (state->parse_state.nest_level > -1) {
494     ++state->parse_state.nest_level;
495   }
496 }
497 
498 // Appends :: for nested names if necessary.
MaybeAppendSeparator(State * state)499 static void MaybeAppendSeparator(State *state) {
500   if (state->parse_state.nest_level >= 1) {
501     MaybeAppend(state, "::");
502   }
503 }
504 
505 // Cancel the last separator if necessary.
MaybeCancelLastSeparator(State * state)506 static void MaybeCancelLastSeparator(State *state) {
507   if (state->parse_state.nest_level >= 1 && state->parse_state.append &&
508       state->parse_state.out_cur_idx >= 2) {
509     state->parse_state.out_cur_idx -= 2;
510     state->out[state->parse_state.out_cur_idx] = '\0';
511   }
512 }
513 
514 // Returns true if the identifier of the given length pointed to by
515 // "mangled_cur" is anonymous namespace.
IdentifierIsAnonymousNamespace(State * state,int length)516 static bool IdentifierIsAnonymousNamespace(State *state, int length) {
517   // Returns true if "anon_prefix" is a proper prefix of "mangled_cur".
518   static const char anon_prefix[] = "_GLOBAL__N_";
519   return (length > static_cast<int>(sizeof(anon_prefix) - 1) &&
520           StrPrefix(RemainingInput(state), anon_prefix));
521 }
522 
523 // Forward declarations of our parsing functions.
524 static bool ParseMangledName(State *state);
525 static bool ParseEncoding(State *state);
526 static bool ParseName(State *state);
527 static bool ParseUnscopedName(State *state);
528 static bool ParseNestedName(State *state);
529 static bool ParsePrefix(State *state);
530 static bool ParseUnqualifiedName(State *state);
531 static bool ParseSourceName(State *state);
532 static bool ParseLocalSourceName(State *state);
533 static bool ParseUnnamedTypeName(State *state);
534 static bool ParseNumber(State *state, int *number_out);
535 static bool ParseFloatNumber(State *state);
536 static bool ParseSeqId(State *state);
537 static bool ParseIdentifier(State *state, int length);
538 static bool ParseOperatorName(State *state, int *arity);
539 static bool ParseSpecialName(State *state);
540 static bool ParseCallOffset(State *state);
541 static bool ParseNVOffset(State *state);
542 static bool ParseVOffset(State *state);
543 static bool ParseCtorDtorName(State *state);
544 static bool ParseDecltype(State *state);
545 static bool ParseType(State *state);
546 static bool ParseCVQualifiers(State *state);
547 static bool ParseBuiltinType(State *state);
548 static bool ParseFunctionType(State *state);
549 static bool ParseBareFunctionType(State *state);
550 static bool ParseClassEnumType(State *state);
551 static bool ParseArrayType(State *state);
552 static bool ParsePointerToMemberType(State *state);
553 static bool ParseTemplateParam(State *state);
554 static bool ParseTemplateTemplateParam(State *state);
555 static bool ParseTemplateArgs(State *state);
556 static bool ParseTemplateArg(State *state);
557 static bool ParseBaseUnresolvedName(State *state);
558 static bool ParseUnresolvedName(State *state);
559 static bool ParseExpression(State *state);
560 static bool ParseExprPrimary(State *state);
561 static bool ParseExprCastValue(State *state);
562 static bool ParseLocalName(State *state);
563 static bool ParseLocalNameSuffix(State *state);
564 static bool ParseDiscriminator(State *state);
565 static bool ParseSubstitution(State *state, bool accept_std);
566 
567 // Implementation note: the following code is a straightforward
568 // translation of the Itanium C++ ABI defined in BNF with a couple of
569 // exceptions.
570 //
571 // - Support GNU extensions not defined in the Itanium C++ ABI
572 // - <prefix> and <template-prefix> are combined to avoid infinite loop
573 // - Reorder patterns to shorten the code
574 // - Reorder patterns to give greedier functions precedence
575 //   We'll mark "Less greedy than" for these cases in the code
576 //
577 // Each parsing function changes the parse state and returns true on
578 // success, or returns false and doesn't change the parse state (note:
579 // the parse-steps counter increases regardless of success or failure).
580 // To ensure that the parse state isn't changed in the latter case, we
581 // save the original state before we call multiple parsing functions
582 // consecutively with &&, and restore it if unsuccessful.  See
583 // ParseEncoding() as an example of this convention.  We follow the
584 // convention throughout the code.
585 //
586 // Originally we tried to do demangling without following the full ABI
587 // syntax but it turned out we needed to follow the full syntax to
588 // parse complicated cases like nested template arguments.  Note that
589 // implementing a full-fledged demangler isn't trivial (libiberty's
590 // cp-demangle.c has +4300 lines).
591 //
592 // Note that (foo) in <(foo) ...> is a modifier to be ignored.
593 //
594 // Reference:
595 // - Itanium C++ ABI
596 //   <https://mentorembedded.github.io/cxx-abi/abi.html#mangling>
597 
598 // <mangled-name> ::= _Z <encoding>
ParseMangledName(State * state)599 static bool ParseMangledName(State *state) {
600   ComplexityGuard guard(state);
601   if (guard.IsTooComplex()) return false;
602   return ParseTwoCharToken(state, "_Z") && ParseEncoding(state);
603 }
604 
605 // <encoding> ::= <(function) name> <bare-function-type>
606 //            ::= <(data) name>
607 //            ::= <special-name>
ParseEncoding(State * state)608 static bool ParseEncoding(State *state) {
609   ComplexityGuard guard(state);
610   if (guard.IsTooComplex()) return false;
611   // Implementing the first two productions together as <name>
612   // [<bare-function-type>] avoids exponential blowup of backtracking.
613   //
614   // Since Optional(...) can't fail, there's no need to copy the state for
615   // backtracking.
616   if (ParseName(state) && Optional(ParseBareFunctionType(state))) {
617     return true;
618   }
619 
620   if (ParseSpecialName(state)) {
621     return true;
622   }
623   return false;
624 }
625 
626 // <name> ::= <nested-name>
627 //        ::= <unscoped-template-name> <template-args>
628 //        ::= <unscoped-name>
629 //        ::= <local-name>
ParseName(State * state)630 static bool ParseName(State *state) {
631   ComplexityGuard guard(state);
632   if (guard.IsTooComplex()) return false;
633   if (ParseNestedName(state) || ParseLocalName(state)) {
634     return true;
635   }
636 
637   // We reorganize the productions to avoid re-parsing unscoped names.
638   // - Inline <unscoped-template-name> productions:
639   //   <name> ::= <substitution> <template-args>
640   //          ::= <unscoped-name> <template-args>
641   //          ::= <unscoped-name>
642   // - Merge the two productions that start with unscoped-name:
643   //   <name> ::= <unscoped-name> [<template-args>]
644 
645   ParseState copy = state->parse_state;
646   // "std<...>" isn't a valid name.
647   if (ParseSubstitution(state, /*accept_std=*/false) &&
648       ParseTemplateArgs(state)) {
649     return true;
650   }
651   state->parse_state = copy;
652 
653   // Note there's no need to restore state after this since only the first
654   // subparser can fail.
655   return ParseUnscopedName(state) && Optional(ParseTemplateArgs(state));
656 }
657 
658 // <unscoped-name> ::= <unqualified-name>
659 //                 ::= St <unqualified-name>
ParseUnscopedName(State * state)660 static bool ParseUnscopedName(State *state) {
661   ComplexityGuard guard(state);
662   if (guard.IsTooComplex()) return false;
663   if (ParseUnqualifiedName(state)) {
664     return true;
665   }
666 
667   ParseState copy = state->parse_state;
668   if (ParseTwoCharToken(state, "St") && MaybeAppend(state, "std::") &&
669       ParseUnqualifiedName(state)) {
670     return true;
671   }
672   state->parse_state = copy;
673   return false;
674 }
675 
676 // <ref-qualifer> ::= R // lvalue method reference qualifier
677 //                ::= O // rvalue method reference qualifier
ParseRefQualifier(State * state)678 static inline bool ParseRefQualifier(State *state) {
679   return ParseCharClass(state, "OR");
680 }
681 
682 // <nested-name> ::= N [<CV-qualifiers>] [<ref-qualifier>] <prefix>
683 //                   <unqualified-name> E
684 //               ::= N [<CV-qualifiers>] [<ref-qualifier>] <template-prefix>
685 //                   <template-args> E
ParseNestedName(State * state)686 static bool ParseNestedName(State *state) {
687   ComplexityGuard guard(state);
688   if (guard.IsTooComplex()) return false;
689   ParseState copy = state->parse_state;
690   if (ParseOneCharToken(state, 'N') && EnterNestedName(state) &&
691       Optional(ParseCVQualifiers(state)) &&
692       Optional(ParseRefQualifier(state)) && ParsePrefix(state) &&
693       LeaveNestedName(state, copy.nest_level) &&
694       ParseOneCharToken(state, 'E')) {
695     return true;
696   }
697   state->parse_state = copy;
698   return false;
699 }
700 
701 // This part is tricky.  If we literally translate them to code, we'll
702 // end up infinite loop.  Hence we merge them to avoid the case.
703 //
704 // <prefix> ::= <prefix> <unqualified-name>
705 //          ::= <template-prefix> <template-args>
706 //          ::= <template-param>
707 //          ::= <substitution>
708 //          ::= # empty
709 // <template-prefix> ::= <prefix> <(template) unqualified-name>
710 //                   ::= <template-param>
711 //                   ::= <substitution>
ParsePrefix(State * state)712 static bool ParsePrefix(State *state) {
713   ComplexityGuard guard(state);
714   if (guard.IsTooComplex()) return false;
715   bool has_something = false;
716   while (true) {
717     MaybeAppendSeparator(state);
718     if (ParseTemplateParam(state) ||
719         ParseSubstitution(state, /*accept_std=*/true) ||
720         ParseUnscopedName(state) ||
721         (ParseOneCharToken(state, 'M') && ParseUnnamedTypeName(state))) {
722       has_something = true;
723       MaybeIncreaseNestLevel(state);
724       continue;
725     }
726     MaybeCancelLastSeparator(state);
727     if (has_something && ParseTemplateArgs(state)) {
728       return ParsePrefix(state);
729     } else {
730       break;
731     }
732   }
733   return true;
734 }
735 
736 // <unqualified-name> ::= <operator-name>
737 //                    ::= <ctor-dtor-name>
738 //                    ::= <source-name>
739 //                    ::= <local-source-name> // GCC extension; see below.
740 //                    ::= <unnamed-type-name>
ParseUnqualifiedName(State * state)741 static bool ParseUnqualifiedName(State *state) {
742   ComplexityGuard guard(state);
743   if (guard.IsTooComplex()) return false;
744   return (ParseOperatorName(state, nullptr) || ParseCtorDtorName(state) ||
745           ParseSourceName(state) || ParseLocalSourceName(state) ||
746           ParseUnnamedTypeName(state));
747 }
748 
749 // <source-name> ::= <positive length number> <identifier>
ParseSourceName(State * state)750 static bool ParseSourceName(State *state) {
751   ComplexityGuard guard(state);
752   if (guard.IsTooComplex()) return false;
753   ParseState copy = state->parse_state;
754   int length = -1;
755   if (ParseNumber(state, &length) && ParseIdentifier(state, length)) {
756     return true;
757   }
758   state->parse_state = copy;
759   return false;
760 }
761 
762 // <local-source-name> ::= L <source-name> [<discriminator>]
763 //
764 // References:
765 //   https://gcc.gnu.org/bugzilla/show_bug.cgi?id=31775
766 //   https://gcc.gnu.org/viewcvs?view=rev&revision=124467
ParseLocalSourceName(State * state)767 static bool ParseLocalSourceName(State *state) {
768   ComplexityGuard guard(state);
769   if (guard.IsTooComplex()) return false;
770   ParseState copy = state->parse_state;
771   if (ParseOneCharToken(state, 'L') && ParseSourceName(state) &&
772       Optional(ParseDiscriminator(state))) {
773     return true;
774   }
775   state->parse_state = copy;
776   return false;
777 }
778 
779 // <unnamed-type-name> ::= Ut [<(nonnegative) number>] _
780 //                     ::= <closure-type-name>
781 // <closure-type-name> ::= Ul <lambda-sig> E [<(nonnegative) number>] _
782 // <lambda-sig>        ::= <(parameter) type>+
ParseUnnamedTypeName(State * state)783 static bool ParseUnnamedTypeName(State *state) {
784   ComplexityGuard guard(state);
785   if (guard.IsTooComplex()) return false;
786   ParseState copy = state->parse_state;
787   // Type's 1-based index n is encoded as { "", n == 1; itoa(n-2), otherwise }.
788   // Optionally parse the encoded value into 'which' and add 2 to get the index.
789   int which = -1;
790 
791   // Unnamed type local to function or class.
792   if (ParseTwoCharToken(state, "Ut") && Optional(ParseNumber(state, &which)) &&
793       which <= std::numeric_limits<int>::max() - 2 &&  // Don't overflow.
794       ParseOneCharToken(state, '_')) {
795     MaybeAppend(state, "{unnamed type#");
796     MaybeAppendDecimal(state, 2 + which);
797     MaybeAppend(state, "}");
798     return true;
799   }
800   state->parse_state = copy;
801 
802   // Closure type.
803   which = -1;
804   if (ParseTwoCharToken(state, "Ul") && DisableAppend(state) &&
805       OneOrMore(ParseType, state) && RestoreAppend(state, copy.append) &&
806       ParseOneCharToken(state, 'E') && Optional(ParseNumber(state, &which)) &&
807       which <= std::numeric_limits<int>::max() - 2 &&  // Don't overflow.
808       ParseOneCharToken(state, '_')) {
809     MaybeAppend(state, "{lambda()#");
810     MaybeAppendDecimal(state, 2 + which);
811     MaybeAppend(state, "}");
812     return true;
813   }
814   state->parse_state = copy;
815 
816   return false;
817 }
818 
819 // <number> ::= [n] <non-negative decimal integer>
820 // If "number_out" is non-null, then *number_out is set to the value of the
821 // parsed number on success.
ParseNumber(State * state,int * number_out)822 static bool ParseNumber(State *state, int *number_out) {
823   ComplexityGuard guard(state);
824   if (guard.IsTooComplex()) return false;
825   bool negative = false;
826   if (ParseOneCharToken(state, 'n')) {
827     negative = true;
828   }
829   const char *p = RemainingInput(state);
830   uint64_t number = 0;
831   for (; *p != '\0'; ++p) {
832     if (IsDigit(*p)) {
833       number = number * 10 + (*p - '0');
834     } else {
835       break;
836     }
837   }
838   // Apply the sign with uint64_t arithmetic so overflows aren't UB.  Gives
839   // "incorrect" results for out-of-range inputs, but negative values only
840   // appear for literals, which aren't printed.
841   if (negative) {
842     number = ~number + 1;
843   }
844   if (p != RemainingInput(state)) {  // Conversion succeeded.
845     state->parse_state.mangled_idx += p - RemainingInput(state);
846     if (number_out != nullptr) {
847       // Note: possibly truncate "number".
848       *number_out = number;
849     }
850     return true;
851   }
852   return false;
853 }
854 
855 // Floating-point literals are encoded using a fixed-length lowercase
856 // hexadecimal string.
ParseFloatNumber(State * state)857 static bool ParseFloatNumber(State *state) {
858   ComplexityGuard guard(state);
859   if (guard.IsTooComplex()) return false;
860   const char *p = RemainingInput(state);
861   for (; *p != '\0'; ++p) {
862     if (!IsDigit(*p) && !(*p >= 'a' && *p <= 'f')) {
863       break;
864     }
865   }
866   if (p != RemainingInput(state)) {  // Conversion succeeded.
867     state->parse_state.mangled_idx += p - RemainingInput(state);
868     return true;
869   }
870   return false;
871 }
872 
873 // The <seq-id> is a sequence number in base 36,
874 // using digits and upper case letters
ParseSeqId(State * state)875 static bool ParseSeqId(State *state) {
876   ComplexityGuard guard(state);
877   if (guard.IsTooComplex()) return false;
878   const char *p = RemainingInput(state);
879   for (; *p != '\0'; ++p) {
880     if (!IsDigit(*p) && !(*p >= 'A' && *p <= 'Z')) {
881       break;
882     }
883   }
884   if (p != RemainingInput(state)) {  // Conversion succeeded.
885     state->parse_state.mangled_idx += p - RemainingInput(state);
886     return true;
887   }
888   return false;
889 }
890 
891 // <identifier> ::= <unqualified source code identifier> (of given length)
ParseIdentifier(State * state,int length)892 static bool ParseIdentifier(State *state, int length) {
893   ComplexityGuard guard(state);
894   if (guard.IsTooComplex()) return false;
895   if (length < 0 || !AtLeastNumCharsRemaining(RemainingInput(state), length)) {
896     return false;
897   }
898   if (IdentifierIsAnonymousNamespace(state, length)) {
899     MaybeAppend(state, "(anonymous namespace)");
900   } else {
901     MaybeAppendWithLength(state, RemainingInput(state), length);
902   }
903   state->parse_state.mangled_idx += length;
904   return true;
905 }
906 
907 // <operator-name> ::= nw, and other two letters cases
908 //                 ::= cv <type>  # (cast)
909 //                 ::= v  <digit> <source-name> # vendor extended operator
ParseOperatorName(State * state,int * arity)910 static bool ParseOperatorName(State *state, int *arity) {
911   ComplexityGuard guard(state);
912   if (guard.IsTooComplex()) return false;
913   if (!AtLeastNumCharsRemaining(RemainingInput(state), 2)) {
914     return false;
915   }
916   // First check with "cv" (cast) case.
917   ParseState copy = state->parse_state;
918   if (ParseTwoCharToken(state, "cv") && MaybeAppend(state, "operator ") &&
919       EnterNestedName(state) && ParseType(state) &&
920       LeaveNestedName(state, copy.nest_level)) {
921     if (arity != nullptr) {
922       *arity = 1;
923     }
924     return true;
925   }
926   state->parse_state = copy;
927 
928   // Then vendor extended operators.
929   if (ParseOneCharToken(state, 'v') && ParseDigit(state, arity) &&
930       ParseSourceName(state)) {
931     return true;
932   }
933   state->parse_state = copy;
934 
935   // Other operator names should start with a lower alphabet followed
936   // by a lower/upper alphabet.
937   if (!(IsLower(RemainingInput(state)[0]) &&
938         IsAlpha(RemainingInput(state)[1]))) {
939     return false;
940   }
941   // We may want to perform a binary search if we really need speed.
942   const AbbrevPair *p;
943   for (p = kOperatorList; p->abbrev != nullptr; ++p) {
944     if (RemainingInput(state)[0] == p->abbrev[0] &&
945         RemainingInput(state)[1] == p->abbrev[1]) {
946       if (arity != nullptr) {
947         *arity = p->arity;
948       }
949       MaybeAppend(state, "operator");
950       if (IsLower(*p->real_name)) {  // new, delete, etc.
951         MaybeAppend(state, " ");
952       }
953       MaybeAppend(state, p->real_name);
954       state->parse_state.mangled_idx += 2;
955       return true;
956     }
957   }
958   return false;
959 }
960 
961 // <special-name> ::= TV <type>
962 //                ::= TT <type>
963 //                ::= TI <type>
964 //                ::= TS <type>
965 //                ::= Tc <call-offset> <call-offset> <(base) encoding>
966 //                ::= GV <(object) name>
967 //                ::= T <call-offset> <(base) encoding>
968 // G++ extensions:
969 //                ::= TC <type> <(offset) number> _ <(base) type>
970 //                ::= TF <type>
971 //                ::= TJ <type>
972 //                ::= GR <name>
973 //                ::= GA <encoding>
974 //                ::= Th <call-offset> <(base) encoding>
975 //                ::= Tv <call-offset> <(base) encoding>
976 //
977 // Note: we don't care much about them since they don't appear in
978 // stack traces.  The are special data.
ParseSpecialName(State * state)979 static bool ParseSpecialName(State *state) {
980   ComplexityGuard guard(state);
981   if (guard.IsTooComplex()) return false;
982   ParseState copy = state->parse_state;
983   if (ParseOneCharToken(state, 'T') && ParseCharClass(state, "VTIS") &&
984       ParseType(state)) {
985     return true;
986   }
987   state->parse_state = copy;
988 
989   if (ParseTwoCharToken(state, "Tc") && ParseCallOffset(state) &&
990       ParseCallOffset(state) && ParseEncoding(state)) {
991     return true;
992   }
993   state->parse_state = copy;
994 
995   if (ParseTwoCharToken(state, "GV") && ParseName(state)) {
996     return true;
997   }
998   state->parse_state = copy;
999 
1000   if (ParseOneCharToken(state, 'T') && ParseCallOffset(state) &&
1001       ParseEncoding(state)) {
1002     return true;
1003   }
1004   state->parse_state = copy;
1005 
1006   // G++ extensions
1007   if (ParseTwoCharToken(state, "TC") && ParseType(state) &&
1008       ParseNumber(state, nullptr) && ParseOneCharToken(state, '_') &&
1009       DisableAppend(state) && ParseType(state)) {
1010     RestoreAppend(state, copy.append);
1011     return true;
1012   }
1013   state->parse_state = copy;
1014 
1015   if (ParseOneCharToken(state, 'T') && ParseCharClass(state, "FJ") &&
1016       ParseType(state)) {
1017     return true;
1018   }
1019   state->parse_state = copy;
1020 
1021   if (ParseTwoCharToken(state, "GR") && ParseName(state)) {
1022     return true;
1023   }
1024   state->parse_state = copy;
1025 
1026   if (ParseTwoCharToken(state, "GA") && ParseEncoding(state)) {
1027     return true;
1028   }
1029   state->parse_state = copy;
1030 
1031   if (ParseOneCharToken(state, 'T') && ParseCharClass(state, "hv") &&
1032       ParseCallOffset(state) && ParseEncoding(state)) {
1033     return true;
1034   }
1035   state->parse_state = copy;
1036   return false;
1037 }
1038 
1039 // <call-offset> ::= h <nv-offset> _
1040 //               ::= v <v-offset> _
ParseCallOffset(State * state)1041 static bool ParseCallOffset(State *state) {
1042   ComplexityGuard guard(state);
1043   if (guard.IsTooComplex()) return false;
1044   ParseState copy = state->parse_state;
1045   if (ParseOneCharToken(state, 'h') && ParseNVOffset(state) &&
1046       ParseOneCharToken(state, '_')) {
1047     return true;
1048   }
1049   state->parse_state = copy;
1050 
1051   if (ParseOneCharToken(state, 'v') && ParseVOffset(state) &&
1052       ParseOneCharToken(state, '_')) {
1053     return true;
1054   }
1055   state->parse_state = copy;
1056 
1057   return false;
1058 }
1059 
1060 // <nv-offset> ::= <(offset) number>
ParseNVOffset(State * state)1061 static bool ParseNVOffset(State *state) {
1062   ComplexityGuard guard(state);
1063   if (guard.IsTooComplex()) return false;
1064   return ParseNumber(state, nullptr);
1065 }
1066 
1067 // <v-offset>  ::= <(offset) number> _ <(virtual offset) number>
ParseVOffset(State * state)1068 static bool ParseVOffset(State *state) {
1069   ComplexityGuard guard(state);
1070   if (guard.IsTooComplex()) return false;
1071   ParseState copy = state->parse_state;
1072   if (ParseNumber(state, nullptr) && ParseOneCharToken(state, '_') &&
1073       ParseNumber(state, nullptr)) {
1074     return true;
1075   }
1076   state->parse_state = copy;
1077   return false;
1078 }
1079 
1080 // <ctor-dtor-name> ::= C1 | C2 | C3
1081 //                  ::= D0 | D1 | D2
1082 // # GCC extensions: "unified" constructor/destructor.  See
1083 // # https://github.com/gcc-mirror/gcc/blob/7ad17b583c3643bd4557f29b8391ca7ef08391f5/gcc/cp/mangle.c#L1847
1084 //                  ::= C4 | D4
ParseCtorDtorName(State * state)1085 static bool ParseCtorDtorName(State *state) {
1086   ComplexityGuard guard(state);
1087   if (guard.IsTooComplex()) return false;
1088   ParseState copy = state->parse_state;
1089   if (ParseOneCharToken(state, 'C') && ParseCharClass(state, "1234")) {
1090     const char *const prev_name = state->out + state->parse_state.prev_name_idx;
1091     MaybeAppendWithLength(state, prev_name,
1092                           state->parse_state.prev_name_length);
1093     return true;
1094   }
1095   state->parse_state = copy;
1096 
1097   if (ParseOneCharToken(state, 'D') && ParseCharClass(state, "0124")) {
1098     const char *const prev_name = state->out + state->parse_state.prev_name_idx;
1099     MaybeAppend(state, "~");
1100     MaybeAppendWithLength(state, prev_name,
1101                           state->parse_state.prev_name_length);
1102     return true;
1103   }
1104   state->parse_state = copy;
1105   return false;
1106 }
1107 
1108 // <decltype> ::= Dt <expression> E  # decltype of an id-expression or class
1109 //                                   # member access (C++0x)
1110 //            ::= DT <expression> E  # decltype of an expression (C++0x)
ParseDecltype(State * state)1111 static bool ParseDecltype(State *state) {
1112   ComplexityGuard guard(state);
1113   if (guard.IsTooComplex()) return false;
1114 
1115   ParseState copy = state->parse_state;
1116   if (ParseOneCharToken(state, 'D') && ParseCharClass(state, "tT") &&
1117       ParseExpression(state) && ParseOneCharToken(state, 'E')) {
1118     return true;
1119   }
1120   state->parse_state = copy;
1121 
1122   return false;
1123 }
1124 
1125 // <type> ::= <CV-qualifiers> <type>
1126 //        ::= P <type>   # pointer-to
1127 //        ::= R <type>   # reference-to
1128 //        ::= O <type>   # rvalue reference-to (C++0x)
1129 //        ::= C <type>   # complex pair (C 2000)
1130 //        ::= G <type>   # imaginary (C 2000)
1131 //        ::= U <source-name> <type>  # vendor extended type qualifier
1132 //        ::= <builtin-type>
1133 //        ::= <function-type>
1134 //        ::= <class-enum-type>  # note: just an alias for <name>
1135 //        ::= <array-type>
1136 //        ::= <pointer-to-member-type>
1137 //        ::= <template-template-param> <template-args>
1138 //        ::= <template-param>
1139 //        ::= <decltype>
1140 //        ::= <substitution>
1141 //        ::= Dp <type>          # pack expansion of (C++0x)
1142 //
ParseType(State * state)1143 static bool ParseType(State *state) {
1144   ComplexityGuard guard(state);
1145   if (guard.IsTooComplex()) return false;
1146   ParseState copy = state->parse_state;
1147 
1148   // We should check CV-qualifers, and PRGC things first.
1149   //
1150   // CV-qualifiers overlap with some operator names, but an operator name is not
1151   // valid as a type.  To avoid an ambiguity that can lead to exponential time
1152   // complexity, refuse to backtrack the CV-qualifiers.
1153   //
1154   // _Z4aoeuIrMvvE
1155   //  => _Z 4aoeuI        rM  v     v   E
1156   //         aoeu<operator%=, void, void>
1157   //  => _Z 4aoeuI r Mv v              E
1158   //         aoeu<void void::* restrict>
1159   //
1160   // By consuming the CV-qualifiers first, the former parse is disabled.
1161   if (ParseCVQualifiers(state)) {
1162     const bool result = ParseType(state);
1163     if (!result) state->parse_state = copy;
1164     return result;
1165   }
1166   state->parse_state = copy;
1167 
1168   // Similarly, these tag characters can overlap with other <name>s resulting in
1169   // two different parse prefixes that land on <template-args> in the same
1170   // place, such as "C3r1xI...".  So, disable the "ctor-name = C3" parse by
1171   // refusing to backtrack the tag characters.
1172   if (ParseCharClass(state, "OPRCG")) {
1173     const bool result = ParseType(state);
1174     if (!result) state->parse_state = copy;
1175     return result;
1176   }
1177   state->parse_state = copy;
1178 
1179   if (ParseTwoCharToken(state, "Dp") && ParseType(state)) {
1180     return true;
1181   }
1182   state->parse_state = copy;
1183 
1184   if (ParseOneCharToken(state, 'U') && ParseSourceName(state) &&
1185       ParseType(state)) {
1186     return true;
1187   }
1188   state->parse_state = copy;
1189 
1190   if (ParseBuiltinType(state) || ParseFunctionType(state) ||
1191       ParseClassEnumType(state) || ParseArrayType(state) ||
1192       ParsePointerToMemberType(state) || ParseDecltype(state) ||
1193       // "std" on its own isn't a type.
1194       ParseSubstitution(state, /*accept_std=*/false)) {
1195     return true;
1196   }
1197 
1198   if (ParseTemplateTemplateParam(state) && ParseTemplateArgs(state)) {
1199     return true;
1200   }
1201   state->parse_state = copy;
1202 
1203   // Less greedy than <template-template-param> <template-args>.
1204   if (ParseTemplateParam(state)) {
1205     return true;
1206   }
1207 
1208   return false;
1209 }
1210 
1211 // <CV-qualifiers> ::= [r] [V] [K]
1212 // We don't allow empty <CV-qualifiers> to avoid infinite loop in
1213 // ParseType().
ParseCVQualifiers(State * state)1214 static bool ParseCVQualifiers(State *state) {
1215   ComplexityGuard guard(state);
1216   if (guard.IsTooComplex()) return false;
1217   int num_cv_qualifiers = 0;
1218   num_cv_qualifiers += ParseOneCharToken(state, 'r');
1219   num_cv_qualifiers += ParseOneCharToken(state, 'V');
1220   num_cv_qualifiers += ParseOneCharToken(state, 'K');
1221   return num_cv_qualifiers > 0;
1222 }
1223 
1224 // <builtin-type> ::= v, etc.  # single-character builtin types
1225 //                ::= u <source-name>
1226 //                ::= Dd, etc.  # two-character builtin types
1227 //
1228 // Not supported:
1229 //                ::= DF <number> _ # _FloatN (N bits)
1230 //
ParseBuiltinType(State * state)1231 static bool ParseBuiltinType(State *state) {
1232   ComplexityGuard guard(state);
1233   if (guard.IsTooComplex()) return false;
1234   const AbbrevPair *p;
1235   for (p = kBuiltinTypeList; p->abbrev != nullptr; ++p) {
1236     // Guaranteed only 1- or 2-character strings in kBuiltinTypeList.
1237     if (p->abbrev[1] == '\0') {
1238       if (ParseOneCharToken(state, p->abbrev[0])) {
1239         MaybeAppend(state, p->real_name);
1240         return true;
1241       }
1242     } else if (p->abbrev[2] == '\0' && ParseTwoCharToken(state, p->abbrev)) {
1243       MaybeAppend(state, p->real_name);
1244       return true;
1245     }
1246   }
1247 
1248   ParseState copy = state->parse_state;
1249   if (ParseOneCharToken(state, 'u') && ParseSourceName(state)) {
1250     return true;
1251   }
1252   state->parse_state = copy;
1253   return false;
1254 }
1255 
1256 // <function-type> ::= F [Y] <bare-function-type> E
ParseFunctionType(State * state)1257 static bool ParseFunctionType(State *state) {
1258   ComplexityGuard guard(state);
1259   if (guard.IsTooComplex()) return false;
1260   ParseState copy = state->parse_state;
1261   if (ParseOneCharToken(state, 'F') &&
1262       Optional(ParseOneCharToken(state, 'Y')) && ParseBareFunctionType(state) &&
1263       ParseOneCharToken(state, 'E')) {
1264     return true;
1265   }
1266   state->parse_state = copy;
1267   return false;
1268 }
1269 
1270 // <bare-function-type> ::= <(signature) type>+
ParseBareFunctionType(State * state)1271 static bool ParseBareFunctionType(State *state) {
1272   ComplexityGuard guard(state);
1273   if (guard.IsTooComplex()) return false;
1274   ParseState copy = state->parse_state;
1275   DisableAppend(state);
1276   if (OneOrMore(ParseType, state)) {
1277     RestoreAppend(state, copy.append);
1278     MaybeAppend(state, "()");
1279     return true;
1280   }
1281   state->parse_state = copy;
1282   return false;
1283 }
1284 
1285 // <class-enum-type> ::= <name>
ParseClassEnumType(State * state)1286 static bool ParseClassEnumType(State *state) {
1287   ComplexityGuard guard(state);
1288   if (guard.IsTooComplex()) return false;
1289   return ParseName(state);
1290 }
1291 
1292 // <array-type> ::= A <(positive dimension) number> _ <(element) type>
1293 //              ::= A [<(dimension) expression>] _ <(element) type>
ParseArrayType(State * state)1294 static bool ParseArrayType(State *state) {
1295   ComplexityGuard guard(state);
1296   if (guard.IsTooComplex()) return false;
1297   ParseState copy = state->parse_state;
1298   if (ParseOneCharToken(state, 'A') && ParseNumber(state, nullptr) &&
1299       ParseOneCharToken(state, '_') && ParseType(state)) {
1300     return true;
1301   }
1302   state->parse_state = copy;
1303 
1304   if (ParseOneCharToken(state, 'A') && Optional(ParseExpression(state)) &&
1305       ParseOneCharToken(state, '_') && ParseType(state)) {
1306     return true;
1307   }
1308   state->parse_state = copy;
1309   return false;
1310 }
1311 
1312 // <pointer-to-member-type> ::= M <(class) type> <(member) type>
ParsePointerToMemberType(State * state)1313 static bool ParsePointerToMemberType(State *state) {
1314   ComplexityGuard guard(state);
1315   if (guard.IsTooComplex()) return false;
1316   ParseState copy = state->parse_state;
1317   if (ParseOneCharToken(state, 'M') && ParseType(state) && ParseType(state)) {
1318     return true;
1319   }
1320   state->parse_state = copy;
1321   return false;
1322 }
1323 
1324 // <template-param> ::= T_
1325 //                  ::= T <parameter-2 non-negative number> _
ParseTemplateParam(State * state)1326 static bool ParseTemplateParam(State *state) {
1327   ComplexityGuard guard(state);
1328   if (guard.IsTooComplex()) return false;
1329   if (ParseTwoCharToken(state, "T_")) {
1330     MaybeAppend(state, "?");  // We don't support template substitutions.
1331     return true;
1332   }
1333 
1334   ParseState copy = state->parse_state;
1335   if (ParseOneCharToken(state, 'T') && ParseNumber(state, nullptr) &&
1336       ParseOneCharToken(state, '_')) {
1337     MaybeAppend(state, "?");  // We don't support template substitutions.
1338     return true;
1339   }
1340   state->parse_state = copy;
1341   return false;
1342 }
1343 
1344 // <template-template-param> ::= <template-param>
1345 //                           ::= <substitution>
ParseTemplateTemplateParam(State * state)1346 static bool ParseTemplateTemplateParam(State *state) {
1347   ComplexityGuard guard(state);
1348   if (guard.IsTooComplex()) return false;
1349   return (ParseTemplateParam(state) ||
1350           // "std" on its own isn't a template.
1351           ParseSubstitution(state, /*accept_std=*/false));
1352 }
1353 
1354 // <template-args> ::= I <template-arg>+ E
ParseTemplateArgs(State * state)1355 static bool ParseTemplateArgs(State *state) {
1356   ComplexityGuard guard(state);
1357   if (guard.IsTooComplex()) return false;
1358   ParseState copy = state->parse_state;
1359   DisableAppend(state);
1360   if (ParseOneCharToken(state, 'I') && OneOrMore(ParseTemplateArg, state) &&
1361       ParseOneCharToken(state, 'E')) {
1362     RestoreAppend(state, copy.append);
1363     MaybeAppend(state, "<>");
1364     return true;
1365   }
1366   state->parse_state = copy;
1367   return false;
1368 }
1369 
1370 // <template-arg>  ::= <type>
1371 //                 ::= <expr-primary>
1372 //                 ::= J <template-arg>* E        # argument pack
1373 //                 ::= X <expression> E
ParseTemplateArg(State * state)1374 static bool ParseTemplateArg(State *state) {
1375   ComplexityGuard guard(state);
1376   if (guard.IsTooComplex()) return false;
1377   ParseState copy = state->parse_state;
1378   if (ParseOneCharToken(state, 'J') && ZeroOrMore(ParseTemplateArg, state) &&
1379       ParseOneCharToken(state, 'E')) {
1380     return true;
1381   }
1382   state->parse_state = copy;
1383 
1384   // There can be significant overlap between the following leading to
1385   // exponential backtracking:
1386   //
1387   //   <expr-primary> ::= L <type> <expr-cast-value> E
1388   //                 e.g. L 2xxIvE 1                 E
1389   //   <type>         ==> <local-source-name> <template-args>
1390   //                 e.g. L 2xx               IvE
1391   //
1392   // This means parsing an entire <type> twice, and <type> can contain
1393   // <template-arg>, so this can generate exponential backtracking.  There is
1394   // only overlap when the remaining input starts with "L <source-name>", so
1395   // parse all cases that can start this way jointly to share the common prefix.
1396   //
1397   // We have:
1398   //
1399   //   <template-arg> ::= <type>
1400   //                  ::= <expr-primary>
1401   //
1402   // First, drop all the productions of <type> that must start with something
1403   // other than 'L'.  All that's left is <class-enum-type>; inline it.
1404   //
1405   //   <type> ::= <nested-name> # starts with 'N'
1406   //          ::= <unscoped-name>
1407   //          ::= <unscoped-template-name> <template-args>
1408   //          ::= <local-name> # starts with 'Z'
1409   //
1410   // Drop and inline again:
1411   //
1412   //   <type> ::= <unscoped-name>
1413   //          ::= <unscoped-name> <template-args>
1414   //          ::= <substitution> <template-args> # starts with 'S'
1415   //
1416   // Merge the first two, inline <unscoped-name>, drop last:
1417   //
1418   //   <type> ::= <unqualified-name> [<template-args>]
1419   //          ::= St <unqualified-name> [<template-args>] # starts with 'S'
1420   //
1421   // Drop and inline:
1422   //
1423   //   <type> ::= <operator-name> [<template-args>] # starts with lowercase
1424   //          ::= <ctor-dtor-name> [<template-args>] # starts with 'C' or 'D'
1425   //          ::= <source-name> [<template-args>] # starts with digit
1426   //          ::= <local-source-name> [<template-args>]
1427   //          ::= <unnamed-type-name> [<template-args>] # starts with 'U'
1428   //
1429   // One more time:
1430   //
1431   //   <type> ::= L <source-name> [<template-args>]
1432   //
1433   // Likewise with <expr-primary>:
1434   //
1435   //   <expr-primary> ::= L <type> <expr-cast-value> E
1436   //                  ::= LZ <encoding> E # cannot overlap; drop
1437   //                  ::= L <mangled_name> E # cannot overlap; drop
1438   //
1439   // By similar reasoning as shown above, the only <type>s starting with
1440   // <source-name> are "<source-name> [<template-args>]".  Inline this.
1441   //
1442   //   <expr-primary> ::= L <source-name> [<template-args>] <expr-cast-value> E
1443   //
1444   // Now inline both of these into <template-arg>:
1445   //
1446   //   <template-arg> ::= L <source-name> [<template-args>]
1447   //                  ::= L <source-name> [<template-args>] <expr-cast-value> E
1448   //
1449   // Merge them and we're done:
1450   //   <template-arg>
1451   //     ::= L <source-name> [<template-args>] [<expr-cast-value> E]
1452   if (ParseLocalSourceName(state) && Optional(ParseTemplateArgs(state))) {
1453     copy = state->parse_state;
1454     if (ParseExprCastValue(state) && ParseOneCharToken(state, 'E')) {
1455       return true;
1456     }
1457     state->parse_state = copy;
1458     return true;
1459   }
1460 
1461   // Now that the overlapping cases can't reach this code, we can safely call
1462   // both of these.
1463   if (ParseType(state) || ParseExprPrimary(state)) {
1464     return true;
1465   }
1466   state->parse_state = copy;
1467 
1468   if (ParseOneCharToken(state, 'X') && ParseExpression(state) &&
1469       ParseOneCharToken(state, 'E')) {
1470     return true;
1471   }
1472   state->parse_state = copy;
1473   return false;
1474 }
1475 
1476 // <unresolved-type> ::= <template-param> [<template-args>]
1477 //                   ::= <decltype>
1478 //                   ::= <substitution>
ParseUnresolvedType(State * state)1479 static inline bool ParseUnresolvedType(State *state) {
1480   // No ComplexityGuard because we don't copy the state in this stack frame.
1481   return (ParseTemplateParam(state) && Optional(ParseTemplateArgs(state))) ||
1482          ParseDecltype(state) || ParseSubstitution(state, /*accept_std=*/false);
1483 }
1484 
1485 // <simple-id> ::= <source-name> [<template-args>]
ParseSimpleId(State * state)1486 static inline bool ParseSimpleId(State *state) {
1487   // No ComplexityGuard because we don't copy the state in this stack frame.
1488 
1489   // Note: <simple-id> cannot be followed by a parameter pack; see comment in
1490   // ParseUnresolvedType.
1491   return ParseSourceName(state) && Optional(ParseTemplateArgs(state));
1492 }
1493 
1494 // <base-unresolved-name> ::= <source-name> [<template-args>]
1495 //                        ::= on <operator-name> [<template-args>]
1496 //                        ::= dn <destructor-name>
ParseBaseUnresolvedName(State * state)1497 static bool ParseBaseUnresolvedName(State *state) {
1498   ComplexityGuard guard(state);
1499   if (guard.IsTooComplex()) return false;
1500 
1501   if (ParseSimpleId(state)) {
1502     return true;
1503   }
1504 
1505   ParseState copy = state->parse_state;
1506   if (ParseTwoCharToken(state, "on") && ParseOperatorName(state, nullptr) &&
1507       Optional(ParseTemplateArgs(state))) {
1508     return true;
1509   }
1510   state->parse_state = copy;
1511 
1512   if (ParseTwoCharToken(state, "dn") &&
1513       (ParseUnresolvedType(state) || ParseSimpleId(state))) {
1514     return true;
1515   }
1516   state->parse_state = copy;
1517 
1518   return false;
1519 }
1520 
1521 // <unresolved-name> ::= [gs] <base-unresolved-name>
1522 //                   ::= sr <unresolved-type> <base-unresolved-name>
1523 //                   ::= srN <unresolved-type> <unresolved-qualifier-level>+ E
1524 //                         <base-unresolved-name>
1525 //                   ::= [gs] sr <unresolved-qualifier-level>+ E
1526 //                         <base-unresolved-name>
ParseUnresolvedName(State * state)1527 static bool ParseUnresolvedName(State *state) {
1528   ComplexityGuard guard(state);
1529   if (guard.IsTooComplex()) return false;
1530 
1531   ParseState copy = state->parse_state;
1532   if (Optional(ParseTwoCharToken(state, "gs")) &&
1533       ParseBaseUnresolvedName(state)) {
1534     return true;
1535   }
1536   state->parse_state = copy;
1537 
1538   if (ParseTwoCharToken(state, "sr") && ParseUnresolvedType(state) &&
1539       ParseBaseUnresolvedName(state)) {
1540     return true;
1541   }
1542   state->parse_state = copy;
1543 
1544   if (ParseTwoCharToken(state, "sr") && ParseOneCharToken(state, 'N') &&
1545       ParseUnresolvedType(state) &&
1546       OneOrMore(/* <unresolved-qualifier-level> ::= */ ParseSimpleId, state) &&
1547       ParseOneCharToken(state, 'E') && ParseBaseUnresolvedName(state)) {
1548     return true;
1549   }
1550   state->parse_state = copy;
1551 
1552   if (Optional(ParseTwoCharToken(state, "gs")) &&
1553       ParseTwoCharToken(state, "sr") &&
1554       OneOrMore(/* <unresolved-qualifier-level> ::= */ ParseSimpleId, state) &&
1555       ParseOneCharToken(state, 'E') && ParseBaseUnresolvedName(state)) {
1556     return true;
1557   }
1558   state->parse_state = copy;
1559 
1560   return false;
1561 }
1562 
1563 // <expression> ::= <1-ary operator-name> <expression>
1564 //              ::= <2-ary operator-name> <expression> <expression>
1565 //              ::= <3-ary operator-name> <expression> <expression> <expression>
1566 //              ::= cl <expression>+ E
1567 //              ::= cv <type> <expression>      # type (expression)
1568 //              ::= cv <type> _ <expression>* E # type (expr-list)
1569 //              ::= st <type>
1570 //              ::= <template-param>
1571 //              ::= <function-param>
1572 //              ::= <expr-primary>
1573 //              ::= dt <expression> <unresolved-name> # expr.name
1574 //              ::= pt <expression> <unresolved-name> # expr->name
1575 //              ::= sp <expression>         # argument pack expansion
1576 //              ::= sr <type> <unqualified-name> <template-args>
1577 //              ::= sr <type> <unqualified-name>
1578 // <function-param> ::= fp <(top-level) CV-qualifiers> _
1579 //                  ::= fp <(top-level) CV-qualifiers> <number> _
1580 //                  ::= fL <number> p <(top-level) CV-qualifiers> _
1581 //                  ::= fL <number> p <(top-level) CV-qualifiers> <number> _
ParseExpression(State * state)1582 static bool ParseExpression(State *state) {
1583   ComplexityGuard guard(state);
1584   if (guard.IsTooComplex()) return false;
1585   if (ParseTemplateParam(state) || ParseExprPrimary(state)) {
1586     return true;
1587   }
1588 
1589   // Object/function call expression.
1590   ParseState copy = state->parse_state;
1591   if (ParseTwoCharToken(state, "cl") && OneOrMore(ParseExpression, state) &&
1592       ParseOneCharToken(state, 'E')) {
1593     return true;
1594   }
1595   state->parse_state = copy;
1596 
1597   // Function-param expression (level 0).
1598   if (ParseTwoCharToken(state, "fp") && Optional(ParseCVQualifiers(state)) &&
1599       Optional(ParseNumber(state, nullptr)) && ParseOneCharToken(state, '_')) {
1600     return true;
1601   }
1602   state->parse_state = copy;
1603 
1604   // Function-param expression (level 1+).
1605   if (ParseTwoCharToken(state, "fL") && Optional(ParseNumber(state, nullptr)) &&
1606       ParseOneCharToken(state, 'p') && Optional(ParseCVQualifiers(state)) &&
1607       Optional(ParseNumber(state, nullptr)) && ParseOneCharToken(state, '_')) {
1608     return true;
1609   }
1610   state->parse_state = copy;
1611 
1612   // Parse the conversion expressions jointly to avoid re-parsing the <type> in
1613   // their common prefix.  Parsed as:
1614   // <expression> ::= cv <type> <conversion-args>
1615   // <conversion-args> ::= _ <expression>* E
1616   //                   ::= <expression>
1617   //
1618   // Also don't try ParseOperatorName after seeing "cv", since ParseOperatorName
1619   // also needs to accept "cv <type>" in other contexts.
1620   if (ParseTwoCharToken(state, "cv")) {
1621     if (ParseType(state)) {
1622       ParseState copy2 = state->parse_state;
1623       if (ParseOneCharToken(state, '_') && ZeroOrMore(ParseExpression, state) &&
1624           ParseOneCharToken(state, 'E')) {
1625         return true;
1626       }
1627       state->parse_state = copy2;
1628       if (ParseExpression(state)) {
1629         return true;
1630       }
1631     }
1632   } else {
1633     // Parse unary, binary, and ternary operator expressions jointly, taking
1634     // care not to re-parse subexpressions repeatedly. Parse like:
1635     //   <expression> ::= <operator-name> <expression>
1636     //                    [<one-to-two-expressions>]
1637     //   <one-to-two-expressions> ::= <expression> [<expression>]
1638     int arity = -1;
1639     if (ParseOperatorName(state, &arity) &&
1640         arity > 0 &&  // 0 arity => disabled.
1641         (arity < 3 || ParseExpression(state)) &&
1642         (arity < 2 || ParseExpression(state)) &&
1643         (arity < 1 || ParseExpression(state))) {
1644       return true;
1645     }
1646   }
1647   state->parse_state = copy;
1648 
1649   // sizeof type
1650   if (ParseTwoCharToken(state, "st") && ParseType(state)) {
1651     return true;
1652   }
1653   state->parse_state = copy;
1654 
1655   // Object and pointer member access expressions.
1656   if ((ParseTwoCharToken(state, "dt") || ParseTwoCharToken(state, "pt")) &&
1657       ParseExpression(state) && ParseType(state)) {
1658     return true;
1659   }
1660   state->parse_state = copy;
1661 
1662   // Pointer-to-member access expressions.  This parses the same as a binary
1663   // operator, but it's implemented separately because "ds" shouldn't be
1664   // accepted in other contexts that parse an operator name.
1665   if (ParseTwoCharToken(state, "ds") && ParseExpression(state) &&
1666       ParseExpression(state)) {
1667     return true;
1668   }
1669   state->parse_state = copy;
1670 
1671   // Parameter pack expansion
1672   if (ParseTwoCharToken(state, "sp") && ParseExpression(state)) {
1673     return true;
1674   }
1675   state->parse_state = copy;
1676 
1677   return ParseUnresolvedName(state);
1678 }
1679 
1680 // <expr-primary> ::= L <type> <(value) number> E
1681 //                ::= L <type> <(value) float> E
1682 //                ::= L <mangled-name> E
1683 //                // A bug in g++'s C++ ABI version 2 (-fabi-version=2).
1684 //                ::= LZ <encoding> E
1685 //
1686 // Warning, subtle: the "bug" LZ production above is ambiguous with the first
1687 // production where <type> starts with <local-name>, which can lead to
1688 // exponential backtracking in two scenarios:
1689 //
1690 // - When whatever follows the E in the <local-name> in the first production is
1691 //   not a name, we backtrack the whole <encoding> and re-parse the whole thing.
1692 //
1693 // - When whatever follows the <local-name> in the first production is not a
1694 //   number and this <expr-primary> may be followed by a name, we backtrack the
1695 //   <name> and re-parse it.
1696 //
1697 // Moreover this ambiguity isn't always resolved -- for example, the following
1698 // has two different parses:
1699 //
1700 //   _ZaaILZ4aoeuE1x1EvE
1701 //   => operator&&<aoeu, x, E, void>
1702 //   => operator&&<(aoeu::x)(1), void>
1703 //
1704 // To resolve this, we just do what GCC's demangler does, and refuse to parse
1705 // casts to <local-name> types.
ParseExprPrimary(State * state)1706 static bool ParseExprPrimary(State *state) {
1707   ComplexityGuard guard(state);
1708   if (guard.IsTooComplex()) return false;
1709   ParseState copy = state->parse_state;
1710 
1711   // The "LZ" special case: if we see LZ, we commit to accept "LZ <encoding> E"
1712   // or fail, no backtracking.
1713   if (ParseTwoCharToken(state, "LZ")) {
1714     if (ParseEncoding(state) && ParseOneCharToken(state, 'E')) {
1715       return true;
1716     }
1717 
1718     state->parse_state = copy;
1719     return false;
1720   }
1721 
1722   // The merged cast production.
1723   if (ParseOneCharToken(state, 'L') && ParseType(state) &&
1724       ParseExprCastValue(state)) {
1725     return true;
1726   }
1727   state->parse_state = copy;
1728 
1729   if (ParseOneCharToken(state, 'L') && ParseMangledName(state) &&
1730       ParseOneCharToken(state, 'E')) {
1731     return true;
1732   }
1733   state->parse_state = copy;
1734 
1735   return false;
1736 }
1737 
1738 // <number> or <float>, followed by 'E', as described above ParseExprPrimary.
ParseExprCastValue(State * state)1739 static bool ParseExprCastValue(State *state) {
1740   ComplexityGuard guard(state);
1741   if (guard.IsTooComplex()) return false;
1742   // We have to be able to backtrack after accepting a number because we could
1743   // have e.g. "7fffE", which will accept "7" as a number but then fail to find
1744   // the 'E'.
1745   ParseState copy = state->parse_state;
1746   if (ParseNumber(state, nullptr) && ParseOneCharToken(state, 'E')) {
1747     return true;
1748   }
1749   state->parse_state = copy;
1750 
1751   if (ParseFloatNumber(state) && ParseOneCharToken(state, 'E')) {
1752     return true;
1753   }
1754   state->parse_state = copy;
1755 
1756   return false;
1757 }
1758 
1759 // <local-name> ::= Z <(function) encoding> E <(entity) name> [<discriminator>]
1760 //              ::= Z <(function) encoding> E s [<discriminator>]
1761 //
1762 // Parsing a common prefix of these two productions together avoids an
1763 // exponential blowup of backtracking.  Parse like:
1764 //   <local-name> := Z <encoding> E <local-name-suffix>
1765 //   <local-name-suffix> ::= s [<discriminator>]
1766 //                       ::= <name> [<discriminator>]
1767 
ParseLocalNameSuffix(State * state)1768 static bool ParseLocalNameSuffix(State *state) {
1769   ComplexityGuard guard(state);
1770   if (guard.IsTooComplex()) return false;
1771 
1772   if (MaybeAppend(state, "::") && ParseName(state) &&
1773       Optional(ParseDiscriminator(state))) {
1774     return true;
1775   }
1776 
1777   // Since we're not going to overwrite the above "::" by re-parsing the
1778   // <encoding> (whose trailing '\0' byte was in the byte now holding the
1779   // first ':'), we have to rollback the "::" if the <name> parse failed.
1780   if (state->parse_state.append) {
1781     state->out[state->parse_state.out_cur_idx - 2] = '\0';
1782   }
1783 
1784   return ParseOneCharToken(state, 's') && Optional(ParseDiscriminator(state));
1785 }
1786 
ParseLocalName(State * state)1787 static bool ParseLocalName(State *state) {
1788   ComplexityGuard guard(state);
1789   if (guard.IsTooComplex()) return false;
1790   ParseState copy = state->parse_state;
1791   if (ParseOneCharToken(state, 'Z') && ParseEncoding(state) &&
1792       ParseOneCharToken(state, 'E') && ParseLocalNameSuffix(state)) {
1793     return true;
1794   }
1795   state->parse_state = copy;
1796   return false;
1797 }
1798 
1799 // <discriminator> := _ <(non-negative) number>
ParseDiscriminator(State * state)1800 static bool ParseDiscriminator(State *state) {
1801   ComplexityGuard guard(state);
1802   if (guard.IsTooComplex()) return false;
1803   ParseState copy = state->parse_state;
1804   if (ParseOneCharToken(state, '_') && ParseNumber(state, nullptr)) {
1805     return true;
1806   }
1807   state->parse_state = copy;
1808   return false;
1809 }
1810 
1811 // <substitution> ::= S_
1812 //                ::= S <seq-id> _
1813 //                ::= St, etc.
1814 //
1815 // "St" is special in that it's not valid as a standalone name, and it *is*
1816 // allowed to precede a name without being wrapped in "N...E".  This means that
1817 // if we accept it on its own, we can accept "St1a" and try to parse
1818 // template-args, then fail and backtrack, accept "St" on its own, then "1a" as
1819 // an unqualified name and re-parse the same template-args.  To block this
1820 // exponential backtracking, we disable it with 'accept_std=false' in
1821 // problematic contexts.
ParseSubstitution(State * state,bool accept_std)1822 static bool ParseSubstitution(State *state, bool accept_std) {
1823   ComplexityGuard guard(state);
1824   if (guard.IsTooComplex()) return false;
1825   if (ParseTwoCharToken(state, "S_")) {
1826     MaybeAppend(state, "?");  // We don't support substitutions.
1827     return true;
1828   }
1829 
1830   ParseState copy = state->parse_state;
1831   if (ParseOneCharToken(state, 'S') && ParseSeqId(state) &&
1832       ParseOneCharToken(state, '_')) {
1833     MaybeAppend(state, "?");  // We don't support substitutions.
1834     return true;
1835   }
1836   state->parse_state = copy;
1837 
1838   // Expand abbreviations like "St" => "std".
1839   if (ParseOneCharToken(state, 'S')) {
1840     const AbbrevPair *p;
1841     for (p = kSubstitutionList; p->abbrev != nullptr; ++p) {
1842       if (RemainingInput(state)[0] == p->abbrev[1] &&
1843           (accept_std || p->abbrev[1] != 't')) {
1844         MaybeAppend(state, "std");
1845         if (p->real_name[0] != '\0') {
1846           MaybeAppend(state, "::");
1847           MaybeAppend(state, p->real_name);
1848         }
1849         ++state->parse_state.mangled_idx;
1850         return true;
1851       }
1852     }
1853   }
1854   state->parse_state = copy;
1855   return false;
1856 }
1857 
1858 // Parse <mangled-name>, optionally followed by either a function-clone suffix
1859 // or version suffix.  Returns true only if all of "mangled_cur" was consumed.
ParseTopLevelMangledName(State * state)1860 static bool ParseTopLevelMangledName(State *state) {
1861   ComplexityGuard guard(state);
1862   if (guard.IsTooComplex()) return false;
1863   if (ParseMangledName(state)) {
1864     if (RemainingInput(state)[0] != '\0') {
1865       // Drop trailing function clone suffix, if any.
1866       if (IsFunctionCloneSuffix(RemainingInput(state))) {
1867         return true;
1868       }
1869       // Append trailing version suffix if any.
1870       // ex. _Z3foo@@GLIBCXX_3.4
1871       if (RemainingInput(state)[0] == '@') {
1872         MaybeAppend(state, RemainingInput(state));
1873         return true;
1874       }
1875       return false;  // Unconsumed suffix.
1876     }
1877     return true;
1878   }
1879   return false;
1880 }
1881 
Overflowed(const State * state)1882 static bool Overflowed(const State *state) {
1883   return state->parse_state.out_cur_idx >= state->out_end_idx;
1884 }
1885 
1886 // The demangler entry point.
Demangle(const char * mangled,char * out,int out_size)1887 bool Demangle(const char *mangled, char *out, int out_size) {
1888   State state;
1889   InitState(&state, mangled, out, out_size);
1890   return ParseTopLevelMangledName(&state) && !Overflowed(&state);
1891 }
1892 
1893 }  // namespace debugging_internal
1894 ABSL_NAMESPACE_END
1895 }  // namespace absl
1896