• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2014 Google Inc. All rights reserved.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include <algorithm>
18 #include <list>
19 #include <string>
20 
21 #include <math.h>
22 
23 #include "flatbuffers/idl.h"
24 #include "flatbuffers/util.h"
25 
26 namespace flatbuffers {
27 
// Double-precision value of pi.
28 const double kPi = 3.14159265358979323846;
29 
// Table holding the IDLTYPE string of every entry that FLATBUFFERS_GEN_TYPES
// expands, in expansion order, followed by a terminating nullptr.
30 const char *const kTypeNames[] = {
31 // clang-format off
32   #define FLATBUFFERS_TD(ENUM, IDLTYPE, \
33     CTYPE, JTYPE, GTYPE, NTYPE, PTYPE, RTYPE) \
34     IDLTYPE,
35     FLATBUFFERS_GEN_TYPES(FLATBUFFERS_TD)
36   #undef FLATBUFFERS_TD
37   // clang-format on
38   nullptr
39 };
40 
// Table holding sizeof(CTYPE) for every entry that FLATBUFFERS_GEN_TYPES
// expands, in the same order as kTypeNames (no terminator entry).
41 const char kTypeSizes[] = {
42 // clang-format off
43   #define FLATBUFFERS_TD(ENUM, IDLTYPE, \
44       CTYPE, JTYPE, GTYPE, NTYPE, PTYPE, RTYPE) \
45       sizeof(CTYPE),
46     FLATBUFFERS_GEN_TYPES(FLATBUFFERS_TD)
47   #undef FLATBUFFERS_TD
48   // clang-format on
49 };
50 
51 // The enums in the reflection schema should match the ones we use internally.
52 // Compare the last element to check if these go out of sync.
53 static_assert(BASE_TYPE_UNION == static_cast<BaseType>(reflection::Union),
54               "enums don't match");
55 
56 // Any parsing calls have to be wrapped in this macro, which automates
57 // handling of recursive error checking a bit. It will check the received
58 // CheckedError object, and return straight away on error.
// The macro expands to a brace-enclosed block: `call` is evaluated exactly
// once into a local named `ce`, and the enclosing function returns `ce` when
// ce.Check() is true. It can therefore only be used inside functions whose
// return type accepts a CheckedError.
59 #define ECHECK(call)           \
60   {                            \
61     auto ce = (call);          \
62     if (ce.Check()) return ce; \
63   }
64 
65 // These two functions are called hundreds of times below, so define a short
66 // form:
// NEXT() advances to the next token via Next(); EXPECT(tok) calls Expect(tok).
// Both return from the enclosing function on error (see ECHECK).
67 #define NEXT() ECHECK(Next())
68 #define EXPECT(tok) ECHECK(Expect(tok))
69 
ValidateUTF8(const std::string & str)70 static bool ValidateUTF8(const std::string &str) {
71   const char *s = &str[0];
72   const char *const sEnd = s + str.length();
73   while (s < sEnd) {
74     if (FromUTF8(&s) < 0) { return false; }
75   }
76   return true;
77 }
78 
// Convert an underscore_based_identifier into camelCase.
// Also uppercases the first character if first is true.
// An underscore that is followed by another character is dropped and that
// character is uppercased; a trailing underscore is copied unchanged.
std::string MakeCamel(const std::string &in, bool first) {
  // toupper() is undefined for negative arguments other than EOF, which is
  // what a plain (signed) char holding a byte >= 0x80 would produce, so route
  // every character through unsigned char first.
  const auto upper = [](char c) {
    return static_cast<char>(toupper(static_cast<unsigned char>(c)));
  };
  std::string s;
  s.reserve(in.length());  // the result is never longer than the input
  for (size_t i = 0; i < in.length(); i++) {
    if (!i && first)
      s += upper(in[0]);
    else if (in[i] == '_' && i + 1 < in.length())
      s += upper(in[++i]);  // consume the underscore and the next character
    else
      s += in[i];
  }
  return s;
}
93 
DeserializeDoc(std::vector<std::string> & doc,const Vector<Offset<String>> * documentation)94 void DeserializeDoc( std::vector<std::string> &doc,
95                      const Vector<Offset<String>> *documentation) {
96   if (documentation == nullptr) return;
97   for (uoffset_t index = 0; index < documentation->size(); index++)
98     doc.push_back(documentation->Get(index)->str());
99 }
100 
Message(const std::string & msg)101 void Parser::Message(const std::string &msg) {
102   if (!error_.empty()) error_ += "\n";  // log all warnings and errors
103   error_ += file_being_parsed_.length() ? AbsolutePath(file_being_parsed_) : "";
104   // clang-format off
105 
106   #ifdef _WIN32  // MSVC alike
107     error_ +=
108         "(" + NumToString(line_) + ", " + NumToString(CursorPosition()) + ")";
109   #else  // gcc alike
110     if (file_being_parsed_.length()) error_ += ":";
111     error_ += NumToString(line_) + ": " + NumToString(CursorPosition());
112   #endif
113   // clang-format on
114   error_ += ": " + msg;
115 }
116 
Warning(const std::string & msg)117 void Parser::Warning(const std::string &msg) { Message("warning: " + msg); }
118 
Error(const std::string & msg)119 CheckedError Parser::Error(const std::string &msg) {
120   Message("error: " + msg);
121   return CheckedError(true);
122 }
123 
NoError()124 inline CheckedError NoError() { return CheckedError(false); }
125 
RecurseError()126 CheckedError Parser::RecurseError() {
127   return Error("maximum parsing recursion of " +
128                NumToString(FLATBUFFERS_MAX_PARSING_DEPTH) + " reached");
129 }
130 
Recurse(F f)131 template<typename F> CheckedError Parser::Recurse(F f) {
132   if (recurse_protection_counter >= (FLATBUFFERS_MAX_PARSING_DEPTH))
133     return RecurseError();
134   recurse_protection_counter++;
135   auto ce = f();
136   recurse_protection_counter--;
137   return ce;
138 }
139 
TypeToIntervalString()140 template<typename T> std::string TypeToIntervalString() {
141   return "[" + NumToString((flatbuffers::numeric_limits<T>::lowest)()) + "; " +
142          NumToString((flatbuffers::numeric_limits<T>::max)()) + "]";
143 }
144 
145 // atot: template version of atoi/atof: convert a string to an instance of T.
146 template<typename T>
atot(const char * s,Parser & parser,T * val)147 inline CheckedError atot(const char *s, Parser &parser, T *val) {
148   auto done = StringToNumber(s, val);
149   if (done) return NoError();
150   if (0 == *val)
151     return parser.Error("invalid number: \"" + std::string(s) + "\"");
152   else
153     return parser.Error("invalid number: \"" + std::string(s) + "\"" +
154                         ", constant does not fit " + TypeToIntervalString<T>());
155 }
156 template<>
atot(const char * s,Parser & parser,Offset<void> * val)157 inline CheckedError atot<Offset<void>>(const char *s, Parser &parser,
158                                        Offset<void> *val) {
159   (void)parser;
160   *val = Offset<void>(atoi(s));
161   return NoError();
162 }
163 
GetFullyQualifiedName(const std::string & name,size_t max_components) const164 std::string Namespace::GetFullyQualifiedName(const std::string &name,
165                                              size_t max_components) const {
166   // Early exit if we don't have a defined namespace.
167   if (components.empty() || !max_components) { return name; }
168   std::string stream_str;
169   for (size_t i = 0; i < std::min(components.size(), max_components); i++) {
170     if (i) { stream_str += '.'; }
171     stream_str += std::string(components[i]);
172   }
173   if (name.length()) {
174     stream_str += '.';
175     stream_str += name;
176   }
177   return stream_str;
178 }
179 
180 // Declare tokens we'll use. Single character tokens are represented by their
181 // ascii character code (e.g. '{'), others start at 256.
// Each TD(NAME, VALUE, STRING) entry supplies the enumerator suffix, its
// numeric value and a human-readable description. TokenToString() indexes its
// table with (token - 256), so the values must stay contiguous from 256 and
// in the same order as listed here.
182 // clang-format off
183 #define FLATBUFFERS_GEN_TOKENS(TD) \
184   TD(Eof, 256, "end of file") \
185   TD(StringConstant, 257, "string constant") \
186   TD(IntegerConstant, 258, "integer constant") \
187   TD(FloatConstant, 259, "float constant") \
188   TD(Identifier, 260, "identifier")
189 #ifdef __GNUC__
190 __extension__  // Stop GCC complaining about trailing comma with -Wpedantic.
191 #endif
// Expands the list above into enumerators named kToken<NAME> = VALUE.
192 enum {
193   #define FLATBUFFERS_TOKEN(NAME, VALUE, STRING) kToken ## NAME = VALUE,
194     FLATBUFFERS_GEN_TOKENS(FLATBUFFERS_TOKEN)
195   #undef FLATBUFFERS_TOKEN
196 };
197 
TokenToString(int t)198 static std::string TokenToString(int t) {
199   static const char * const tokens[] = {
200     #define FLATBUFFERS_TOKEN(NAME, VALUE, STRING) STRING,
201       FLATBUFFERS_GEN_TOKENS(FLATBUFFERS_TOKEN)
202     #undef FLATBUFFERS_TOKEN
203     #define FLATBUFFERS_TD(ENUM, IDLTYPE, \
204       CTYPE, JTYPE, GTYPE, NTYPE, PTYPE, RTYPE) \
205       IDLTYPE,
206       FLATBUFFERS_GEN_TYPES(FLATBUFFERS_TD)
207     #undef FLATBUFFERS_TD
208   };
209   if (t < 256) {  // A single ascii char token.
210     std::string s;
211     s.append(1, static_cast<char>(t));
212     return s;
213   } else {       // Other tokens.
214     return tokens[t - 256];
215   }
216 }
217 // clang-format on
218 
TokenToStringId(int t) const219 std::string Parser::TokenToStringId(int t) const {
220   return t == kTokenIdentifier ? attribute_ : TokenToString(t);
221 }
222 
223 // Parses exactly nibbles worth of hex digits into a number, or error.
ParseHexNum(int nibbles,uint64_t * val)224 CheckedError Parser::ParseHexNum(int nibbles, uint64_t *val) {
225   FLATBUFFERS_ASSERT(nibbles > 0);
226   for (int i = 0; i < nibbles; i++)
227     if (!is_xdigit(cursor_[i]))
228       return Error("escape code must be followed by " + NumToString(nibbles) +
229                    " hex digits");
230   std::string target(cursor_, cursor_ + nibbles);
231   *val = StringToUInt(target.c_str(), 16);
232   cursor_ += nibbles;
233   return NoError();
234 }
235 
SkipByteOrderMark()236 CheckedError Parser::SkipByteOrderMark() {
237   if (static_cast<unsigned char>(*cursor_) != 0xef) return NoError();
238   cursor_++;
239   if (static_cast<unsigned char>(*cursor_) != 0xbb)
240     return Error("invalid utf-8 byte order mark");
241   cursor_++;
242   if (static_cast<unsigned char>(*cursor_) != 0xbf)
243     return Error("invalid utf-8 byte order mark");
244   cursor_++;
245   return NoError();
246 }
247 
IsIdentifierStart(char c)248 static inline bool IsIdentifierStart(char c) {
249   return is_alpha(c) || (c == '_');
250 }
251 
// Lexer step: reads the next token starting at cursor_.
// Clears doc_comment_ and attribute_, skips whitespace and comments
// (collecting `///` documentation comments into doc_comment_), then sets
// token_ to either the single character that was read or one of the kToken*
// codes. For string constants, identifiers and numbers the token text is
// stored in attribute_. Returns NoError() on success, or the result of
// Error() for malformed input.
Next()252 CheckedError Parser::Next() {
253   doc_comment_.clear();
  // Being at the very start of the source counts as being on a fresh line;
  // this is what allows a documentation comment on the first line.
254   bool seen_newline = cursor_ == source_;
255   attribute_.clear();
256   attr_is_trivial_ascii_string_ = true;
257   for (;;) {
258     char c = *cursor_++;
259     token_ = c;
260     switch (c) {
261       case '\0':
        // Step back so cursor_ keeps pointing at the terminating NUL.
262         cursor_--;
263         token_ = kTokenEof;
264         return NoError();
265       case ' ':
266       case '\r':
267       case '\t': break;
268       case '\n':
269         MarkNewLine();
270         seen_newline = true;
271         break;
      // Single-character tokens: token_ already holds the character.
272       case '{':
273       case '}':
274       case '(':
275       case ')':
276       case '[':
277       case ']':
278       case ',':
279       case ':':
280       case ';':
281       case '=': return NoError();
      // String constant, terminated by the same quote character that opened
      // it. The unescaped contents are accumulated in attribute_.
282       case '\"':
283       case '\'': {
        // Holds the value of a \u high surrogate that still awaits its low
        // surrogate; -1 when none is pending.
284         int unicode_high_surrogate = -1;
285 
286         while (*cursor_ != c) {
          // Reject characters below ' ' (this includes the terminating NUL);
          // bytes that are negative as signed char are let through.
287           if (*cursor_ < ' ' && static_cast<signed char>(*cursor_) >= 0)
288             return Error("illegal character in string constant");
289           if (*cursor_ == '\\') {
290             attr_is_trivial_ascii_string_ = false;  // has escape sequence
291             cursor_++;
292             if (unicode_high_surrogate != -1 && *cursor_ != 'u') {
293               return Error(
294                   "illegal Unicode sequence (unpaired high surrogate)");
295             }
296             switch (*cursor_) {
297               case 'n':
298                 attribute_ += '\n';
299                 cursor_++;
300                 break;
301               case 't':
302                 attribute_ += '\t';
303                 cursor_++;
304                 break;
305               case 'r':
306                 attribute_ += '\r';
307                 cursor_++;
308                 break;
309               case 'b':
310                 attribute_ += '\b';
311                 cursor_++;
312                 break;
313               case 'f':
314                 attribute_ += '\f';
315                 cursor_++;
316                 break;
317               case '\"':
318                 attribute_ += '\"';
319                 cursor_++;
320                 break;
321               case '\'':
322                 attribute_ += '\'';
323                 cursor_++;
324                 break;
325               case '\\':
326                 attribute_ += '\\';
327                 cursor_++;
328                 break;
329               case '/':
330                 attribute_ += '/';
331                 cursor_++;
332                 break;
333               case 'x': {  // Not in the JSON standard
334                 cursor_++;
335                 uint64_t val;
336                 ECHECK(ParseHexNum(2, &val));
337                 attribute_ += static_cast<char>(val);
338                 break;
339               }
340               case 'u': {
341                 cursor_++;
342                 uint64_t val;
343                 ECHECK(ParseHexNum(4, &val));
                // 0xD800-0xDBFF is a high surrogate: remember it and wait for
                // the low surrogate (0xDC00-0xDFFF) that must follow.
344                 if (val >= 0xD800 && val <= 0xDBFF) {
345                   if (unicode_high_surrogate != -1) {
346                     return Error(
347                         "illegal Unicode sequence (multiple high surrogates)");
348                   } else {
349                     unicode_high_surrogate = static_cast<int>(val);
350                   }
351                 } else if (val >= 0xDC00 && val <= 0xDFFF) {
352                   if (unicode_high_surrogate == -1) {
353                     return Error(
354                         "illegal Unicode sequence (unpaired low surrogate)");
355                   } else {
                    // Combine the surrogate pair into one code point and
                    // append it as UTF-8.
356                     int code_point = 0x10000 +
357                                      ((unicode_high_surrogate & 0x03FF) << 10) +
358                                      (val & 0x03FF);
359                     ToUTF8(code_point, &attribute_);
360                     unicode_high_surrogate = -1;
361                   }
362                 } else {
363                   if (unicode_high_surrogate != -1) {
364                     return Error(
365                         "illegal Unicode sequence (unpaired high surrogate)");
366                   }
367                   ToUTF8(static_cast<int>(val), &attribute_);
368                 }
369                 break;
370               }
371               default: return Error("unknown escape code in string constant");
372             }
373           } else {  // printable chars + UTF-8 bytes
374             if (unicode_high_surrogate != -1) {
375               return Error(
376                   "illegal Unicode sequence (unpaired high surrogate)");
377             }
378             // reset if non-printable
379             attr_is_trivial_ascii_string_ &= check_ascii_range(*cursor_, ' ', '~');
380 
381             attribute_ += *cursor_++;
382           }
383         }
384         if (unicode_high_surrogate != -1) {
385           return Error("illegal Unicode sequence (unpaired high surrogate)");
386         }
        // Skip the closing quote.
387         cursor_++;
        // Strings made only of printable ASCII without escapes need no UTF-8
        // validation.
388         if (!attr_is_trivial_ascii_string_ && !opts.allow_non_utf8 &&
389             !ValidateUTF8(attribute_)) {
390           return Error("illegal UTF-8 sequence");
391         }
392         token_ = kTokenStringConstant;
393         return NoError();
394       }
395       case '/':
396         if (*cursor_ == '/') {
          // Line comment: runs up to (not including) the end of the line.
397           const char *start = ++cursor_;
398           while (*cursor_ && *cursor_ != '\n' && *cursor_ != '\r') cursor_++;
399           if (*start == '/') {  // documentation comment
400             if (!seen_newline)
401               return Error(
402                   "a documentation comment should be on a line on its own");
403             doc_comment_.push_back(std::string(start + 1, cursor_));
404           }
405           break;
406         } else if (*cursor_ == '*') {
407           cursor_++;
408           // TODO: make nested.
409           while (*cursor_ != '*' || cursor_[1] != '/') {
410             if (*cursor_ == '\n') MarkNewLine();
411             if (!*cursor_) return Error("end of file in comment");
412             cursor_++;
413           }
          // Skip the closing "*/".
414           cursor_ += 2;
415           break;
416         }
417         FLATBUFFERS_FALLTHROUGH(); // else fall thru
418       default:
419         const auto has_sign = (c == '+') || (c == '-');
420         // '-'/'+' and following identifier - can be a predefined constant like:
421         // NAN, INF, PI, etc.
422         if (IsIdentifierStart(c) || (has_sign && IsIdentifierStart(*cursor_))) {
423           // Collect all chars of an identifier:
424           const char *start = cursor_ - 1;
425           while (IsIdentifierStart(*cursor_) || is_digit(*cursor_)) cursor_++;
426           attribute_.append(start, cursor_);
          // A signed identifier is handed on as a string constant (sign
          // included), a plain one as an identifier.
427           token_ = has_sign ? kTokenStringConstant : kTokenIdentifier;
428           return NoError();
429         }
430 
431         auto dot_lvl = (c == '.') ? 0 : 1;  // dot_lvl==0 <=> exactly one '.' seen
432         if (!dot_lvl && !is_digit(*cursor_)) return NoError(); // enum?
433         // Parser accepts hexadecimal-floating-literal (see C++ 5.13.4).
434         if (is_digit(c) || has_sign || !dot_lvl) {
435           const auto start = cursor_ - 1;
436           auto start_digits = !is_digit(c) ? cursor_ : cursor_ - 1;
437           if (!is_digit(c) && is_digit(*cursor_)){
438             start_digits = cursor_; // see digit in cursor_ position
439             c = *cursor_++;
440           }
441           // hex-float can't begin with '.'
442           auto use_hex = dot_lvl && (c == '0') && is_alpha_char(*cursor_, 'X');
443           if (use_hex) start_digits = ++cursor_;  // '0x' is the prefix, skip it
444           // Read an integer number or mantissa of float-point number.
          // Each '.' consumed decrements dot_lvl; a second '.' drives it
          // below zero, which ends the loop and marks the number as invalid.
445           do {
446             if (use_hex) {
447               while (is_xdigit(*cursor_)) cursor_++;
448             } else {
449               while (is_digit(*cursor_)) cursor_++;
450             }
451           } while ((*cursor_ == '.') && (++cursor_) && (--dot_lvl >= 0));
452           // Exponent of float-point number.
453           if ((dot_lvl >= 0) && (cursor_ > start_digits)) {
454             // The exponent suffix of hexadecimal float number is mandatory.
455             if (use_hex && !dot_lvl) start_digits = cursor_;
456             if ((use_hex && is_alpha_char(*cursor_, 'P')) ||
457                 is_alpha_char(*cursor_, 'E')) {
458               dot_lvl = 0;  // Emulate dot to signal about float-point number.
459               cursor_++;
460               if (*cursor_ == '+' || *cursor_ == '-') cursor_++;
461               start_digits = cursor_;  // the exponent-part has to have digits
462               // Exponent is decimal integer number
463               while (is_digit(*cursor_)) cursor_++;
464               if (*cursor_ == '.') {
465                 cursor_++;  // If see a dot treat it as part of invalid number.
466                 dot_lvl = -1;  // Fall thru to Error().
467               }
468             }
469           }
470           // Finalize.
          // Valid only if no error was flagged and at least one digit was
          // consumed after start_digits; dot_lvl != 0 means no '.'/exponent
          // was seen, i.e. an integer.
471           if ((dot_lvl >= 0) && (cursor_ > start_digits)) {
472             attribute_.append(start, cursor_);
473             token_ = dot_lvl ? kTokenIntegerConstant : kTokenFloatConstant;
474             return NoError();
475           } else {
476             return Error("invalid number: " + std::string(start, cursor_));
477           }
478         }
        // Anything else is an illegal character; characters outside the
        // printable ASCII range are reported by numeric code.
479         std::string ch;
480         ch = c;
481         if (false == check_ascii_range(c, ' ', '~')) ch = "code: " + NumToString(c);
482         return Error("illegal character: " + ch);
483     }
484   }
485 }
486 
487 // Check if a given token is next.
Is(int t) const488 bool Parser::Is(int t) const { return t == token_; }
489 
IsIdent(const char * id) const490 bool Parser::IsIdent(const char *id) const {
491   return token_ == kTokenIdentifier && attribute_ == id;
492 }
493 
494 // Expect a given token to be next, consume it, or error if not present.
Expect(int t)495 CheckedError Parser::Expect(int t) {
496   if (t != token_) {
497     return Error("expecting: " + TokenToString(t) +
498                  " instead got: " + TokenToStringId(token_));
499   }
500   NEXT();
501   return NoError();
502 }
503 
ParseNamespacing(std::string * id,std::string * last)504 CheckedError Parser::ParseNamespacing(std::string *id, std::string *last) {
505   while (Is('.')) {
506     NEXT();
507     *id += ".";
508     *id += attribute_;
509     if (last) *last = attribute_;
510     EXPECT(kTokenIdentifier);
511   }
512   return NoError();
513 }
514 
LookupEnum(const std::string & id)515 EnumDef *Parser::LookupEnum(const std::string &id) {
516   // Search thru parent namespaces.
517   for (int components = static_cast<int>(current_namespace_->components.size());
518        components >= 0; components--) {
519     auto ed = enums_.Lookup(
520         current_namespace_->GetFullyQualifiedName(id, components));
521     if (ed) return ed;
522   }
523   return nullptr;
524 }
525 
LookupStruct(const std::string & id) const526 StructDef *Parser::LookupStruct(const std::string &id) const {
527   auto sd = structs_.Lookup(id);
528   if (sd) sd->refcount++;
529   return sd;
530 }
531 
ParseTypeIdent(Type & type)532 CheckedError Parser::ParseTypeIdent(Type &type) {
533   std::string id = attribute_;
534   EXPECT(kTokenIdentifier);
535   ECHECK(ParseNamespacing(&id, nullptr));
536   auto enum_def = LookupEnum(id);
537   if (enum_def) {
538     type = enum_def->underlying_type;
539     if (enum_def->is_union) type.base_type = BASE_TYPE_UNION;
540   } else {
541     type.base_type = BASE_TYPE_STRUCT;
542     type.struct_def = LookupCreateStruct(id);
543   }
544   return NoError();
545 }
546 
547 // Parse any IDL type.
ParseType(Type & type)548 CheckedError Parser::ParseType(Type &type) {
549   if (token_ == kTokenIdentifier) {
550     if (IsIdent("bool")) {
551       type.base_type = BASE_TYPE_BOOL;
552       NEXT();
553     } else if (IsIdent("byte") || IsIdent("int8")) {
554       type.base_type = BASE_TYPE_CHAR;
555       NEXT();
556     } else if (IsIdent("ubyte") || IsIdent("uint8")) {
557       type.base_type = BASE_TYPE_UCHAR;
558       NEXT();
559     } else if (IsIdent("short") || IsIdent("int16")) {
560       type.base_type = BASE_TYPE_SHORT;
561       NEXT();
562     } else if (IsIdent("ushort") || IsIdent("uint16")) {
563       type.base_type = BASE_TYPE_USHORT;
564       NEXT();
565     } else if (IsIdent("int") || IsIdent("int32")) {
566       type.base_type = BASE_TYPE_INT;
567       NEXT();
568     } else if (IsIdent("uint") || IsIdent("uint32")) {
569       type.base_type = BASE_TYPE_UINT;
570       NEXT();
571     } else if (IsIdent("long") || IsIdent("int64")) {
572       type.base_type = BASE_TYPE_LONG;
573       NEXT();
574     } else if (IsIdent("ulong") || IsIdent("uint64")) {
575       type.base_type = BASE_TYPE_ULONG;
576       NEXT();
577     } else if (IsIdent("float") || IsIdent("float32")) {
578       type.base_type = BASE_TYPE_FLOAT;
579       NEXT();
580     } else if (IsIdent("double") || IsIdent("float64")) {
581       type.base_type = BASE_TYPE_DOUBLE;
582       NEXT();
583     } else if (IsIdent("string")) {
584       type.base_type = BASE_TYPE_STRING;
585       NEXT();
586     } else {
587       ECHECK(ParseTypeIdent(type));
588     }
589   } else if (token_ == '[') {
590     NEXT();
591     Type subtype;
592     ECHECK(Recurse([&]() { return ParseType(subtype); }));
593     if (subtype.base_type == BASE_TYPE_VECTOR) {
594       // We could support this, but it will complicate things, and it's
595       // easier to work around with a struct around the inner vector.
596       return Error("nested vector types not supported (wrap in table first).");
597     }
598     type = Type(BASE_TYPE_VECTOR, subtype.struct_def, subtype.enum_def);
599     type.element = subtype.base_type;
600     EXPECT(']');
601   } else {
602     return Error("illegal type syntax");
603   }
604   return NoError();
605 }
606 
AddField(StructDef & struct_def,const std::string & name,const Type & type,FieldDef ** dest)607 CheckedError Parser::AddField(StructDef &struct_def, const std::string &name,
608                               const Type &type, FieldDef **dest) {
609   auto &field = *new FieldDef();
610   field.value.offset =
611       FieldIndexToOffset(static_cast<voffset_t>(struct_def.fields.vec.size()));
612   field.name = name;
613   field.file = struct_def.file;
614   field.value.type = type;
615   if (struct_def.fixed) {  // statically compute the field offset
616     auto size = InlineSize(type);
617     auto alignment = InlineAlignment(type);
618     // structs_ need to have a predictable format, so we need to align to
619     // the largest scalar
620     struct_def.minalign = std::max(struct_def.minalign, alignment);
621     struct_def.PadLastField(alignment);
622     field.value.offset = static_cast<voffset_t>(struct_def.bytesize);
623     struct_def.bytesize += size;
624   }
625   if (struct_def.fields.Add(name, &field))
626     return Error("field already exists: " + name);
627   *dest = &field;
628   return NoError();
629 }
630 
// Parses one field declaration of struct_def: identifier, ':', type, an
// optional "= default" value, metadata (via ParseMetaData) and the closing
// ';'. The field is added to struct_def through AddField(); union and
// vector-of-union fields get an extra auto-generated type field first.
// After parsing, the attributes are validated and copied into the FieldDef
// flags (deprecated, required, key, shared, native_inline, flexbuffer, ...).
// Returns NoError() on success or the first error encountered.
ParseField(StructDef & struct_def)631 CheckedError Parser::ParseField(StructDef &struct_def) {
632   std::string name = attribute_;
633 
  // NOTE(review): LookupStruct() also increments the refcount of the struct
  // it finds, even though this call is only an existence check.
634   if (LookupStruct(name))
635     return Error("field name can not be the same as table/struct name");
636 
  // Copy the doc comment now: the EXPECT calls below advance the lexer, and
  // Next() clears doc_comment_.
637   std::vector<std::string> dc = doc_comment_;
638   EXPECT(kTokenIdentifier);
639   EXPECT(':');
640   Type type;
641   ECHECK(ParseType(type));
642 
643   if (struct_def.fixed && !IsScalar(type.base_type) && !IsStruct(type))
644     return Error("structs_ may contain only scalar or struct fields");
645 
  // Set when an auto-generated companion "type" field was added for a union
  // or a vector of unions.
646   FieldDef *typefield = nullptr;
647   if (type.base_type == BASE_TYPE_UNION) {
648     // For union fields, add a second auto-generated field to hold the type,
649     // with a special suffix.
650     ECHECK(AddField(struct_def, name + UnionTypeFieldSuffix(),
651                     type.enum_def->underlying_type, &typefield));
652   } else if (type.base_type == BASE_TYPE_VECTOR &&
653              type.element == BASE_TYPE_UNION) {
654     // Only cpp, js and ts support the union vector feature so far.
655     if (!SupportsAdvancedUnionFeatures()) {
656       return Error(
657           "Vectors of unions are not yet supported in all "
658           "the specified programming languages.");
659     }
660     // For vector of union fields, add a second auto-generated vector field to
661     // hold the types, with a special suffix.
662     Type union_vector(BASE_TYPE_VECTOR, nullptr, type.enum_def);
663     union_vector.element = BASE_TYPE_UTYPE;
664     ECHECK(AddField(struct_def, name + UnionTypeFieldSuffix(), union_vector,
665                     &typefield));
666   }
667 
668   FieldDef *field;
669   ECHECK(AddField(struct_def, name, type, &field));
670 
  // Optional explicit default value.
671   if (token_ == '=') {
672     NEXT();
673     ECHECK(ParseSingleValue(&field->name, field->value, true));
    // Defaults are accepted for scalars only, and inside a fixed struct only
    // when the default is the literal "0".
674     if (!IsScalar(type.base_type) ||
675         (struct_def.fixed && field->value.constant != "0"))
676       return Error(
677             "default values currently only supported for scalars in tables");
678   }
  // For a (non-union, non-bit_flags) enum field the default has to be one of
  // the declared enum values.
679   if (type.enum_def &&
680       !type.enum_def->is_union &&
681       !type.enum_def->attributes.Lookup("bit_flags") &&
682       !type.enum_def->ReverseLookup(StringToInt(
683                                       field->value.constant.c_str()))) {
684     return Error("default value of " + field->value.constant + " for field " +
685                  name + " is not part of enum " + type.enum_def->name);
686   }
687   // Append .0 if the value does not have it (skip hex and scientific floats).
688   // This suffix is needed for generated C++ code.
689   if (IsFloat(type.base_type)) {
690     auto &text = field->value.constant;
691     FLATBUFFERS_ASSERT(false == text.empty());
692     auto s = text.c_str();
    // Skip leading spaces and an optional sign before inspecting the value.
693     while(*s == ' ') s++;
694     if (*s == '-' || *s == '+') s++;
695     // 1) A float constant (nan, inf, pi, etc) is a kind of identifier.
696     // 2) A float number doesn't need ".0" at the end if it has an exponent.
697     if ((false == IsIdentifierStart(*s)) &&
698         (std::string::npos == field->value.constant.find_first_of(".eEpP"))) {
699       field->value.constant += ".0";
700     }
701   }
702 
  // NOTE(review): for non-union enums this repeats the condition of the
  // error check above, so the warning looks unreachable in that case --
  // TODO confirm whether it can still trigger (e.g. for unions).
703   if (type.enum_def && IsScalar(type.base_type) && !struct_def.fixed &&
704       !type.enum_def->attributes.Lookup("bit_flags") &&
705       !type.enum_def->ReverseLookup(StringToInt(
706                                       field->value.constant.c_str())))
707     Warning("enum " + type.enum_def->name +
708             " does not have a declaration for this field\'s default of " +
709             field->value.constant);
710 
711   field->doc_comment = dc;
712   ECHECK(ParseMetaData(&field->attributes));
713   field->deprecated = field->attributes.Lookup("deprecated") != nullptr;
714   auto hash_name = field->attributes.Lookup("hash");
715   if (hash_name) {
    // The hash function must exist for the width of the field's type (or of
    // the element type when the field is a vector).
716     switch ((type.base_type == BASE_TYPE_VECTOR) ? type.element : type.base_type) {
717       case BASE_TYPE_SHORT:
718       case BASE_TYPE_USHORT: {
719         if (FindHashFunction16(hash_name->constant.c_str()) == nullptr)
720           return Error("Unknown hashing algorithm for 16 bit types: " +
721                        hash_name->constant);
722         break;
723       }
724       case BASE_TYPE_INT:
725       case BASE_TYPE_UINT: {
726         if (FindHashFunction32(hash_name->constant.c_str()) == nullptr)
727           return Error("Unknown hashing algorithm for 32 bit types: " +
728                        hash_name->constant);
729         break;
730       }
731       case BASE_TYPE_LONG:
732       case BASE_TYPE_ULONG: {
733         if (FindHashFunction64(hash_name->constant.c_str()) == nullptr)
734           return Error("Unknown hashing algorithm for 64 bit types: " +
735                        hash_name->constant);
736         break;
737       }
738       default:
739         return Error(
740             "only short, ushort, int, uint, long and ulong data types support hashing.");
741     }
742   }
743   auto cpp_type = field->attributes.Lookup("cpp_type");
744   if (cpp_type) {
745     if (!hash_name)
746       return Error("cpp_type can only be used with a hashed field");
747     /// forcing cpp_ptr_type to 'naked' if unset
748     auto cpp_ptr_type = field->attributes.Lookup("cpp_ptr_type");
749     if (!cpp_ptr_type) {
750       auto val = new Value();
751       val->type = cpp_type->type;
752       val->constant = "naked";
753       field->attributes.Add("cpp_ptr_type", val);
754     }
755   }
756   if (field->deprecated && struct_def.fixed)
757     return Error("can't deprecate fields in a struct");
758   field->required = field->attributes.Lookup("required") != nullptr;
759   if (field->required &&
760       (struct_def.fixed || IsScalar(type.base_type)))
761     return Error("only non-scalar fields in tables may be 'required'");
762   field->key = field->attributes.Lookup("key") != nullptr;
763   if (field->key) {
764     if (struct_def.has_key) return Error("only one field may be set as 'key'");
765     struct_def.has_key = true;
    // A non-scalar key is implicitly required, and only strings qualify.
766     if (!IsScalar(type.base_type)) {
767       field->required = true;
768       if (type.base_type != BASE_TYPE_STRING)
769         return Error("'key' field must be string or scalar type");
770     }
771   }
772   field->shared = field->attributes.Lookup("shared") != nullptr;
773   if (field->shared && field->value.type.base_type != BASE_TYPE_STRING)
774     return Error("shared can only be defined on strings");
775 
776   auto field_native_custom_alloc =
777       field->attributes.Lookup("native_custom_alloc");
778   if (field_native_custom_alloc)
779     return Error(
780         "native_custom_alloc can only be used with a table or struct "
781         "definition");
782 
783   field->native_inline = field->attributes.Lookup("native_inline") != nullptr;
784   if (field->native_inline && !IsStruct(field->value.type))
785     return Error("native_inline can only be defined on structs");
786 
787   auto nested = field->attributes.Lookup("nested_flatbuffer");
788   if (nested) {
789     if (nested->type.base_type != BASE_TYPE_STRING)
790       return Error(
791           "nested_flatbuffer attribute must be a string (the root type)");
792     if (type.base_type != BASE_TYPE_VECTOR || type.element != BASE_TYPE_UCHAR)
793       return Error(
794           "nested_flatbuffer attribute may only apply to a vector of ubyte");
795     // This will cause an error if the root type of the nested flatbuffer
796     // wasn't defined elsewhere.
797     LookupCreateStruct(nested->constant);
798 
799     // Keep a pointer to StructDef in FieldDef to simplify re-use later
800     auto nested_qualified_name =
801         current_namespace_->GetFullyQualifiedName(nested->constant);
802     field->nested_flatbuffer = LookupStruct(nested_qualified_name);
803   }
804 
805   if (field->attributes.Lookup("flexbuffer")) {
806     field->flexbuffer = true;
807     uses_flexbuffers_ = true;
808     if (type.base_type != BASE_TYPE_VECTOR ||
809         type.element != BASE_TYPE_UCHAR)
810       return Error("flexbuffer attribute may only apply to a vector of ubyte");
811   }
812 
813   if (typefield) {
814     if (!IsScalar(typefield->value.type.base_type)) {
815       // this is a union vector field
816       typefield->required = field->required;
817     }
818     // If this field is a union, and it has a manually assigned id,
819     // the automatically added type field should have an id as well (of N - 1).
820     auto attr = field->attributes.Lookup("id");
821     if (attr) {
822       auto id = atoi(attr->constant.c_str());
823       auto val = new Value();
824       val->type = attr->type;
825       val->constant = NumToString(id - 1);
826       typefield->attributes.Add("id", val);
827     }
828   }
829 
830   EXPECT(';');
831   return NoError();
832 }
833 
ParseString(Value & val)834 CheckedError Parser::ParseString(Value &val) {
835   auto s = attribute_;
836   EXPECT(kTokenStringConstant);
837   val.constant = NumToString(builder_.CreateString(s).o);
838   return NoError();
839 }
840 
ParseComma()841 CheckedError Parser::ParseComma() {
842   if (!opts.protobuf_ascii_alike) EXPECT(',');
843   return NoError();
844 }
845 
ParseAnyValue(Value & val,FieldDef * field,size_t parent_fieldn,const StructDef * parent_struct_def,uoffset_t count,bool inside_vector)846 CheckedError Parser::ParseAnyValue(Value &val, FieldDef *field,
847                                    size_t parent_fieldn,
848                                    const StructDef *parent_struct_def,
849                                    uoffset_t count,
850                                    bool inside_vector) {
851   switch (val.type.base_type) {
852     case BASE_TYPE_UNION: {
853       FLATBUFFERS_ASSERT(field);
854       std::string constant;
855       Vector<uint8_t> *vector_of_union_types = nullptr;
856       // Find corresponding type field we may have already parsed.
857       for (auto elem = field_stack_.rbegin() + count;
858            elem != field_stack_.rbegin() + parent_fieldn + count; ++elem) {
859         auto &type = elem->second->value.type;
860         if (type.enum_def == val.type.enum_def) {
861           if (inside_vector) {
862             if (type.base_type == BASE_TYPE_VECTOR &&
863                 type.element == BASE_TYPE_UTYPE) {
864               // Vector of union type field.
865               uoffset_t offset;
866               ECHECK(atot(elem->first.constant.c_str(), *this, &offset));
867               vector_of_union_types = reinterpret_cast<Vector<uint8_t> *>(
868                                         builder_.GetCurrentBufferPointer() +
869                                         builder_.GetSize() - offset);
870               break;
871             }
872           } else {
873             if (type.base_type == BASE_TYPE_UTYPE) {
874               // Union type field.
875               constant = elem->first.constant;
876               break;
877             }
878           }
879         }
880       }
881       if (constant.empty() && !inside_vector) {
882         // We haven't seen the type field yet. Sadly a lot of JSON writers
883         // output these in alphabetical order, meaning it comes after this
884         // value. So we scan past the value to find it, then come back here.
885         // We currently don't do this for vectors of unions because the
886         // scanning/serialization logic would get very complicated.
887         auto type_name = field->name + UnionTypeFieldSuffix();
888         FLATBUFFERS_ASSERT(parent_struct_def);
889         auto type_field = parent_struct_def->fields.Lookup(type_name);
890         FLATBUFFERS_ASSERT(type_field);  // Guaranteed by ParseField().
891         // Remember where we are in the source file, so we can come back here.
892         auto backup = *static_cast<ParserState *>(this);
893         ECHECK(SkipAnyJsonValue());  // The table.
894         ECHECK(ParseComma());
895         auto next_name = attribute_;
896         if (Is(kTokenStringConstant)) {
897           NEXT();
898         } else {
899           EXPECT(kTokenIdentifier);
900         }
901         if (next_name == type_name) {
902           EXPECT(':');
903           Value type_val = type_field->value;
904           ECHECK(ParseAnyValue(type_val, type_field, 0, nullptr, 0));
905           constant = type_val.constant;
906           // Got the information we needed, now rewind:
907           *static_cast<ParserState *>(this) = backup;
908         }
909       }
910       if (constant.empty() && !vector_of_union_types) {
911         return Error("missing type field for this union value: " +
912                      field->name);
913       }
914       uint8_t enum_idx;
915       if (vector_of_union_types) {
916         enum_idx = vector_of_union_types->Get(count);
917       } else {
918         ECHECK(atot(constant.c_str(), *this, &enum_idx));
919       }
920       auto enum_val = val.type.enum_def->ReverseLookup(enum_idx);
921       if (!enum_val) return Error("illegal type id for: " + field->name);
922       if (enum_val->union_type.base_type == BASE_TYPE_STRUCT) {
923         ECHECK(ParseTable(*enum_val->union_type.struct_def, &val.constant,
924                           nullptr));
925         if (enum_val->union_type.struct_def->fixed) {
926           // All BASE_TYPE_UNION values are offsets, so turn this into one.
927           SerializeStruct(*enum_val->union_type.struct_def, val);
928           builder_.ClearOffsets();
929           val.constant = NumToString(builder_.GetSize());
930         }
931       } else if (enum_val->union_type.base_type == BASE_TYPE_STRING) {
932         ECHECK(ParseString(val));
933       } else {
934         FLATBUFFERS_ASSERT(false);
935       }
936       break;
937     }
938     case BASE_TYPE_STRUCT:
939       ECHECK(ParseTable(*val.type.struct_def, &val.constant, nullptr));
940       break;
941     case BASE_TYPE_STRING: {
942       ECHECK(ParseString(val));
943       break;
944     }
945     case BASE_TYPE_VECTOR: {
946       uoffset_t off;
947       ECHECK(ParseVector(val.type.VectorType(), &off, field, parent_fieldn));
948       val.constant = NumToString(off);
949       break;
950     }
951     case BASE_TYPE_INT:
952     case BASE_TYPE_UINT:
953     case BASE_TYPE_LONG:
954     case BASE_TYPE_ULONG: {
955       if (field && field->attributes.Lookup("hash") &&
956           (token_ == kTokenIdentifier || token_ == kTokenStringConstant)) {
957         ECHECK(ParseHash(val, field));
958       } else {
959         ECHECK(ParseSingleValue(field ? &field->name : nullptr, val, false));
960       }
961       break;
962     }
963     default:
964       ECHECK(ParseSingleValue(field ? &field->name : nullptr, val, false));
965       break;
966   }
967   return NoError();
968 }
969 
SerializeStruct(const StructDef & struct_def,const Value & val)970 void Parser::SerializeStruct(const StructDef &struct_def, const Value &val) {
971   FLATBUFFERS_ASSERT(val.constant.length() == struct_def.bytesize);
972   builder_.Align(struct_def.minalign);
973   builder_.PushBytes(reinterpret_cast<const uint8_t *>(val.constant.c_str()),
974                      struct_def.bytesize);
975   builder_.AddStructOffset(val.offset, builder_.GetSize());
976 }
977 
978 template <typename F>
ParseTableDelimiters(size_t & fieldn,const StructDef * struct_def,F body)979 CheckedError Parser::ParseTableDelimiters(size_t &fieldn,
980                                           const StructDef *struct_def,
981                                           F body) {
982   // We allow tables both as JSON object{ .. } with field names
983   // or vector[..] with all fields in order
984   char terminator = '}';
985   bool is_nested_vector = struct_def && Is('[');
986   if (is_nested_vector) {
987     NEXT();
988     terminator = ']';
989   } else {
990     EXPECT('{');
991   }
992   for (;;) {
993     if ((!opts.strict_json || !fieldn) && Is(terminator)) break;
994     std::string name;
995     if (is_nested_vector) {
996       if (fieldn >= struct_def->fields.vec.size()) {
997         return Error("too many unnamed fields in nested array");
998       }
999       name = struct_def->fields.vec[fieldn]->name;
1000     } else {
1001       name = attribute_;
1002       if (Is(kTokenStringConstant)) {
1003         NEXT();
1004       } else {
1005         EXPECT(opts.strict_json ? kTokenStringConstant : kTokenIdentifier);
1006       }
1007       if (!opts.protobuf_ascii_alike || !(Is('{') || Is('['))) EXPECT(':');
1008     }
1009     ECHECK(body(name, fieldn, struct_def));
1010     if (Is(terminator)) break;
1011     ECHECK(ParseComma());
1012   }
1013   NEXT();
1014   if (is_nested_vector && fieldn != struct_def->fields.vec.size()) {
1015     return Error("wrong number of unnamed fields in table vector");
1016   }
1017   return NoError();
1018 }
1019 
ParseTable(const StructDef & struct_def,std::string * value,uoffset_t * ovalue)1020 CheckedError Parser::ParseTable(const StructDef &struct_def, std::string *value,
1021                                 uoffset_t *ovalue) {
1022   size_t fieldn_outer = 0;
1023   auto err = ParseTableDelimiters(
1024       fieldn_outer, &struct_def,
1025       [&](const std::string &name, size_t &fieldn,
1026           const StructDef *struct_def_inner) -> CheckedError {
1027         if (name == "$schema") {
1028           ECHECK(Expect(kTokenStringConstant));
1029           return NoError();
1030         }
1031         auto field = struct_def_inner->fields.Lookup(name);
1032         if (!field) {
1033           if (!opts.skip_unexpected_fields_in_json) {
1034             return Error("unknown field: " + name);
1035           } else {
1036             ECHECK(SkipAnyJsonValue());
1037           }
1038         } else {
1039           if (IsIdent("null") && !IsScalar(field->value.type.base_type)) {
1040             ECHECK(Next());  // Ignore this field.
1041           } else {
1042             Value val = field->value;
1043             if (field->flexbuffer) {
1044               flexbuffers::Builder builder(1024,
1045                                            flexbuffers::BUILDER_FLAG_SHARE_ALL);
1046               ECHECK(ParseFlexBufferValue(&builder));
1047               builder.Finish();
1048               // Force alignment for nested flexbuffer
1049               builder_.ForceVectorAlignment(builder.GetSize(), sizeof(uint8_t),
1050                                             sizeof(largest_scalar_t));
1051               auto off = builder_.CreateVector(builder.GetBuffer());
1052               val.constant = NumToString(off.o);
1053             } else if (field->nested_flatbuffer) {
1054               ECHECK(
1055                   ParseNestedFlatbuffer(val, field, fieldn, struct_def_inner));
1056             } else {
1057               ECHECK(Recurse([&]() {
1058                 return ParseAnyValue(val, field, fieldn, struct_def_inner, 0);
1059               }));
1060             }
1061             // Hardcoded insertion-sort with error-check.
1062             // If fields are specified in order, then this loop exits
1063             // immediately.
1064             auto elem = field_stack_.rbegin();
1065             for (; elem != field_stack_.rbegin() + fieldn; ++elem) {
1066               auto existing_field = elem->second;
1067               if (existing_field == field)
1068                 return Error("field set more than once: " + field->name);
1069               if (existing_field->value.offset < field->value.offset) break;
1070             }
1071             // Note: elem points to before the insertion point, thus .base()
1072             // points to the correct spot.
1073             field_stack_.insert(elem.base(), std::make_pair(val, field));
1074             fieldn++;
1075           }
1076         }
1077         return NoError();
1078       });
1079   ECHECK(err);
1080 
1081   // Check if all required fields are parsed.
1082   for (auto field_it = struct_def.fields.vec.begin();
1083        field_it != struct_def.fields.vec.end(); ++field_it) {
1084     auto required_field = *field_it;
1085     if (!required_field->required) { continue; }
1086     bool found = false;
1087     for (auto pf_it = field_stack_.end() - fieldn_outer;
1088          pf_it != field_stack_.end(); ++pf_it) {
1089       auto parsed_field = pf_it->second;
1090       if (parsed_field == required_field) {
1091         found = true;
1092         break;
1093       }
1094     }
1095     if (!found) {
1096       return Error("required field is missing: " + required_field->name +
1097                    " in " + struct_def.name);
1098     }
1099   }
1100 
1101   if (struct_def.fixed && fieldn_outer != struct_def.fields.vec.size())
1102     return Error("struct: wrong number of initializers: " + struct_def.name);
1103 
1104   auto start = struct_def.fixed ? builder_.StartStruct(struct_def.minalign)
1105                                 : builder_.StartTable();
1106 
1107   for (size_t size = struct_def.sortbysize ? sizeof(largest_scalar_t) : 1; size;
1108        size /= 2) {
1109     // Go through elements in reverse, since we're building the data backwards.
1110     for (auto it = field_stack_.rbegin();
1111          it != field_stack_.rbegin() + fieldn_outer; ++it) {
1112       auto &field_value = it->first;
1113       auto field = it->second;
1114       if (!struct_def.sortbysize ||
1115           size == SizeOf(field_value.type.base_type)) {
1116         switch (field_value.type.base_type) {
1117           // clang-format off
1118           #define FLATBUFFERS_TD(ENUM, IDLTYPE, \
1119             CTYPE, JTYPE, GTYPE, NTYPE, PTYPE, RTYPE) \
1120             case BASE_TYPE_ ## ENUM: \
1121               builder_.Pad(field->padding); \
1122               if (struct_def.fixed) { \
1123                 CTYPE val; \
1124                 ECHECK(atot(field_value.constant.c_str(), *this, &val)); \
1125                 builder_.PushElement(val); \
1126               } else { \
1127                 CTYPE val, valdef; \
1128                 ECHECK(atot(field_value.constant.c_str(), *this, &val)); \
1129                 ECHECK(atot(field->value.constant.c_str(), *this, &valdef)); \
1130                 builder_.AddElement(field_value.offset, val, valdef); \
1131               } \
1132               break;
1133             FLATBUFFERS_GEN_TYPES_SCALAR(FLATBUFFERS_TD);
1134           #undef FLATBUFFERS_TD
1135           #define FLATBUFFERS_TD(ENUM, IDLTYPE, \
1136             CTYPE, JTYPE, GTYPE, NTYPE, PTYPE, RTYPE) \
1137             case BASE_TYPE_ ## ENUM: \
1138               builder_.Pad(field->padding); \
1139               if (IsStruct(field->value.type)) { \
1140                 SerializeStruct(*field->value.type.struct_def, field_value); \
1141               } else { \
1142                 CTYPE val; \
1143                 ECHECK(atot(field_value.constant.c_str(), *this, &val)); \
1144                 builder_.AddOffset(field_value.offset, val); \
1145               } \
1146               break;
1147             FLATBUFFERS_GEN_TYPES_POINTER(FLATBUFFERS_TD);
1148           #undef FLATBUFFERS_TD
1149           // clang-format on
1150         }
1151       }
1152     }
1153   }
1154   for (size_t i = 0; i < fieldn_outer; i++) field_stack_.pop_back();
1155 
1156   if (struct_def.fixed) {
1157     builder_.ClearOffsets();
1158     builder_.EndStruct();
1159     FLATBUFFERS_ASSERT(value);
1160     // Temporarily store this struct in the value string, since it is to
1161     // be serialized in-place elsewhere.
1162     value->assign(
1163         reinterpret_cast<const char *>(builder_.GetCurrentBufferPointer()),
1164         struct_def.bytesize);
1165     builder_.PopBytes(struct_def.bytesize);
1166     FLATBUFFERS_ASSERT(!ovalue);
1167   } else {
1168     auto val = builder_.EndTable(start);
1169     if (ovalue) *ovalue = val;
1170     if (value) *value = NumToString(val);
1171   }
1172   return NoError();
1173 }
1174 
1175 template <typename F>
ParseVectorDelimiters(uoffset_t & count,F body)1176 CheckedError Parser::ParseVectorDelimiters(uoffset_t &count, F body) {
1177   EXPECT('[');
1178   for (;;) {
1179     if ((!opts.strict_json || !count) && Is(']')) break;
1180     ECHECK(body(count));
1181     count++;
1182     if (Is(']')) break;
1183     ECHECK(ParseComma());
1184   }
1185   NEXT();
1186   return NoError();
1187 }
1188 
ParseVector(const Type & type,uoffset_t * ovalue,FieldDef * field,size_t fieldn)1189 CheckedError Parser::ParseVector(const Type &type, uoffset_t *ovalue,
1190                                  FieldDef *field, size_t fieldn) {
1191   uoffset_t count = 0;
1192   auto err = ParseVectorDelimiters(count, [&](uoffset_t &) -> CheckedError {
1193     Value val;
1194     val.type = type;
1195     ECHECK(Recurse([&]() {
1196       return ParseAnyValue(val, field, fieldn, nullptr, count, true);
1197     }));
1198     field_stack_.push_back(std::make_pair(val, nullptr));
1199     return NoError();
1200   });
1201   ECHECK(err);
1202 
1203   builder_.StartVector(count * InlineSize(type) / InlineAlignment(type),
1204                        InlineAlignment(type));
1205   for (uoffset_t i = 0; i < count; i++) {
1206     // start at the back, since we're building the data backwards.
1207     auto &val = field_stack_.back().first;
1208     switch (val.type.base_type) {
1209       // clang-format off
1210       #define FLATBUFFERS_TD(ENUM, IDLTYPE, \
1211         CTYPE, JTYPE, GTYPE, NTYPE, PTYPE, RTYPE) \
1212         case BASE_TYPE_ ## ENUM: \
1213           if (IsStruct(val.type)) SerializeStruct(*val.type.struct_def, val); \
1214           else { \
1215              CTYPE elem; \
1216              ECHECK(atot(val.constant.c_str(), *this, &elem)); \
1217              builder_.PushElement(elem); \
1218           } \
1219           break;
1220         FLATBUFFERS_GEN_TYPES(FLATBUFFERS_TD)
1221       #undef FLATBUFFERS_TD
1222       // clang-format on
1223     }
1224     field_stack_.pop_back();
1225   }
1226 
1227   builder_.ClearOffsets();
1228   *ovalue = builder_.EndVector(count);
1229   return NoError();
1230 }
1231 
ParseNestedFlatbuffer(Value & val,FieldDef * field,size_t fieldn,const StructDef * parent_struct_def)1232 CheckedError Parser::ParseNestedFlatbuffer(Value &val, FieldDef *field,
1233                                            size_t fieldn,
1234                                            const StructDef *parent_struct_def) {
1235   if (token_ == '[') {  // backwards compat for 'legacy' ubyte buffers
1236     ECHECK(ParseAnyValue(val, field, fieldn, parent_struct_def, 0));
1237   } else {
1238     auto cursor_at_value_begin = cursor_;
1239     ECHECK(SkipAnyJsonValue());
1240     std::string substring(cursor_at_value_begin - 1, cursor_ - 1);
1241 
1242     // Create and initialize new parser
1243     Parser nested_parser;
1244     FLATBUFFERS_ASSERT(field->nested_flatbuffer);
1245     nested_parser.root_struct_def_ = field->nested_flatbuffer;
1246     nested_parser.enums_ = enums_;
1247     nested_parser.opts = opts;
1248     nested_parser.uses_flexbuffers_ = uses_flexbuffers_;
1249 
1250     // Parse JSON substring into new flatbuffer builder using nested_parser
1251     bool ok = nested_parser.Parse(substring.c_str(), nullptr, nullptr);
1252 
1253     // Clean nested_parser to avoid deleting the elements in
1254     // the SymbolTables on destruction
1255     nested_parser.enums_.dict.clear();
1256     nested_parser.enums_.vec.clear();
1257 
1258     if (!ok) {
1259       ECHECK(Error(nested_parser.error_));
1260     }
1261     // Force alignment for nested flatbuffer
1262     builder_.ForceVectorAlignment(nested_parser.builder_.GetSize(), sizeof(uint8_t),
1263                                   nested_parser.builder_.GetBufferMinAlignment());
1264 
1265     auto off = builder_.CreateVector(nested_parser.builder_.GetBufferPointer(),
1266                                      nested_parser.builder_.GetSize());
1267     val.constant = NumToString(off.o);
1268   }
1269   return NoError();
1270 }
1271 
ParseMetaData(SymbolTable<Value> * attributes)1272 CheckedError Parser::ParseMetaData(SymbolTable<Value> *attributes) {
1273   if (Is('(')) {
1274     NEXT();
1275     for (;;) {
1276       auto name = attribute_;
1277       if (false == (Is(kTokenIdentifier) || Is(kTokenStringConstant)))
1278         return Error("attribute name must be either identifier or string: " +
1279           name);
1280       if (known_attributes_.find(name) == known_attributes_.end())
1281         return Error("user define attributes must be declared before use: " +
1282                      name);
1283       NEXT();
1284       auto e = new Value();
1285       attributes->Add(name, e);
1286       if (Is(':')) {
1287         NEXT();
1288         ECHECK(ParseSingleValue(&name, *e, true));
1289       }
1290       if (Is(')')) {
1291         NEXT();
1292         break;
1293       }
1294       EXPECT(',');
1295     }
1296   }
1297   return NoError();
1298 }
1299 
TryTypedValue(const std::string * name,int dtoken,bool check,Value & e,BaseType req,bool * destmatch)1300 CheckedError Parser::TryTypedValue(const std::string *name, int dtoken,
1301                                    bool check, Value &e, BaseType req,
1302                                    bool *destmatch) {
1303   bool match = dtoken == token_;
1304   if (match) {
1305     FLATBUFFERS_ASSERT(*destmatch == false);
1306     *destmatch = true;
1307     e.constant = attribute_;
1308     // Check token match
1309     if (!check) {
1310       if (e.type.base_type == BASE_TYPE_NONE) {
1311         e.type.base_type = req;
1312       } else {
1313         return Error(
1314             std::string("type mismatch: expecting: ") +
1315             kTypeNames[e.type.base_type] + ", found: " + kTypeNames[req] +
1316             ", name: " + (name ? *name : "") + ", value: " + e.constant);
1317       }
1318     }
1319     // The exponent suffix of hexadecimal float-point number is mandatory.
1320     // A hex-integer constant is forbidden as an initializer of float number.
1321     if ((kTokenFloatConstant != dtoken) && IsFloat(e.type.base_type)) {
1322       const auto &s = e.constant;
1323       const auto k = s.find_first_of("0123456789.");
1324       if ((std::string::npos != k) && (s.length() > (k + 1)) &&
1325           (s.at(k) == '0' && is_alpha_char(s.at(k + 1), 'X')) &&
1326           (std::string::npos == s.find_first_of("pP", k + 2))) {
1327         return Error(
1328             "invalid number, the exponent suffix of hexadecimal "
1329             "floating-point literals is mandatory: \"" +
1330             s + "\"");
1331       }
1332     }
1333 
1334     NEXT();
1335   }
1336   return NoError();
1337 }
1338 
ParseEnumFromString(const Type & type,std::string * result)1339 CheckedError Parser::ParseEnumFromString(const Type &type,
1340                                          std::string *result) {
1341   int64_t i64 = 0;
1342   // Parse one or more enum identifiers, separated by spaces.
1343   const char *next = attribute_.c_str();
1344   do {
1345     const char *divider = strchr(next, ' ');
1346     std::string word;
1347     if (divider) {
1348       word = std::string(next, divider);
1349       next = divider + strspn(divider, " ");
1350     } else {
1351       word = next;
1352       next += word.length();
1353     }
1354     if (type.enum_def) {  // The field has an enum type
1355       auto enum_val = type.enum_def->vals.Lookup(word);
1356       if (!enum_val)
1357         return Error("unknown enum value: " + word +
1358                      ", for enum: " + type.enum_def->name);
1359       i64 |= enum_val->value;
1360     } else {  // No enum type, probably integral field.
1361       if (!IsInteger(type.base_type))
1362         return Error("not a valid value for this field: " + word);
1363       // TODO: could check if its a valid number constant here.
1364       const char *dot = strrchr(word.c_str(), '.');
1365       if (!dot)
1366         return Error("enum values need to be qualified by an enum type");
1367       std::string enum_def_str(word.c_str(), dot);
1368       std::string enum_val_str(dot + 1, word.c_str() + word.length());
1369       auto enum_def = LookupEnum(enum_def_str);
1370       if (!enum_def) return Error("unknown enum: " + enum_def_str);
1371       auto enum_val = enum_def->vals.Lookup(enum_val_str);
1372       if (!enum_val) return Error("unknown enum value: " + enum_val_str);
1373       i64 |= enum_val->value;
1374     }
1375   } while (*next);
1376   *result = NumToString(i64);
1377   return NoError();
1378 }
1379 
ParseHash(Value & e,FieldDef * field)1380 CheckedError Parser::ParseHash(Value &e, FieldDef *field) {
1381   FLATBUFFERS_ASSERT(field);
1382   Value *hash_name = field->attributes.Lookup("hash");
1383   switch (e.type.base_type) {
1384     case BASE_TYPE_SHORT: {
1385       auto hash = FindHashFunction16(hash_name->constant.c_str());
1386       int16_t hashed_value = static_cast<int16_t>(hash(attribute_.c_str()));
1387       e.constant = NumToString(hashed_value);
1388       break;
1389     }
1390     case BASE_TYPE_USHORT: {
1391       auto hash = FindHashFunction16(hash_name->constant.c_str());
1392       uint16_t hashed_value = hash(attribute_.c_str());
1393       e.constant = NumToString(hashed_value);
1394       break;
1395     }
1396     case BASE_TYPE_INT: {
1397       auto hash = FindHashFunction32(hash_name->constant.c_str());
1398       int32_t hashed_value = static_cast<int32_t>(hash(attribute_.c_str()));
1399       e.constant = NumToString(hashed_value);
1400       break;
1401     }
1402     case BASE_TYPE_UINT: {
1403       auto hash = FindHashFunction32(hash_name->constant.c_str());
1404       uint32_t hashed_value = hash(attribute_.c_str());
1405       e.constant = NumToString(hashed_value);
1406       break;
1407     }
1408     case BASE_TYPE_LONG: {
1409       auto hash = FindHashFunction64(hash_name->constant.c_str());
1410       int64_t hashed_value = static_cast<int64_t>(hash(attribute_.c_str()));
1411       e.constant = NumToString(hashed_value);
1412       break;
1413     }
1414     case BASE_TYPE_ULONG: {
1415       auto hash = FindHashFunction64(hash_name->constant.c_str());
1416       uint64_t hashed_value = hash(attribute_.c_str());
1417       e.constant = NumToString(hashed_value);
1418       break;
1419     }
1420     default: FLATBUFFERS_ASSERT(0);
1421   }
1422   NEXT();
1423   return NoError();
1424 }
1425 
TokenError()1426 CheckedError Parser::TokenError() {
1427   return Error("cannot parse value starting with: " + TokenToStringId(token_));
1428 }
1429 
ParseSingleValue(const std::string * name,Value & e,bool check_now)1430 CheckedError Parser::ParseSingleValue(const std::string *name, Value &e,
1431                                       bool check_now) {
1432   // First see if this could be a conversion function:
1433   if (token_ == kTokenIdentifier && *cursor_ == '(') {
1434     // todo: Extract processing of conversion functions to ParseFunction.
1435     const auto functionname = attribute_;
1436     if (!IsFloat(e.type.base_type)) {
1437       return Error(functionname + ": type of argument mismatch, expecting: " +
1438                    kTypeNames[BASE_TYPE_DOUBLE] +
1439                    ", found: " + kTypeNames[e.type.base_type] +
1440                    ", name: " + (name ? *name : "") + ", value: " + e.constant);
1441     }
1442     NEXT();
1443     EXPECT('(');
1444     ECHECK(Recurse([&]() { return ParseSingleValue(name, e, false); }));
1445     EXPECT(')');
1446     // calculate with double precision
1447     double x, y = 0.0;
1448     ECHECK(atot(e.constant.c_str(), *this, &x));
1449     auto func_match = false;
1450     // clang-format off
1451     #define FLATBUFFERS_FN_DOUBLE(name, op) \
1452       if (!func_match && functionname == name) { y = op; func_match = true; }
1453     FLATBUFFERS_FN_DOUBLE("deg", x / kPi * 180);
1454     FLATBUFFERS_FN_DOUBLE("rad", x * kPi / 180);
1455     FLATBUFFERS_FN_DOUBLE("sin", sin(x));
1456     FLATBUFFERS_FN_DOUBLE("cos", cos(x));
1457     FLATBUFFERS_FN_DOUBLE("tan", tan(x));
1458     FLATBUFFERS_FN_DOUBLE("asin", asin(x));
1459     FLATBUFFERS_FN_DOUBLE("acos", acos(x));
1460     FLATBUFFERS_FN_DOUBLE("atan", atan(x));
1461     // TODO(wvo): add more useful conversion functions here.
1462     #undef FLATBUFFERS_FN_DOUBLE
1463     // clang-format on
1464     if (true != func_match) {
1465       return Error(std::string("Unknown conversion function: ") + functionname +
1466                    ", field name: " + (name ? *name : "") +
1467                    ", value: " + e.constant);
1468     }
1469     e.constant = NumToString(y);
1470     return NoError();
1471   }
1472 
1473   auto match = false;
1474   // clang-format off
1475   #define TRY_ECHECK(force, dtoken, check, req)    \
1476     if (!match && ((check) || IsConstTrue(force))) \
1477     ECHECK(TryTypedValue(name, dtoken, check, e, req, &match))
1478   // clang-format on
1479 
1480   if (token_ == kTokenStringConstant || token_ == kTokenIdentifier) {
1481     const auto kTokenStringOrIdent = token_;
1482     // The string type is a most probable type, check it first.
1483     TRY_ECHECK(false, kTokenStringConstant,
1484                e.type.base_type == BASE_TYPE_STRING, BASE_TYPE_STRING);
1485 
1486     // avoid escaped and non-ascii in the string
1487     if ((token_ == kTokenStringConstant) && IsScalar(e.type.base_type) &&
1488         !attr_is_trivial_ascii_string_) {
1489       return Error(
1490           std::string("type mismatch or invalid value, an initializer of "
1491                       "non-string field must be trivial ASCII string: type: ") +
1492           kTypeNames[e.type.base_type] + ", name: " + (name ? *name : "") +
1493           ", value: " + attribute_);
1494     }
1495 
1496     // A boolean as true/false. Boolean as Integer check below.
1497     if (!match && IsBool(e.type.base_type)) {
1498       auto is_true = attribute_ == "true";
1499       if (is_true || attribute_ == "false") {
1500         attribute_ = is_true ? "1" : "0";
1501         // accepts both kTokenStringConstant and kTokenIdentifier
1502         TRY_ECHECK(false, kTokenStringOrIdent, IsBool(e.type.base_type),
1503                    BASE_TYPE_BOOL);
1504       }
1505     }
1506     // Check if this could be a string/identifier enum value.
1507     // Enum can have only true integer base type.
1508     if (!match && IsInteger(e.type.base_type) && !IsBool(e.type.base_type) &&
1509         IsIdentifierStart(*attribute_.c_str())) {
1510       ECHECK(ParseEnumFromString(e.type, &e.constant));
1511       NEXT();
1512       match = true;
1513     }
1514     // float/integer number in string
1515     if ((token_ == kTokenStringConstant) && IsScalar(e.type.base_type)) {
1516       // remove trailing whitespaces from attribute_
1517       auto last = attribute_.find_last_not_of(' ');
1518       if (std::string::npos != last)  // has non-whitespace
1519         attribute_.resize(last + 1);
1520     }
1521     // Float numbers or nan, inf, pi, etc.
1522     TRY_ECHECK(false, kTokenStringOrIdent, IsFloat(e.type.base_type),
1523                BASE_TYPE_FLOAT);
1524     // An integer constant in string.
1525     TRY_ECHECK(false, kTokenStringOrIdent, IsInteger(e.type.base_type),
1526                BASE_TYPE_INT);
1527     // Unknown tokens will be interpreted as string type.
1528     TRY_ECHECK(true, kTokenStringConstant, e.type.base_type == BASE_TYPE_STRING,
1529                BASE_TYPE_STRING);
1530   } else {
1531     // Try a float number.
1532     TRY_ECHECK(false, kTokenFloatConstant, IsFloat(e.type.base_type),
1533                BASE_TYPE_FLOAT);
1534     // Integer token can init any scalar (integer of float).
1535     TRY_ECHECK(true, kTokenIntegerConstant, IsScalar(e.type.base_type),
1536                BASE_TYPE_INT);
1537   }
1538   #undef TRY_ECHECK
1539 
1540   if (!match) return TokenError();
1541 
1542   // The check_now flag must be true when parse a fbs-schema.
1543   // This flag forces to check default scalar values or metadata of field.
1544   // For JSON parser the flag should be false.
1545   // If it is set for JSON each value will be checked twice (see ParseTable).
1546   if (check_now && IsScalar(e.type.base_type)) {
1547     // "re-pack" an integer scalar to remove any ambiguities like leading zeros
1548     // which can be treated as octal-literal (idl_gen_cpp/GenDefaultConstant).
1549     const auto repack = IsInteger(e.type.base_type);
1550     switch (e.type.base_type) {
1551     // clang-format off
1552     #define FLATBUFFERS_TD(ENUM, IDLTYPE, \
1553             CTYPE, JTYPE, GTYPE, NTYPE, PTYPE, RTYPE) \
1554             case BASE_TYPE_ ## ENUM: {\
1555                 CTYPE val; \
1556                 ECHECK(atot(e.constant.c_str(), *this, &val)); \
1557                 if(repack) e.constant = NumToString(val); \
1558               break; }
1559     FLATBUFFERS_GEN_TYPES_SCALAR(FLATBUFFERS_TD);
1560     #undef FLATBUFFERS_TD
1561     default: break;
1562     // clang-format on
1563     }
1564   }
1565   return NoError();
1566 }
1567 
LookupCreateStruct(const std::string & name,bool create_if_new,bool definition)1568 StructDef *Parser::LookupCreateStruct(const std::string &name,
1569                                       bool create_if_new, bool definition) {
1570   std::string qualified_name = current_namespace_->GetFullyQualifiedName(name);
1571   // See if it exists pre-declared by an unqualified use.
1572   auto struct_def = LookupStruct(name);
1573   if (struct_def && struct_def->predecl) {
1574     if (definition) {
1575       // Make sure it has the current namespace, and is registered under its
1576       // qualified name.
1577       struct_def->defined_namespace = current_namespace_;
1578       structs_.Move(name, qualified_name);
1579     }
1580     return struct_def;
1581   }
1582   // See if it exists pre-declared by an qualified use.
1583   struct_def = LookupStruct(qualified_name);
1584   if (struct_def && struct_def->predecl) {
1585     if (definition) {
1586       // Make sure it has the current namespace.
1587       struct_def->defined_namespace = current_namespace_;
1588     }
1589     return struct_def;
1590   }
1591   if (!definition) {
1592     // Search thru parent namespaces.
1593     for (size_t components = current_namespace_->components.size();
1594          components && !struct_def; components--) {
1595       struct_def = LookupStruct(
1596           current_namespace_->GetFullyQualifiedName(name, components - 1));
1597     }
1598   }
1599   if (!struct_def && create_if_new) {
1600     struct_def = new StructDef();
1601     if (definition) {
1602       structs_.Add(qualified_name, struct_def);
1603       struct_def->name = name;
1604       struct_def->defined_namespace = current_namespace_;
1605     } else {
1606       // Not a definition.
1607       // Rather than failing, we create a "pre declared" StructDef, due to
1608       // circular references, and check for errors at the end of parsing.
1609       // It is defined in the current namespace, as the best guess what the
1610       // final namespace will be.
1611       structs_.Add(name, struct_def);
1612       struct_def->name = name;
1613       struct_def->defined_namespace = current_namespace_;
1614       struct_def->original_location.reset(
1615           new std::string(file_being_parsed_ + ":" + NumToString(line_)));
1616     }
1617   }
1618   return struct_def;
1619 }
1620 
ParseEnum(bool is_union,EnumDef ** dest)1621 CheckedError Parser::ParseEnum(bool is_union, EnumDef **dest) {
1622   std::vector<std::string> enum_comment = doc_comment_;
1623   NEXT();
1624   std::string enum_name = attribute_;
1625   EXPECT(kTokenIdentifier);
1626   EnumDef *enum_def;
1627   ECHECK(StartEnum(enum_name, is_union, &enum_def));
1628   enum_def->doc_comment = enum_comment;
1629   if (!is_union && !opts.proto_mode) {
1630     // Give specialized error message, since this type spec used to
1631     // be optional in the first FlatBuffers release.
1632     if (!Is(':')) {
1633       return Error(
1634           "must specify the underlying integer type for this"
1635           " enum (e.g. \': short\', which was the default).");
1636     } else {
1637       NEXT();
1638     }
1639     // Specify the integer type underlying this enum.
1640     ECHECK(ParseType(enum_def->underlying_type));
1641     if (!IsInteger(enum_def->underlying_type.base_type) ||
1642         IsBool(enum_def->underlying_type.base_type))
1643       return Error("underlying enum type must be integral");
1644     // Make this type refer back to the enum it was derived from.
1645     enum_def->underlying_type.enum_def = enum_def;
1646   }
1647   ECHECK(ParseMetaData(&enum_def->attributes));
1648   EXPECT('{');
1649   if (is_union) enum_def->vals.Add("NONE", new EnumVal("NONE", 0));
1650   std::set<std::pair<BaseType, StructDef*>> union_types;
1651   for (;;) {
1652     if (opts.proto_mode && attribute_ == "option") {
1653       ECHECK(ParseProtoOption());
1654     } else {
1655       auto value_name = attribute_;
1656       auto full_name = value_name;
1657       std::vector<std::string> value_comment = doc_comment_;
1658       EXPECT(kTokenIdentifier);
1659       if (is_union) {
1660         ECHECK(ParseNamespacing(&full_name, &value_name));
1661         if (opts.union_value_namespacing) {
1662           // Since we can't namespace the actual enum identifiers, turn
1663           // namespace parts into part of the identifier.
1664           value_name = full_name;
1665           std::replace(value_name.begin(), value_name.end(), '.', '_');
1666         }
1667       }
1668       auto prevsize = enum_def->vals.vec.size();
1669       auto prevvalue = prevsize > 0 ? enum_def->vals.vec.back()->value : 0;
1670       auto &ev = *new EnumVal(value_name, 0);
1671       if (enum_def->vals.Add(value_name, &ev))
1672         return Error("enum value already exists: " + value_name);
1673       ev.doc_comment = value_comment;
1674       if (is_union) {
1675         if (Is(':')) {
1676           NEXT();
1677           ECHECK(ParseType(ev.union_type));
1678           if (ev.union_type.base_type != BASE_TYPE_STRUCT &&
1679               ev.union_type.base_type != BASE_TYPE_STRING)
1680             return Error("union value type may only be table/struct/string");
1681         } else {
1682           ev.union_type = Type(BASE_TYPE_STRUCT, LookupCreateStruct(full_name));
1683         }
1684         if (!enum_def->uses_multiple_type_instances) {
1685           auto union_type_key = std::make_pair(ev.union_type.base_type, ev.union_type.struct_def);
1686           if (union_types.count(union_type_key) > 0) {
1687             enum_def->uses_multiple_type_instances = true;
1688           } else {
1689             union_types.insert(union_type_key);
1690           }
1691         }
1692       }
1693       if (Is('=')) {
1694         NEXT();
1695         ECHECK(atot(attribute_.c_str(), *this, &ev.value));
1696         EXPECT(kTokenIntegerConstant);
1697         if (!opts.proto_mode && prevsize &&
1698             enum_def->vals.vec[prevsize - 1]->value >= ev.value)
1699           return Error("enum values must be specified in ascending order");
1700       } else if (prevsize == 0) {
1701         // already set to zero
1702       } else if (prevvalue != flatbuffers::numeric_limits<int64_t>::max()) {
1703         ev.value = prevvalue + 1;
1704       } else {
1705         return Error("enum value overflows");
1706       }
1707 
1708       // Check that value fits into the underlying type.
1709       switch (enum_def->underlying_type.base_type) {
1710         // clang-format off
1711         #define FLATBUFFERS_TD(ENUM, IDLTYPE, CTYPE, JTYPE, GTYPE, NTYPE, \
1712                                PTYPE, RTYPE)                              \
1713           case BASE_TYPE_##ENUM: {                                        \
1714             int64_t min_value = static_cast<int64_t>(                     \
1715               flatbuffers::numeric_limits<CTYPE>::lowest());              \
1716             int64_t max_value = static_cast<int64_t>(                     \
1717               flatbuffers::numeric_limits<CTYPE>::max());                 \
1718             if (ev.value < min_value || ev.value > max_value) {           \
1719               return Error(                                               \
1720                 "enum value does not fit [" +  NumToString(min_value) +   \
1721                 "; " + NumToString(max_value) + "]");                     \
1722             }                                                             \
1723             break;                                                        \
1724           }
1725         FLATBUFFERS_GEN_TYPES_SCALAR(FLATBUFFERS_TD);
1726         #undef FLATBUFFERS_TD
1727         default: break;
1728         // clang-format on
1729       }
1730 
1731       if (opts.proto_mode && Is('[')) {
1732         NEXT();
1733         // ignore attributes on enums.
1734         while (token_ != ']') NEXT();
1735         NEXT();
1736       }
1737     }
1738     if (!Is(opts.proto_mode ? ';' : ',')) break;
1739     NEXT();
1740     if (Is('}')) break;
1741   }
1742   EXPECT('}');
1743   if (enum_def->attributes.Lookup("bit_flags")) {
1744     for (auto it = enum_def->vals.vec.begin(); it != enum_def->vals.vec.end();
1745          ++it) {
1746       if (static_cast<size_t>((*it)->value) >=
1747           SizeOf(enum_def->underlying_type.base_type) * 8)
1748         return Error("bit flag out of range of underlying integral type");
1749       (*it)->value = 1LL << (*it)->value;
1750     }
1751   }
1752   if (dest) *dest = enum_def;
1753   types_.Add(current_namespace_->GetFullyQualifiedName(enum_def->name),
1754              new Type(BASE_TYPE_UNION, nullptr, enum_def));
1755   return NoError();
1756 }
1757 
StartStruct(const std::string & name,StructDef ** dest)1758 CheckedError Parser::StartStruct(const std::string &name, StructDef **dest) {
1759   auto &struct_def = *LookupCreateStruct(name, true, true);
1760   if (!struct_def.predecl) return Error("datatype already exists: " + name);
1761   struct_def.predecl = false;
1762   struct_def.name = name;
1763   struct_def.file = file_being_parsed_;
1764   // Move this struct to the back of the vector just in case it was predeclared,
1765   // to preserve declaration order.
1766   *std::remove(structs_.vec.begin(), structs_.vec.end(), &struct_def) =
1767       &struct_def;
1768   *dest = &struct_def;
1769   return NoError();
1770 }
1771 
CheckClash(std::vector<FieldDef * > & fields,StructDef * struct_def,const char * suffix,BaseType basetype)1772 CheckedError Parser::CheckClash(std::vector<FieldDef *> &fields,
1773                                 StructDef *struct_def, const char *suffix,
1774                                 BaseType basetype) {
1775   auto len = strlen(suffix);
1776   for (auto it = fields.begin(); it != fields.end(); ++it) {
1777     auto &fname = (*it)->name;
1778     if (fname.length() > len &&
1779         fname.compare(fname.length() - len, len, suffix) == 0 &&
1780         (*it)->value.type.base_type != BASE_TYPE_UTYPE) {
1781       auto field =
1782           struct_def->fields.Lookup(fname.substr(0, fname.length() - len));
1783       if (field && field->value.type.base_type == basetype)
1784         return Error("Field " + fname +
1785                      " would clash with generated functions for field " +
1786                      field->name);
1787     }
1788   }
1789   return NoError();
1790 }
1791 
SupportsAdvancedUnionFeatures() const1792 bool Parser::SupportsAdvancedUnionFeatures() const {
1793   return opts.lang_to_generate != 0 &&
1794          (opts.lang_to_generate & ~(IDLOptions::kCpp | IDLOptions::kJs |
1795                                     IDLOptions::kTs | IDLOptions::kPhp |
1796                                     IDLOptions::kJava | IDLOptions::kCSharp |
1797                                     IDLOptions::kBinary)) == 0;
1798 }
1799 
UniqueNamespace(Namespace * ns)1800 Namespace *Parser::UniqueNamespace(Namespace *ns) {
1801   for (auto it = namespaces_.begin(); it != namespaces_.end(); ++it) {
1802     if (ns->components == (*it)->components) {
1803       delete ns;
1804       return *it;
1805     }
1806   }
1807   namespaces_.push_back(ns);
1808   return ns;
1809 }
1810 
UnqualifiedName(std::string full_qualified_name)1811 std::string Parser::UnqualifiedName(std::string full_qualified_name) {
1812   Namespace *ns = new Namespace();
1813 
1814   std::size_t current, previous = 0;
1815   current = full_qualified_name.find('.');
1816   while (current != std::string::npos) {
1817     ns->components.push_back(
1818         full_qualified_name.substr(previous, current - previous));
1819     previous = current + 1;
1820     current = full_qualified_name.find('.', previous);
1821   }
1822   current_namespace_ = UniqueNamespace(ns);
1823   return full_qualified_name.substr(previous, current - previous);
1824 }
1825 
compareFieldDefs(const FieldDef * a,const FieldDef * b)1826 static bool compareFieldDefs(const FieldDef *a, const FieldDef *b) {
1827   auto a_id = atoi(a->attributes.Lookup("id")->constant.c_str());
1828   auto b_id = atoi(b->attributes.Lookup("id")->constant.c_str());
1829   return a_id < b_id;
1830 }
1831 
ParseDecl()1832 CheckedError Parser::ParseDecl() {
1833   std::vector<std::string> dc = doc_comment_;
1834   bool fixed = IsIdent("struct");
1835   if (!fixed && !IsIdent("table")) return Error("declaration expected");
1836   NEXT();
1837   std::string name = attribute_;
1838   EXPECT(kTokenIdentifier);
1839   StructDef *struct_def;
1840   ECHECK(StartStruct(name, &struct_def));
1841   struct_def->doc_comment = dc;
1842   struct_def->fixed = fixed;
1843   ECHECK(ParseMetaData(&struct_def->attributes));
1844   struct_def->sortbysize =
1845       struct_def->attributes.Lookup("original_order") == nullptr && !fixed;
1846   EXPECT('{');
1847   while (token_ != '}') ECHECK(ParseField(*struct_def));
1848   auto force_align = struct_def->attributes.Lookup("force_align");
1849   if (fixed) {
1850     if (force_align) {
1851       auto align = static_cast<size_t>(atoi(force_align->constant.c_str()));
1852       if (force_align->type.base_type != BASE_TYPE_INT ||
1853           align < struct_def->minalign || align > FLATBUFFERS_MAX_ALIGNMENT ||
1854           align & (align - 1))
1855         return Error(
1856             "force_align must be a power of two integer ranging from the"
1857             "struct\'s natural alignment to " +
1858             NumToString(FLATBUFFERS_MAX_ALIGNMENT));
1859       struct_def->minalign = align;
1860     }
1861     if (!struct_def->bytesize) return Error("size 0 structs not allowed");
1862   }
1863   struct_def->PadLastField(struct_def->minalign);
1864   // Check if this is a table that has manual id assignments
1865   auto &fields = struct_def->fields.vec;
1866   if (!fixed && fields.size()) {
1867     size_t num_id_fields = 0;
1868     for (auto it = fields.begin(); it != fields.end(); ++it) {
1869       if ((*it)->attributes.Lookup("id")) num_id_fields++;
1870     }
1871     // If any fields have ids..
1872     if (num_id_fields) {
1873       // Then all fields must have them.
1874       if (num_id_fields != fields.size())
1875         return Error(
1876             "either all fields or no fields must have an 'id' attribute");
1877       // Simply sort by id, then the fields are the same as if no ids had
1878       // been specified.
1879       std::sort(fields.begin(), fields.end(), compareFieldDefs);
1880       // Verify we have a contiguous set, and reassign vtable offsets.
1881       for (int i = 0; i < static_cast<int>(fields.size()); i++) {
1882         if (i != atoi(fields[i]->attributes.Lookup("id")->constant.c_str()))
1883           return Error("field id\'s must be consecutive from 0, id " +
1884                        NumToString(i) + " missing or set twice");
1885         fields[i]->value.offset = FieldIndexToOffset(static_cast<voffset_t>(i));
1886       }
1887     }
1888   }
1889 
1890   ECHECK(
1891       CheckClash(fields, struct_def, UnionTypeFieldSuffix(), BASE_TYPE_UNION));
1892   ECHECK(CheckClash(fields, struct_def, "Type", BASE_TYPE_UNION));
1893   ECHECK(CheckClash(fields, struct_def, "_length", BASE_TYPE_VECTOR));
1894   ECHECK(CheckClash(fields, struct_def, "Length", BASE_TYPE_VECTOR));
1895   ECHECK(CheckClash(fields, struct_def, "_byte_vector", BASE_TYPE_STRING));
1896   ECHECK(CheckClash(fields, struct_def, "ByteVector", BASE_TYPE_STRING));
1897   EXPECT('}');
1898   types_.Add(current_namespace_->GetFullyQualifiedName(struct_def->name),
1899              new Type(BASE_TYPE_STRUCT, struct_def, nullptr));
1900   return NoError();
1901 }
1902 
ParseService()1903 CheckedError Parser::ParseService() {
1904   std::vector<std::string> service_comment = doc_comment_;
1905   NEXT();
1906   auto service_name = attribute_;
1907   EXPECT(kTokenIdentifier);
1908   auto &service_def = *new ServiceDef();
1909   service_def.name = service_name;
1910   service_def.file = file_being_parsed_;
1911   service_def.doc_comment = service_comment;
1912   service_def.defined_namespace = current_namespace_;
1913   if (services_.Add(current_namespace_->GetFullyQualifiedName(service_name),
1914                     &service_def))
1915     return Error("service already exists: " + service_name);
1916   ECHECK(ParseMetaData(&service_def.attributes));
1917   EXPECT('{');
1918   do {
1919     std::vector<std::string> doc_comment = doc_comment_;
1920     auto rpc_name = attribute_;
1921     EXPECT(kTokenIdentifier);
1922     EXPECT('(');
1923     Type reqtype, resptype;
1924     ECHECK(ParseTypeIdent(reqtype));
1925     EXPECT(')');
1926     EXPECT(':');
1927     ECHECK(ParseTypeIdent(resptype));
1928     if (reqtype.base_type != BASE_TYPE_STRUCT || reqtype.struct_def->fixed ||
1929         resptype.base_type != BASE_TYPE_STRUCT || resptype.struct_def->fixed)
1930       return Error("rpc request and response types must be tables");
1931     auto &rpc = *new RPCCall();
1932     rpc.name = rpc_name;
1933     rpc.request = reqtype.struct_def;
1934     rpc.response = resptype.struct_def;
1935     rpc.doc_comment = doc_comment;
1936     if (service_def.calls.Add(rpc_name, &rpc))
1937       return Error("rpc already exists: " + rpc_name);
1938     ECHECK(ParseMetaData(&rpc.attributes));
1939     EXPECT(';');
1940   } while (token_ != '}');
1941   NEXT();
1942   return NoError();
1943 }
1944 
SetRootType(const char * name)1945 bool Parser::SetRootType(const char *name) {
1946   root_struct_def_ = LookupStruct(name);
1947   if (!root_struct_def_)
1948     root_struct_def_ =
1949         LookupStruct(current_namespace_->GetFullyQualifiedName(name));
1950   return root_struct_def_ != nullptr;
1951 }
1952 
MarkGenerated()1953 void Parser::MarkGenerated() {
1954   // This function marks all existing definitions as having already
1955   // been generated, which signals no code for included files should be
1956   // generated.
1957   for (auto it = enums_.vec.begin(); it != enums_.vec.end(); ++it) {
1958     (*it)->generated = true;
1959   }
1960   for (auto it = structs_.vec.begin(); it != structs_.vec.end(); ++it) {
1961     if (!(*it)->predecl) { (*it)->generated = true; }
1962   }
1963   for (auto it = services_.vec.begin(); it != services_.vec.end(); ++it) {
1964     (*it)->generated = true;
1965   }
1966 }
1967 
ParseNamespace()1968 CheckedError Parser::ParseNamespace() {
1969   NEXT();
1970   auto ns = new Namespace();
1971   namespaces_.push_back(ns);  // Store it here to not leak upon error.
1972   if (token_ != ';') {
1973     for (;;) {
1974       ns->components.push_back(attribute_);
1975       EXPECT(kTokenIdentifier);
1976       if (Is('.')) NEXT() else break;
1977     }
1978   }
1979   namespaces_.pop_back();
1980   current_namespace_ = UniqueNamespace(ns);
1981   EXPECT(';');
1982   return NoError();
1983 }
1984 
compareEnumVals(const EnumVal * a,const EnumVal * b)1985 static bool compareEnumVals(const EnumVal *a, const EnumVal *b) {
1986   return a->value < b->value;
1987 }
1988 
// Best-effort parsing of .proto declarations, with the aim of turning them
// into the closest corresponding FlatBuffers equivalent.
// We parse everything as identifiers instead of keywords, since we don't
// want protobuf keywords to become invalid identifiers in FlatBuffers.
ParseProtoDecl()1993 CheckedError Parser::ParseProtoDecl() {
1994   bool isextend = IsIdent("extend");
1995   if (IsIdent("package")) {
1996     // These are identical in syntax to FlatBuffer's namespace decl.
1997     ECHECK(ParseNamespace());
1998   } else if (IsIdent("message") || isextend) {
1999     std::vector<std::string> struct_comment = doc_comment_;
2000     NEXT();
2001     StructDef *struct_def = nullptr;
2002     Namespace *parent_namespace = nullptr;
2003     if (isextend) {
2004       if (Is('.')) NEXT();  // qualified names may start with a . ?
2005       auto id = attribute_;
2006       EXPECT(kTokenIdentifier);
2007       ECHECK(ParseNamespacing(&id, nullptr));
2008       struct_def = LookupCreateStruct(id, false);
2009       if (!struct_def)
2010         return Error("cannot extend unknown message type: " + id);
2011     } else {
2012       std::string name = attribute_;
2013       EXPECT(kTokenIdentifier);
2014       ECHECK(StartStruct(name, &struct_def));
2015       // Since message definitions can be nested, we create a new namespace.
2016       auto ns = new Namespace();
2017       // Copy of current namespace.
2018       *ns = *current_namespace_;
2019       // But with current message name.
2020       ns->components.push_back(name);
2021       ns->from_table++;
2022       parent_namespace = current_namespace_;
2023       current_namespace_ = UniqueNamespace(ns);
2024     }
2025     struct_def->doc_comment = struct_comment;
2026     ECHECK(ParseProtoFields(struct_def, isextend, false));
2027     if (!isextend) { current_namespace_ = parent_namespace; }
2028     if (Is(';')) NEXT();
2029   } else if (IsIdent("enum")) {
2030     // These are almost the same, just with different terminator:
2031     EnumDef *enum_def;
2032     ECHECK(ParseEnum(false, &enum_def));
2033     if (Is(';')) NEXT();
2034     // Protobuf allows them to be specified in any order, so sort afterwards.
2035     auto &v = enum_def->vals.vec;
2036     std::sort(v.begin(), v.end(), compareEnumVals);
2037 
2038     // Temp: remove any duplicates, as .fbs files can't handle them.
2039     for (auto it = v.begin(); it != v.end();) {
2040       if (it != v.begin() && it[0]->value == it[-1]->value) {
2041         auto ref = it[-1];
2042         auto ev = it[0];
2043         for (auto dit = enum_def->vals.dict.begin();
2044              dit != enum_def->vals.dict.end(); ++dit) {
2045           if (dit->second == ev) dit->second = ref;  // reassign
2046         }
2047         delete ev;  // delete enum value
2048         it = v.erase(it);
2049       } else {
2050         ++it;
2051       }
2052     }
2053   } else if (IsIdent("syntax")) {  // Skip these.
2054     NEXT();
2055     EXPECT('=');
2056     EXPECT(kTokenStringConstant);
2057     EXPECT(';');
2058   } else if (IsIdent("option")) {  // Skip these.
2059     ECHECK(ParseProtoOption());
2060     EXPECT(';');
2061   } else if (IsIdent("service")) {  // Skip these.
2062     NEXT();
2063     EXPECT(kTokenIdentifier);
2064     ECHECK(ParseProtoCurliesOrIdent());
2065   } else {
2066     return Error("don\'t know how to parse .proto declaration starting with " +
2067                  TokenToStringId(token_));
2068   }
2069   return NoError();
2070 }
2071 
StartEnum(const std::string & enum_name,bool is_union,EnumDef ** dest)2072 CheckedError Parser::StartEnum(const std::string &enum_name, bool is_union,
2073                                EnumDef **dest) {
2074   auto &enum_def = *new EnumDef();
2075   enum_def.name = enum_name;
2076   enum_def.file = file_being_parsed_;
2077   enum_def.doc_comment = doc_comment_;
2078   enum_def.is_union = is_union;
2079   enum_def.defined_namespace = current_namespace_;
2080   if (enums_.Add(current_namespace_->GetFullyQualifiedName(enum_name),
2081                  &enum_def))
2082     return Error("enum already exists: " + enum_name);
2083   enum_def.underlying_type.base_type = is_union ? BASE_TYPE_UTYPE
2084                                                 : BASE_TYPE_INT;
2085   enum_def.underlying_type.enum_def = &enum_def;
2086   if (dest) *dest = &enum_def;
2087   return NoError();
2088 }
2089 
ParseProtoFields(StructDef * struct_def,bool isextend,bool inside_oneof)2090 CheckedError Parser::ParseProtoFields(StructDef *struct_def, bool isextend,
2091                                       bool inside_oneof) {
2092   EXPECT('{');
2093   while (token_ != '}') {
2094     if (IsIdent("message") || IsIdent("extend") || IsIdent("enum")) {
2095       // Nested declarations.
2096       ECHECK(ParseProtoDecl());
2097     } else if (IsIdent("extensions")) {  // Skip these.
2098       NEXT();
2099       EXPECT(kTokenIntegerConstant);
2100       if (Is(kTokenIdentifier)) {
2101         NEXT();  // to
2102         NEXT();  // num
2103       }
2104       EXPECT(';');
2105     } else if (IsIdent("option")) {  // Skip these.
2106       ECHECK(ParseProtoOption());
2107       EXPECT(';');
2108     } else if (IsIdent("reserved")) {  // Skip these.
2109       NEXT();
2110       while (!Is(';')) { NEXT(); }  // A variety of formats, just skip.
2111       NEXT();
2112     } else {
2113       std::vector<std::string> field_comment = doc_comment_;
2114       // Parse the qualifier.
2115       bool required = false;
2116       bool repeated = false;
2117       bool oneof = false;
2118       if (!inside_oneof) {
2119         if (IsIdent("optional")) {
2120           // This is the default.
2121           NEXT();
2122         } else if (IsIdent("required")) {
2123           required = true;
2124           NEXT();
2125         } else if (IsIdent("repeated")) {
2126           repeated = true;
2127           NEXT();
2128         } else if (IsIdent("oneof")) {
2129           oneof = true;
2130           NEXT();
2131         } else {
2132           // can't error, proto3 allows decls without any of the above.
2133         }
2134       }
2135       StructDef *anonymous_struct = nullptr;
2136       EnumDef *oneof_union = nullptr;
2137       Type type;
2138       if (IsIdent("group") || oneof) {
2139         if (!oneof) NEXT();
2140         if (oneof && opts.proto_oneof_union) {
2141           auto name = MakeCamel(attribute_, true) + "Union";
2142           ECHECK(StartEnum(name, true, &oneof_union));
2143           type = Type(BASE_TYPE_UNION, nullptr, oneof_union);
2144         } else {
2145           auto name = "Anonymous" + NumToString(anonymous_counter++);
2146           ECHECK(StartStruct(name, &anonymous_struct));
2147           type = Type(BASE_TYPE_STRUCT, anonymous_struct);
2148         }
2149       } else {
2150         ECHECK(ParseTypeFromProtoType(&type));
2151       }
2152       // Repeated elements get mapped to a vector.
2153       if (repeated) {
2154         type.element = type.base_type;
2155         type.base_type = BASE_TYPE_VECTOR;
2156         if (type.element == BASE_TYPE_VECTOR) {
2157           // We have a vector or vectors, which FlatBuffers doesn't support.
2158           // For now make it a vector of string (since the source is likely
2159           // "repeated bytes").
2160           // TODO(wvo): A better solution would be to wrap this in a table.
2161           type.element = BASE_TYPE_STRING;
2162         }
2163       }
2164       std::string name = attribute_;
2165       EXPECT(kTokenIdentifier);
2166       if (!oneof) {
2167         // Parse the field id. Since we're just translating schemas, not
2168         // any kind of binary compatibility, we can safely ignore these, and
2169         // assign our own.
2170         EXPECT('=');
2171         EXPECT(kTokenIntegerConstant);
2172       }
2173       FieldDef *field = nullptr;
2174       if (isextend) {
2175         // We allow a field to be re-defined when extending.
2176         // TODO: are there situations where that is problematic?
2177         field = struct_def->fields.Lookup(name);
2178       }
2179       if (!field) ECHECK(AddField(*struct_def, name, type, &field));
2180       field->doc_comment = field_comment;
2181       if (!IsScalar(type.base_type)) field->required = required;
2182       // See if there's a default specified.
2183       if (Is('[')) {
2184         NEXT();
2185         for (;;) {
2186           auto key = attribute_;
2187           ECHECK(ParseProtoKey());
2188           EXPECT('=');
2189           auto val = attribute_;
2190           ECHECK(ParseProtoCurliesOrIdent());
2191           if (key == "default") {
2192             // Temp: skip non-numeric defaults (enums).
2193             auto numeric = strpbrk(val.c_str(), "0123456789-+.");
2194             if (IsScalar(type.base_type) && numeric == val.c_str())
2195               field->value.constant = val;
2196           } else if (key == "deprecated") {
2197             field->deprecated = val == "true";
2198           }
2199           if (!Is(',')) break;
2200           NEXT();
2201         }
2202         EXPECT(']');
2203       }
2204       if (anonymous_struct) {
2205         ECHECK(ParseProtoFields(anonymous_struct, false, oneof));
2206         if (Is(';')) NEXT();
2207       } else if (oneof_union) {
2208         // Parse into a temporary StructDef, then transfer fields into an
2209         // EnumDef describing the oneof as a union.
2210         StructDef oneof_struct;
2211         ECHECK(ParseProtoFields(&oneof_struct, false, oneof));
2212         if (Is(';')) NEXT();
2213         for (auto field_it = oneof_struct.fields.vec.begin();
2214              field_it != oneof_struct.fields.vec.end(); ++field_it) {
2215           const auto &oneof_field = **field_it;
2216           const auto &oneof_type = oneof_field.value.type;
2217           if (oneof_type.base_type != BASE_TYPE_STRUCT ||
2218               !oneof_type.struct_def || oneof_type.struct_def->fixed)
2219             return Error("oneof '" + name +
2220                 "' cannot be mapped to a union because member '" +
2221                 oneof_field.name + "' is not a table type.");
2222           auto enum_val = new EnumVal(oneof_type.struct_def->name,
2223                                       oneof_union->vals.vec.size());
2224           enum_val->union_type = oneof_type;
2225           enum_val->doc_comment = oneof_field.doc_comment;
2226           oneof_union->vals.Add(oneof_field.name, enum_val);
2227         }
2228       } else {
2229         EXPECT(';');
2230       }
2231     }
2232   }
2233   NEXT();
2234   return NoError();
2235 }
2236 
ParseProtoKey()2237 CheckedError Parser::ParseProtoKey() {
2238   if (token_ == '(') {
2239     NEXT();
2240     // Skip "(a.b)" style custom attributes.
2241     while (token_ == '.' || token_ == kTokenIdentifier) NEXT();
2242     EXPECT(')');
2243     while (Is('.')) {
2244       NEXT();
2245       EXPECT(kTokenIdentifier);
2246     }
2247   } else {
2248     EXPECT(kTokenIdentifier);
2249   }
2250   return NoError();
2251 }
2252 
ParseProtoCurliesOrIdent()2253 CheckedError Parser::ParseProtoCurliesOrIdent() {
2254   if (Is('{')) {
2255     NEXT();
2256     for (int nesting = 1; nesting;) {
2257       if (token_ == '{')
2258         nesting++;
2259       else if (token_ == '}')
2260         nesting--;
2261       NEXT();
2262     }
2263   } else {
2264     NEXT();  // Any single token.
2265   }
2266   return NoError();
2267 }
2268 
ParseProtoOption()2269 CheckedError Parser::ParseProtoOption() {
2270   NEXT();
2271   ECHECK(ParseProtoKey());
2272   EXPECT('=');
2273   ECHECK(ParseProtoCurliesOrIdent());
2274   return NoError();
2275 }
2276 
2277 // Parse a protobuf type, and map it to the corresponding FlatBuffer one.
ParseTypeFromProtoType(Type * type)2278 CheckedError Parser::ParseTypeFromProtoType(Type *type) {
2279   struct type_lookup {
2280     const char *proto_type;
2281     BaseType fb_type, element;
2282   };
2283   static type_lookup lookup[] = {
2284     { "float", BASE_TYPE_FLOAT, BASE_TYPE_NONE },
2285     { "double", BASE_TYPE_DOUBLE, BASE_TYPE_NONE },
2286     { "int32", BASE_TYPE_INT, BASE_TYPE_NONE },
2287     { "int64", BASE_TYPE_LONG, BASE_TYPE_NONE },
2288     { "uint32", BASE_TYPE_UINT, BASE_TYPE_NONE },
2289     { "uint64", BASE_TYPE_ULONG, BASE_TYPE_NONE },
2290     { "sint32", BASE_TYPE_INT, BASE_TYPE_NONE },
2291     { "sint64", BASE_TYPE_LONG, BASE_TYPE_NONE },
2292     { "fixed32", BASE_TYPE_UINT, BASE_TYPE_NONE },
2293     { "fixed64", BASE_TYPE_ULONG, BASE_TYPE_NONE },
2294     { "sfixed32", BASE_TYPE_INT, BASE_TYPE_NONE },
2295     { "sfixed64", BASE_TYPE_LONG, BASE_TYPE_NONE },
2296     { "bool", BASE_TYPE_BOOL, BASE_TYPE_NONE },
2297     { "string", BASE_TYPE_STRING, BASE_TYPE_NONE },
2298     { "bytes", BASE_TYPE_VECTOR, BASE_TYPE_UCHAR },
2299     { nullptr, BASE_TYPE_NONE, BASE_TYPE_NONE }
2300   };
2301   for (auto tl = lookup; tl->proto_type; tl++) {
2302     if (attribute_ == tl->proto_type) {
2303       type->base_type = tl->fb_type;
2304       type->element = tl->element;
2305       NEXT();
2306       return NoError();
2307     }
2308   }
2309   if (Is('.')) NEXT();  // qualified names may start with a . ?
2310   ECHECK(ParseTypeIdent(*type));
2311   return NoError();
2312 }
2313 
SkipAnyJsonValue()2314 CheckedError Parser::SkipAnyJsonValue() {
2315   switch (token_) {
2316     case '{': {
2317       size_t fieldn_outer = 0;
2318       return ParseTableDelimiters(
2319           fieldn_outer, nullptr,
2320           [&](const std::string &, size_t &fieldn,
2321               const StructDef *) -> CheckedError {
2322             ECHECK(Recurse([&]() { return SkipAnyJsonValue(); }));
2323             fieldn++;
2324             return NoError();
2325           });
2326     }
2327     case '[': {
2328       uoffset_t count = 0;
2329       return ParseVectorDelimiters(count, [&](uoffset_t &) -> CheckedError {
2330         return Recurse([&]() { return SkipAnyJsonValue(); });
2331       });
2332     }
2333     case kTokenStringConstant:
2334     case kTokenIntegerConstant:
2335     case kTokenFloatConstant: NEXT(); break;
2336     default:
2337       if (IsIdent("true") || IsIdent("false") || IsIdent("null")) {
2338         NEXT();
2339       } else
2340         return TokenError();
2341   }
2342   return NoError();
2343 }
2344 
ParseFlexBufferValue(flexbuffers::Builder * builder)2345 CheckedError Parser::ParseFlexBufferValue(flexbuffers::Builder *builder) {
2346   switch (token_) {
2347     case '{': {
2348       auto start = builder->StartMap();
2349       size_t fieldn_outer = 0;
2350       auto err =
2351           ParseTableDelimiters(fieldn_outer, nullptr,
2352                                [&](const std::string &name, size_t &fieldn,
2353                                    const StructDef *) -> CheckedError {
2354                                  builder->Key(name);
2355                                  ECHECK(ParseFlexBufferValue(builder));
2356                                  fieldn++;
2357                                  return NoError();
2358                                });
2359       ECHECK(err);
2360       builder->EndMap(start);
2361       break;
2362     }
2363     case '[': {
2364       auto start = builder->StartVector();
2365       uoffset_t count = 0;
2366       ECHECK(ParseVectorDelimiters(count, [&](uoffset_t &) -> CheckedError {
2367         return ParseFlexBufferValue(builder);
2368       }));
2369       builder->EndVector(start, false, false);
2370       break;
2371     }
2372     case kTokenStringConstant:
2373       builder->String(attribute_);
2374       EXPECT(kTokenStringConstant);
2375       break;
2376     case kTokenIntegerConstant:
2377       builder->Int(StringToInt(attribute_.c_str()));
2378       EXPECT(kTokenIntegerConstant);
2379       break;
2380     case kTokenFloatConstant:
2381       builder->Double(strtod(attribute_.c_str(), nullptr));
2382       EXPECT(kTokenFloatConstant);
2383       break;
2384     default:
2385       if (IsIdent("true")) {
2386         builder->Bool(true);
2387         NEXT();
2388       } else if (IsIdent("false")) {
2389         builder->Bool(false);
2390         NEXT();
2391       } else if (IsIdent("null")) {
2392         builder->Null();
2393         NEXT();
2394       } else
2395         return TokenError();
2396   }
2397   return NoError();
2398 }
2399 
ParseFlexBuffer(const char * source,const char * source_filename,flexbuffers::Builder * builder)2400 bool Parser::ParseFlexBuffer(const char *source, const char *source_filename,
2401                              flexbuffers::Builder *builder) {
2402   auto ok = !StartParseFile(source, source_filename).Check() &&
2403             !ParseFlexBufferValue(builder).Check();
2404   if (ok) builder->Finish();
2405   return ok;
2406 }
2407 
Parse(const char * source,const char ** include_paths,const char * source_filename)2408 bool Parser::Parse(const char *source, const char **include_paths,
2409                    const char *source_filename) {
2410   FLATBUFFERS_ASSERT(0 == recurse_protection_counter);
2411   auto r = !ParseRoot(source, include_paths, source_filename).Check();
2412   FLATBUFFERS_ASSERT(0 == recurse_protection_counter);
2413   return r;
2414 }
2415 
StartParseFile(const char * source,const char * source_filename)2416 CheckedError Parser::StartParseFile(const char *source,
2417                                     const char *source_filename) {
2418   file_being_parsed_ = source_filename ? source_filename : "";
2419   source_ = source;
2420   ResetState(source_);
2421   error_.clear();
2422   ECHECK(SkipByteOrderMark());
2423   NEXT();
2424   if (Is(kTokenEof)) return Error("input file is empty");
2425   return NoError();
2426 }
2427 
ParseRoot(const char * source,const char ** include_paths,const char * source_filename)2428 CheckedError Parser::ParseRoot(const char *source, const char **include_paths,
2429                                const char *source_filename) {
2430   ECHECK(DoParse(source, include_paths, source_filename, nullptr));
2431 
2432   // Check that all types were defined.
2433   for (auto it = structs_.vec.begin(); it != structs_.vec.end();) {
2434     auto &struct_def = **it;
2435     if (struct_def.predecl) {
2436       if (opts.proto_mode) {
2437         // Protos allow enums to be used before declaration, so check if that
2438         // is the case here.
2439         EnumDef *enum_def = nullptr;
2440         for (size_t components =
2441                  struct_def.defined_namespace->components.size() + 1;
2442              components && !enum_def; components--) {
2443           auto qualified_name =
2444               struct_def.defined_namespace->GetFullyQualifiedName(
2445                   struct_def.name, components - 1);
2446           enum_def = LookupEnum(qualified_name);
2447         }
2448         if (enum_def) {
2449           // This is pretty slow, but a simple solution for now.
2450           auto initial_count = struct_def.refcount;
2451           for (auto struct_it = structs_.vec.begin();
2452                struct_it != structs_.vec.end(); ++struct_it) {
2453             auto &sd = **struct_it;
2454             for (auto field_it = sd.fields.vec.begin();
2455                  field_it != sd.fields.vec.end(); ++field_it) {
2456               auto &field = **field_it;
2457               if (field.value.type.struct_def == &struct_def) {
2458                 field.value.type.struct_def = nullptr;
2459                 field.value.type.enum_def = enum_def;
2460                 auto &bt = field.value.type.base_type == BASE_TYPE_VECTOR
2461                                ? field.value.type.element
2462                                : field.value.type.base_type;
2463                 FLATBUFFERS_ASSERT(bt == BASE_TYPE_STRUCT);
2464                 bt = enum_def->underlying_type.base_type;
2465                 struct_def.refcount--;
2466                 enum_def->refcount++;
2467               }
2468             }
2469           }
2470           if (struct_def.refcount)
2471             return Error("internal: " + NumToString(struct_def.refcount) + "/" +
2472                          NumToString(initial_count) +
2473                          " use(s) of pre-declaration enum not accounted for: " +
2474                          enum_def->name);
2475           structs_.dict.erase(structs_.dict.find(struct_def.name));
2476           it = structs_.vec.erase(it);
2477           delete &struct_def;
2478           continue;  // Skip error.
2479         }
2480       }
2481       auto err = "type referenced but not defined (check namespace): " +
2482                  struct_def.name;
2483       if (struct_def.original_location)
2484         err += ", originally at: " + *struct_def.original_location;
2485       return Error(err);
2486     }
2487     ++it;
2488   }
2489 
2490   // This check has to happen here and not earlier, because only now do we
2491   // know for sure what the type of these are.
2492   for (auto it = enums_.vec.begin(); it != enums_.vec.end(); ++it) {
2493     auto &enum_def = **it;
2494     if (enum_def.is_union) {
2495       for (auto val_it = enum_def.Vals().begin();
2496            val_it != enum_def.Vals().end(); ++val_it) {
2497         auto &val = **val_it;
2498         if (!SupportsAdvancedUnionFeatures() && val.union_type.struct_def &&
2499             val.union_type.struct_def->fixed)
2500           return Error(
2501               "only tables can be union elements in the generated language: " +
2502               val.name);
2503       }
2504     }
2505   }
2506   return NoError();
2507 }
2508 
DoParse(const char * source,const char ** include_paths,const char * source_filename,const char * include_filename)2509 CheckedError Parser::DoParse(const char *source, const char **include_paths,
2510                              const char *source_filename,
2511                              const char *include_filename) {
2512   if (source_filename) {
2513     if (included_files_.find(source_filename) == included_files_.end()) {
2514       included_files_[source_filename] =
2515           include_filename ? include_filename : "";
2516       files_included_per_file_[source_filename] = std::set<std::string>();
2517     } else {
2518       return NoError();
2519     }
2520   }
2521   if (!include_paths) {
2522     static const char *current_directory[] = { "", nullptr };
2523     include_paths = current_directory;
2524   }
2525   field_stack_.clear();
2526   builder_.Clear();
2527   // Start with a blank namespace just in case this file doesn't have one.
2528   current_namespace_ = empty_namespace_;
2529 
2530   ECHECK(StartParseFile(source, source_filename));
2531 
2532   // Includes must come before type declarations:
2533   for (;;) {
2534     // Parse pre-include proto statements if any:
2535     if (opts.proto_mode && (attribute_ == "option" || attribute_ == "syntax" ||
2536                             attribute_ == "package")) {
2537       ECHECK(ParseProtoDecl());
2538     } else if (IsIdent("native_include")) {
2539       NEXT();
2540       vector_emplace_back(&native_included_files_, attribute_);
2541       EXPECT(kTokenStringConstant);
2542       EXPECT(';');
2543     } else if (IsIdent("include") || (opts.proto_mode && IsIdent("import"))) {
2544       NEXT();
2545       if (opts.proto_mode && attribute_ == "public") NEXT();
2546       auto name = flatbuffers::PosixPath(attribute_.c_str());
2547       EXPECT(kTokenStringConstant);
2548       // Look for the file in include_paths.
2549       std::string filepath;
2550       for (auto paths = include_paths; paths && *paths; paths++) {
2551         filepath = flatbuffers::ConCatPathFileName(*paths, name);
2552         if (FileExists(filepath.c_str())) break;
2553       }
2554       if (filepath.empty())
2555         return Error("unable to locate include file: " + name);
2556       if (source_filename)
2557         files_included_per_file_[source_filename].insert(filepath);
2558       if (included_files_.find(filepath) == included_files_.end()) {
2559         // We found an include file that we have not parsed yet.
2560         // Load it and parse it.
2561         std::string contents;
2562         if (!LoadFile(filepath.c_str(), true, &contents))
2563           return Error("unable to load include file: " + name);
2564         ECHECK(DoParse(contents.c_str(), include_paths, filepath.c_str(),
2565                        name.c_str()));
2566         // We generally do not want to output code for any included files:
2567         if (!opts.generate_all) MarkGenerated();
2568         // Reset these just in case the included file had them, and the
2569         // parent doesn't.
2570         root_struct_def_ = nullptr;
2571         file_identifier_.clear();
2572         file_extension_.clear();
2573         // This is the easiest way to continue this file after an include:
2574         // instead of saving and restoring all the state, we simply start the
2575         // file anew. This will cause it to encounter the same include
2576         // statement again, but this time it will skip it, because it was
2577         // entered into included_files_.
2578         // This is recursive, but only go as deep as the number of include
2579         // statements.
2580         if (source_filename) {
2581           included_files_.erase(source_filename);
2582         }
2583         return DoParse(source, include_paths, source_filename,
2584                        include_filename);
2585       }
2586       EXPECT(';');
2587     } else {
2588       break;
2589     }
2590   }
2591   // Now parse all other kinds of declarations:
2592   while (token_ != kTokenEof) {
2593     if (opts.proto_mode) {
2594       ECHECK(ParseProtoDecl());
2595     } else if (IsIdent("namespace")) {
2596       ECHECK(ParseNamespace());
2597     } else if (token_ == '{') {
2598       if (!root_struct_def_)
2599         return Error("no root type set to parse json with");
2600       if (builder_.GetSize()) {
2601         return Error("cannot have more than one json object in a file");
2602       }
2603       uoffset_t toff;
2604       ECHECK(ParseTable(*root_struct_def_, nullptr, &toff));
2605       if (opts.size_prefixed) {
2606         builder_.FinishSizePrefixed(Offset<Table>(toff), file_identifier_.length()
2607                                                              ? file_identifier_.c_str()
2608                                                              : nullptr);
2609       } else {
2610         builder_.Finish(Offset<Table>(toff), file_identifier_.length()
2611                                                  ? file_identifier_.c_str()
2612                                                  : nullptr);
2613       }
2614       // Check that JSON file doesn't contain more objects or IDL directives.
2615       // Comments after JSON are allowed.
2616       EXPECT(kTokenEof);
2617     } else if (IsIdent("enum")) {
2618       ECHECK(ParseEnum(false, nullptr));
2619     } else if (IsIdent("union")) {
2620       ECHECK(ParseEnum(true, nullptr));
2621     } else if (IsIdent("root_type")) {
2622       NEXT();
2623       auto root_type = attribute_;
2624       EXPECT(kTokenIdentifier);
2625       ECHECK(ParseNamespacing(&root_type, nullptr));
2626       if (opts.root_type.empty()) {
2627         if (!SetRootType(root_type.c_str()))
2628           return Error("unknown root type: " + root_type);
2629         if (root_struct_def_->fixed)
2630           return Error("root type must be a table");
2631       }
2632       EXPECT(';');
2633     } else if (IsIdent("file_identifier")) {
2634       NEXT();
2635       file_identifier_ = attribute_;
2636       EXPECT(kTokenStringConstant);
2637       if (file_identifier_.length() != FlatBufferBuilder::kFileIdentifierLength)
2638         return Error("file_identifier must be exactly " +
2639                      NumToString(FlatBufferBuilder::kFileIdentifierLength) +
2640                      " characters");
2641       EXPECT(';');
2642     } else if (IsIdent("file_extension")) {
2643       NEXT();
2644       file_extension_ = attribute_;
2645       EXPECT(kTokenStringConstant);
2646       EXPECT(';');
2647     } else if (IsIdent("include")) {
2648       return Error("includes must come before declarations");
2649     } else if (IsIdent("attribute")) {
2650       NEXT();
2651       auto name = attribute_;
2652       if (Is(kTokenIdentifier)) {
2653         NEXT();
2654       } else {
2655         EXPECT(kTokenStringConstant);
2656       }
2657       EXPECT(';');
2658       known_attributes_[name] = false;
2659     } else if (IsIdent("rpc_service")) {
2660       ECHECK(ParseService());
2661     } else {
2662       ECHECK(ParseDecl());
2663     }
2664   }
2665   return NoError();
2666 }
2667 
GetIncludedFilesRecursive(const std::string & file_name) const2668 std::set<std::string> Parser::GetIncludedFilesRecursive(
2669     const std::string &file_name) const {
2670   std::set<std::string> included_files;
2671   std::list<std::string> to_process;
2672 
2673   if (file_name.empty()) return included_files;
2674   to_process.push_back(file_name);
2675 
2676   while (!to_process.empty()) {
2677     std::string current = to_process.front();
2678     to_process.pop_front();
2679     included_files.insert(current);
2680 
2681     // Workaround the lack of const accessor in C++98 maps.
2682     auto &new_files =
2683         (*const_cast<std::map<std::string, std::set<std::string>> *>(
2684             &files_included_per_file_))[current];
2685     for (auto it = new_files.begin(); it != new_files.end(); ++it) {
2686       if (included_files.find(*it) == included_files.end())
2687         to_process.push_back(*it);
2688     }
2689   }
2690 
2691   return included_files;
2692 }
2693 
2694 // Schema serialization functionality:
2695 
// Strict-weak ordering on definitions by fully qualified name: returns true
// when `a`'s qualified name sorts before `b`'s.
template<typename T> bool compareName(const T *a, const T *b) {
  const auto lhs = a->defined_namespace->GetFullyQualifiedName(a->name);
  const auto rhs = b->defined_namespace->GetFullyQualifiedName(b->name);
  return lhs < rhs;
}
2700 
AssignIndices(const std::vector<T * > & defvec)2701 template<typename T> void AssignIndices(const std::vector<T *> &defvec) {
2702   // Pre-sort these vectors, such that we can set the correct indices for them.
2703   auto vec = defvec;
2704   std::sort(vec.begin(), vec.end(), compareName<T>);
2705   for (int i = 0; i < static_cast<int>(vec.size()); i++) vec[i]->index = i;
2706 }
2707 
Serialize()2708 void Parser::Serialize() {
2709   builder_.Clear();
2710   AssignIndices(structs_.vec);
2711   AssignIndices(enums_.vec);
2712   std::vector<Offset<reflection::Object>> object_offsets;
2713   for (auto it = structs_.vec.begin(); it != structs_.vec.end(); ++it) {
2714     auto offset = (*it)->Serialize(&builder_, *this);
2715     object_offsets.push_back(offset);
2716     (*it)->serialized_location = offset.o;
2717   }
2718   std::vector<Offset<reflection::Enum>> enum_offsets;
2719   for (auto it = enums_.vec.begin(); it != enums_.vec.end(); ++it) {
2720     auto offset = (*it)->Serialize(&builder_, *this);
2721     enum_offsets.push_back(offset);
2722     (*it)->serialized_location = offset.o;
2723   }
2724   std::vector<Offset<reflection::Service>> service_offsets;
2725   for (auto it = services_.vec.begin(); it != services_.vec.end(); ++it) {
2726     auto offset = (*it)->Serialize(&builder_, *this);
2727     service_offsets.push_back(offset);
2728     (*it)->serialized_location = offset.o;
2729   }
2730   auto objs__ = builder_.CreateVectorOfSortedTables(&object_offsets);
2731   auto enum__ = builder_.CreateVectorOfSortedTables(&enum_offsets);
2732   auto fiid__ = builder_.CreateString(file_identifier_);
2733   auto fext__ = builder_.CreateString(file_extension_);
2734   auto serv__ = builder_.CreateVectorOfSortedTables(&service_offsets);
2735   auto schema_offset =
2736       reflection::CreateSchema(builder_, objs__, enum__, fiid__, fext__,
2737         (root_struct_def_ ? root_struct_def_->serialized_location : 0),
2738         serv__);
2739   if (opts.size_prefixed) {
2740     builder_.FinishSizePrefixed(schema_offset, reflection::SchemaIdentifier());
2741   } else {
2742     builder_.Finish(schema_offset, reflection::SchemaIdentifier());
2743   }
2744 }
2745 
GetNamespace(const std::string & qualified_name,std::vector<Namespace * > & namespaces,std::map<std::string,Namespace * > & namespaces_index)2746 static Namespace *GetNamespace(
2747     const std::string &qualified_name, std::vector<Namespace *> &namespaces,
2748     std::map<std::string, Namespace *> &namespaces_index) {
2749   size_t dot = qualified_name.find_last_of('.');
2750   std::string namespace_name = (dot != std::string::npos)
2751                                    ? std::string(qualified_name.c_str(), dot)
2752                                    : "";
2753   Namespace *&ns = namespaces_index[namespace_name];
2754 
2755   if (!ns) {
2756     ns = new Namespace();
2757     namespaces.push_back(ns);
2758 
2759     size_t pos = 0;
2760 
2761     for (;;) {
2762       dot = qualified_name.find('.', pos);
2763       if (dot == std::string::npos) { break; }
2764       ns->components.push_back(qualified_name.substr(pos, dot-pos));
2765       pos = dot + 1;
2766     }
2767   }
2768 
2769   return ns;
2770 }
2771 
Serialize(FlatBufferBuilder * builder,const Parser & parser) const2772 Offset<reflection::Object> StructDef::Serialize(FlatBufferBuilder *builder,
2773                                                 const Parser &parser) const {
2774   std::vector<Offset<reflection::Field>> field_offsets;
2775   for (auto it = fields.vec.begin(); it != fields.vec.end(); ++it) {
2776     field_offsets.push_back((*it)->Serialize(
2777         builder, static_cast<uint16_t>(it - fields.vec.begin()), parser));
2778   }
2779   auto qualified_name = defined_namespace->GetFullyQualifiedName(name);
2780   auto name__ = builder->CreateString(qualified_name);
2781   auto flds__ = builder->CreateVectorOfSortedTables(&field_offsets);
2782   auto attr__ = SerializeAttributes(builder, parser);
2783   auto docs__ = parser.opts.binary_schema_comments
2784                 ? builder->CreateVectorOfStrings(doc_comment)
2785                 : 0;
2786   return reflection::CreateObject(*builder, name__, flds__, fixed,
2787                                   static_cast<int>(minalign),
2788                                   static_cast<int>(bytesize),
2789                                   attr__, docs__);
2790 }
2791 
Deserialize(Parser & parser,const reflection::Object * object)2792 bool StructDef::Deserialize(Parser &parser, const reflection::Object *object) {
2793   if (!DeserializeAttributes(parser, object->attributes()))
2794     return false;
2795   DeserializeDoc(doc_comment, object->documentation());
2796   name = parser.UnqualifiedName(object->name()->str());
2797   fixed = object->is_struct();
2798   minalign = object->minalign();
2799   predecl = false;
2800   sortbysize = attributes.Lookup("original_order") == nullptr && !fixed;
2801   std::vector<uoffset_t> indexes =
2802     std::vector<uoffset_t>(object->fields()->size());
2803   for (uoffset_t i = 0; i < object->fields()->size(); i++)
2804     indexes[object->fields()->Get(i)->id()] = i;
2805   for (size_t i = 0; i < indexes.size(); i++) {
2806     auto field = object->fields()->Get(indexes[i]);
2807     auto field_def = new FieldDef();
2808     if (!field_def->Deserialize(parser, field) ||
2809         fields.Add(field_def->name, field_def)) {
2810       delete field_def;
2811       return false;
2812     }
2813     if (fixed) {
2814       // Recompute padding since that's currently not serialized.
2815       auto size = InlineSize(field_def->value.type);
2816       auto next_field =
2817           i + 1 < indexes.size()
2818           ? object->fields()->Get(indexes[i+1])
2819           : nullptr;
2820       bytesize += size;
2821       field_def->padding =
2822           next_field ? (next_field->offset() - field_def->value.offset) - size
2823                      : PaddingBytes(bytesize, minalign);
2824       bytesize += field_def->padding;
2825     }
2826   }
2827   FLATBUFFERS_ASSERT(static_cast<int>(bytesize) == object->bytesize());
2828   return true;
2829 }
2830 
Serialize(FlatBufferBuilder * builder,uint16_t id,const Parser & parser) const2831 Offset<reflection::Field> FieldDef::Serialize(FlatBufferBuilder *builder,
2832                                               uint16_t id,
2833                                               const Parser &parser) const {
2834   auto name__ = builder->CreateString(name);
2835   auto type__ = value.type.Serialize(builder);
2836   auto attr__ = SerializeAttributes(builder, parser);
2837   auto docs__ = parser.opts.binary_schema_comments
2838                 ? builder->CreateVectorOfStrings(doc_comment)
2839                 : 0;
2840   return reflection::CreateField(*builder, name__, type__, id, value.offset,
2841       // Is uint64>max(int64) tested?
2842       IsInteger(value.type.base_type) ? StringToInt(value.constant.c_str()) : 0,
2843       // result may be platform-dependent if underlying is float (not double)
2844       IsFloat(value.type.base_type) ? strtod(value.constant.c_str(), nullptr)
2845                                     : 0.0,
2846       deprecated, required, key, attr__, docs__);
2847   // TODO: value.constant is almost always "0", we could save quite a bit of
2848   // space by sharing it. Same for common values of value.type.
2849 }
2850 
Deserialize(Parser & parser,const reflection::Field * field)2851 bool FieldDef::Deserialize(Parser &parser, const reflection::Field *field) {
2852   name = parser.UnqualifiedName(field->name()->str());
2853   defined_namespace = parser.current_namespace_;
2854   if (!value.type.Deserialize(parser, field->type()))
2855     return false;
2856   value.offset = field->offset();
2857   if (IsInteger(value.type.base_type)) {
2858     value.constant = NumToString(field->default_integer());
2859   } else if (IsFloat(value.type.base_type)) {
2860     value.constant = FloatToString(field->default_real(), 16);
2861     size_t last_zero = value.constant.find_last_not_of('0');
2862     if (last_zero != std::string::npos && last_zero != 0) {
2863       value.constant.erase(last_zero, std::string::npos);
2864     }
2865   }
2866   deprecated = field->deprecated();
2867   required = field->required();
2868   key = field->key();
2869   if (!DeserializeAttributes(parser, field->attributes()))
2870     return false;
2871   // TODO: this should probably be handled by a separate attribute
2872   if (attributes.Lookup("flexbuffer")) {
2873     flexbuffer = true;
2874     parser.uses_flexbuffers_ = true;
2875     if (value.type.base_type != BASE_TYPE_VECTOR ||
2876         value.type.element != BASE_TYPE_UCHAR)
2877       return false;
2878   }
2879   DeserializeDoc(doc_comment, field->documentation());
2880   return true;
2881 }
2882 
Serialize(FlatBufferBuilder * builder,const Parser & parser) const2883 Offset<reflection::RPCCall> RPCCall::Serialize(FlatBufferBuilder *builder,
2884                                                const Parser &parser) const {
2885   auto name__ = builder->CreateString(name);
2886   auto attr__ = SerializeAttributes(builder, parser);
2887   auto docs__ = parser.opts.binary_schema_comments
2888                 ? builder->CreateVectorOfStrings(doc_comment)
2889                 : 0;
2890   return reflection::CreateRPCCall(*builder, name__,
2891                                    request->serialized_location,
2892                                    response->serialized_location,
2893                                    attr__, docs__);
2894 }
2895 
Deserialize(Parser & parser,const reflection::RPCCall * call)2896 bool RPCCall::Deserialize(Parser &parser, const reflection::RPCCall *call) {
2897   name = call->name()->str();
2898   if (!DeserializeAttributes(parser, call->attributes()))
2899     return false;
2900   DeserializeDoc(doc_comment, call->documentation());
2901   request = parser.structs_.Lookup(call->request()->name()->str());
2902   response = parser.structs_.Lookup(call->response()->name()->str());
2903   if (!request || !response) { return false; }
2904   return true;
2905 }
2906 
Serialize(FlatBufferBuilder * builder,const Parser & parser) const2907 Offset<reflection::Service> ServiceDef::Serialize(FlatBufferBuilder *builder,
2908                                                   const Parser &parser) const {
2909   std::vector<Offset<reflection::RPCCall>> servicecall_offsets;
2910   for (auto it = calls.vec.begin(); it != calls.vec.end(); ++it) {
2911     servicecall_offsets.push_back((*it)->Serialize(builder, parser));
2912   }
2913   auto qualified_name = defined_namespace->GetFullyQualifiedName(name);
2914   auto name__ = builder->CreateString(qualified_name);
2915   auto call__ = builder->CreateVector(servicecall_offsets);
2916   auto attr__ = SerializeAttributes(builder, parser);
2917   auto docs__ = parser.opts.binary_schema_comments
2918                 ? builder->CreateVectorOfStrings(doc_comment)
2919                 : 0;
2920   return reflection::CreateService(*builder, name__, call__, attr__, docs__);
2921 }
2922 
Deserialize(Parser & parser,const reflection::Service * service)2923 bool ServiceDef::Deserialize(Parser &parser,
2924                              const reflection::Service *service) {
2925   name = parser.UnqualifiedName(service->name()->str());
2926   if (service->calls()) {
2927     for (uoffset_t i = 0; i < service->calls()->size(); ++i) {
2928       auto call = new RPCCall();
2929       if (!call->Deserialize(parser, service->calls()->Get(i)) ||
2930           calls.Add(call->name, call)) {
2931         delete call;
2932         return false;
2933       }
2934     }
2935   }
2936   if (!DeserializeAttributes(parser, service->attributes()))
2937     return false;
2938   DeserializeDoc(doc_comment, service->documentation());
2939   return true;
2940 }
2941 
Serialize(FlatBufferBuilder * builder,const Parser & parser) const2942 Offset<reflection::Enum> EnumDef::Serialize(FlatBufferBuilder *builder,
2943                                             const Parser &parser) const {
2944   std::vector<Offset<reflection::EnumVal>> enumval_offsets;
2945   for (auto it = vals.vec.begin(); it != vals.vec.end(); ++it) {
2946     enumval_offsets.push_back((*it)->Serialize(builder, parser));
2947   }
2948   auto qualified_name = defined_namespace->GetFullyQualifiedName(name);
2949   auto name__ = builder->CreateString(qualified_name);
2950   auto vals__ = builder->CreateVector(enumval_offsets);
2951   auto type__ = underlying_type.Serialize(builder);
2952   auto attr__ = SerializeAttributes(builder, parser);
2953   auto docs__ = parser.opts.binary_schema_comments
2954                 ? builder->CreateVectorOfStrings(doc_comment)
2955                 : 0;
2956   return reflection::CreateEnum(*builder, name__, vals__, is_union, type__,
2957                                 attr__, docs__);
2958 }
2959 
Deserialize(Parser & parser,const reflection::Enum * _enum)2960 bool EnumDef::Deserialize(Parser &parser, const reflection::Enum *_enum) {
2961   name = parser.UnqualifiedName(_enum->name()->str());
2962   for (uoffset_t i = 0; i < _enum->values()->size(); ++i) {
2963     auto val = new EnumVal();
2964     if (!val->Deserialize(parser, _enum->values()->Get(i)) ||
2965         vals.Add(val->name, val)) {
2966       delete val;
2967       return false;
2968     }
2969   }
2970   is_union = _enum->is_union();
2971   if (!underlying_type.Deserialize(parser, _enum->underlying_type())) {
2972     return false;
2973   }
2974   if (!DeserializeAttributes(parser, _enum->attributes()))
2975     return false;
2976   DeserializeDoc(doc_comment, _enum->documentation());
2977   return true;
2978 }
2979 
Serialize(FlatBufferBuilder * builder,const Parser & parser) const2980 Offset<reflection::EnumVal> EnumVal::Serialize(FlatBufferBuilder *builder,
2981                                                const Parser &parser) const {
2982   auto name__ = builder->CreateString(name);
2983   auto type__ = union_type.Serialize(builder);
2984   auto docs__ = parser.opts.binary_schema_comments
2985                 ? builder->CreateVectorOfStrings(doc_comment)
2986                 : 0;
2987   return reflection::CreateEnumVal(*builder, name__, value,
2988       union_type.struct_def ? union_type.struct_def->serialized_location : 0,
2989       type__, docs__);
2990 }
2991 
Deserialize(const Parser & parser,const reflection::EnumVal * val)2992 bool EnumVal::Deserialize(const Parser &parser,
2993                           const reflection::EnumVal *val) {
2994   name = val->name()->str();
2995   value = val->value();
2996   if (!union_type.Deserialize(parser, val->union_type()))
2997     return false;
2998   DeserializeDoc(doc_comment, val->documentation());
2999   return true;
3000 }
3001 
Serialize(FlatBufferBuilder * builder) const3002 Offset<reflection::Type> Type::Serialize(FlatBufferBuilder *builder) const {
3003   return reflection::CreateType(
3004       *builder,
3005       static_cast<reflection::BaseType>(base_type),
3006       static_cast<reflection::BaseType>(element),
3007       struct_def ? struct_def->index : (enum_def ? enum_def->index : -1));
3008 }
3009 
Deserialize(const Parser & parser,const reflection::Type * type)3010 bool Type::Deserialize(const Parser &parser, const reflection::Type *type) {
3011   if (type == nullptr) return true;
3012   base_type = static_cast<BaseType>(type->base_type());
3013   element = static_cast<BaseType>(type->element());
3014   if (type->index() >= 0) {
3015     if (type->base_type() == reflection::Obj ||
3016         (type->base_type() == reflection::Vector &&
3017          type->element() == reflection::Obj)) {
3018       if (static_cast<size_t>(type->index()) < parser.structs_.vec.size()) {
3019         struct_def = parser.structs_.vec[type->index()];
3020         struct_def->refcount++;
3021       } else {
3022         return false;
3023       }
3024     } else {
3025       if (static_cast<size_t>(type->index()) < parser.enums_.vec.size()) {
3026         enum_def = parser.enums_.vec[type->index()];
3027       } else {
3028         return false;
3029       }
3030     }
3031   }
3032   return true;
3033 }
3034 
3035 flatbuffers::Offset<
3036     flatbuffers::Vector<flatbuffers::Offset<reflection::KeyValue>>>
SerializeAttributes(FlatBufferBuilder * builder,const Parser & parser) const3037 Definition::SerializeAttributes(FlatBufferBuilder *builder,
3038                                 const Parser &parser) const {
3039   std::vector<flatbuffers::Offset<reflection::KeyValue>> attrs;
3040   for (auto kv = attributes.dict.begin(); kv != attributes.dict.end(); ++kv) {
3041     auto it = parser.known_attributes_.find(kv->first);
3042     FLATBUFFERS_ASSERT(it != parser.known_attributes_.end());
3043     if (parser.opts.binary_schema_builtins || !it->second) {
3044       auto key = builder->CreateString(kv->first);
3045       auto val = builder->CreateString(kv->second->constant);
3046       attrs.push_back(reflection::CreateKeyValue(*builder, key, val));
3047     }
3048   }
3049   if (attrs.size()) {
3050     return builder->CreateVectorOfSortedTables(&attrs);
3051   } else {
3052     return 0;
3053   }
3054 }
3055 
DeserializeAttributes(Parser & parser,const Vector<Offset<reflection::KeyValue>> * attrs)3056 bool Definition::DeserializeAttributes(
3057     Parser &parser, const Vector<Offset<reflection::KeyValue>> *attrs) {
3058   if (attrs == nullptr)
3059     return true;
3060   for (uoffset_t i = 0; i < attrs->size(); ++i) {
3061     auto kv = attrs->Get(i);
3062     auto value = new Value();
3063     if (kv->value()) { value->constant = kv->value()->str(); }
3064     if (attributes.Add(kv->key()->str(), value)) {
3065       delete value;
3066       return false;
3067     }
3068     parser.known_attributes_[kv->key()->str()];
3069   }
3070   return true;
3071 }
3072 
3073 /************************************************************************/
3074 /* DESERIALIZATION                                                      */
3075 /************************************************************************/
Deserialize(const uint8_t * buf,const size_t size)3076 bool Parser::Deserialize(const uint8_t *buf, const size_t size) {
3077   flatbuffers::Verifier verifier(reinterpret_cast<const uint8_t *>(buf), size);
3078   bool size_prefixed = false;
3079   if(!reflection::SchemaBufferHasIdentifier(buf)) {
3080     if (!flatbuffers::BufferHasIdentifier(buf, reflection::SchemaIdentifier(),
3081                                           true))
3082       return false;
3083     else
3084       size_prefixed = true;
3085   }
3086   auto verify_fn = size_prefixed ? &reflection::VerifySizePrefixedSchemaBuffer
3087                                  : &reflection::VerifySchemaBuffer;
3088   if (!verify_fn(verifier)) {
3089     return false;
3090   }
3091   auto schema = size_prefixed ? reflection::GetSizePrefixedSchema(buf)
3092                               : reflection::GetSchema(buf);
3093   return Deserialize(schema);
3094 }
3095 
Deserialize(const reflection::Schema * schema)3096 bool Parser::Deserialize(const reflection::Schema *schema) {
3097   file_identifier_ = schema->file_ident() ? schema->file_ident()->str() : "";
3098   file_extension_ = schema->file_ext() ? schema->file_ext()->str() : "";
3099   std::map<std::string, Namespace *> namespaces_index;
3100 
3101   // Create defs without deserializing so references from fields to structs and
3102   // enums can be resolved.
3103   for (auto it = schema->objects()->begin(); it != schema->objects()->end();
3104        ++it) {
3105     auto struct_def = new StructDef();
3106     if (structs_.Add(it->name()->str(), struct_def)) {
3107       delete struct_def;
3108       return false;
3109     }
3110     auto type = new Type(BASE_TYPE_STRUCT, struct_def, nullptr);
3111     if (types_.Add(it->name()->str(), type)) {
3112       delete type;
3113       return false;
3114     }
3115   }
3116   for (auto it = schema->enums()->begin(); it != schema->enums()->end(); ++it) {
3117     auto enum_def = new EnumDef();
3118     if (enums_.Add(it->name()->str(), enum_def)) {
3119       delete enum_def;
3120       return false;
3121     }
3122     auto type = new Type(BASE_TYPE_UNION, nullptr, enum_def);
3123     if (types_.Add(it->name()->str(), type)) {
3124       delete type;
3125       return false;
3126     }
3127   }
3128 
3129   // Now fields can refer to structs and enums by index.
3130   for (auto it = schema->objects()->begin(); it != schema->objects()->end();
3131        ++it) {
3132     std::string qualified_name = it->name()->str();
3133     auto struct_def = structs_.Lookup(qualified_name);
3134     struct_def->defined_namespace =
3135         GetNamespace(qualified_name, namespaces_, namespaces_index);
3136     if (!struct_def->Deserialize(*this, * it)) { return false; }
3137     if (schema->root_table() == *it) { root_struct_def_ = struct_def; }
3138   }
3139   for (auto it = schema->enums()->begin(); it != schema->enums()->end(); ++it) {
3140     std::string qualified_name = it->name()->str();
3141     auto enum_def = enums_.Lookup(qualified_name);
3142     enum_def->defined_namespace =
3143         GetNamespace(qualified_name, namespaces_, namespaces_index);
3144     if (!enum_def->Deserialize(*this, *it)) { return false; }
3145   }
3146 
3147   if (schema->services()) {
3148     for (auto it = schema->services()->begin(); it != schema->services()->end();
3149          ++it) {
3150       std::string qualified_name = it->name()->str();
3151       auto service_def = new ServiceDef();
3152       service_def->defined_namespace =
3153           GetNamespace(qualified_name, namespaces_, namespaces_index);
3154       if (!service_def->Deserialize(*this, *it) ||
3155           services_.Add(qualified_name, service_def)) {
3156         delete service_def;
3157         return false;
3158       }
3159     }
3160   }
3161 
3162   return true;
3163 }
3164 
ConformTo(const Parser & base)3165 std::string Parser::ConformTo(const Parser &base) {
3166   for (auto sit = structs_.vec.begin(); sit != structs_.vec.end(); ++sit) {
3167     auto &struct_def = **sit;
3168     auto qualified_name =
3169         struct_def.defined_namespace->GetFullyQualifiedName(struct_def.name);
3170     auto struct_def_base = base.LookupStruct(qualified_name);
3171     if (!struct_def_base) continue;
3172     for (auto fit = struct_def.fields.vec.begin();
3173          fit != struct_def.fields.vec.end(); ++fit) {
3174       auto &field = **fit;
3175       auto field_base = struct_def_base->fields.Lookup(field.name);
3176       if (field_base) {
3177         if (field.value.offset != field_base->value.offset)
3178           return "offsets differ for field: " + field.name;
3179         if (field.value.constant != field_base->value.constant)
3180           return "defaults differ for field: " + field.name;
3181         if (!EqualByName(field.value.type, field_base->value.type))
3182           return "types differ for field: " + field.name;
3183       } else {
3184         // Doesn't have to exist, deleting fields is fine.
3185         // But we should check if there is a field that has the same offset
3186         // but is incompatible (in the case of field renaming).
3187         for (auto fbit = struct_def_base->fields.vec.begin();
3188              fbit != struct_def_base->fields.vec.end(); ++fbit) {
3189           field_base = *fbit;
3190           if (field.value.offset == field_base->value.offset) {
3191             if (!EqualByName(field.value.type, field_base->value.type))
3192               return "field renamed to different type: " + field.name;
3193             break;
3194           }
3195         }
3196       }
3197     }
3198   }
3199   for (auto eit = enums_.vec.begin(); eit != enums_.vec.end(); ++eit) {
3200     auto &enum_def = **eit;
3201     auto qualified_name =
3202         enum_def.defined_namespace->GetFullyQualifiedName(enum_def.name);
3203     auto enum_def_base = base.enums_.Lookup(qualified_name);
3204     if (!enum_def_base) continue;
3205     for (auto evit = enum_def.Vals().begin(); evit != enum_def.Vals().end();
3206          ++evit) {
3207       auto &enum_val = **evit;
3208       auto enum_val_base = enum_def_base->vals.Lookup(enum_val.name);
3209       if (enum_val_base) {
3210         if (enum_val.value != enum_val_base->value)
3211           return "values differ for enum: " + enum_val.name;
3212       }
3213     }
3214   }
3215   return "";
3216 }
3217 
3218 }  // namespace flatbuffers
3219