• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2014 Google Inc. All rights reserved.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
#include <algorithm>
#include <cmath>
#include <cstdlib>
#include <iostream>
#include <list>
#include <string>
#include <utility>
23 
24 #include "flatbuffers/base.h"
25 #include "flatbuffers/buffer.h"
26 #include "flatbuffers/idl.h"
27 #include "flatbuffers/reflection_generated.h"
28 #include "flatbuffers/util.h"
29 
30 namespace flatbuffers {
31 
32 // Reflects the version at the compiling time of binary(lib/dll/so).
FLATBUFFERS_VERSION()33 const char *FLATBUFFERS_VERSION() {
34   // clang-format off
35   return
36       FLATBUFFERS_STRING(FLATBUFFERS_VERSION_MAJOR) "."
37       FLATBUFFERS_STRING(FLATBUFFERS_VERSION_MINOR) "."
38       FLATBUFFERS_STRING(FLATBUFFERS_VERSION_REVISION);
39   // clang-format on
40 }
41 
42 namespace {
43 
// Value of pi, written out to double precision.
44 static const double kPi = 3.14159265358979323846;
45 
46 // The enums in the reflection schema should match the ones we use internally.
47 // Compare the last element to check if these go out of sync: the assertion
// fails to compile unless BASE_TYPE_VECTOR64 equals reflection::MaxBaseType - 1.
48 static_assert(BASE_TYPE_VECTOR64 ==
49                   static_cast<BaseType>(reflection::MaxBaseType - 1),
50               "enums don't match");
51 
52 // Any parsing calls have to be wrapped in this macro, which automates
53 // handling of recursive error checking a bit. It will check the received
54 // CheckedError object, and return straight away on error.
// The expansion contains a `return ce;`, so ECHECK can only be used inside a
// function whose return type accepts the value produced by `call`. The
// temporary `ce` lives in its own brace scope and does not leak into the
// caller.
55 #define ECHECK(call)           \
56   {                            \
57     auto ce = (call);          \
58     if (ce.Check()) return ce; \
59   }
60 
61 // These two functions are called hundreds of times below, so define a short
62 // form:
// NEXT() wraps a call to Next(), EXPECT(tok) wraps a call to Expect(tok); both
// return from the enclosing function when the wrapped call reports an error.
63 #define NEXT() ECHECK(Next())
64 #define EXPECT(tok) ECHECK(Expect(tok))
65 
ValidateUTF8(const std::string & str)66 static bool ValidateUTF8(const std::string &str) {
67   const char *s = &str[0];
68   const char *const sEnd = s + str.length();
69   while (s < sEnd) {
70     if (FromUTF8(&s) < 0) { return false; }
71   }
72   return true;
73 }
74 
IsLowerSnakeCase(const std::string & str)75 static bool IsLowerSnakeCase(const std::string &str) {
76   for (size_t i = 0; i < str.length(); i++) {
77     char c = str[i];
78     if (!check_ascii_range(c, 'a', 'z') && !is_digit(c) && c != '_') {
79       return false;
80     }
81   }
82   return true;
83 }
84 
DeserializeDoc(std::vector<std::string> & doc,const Vector<Offset<String>> * documentation)85 static void DeserializeDoc(std::vector<std::string> &doc,
86                            const Vector<Offset<String>> *documentation) {
87   if (documentation == nullptr) return;
88   for (uoffset_t index = 0; index < documentation->size(); index++)
89     doc.push_back(documentation->Get(index)->str());
90 }
91 
NoError()92 static CheckedError NoError() { return CheckedError(false); }
93 
TypeToIntervalString()94 template<typename T> static std::string TypeToIntervalString() {
95   return "[" + NumToString((flatbuffers::numeric_limits<T>::lowest)()) + "; " +
96          NumToString((flatbuffers::numeric_limits<T>::max)()) + "]";
97 }
98 
99 // atot: template version of atoi/atof: convert a string to an instance of T.
100 template<typename T>
atot_scalar(const char * s,T * val,bool_constant<false>)101 static bool atot_scalar(const char *s, T *val, bool_constant<false>) {
102   return StringToNumber(s, val);
103 }
104 
105 template<typename T>
atot_scalar(const char * s,T * val,bool_constant<true>)106 static bool atot_scalar(const char *s, T *val, bool_constant<true>) {
107   // Normalize NaN parsed from fbs or json to unsigned NaN.
108   if (false == StringToNumber(s, val)) return false;
109   *val = (*val != *val) ? std::fabs(*val) : *val;
110   return true;
111 }
112 
113 template<typename T>
atot(const char * s,Parser & parser,T * val)114 static CheckedError atot(const char *s, Parser &parser, T *val) {
115   auto done = atot_scalar(s, val, bool_constant<is_floating_point<T>::value>());
116   if (done) return NoError();
117   if (0 == *val)
118     return parser.Error("invalid number: \"" + std::string(s) + "\"");
119   else
120     return parser.Error("invalid number: \"" + std::string(s) + "\"" +
121                         ", constant does not fit " + TypeToIntervalString<T>());
122 }
123 template<>
atot(const char * s,Parser & parser,Offset<void> * val)124 CheckedError atot<Offset<void>>(const char *s, Parser &parser,
125                                 Offset<void> *val) {
126   (void)parser;
127   *val = Offset<void>(atoi(s));
128   return NoError();
129 }
130 
131 template<>
atot(const char * s,Parser & parser,Offset64<void> * val)132 CheckedError atot<Offset64<void>>(const char *s, Parser &parser,
133                                   Offset64<void> *val) {
134   (void)parser;
135   *val = Offset64<void>(atoi(s));
136   return NoError();
137 }
138 
139 template<typename T>
LookupTableByName(const SymbolTable<T> & table,const std::string & name,const Namespace & current_namespace,size_t skip_top)140 static T *LookupTableByName(const SymbolTable<T> &table,
141                             const std::string &name,
142                             const Namespace &current_namespace,
143                             size_t skip_top) {
144   const auto &components = current_namespace.components;
145   if (table.dict.empty()) return nullptr;
146   if (components.size() < skip_top) return nullptr;
147   const auto N = components.size() - skip_top;
148   std::string full_name;
149   for (size_t i = 0; i < N; i++) {
150     full_name += components[i];
151     full_name += '.';
152   }
153   for (size_t i = N; i > 0; i--) {
154     full_name += name;
155     auto obj = table.Lookup(full_name);
156     if (obj) return obj;
157     auto len = full_name.size() - components[i - 1].size() - 1 - name.size();
158     full_name.resize(len);
159   }
160   FLATBUFFERS_ASSERT(full_name.empty());
161   return table.Lookup(name);  // lookup in global namespace
162 }
163 
164 // Declare tokens we'll use. Single character tokens are represented by their
165 // ascii character code (e.g. '{'), others start at 256.
// FLATBUFFERS_GEN_TOKENS is an X-macro: it invokes TD(NAME, VALUE, STRING) once
// per multi-character token, so the same list can generate both the enum below
// and a table of human-readable names.
166 // clang-format off
167 #define FLATBUFFERS_GEN_TOKENS(TD) \
168   TD(Eof, 256, "end of file") \
169   TD(StringConstant, 257, "string constant") \
170   TD(IntegerConstant, 258, "integer constant") \
171   TD(FloatConstant, 259, "float constant") \
172   TD(Identifier, 260, "identifier")
173 #ifdef __GNUC__
174 __extension__  // Stop GCC complaining about trailing comma with -Wpedantic.
175 #endif
// Each list entry becomes an enumerator named kToken<NAME> with value VALUE.
176 enum {
177   #define FLATBUFFERS_TOKEN(NAME, VALUE, STRING) kToken ## NAME = VALUE,
178     FLATBUFFERS_GEN_TOKENS(FLATBUFFERS_TOKEN)
179   #undef FLATBUFFERS_TOKEN
180 };
181 
TokenToString(int t)182 static std::string TokenToString(int t) {
183   static const char * const tokens[] = {
184     #define FLATBUFFERS_TOKEN(NAME, VALUE, STRING) STRING,
185       FLATBUFFERS_GEN_TOKENS(FLATBUFFERS_TOKEN)
186     #undef FLATBUFFERS_TOKEN
187     #define FLATBUFFERS_TD(ENUM, IDLTYPE, ...) \
188       IDLTYPE,
189       FLATBUFFERS_GEN_TYPES(FLATBUFFERS_TD)
190     #undef FLATBUFFERS_TD
191   };
192   if (t < 256) {  // A single ascii char token.
193     std::string s;
194     s.append(1, static_cast<char>(t));
195     return s;
196   } else {       // Other tokens.
197     return tokens[t - 256];
198   }
199 }
200 // clang-format on
201 
IsIdentifierStart(char c)202 static bool IsIdentifierStart(char c) { return is_alpha(c) || (c == '_'); }
203 
CompareSerializedScalars(const uint8_t * a,const uint8_t * b,const FieldDef & key)204 static bool CompareSerializedScalars(const uint8_t *a, const uint8_t *b,
205                                      const FieldDef &key) {
206   switch (key.value.type.base_type) {
207 #define FLATBUFFERS_TD(ENUM, IDLTYPE, CTYPE, ...)                       \
208   case BASE_TYPE_##ENUM: {                                              \
209     CTYPE def = static_cast<CTYPE>(0);                                  \
210     if (!a || !b) { StringToNumber(key.value.constant.c_str(), &def); } \
211     const auto av = a ? ReadScalar<CTYPE>(a) : def;                     \
212     const auto bv = b ? ReadScalar<CTYPE>(b) : def;                     \
213     return av < bv;                                                     \
214   }
215     FLATBUFFERS_GEN_TYPES_SCALAR(FLATBUFFERS_TD)
216 #undef FLATBUFFERS_TD
217     default: {
218       FLATBUFFERS_ASSERT(false && "scalar type expected");
219       return false;
220     }
221   }
222 }
223 
CompareTablesByScalarKey(const Offset<Table> * _a,const Offset<Table> * _b,const FieldDef & key)224 static bool CompareTablesByScalarKey(const Offset<Table> *_a,
225                                      const Offset<Table> *_b,
226                                      const FieldDef &key) {
227   const voffset_t offset = key.value.offset;
228   // Indirect offset pointer to table pointer.
229   auto a = reinterpret_cast<const uint8_t *>(_a) + ReadScalar<uoffset_t>(_a);
230   auto b = reinterpret_cast<const uint8_t *>(_b) + ReadScalar<uoffset_t>(_b);
231   // Fetch field address from table.
232   a = reinterpret_cast<const Table *>(a)->GetAddressOf(offset);
233   b = reinterpret_cast<const Table *>(b)->GetAddressOf(offset);
234   return CompareSerializedScalars(a, b, key);
235 }
236 
CompareTablesByStringKey(const Offset<Table> * _a,const Offset<Table> * _b,const FieldDef & key)237 static bool CompareTablesByStringKey(const Offset<Table> *_a,
238                                      const Offset<Table> *_b,
239                                      const FieldDef &key) {
240   const voffset_t offset = key.value.offset;
241   // Indirect offset pointer to table pointer.
242   auto a = reinterpret_cast<const uint8_t *>(_a) + ReadScalar<uoffset_t>(_a);
243   auto b = reinterpret_cast<const uint8_t *>(_b) + ReadScalar<uoffset_t>(_b);
244   // Fetch field address from table.
245   a = reinterpret_cast<const Table *>(a)->GetAddressOf(offset);
246   b = reinterpret_cast<const Table *>(b)->GetAddressOf(offset);
247   if (a && b) {
248     // Indirect offset pointer to string pointer.
249     a += ReadScalar<uoffset_t>(a);
250     b += ReadScalar<uoffset_t>(b);
251     return *reinterpret_cast<const String *>(a) <
252            *reinterpret_cast<const String *>(b);
253   } else {
254     return a ? true : false;
255   }
256 }
257 
SwapSerializedTables(Offset<Table> * a,Offset<Table> * b)258 static void SwapSerializedTables(Offset<Table> *a, Offset<Table> *b) {
259   // These are serialized offsets, so are relative where they are
260   // stored in memory, so compute the distance between these pointers:
261   ptrdiff_t diff = (b - a) * sizeof(Offset<Table>);
262   FLATBUFFERS_ASSERT(diff >= 0);  // Guaranteed by SimpleQsort.
263   auto udiff = static_cast<uoffset_t>(diff);
264   a->o = EndianScalar(ReadScalar<uoffset_t>(a) - udiff);
265   b->o = EndianScalar(ReadScalar<uoffset_t>(b) + udiff);
266   std::swap(*a, *b);
267 }
268 
// See below for why we need our own sort :(
// Quicksort over [begin, end) where each logical element spans `width` items
// of T. The first element is the pivot; `comparator(pivot, x)` returning true
// sends x to the right-hand partition. All data movement goes through
// `swapper(lo, hi)`, which is always called with lo <= hi.
template<typename T, typename F, typename S>
static void SimpleQsort(T *begin, T *end, size_t width, F comparator,
                        S swapper) {
  const ptrdiff_t stride = static_cast<ptrdiff_t>(width);
  // Each pass partitions the current range, sorts the left part recursively
  // and then carries on with the right part.
  while (end - begin > stride) {
    T *left = begin + width;
    T *right = end;
    while (left < right) {
      if (!comparator(begin, left)) {
        left += width;
        continue;
      }
      right -= width;
      swapper(left, right);
    }
    // Move the pivot between the two partitions.
    left -= width;
    swapper(begin, left);
    SimpleQsort(begin, left, width, comparator, swapper);
    begin = right;
  }
}
289 
SingleValueRepack(Value & e,T val)290 template<typename T> static inline void SingleValueRepack(Value &e, T val) {
291   // Remove leading zeros.
292   if (IsInteger(e.type.base_type)) { e.constant = NumToString(val); }
293 }
294 
#if defined(FLATBUFFERS_HAS_NEW_STRTOD) && (FLATBUFFERS_HAS_NEW_STRTOD > 0)
// Normalize defaults NaN to unsigned quiet-NaN(0) if value was parsed from
// hex-float literal: any NaN value has its constant replaced by "nan".
static void SingleValueRepack(Value &e, float val) {
  const bool is_nan = (val != val);  // Only NaN differs from itself.
  if (is_nan) { e.constant = "nan"; }
}
static void SingleValueRepack(Value &e, double val) {
  const bool is_nan = (val != val);  // Only NaN differs from itself.
  if (is_nan) { e.constant = "nan"; }
}
#endif
305 
// Returns the absolute distance between `e1` and `e2` as an unsigned 64-bit
// value. The subtraction is done on uint64_t because the signed difference
// may overflow, whereas unsigned arithmetic wraps modulo 2^64 by definition.
template<typename T> static uint64_t EnumDistanceImpl(T e1, T e2) {
  const bool first_is_smaller = e1 < e2;
  const T larger = first_is_smaller ? e2 : e1;
  const T smaller = first_is_smaller ? e1 : e2;
  return static_cast<uint64_t>(larger) - static_cast<uint64_t>(smaller);
}
312 
compareFieldDefs(const FieldDef * a,const FieldDef * b)313 static bool compareFieldDefs(const FieldDef *a, const FieldDef *b) {
314   auto a_id = atoi(a->attributes.Lookup("id")->constant.c_str());
315   auto b_id = atoi(b->attributes.Lookup("id")->constant.c_str());
316   return a_id < b_id;
317 }
318 
GetNamespace(const std::string & qualified_name,std::vector<Namespace * > & namespaces,std::map<std::string,Namespace * > & namespaces_index)319 static Namespace *GetNamespace(
320     const std::string &qualified_name, std::vector<Namespace *> &namespaces,
321     std::map<std::string, Namespace *> &namespaces_index) {
322   size_t dot = qualified_name.find_last_of('.');
323   std::string namespace_name = (dot != std::string::npos)
324                                    ? std::string(qualified_name.c_str(), dot)
325                                    : "";
326   Namespace *&ns = namespaces_index[namespace_name];
327 
328   if (!ns) {
329     ns = new Namespace();
330     namespaces.push_back(ns);
331 
332     size_t pos = 0;
333 
334     for (;;) {
335       dot = qualified_name.find('.', pos);
336       if (dot == std::string::npos) { break; }
337       ns->components.push_back(qualified_name.substr(pos, dot - pos));
338       pos = dot + 1;
339     }
340   }
341 
342   return ns;
343 }
344 
345 // Generate a unique hash for a file based on its name and contents (if any).
HashFile(const char * source_filename,const char * source)346 static uint64_t HashFile(const char *source_filename, const char *source) {
347   uint64_t hash = 0;
348 
349   if (source_filename)
350     hash = HashFnv1a<uint64_t>(StripPath(source_filename).c_str());
351 
352   if (source && *source) hash ^= HashFnv1a<uint64_t>(source);
353 
354   return hash;
355 }
356 
// Orders two definitions by their fully qualified names, as produced by each
// definition's own namespace.
template<typename T> static bool compareName(const T *a, const T *b) {
  const auto lhs_name = a->defined_namespace->GetFullyQualifiedName(a->name);
  const auto rhs_name = b->defined_namespace->GetFullyQualifiedName(b->name);
  return lhs_name < rhs_name;
}
361 
AssignIndices(const std::vector<T * > & defvec)362 template<typename T> static void AssignIndices(const std::vector<T *> &defvec) {
363   // Pre-sort these vectors, such that we can set the correct indices for them.
364   auto vec = defvec;
365   std::sort(vec.begin(), vec.end(), compareName<T>);
366   for (int i = 0; i < static_cast<int>(vec.size()); i++) vec[i]->index = i;
367 }
368 
369 }  // namespace
370 
Message(const std::string & msg)371 void Parser::Message(const std::string &msg) {
372   if (!error_.empty()) error_ += "\n";  // log all warnings and errors
373   error_ += file_being_parsed_.length() ? AbsolutePath(file_being_parsed_) : "";
374   // clang-format off
375 
376   #ifdef _WIN32  // MSVC alike
377     error_ +=
378         "(" + NumToString(line_) + ", " + NumToString(CursorPosition()) + ")";
379   #else  // gcc alike
380     if (file_being_parsed_.length()) error_ += ":";
381     error_ += NumToString(line_) + ": " + NumToString(CursorPosition());
382   #endif
383   // clang-format on
384   error_ += ": " + msg;
385 }
386 
Warning(const std::string & msg)387 void Parser::Warning(const std::string &msg) {
388   if (!opts.no_warnings) {
389     Message("warning: " + msg);
390     has_warning_ = true;  // for opts.warnings_as_errors
391   }
392 }
393 
Error(const std::string & msg)394 CheckedError Parser::Error(const std::string &msg) {
395   Message("error: " + msg);
396   return CheckedError(true);
397 }
398 
RecurseError()399 CheckedError Parser::RecurseError() {
400   return Error("maximum parsing depth " + NumToString(parse_depth_counter_) +
401                " reached");
402 }
403 
GetPooledString(const std::string & s) const404 const std::string &Parser::GetPooledString(const std::string &s) const {
405   return *(string_cache_.insert(s).first);
406 }
407 
408 class Parser::ParseDepthGuard {
409  public:
ParseDepthGuard(Parser * parser_not_null)410   explicit ParseDepthGuard(Parser *parser_not_null)
411       : parser_(*parser_not_null), caller_depth_(parser_.parse_depth_counter_) {
412     FLATBUFFERS_ASSERT(caller_depth_ <= (FLATBUFFERS_MAX_PARSING_DEPTH) &&
413                        "Check() must be called to prevent stack overflow");
414     parser_.parse_depth_counter_ += 1;
415   }
416 
~ParseDepthGuard()417   ~ParseDepthGuard() { parser_.parse_depth_counter_ -= 1; }
418 
Check()419   CheckedError Check() {
420     return caller_depth_ >= (FLATBUFFERS_MAX_PARSING_DEPTH)
421                ? parser_.RecurseError()
422                : CheckedError(false);
423   }
424 
425   FLATBUFFERS_DELETE_FUNC(ParseDepthGuard(const ParseDepthGuard &));
426   FLATBUFFERS_DELETE_FUNC(ParseDepthGuard &operator=(const ParseDepthGuard &));
427 
428  private:
429   Parser &parser_;
430   const int caller_depth_;
431 };
432 
GetFullyQualifiedName(const std::string & name,size_t max_components) const433 std::string Namespace::GetFullyQualifiedName(const std::string &name,
434                                              size_t max_components) const {
435   // Early exit if we don't have a defined namespace.
436   if (components.empty() || !max_components) { return name; }
437   std::string stream_str;
438   for (size_t i = 0; i < std::min(components.size(), max_components); i++) {
439     stream_str += components[i];
440     stream_str += '.';
441   }
442   if (!stream_str.empty()) stream_str.pop_back();
443   if (name.length()) {
444     stream_str += '.';
445     stream_str += name;
446   }
447   return stream_str;
448 }
449 
TokenToStringId(int t) const450 std::string Parser::TokenToStringId(int t) const {
451   return t == kTokenIdentifier ? attribute_ : TokenToString(t);
452 }
453 
454 // Parses exactly nibbles worth of hex digits into a number, or error.
ParseHexNum(int nibbles,uint64_t * val)455 CheckedError Parser::ParseHexNum(int nibbles, uint64_t *val) {
456   FLATBUFFERS_ASSERT(nibbles > 0);
457   for (int i = 0; i < nibbles; i++)
458     if (!is_xdigit(cursor_[i]))
459       return Error("escape code must be followed by " + NumToString(nibbles) +
460                    " hex digits");
461   std::string target(cursor_, cursor_ + nibbles);
462   *val = StringToUInt(target.c_str(), 16);
463   cursor_ += nibbles;
464   return NoError();
465 }
466 
SkipByteOrderMark()467 CheckedError Parser::SkipByteOrderMark() {
468   if (static_cast<unsigned char>(*cursor_) != 0xef) return NoError();
469   cursor_++;
470   if (static_cast<unsigned char>(*cursor_) != 0xbb)
471     return Error("invalid utf-8 byte order mark");
472   cursor_++;
473   if (static_cast<unsigned char>(*cursor_) != 0xbf)
474     return Error("invalid utf-8 byte order mark");
475   cursor_++;
476   return NoError();
477 }
478 
// Lexer: advances the cursor to the end of the next token, storing the token
// kind in token_ and, for strings, identifiers and numbers, its text in
// attribute_. Whitespace and comments are skipped; `///` documentation
// comments seen on the way are collected into doc_comment_ (which is cleared
// at the start of every call). Single-character tokens use the character
// itself as token kind. Returns NoError() on success or the result of Error()
// for malformed input.
Next()479 CheckedError Parser::Next() {
480   doc_comment_.clear();
481   prev_cursor_ = cursor_;
// At the very start of the source we count as being on a fresh line.
482   bool seen_newline = cursor_ == source_;
483   attribute_.clear();
484   attr_is_trivial_ascii_string_ = true;
485   for (;;) {
486     char c = *cursor_++;
487     token_ = c;
488     switch (c) {
489       case '\0':
// Step back so the cursor stays on the terminator.
490         cursor_--;
491         token_ = kTokenEof;
492         return NoError();
493       case ' ':
494       case '\r':
495       case '\t': break;
496       case '\n':
497         MarkNewLine();
498         seen_newline = true;
499         break;
500       case '{':
501       case '}':
502       case '(':
503       case ')':
504       case '[':
505       case ']':
506       case '<':
507       case '>':
508       case ',':
509       case ':':
510       case ';':
511       case '=': return NoError();
// String constant: runs until the same quote character that opened it.
512       case '\"':
513       case '\'': {
// Holds a pending \uD800-\uDBFF escape until its low surrogate arrives.
514         int unicode_high_surrogate = -1;
515 
516         while (*cursor_ != c) {
// Rejects characters below ' ' with a non-negative value, which includes the
// terminating '\0'; bytes that are negative as signed char pass through.
517           if (*cursor_ < ' ' && static_cast<signed char>(*cursor_) >= 0)
518             return Error("illegal character in string constant");
519           if (*cursor_ == '\\') {
520             attr_is_trivial_ascii_string_ = false;  // has escape sequence
521             cursor_++;
// A pending high surrogate may only be followed by another \u escape.
522             if (unicode_high_surrogate != -1 && *cursor_ != 'u') {
523               return Error(
524                   "illegal Unicode sequence (unpaired high surrogate)");
525             }
526             switch (*cursor_) {
527               case 'n':
528                 attribute_ += '\n';
529                 cursor_++;
530                 break;
531               case 't':
532                 attribute_ += '\t';
533                 cursor_++;
534                 break;
535               case 'r':
536                 attribute_ += '\r';
537                 cursor_++;
538                 break;
539               case 'b':
540                 attribute_ += '\b';
541                 cursor_++;
542                 break;
543               case 'f':
544                 attribute_ += '\f';
545                 cursor_++;
546                 break;
547               case '\"':
548                 attribute_ += '\"';
549                 cursor_++;
550                 break;
551               case '\'':
552                 attribute_ += '\'';
553                 cursor_++;
554                 break;
555               case '\\':
556                 attribute_ += '\\';
557                 cursor_++;
558                 break;
559               case '/':
560                 attribute_ += '/';
561                 cursor_++;
562                 break;
563               case 'x': {  // Not in the JSON standard
564                 cursor_++;
565                 uint64_t val;
566                 ECHECK(ParseHexNum(2, &val));
567                 attribute_ += static_cast<char>(val);
568                 break;
569               }
570               case 'u': {
571                 cursor_++;
572                 uint64_t val;
573                 ECHECK(ParseHexNum(4, &val));
// 0xD800-0xDBFF: high surrogate, must be followed by a low surrogate.
574                 if (val >= 0xD800 && val <= 0xDBFF) {
575                   if (unicode_high_surrogate != -1) {
576                     return Error(
577                         "illegal Unicode sequence (multiple high surrogates)");
578                   } else {
579                     unicode_high_surrogate = static_cast<int>(val);
580                   }
// 0xDC00-0xDFFF: low surrogate, completes a pending high surrogate.
581                 } else if (val >= 0xDC00 && val <= 0xDFFF) {
582                   if (unicode_high_surrogate == -1) {
583                     return Error(
584                         "illegal Unicode sequence (unpaired low surrogate)");
585                   } else {
// Combine the two 10-bit halves into a code point above 0xFFFF.
586                     int code_point = 0x10000 +
587                                      ((unicode_high_surrogate & 0x03FF) << 10) +
588                                      (val & 0x03FF);
589                     ToUTF8(code_point, &attribute_);
590                     unicode_high_surrogate = -1;
591                   }
592                 } else {
593                   if (unicode_high_surrogate != -1) {
594                     return Error(
595                         "illegal Unicode sequence (unpaired high surrogate)");
596                   }
597                   ToUTF8(static_cast<int>(val), &attribute_);
598                 }
599                 break;
600               }
601               default: return Error("unknown escape code in string constant");
602             }
603           } else {  // printable chars + UTF-8 bytes
604             if (unicode_high_surrogate != -1) {
605               return Error(
606                   "illegal Unicode sequence (unpaired high surrogate)");
607             }
608             // reset if non-printable
609             attr_is_trivial_ascii_string_ &=
610                 check_ascii_range(*cursor_, ' ', '~');
611 
612             attribute_ += *cursor_++;
613           }
614         }
615         if (unicode_high_surrogate != -1) {
616           return Error("illegal Unicode sequence (unpaired high surrogate)");
617         }
// Consume the closing quote.
618         cursor_++;
// UTF-8 validation is only needed when the string had escapes or characters
// outside ' '..'~', and only when non-UTF-8 content is not allowed.
619         if (!attr_is_trivial_ascii_string_ && !opts.allow_non_utf8 &&
620             !ValidateUTF8(attribute_)) {
621           return Error("illegal UTF-8 sequence");
622         }
623         token_ = kTokenStringConstant;
624         return NoError();
625       }
626       case '/':
627         if (*cursor_ == '/') {
// Line comment: skip to end of line (the line break itself is not consumed).
628           const char *start = ++cursor_;
629           while (*cursor_ && *cursor_ != '\n' && *cursor_ != '\r') cursor_++;
630           if (*start == '/') {  // documentation comment
631             if (!seen_newline)
632               return Error(
633                   "a documentation comment should be on a line on its own");
634             doc_comment_.push_back(std::string(start + 1, cursor_));
635           }
636           break;
637         } else if (*cursor_ == '*') {
638           cursor_++;
639           // TODO: make nested.
640           while (*cursor_ != '*' || cursor_[1] != '/') {
641             if (*cursor_ == '\n') MarkNewLine();
642             if (!*cursor_) return Error("end of file in comment");
643             cursor_++;
644           }
// Skip the closing "*/".
645           cursor_ += 2;
646           break;
647         }
648         FLATBUFFERS_FALLTHROUGH();  // else fall thru
649       default:
650         if (IsIdentifierStart(c)) {
651           // Collect all chars of an identifier:
652           const char *start = cursor_ - 1;
653           while (IsIdentifierStart(*cursor_) || is_digit(*cursor_)) cursor_++;
654           attribute_.append(start, cursor_);
655           token_ = kTokenIdentifier;
656           return NoError();
657         }
658 
659         const auto has_sign = (c == '+') || (c == '-');
660         if (has_sign) {
661           // Check for +/-inf which is considered a float constant.
662           if (strncmp(cursor_, "inf", 3) == 0 &&
663               !(IsIdentifierStart(cursor_[3]) || is_digit(cursor_[3]))) {
664             attribute_.assign(cursor_ - 1, cursor_ + 3);
665             token_ = kTokenFloatConstant;
666             cursor_ += 3;
667             return NoError();
668           }
669 
670           if (IsIdentifierStart(*cursor_)) {
671             // '-'/'+' and following identifier - it could be a predefined
672             // constant. Return the sign in token_, see ParseSingleValue.
673             return NoError();
674           }
675         }
676 
// dot_lvl counts how many more '.' the number may contain: it starts at 1
// (0 if the token itself began with '.') and a value below 0 marks an
// invalid number.
677         auto dot_lvl =
678             (c == '.') ? 0 : 1;  // dot_lvl==0 <=> exactly one '.' seen
679         if (!dot_lvl && !is_digit(*cursor_)) return NoError();  // enum?
680         // Parser accepts hexadecimal-floating-literal (see C++ 5.13.4).
681         if (is_digit(c) || has_sign || !dot_lvl) {
682           const auto start = cursor_ - 1;
683           auto start_digits = !is_digit(c) ? cursor_ : cursor_ - 1;
684           if (!is_digit(c) && is_digit(*cursor_)) {
685             start_digits = cursor_;  // see digit in cursor_ position
686             c = *cursor_++;
687           }
688           // hex-float can't begin with '.'
689           auto use_hex = dot_lvl && (c == '0') && is_alpha_char(*cursor_, 'X');
690           if (use_hex) start_digits = ++cursor_;  // '0x' is the prefix, skip it
691           // Read an integer number or mantissa of float-point number.
692           do {
693             if (use_hex) {
694               while (is_xdigit(*cursor_)) cursor_++;
695             } else {
696               while (is_digit(*cursor_)) cursor_++;
697             }
698           } while ((*cursor_ == '.') && (++cursor_) && (--dot_lvl >= 0));
699           // Exponent of float-point number.
700           if ((dot_lvl >= 0) && (cursor_ > start_digits)) {
701             // The exponent suffix of hexadecimal float number is mandatory.
702             if (use_hex && !dot_lvl) start_digits = cursor_;
703             if ((use_hex && is_alpha_char(*cursor_, 'P')) ||
704                 is_alpha_char(*cursor_, 'E')) {
705               dot_lvl = 0;  // Emulate dot to signal about float-point number.
706               cursor_++;
707               if (*cursor_ == '+' || *cursor_ == '-') cursor_++;
708               start_digits = cursor_;  // the exponent-part has to have digits
709               // Exponent is decimal integer number
710               while (is_digit(*cursor_)) cursor_++;
711               if (*cursor_ == '.') {
712                 cursor_++;  // If see a dot treat it as part of invalid number.
713                 dot_lvl = -1;  // Fall thru to Error().
714               }
715             }
716           }
717           // Finalize.
// A valid number needs at least one digit after start_digits; dot_lvl still
// at 1 means neither a dot nor an exponent was seen, i.e. an integer.
718           if ((dot_lvl >= 0) && (cursor_ > start_digits)) {
719             attribute_.append(start, cursor_);
720             token_ = dot_lvl ? kTokenIntegerConstant : kTokenFloatConstant;
721             return NoError();
722           } else {
723             return Error("invalid number: " + std::string(start, cursor_));
724           }
725         }
// Anything else is an illegal character; show characters outside ' '..'~'
// by their numeric code.
726         std::string ch;
727         ch = c;
728         if (false == check_ascii_range(c, ' ', '~'))
729           ch = "code: " + NumToString(c);
730         return Error("illegal character: " + ch);
731     }
732   }
733 }
734 
735 // Check if a given token is next.
Is(int t) const736 bool Parser::Is(int t) const { return t == token_; }
737 
IsIdent(const char * id) const738 bool Parser::IsIdent(const char *id) const {
739   return token_ == kTokenIdentifier && attribute_ == id;
740 }
741 
742 // Expect a given token to be next, consume it, or error if not present.
Expect(int t)743 CheckedError Parser::Expect(int t) {
744   if (t != token_) {
745     return Error("expecting: " + TokenToString(t) +
746                  " instead got: " + TokenToStringId(token_));
747   }
748   NEXT();
749   return NoError();
750 }
751 
ParseNamespacing(std::string * id,std::string * last)752 CheckedError Parser::ParseNamespacing(std::string *id, std::string *last) {
753   while (Is('.')) {
754     NEXT();
755     *id += ".";
756     *id += attribute_;
757     if (last) *last = attribute_;
758     EXPECT(kTokenIdentifier);
759   }
760   return NoError();
761 }
762 
LookupEnum(const std::string & id)763 EnumDef *Parser::LookupEnum(const std::string &id) {
764   // Search thru parent namespaces.
765   return LookupTableByName(enums_, id, *current_namespace_, 0);
766 }
767 
LookupStruct(const std::string & id) const768 StructDef *Parser::LookupStruct(const std::string &id) const {
769   auto sd = structs_.Lookup(id);
770   if (sd) sd->refcount++;
771   return sd;
772 }
773 
LookupStructThruParentNamespaces(const std::string & id) const774 StructDef *Parser::LookupStructThruParentNamespaces(
775     const std::string &id) const {
776   auto sd = LookupTableByName(structs_, id, *current_namespace_, 1);
777   if (sd) sd->refcount++;
778   return sd;
779 }
780 
ParseTypeIdent(Type & type)781 CheckedError Parser::ParseTypeIdent(Type &type) {
782   std::string id = attribute_;
783   EXPECT(kTokenIdentifier);
784   ECHECK(ParseNamespacing(&id, nullptr));
785   auto enum_def = LookupEnum(id);
786   if (enum_def) {
787     type = enum_def->underlying_type;
788     if (enum_def->is_union) type.base_type = BASE_TYPE_UNION;
789   } else {
790     type.base_type = BASE_TYPE_STRUCT;
791     type.struct_def = LookupCreateStruct(id);
792   }
793   return NoError();
794 }
795 
796 // Parse any IDL type.
ParseType(Type & type)797 CheckedError Parser::ParseType(Type &type) {
798   if (token_ == kTokenIdentifier) {
799     if (IsIdent("bool")) {
800       type.base_type = BASE_TYPE_BOOL;
801       NEXT();
802     } else if (IsIdent("byte") || IsIdent("int8")) {
803       type.base_type = BASE_TYPE_CHAR;
804       NEXT();
805     } else if (IsIdent("ubyte") || IsIdent("uint8")) {
806       type.base_type = BASE_TYPE_UCHAR;
807       NEXT();
808     } else if (IsIdent("short") || IsIdent("int16")) {
809       type.base_type = BASE_TYPE_SHORT;
810       NEXT();
811     } else if (IsIdent("ushort") || IsIdent("uint16")) {
812       type.base_type = BASE_TYPE_USHORT;
813       NEXT();
814     } else if (IsIdent("int") || IsIdent("int32")) {
815       type.base_type = BASE_TYPE_INT;
816       NEXT();
817     } else if (IsIdent("uint") || IsIdent("uint32")) {
818       type.base_type = BASE_TYPE_UINT;
819       NEXT();
820     } else if (IsIdent("long") || IsIdent("int64")) {
821       type.base_type = BASE_TYPE_LONG;
822       NEXT();
823     } else if (IsIdent("ulong") || IsIdent("uint64")) {
824       type.base_type = BASE_TYPE_ULONG;
825       NEXT();
826     } else if (IsIdent("float") || IsIdent("float32")) {
827       type.base_type = BASE_TYPE_FLOAT;
828       NEXT();
829     } else if (IsIdent("double") || IsIdent("float64")) {
830       type.base_type = BASE_TYPE_DOUBLE;
831       NEXT();
832     } else if (IsIdent("string")) {
833       type.base_type = BASE_TYPE_STRING;
834       NEXT();
835     } else {
836       ECHECK(ParseTypeIdent(type));
837     }
838   } else if (token_ == '[') {
839     ParseDepthGuard depth_guard(this);
840     ECHECK(depth_guard.Check());
841     NEXT();
842     Type subtype;
843     ECHECK(ParseType(subtype));
844     if (IsSeries(subtype)) {
845       // We could support this, but it will complicate things, and it's
846       // easier to work around with a struct around the inner vector.
847       return Error("nested vector types not supported (wrap in table first)");
848     }
849     if (token_ == ':') {
850       NEXT();
851       if (token_ != kTokenIntegerConstant) {
852         return Error("length of fixed-length array must be an integer value");
853       }
854       uint16_t fixed_length = 0;
855       bool check = StringToNumber(attribute_.c_str(), &fixed_length);
856       if (!check || fixed_length < 1) {
857         return Error(
858             "length of fixed-length array must be positive and fit to "
859             "uint16_t type");
860       }
861       type = Type(BASE_TYPE_ARRAY, subtype.struct_def, subtype.enum_def,
862                   fixed_length);
863       NEXT();
864     } else {
865       type = Type(BASE_TYPE_VECTOR, subtype.struct_def, subtype.enum_def);
866     }
867     type.element = subtype.base_type;
868     EXPECT(']');
869   } else {
870     return Error("illegal type syntax");
871   }
872   return NoError();
873 }
874 
AddField(StructDef & struct_def,const std::string & name,const Type & type,FieldDef ** dest)875 CheckedError Parser::AddField(StructDef &struct_def, const std::string &name,
876                               const Type &type, FieldDef **dest) {
877   auto &field = *new FieldDef();
878   field.value.offset =
879       FieldIndexToOffset(static_cast<voffset_t>(struct_def.fields.vec.size()));
880   field.name = name;
881   field.file = struct_def.file;
882   field.value.type = type;
883   if (struct_def.fixed) {  // statically compute the field offset
884     auto size = InlineSize(type);
885     auto alignment = InlineAlignment(type);
886     // structs_ need to have a predictable format, so we need to align to
887     // the largest scalar
888     struct_def.minalign = std::max(struct_def.minalign, alignment);
889     struct_def.PadLastField(alignment);
890     field.value.offset = static_cast<voffset_t>(struct_def.bytesize);
891     struct_def.bytesize += size;
892   }
893   if (struct_def.fields.Add(name, &field))
894     return Error("field already exists: " + name);
895   *dest = &field;
896   return NoError();
897 }
898 
ParseField(StructDef & struct_def)899 CheckedError Parser::ParseField(StructDef &struct_def) {
900   std::string name = attribute_;
901 
902   if (LookupCreateStruct(name, false, false))
903     return Error("field name can not be the same as table/struct name");
904 
905   if (!IsLowerSnakeCase(name)) {
906     Warning("field names should be lowercase snake_case, got: " + name);
907   }
908 
909   std::vector<std::string> dc = doc_comment_;
910   EXPECT(kTokenIdentifier);
911   EXPECT(':');
912   Type type;
913   ECHECK(ParseType(type));
914 
915   if (struct_def.fixed) {
916     if (IsIncompleteStruct(type) ||
917         (IsArray(type) && IsIncompleteStruct(type.VectorType()))) {
918       std::string type_name = IsArray(type) ? type.VectorType().struct_def->name
919                                             : type.struct_def->name;
920       return Error(
921           std::string("Incomplete type in struct is not allowed, type name: ") +
922           type_name);
923     }
924 
925     auto valid = IsScalar(type.base_type) || IsStruct(type);
926     if (!valid && IsArray(type)) {
927       const auto &elem_type = type.VectorType();
928       valid |= IsScalar(elem_type.base_type) || IsStruct(elem_type);
929     }
930     if (!valid)
931       return Error("structs may contain only scalar or struct fields");
932   }
933 
934   if (!struct_def.fixed && IsArray(type))
935     return Error("fixed-length array in table must be wrapped in struct");
936 
937   if (IsArray(type)) {
938     advanced_features_ |= reflection::AdvancedArrayFeatures;
939     if (!SupportsAdvancedArrayFeatures()) {
940       return Error(
941           "Arrays are not yet supported in all "
942           "the specified programming languages.");
943     }
944   }
945 
946   FieldDef *typefield = nullptr;
947   if (type.base_type == BASE_TYPE_UNION) {
948     // For union fields, add a second auto-generated field to hold the type,
949     // with a special suffix.
950 
951     // To ensure compatibility with many codes that rely on the BASE_TYPE_UTYPE value to identify union type fields.
952     Type union_type(type.enum_def->underlying_type);
953     union_type.base_type = BASE_TYPE_UTYPE;
954     ECHECK(AddField(struct_def, name + UnionTypeFieldSuffix(),union_type, &typefield));
955 
956   } else if (IsVector(type) && type.element == BASE_TYPE_UNION) {
957     advanced_features_ |= reflection::AdvancedUnionFeatures;
958     // Only cpp, js and ts supports the union vector feature so far.
959     if (!SupportsAdvancedUnionFeatures()) {
960       return Error(
961           "Vectors of unions are not yet supported in at least one of "
962           "the specified programming languages.");
963     }
964     // For vector of union fields, add a second auto-generated vector field to
965     // hold the types, with a special suffix.
966     Type union_vector(BASE_TYPE_VECTOR, nullptr, type.enum_def);
967     union_vector.element = BASE_TYPE_UTYPE;
968     ECHECK(AddField(struct_def, name + UnionTypeFieldSuffix(), union_vector,
969                     &typefield));
970   }
971 
972   FieldDef *field;
973   ECHECK(AddField(struct_def, name, type, &field));
974 
975   if (typefield) {
976     // We preserve the relation between the typefield
977     // and field, so we can easily map it in the code
978     // generators.
979     typefield->sibling_union_field = field;
980     field->sibling_union_field = typefield;
981   }
982 
983   if (token_ == '=') {
984     NEXT();
985     ECHECK(ParseSingleValue(&field->name, field->value, true));
986     if (IsStruct(type) || (struct_def.fixed && field->value.constant != "0"))
987       return Error(
988           "default values are not supported for struct fields, table fields, "
989           "or in structs.");
990     if (IsString(type) || IsVector(type)) {
991       advanced_features_ |= reflection::DefaultVectorsAndStrings;
992       if (field->value.constant != "0" && !SupportsDefaultVectorsAndStrings()) {
993         return Error(
994             "Default values for strings and vectors are not supported in one "
995             "of the specified programming languages");
996       }
997     }
998 
999     if (IsVector(type) && field->value.constant != "0" &&
1000         field->value.constant != "[]") {
1001       return Error("The only supported default for vectors is `[]`.");
1002     }
1003   }
1004 
1005   // Append .0 if the value has not it (skip hex and scientific floats).
1006   // This suffix needed for generated C++ code.
1007   if (IsFloat(type.base_type)) {
1008     auto &text = field->value.constant;
1009     FLATBUFFERS_ASSERT(false == text.empty());
1010     auto s = text.c_str();
1011     while (*s == ' ') s++;
1012     if (*s == '-' || *s == '+') s++;
1013     // 1) A float constants (nan, inf, pi, etc) is a kind of identifier.
1014     // 2) A float number needn't ".0" at the end if it has exponent.
1015     if ((false == IsIdentifierStart(*s)) &&
1016         (std::string::npos == field->value.constant.find_first_of(".eEpP"))) {
1017       field->value.constant += ".0";
1018     }
1019   }
1020 
1021   field->doc_comment = dc;
1022   ECHECK(ParseMetaData(&field->attributes));
1023   field->deprecated = field->attributes.Lookup("deprecated") != nullptr;
1024   auto hash_name = field->attributes.Lookup("hash");
1025   if (hash_name) {
1026     switch ((IsVector(type)) ? type.element : type.base_type) {
1027       case BASE_TYPE_SHORT:
1028       case BASE_TYPE_USHORT: {
1029         if (FindHashFunction16(hash_name->constant.c_str()) == nullptr)
1030           return Error("Unknown hashing algorithm for 16 bit types: " +
1031                        hash_name->constant);
1032         break;
1033       }
1034       case BASE_TYPE_INT:
1035       case BASE_TYPE_UINT: {
1036         if (FindHashFunction32(hash_name->constant.c_str()) == nullptr)
1037           return Error("Unknown hashing algorithm for 32 bit types: " +
1038                        hash_name->constant);
1039         break;
1040       }
1041       case BASE_TYPE_LONG:
1042       case BASE_TYPE_ULONG: {
1043         if (FindHashFunction64(hash_name->constant.c_str()) == nullptr)
1044           return Error("Unknown hashing algorithm for 64 bit types: " +
1045                        hash_name->constant);
1046         break;
1047       }
1048       default:
1049         return Error(
1050             "only short, ushort, int, uint, long and ulong data types support "
1051             "hashing.");
1052     }
1053   }
1054 
1055   if (field->attributes.Lookup("vector64") != nullptr) {
1056     if (!IsVector(type)) {
1057       return Error("`vector64` attribute can only be applied on vectors.");
1058     }
1059 
1060     // Upgrade the type to be a BASE_TYPE_VECTOR64, since the attributes are
1061     // parsed after the type.
1062     const BaseType element_base_type = type.element;
1063     type = Type(BASE_TYPE_VECTOR64, type.struct_def, type.enum_def);
1064     type.element = element_base_type;
1065 
1066     // Since the field was already added to the parent object, update the type
1067     // in place.
1068     field->value.type = type;
1069 
1070     // 64-bit vectors imply the offset64 attribute.
1071     field->offset64 = true;
1072   }
1073 
1074   // Record that this field uses 64-bit offsets.
1075   if (field->attributes.Lookup("offset64") != nullptr) {
1076     // TODO(derekbailey): would be nice to have this be a recommendation or hint
1077     // instead of a warning.
1078     if (type.base_type == BASE_TYPE_VECTOR64) {
1079       Warning("attribute `vector64` implies `offset64` and isn't required.");
1080     }
1081 
1082     field->offset64 = true;
1083   }
1084 
1085   // Check for common conditions with Offset64 fields.
1086   if (field->offset64) {
1087     // TODO(derekbailey): this is where we can disable string support for
1088     // offset64, as that is not a hard requirement to have.
1089     if (!IsString(type) && !IsVector(type)) {
1090       return Error(
1091           "only string and vectors can have `offset64` attribute applied");
1092     }
1093 
1094     // If this is a Vector, only scalar and scalar-like (structs) items are
1095     // allowed.
1096     // TODO(derekbailey): allow vector of strings, just require that the strings
1097     // are Offset64<string>.
1098     if (IsVector(type) &&
1099         !((IsScalar(type.element) && !IsEnum(type.VectorType())) ||
1100           IsStruct(type.VectorType()))) {
1101       return Error("only vectors of scalars are allowed to be 64-bit.");
1102     }
1103 
1104     // Lastly, check if it is supported by the specified generated languages. Do
1105     // this last so the above checks can inform the user of schema errors to fix
1106     // first.
1107     if (!Supports64BitOffsets()) {
1108       return Error(
1109           "fields using 64-bit offsets are not yet supported in at least one "
1110           "of the specified programming languages.");
1111     }
1112   }
1113 
1114   // For historical convenience reasons, string keys are assumed required.
1115   // Scalars are kDefault unless otherwise specified.
1116   // Nonscalars are kOptional unless required;
1117   field->key = field->attributes.Lookup("key") != nullptr;
1118   const bool required = field->attributes.Lookup("required") != nullptr ||
1119                         (IsString(type) && field->key);
1120   const bool default_str_or_vec =
1121       ((IsString(type) || IsVector(type)) && field->value.constant != "0");
1122   const bool optional = IsScalar(type.base_type)
1123                             ? (field->value.constant == "null")
1124                             : !(required || default_str_or_vec);
1125   if (required && optional) {
1126     return Error("Fields cannot be both optional and required.");
1127   }
1128   field->presence = FieldDef::MakeFieldPresence(optional, required);
1129 
1130   if (required && (struct_def.fixed || IsScalar(type.base_type))) {
1131     return Error("only non-scalar fields in tables may be 'required'");
1132   }
1133   if (field->key) {
1134     if (struct_def.has_key) return Error("only one field may be set as 'key'");
1135     struct_def.has_key = true;
1136     auto is_valid =
1137         IsScalar(type.base_type) || IsString(type) || IsStruct(type);
1138     if (IsArray(type)) {
1139       is_valid |=
1140           IsScalar(type.VectorType().base_type) || IsStruct(type.VectorType());
1141     }
1142     if (!is_valid) {
1143       return Error(
1144           "'key' field must be string, scalar type or fixed size array of "
1145           "scalars");
1146     }
1147   }
1148 
1149   if (field->IsScalarOptional()) {
1150     advanced_features_ |= reflection::OptionalScalars;
1151     if (type.enum_def && type.enum_def->Lookup("null")) {
1152       FLATBUFFERS_ASSERT(IsInteger(type.base_type));
1153       return Error(
1154           "the default 'null' is reserved for declaring optional scalar "
1155           "fields, it conflicts with declaration of enum '" +
1156           type.enum_def->name + "'.");
1157     }
1158     if (field->attributes.Lookup("key")) {
1159       return Error(
1160           "only a non-optional scalar field can be used as a 'key' field");
1161     }
1162     if (!SupportsOptionalScalars()) {
1163       return Error(
1164           "Optional scalars are not yet supported in at least one of "
1165           "the specified programming languages.");
1166     }
1167   }
1168 
1169   if (type.enum_def) {
1170     // Verify the enum's type and default value.
1171     const std::string &constant = field->value.constant;
1172     if (type.base_type == BASE_TYPE_UNION) {
1173       if (constant != "0") { return Error("Union defaults must be NONE"); }
1174     } else if (IsVector(type)) {
1175       if (constant != "0" && constant != "[]") {
1176         return Error("Vector defaults may only be `[]`.");
1177       }
1178     } else if (IsArray(type)) {
1179       if (constant != "0") {
1180         return Error("Array defaults are not supported yet.");
1181       }
1182     } else {
1183       if (!IsInteger(type.base_type)) {
1184         return Error("Enums must have integer base types");
1185       }
1186       // Optional and bitflags enums may have default constants that are not
1187       // their specified variants.
1188       if (!field->IsOptional() &&
1189           type.enum_def->attributes.Lookup("bit_flags") == nullptr) {
1190         if (type.enum_def->FindByValue(constant) == nullptr) {
1191           return Error("default value of `" + constant + "` for " + "field `" +
1192                        name + "` is not part of enum `" + type.enum_def->name +
1193                        "`.");
1194         }
1195       }
1196     }
1197   }
1198 
1199   if (field->deprecated && struct_def.fixed)
1200     return Error("can't deprecate fields in a struct");
1201 
1202   auto cpp_type = field->attributes.Lookup("cpp_type");
1203   if (cpp_type) {
1204     if (!hash_name)
1205       return Error("cpp_type can only be used with a hashed field");
1206     /// forcing cpp_ptr_type to 'naked' if unset
1207     auto cpp_ptr_type = field->attributes.Lookup("cpp_ptr_type");
1208     if (!cpp_ptr_type) {
1209       auto val = new Value();
1210       val->type = cpp_type->type;
1211       val->constant = "naked";
1212       field->attributes.Add("cpp_ptr_type", val);
1213     }
1214   }
1215 
1216   field->shared = field->attributes.Lookup("shared") != nullptr;
1217   if (field->shared && field->value.type.base_type != BASE_TYPE_STRING)
1218     return Error("shared can only be defined on strings");
1219 
1220   auto field_native_custom_alloc =
1221       field->attributes.Lookup("native_custom_alloc");
1222   if (field_native_custom_alloc)
1223     return Error(
1224         "native_custom_alloc can only be used with a table or struct "
1225         "definition");
1226 
1227   field->native_inline = field->attributes.Lookup("native_inline") != nullptr;
1228   if (field->native_inline && !IsStruct(field->value.type) &&
1229       !IsVectorOfStruct(field->value.type) &&
1230       !IsVectorOfTable(field->value.type))
1231     return Error(
1232         "'native_inline' can only be defined on structs, vector of structs or "
1233         "vector of tables");
1234 
1235   auto nested = field->attributes.Lookup("nested_flatbuffer");
1236   if (nested) {
1237     if (nested->type.base_type != BASE_TYPE_STRING)
1238       return Error(
1239           "nested_flatbuffer attribute must be a string (the root type)");
1240     if (!IsVector(type.base_type) || type.element != BASE_TYPE_UCHAR)
1241       return Error(
1242           "nested_flatbuffer attribute may only apply to a vector of ubyte");
1243     // This will cause an error if the root type of the nested flatbuffer
1244     // wasn't defined elsewhere.
1245     field->nested_flatbuffer = LookupCreateStruct(nested->constant);
1246   }
1247 
1248   if (field->attributes.Lookup("flexbuffer")) {
1249     field->flexbuffer = true;
1250     uses_flexbuffers_ = true;
1251     if (type.base_type != BASE_TYPE_VECTOR || type.element != BASE_TYPE_UCHAR)
1252       return Error("flexbuffer attribute may only apply to a vector of ubyte");
1253   }
1254 
1255   if (typefield) {
1256     if (!IsScalar(typefield->value.type.base_type)) {
1257       // this is a union vector field
1258       typefield->presence = field->presence;
1259     }
1260     // If this field is a union, and it has a manually assigned id,
1261     // the automatically added type field should have an id as well (of N - 1).
1262     auto attr = field->attributes.Lookup("id");
1263     if (attr) {
1264       const auto &id_str = attr->constant;
1265       voffset_t id = 0;
1266       const auto done = !atot(id_str.c_str(), *this, &id).Check();
1267       if (done && id > 0) {
1268         auto val = new Value();
1269         val->type = attr->type;
1270         val->constant = NumToString(id - 1);
1271         typefield->attributes.Add("id", val);
1272       } else {
1273         return Error(
1274             "a union type effectively adds two fields with non-negative ids, "
1275             "its id must be that of the second field (the first field is "
1276             "the type field and not explicitly declared in the schema);\n"
1277             "field: " +
1278             field->name + ", id: " + id_str);
1279       }
1280     }
1281     // if this field is a union that is deprecated,
1282     // the automatically added type field should be deprecated as well
1283     if (field->deprecated) { typefield->deprecated = true; }
1284   }
1285 
1286   EXPECT(';');
1287   return NoError();
1288 }
1289 
ParseString(Value & val,bool use_string_pooling)1290 CheckedError Parser::ParseString(Value &val, bool use_string_pooling) {
1291   auto s = attribute_;
1292   EXPECT(kTokenStringConstant);
1293   if (use_string_pooling) {
1294     val.constant = NumToString(builder_.CreateSharedString(s).o);
1295   } else {
1296     val.constant = NumToString(builder_.CreateString(s).o);
1297   }
1298   return NoError();
1299 }
1300 
ParseComma()1301 CheckedError Parser::ParseComma() {
1302   if (!opts.protobuf_ascii_alike) EXPECT(',');
1303   return NoError();
1304 }
1305 
ParseAnyValue(Value & val,FieldDef * field,size_t parent_fieldn,const StructDef * parent_struct_def,size_t count,bool inside_vector)1306 CheckedError Parser::ParseAnyValue(Value &val, FieldDef *field,
1307                                    size_t parent_fieldn,
1308                                    const StructDef *parent_struct_def,
1309                                    size_t count, bool inside_vector) {
1310   switch (val.type.base_type) {
1311     case BASE_TYPE_UNION: {
1312       FLATBUFFERS_ASSERT(field);
1313       std::string constant;
1314       Vector<uint8_t> *vector_of_union_types = nullptr;
1315       // Find corresponding type field we may have already parsed.
1316       for (auto elem = field_stack_.rbegin() + count;
1317            elem != field_stack_.rbegin() + parent_fieldn + count; ++elem) {
1318         auto &type = elem->second->value.type;
1319         if (type.enum_def == val.type.enum_def) {
1320           if (inside_vector) {
1321             if (IsVector(type) && type.element == BASE_TYPE_UTYPE) {
1322               // Vector of union type field.
1323               uoffset_t offset;
1324               ECHECK(atot(elem->first.constant.c_str(), *this, &offset));
1325               vector_of_union_types = reinterpret_cast<Vector<uint8_t> *>(
1326                   builder_.GetCurrentBufferPointer() + builder_.GetSize() -
1327                   offset);
1328               break;
1329             }
1330           } else {
1331             if (type.base_type == BASE_TYPE_UTYPE) {
1332               // Union type field.
1333               constant = elem->first.constant;
1334               break;
1335             }
1336           }
1337         }
1338       }
1339       if (constant.empty() && !inside_vector) {
1340         // We haven't seen the type field yet. Sadly a lot of JSON writers
1341         // output these in alphabetical order, meaning it comes after this
1342         // value. So we scan past the value to find it, then come back here.
1343         // We currently don't do this for vectors of unions because the
1344         // scanning/serialization logic would get very complicated.
1345         auto type_name = field->name + UnionTypeFieldSuffix();
1346         FLATBUFFERS_ASSERT(parent_struct_def);
1347         auto type_field = parent_struct_def->fields.Lookup(type_name);
1348         FLATBUFFERS_ASSERT(type_field);  // Guaranteed by ParseField().
1349         // Remember where we are in the source file, so we can come back here.
1350         auto backup = *static_cast<ParserState *>(this);
1351         ECHECK(SkipAnyJsonValue());  // The table.
1352         ECHECK(ParseComma());
1353         auto next_name = attribute_;
1354         if (Is(kTokenStringConstant)) {
1355           NEXT();
1356         } else {
1357           EXPECT(kTokenIdentifier);
1358         }
1359         if (next_name == type_name) {
1360           EXPECT(':');
1361           ParseDepthGuard depth_guard(this);
1362           ECHECK(depth_guard.Check());
1363           Value type_val = type_field->value;
1364           ECHECK(ParseAnyValue(type_val, type_field, 0, nullptr, 0));
1365           constant = type_val.constant;
1366           // Got the information we needed, now rewind:
1367           *static_cast<ParserState *>(this) = backup;
1368         }
1369       }
1370       if (constant.empty() && !vector_of_union_types) {
1371         return Error("missing type field for this union value: " + field->name);
1372       }
1373       uint8_t enum_idx;
1374       if (vector_of_union_types) {
1375         if (vector_of_union_types->size() <= count)
1376           return Error(
1377               "union types vector smaller than union values vector for: " +
1378               field->name);
1379         enum_idx = vector_of_union_types->Get(static_cast<uoffset_t>(count));
1380       } else {
1381         ECHECK(atot(constant.c_str(), *this, &enum_idx));
1382       }
1383       auto enum_val = val.type.enum_def->ReverseLookup(enum_idx, true);
1384       if (!enum_val) return Error("illegal type id for: " + field->name);
1385       if (enum_val->union_type.base_type == BASE_TYPE_STRUCT) {
1386         ECHECK(ParseTable(*enum_val->union_type.struct_def, &val.constant,
1387                           nullptr));
1388         if (enum_val->union_type.struct_def->fixed) {
1389           // All BASE_TYPE_UNION values are offsets, so turn this into one.
1390           SerializeStruct(*enum_val->union_type.struct_def, val);
1391           builder_.ClearOffsets();
1392           val.constant = NumToString(builder_.GetSize());
1393         }
1394       } else if (IsString(enum_val->union_type)) {
1395         ECHECK(ParseString(val, field->shared));
1396       } else {
1397         FLATBUFFERS_ASSERT(false);
1398       }
1399       break;
1400     }
1401     case BASE_TYPE_STRUCT:
1402       ECHECK(ParseTable(*val.type.struct_def, &val.constant, nullptr));
1403       break;
1404     case BASE_TYPE_STRING: {
1405       ECHECK(ParseString(val, field->shared));
1406       break;
1407     }
1408     case BASE_TYPE_VECTOR64:
1409     case BASE_TYPE_VECTOR: {
1410       uoffset_t off;
1411       ECHECK(ParseVector(val.type, &off, field, parent_fieldn));
1412       val.constant = NumToString(off);
1413       break;
1414     }
1415     case BASE_TYPE_ARRAY: {
1416       ECHECK(ParseArray(val));
1417       break;
1418     }
1419     case BASE_TYPE_INT:
1420     case BASE_TYPE_UINT:
1421     case BASE_TYPE_LONG:
1422     case BASE_TYPE_ULONG: {
1423       if (field && field->attributes.Lookup("hash") &&
1424           (token_ == kTokenIdentifier || token_ == kTokenStringConstant)) {
1425         ECHECK(ParseHash(val, field));
1426       } else {
1427         ECHECK(ParseSingleValue(field ? &field->name : nullptr, val, false));
1428       }
1429       break;
1430     }
1431     default:
1432       ECHECK(ParseSingleValue(field ? &field->name : nullptr, val, false));
1433       break;
1434   }
1435   return NoError();
1436 }
1437 
SerializeStruct(const StructDef & struct_def,const Value & val)1438 void Parser::SerializeStruct(const StructDef &struct_def, const Value &val) {
1439   SerializeStruct(builder_, struct_def, val);
1440 }
1441 
SerializeStruct(FlatBufferBuilder & builder,const StructDef & struct_def,const Value & val)1442 void Parser::SerializeStruct(FlatBufferBuilder &builder,
1443                              const StructDef &struct_def, const Value &val) {
1444   FLATBUFFERS_ASSERT(val.constant.length() == struct_def.bytesize);
1445   builder.Align(struct_def.minalign);
1446   builder.PushBytes(reinterpret_cast<const uint8_t *>(val.constant.c_str()),
1447                     struct_def.bytesize);
1448   builder.AddStructOffset(val.offset, builder.GetSize());
1449 }
1450 
1451 template<typename F>
ParseTableDelimiters(size_t & fieldn,const StructDef * struct_def,F body)1452 CheckedError Parser::ParseTableDelimiters(size_t &fieldn,
1453                                           const StructDef *struct_def, F body) {
1454   // We allow tables both as JSON object{ .. } with field names
1455   // or vector[..] with all fields in order
1456   char terminator = '}';
1457   bool is_nested_vector = struct_def && Is('[');
1458   if (is_nested_vector) {
1459     NEXT();
1460     terminator = ']';
1461   } else {
1462     EXPECT('{');
1463   }
1464   for (;;) {
1465     if ((!opts.strict_json || !fieldn) && Is(terminator)) break;
1466     std::string name;
1467     if (is_nested_vector) {
1468       if (fieldn >= struct_def->fields.vec.size()) {
1469         return Error("too many unnamed fields in nested array");
1470       }
1471       name = struct_def->fields.vec[fieldn]->name;
1472     } else {
1473       name = attribute_;
1474       if (Is(kTokenStringConstant)) {
1475         NEXT();
1476       } else {
1477         EXPECT(opts.strict_json ? kTokenStringConstant : kTokenIdentifier);
1478       }
1479       if (!opts.protobuf_ascii_alike || !(Is('{') || Is('['))) EXPECT(':');
1480     }
1481     ECHECK(body(name, fieldn, struct_def));
1482     if (Is(terminator)) break;
1483     ECHECK(ParseComma());
1484   }
1485   NEXT();
1486   if (is_nested_vector && fieldn != struct_def->fields.vec.size()) {
1487     return Error("wrong number of unnamed fields in table vector");
1488   }
1489   return NoError();
1490 }
1491 
ParseTable(const StructDef & struct_def,std::string * value,uoffset_t * ovalue)1492 CheckedError Parser::ParseTable(const StructDef &struct_def, std::string *value,
1493                                 uoffset_t *ovalue) {
1494   ParseDepthGuard depth_guard(this);
1495   ECHECK(depth_guard.Check());
1496 
1497   size_t fieldn_outer = 0;
1498   auto err = ParseTableDelimiters(
1499       fieldn_outer, &struct_def,
1500       [&](const std::string &name, size_t &fieldn,
1501           const StructDef *struct_def_inner) -> CheckedError {
1502         if (name == "$schema") {
1503           ECHECK(Expect(kTokenStringConstant));
1504           return NoError();
1505         }
1506         auto field = struct_def_inner->fields.Lookup(name);
1507         if (!field) {
1508           if (!opts.skip_unexpected_fields_in_json) {
1509             return Error("unknown field: " + name);
1510           } else {
1511             ECHECK(SkipAnyJsonValue());
1512           }
1513         } else {
1514           if (IsIdent("null") && !IsScalar(field->value.type.base_type)) {
1515             ECHECK(Next());  // Ignore this field.
1516           } else {
1517             Value val = field->value;
1518             if (field->flexbuffer) {
1519               flexbuffers::Builder builder(1024,
1520                                            flexbuffers::BUILDER_FLAG_SHARE_ALL);
1521               ECHECK(ParseFlexBufferValue(&builder));
1522               builder.Finish();
1523               // Force alignment for nested flexbuffer
1524               builder_.ForceVectorAlignment(builder.GetSize(), sizeof(uint8_t),
1525                                             sizeof(largest_scalar_t));
1526               auto off = builder_.CreateVector(builder.GetBuffer());
1527               val.constant = NumToString(off.o);
1528             } else if (field->nested_flatbuffer) {
1529               ECHECK(
1530                   ParseNestedFlatbuffer(val, field, fieldn, struct_def_inner));
1531             } else {
1532               ECHECK(ParseAnyValue(val, field, fieldn, struct_def_inner, 0));
1533             }
1534             // Hardcoded insertion-sort with error-check.
1535             // If fields are specified in order, then this loop exits
1536             // immediately.
1537             auto elem = field_stack_.rbegin();
1538             for (; elem != field_stack_.rbegin() + fieldn; ++elem) {
1539               auto existing_field = elem->second;
1540               if (existing_field == field)
1541                 return Error("field set more than once: " + field->name);
1542               if (existing_field->value.offset < field->value.offset) break;
1543             }
1544             // Note: elem points to before the insertion point, thus .base()
1545             // points to the correct spot.
1546             field_stack_.insert(elem.base(), std::make_pair(val, field));
1547             fieldn++;
1548           }
1549         }
1550         return NoError();
1551       });
1552   ECHECK(err);
1553 
1554   // Check if all required fields are parsed.
1555   for (auto field_it = struct_def.fields.vec.begin();
1556        field_it != struct_def.fields.vec.end(); ++field_it) {
1557     auto required_field = *field_it;
1558     if (!required_field->IsRequired()) { continue; }
1559     bool found = false;
1560     for (auto pf_it = field_stack_.end() - fieldn_outer;
1561          pf_it != field_stack_.end(); ++pf_it) {
1562       auto parsed_field = pf_it->second;
1563       if (parsed_field == required_field) {
1564         found = true;
1565         break;
1566       }
1567     }
1568     if (!found) {
1569       return Error("required field is missing: " + required_field->name +
1570                    " in " + struct_def.name);
1571     }
1572   }
1573 
1574   if (struct_def.fixed && fieldn_outer != struct_def.fields.vec.size())
1575     return Error("struct: wrong number of initializers: " + struct_def.name);
1576 
1577   auto start = struct_def.fixed ? builder_.StartStruct(struct_def.minalign)
1578                                 : builder_.StartTable();
1579 
1580   for (size_t size = struct_def.sortbysize ? sizeof(largest_scalar_t) : 1; size;
1581        size /= 2) {
1582     // Go through elements in reverse, since we're building the data backwards.
1583     // TODO(derekbailey): this doesn't work when there are Offset64 fields, as
1584     // those have to be built first. So this needs to be changed to iterate over
1585     // Offset64 then Offset32 fields.
1586     for (auto it = field_stack_.rbegin();
1587          it != field_stack_.rbegin() + fieldn_outer; ++it) {
1588       auto &field_value = it->first;
1589       auto field = it->second;
1590       if (!struct_def.sortbysize ||
1591           size == SizeOf(field_value.type.base_type)) {
1592         switch (field_value.type.base_type) {
1593           // clang-format off
1594           #define FLATBUFFERS_TD(ENUM, IDLTYPE, CTYPE, ...) \
1595             case BASE_TYPE_ ## ENUM: \
1596               builder_.Pad(field->padding); \
1597               if (struct_def.fixed) { \
1598                 CTYPE val; \
1599                 ECHECK(atot(field_value.constant.c_str(), *this, &val)); \
1600                 builder_.PushElement(val); \
1601               } else { \
1602                 if (field->IsScalarOptional()) { \
1603                   if (field_value.constant != "null") { \
1604                     CTYPE val; \
1605                     ECHECK(atot(field_value.constant.c_str(), *this, &val)); \
1606                     builder_.AddElement(field_value.offset, val); \
1607                   } \
1608                 } else { \
1609                   CTYPE val, valdef; \
1610                   ECHECK(atot(field_value.constant.c_str(), *this, &val)); \
1611                   ECHECK(atot(field->value.constant.c_str(), *this, &valdef)); \
1612                   builder_.AddElement(field_value.offset, val, valdef); \
1613                 } \
1614               } \
1615               break;
1616             FLATBUFFERS_GEN_TYPES_SCALAR(FLATBUFFERS_TD)
1617           #undef FLATBUFFERS_TD
1618           #define FLATBUFFERS_TD(ENUM, IDLTYPE, CTYPE, ...) \
1619             case BASE_TYPE_ ## ENUM: \
1620               builder_.Pad(field->padding); \
1621               if (IsStruct(field->value.type)) { \
1622                 SerializeStruct(*field->value.type.struct_def, field_value); \
1623               } else { \
1624                 /* Special case for fields that use 64-bit addressing */ \
1625                 if(field->offset64) { \
1626                   Offset64<void> offset; \
1627                   ECHECK(atot(field_value.constant.c_str(), *this, &offset)); \
1628                   builder_.AddOffset(field_value.offset, offset); \
1629                 } else { \
1630                   CTYPE val; \
1631                   ECHECK(atot(field_value.constant.c_str(), *this, &val)); \
1632                   builder_.AddOffset(field_value.offset, val); \
1633                 } \
1634               } \
1635               break;
1636             FLATBUFFERS_GEN_TYPES_POINTER(FLATBUFFERS_TD)
1637           #undef FLATBUFFERS_TD
1638             case BASE_TYPE_ARRAY:
1639               builder_.Pad(field->padding);
1640               builder_.PushBytes(
1641                 reinterpret_cast<const uint8_t*>(field_value.constant.c_str()),
1642                 InlineSize(field_value.type));
1643               break;
1644             // clang-format on
1645         }
1646       }
1647     }
1648   }
1649   for (size_t i = 0; i < fieldn_outer; i++) field_stack_.pop_back();
1650 
1651   if (struct_def.fixed) {
1652     builder_.ClearOffsets();
1653     builder_.EndStruct();
1654     FLATBUFFERS_ASSERT(value);
1655     // Temporarily store this struct in the value string, since it is to
1656     // be serialized in-place elsewhere.
1657     value->assign(
1658         reinterpret_cast<const char *>(builder_.GetCurrentBufferPointer()),
1659         struct_def.bytesize);
1660     builder_.PopBytes(struct_def.bytesize);
1661     FLATBUFFERS_ASSERT(!ovalue);
1662   } else {
1663     auto val = builder_.EndTable(start);
1664     if (ovalue) *ovalue = val;
1665     if (value) *value = NumToString(val);
1666   }
1667   return NoError();
1668 }
1669 
1670 template<typename F>
ParseVectorDelimiters(size_t & count,F body)1671 CheckedError Parser::ParseVectorDelimiters(size_t &count, F body) {
1672   EXPECT('[');
1673   for (;;) {
1674     if ((!opts.strict_json || !count) && Is(']')) break;
1675     ECHECK(body(count));
1676     count++;
1677     if (Is(']')) break;
1678     ECHECK(ParseComma());
1679   }
1680   NEXT();
1681   return NoError();
1682 }
1683 
ParseAlignAttribute(const std::string & align_constant,size_t min_align,size_t * align)1684 CheckedError Parser::ParseAlignAttribute(const std::string &align_constant,
1685                                          size_t min_align, size_t *align) {
1686   // Use uint8_t to avoid problems with size_t==`unsigned long` on LP64.
1687   uint8_t align_value;
1688   if (StringToNumber(align_constant.c_str(), &align_value) &&
1689       VerifyAlignmentRequirements(static_cast<size_t>(align_value),
1690                                   min_align)) {
1691     *align = align_value;
1692     return NoError();
1693   }
1694   return Error("unexpected force_align value '" + align_constant +
1695                "', alignment must be a power of two integer ranging from the "
1696                "type\'s natural alignment " +
1697                NumToString(min_align) + " to " +
1698                NumToString(FLATBUFFERS_MAX_ALIGNMENT));
1699 }
1700 
ParseVector(const Type & vector_type,uoffset_t * ovalue,FieldDef * field,size_t fieldn)1701 CheckedError Parser::ParseVector(const Type &vector_type, uoffset_t *ovalue,
1702                                  FieldDef *field, size_t fieldn) {
1703   Type type = vector_type.VectorType();
1704   size_t count = 0;
1705   auto err = ParseVectorDelimiters(count, [&](size_t &) -> CheckedError {
1706     Value val;
1707     val.type = type;
1708     ECHECK(ParseAnyValue(val, field, fieldn, nullptr, count, true));
1709     field_stack_.push_back(std::make_pair(val, nullptr));
1710     return NoError();
1711   });
1712   ECHECK(err);
1713 
1714   const size_t alignment = InlineAlignment(type);
1715   const size_t len = count * InlineSize(type) / InlineAlignment(type);
1716   const size_t elemsize = InlineAlignment(type);
1717   const auto force_align = field->attributes.Lookup("force_align");
1718   if (force_align) {
1719     size_t align;
1720     ECHECK(ParseAlignAttribute(force_align->constant, 1, &align));
1721     if (align > 1) { builder_.ForceVectorAlignment(len, elemsize, align); }
1722   }
1723 
1724   // TODO Fix using element alignment as size (`elemsize`)!
1725   if (vector_type.base_type == BASE_TYPE_VECTOR64) {
1726     // TODO(derekbailey): this requires a 64-bit builder.
1727     // builder_.StartVector<Offset64, uoffset64_t>(len, elemsize, alignment);
1728     builder_.StartVector(len, elemsize, alignment);
1729   } else {
1730     builder_.StartVector(len, elemsize, alignment);
1731   }
1732   for (size_t i = 0; i < count; i++) {
1733     // start at the back, since we're building the data backwards.
1734     auto &val = field_stack_.back().first;
1735     switch (val.type.base_type) {
1736       // clang-format off
1737       #define FLATBUFFERS_TD(ENUM, IDLTYPE, CTYPE,...) \
1738         case BASE_TYPE_ ## ENUM: \
1739           if (IsStruct(val.type)) SerializeStruct(*val.type.struct_def, val); \
1740           else { \
1741              CTYPE elem; \
1742              ECHECK(atot(val.constant.c_str(), *this, &elem)); \
1743              builder_.PushElement(elem); \
1744           } \
1745           break;
1746         FLATBUFFERS_GEN_TYPES(FLATBUFFERS_TD)
1747       #undef FLATBUFFERS_TD
1748       // clang-format on
1749     }
1750     field_stack_.pop_back();
1751   }
1752 
1753   builder_.ClearOffsets();
1754   if (vector_type.base_type == BASE_TYPE_VECTOR64) {
1755     *ovalue = builder_.EndVector<uoffset64_t>(count);
1756   } else {
1757     *ovalue = builder_.EndVector(count);
1758   }
1759 
1760   if (type.base_type == BASE_TYPE_STRUCT && type.struct_def->has_key) {
1761     // We should sort this vector. Find the key first.
1762     const FieldDef *key = nullptr;
1763     for (auto it = type.struct_def->fields.vec.begin();
1764          it != type.struct_def->fields.vec.end(); ++it) {
1765       if ((*it)->key) {
1766         key = (*it);
1767         break;
1768       }
1769     }
1770     FLATBUFFERS_ASSERT(key);
1771     // Now sort it.
1772     // We can't use std::sort because for structs the size is not known at
1773     // compile time, and for tables our iterators dereference offsets, so can't
1774     // be used to swap elements.
1775     // And we can't use C qsort either, since that would force use to use
1776     // globals, making parsing thread-unsafe.
1777     // So for now, we use SimpleQsort above.
1778     // TODO: replace with something better, preferably not recursive.
1779 
1780     if (type.struct_def->fixed) {
1781       const voffset_t offset = key->value.offset;
1782       const size_t struct_size = type.struct_def->bytesize;
1783       auto v =
1784           reinterpret_cast<VectorOfAny *>(builder_.GetCurrentBufferPointer());
1785       SimpleQsort<uint8_t>(
1786           v->Data(), v->Data() + v->size() * type.struct_def->bytesize,
1787           type.struct_def->bytesize,
1788           [offset, key](const uint8_t *a, const uint8_t *b) -> bool {
1789             return CompareSerializedScalars(a + offset, b + offset, *key);
1790           },
1791           [struct_size](uint8_t *a, uint8_t *b) {
1792             // FIXME: faster?
1793             for (size_t i = 0; i < struct_size; i++) { std::swap(a[i], b[i]); }
1794           });
1795     } else {
1796       auto v = reinterpret_cast<Vector<Offset<Table>> *>(
1797           builder_.GetCurrentBufferPointer());
1798       // Here also can't use std::sort. We do have an iterator type for it,
1799       // but it is non-standard as it will dereference the offsets, and thus
1800       // can't be used to swap elements.
1801       if (key->value.type.base_type == BASE_TYPE_STRING) {
1802         SimpleQsort<Offset<Table>>(
1803             v->data(), v->data() + v->size(), 1,
1804             [key](const Offset<Table> *_a, const Offset<Table> *_b) -> bool {
1805               return CompareTablesByStringKey(_a, _b, *key);
1806             },
1807             SwapSerializedTables);
1808       } else {
1809         SimpleQsort<Offset<Table>>(
1810             v->data(), v->data() + v->size(), 1,
1811             [key](const Offset<Table> *_a, const Offset<Table> *_b) -> bool {
1812               return CompareTablesByScalarKey(_a, _b, *key);
1813             },
1814             SwapSerializedTables);
1815       }
1816     }
1817   }
1818   return NoError();
1819 }
1820 
ParseArray(Value & array)1821 CheckedError Parser::ParseArray(Value &array) {
1822   std::vector<Value> stack;
1823   FlatBufferBuilder builder;
1824   const auto &type = array.type.VectorType();
1825   auto length = array.type.fixed_length;
1826   size_t count = 0;
1827   auto err = ParseVectorDelimiters(count, [&](size_t &) -> CheckedError {
1828     stack.emplace_back(Value());
1829     auto &val = stack.back();
1830     val.type = type;
1831     if (IsStruct(type)) {
1832       ECHECK(ParseTable(*val.type.struct_def, &val.constant, nullptr));
1833     } else {
1834       ECHECK(ParseSingleValue(nullptr, val, false));
1835     }
1836     return NoError();
1837   });
1838   ECHECK(err);
1839   if (length != count) return Error("Fixed-length array size is incorrect.");
1840 
1841   for (auto it = stack.rbegin(); it != stack.rend(); ++it) {
1842     auto &val = *it;
1843     // clang-format off
1844     switch (val.type.base_type) {
1845       #define FLATBUFFERS_TD(ENUM, IDLTYPE, CTYPE, ...) \
1846         case BASE_TYPE_ ## ENUM: \
1847           if (IsStruct(val.type)) { \
1848             SerializeStruct(builder, *val.type.struct_def, val); \
1849           } else { \
1850             CTYPE elem; \
1851             ECHECK(atot(val.constant.c_str(), *this, &elem)); \
1852             builder.PushElement(elem); \
1853           } \
1854         break;
1855         FLATBUFFERS_GEN_TYPES(FLATBUFFERS_TD)
1856       #undef FLATBUFFERS_TD
1857       default: FLATBUFFERS_ASSERT(0);
1858     }
1859     // clang-format on
1860   }
1861 
1862   array.constant.assign(
1863       reinterpret_cast<const char *>(builder.GetCurrentBufferPointer()),
1864       InlineSize(array.type));
1865   return NoError();
1866 }
1867 
ParseNestedFlatbuffer(Value & val,FieldDef * field,size_t fieldn,const StructDef * parent_struct_def)1868 CheckedError Parser::ParseNestedFlatbuffer(Value &val, FieldDef *field,
1869                                            size_t fieldn,
1870                                            const StructDef *parent_struct_def) {
1871   if (token_ == '[') {  // backwards compat for 'legacy' ubyte buffers
1872     if (opts.json_nested_legacy_flatbuffers) {
1873       ECHECK(ParseAnyValue(val, field, fieldn, parent_struct_def, 0));
1874     } else {
1875       return Error(
1876           "cannot parse nested_flatbuffer as bytes unless"
1877           " --json-nested-bytes is set");
1878     }
1879   } else {
1880     auto cursor_at_value_begin = cursor_;
1881     ECHECK(SkipAnyJsonValue());
1882     std::string substring(cursor_at_value_begin - 1, cursor_ - 1);
1883 
1884     // Create and initialize new parser
1885     Parser nested_parser;
1886     FLATBUFFERS_ASSERT(field->nested_flatbuffer);
1887     nested_parser.root_struct_def_ = field->nested_flatbuffer;
1888     nested_parser.enums_ = enums_;
1889     nested_parser.opts = opts;
1890     nested_parser.uses_flexbuffers_ = uses_flexbuffers_;
1891     nested_parser.parse_depth_counter_ = parse_depth_counter_;
1892     // Parse JSON substring into new flatbuffer builder using nested_parser
1893     bool ok = nested_parser.Parse(substring.c_str(), nullptr, nullptr);
1894 
1895     // Clean nested_parser to avoid deleting the elements in
1896     // the SymbolTables on destruction
1897     nested_parser.enums_.dict.clear();
1898     nested_parser.enums_.vec.clear();
1899 
1900     if (!ok) { ECHECK(Error(nested_parser.error_)); }
1901     // Force alignment for nested flatbuffer
1902     builder_.ForceVectorAlignment(
1903         nested_parser.builder_.GetSize(), sizeof(uint8_t),
1904         nested_parser.builder_.GetBufferMinAlignment());
1905 
1906     auto off = builder_.CreateVector(nested_parser.builder_.GetBufferPointer(),
1907                                      nested_parser.builder_.GetSize());
1908     val.constant = NumToString(off.o);
1909   }
1910   return NoError();
1911 }
1912 
ParseMetaData(SymbolTable<Value> * attributes)1913 CheckedError Parser::ParseMetaData(SymbolTable<Value> *attributes) {
1914   if (Is('(')) {
1915     NEXT();
1916     for (;;) {
1917       auto name = attribute_;
1918       if (false == (Is(kTokenIdentifier) || Is(kTokenStringConstant)))
1919         return Error("attribute name must be either identifier or string: " +
1920                      name);
1921       if (known_attributes_.find(name) == known_attributes_.end())
1922         return Error("user define attributes must be declared before use: " +
1923                      name);
1924       NEXT();
1925       auto e = new Value();
1926       if (attributes->Add(name, e)) Warning("attribute already found: " + name);
1927       if (Is(':')) {
1928         NEXT();
1929         ECHECK(ParseSingleValue(&name, *e, true));
1930       }
1931       if (Is(')')) {
1932         NEXT();
1933         break;
1934       }
1935       EXPECT(',');
1936     }
1937   }
1938   return NoError();
1939 }
1940 
ParseEnumFromString(const Type & type,std::string * result)1941 CheckedError Parser::ParseEnumFromString(const Type &type,
1942                                          std::string *result) {
1943   const auto base_type =
1944       type.enum_def ? type.enum_def->underlying_type.base_type : type.base_type;
1945   if (!IsInteger(base_type)) return Error("not a valid value for this field");
1946   uint64_t u64 = 0;
1947   for (size_t pos = 0; pos != std::string::npos;) {
1948     const auto delim = attribute_.find_first_of(' ', pos);
1949     const auto last = (std::string::npos == delim);
1950     auto word = attribute_.substr(pos, !last ? delim - pos : std::string::npos);
1951     pos = !last ? delim + 1 : std::string::npos;
1952     const EnumVal *ev = nullptr;
1953     if (type.enum_def) {
1954       ev = type.enum_def->Lookup(word);
1955     } else {
1956       auto dot = word.find_first_of('.');
1957       if (std::string::npos == dot)
1958         return Error("enum values need to be qualified by an enum type");
1959       auto enum_def_str = word.substr(0, dot);
1960       const auto enum_def = LookupEnum(enum_def_str);
1961       if (!enum_def) return Error("unknown enum: " + enum_def_str);
1962       auto enum_val_str = word.substr(dot + 1);
1963       ev = enum_def->Lookup(enum_val_str);
1964     }
1965     if (!ev) return Error("unknown enum value: " + word);
1966     u64 |= ev->GetAsUInt64();
1967   }
1968   *result = IsUnsigned(base_type) ? NumToString(u64)
1969                                   : NumToString(static_cast<int64_t>(u64));
1970   return NoError();
1971 }
1972 
ParseHash(Value & e,FieldDef * field)1973 CheckedError Parser::ParseHash(Value &e, FieldDef *field) {
1974   FLATBUFFERS_ASSERT(field);
1975   Value *hash_name = field->attributes.Lookup("hash");
1976   switch (e.type.base_type) {
1977     case BASE_TYPE_SHORT: {
1978       auto hash = FindHashFunction16(hash_name->constant.c_str());
1979       int16_t hashed_value = static_cast<int16_t>(hash(attribute_.c_str()));
1980       e.constant = NumToString(hashed_value);
1981       break;
1982     }
1983     case BASE_TYPE_USHORT: {
1984       auto hash = FindHashFunction16(hash_name->constant.c_str());
1985       uint16_t hashed_value = hash(attribute_.c_str());
1986       e.constant = NumToString(hashed_value);
1987       break;
1988     }
1989     case BASE_TYPE_INT: {
1990       auto hash = FindHashFunction32(hash_name->constant.c_str());
1991       int32_t hashed_value = static_cast<int32_t>(hash(attribute_.c_str()));
1992       e.constant = NumToString(hashed_value);
1993       break;
1994     }
1995     case BASE_TYPE_UINT: {
1996       auto hash = FindHashFunction32(hash_name->constant.c_str());
1997       uint32_t hashed_value = hash(attribute_.c_str());
1998       e.constant = NumToString(hashed_value);
1999       break;
2000     }
2001     case BASE_TYPE_LONG: {
2002       auto hash = FindHashFunction64(hash_name->constant.c_str());
2003       int64_t hashed_value = static_cast<int64_t>(hash(attribute_.c_str()));
2004       e.constant = NumToString(hashed_value);
2005       break;
2006     }
2007     case BASE_TYPE_ULONG: {
2008       auto hash = FindHashFunction64(hash_name->constant.c_str());
2009       uint64_t hashed_value = hash(attribute_.c_str());
2010       e.constant = NumToString(hashed_value);
2011       break;
2012     }
2013     default: FLATBUFFERS_ASSERT(0);
2014   }
2015   NEXT();
2016   return NoError();
2017 }
2018 
TokenError()2019 CheckedError Parser::TokenError() {
2020   return Error("cannot parse value starting with: " + TokenToStringId(token_));
2021 }
2022 
ParseFunction(const std::string * name,Value & e)2023 CheckedError Parser::ParseFunction(const std::string *name, Value &e) {
2024   ParseDepthGuard depth_guard(this);
2025   ECHECK(depth_guard.Check());
2026 
2027   // Copy name, attribute will be changed on NEXT().
2028   const auto functionname = attribute_;
2029   if (!IsFloat(e.type.base_type)) {
2030     return Error(functionname + ": type of argument mismatch, expecting: " +
2031                  TypeName(BASE_TYPE_DOUBLE) +
2032                  ", found: " + TypeName(e.type.base_type) +
2033                  ", name: " + (name ? *name : "") + ", value: " + e.constant);
2034   }
2035   NEXT();
2036   EXPECT('(');
2037   ECHECK(ParseSingleValue(name, e, false));
2038   EXPECT(')');
2039   // calculate with double precision
2040   double x, y = 0.0;
2041   ECHECK(atot(e.constant.c_str(), *this, &x));
2042   // clang-format off
2043   auto func_match = false;
2044   #define FLATBUFFERS_FN_DOUBLE(name, op) \
2045     if (!func_match && functionname == name) { y = op; func_match = true; }
2046   FLATBUFFERS_FN_DOUBLE("deg", x / kPi * 180);
2047   FLATBUFFERS_FN_DOUBLE("rad", x * kPi / 180);
2048   FLATBUFFERS_FN_DOUBLE("sin", sin(x));
2049   FLATBUFFERS_FN_DOUBLE("cos", cos(x));
2050   FLATBUFFERS_FN_DOUBLE("tan", tan(x));
2051   FLATBUFFERS_FN_DOUBLE("asin", asin(x));
2052   FLATBUFFERS_FN_DOUBLE("acos", acos(x));
2053   FLATBUFFERS_FN_DOUBLE("atan", atan(x));
2054   // TODO(wvo): add more useful conversion functions here.
2055   #undef FLATBUFFERS_FN_DOUBLE
2056   // clang-format on
2057   if (true != func_match) {
2058     return Error(std::string("Unknown conversion function: ") + functionname +
2059                  ", field name: " + (name ? *name : "") +
2060                  ", value: " + e.constant);
2061   }
2062   e.constant = NumToString(y);
2063   return NoError();
2064 }
2065 
TryTypedValue(const std::string * name,int dtoken,bool check,Value & e,BaseType req,bool * destmatch)2066 CheckedError Parser::TryTypedValue(const std::string *name, int dtoken,
2067                                    bool check, Value &e, BaseType req,
2068                                    bool *destmatch) {
2069   FLATBUFFERS_ASSERT(*destmatch == false && dtoken == token_);
2070   *destmatch = true;
2071   e.constant = attribute_;
2072   // Check token match
2073   if (!check) {
2074     if (e.type.base_type == BASE_TYPE_NONE) {
2075       e.type.base_type = req;
2076     } else {
2077       return Error(std::string("type mismatch: expecting: ") +
2078                    TypeName(e.type.base_type) + ", found: " + TypeName(req) +
2079                    ", name: " + (name ? *name : "") + ", value: " + e.constant);
2080     }
2081   }
2082   // The exponent suffix of hexadecimal float-point number is mandatory.
2083   // A hex-integer constant is forbidden as an initializer of float number.
2084   if ((kTokenFloatConstant != dtoken) && IsFloat(e.type.base_type)) {
2085     const auto &s = e.constant;
2086     const auto k = s.find_first_of("0123456789.");
2087     if ((std::string::npos != k) && (s.length() > (k + 1)) &&
2088         (s[k] == '0' && is_alpha_char(s[k + 1], 'X')) &&
2089         (std::string::npos == s.find_first_of("pP", k + 2))) {
2090       return Error(
2091           "invalid number, the exponent suffix of hexadecimal "
2092           "floating-point literals is mandatory: \"" +
2093           s + "\"");
2094     }
2095   }
2096   NEXT();
2097   return NoError();
2098 }
2099 
ParseSingleValue(const std::string * name,Value & e,bool check_now)2100 CheckedError Parser::ParseSingleValue(const std::string *name, Value &e,
2101                                       bool check_now) {
2102   if (token_ == '+' || token_ == '-') {
2103     const char sign = static_cast<char>(token_);
2104     // Get an indentifier: NAN, INF, or function name like cos/sin/deg.
2105     NEXT();
2106     if (token_ != kTokenIdentifier) return Error("constant name expected");
2107     attribute_.insert(size_t(0), size_t(1), sign);
2108   }
2109 
2110   const auto in_type = e.type.base_type;
2111   const auto is_tok_ident = (token_ == kTokenIdentifier);
2112   const auto is_tok_string = (token_ == kTokenStringConstant);
2113 
2114   // First see if this could be a conversion function.
2115   if (is_tok_ident && *cursor_ == '(') { return ParseFunction(name, e); }
2116 
2117   // clang-format off
2118   auto match = false;
2119 
2120   #define IF_ECHECK_(force, dtoken, check, req)    \
2121     if (!match && ((dtoken) == token_) && ((check) || flatbuffers::IsConstTrue(force))) \
2122       ECHECK(TryTypedValue(name, dtoken, check, e, req, &match))
2123   #define TRY_ECHECK(dtoken, check, req) IF_ECHECK_(false, dtoken, check, req)
2124   #define FORCE_ECHECK(dtoken, check, req) IF_ECHECK_(true, dtoken, check, req)
2125   // clang-format on
2126 
2127   if (is_tok_ident || is_tok_string) {
2128     const auto kTokenStringOrIdent = token_;
2129     // The string type is a most probable type, check it first.
2130     TRY_ECHECK(kTokenStringConstant, in_type == BASE_TYPE_STRING,
2131                BASE_TYPE_STRING);
2132 
2133     // avoid escaped and non-ascii in the string
2134     if (!match && is_tok_string && IsScalar(in_type) &&
2135         !attr_is_trivial_ascii_string_) {
2136       return Error(
2137           std::string("type mismatch or invalid value, an initializer of "
2138                       "non-string field must be trivial ASCII string: type: ") +
2139           TypeName(in_type) + ", name: " + (name ? *name : "") +
2140           ", value: " + attribute_);
2141     }
2142 
2143     // A boolean as true/false. Boolean as Integer check below.
2144     if (!match && IsBool(in_type)) {
2145       auto is_true = attribute_ == "true";
2146       if (is_true || attribute_ == "false") {
2147         attribute_ = is_true ? "1" : "0";
2148         // accepts both kTokenStringConstant and kTokenIdentifier
2149         TRY_ECHECK(kTokenStringOrIdent, IsBool(in_type), BASE_TYPE_BOOL);
2150       }
2151     }
2152     // Check for optional scalars.
2153     if (!match && IsScalar(in_type) && attribute_ == "null") {
2154       e.constant = "null";
2155       NEXT();
2156       match = true;
2157     }
2158     // Check if this could be a string/identifier enum value.
2159     // Enum can have only true integer base type.
2160     if (!match && IsInteger(in_type) && !IsBool(in_type) &&
2161         IsIdentifierStart(*attribute_.c_str())) {
2162       ECHECK(ParseEnumFromString(e.type, &e.constant));
2163       NEXT();
2164       match = true;
2165     }
2166     // Parse a float/integer number from the string.
2167     // A "scalar-in-string" value needs extra checks.
2168     if (!match && is_tok_string && IsScalar(in_type)) {
2169       // Strip trailing whitespaces from attribute_.
2170       auto last_non_ws = attribute_.find_last_not_of(' ');
2171       if (std::string::npos != last_non_ws) attribute_.resize(last_non_ws + 1);
2172       if (IsFloat(e.type.base_type)) {
2173         // The functions strtod() and strtof() accept both 'nan' and
2174         // 'nan(number)' literals. While 'nan(number)' is rejected by the parser
2175         // as an unsupported function if is_tok_ident is true.
2176         if (attribute_.find_last_of(')') != std::string::npos) {
2177           return Error("invalid number: " + attribute_);
2178         }
2179       }
2180     }
2181     // Float numbers or nan, inf, pi, etc.
2182     TRY_ECHECK(kTokenStringOrIdent, IsFloat(in_type), BASE_TYPE_FLOAT);
2183     // An integer constant in string.
2184     TRY_ECHECK(kTokenStringOrIdent, IsInteger(in_type), BASE_TYPE_INT);
2185     // Unknown tokens will be interpreted as string type.
2186     // An attribute value may be a scalar or string constant.
2187     FORCE_ECHECK(kTokenStringConstant, in_type == BASE_TYPE_STRING,
2188                  BASE_TYPE_STRING);
2189   } else {
2190     // Try a float number.
2191     TRY_ECHECK(kTokenFloatConstant, IsFloat(in_type), BASE_TYPE_FLOAT);
2192     // Integer token can init any scalar (integer of float).
2193     FORCE_ECHECK(kTokenIntegerConstant, IsScalar(in_type), BASE_TYPE_INT);
2194   }
2195   // Match empty vectors for default-empty-vectors.
2196   if (!match && IsVector(e.type) && token_ == '[') {
2197     NEXT();
2198     if (token_ != ']') { return Error("Expected `]` in vector default"); }
2199     NEXT();
2200     match = true;
2201     e.constant = "[]";
2202   }
2203 
2204 #undef FORCE_ECHECK
2205 #undef TRY_ECHECK
2206 #undef IF_ECHECK_
2207 
2208   if (!match) {
2209     std::string msg;
2210     msg += "Cannot assign token starting with '" + TokenToStringId(token_) +
2211            "' to value of <" + std::string(TypeName(in_type)) + "> type.";
2212     return Error(msg);
2213   }
2214   const auto match_type = e.type.base_type;  // may differ from in_type
2215   // The check_now flag must be true when parse a fbs-schema.
2216   // This flag forces to check default scalar values or metadata of field.
2217   // For JSON parser the flag should be false.
2218   // If it is set for JSON each value will be checked twice (see ParseTable).
2219   // Special case 'null' since atot can't handle that.
2220   if (check_now && IsScalar(match_type) && e.constant != "null") {
2221     // clang-format off
2222     switch (match_type) {
2223     #define FLATBUFFERS_TD(ENUM, IDLTYPE, CTYPE, ...) \
2224       case BASE_TYPE_ ## ENUM: {\
2225           CTYPE val; \
2226           ECHECK(atot(e.constant.c_str(), *this, &val)); \
2227           SingleValueRepack(e, val); \
2228         break; }
2229     FLATBUFFERS_GEN_TYPES_SCALAR(FLATBUFFERS_TD)
2230     #undef FLATBUFFERS_TD
2231     default: break;
2232     }
2233     // clang-format on
2234   }
2235   return NoError();
2236 }
2237 
LookupCreateStruct(const std::string & name,bool create_if_new,bool definition)2238 StructDef *Parser::LookupCreateStruct(const std::string &name,
2239                                       bool create_if_new, bool definition) {
2240   std::string qualified_name = current_namespace_->GetFullyQualifiedName(name);
2241   // See if it exists pre-declared by an unqualified use.
2242   auto struct_def = LookupStruct(name);
2243   if (struct_def && struct_def->predecl) {
2244     if (definition) {
2245       // Make sure it has the current namespace, and is registered under its
2246       // qualified name.
2247       struct_def->defined_namespace = current_namespace_;
2248       structs_.Move(name, qualified_name);
2249     }
2250     return struct_def;
2251   }
2252   // See if it exists pre-declared by an qualified use.
2253   struct_def = LookupStruct(qualified_name);
2254   if (struct_def && struct_def->predecl) {
2255     if (definition) {
2256       // Make sure it has the current namespace.
2257       struct_def->defined_namespace = current_namespace_;
2258     }
2259     return struct_def;
2260   }
2261   if (!definition && !struct_def) {
2262     struct_def = LookupStructThruParentNamespaces(name);
2263   }
2264   if (!struct_def && create_if_new) {
2265     struct_def = new StructDef();
2266     if (definition) {
2267       structs_.Add(qualified_name, struct_def);
2268       struct_def->name = name;
2269       struct_def->defined_namespace = current_namespace_;
2270     } else {
2271       // Not a definition.
2272       // Rather than failing, we create a "pre declared" StructDef, due to
2273       // circular references, and check for errors at the end of parsing.
2274       // It is defined in the current namespace, as the best guess what the
2275       // final namespace will be.
2276       structs_.Add(name, struct_def);
2277       struct_def->name = name;
2278       struct_def->defined_namespace = current_namespace_;
2279       struct_def->original_location.reset(
2280           new std::string(file_being_parsed_ + ":" + NumToString(line_)));
2281     }
2282   }
2283   return struct_def;
2284 }
2285 
MinValue() const2286 const EnumVal *EnumDef::MinValue() const {
2287   return vals.vec.empty() ? nullptr : vals.vec.front();
2288 }
MaxValue() const2289 const EnumVal *EnumDef::MaxValue() const {
2290   return vals.vec.empty() ? nullptr : vals.vec.back();
2291 }
2292 
Distance(const EnumVal * v1,const EnumVal * v2) const2293 uint64_t EnumDef::Distance(const EnumVal *v1, const EnumVal *v2) const {
2294   return IsUInt64() ? EnumDistanceImpl(v1->GetAsUInt64(), v2->GetAsUInt64())
2295                     : EnumDistanceImpl(v1->GetAsInt64(), v2->GetAsInt64());
2296 }
2297 
AllFlags() const2298 std::string EnumDef::AllFlags() const {
2299   FLATBUFFERS_ASSERT(attributes.Lookup("bit_flags"));
2300   uint64_t u64 = 0;
2301   for (auto it = Vals().begin(); it != Vals().end(); ++it) {
2302     u64 |= (*it)->GetAsUInt64();
2303   }
2304   return IsUInt64() ? NumToString(u64) : NumToString(static_cast<int64_t>(u64));
2305 }
2306 
ReverseLookup(int64_t enum_idx,bool skip_union_default) const2307 EnumVal *EnumDef::ReverseLookup(int64_t enum_idx,
2308                                 bool skip_union_default) const {
2309   auto skip_first = static_cast<int>(is_union && skip_union_default);
2310   for (auto it = Vals().begin() + skip_first; it != Vals().end(); ++it) {
2311     if ((*it)->GetAsInt64() == enum_idx) { return *it; }
2312   }
2313   return nullptr;
2314 }
2315 
FindByValue(const std::string & constant) const2316 EnumVal *EnumDef::FindByValue(const std::string &constant) const {
2317   int64_t i64;
2318   auto done = false;
2319   if (IsUInt64()) {
2320     uint64_t u64;  // avoid reinterpret_cast of pointers
2321     done = StringToNumber(constant.c_str(), &u64);
2322     i64 = static_cast<int64_t>(u64);
2323   } else {
2324     done = StringToNumber(constant.c_str(), &i64);
2325   }
2326   FLATBUFFERS_ASSERT(done);
2327   if (!done) return nullptr;
2328   return ReverseLookup(i64, false);
2329 }
2330 
SortByValue()2331 void EnumDef::SortByValue() {
2332   auto &v = vals.vec;
2333   if (IsUInt64())
2334     std::sort(v.begin(), v.end(), [](const EnumVal *e1, const EnumVal *e2) {
2335       if (e1->GetAsUInt64() == e2->GetAsUInt64()) {
2336         return e1->name < e2->name;
2337       }
2338       return e1->GetAsUInt64() < e2->GetAsUInt64();
2339     });
2340   else
2341     std::sort(v.begin(), v.end(), [](const EnumVal *e1, const EnumVal *e2) {
2342       if (e1->GetAsInt64() == e2->GetAsInt64()) { return e1->name < e2->name; }
2343       return e1->GetAsInt64() < e2->GetAsInt64();
2344     });
2345 }
2346 
RemoveDuplicates()2347 void EnumDef::RemoveDuplicates() {
2348   // This method depends form SymbolTable implementation!
2349   // 1) vals.vec - owner (raw pointer)
2350   // 2) vals.dict - access map
2351   auto first = vals.vec.begin();
2352   auto last = vals.vec.end();
2353   if (first == last) return;
2354   auto result = first;
2355   while (++first != last) {
2356     if ((*result)->value != (*first)->value) {
2357       *(++result) = *first;
2358     } else {
2359       auto ev = *first;
2360       for (auto it = vals.dict.begin(); it != vals.dict.end(); ++it) {
2361         if (it->second == ev) it->second = *result;  // reassign
2362       }
2363       delete ev;  // delete enum value
2364       *first = nullptr;
2365     }
2366   }
2367   vals.vec.erase(++result, last);
2368 }
2369 
ChangeEnumValue(EnumVal * ev,T new_value)2370 template<typename T> void EnumDef::ChangeEnumValue(EnumVal *ev, T new_value) {
2371   ev->value = static_cast<int64_t>(new_value);
2372 }
2373 
2374 namespace EnumHelper {
2375 template<BaseType E> struct EnumValType {
2376   typedef int64_t type;
2377 };
2378 template<> struct EnumValType<BASE_TYPE_ULONG> {
2379   typedef uint64_t type;
2380 };
2381 }  // namespace EnumHelper
2382 
2383 struct EnumValBuilder {
CreateEnumeratorflatbuffers::EnumValBuilder2384   EnumVal *CreateEnumerator(const std::string &ev_name) {
2385     FLATBUFFERS_ASSERT(!temp);
2386     auto first = enum_def.vals.vec.empty();
2387     user_value = first;
2388     temp = new EnumVal(ev_name, first ? 0 : enum_def.vals.vec.back()->value);
2389     return temp;
2390   }
2391 
CreateEnumeratorflatbuffers::EnumValBuilder2392   EnumVal *CreateEnumerator(const std::string &ev_name, int64_t val) {
2393     FLATBUFFERS_ASSERT(!temp);
2394     user_value = true;
2395     temp = new EnumVal(ev_name, val);
2396     return temp;
2397   }
2398 
AcceptEnumeratorflatbuffers::EnumValBuilder2399   FLATBUFFERS_CHECKED_ERROR AcceptEnumerator(const std::string &name) {
2400     FLATBUFFERS_ASSERT(temp);
2401     ECHECK(ValidateValue(&temp->value, false == user_value));
2402     FLATBUFFERS_ASSERT((temp->union_type.enum_def == nullptr) ||
2403                        (temp->union_type.enum_def == &enum_def));
2404     auto not_unique = enum_def.vals.Add(name, temp);
2405     temp = nullptr;
2406     if (not_unique) return parser.Error("enum value already exists: " + name);
2407     return NoError();
2408   }
2409 
AcceptEnumeratorflatbuffers::EnumValBuilder2410   FLATBUFFERS_CHECKED_ERROR AcceptEnumerator() {
2411     return AcceptEnumerator(temp->name);
2412   }
2413 
AssignEnumeratorValueflatbuffers::EnumValBuilder2414   FLATBUFFERS_CHECKED_ERROR AssignEnumeratorValue(const std::string &value) {
2415     user_value = true;
2416     auto fit = false;
2417     if (enum_def.IsUInt64()) {
2418       uint64_t u64;
2419       fit = StringToNumber(value.c_str(), &u64);
2420       temp->value = static_cast<int64_t>(u64);  // well-defined since C++20.
2421     } else {
2422       int64_t i64;
2423       fit = StringToNumber(value.c_str(), &i64);
2424       temp->value = i64;
2425     }
2426     if (!fit) return parser.Error("enum value does not fit, \"" + value + "\"");
2427     return NoError();
2428   }
2429 
2430   template<BaseType E, typename CTYPE>
ValidateImplflatbuffers::EnumValBuilder2431   inline FLATBUFFERS_CHECKED_ERROR ValidateImpl(int64_t *ev, int m) {
2432     typedef typename EnumHelper::EnumValType<E>::type T;  // int64_t or uint64_t
2433     static_assert(sizeof(T) == sizeof(int64_t), "invalid EnumValType");
2434     const auto v = static_cast<T>(*ev);
2435     auto up = static_cast<T>((flatbuffers::numeric_limits<CTYPE>::max)());
2436     auto dn = static_cast<T>((flatbuffers::numeric_limits<CTYPE>::lowest)());
2437     if (v < dn || v > (up - m)) {
2438       return parser.Error("enum value does not fit, \"" + NumToString(v) +
2439                           (m ? " + 1\"" : "\"") + " out of " +
2440                           TypeToIntervalString<CTYPE>());
2441     }
2442     *ev = static_cast<int64_t>(v + m);  // well-defined since C++20.
2443     return NoError();
2444   }
2445 
ValidateValueflatbuffers::EnumValBuilder2446   FLATBUFFERS_CHECKED_ERROR ValidateValue(int64_t *ev, bool next) {
2447     // clang-format off
2448     switch (enum_def.underlying_type.base_type) {
2449     #define FLATBUFFERS_TD(ENUM, IDLTYPE, CTYPE, ...)                   \
2450       case BASE_TYPE_##ENUM: {                                          \
2451         if (!IsInteger(BASE_TYPE_##ENUM)) break;                        \
2452         return ValidateImpl<BASE_TYPE_##ENUM, CTYPE>(ev, next ? 1 : 0); \
2453       }
2454       FLATBUFFERS_GEN_TYPES_SCALAR(FLATBUFFERS_TD)
2455     #undef FLATBUFFERS_TD
2456     default: break;
2457     }
2458     // clang-format on
2459     return parser.Error("fatal: invalid enum underlying type");
2460   }
2461 
EnumValBuilderflatbuffers::EnumValBuilder2462   EnumValBuilder(Parser &_parser, EnumDef &_enum_def)
2463       : parser(_parser),
2464         enum_def(_enum_def),
2465         temp(nullptr),
2466         user_value(false) {}
2467 
~EnumValBuilderflatbuffers::EnumValBuilder2468   ~EnumValBuilder() { delete temp; }
2469 
2470   Parser &parser;
2471   EnumDef &enum_def;
2472   EnumVal *temp;
2473   bool user_value;
2474 };
2475 
ParseEnum(const bool is_union,EnumDef ** dest,const char * filename)2476 CheckedError Parser::ParseEnum(const bool is_union, EnumDef **dest,
2477                                const char *filename) {
2478   std::vector<std::string> enum_comment = doc_comment_;
2479   NEXT();
2480   std::string enum_name = attribute_;
2481   EXPECT(kTokenIdentifier);
2482   EnumDef *enum_def;
2483   ECHECK(StartEnum(enum_name, is_union, &enum_def));
2484   if (filename != nullptr && !opts.project_root.empty()) {
2485     enum_def->declaration_file =
2486         &GetPooledString(FilePath(opts.project_root, filename, opts.binary_schema_absolute_paths));
2487   }
2488   enum_def->doc_comment = enum_comment;
2489   if (!opts.proto_mode) {
2490     // Give specialized error message, since this type spec used to
2491     // be optional in the first FlatBuffers release.
2492     bool explicit_underlying_type = false;
2493     if (!Is(':')) {
2494       // Enum is forced to have an explicit underlying type in declaration.
2495       if (!is_union) {
2496         return Error(
2497             "must specify the underlying integer type for this"
2498             " enum (e.g. \': short\', which was the default).");
2499       }
2500     } else {
2501       // Union underlying type is only supported for cpp
2502       if (is_union && !SupportsUnionUnderlyingType()) {
2503         return Error(
2504             "Underlying type for union is not yet supported in at least one of "
2505             "the specified programming languages.");
2506       }
2507       NEXT();
2508       explicit_underlying_type = true;
2509     }
2510 
2511     if (explicit_underlying_type) {
2512       // Specify the integer type underlying this enum.
2513       ECHECK(ParseType(enum_def->underlying_type));
2514       if (!IsInteger(enum_def->underlying_type.base_type) || IsBool(enum_def->underlying_type.base_type)) {
2515         return Error("underlying " + std::string(is_union ? "union" : "enum") + "type must be integral");
2516       }
2517 
2518       // Make this type refer back to the enum it was derived from.
2519       enum_def->underlying_type.enum_def = enum_def;
2520     }
2521 
2522   }
2523   ECHECK(ParseMetaData(&enum_def->attributes));
2524   const auto underlying_type = enum_def->underlying_type.base_type;
2525   if (enum_def->attributes.Lookup("bit_flags") &&
2526       !IsUnsigned(underlying_type)) {
2527     // todo: Convert to the Error in the future?
2528     Warning("underlying type of bit_flags enum must be unsigned");
2529   }
2530   if (enum_def->attributes.Lookup("force_align")) {
2531     return Error("`force_align` is not a valid attribute for Enums. ");
2532   }
2533   EnumValBuilder evb(*this, *enum_def);
2534   EXPECT('{');
2535   // A lot of code generatos expect that an enum is not-empty.
2536   if ((is_union || Is('}')) && !opts.proto_mode) {
2537     evb.CreateEnumerator("NONE");
2538     ECHECK(evb.AcceptEnumerator());
2539   }
2540   std::set<std::pair<BaseType, StructDef *>> union_types;
2541   while (!Is('}')) {
2542     if (opts.proto_mode && attribute_ == "option") {
2543       ECHECK(ParseProtoOption());
2544     } else {
2545       auto &ev = *evb.CreateEnumerator(attribute_);
2546       auto full_name = ev.name;
2547       ev.doc_comment = doc_comment_;
2548       EXPECT(kTokenIdentifier);
2549       if (is_union) {
2550         ECHECK(ParseNamespacing(&full_name, &ev.name));
2551         if (opts.union_value_namespacing) {
2552           // Since we can't namespace the actual enum identifiers, turn
2553           // namespace parts into part of the identifier.
2554           ev.name = full_name;
2555           std::replace(ev.name.begin(), ev.name.end(), '.', '_');
2556         }
2557         if (Is(':')) {
2558           NEXT();
2559           ECHECK(ParseType(ev.union_type));
2560           if (ev.union_type.base_type != BASE_TYPE_STRUCT &&
2561               ev.union_type.base_type != BASE_TYPE_STRING)
2562             return Error("union value type may only be table/struct/string");
2563         } else {
2564           ev.union_type = Type(BASE_TYPE_STRUCT, LookupCreateStruct(full_name));
2565         }
2566         if (!enum_def->uses_multiple_type_instances) {
2567           auto ins = union_types.insert(std::make_pair(
2568               ev.union_type.base_type, ev.union_type.struct_def));
2569           enum_def->uses_multiple_type_instances = (false == ins.second);
2570         }
2571       }
2572 
2573       if (Is('=')) {
2574         NEXT();
2575         ECHECK(evb.AssignEnumeratorValue(attribute_));
2576         EXPECT(kTokenIntegerConstant);
2577       }
2578 
2579       if (opts.proto_mode && Is('[')) {
2580         NEXT();
2581         // ignore attributes on enums.
2582         while (token_ != ']') NEXT();
2583         NEXT();
2584       } else {
2585         // parse attributes in fbs schema
2586         ECHECK(ParseMetaData(&ev.attributes));
2587       }
2588 
2589       ECHECK(evb.AcceptEnumerator());
2590     }
2591     if (!Is(opts.proto_mode ? ';' : ',')) break;
2592     NEXT();
2593   }
2594   EXPECT('}');
2595 
2596   // At this point, the enum can be empty if input is invalid proto-file.
2597   if (!enum_def->size())
2598     return Error("incomplete enum declaration, values not found");
2599 
2600   if (enum_def->attributes.Lookup("bit_flags")) {
2601     const auto base_width = static_cast<uint64_t>(8 * SizeOf(underlying_type));
2602     for (auto it = enum_def->Vals().begin(); it != enum_def->Vals().end();
2603          ++it) {
2604       auto ev = *it;
2605       const auto u = ev->GetAsUInt64();
2606       // Stop manipulations with the sign.
2607       if (!IsUnsigned(underlying_type) && u == (base_width - 1))
2608         return Error("underlying type of bit_flags enum must be unsigned");
2609       if (u >= base_width)
2610         return Error("bit flag out of range of underlying integral type");
2611       enum_def->ChangeEnumValue(ev, 1ULL << u);
2612     }
2613   }
2614 
2615   enum_def->SortByValue();  // Must be sorted to use MinValue/MaxValue.
2616 
2617   // Ensure enum value uniqueness.
2618   auto prev_it = enum_def->Vals().begin();
2619   for (auto it = prev_it + 1; it != enum_def->Vals().end(); ++it) {
2620     auto prev_ev = *prev_it;
2621     auto ev = *it;
2622     if (prev_ev->GetAsUInt64() == ev->GetAsUInt64())
2623       return Error("all enum values must be unique: " + prev_ev->name +
2624                    " and " + ev->name + " are both " +
2625                    NumToString(ev->GetAsInt64()));
2626   }
2627 
2628   if (dest) *dest = enum_def;
2629   const auto qualified_name =
2630       current_namespace_->GetFullyQualifiedName(enum_def->name);
2631   if (types_.Add(qualified_name, new Type(BASE_TYPE_UNION, nullptr, enum_def)))
2632     return Error("datatype already exists: " + qualified_name);
2633   return NoError();
2634 }
2635 
StartStruct(const std::string & name,StructDef ** dest)2636 CheckedError Parser::StartStruct(const std::string &name, StructDef **dest) {
2637   auto &struct_def = *LookupCreateStruct(name, true, true);
2638   if (!struct_def.predecl)
2639     return Error("datatype already exists: " +
2640                  current_namespace_->GetFullyQualifiedName(name));
2641   struct_def.predecl = false;
2642   struct_def.name = name;
2643   struct_def.file = file_being_parsed_;
2644   // Move this struct to the back of the vector just in case it was predeclared,
2645   // to preserve declaration order.
2646   *std::remove(structs_.vec.begin(), structs_.vec.end(), &struct_def) =
2647       &struct_def;
2648   *dest = &struct_def;
2649   return NoError();
2650 }
2651 
CheckClash(std::vector<FieldDef * > & fields,StructDef * struct_def,const char * suffix,BaseType basetype)2652 CheckedError Parser::CheckClash(std::vector<FieldDef *> &fields,
2653                                 StructDef *struct_def, const char *suffix,
2654                                 BaseType basetype) {
2655   auto len = strlen(suffix);
2656   for (auto it = fields.begin(); it != fields.end(); ++it) {
2657     auto &fname = (*it)->name;
2658     if (fname.length() > len &&
2659         fname.compare(fname.length() - len, len, suffix) == 0 &&
2660         (*it)->value.type.base_type != BASE_TYPE_UTYPE) {
2661       auto field =
2662           struct_def->fields.Lookup(fname.substr(0, fname.length() - len));
2663       if (field && field->value.type.base_type == basetype)
2664         return Error("Field " + fname +
2665                      " would clash with generated functions for field " +
2666                      field->name);
2667     }
2668   }
2669   return NoError();
2670 }
2671 
GetIncludedFiles() const2672 std::vector<IncludedFile> Parser::GetIncludedFiles() const {
2673   const auto it = files_included_per_file_.find(file_being_parsed_);
2674   if (it == files_included_per_file_.end()) { return {}; }
2675 
2676   return { it->second.cbegin(), it->second.cend() };
2677 }
2678 
SupportsOptionalScalars(const flatbuffers::IDLOptions & opts)2679 bool Parser::SupportsOptionalScalars(const flatbuffers::IDLOptions &opts) {
2680   static FLATBUFFERS_CONSTEXPR unsigned long supported_langs =
2681       IDLOptions::kRust | IDLOptions::kSwift | IDLOptions::kLobster |
2682       IDLOptions::kKotlin | IDLOptions::kKotlinKmp | IDLOptions::kCpp |
2683       IDLOptions::kJava | IDLOptions::kCSharp | IDLOptions::kTs |
2684       IDLOptions::kBinary | IDLOptions::kGo | IDLOptions::kPython |
2685       IDLOptions::kJson |
2686       IDLOptions::kNim;
2687   unsigned long langs = opts.lang_to_generate;
2688   return (langs > 0 && langs < IDLOptions::kMAX) && !(langs & ~supported_langs);
2689 }
SupportsOptionalScalars() const2690 bool Parser::SupportsOptionalScalars() const {
2691   // Check in general if a language isn't specified.
2692   return opts.lang_to_generate == 0 || SupportsOptionalScalars(opts);
2693 }
2694 
SupportsDefaultVectorsAndStrings() const2695 bool Parser::SupportsDefaultVectorsAndStrings() const {
2696   static FLATBUFFERS_CONSTEXPR unsigned long supported_langs =
2697       IDLOptions::kRust | IDLOptions::kSwift | IDLOptions::kNim;
2698   return !(opts.lang_to_generate & ~supported_langs);
2699 }
2700 
SupportsAdvancedUnionFeatures() const2701 bool Parser::SupportsAdvancedUnionFeatures() const {
2702   return (opts.lang_to_generate &
2703           ~(IDLOptions::kCpp | IDLOptions::kTs | IDLOptions::kPhp |
2704             IDLOptions::kJava | IDLOptions::kCSharp | IDLOptions::kKotlin |
2705             IDLOptions::kBinary | IDLOptions::kSwift | IDLOptions::kNim |
2706             IDLOptions::kJson | IDLOptions::kKotlinKmp)) == 0;
2707 }
2708 
SupportsAdvancedArrayFeatures() const2709 bool Parser::SupportsAdvancedArrayFeatures() const {
2710   return (opts.lang_to_generate &
2711           ~(IDLOptions::kCpp | IDLOptions::kPython | IDLOptions::kJava |
2712             IDLOptions::kCSharp | IDLOptions::kJsonSchema | IDLOptions::kJson |
2713             IDLOptions::kBinary | IDLOptions::kRust | IDLOptions::kTs)) == 0;
2714 }
2715 
Supports64BitOffsets() const2716 bool Parser::Supports64BitOffsets() const {
2717   return (opts.lang_to_generate &
2718           ~(IDLOptions::kCpp | IDLOptions::kJson | IDLOptions::kBinary)) == 0;
2719 }
2720 
SupportsUnionUnderlyingType() const2721 bool Parser::SupportsUnionUnderlyingType() const {
2722     return (opts.lang_to_generate & ~(IDLOptions::kCpp | IDLOptions::kTs |
2723          IDLOptions::kBinary)) == 0;
2724 }
2725 
UniqueNamespace(Namespace * ns)2726 Namespace *Parser::UniqueNamespace(Namespace *ns) {
2727   for (auto it = namespaces_.begin(); it != namespaces_.end(); ++it) {
2728     if (ns->components == (*it)->components) {
2729       delete ns;
2730       return *it;
2731     }
2732   }
2733   namespaces_.push_back(ns);
2734   return ns;
2735 }
2736 
UnqualifiedName(const std::string & full_qualified_name)2737 std::string Parser::UnqualifiedName(const std::string &full_qualified_name) {
2738   Namespace *ns = new Namespace();
2739 
2740   std::size_t current, previous = 0;
2741   current = full_qualified_name.find('.');
2742   while (current != std::string::npos) {
2743     ns->components.push_back(
2744         full_qualified_name.substr(previous, current - previous));
2745     previous = current + 1;
2746     current = full_qualified_name.find('.', previous);
2747   }
2748   current_namespace_ = UniqueNamespace(ns);
2749   return full_qualified_name.substr(previous, current - previous);
2750 }
2751 
ParseDecl(const char * filename)2752 CheckedError Parser::ParseDecl(const char *filename) {
2753   std::vector<std::string> dc = doc_comment_;
2754   bool fixed = IsIdent("struct");
2755   if (!fixed && !IsIdent("table")) return Error("declaration expected");
2756   NEXT();
2757   std::string name = attribute_;
2758   EXPECT(kTokenIdentifier);
2759   StructDef *struct_def;
2760   ECHECK(StartStruct(name, &struct_def));
2761   struct_def->doc_comment = dc;
2762   struct_def->fixed = fixed;
2763   if (filename && !opts.project_root.empty()) {
2764     struct_def->declaration_file =
2765         &GetPooledString(FilePath(opts.project_root, filename, opts.binary_schema_absolute_paths));
2766   }
2767   ECHECK(ParseMetaData(&struct_def->attributes));
2768   struct_def->sortbysize =
2769       struct_def->attributes.Lookup("original_order") == nullptr && !fixed;
2770   EXPECT('{');
2771   while (token_ != '}') ECHECK(ParseField(*struct_def));
2772   if (fixed) {
2773     const auto force_align = struct_def->attributes.Lookup("force_align");
2774     if (force_align) {
2775       size_t align;
2776       ECHECK(ParseAlignAttribute(force_align->constant, struct_def->minalign,
2777                                  &align));
2778       struct_def->minalign = align;
2779     }
2780     if (!struct_def->bytesize) return Error("size 0 structs not allowed");
2781   }
2782   struct_def->PadLastField(struct_def->minalign);
2783   // Check if this is a table that has manual id assignments
2784   auto &fields = struct_def->fields.vec;
2785   if (!fixed && fields.size()) {
2786     size_t num_id_fields = 0;
2787     for (auto it = fields.begin(); it != fields.end(); ++it) {
2788       if ((*it)->attributes.Lookup("id")) num_id_fields++;
2789     }
2790     // If any fields have ids..
2791     if (num_id_fields || opts.require_explicit_ids) {
2792       // Then all fields must have them.
2793       if (num_id_fields != fields.size()) {
2794         if (opts.require_explicit_ids) {
2795           return Error(
2796               "all fields must have an 'id' attribute when "
2797               "--require-explicit-ids is used");
2798         } else {
2799           return Error(
2800               "either all fields or no fields must have an 'id' attribute");
2801         }
2802       }
2803       // Simply sort by id, then the fields are the same as if no ids had
2804       // been specified.
2805       std::sort(fields.begin(), fields.end(), compareFieldDefs);
2806       // Verify we have a contiguous set, and reassign vtable offsets.
2807       FLATBUFFERS_ASSERT(fields.size() <=
2808                          flatbuffers::numeric_limits<voffset_t>::max());
2809       for (voffset_t i = 0; i < static_cast<voffset_t>(fields.size()); i++) {
2810         auto &field = *fields[i];
2811         const auto &id_str = field.attributes.Lookup("id")->constant;
2812 
2813         // Metadata values have a dynamic type, they can be `float`, 'int', or
2814         // 'string`.
2815         // The FieldIndexToOffset(i) expects the voffset_t so `id` is limited by
2816         // this type.
2817         voffset_t id = 0;
2818         const auto done = !atot(id_str.c_str(), *this, &id).Check();
2819         if (!done)
2820           return Error("field id\'s must be non-negative number, field: " +
2821                        field.name + ", id: " + id_str);
2822         if (i != id)
2823           return Error("field id\'s must be consecutive from 0, id " +
2824                        NumToString(i) + " missing or set twice, field: " +
2825                        field.name + ", id: " + id_str);
2826         field.value.offset = FieldIndexToOffset(i);
2827       }
2828     }
2829   }
2830 
2831   ECHECK(
2832       CheckClash(fields, struct_def, UnionTypeFieldSuffix(), BASE_TYPE_UNION));
2833   ECHECK(CheckClash(fields, struct_def, "Type", BASE_TYPE_UNION));
2834   ECHECK(CheckClash(fields, struct_def, "_length", BASE_TYPE_VECTOR));
2835   ECHECK(CheckClash(fields, struct_def, "Length", BASE_TYPE_VECTOR));
2836   ECHECK(CheckClash(fields, struct_def, "_byte_vector", BASE_TYPE_STRING));
2837   ECHECK(CheckClash(fields, struct_def, "ByteVector", BASE_TYPE_STRING));
2838   EXPECT('}');
2839   const auto qualified_name =
2840       current_namespace_->GetFullyQualifiedName(struct_def->name);
2841   if (types_.Add(qualified_name,
2842                  new Type(BASE_TYPE_STRUCT, struct_def, nullptr)))
2843     return Error("datatype already exists: " + qualified_name);
2844   return NoError();
2845 }
2846 
ParseService(const char * filename)2847 CheckedError Parser::ParseService(const char *filename) {
2848   std::vector<std::string> service_comment = doc_comment_;
2849   NEXT();
2850   auto service_name = attribute_;
2851   EXPECT(kTokenIdentifier);
2852   auto &service_def = *new ServiceDef();
2853   service_def.name = service_name;
2854   service_def.file = file_being_parsed_;
2855   service_def.doc_comment = service_comment;
2856   service_def.defined_namespace = current_namespace_;
2857   if (filename != nullptr && !opts.project_root.empty()) {
2858     service_def.declaration_file =
2859         &GetPooledString(FilePath(opts.project_root, filename, opts.binary_schema_absolute_paths));
2860   }
2861   if (services_.Add(current_namespace_->GetFullyQualifiedName(service_name),
2862                     &service_def))
2863     return Error("service already exists: " + service_name);
2864   ECHECK(ParseMetaData(&service_def.attributes));
2865   EXPECT('{');
2866   do {
2867     std::vector<std::string> doc_comment = doc_comment_;
2868     auto rpc_name = attribute_;
2869     EXPECT(kTokenIdentifier);
2870     EXPECT('(');
2871     Type reqtype, resptype;
2872     ECHECK(ParseTypeIdent(reqtype));
2873     EXPECT(')');
2874     EXPECT(':');
2875     ECHECK(ParseTypeIdent(resptype));
2876     if (reqtype.base_type != BASE_TYPE_STRUCT || reqtype.struct_def->fixed ||
2877         resptype.base_type != BASE_TYPE_STRUCT || resptype.struct_def->fixed)
2878       return Error("rpc request and response types must be tables");
2879     auto &rpc = *new RPCCall();
2880     rpc.name = rpc_name;
2881     rpc.request = reqtype.struct_def;
2882     rpc.response = resptype.struct_def;
2883     rpc.doc_comment = doc_comment;
2884     if (service_def.calls.Add(rpc_name, &rpc))
2885       return Error("rpc already exists: " + rpc_name);
2886     ECHECK(ParseMetaData(&rpc.attributes));
2887     EXPECT(';');
2888   } while (token_ != '}');
2889   NEXT();
2890   return NoError();
2891 }
2892 
SetRootType(const char * name)2893 bool Parser::SetRootType(const char *name) {
2894   root_struct_def_ = LookupStruct(name);
2895   if (!root_struct_def_)
2896     root_struct_def_ =
2897         LookupStruct(current_namespace_->GetFullyQualifiedName(name));
2898   return root_struct_def_ != nullptr;
2899 }
2900 
MarkGenerated()2901 void Parser::MarkGenerated() {
2902   // This function marks all existing definitions as having already
2903   // been generated, which signals no code for included files should be
2904   // generated.
2905   for (auto it = enums_.vec.begin(); it != enums_.vec.end(); ++it) {
2906     (*it)->generated = true;
2907   }
2908   for (auto it = structs_.vec.begin(); it != structs_.vec.end(); ++it) {
2909     if (!(*it)->predecl) { (*it)->generated = true; }
2910   }
2911   for (auto it = services_.vec.begin(); it != services_.vec.end(); ++it) {
2912     (*it)->generated = true;
2913   }
2914 }
2915 
ParseNamespace()2916 CheckedError Parser::ParseNamespace() {
2917   NEXT();
2918   auto ns = new Namespace();
2919   namespaces_.push_back(ns);  // Store it here to not leak upon error.
2920   if (token_ != ';') {
2921     for (;;) {
2922       ns->components.push_back(attribute_);
2923       EXPECT(kTokenIdentifier);
2924       if (Is('.')) NEXT() else break;
2925     }
2926   }
2927   namespaces_.pop_back();
2928   current_namespace_ = UniqueNamespace(ns);
2929   EXPECT(';');
2930   return NoError();
2931 }
2932 
2933 // Best effort parsing of .proto declarations, with the aim to turn them
2934 // in the closest corresponding FlatBuffer equivalent.
2935 // We parse everything as identifiers instead of keywords, since we don't
2936 // want protobuf keywords to become invalid identifiers in FlatBuffers.
ParseProtoDecl()2937 CheckedError Parser::ParseProtoDecl() {
2938   bool isextend = IsIdent("extend");
2939   if (IsIdent("package")) {
2940     // These are identical in syntax to FlatBuffer's namespace decl.
2941     ECHECK(ParseNamespace());
2942   } else if (IsIdent("message") || isextend) {
2943     std::vector<std::string> struct_comment = doc_comment_;
2944     NEXT();
2945     StructDef *struct_def = nullptr;
2946     Namespace *parent_namespace = nullptr;
2947     if (isextend) {
2948       if (Is('.')) NEXT();  // qualified names may start with a . ?
2949       auto id = attribute_;
2950       EXPECT(kTokenIdentifier);
2951       ECHECK(ParseNamespacing(&id, nullptr));
2952       struct_def = LookupCreateStruct(id, false);
2953       if (!struct_def)
2954         return Error("cannot extend unknown message type: " + id);
2955     } else {
2956       std::string name = attribute_;
2957       EXPECT(kTokenIdentifier);
2958       ECHECK(StartStruct(name, &struct_def));
2959       // Since message definitions can be nested, we create a new namespace.
2960       auto ns = new Namespace();
2961       // Copy of current namespace.
2962       *ns = *current_namespace_;
2963       // But with current message name.
2964       ns->components.push_back(name);
2965       ns->from_table++;
2966       parent_namespace = current_namespace_;
2967       current_namespace_ = UniqueNamespace(ns);
2968     }
2969     struct_def->doc_comment = struct_comment;
2970     ECHECK(ParseProtoFields(struct_def, isextend, false));
2971     if (!isextend) { current_namespace_ = parent_namespace; }
2972     if (Is(';')) NEXT();
2973   } else if (IsIdent("enum")) {
2974     // These are almost the same, just with different terminator:
2975     EnumDef *enum_def;
2976     ECHECK(ParseEnum(false, &enum_def, nullptr));
2977     if (Is(';')) NEXT();
2978     // Temp: remove any duplicates, as .fbs files can't handle them.
2979     enum_def->RemoveDuplicates();
2980   } else if (IsIdent("syntax")) {  // Skip these.
2981     NEXT();
2982     EXPECT('=');
2983     EXPECT(kTokenStringConstant);
2984     EXPECT(';');
2985   } else if (IsIdent("option")) {  // Skip these.
2986     ECHECK(ParseProtoOption());
2987     EXPECT(';');
2988   } else if (IsIdent("service")) {  // Skip these.
2989     NEXT();
2990     EXPECT(kTokenIdentifier);
2991     ECHECK(ParseProtoCurliesOrIdent());
2992   } else {
2993     return Error("don\'t know how to parse .proto declaration starting with " +
2994                  TokenToStringId(token_));
2995   }
2996   return NoError();
2997 }
2998 
StartEnum(const std::string & name,bool is_union,EnumDef ** dest)2999 CheckedError Parser::StartEnum(const std::string &name, bool is_union,
3000                                EnumDef **dest) {
3001   auto &enum_def = *new EnumDef();
3002   enum_def.name = name;
3003   enum_def.file = file_being_parsed_;
3004   enum_def.doc_comment = doc_comment_;
3005   enum_def.is_union = is_union;
3006   enum_def.defined_namespace = current_namespace_;
3007   const auto qualified_name = current_namespace_->GetFullyQualifiedName(name);
3008   if (enums_.Add(qualified_name, &enum_def))
3009     return Error("enum already exists: " + qualified_name);
3010   enum_def.underlying_type.base_type =
3011       is_union ? BASE_TYPE_UTYPE : BASE_TYPE_INT;
3012   enum_def.underlying_type.enum_def = &enum_def;
3013   if (dest) *dest = &enum_def;
3014   return NoError();
3015 }
3016 
ParseProtoFields(StructDef * struct_def,bool isextend,bool inside_oneof)3017 CheckedError Parser::ParseProtoFields(StructDef *struct_def, bool isextend,
3018                                       bool inside_oneof) {
3019   EXPECT('{');
3020   while (token_ != '}') {
3021     if (IsIdent("message") || IsIdent("extend") || IsIdent("enum")) {
3022       // Nested declarations.
3023       ECHECK(ParseProtoDecl());
3024     } else if (IsIdent("extensions")) {  // Skip these.
3025       NEXT();
3026       EXPECT(kTokenIntegerConstant);
3027       if (Is(kTokenIdentifier)) {
3028         NEXT();  // to
3029         NEXT();  // num
3030       }
3031       EXPECT(';');
3032     } else if (IsIdent("option")) {  // Skip these.
3033       ECHECK(ParseProtoOption());
3034       EXPECT(';');
3035     } else if (IsIdent("reserved")) {  // Skip these.
3036       /**
3037        * Reserved proto ids can be comma seperated (e.g. 1,2,4,5;)
3038        * or range based (e.g. 9 to 11;)
3039        * or combination of them (e.g. 1,2,9 to 11,4,5;)
3040        * It will be ended by a semicolon.
3041        */
3042       NEXT();
3043       bool range = false;
3044       voffset_t from = 0;
3045 
3046       while (!Is(';')) {
3047         if (token_ == kTokenIntegerConstant) {
3048           voffset_t attribute = 0;
3049           bool done = StringToNumber(attribute_.c_str(), &attribute);
3050           if (!done)
3051             return Error("Protobuf has non positive number in reserved ids");
3052 
3053           if (range) {
3054             for (voffset_t id = from + 1; id <= attribute; id++)
3055               struct_def->reserved_ids.push_back(id);
3056 
3057             range = false;
3058           } else {
3059             struct_def->reserved_ids.push_back(attribute);
3060           }
3061 
3062           from = attribute;
3063         }
3064 
3065         if (attribute_ == "to") range = true;
3066 
3067         NEXT();
3068       }  // A variety of formats, just skip.
3069 
3070       NEXT();
3071     } else if (IsIdent("map")) {
3072       ECHECK(ParseProtoMapField(struct_def));
3073     } else {
3074       std::vector<std::string> field_comment = doc_comment_;
3075       // Parse the qualifier.
3076       bool required = false;
3077       bool repeated = false;
3078       bool oneof = false;
3079       if (!inside_oneof) {
3080         if (IsIdent("optional")) {
3081           // This is the default.
3082           NEXT();
3083         } else if (IsIdent("required")) {
3084           required = true;
3085           NEXT();
3086         } else if (IsIdent("repeated")) {
3087           repeated = true;
3088           NEXT();
3089         } else if (IsIdent("oneof")) {
3090           oneof = true;
3091           NEXT();
3092         } else {
3093           // can't error, proto3 allows decls without any of the above.
3094         }
3095       }
3096       StructDef *anonymous_struct = nullptr;
3097       EnumDef *oneof_union = nullptr;
3098       Type type;
3099       if (IsIdent("group") || oneof) {
3100         if (!oneof) NEXT();
3101         if (oneof && opts.proto_oneof_union) {
3102           auto name = ConvertCase(attribute_, Case::kUpperCamel) + "Union";
3103           ECHECK(StartEnum(name, true, &oneof_union));
3104           type = Type(BASE_TYPE_UNION, nullptr, oneof_union);
3105         } else {
3106           auto name = "Anonymous" + NumToString(anonymous_counter_++);
3107           ECHECK(StartStruct(name, &anonymous_struct));
3108           type = Type(BASE_TYPE_STRUCT, anonymous_struct);
3109         }
3110       } else {
3111         ECHECK(ParseTypeFromProtoType(&type));
3112       }
3113       // Repeated elements get mapped to a vector.
3114       if (repeated) {
3115         type.element = type.base_type;
3116         type.base_type = BASE_TYPE_VECTOR;
3117         if (type.element == BASE_TYPE_VECTOR) {
3118           // We have a vector or vectors, which FlatBuffers doesn't support.
3119           // For now make it a vector of string (since the source is likely
3120           // "repeated bytes").
3121           // TODO(wvo): A better solution would be to wrap this in a table.
3122           type.element = BASE_TYPE_STRING;
3123         }
3124       }
3125       std::string name = attribute_;
3126       EXPECT(kTokenIdentifier);
3127       std::string proto_field_id;
3128       if (!oneof) {
3129         // Parse the field id. Since we're just translating schemas, not
3130         // any kind of binary compatibility, we can safely ignore these, and
3131         // assign our own.
3132         EXPECT('=');
3133         proto_field_id = attribute_;
3134         EXPECT(kTokenIntegerConstant);
3135       }
3136       FieldDef *field = nullptr;
3137       if (isextend) {
3138         // We allow a field to be re-defined when extending.
3139         // TODO: are there situations where that is problematic?
3140         field = struct_def->fields.Lookup(name);
3141       }
3142       if (!field) ECHECK(AddField(*struct_def, name, type, &field));
3143       field->doc_comment = field_comment;
3144       if (!proto_field_id.empty() || oneof) {
3145         auto val = new Value();
3146         val->constant = proto_field_id;
3147         field->attributes.Add("id", val);
3148       }
3149       if (!IsScalar(type.base_type) && required) {
3150         field->presence = FieldDef::kRequired;
3151       }
3152       // See if there's a default specified.
3153       if (Is('[')) {
3154         NEXT();
3155         for (;;) {
3156           auto key = attribute_;
3157           ECHECK(ParseProtoKey());
3158           EXPECT('=');
3159           auto val = attribute_;
3160           ECHECK(ParseProtoCurliesOrIdent());
3161           if (key == "default") {
3162             // Temp: skip non-numeric and non-boolean defaults (enums).
3163             auto numeric = strpbrk(val.c_str(), "0123456789-+.");
3164             if (IsFloat(type.base_type) &&
3165                 (val == "inf" || val == "+inf" || val == "-inf")) {
3166               // Prefer to be explicit with +inf.
3167               field->value.constant = val == "inf" ? "+inf" : val;
3168             } else if (IsScalar(type.base_type) && numeric == val.c_str()) {
3169               field->value.constant = val;
3170             } else if (val == "true") {
3171               field->value.constant = val;
3172             }  // "false" is default, no need to handle explicitly.
3173           } else if (key == "deprecated") {
3174             field->deprecated = val == "true";
3175           }
3176           if (!Is(',')) break;
3177           NEXT();
3178         }
3179         EXPECT(']');
3180       }
3181       if (anonymous_struct) {
3182         ECHECK(ParseProtoFields(anonymous_struct, false, oneof));
3183         if (Is(';')) NEXT();
3184       } else if (oneof_union) {
3185         // Parse into a temporary StructDef, then transfer fields into an
3186         // EnumDef describing the oneof as a union.
3187         StructDef oneof_struct;
3188         ECHECK(ParseProtoFields(&oneof_struct, false, oneof));
3189         if (Is(';')) NEXT();
3190         for (auto field_it = oneof_struct.fields.vec.begin();
3191              field_it != oneof_struct.fields.vec.end(); ++field_it) {
3192           const auto &oneof_field = **field_it;
3193           const auto &oneof_type = oneof_field.value.type;
3194           if (oneof_type.base_type != BASE_TYPE_STRUCT ||
3195               !oneof_type.struct_def || oneof_type.struct_def->fixed)
3196             return Error("oneof '" + name +
3197                          "' cannot be mapped to a union because member '" +
3198                          oneof_field.name + "' is not a table type.");
3199           EnumValBuilder evb(*this, *oneof_union);
3200           auto ev = evb.CreateEnumerator(oneof_type.struct_def->name);
3201           ev->union_type = oneof_type;
3202           ev->doc_comment = oneof_field.doc_comment;
3203           ECHECK(evb.AcceptEnumerator(oneof_field.name));
3204         }
3205       } else {
3206         EXPECT(';');
3207       }
3208     }
3209   }
3210   NEXT();
3211   return NoError();
3212 }
3213 
ParseProtoMapField(StructDef * struct_def)3214 CheckedError Parser::ParseProtoMapField(StructDef *struct_def) {
3215   NEXT();
3216   EXPECT('<');
3217   Type key_type;
3218   ECHECK(ParseType(key_type));
3219   EXPECT(',');
3220   Type value_type;
3221   ECHECK(ParseType(value_type));
3222   EXPECT('>');
3223   auto field_name = attribute_;
3224   NEXT();
3225   EXPECT('=');
3226   std::string proto_field_id = attribute_;
3227   EXPECT(kTokenIntegerConstant);
3228   EXPECT(';');
3229 
3230   auto entry_table_name = ConvertCase(field_name, Case::kUpperCamel) + "Entry";
3231   StructDef *entry_table;
3232   ECHECK(StartStruct(entry_table_name, &entry_table));
3233   entry_table->has_key = true;
3234   FieldDef *key_field;
3235   ECHECK(AddField(*entry_table, "key", key_type, &key_field));
3236   key_field->key = true;
3237   FieldDef *value_field;
3238   ECHECK(AddField(*entry_table, "value", value_type, &value_field));
3239 
3240   Type field_type;
3241   field_type.base_type = BASE_TYPE_VECTOR;
3242   field_type.element = BASE_TYPE_STRUCT;
3243   field_type.struct_def = entry_table;
3244   FieldDef *field;
3245   ECHECK(AddField(*struct_def, field_name, field_type, &field));
3246   if (!proto_field_id.empty()) {
3247     auto val = new Value();
3248     val->constant = proto_field_id;
3249     field->attributes.Add("id", val);
3250   }
3251 
3252   return NoError();
3253 }
3254 
ParseProtoKey()3255 CheckedError Parser::ParseProtoKey() {
3256   if (token_ == '(') {
3257     NEXT();
3258     // Skip "(a.b)" style custom attributes.
3259     while (token_ == '.' || token_ == kTokenIdentifier) NEXT();
3260     EXPECT(')');
3261     while (Is('.')) {
3262       NEXT();
3263       EXPECT(kTokenIdentifier);
3264     }
3265   } else {
3266     EXPECT(kTokenIdentifier);
3267   }
3268   return NoError();
3269 }
3270 
ParseProtoCurliesOrIdent()3271 CheckedError Parser::ParseProtoCurliesOrIdent() {
3272   if (Is('{')) {
3273     NEXT();
3274     for (int nesting = 1; nesting;) {
3275       if (token_ == '{')
3276         nesting++;
3277       else if (token_ == '}')
3278         nesting--;
3279       NEXT();
3280     }
3281   } else {
3282     NEXT();  // Any single token.
3283   }
3284   return NoError();
3285 }
3286 
ParseProtoOption()3287 CheckedError Parser::ParseProtoOption() {
3288   NEXT();
3289   ECHECK(ParseProtoKey());
3290   EXPECT('=');
3291   ECHECK(ParseProtoCurliesOrIdent());
3292   return NoError();
3293 }
3294 
3295 // Parse a protobuf type, and map it to the corresponding FlatBuffer one.
ParseTypeFromProtoType(Type * type)3296 CheckedError Parser::ParseTypeFromProtoType(Type *type) {
3297   struct type_lookup {
3298     const char *proto_type;
3299     BaseType fb_type, element;
3300   };
3301   static type_lookup lookup[] = {
3302     { "float", BASE_TYPE_FLOAT, BASE_TYPE_NONE },
3303     { "double", BASE_TYPE_DOUBLE, BASE_TYPE_NONE },
3304     { "int32", BASE_TYPE_INT, BASE_TYPE_NONE },
3305     { "int64", BASE_TYPE_LONG, BASE_TYPE_NONE },
3306     { "uint32", BASE_TYPE_UINT, BASE_TYPE_NONE },
3307     { "uint64", BASE_TYPE_ULONG, BASE_TYPE_NONE },
3308     { "sint32", BASE_TYPE_INT, BASE_TYPE_NONE },
3309     { "sint64", BASE_TYPE_LONG, BASE_TYPE_NONE },
3310     { "fixed32", BASE_TYPE_UINT, BASE_TYPE_NONE },
3311     { "fixed64", BASE_TYPE_ULONG, BASE_TYPE_NONE },
3312     { "sfixed32", BASE_TYPE_INT, BASE_TYPE_NONE },
3313     { "sfixed64", BASE_TYPE_LONG, BASE_TYPE_NONE },
3314     { "bool", BASE_TYPE_BOOL, BASE_TYPE_NONE },
3315     { "string", BASE_TYPE_STRING, BASE_TYPE_NONE },
3316     { "bytes", BASE_TYPE_VECTOR, BASE_TYPE_UCHAR },
3317     { nullptr, BASE_TYPE_NONE, BASE_TYPE_NONE }
3318   };
3319   for (auto tl = lookup; tl->proto_type; tl++) {
3320     if (attribute_ == tl->proto_type) {
3321       type->base_type = tl->fb_type;
3322       type->element = tl->element;
3323       NEXT();
3324       return NoError();
3325     }
3326   }
3327   if (Is('.')) NEXT();  // qualified names may start with a . ?
3328   ECHECK(ParseTypeIdent(*type));
3329   return NoError();
3330 }
3331 
SkipAnyJsonValue()3332 CheckedError Parser::SkipAnyJsonValue() {
3333   ParseDepthGuard depth_guard(this);
3334   ECHECK(depth_guard.Check());
3335 
3336   switch (token_) {
3337     case '{': {
3338       size_t fieldn_outer = 0;
3339       return ParseTableDelimiters(fieldn_outer, nullptr,
3340                                   [&](const std::string &, size_t &fieldn,
3341                                       const StructDef *) -> CheckedError {
3342                                     ECHECK(SkipAnyJsonValue());
3343                                     fieldn++;
3344                                     return NoError();
3345                                   });
3346     }
3347     case '[': {
3348       size_t count = 0;
3349       return ParseVectorDelimiters(
3350           count, [&](size_t &) -> CheckedError { return SkipAnyJsonValue(); });
3351     }
3352     case kTokenStringConstant:
3353     case kTokenIntegerConstant:
3354     case kTokenFloatConstant: NEXT(); break;
3355     default:
3356       if (IsIdent("true") || IsIdent("false") || IsIdent("null") ||
3357           IsIdent("inf")) {
3358         NEXT();
3359       } else
3360         return TokenError();
3361   }
3362   return NoError();
3363 }
3364 
ParseFlexBufferNumericConstant(flexbuffers::Builder * builder)3365 CheckedError Parser::ParseFlexBufferNumericConstant(
3366     flexbuffers::Builder *builder) {
3367   double d;
3368   if (!StringToNumber(attribute_.c_str(), &d))
3369     return Error("unexpected floating-point constant: " + attribute_);
3370   builder->Double(d);
3371   return NoError();
3372 }
3373 
ParseFlexBufferValue(flexbuffers::Builder * builder)3374 CheckedError Parser::ParseFlexBufferValue(flexbuffers::Builder *builder) {
3375   ParseDepthGuard depth_guard(this);
3376   ECHECK(depth_guard.Check());
3377 
3378   switch (token_) {
3379     case '{': {
3380       auto start = builder->StartMap();
3381       size_t fieldn_outer = 0;
3382       auto err =
3383           ParseTableDelimiters(fieldn_outer, nullptr,
3384                                [&](const std::string &name, size_t &fieldn,
3385                                    const StructDef *) -> CheckedError {
3386                                  builder->Key(name);
3387                                  ECHECK(ParseFlexBufferValue(builder));
3388                                  fieldn++;
3389                                  return NoError();
3390                                });
3391       ECHECK(err);
3392       builder->EndMap(start);
3393       if (builder->HasDuplicateKeys())
3394         return Error("FlexBuffers map has duplicate keys");
3395       break;
3396     }
3397     case '[': {
3398       auto start = builder->StartVector();
3399       size_t count = 0;
3400       ECHECK(ParseVectorDelimiters(count, [&](size_t &) -> CheckedError {
3401         return ParseFlexBufferValue(builder);
3402       }));
3403       builder->EndVector(start, false, false);
3404       break;
3405     }
3406     case kTokenStringConstant:
3407       builder->String(attribute_);
3408       EXPECT(kTokenStringConstant);
3409       break;
3410     case kTokenIntegerConstant:
3411       builder->Int(StringToInt(attribute_.c_str()));
3412       EXPECT(kTokenIntegerConstant);
3413       break;
3414     case kTokenFloatConstant: {
3415       double d;
3416       StringToNumber(attribute_.c_str(), &d);
3417       builder->Double(d);
3418       EXPECT(kTokenFloatConstant);
3419       break;
3420     }
3421     case '-':
3422     case '+': {
3423       // `[-+]?(nan|inf|infinity)`, see ParseSingleValue().
3424       const auto sign = static_cast<char>(token_);
3425       NEXT();
3426       if (token_ != kTokenIdentifier)
3427         return Error("floating-point constant expected");
3428       attribute_.insert(size_t(0), size_t(1), sign);
3429       ECHECK(ParseFlexBufferNumericConstant(builder));
3430       NEXT();
3431       break;
3432     }
3433     default:
3434       if (IsIdent("true")) {
3435         builder->Bool(true);
3436         NEXT();
3437       } else if (IsIdent("false")) {
3438         builder->Bool(false);
3439         NEXT();
3440       } else if (IsIdent("null")) {
3441         builder->Null();
3442         NEXT();
3443       } else if (IsIdent("inf") || IsIdent("infinity") || IsIdent("nan")) {
3444         ECHECK(ParseFlexBufferNumericConstant(builder));
3445         NEXT();
3446       } else
3447         return TokenError();
3448   }
3449   return NoError();
3450 }
3451 
ParseFlexBuffer(const char * source,const char * source_filename,flexbuffers::Builder * builder)3452 bool Parser::ParseFlexBuffer(const char *source, const char *source_filename,
3453                              flexbuffers::Builder *builder) {
3454   const auto initial_depth = parse_depth_counter_;
3455   (void)initial_depth;
3456   auto ok = !StartParseFile(source, source_filename).Check() &&
3457             !ParseFlexBufferValue(builder).Check();
3458   if (ok) builder->Finish();
3459   FLATBUFFERS_ASSERT(initial_depth == parse_depth_counter_);
3460   return ok;
3461 }
3462 
Parse(const char * source,const char ** include_paths,const char * source_filename)3463 bool Parser::Parse(const char *source, const char **include_paths,
3464                    const char *source_filename) {
3465   const auto initial_depth = parse_depth_counter_;
3466   (void)initial_depth;
3467   bool r;
3468 
3469   if (opts.use_flexbuffers) {
3470     r = ParseFlexBuffer(source, source_filename, &flex_builder_);
3471   } else {
3472     r = !ParseRoot(source, include_paths, source_filename).Check();
3473   }
3474   FLATBUFFERS_ASSERT(initial_depth == parse_depth_counter_);
3475   return r;
3476 }
3477 
ParseJson(const char * json,const char * json_filename)3478 bool Parser::ParseJson(const char *json, const char *json_filename) {
3479   const auto initial_depth = parse_depth_counter_;
3480   (void)initial_depth;
3481   builder_.Clear();
3482   const auto done =
3483       !StartParseFile(json, json_filename).Check() && !DoParseJson().Check();
3484   FLATBUFFERS_ASSERT(initial_depth == parse_depth_counter_);
3485   return done;
3486 }
3487 
BytesConsumed() const3488 std::ptrdiff_t Parser::BytesConsumed() const {
3489   return std::distance(source_, prev_cursor_);
3490 }
3491 
StartParseFile(const char * source,const char * source_filename)3492 CheckedError Parser::StartParseFile(const char *source,
3493                                     const char *source_filename) {
3494   file_being_parsed_ = source_filename ? source_filename : "";
3495   source_ = source;
3496   ResetState(source_);
3497   error_.clear();
3498   ECHECK(SkipByteOrderMark());
3499   NEXT();
3500   if (Is(kTokenEof)) return Error("input file is empty");
3501   return NoError();
3502 }
3503 
ParseRoot(const char * source,const char ** include_paths,const char * source_filename)3504 CheckedError Parser::ParseRoot(const char *source, const char **include_paths,
3505                                const char *source_filename) {
3506   ECHECK(DoParse(source, include_paths, source_filename, nullptr));
3507 
3508   // Check that all types were defined.
3509   for (auto it = structs_.vec.begin(); it != structs_.vec.end();) {
3510     auto &struct_def = **it;
3511     if (struct_def.predecl) {
3512       if (opts.proto_mode) {
3513         // Protos allow enums to be used before declaration, so check if that
3514         // is the case here.
3515         EnumDef *enum_def = nullptr;
3516         for (size_t components =
3517                  struct_def.defined_namespace->components.size() + 1;
3518              components && !enum_def; components--) {
3519           auto qualified_name =
3520               struct_def.defined_namespace->GetFullyQualifiedName(
3521                   struct_def.name, components - 1);
3522           enum_def = LookupEnum(qualified_name);
3523         }
3524         if (enum_def) {
3525           // This is pretty slow, but a simple solution for now.
3526           auto initial_count = struct_def.refcount;
3527           for (auto struct_it = structs_.vec.begin();
3528                struct_it != structs_.vec.end(); ++struct_it) {
3529             auto &sd = **struct_it;
3530             for (auto field_it = sd.fields.vec.begin();
3531                  field_it != sd.fields.vec.end(); ++field_it) {
3532               auto &field = **field_it;
3533               if (field.value.type.struct_def == &struct_def) {
3534                 field.value.type.struct_def = nullptr;
3535                 field.value.type.enum_def = enum_def;
3536                 auto &bt = IsVector(field.value.type)
3537                                ? field.value.type.element
3538                                : field.value.type.base_type;
3539                 FLATBUFFERS_ASSERT(bt == BASE_TYPE_STRUCT);
3540                 bt = enum_def->underlying_type.base_type;
3541                 struct_def.refcount--;
3542                 enum_def->refcount++;
3543               }
3544             }
3545           }
3546           if (struct_def.refcount)
3547             return Error("internal: " + NumToString(struct_def.refcount) + "/" +
3548                          NumToString(initial_count) +
3549                          " use(s) of pre-declaration enum not accounted for: " +
3550                          enum_def->name);
3551           structs_.dict.erase(structs_.dict.find(struct_def.name));
3552           it = structs_.vec.erase(it);
3553           delete &struct_def;
3554           continue;  // Skip error.
3555         }
3556       }
3557       auto err = "type referenced but not defined (check namespace): " +
3558                  struct_def.name;
3559       if (struct_def.original_location)
3560         err += ", originally at: " + *struct_def.original_location;
3561       return Error(err);
3562     }
3563     ++it;
3564   }
3565 
3566   // This check has to happen here and not earlier, because only now do we
3567   // know for sure what the type of these are.
3568   for (auto it = enums_.vec.begin(); it != enums_.vec.end(); ++it) {
3569     auto &enum_def = **it;
3570     if (enum_def.is_union) {
3571       for (auto val_it = enum_def.Vals().begin();
3572            val_it != enum_def.Vals().end(); ++val_it) {
3573         auto &val = **val_it;
3574 
3575         if (!(opts.lang_to_generate != 0 && SupportsAdvancedUnionFeatures()) &&
3576             (IsStruct(val.union_type) || IsString(val.union_type)))
3577 
3578           return Error(
3579               "only tables can be union elements in the generated language: " +
3580               val.name);
3581       }
3582     }
3583   }
3584 
3585   auto err = CheckPrivateLeak();
3586   if (err.Check()) return err;
3587 
3588   // Parse JSON object only if the scheme has been parsed.
3589   if (token_ == '{') { ECHECK(DoParseJson()); }
3590   return NoError();
3591 }
3592 
CheckPrivateLeak()3593 CheckedError Parser::CheckPrivateLeak() {
3594   if (!opts.no_leak_private_annotations) return NoError();
3595   // Iterate over all structs/tables to validate we arent leaking
3596   // any private (structs/tables/enums)
3597   for (auto it = structs_.vec.begin(); it != structs_.vec.end(); it++) {
3598     auto &struct_def = **it;
3599     for (auto fld_it = struct_def.fields.vec.begin();
3600          fld_it != struct_def.fields.vec.end(); ++fld_it) {
3601       auto &field = **fld_it;
3602 
3603       if (field.value.type.enum_def) {
3604         auto err =
3605             CheckPrivatelyLeakedFields(struct_def, *field.value.type.enum_def);
3606         if (err.Check()) { return err; }
3607       } else if (field.value.type.struct_def) {
3608         auto err = CheckPrivatelyLeakedFields(struct_def,
3609                                               *field.value.type.struct_def);
3610         if (err.Check()) { return err; }
3611       }
3612     }
3613   }
3614   // Iterate over all enums to validate we arent leaking
3615   // any private (structs/tables)
3616   for (auto it = enums_.vec.begin(); it != enums_.vec.end(); ++it) {
3617     auto &enum_def = **it;
3618     if (enum_def.is_union) {
3619       for (auto val_it = enum_def.Vals().begin();
3620            val_it != enum_def.Vals().end(); ++val_it) {
3621         auto &val = **val_it;
3622         if (val.union_type.struct_def) {
3623           auto err =
3624               CheckPrivatelyLeakedFields(enum_def, *val.union_type.struct_def);
3625           if (err.Check()) { return err; }
3626         }
3627       }
3628     }
3629   }
3630   return NoError();
3631 }
3632 
CheckPrivatelyLeakedFields(const Definition & def,const Definition & value_type)3633 CheckedError Parser::CheckPrivatelyLeakedFields(const Definition &def,
3634                                                 const Definition &value_type) {
3635   if (!opts.no_leak_private_annotations) return NoError();
3636   const auto is_private = def.attributes.Lookup("private");
3637   const auto is_field_private = value_type.attributes.Lookup("private");
3638   if (!is_private && is_field_private) {
3639     return Error(
3640         "Leaking private implementation, verify all objects have similar "
3641         "annotations");
3642   }
3643   return NoError();
3644 }
3645 
DoParse(const char * source,const char ** include_paths,const char * source_filename,const char * include_filename)3646 CheckedError Parser::DoParse(const char *source, const char **include_paths,
3647                              const char *source_filename,
3648                              const char *include_filename) {
3649   uint64_t source_hash = 0;
3650   if (source_filename) {
3651     // If the file is in-memory, don't include its contents in the hash as we
3652     // won't be able to load them later.
3653     if (FileExists(source_filename))
3654       source_hash = HashFile(source_filename, source);
3655     else
3656       source_hash = HashFile(source_filename, nullptr);
3657 
3658     if (included_files_.find(source_hash) == included_files_.end()) {
3659       included_files_[source_hash] = include_filename ? include_filename : "";
3660       files_included_per_file_[source_filename] = std::set<IncludedFile>();
3661     } else {
3662       return NoError();
3663     }
3664   }
3665   if (!include_paths) {
3666     static const char *current_directory[] = { "", nullptr };
3667     include_paths = current_directory;
3668   }
3669   field_stack_.clear();
3670   builder_.Clear();
3671   // Start with a blank namespace just in case this file doesn't have one.
3672   current_namespace_ = empty_namespace_;
3673 
3674   ECHECK(StartParseFile(source, source_filename));
3675 
3676   // Includes must come before type declarations:
3677   for (;;) {
3678     // Parse pre-include proto statements if any:
3679     if (opts.proto_mode && (attribute_ == "option" || attribute_ == "syntax" ||
3680                             attribute_ == "package")) {
3681       ECHECK(ParseProtoDecl());
3682     } else if (IsIdent("native_include")) {
3683       NEXT();
3684       native_included_files_.emplace_back(attribute_);
3685       EXPECT(kTokenStringConstant);
3686       EXPECT(';');
3687     } else if (IsIdent("include") || (opts.proto_mode && IsIdent("import"))) {
3688       NEXT();
3689       if (opts.proto_mode && attribute_ == "public") NEXT();
3690       auto name = flatbuffers::PosixPath(attribute_.c_str());
3691       EXPECT(kTokenStringConstant);
3692       // Look for the file relative to the directory of the current file.
3693       std::string filepath;
3694       if (source_filename) {
3695         auto source_file_directory =
3696             flatbuffers::StripFileName(source_filename);
3697         filepath = flatbuffers::ConCatPathFileName(source_file_directory, name);
3698       }
3699       if (filepath.empty() || !FileExists(filepath.c_str())) {
3700         // Look for the file in include_paths.
3701         for (auto paths = include_paths; paths && *paths; paths++) {
3702           filepath = flatbuffers::ConCatPathFileName(*paths, name);
3703           if (FileExists(filepath.c_str())) break;
3704         }
3705       }
3706       if (filepath.empty())
3707         return Error("unable to locate include file: " + name);
3708       if (source_filename) {
3709         IncludedFile included_file;
3710         included_file.filename = filepath;
3711         included_file.schema_name = name;
3712         files_included_per_file_[source_filename].insert(included_file);
3713       }
3714 
3715       std::string contents;
3716       bool file_loaded = LoadFile(filepath.c_str(), true, &contents);
3717       if (included_files_.find(HashFile(filepath.c_str(), contents.c_str())) ==
3718           included_files_.end()) {
3719         // We found an include file that we have not parsed yet.
3720         // Parse it.
3721         if (!file_loaded) return Error("unable to load include file: " + name);
3722         ECHECK(DoParse(contents.c_str(), include_paths, filepath.c_str(),
3723                        name.c_str()));
3724         // We generally do not want to output code for any included files:
3725         if (!opts.generate_all) MarkGenerated();
3726         // Reset these just in case the included file had them, and the
3727         // parent doesn't.
3728         root_struct_def_ = nullptr;
3729         file_identifier_.clear();
3730         file_extension_.clear();
3731         // This is the easiest way to continue this file after an include:
3732         // instead of saving and restoring all the state, we simply start the
3733         // file anew. This will cause it to encounter the same include
3734         // statement again, but this time it will skip it, because it was
3735         // entered into included_files_.
3736         // This is recursive, but only go as deep as the number of include
3737         // statements.
3738         included_files_.erase(source_hash);
3739         return DoParse(source, include_paths, source_filename,
3740                        include_filename);
3741       }
3742       EXPECT(';');
3743     } else {
3744       break;
3745     }
3746   }
3747   // Now parse all other kinds of declarations:
3748   while (token_ != kTokenEof) {
3749     if (opts.proto_mode) {
3750       ECHECK(ParseProtoDecl());
3751     } else if (IsIdent("namespace")) {
3752       ECHECK(ParseNamespace());
3753     } else if (token_ == '{') {
3754       return NoError();
3755     } else if (IsIdent("enum")) {
3756       ECHECK(ParseEnum(false, nullptr, source_filename));
3757     } else if (IsIdent("union")) {
3758       ECHECK(ParseEnum(true, nullptr, source_filename));
3759     } else if (IsIdent("root_type")) {
3760       NEXT();
3761       auto root_type = attribute_;
3762       EXPECT(kTokenIdentifier);
3763       ECHECK(ParseNamespacing(&root_type, nullptr));
3764       if (opts.root_type.empty()) {
3765         if (!SetRootType(root_type.c_str()))
3766           return Error("unknown root type: " + root_type);
3767         if (root_struct_def_->fixed) return Error("root type must be a table");
3768       }
3769       EXPECT(';');
3770     } else if (IsIdent("file_identifier")) {
3771       NEXT();
3772       file_identifier_ = attribute_;
3773       EXPECT(kTokenStringConstant);
3774       if (file_identifier_.length() != flatbuffers::kFileIdentifierLength)
3775         return Error("file_identifier must be exactly " +
3776                      NumToString(flatbuffers::kFileIdentifierLength) +
3777                      " characters");
3778       EXPECT(';');
3779     } else if (IsIdent("file_extension")) {
3780       NEXT();
3781       file_extension_ = attribute_;
3782       EXPECT(kTokenStringConstant);
3783       EXPECT(';');
3784     } else if (IsIdent("include")) {
3785       return Error("includes must come before declarations");
3786     } else if (IsIdent("attribute")) {
3787       NEXT();
3788       auto name = attribute_;
3789       if (Is(kTokenIdentifier)) {
3790         NEXT();
3791       } else {
3792         EXPECT(kTokenStringConstant);
3793       }
3794       EXPECT(';');
3795       known_attributes_[name] = false;
3796     } else if (IsIdent("rpc_service")) {
3797       ECHECK(ParseService(source_filename));
3798     } else {
3799       ECHECK(ParseDecl(source_filename));
3800     }
3801   }
3802   EXPECT(kTokenEof);
3803   if (opts.warnings_as_errors && has_warning_) {
3804     return Error("treating warnings as errors, failed due to above warnings");
3805   }
3806   return NoError();
3807 }
3808 
DoParseJson()3809 CheckedError Parser::DoParseJson() {
3810   if (token_ != '{') {
3811     EXPECT('{');
3812   } else {
3813     if (!root_struct_def_) return Error("no root type set to parse json with");
3814     if (builder_.GetSize()) {
3815       return Error("cannot have more than one json object in a file");
3816     }
3817     uoffset_t toff;
3818     ECHECK(ParseTable(*root_struct_def_, nullptr, &toff));
3819     if (opts.size_prefixed) {
3820       builder_.FinishSizePrefixed(
3821           Offset<Table>(toff),
3822           file_identifier_.length() ? file_identifier_.c_str() : nullptr);
3823     } else {
3824       builder_.Finish(Offset<Table>(toff), file_identifier_.length()
3825                                                ? file_identifier_.c_str()
3826                                                : nullptr);
3827     }
3828   }
3829   if (opts.require_json_eof) {
3830     // Check that JSON file doesn't contain more objects or IDL directives.
3831     // Comments after JSON are allowed.
3832     EXPECT(kTokenEof);
3833   }
3834   return NoError();
3835 }
3836 
GetIncludedFilesRecursive(const std::string & file_name) const3837 std::set<std::string> Parser::GetIncludedFilesRecursive(
3838     const std::string &file_name) const {
3839   std::set<std::string> included_files;
3840   std::list<std::string> to_process;
3841 
3842   if (file_name.empty()) return included_files;
3843   to_process.push_back(file_name);
3844 
3845   while (!to_process.empty()) {
3846     std::string current = to_process.front();
3847     to_process.pop_front();
3848     included_files.insert(current);
3849 
3850     // Workaround the lack of const accessor in C++98 maps.
3851     auto &new_files =
3852         (*const_cast<std::map<std::string, std::set<IncludedFile>> *>(
3853             &files_included_per_file_))[current];
3854     for (auto it = new_files.begin(); it != new_files.end(); ++it) {
3855       if (included_files.find(it->filename) == included_files.end())
3856         to_process.push_back(it->filename);
3857     }
3858   }
3859 
3860   return included_files;
3861 }
3862 
3863 // Schema serialization functionality:
3864 
3865 static flatbuffers::Offset<
3866     flatbuffers::Vector<flatbuffers::Offset<reflection::KeyValue>>>
SerializeAttributesCommon(const SymbolTable<Value> & attributes,FlatBufferBuilder * builder,const Parser & parser)3867 SerializeAttributesCommon(const SymbolTable<Value> &attributes,
3868                           FlatBufferBuilder *builder, const Parser &parser) {
3869   std::vector<flatbuffers::Offset<reflection::KeyValue>> attrs;
3870   for (auto kv = attributes.dict.begin(); kv != attributes.dict.end(); ++kv) {
3871     auto it = parser.known_attributes_.find(kv->first);
3872     FLATBUFFERS_ASSERT(it != parser.known_attributes_.end());
3873     if (parser.opts.binary_schema_builtins || !it->second) {
3874       auto key = builder->CreateString(kv->first);
3875       auto val = builder->CreateString(kv->second->constant);
3876       attrs.push_back(reflection::CreateKeyValue(*builder, key, val));
3877     }
3878   }
3879   if (attrs.size()) {
3880     return builder->CreateVectorOfSortedTables(&attrs);
3881   } else {
3882     return 0;
3883   }
3884 }
3885 
DeserializeAttributesCommon(SymbolTable<Value> & attributes,Parser & parser,const Vector<Offset<reflection::KeyValue>> * attrs)3886 static bool DeserializeAttributesCommon(
3887     SymbolTable<Value> &attributes, Parser &parser,
3888     const Vector<Offset<reflection::KeyValue>> *attrs) {
3889   if (attrs == nullptr) return true;
3890   for (uoffset_t i = 0; i < attrs->size(); ++i) {
3891     auto kv = attrs->Get(i);
3892     auto value = new Value();
3893     if (kv->value()) { value->constant = kv->value()->str(); }
3894     if (attributes.Add(kv->key()->str(), value)) {
3895       delete value;
3896       return false;
3897     }
3898     parser.known_attributes_[kv->key()->str()];
3899   }
3900   return true;
3901 }
3902 
Serialize()3903 void Parser::Serialize() {
3904   builder_.Clear();
3905   AssignIndices(structs_.vec);
3906   AssignIndices(enums_.vec);
3907   std::vector<Offset<reflection::Object>> object_offsets;
3908   std::set<std::string> files;
3909   for (auto it = structs_.vec.begin(); it != structs_.vec.end(); ++it) {
3910     auto offset = (*it)->Serialize(&builder_, *this);
3911     object_offsets.push_back(offset);
3912     (*it)->serialized_location = offset.o;
3913     const std::string *file = (*it)->declaration_file;
3914     if (file) files.insert(*file);
3915   }
3916   std::vector<Offset<reflection::Enum>> enum_offsets;
3917   for (auto it = enums_.vec.begin(); it != enums_.vec.end(); ++it) {
3918     auto offset = (*it)->Serialize(&builder_, *this);
3919     enum_offsets.push_back(offset);
3920     const std::string *file = (*it)->declaration_file;
3921     if (file) files.insert(*file);
3922   }
3923   std::vector<Offset<reflection::Service>> service_offsets;
3924   for (auto it = services_.vec.begin(); it != services_.vec.end(); ++it) {
3925     auto offset = (*it)->Serialize(&builder_, *this);
3926     service_offsets.push_back(offset);
3927     const std::string *file = (*it)->declaration_file;
3928     if (file) files.insert(*file);
3929   }
3930 
3931   // Create Schemafiles vector of tables.
3932   flatbuffers::Offset<
3933       flatbuffers::Vector<flatbuffers::Offset<reflection::SchemaFile>>>
3934       schema_files__;
3935   if (!opts.project_root.empty()) {
3936     std::vector<Offset<reflection::SchemaFile>> schema_files;
3937     std::vector<Offset<flatbuffers::String>> included_files;
3938     for (auto f = files_included_per_file_.begin();
3939          f != files_included_per_file_.end(); f++) {
3940 
3941       const auto filename__ = builder_.CreateSharedString(FilePath(
3942           opts.project_root, f->first, opts.binary_schema_absolute_paths));
3943       for (auto i = f->second.begin(); i != f->second.end(); i++) {
3944         included_files.push_back(builder_.CreateSharedString(
3945             FilePath(opts.project_root, i->filename, opts.binary_schema_absolute_paths)));
3946       }
3947       const auto included_files__ = builder_.CreateVector(included_files);
3948       included_files.clear();
3949 
3950       schema_files.push_back(
3951           reflection::CreateSchemaFile(builder_, filename__, included_files__));
3952     }
3953     schema_files__ = builder_.CreateVectorOfSortedTables(&schema_files);
3954   }
3955 
3956   const auto objs__ = builder_.CreateVectorOfSortedTables(&object_offsets);
3957   const auto enum__ = builder_.CreateVectorOfSortedTables(&enum_offsets);
3958   const auto fiid__ = builder_.CreateString(file_identifier_);
3959   const auto fext__ = builder_.CreateString(file_extension_);
3960   const auto serv__ = builder_.CreateVectorOfSortedTables(&service_offsets);
3961   const auto schema_offset = reflection::CreateSchema(
3962       builder_, objs__, enum__, fiid__, fext__,
3963       (root_struct_def_ ? root_struct_def_->serialized_location : 0), serv__,
3964       static_cast<reflection::AdvancedFeatures>(advanced_features_),
3965       schema_files__);
3966   if (opts.size_prefixed) {
3967     builder_.FinishSizePrefixed(schema_offset, reflection::SchemaIdentifier());
3968   } else {
3969     builder_.Finish(schema_offset, reflection::SchemaIdentifier());
3970   }
3971 }
3972 
Serialize(FlatBufferBuilder * builder,const Parser & parser) const3973 Offset<reflection::Object> StructDef::Serialize(FlatBufferBuilder *builder,
3974                                                 const Parser &parser) const {
3975   std::vector<Offset<reflection::Field>> field_offsets;
3976   for (auto it = fields.vec.begin(); it != fields.vec.end(); ++it) {
3977     field_offsets.push_back((*it)->Serialize(
3978         builder, static_cast<uint16_t>(it - fields.vec.begin()), parser));
3979   }
3980   const auto qualified_name = defined_namespace->GetFullyQualifiedName(name);
3981   const auto name__ = builder->CreateString(qualified_name);
3982   const auto flds__ = builder->CreateVectorOfSortedTables(&field_offsets);
3983   const auto attr__ = SerializeAttributes(builder, parser);
3984   const auto docs__ = parser.opts.binary_schema_comments && !doc_comment.empty()
3985                           ? builder->CreateVectorOfStrings(doc_comment)
3986                           : 0;
3987   std::string decl_file_in_project = declaration_file ? *declaration_file : "";
3988   const auto file__ = builder->CreateSharedString(decl_file_in_project);
3989   return reflection::CreateObject(
3990       *builder, name__, flds__, fixed, static_cast<int>(minalign),
3991       static_cast<int>(bytesize), attr__, docs__, file__);
3992 }
3993 
Deserialize(Parser & parser,const reflection::Object * object)3994 bool StructDef::Deserialize(Parser &parser, const reflection::Object *object) {
3995   if (!DeserializeAttributes(parser, object->attributes())) return false;
3996   DeserializeDoc(doc_comment, object->documentation());
3997   name = parser.UnqualifiedName(object->name()->str());
3998   predecl = false;
3999   sortbysize = attributes.Lookup("original_order") == nullptr && !fixed;
4000   const auto &of = *(object->fields());
4001   auto indexes = std::vector<uoffset_t>(of.size());
4002   for (uoffset_t i = 0; i < of.size(); i++) indexes[of.Get(i)->id()] = i;
4003   size_t tmp_struct_size = 0;
4004   for (size_t i = 0; i < indexes.size(); i++) {
4005     auto field = of.Get(indexes[i]);
4006     auto field_def = new FieldDef();
4007     if (!field_def->Deserialize(parser, field) ||
4008         fields.Add(field_def->name, field_def)) {
4009       delete field_def;
4010       return false;
4011     }
4012     if (field_def->key) {
4013       if (has_key) {
4014         // only one field may be set as key
4015         delete field_def;
4016         return false;
4017       }
4018       has_key = true;
4019     }
4020     if (fixed) {
4021       // Recompute padding since that's currently not serialized.
4022       auto size = InlineSize(field_def->value.type);
4023       auto next_field =
4024           i + 1 < indexes.size() ? of.Get(indexes[i + 1]) : nullptr;
4025       tmp_struct_size += size;
4026       field_def->padding =
4027           next_field ? (next_field->offset() - field_def->value.offset) - size
4028                      : PaddingBytes(tmp_struct_size, minalign);
4029       tmp_struct_size += field_def->padding;
4030     }
4031   }
4032   FLATBUFFERS_ASSERT(static_cast<int>(tmp_struct_size) == object->bytesize());
4033   return true;
4034 }
4035 
Serialize(FlatBufferBuilder * builder,uint16_t id,const Parser & parser) const4036 Offset<reflection::Field> FieldDef::Serialize(FlatBufferBuilder *builder,
4037                                               uint16_t id,
4038                                               const Parser &parser) const {
4039   auto name__ = builder->CreateString(name);
4040   auto type__ = value.type.Serialize(builder);
4041   auto attr__ = SerializeAttributes(builder, parser);
4042   auto docs__ = parser.opts.binary_schema_comments && !doc_comment.empty()
4043                     ? builder->CreateVectorOfStrings(doc_comment)
4044                     : 0;
4045   double d;
4046   StringToNumber(value.constant.c_str(), &d);
4047   return reflection::CreateField(
4048       *builder, name__, type__, id, value.offset,
4049       // Is uint64>max(int64) tested?
4050       IsInteger(value.type.base_type) ? StringToInt(value.constant.c_str()) : 0,
4051       // result may be platform-dependent if underlying is float (not double)
4052       IsFloat(value.type.base_type) ? d : 0.0, deprecated, IsRequired(), key,
4053       attr__, docs__, IsOptional(), static_cast<uint16_t>(padding), offset64);
4054   // TODO: value.constant is almost always "0", we could save quite a bit of
4055   // space by sharing it. Same for common values of value.type.
4056 }
4057 
Deserialize(Parser & parser,const reflection::Field * field)4058 bool FieldDef::Deserialize(Parser &parser, const reflection::Field *field) {
4059   name = field->name()->str();
4060   defined_namespace = parser.current_namespace_;
4061   if (!value.type.Deserialize(parser, field->type())) return false;
4062   value.offset = field->offset();
4063   if (IsInteger(value.type.base_type)) {
4064     value.constant = NumToString(field->default_integer());
4065   } else if (IsFloat(value.type.base_type)) {
4066     value.constant = FloatToString(field->default_real(), 17);
4067   }
4068   presence = FieldDef::MakeFieldPresence(field->optional(), field->required());
4069   padding = field->padding();
4070   key = field->key();
4071   offset64 = field->offset64();
4072   if (!DeserializeAttributes(parser, field->attributes())) return false;
4073   // TODO: this should probably be handled by a separate attribute
4074   if (attributes.Lookup("flexbuffer")) {
4075     flexbuffer = true;
4076     parser.uses_flexbuffers_ = true;
4077     if (value.type.base_type != BASE_TYPE_VECTOR ||
4078         value.type.element != BASE_TYPE_UCHAR)
4079       return false;
4080   }
4081   if (auto nested = attributes.Lookup("nested_flatbuffer")) {
4082     auto nested_qualified_name =
4083         parser.current_namespace_->GetFullyQualifiedName(nested->constant);
4084     nested_flatbuffer = parser.LookupStruct(nested_qualified_name);
4085     if (!nested_flatbuffer) return false;
4086   }
4087   shared = attributes.Lookup("shared") != nullptr;
4088   DeserializeDoc(doc_comment, field->documentation());
4089   return true;
4090 }
4091 
Serialize(FlatBufferBuilder * builder,const Parser & parser) const4092 Offset<reflection::RPCCall> RPCCall::Serialize(FlatBufferBuilder *builder,
4093                                                const Parser &parser) const {
4094   auto name__ = builder->CreateString(name);
4095   auto attr__ = SerializeAttributes(builder, parser);
4096   auto docs__ = parser.opts.binary_schema_comments && !doc_comment.empty()
4097                     ? builder->CreateVectorOfStrings(doc_comment)
4098                     : 0;
4099   return reflection::CreateRPCCall(
4100       *builder, name__, request->serialized_location,
4101       response->serialized_location, attr__, docs__);
4102 }
4103 
Deserialize(Parser & parser,const reflection::RPCCall * call)4104 bool RPCCall::Deserialize(Parser &parser, const reflection::RPCCall *call) {
4105   name = call->name()->str();
4106   if (!DeserializeAttributes(parser, call->attributes())) return false;
4107   DeserializeDoc(doc_comment, call->documentation());
4108   request = parser.structs_.Lookup(call->request()->name()->str());
4109   response = parser.structs_.Lookup(call->response()->name()->str());
4110   if (!request || !response) { return false; }
4111   return true;
4112 }
4113 
Serialize(FlatBufferBuilder * builder,const Parser & parser) const4114 Offset<reflection::Service> ServiceDef::Serialize(FlatBufferBuilder *builder,
4115                                                   const Parser &parser) const {
4116   std::vector<Offset<reflection::RPCCall>> servicecall_offsets;
4117   for (auto it = calls.vec.begin(); it != calls.vec.end(); ++it) {
4118     servicecall_offsets.push_back((*it)->Serialize(builder, parser));
4119   }
4120   const auto qualified_name = defined_namespace->GetFullyQualifiedName(name);
4121   const auto name__ = builder->CreateString(qualified_name);
4122   const auto call__ = builder->CreateVector(servicecall_offsets);
4123   const auto attr__ = SerializeAttributes(builder, parser);
4124   const auto docs__ = parser.opts.binary_schema_comments && !doc_comment.empty()
4125                           ? builder->CreateVectorOfStrings(doc_comment)
4126                           : 0;
4127   std::string decl_file_in_project = declaration_file ? *declaration_file : "";
4128   const auto file__ = builder->CreateSharedString(decl_file_in_project);
4129   return reflection::CreateService(*builder, name__, call__, attr__, docs__,
4130                                    file__);
4131 }
4132 
Deserialize(Parser & parser,const reflection::Service * service)4133 bool ServiceDef::Deserialize(Parser &parser,
4134                              const reflection::Service *service) {
4135   name = parser.UnqualifiedName(service->name()->str());
4136   if (service->calls()) {
4137     for (uoffset_t i = 0; i < service->calls()->size(); ++i) {
4138       auto call = new RPCCall();
4139       if (!call->Deserialize(parser, service->calls()->Get(i)) ||
4140           calls.Add(call->name, call)) {
4141         delete call;
4142         return false;
4143       }
4144     }
4145   }
4146   if (!DeserializeAttributes(parser, service->attributes())) return false;
4147   DeserializeDoc(doc_comment, service->documentation());
4148   return true;
4149 }
4150 
Serialize(FlatBufferBuilder * builder,const Parser & parser) const4151 Offset<reflection::Enum> EnumDef::Serialize(FlatBufferBuilder *builder,
4152                                             const Parser &parser) const {
4153   std::vector<Offset<reflection::EnumVal>> enumval_offsets;
4154   for (auto it = vals.vec.begin(); it != vals.vec.end(); ++it) {
4155     enumval_offsets.push_back((*it)->Serialize(builder, parser));
4156   }
4157   const auto qualified_name = defined_namespace->GetFullyQualifiedName(name);
4158   const auto name__ = builder->CreateString(qualified_name);
4159   const auto vals__ = builder->CreateVector(enumval_offsets);
4160   const auto type__ = underlying_type.Serialize(builder);
4161   const auto attr__ = SerializeAttributes(builder, parser);
4162   const auto docs__ = parser.opts.binary_schema_comments && !doc_comment.empty()
4163                           ? builder->CreateVectorOfStrings(doc_comment)
4164                           : 0;
4165   std::string decl_file_in_project = declaration_file ? *declaration_file : "";
4166   const auto file__ = builder->CreateSharedString(decl_file_in_project);
4167   return reflection::CreateEnum(*builder, name__, vals__, is_union, type__,
4168                                 attr__, docs__, file__);
4169 }
4170 
Deserialize(Parser & parser,const reflection::Enum * _enum)4171 bool EnumDef::Deserialize(Parser &parser, const reflection::Enum *_enum) {
4172   name = parser.UnqualifiedName(_enum->name()->str());
4173   for (uoffset_t i = 0; i < _enum->values()->size(); ++i) {
4174     auto val = new EnumVal();
4175     if (!val->Deserialize(parser, _enum->values()->Get(i)) ||
4176         vals.Add(val->name, val)) {
4177       delete val;
4178       return false;
4179     }
4180   }
4181   is_union = _enum->is_union();
4182   if (!underlying_type.Deserialize(parser, _enum->underlying_type())) {
4183     return false;
4184   }
4185   if (!DeserializeAttributes(parser, _enum->attributes())) return false;
4186   DeserializeDoc(doc_comment, _enum->documentation());
4187   return true;
4188 }
4189 
4190 flatbuffers::Offset<
4191     flatbuffers::Vector<flatbuffers::Offset<reflection::KeyValue>>>
SerializeAttributes(FlatBufferBuilder * builder,const Parser & parser) const4192 EnumVal::SerializeAttributes(FlatBufferBuilder *builder,
4193                              const Parser &parser) const {
4194   return SerializeAttributesCommon(attributes, builder, parser);
4195 }
4196 
DeserializeAttributes(Parser & parser,const Vector<Offset<reflection::KeyValue>> * attrs)4197 bool EnumVal::DeserializeAttributes(
4198     Parser &parser, const Vector<Offset<reflection::KeyValue>> *attrs) {
4199   return DeserializeAttributesCommon(attributes, parser, attrs);
4200 }
4201 
Serialize(FlatBufferBuilder * builder,const Parser & parser) const4202 Offset<reflection::EnumVal> EnumVal::Serialize(FlatBufferBuilder *builder,
4203                                                const Parser &parser) const {
4204   const auto name__ = builder->CreateString(name);
4205   const auto type__ = union_type.Serialize(builder);
4206   const auto attr__ = SerializeAttributes(builder, parser);
4207   const auto docs__ = parser.opts.binary_schema_comments && !doc_comment.empty()
4208                           ? builder->CreateVectorOfStrings(doc_comment)
4209                           : 0;
4210   return reflection::CreateEnumVal(*builder, name__, value, type__, docs__,
4211                                    attr__);
4212 }
4213 
Deserialize(Parser & parser,const reflection::EnumVal * val)4214 bool EnumVal::Deserialize(Parser &parser, const reflection::EnumVal *val) {
4215   name = val->name()->str();
4216   value = val->value();
4217   if (!union_type.Deserialize(parser, val->union_type())) return false;
4218   if (!DeserializeAttributes(parser, val->attributes())) return false;
4219   DeserializeDoc(doc_comment, val->documentation());
4220   return true;
4221 }
4222 
Serialize(FlatBufferBuilder * builder) const4223 Offset<reflection::Type> Type::Serialize(FlatBufferBuilder *builder) const {
4224   size_t element_size = SizeOf(element);
4225   if (base_type == BASE_TYPE_VECTOR && element == BASE_TYPE_STRUCT &&
4226       struct_def->bytesize != 0) {
4227     // struct_def->bytesize==0 means struct is table
4228     element_size = struct_def->bytesize;
4229   }
4230   return reflection::CreateType(
4231       *builder, static_cast<reflection::BaseType>(base_type),
4232       static_cast<reflection::BaseType>(element),
4233       struct_def ? struct_def->index : (enum_def ? enum_def->index : -1),
4234       fixed_length, static_cast<uint32_t>(SizeOf(base_type)),
4235       static_cast<uint32_t>(element_size));
4236 }
4237 
Deserialize(const Parser & parser,const reflection::Type * type)4238 bool Type::Deserialize(const Parser &parser, const reflection::Type *type) {
4239   if (type == nullptr) return true;
4240   base_type = static_cast<BaseType>(type->base_type());
4241   element = static_cast<BaseType>(type->element());
4242   fixed_length = type->fixed_length();
4243   if (type->index() >= 0) {
4244     bool is_series = type->base_type() == reflection::Vector ||
4245                      type->base_type() == reflection::Array;
4246     if (type->base_type() == reflection::Obj ||
4247         (is_series && type->element() == reflection::Obj)) {
4248       if (static_cast<size_t>(type->index()) < parser.structs_.vec.size()) {
4249         struct_def = parser.structs_.vec[type->index()];
4250         struct_def->refcount++;
4251       } else {
4252         return false;
4253       }
4254     } else {
4255       if (static_cast<size_t>(type->index()) < parser.enums_.vec.size()) {
4256         enum_def = parser.enums_.vec[type->index()];
4257       } else {
4258         return false;
4259       }
4260     }
4261   }
4262   return true;
4263 }
4264 
4265 flatbuffers::Offset<
4266     flatbuffers::Vector<flatbuffers::Offset<reflection::KeyValue>>>
SerializeAttributes(FlatBufferBuilder * builder,const Parser & parser) const4267 Definition::SerializeAttributes(FlatBufferBuilder *builder,
4268                                 const Parser &parser) const {
4269   return SerializeAttributesCommon(attributes, builder, parser);
4270 }
4271 
DeserializeAttributes(Parser & parser,const Vector<Offset<reflection::KeyValue>> * attrs)4272 bool Definition::DeserializeAttributes(
4273     Parser &parser, const Vector<Offset<reflection::KeyValue>> *attrs) {
4274   return DeserializeAttributesCommon(attributes, parser, attrs);
4275 }
4276 
4277 /************************************************************************/
4278 /* DESERIALIZATION                                                      */
4279 /************************************************************************/
Deserialize(const uint8_t * buf,const size_t size)4280 bool Parser::Deserialize(const uint8_t *buf, const size_t size) {
4281   flatbuffers::Verifier verifier(reinterpret_cast<const uint8_t *>(buf), size);
4282   bool size_prefixed = false;
4283   if (!reflection::SchemaBufferHasIdentifier(buf)) {
4284     if (!flatbuffers::BufferHasIdentifier(buf, reflection::SchemaIdentifier(),
4285                                           true))
4286       return false;
4287     else
4288       size_prefixed = true;
4289   }
4290   auto verify_fn = size_prefixed ? &reflection::VerifySizePrefixedSchemaBuffer
4291                                  : &reflection::VerifySchemaBuffer;
4292   if (!verify_fn(verifier)) { return false; }
4293   auto schema = size_prefixed ? reflection::GetSizePrefixedSchema(buf)
4294                               : reflection::GetSchema(buf);
4295   return Deserialize(schema);
4296 }
4297 
Deserialize(const reflection::Schema * schema)4298 bool Parser::Deserialize(const reflection::Schema *schema) {
4299   file_identifier_ = schema->file_ident() ? schema->file_ident()->str() : "";
4300   file_extension_ = schema->file_ext() ? schema->file_ext()->str() : "";
4301   std::map<std::string, Namespace *> namespaces_index;
4302 
4303   // Create defs without deserializing so references from fields to structs and
4304   // enums can be resolved.
4305   for (auto it = schema->objects()->begin(); it != schema->objects()->end();
4306        ++it) {
4307     auto struct_def = new StructDef();
4308     struct_def->bytesize = it->bytesize();
4309     struct_def->fixed = it->is_struct();
4310     struct_def->minalign = it->minalign();
4311     if (structs_.Add(it->name()->str(), struct_def)) {
4312       delete struct_def;
4313       return false;
4314     }
4315     auto type = new Type(BASE_TYPE_STRUCT, struct_def, nullptr);
4316     if (types_.Add(it->name()->str(), type)) {
4317       delete type;
4318       return false;
4319     }
4320   }
4321   for (auto it = schema->enums()->begin(); it != schema->enums()->end(); ++it) {
4322     auto enum_def = new EnumDef();
4323     if (enums_.Add(it->name()->str(), enum_def)) {
4324       delete enum_def;
4325       return false;
4326     }
4327     auto type = new Type(BASE_TYPE_UNION, nullptr, enum_def);
4328     if (types_.Add(it->name()->str(), type)) {
4329       delete type;
4330       return false;
4331     }
4332   }
4333 
4334   // Now fields can refer to structs and enums by index.
4335   for (auto it = schema->objects()->begin(); it != schema->objects()->end();
4336        ++it) {
4337     std::string qualified_name = it->name()->str();
4338     auto struct_def = structs_.Lookup(qualified_name);
4339     struct_def->defined_namespace =
4340         GetNamespace(qualified_name, namespaces_, namespaces_index);
4341     if (!struct_def->Deserialize(*this, *it)) { return false; }
4342     if (schema->root_table() == *it) { root_struct_def_ = struct_def; }
4343   }
4344   for (auto it = schema->enums()->begin(); it != schema->enums()->end(); ++it) {
4345     std::string qualified_name = it->name()->str();
4346     auto enum_def = enums_.Lookup(qualified_name);
4347     enum_def->defined_namespace =
4348         GetNamespace(qualified_name, namespaces_, namespaces_index);
4349     if (!enum_def->Deserialize(*this, *it)) { return false; }
4350   }
4351 
4352   if (schema->services()) {
4353     for (auto it = schema->services()->begin(); it != schema->services()->end();
4354          ++it) {
4355       std::string qualified_name = it->name()->str();
4356       auto service_def = new ServiceDef();
4357       service_def->defined_namespace =
4358           GetNamespace(qualified_name, namespaces_, namespaces_index);
4359       if (!service_def->Deserialize(*this, *it) ||
4360           services_.Add(qualified_name, service_def)) {
4361         delete service_def;
4362         return false;
4363       }
4364     }
4365   }
4366   advanced_features_ = schema->advanced_features();
4367 
4368   if (schema->fbs_files())
4369     for (auto s = schema->fbs_files()->begin(); s != schema->fbs_files()->end();
4370          ++s) {
4371       for (auto f = s->included_filenames()->begin();
4372            f != s->included_filenames()->end(); ++f) {
4373         IncludedFile included_file;
4374         included_file.filename = f->str();
4375         files_included_per_file_[s->filename()->str()].insert(included_file);
4376       }
4377     }
4378 
4379   return true;
4380 }
4381 
ConformTo(const Parser & base)4382 std::string Parser::ConformTo(const Parser &base) {
4383   for (auto sit = structs_.vec.begin(); sit != structs_.vec.end(); ++sit) {
4384     auto &struct_def = **sit;
4385     auto qualified_name =
4386         struct_def.defined_namespace->GetFullyQualifiedName(struct_def.name);
4387     auto struct_def_base = base.LookupStruct(qualified_name);
4388     if (!struct_def_base) continue;
4389     std::set<FieldDef *> renamed_fields;
4390     for (auto fit = struct_def.fields.vec.begin();
4391          fit != struct_def.fields.vec.end(); ++fit) {
4392       auto &field = **fit;
4393       auto field_base = struct_def_base->fields.Lookup(field.name);
4394       const auto qualified_field_name = qualified_name + "." + field.name;
4395       if (field_base) {
4396         if (field.value.offset != field_base->value.offset) {
4397           return "offsets differ for field: " + qualified_field_name;
4398         }
4399         if (field.value.constant != field_base->value.constant) {
4400           return "defaults differ for field: " + qualified_field_name;
4401         }
4402         if (!EqualByName(field.value.type, field_base->value.type)) {
4403           return "types differ for field: " + qualified_field_name;
4404         }
4405         if (field.offset64 != field_base->offset64) {
4406           return "offset types differ for field: " + qualified_field_name;
4407         }
4408       } else {
4409         // Doesn't have to exist, deleting fields is fine.
4410         // But we should check if there is a field that has the same offset
4411         // but is incompatible (in the case of field renaming).
4412         for (auto fbit = struct_def_base->fields.vec.begin();
4413              fbit != struct_def_base->fields.vec.end(); ++fbit) {
4414           field_base = *fbit;
4415           if (field.value.offset == field_base->value.offset) {
4416             renamed_fields.insert(field_base);
4417             if (!EqualByName(field.value.type, field_base->value.type)) {
4418               const auto qualified_field_base =
4419                   qualified_name + "." + field_base->name;
4420               return "field renamed to different type: " +
4421                      qualified_field_name + " (renamed from " +
4422                      qualified_field_base + ")";
4423             }
4424             break;
4425           }
4426         }
4427       }
4428     }
4429     // deletion of trailing fields are not allowed
4430     for (auto fit = struct_def_base->fields.vec.begin();
4431          fit != struct_def_base->fields.vec.end(); ++fit) {
4432       auto &field_base = **fit;
4433       // not a renamed field
4434       if (renamed_fields.find(&field_base) == renamed_fields.end()) {
4435         auto field = struct_def.fields.Lookup(field_base.name);
4436         if (!field) {
4437           return "field deleted: " + qualified_name + "." + field_base.name;
4438         }
4439       }
4440     }
4441   }
4442 
4443   for (auto eit = enums_.vec.begin(); eit != enums_.vec.end(); ++eit) {
4444     auto &enum_def = **eit;
4445     auto qualified_name =
4446         enum_def.defined_namespace->GetFullyQualifiedName(enum_def.name);
4447     auto enum_def_base = base.enums_.Lookup(qualified_name);
4448     if (!enum_def_base) continue;
4449     for (auto evit = enum_def.Vals().begin(); evit != enum_def.Vals().end();
4450          ++evit) {
4451       auto &enum_val = **evit;
4452       auto enum_val_base = enum_def_base->Lookup(enum_val.name);
4453       if (enum_val_base) {
4454         if (enum_val != *enum_val_base)
4455           return "values differ for enum: " + enum_val.name;
4456       }
4457     }
4458     // Check underlying type changes
4459     if (enum_def_base->underlying_type.base_type != enum_def.underlying_type.base_type) {
4460       return "underlying type differ for " + std::string(enum_def.is_union ? "union: " : "enum: ") + qualified_name;
4461     }
4462   }
4463   return "";
4464 }
4465 
4466 }  // namespace flatbuffers
4467