1 /*
2 * Copyright 2014 Google Inc. All rights reserved.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include <algorithm>
18 #include <list>
19
20 #ifdef _WIN32
21 #if !defined(_USE_MATH_DEFINES)
22 #define _USE_MATH_DEFINES // For M_PI.
23 #endif // !defined(_USE_MATH_DEFINES)
24 #endif // _WIN32
25
26 #include <math.h>
27
28 #include "flatbuffers/idl.h"
29 #include "flatbuffers/util.h"
30
31 namespace flatbuffers {
32
33 const char *const kTypeNames[] = {
34 #define FLATBUFFERS_TD(ENUM, IDLTYPE, CTYPE, JTYPE, GTYPE, NTYPE, PTYPE) \
35 IDLTYPE,
36 FLATBUFFERS_GEN_TYPES(FLATBUFFERS_TD)
37 #undef FLATBUFFERS_TD
38 nullptr
39 };
40
41 const char kTypeSizes[] = {
42 #define FLATBUFFERS_TD(ENUM, IDLTYPE, CTYPE, JTYPE, GTYPE, NTYPE, PTYPE) \
43 sizeof(CTYPE),
44 FLATBUFFERS_GEN_TYPES(FLATBUFFERS_TD)
45 #undef FLATBUFFERS_TD
46 };
47
48 // The enums in the reflection schema should match the ones we use internally.
49 // Compare the last element to check if these go out of sync.
50 static_assert(BASE_TYPE_UNION ==
51 static_cast<BaseType>(reflection::Union),
52 "enums don't match");
53
// Every parsing call is wrapped in ECHECK, which takes care of propagating
// errors up the recursive descent: it evaluates the call, inspects the
// CheckedError it produced, and returns it from the enclosing function
// immediately if it signals an error.
#define ECHECK(call) \
  { auto echeck_result_ = (call); \
    if (echeck_result_.Check()) return echeck_result_; }

// Next() and Expect() are invoked hundreds of times below, hence these
// shorthands:
#define NEXT() ECHECK(Next())
#define EXPECT(tok) ECHECK(Expect(tok))
63
ValidateUTF8(const std::string & str)64 static bool ValidateUTF8(const std::string &str) {
65 const char *s = &str[0];
66 const char * const sEnd = s + str.length();
67 while (s < sEnd) {
68 if (FromUTF8(&s) < 0) {
69 return false;
70 }
71 }
72 return true;
73 }
74
Error(const std::string & msg)75 CheckedError Parser::Error(const std::string &msg) {
76 error_ = file_being_parsed_.length() ? AbsolutePath(file_being_parsed_) : "";
77 #ifdef _WIN32
78 error_ += "(" + NumToString(line_) + ")"; // MSVC alike
79 #else
80 if (file_being_parsed_.length()) error_ += ":";
81 error_ += NumToString(line_) + ":0"; // gcc alike
82 #endif
83 error_ += ": error: " + msg;
84 return CheckedError(true);
85 }
86
NoError()87 inline CheckedError NoError() { return CheckedError(false); }
88
89 // Ensure that integer values we parse fit inside the declared integer type.
CheckBitsFit(int64_t val,size_t bits)90 CheckedError Parser::CheckBitsFit(int64_t val, size_t bits) {
91 // Left-shifting a 64-bit value by 64 bits or more is undefined
92 // behavior (C99 6.5.7), so check *before* we shift.
93 if (bits < 64) {
94 // Bits we allow to be used.
95 auto mask = static_cast<int64_t>((1ull << bits) - 1);
96 if ((val & ~mask) != 0 && // Positive or unsigned.
97 (val | mask) != -1) // Negative.
98 return Error("constant does not fit in a " + NumToString(bits) +
99 "-bit field");
100 }
101 return NoError();
102 }
103
104 // atot: templated version of atoi/atof: convert a string to an instance of T.
atot(const char * s,Parser & parser,T * val)105 template<typename T> inline CheckedError atot(const char *s, Parser &parser,
106 T *val) {
107 int64_t i = StringToInt(s);
108 ECHECK(parser.CheckBitsFit(i, sizeof(T) * 8));
109 *val = (T)i;
110 return NoError();
111 }
atot(const char * s,Parser & parser,uint64_t * val)112 template<> inline CheckedError atot<uint64_t>(const char *s, Parser &parser,
113 uint64_t *val) {
114 (void)parser;
115 *val = StringToUInt(s);
116 return NoError();
117 }
atot(const char * s,Parser & parser,bool * val)118 template<> inline CheckedError atot<bool>(const char *s, Parser &parser,
119 bool *val) {
120 (void)parser;
121 *val = 0 != atoi(s);
122 return NoError();
123 }
atot(const char * s,Parser & parser,float * val)124 template<> inline CheckedError atot<float>(const char *s, Parser &parser,
125 float *val) {
126 (void)parser;
127 *val = static_cast<float>(strtod(s, nullptr));
128 return NoError();
129 }
atot(const char * s,Parser & parser,double * val)130 template<> inline CheckedError atot<double>(const char *s, Parser &parser,
131 double *val) {
132 (void)parser;
133 *val = strtod(s, nullptr);
134 return NoError();
135 }
136
atot(const char * s,Parser & parser,Offset<void> * val)137 template<> inline CheckedError atot<Offset<void>>(const char *s, Parser &parser,
138 Offset<void> *val) {
139 (void)parser;
140 *val = Offset<void>(atoi(s));
141 return NoError();
142 }
143
GetFullyQualifiedName(const std::string & name,size_t max_components) const144 std::string Namespace::GetFullyQualifiedName(const std::string &name,
145 size_t max_components) const {
146 // Early exit if we don't have a defined namespace.
147 if (components.size() == 0 || !max_components) {
148 return name;
149 }
150 std::stringstream stream;
151 for (size_t i = 0; i < std::min(components.size(), max_components);
152 i++) {
153 if (i) {
154 stream << ".";
155 }
156 stream << components[i];
157 }
158 if (name.length()) stream << "." << name;
159 return stream.str();
160 }
161
162
163
164 // Declare tokens we'll use. Single character tokens are represented by their
165 // ascii character code (e.g. '{'), others above 256.
166 #define FLATBUFFERS_GEN_TOKENS(TD) \
167 TD(Eof, 256, "end of file") \
168 TD(StringConstant, 257, "string constant") \
169 TD(IntegerConstant, 258, "integer constant") \
170 TD(FloatConstant, 259, "float constant") \
171 TD(Identifier, 260, "identifier") \
172 TD(Table, 261, "table") \
173 TD(Struct, 262, "struct") \
174 TD(Enum, 263, "enum") \
175 TD(Union, 264, "union") \
176 TD(NameSpace, 265, "namespace") \
177 TD(RootType, 266, "root_type") \
178 TD(FileIdentifier, 267, "file_identifier") \
179 TD(FileExtension, 268, "file_extension") \
180 TD(Include, 269, "include") \
181 TD(Attribute, 270, "attribute") \
182 TD(Null, 271, "null") \
183 TD(Service, 272, "rpc_service") \
184 TD(NativeInclude, 273, "native_include")
185 #ifdef __GNUC__
186 __extension__ // Stop GCC complaining about trailing comma with -Wpendantic.
187 #endif
188 enum {
189 #define FLATBUFFERS_TOKEN(NAME, VALUE, STRING) kToken ## NAME = VALUE,
190 FLATBUFFERS_GEN_TOKENS(FLATBUFFERS_TOKEN)
191 #undef FLATBUFFERS_TOKEN
192 #define FLATBUFFERS_TD(ENUM, IDLTYPE, CTYPE, JTYPE, GTYPE, NTYPE, PTYPE) \
193 kToken ## ENUM,
194 FLATBUFFERS_GEN_TYPES(FLATBUFFERS_TD)
195 #undef FLATBUFFERS_TD
196 };
197
TokenToString(int t)198 static std::string TokenToString(int t) {
199 static const char *tokens[] = {
200 #define FLATBUFFERS_TOKEN(NAME, VALUE, STRING) STRING,
201 FLATBUFFERS_GEN_TOKENS(FLATBUFFERS_TOKEN)
202 #undef FLATBUFFERS_TOKEN
203 #define FLATBUFFERS_TD(ENUM, IDLTYPE, CTYPE, JTYPE, GTYPE, NTYPE, PTYPE) \
204 IDLTYPE,
205 FLATBUFFERS_GEN_TYPES(FLATBUFFERS_TD)
206 #undef FLATBUFFERS_TD
207 };
208 if (t < 256) { // A single ascii char token.
209 std::string s;
210 s.append(1, static_cast<char>(t));
211 return s;
212 } else { // Other tokens.
213 return tokens[t - 256];
214 }
215 }
216
TokenToStringId(int t)217 std::string Parser::TokenToStringId(int t) {
218 return TokenToString(t) + (t == kTokenIdentifier ? ": " + attribute_ : "");
219 }
220
221 // Parses exactly nibbles worth of hex digits into a number, or error.
ParseHexNum(int nibbles,uint64_t * val)222 CheckedError Parser::ParseHexNum(int nibbles, uint64_t *val) {
223 for (int i = 0; i < nibbles; i++)
224 if (!isxdigit(static_cast<const unsigned char>(cursor_[i])))
225 return Error("escape code must be followed by " + NumToString(nibbles) +
226 " hex digits");
227 std::string target(cursor_, cursor_ + nibbles);
228 *val = StringToUInt(target.c_str(), nullptr, 16);
229 cursor_ += nibbles;
230 return NoError();
231 }
232
SkipByteOrderMark()233 CheckedError Parser::SkipByteOrderMark() {
234 if (static_cast<unsigned char>(*cursor_) != 0xef) return NoError();
235 cursor_++;
236 if (static_cast<unsigned char>(*cursor_) != 0xbb) return Error("invalid utf-8 byte order mark");
237 cursor_++;
238 if (static_cast<unsigned char>(*cursor_) != 0xbf) return Error("invalid utf-8 byte order mark");
239 cursor_++;
240 return NoError();
241 }
242
// True when `c` may begin an identifier: an underscore or any character
// isalpha() accepts.
bool IsIdentifierStart(char c) {
  if (c == '_') return true;
  // isalpha() needs a value representable as unsigned char.
  return isalpha(static_cast<unsigned char>(c)) != 0;
}
246
Next()247 CheckedError Parser::Next() {
248 doc_comment_.clear();
249 bool seen_newline = false;
250 attribute_.clear();
251 for (;;) {
252 char c = *cursor_++;
253 token_ = c;
254 switch (c) {
255 case '\0': cursor_--; token_ = kTokenEof; return NoError();
256 case ' ': case '\r': case '\t': break;
257 case '\n': line_++; seen_newline = true; break;
258 case '{': case '}': case '(': case ')': case '[': case ']':
259 case ',': case ':': case ';': case '=': return NoError();
260 case '.':
261 if(!isdigit(static_cast<const unsigned char>(*cursor_))) return NoError();
262 return Error("floating point constant can\'t start with \".\"");
263 case '\"':
264 case '\'': {
265 int unicode_high_surrogate = -1;
266
267 while (*cursor_ != c) {
268 if (*cursor_ < ' ' && *cursor_ >= 0)
269 return Error("illegal character in string constant");
270 if (*cursor_ == '\\') {
271 cursor_++;
272 if (unicode_high_surrogate != -1 &&
273 *cursor_ != 'u') {
274 return Error(
275 "illegal Unicode sequence (unpaired high surrogate)");
276 }
277 switch (*cursor_) {
278 case 'n': attribute_ += '\n'; cursor_++; break;
279 case 't': attribute_ += '\t'; cursor_++; break;
280 case 'r': attribute_ += '\r'; cursor_++; break;
281 case 'b': attribute_ += '\b'; cursor_++; break;
282 case 'f': attribute_ += '\f'; cursor_++; break;
283 case '\"': attribute_ += '\"'; cursor_++; break;
284 case '\'': attribute_ += '\''; cursor_++; break;
285 case '\\': attribute_ += '\\'; cursor_++; break;
286 case '/': attribute_ += '/'; cursor_++; break;
287 case 'x': { // Not in the JSON standard
288 cursor_++;
289 uint64_t val;
290 ECHECK(ParseHexNum(2, &val));
291 attribute_ += static_cast<char>(val);
292 break;
293 }
294 case 'u': {
295 cursor_++;
296 uint64_t val;
297 ECHECK(ParseHexNum(4, &val));
298 if (val >= 0xD800 && val <= 0xDBFF) {
299 if (unicode_high_surrogate != -1) {
300 return Error(
301 "illegal Unicode sequence (multiple high surrogates)");
302 } else {
303 unicode_high_surrogate = static_cast<int>(val);
304 }
305 } else if (val >= 0xDC00 && val <= 0xDFFF) {
306 if (unicode_high_surrogate == -1) {
307 return Error(
308 "illegal Unicode sequence (unpaired low surrogate)");
309 } else {
310 int code_point = 0x10000 +
311 ((unicode_high_surrogate & 0x03FF) << 10) +
312 (val & 0x03FF);
313 ToUTF8(code_point, &attribute_);
314 unicode_high_surrogate = -1;
315 }
316 } else {
317 if (unicode_high_surrogate != -1) {
318 return Error(
319 "illegal Unicode sequence (unpaired high surrogate)");
320 }
321 ToUTF8(static_cast<int>(val), &attribute_);
322 }
323 break;
324 }
325 default: return Error("unknown escape code in string constant");
326 }
327 } else { // printable chars + UTF-8 bytes
328 if (unicode_high_surrogate != -1) {
329 return Error(
330 "illegal Unicode sequence (unpaired high surrogate)");
331 }
332 attribute_ += *cursor_++;
333 }
334 }
335 if (unicode_high_surrogate != -1) {
336 return Error(
337 "illegal Unicode sequence (unpaired high surrogate)");
338 }
339 cursor_++;
340 if (!opts.allow_non_utf8 && !ValidateUTF8(attribute_)) {
341 return Error("illegal UTF-8 sequence");
342 }
343 token_ = kTokenStringConstant;
344 return NoError();
345 }
346 case '/':
347 if (*cursor_ == '/') {
348 const char *start = ++cursor_;
349 while (*cursor_ && *cursor_ != '\n' && *cursor_ != '\r') cursor_++;
350 if (*start == '/') { // documentation comment
351 if (cursor_ != source_ && !seen_newline)
352 return Error(
353 "a documentation comment should be on a line on its own");
354 doc_comment_.push_back(std::string(start + 1, cursor_));
355 }
356 break;
357 } else if (*cursor_ == '*') {
358 cursor_++;
359 // TODO: make nested.
360 while (*cursor_ != '*' || cursor_[1] != '/') {
361 if (*cursor_ == '\n') line_++;
362 if (!*cursor_) return Error("end of file in comment");
363 cursor_++;
364 }
365 cursor_ += 2;
366 break;
367 }
368 // fall thru
369 default:
370 if (IsIdentifierStart(c)) {
371 // Collect all chars of an identifier:
372 const char *start = cursor_ - 1;
373 while (isalnum(static_cast<unsigned char>(*cursor_)) ||
374 *cursor_ == '_')
375 cursor_++;
376 attribute_.append(start, cursor_);
377 // First, see if it is a type keyword from the table of types:
378 #define FLATBUFFERS_TD(ENUM, IDLTYPE, CTYPE, JTYPE, GTYPE, NTYPE, \
379 PTYPE) \
380 if (attribute_ == IDLTYPE) { \
381 token_ = kToken ## ENUM; \
382 return NoError(); \
383 }
384 FLATBUFFERS_GEN_TYPES(FLATBUFFERS_TD)
385 #undef FLATBUFFERS_TD
386 // If it's a boolean constant keyword, turn those into integers,
387 // which simplifies our logic downstream.
388 if (attribute_ == "true" || attribute_ == "false") {
389 attribute_ = NumToString(attribute_ == "true");
390 token_ = kTokenIntegerConstant;
391 return NoError();
392 }
393 // Check for declaration keywords:
394 if (attribute_ == "table") {
395 token_ = kTokenTable;
396 return NoError();
397 }
398 if (attribute_ == "struct") {
399 token_ = kTokenStruct;
400 return NoError();
401 }
402 if (attribute_ == "enum") {
403 token_ = kTokenEnum;
404 return NoError();
405 }
406 if (attribute_ == "union") {
407 token_ = kTokenUnion;
408 return NoError();
409 }
410 if (attribute_ == "namespace") {
411 token_ = kTokenNameSpace;
412 return NoError();
413 }
414 if (attribute_ == "root_type") {
415 token_ = kTokenRootType;
416 return NoError();
417 }
418 if (attribute_ == "include") {
419 token_ = kTokenInclude;
420 return NoError();
421 }
422 if (attribute_ == "attribute") {
423 token_ = kTokenAttribute;
424 return NoError();
425 }
426 if (attribute_ == "file_identifier") {
427 token_ = kTokenFileIdentifier;
428 return NoError();
429 }
430 if (attribute_ == "file_extension") {
431 token_ = kTokenFileExtension;
432 return NoError();
433 }
434 if (attribute_ == "null") {
435 token_ = kTokenNull;
436 return NoError();
437 }
438 if (attribute_ == "rpc_service") {
439 token_ = kTokenService;
440 return NoError();
441 }
442 if (attribute_ == "native_include") {
443 token_ = kTokenNativeInclude;
444 return NoError();
445 }
446 // If not, it is a user-defined identifier:
447 token_ = kTokenIdentifier;
448 return NoError();
449 } else if (isdigit(static_cast<unsigned char>(c)) || c == '-') {
450 const char *start = cursor_ - 1;
451 if (c == '-' && *cursor_ == '0' &&
452 (cursor_[1] == 'x' || cursor_[1] == 'X')) {
453 ++start;
454 ++cursor_;
455 attribute_.append(&c, &c + 1);
456 c = '0';
457 }
458 if (c == '0' && (*cursor_ == 'x' || *cursor_ == 'X')) {
459 cursor_++;
460 while (isxdigit(static_cast<unsigned char>(*cursor_))) cursor_++;
461 attribute_.append(start + 2, cursor_);
462 attribute_ = NumToString(static_cast<int64_t>(
463 StringToUInt(attribute_.c_str(), nullptr, 16)));
464 token_ = kTokenIntegerConstant;
465 return NoError();
466 }
467 while (isdigit(static_cast<unsigned char>(*cursor_))) cursor_++;
468 if (*cursor_ == '.' || *cursor_ == 'e' || *cursor_ == 'E') {
469 if (*cursor_ == '.') {
470 cursor_++;
471 while (isdigit(static_cast<unsigned char>(*cursor_))) cursor_++;
472 }
473 // See if this float has a scientific notation suffix. Both JSON
474 // and C++ (through strtod() we use) have the same format:
475 if (*cursor_ == 'e' || *cursor_ == 'E') {
476 cursor_++;
477 if (*cursor_ == '+' || *cursor_ == '-') cursor_++;
478 while (isdigit(static_cast<unsigned char>(*cursor_))) cursor_++;
479 }
480 token_ = kTokenFloatConstant;
481 } else {
482 token_ = kTokenIntegerConstant;
483 }
484 attribute_.append(start, cursor_);
485 return NoError();
486 }
487 std::string ch;
488 ch = c;
489 if (c < ' ' || c > '~') ch = "code: " + NumToString(c);
490 return Error("illegal character: " + ch);
491 }
492 }
493 }
494
495 // Check if a given token is next.
Is(int t)496 bool Parser::Is(int t) {
497 return t == token_;
498 }
499
500 // Expect a given token to be next, consume it, or error if not present.
Expect(int t)501 CheckedError Parser::Expect(int t) {
502 if (t != token_) {
503 return Error("expecting: " + TokenToString(t) + " instead got: " +
504 TokenToStringId(token_));
505 }
506 NEXT();
507 return NoError();
508 }
509
ParseNamespacing(std::string * id,std::string * last)510 CheckedError Parser::ParseNamespacing(std::string *id, std::string *last) {
511 while (Is('.')) {
512 NEXT();
513 *id += ".";
514 *id += attribute_;
515 if (last) *last = attribute_;
516 EXPECT(kTokenIdentifier);
517 }
518 return NoError();
519 }
520
LookupEnum(const std::string & id)521 EnumDef *Parser::LookupEnum(const std::string &id) {
522 // Search thru parent namespaces.
523 for (int components = static_cast<int>(namespaces_.back()->components.size());
524 components >= 0; components--) {
525 auto ed = enums_.Lookup(
526 namespaces_.back()->GetFullyQualifiedName(id, components));
527 if (ed) return ed;
528 }
529 return nullptr;
530 }
531
ParseTypeIdent(Type & type)532 CheckedError Parser::ParseTypeIdent(Type &type) {
533 std::string id = attribute_;
534 EXPECT(kTokenIdentifier);
535 ECHECK(ParseNamespacing(&id, nullptr));
536 auto enum_def = LookupEnum(id);
537 if (enum_def) {
538 type = enum_def->underlying_type;
539 if (enum_def->is_union) type.base_type = BASE_TYPE_UNION;
540 } else {
541 type.base_type = BASE_TYPE_STRUCT;
542 type.struct_def = LookupCreateStruct(id);
543 }
544 return NoError();
545 }
546
547 // Parse any IDL type.
ParseType(Type & type)548 CheckedError Parser::ParseType(Type &type) {
549 if (token_ >= kTokenBOOL && token_ <= kTokenSTRING) {
550 type.base_type = static_cast<BaseType>(token_ - kTokenNONE);
551 NEXT();
552 } else {
553 if (token_ == kTokenIdentifier) {
554 ECHECK(ParseTypeIdent(type));
555 } else if (token_ == '[') {
556 NEXT();
557 Type subtype;
558 ECHECK(ParseType(subtype));
559 if (subtype.base_type == BASE_TYPE_VECTOR) {
560 // We could support this, but it will complicate things, and it's
561 // easier to work around with a struct around the inner vector.
562 return Error(
563 "nested vector types not supported (wrap in table first).");
564 }
565 type = Type(BASE_TYPE_VECTOR, subtype.struct_def, subtype.enum_def);
566 type.element = subtype.base_type;
567 EXPECT(']');
568 } else {
569 return Error("illegal type syntax");
570 }
571 }
572 return NoError();
573 }
574
AddField(StructDef & struct_def,const std::string & name,const Type & type,FieldDef ** dest)575 CheckedError Parser::AddField(StructDef &struct_def, const std::string &name,
576 const Type &type, FieldDef **dest) {
577 auto &field = *new FieldDef();
578 field.value.offset =
579 FieldIndexToOffset(static_cast<voffset_t>(struct_def.fields.vec.size()));
580 field.name = name;
581 field.file = struct_def.file;
582 field.value.type = type;
583 if (struct_def.fixed) { // statically compute the field offset
584 auto size = InlineSize(type);
585 auto alignment = InlineAlignment(type);
586 // structs_ need to have a predictable format, so we need to align to
587 // the largest scalar
588 struct_def.minalign = std::max(struct_def.minalign, alignment);
589 struct_def.PadLastField(alignment);
590 field.value.offset = static_cast<voffset_t>(struct_def.bytesize);
591 struct_def.bytesize += size;
592 }
593 if (struct_def.fields.Add(name, &field))
594 return Error("field already exists: " + name);
595 *dest = &field;
596 return NoError();
597 }
598
ParseField(StructDef & struct_def)599 CheckedError Parser::ParseField(StructDef &struct_def) {
600 std::string name = attribute_;
601 std::vector<std::string> dc = doc_comment_;
602 EXPECT(kTokenIdentifier);
603 EXPECT(':');
604 Type type;
605 ECHECK(ParseType(type));
606
607 if (struct_def.fixed && !IsScalar(type.base_type) && !IsStruct(type))
608 return Error("structs_ may contain only scalar or struct fields");
609
610 FieldDef *typefield = nullptr;
611 if (type.base_type == BASE_TYPE_UNION) {
612 // For union fields, add a second auto-generated field to hold the type,
613 // with a special suffix.
614 ECHECK(AddField(struct_def, name + UnionTypeFieldSuffix(),
615 type.enum_def->underlying_type, &typefield));
616 } else if (type.base_type == BASE_TYPE_VECTOR &&
617 type.element == BASE_TYPE_UNION) {
618 // Only cpp supports the union vector feature so far.
619 if (opts.lang_to_generate != IDLOptions::kCpp) {
620 return Error("Vectors of unions are not yet supported in all "
621 "the specified programming languages.");
622 }
623 // For vector of union fields, add a second auto-generated vector field to
624 // hold the types, with a special suffix.
625 Type union_vector(BASE_TYPE_VECTOR, nullptr, type.enum_def);
626 union_vector.element = BASE_TYPE_UTYPE;
627 ECHECK(AddField(struct_def, name + UnionTypeFieldSuffix(),
628 union_vector, &typefield));
629 }
630
631 FieldDef *field;
632 ECHECK(AddField(struct_def, name, type, &field));
633
634 if (token_ == '=') {
635 NEXT();
636 if (!IsScalar(type.base_type))
637 return Error("default values currently only supported for scalars");
638 ECHECK(ParseSingleValue(field->value));
639 }
640 if (IsFloat(field->value.type.base_type)) {
641 if (!strpbrk(field->value.constant.c_str(), ".eE"))
642 field->value.constant += ".0";
643 }
644
645 if (type.enum_def &&
646 IsScalar(type.base_type) &&
647 !struct_def.fixed &&
648 !type.enum_def->attributes.Lookup("bit_flags") &&
649 !type.enum_def->ReverseLookup(static_cast<int>(
650 StringToInt(field->value.constant.c_str()))))
651 return Error("enum " + type.enum_def->name +
652 " does not have a declaration for this field\'s default of " +
653 field->value.constant);
654
655 field->doc_comment = dc;
656 ECHECK(ParseMetaData(&field->attributes));
657 field->deprecated = field->attributes.Lookup("deprecated") != nullptr;
658 auto hash_name = field->attributes.Lookup("hash");
659 if (hash_name) {
660 switch (type.base_type) {
661 case BASE_TYPE_INT:
662 case BASE_TYPE_UINT: {
663 if (FindHashFunction32(hash_name->constant.c_str()) == nullptr)
664 return Error("Unknown hashing algorithm for 32 bit types: " +
665 hash_name->constant);
666 break;
667 }
668 case BASE_TYPE_LONG:
669 case BASE_TYPE_ULONG: {
670 if (FindHashFunction64(hash_name->constant.c_str()) == nullptr)
671 return Error("Unknown hashing algorithm for 64 bit types: " +
672 hash_name->constant);
673 break;
674 }
675 default:
676 return Error(
677 "only int, uint, long and ulong data types support hashing.");
678 }
679 }
680 auto cpp_type = field->attributes.Lookup("cpp_type");
681 if (cpp_type) {
682 if (!hash_name)
683 return Error("cpp_type can only be used with a hashed field");
684 }
685 if (field->deprecated && struct_def.fixed)
686 return Error("can't deprecate fields in a struct");
687 field->required = field->attributes.Lookup("required") != nullptr;
688 if (field->required && (struct_def.fixed ||
689 IsScalar(field->value.type.base_type)))
690 return Error("only non-scalar fields in tables may be 'required'");
691 field->key = field->attributes.Lookup("key") != nullptr;
692 if (field->key) {
693 if (struct_def.has_key)
694 return Error("only one field may be set as 'key'");
695 struct_def.has_key = true;
696 if (!IsScalar(field->value.type.base_type)) {
697 field->required = true;
698 if (field->value.type.base_type != BASE_TYPE_STRING)
699 return Error("'key' field must be string or scalar type");
700 }
701 }
702
703 field->native_inline = field->attributes.Lookup("native_inline") != nullptr;
704 if (field->native_inline && !IsStruct(field->value.type))
705 return Error("native_inline can only be defined on structs'");
706
707 auto nested = field->attributes.Lookup("nested_flatbuffer");
708 if (nested) {
709 if (nested->type.base_type != BASE_TYPE_STRING)
710 return Error(
711 "nested_flatbuffer attribute must be a string (the root type)");
712 if (field->value.type.base_type != BASE_TYPE_VECTOR ||
713 field->value.type.element != BASE_TYPE_UCHAR)
714 return Error(
715 "nested_flatbuffer attribute may only apply to a vector of ubyte");
716 // This will cause an error if the root type of the nested flatbuffer
717 // wasn't defined elsewhere.
718 LookupCreateStruct(nested->constant);
719 }
720
721 if (typefield) {
722 // If this field is a union, and it has a manually assigned id,
723 // the automatically added type field should have an id as well (of N - 1).
724 auto attr = field->attributes.Lookup("id");
725 if (attr) {
726 auto id = atoi(attr->constant.c_str());
727 auto val = new Value();
728 val->type = attr->type;
729 val->constant = NumToString(id - 1);
730 typefield->attributes.Add("id", val);
731 }
732 }
733
734 EXPECT(';');
735 return NoError();
736 }
737
ParseAnyValue(Value & val,FieldDef * field,size_t parent_fieldn,const StructDef * parent_struct_def)738 CheckedError Parser::ParseAnyValue(Value &val, FieldDef *field,
739 size_t parent_fieldn,
740 const StructDef *parent_struct_def) {
741 switch (val.type.base_type) {
742 case BASE_TYPE_UNION: {
743 assert(field);
744 std::string constant;
745 // Find corresponding type field we may have already parsed.
746 for (auto elem = field_stack_.rbegin();
747 elem != field_stack_.rbegin() + parent_fieldn; ++elem) {
748 auto &type = elem->second->value.type;
749 if (type.base_type == BASE_TYPE_UTYPE &&
750 type.enum_def == val.type.enum_def) {
751 constant = elem->first.constant;
752 break;
753 }
754 }
755 if (constant.empty()) {
756 // We haven't seen the type field yet. Sadly a lot of JSON writers
757 // output these in alphabetical order, meaning it comes after this
758 // value. So we scan past the value to find it, then come back here.
759 auto type_name = field->name + UnionTypeFieldSuffix();
760 assert(parent_struct_def);
761 auto type_field = parent_struct_def->fields.Lookup(type_name);
762 assert(type_field); // Guaranteed by ParseField().
763 // Remember where we are in the source file, so we can come back here.
764 auto backup = *static_cast<ParserState *>(this);
765 ECHECK(SkipAnyJsonValue()); // The table.
766 EXPECT(',');
767 auto next_name = attribute_;
768 if (Is(kTokenStringConstant)) {
769 NEXT();
770 } else {
771 EXPECT(kTokenIdentifier);
772 }
773 if (next_name != type_name)
774 return Error("missing type field after this union value: " +
775 type_name);
776 EXPECT(':');
777 Value type_val = type_field->value;
778 ECHECK(ParseAnyValue(type_val, type_field, 0, nullptr));
779 constant = type_val.constant;
780 // Got the information we needed, now rewind:
781 *static_cast<ParserState *>(this) = backup;
782 }
783 uint8_t enum_idx;
784 ECHECK(atot(constant.c_str(), *this, &enum_idx));
785 auto enum_val = val.type.enum_def->ReverseLookup(enum_idx);
786 if (!enum_val) return Error("illegal type id for: " + field->name);
787 ECHECK(ParseTable(*enum_val->struct_def, &val.constant, nullptr));
788 break;
789 }
790 case BASE_TYPE_STRUCT:
791 ECHECK(ParseTable(*val.type.struct_def, &val.constant, nullptr));
792 break;
793 case BASE_TYPE_STRING: {
794 auto s = attribute_;
795 EXPECT(kTokenStringConstant);
796 val.constant = NumToString(builder_.CreateString(s).o);
797 break;
798 }
799 case BASE_TYPE_VECTOR: {
800 EXPECT('[');
801 uoffset_t off;
802 ECHECK(ParseVector(val.type.VectorType(), &off));
803 val.constant = NumToString(off);
804 break;
805 }
806 case BASE_TYPE_INT:
807 case BASE_TYPE_UINT:
808 case BASE_TYPE_LONG:
809 case BASE_TYPE_ULONG: {
810 if (field && field->attributes.Lookup("hash") &&
811 (token_ == kTokenIdentifier || token_ == kTokenStringConstant)) {
812 ECHECK(ParseHash(val, field));
813 } else {
814 ECHECK(ParseSingleValue(val));
815 }
816 break;
817 }
818 default:
819 ECHECK(ParseSingleValue(val));
820 break;
821 }
822 return NoError();
823 }
824
SerializeStruct(const StructDef & struct_def,const Value & val)825 void Parser::SerializeStruct(const StructDef &struct_def, const Value &val) {
826 assert(val.constant.length() == struct_def.bytesize);
827 builder_.Align(struct_def.minalign);
828 builder_.PushBytes(reinterpret_cast<const uint8_t *>(val.constant.c_str()),
829 struct_def.bytesize);
830 builder_.AddStructOffset(val.offset, builder_.GetSize());
831 }
832
ParseTable(const StructDef & struct_def,std::string * value,uoffset_t * ovalue)833 CheckedError Parser::ParseTable(const StructDef &struct_def, std::string *value,
834 uoffset_t *ovalue) {
835 EXPECT('{');
836 size_t fieldn = 0;
837 for (;;) {
838 if ((!opts.strict_json || !fieldn) && Is('}')) { NEXT(); break; }
839 std::string name = attribute_;
840 if (Is(kTokenStringConstant)) {
841 NEXT();
842 } else {
843 EXPECT(opts.strict_json ? kTokenStringConstant : kTokenIdentifier);
844 }
845 auto field = struct_def.fields.Lookup(name);
846 if (!field) {
847 if (!opts.skip_unexpected_fields_in_json) {
848 return Error("unknown field: " + name);
849 } else {
850 EXPECT(':');
851 ECHECK(SkipAnyJsonValue());
852 }
853 } else {
854 EXPECT(':');
855 if (Is(kTokenNull)) {
856 NEXT(); // Ignore this field.
857 } else {
858 Value val = field->value;
859 ECHECK(ParseAnyValue(val, field, fieldn, &struct_def));
860 // Hardcoded insertion-sort with error-check.
861 // If fields are specified in order, then this loop exits immediately.
862 auto elem = field_stack_.rbegin();
863 for (; elem != field_stack_.rbegin() + fieldn; ++elem) {
864 auto existing_field = elem->second;
865 if (existing_field == field)
866 return Error("field set more than once: " + field->name);
867 if (existing_field->value.offset < field->value.offset) break;
868 }
869 // Note: elem points to before the insertion point, thus .base() points
870 // to the correct spot.
871 field_stack_.insert(elem.base(), std::make_pair(val, field));
872 fieldn++;
873 }
874 }
875 if (Is('}')) { NEXT(); break; }
876 EXPECT(',');
877 }
878
879 if (struct_def.fixed && fieldn != struct_def.fields.vec.size())
880 return Error("struct: wrong number of initializers: " + struct_def.name);
881
882 auto start = struct_def.fixed
883 ? builder_.StartStruct(struct_def.minalign)
884 : builder_.StartTable();
885
886 for (size_t size = struct_def.sortbysize ? sizeof(largest_scalar_t) : 1;
887 size;
888 size /= 2) {
889 // Go through elements in reverse, since we're building the data backwards.
890 for (auto it = field_stack_.rbegin();
891 it != field_stack_.rbegin() + fieldn; ++it) {
892 auto &field_value = it->first;
893 auto field = it->second;
894 if (!struct_def.sortbysize ||
895 size == SizeOf(field_value.type.base_type)) {
896 switch (field_value.type.base_type) {
897 #define FLATBUFFERS_TD(ENUM, IDLTYPE, CTYPE, JTYPE, GTYPE, NTYPE, \
898 PTYPE) \
899 case BASE_TYPE_ ## ENUM: \
900 builder_.Pad(field->padding); \
901 if (struct_def.fixed) { \
902 CTYPE val; \
903 ECHECK(atot(field_value.constant.c_str(), *this, &val)); \
904 builder_.PushElement(val); \
905 } else { \
906 CTYPE val, valdef; \
907 ECHECK(atot(field_value.constant.c_str(), *this, &val)); \
908 ECHECK(atot(field->value.constant.c_str(), *this, &valdef)); \
909 builder_.AddElement(field_value.offset, val, valdef); \
910 } \
911 break;
912 FLATBUFFERS_GEN_TYPES_SCALAR(FLATBUFFERS_TD);
913 #undef FLATBUFFERS_TD
914 #define FLATBUFFERS_TD(ENUM, IDLTYPE, CTYPE, JTYPE, GTYPE, NTYPE, \
915 PTYPE) \
916 case BASE_TYPE_ ## ENUM: \
917 builder_.Pad(field->padding); \
918 if (IsStruct(field->value.type)) { \
919 SerializeStruct(*field->value.type.struct_def, field_value); \
920 } else { \
921 CTYPE val; \
922 ECHECK(atot(field_value.constant.c_str(), *this, &val)); \
923 builder_.AddOffset(field_value.offset, val); \
924 } \
925 break;
926 FLATBUFFERS_GEN_TYPES_POINTER(FLATBUFFERS_TD);
927 #undef FLATBUFFERS_TD
928 }
929 }
930 }
931 }
932 for (size_t i = 0; i < fieldn; i++) field_stack_.pop_back();
933
934 if (struct_def.fixed) {
935 builder_.ClearOffsets();
936 builder_.EndStruct();
937 assert(value);
938 // Temporarily store this struct in the value string, since it is to
939 // be serialized in-place elsewhere.
940 value->assign(
941 reinterpret_cast<const char *>(builder_.GetCurrentBufferPointer()),
942 struct_def.bytesize);
943 builder_.PopBytes(struct_def.bytesize);
944 assert(!ovalue);
945 } else {
946 auto val = builder_.EndTable(start,
947 static_cast<voffset_t>(struct_def.fields.vec.size()));
948 if (ovalue) *ovalue = val;
949 if (value) *value = NumToString(val);
950 }
951 return NoError();
952 }
953
ParseVector(const Type & type,uoffset_t * ovalue)954 CheckedError Parser::ParseVector(const Type &type, uoffset_t *ovalue) {
955 int count = 0;
956 for (;;) {
957 if ((!opts.strict_json || !count) && Is(']')) { NEXT(); break; }
958 Value val;
959 val.type = type;
960 ECHECK(ParseAnyValue(val, nullptr, 0, nullptr));
961 field_stack_.push_back(std::make_pair(val, nullptr));
962 count++;
963 if (Is(']')) { NEXT(); break; }
964 EXPECT(',');
965 }
966
967 builder_.StartVector(count * InlineSize(type) / InlineAlignment(type),
968 InlineAlignment(type));
969 for (int i = 0; i < count; i++) {
970 // start at the back, since we're building the data backwards.
971 auto &val = field_stack_.back().first;
972 switch (val.type.base_type) {
973 #define FLATBUFFERS_TD(ENUM, IDLTYPE, CTYPE, JTYPE, GTYPE, NTYPE, PTYPE) \
974 case BASE_TYPE_ ## ENUM: \
975 if (IsStruct(val.type)) SerializeStruct(*val.type.struct_def, val); \
976 else { \
977 CTYPE elem; \
978 ECHECK(atot(val.constant.c_str(), *this, &elem)); \
979 builder_.PushElement(elem); \
980 } \
981 break;
982 FLATBUFFERS_GEN_TYPES(FLATBUFFERS_TD)
983 #undef FLATBUFFERS_TD
984 }
985 field_stack_.pop_back();
986 }
987
988 builder_.ClearOffsets();
989 *ovalue = builder_.EndVector(count);
990 return NoError();
991 }
992
ParseMetaData(SymbolTable<Value> * attributes)993 CheckedError Parser::ParseMetaData(SymbolTable<Value> *attributes) {
994 if (Is('(')) {
995 NEXT();
996 for (;;) {
997 auto name = attribute_;
998 EXPECT(kTokenIdentifier);
999 if (known_attributes_.find(name) == known_attributes_.end())
1000 return Error("user define attributes must be declared before use: " +
1001 name);
1002 auto e = new Value();
1003 attributes->Add(name, e);
1004 if (Is(':')) {
1005 NEXT();
1006 ECHECK(ParseSingleValue(*e));
1007 }
1008 if (Is(')')) { NEXT(); break; }
1009 EXPECT(',');
1010 }
1011 }
1012 return NoError();
1013 }
1014
TryTypedValue(int dtoken,bool check,Value & e,BaseType req,bool * destmatch)1015 CheckedError Parser::TryTypedValue(int dtoken, bool check, Value &e,
1016 BaseType req, bool *destmatch) {
1017 bool match = dtoken == token_;
1018 if (match) {
1019 *destmatch = true;
1020 e.constant = attribute_;
1021 if (!check) {
1022 if (e.type.base_type == BASE_TYPE_NONE) {
1023 e.type.base_type = req;
1024 } else {
1025 return Error(std::string("type mismatch: expecting: ") +
1026 kTypeNames[e.type.base_type] +
1027 ", found: " +
1028 kTypeNames[req]);
1029 }
1030 }
1031 NEXT();
1032 }
1033 return NoError();
1034 }
1035
ParseEnumFromString(Type & type,int64_t * result)1036 CheckedError Parser::ParseEnumFromString(Type &type, int64_t *result) {
1037 *result = 0;
1038 // Parse one or more enum identifiers, separated by spaces.
1039 const char *next = attribute_.c_str();
1040 do {
1041 const char *divider = strchr(next, ' ');
1042 std::string word;
1043 if (divider) {
1044 word = std::string(next, divider);
1045 next = divider + strspn(divider, " ");
1046 } else {
1047 word = next;
1048 next += word.length();
1049 }
1050 if (type.enum_def) { // The field has an enum type
1051 auto enum_val = type.enum_def->vals.Lookup(word);
1052 if (!enum_val)
1053 return Error("unknown enum value: " + word +
1054 ", for enum: " + type.enum_def->name);
1055 *result |= enum_val->value;
1056 } else { // No enum type, probably integral field.
1057 if (!IsInteger(type.base_type))
1058 return Error("not a valid value for this field: " + word);
1059 // TODO: could check if its a valid number constant here.
1060 const char *dot = strrchr(word.c_str(), '.');
1061 if (!dot)
1062 return Error("enum values need to be qualified by an enum type");
1063 std::string enum_def_str(word.c_str(), dot);
1064 std::string enum_val_str(dot + 1, word.c_str() + word.length());
1065 auto enum_def = LookupEnum(enum_def_str);
1066 if (!enum_def) return Error("unknown enum: " + enum_def_str);
1067 auto enum_val = enum_def->vals.Lookup(enum_val_str);
1068 if (!enum_val) return Error("unknown enum value: " + enum_val_str);
1069 *result |= enum_val->value;
1070 }
1071 } while(*next);
1072 return NoError();
1073 }
1074
1075
ParseHash(Value & e,FieldDef * field)1076 CheckedError Parser::ParseHash(Value &e, FieldDef* field) {
1077 assert(field);
1078 Value *hash_name = field->attributes.Lookup("hash");
1079 switch (e.type.base_type) {
1080 case BASE_TYPE_INT:
1081 case BASE_TYPE_UINT: {
1082 auto hash = FindHashFunction32(hash_name->constant.c_str());
1083 uint32_t hashed_value = hash(attribute_.c_str());
1084 e.constant = NumToString(hashed_value);
1085 break;
1086 }
1087 case BASE_TYPE_LONG:
1088 case BASE_TYPE_ULONG: {
1089 auto hash = FindHashFunction64(hash_name->constant.c_str());
1090 uint64_t hashed_value = hash(attribute_.c_str());
1091 e.constant = NumToString(hashed_value);
1092 break;
1093 }
1094 default:
1095 assert(0);
1096 }
1097 NEXT();
1098 return NoError();
1099 }
1100
ParseSingleValue(Value & e)1101 CheckedError Parser::ParseSingleValue(Value &e) {
1102 // First see if this could be a conversion function:
1103 if (token_ == kTokenIdentifier && *cursor_ == '(') {
1104 auto functionname = attribute_;
1105 NEXT();
1106 EXPECT('(');
1107 ECHECK(ParseSingleValue(e));
1108 EXPECT(')');
1109 #define FLATBUFFERS_FN_DOUBLE(name, op) \
1110 if (functionname == name) { \
1111 auto x = strtod(e.constant.c_str(), nullptr); \
1112 e.constant = NumToString(op); \
1113 }
1114 FLATBUFFERS_FN_DOUBLE("deg", x / M_PI * 180);
1115 FLATBUFFERS_FN_DOUBLE("rad", x * M_PI / 180);
1116 FLATBUFFERS_FN_DOUBLE("sin", sin(x));
1117 FLATBUFFERS_FN_DOUBLE("cos", cos(x));
1118 FLATBUFFERS_FN_DOUBLE("tan", tan(x));
1119 FLATBUFFERS_FN_DOUBLE("asin", asin(x));
1120 FLATBUFFERS_FN_DOUBLE("acos", acos(x));
1121 FLATBUFFERS_FN_DOUBLE("atan", atan(x));
1122 // TODO(wvo): add more useful conversion functions here.
1123 #undef FLATBUFFERS_FN_DOUBLE
1124 // Then check if this could be a string/identifier enum value:
1125 } else if (e.type.base_type != BASE_TYPE_STRING &&
1126 e.type.base_type != BASE_TYPE_NONE &&
1127 (token_ == kTokenIdentifier || token_ == kTokenStringConstant)) {
1128 if (IsIdentifierStart(attribute_[0])) { // Enum value.
1129 int64_t val;
1130 ECHECK(ParseEnumFromString(e.type, &val));
1131 e.constant = NumToString(val);
1132 NEXT();
1133 } else { // Numeric constant in string.
1134 if (IsInteger(e.type.base_type)) {
1135 char *end;
1136 e.constant = NumToString(StringToInt(attribute_.c_str(), &end));
1137 if (*end)
1138 return Error("invalid integer: " + attribute_);
1139 } else if (IsFloat(e.type.base_type)) {
1140 char *end;
1141 e.constant = NumToString(strtod(attribute_.c_str(), &end));
1142 if (*end)
1143 return Error("invalid float: " + attribute_);
1144 } else {
1145 assert(0); // Shouldn't happen, we covered all types.
1146 e.constant = "0";
1147 }
1148 NEXT();
1149 }
1150 } else {
1151 bool match = false;
1152 ECHECK(TryTypedValue(kTokenIntegerConstant,
1153 IsScalar(e.type.base_type),
1154 e,
1155 BASE_TYPE_INT,
1156 &match));
1157 ECHECK(TryTypedValue(kTokenFloatConstant,
1158 IsFloat(e.type.base_type),
1159 e,
1160 BASE_TYPE_FLOAT,
1161 &match));
1162 ECHECK(TryTypedValue(kTokenStringConstant,
1163 e.type.base_type == BASE_TYPE_STRING,
1164 e,
1165 BASE_TYPE_STRING,
1166 &match));
1167 if (!match)
1168 return Error("cannot parse value starting with: " +
1169 TokenToStringId(token_));
1170 }
1171 return NoError();
1172 }
1173
LookupCreateStruct(const std::string & name,bool create_if_new,bool definition)1174 StructDef *Parser::LookupCreateStruct(const std::string &name,
1175 bool create_if_new, bool definition) {
1176 std::string qualified_name = namespaces_.back()->GetFullyQualifiedName(name);
1177 // See if it exists pre-declared by an unqualified use.
1178 auto struct_def = structs_.Lookup(name);
1179 if (struct_def && struct_def->predecl) {
1180 if (definition) {
1181 // Make sure it has the current namespace, and is registered under its
1182 // qualified name.
1183 struct_def->defined_namespace = namespaces_.back();
1184 structs_.Move(name, qualified_name);
1185 }
1186 return struct_def;
1187 }
1188 // See if it exists pre-declared by an qualified use.
1189 struct_def = structs_.Lookup(qualified_name);
1190 if (struct_def && struct_def->predecl) {
1191 if (definition) {
1192 // Make sure it has the current namespace.
1193 struct_def->defined_namespace = namespaces_.back();
1194 }
1195 return struct_def;
1196 }
1197 if (!definition) {
1198 // Search thru parent namespaces.
1199 for (size_t components = namespaces_.back()->components.size();
1200 components && !struct_def; components--) {
1201 struct_def = structs_.Lookup(
1202 namespaces_.back()->GetFullyQualifiedName(name, components - 1));
1203 }
1204 }
1205 if (!struct_def && create_if_new) {
1206 struct_def = new StructDef();
1207 if (definition) {
1208 structs_.Add(qualified_name, struct_def);
1209 struct_def->name = name;
1210 struct_def->defined_namespace = namespaces_.back();
1211 } else {
1212 // Not a definition.
1213 // Rather than failing, we create a "pre declared" StructDef, due to
1214 // circular references, and check for errors at the end of parsing.
1215 // It is defined in the root namespace, since we don't know what the
1216 // final namespace will be.
1217 // TODO: maybe safer to use special namespace?
1218 structs_.Add(name, struct_def);
1219 struct_def->name = name;
1220 struct_def->defined_namespace = new Namespace();
1221 namespaces_.insert(namespaces_.begin(), struct_def->defined_namespace);
1222 }
1223 }
1224 return struct_def;
1225 }
1226
ParseEnum(bool is_union,EnumDef ** dest)1227 CheckedError Parser::ParseEnum(bool is_union, EnumDef **dest) {
1228 std::vector<std::string> enum_comment = doc_comment_;
1229 NEXT();
1230 std::string enum_name = attribute_;
1231 EXPECT(kTokenIdentifier);
1232 auto &enum_def = *new EnumDef();
1233 enum_def.name = enum_name;
1234 enum_def.file = file_being_parsed_;
1235 enum_def.doc_comment = enum_comment;
1236 enum_def.is_union = is_union;
1237 enum_def.defined_namespace = namespaces_.back();
1238 if (enums_.Add(namespaces_.back()->GetFullyQualifiedName(enum_name),
1239 &enum_def))
1240 return Error("enum already exists: " + enum_name);
1241 if (is_union) {
1242 enum_def.underlying_type.base_type = BASE_TYPE_UTYPE;
1243 enum_def.underlying_type.enum_def = &enum_def;
1244 } else {
1245 if (opts.proto_mode) {
1246 enum_def.underlying_type.base_type = BASE_TYPE_INT;
1247 } else {
1248 // Give specialized error message, since this type spec used to
1249 // be optional in the first FlatBuffers release.
1250 if (!Is(':')) {
1251 return Error("must specify the underlying integer type for this"
1252 " enum (e.g. \': short\', which was the default).");
1253 } else {
1254 NEXT();
1255 }
1256 // Specify the integer type underlying this enum.
1257 ECHECK(ParseType(enum_def.underlying_type));
1258 if (!IsInteger(enum_def.underlying_type.base_type))
1259 return Error("underlying enum type must be integral");
1260 }
1261 // Make this type refer back to the enum it was derived from.
1262 enum_def.underlying_type.enum_def = &enum_def;
1263 }
1264 ECHECK(ParseMetaData(&enum_def.attributes));
1265 EXPECT('{');
1266 if (is_union) enum_def.vals.Add("NONE", new EnumVal("NONE", 0));
1267 for (;;) {
1268 if (opts.proto_mode && attribute_ == "option") {
1269 ECHECK(ParseProtoOption());
1270 } else {
1271 auto value_name = attribute_;
1272 auto full_name = value_name;
1273 std::vector<std::string> value_comment = doc_comment_;
1274 EXPECT(kTokenIdentifier);
1275 if (is_union) {
1276 ECHECK(ParseNamespacing(&full_name, &value_name));
1277 if (opts.union_value_namespacing) {
1278 // Since we can't namespace the actual enum identifiers, turn
1279 // namespace parts into part of the identifier.
1280 value_name = full_name;
1281 std::replace(value_name.begin(), value_name.end(), '.', '_');
1282 }
1283 }
1284 auto prevsize = enum_def.vals.vec.size();
1285 auto value = enum_def.vals.vec.size()
1286 ? enum_def.vals.vec.back()->value + 1
1287 : 0;
1288 auto &ev = *new EnumVal(value_name, value);
1289 if (enum_def.vals.Add(value_name, &ev))
1290 return Error("enum value already exists: " + value_name);
1291 ev.doc_comment = value_comment;
1292 if (is_union) {
1293 ev.struct_def = LookupCreateStruct(full_name);
1294 }
1295 if (Is('=')) {
1296 NEXT();
1297 ev.value = StringToInt(attribute_.c_str());
1298 EXPECT(kTokenIntegerConstant);
1299 if (!opts.proto_mode && prevsize &&
1300 enum_def.vals.vec[prevsize - 1]->value >= ev.value)
1301 return Error("enum values must be specified in ascending order");
1302 }
1303 if (opts.proto_mode && Is('[')) {
1304 NEXT();
1305 // ignore attributes on enums.
1306 while (token_ != ']') NEXT();
1307 NEXT();
1308 }
1309 }
1310 if (!Is(opts.proto_mode ? ';' : ',')) break;
1311 NEXT();
1312 if (Is('}')) break;
1313 }
1314 EXPECT('}');
1315 if (enum_def.attributes.Lookup("bit_flags")) {
1316 for (auto it = enum_def.vals.vec.begin(); it != enum_def.vals.vec.end();
1317 ++it) {
1318 if (static_cast<size_t>((*it)->value) >=
1319 SizeOf(enum_def.underlying_type.base_type) * 8)
1320 return Error("bit flag out of range of underlying integral type");
1321 (*it)->value = 1LL << (*it)->value;
1322 }
1323 }
1324 if (dest) *dest = &enum_def;
1325 types_.Add(namespaces_.back()->GetFullyQualifiedName(enum_def.name),
1326 new Type(BASE_TYPE_UNION, nullptr, &enum_def));
1327 return NoError();
1328 }
1329
StartStruct(const std::string & name,StructDef ** dest)1330 CheckedError Parser::StartStruct(const std::string &name, StructDef **dest) {
1331 auto &struct_def = *LookupCreateStruct(name, true, true);
1332 if (!struct_def.predecl) return Error("datatype already exists: " + name);
1333 struct_def.predecl = false;
1334 struct_def.name = name;
1335 struct_def.file = file_being_parsed_;
1336 // Move this struct to the back of the vector just in case it was predeclared,
1337 // to preserve declaration order.
1338 *remove(structs_.vec.begin(), structs_.vec.end(), &struct_def) = &struct_def;
1339 *dest = &struct_def;
1340 return NoError();
1341 }
1342
CheckClash(std::vector<FieldDef * > & fields,StructDef * struct_def,const char * suffix,BaseType basetype)1343 CheckedError Parser::CheckClash(std::vector<FieldDef*> &fields,
1344 StructDef *struct_def,
1345 const char *suffix,
1346 BaseType basetype) {
1347 auto len = strlen(suffix);
1348 for (auto it = fields.begin(); it != fields.end(); ++it) {
1349 auto &fname = (*it)->name;
1350 if (fname.length() > len &&
1351 fname.compare(fname.length() - len, len, suffix) == 0 &&
1352 (*it)->value.type.base_type != BASE_TYPE_UTYPE) {
1353 auto field = struct_def->fields.Lookup(
1354 fname.substr(0, fname.length() - len));
1355 if (field && field->value.type.base_type == basetype)
1356 return Error("Field " + fname +
1357 " would clash with generated functions for field " +
1358 field->name);
1359 }
1360 }
1361 return NoError();
1362 }
1363
compareFieldDefs(const FieldDef * a,const FieldDef * b)1364 static bool compareFieldDefs(const FieldDef *a, const FieldDef *b) {
1365 auto a_id = atoi(a->attributes.Lookup("id")->constant.c_str());
1366 auto b_id = atoi(b->attributes.Lookup("id")->constant.c_str());
1367 return a_id < b_id;
1368 }
1369
ParseDecl()1370 CheckedError Parser::ParseDecl() {
1371 std::vector<std::string> dc = doc_comment_;
1372 bool fixed = Is(kTokenStruct);
1373 if (fixed) NEXT() else EXPECT(kTokenTable);
1374 std::string name = attribute_;
1375 EXPECT(kTokenIdentifier);
1376 StructDef *struct_def;
1377 ECHECK(StartStruct(name, &struct_def));
1378 struct_def->doc_comment = dc;
1379 struct_def->fixed = fixed;
1380 ECHECK(ParseMetaData(&struct_def->attributes));
1381 struct_def->sortbysize =
1382 struct_def->attributes.Lookup("original_order") == nullptr && !fixed;
1383 EXPECT('{');
1384 while (token_ != '}') ECHECK(ParseField(*struct_def));
1385 auto force_align = struct_def->attributes.Lookup("force_align");
1386 if (fixed && force_align) {
1387 auto align = static_cast<size_t>(atoi(force_align->constant.c_str()));
1388 if (force_align->type.base_type != BASE_TYPE_INT ||
1389 align < struct_def->minalign ||
1390 align > FLATBUFFERS_MAX_ALIGNMENT ||
1391 align & (align - 1))
1392 return Error("force_align must be a power of two integer ranging from the"
1393 "struct\'s natural alignment to " +
1394 NumToString(FLATBUFFERS_MAX_ALIGNMENT));
1395 struct_def->minalign = align;
1396 }
1397 struct_def->PadLastField(struct_def->minalign);
1398 // Check if this is a table that has manual id assignments
1399 auto &fields = struct_def->fields.vec;
1400 if (!struct_def->fixed && fields.size()) {
1401 size_t num_id_fields = 0;
1402 for (auto it = fields.begin(); it != fields.end(); ++it) {
1403 if ((*it)->attributes.Lookup("id")) num_id_fields++;
1404 }
1405 // If any fields have ids..
1406 if (num_id_fields) {
1407 // Then all fields must have them.
1408 if (num_id_fields != fields.size())
1409 return Error(
1410 "either all fields or no fields must have an 'id' attribute");
1411 // Simply sort by id, then the fields are the same as if no ids had
1412 // been specified.
1413 std::sort(fields.begin(), fields.end(), compareFieldDefs);
1414 // Verify we have a contiguous set, and reassign vtable offsets.
1415 for (int i = 0; i < static_cast<int>(fields.size()); i++) {
1416 if (i != atoi(fields[i]->attributes.Lookup("id")->constant.c_str()))
1417 return Error("field id\'s must be consecutive from 0, id " +
1418 NumToString(i) + " missing or set twice");
1419 fields[i]->value.offset = FieldIndexToOffset(static_cast<voffset_t>(i));
1420 }
1421 }
1422 }
1423
1424 ECHECK(CheckClash(fields, struct_def, UnionTypeFieldSuffix(),
1425 BASE_TYPE_UNION));
1426 ECHECK(CheckClash(fields, struct_def, "Type", BASE_TYPE_UNION));
1427 ECHECK(CheckClash(fields, struct_def, "_length", BASE_TYPE_VECTOR));
1428 ECHECK(CheckClash(fields, struct_def, "Length", BASE_TYPE_VECTOR));
1429 ECHECK(CheckClash(fields, struct_def, "_byte_vector", BASE_TYPE_STRING));
1430 ECHECK(CheckClash(fields, struct_def, "ByteVector", BASE_TYPE_STRING));
1431 EXPECT('}');
1432 types_.Add(namespaces_.back()->GetFullyQualifiedName(struct_def->name),
1433 new Type(BASE_TYPE_STRUCT, struct_def, nullptr));
1434 return NoError();
1435 }
1436
ParseService()1437 CheckedError Parser::ParseService() {
1438 std::vector<std::string> service_comment = doc_comment_;
1439 NEXT();
1440 auto service_name = attribute_;
1441 EXPECT(kTokenIdentifier);
1442 auto &service_def = *new ServiceDef();
1443 service_def.name = service_name;
1444 service_def.file = file_being_parsed_;
1445 service_def.doc_comment = service_comment;
1446 service_def.defined_namespace = namespaces_.back();
1447 if (services_.Add(namespaces_.back()->GetFullyQualifiedName(service_name),
1448 &service_def))
1449 return Error("service already exists: " + service_name);
1450 ECHECK(ParseMetaData(&service_def.attributes));
1451 EXPECT('{');
1452 do {
1453 auto rpc_name = attribute_;
1454 EXPECT(kTokenIdentifier);
1455 EXPECT('(');
1456 Type reqtype, resptype;
1457 ECHECK(ParseTypeIdent(reqtype));
1458 EXPECT(')');
1459 EXPECT(':');
1460 ECHECK(ParseTypeIdent(resptype));
1461 if (reqtype.base_type != BASE_TYPE_STRUCT || reqtype.struct_def->fixed ||
1462 resptype.base_type != BASE_TYPE_STRUCT || resptype.struct_def->fixed)
1463 return Error("rpc request and response types must be tables");
1464 auto &rpc = *new RPCCall();
1465 rpc.name = rpc_name;
1466 rpc.request = reqtype.struct_def;
1467 rpc.response = resptype.struct_def;
1468 if (service_def.calls.Add(rpc_name, &rpc))
1469 return Error("rpc already exists: " + rpc_name);
1470 ECHECK(ParseMetaData(&rpc.attributes));
1471 EXPECT(';');
1472 } while (token_ != '}');
1473 NEXT();
1474 return NoError();
1475 }
1476
SetRootType(const char * name)1477 bool Parser::SetRootType(const char *name) {
1478 root_struct_def_ = structs_.Lookup(name);
1479 if (!root_struct_def_)
1480 root_struct_def_ = structs_.Lookup(
1481 namespaces_.back()->GetFullyQualifiedName(name));
1482 return root_struct_def_ != nullptr;
1483 }
1484
MarkGenerated()1485 void Parser::MarkGenerated() {
1486 // This function marks all existing definitions as having already
1487 // been generated, which signals no code for included files should be
1488 // generated.
1489 for (auto it = enums_.vec.begin();
1490 it != enums_.vec.end(); ++it) {
1491 (*it)->generated = true;
1492 }
1493 for (auto it = structs_.vec.begin();
1494 it != structs_.vec.end(); ++it) {
1495 (*it)->generated = true;
1496 }
1497 for (auto it = services_.vec.begin();
1498 it != services_.vec.end(); ++it) {
1499 (*it)->generated = true;
1500 }
1501 }
1502
ParseNamespace()1503 CheckedError Parser::ParseNamespace() {
1504 NEXT();
1505 auto ns = new Namespace();
1506 namespaces_.push_back(ns);
1507 if (token_ != ';') {
1508 for (;;) {
1509 ns->components.push_back(attribute_);
1510 EXPECT(kTokenIdentifier);
1511 if (Is('.')) NEXT() else break;
1512 }
1513 }
1514 EXPECT(';');
1515 return NoError();
1516 }
1517
compareEnumVals(const EnumVal * a,const EnumVal * b)1518 static bool compareEnumVals(const EnumVal *a, const EnumVal* b) {
1519 return a->value < b->value;
1520 }
1521
1522 // Best effort parsing of .proto declarations, with the aim to turn them
1523 // in the closest corresponding FlatBuffer equivalent.
1524 // We parse everything as identifiers instead of keywords, since we don't
1525 // want protobuf keywords to become invalid identifiers in FlatBuffers.
ParseProtoDecl()1526 CheckedError Parser::ParseProtoDecl() {
1527 bool isextend = attribute_ == "extend";
1528 if (attribute_ == "package") {
1529 // These are identical in syntax to FlatBuffer's namespace decl.
1530 ECHECK(ParseNamespace());
1531 } else if (attribute_ == "message" || isextend) {
1532 std::vector<std::string> struct_comment = doc_comment_;
1533 NEXT();
1534 StructDef *struct_def = nullptr;
1535 if (isextend) {
1536 if (Is('.')) NEXT(); // qualified names may start with a . ?
1537 auto id = attribute_;
1538 EXPECT(kTokenIdentifier);
1539 ECHECK(ParseNamespacing(&id, nullptr));
1540 struct_def = LookupCreateStruct(id, false);
1541 if (!struct_def)
1542 return Error("cannot extend unknown message type: " + id);
1543 } else {
1544 std::string name = attribute_;
1545 EXPECT(kTokenIdentifier);
1546 ECHECK(StartStruct(name, &struct_def));
1547 // Since message definitions can be nested, we create a new namespace.
1548 auto ns = new Namespace();
1549 // Copy of current namespace.
1550 *ns = *namespaces_.back();
1551 // But with current message name.
1552 ns->components.push_back(name);
1553 namespaces_.push_back(ns);
1554 }
1555 struct_def->doc_comment = struct_comment;
1556 ECHECK(ParseProtoFields(struct_def, isextend, false));
1557 if (!isextend) {
1558 // We have to remove the nested namespace, but we can't just throw it
1559 // away, so put it at the beginning of the vector.
1560 auto ns = namespaces_.back();
1561 namespaces_.pop_back();
1562 namespaces_.insert(namespaces_.begin(), ns);
1563 }
1564 if (Is(';')) NEXT();
1565 } else if (attribute_ == "enum") {
1566 // These are almost the same, just with different terminator:
1567 EnumDef *enum_def;
1568 ECHECK(ParseEnum(false, &enum_def));
1569 if (Is(';')) NEXT();
1570 // Protobuf allows them to be specified in any order, so sort afterwards.
1571 auto &v = enum_def->vals.vec;
1572 std::sort(v.begin(), v.end(), compareEnumVals);
1573
1574 // Temp: remove any duplicates, as .fbs files can't handle them.
1575 for (auto it = v.begin(); it != v.end(); ) {
1576 if (it != v.begin() && it[0]->value == it[-1]->value) it = v.erase(it);
1577 else ++it;
1578 }
1579 } else if (attribute_ == "syntax") { // Skip these.
1580 NEXT();
1581 EXPECT('=');
1582 EXPECT(kTokenStringConstant);
1583 EXPECT(';');
1584 } else if (attribute_ == "option") { // Skip these.
1585 ECHECK(ParseProtoOption());
1586 EXPECT(';');
1587 } else if (attribute_ == "service") { // Skip these.
1588 NEXT();
1589 EXPECT(kTokenIdentifier);
1590 ECHECK(ParseProtoCurliesOrIdent());
1591 } else {
1592 return Error("don\'t know how to parse .proto declaration starting with " +
1593 TokenToStringId(token_));
1594 }
1595 return NoError();
1596 }
1597
ParseProtoFields(StructDef * struct_def,bool isextend,bool inside_oneof)1598 CheckedError Parser::ParseProtoFields(StructDef *struct_def, bool isextend,
1599 bool inside_oneof) {
1600 EXPECT('{');
1601 while (token_ != '}') {
1602 if (attribute_ == "message" || attribute_ == "extend" ||
1603 attribute_ == "enum") {
1604 // Nested declarations.
1605 ECHECK(ParseProtoDecl());
1606 } else if (attribute_ == "extensions") { // Skip these.
1607 NEXT();
1608 EXPECT(kTokenIntegerConstant);
1609 if (Is(kTokenIdentifier)) {
1610 NEXT(); // to
1611 NEXT(); // num
1612 }
1613 EXPECT(';');
1614 } else if (attribute_ == "option") { // Skip these.
1615 ECHECK(ParseProtoOption());
1616 EXPECT(';');
1617 } else if (attribute_ == "reserved") { // Skip these.
1618 NEXT();
1619 EXPECT(kTokenIntegerConstant);
1620 while (Is(',')) { NEXT(); EXPECT(kTokenIntegerConstant); }
1621 EXPECT(';');
1622 } else {
1623 std::vector<std::string> field_comment = doc_comment_;
1624 // Parse the qualifier.
1625 bool required = false;
1626 bool repeated = false;
1627 bool oneof = false;
1628 if (!inside_oneof) {
1629 if (attribute_ == "optional") {
1630 // This is the default.
1631 EXPECT(kTokenIdentifier);
1632 } else if (attribute_ == "required") {
1633 required = true;
1634 EXPECT(kTokenIdentifier);
1635 } else if (attribute_ == "repeated") {
1636 repeated = true;
1637 EXPECT(kTokenIdentifier);
1638 } else if (attribute_ == "oneof") {
1639 oneof = true;
1640 EXPECT(kTokenIdentifier);
1641 } else {
1642 // can't error, proto3 allows decls without any of the above.
1643 }
1644 }
1645 StructDef *anonymous_struct = nullptr;
1646 Type type;
1647 if (attribute_ == "group" || oneof) {
1648 if (!oneof) EXPECT(kTokenIdentifier);
1649 auto name = "Anonymous" + NumToString(anonymous_counter++);
1650 ECHECK(StartStruct(name, &anonymous_struct));
1651 type = Type(BASE_TYPE_STRUCT, anonymous_struct);
1652 } else {
1653 ECHECK(ParseTypeFromProtoType(&type));
1654 }
1655 // Repeated elements get mapped to a vector.
1656 if (repeated) {
1657 type.element = type.base_type;
1658 type.base_type = BASE_TYPE_VECTOR;
1659 }
1660 std::string name = attribute_;
1661 // Protos may use our keywords "attribute" & "namespace" as an identifier.
1662 if (Is(kTokenAttribute) || Is(kTokenNameSpace)) {
1663 NEXT();
1664 // TODO: simpler to just not make these keywords?
1665 name += "_"; // Have to make it not a keyword.
1666 } else {
1667 EXPECT(kTokenIdentifier);
1668 }
1669 if (!oneof) {
1670 // Parse the field id. Since we're just translating schemas, not
1671 // any kind of binary compatibility, we can safely ignore these, and
1672 // assign our own.
1673 EXPECT('=');
1674 EXPECT(kTokenIntegerConstant);
1675 }
1676 FieldDef *field = nullptr;
1677 if (isextend) {
1678 // We allow a field to be re-defined when extending.
1679 // TODO: are there situations where that is problematic?
1680 field = struct_def->fields.Lookup(name);
1681 }
1682 if (!field) ECHECK(AddField(*struct_def, name, type, &field));
1683 field->doc_comment = field_comment;
1684 if (!IsScalar(type.base_type)) field->required = required;
1685 // See if there's a default specified.
1686 if (Is('[')) {
1687 NEXT();
1688 for (;;) {
1689 auto key = attribute_;
1690 ECHECK(ParseProtoKey());
1691 EXPECT('=');
1692 auto val = attribute_;
1693 ECHECK(ParseProtoCurliesOrIdent());
1694 if (key == "default") {
1695 // Temp: skip non-numeric defaults (enums).
1696 auto numeric = strpbrk(val.c_str(), "0123456789-+.");
1697 if (IsScalar(type.base_type) && numeric == val.c_str())
1698 field->value.constant = val;
1699 } else if (key == "deprecated") {
1700 field->deprecated = val == "true";
1701 }
1702 if (!Is(',')) break;
1703 NEXT();
1704 }
1705 EXPECT(']');
1706 }
1707 if (anonymous_struct) {
1708 ECHECK(ParseProtoFields(anonymous_struct, false, oneof));
1709 if (Is(';')) NEXT();
1710 } else {
1711 EXPECT(';');
1712 }
1713 }
1714 }
1715 NEXT();
1716 return NoError();
1717 }
1718
ParseProtoKey()1719 CheckedError Parser::ParseProtoKey() {
1720 if (token_ == '(') {
1721 NEXT();
1722 // Skip "(a.b)" style custom attributes.
1723 while (token_ == '.' || token_ == kTokenIdentifier) NEXT();
1724 EXPECT(')');
1725 while (Is('.')) { NEXT(); EXPECT(kTokenIdentifier); }
1726 } else {
1727 EXPECT(kTokenIdentifier);
1728 }
1729 return NoError();
1730 }
1731
ParseProtoCurliesOrIdent()1732 CheckedError Parser::ParseProtoCurliesOrIdent() {
1733 if (Is('{')) {
1734 NEXT();
1735 for (int nesting = 1; nesting; ) {
1736 if (token_ == '{') nesting++;
1737 else if (token_ == '}') nesting--;
1738 NEXT();
1739 }
1740 } else {
1741 NEXT(); // Any single token.
1742 }
1743 return NoError();
1744 }
1745
ParseProtoOption()1746 CheckedError Parser::ParseProtoOption() {
1747 NEXT();
1748 ECHECK(ParseProtoKey());
1749 EXPECT('=');
1750 ECHECK(ParseProtoCurliesOrIdent());
1751 return NoError();
1752 }
1753
1754 // Parse a protobuf type, and map it to the corresponding FlatBuffer one.
ParseTypeFromProtoType(Type * type)1755 CheckedError Parser::ParseTypeFromProtoType(Type *type) {
1756 struct type_lookup { const char *proto_type; BaseType fb_type; };
1757 static type_lookup lookup[] = {
1758 { "float", BASE_TYPE_FLOAT }, { "double", BASE_TYPE_DOUBLE },
1759 { "int32", BASE_TYPE_INT }, { "int64", BASE_TYPE_LONG },
1760 { "uint32", BASE_TYPE_UINT }, { "uint64", BASE_TYPE_ULONG },
1761 { "sint32", BASE_TYPE_INT }, { "sint64", BASE_TYPE_LONG },
1762 { "fixed32", BASE_TYPE_UINT }, { "fixed64", BASE_TYPE_ULONG },
1763 { "sfixed32", BASE_TYPE_INT }, { "sfixed64", BASE_TYPE_LONG },
1764 { "bool", BASE_TYPE_BOOL },
1765 { "string", BASE_TYPE_STRING },
1766 { "bytes", BASE_TYPE_STRING },
1767 { nullptr, BASE_TYPE_NONE }
1768 };
1769 for (auto tl = lookup; tl->proto_type; tl++) {
1770 if (attribute_ == tl->proto_type) {
1771 type->base_type = tl->fb_type;
1772 NEXT();
1773 return NoError();
1774 }
1775 }
1776 if (Is('.')) NEXT(); // qualified names may start with a . ?
1777 ECHECK(ParseTypeIdent(*type));
1778 return NoError();
1779 }
1780
SkipAnyJsonValue()1781 CheckedError Parser::SkipAnyJsonValue() {
1782 switch (token_) {
1783 case '{':
1784 ECHECK(SkipJsonObject());
1785 break;
1786 case kTokenStringConstant:
1787 ECHECK(SkipJsonString());
1788 break;
1789 case '[':
1790 ECHECK(SkipJsonArray());
1791 break;
1792 case kTokenIntegerConstant:
1793 EXPECT(kTokenIntegerConstant);
1794 break;
1795 case kTokenFloatConstant:
1796 EXPECT(kTokenFloatConstant);
1797 break;
1798 default:
1799 return Error(std::string("Unexpected token:") + std::string(1, static_cast<char>(token_)));
1800 }
1801 return NoError();
1802 }
1803
SkipJsonObject()1804 CheckedError Parser::SkipJsonObject() {
1805 EXPECT('{');
1806 size_t fieldn = 0;
1807
1808 for (;;) {
1809 if ((!opts.strict_json || !fieldn) && Is('}')) break;
1810
1811 if (!Is(kTokenStringConstant)) {
1812 EXPECT(opts.strict_json ? kTokenStringConstant : kTokenIdentifier);
1813 }
1814 else {
1815 NEXT();
1816 }
1817
1818 EXPECT(':');
1819 ECHECK(SkipAnyJsonValue());
1820 fieldn++;
1821
1822 if (Is('}')) break;
1823 EXPECT(',');
1824 }
1825
1826 NEXT();
1827 return NoError();
1828 }
1829
SkipJsonArray()1830 CheckedError Parser::SkipJsonArray() {
1831 EXPECT('[');
1832
1833 for (;;) {
1834 if (Is(']')) break;
1835
1836 ECHECK(SkipAnyJsonValue());
1837
1838 if (Is(']')) break;
1839 EXPECT(',');
1840 }
1841
1842 NEXT();
1843 return NoError();
1844 }
1845
SkipJsonString()1846 CheckedError Parser::SkipJsonString() {
1847 EXPECT(kTokenStringConstant);
1848 return NoError();
1849 }
1850
Parse(const char * source,const char ** include_paths,const char * source_filename)1851 bool Parser::Parse(const char *source, const char **include_paths,
1852 const char *source_filename) {
1853 return !DoParse(source, include_paths, source_filename).Check();
1854 }
1855
DoParse(const char * source,const char ** include_paths,const char * source_filename)1856 CheckedError Parser::DoParse(const char *source, const char **include_paths,
1857 const char *source_filename) {
1858 file_being_parsed_ = source_filename ? source_filename : "";
1859 if (source_filename &&
1860 included_files_.find(source_filename) == included_files_.end()) {
1861 included_files_[source_filename] = true;
1862 files_included_per_file_[source_filename] = std::set<std::string>();
1863 }
1864 if (!include_paths) {
1865 static const char *current_directory[] = { "", nullptr };
1866 include_paths = current_directory;
1867 }
1868 source_ = cursor_ = source;
1869 line_ = 1;
1870 error_.clear();
1871 field_stack_.clear();
1872 builder_.Clear();
1873 // Start with a blank namespace just in case this file doesn't have one.
1874 namespaces_.push_back(new Namespace());
1875 ECHECK(SkipByteOrderMark());
1876 NEXT();
1877 // Includes must come before type declarations:
1878 for (;;) {
1879 // Parse pre-include proto statements if any:
1880 if (opts.proto_mode &&
1881 (attribute_ == "option" || attribute_ == "syntax" ||
1882 attribute_ == "package")) {
1883 ECHECK(ParseProtoDecl());
1884 } else if (Is(kTokenNativeInclude)) {
1885 NEXT();
1886 native_included_files_.emplace_back(attribute_);
1887 EXPECT(kTokenStringConstant);
1888 } else if (Is(kTokenInclude) ||
1889 (opts.proto_mode &&
1890 attribute_ == "import" &&
1891 Is(kTokenIdentifier))) {
1892 NEXT();
1893 if (opts.proto_mode && attribute_ == "public") NEXT();
1894 auto name = attribute_;
1895 EXPECT(kTokenStringConstant);
1896 // Look for the file in include_paths.
1897 std::string filepath;
1898 for (auto paths = include_paths; paths && *paths; paths++) {
1899 filepath = flatbuffers::ConCatPathFileName(*paths, name);
1900 if(FileExists(filepath.c_str())) break;
1901 }
1902 if (filepath.empty())
1903 return Error("unable to locate include file: " + name);
1904 if (source_filename)
1905 files_included_per_file_[source_filename].insert(filepath);
1906 if (included_files_.find(filepath) == included_files_.end()) {
1907 // We found an include file that we have not parsed yet.
1908 // Load it and parse it.
1909 std::string contents;
1910 if (!LoadFile(filepath.c_str(), true, &contents))
1911 return Error("unable to load include file: " + name);
1912 ECHECK(DoParse(contents.c_str(), include_paths, filepath.c_str()));
1913 // We generally do not want to output code for any included files:
1914 if (!opts.generate_all) MarkGenerated();
1915 // This is the easiest way to continue this file after an include:
1916 // instead of saving and restoring all the state, we simply start the
1917 // file anew. This will cause it to encounter the same include
1918 // statement again, but this time it will skip it, because it was
1919 // entered into included_files_.
1920 // This is recursive, but only go as deep as the number of include
1921 // statements.
1922 return DoParse(source, include_paths, source_filename);
1923 }
1924 EXPECT(';');
1925 } else {
1926 break;
1927 }
1928 }
1929 // Now parse all other kinds of declarations:
1930 while (token_ != kTokenEof) {
1931 if (opts.proto_mode) {
1932 ECHECK(ParseProtoDecl());
1933 } else if (token_ == kTokenNameSpace) {
1934 ECHECK(ParseNamespace());
1935 } else if (token_ == '{') {
1936 if (!root_struct_def_)
1937 return Error("no root type set to parse json with");
1938 if (builder_.GetSize()) {
1939 return Error("cannot have more than one json object in a file");
1940 }
1941 uoffset_t toff;
1942 ECHECK(ParseTable(*root_struct_def_, nullptr, &toff));
1943 builder_.Finish(Offset<Table>(toff),
1944 file_identifier_.length() ? file_identifier_.c_str() : nullptr);
1945 } else if (token_ == kTokenEnum) {
1946 ECHECK(ParseEnum(false, nullptr));
1947 } else if (token_ == kTokenUnion) {
1948 ECHECK(ParseEnum(true, nullptr));
1949 } else if (token_ == kTokenRootType) {
1950 NEXT();
1951 auto root_type = attribute_;
1952 EXPECT(kTokenIdentifier);
1953 ECHECK(ParseNamespacing(&root_type, nullptr));
1954 if (!SetRootType(root_type.c_str()))
1955 return Error("unknown root type: " + root_type);
1956 if (root_struct_def_->fixed)
1957 return Error("root type must be a table");
1958 EXPECT(';');
1959 } else if (token_ == kTokenFileIdentifier) {
1960 NEXT();
1961 file_identifier_ = attribute_;
1962 EXPECT(kTokenStringConstant);
1963 if (file_identifier_.length() !=
1964 FlatBufferBuilder::kFileIdentifierLength)
1965 return Error("file_identifier must be exactly " +
1966 NumToString(FlatBufferBuilder::kFileIdentifierLength) +
1967 " characters");
1968 EXPECT(';');
1969 } else if (token_ == kTokenFileExtension) {
1970 NEXT();
1971 file_extension_ = attribute_;
1972 EXPECT(kTokenStringConstant);
1973 EXPECT(';');
1974 } else if(token_ == kTokenInclude) {
1975 return Error("includes must come before declarations");
1976 } else if(token_ == kTokenAttribute) {
1977 NEXT();
1978 auto name = attribute_;
1979 EXPECT(kTokenStringConstant);
1980 EXPECT(';');
1981 known_attributes_[name] = false;
1982 } else if (token_ == kTokenService) {
1983 ECHECK(ParseService());
1984 } else {
1985 ECHECK(ParseDecl());
1986 }
1987 }
1988 for (auto it = structs_.vec.begin(); it != structs_.vec.end(); ++it) {
1989 if ((*it)->predecl) {
1990 return Error("type referenced but not defined: " + (*it)->name);
1991 }
1992 }
1993 for (auto it = enums_.vec.begin(); it != enums_.vec.end(); ++it) {
1994 auto &enum_def = **it;
1995 if (enum_def.is_union) {
1996 for (auto val_it = enum_def.vals.vec.begin();
1997 val_it != enum_def.vals.vec.end();
1998 ++val_it) {
1999 auto &val = **val_it;
2000 if (val.struct_def && val.struct_def->fixed)
2001 return Error("only tables can be union elements: " + val.name);
2002 }
2003 }
2004 }
2005 return NoError();
2006 }
2007
GetIncludedFilesRecursive(const std::string & file_name) const2008 std::set<std::string> Parser::GetIncludedFilesRecursive(
2009 const std::string &file_name) const {
2010 std::set<std::string> included_files;
2011 std::list<std::string> to_process;
2012
2013 if (file_name.empty()) return included_files;
2014 to_process.push_back(file_name);
2015
2016 while (!to_process.empty()) {
2017 std::string current = to_process.front();
2018 to_process.pop_front();
2019 included_files.insert(current);
2020
2021 auto new_files = files_included_per_file_.at(current);
2022 for (auto it = new_files.begin(); it != new_files.end(); ++it) {
2023 if (included_files.find(*it) == included_files.end())
2024 to_process.push_back(*it);
2025 }
2026 }
2027
2028 return included_files;
2029 }
2030
2031 // Schema serialization functionality:
2032
// Strict-weak ordering for definitions: compares the fully qualified names
// (as produced by each definition's own namespace) with operator<.
template<typename T> bool compareName(const T* a, const T* b) {
  const auto lhs_name = a->defined_namespace->GetFullyQualifiedName(a->name);
  const auto rhs_name = b->defined_namespace->GetFullyQualifiedName(b->name);
  return lhs_name < rhs_name;
}
2037
AssignIndices(const std::vector<T * > & defvec)2038 template<typename T> void AssignIndices(const std::vector<T *> &defvec) {
2039 // Pre-sort these vectors, such that we can set the correct indices for them.
2040 auto vec = defvec;
2041 std::sort(vec.begin(), vec.end(), compareName<T>);
2042 for (int i = 0; i < static_cast<int>(vec.size()); i++) vec[i]->index = i;
2043 }
2044
Serialize()2045 void Parser::Serialize() {
2046 builder_.Clear();
2047 AssignIndices(structs_.vec);
2048 AssignIndices(enums_.vec);
2049 std::vector<Offset<reflection::Object>> object_offsets;
2050 for (auto it = structs_.vec.begin(); it != structs_.vec.end(); ++it) {
2051 auto offset = (*it)->Serialize(&builder_, *this);
2052 object_offsets.push_back(offset);
2053 (*it)->serialized_location = offset.o;
2054 }
2055 std::vector<Offset<reflection::Enum>> enum_offsets;
2056 for (auto it = enums_.vec.begin(); it != enums_.vec.end(); ++it) {
2057 auto offset = (*it)->Serialize(&builder_, *this);
2058 enum_offsets.push_back(offset);
2059 (*it)->serialized_location = offset.o;
2060 }
2061 auto schema_offset = reflection::CreateSchema(
2062 builder_,
2063 builder_.CreateVectorOfSortedTables(&object_offsets),
2064 builder_.CreateVectorOfSortedTables(&enum_offsets),
2065 builder_.CreateString(file_identifier_),
2066 builder_.CreateString(file_extension_),
2067 root_struct_def_
2068 ? root_struct_def_->serialized_location
2069 : 0);
2070 builder_.Finish(schema_offset, reflection::SchemaIdentifier());
2071 }
2072
Serialize(FlatBufferBuilder * builder,const Parser & parser) const2073 Offset<reflection::Object> StructDef::Serialize(FlatBufferBuilder *builder,
2074 const Parser &parser) const {
2075 std::vector<Offset<reflection::Field>> field_offsets;
2076 for (auto it = fields.vec.begin(); it != fields.vec.end(); ++it) {
2077 field_offsets.push_back(
2078 (*it)->Serialize(builder,
2079 static_cast<uint16_t>(it - fields.vec.begin()), parser));
2080 }
2081 auto qualified_name = defined_namespace->GetFullyQualifiedName(name);
2082 return reflection::CreateObject(*builder,
2083 builder->CreateString(qualified_name),
2084 builder->CreateVectorOfSortedTables(
2085 &field_offsets),
2086 fixed,
2087 static_cast<int>(minalign),
2088 static_cast<int>(bytesize),
2089 SerializeAttributes(builder, parser),
2090 parser.opts.binary_schema_comments
2091 ? builder->CreateVectorOfStrings(
2092 doc_comment)
2093 : 0);
2094 }
2095
Serialize(FlatBufferBuilder * builder,uint16_t id,const Parser & parser) const2096 Offset<reflection::Field> FieldDef::Serialize(FlatBufferBuilder *builder,
2097 uint16_t id,
2098 const Parser &parser) const {
2099 return reflection::CreateField(*builder,
2100 builder->CreateString(name),
2101 value.type.Serialize(builder),
2102 id,
2103 value.offset,
2104 IsInteger(value.type.base_type)
2105 ? StringToInt(value.constant.c_str())
2106 : 0,
2107 IsFloat(value.type.base_type)
2108 ? strtod(value.constant.c_str(), nullptr)
2109 : 0.0,
2110 deprecated,
2111 required,
2112 key,
2113 SerializeAttributes(builder, parser),
2114 parser.opts.binary_schema_comments
2115 ? builder->CreateVectorOfStrings(doc_comment)
2116 : 0);
2117 // TODO: value.constant is almost always "0", we could save quite a bit of
2118 // space by sharing it. Same for common values of value.type.
2119 }
2120
Serialize(FlatBufferBuilder * builder,const Parser & parser) const2121 Offset<reflection::Enum> EnumDef::Serialize(FlatBufferBuilder *builder,
2122 const Parser &parser) const {
2123 std::vector<Offset<reflection::EnumVal>> enumval_offsets;
2124 for (auto it = vals.vec.begin(); it != vals.vec.end(); ++it) {
2125 enumval_offsets.push_back((*it)->Serialize(builder));
2126 }
2127 auto qualified_name = defined_namespace->GetFullyQualifiedName(name);
2128 return reflection::CreateEnum(*builder,
2129 builder->CreateString(qualified_name),
2130 builder->CreateVector(enumval_offsets),
2131 is_union,
2132 underlying_type.Serialize(builder),
2133 SerializeAttributes(builder, parser),
2134 parser.opts.binary_schema_comments
2135 ? builder->CreateVectorOfStrings(doc_comment)
2136 : 0);
2137 }
2138
Serialize(FlatBufferBuilder * builder) const2139 Offset<reflection::EnumVal> EnumVal::Serialize(FlatBufferBuilder *builder) const
2140 {
2141 return reflection::CreateEnumVal(*builder,
2142 builder->CreateString(name),
2143 value,
2144 struct_def
2145 ? struct_def->serialized_location
2146 : 0);
2147 }
2148
Serialize(FlatBufferBuilder * builder) const2149 Offset<reflection::Type> Type::Serialize(FlatBufferBuilder *builder) const {
2150 return reflection::CreateType(*builder,
2151 static_cast<reflection::BaseType>(base_type),
2152 static_cast<reflection::BaseType>(element),
2153 struct_def ? struct_def->index :
2154 (enum_def ? enum_def->index : -1));
2155 }
2156
2157 flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<
2158 reflection::KeyValue>>>
SerializeAttributes(FlatBufferBuilder * builder,const Parser & parser) const2159 Definition::SerializeAttributes(FlatBufferBuilder *builder,
2160 const Parser &parser) const {
2161 std::vector<flatbuffers::Offset<reflection::KeyValue>> attrs;
2162 for (auto kv = attributes.dict.begin(); kv != attributes.dict.end(); ++kv) {
2163 auto it = parser.known_attributes_.find(kv->first);
2164 assert(it != parser.known_attributes_.end());
2165 if (!it->second) { // Custom attribute.
2166 attrs.push_back(
2167 reflection::CreateKeyValue(*builder, builder->CreateString(kv->first),
2168 builder->CreateString(
2169 kv->second->constant)));
2170 }
2171 }
2172 if (attrs.size()) {
2173 return builder->CreateVectorOfSortedTables(&attrs);
2174 } else {
2175 return 0;
2176 }
2177 }
2178
ConformTo(const Parser & base)2179 std::string Parser::ConformTo(const Parser &base) {
2180 for (auto sit = structs_.vec.begin(); sit != structs_.vec.end(); ++sit) {
2181 auto &struct_def = **sit;
2182 auto qualified_name =
2183 struct_def.defined_namespace->GetFullyQualifiedName(struct_def.name);
2184 auto struct_def_base = base.structs_.Lookup(qualified_name);
2185 if (!struct_def_base) continue;
2186 for (auto fit = struct_def.fields.vec.begin();
2187 fit != struct_def.fields.vec.end(); ++fit) {
2188 auto &field = **fit;
2189 auto field_base = struct_def_base->fields.Lookup(field.name);
2190 if (field_base) {
2191 if (field.value.offset != field_base->value.offset)
2192 return "offsets differ for field: " + field.name;
2193 if (field.value.constant != field_base->value.constant)
2194 return "defaults differ for field: " + field.name;
2195 if (!EqualByName(field.value.type, field_base->value.type))
2196 return "types differ for field: " + field.name;
2197 } else {
2198 // Doesn't have to exist, deleting fields is fine.
2199 // But we should check if there is a field that has the same offset
2200 // but is incompatible (in the case of field renaming).
2201 for (auto fbit = struct_def_base->fields.vec.begin();
2202 fbit != struct_def_base->fields.vec.end(); ++fbit) {
2203 field_base = *fbit;
2204 if (field.value.offset == field_base->value.offset) {
2205 if (!EqualByName(field.value.type, field_base->value.type))
2206 return "field renamed to different type: " + field.name;
2207 break;
2208 }
2209 }
2210 }
2211 }
2212 }
2213 for (auto eit = enums_.vec.begin(); eit != enums_.vec.end(); ++eit) {
2214 auto &enum_def = **eit;
2215 auto qualified_name =
2216 enum_def.defined_namespace->GetFullyQualifiedName(enum_def.name);
2217 auto enum_def_base = base.enums_.Lookup(qualified_name);
2218 if (!enum_def_base) continue;
2219 for (auto evit = enum_def.vals.vec.begin();
2220 evit != enum_def.vals.vec.end(); ++evit) {
2221 auto &enum_val = **evit;
2222 auto enum_val_base = enum_def_base->vals.Lookup(enum_val.name);
2223 if (enum_val_base) {
2224 if (enum_val.value != enum_val_base->value)
2225 return "values differ for enum: " + enum_val.name;
2226 }
2227 }
2228 }
2229 return "";
2230 }
2231
2232 } // namespace flatbuffers
2233