1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc. All rights reserved.
3 // https://developers.google.com/protocol-buffers/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 // * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 // * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 // * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31 // Author: kenton@google.com (Kenton Varda)
32 // Based on original Protocol Buffers design by
33 // Sanjay Ghemawat, Jeff Dean, and others.
34 //
35 // Recursive descent FTW.
36
37 #include <float.h>
38 #include <limits>
39 #include <unordered_map>
40
41
42 #include <google/protobuf/stubs/hash.h>
43
44 #include <google/protobuf/stubs/casts.h>
45 #include <google/protobuf/stubs/logging.h>
46 #include <google/protobuf/stubs/common.h>
47 #include <google/protobuf/compiler/parser.h>
48 #include <google/protobuf/descriptor.pb.h>
49 #include <google/protobuf/io/tokenizer.h>
50 #include <google/protobuf/descriptor.h>
51 #include <google/protobuf/wire_format.h>
52 #include <google/protobuf/stubs/strutil.h>
53 #include <google/protobuf/stubs/map_util.h>
54
55 namespace google {
56 namespace protobuf {
57 namespace compiler {
58
59 using internal::WireFormat;
60
61 namespace {
62
63 typedef std::unordered_map<std::string, FieldDescriptorProto::Type> TypeNameMap;
64
MakeTypeNameTable()65 TypeNameMap MakeTypeNameTable() {
66 TypeNameMap result;
67
68 result["double"] = FieldDescriptorProto::TYPE_DOUBLE;
69 result["float"] = FieldDescriptorProto::TYPE_FLOAT;
70 result["uint64"] = FieldDescriptorProto::TYPE_UINT64;
71 result["fixed64"] = FieldDescriptorProto::TYPE_FIXED64;
72 result["fixed32"] = FieldDescriptorProto::TYPE_FIXED32;
73 result["bool"] = FieldDescriptorProto::TYPE_BOOL;
74 result["string"] = FieldDescriptorProto::TYPE_STRING;
75 result["group"] = FieldDescriptorProto::TYPE_GROUP;
76
77 result["bytes"] = FieldDescriptorProto::TYPE_BYTES;
78 result["uint32"] = FieldDescriptorProto::TYPE_UINT32;
79 result["sfixed32"] = FieldDescriptorProto::TYPE_SFIXED32;
80 result["sfixed64"] = FieldDescriptorProto::TYPE_SFIXED64;
81 result["int32"] = FieldDescriptorProto::TYPE_INT32;
82 result["int64"] = FieldDescriptorProto::TYPE_INT64;
83 result["sint32"] = FieldDescriptorProto::TYPE_SINT32;
84 result["sint64"] = FieldDescriptorProto::TYPE_SINT64;
85
86 return result;
87 }
88
89 const TypeNameMap kTypeNames = MakeTypeNameTable();
90
// Camel-cases the field name and appends "Entry" to form the generated map
// entry name, e.g. map<KeyType, ValueType> foo_map => FooMapEntry.
//
// Underscores are dropped, and the character following an underscore (as well
// as the first character) is upper-cased when it is an ASCII lowercase letter.
std::string MapEntryName(const std::string& field_name) {
  static const char kSuffix[] = "Entry";

  std::string result;
  // sizeof(kSuffix) counts the terminating NUL, which is never appended.
  result.reserve(field_name.size() + sizeof(kSuffix) - 1);

  bool cap_next = true;
  for (const char c : field_name) {
    if (c == '_') {
      cap_next = true;
      continue;
    }
    // Note: Do not use ctype.h due to locales.
    if (cap_next && 'a' <= c && c <= 'z') {
      result.push_back(static_cast<char>(c - 'a' + 'A'));
    } else {
      result.push_back(c);
    }
    cap_next = false;
  }
  result.append(kSuffix);
  return result;
}
116
// True iff |c| is an ASCII uppercase letter ('A'..'Z').
bool IsUppercase(char c) {
  return 'A' <= c && c <= 'Z';
}
118
// True iff |c| is an ASCII lowercase letter ('a'..'z').
bool IsLowercase(char c) {
  return 'a' <= c && c <= 'z';
}
120
// True iff |c| is an ASCII decimal digit ('0'..'9').
bool IsNumber(char c) {
  return '0' <= c && c <= '9';
}
122
IsUpperCamelCase(const string & name)123 bool IsUpperCamelCase(const string& name) {
124 if (name.empty()) {
125 return true;
126 }
127 // Name must start with an upper case character.
128 if (!IsUppercase(name[0])) {
129 return false;
130 }
131 // Must not contains underscore.
132 for (int i = 1; i < name.length(); i++) {
133 if (name[i] == '_') {
134 return false;
135 }
136 }
137 return true;
138 }
139
IsUpperUnderscore(const string & name)140 bool IsUpperUnderscore(const string& name) {
141 for (int i = 0; i < name.length(); i++) {
142 const char c = name[i];
143 if (!IsUppercase(c) && c != '_' && !IsNumber(c)) {
144 return false;
145 }
146 }
147 return true;
148 }
149
IsLowerUnderscore(const string & name)150 bool IsLowerUnderscore(const string& name) {
151 for (int i = 0; i < name.length(); i++) {
152 const char c = name[i];
153 if (!IsLowercase(c) && c != '_' && !IsNumber(c)) {
154 return false;
155 }
156 }
157 return true;
158 }
159
IsNumberFollowUnderscore(const string & name)160 bool IsNumberFollowUnderscore(const string& name) {
161 for (int i = 1; i < name.length(); i++) {
162 const char c = name[i];
163 if (IsNumber(c) && name[i - 1] == '_') {
164 return true;
165 }
166 }
167 return false;
168 }
169
170 } // anonymous namespace
171
// Makes code slightly more readable. The meaning of "DO(foo)" is
// "Execute foo and fail if it fails.", where failure is indicated by
// returning false.
//
// The expansion is an if/else whose "then" branch is empty and whose "else"
// branch is `return false` without a trailing semicolon, so the call site
// supplies the semicolon and the enclosing function must be able to return
// false.
#define DO(STATEMENT) \
  if (STATEMENT) {    \
  } else              \
    return false
179
180 // ===================================================================
181
Parser()182 Parser::Parser()
183 : input_(NULL),
184 error_collector_(NULL),
185 source_location_table_(NULL),
186 had_errors_(false),
187 require_syntax_identifier_(false),
188 stop_after_syntax_identifier_(false) {
189 }
190
~Parser()191 Parser::~Parser() {}
192
193 // ===================================================================
194
LookingAt(const char * text)195 inline bool Parser::LookingAt(const char* text) {
196 return input_->current().text == text;
197 }
198
LookingAtType(io::Tokenizer::TokenType token_type)199 inline bool Parser::LookingAtType(io::Tokenizer::TokenType token_type) {
200 return input_->current().type == token_type;
201 }
202
AtEnd()203 inline bool Parser::AtEnd() { return LookingAtType(io::Tokenizer::TYPE_END); }
204
TryConsume(const char * text)205 bool Parser::TryConsume(const char* text) {
206 if (LookingAt(text)) {
207 input_->Next();
208 return true;
209 } else {
210 return false;
211 }
212 }
213
Consume(const char * text,const char * error)214 bool Parser::Consume(const char* text, const char* error) {
215 if (TryConsume(text)) {
216 return true;
217 } else {
218 AddError(error);
219 return false;
220 }
221 }
222
Consume(const char * text)223 bool Parser::Consume(const char* text) {
224 if (TryConsume(text)) {
225 return true;
226 } else {
227 AddError("Expected \"" + string(text) + "\".");
228 return false;
229 }
230 }
231
ConsumeIdentifier(std::string * output,const char * error)232 bool Parser::ConsumeIdentifier(std::string* output, const char* error) {
233 if (LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) {
234 *output = input_->current().text;
235 input_->Next();
236 return true;
237 } else {
238 AddError(error);
239 return false;
240 }
241 }
242
ConsumeInteger(int * output,const char * error)243 bool Parser::ConsumeInteger(int* output, const char* error) {
244 if (LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
245 uint64 value = 0;
246 if (!io::Tokenizer::ParseInteger(input_->current().text, kint32max,
247 &value)) {
248 AddError("Integer out of range.");
249 // We still return true because we did, in fact, parse an integer.
250 }
251 *output = value;
252 input_->Next();
253 return true;
254 } else {
255 AddError(error);
256 return false;
257 }
258 }
259
ConsumeSignedInteger(int * output,const char * error)260 bool Parser::ConsumeSignedInteger(int* output, const char* error) {
261 bool is_negative = false;
262 uint64 max_value = kint32max;
263 if (TryConsume("-")) {
264 is_negative = true;
265 max_value += 1;
266 }
267 uint64 value = 0;
268 DO(ConsumeInteger64(max_value, &value, error));
269 if (is_negative) value *= -1;
270 *output = value;
271 return true;
272 }
273
ConsumeInteger64(uint64 max_value,uint64 * output,const char * error)274 bool Parser::ConsumeInteger64(uint64 max_value, uint64* output,
275 const char* error) {
276 if (LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
277 if (!io::Tokenizer::ParseInteger(input_->current().text, max_value,
278 output)) {
279 AddError("Integer out of range.");
280 // We still return true because we did, in fact, parse an integer.
281 *output = 0;
282 }
283 input_->Next();
284 return true;
285 } else {
286 AddError(error);
287 return false;
288 }
289 }
290
ConsumeNumber(double * output,const char * error)291 bool Parser::ConsumeNumber(double* output, const char* error) {
292 if (LookingAtType(io::Tokenizer::TYPE_FLOAT)) {
293 *output = io::Tokenizer::ParseFloat(input_->current().text);
294 input_->Next();
295 return true;
296 } else if (LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
297 // Also accept integers.
298 uint64 value = 0;
299 if (!io::Tokenizer::ParseInteger(input_->current().text, kuint64max,
300 &value)) {
301 AddError("Integer out of range.");
302 // We still return true because we did, in fact, parse a number.
303 }
304 *output = value;
305 input_->Next();
306 return true;
307 } else if (LookingAt("inf")) {
308 *output = std::numeric_limits<double>::infinity();
309 input_->Next();
310 return true;
311 } else if (LookingAt("nan")) {
312 *output = std::numeric_limits<double>::quiet_NaN();
313 input_->Next();
314 return true;
315 } else {
316 AddError(error);
317 return false;
318 }
319 }
320
ConsumeString(std::string * output,const char * error)321 bool Parser::ConsumeString(std::string* output, const char* error) {
322 if (LookingAtType(io::Tokenizer::TYPE_STRING)) {
323 io::Tokenizer::ParseString(input_->current().text, output);
324 input_->Next();
325 // Allow C++ like concatenation of adjacent string tokens.
326 while (LookingAtType(io::Tokenizer::TYPE_STRING)) {
327 io::Tokenizer::ParseStringAppend(input_->current().text, output);
328 input_->Next();
329 }
330 return true;
331 } else {
332 AddError(error);
333 return false;
334 }
335 }
336
TryConsumeEndOfDeclaration(const char * text,const LocationRecorder * location)337 bool Parser::TryConsumeEndOfDeclaration(const char* text,
338 const LocationRecorder* location) {
339 if (LookingAt(text)) {
340 std::string leading, trailing;
341 std::vector<std::string> detached;
342 input_->NextWithComments(&trailing, &detached, &leading);
343
344 // Save the leading comments for next time, and recall the leading comments
345 // from last time.
346 leading.swap(upcoming_doc_comments_);
347
348 if (location != NULL) {
349 upcoming_detached_comments_.swap(detached);
350 location->AttachComments(&leading, &trailing, &detached);
351 } else if (strcmp(text, "}") == 0) {
352 // If the current location is null and we are finishing the current scope,
353 // drop pending upcoming detached comments.
354 upcoming_detached_comments_.swap(detached);
355 } else {
356 // Otherwise, append the new detached comments to the existing upcoming
357 // detached comments.
358 upcoming_detached_comments_.insert(upcoming_detached_comments_.end(),
359 detached.begin(), detached.end());
360 }
361
362 return true;
363 } else {
364 return false;
365 }
366 }
367
ConsumeEndOfDeclaration(const char * text,const LocationRecorder * location)368 bool Parser::ConsumeEndOfDeclaration(const char* text,
369 const LocationRecorder* location) {
370 if (TryConsumeEndOfDeclaration(text, location)) {
371 return true;
372 } else {
373 AddError("Expected \"" + string(text) + "\".");
374 return false;
375 }
376 }
377
378 // -------------------------------------------------------------------
379
AddError(int line,int column,const std::string & error)380 void Parser::AddError(int line, int column, const std::string& error) {
381 if (error_collector_ != NULL) {
382 error_collector_->AddError(line, column, error);
383 }
384 had_errors_ = true;
385 }
386
AddError(const std::string & error)387 void Parser::AddError(const std::string& error) {
388 AddError(input_->current().line, input_->current().column, error);
389 }
390
AddWarning(const string & warning)391 void Parser::AddWarning(const string& warning) {
392 if (error_collector_ != nullptr) {
393 error_collector_->AddWarning(input_->current().line,
394 input_->current().column, warning);
395 }
396 }
397
398 // -------------------------------------------------------------------
399
LocationRecorder(Parser * parser)400 Parser::LocationRecorder::LocationRecorder(Parser* parser)
401 : parser_(parser),
402 source_code_info_(parser->source_code_info_),
403 location_(parser_->source_code_info_->add_location()) {
404 location_->add_span(parser_->input_->current().line);
405 location_->add_span(parser_->input_->current().column);
406 }
407
LocationRecorder(const LocationRecorder & parent)408 Parser::LocationRecorder::LocationRecorder(const LocationRecorder& parent) {
409 Init(parent, parent.source_code_info_);
410 }
411
LocationRecorder(const LocationRecorder & parent,int path1,SourceCodeInfo * source_code_info)412 Parser::LocationRecorder::LocationRecorder(const LocationRecorder& parent,
413 int path1,
414 SourceCodeInfo* source_code_info) {
415 Init(parent, source_code_info);
416 AddPath(path1);
417 }
418
LocationRecorder(const LocationRecorder & parent,int path1)419 Parser::LocationRecorder::LocationRecorder(const LocationRecorder& parent,
420 int path1) {
421 Init(parent, parent.source_code_info_);
422 AddPath(path1);
423 }
424
LocationRecorder(const LocationRecorder & parent,int path1,int path2)425 Parser::LocationRecorder::LocationRecorder(const LocationRecorder& parent,
426 int path1, int path2) {
427 Init(parent, parent.source_code_info_);
428 AddPath(path1);
429 AddPath(path2);
430 }
431
Init(const LocationRecorder & parent,SourceCodeInfo * source_code_info)432 void Parser::LocationRecorder::Init(const LocationRecorder& parent,
433 SourceCodeInfo* source_code_info) {
434 parser_ = parent.parser_;
435 source_code_info_ = source_code_info;
436
437 location_ = source_code_info_->add_location();
438 location_->mutable_path()->CopyFrom(parent.location_->path());
439
440 location_->add_span(parser_->input_->current().line);
441 location_->add_span(parser_->input_->current().column);
442 }
443
~LocationRecorder()444 Parser::LocationRecorder::~LocationRecorder() {
445 if (location_->span_size() <= 2) {
446 EndAt(parser_->input_->previous());
447 }
448 }
449
AddPath(int path_component)450 void Parser::LocationRecorder::AddPath(int path_component) {
451 location_->add_path(path_component);
452 }
453
StartAt(const io::Tokenizer::Token & token)454 void Parser::LocationRecorder::StartAt(const io::Tokenizer::Token& token) {
455 location_->set_span(0, token.line);
456 location_->set_span(1, token.column);
457 }
458
StartAt(const LocationRecorder & other)459 void Parser::LocationRecorder::StartAt(const LocationRecorder& other) {
460 location_->set_span(0, other.location_->span(0));
461 location_->set_span(1, other.location_->span(1));
462 }
463
EndAt(const io::Tokenizer::Token & token)464 void Parser::LocationRecorder::EndAt(const io::Tokenizer::Token& token) {
465 if (token.line != location_->span(0)) {
466 location_->add_span(token.line);
467 }
468 location_->add_span(token.end_column);
469 }
470
RecordLegacyLocation(const Message * descriptor,DescriptorPool::ErrorCollector::ErrorLocation location)471 void Parser::LocationRecorder::RecordLegacyLocation(
472 const Message* descriptor,
473 DescriptorPool::ErrorCollector::ErrorLocation location) {
474 if (parser_->source_location_table_ != NULL) {
475 parser_->source_location_table_->Add(
476 descriptor, location, location_->span(0), location_->span(1));
477 }
478 }
479
RecordLegacyImportLocation(const Message * descriptor,const string & name)480 void Parser::LocationRecorder::RecordLegacyImportLocation(
481 const Message* descriptor, const string& name) {
482 if (parser_->source_location_table_ != nullptr) {
483 parser_->source_location_table_->AddImport(
484 descriptor, name, location_->span(0), location_->span(1));
485 }
486 }
487
CurrentPathSize() const488 int Parser::LocationRecorder::CurrentPathSize() const {
489 return location_->path_size();
490 }
491
AttachComments(std::string * leading,std::string * trailing,std::vector<std::string> * detached_comments) const492 void Parser::LocationRecorder::AttachComments(
493 std::string* leading, std::string* trailing,
494 std::vector<std::string>* detached_comments) const {
495 GOOGLE_CHECK(!location_->has_leading_comments());
496 GOOGLE_CHECK(!location_->has_trailing_comments());
497
498 if (!leading->empty()) {
499 location_->mutable_leading_comments()->swap(*leading);
500 }
501 if (!trailing->empty()) {
502 location_->mutable_trailing_comments()->swap(*trailing);
503 }
504 for (int i = 0; i < detached_comments->size(); ++i) {
505 location_->add_leading_detached_comments()->swap((*detached_comments)[i]);
506 }
507 detached_comments->clear();
508 }
509
510 // -------------------------------------------------------------------
511
SkipStatement()512 void Parser::SkipStatement() {
513 while (true) {
514 if (AtEnd()) {
515 return;
516 } else if (LookingAtType(io::Tokenizer::TYPE_SYMBOL)) {
517 if (TryConsumeEndOfDeclaration(";", NULL)) {
518 return;
519 } else if (TryConsume("{")) {
520 SkipRestOfBlock();
521 return;
522 } else if (LookingAt("}")) {
523 return;
524 }
525 }
526 input_->Next();
527 }
528 }
529
SkipRestOfBlock()530 void Parser::SkipRestOfBlock() {
531 while (true) {
532 if (AtEnd()) {
533 return;
534 } else if (LookingAtType(io::Tokenizer::TYPE_SYMBOL)) {
535 if (TryConsumeEndOfDeclaration("}", NULL)) {
536 return;
537 } else if (TryConsume("{")) {
538 SkipRestOfBlock();
539 }
540 }
541 input_->Next();
542 }
543 }
544
545 // ===================================================================
546
ValidateEnum(const EnumDescriptorProto * proto)547 bool Parser::ValidateEnum(const EnumDescriptorProto* proto) {
548 bool has_allow_alias = false;
549 bool allow_alias = false;
550
551 for (int i = 0; i < proto->options().uninterpreted_option_size(); i++) {
552 const UninterpretedOption option = proto->options().uninterpreted_option(i);
553 if (option.name_size() > 1) {
554 continue;
555 }
556 if (!option.name(0).is_extension() &&
557 option.name(0).name_part() == "allow_alias") {
558 has_allow_alias = true;
559 if (option.identifier_value() == "true") {
560 allow_alias = true;
561 }
562 break;
563 }
564 }
565
566 if (has_allow_alias && !allow_alias) {
567 std::string error =
568 "\"" + proto->name() +
569 "\" declares 'option allow_alias = false;' which has no effect. "
570 "Please remove the declaration.";
571 // This needlessly clutters declarations with nops.
572 AddError(error);
573 return false;
574 }
575
576 std::set<int> used_values;
577 bool has_duplicates = false;
578 for (int i = 0; i < proto->value_size(); ++i) {
579 const EnumValueDescriptorProto& enum_value = proto->value(i);
580 if (used_values.find(enum_value.number()) != used_values.end()) {
581 has_duplicates = true;
582 break;
583 } else {
584 used_values.insert(enum_value.number());
585 }
586 }
587 if (allow_alias && !has_duplicates) {
588 std::string error =
589 "\"" + proto->name() +
590 "\" declares support for enum aliases but no enum values share field "
591 "numbers. Please remove the unnecessary 'option allow_alias = true;' "
592 "declaration.";
593 // Generate an error if an enum declares support for duplicate enum values
594 // and does not use it protect future authors.
595 AddError(error);
596 return false;
597 }
598
599 // Enforce that enum constants must be UPPER_CASE except in case of
600 // enum_alias.
601 if (!allow_alias) {
602 for (const auto& enum_value : proto->value()) {
603 if (!IsUpperUnderscore(enum_value.name())) {
604 AddWarning(
605 "Enum constant should be in UPPER_CASE. Found: " +
606 enum_value.name() +
607 ". See https://developers.google.com/protocol-buffers/docs/style");
608 }
609 }
610 }
611
612 return true;
613 }
614
Parse(io::Tokenizer * input,FileDescriptorProto * file)615 bool Parser::Parse(io::Tokenizer* input, FileDescriptorProto* file) {
616 input_ = input;
617 had_errors_ = false;
618 syntax_identifier_.clear();
619
620 // Note that |file| could be NULL at this point if
621 // stop_after_syntax_identifier_ is true. So, we conservatively allocate
622 // SourceCodeInfo on the stack, then swap it into the FileDescriptorProto
623 // later on.
624 SourceCodeInfo source_code_info;
625 source_code_info_ = &source_code_info;
626
627 if (LookingAtType(io::Tokenizer::TYPE_START)) {
628 // Advance to first token.
629 input_->NextWithComments(NULL, &upcoming_detached_comments_,
630 &upcoming_doc_comments_);
631 }
632
633 {
634 LocationRecorder root_location(this);
635 root_location.RecordLegacyLocation(file,
636 DescriptorPool::ErrorCollector::OTHER);
637
638 if (require_syntax_identifier_ || LookingAt("syntax")) {
639 if (!ParseSyntaxIdentifier(root_location)) {
640 // Don't attempt to parse the file if we didn't recognize the syntax
641 // identifier.
642 return false;
643 }
644 // Store the syntax into the file.
645 if (file != NULL) file->set_syntax(syntax_identifier_);
646 } else if (!stop_after_syntax_identifier_) {
647 GOOGLE_LOG(WARNING) << "No syntax specified for the proto file: " << file->name()
648 << ". Please use 'syntax = \"proto2\";' "
649 << "or 'syntax = \"proto3\";' to specify a syntax "
650 << "version. (Defaulted to proto2 syntax.)";
651 syntax_identifier_ = "proto2";
652 }
653
654 if (stop_after_syntax_identifier_) return !had_errors_;
655
656 // Repeatedly parse statements until we reach the end of the file.
657 while (!AtEnd()) {
658 if (!ParseTopLevelStatement(file, root_location)) {
659 // This statement failed to parse. Skip it, but keep looping to parse
660 // other statements.
661 SkipStatement();
662
663 if (LookingAt("}")) {
664 AddError("Unmatched \"}\".");
665 input_->NextWithComments(NULL, &upcoming_detached_comments_,
666 &upcoming_doc_comments_);
667 }
668 }
669 }
670 }
671
672 input_ = NULL;
673 source_code_info_ = NULL;
674 assert(file != NULL);
675 source_code_info.Swap(file->mutable_source_code_info());
676 return !had_errors_;
677 }
678
ParseSyntaxIdentifier(const LocationRecorder & parent)679 bool Parser::ParseSyntaxIdentifier(const LocationRecorder& parent) {
680 LocationRecorder syntax_location(parent,
681 FileDescriptorProto::kSyntaxFieldNumber);
682 DO(Consume(
683 "syntax",
684 "File must begin with a syntax statement, e.g. 'syntax = \"proto2\";'."));
685 DO(Consume("="));
686 io::Tokenizer::Token syntax_token = input_->current();
687 std::string syntax;
688 DO(ConsumeString(&syntax, "Expected syntax identifier."));
689 DO(ConsumeEndOfDeclaration(";", &syntax_location));
690
691 syntax_identifier_ = syntax;
692
693 if (syntax != "proto2" && syntax != "proto3" &&
694 !stop_after_syntax_identifier_) {
695 AddError(syntax_token.line, syntax_token.column,
696 "Unrecognized syntax identifier \"" + syntax +
697 "\". This parser "
698 "only recognizes \"proto2\" and \"proto3\".");
699 return false;
700 }
701
702 return true;
703 }
704
ParseTopLevelStatement(FileDescriptorProto * file,const LocationRecorder & root_location)705 bool Parser::ParseTopLevelStatement(FileDescriptorProto* file,
706 const LocationRecorder& root_location) {
707 if (TryConsumeEndOfDeclaration(";", NULL)) {
708 // empty statement; ignore
709 return true;
710 } else if (LookingAt("message")) {
711 LocationRecorder location(root_location,
712 FileDescriptorProto::kMessageTypeFieldNumber,
713 file->message_type_size());
714 return ParseMessageDefinition(file->add_message_type(), location, file);
715 } else if (LookingAt("enum")) {
716 LocationRecorder location(root_location,
717 FileDescriptorProto::kEnumTypeFieldNumber,
718 file->enum_type_size());
719 return ParseEnumDefinition(file->add_enum_type(), location, file);
720 } else if (LookingAt("service")) {
721 LocationRecorder location(root_location,
722 FileDescriptorProto::kServiceFieldNumber,
723 file->service_size());
724 return ParseServiceDefinition(file->add_service(), location, file);
725 } else if (LookingAt("extend")) {
726 LocationRecorder location(root_location,
727 FileDescriptorProto::kExtensionFieldNumber);
728 return ParseExtend(
729 file->mutable_extension(), file->mutable_message_type(), root_location,
730 FileDescriptorProto::kMessageTypeFieldNumber, location, file);
731 } else if (LookingAt("import")) {
732 return ParseImport(file->mutable_dependency(),
733 file->mutable_public_dependency(),
734 file->mutable_weak_dependency(), root_location, file);
735 } else if (LookingAt("package")) {
736 return ParsePackage(file, root_location, file);
737 } else if (LookingAt("option")) {
738 LocationRecorder location(root_location,
739 FileDescriptorProto::kOptionsFieldNumber);
740 return ParseOption(file->mutable_options(), location, file,
741 OPTION_STATEMENT);
742 } else {
743 AddError("Expected top-level statement (e.g. \"message\").");
744 return false;
745 }
746 }
747
748 // -------------------------------------------------------------------
749 // Messages
750
ParseMessageDefinition(DescriptorProto * message,const LocationRecorder & message_location,const FileDescriptorProto * containing_file)751 bool Parser::ParseMessageDefinition(
752 DescriptorProto* message, const LocationRecorder& message_location,
753 const FileDescriptorProto* containing_file) {
754 DO(Consume("message"));
755 {
756 LocationRecorder location(message_location,
757 DescriptorProto::kNameFieldNumber);
758 location.RecordLegacyLocation(message,
759 DescriptorPool::ErrorCollector::NAME);
760 DO(ConsumeIdentifier(message->mutable_name(), "Expected message name."));
761 if (!IsUpperCamelCase(message->name())) {
762 AddWarning(
763 "Message name should be in UpperCamelCase. Found: " +
764 message->name() +
765 ". See https://developers.google.com/protocol-buffers/docs/style");
766 }
767 }
768 DO(ParseMessageBlock(message, message_location, containing_file));
769 return true;
770 }
771
772 namespace {
773
// Placeholder end value for a range; the Adjust*RangesWithMaxEndNumber()
// helpers in this file replace it with the real maximum.
constexpr int kMaxRangeSentinel = -1;
775
IsMessageSetWireFormatMessage(const DescriptorProto & message)776 bool IsMessageSetWireFormatMessage(const DescriptorProto& message) {
777 const MessageOptions& options = message.options();
778 for (int i = 0; i < options.uninterpreted_option_size(); ++i) {
779 const UninterpretedOption& uninterpreted = options.uninterpreted_option(i);
780 if (uninterpreted.name_size() == 1 &&
781 uninterpreted.name(0).name_part() == "message_set_wire_format" &&
782 uninterpreted.identifier_value() == "true") {
783 return true;
784 }
785 }
786 return false;
787 }
788
789 // Modifies any extension ranges that specified 'max' as the end of the
790 // extension range, and sets them to the type-specific maximum. The actual max
791 // tag number can only be determined after all options have been parsed.
AdjustExtensionRangesWithMaxEndNumber(DescriptorProto * message)792 void AdjustExtensionRangesWithMaxEndNumber(DescriptorProto* message) {
793 const bool is_message_set = IsMessageSetWireFormatMessage(*message);
794 const int max_extension_number =
795 is_message_set ? kint32max : FieldDescriptor::kMaxNumber + 1;
796 for (int i = 0; i < message->extension_range_size(); ++i) {
797 if (message->extension_range(i).end() == kMaxRangeSentinel) {
798 message->mutable_extension_range(i)->set_end(max_extension_number);
799 }
800 }
801 }
802
803 // Modifies any reserved ranges that specified 'max' as the end of the
804 // reserved range, and sets them to the type-specific maximum. The actual max
805 // tag number can only be determined after all options have been parsed.
AdjustReservedRangesWithMaxEndNumber(DescriptorProto * message)806 void AdjustReservedRangesWithMaxEndNumber(DescriptorProto* message) {
807 const bool is_message_set = IsMessageSetWireFormatMessage(*message);
808 const int max_field_number =
809 is_message_set ? kint32max : FieldDescriptor::kMaxNumber + 1;
810 for (int i = 0; i < message->reserved_range_size(); ++i) {
811 if (message->reserved_range(i).end() == kMaxRangeSentinel) {
812 message->mutable_reserved_range(i)->set_end(max_field_number);
813 }
814 }
815 }
816
817 } // namespace
818
ParseMessageBlock(DescriptorProto * message,const LocationRecorder & message_location,const FileDescriptorProto * containing_file)819 bool Parser::ParseMessageBlock(DescriptorProto* message,
820 const LocationRecorder& message_location,
821 const FileDescriptorProto* containing_file) {
822 DO(ConsumeEndOfDeclaration("{", &message_location));
823
824 while (!TryConsumeEndOfDeclaration("}", NULL)) {
825 if (AtEnd()) {
826 AddError("Reached end of input in message definition (missing '}').");
827 return false;
828 }
829
830 if (!ParseMessageStatement(message, message_location, containing_file)) {
831 // This statement failed to parse. Skip it, but keep looping to parse
832 // other statements.
833 SkipStatement();
834 }
835 }
836
837 if (message->extension_range_size() > 0) {
838 AdjustExtensionRangesWithMaxEndNumber(message);
839 }
840 if (message->reserved_range_size() > 0) {
841 AdjustReservedRangesWithMaxEndNumber(message);
842 }
843 return true;
844 }
845
ParseMessageStatement(DescriptorProto * message,const LocationRecorder & message_location,const FileDescriptorProto * containing_file)846 bool Parser::ParseMessageStatement(DescriptorProto* message,
847 const LocationRecorder& message_location,
848 const FileDescriptorProto* containing_file) {
849 if (TryConsumeEndOfDeclaration(";", NULL)) {
850 // empty statement; ignore
851 return true;
852 } else if (LookingAt("message")) {
853 LocationRecorder location(message_location,
854 DescriptorProto::kNestedTypeFieldNumber,
855 message->nested_type_size());
856 return ParseMessageDefinition(message->add_nested_type(), location,
857 containing_file);
858 } else if (LookingAt("enum")) {
859 LocationRecorder location(message_location,
860 DescriptorProto::kEnumTypeFieldNumber,
861 message->enum_type_size());
862 return ParseEnumDefinition(message->add_enum_type(), location,
863 containing_file);
864 } else if (LookingAt("extensions")) {
865 LocationRecorder location(message_location,
866 DescriptorProto::kExtensionRangeFieldNumber);
867 return ParseExtensions(message, location, containing_file);
868 } else if (LookingAt("reserved")) {
869 return ParseReserved(message, message_location);
870 } else if (LookingAt("extend")) {
871 LocationRecorder location(message_location,
872 DescriptorProto::kExtensionFieldNumber);
873 return ParseExtend(message->mutable_extension(),
874 message->mutable_nested_type(), message_location,
875 DescriptorProto::kNestedTypeFieldNumber, location,
876 containing_file);
877 } else if (LookingAt("option")) {
878 LocationRecorder location(message_location,
879 DescriptorProto::kOptionsFieldNumber);
880 return ParseOption(message->mutable_options(), location, containing_file,
881 OPTION_STATEMENT);
882 } else if (LookingAt("oneof")) {
883 int oneof_index = message->oneof_decl_size();
884 LocationRecorder oneof_location(
885 message_location, DescriptorProto::kOneofDeclFieldNumber, oneof_index);
886
887 return ParseOneof(message->add_oneof_decl(), message, oneof_index,
888 oneof_location, message_location, containing_file);
889 } else {
890 LocationRecorder location(message_location,
891 DescriptorProto::kFieldFieldNumber,
892 message->field_size());
893 return ParseMessageField(
894 message->add_field(), message->mutable_nested_type(), message_location,
895 DescriptorProto::kNestedTypeFieldNumber, location, containing_file);
896 }
897 }
898
ParseMessageField(FieldDescriptorProto * field,RepeatedPtrField<DescriptorProto> * messages,const LocationRecorder & parent_location,int location_field_number_for_nested_type,const LocationRecorder & field_location,const FileDescriptorProto * containing_file)899 bool Parser::ParseMessageField(FieldDescriptorProto* field,
900 RepeatedPtrField<DescriptorProto>* messages,
901 const LocationRecorder& parent_location,
902 int location_field_number_for_nested_type,
903 const LocationRecorder& field_location,
904 const FileDescriptorProto* containing_file) {
905 {
906 LocationRecorder location(field_location,
907 FieldDescriptorProto::kLabelFieldNumber);
908 FieldDescriptorProto::Label label;
909 if (ParseLabel(&label, containing_file)) {
910 field->set_label(label);
911 if (label == FieldDescriptorProto::LABEL_OPTIONAL &&
912 syntax_identifier_ == "proto3") {
913 AddError(
914 "Explicit 'optional' labels are disallowed in the Proto3 syntax. "
915 "To define 'optional' fields in Proto3, simply remove the "
916 "'optional' label, as fields are 'optional' by default.");
917 }
918 }
919 }
920
921 return ParseMessageFieldNoLabel(field, messages, parent_location,
922 location_field_number_for_nested_type,
923 field_location, containing_file);
924 }
925
ParseMessageFieldNoLabel(FieldDescriptorProto * field,RepeatedPtrField<DescriptorProto> * messages,const LocationRecorder & parent_location,int location_field_number_for_nested_type,const LocationRecorder & field_location,const FileDescriptorProto * containing_file)926 bool Parser::ParseMessageFieldNoLabel(
927 FieldDescriptorProto* field, RepeatedPtrField<DescriptorProto>* messages,
928 const LocationRecorder& parent_location,
929 int location_field_number_for_nested_type,
930 const LocationRecorder& field_location,
931 const FileDescriptorProto* containing_file) {
932 MapField map_field;
933 // Parse type.
934 {
935 LocationRecorder location(field_location); // add path later
936 location.RecordLegacyLocation(field, DescriptorPool::ErrorCollector::TYPE);
937
938 bool type_parsed = false;
939 FieldDescriptorProto::Type type = FieldDescriptorProto::TYPE_INT32;
940 std::string type_name;
941
942 // Special case map field. We only treat the field as a map field if the
943 // field type name starts with the word "map" with a following "<".
944 if (TryConsume("map")) {
945 if (LookingAt("<")) {
946 map_field.is_map_field = true;
947 } else {
948 // False positive
949 type_parsed = true;
950 type_name = "map";
951 }
952 }
953 if (map_field.is_map_field) {
954 if (field->has_oneof_index()) {
955 AddError("Map fields are not allowed in oneofs.");
956 return false;
957 }
958 if (field->has_label()) {
959 AddError(
960 "Field labels (required/optional/repeated) are not allowed on "
961 "map fields.");
962 return false;
963 }
964 if (field->has_extendee()) {
965 AddError("Map fields are not allowed to be extensions.");
966 return false;
967 }
968 field->set_label(FieldDescriptorProto::LABEL_REPEATED);
969 DO(Consume("<"));
970 DO(ParseType(&map_field.key_type, &map_field.key_type_name));
971 DO(Consume(","));
972 DO(ParseType(&map_field.value_type, &map_field.value_type_name));
973 DO(Consume(">"));
974 // Defer setting of the type name of the map field until the
975 // field name is parsed. Add the source location though.
976 location.AddPath(FieldDescriptorProto::kTypeNameFieldNumber);
977 } else {
978 // Handle the case where no explicit label is given for a non-map field.
979 if (!field->has_label() && DefaultToOptionalFields()) {
980 field->set_label(FieldDescriptorProto::LABEL_OPTIONAL);
981 }
982 if (!field->has_label()) {
983 AddError("Expected \"required\", \"optional\", or \"repeated\".");
984 // We can actually reasonably recover here by just assuming the user
985 // forgot the label altogether.
986 field->set_label(FieldDescriptorProto::LABEL_OPTIONAL);
987 }
988
989 // Handle the case where the actual type is a message or enum named "map",
990 // which we already consumed in the code above.
991 if (!type_parsed) {
992 DO(ParseType(&type, &type_name));
993 }
994 if (type_name.empty()) {
995 location.AddPath(FieldDescriptorProto::kTypeFieldNumber);
996 field->set_type(type);
997 } else {
998 location.AddPath(FieldDescriptorProto::kTypeNameFieldNumber);
999 field->set_type_name(type_name);
1000 }
1001 }
1002 }
1003
1004 // Parse name and '='.
1005 io::Tokenizer::Token name_token = input_->current();
1006 {
1007 LocationRecorder location(field_location,
1008 FieldDescriptorProto::kNameFieldNumber);
1009 location.RecordLegacyLocation(field, DescriptorPool::ErrorCollector::NAME);
1010 DO(ConsumeIdentifier(field->mutable_name(), "Expected field name."));
1011
1012 if (!IsLowerUnderscore(field->name())) {
1013 AddWarning(
1014 "Field name should be lowercase. Found: " + field->name() +
1015 ". See: https://developers.google.com/protocol-buffers/docs/style");
1016 }
1017 if (IsNumberFollowUnderscore(field->name())) {
1018 AddWarning(
1019 "Number should not come right after an underscore. Found: " +
1020 field->name() +
1021 ". See: https://developers.google.com/protocol-buffers/docs/style");
1022 }
1023 }
1024 DO(Consume("=", "Missing field number."));
1025
1026 // Parse field number.
1027 {
1028 LocationRecorder location(field_location,
1029 FieldDescriptorProto::kNumberFieldNumber);
1030 location.RecordLegacyLocation(field,
1031 DescriptorPool::ErrorCollector::NUMBER);
1032 int number;
1033 DO(ConsumeInteger(&number, "Expected field number."));
1034 field->set_number(number);
1035 }
1036
1037 // Parse options.
1038 DO(ParseFieldOptions(field, field_location, containing_file));
1039
1040 // Deal with groups.
1041 if (field->has_type() && field->type() == FieldDescriptorProto::TYPE_GROUP) {
1042 // Awkward: Since a group declares both a message type and a field, we
1043 // have to create overlapping locations.
1044 LocationRecorder group_location(parent_location);
1045 group_location.StartAt(field_location);
1046 group_location.AddPath(location_field_number_for_nested_type);
1047 group_location.AddPath(messages->size());
1048
1049 DescriptorProto* group = messages->Add();
1050 group->set_name(field->name());
1051
1052 // Record name location to match the field name's location.
1053 {
1054 LocationRecorder location(group_location,
1055 DescriptorProto::kNameFieldNumber);
1056 location.StartAt(name_token);
1057 location.EndAt(name_token);
1058 location.RecordLegacyLocation(group,
1059 DescriptorPool::ErrorCollector::NAME);
1060 }
1061
1062 // The field's type_name also comes from the name. Confusing!
1063 {
1064 LocationRecorder location(field_location,
1065 FieldDescriptorProto::kTypeNameFieldNumber);
1066 location.StartAt(name_token);
1067 location.EndAt(name_token);
1068 }
1069
1070 // As a hack for backwards-compatibility, we force the group name to start
1071 // with a capital letter and lower-case the field name. New code should
1072 // not use groups; it should use nested messages.
1073 if (group->name()[0] < 'A' || 'Z' < group->name()[0]) {
1074 AddError(name_token.line, name_token.column,
1075 "Group names must start with a capital letter.");
1076 }
1077 LowerString(field->mutable_name());
1078
1079 field->set_type_name(group->name());
1080 if (LookingAt("{")) {
1081 DO(ParseMessageBlock(group, group_location, containing_file));
1082 } else {
1083 AddError("Missing group body.");
1084 return false;
1085 }
1086 } else {
1087 DO(ConsumeEndOfDeclaration(";", &field_location));
1088 }
1089
1090 // Create a map entry type if this is a map field.
1091 if (map_field.is_map_field) {
1092 GenerateMapEntry(map_field, field, messages);
1093 }
1094
1095 return true;
1096 }
1097
GenerateMapEntry(const MapField & map_field,FieldDescriptorProto * field,RepeatedPtrField<DescriptorProto> * messages)1098 void Parser::GenerateMapEntry(const MapField& map_field,
1099 FieldDescriptorProto* field,
1100 RepeatedPtrField<DescriptorProto>* messages) {
1101 DescriptorProto* entry = messages->Add();
1102 std::string entry_name = MapEntryName(field->name());
1103 field->set_type_name(entry_name);
1104 entry->set_name(entry_name);
1105 entry->mutable_options()->set_map_entry(true);
1106 FieldDescriptorProto* key_field = entry->add_field();
1107 key_field->set_name("key");
1108 key_field->set_label(FieldDescriptorProto::LABEL_OPTIONAL);
1109 key_field->set_number(1);
1110 if (map_field.key_type_name.empty()) {
1111 key_field->set_type(map_field.key_type);
1112 } else {
1113 key_field->set_type_name(map_field.key_type_name);
1114 }
1115 FieldDescriptorProto* value_field = entry->add_field();
1116 value_field->set_name("value");
1117 value_field->set_label(FieldDescriptorProto::LABEL_OPTIONAL);
1118 value_field->set_number(2);
1119 if (map_field.value_type_name.empty()) {
1120 value_field->set_type(map_field.value_type);
1121 } else {
1122 value_field->set_type_name(map_field.value_type_name);
1123 }
1124 // Propagate the "enforce_utf8" option to key and value fields if they
1125 // are strings. This helps simplify the implementation of code generators
1126 // and also reflection-based parsing code.
1127 //
1128 // The following definition:
1129 // message Foo {
1130 // map<string, string> value = 1 [enforce_utf8 = false];
1131 // }
1132 // will be interpreted as:
1133 // message Foo {
1134 // message ValueEntry {
1135 // option map_entry = true;
1136 // string key = 1 [enforce_utf8 = false];
1137 // string value = 2 [enforce_utf8 = false];
1138 // }
1139 // repeated ValueEntry value = 1 [enforce_utf8 = false];
1140 // }
1141 //
1142 // TODO(xiaofeng): Remove this when the "enforce_utf8" option is removed
1143 // from protocol compiler.
1144 for (int i = 0; i < field->options().uninterpreted_option_size(); ++i) {
1145 const UninterpretedOption& option =
1146 field->options().uninterpreted_option(i);
1147 if (option.name_size() == 1 &&
1148 option.name(0).name_part() == "enforce_utf8" &&
1149 !option.name(0).is_extension()) {
1150 if (key_field->type() == FieldDescriptorProto::TYPE_STRING) {
1151 key_field->mutable_options()->add_uninterpreted_option()->CopyFrom(
1152 option);
1153 }
1154 if (value_field->type() == FieldDescriptorProto::TYPE_STRING) {
1155 value_field->mutable_options()->add_uninterpreted_option()->CopyFrom(
1156 option);
1157 }
1158 }
1159 }
1160 }
1161
ParseFieldOptions(FieldDescriptorProto * field,const LocationRecorder & field_location,const FileDescriptorProto * containing_file)1162 bool Parser::ParseFieldOptions(FieldDescriptorProto* field,
1163 const LocationRecorder& field_location,
1164 const FileDescriptorProto* containing_file) {
1165 if (!LookingAt("[")) return true;
1166
1167 LocationRecorder location(field_location,
1168 FieldDescriptorProto::kOptionsFieldNumber);
1169
1170 DO(Consume("["));
1171
1172 // Parse field options.
1173 do {
1174 if (LookingAt("default")) {
1175 // We intentionally pass field_location rather than location here, since
1176 // the default value is not actually an option.
1177 DO(ParseDefaultAssignment(field, field_location, containing_file));
1178 } else if (LookingAt("json_name")) {
1179 // Like default value, this "json_name" is not an actual option.
1180 DO(ParseJsonName(field, field_location, containing_file));
1181 } else {
1182 DO(ParseOption(field->mutable_options(), location, containing_file,
1183 OPTION_ASSIGNMENT));
1184 }
1185 } while (TryConsume(","));
1186
1187 DO(Consume("]"));
1188 return true;
1189 }
1190
ParseDefaultAssignment(FieldDescriptorProto * field,const LocationRecorder & field_location,const FileDescriptorProto * containing_file)1191 bool Parser::ParseDefaultAssignment(
1192 FieldDescriptorProto* field, const LocationRecorder& field_location,
1193 const FileDescriptorProto* containing_file) {
1194 if (field->has_default_value()) {
1195 AddError("Already set option \"default\".");
1196 field->clear_default_value();
1197 }
1198
1199 DO(Consume("default"));
1200 DO(Consume("="));
1201
1202 LocationRecorder location(field_location,
1203 FieldDescriptorProto::kDefaultValueFieldNumber);
1204 location.RecordLegacyLocation(field,
1205 DescriptorPool::ErrorCollector::DEFAULT_VALUE);
1206 std::string* default_value = field->mutable_default_value();
1207
1208 if (!field->has_type()) {
1209 // The field has a type name, but we don't know if it is a message or an
1210 // enum yet. (If it were a primitive type, |field| would have a type set
1211 // already.) In this case, simply take the current string as the default
1212 // value; we will catch the error later if it is not a valid enum value.
1213 // (N.B. that we do not check whether the current token is an identifier:
1214 // doing so throws strange errors when the user mistypes a primitive
1215 // typename and we assume it's an enum. E.g.: "optional int foo = 1 [default
1216 // = 42]". In such a case the fundamental error is really that "int" is not
1217 // a type, not that "42" is not an identifier. See b/12533582.)
1218 *default_value = input_->current().text;
1219 input_->Next();
1220 return true;
1221 }
1222
1223 switch (field->type()) {
1224 case FieldDescriptorProto::TYPE_INT32:
1225 case FieldDescriptorProto::TYPE_INT64:
1226 case FieldDescriptorProto::TYPE_SINT32:
1227 case FieldDescriptorProto::TYPE_SINT64:
1228 case FieldDescriptorProto::TYPE_SFIXED32:
1229 case FieldDescriptorProto::TYPE_SFIXED64: {
1230 uint64 max_value = kint64max;
1231 if (field->type() == FieldDescriptorProto::TYPE_INT32 ||
1232 field->type() == FieldDescriptorProto::TYPE_SINT32 ||
1233 field->type() == FieldDescriptorProto::TYPE_SFIXED32) {
1234 max_value = kint32max;
1235 }
1236
1237 // These types can be negative.
1238 if (TryConsume("-")) {
1239 default_value->append("-");
1240 // Two's complement always has one more negative value than positive.
1241 ++max_value;
1242 }
1243 // Parse the integer to verify that it is not out-of-range.
1244 uint64 value;
1245 DO(ConsumeInteger64(max_value, &value,
1246 "Expected integer for field default value."));
1247 // And stringify it again.
1248 default_value->append(StrCat(value));
1249 break;
1250 }
1251
1252 case FieldDescriptorProto::TYPE_UINT32:
1253 case FieldDescriptorProto::TYPE_UINT64:
1254 case FieldDescriptorProto::TYPE_FIXED32:
1255 case FieldDescriptorProto::TYPE_FIXED64: {
1256 uint64 max_value = kuint64max;
1257 if (field->type() == FieldDescriptorProto::TYPE_UINT32 ||
1258 field->type() == FieldDescriptorProto::TYPE_FIXED32) {
1259 max_value = kuint32max;
1260 }
1261
1262 // Numeric, not negative.
1263 if (TryConsume("-")) {
1264 AddError("Unsigned field can't have negative default value.");
1265 }
1266 // Parse the integer to verify that it is not out-of-range.
1267 uint64 value;
1268 DO(ConsumeInteger64(max_value, &value,
1269 "Expected integer for field default value."));
1270 // And stringify it again.
1271 default_value->append(StrCat(value));
1272 break;
1273 }
1274
1275 case FieldDescriptorProto::TYPE_FLOAT:
1276 case FieldDescriptorProto::TYPE_DOUBLE:
1277 // These types can be negative.
1278 if (TryConsume("-")) {
1279 default_value->append("-");
1280 }
1281 // Parse the integer because we have to convert hex integers to decimal
1282 // floats.
1283 double value;
1284 DO(ConsumeNumber(&value, "Expected number."));
1285 // And stringify it again.
1286 default_value->append(SimpleDtoa(value));
1287 break;
1288
1289 case FieldDescriptorProto::TYPE_BOOL:
1290 if (TryConsume("true")) {
1291 default_value->assign("true");
1292 } else if (TryConsume("false")) {
1293 default_value->assign("false");
1294 } else {
1295 AddError("Expected \"true\" or \"false\".");
1296 return false;
1297 }
1298 break;
1299
1300 case FieldDescriptorProto::TYPE_STRING:
1301 // Note: When file opton java_string_check_utf8 is true, if a
1302 // non-string representation (eg byte[]) is later supported, it must
1303 // be checked for UTF-8-ness.
1304 DO(ConsumeString(default_value,
1305 "Expected string for field default "
1306 "value."));
1307 break;
1308
1309 case FieldDescriptorProto::TYPE_BYTES:
1310 DO(ConsumeString(default_value, "Expected string."));
1311 *default_value = CEscape(*default_value);
1312 break;
1313
1314 case FieldDescriptorProto::TYPE_ENUM:
1315 DO(ConsumeIdentifier(default_value,
1316 "Expected enum identifier for field "
1317 "default value."));
1318 break;
1319
1320 case FieldDescriptorProto::TYPE_MESSAGE:
1321 case FieldDescriptorProto::TYPE_GROUP:
1322 AddError("Messages can't have default values.");
1323 return false;
1324 }
1325
1326 return true;
1327 }
1328
ParseJsonName(FieldDescriptorProto * field,const LocationRecorder & field_location,const FileDescriptorProto * containing_file)1329 bool Parser::ParseJsonName(FieldDescriptorProto* field,
1330 const LocationRecorder& field_location,
1331 const FileDescriptorProto* containing_file) {
1332 if (field->has_json_name()) {
1333 AddError("Already set option \"json_name\".");
1334 field->clear_json_name();
1335 }
1336
1337 LocationRecorder location(field_location,
1338 FieldDescriptorProto::kJsonNameFieldNumber);
1339 location.RecordLegacyLocation(field,
1340 DescriptorPool::ErrorCollector::OPTION_NAME);
1341
1342 DO(Consume("json_name"));
1343 DO(Consume("="));
1344
1345 LocationRecorder value_location(location);
1346 value_location.RecordLegacyLocation(
1347 field, DescriptorPool::ErrorCollector::OPTION_VALUE);
1348
1349 DO(ConsumeString(field->mutable_json_name(),
1350 "Expected string for JSON name."));
1351 return true;
1352 }
1353
ParseOptionNamePart(UninterpretedOption * uninterpreted_option,const LocationRecorder & part_location,const FileDescriptorProto * containing_file)1354 bool Parser::ParseOptionNamePart(UninterpretedOption* uninterpreted_option,
1355 const LocationRecorder& part_location,
1356 const FileDescriptorProto* containing_file) {
1357 UninterpretedOption::NamePart* name = uninterpreted_option->add_name();
1358 std::string identifier; // We parse identifiers into this string.
1359 if (LookingAt("(")) { // This is an extension.
1360 DO(Consume("("));
1361
1362 {
1363 LocationRecorder location(
1364 part_location, UninterpretedOption::NamePart::kNamePartFieldNumber);
1365 // An extension name consists of dot-separated identifiers, and may begin
1366 // with a dot.
1367 if (LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) {
1368 DO(ConsumeIdentifier(&identifier, "Expected identifier."));
1369 name->mutable_name_part()->append(identifier);
1370 }
1371 while (LookingAt(".")) {
1372 DO(Consume("."));
1373 name->mutable_name_part()->append(".");
1374 DO(ConsumeIdentifier(&identifier, "Expected identifier."));
1375 name->mutable_name_part()->append(identifier);
1376 }
1377 }
1378
1379 DO(Consume(")"));
1380 name->set_is_extension(true);
1381 } else { // This is a regular field.
1382 LocationRecorder location(
1383 part_location, UninterpretedOption::NamePart::kNamePartFieldNumber);
1384 DO(ConsumeIdentifier(&identifier, "Expected identifier."));
1385 name->mutable_name_part()->append(identifier);
1386 name->set_is_extension(false);
1387 }
1388 return true;
1389 }
1390
ParseUninterpretedBlock(std::string * value)1391 bool Parser::ParseUninterpretedBlock(std::string* value) {
1392 // Note that enclosing braces are not added to *value.
1393 // We do NOT use ConsumeEndOfStatement for this brace because it's delimiting
1394 // an expression, not a block of statements.
1395 DO(Consume("{"));
1396 int brace_depth = 1;
1397 while (!AtEnd()) {
1398 if (LookingAt("{")) {
1399 brace_depth++;
1400 } else if (LookingAt("}")) {
1401 brace_depth--;
1402 if (brace_depth == 0) {
1403 input_->Next();
1404 return true;
1405 }
1406 }
1407 // TODO(sanjay): Interpret line/column numbers to preserve formatting
1408 if (!value->empty()) value->push_back(' ');
1409 value->append(input_->current().text);
1410 input_->Next();
1411 }
1412 AddError("Unexpected end of stream while parsing aggregate value.");
1413 return false;
1414 }
1415
1416 // We don't interpret the option here. Instead we store it in an
1417 // UninterpretedOption, to be interpreted later.
ParseOption(Message * options,const LocationRecorder & options_location,const FileDescriptorProto * containing_file,OptionStyle style)1418 bool Parser::ParseOption(Message* options,
1419 const LocationRecorder& options_location,
1420 const FileDescriptorProto* containing_file,
1421 OptionStyle style) {
1422 // Create an entry in the uninterpreted_option field.
1423 const FieldDescriptor* uninterpreted_option_field =
1424 options->GetDescriptor()->FindFieldByName("uninterpreted_option");
1425 GOOGLE_CHECK(uninterpreted_option_field != NULL)
1426 << "No field named \"uninterpreted_option\" in the Options proto.";
1427
1428 const Reflection* reflection = options->GetReflection();
1429
1430 LocationRecorder location(
1431 options_location, uninterpreted_option_field->number(),
1432 reflection->FieldSize(*options, uninterpreted_option_field));
1433
1434 if (style == OPTION_STATEMENT) {
1435 DO(Consume("option"));
1436 }
1437
1438 UninterpretedOption* uninterpreted_option =
1439 down_cast<UninterpretedOption*>(options->GetReflection()->AddMessage(
1440 options, uninterpreted_option_field));
1441
1442 // Parse dot-separated name.
1443 {
1444 LocationRecorder name_location(location,
1445 UninterpretedOption::kNameFieldNumber);
1446 name_location.RecordLegacyLocation(
1447 uninterpreted_option, DescriptorPool::ErrorCollector::OPTION_NAME);
1448
1449 {
1450 LocationRecorder part_location(name_location,
1451 uninterpreted_option->name_size());
1452 DO(ParseOptionNamePart(uninterpreted_option, part_location,
1453 containing_file));
1454 }
1455
1456 while (LookingAt(".")) {
1457 DO(Consume("."));
1458 LocationRecorder part_location(name_location,
1459 uninterpreted_option->name_size());
1460 DO(ParseOptionNamePart(uninterpreted_option, part_location,
1461 containing_file));
1462 }
1463 }
1464
1465 DO(Consume("="));
1466
1467 {
1468 LocationRecorder value_location(location);
1469 value_location.RecordLegacyLocation(
1470 uninterpreted_option, DescriptorPool::ErrorCollector::OPTION_VALUE);
1471
1472 // All values are a single token, except for negative numbers, which consist
1473 // of a single '-' symbol, followed by a positive number.
1474 bool is_negative = TryConsume("-");
1475
1476 switch (input_->current().type) {
1477 case io::Tokenizer::TYPE_START:
1478 GOOGLE_LOG(FATAL) << "Trying to read value before any tokens have been read.";
1479 return false;
1480
1481 case io::Tokenizer::TYPE_END:
1482 AddError("Unexpected end of stream while parsing option value.");
1483 return false;
1484
1485 case io::Tokenizer::TYPE_IDENTIFIER: {
1486 value_location.AddPath(
1487 UninterpretedOption::kIdentifierValueFieldNumber);
1488 if (is_negative) {
1489 AddError("Invalid '-' symbol before identifier.");
1490 return false;
1491 }
1492 std::string value;
1493 DO(ConsumeIdentifier(&value, "Expected identifier."));
1494 uninterpreted_option->set_identifier_value(value);
1495 break;
1496 }
1497
1498 case io::Tokenizer::TYPE_INTEGER: {
1499 uint64 value;
1500 uint64 max_value =
1501 is_negative ? static_cast<uint64>(kint64max) + 1 : kuint64max;
1502 DO(ConsumeInteger64(max_value, &value, "Expected integer."));
1503 if (is_negative) {
1504 value_location.AddPath(
1505 UninterpretedOption::kNegativeIntValueFieldNumber);
1506 uninterpreted_option->set_negative_int_value(
1507 static_cast<int64>(-value));
1508 } else {
1509 value_location.AddPath(
1510 UninterpretedOption::kPositiveIntValueFieldNumber);
1511 uninterpreted_option->set_positive_int_value(value);
1512 }
1513 break;
1514 }
1515
1516 case io::Tokenizer::TYPE_FLOAT: {
1517 value_location.AddPath(UninterpretedOption::kDoubleValueFieldNumber);
1518 double value;
1519 DO(ConsumeNumber(&value, "Expected number."));
1520 uninterpreted_option->set_double_value(is_negative ? -value : value);
1521 break;
1522 }
1523
1524 case io::Tokenizer::TYPE_STRING: {
1525 value_location.AddPath(UninterpretedOption::kStringValueFieldNumber);
1526 if (is_negative) {
1527 AddError("Invalid '-' symbol before string.");
1528 return false;
1529 }
1530 std::string value;
1531 DO(ConsumeString(&value, "Expected string."));
1532 uninterpreted_option->set_string_value(value);
1533 break;
1534 }
1535
1536 case io::Tokenizer::TYPE_SYMBOL:
1537 if (LookingAt("{")) {
1538 value_location.AddPath(
1539 UninterpretedOption::kAggregateValueFieldNumber);
1540 DO(ParseUninterpretedBlock(
1541 uninterpreted_option->mutable_aggregate_value()));
1542 } else {
1543 AddError("Expected option value.");
1544 return false;
1545 }
1546 break;
1547 }
1548 }
1549
1550 if (style == OPTION_STATEMENT) {
1551 DO(ConsumeEndOfDeclaration(";", &location));
1552 }
1553
1554 return true;
1555 }
1556
ParseExtensions(DescriptorProto * message,const LocationRecorder & extensions_location,const FileDescriptorProto * containing_file)1557 bool Parser::ParseExtensions(DescriptorProto* message,
1558 const LocationRecorder& extensions_location,
1559 const FileDescriptorProto* containing_file) {
1560 // Parse the declaration.
1561 DO(Consume("extensions"));
1562
1563 int old_range_size = message->extension_range_size();
1564
1565 do {
1566 // Note that kExtensionRangeFieldNumber was already pushed by the parent.
1567 LocationRecorder location(extensions_location,
1568 message->extension_range_size());
1569
1570 DescriptorProto::ExtensionRange* range = message->add_extension_range();
1571 location.RecordLegacyLocation(range,
1572 DescriptorPool::ErrorCollector::NUMBER);
1573
1574 int start, end;
1575 io::Tokenizer::Token start_token;
1576
1577 {
1578 LocationRecorder start_location(
1579 location, DescriptorProto::ExtensionRange::kStartFieldNumber);
1580 start_token = input_->current();
1581 DO(ConsumeInteger(&start, "Expected field number range."));
1582 }
1583
1584 if (TryConsume("to")) {
1585 LocationRecorder end_location(
1586 location, DescriptorProto::ExtensionRange::kEndFieldNumber);
1587 if (TryConsume("max")) {
1588 // Set to the sentinel value - 1 since we increment the value below.
1589 // The actual value of the end of the range should be set with
1590 // AdjustExtensionRangesWithMaxEndNumber.
1591 end = kMaxRangeSentinel - 1;
1592 } else {
1593 DO(ConsumeInteger(&end, "Expected integer."));
1594 }
1595 } else {
1596 LocationRecorder end_location(
1597 location, DescriptorProto::ExtensionRange::kEndFieldNumber);
1598 end_location.StartAt(start_token);
1599 end_location.EndAt(start_token);
1600 end = start;
1601 }
1602
1603 // Users like to specify inclusive ranges, but in code we like the end
1604 // number to be exclusive.
1605 ++end;
1606
1607 range->set_start(start);
1608 range->set_end(end);
1609 } while (TryConsume(","));
1610
1611 if (LookingAt("[")) {
1612 int range_number_index = extensions_location.CurrentPathSize();
1613 SourceCodeInfo info;
1614
1615 // Parse extension range options in the first range.
1616 ExtensionRangeOptions* options =
1617 message->mutable_extension_range(old_range_size)->mutable_options();
1618
1619 {
1620 LocationRecorder index_location(
1621 extensions_location, 0 /* we fill this in w/ actual index below */,
1622 &info);
1623 LocationRecorder location(
1624 index_location, DescriptorProto::ExtensionRange::kOptionsFieldNumber);
1625 DO(Consume("["));
1626
1627 do {
1628 DO(ParseOption(options, location, containing_file, OPTION_ASSIGNMENT));
1629 } while (TryConsume(","));
1630
1631 DO(Consume("]"));
1632 }
1633
1634 // Then copy the extension range options to all of the other ranges we've
1635 // parsed.
1636 for (int i = old_range_size + 1; i < message->extension_range_size(); i++) {
1637 message->mutable_extension_range(i)->mutable_options()->CopyFrom(
1638 *options);
1639 }
1640 // and copy source locations to the other ranges, too
1641 for (int i = old_range_size; i < message->extension_range_size(); i++) {
1642 for (int j = 0; j < info.location_size(); j++) {
1643 if (info.location(j).path_size() == range_number_index + 1) {
1644 // this location's path is up to the extension range index, but
1645 // doesn't include options; so it's redundant with location above
1646 continue;
1647 }
1648 SourceCodeInfo_Location* dest = source_code_info_->add_location();
1649 *dest = info.location(j);
1650 dest->set_path(range_number_index, i);
1651 }
1652 }
1653 }
1654
1655 DO(ConsumeEndOfDeclaration(";", &extensions_location));
1656 return true;
1657 }
1658
1659 // This is similar to extension range parsing, except that it accepts field
1660 // name literals.
ParseReserved(DescriptorProto * message,const LocationRecorder & message_location)1661 bool Parser::ParseReserved(DescriptorProto* message,
1662 const LocationRecorder& message_location) {
1663 io::Tokenizer::Token start_token = input_->current();
1664 // Parse the declaration.
1665 DO(Consume("reserved"));
1666 if (LookingAtType(io::Tokenizer::TYPE_STRING)) {
1667 LocationRecorder location(message_location,
1668 DescriptorProto::kReservedNameFieldNumber);
1669 location.StartAt(start_token);
1670 return ParseReservedNames(message, location);
1671 } else {
1672 LocationRecorder location(message_location,
1673 DescriptorProto::kReservedRangeFieldNumber);
1674 location.StartAt(start_token);
1675 return ParseReservedNumbers(message, location);
1676 }
1677 }
1678
ParseReservedNames(DescriptorProto * message,const LocationRecorder & parent_location)1679 bool Parser::ParseReservedNames(DescriptorProto* message,
1680 const LocationRecorder& parent_location) {
1681 do {
1682 LocationRecorder location(parent_location, message->reserved_name_size());
1683 DO(ConsumeString(message->add_reserved_name(), "Expected field name."));
1684 } while (TryConsume(","));
1685 DO(ConsumeEndOfDeclaration(";", &parent_location));
1686 return true;
1687 }
1688
ParseReservedNumbers(DescriptorProto * message,const LocationRecorder & parent_location)1689 bool Parser::ParseReservedNumbers(DescriptorProto* message,
1690 const LocationRecorder& parent_location) {
1691 bool first = true;
1692 do {
1693 LocationRecorder location(parent_location, message->reserved_range_size());
1694
1695 DescriptorProto::ReservedRange* range = message->add_reserved_range();
1696 int start, end;
1697 io::Tokenizer::Token start_token;
1698 {
1699 LocationRecorder start_location(
1700 location, DescriptorProto::ReservedRange::kStartFieldNumber);
1701 start_token = input_->current();
1702 DO(ConsumeInteger(&start, (first ? "Expected field name or number range."
1703 : "Expected field number range.")));
1704 }
1705
1706 if (TryConsume("to")) {
1707 LocationRecorder end_location(
1708 location, DescriptorProto::ReservedRange::kEndFieldNumber);
1709 if (TryConsume("max")) {
1710 // Set to the sentinel value - 1 since we increment the value below.
1711 // The actual value of the end of the range should be set with
1712 // AdjustExtensionRangesWithMaxEndNumber.
1713 end = kMaxRangeSentinel - 1;
1714 } else {
1715 DO(ConsumeInteger(&end, "Expected integer."));
1716 }
1717 } else {
1718 LocationRecorder end_location(
1719 location, DescriptorProto::ReservedRange::kEndFieldNumber);
1720 end_location.StartAt(start_token);
1721 end_location.EndAt(start_token);
1722 end = start;
1723 }
1724
1725 // Users like to specify inclusive ranges, but in code we like the end
1726 // number to be exclusive.
1727 ++end;
1728
1729 range->set_start(start);
1730 range->set_end(end);
1731 first = false;
1732 } while (TryConsume(","));
1733
1734 DO(ConsumeEndOfDeclaration(";", &parent_location));
1735 return true;
1736 }
1737
ParseReserved(EnumDescriptorProto * message,const LocationRecorder & message_location)1738 bool Parser::ParseReserved(EnumDescriptorProto* message,
1739 const LocationRecorder& message_location) {
1740 io::Tokenizer::Token start_token = input_->current();
1741 // Parse the declaration.
1742 DO(Consume("reserved"));
1743 if (LookingAtType(io::Tokenizer::TYPE_STRING)) {
1744 LocationRecorder location(message_location,
1745 DescriptorProto::kReservedNameFieldNumber);
1746 location.StartAt(start_token);
1747 return ParseReservedNames(message, location);
1748 } else {
1749 LocationRecorder location(message_location,
1750 DescriptorProto::kReservedRangeFieldNumber);
1751 location.StartAt(start_token);
1752 return ParseReservedNumbers(message, location);
1753 }
1754 }
1755
ParseReservedNames(EnumDescriptorProto * message,const LocationRecorder & parent_location)1756 bool Parser::ParseReservedNames(EnumDescriptorProto* message,
1757 const LocationRecorder& parent_location) {
1758 do {
1759 LocationRecorder location(parent_location, message->reserved_name_size());
1760 DO(ConsumeString(message->add_reserved_name(), "Expected enum value."));
1761 } while (TryConsume(","));
1762 DO(ConsumeEndOfDeclaration(";", &parent_location));
1763 return true;
1764 }
1765
ParseReservedNumbers(EnumDescriptorProto * message,const LocationRecorder & parent_location)1766 bool Parser::ParseReservedNumbers(EnumDescriptorProto* message,
1767 const LocationRecorder& parent_location) {
1768 bool first = true;
1769 do {
1770 LocationRecorder location(parent_location, message->reserved_range_size());
1771
1772 EnumDescriptorProto::EnumReservedRange* range =
1773 message->add_reserved_range();
1774 int start, end;
1775 io::Tokenizer::Token start_token;
1776 {
1777 LocationRecorder start_location(
1778 location, EnumDescriptorProto::EnumReservedRange::kStartFieldNumber);
1779 start_token = input_->current();
1780 DO(ConsumeSignedInteger(&start,
1781 (first ? "Expected enum value or number range."
1782 : "Expected enum number range.")));
1783 }
1784
1785 if (TryConsume("to")) {
1786 LocationRecorder end_location(
1787 location, EnumDescriptorProto::EnumReservedRange::kEndFieldNumber);
1788 if (TryConsume("max")) {
1789 // This is in the enum descriptor path, which doesn't have the message
1790 // set duality to fix up, so it doesn't integrate with the sentinel.
1791 end = INT_MAX;
1792 } else {
1793 DO(ConsumeSignedInteger(&end, "Expected integer."));
1794 }
1795 } else {
1796 LocationRecorder end_location(
1797 location, EnumDescriptorProto::EnumReservedRange::kEndFieldNumber);
1798 end_location.StartAt(start_token);
1799 end_location.EndAt(start_token);
1800 end = start;
1801 }
1802
1803 range->set_start(start);
1804 range->set_end(end);
1805 first = false;
1806 } while (TryConsume(","));
1807
1808 DO(ConsumeEndOfDeclaration(";", &parent_location));
1809 return true;
1810 }
1811
ParseExtend(RepeatedPtrField<FieldDescriptorProto> * extensions,RepeatedPtrField<DescriptorProto> * messages,const LocationRecorder & parent_location,int location_field_number_for_nested_type,const LocationRecorder & extend_location,const FileDescriptorProto * containing_file)1812 bool Parser::ParseExtend(RepeatedPtrField<FieldDescriptorProto>* extensions,
1813 RepeatedPtrField<DescriptorProto>* messages,
1814 const LocationRecorder& parent_location,
1815 int location_field_number_for_nested_type,
1816 const LocationRecorder& extend_location,
1817 const FileDescriptorProto* containing_file) {
1818 DO(Consume("extend"));
1819
1820 // Parse the extendee type.
1821 io::Tokenizer::Token extendee_start = input_->current();
1822 std::string extendee;
1823 DO(ParseUserDefinedType(&extendee));
1824 io::Tokenizer::Token extendee_end = input_->previous();
1825
1826 // Parse the block.
1827 DO(ConsumeEndOfDeclaration("{", &extend_location));
1828
1829 bool is_first = true;
1830
1831 do {
1832 if (AtEnd()) {
1833 AddError("Reached end of input in extend definition (missing '}').");
1834 return false;
1835 }
1836
1837 // Note that kExtensionFieldNumber was already pushed by the parent.
1838 LocationRecorder location(extend_location, extensions->size());
1839
1840 FieldDescriptorProto* field = extensions->Add();
1841
1842 {
1843 LocationRecorder extendee_location(
1844 location, FieldDescriptorProto::kExtendeeFieldNumber);
1845 extendee_location.StartAt(extendee_start);
1846 extendee_location.EndAt(extendee_end);
1847
1848 if (is_first) {
1849 extendee_location.RecordLegacyLocation(
1850 field, DescriptorPool::ErrorCollector::EXTENDEE);
1851 is_first = false;
1852 }
1853 }
1854
1855 field->set_extendee(extendee);
1856
1857 if (!ParseMessageField(field, messages, parent_location,
1858 location_field_number_for_nested_type, location,
1859 containing_file)) {
1860 // This statement failed to parse. Skip it, but keep looping to parse
1861 // other statements.
1862 SkipStatement();
1863 }
1864 } while (!TryConsumeEndOfDeclaration("}", NULL));
1865
1866 return true;
1867 }
1868
ParseOneof(OneofDescriptorProto * oneof_decl,DescriptorProto * containing_type,int oneof_index,const LocationRecorder & oneof_location,const LocationRecorder & containing_type_location,const FileDescriptorProto * containing_file)1869 bool Parser::ParseOneof(OneofDescriptorProto* oneof_decl,
1870 DescriptorProto* containing_type, int oneof_index,
1871 const LocationRecorder& oneof_location,
1872 const LocationRecorder& containing_type_location,
1873 const FileDescriptorProto* containing_file) {
1874 DO(Consume("oneof"));
1875
1876 {
1877 LocationRecorder name_location(oneof_location,
1878 OneofDescriptorProto::kNameFieldNumber);
1879 DO(ConsumeIdentifier(oneof_decl->mutable_name(), "Expected oneof name."));
1880 }
1881
1882 DO(ConsumeEndOfDeclaration("{", &oneof_location));
1883
1884 do {
1885 if (AtEnd()) {
1886 AddError("Reached end of input in oneof definition (missing '}').");
1887 return false;
1888 }
1889
1890 if (LookingAt("option")) {
1891 LocationRecorder option_location(
1892 oneof_location, OneofDescriptorProto::kOptionsFieldNumber);
1893 if (!ParseOption(oneof_decl->mutable_options(), option_location,
1894 containing_file, OPTION_STATEMENT)) {
1895 return false;
1896 }
1897 continue;
1898 }
1899
1900 // Print a nice error if the user accidentally tries to place a label
1901 // on an individual member of a oneof.
1902 if (LookingAt("required") || LookingAt("optional") ||
1903 LookingAt("repeated")) {
1904 AddError(
1905 "Fields in oneofs must not have labels (required / optional "
1906 "/ repeated).");
1907 // We can continue parsing here because we understand what the user
1908 // meant. The error report will still make parsing fail overall.
1909 input_->Next();
1910 }
1911
1912 LocationRecorder field_location(containing_type_location,
1913 DescriptorProto::kFieldFieldNumber,
1914 containing_type->field_size());
1915
1916 FieldDescriptorProto* field = containing_type->add_field();
1917 field->set_label(FieldDescriptorProto::LABEL_OPTIONAL);
1918 field->set_oneof_index(oneof_index);
1919
1920 if (!ParseMessageFieldNoLabel(field, containing_type->mutable_nested_type(),
1921 containing_type_location,
1922 DescriptorProto::kNestedTypeFieldNumber,
1923 field_location, containing_file)) {
1924 // This statement failed to parse. Skip it, but keep looping to parse
1925 // other statements.
1926 SkipStatement();
1927 }
1928 } while (!TryConsumeEndOfDeclaration("}", NULL));
1929
1930 return true;
1931 }
1932
1933 // -------------------------------------------------------------------
1934 // Enums
1935
ParseEnumDefinition(EnumDescriptorProto * enum_type,const LocationRecorder & enum_location,const FileDescriptorProto * containing_file)1936 bool Parser::ParseEnumDefinition(EnumDescriptorProto* enum_type,
1937 const LocationRecorder& enum_location,
1938 const FileDescriptorProto* containing_file) {
1939 DO(Consume("enum"));
1940
1941 {
1942 LocationRecorder location(enum_location,
1943 EnumDescriptorProto::kNameFieldNumber);
1944 location.RecordLegacyLocation(enum_type,
1945 DescriptorPool::ErrorCollector::NAME);
1946 DO(ConsumeIdentifier(enum_type->mutable_name(), "Expected enum name."));
1947 }
1948
1949 DO(ParseEnumBlock(enum_type, enum_location, containing_file));
1950
1951 DO(ValidateEnum(enum_type));
1952
1953 return true;
1954 }
1955
ParseEnumBlock(EnumDescriptorProto * enum_type,const LocationRecorder & enum_location,const FileDescriptorProto * containing_file)1956 bool Parser::ParseEnumBlock(EnumDescriptorProto* enum_type,
1957 const LocationRecorder& enum_location,
1958 const FileDescriptorProto* containing_file) {
1959 DO(ConsumeEndOfDeclaration("{", &enum_location));
1960
1961 while (!TryConsumeEndOfDeclaration("}", NULL)) {
1962 if (AtEnd()) {
1963 AddError("Reached end of input in enum definition (missing '}').");
1964 return false;
1965 }
1966
1967 if (!ParseEnumStatement(enum_type, enum_location, containing_file)) {
1968 // This statement failed to parse. Skip it, but keep looping to parse
1969 // other statements.
1970 SkipStatement();
1971 }
1972 }
1973
1974 return true;
1975 }
1976
ParseEnumStatement(EnumDescriptorProto * enum_type,const LocationRecorder & enum_location,const FileDescriptorProto * containing_file)1977 bool Parser::ParseEnumStatement(EnumDescriptorProto* enum_type,
1978 const LocationRecorder& enum_location,
1979 const FileDescriptorProto* containing_file) {
1980 if (TryConsumeEndOfDeclaration(";", NULL)) {
1981 // empty statement; ignore
1982 return true;
1983 } else if (LookingAt("option")) {
1984 LocationRecorder location(enum_location,
1985 EnumDescriptorProto::kOptionsFieldNumber);
1986 return ParseOption(enum_type->mutable_options(), location, containing_file,
1987 OPTION_STATEMENT);
1988 } else if (LookingAt("reserved")) {
1989 return ParseReserved(enum_type, enum_location);
1990 } else {
1991 LocationRecorder location(enum_location,
1992 EnumDescriptorProto::kValueFieldNumber,
1993 enum_type->value_size());
1994 return ParseEnumConstant(enum_type->add_value(), location, containing_file);
1995 }
1996 }
1997
ParseEnumConstant(EnumValueDescriptorProto * enum_value,const LocationRecorder & enum_value_location,const FileDescriptorProto * containing_file)1998 bool Parser::ParseEnumConstant(EnumValueDescriptorProto* enum_value,
1999 const LocationRecorder& enum_value_location,
2000 const FileDescriptorProto* containing_file) {
2001 // Parse name.
2002 {
2003 LocationRecorder location(enum_value_location,
2004 EnumValueDescriptorProto::kNameFieldNumber);
2005 location.RecordLegacyLocation(enum_value,
2006 DescriptorPool::ErrorCollector::NAME);
2007 DO(ConsumeIdentifier(enum_value->mutable_name(),
2008 "Expected enum constant name."));
2009 }
2010
2011 DO(Consume("=", "Missing numeric value for enum constant."));
2012
2013 // Parse value.
2014 {
2015 LocationRecorder location(enum_value_location,
2016 EnumValueDescriptorProto::kNumberFieldNumber);
2017 location.RecordLegacyLocation(enum_value,
2018 DescriptorPool::ErrorCollector::NUMBER);
2019
2020 int number;
2021 DO(ConsumeSignedInteger(&number, "Expected integer."));
2022 enum_value->set_number(number);
2023 }
2024
2025 DO(ParseEnumConstantOptions(enum_value, enum_value_location,
2026 containing_file));
2027
2028 DO(ConsumeEndOfDeclaration(";", &enum_value_location));
2029
2030 return true;
2031 }
2032
ParseEnumConstantOptions(EnumValueDescriptorProto * value,const LocationRecorder & enum_value_location,const FileDescriptorProto * containing_file)2033 bool Parser::ParseEnumConstantOptions(
2034 EnumValueDescriptorProto* value,
2035 const LocationRecorder& enum_value_location,
2036 const FileDescriptorProto* containing_file) {
2037 if (!LookingAt("[")) return true;
2038
2039 LocationRecorder location(enum_value_location,
2040 EnumValueDescriptorProto::kOptionsFieldNumber);
2041
2042 DO(Consume("["));
2043
2044 do {
2045 DO(ParseOption(value->mutable_options(), location, containing_file,
2046 OPTION_ASSIGNMENT));
2047 } while (TryConsume(","));
2048
2049 DO(Consume("]"));
2050 return true;
2051 }
2052
2053 // -------------------------------------------------------------------
2054 // Services
2055
ParseServiceDefinition(ServiceDescriptorProto * service,const LocationRecorder & service_location,const FileDescriptorProto * containing_file)2056 bool Parser::ParseServiceDefinition(
2057 ServiceDescriptorProto* service, const LocationRecorder& service_location,
2058 const FileDescriptorProto* containing_file) {
2059 DO(Consume("service"));
2060
2061 {
2062 LocationRecorder location(service_location,
2063 ServiceDescriptorProto::kNameFieldNumber);
2064 location.RecordLegacyLocation(service,
2065 DescriptorPool::ErrorCollector::NAME);
2066 DO(ConsumeIdentifier(service->mutable_name(), "Expected service name."));
2067 }
2068
2069 DO(ParseServiceBlock(service, service_location, containing_file));
2070 return true;
2071 }
2072
ParseServiceBlock(ServiceDescriptorProto * service,const LocationRecorder & service_location,const FileDescriptorProto * containing_file)2073 bool Parser::ParseServiceBlock(ServiceDescriptorProto* service,
2074 const LocationRecorder& service_location,
2075 const FileDescriptorProto* containing_file) {
2076 DO(ConsumeEndOfDeclaration("{", &service_location));
2077
2078 while (!TryConsumeEndOfDeclaration("}", NULL)) {
2079 if (AtEnd()) {
2080 AddError("Reached end of input in service definition (missing '}').");
2081 return false;
2082 }
2083
2084 if (!ParseServiceStatement(service, service_location, containing_file)) {
2085 // This statement failed to parse. Skip it, but keep looping to parse
2086 // other statements.
2087 SkipStatement();
2088 }
2089 }
2090
2091 return true;
2092 }
2093
ParseServiceStatement(ServiceDescriptorProto * service,const LocationRecorder & service_location,const FileDescriptorProto * containing_file)2094 bool Parser::ParseServiceStatement(ServiceDescriptorProto* service,
2095 const LocationRecorder& service_location,
2096 const FileDescriptorProto* containing_file) {
2097 if (TryConsumeEndOfDeclaration(";", NULL)) {
2098 // empty statement; ignore
2099 return true;
2100 } else if (LookingAt("option")) {
2101 LocationRecorder location(service_location,
2102 ServiceDescriptorProto::kOptionsFieldNumber);
2103 return ParseOption(service->mutable_options(), location, containing_file,
2104 OPTION_STATEMENT);
2105 } else {
2106 LocationRecorder location(service_location,
2107 ServiceDescriptorProto::kMethodFieldNumber,
2108 service->method_size());
2109 return ParseServiceMethod(service->add_method(), location, containing_file);
2110 }
2111 }
2112
ParseServiceMethod(MethodDescriptorProto * method,const LocationRecorder & method_location,const FileDescriptorProto * containing_file)2113 bool Parser::ParseServiceMethod(MethodDescriptorProto* method,
2114 const LocationRecorder& method_location,
2115 const FileDescriptorProto* containing_file) {
2116 DO(Consume("rpc"));
2117
2118 {
2119 LocationRecorder location(method_location,
2120 MethodDescriptorProto::kNameFieldNumber);
2121 location.RecordLegacyLocation(method, DescriptorPool::ErrorCollector::NAME);
2122 DO(ConsumeIdentifier(method->mutable_name(), "Expected method name."));
2123 }
2124
2125 // Parse input type.
2126 DO(Consume("("));
2127 {
2128 if (LookingAt("stream")) {
2129 LocationRecorder location(
2130 method_location, MethodDescriptorProto::kClientStreamingFieldNumber);
2131 location.RecordLegacyLocation(method,
2132 DescriptorPool::ErrorCollector::OTHER);
2133 method->set_client_streaming(true);
2134 DO(Consume("stream"));
2135
2136 }
2137 LocationRecorder location(method_location,
2138 MethodDescriptorProto::kInputTypeFieldNumber);
2139 location.RecordLegacyLocation(method,
2140 DescriptorPool::ErrorCollector::INPUT_TYPE);
2141 DO(ParseUserDefinedType(method->mutable_input_type()));
2142 }
2143 DO(Consume(")"));
2144
2145 // Parse output type.
2146 DO(Consume("returns"));
2147 DO(Consume("("));
2148 {
2149 if (LookingAt("stream")) {
2150 LocationRecorder location(
2151 method_location, MethodDescriptorProto::kServerStreamingFieldNumber);
2152 location.RecordLegacyLocation(method,
2153 DescriptorPool::ErrorCollector::OTHER);
2154 DO(Consume("stream"));
2155 method->set_server_streaming(true);
2156
2157 }
2158 LocationRecorder location(method_location,
2159 MethodDescriptorProto::kOutputTypeFieldNumber);
2160 location.RecordLegacyLocation(method,
2161 DescriptorPool::ErrorCollector::OUTPUT_TYPE);
2162 DO(ParseUserDefinedType(method->mutable_output_type()));
2163 }
2164 DO(Consume(")"));
2165
2166 if (LookingAt("{")) {
2167 // Options!
2168 DO(ParseMethodOptions(method_location, containing_file,
2169 MethodDescriptorProto::kOptionsFieldNumber,
2170 method->mutable_options()));
2171 } else {
2172 DO(ConsumeEndOfDeclaration(";", &method_location));
2173 }
2174
2175 return true;
2176 }
2177
2178
ParseMethodOptions(const LocationRecorder & parent_location,const FileDescriptorProto * containing_file,const int optionsFieldNumber,Message * mutable_options)2179 bool Parser::ParseMethodOptions(const LocationRecorder& parent_location,
2180 const FileDescriptorProto* containing_file,
2181 const int optionsFieldNumber,
2182 Message* mutable_options) {
2183 // Options!
2184 ConsumeEndOfDeclaration("{", &parent_location);
2185 while (!TryConsumeEndOfDeclaration("}", NULL)) {
2186 if (AtEnd()) {
2187 AddError("Reached end of input in method options (missing '}').");
2188 return false;
2189 }
2190
2191 if (TryConsumeEndOfDeclaration(";", NULL)) {
2192 // empty statement; ignore
2193 } else {
2194 LocationRecorder location(parent_location, optionsFieldNumber);
2195 if (!ParseOption(mutable_options, location, containing_file,
2196 OPTION_STATEMENT)) {
2197 // This statement failed to parse. Skip it, but keep looping to
2198 // parse other statements.
2199 SkipStatement();
2200 }
2201 }
2202 }
2203
2204 return true;
2205 }
2206
2207 // -------------------------------------------------------------------
2208
ParseLabel(FieldDescriptorProto::Label * label,const FileDescriptorProto * containing_file)2209 bool Parser::ParseLabel(FieldDescriptorProto::Label* label,
2210 const FileDescriptorProto* containing_file) {
2211 if (TryConsume("optional")) {
2212 *label = FieldDescriptorProto::LABEL_OPTIONAL;
2213 return true;
2214 } else if (TryConsume("repeated")) {
2215 *label = FieldDescriptorProto::LABEL_REPEATED;
2216 return true;
2217 } else if (TryConsume("required")) {
2218 *label = FieldDescriptorProto::LABEL_REQUIRED;
2219 return true;
2220 }
2221 return false;
2222 }
2223
ParseType(FieldDescriptorProto::Type * type,std::string * type_name)2224 bool Parser::ParseType(FieldDescriptorProto::Type* type,
2225 std::string* type_name) {
2226 TypeNameMap::const_iterator iter = kTypeNames.find(input_->current().text);
2227 if (iter != kTypeNames.end()) {
2228 *type = iter->second;
2229 input_->Next();
2230 } else {
2231 DO(ParseUserDefinedType(type_name));
2232 }
2233 return true;
2234 }
2235
ParseUserDefinedType(std::string * type_name)2236 bool Parser::ParseUserDefinedType(std::string* type_name) {
2237 type_name->clear();
2238
2239 TypeNameMap::const_iterator iter = kTypeNames.find(input_->current().text);
2240 if (iter != kTypeNames.end()) {
2241 // Note: The only place enum types are allowed is for field types, but
2242 // if we are parsing a field type then we would not get here because
2243 // primitives are allowed there as well. So this error message doesn't
2244 // need to account for enums.
2245 AddError("Expected message type.");
2246
2247 // Pretend to accept this type so that we can go on parsing.
2248 *type_name = input_->current().text;
2249 input_->Next();
2250 return true;
2251 }
2252
2253 // A leading "." means the name is fully-qualified.
2254 if (TryConsume(".")) type_name->append(".");
2255
2256 // Consume the first part of the name.
2257 std::string identifier;
2258 DO(ConsumeIdentifier(&identifier, "Expected type name."));
2259 type_name->append(identifier);
2260
2261 // Consume more parts.
2262 while (TryConsume(".")) {
2263 type_name->append(".");
2264 DO(ConsumeIdentifier(&identifier, "Expected identifier."));
2265 type_name->append(identifier);
2266 }
2267
2268 return true;
2269 }
2270
2271 // ===================================================================
2272
ParsePackage(FileDescriptorProto * file,const LocationRecorder & root_location,const FileDescriptorProto * containing_file)2273 bool Parser::ParsePackage(FileDescriptorProto* file,
2274 const LocationRecorder& root_location,
2275 const FileDescriptorProto* containing_file) {
2276 if (file->has_package()) {
2277 AddError("Multiple package definitions.");
2278 // Don't append the new package to the old one. Just replace it. Not
2279 // that it really matters since this is an error anyway.
2280 file->clear_package();
2281 }
2282
2283 LocationRecorder location(root_location,
2284 FileDescriptorProto::kPackageFieldNumber);
2285 location.RecordLegacyLocation(file, DescriptorPool::ErrorCollector::NAME);
2286
2287 DO(Consume("package"));
2288
2289 while (true) {
2290 std::string identifier;
2291 DO(ConsumeIdentifier(&identifier, "Expected identifier."));
2292 file->mutable_package()->append(identifier);
2293 if (!TryConsume(".")) break;
2294 file->mutable_package()->append(".");
2295 }
2296
2297 DO(ConsumeEndOfDeclaration(";", &location));
2298
2299 return true;
2300 }
2301
ParseImport(RepeatedPtrField<std::string> * dependency,RepeatedField<int32> * public_dependency,RepeatedField<int32> * weak_dependency,const LocationRecorder & root_location,const FileDescriptorProto * containing_file)2302 bool Parser::ParseImport(RepeatedPtrField<std::string>* dependency,
2303 RepeatedField<int32>* public_dependency,
2304 RepeatedField<int32>* weak_dependency,
2305 const LocationRecorder& root_location,
2306 const FileDescriptorProto* containing_file) {
2307 LocationRecorder location(root_location,
2308 FileDescriptorProto::kDependencyFieldNumber,
2309 dependency->size());
2310
2311 DO(Consume("import"));
2312
2313 if (LookingAt("public")) {
2314 LocationRecorder public_location(
2315 root_location, FileDescriptorProto::kPublicDependencyFieldNumber,
2316 public_dependency->size());
2317 DO(Consume("public"));
2318 *public_dependency->Add() = dependency->size();
2319 } else if (LookingAt("weak")) {
2320 LocationRecorder weak_location(
2321 root_location, FileDescriptorProto::kWeakDependencyFieldNumber,
2322 weak_dependency->size());
2323 weak_location.RecordLegacyImportLocation(containing_file, "weak");
2324 DO(Consume("weak"));
2325 *weak_dependency->Add() = dependency->size();
2326 }
2327
2328 string import_file;
2329 DO(ConsumeString(&import_file,
2330 "Expected a string naming the file to import."));
2331 *dependency->Add() = import_file;
2332 location.RecordLegacyImportLocation(containing_file, import_file);
2333
2334 DO(ConsumeEndOfDeclaration(";", &location));
2335
2336 return true;
2337 }
2338
2339 // ===================================================================
2340
SourceLocationTable()2341 SourceLocationTable::SourceLocationTable() {}
~SourceLocationTable()2342 SourceLocationTable::~SourceLocationTable() {}
2343
Find(const Message * descriptor,DescriptorPool::ErrorCollector::ErrorLocation location,int * line,int * column) const2344 bool SourceLocationTable::Find(
2345 const Message* descriptor,
2346 DescriptorPool::ErrorCollector::ErrorLocation location, int* line,
2347 int* column) const {
2348 const std::pair<int, int>* result =
2349 FindOrNull(location_map_, std::make_pair(descriptor, location));
2350 if (result == NULL) {
2351 *line = -1;
2352 *column = 0;
2353 return false;
2354 } else {
2355 *line = result->first;
2356 *column = result->second;
2357 return true;
2358 }
2359 }
2360
FindImport(const Message * descriptor,const string & name,int * line,int * column) const2361 bool SourceLocationTable::FindImport(const Message* descriptor,
2362 const string& name, int* line,
2363 int* column) const {
2364 const std::pair<int, int>* result =
2365 FindOrNull(import_location_map_, std::make_pair(descriptor, name));
2366 if (result == nullptr) {
2367 *line = -1;
2368 *column = 0;
2369 return false;
2370 } else {
2371 *line = result->first;
2372 *column = result->second;
2373 return true;
2374 }
2375 }
2376
Add(const Message * descriptor,DescriptorPool::ErrorCollector::ErrorLocation location,int line,int column)2377 void SourceLocationTable::Add(
2378 const Message* descriptor,
2379 DescriptorPool::ErrorCollector::ErrorLocation location, int line,
2380 int column) {
2381 location_map_[std::make_pair(descriptor, location)] =
2382 std::make_pair(line, column);
2383 }
2384
AddImport(const Message * descriptor,const string & name,int line,int column)2385 void SourceLocationTable::AddImport(const Message* descriptor,
2386 const string& name, int line, int column) {
2387 import_location_map_[std::make_pair(descriptor, name)] =
2388 std::make_pair(line, column);
2389 }
2390
Clear()2391 void SourceLocationTable::Clear() { location_map_.clear(); }
2392
2393 } // namespace compiler
2394 } // namespace protobuf
2395 } // namespace google
2396