1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc. All rights reserved.
3 // https://developers.google.com/protocol-buffers/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 // * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 // * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 // * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31 // Author: kenton@google.com (Kenton Varda)
32 // Based on original Protocol Buffers design by
33 // Sanjay Ghemawat, Jeff Dean, and others.
34 //
35 // Recursive descent FTW.
36
37 #include <google/protobuf/compiler/parser.h>
38
39 #include <float.h>
40
41 #include <limits>
42 #include <unordered_map>
43 #include <unordered_set>
44
45 #include <google/protobuf/stubs/casts.h>
46 #include <google/protobuf/stubs/logging.h>
47 #include <google/protobuf/stubs/common.h>
48 #include <google/protobuf/descriptor.pb.h>
49 #include <google/protobuf/io/tokenizer.h>
50 #include <google/protobuf/descriptor.h>
51 #include <google/protobuf/wire_format.h>
52 #include <google/protobuf/stubs/strutil.h>
53 #include <google/protobuf/stubs/map_util.h>
54 #include <google/protobuf/stubs/hash.h>
55
56 namespace google {
57 namespace protobuf {
58 namespace compiler {
59
60 using internal::WireFormat;
61
62 namespace {
63
64 typedef std::unordered_map<std::string, FieldDescriptorProto::Type> TypeNameMap;
65
MakeTypeNameTable()66 TypeNameMap MakeTypeNameTable() {
67 TypeNameMap result;
68
69 result["double"] = FieldDescriptorProto::TYPE_DOUBLE;
70 result["float"] = FieldDescriptorProto::TYPE_FLOAT;
71 result["uint64"] = FieldDescriptorProto::TYPE_UINT64;
72 result["fixed64"] = FieldDescriptorProto::TYPE_FIXED64;
73 result["fixed32"] = FieldDescriptorProto::TYPE_FIXED32;
74 result["bool"] = FieldDescriptorProto::TYPE_BOOL;
75 result["string"] = FieldDescriptorProto::TYPE_STRING;
76 result["group"] = FieldDescriptorProto::TYPE_GROUP;
77
78 result["bytes"] = FieldDescriptorProto::TYPE_BYTES;
79 result["uint32"] = FieldDescriptorProto::TYPE_UINT32;
80 result["sfixed32"] = FieldDescriptorProto::TYPE_SFIXED32;
81 result["sfixed64"] = FieldDescriptorProto::TYPE_SFIXED64;
82 result["int32"] = FieldDescriptorProto::TYPE_INT32;
83 result["int64"] = FieldDescriptorProto::TYPE_INT64;
84 result["sint32"] = FieldDescriptorProto::TYPE_SINT32;
85 result["sint64"] = FieldDescriptorProto::TYPE_SINT64;
86
87 return result;
88 }
89
90 const TypeNameMap kTypeNames = MakeTypeNameTable();
91
// Camel-case the field name and append "Entry" for generated map entry name.
// e.g. map<KeyType, ValueType> foo_map => FooMapEntry
//
// Every underscore is dropped.  The first character, and the first
// non-underscore character after each underscore, is upper-cased if it is an
// ASCII lower-case letter; all other characters are copied unchanged.
std::string MapEntryName(const std::string& field_name) {
  static const char kSuffix[] = "Entry";
  std::string result;
  result.reserve(field_name.size() + sizeof(kSuffix));
  bool cap_next = true;
  for (const char c : field_name) {
    if (c == '_') {
      cap_next = true;
      continue;
    }
    // Note: Do not use ctype.h due to locales.
    const bool needs_upper = cap_next && 'a' <= c && c <= 'z';
    result.push_back(needs_upper ? static_cast<char>(c - 'a' + 'A') : c);
    cap_next = false;
  }
  result.append(kSuffix);
  return result;
}
117
// Returns true iff |c| is an ASCII upper-case letter ('A' through 'Z').
bool IsUppercase(char c) {
  return 'A' <= c && c <= 'Z';
}
119
// Returns true iff |c| is an ASCII lower-case letter ('a' through 'z').
bool IsLowercase(char c) {
  return 'a' <= c && c <= 'z';
}
121
// Returns true iff |c| is an ASCII decimal digit ('0' through '9').
bool IsNumber(char c) {
  return '0' <= c && c <= '9';
}
123
IsUpperCamelCase(const std::string & name)124 bool IsUpperCamelCase(const std::string& name) {
125 if (name.empty()) {
126 return true;
127 }
128 // Name must start with an upper case character.
129 if (!IsUppercase(name[0])) {
130 return false;
131 }
132 // Must not contains underscore.
133 for (int i = 1; i < name.length(); i++) {
134 if (name[i] == '_') {
135 return false;
136 }
137 }
138 return true;
139 }
140
IsUpperUnderscore(const std::string & name)141 bool IsUpperUnderscore(const std::string& name) {
142 for (int i = 0; i < name.length(); i++) {
143 const char c = name[i];
144 if (!IsUppercase(c) && c != '_' && !IsNumber(c)) {
145 return false;
146 }
147 }
148 return true;
149 }
150
IsLowerUnderscore(const std::string & name)151 bool IsLowerUnderscore(const std::string& name) {
152 for (int i = 0; i < name.length(); i++) {
153 const char c = name[i];
154 if (!IsLowercase(c) && c != '_' && !IsNumber(c)) {
155 return false;
156 }
157 }
158 return true;
159 }
160
IsNumberFollowUnderscore(const std::string & name)161 bool IsNumberFollowUnderscore(const std::string& name) {
162 for (int i = 1; i < name.length(); i++) {
163 const char c = name[i];
164 if (IsNumber(c) && name[i - 1] == '_') {
165 return true;
166 }
167 }
168 return false;
169 }
170
171 } // anonymous namespace
172
// Makes code slightly more readable.  The meaning of "DO(foo)" is
// "Execute foo and fail if it fails.", where failure is indicated by
// returning false.
//
// The expansion is a complete if/else whose else-branch is "return false"
// with no trailing semicolon, so the semicolon written after DO(...) at the
// call site terminates the statement.  Because it returns false from the
// enclosing function, it is only usable in functions whose return type can
// be initialized from false.
#define DO(STATEMENT) \
  if (STATEMENT) {    \
  } else              \
    return false
180
181 // ===================================================================
182
Parser()183 Parser::Parser()
184 : input_(NULL),
185 error_collector_(NULL),
186 source_location_table_(NULL),
187 had_errors_(false),
188 require_syntax_identifier_(false),
189 stop_after_syntax_identifier_(false) {
190 }
191
~Parser()192 Parser::~Parser() {}
193
194 // ===================================================================
195
LookingAt(const char * text)196 inline bool Parser::LookingAt(const char* text) {
197 return input_->current().text == text;
198 }
199
LookingAtType(io::Tokenizer::TokenType token_type)200 inline bool Parser::LookingAtType(io::Tokenizer::TokenType token_type) {
201 return input_->current().type == token_type;
202 }
203
AtEnd()204 inline bool Parser::AtEnd() { return LookingAtType(io::Tokenizer::TYPE_END); }
205
TryConsume(const char * text)206 bool Parser::TryConsume(const char* text) {
207 if (LookingAt(text)) {
208 input_->Next();
209 return true;
210 } else {
211 return false;
212 }
213 }
214
Consume(const char * text,const char * error)215 bool Parser::Consume(const char* text, const char* error) {
216 if (TryConsume(text)) {
217 return true;
218 } else {
219 AddError(error);
220 return false;
221 }
222 }
223
Consume(const char * text)224 bool Parser::Consume(const char* text) {
225 if (TryConsume(text)) {
226 return true;
227 } else {
228 AddError("Expected \"" + std::string(text) + "\".");
229 return false;
230 }
231 }
232
ConsumeIdentifier(std::string * output,const char * error)233 bool Parser::ConsumeIdentifier(std::string* output, const char* error) {
234 if (LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) {
235 *output = input_->current().text;
236 input_->Next();
237 return true;
238 } else {
239 AddError(error);
240 return false;
241 }
242 }
243
ConsumeInteger(int * output,const char * error)244 bool Parser::ConsumeInteger(int* output, const char* error) {
245 if (LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
246 uint64 value = 0;
247 if (!io::Tokenizer::ParseInteger(input_->current().text, kint32max,
248 &value)) {
249 AddError("Integer out of range.");
250 // We still return true because we did, in fact, parse an integer.
251 }
252 *output = value;
253 input_->Next();
254 return true;
255 } else {
256 AddError(error);
257 return false;
258 }
259 }
260
ConsumeSignedInteger(int * output,const char * error)261 bool Parser::ConsumeSignedInteger(int* output, const char* error) {
262 bool is_negative = false;
263 uint64 max_value = kint32max;
264 if (TryConsume("-")) {
265 is_negative = true;
266 max_value += 1;
267 }
268 uint64 value = 0;
269 DO(ConsumeInteger64(max_value, &value, error));
270 if (is_negative) value *= -1;
271 *output = value;
272 return true;
273 }
274
ConsumeInteger64(uint64 max_value,uint64 * output,const char * error)275 bool Parser::ConsumeInteger64(uint64 max_value, uint64* output,
276 const char* error) {
277 if (LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
278 if (!io::Tokenizer::ParseInteger(input_->current().text, max_value,
279 output)) {
280 AddError("Integer out of range.");
281 // We still return true because we did, in fact, parse an integer.
282 *output = 0;
283 }
284 input_->Next();
285 return true;
286 } else {
287 AddError(error);
288 return false;
289 }
290 }
291
ConsumeNumber(double * output,const char * error)292 bool Parser::ConsumeNumber(double* output, const char* error) {
293 if (LookingAtType(io::Tokenizer::TYPE_FLOAT)) {
294 *output = io::Tokenizer::ParseFloat(input_->current().text);
295 input_->Next();
296 return true;
297 } else if (LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
298 // Also accept integers.
299 uint64 value = 0;
300 if (!io::Tokenizer::ParseInteger(input_->current().text, kuint64max,
301 &value)) {
302 AddError("Integer out of range.");
303 // We still return true because we did, in fact, parse a number.
304 }
305 *output = value;
306 input_->Next();
307 return true;
308 } else if (LookingAt("inf")) {
309 *output = std::numeric_limits<double>::infinity();
310 input_->Next();
311 return true;
312 } else if (LookingAt("nan")) {
313 *output = std::numeric_limits<double>::quiet_NaN();
314 input_->Next();
315 return true;
316 } else {
317 AddError(error);
318 return false;
319 }
320 }
321
ConsumeString(std::string * output,const char * error)322 bool Parser::ConsumeString(std::string* output, const char* error) {
323 if (LookingAtType(io::Tokenizer::TYPE_STRING)) {
324 io::Tokenizer::ParseString(input_->current().text, output);
325 input_->Next();
326 // Allow C++ like concatenation of adjacent string tokens.
327 while (LookingAtType(io::Tokenizer::TYPE_STRING)) {
328 io::Tokenizer::ParseStringAppend(input_->current().text, output);
329 input_->Next();
330 }
331 return true;
332 } else {
333 AddError(error);
334 return false;
335 }
336 }
337
TryConsumeEndOfDeclaration(const char * text,const LocationRecorder * location)338 bool Parser::TryConsumeEndOfDeclaration(const char* text,
339 const LocationRecorder* location) {
340 if (LookingAt(text)) {
341 std::string leading, trailing;
342 std::vector<std::string> detached;
343 input_->NextWithComments(&trailing, &detached, &leading);
344
345 // Save the leading comments for next time, and recall the leading comments
346 // from last time.
347 leading.swap(upcoming_doc_comments_);
348
349 if (location != NULL) {
350 upcoming_detached_comments_.swap(detached);
351 location->AttachComments(&leading, &trailing, &detached);
352 } else if (strcmp(text, "}") == 0) {
353 // If the current location is null and we are finishing the current scope,
354 // drop pending upcoming detached comments.
355 upcoming_detached_comments_.swap(detached);
356 } else {
357 // Otherwise, append the new detached comments to the existing upcoming
358 // detached comments.
359 upcoming_detached_comments_.insert(upcoming_detached_comments_.end(),
360 detached.begin(), detached.end());
361 }
362
363 return true;
364 } else {
365 return false;
366 }
367 }
368
ConsumeEndOfDeclaration(const char * text,const LocationRecorder * location)369 bool Parser::ConsumeEndOfDeclaration(const char* text,
370 const LocationRecorder* location) {
371 if (TryConsumeEndOfDeclaration(text, location)) {
372 return true;
373 } else {
374 AddError("Expected \"" + std::string(text) + "\".");
375 return false;
376 }
377 }
378
379 // -------------------------------------------------------------------
380
AddError(int line,int column,const std::string & error)381 void Parser::AddError(int line, int column, const std::string& error) {
382 if (error_collector_ != NULL) {
383 error_collector_->AddError(line, column, error);
384 }
385 had_errors_ = true;
386 }
387
AddError(const std::string & error)388 void Parser::AddError(const std::string& error) {
389 AddError(input_->current().line, input_->current().column, error);
390 }
391
AddWarning(const std::string & warning)392 void Parser::AddWarning(const std::string& warning) {
393 if (error_collector_ != nullptr) {
394 error_collector_->AddWarning(input_->current().line,
395 input_->current().column, warning);
396 }
397 }
398
399 // -------------------------------------------------------------------
400
LocationRecorder(Parser * parser)401 Parser::LocationRecorder::LocationRecorder(Parser* parser)
402 : parser_(parser),
403 source_code_info_(parser->source_code_info_),
404 location_(parser_->source_code_info_->add_location()) {
405 location_->add_span(parser_->input_->current().line);
406 location_->add_span(parser_->input_->current().column);
407 }
408
LocationRecorder(const LocationRecorder & parent)409 Parser::LocationRecorder::LocationRecorder(const LocationRecorder& parent) {
410 Init(parent, parent.source_code_info_);
411 }
412
LocationRecorder(const LocationRecorder & parent,int path1,SourceCodeInfo * source_code_info)413 Parser::LocationRecorder::LocationRecorder(const LocationRecorder& parent,
414 int path1,
415 SourceCodeInfo* source_code_info) {
416 Init(parent, source_code_info);
417 AddPath(path1);
418 }
419
LocationRecorder(const LocationRecorder & parent,int path1)420 Parser::LocationRecorder::LocationRecorder(const LocationRecorder& parent,
421 int path1) {
422 Init(parent, parent.source_code_info_);
423 AddPath(path1);
424 }
425
LocationRecorder(const LocationRecorder & parent,int path1,int path2)426 Parser::LocationRecorder::LocationRecorder(const LocationRecorder& parent,
427 int path1, int path2) {
428 Init(parent, parent.source_code_info_);
429 AddPath(path1);
430 AddPath(path2);
431 }
432
Init(const LocationRecorder & parent,SourceCodeInfo * source_code_info)433 void Parser::LocationRecorder::Init(const LocationRecorder& parent,
434 SourceCodeInfo* source_code_info) {
435 parser_ = parent.parser_;
436 source_code_info_ = source_code_info;
437
438 location_ = source_code_info_->add_location();
439 location_->mutable_path()->CopyFrom(parent.location_->path());
440
441 location_->add_span(parser_->input_->current().line);
442 location_->add_span(parser_->input_->current().column);
443 }
444
~LocationRecorder()445 Parser::LocationRecorder::~LocationRecorder() {
446 if (location_->span_size() <= 2) {
447 EndAt(parser_->input_->previous());
448 }
449 }
450
AddPath(int path_component)451 void Parser::LocationRecorder::AddPath(int path_component) {
452 location_->add_path(path_component);
453 }
454
StartAt(const io::Tokenizer::Token & token)455 void Parser::LocationRecorder::StartAt(const io::Tokenizer::Token& token) {
456 location_->set_span(0, token.line);
457 location_->set_span(1, token.column);
458 }
459
StartAt(const LocationRecorder & other)460 void Parser::LocationRecorder::StartAt(const LocationRecorder& other) {
461 location_->set_span(0, other.location_->span(0));
462 location_->set_span(1, other.location_->span(1));
463 }
464
EndAt(const io::Tokenizer::Token & token)465 void Parser::LocationRecorder::EndAt(const io::Tokenizer::Token& token) {
466 if (token.line != location_->span(0)) {
467 location_->add_span(token.line);
468 }
469 location_->add_span(token.end_column);
470 }
471
RecordLegacyLocation(const Message * descriptor,DescriptorPool::ErrorCollector::ErrorLocation location)472 void Parser::LocationRecorder::RecordLegacyLocation(
473 const Message* descriptor,
474 DescriptorPool::ErrorCollector::ErrorLocation location) {
475 if (parser_->source_location_table_ != NULL) {
476 parser_->source_location_table_->Add(
477 descriptor, location, location_->span(0), location_->span(1));
478 }
479 }
480
RecordLegacyImportLocation(const Message * descriptor,const std::string & name)481 void Parser::LocationRecorder::RecordLegacyImportLocation(
482 const Message* descriptor, const std::string& name) {
483 if (parser_->source_location_table_ != nullptr) {
484 parser_->source_location_table_->AddImport(
485 descriptor, name, location_->span(0), location_->span(1));
486 }
487 }
488
CurrentPathSize() const489 int Parser::LocationRecorder::CurrentPathSize() const {
490 return location_->path_size();
491 }
492
AttachComments(std::string * leading,std::string * trailing,std::vector<std::string> * detached_comments) const493 void Parser::LocationRecorder::AttachComments(
494 std::string* leading, std::string* trailing,
495 std::vector<std::string>* detached_comments) const {
496 GOOGLE_CHECK(!location_->has_leading_comments());
497 GOOGLE_CHECK(!location_->has_trailing_comments());
498
499 if (!leading->empty()) {
500 location_->mutable_leading_comments()->swap(*leading);
501 }
502 if (!trailing->empty()) {
503 location_->mutable_trailing_comments()->swap(*trailing);
504 }
505 for (int i = 0; i < detached_comments->size(); ++i) {
506 location_->add_leading_detached_comments()->swap((*detached_comments)[i]);
507 }
508 detached_comments->clear();
509 }
510
511 // -------------------------------------------------------------------
512
SkipStatement()513 void Parser::SkipStatement() {
514 while (true) {
515 if (AtEnd()) {
516 return;
517 } else if (LookingAtType(io::Tokenizer::TYPE_SYMBOL)) {
518 if (TryConsumeEndOfDeclaration(";", NULL)) {
519 return;
520 } else if (TryConsume("{")) {
521 SkipRestOfBlock();
522 return;
523 } else if (LookingAt("}")) {
524 return;
525 }
526 }
527 input_->Next();
528 }
529 }
530
SkipRestOfBlock()531 void Parser::SkipRestOfBlock() {
532 while (true) {
533 if (AtEnd()) {
534 return;
535 } else if (LookingAtType(io::Tokenizer::TYPE_SYMBOL)) {
536 if (TryConsumeEndOfDeclaration("}", NULL)) {
537 return;
538 } else if (TryConsume("{")) {
539 SkipRestOfBlock();
540 }
541 }
542 input_->Next();
543 }
544 }
545
546 // ===================================================================
547
ValidateEnum(const EnumDescriptorProto * proto)548 bool Parser::ValidateEnum(const EnumDescriptorProto* proto) {
549 bool has_allow_alias = false;
550 bool allow_alias = false;
551
552 for (int i = 0; i < proto->options().uninterpreted_option_size(); i++) {
553 const UninterpretedOption option = proto->options().uninterpreted_option(i);
554 if (option.name_size() > 1) {
555 continue;
556 }
557 if (!option.name(0).is_extension() &&
558 option.name(0).name_part() == "allow_alias") {
559 has_allow_alias = true;
560 if (option.identifier_value() == "true") {
561 allow_alias = true;
562 }
563 break;
564 }
565 }
566
567 if (has_allow_alias && !allow_alias) {
568 std::string error =
569 "\"" + proto->name() +
570 "\" declares 'option allow_alias = false;' which has no effect. "
571 "Please remove the declaration.";
572 // This needlessly clutters declarations with nops.
573 AddError(error);
574 return false;
575 }
576
577 std::set<int> used_values;
578 bool has_duplicates = false;
579 for (int i = 0; i < proto->value_size(); ++i) {
580 const EnumValueDescriptorProto& enum_value = proto->value(i);
581 if (used_values.find(enum_value.number()) != used_values.end()) {
582 has_duplicates = true;
583 break;
584 } else {
585 used_values.insert(enum_value.number());
586 }
587 }
588 if (allow_alias && !has_duplicates) {
589 std::string error =
590 "\"" + proto->name() +
591 "\" declares support for enum aliases but no enum values share field "
592 "numbers. Please remove the unnecessary 'option allow_alias = true;' "
593 "declaration.";
594 // Generate an error if an enum declares support for duplicate enum values
595 // and does not use it protect future authors.
596 AddError(error);
597 return false;
598 }
599
600 // Enforce that enum constants must be UPPER_CASE except in case of
601 // enum_alias.
602 if (!allow_alias) {
603 for (const auto& enum_value : proto->value()) {
604 if (!IsUpperUnderscore(enum_value.name())) {
605 AddWarning(
606 "Enum constant should be in UPPER_CASE. Found: " +
607 enum_value.name() +
608 ". See https://developers.google.com/protocol-buffers/docs/style");
609 }
610 }
611 }
612
613 return true;
614 }
615
Parse(io::Tokenizer * input,FileDescriptorProto * file)616 bool Parser::Parse(io::Tokenizer* input, FileDescriptorProto* file) {
617 input_ = input;
618 had_errors_ = false;
619 syntax_identifier_.clear();
620
621 // Note that |file| could be NULL at this point if
622 // stop_after_syntax_identifier_ is true. So, we conservatively allocate
623 // SourceCodeInfo on the stack, then swap it into the FileDescriptorProto
624 // later on.
625 SourceCodeInfo source_code_info;
626 source_code_info_ = &source_code_info;
627
628 if (LookingAtType(io::Tokenizer::TYPE_START)) {
629 // Advance to first token.
630 input_->NextWithComments(NULL, &upcoming_detached_comments_,
631 &upcoming_doc_comments_);
632 }
633
634 {
635 LocationRecorder root_location(this);
636 root_location.RecordLegacyLocation(file,
637 DescriptorPool::ErrorCollector::OTHER);
638
639 if (require_syntax_identifier_ || LookingAt("syntax")) {
640 if (!ParseSyntaxIdentifier(root_location)) {
641 // Don't attempt to parse the file if we didn't recognize the syntax
642 // identifier.
643 return false;
644 }
645 // Store the syntax into the file.
646 if (file != NULL) file->set_syntax(syntax_identifier_);
647 } else if (!stop_after_syntax_identifier_) {
648 GOOGLE_LOG(WARNING) << "No syntax specified for the proto file: " << file->name()
649 << ". Please use 'syntax = \"proto2\";' "
650 << "or 'syntax = \"proto3\";' to specify a syntax "
651 << "version. (Defaulted to proto2 syntax.)";
652 syntax_identifier_ = "proto2";
653 }
654
655 if (stop_after_syntax_identifier_) return !had_errors_;
656
657 // Repeatedly parse statements until we reach the end of the file.
658 while (!AtEnd()) {
659 if (!ParseTopLevelStatement(file, root_location)) {
660 // This statement failed to parse. Skip it, but keep looping to parse
661 // other statements.
662 SkipStatement();
663
664 if (LookingAt("}")) {
665 AddError("Unmatched \"}\".");
666 input_->NextWithComments(NULL, &upcoming_detached_comments_,
667 &upcoming_doc_comments_);
668 }
669 }
670 }
671 }
672
673 input_ = NULL;
674 source_code_info_ = NULL;
675 assert(file != NULL);
676 source_code_info.Swap(file->mutable_source_code_info());
677 return !had_errors_;
678 }
679
ParseSyntaxIdentifier(const LocationRecorder & parent)680 bool Parser::ParseSyntaxIdentifier(const LocationRecorder& parent) {
681 LocationRecorder syntax_location(parent,
682 FileDescriptorProto::kSyntaxFieldNumber);
683 DO(Consume(
684 "syntax",
685 "File must begin with a syntax statement, e.g. 'syntax = \"proto2\";'."));
686 DO(Consume("="));
687 io::Tokenizer::Token syntax_token = input_->current();
688 std::string syntax;
689 DO(ConsumeString(&syntax, "Expected syntax identifier."));
690 DO(ConsumeEndOfDeclaration(";", &syntax_location));
691
692 syntax_identifier_ = syntax;
693
694 if (syntax != "proto2" && syntax != "proto3" &&
695 !stop_after_syntax_identifier_) {
696 AddError(syntax_token.line, syntax_token.column,
697 "Unrecognized syntax identifier \"" + syntax +
698 "\". This parser "
699 "only recognizes \"proto2\" and \"proto3\".");
700 return false;
701 }
702
703 return true;
704 }
705
ParseTopLevelStatement(FileDescriptorProto * file,const LocationRecorder & root_location)706 bool Parser::ParseTopLevelStatement(FileDescriptorProto* file,
707 const LocationRecorder& root_location) {
708 if (TryConsumeEndOfDeclaration(";", NULL)) {
709 // empty statement; ignore
710 return true;
711 } else if (LookingAt("message")) {
712 LocationRecorder location(root_location,
713 FileDescriptorProto::kMessageTypeFieldNumber,
714 file->message_type_size());
715 return ParseMessageDefinition(file->add_message_type(), location, file);
716 } else if (LookingAt("enum")) {
717 LocationRecorder location(root_location,
718 FileDescriptorProto::kEnumTypeFieldNumber,
719 file->enum_type_size());
720 return ParseEnumDefinition(file->add_enum_type(), location, file);
721 } else if (LookingAt("service")) {
722 LocationRecorder location(root_location,
723 FileDescriptorProto::kServiceFieldNumber,
724 file->service_size());
725 return ParseServiceDefinition(file->add_service(), location, file);
726 } else if (LookingAt("extend")) {
727 LocationRecorder location(root_location,
728 FileDescriptorProto::kExtensionFieldNumber);
729 return ParseExtend(
730 file->mutable_extension(), file->mutable_message_type(), root_location,
731 FileDescriptorProto::kMessageTypeFieldNumber, location, file);
732 } else if (LookingAt("import")) {
733 return ParseImport(file->mutable_dependency(),
734 file->mutable_public_dependency(),
735 file->mutable_weak_dependency(), root_location, file);
736 } else if (LookingAt("package")) {
737 return ParsePackage(file, root_location, file);
738 } else if (LookingAt("option")) {
739 LocationRecorder location(root_location,
740 FileDescriptorProto::kOptionsFieldNumber);
741 return ParseOption(file->mutable_options(), location, file,
742 OPTION_STATEMENT);
743 } else {
744 AddError("Expected top-level statement (e.g. \"message\").");
745 return false;
746 }
747 }
748
749 // -------------------------------------------------------------------
750 // Messages
751
ParseMessageDefinition(DescriptorProto * message,const LocationRecorder & message_location,const FileDescriptorProto * containing_file)752 bool Parser::ParseMessageDefinition(
753 DescriptorProto* message, const LocationRecorder& message_location,
754 const FileDescriptorProto* containing_file) {
755 DO(Consume("message"));
756 {
757 LocationRecorder location(message_location,
758 DescriptorProto::kNameFieldNumber);
759 location.RecordLegacyLocation(message,
760 DescriptorPool::ErrorCollector::NAME);
761 DO(ConsumeIdentifier(message->mutable_name(), "Expected message name."));
762 if (!IsUpperCamelCase(message->name())) {
763 AddWarning(
764 "Message name should be in UpperCamelCase. Found: " +
765 message->name() +
766 ". See https://developers.google.com/protocol-buffers/docs/style");
767 }
768 }
769 DO(ParseMessageBlock(message, message_location, containing_file));
770
771 if (syntax_identifier_ == "proto3") {
772 // Add synthetic one-field oneofs for optional fields, except messages which
773 // already have presence in proto3.
774 //
775 // We have to make sure the oneof names don't conflict with any other
776 // field or oneof.
777 std::unordered_set<std::string> names;
778 for (const auto& field : message->field()) {
779 names.insert(field.name());
780 }
781 for (const auto& oneof : message->oneof_decl()) {
782 names.insert(oneof.name());
783 }
784
785 for (auto& field : *message->mutable_field()) {
786 if (field.proto3_optional()) {
787 std::string oneof_name = field.name();
788
789 // Prepend 'XXXXX_' until we are no longer conflicting.
790 // Avoid prepending a double-underscore because such names are
791 // reserved in C++.
792 if (oneof_name.empty() || oneof_name[0] != '_') {
793 oneof_name = '_' + oneof_name;
794 }
795 while (names.count(oneof_name) > 0) {
796 oneof_name = 'X' + oneof_name;
797 }
798
799 names.insert(oneof_name);
800 field.set_oneof_index(message->oneof_decl_size());
801 OneofDescriptorProto* oneof = message->add_oneof_decl();
802 oneof->set_name(oneof_name);
803 }
804 }
805 }
806
807 return true;
808 }
809
810 namespace {
811
// Placeholder stored as the end of an extension or reserved range that was
// declared with 'max'; replaced by the real maximum once the message's
// options are known.
constexpr int kMaxRangeSentinel = -1;
813
IsMessageSetWireFormatMessage(const DescriptorProto & message)814 bool IsMessageSetWireFormatMessage(const DescriptorProto& message) {
815 const MessageOptions& options = message.options();
816 for (int i = 0; i < options.uninterpreted_option_size(); ++i) {
817 const UninterpretedOption& uninterpreted = options.uninterpreted_option(i);
818 if (uninterpreted.name_size() == 1 &&
819 uninterpreted.name(0).name_part() == "message_set_wire_format" &&
820 uninterpreted.identifier_value() == "true") {
821 return true;
822 }
823 }
824 return false;
825 }
826
827 // Modifies any extension ranges that specified 'max' as the end of the
828 // extension range, and sets them to the type-specific maximum. The actual max
829 // tag number can only be determined after all options have been parsed.
AdjustExtensionRangesWithMaxEndNumber(DescriptorProto * message)830 void AdjustExtensionRangesWithMaxEndNumber(DescriptorProto* message) {
831 const bool is_message_set = IsMessageSetWireFormatMessage(*message);
832 const int max_extension_number =
833 is_message_set ? kint32max : FieldDescriptor::kMaxNumber + 1;
834 for (int i = 0; i < message->extension_range_size(); ++i) {
835 if (message->extension_range(i).end() == kMaxRangeSentinel) {
836 message->mutable_extension_range(i)->set_end(max_extension_number);
837 }
838 }
839 }
840
841 // Modifies any reserved ranges that specified 'max' as the end of the
842 // reserved range, and sets them to the type-specific maximum. The actual max
843 // tag number can only be determined after all options have been parsed.
AdjustReservedRangesWithMaxEndNumber(DescriptorProto * message)844 void AdjustReservedRangesWithMaxEndNumber(DescriptorProto* message) {
845 const bool is_message_set = IsMessageSetWireFormatMessage(*message);
846 const int max_field_number =
847 is_message_set ? kint32max : FieldDescriptor::kMaxNumber + 1;
848 for (int i = 0; i < message->reserved_range_size(); ++i) {
849 if (message->reserved_range(i).end() == kMaxRangeSentinel) {
850 message->mutable_reserved_range(i)->set_end(max_field_number);
851 }
852 }
853 }
854
855 } // namespace
856
ParseMessageBlock(DescriptorProto * message,const LocationRecorder & message_location,const FileDescriptorProto * containing_file)857 bool Parser::ParseMessageBlock(DescriptorProto* message,
858 const LocationRecorder& message_location,
859 const FileDescriptorProto* containing_file) {
860 DO(ConsumeEndOfDeclaration("{", &message_location));
861
862 while (!TryConsumeEndOfDeclaration("}", NULL)) {
863 if (AtEnd()) {
864 AddError("Reached end of input in message definition (missing '}').");
865 return false;
866 }
867
868 if (!ParseMessageStatement(message, message_location, containing_file)) {
869 // This statement failed to parse. Skip it, but keep looping to parse
870 // other statements.
871 SkipStatement();
872 }
873 }
874
875 if (message->extension_range_size() > 0) {
876 AdjustExtensionRangesWithMaxEndNumber(message);
877 }
878 if (message->reserved_range_size() > 0) {
879 AdjustReservedRangesWithMaxEndNumber(message);
880 }
881 return true;
882 }
883
ParseMessageStatement(DescriptorProto * message,const LocationRecorder & message_location,const FileDescriptorProto * containing_file)884 bool Parser::ParseMessageStatement(DescriptorProto* message,
885 const LocationRecorder& message_location,
886 const FileDescriptorProto* containing_file) {
887 if (TryConsumeEndOfDeclaration(";", NULL)) {
888 // empty statement; ignore
889 return true;
890 } else if (LookingAt("message")) {
891 LocationRecorder location(message_location,
892 DescriptorProto::kNestedTypeFieldNumber,
893 message->nested_type_size());
894 return ParseMessageDefinition(message->add_nested_type(), location,
895 containing_file);
896 } else if (LookingAt("enum")) {
897 LocationRecorder location(message_location,
898 DescriptorProto::kEnumTypeFieldNumber,
899 message->enum_type_size());
900 return ParseEnumDefinition(message->add_enum_type(), location,
901 containing_file);
902 } else if (LookingAt("extensions")) {
903 LocationRecorder location(message_location,
904 DescriptorProto::kExtensionRangeFieldNumber);
905 return ParseExtensions(message, location, containing_file);
906 } else if (LookingAt("reserved")) {
907 return ParseReserved(message, message_location);
908 } else if (LookingAt("extend")) {
909 LocationRecorder location(message_location,
910 DescriptorProto::kExtensionFieldNumber);
911 return ParseExtend(message->mutable_extension(),
912 message->mutable_nested_type(), message_location,
913 DescriptorProto::kNestedTypeFieldNumber, location,
914 containing_file);
915 } else if (LookingAt("option")) {
916 LocationRecorder location(message_location,
917 DescriptorProto::kOptionsFieldNumber);
918 return ParseOption(message->mutable_options(), location, containing_file,
919 OPTION_STATEMENT);
920 } else if (LookingAt("oneof")) {
921 int oneof_index = message->oneof_decl_size();
922 LocationRecorder oneof_location(
923 message_location, DescriptorProto::kOneofDeclFieldNumber, oneof_index);
924
925 return ParseOneof(message->add_oneof_decl(), message, oneof_index,
926 oneof_location, message_location, containing_file);
927 } else {
928 LocationRecorder location(message_location,
929 DescriptorProto::kFieldFieldNumber,
930 message->field_size());
931 return ParseMessageField(
932 message->add_field(), message->mutable_nested_type(), message_location,
933 DescriptorProto::kNestedTypeFieldNumber, location, containing_file);
934 }
935 }
936
ParseMessageField(FieldDescriptorProto * field,RepeatedPtrField<DescriptorProto> * messages,const LocationRecorder & parent_location,int location_field_number_for_nested_type,const LocationRecorder & field_location,const FileDescriptorProto * containing_file)937 bool Parser::ParseMessageField(FieldDescriptorProto* field,
938 RepeatedPtrField<DescriptorProto>* messages,
939 const LocationRecorder& parent_location,
940 int location_field_number_for_nested_type,
941 const LocationRecorder& field_location,
942 const FileDescriptorProto* containing_file) {
943 {
944 FieldDescriptorProto::Label label;
945 if (ParseLabel(&label, field_location, containing_file)) {
946 field->set_label(label);
947 if (label == FieldDescriptorProto::LABEL_OPTIONAL &&
948 syntax_identifier_ == "proto3") {
949 field->set_proto3_optional(true);
950 }
951 }
952 }
953
954 return ParseMessageFieldNoLabel(field, messages, parent_location,
955 location_field_number_for_nested_type,
956 field_location, containing_file);
957 }
958
ParseMessageFieldNoLabel(FieldDescriptorProto * field,RepeatedPtrField<DescriptorProto> * messages,const LocationRecorder & parent_location,int location_field_number_for_nested_type,const LocationRecorder & field_location,const FileDescriptorProto * containing_file)959 bool Parser::ParseMessageFieldNoLabel(
960 FieldDescriptorProto* field, RepeatedPtrField<DescriptorProto>* messages,
961 const LocationRecorder& parent_location,
962 int location_field_number_for_nested_type,
963 const LocationRecorder& field_location,
964 const FileDescriptorProto* containing_file) {
965 MapField map_field;
966 // Parse type.
967 {
968 LocationRecorder location(field_location); // add path later
969 location.RecordLegacyLocation(field, DescriptorPool::ErrorCollector::TYPE);
970
971 bool type_parsed = false;
972 FieldDescriptorProto::Type type = FieldDescriptorProto::TYPE_INT32;
973 std::string type_name;
974
975 // Special case map field. We only treat the field as a map field if the
976 // field type name starts with the word "map" with a following "<".
977 if (TryConsume("map")) {
978 if (LookingAt("<")) {
979 map_field.is_map_field = true;
980 } else {
981 // False positive
982 type_parsed = true;
983 type_name = "map";
984 }
985 }
986 if (map_field.is_map_field) {
987 if (field->has_oneof_index()) {
988 AddError("Map fields are not allowed in oneofs.");
989 return false;
990 }
991 if (field->has_label()) {
992 AddError(
993 "Field labels (required/optional/repeated) are not allowed on "
994 "map fields.");
995 return false;
996 }
997 if (field->has_extendee()) {
998 AddError("Map fields are not allowed to be extensions.");
999 return false;
1000 }
1001 field->set_label(FieldDescriptorProto::LABEL_REPEATED);
1002 DO(Consume("<"));
1003 DO(ParseType(&map_field.key_type, &map_field.key_type_name));
1004 DO(Consume(","));
1005 DO(ParseType(&map_field.value_type, &map_field.value_type_name));
1006 DO(Consume(">"));
1007 // Defer setting of the type name of the map field until the
1008 // field name is parsed. Add the source location though.
1009 location.AddPath(FieldDescriptorProto::kTypeNameFieldNumber);
1010 } else {
1011 // Handle the case where no explicit label is given for a non-map field.
1012 if (!field->has_label() && DefaultToOptionalFields()) {
1013 field->set_label(FieldDescriptorProto::LABEL_OPTIONAL);
1014 }
1015 if (!field->has_label()) {
1016 AddError("Expected \"required\", \"optional\", or \"repeated\".");
1017 // We can actually reasonably recover here by just assuming the user
1018 // forgot the label altogether.
1019 field->set_label(FieldDescriptorProto::LABEL_OPTIONAL);
1020 }
1021
1022 // Handle the case where the actual type is a message or enum named "map",
1023 // which we already consumed in the code above.
1024 if (!type_parsed) {
1025 DO(ParseType(&type, &type_name));
1026 }
1027 if (type_name.empty()) {
1028 location.AddPath(FieldDescriptorProto::kTypeFieldNumber);
1029 field->set_type(type);
1030 } else {
1031 location.AddPath(FieldDescriptorProto::kTypeNameFieldNumber);
1032 field->set_type_name(type_name);
1033 }
1034 }
1035 }
1036
1037 // Parse name and '='.
1038 io::Tokenizer::Token name_token = input_->current();
1039 {
1040 LocationRecorder location(field_location,
1041 FieldDescriptorProto::kNameFieldNumber);
1042 location.RecordLegacyLocation(field, DescriptorPool::ErrorCollector::NAME);
1043 DO(ConsumeIdentifier(field->mutable_name(), "Expected field name."));
1044
1045 if (!IsLowerUnderscore(field->name())) {
1046 AddWarning(
1047 "Field name should be lowercase. Found: " + field->name() +
1048 ". See: https://developers.google.com/protocol-buffers/docs/style");
1049 }
1050 if (IsNumberFollowUnderscore(field->name())) {
1051 AddWarning(
1052 "Number should not come right after an underscore. Found: " +
1053 field->name() +
1054 ". See: https://developers.google.com/protocol-buffers/docs/style");
1055 }
1056 }
1057 DO(Consume("=", "Missing field number."));
1058
1059 // Parse field number.
1060 {
1061 LocationRecorder location(field_location,
1062 FieldDescriptorProto::kNumberFieldNumber);
1063 location.RecordLegacyLocation(field,
1064 DescriptorPool::ErrorCollector::NUMBER);
1065 int number;
1066 DO(ConsumeInteger(&number, "Expected field number."));
1067 field->set_number(number);
1068 }
1069
1070 // Parse options.
1071 DO(ParseFieldOptions(field, field_location, containing_file));
1072
1073 // Deal with groups.
1074 if (field->has_type() && field->type() == FieldDescriptorProto::TYPE_GROUP) {
1075 // Awkward: Since a group declares both a message type and a field, we
1076 // have to create overlapping locations.
1077 LocationRecorder group_location(parent_location);
1078 group_location.StartAt(field_location);
1079 group_location.AddPath(location_field_number_for_nested_type);
1080 group_location.AddPath(messages->size());
1081
1082 DescriptorProto* group = messages->Add();
1083 group->set_name(field->name());
1084
1085 // Record name location to match the field name's location.
1086 {
1087 LocationRecorder location(group_location,
1088 DescriptorProto::kNameFieldNumber);
1089 location.StartAt(name_token);
1090 location.EndAt(name_token);
1091 location.RecordLegacyLocation(group,
1092 DescriptorPool::ErrorCollector::NAME);
1093 }
1094
1095 // The field's type_name also comes from the name. Confusing!
1096 {
1097 LocationRecorder location(field_location,
1098 FieldDescriptorProto::kTypeNameFieldNumber);
1099 location.StartAt(name_token);
1100 location.EndAt(name_token);
1101 }
1102
1103 // As a hack for backwards-compatibility, we force the group name to start
1104 // with a capital letter and lower-case the field name. New code should
1105 // not use groups; it should use nested messages.
1106 if (group->name()[0] < 'A' || 'Z' < group->name()[0]) {
1107 AddError(name_token.line, name_token.column,
1108 "Group names must start with a capital letter.");
1109 }
1110 LowerString(field->mutable_name());
1111
1112 field->set_type_name(group->name());
1113 if (LookingAt("{")) {
1114 DO(ParseMessageBlock(group, group_location, containing_file));
1115 } else {
1116 AddError("Missing group body.");
1117 return false;
1118 }
1119 } else {
1120 DO(ConsumeEndOfDeclaration(";", &field_location));
1121 }
1122
1123 // Create a map entry type if this is a map field.
1124 if (map_field.is_map_field) {
1125 GenerateMapEntry(map_field, field, messages);
1126 }
1127
1128 return true;
1129 }
1130
GenerateMapEntry(const MapField & map_field,FieldDescriptorProto * field,RepeatedPtrField<DescriptorProto> * messages)1131 void Parser::GenerateMapEntry(const MapField& map_field,
1132 FieldDescriptorProto* field,
1133 RepeatedPtrField<DescriptorProto>* messages) {
1134 DescriptorProto* entry = messages->Add();
1135 std::string entry_name = MapEntryName(field->name());
1136 field->set_type_name(entry_name);
1137 entry->set_name(entry_name);
1138 entry->mutable_options()->set_map_entry(true);
1139 FieldDescriptorProto* key_field = entry->add_field();
1140 key_field->set_name("key");
1141 key_field->set_label(FieldDescriptorProto::LABEL_OPTIONAL);
1142 key_field->set_number(1);
1143 if (map_field.key_type_name.empty()) {
1144 key_field->set_type(map_field.key_type);
1145 } else {
1146 key_field->set_type_name(map_field.key_type_name);
1147 }
1148 FieldDescriptorProto* value_field = entry->add_field();
1149 value_field->set_name("value");
1150 value_field->set_label(FieldDescriptorProto::LABEL_OPTIONAL);
1151 value_field->set_number(2);
1152 if (map_field.value_type_name.empty()) {
1153 value_field->set_type(map_field.value_type);
1154 } else {
1155 value_field->set_type_name(map_field.value_type_name);
1156 }
1157 // Propagate the "enforce_utf8" option to key and value fields if they
1158 // are strings. This helps simplify the implementation of code generators
1159 // and also reflection-based parsing code.
1160 //
1161 // The following definition:
1162 // message Foo {
1163 // map<string, string> value = 1 [enforce_utf8 = false];
1164 // }
1165 // will be interpreted as:
1166 // message Foo {
1167 // message ValueEntry {
1168 // option map_entry = true;
1169 // string key = 1 [enforce_utf8 = false];
1170 // string value = 2 [enforce_utf8 = false];
1171 // }
1172 // repeated ValueEntry value = 1 [enforce_utf8 = false];
1173 // }
1174 //
1175 // TODO(xiaofeng): Remove this when the "enforce_utf8" option is removed
1176 // from protocol compiler.
1177 for (int i = 0; i < field->options().uninterpreted_option_size(); ++i) {
1178 const UninterpretedOption& option =
1179 field->options().uninterpreted_option(i);
1180 if (option.name_size() == 1 &&
1181 option.name(0).name_part() == "enforce_utf8" &&
1182 !option.name(0).is_extension()) {
1183 if (key_field->type() == FieldDescriptorProto::TYPE_STRING) {
1184 key_field->mutable_options()->add_uninterpreted_option()->CopyFrom(
1185 option);
1186 }
1187 if (value_field->type() == FieldDescriptorProto::TYPE_STRING) {
1188 value_field->mutable_options()->add_uninterpreted_option()->CopyFrom(
1189 option);
1190 }
1191 }
1192 }
1193 }
1194
ParseFieldOptions(FieldDescriptorProto * field,const LocationRecorder & field_location,const FileDescriptorProto * containing_file)1195 bool Parser::ParseFieldOptions(FieldDescriptorProto* field,
1196 const LocationRecorder& field_location,
1197 const FileDescriptorProto* containing_file) {
1198 if (!LookingAt("[")) return true;
1199
1200 LocationRecorder location(field_location,
1201 FieldDescriptorProto::kOptionsFieldNumber);
1202
1203 DO(Consume("["));
1204
1205 // Parse field options.
1206 do {
1207 if (LookingAt("default")) {
1208 // We intentionally pass field_location rather than location here, since
1209 // the default value is not actually an option.
1210 DO(ParseDefaultAssignment(field, field_location, containing_file));
1211 } else if (LookingAt("json_name")) {
1212 // Like default value, this "json_name" is not an actual option.
1213 DO(ParseJsonName(field, field_location, containing_file));
1214 } else {
1215 DO(ParseOption(field->mutable_options(), location, containing_file,
1216 OPTION_ASSIGNMENT));
1217 }
1218 } while (TryConsume(","));
1219
1220 DO(Consume("]"));
1221 return true;
1222 }
1223
ParseDefaultAssignment(FieldDescriptorProto * field,const LocationRecorder & field_location,const FileDescriptorProto * containing_file)1224 bool Parser::ParseDefaultAssignment(
1225 FieldDescriptorProto* field, const LocationRecorder& field_location,
1226 const FileDescriptorProto* containing_file) {
1227 if (field->has_default_value()) {
1228 AddError("Already set option \"default\".");
1229 field->clear_default_value();
1230 }
1231
1232 DO(Consume("default"));
1233 DO(Consume("="));
1234
1235 LocationRecorder location(field_location,
1236 FieldDescriptorProto::kDefaultValueFieldNumber);
1237 location.RecordLegacyLocation(field,
1238 DescriptorPool::ErrorCollector::DEFAULT_VALUE);
1239 std::string* default_value = field->mutable_default_value();
1240
1241 if (!field->has_type()) {
1242 // The field has a type name, but we don't know if it is a message or an
1243 // enum yet. (If it were a primitive type, |field| would have a type set
1244 // already.) In this case, simply take the current string as the default
1245 // value; we will catch the error later if it is not a valid enum value.
1246 // (N.B. that we do not check whether the current token is an identifier:
1247 // doing so throws strange errors when the user mistypes a primitive
1248 // typename and we assume it's an enum. E.g.: "optional int foo = 1 [default
1249 // = 42]". In such a case the fundamental error is really that "int" is not
1250 // a type, not that "42" is not an identifier. See b/12533582.)
1251 *default_value = input_->current().text;
1252 input_->Next();
1253 return true;
1254 }
1255
1256 switch (field->type()) {
1257 case FieldDescriptorProto::TYPE_INT32:
1258 case FieldDescriptorProto::TYPE_INT64:
1259 case FieldDescriptorProto::TYPE_SINT32:
1260 case FieldDescriptorProto::TYPE_SINT64:
1261 case FieldDescriptorProto::TYPE_SFIXED32:
1262 case FieldDescriptorProto::TYPE_SFIXED64: {
1263 uint64 max_value = kint64max;
1264 if (field->type() == FieldDescriptorProto::TYPE_INT32 ||
1265 field->type() == FieldDescriptorProto::TYPE_SINT32 ||
1266 field->type() == FieldDescriptorProto::TYPE_SFIXED32) {
1267 max_value = kint32max;
1268 }
1269
1270 // These types can be negative.
1271 if (TryConsume("-")) {
1272 default_value->append("-");
1273 // Two's complement always has one more negative value than positive.
1274 ++max_value;
1275 }
1276 // Parse the integer to verify that it is not out-of-range.
1277 uint64 value;
1278 DO(ConsumeInteger64(max_value, &value,
1279 "Expected integer for field default value."));
1280 // And stringify it again.
1281 default_value->append(StrCat(value));
1282 break;
1283 }
1284
1285 case FieldDescriptorProto::TYPE_UINT32:
1286 case FieldDescriptorProto::TYPE_UINT64:
1287 case FieldDescriptorProto::TYPE_FIXED32:
1288 case FieldDescriptorProto::TYPE_FIXED64: {
1289 uint64 max_value = kuint64max;
1290 if (field->type() == FieldDescriptorProto::TYPE_UINT32 ||
1291 field->type() == FieldDescriptorProto::TYPE_FIXED32) {
1292 max_value = kuint32max;
1293 }
1294
1295 // Numeric, not negative.
1296 if (TryConsume("-")) {
1297 AddError("Unsigned field can't have negative default value.");
1298 }
1299 // Parse the integer to verify that it is not out-of-range.
1300 uint64 value;
1301 DO(ConsumeInteger64(max_value, &value,
1302 "Expected integer for field default value."));
1303 // And stringify it again.
1304 default_value->append(StrCat(value));
1305 break;
1306 }
1307
1308 case FieldDescriptorProto::TYPE_FLOAT:
1309 case FieldDescriptorProto::TYPE_DOUBLE:
1310 // These types can be negative.
1311 if (TryConsume("-")) {
1312 default_value->append("-");
1313 }
1314 // Parse the integer because we have to convert hex integers to decimal
1315 // floats.
1316 double value;
1317 DO(ConsumeNumber(&value, "Expected number."));
1318 // And stringify it again.
1319 default_value->append(SimpleDtoa(value));
1320 break;
1321
1322 case FieldDescriptorProto::TYPE_BOOL:
1323 if (TryConsume("true")) {
1324 default_value->assign("true");
1325 } else if (TryConsume("false")) {
1326 default_value->assign("false");
1327 } else {
1328 AddError("Expected \"true\" or \"false\".");
1329 return false;
1330 }
1331 break;
1332
1333 case FieldDescriptorProto::TYPE_STRING:
1334 // Note: When file option java_string_check_utf8 is true, if a
1335 // non-string representation (eg byte[]) is later supported, it must
1336 // be checked for UTF-8-ness.
1337 DO(ConsumeString(default_value,
1338 "Expected string for field default "
1339 "value."));
1340 break;
1341
1342 case FieldDescriptorProto::TYPE_BYTES:
1343 DO(ConsumeString(default_value, "Expected string."));
1344 *default_value = CEscape(*default_value);
1345 break;
1346
1347 case FieldDescriptorProto::TYPE_ENUM:
1348 DO(ConsumeIdentifier(default_value,
1349 "Expected enum identifier for field "
1350 "default value."));
1351 break;
1352
1353 case FieldDescriptorProto::TYPE_MESSAGE:
1354 case FieldDescriptorProto::TYPE_GROUP:
1355 AddError("Messages can't have default values.");
1356 return false;
1357 }
1358
1359 return true;
1360 }
1361
ParseJsonName(FieldDescriptorProto * field,const LocationRecorder & field_location,const FileDescriptorProto * containing_file)1362 bool Parser::ParseJsonName(FieldDescriptorProto* field,
1363 const LocationRecorder& field_location,
1364 const FileDescriptorProto* containing_file) {
1365 if (field->has_json_name()) {
1366 AddError("Already set option \"json_name\".");
1367 field->clear_json_name();
1368 }
1369
1370 LocationRecorder location(field_location,
1371 FieldDescriptorProto::kJsonNameFieldNumber);
1372 location.RecordLegacyLocation(field,
1373 DescriptorPool::ErrorCollector::OPTION_NAME);
1374
1375 DO(Consume("json_name"));
1376 DO(Consume("="));
1377
1378 LocationRecorder value_location(location);
1379 value_location.RecordLegacyLocation(
1380 field, DescriptorPool::ErrorCollector::OPTION_VALUE);
1381
1382 DO(ConsumeString(field->mutable_json_name(),
1383 "Expected string for JSON name."));
1384 return true;
1385 }
1386
ParseOptionNamePart(UninterpretedOption * uninterpreted_option,const LocationRecorder & part_location,const FileDescriptorProto * containing_file)1387 bool Parser::ParseOptionNamePart(UninterpretedOption* uninterpreted_option,
1388 const LocationRecorder& part_location,
1389 const FileDescriptorProto* containing_file) {
1390 UninterpretedOption::NamePart* name = uninterpreted_option->add_name();
1391 std::string identifier; // We parse identifiers into this string.
1392 if (LookingAt("(")) { // This is an extension.
1393 DO(Consume("("));
1394
1395 {
1396 LocationRecorder location(
1397 part_location, UninterpretedOption::NamePart::kNamePartFieldNumber);
1398 // An extension name consists of dot-separated identifiers, and may begin
1399 // with a dot.
1400 if (LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) {
1401 DO(ConsumeIdentifier(&identifier, "Expected identifier."));
1402 name->mutable_name_part()->append(identifier);
1403 }
1404 while (LookingAt(".")) {
1405 DO(Consume("."));
1406 name->mutable_name_part()->append(".");
1407 DO(ConsumeIdentifier(&identifier, "Expected identifier."));
1408 name->mutable_name_part()->append(identifier);
1409 }
1410 }
1411
1412 DO(Consume(")"));
1413 name->set_is_extension(true);
1414 } else { // This is a regular field.
1415 LocationRecorder location(
1416 part_location, UninterpretedOption::NamePart::kNamePartFieldNumber);
1417 DO(ConsumeIdentifier(&identifier, "Expected identifier."));
1418 name->mutable_name_part()->append(identifier);
1419 name->set_is_extension(false);
1420 }
1421 return true;
1422 }
1423
ParseUninterpretedBlock(std::string * value)1424 bool Parser::ParseUninterpretedBlock(std::string* value) {
1425 // Note that enclosing braces are not added to *value.
1426 // We do NOT use ConsumeEndOfStatement for this brace because it's delimiting
1427 // an expression, not a block of statements.
1428 DO(Consume("{"));
1429 int brace_depth = 1;
1430 while (!AtEnd()) {
1431 if (LookingAt("{")) {
1432 brace_depth++;
1433 } else if (LookingAt("}")) {
1434 brace_depth--;
1435 if (brace_depth == 0) {
1436 input_->Next();
1437 return true;
1438 }
1439 }
1440 // TODO(sanjay): Interpret line/column numbers to preserve formatting
1441 if (!value->empty()) value->push_back(' ');
1442 value->append(input_->current().text);
1443 input_->Next();
1444 }
1445 AddError("Unexpected end of stream while parsing aggregate value.");
1446 return false;
1447 }
1448
1449 // We don't interpret the option here. Instead we store it in an
1450 // UninterpretedOption, to be interpreted later.
ParseOption(Message * options,const LocationRecorder & options_location,const FileDescriptorProto * containing_file,OptionStyle style)1451 bool Parser::ParseOption(Message* options,
1452 const LocationRecorder& options_location,
1453 const FileDescriptorProto* containing_file,
1454 OptionStyle style) {
1455 // Create an entry in the uninterpreted_option field.
1456 const FieldDescriptor* uninterpreted_option_field =
1457 options->GetDescriptor()->FindFieldByName("uninterpreted_option");
1458 GOOGLE_CHECK(uninterpreted_option_field != NULL)
1459 << "No field named \"uninterpreted_option\" in the Options proto.";
1460
1461 const Reflection* reflection = options->GetReflection();
1462
1463 LocationRecorder location(
1464 options_location, uninterpreted_option_field->number(),
1465 reflection->FieldSize(*options, uninterpreted_option_field));
1466
1467 if (style == OPTION_STATEMENT) {
1468 DO(Consume("option"));
1469 }
1470
1471 UninterpretedOption* uninterpreted_option =
1472 down_cast<UninterpretedOption*>(options->GetReflection()->AddMessage(
1473 options, uninterpreted_option_field));
1474
1475 // Parse dot-separated name.
1476 {
1477 LocationRecorder name_location(location,
1478 UninterpretedOption::kNameFieldNumber);
1479 name_location.RecordLegacyLocation(
1480 uninterpreted_option, DescriptorPool::ErrorCollector::OPTION_NAME);
1481
1482 {
1483 LocationRecorder part_location(name_location,
1484 uninterpreted_option->name_size());
1485 DO(ParseOptionNamePart(uninterpreted_option, part_location,
1486 containing_file));
1487 }
1488
1489 while (LookingAt(".")) {
1490 DO(Consume("."));
1491 LocationRecorder part_location(name_location,
1492 uninterpreted_option->name_size());
1493 DO(ParseOptionNamePart(uninterpreted_option, part_location,
1494 containing_file));
1495 }
1496 }
1497
1498 DO(Consume("="));
1499
1500 {
1501 LocationRecorder value_location(location);
1502 value_location.RecordLegacyLocation(
1503 uninterpreted_option, DescriptorPool::ErrorCollector::OPTION_VALUE);
1504
1505 // All values are a single token, except for negative numbers, which consist
1506 // of a single '-' symbol, followed by a positive number.
1507 bool is_negative = TryConsume("-");
1508
1509 switch (input_->current().type) {
1510 case io::Tokenizer::TYPE_START:
1511 GOOGLE_LOG(FATAL) << "Trying to read value before any tokens have been read.";
1512 return false;
1513
1514 case io::Tokenizer::TYPE_END:
1515 AddError("Unexpected end of stream while parsing option value.");
1516 return false;
1517
1518 case io::Tokenizer::TYPE_IDENTIFIER: {
1519 value_location.AddPath(
1520 UninterpretedOption::kIdentifierValueFieldNumber);
1521 if (is_negative) {
1522 AddError("Invalid '-' symbol before identifier.");
1523 return false;
1524 }
1525 std::string value;
1526 DO(ConsumeIdentifier(&value, "Expected identifier."));
1527 uninterpreted_option->set_identifier_value(value);
1528 break;
1529 }
1530
1531 case io::Tokenizer::TYPE_INTEGER: {
1532 uint64 value;
1533 uint64 max_value =
1534 is_negative ? static_cast<uint64>(kint64max) + 1 : kuint64max;
1535 DO(ConsumeInteger64(max_value, &value, "Expected integer."));
1536 if (is_negative) {
1537 value_location.AddPath(
1538 UninterpretedOption::kNegativeIntValueFieldNumber);
1539 uninterpreted_option->set_negative_int_value(
1540 static_cast<int64>(-value));
1541 } else {
1542 value_location.AddPath(
1543 UninterpretedOption::kPositiveIntValueFieldNumber);
1544 uninterpreted_option->set_positive_int_value(value);
1545 }
1546 break;
1547 }
1548
1549 case io::Tokenizer::TYPE_FLOAT: {
1550 value_location.AddPath(UninterpretedOption::kDoubleValueFieldNumber);
1551 double value;
1552 DO(ConsumeNumber(&value, "Expected number."));
1553 uninterpreted_option->set_double_value(is_negative ? -value : value);
1554 break;
1555 }
1556
1557 case io::Tokenizer::TYPE_STRING: {
1558 value_location.AddPath(UninterpretedOption::kStringValueFieldNumber);
1559 if (is_negative) {
1560 AddError("Invalid '-' symbol before string.");
1561 return false;
1562 }
1563 std::string value;
1564 DO(ConsumeString(&value, "Expected string."));
1565 uninterpreted_option->set_string_value(value);
1566 break;
1567 }
1568
1569 case io::Tokenizer::TYPE_SYMBOL:
1570 if (LookingAt("{")) {
1571 value_location.AddPath(
1572 UninterpretedOption::kAggregateValueFieldNumber);
1573 DO(ParseUninterpretedBlock(
1574 uninterpreted_option->mutable_aggregate_value()));
1575 } else {
1576 AddError("Expected option value.");
1577 return false;
1578 }
1579 break;
1580 }
1581 }
1582
1583 if (style == OPTION_STATEMENT) {
1584 DO(ConsumeEndOfDeclaration(";", &location));
1585 }
1586
1587 return true;
1588 }
1589
ParseExtensions(DescriptorProto * message,const LocationRecorder & extensions_location,const FileDescriptorProto * containing_file)1590 bool Parser::ParseExtensions(DescriptorProto* message,
1591 const LocationRecorder& extensions_location,
1592 const FileDescriptorProto* containing_file) {
1593 // Parse the declaration.
1594 DO(Consume("extensions"));
1595
1596 int old_range_size = message->extension_range_size();
1597
1598 do {
1599 // Note that kExtensionRangeFieldNumber was already pushed by the parent.
1600 LocationRecorder location(extensions_location,
1601 message->extension_range_size());
1602
1603 DescriptorProto::ExtensionRange* range = message->add_extension_range();
1604 location.RecordLegacyLocation(range,
1605 DescriptorPool::ErrorCollector::NUMBER);
1606
1607 int start, end;
1608 io::Tokenizer::Token start_token;
1609
1610 {
1611 LocationRecorder start_location(
1612 location, DescriptorProto::ExtensionRange::kStartFieldNumber);
1613 start_token = input_->current();
1614 DO(ConsumeInteger(&start, "Expected field number range."));
1615 }
1616
1617 if (TryConsume("to")) {
1618 LocationRecorder end_location(
1619 location, DescriptorProto::ExtensionRange::kEndFieldNumber);
1620 if (TryConsume("max")) {
1621 // Set to the sentinel value - 1 since we increment the value below.
1622 // The actual value of the end of the range should be set with
1623 // AdjustExtensionRangesWithMaxEndNumber.
1624 end = kMaxRangeSentinel - 1;
1625 } else {
1626 DO(ConsumeInteger(&end, "Expected integer."));
1627 }
1628 } else {
1629 LocationRecorder end_location(
1630 location, DescriptorProto::ExtensionRange::kEndFieldNumber);
1631 end_location.StartAt(start_token);
1632 end_location.EndAt(start_token);
1633 end = start;
1634 }
1635
1636 // Users like to specify inclusive ranges, but in code we like the end
1637 // number to be exclusive.
1638 ++end;
1639
1640 range->set_start(start);
1641 range->set_end(end);
1642 } while (TryConsume(","));
1643
1644 if (LookingAt("[")) {
1645 int range_number_index = extensions_location.CurrentPathSize();
1646 SourceCodeInfo info;
1647
1648 // Parse extension range options in the first range.
1649 ExtensionRangeOptions* options =
1650 message->mutable_extension_range(old_range_size)->mutable_options();
1651
1652 {
1653 LocationRecorder index_location(
1654 extensions_location, 0 /* we fill this in w/ actual index below */,
1655 &info);
1656 LocationRecorder location(
1657 index_location, DescriptorProto::ExtensionRange::kOptionsFieldNumber);
1658 DO(Consume("["));
1659
1660 do {
1661 DO(ParseOption(options, location, containing_file, OPTION_ASSIGNMENT));
1662 } while (TryConsume(","));
1663
1664 DO(Consume("]"));
1665 }
1666
1667 // Then copy the extension range options to all of the other ranges we've
1668 // parsed.
1669 for (int i = old_range_size + 1; i < message->extension_range_size(); i++) {
1670 message->mutable_extension_range(i)->mutable_options()->CopyFrom(
1671 *options);
1672 }
1673 // and copy source locations to the other ranges, too
1674 for (int i = old_range_size; i < message->extension_range_size(); i++) {
1675 for (int j = 0; j < info.location_size(); j++) {
1676 if (info.location(j).path_size() == range_number_index + 1) {
1677 // this location's path is up to the extension range index, but
1678 // doesn't include options; so it's redundant with location above
1679 continue;
1680 }
1681 SourceCodeInfo_Location* dest = source_code_info_->add_location();
1682 *dest = info.location(j);
1683 dest->set_path(range_number_index, i);
1684 }
1685 }
1686 }
1687
1688 DO(ConsumeEndOfDeclaration(";", &extensions_location));
1689 return true;
1690 }
1691
1692 // This is similar to extension range parsing, except that it accepts field
1693 // name literals.
ParseReserved(DescriptorProto * message,const LocationRecorder & message_location)1694 bool Parser::ParseReserved(DescriptorProto* message,
1695 const LocationRecorder& message_location) {
1696 io::Tokenizer::Token start_token = input_->current();
1697 // Parse the declaration.
1698 DO(Consume("reserved"));
1699 if (LookingAtType(io::Tokenizer::TYPE_STRING)) {
1700 LocationRecorder location(message_location,
1701 DescriptorProto::kReservedNameFieldNumber);
1702 location.StartAt(start_token);
1703 return ParseReservedNames(message, location);
1704 } else {
1705 LocationRecorder location(message_location,
1706 DescriptorProto::kReservedRangeFieldNumber);
1707 location.StartAt(start_token);
1708 return ParseReservedNumbers(message, location);
1709 }
1710 }
1711
ParseReservedNames(DescriptorProto * message,const LocationRecorder & parent_location)1712 bool Parser::ParseReservedNames(DescriptorProto* message,
1713 const LocationRecorder& parent_location) {
1714 do {
1715 LocationRecorder location(parent_location, message->reserved_name_size());
1716 DO(ConsumeString(message->add_reserved_name(), "Expected field name."));
1717 } while (TryConsume(","));
1718 DO(ConsumeEndOfDeclaration(";", &parent_location));
1719 return true;
1720 }
1721
ParseReservedNumbers(DescriptorProto * message,const LocationRecorder & parent_location)1722 bool Parser::ParseReservedNumbers(DescriptorProto* message,
1723 const LocationRecorder& parent_location) {
1724 bool first = true;
1725 do {
1726 LocationRecorder location(parent_location, message->reserved_range_size());
1727
1728 DescriptorProto::ReservedRange* range = message->add_reserved_range();
1729 int start, end;
1730 io::Tokenizer::Token start_token;
1731 {
1732 LocationRecorder start_location(
1733 location, DescriptorProto::ReservedRange::kStartFieldNumber);
1734 start_token = input_->current();
1735 DO(ConsumeInteger(&start, (first ? "Expected field name or number range."
1736 : "Expected field number range.")));
1737 }
1738
1739 if (TryConsume("to")) {
1740 LocationRecorder end_location(
1741 location, DescriptorProto::ReservedRange::kEndFieldNumber);
1742 if (TryConsume("max")) {
1743 // Set to the sentinel value - 1 since we increment the value below.
1744 // The actual value of the end of the range should be set with
1745 // AdjustExtensionRangesWithMaxEndNumber.
1746 end = kMaxRangeSentinel - 1;
1747 } else {
1748 DO(ConsumeInteger(&end, "Expected integer."));
1749 }
1750 } else {
1751 LocationRecorder end_location(
1752 location, DescriptorProto::ReservedRange::kEndFieldNumber);
1753 end_location.StartAt(start_token);
1754 end_location.EndAt(start_token);
1755 end = start;
1756 }
1757
1758 // Users like to specify inclusive ranges, but in code we like the end
1759 // number to be exclusive.
1760 ++end;
1761
1762 range->set_start(start);
1763 range->set_end(end);
1764 first = false;
1765 } while (TryConsume(","));
1766
1767 DO(ConsumeEndOfDeclaration(";", &parent_location));
1768 return true;
1769 }
1770
ParseReserved(EnumDescriptorProto * message,const LocationRecorder & message_location)1771 bool Parser::ParseReserved(EnumDescriptorProto* message,
1772 const LocationRecorder& message_location) {
1773 io::Tokenizer::Token start_token = input_->current();
1774 // Parse the declaration.
1775 DO(Consume("reserved"));
1776 if (LookingAtType(io::Tokenizer::TYPE_STRING)) {
1777 LocationRecorder location(message_location,
1778 DescriptorProto::kReservedNameFieldNumber);
1779 location.StartAt(start_token);
1780 return ParseReservedNames(message, location);
1781 } else {
1782 LocationRecorder location(message_location,
1783 DescriptorProto::kReservedRangeFieldNumber);
1784 location.StartAt(start_token);
1785 return ParseReservedNumbers(message, location);
1786 }
1787 }
1788
ParseReservedNames(EnumDescriptorProto * message,const LocationRecorder & parent_location)1789 bool Parser::ParseReservedNames(EnumDescriptorProto* message,
1790 const LocationRecorder& parent_location) {
1791 do {
1792 LocationRecorder location(parent_location, message->reserved_name_size());
1793 DO(ConsumeString(message->add_reserved_name(), "Expected enum value."));
1794 } while (TryConsume(","));
1795 DO(ConsumeEndOfDeclaration(";", &parent_location));
1796 return true;
1797 }
1798
ParseReservedNumbers(EnumDescriptorProto * message,const LocationRecorder & parent_location)1799 bool Parser::ParseReservedNumbers(EnumDescriptorProto* message,
1800 const LocationRecorder& parent_location) {
1801 bool first = true;
1802 do {
1803 LocationRecorder location(parent_location, message->reserved_range_size());
1804
1805 EnumDescriptorProto::EnumReservedRange* range =
1806 message->add_reserved_range();
1807 int start, end;
1808 io::Tokenizer::Token start_token;
1809 {
1810 LocationRecorder start_location(
1811 location, EnumDescriptorProto::EnumReservedRange::kStartFieldNumber);
1812 start_token = input_->current();
1813 DO(ConsumeSignedInteger(&start,
1814 (first ? "Expected enum value or number range."
1815 : "Expected enum number range.")));
1816 }
1817
1818 if (TryConsume("to")) {
1819 LocationRecorder end_location(
1820 location, EnumDescriptorProto::EnumReservedRange::kEndFieldNumber);
1821 if (TryConsume("max")) {
1822 // This is in the enum descriptor path, which doesn't have the message
1823 // set duality to fix up, so it doesn't integrate with the sentinel.
1824 end = INT_MAX;
1825 } else {
1826 DO(ConsumeSignedInteger(&end, "Expected integer."));
1827 }
1828 } else {
1829 LocationRecorder end_location(
1830 location, EnumDescriptorProto::EnumReservedRange::kEndFieldNumber);
1831 end_location.StartAt(start_token);
1832 end_location.EndAt(start_token);
1833 end = start;
1834 }
1835
1836 range->set_start(start);
1837 range->set_end(end);
1838 first = false;
1839 } while (TryConsume(","));
1840
1841 DO(ConsumeEndOfDeclaration(";", &parent_location));
1842 return true;
1843 }
1844
ParseExtend(RepeatedPtrField<FieldDescriptorProto> * extensions,RepeatedPtrField<DescriptorProto> * messages,const LocationRecorder & parent_location,int location_field_number_for_nested_type,const LocationRecorder & extend_location,const FileDescriptorProto * containing_file)1845 bool Parser::ParseExtend(RepeatedPtrField<FieldDescriptorProto>* extensions,
1846 RepeatedPtrField<DescriptorProto>* messages,
1847 const LocationRecorder& parent_location,
1848 int location_field_number_for_nested_type,
1849 const LocationRecorder& extend_location,
1850 const FileDescriptorProto* containing_file) {
1851 DO(Consume("extend"));
1852
1853 // Parse the extendee type.
1854 io::Tokenizer::Token extendee_start = input_->current();
1855 std::string extendee;
1856 DO(ParseUserDefinedType(&extendee));
1857 io::Tokenizer::Token extendee_end = input_->previous();
1858
1859 // Parse the block.
1860 DO(ConsumeEndOfDeclaration("{", &extend_location));
1861
1862 bool is_first = true;
1863
1864 do {
1865 if (AtEnd()) {
1866 AddError("Reached end of input in extend definition (missing '}').");
1867 return false;
1868 }
1869
1870 // Note that kExtensionFieldNumber was already pushed by the parent.
1871 LocationRecorder location(extend_location, extensions->size());
1872
1873 FieldDescriptorProto* field = extensions->Add();
1874
1875 {
1876 LocationRecorder extendee_location(
1877 location, FieldDescriptorProto::kExtendeeFieldNumber);
1878 extendee_location.StartAt(extendee_start);
1879 extendee_location.EndAt(extendee_end);
1880
1881 if (is_first) {
1882 extendee_location.RecordLegacyLocation(
1883 field, DescriptorPool::ErrorCollector::EXTENDEE);
1884 is_first = false;
1885 }
1886 }
1887
1888 field->set_extendee(extendee);
1889
1890 if (!ParseMessageField(field, messages, parent_location,
1891 location_field_number_for_nested_type, location,
1892 containing_file)) {
1893 // This statement failed to parse. Skip it, but keep looping to parse
1894 // other statements.
1895 SkipStatement();
1896 }
1897 } while (!TryConsumeEndOfDeclaration("}", NULL));
1898
1899 return true;
1900 }
1901
ParseOneof(OneofDescriptorProto * oneof_decl,DescriptorProto * containing_type,int oneof_index,const LocationRecorder & oneof_location,const LocationRecorder & containing_type_location,const FileDescriptorProto * containing_file)1902 bool Parser::ParseOneof(OneofDescriptorProto* oneof_decl,
1903 DescriptorProto* containing_type, int oneof_index,
1904 const LocationRecorder& oneof_location,
1905 const LocationRecorder& containing_type_location,
1906 const FileDescriptorProto* containing_file) {
1907 DO(Consume("oneof"));
1908
1909 {
1910 LocationRecorder name_location(oneof_location,
1911 OneofDescriptorProto::kNameFieldNumber);
1912 DO(ConsumeIdentifier(oneof_decl->mutable_name(), "Expected oneof name."));
1913 }
1914
1915 DO(ConsumeEndOfDeclaration("{", &oneof_location));
1916
1917 do {
1918 if (AtEnd()) {
1919 AddError("Reached end of input in oneof definition (missing '}').");
1920 return false;
1921 }
1922
1923 if (LookingAt("option")) {
1924 LocationRecorder option_location(
1925 oneof_location, OneofDescriptorProto::kOptionsFieldNumber);
1926 if (!ParseOption(oneof_decl->mutable_options(), option_location,
1927 containing_file, OPTION_STATEMENT)) {
1928 return false;
1929 }
1930 continue;
1931 }
1932
1933 // Print a nice error if the user accidentally tries to place a label
1934 // on an individual member of a oneof.
1935 if (LookingAt("required") || LookingAt("optional") ||
1936 LookingAt("repeated")) {
1937 AddError(
1938 "Fields in oneofs must not have labels (required / optional "
1939 "/ repeated).");
1940 // We can continue parsing here because we understand what the user
1941 // meant. The error report will still make parsing fail overall.
1942 input_->Next();
1943 }
1944
1945 LocationRecorder field_location(containing_type_location,
1946 DescriptorProto::kFieldFieldNumber,
1947 containing_type->field_size());
1948
1949 FieldDescriptorProto* field = containing_type->add_field();
1950 field->set_label(FieldDescriptorProto::LABEL_OPTIONAL);
1951 field->set_oneof_index(oneof_index);
1952
1953 if (!ParseMessageFieldNoLabel(field, containing_type->mutable_nested_type(),
1954 containing_type_location,
1955 DescriptorProto::kNestedTypeFieldNumber,
1956 field_location, containing_file)) {
1957 // This statement failed to parse. Skip it, but keep looping to parse
1958 // other statements.
1959 SkipStatement();
1960 }
1961 } while (!TryConsumeEndOfDeclaration("}", NULL));
1962
1963 return true;
1964 }
1965
1966 // -------------------------------------------------------------------
1967 // Enums
1968
ParseEnumDefinition(EnumDescriptorProto * enum_type,const LocationRecorder & enum_location,const FileDescriptorProto * containing_file)1969 bool Parser::ParseEnumDefinition(EnumDescriptorProto* enum_type,
1970 const LocationRecorder& enum_location,
1971 const FileDescriptorProto* containing_file) {
1972 DO(Consume("enum"));
1973
1974 {
1975 LocationRecorder location(enum_location,
1976 EnumDescriptorProto::kNameFieldNumber);
1977 location.RecordLegacyLocation(enum_type,
1978 DescriptorPool::ErrorCollector::NAME);
1979 DO(ConsumeIdentifier(enum_type->mutable_name(), "Expected enum name."));
1980 }
1981
1982 DO(ParseEnumBlock(enum_type, enum_location, containing_file));
1983
1984 DO(ValidateEnum(enum_type));
1985
1986 return true;
1987 }
1988
ParseEnumBlock(EnumDescriptorProto * enum_type,const LocationRecorder & enum_location,const FileDescriptorProto * containing_file)1989 bool Parser::ParseEnumBlock(EnumDescriptorProto* enum_type,
1990 const LocationRecorder& enum_location,
1991 const FileDescriptorProto* containing_file) {
1992 DO(ConsumeEndOfDeclaration("{", &enum_location));
1993
1994 while (!TryConsumeEndOfDeclaration("}", NULL)) {
1995 if (AtEnd()) {
1996 AddError("Reached end of input in enum definition (missing '}').");
1997 return false;
1998 }
1999
2000 if (!ParseEnumStatement(enum_type, enum_location, containing_file)) {
2001 // This statement failed to parse. Skip it, but keep looping to parse
2002 // other statements.
2003 SkipStatement();
2004 }
2005 }
2006
2007 return true;
2008 }
2009
ParseEnumStatement(EnumDescriptorProto * enum_type,const LocationRecorder & enum_location,const FileDescriptorProto * containing_file)2010 bool Parser::ParseEnumStatement(EnumDescriptorProto* enum_type,
2011 const LocationRecorder& enum_location,
2012 const FileDescriptorProto* containing_file) {
2013 if (TryConsumeEndOfDeclaration(";", NULL)) {
2014 // empty statement; ignore
2015 return true;
2016 } else if (LookingAt("option")) {
2017 LocationRecorder location(enum_location,
2018 EnumDescriptorProto::kOptionsFieldNumber);
2019 return ParseOption(enum_type->mutable_options(), location, containing_file,
2020 OPTION_STATEMENT);
2021 } else if (LookingAt("reserved")) {
2022 return ParseReserved(enum_type, enum_location);
2023 } else {
2024 LocationRecorder location(enum_location,
2025 EnumDescriptorProto::kValueFieldNumber,
2026 enum_type->value_size());
2027 return ParseEnumConstant(enum_type->add_value(), location, containing_file);
2028 }
2029 }
2030
ParseEnumConstant(EnumValueDescriptorProto * enum_value,const LocationRecorder & enum_value_location,const FileDescriptorProto * containing_file)2031 bool Parser::ParseEnumConstant(EnumValueDescriptorProto* enum_value,
2032 const LocationRecorder& enum_value_location,
2033 const FileDescriptorProto* containing_file) {
2034 // Parse name.
2035 {
2036 LocationRecorder location(enum_value_location,
2037 EnumValueDescriptorProto::kNameFieldNumber);
2038 location.RecordLegacyLocation(enum_value,
2039 DescriptorPool::ErrorCollector::NAME);
2040 DO(ConsumeIdentifier(enum_value->mutable_name(),
2041 "Expected enum constant name."));
2042 }
2043
2044 DO(Consume("=", "Missing numeric value for enum constant."));
2045
2046 // Parse value.
2047 {
2048 LocationRecorder location(enum_value_location,
2049 EnumValueDescriptorProto::kNumberFieldNumber);
2050 location.RecordLegacyLocation(enum_value,
2051 DescriptorPool::ErrorCollector::NUMBER);
2052
2053 int number;
2054 DO(ConsumeSignedInteger(&number, "Expected integer."));
2055 enum_value->set_number(number);
2056 }
2057
2058 DO(ParseEnumConstantOptions(enum_value, enum_value_location,
2059 containing_file));
2060
2061 DO(ConsumeEndOfDeclaration(";", &enum_value_location));
2062
2063 return true;
2064 }
2065
ParseEnumConstantOptions(EnumValueDescriptorProto * value,const LocationRecorder & enum_value_location,const FileDescriptorProto * containing_file)2066 bool Parser::ParseEnumConstantOptions(
2067 EnumValueDescriptorProto* value,
2068 const LocationRecorder& enum_value_location,
2069 const FileDescriptorProto* containing_file) {
2070 if (!LookingAt("[")) return true;
2071
2072 LocationRecorder location(enum_value_location,
2073 EnumValueDescriptorProto::kOptionsFieldNumber);
2074
2075 DO(Consume("["));
2076
2077 do {
2078 DO(ParseOption(value->mutable_options(), location, containing_file,
2079 OPTION_ASSIGNMENT));
2080 } while (TryConsume(","));
2081
2082 DO(Consume("]"));
2083 return true;
2084 }
2085
2086 // -------------------------------------------------------------------
2087 // Services
2088
ParseServiceDefinition(ServiceDescriptorProto * service,const LocationRecorder & service_location,const FileDescriptorProto * containing_file)2089 bool Parser::ParseServiceDefinition(
2090 ServiceDescriptorProto* service, const LocationRecorder& service_location,
2091 const FileDescriptorProto* containing_file) {
2092 DO(Consume("service"));
2093
2094 {
2095 LocationRecorder location(service_location,
2096 ServiceDescriptorProto::kNameFieldNumber);
2097 location.RecordLegacyLocation(service,
2098 DescriptorPool::ErrorCollector::NAME);
2099 DO(ConsumeIdentifier(service->mutable_name(), "Expected service name."));
2100 }
2101
2102 DO(ParseServiceBlock(service, service_location, containing_file));
2103 return true;
2104 }
2105
ParseServiceBlock(ServiceDescriptorProto * service,const LocationRecorder & service_location,const FileDescriptorProto * containing_file)2106 bool Parser::ParseServiceBlock(ServiceDescriptorProto* service,
2107 const LocationRecorder& service_location,
2108 const FileDescriptorProto* containing_file) {
2109 DO(ConsumeEndOfDeclaration("{", &service_location));
2110
2111 while (!TryConsumeEndOfDeclaration("}", NULL)) {
2112 if (AtEnd()) {
2113 AddError("Reached end of input in service definition (missing '}').");
2114 return false;
2115 }
2116
2117 if (!ParseServiceStatement(service, service_location, containing_file)) {
2118 // This statement failed to parse. Skip it, but keep looping to parse
2119 // other statements.
2120 SkipStatement();
2121 }
2122 }
2123
2124 return true;
2125 }
2126
ParseServiceStatement(ServiceDescriptorProto * service,const LocationRecorder & service_location,const FileDescriptorProto * containing_file)2127 bool Parser::ParseServiceStatement(ServiceDescriptorProto* service,
2128 const LocationRecorder& service_location,
2129 const FileDescriptorProto* containing_file) {
2130 if (TryConsumeEndOfDeclaration(";", NULL)) {
2131 // empty statement; ignore
2132 return true;
2133 } else if (LookingAt("option")) {
2134 LocationRecorder location(service_location,
2135 ServiceDescriptorProto::kOptionsFieldNumber);
2136 return ParseOption(service->mutable_options(), location, containing_file,
2137 OPTION_STATEMENT);
2138 } else {
2139 LocationRecorder location(service_location,
2140 ServiceDescriptorProto::kMethodFieldNumber,
2141 service->method_size());
2142 return ParseServiceMethod(service->add_method(), location, containing_file);
2143 }
2144 }
2145
ParseServiceMethod(MethodDescriptorProto * method,const LocationRecorder & method_location,const FileDescriptorProto * containing_file)2146 bool Parser::ParseServiceMethod(MethodDescriptorProto* method,
2147 const LocationRecorder& method_location,
2148 const FileDescriptorProto* containing_file) {
2149 DO(Consume("rpc"));
2150
2151 {
2152 LocationRecorder location(method_location,
2153 MethodDescriptorProto::kNameFieldNumber);
2154 location.RecordLegacyLocation(method, DescriptorPool::ErrorCollector::NAME);
2155 DO(ConsumeIdentifier(method->mutable_name(), "Expected method name."));
2156 }
2157
2158 // Parse input type.
2159 DO(Consume("("));
2160 {
2161 if (LookingAt("stream")) {
2162 LocationRecorder location(
2163 method_location, MethodDescriptorProto::kClientStreamingFieldNumber);
2164 location.RecordLegacyLocation(method,
2165 DescriptorPool::ErrorCollector::OTHER);
2166 method->set_client_streaming(true);
2167 DO(Consume("stream"));
2168
2169 }
2170 LocationRecorder location(method_location,
2171 MethodDescriptorProto::kInputTypeFieldNumber);
2172 location.RecordLegacyLocation(method,
2173 DescriptorPool::ErrorCollector::INPUT_TYPE);
2174 DO(ParseUserDefinedType(method->mutable_input_type()));
2175 }
2176 DO(Consume(")"));
2177
2178 // Parse output type.
2179 DO(Consume("returns"));
2180 DO(Consume("("));
2181 {
2182 if (LookingAt("stream")) {
2183 LocationRecorder location(
2184 method_location, MethodDescriptorProto::kServerStreamingFieldNumber);
2185 location.RecordLegacyLocation(method,
2186 DescriptorPool::ErrorCollector::OTHER);
2187 DO(Consume("stream"));
2188 method->set_server_streaming(true);
2189
2190 }
2191 LocationRecorder location(method_location,
2192 MethodDescriptorProto::kOutputTypeFieldNumber);
2193 location.RecordLegacyLocation(method,
2194 DescriptorPool::ErrorCollector::OUTPUT_TYPE);
2195 DO(ParseUserDefinedType(method->mutable_output_type()));
2196 }
2197 DO(Consume(")"));
2198
2199 if (LookingAt("{")) {
2200 // Options!
2201 DO(ParseMethodOptions(method_location, containing_file,
2202 MethodDescriptorProto::kOptionsFieldNumber,
2203 method->mutable_options()));
2204 } else {
2205 DO(ConsumeEndOfDeclaration(";", &method_location));
2206 }
2207
2208 return true;
2209 }
2210
2211
ParseMethodOptions(const LocationRecorder & parent_location,const FileDescriptorProto * containing_file,const int optionsFieldNumber,Message * mutable_options)2212 bool Parser::ParseMethodOptions(const LocationRecorder& parent_location,
2213 const FileDescriptorProto* containing_file,
2214 const int optionsFieldNumber,
2215 Message* mutable_options) {
2216 // Options!
2217 ConsumeEndOfDeclaration("{", &parent_location);
2218 while (!TryConsumeEndOfDeclaration("}", NULL)) {
2219 if (AtEnd()) {
2220 AddError("Reached end of input in method options (missing '}').");
2221 return false;
2222 }
2223
2224 if (TryConsumeEndOfDeclaration(";", NULL)) {
2225 // empty statement; ignore
2226 } else {
2227 LocationRecorder location(parent_location, optionsFieldNumber);
2228 if (!ParseOption(mutable_options, location, containing_file,
2229 OPTION_STATEMENT)) {
2230 // This statement failed to parse. Skip it, but keep looping to
2231 // parse other statements.
2232 SkipStatement();
2233 }
2234 }
2235 }
2236
2237 return true;
2238 }
2239
2240 // -------------------------------------------------------------------
2241
ParseLabel(FieldDescriptorProto::Label * label,const LocationRecorder & field_location,const FileDescriptorProto * containing_file)2242 bool Parser::ParseLabel(FieldDescriptorProto::Label* label,
2243 const LocationRecorder& field_location,
2244 const FileDescriptorProto* containing_file) {
2245 if (!LookingAt("optional") && !LookingAt("repeated") &&
2246 !LookingAt("required")) {
2247 return false;
2248 }
2249 LocationRecorder location(field_location,
2250 FieldDescriptorProto::kLabelFieldNumber);
2251 if (TryConsume("optional")) {
2252 *label = FieldDescriptorProto::LABEL_OPTIONAL;
2253 } else if (TryConsume("repeated")) {
2254 *label = FieldDescriptorProto::LABEL_REPEATED;
2255 } else {
2256 Consume("required");
2257 *label = FieldDescriptorProto::LABEL_REQUIRED;
2258 }
2259 return true;
2260 }
2261
ParseType(FieldDescriptorProto::Type * type,std::string * type_name)2262 bool Parser::ParseType(FieldDescriptorProto::Type* type,
2263 std::string* type_name) {
2264 TypeNameMap::const_iterator iter = kTypeNames.find(input_->current().text);
2265 if (iter != kTypeNames.end()) {
2266 *type = iter->second;
2267 input_->Next();
2268 } else {
2269 DO(ParseUserDefinedType(type_name));
2270 }
2271 return true;
2272 }
2273
ParseUserDefinedType(std::string * type_name)2274 bool Parser::ParseUserDefinedType(std::string* type_name) {
2275 type_name->clear();
2276
2277 TypeNameMap::const_iterator iter = kTypeNames.find(input_->current().text);
2278 if (iter != kTypeNames.end()) {
2279 // Note: The only place enum types are allowed is for field types, but
2280 // if we are parsing a field type then we would not get here because
2281 // primitives are allowed there as well. So this error message doesn't
2282 // need to account for enums.
2283 AddError("Expected message type.");
2284
2285 // Pretend to accept this type so that we can go on parsing.
2286 *type_name = input_->current().text;
2287 input_->Next();
2288 return true;
2289 }
2290
2291 // A leading "." means the name is fully-qualified.
2292 if (TryConsume(".")) type_name->append(".");
2293
2294 // Consume the first part of the name.
2295 std::string identifier;
2296 DO(ConsumeIdentifier(&identifier, "Expected type name."));
2297 type_name->append(identifier);
2298
2299 // Consume more parts.
2300 while (TryConsume(".")) {
2301 type_name->append(".");
2302 DO(ConsumeIdentifier(&identifier, "Expected identifier."));
2303 type_name->append(identifier);
2304 }
2305
2306 return true;
2307 }
2308
2309 // ===================================================================
2310
ParsePackage(FileDescriptorProto * file,const LocationRecorder & root_location,const FileDescriptorProto * containing_file)2311 bool Parser::ParsePackage(FileDescriptorProto* file,
2312 const LocationRecorder& root_location,
2313 const FileDescriptorProto* containing_file) {
2314 if (file->has_package()) {
2315 AddError("Multiple package definitions.");
2316 // Don't append the new package to the old one. Just replace it. Not
2317 // that it really matters since this is an error anyway.
2318 file->clear_package();
2319 }
2320
2321 LocationRecorder location(root_location,
2322 FileDescriptorProto::kPackageFieldNumber);
2323 location.RecordLegacyLocation(file, DescriptorPool::ErrorCollector::NAME);
2324
2325 DO(Consume("package"));
2326
2327 while (true) {
2328 std::string identifier;
2329 DO(ConsumeIdentifier(&identifier, "Expected identifier."));
2330 file->mutable_package()->append(identifier);
2331 if (!TryConsume(".")) break;
2332 file->mutable_package()->append(".");
2333 }
2334
2335 DO(ConsumeEndOfDeclaration(";", &location));
2336
2337 return true;
2338 }
2339
ParseImport(RepeatedPtrField<std::string> * dependency,RepeatedField<int32> * public_dependency,RepeatedField<int32> * weak_dependency,const LocationRecorder & root_location,const FileDescriptorProto * containing_file)2340 bool Parser::ParseImport(RepeatedPtrField<std::string>* dependency,
2341 RepeatedField<int32>* public_dependency,
2342 RepeatedField<int32>* weak_dependency,
2343 const LocationRecorder& root_location,
2344 const FileDescriptorProto* containing_file) {
2345 LocationRecorder location(root_location,
2346 FileDescriptorProto::kDependencyFieldNumber,
2347 dependency->size());
2348
2349 DO(Consume("import"));
2350
2351 if (LookingAt("public")) {
2352 LocationRecorder public_location(
2353 root_location, FileDescriptorProto::kPublicDependencyFieldNumber,
2354 public_dependency->size());
2355 DO(Consume("public"));
2356 *public_dependency->Add() = dependency->size();
2357 } else if (LookingAt("weak")) {
2358 LocationRecorder weak_location(
2359 root_location, FileDescriptorProto::kWeakDependencyFieldNumber,
2360 weak_dependency->size());
2361 weak_location.RecordLegacyImportLocation(containing_file, "weak");
2362 DO(Consume("weak"));
2363 *weak_dependency->Add() = dependency->size();
2364 }
2365
2366 std::string import_file;
2367 DO(ConsumeString(&import_file,
2368 "Expected a string naming the file to import."));
2369 *dependency->Add() = import_file;
2370 location.RecordLegacyImportLocation(containing_file, import_file);
2371
2372 DO(ConsumeEndOfDeclaration(";", &location));
2373
2374 return true;
2375 }
2376
2377 // ===================================================================
2378
SourceLocationTable()2379 SourceLocationTable::SourceLocationTable() {}
~SourceLocationTable()2380 SourceLocationTable::~SourceLocationTable() {}
2381
Find(const Message * descriptor,DescriptorPool::ErrorCollector::ErrorLocation location,int * line,int * column) const2382 bool SourceLocationTable::Find(
2383 const Message* descriptor,
2384 DescriptorPool::ErrorCollector::ErrorLocation location, int* line,
2385 int* column) const {
2386 const std::pair<int, int>* result =
2387 FindOrNull(location_map_, std::make_pair(descriptor, location));
2388 if (result == NULL) {
2389 *line = -1;
2390 *column = 0;
2391 return false;
2392 } else {
2393 *line = result->first;
2394 *column = result->second;
2395 return true;
2396 }
2397 }
2398
FindImport(const Message * descriptor,const std::string & name,int * line,int * column) const2399 bool SourceLocationTable::FindImport(const Message* descriptor,
2400 const std::string& name, int* line,
2401 int* column) const {
2402 const std::pair<int, int>* result =
2403 FindOrNull(import_location_map_, std::make_pair(descriptor, name));
2404 if (result == nullptr) {
2405 *line = -1;
2406 *column = 0;
2407 return false;
2408 } else {
2409 *line = result->first;
2410 *column = result->second;
2411 return true;
2412 }
2413 }
2414
Add(const Message * descriptor,DescriptorPool::ErrorCollector::ErrorLocation location,int line,int column)2415 void SourceLocationTable::Add(
2416 const Message* descriptor,
2417 DescriptorPool::ErrorCollector::ErrorLocation location, int line,
2418 int column) {
2419 location_map_[std::make_pair(descriptor, location)] =
2420 std::make_pair(line, column);
2421 }
2422
AddImport(const Message * descriptor,const std::string & name,int line,int column)2423 void SourceLocationTable::AddImport(const Message* descriptor,
2424 const std::string& name, int line,
2425 int column) {
2426 import_location_map_[std::make_pair(descriptor, name)] =
2427 std::make_pair(line, column);
2428 }
2429
Clear()2430 void SourceLocationTable::Clear() { location_map_.clear(); }
2431
2432 } // namespace compiler
2433 } // namespace protobuf
2434 } // namespace google
2435