1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc. All rights reserved.
3 // https://developers.google.com/protocol-buffers/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 // * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 // * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 // * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31 // Author: kenton@google.com (Kenton Varda)
32 // Based on original Protocol Buffers design by
33 // Sanjay Ghemawat, Jeff Dean, and others.
34 //
35 // Recursive descent FTW.
36
37 #include <google/protobuf/compiler/parser.h>
38
39 #include <float.h>
40
41 #include <cstdint>
42 #include <limits>
43 #include <unordered_map>
44 #include <unordered_set>
45
46 #include <google/protobuf/stubs/casts.h>
47 #include <google/protobuf/stubs/logging.h>
48 #include <google/protobuf/stubs/common.h>
49 #include <google/protobuf/stubs/strutil.h>
50 #include <google/protobuf/descriptor.h>
51 #include <google/protobuf/descriptor.pb.h>
52 #include <google/protobuf/io/tokenizer.h>
53 #include <google/protobuf/wire_format.h>
54 #include <google/protobuf/stubs/map_util.h>
55 #include <google/protobuf/stubs/hash.h>
56
57 namespace google {
58 namespace protobuf {
59 namespace compiler {
60
61 using internal::WireFormat;
62
63 namespace {
64
65 typedef std::unordered_map<std::string, FieldDescriptorProto::Type> TypeNameMap;
66
MakeTypeNameTable()67 TypeNameMap MakeTypeNameTable() {
68 TypeNameMap result;
69
70 result["double"] = FieldDescriptorProto::TYPE_DOUBLE;
71 result["float"] = FieldDescriptorProto::TYPE_FLOAT;
72 result["uint64"] = FieldDescriptorProto::TYPE_UINT64;
73 result["fixed64"] = FieldDescriptorProto::TYPE_FIXED64;
74 result["fixed32"] = FieldDescriptorProto::TYPE_FIXED32;
75 result["bool"] = FieldDescriptorProto::TYPE_BOOL;
76 result["string"] = FieldDescriptorProto::TYPE_STRING;
77 result["group"] = FieldDescriptorProto::TYPE_GROUP;
78
79 result["bytes"] = FieldDescriptorProto::TYPE_BYTES;
80 result["uint32"] = FieldDescriptorProto::TYPE_UINT32;
81 result["sfixed32"] = FieldDescriptorProto::TYPE_SFIXED32;
82 result["sfixed64"] = FieldDescriptorProto::TYPE_SFIXED64;
83 result["int32"] = FieldDescriptorProto::TYPE_INT32;
84 result["int64"] = FieldDescriptorProto::TYPE_INT64;
85 result["sint32"] = FieldDescriptorProto::TYPE_SINT32;
86 result["sint64"] = FieldDescriptorProto::TYPE_SINT64;
87
88 return result;
89 }
90
91 const TypeNameMap kTypeNames = MakeTypeNameTable();
92
// Derives the name of the generated map entry message from a map field name:
// underscores are dropped, the character following each underscore (and the
// very first character) is upper-cased if it is an ASCII lower-case letter,
// and "Entry" is appended.
// e.g. map<KeyType, ValueType> foo_map => FooMapEntry
std::string MapEntryName(const std::string& field_name) {
  static const char kSuffix[] = "Entry";

  std::string entry_name;
  entry_name.reserve(field_name.size() + sizeof(kSuffix));

  bool capitalize = true;
  for (const char ch : field_name) {
    if (ch == '_') {
      // Underscores are not copied; they only mark the next character.
      capitalize = true;
      continue;
    }
    // Note: ctype.h is deliberately avoided because of locales.
    const bool is_ascii_lower = 'a' <= ch && ch <= 'z';
    if (capitalize && is_ascii_lower) {
      entry_name.push_back(static_cast<char>(ch - 'a' + 'A'));
    } else {
      entry_name.push_back(ch);
    }
    capitalize = false;
  }

  entry_name += kSuffix;
  return entry_name;
}
118
// Returns true iff `c` lies in the range 'A'..'Z'.
bool IsUppercase(char c) {
  return 'A' <= c && c <= 'Z';
}
120
// Returns true iff `c` lies in the range 'a'..'z'.
bool IsLowercase(char c) {
  return 'a' <= c && c <= 'z';
}
122
// Returns true iff `c` lies in the range '0'..'9'.
bool IsNumber(char c) {
  return '0' <= c && c <= '9';
}
124
IsUpperCamelCase(const std::string & name)125 bool IsUpperCamelCase(const std::string& name) {
126 if (name.empty()) {
127 return true;
128 }
129 // Name must start with an upper case character.
130 if (!IsUppercase(name[0])) {
131 return false;
132 }
133 // Must not contains underscore.
134 for (const char c : name) {
135 if (c == '_') {
136 return false;
137 }
138 }
139 return true;
140 }
141
IsUpperUnderscore(const std::string & name)142 bool IsUpperUnderscore(const std::string& name) {
143 for (const char c : name) {
144 if (!IsUppercase(c) && c != '_' && !IsNumber(c)) {
145 return false;
146 }
147 }
148 return true;
149 }
150
IsLowerUnderscore(const std::string & name)151 bool IsLowerUnderscore(const std::string& name) {
152 for (const char c : name) {
153 if (!IsLowercase(c) && c != '_' && !IsNumber(c)) {
154 return false;
155 }
156 }
157 return true;
158 }
159
IsNumberFollowUnderscore(const std::string & name)160 bool IsNumberFollowUnderscore(const std::string& name) {
161 for (int i = 1; i < name.length(); i++) {
162 const char c = name[i];
163 if (IsNumber(c) && name[i - 1] == '_') {
164 return true;
165 }
166 }
167 return false;
168 }
169
170 } // anonymous namespace
171
// Failure-propagation shorthand. "DO(foo);" evaluates foo and, when the
// result is false, immediately returns false from the enclosing function;
// otherwise execution continues with the next statement. The do/while
// wrapper makes the expansion a single statement that needs the trailing
// semicolon at the call site.
#define DO(STATEMENT)               \
  do {                              \
    if (!(STATEMENT)) return false; \
  } while (false)
179
180 // ===================================================================
181
// Constructs a parser with no tokenizer input, no error collector and no
// source location table attached. The error flag starts out cleared, and
// both the "require syntax identifier" and "stop after syntax identifier"
// modes start out disabled.
Parser::Parser()
    : input_(nullptr),
      error_collector_(nullptr),
      source_location_table_(nullptr),
      had_errors_(false),
      require_syntax_identifier_(false),
      stop_after_syntax_identifier_(false) {
}
190
~Parser()191 Parser::~Parser() {}
192
193 // ===================================================================
194
LookingAt(const char * text)195 inline bool Parser::LookingAt(const char* text) {
196 return input_->current().text == text;
197 }
198
LookingAtType(io::Tokenizer::TokenType token_type)199 inline bool Parser::LookingAtType(io::Tokenizer::TokenType token_type) {
200 return input_->current().type == token_type;
201 }
202
AtEnd()203 inline bool Parser::AtEnd() { return LookingAtType(io::Tokenizer::TYPE_END); }
204
TryConsume(const char * text)205 bool Parser::TryConsume(const char* text) {
206 if (LookingAt(text)) {
207 input_->Next();
208 return true;
209 } else {
210 return false;
211 }
212 }
213
Consume(const char * text,const char * error)214 bool Parser::Consume(const char* text, const char* error) {
215 if (TryConsume(text)) {
216 return true;
217 } else {
218 AddError(error);
219 return false;
220 }
221 }
222
Consume(const char * text)223 bool Parser::Consume(const char* text) {
224 std::string error = "Expected \"" + std::string(text) + "\".";
225 return Consume(text, error.c_str());
226 }
227
ConsumeIdentifier(std::string * output,const char * error)228 bool Parser::ConsumeIdentifier(std::string* output, const char* error) {
229 if (LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) {
230 *output = input_->current().text;
231 input_->Next();
232 return true;
233 } else {
234 AddError(error);
235 return false;
236 }
237 }
238
ConsumeInteger(int * output,const char * error)239 bool Parser::ConsumeInteger(int* output, const char* error) {
240 if (LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
241 uint64_t value = 0;
242 if (!io::Tokenizer::ParseInteger(input_->current().text,
243 std::numeric_limits<int32_t>::max(),
244 &value)) {
245 AddError("Integer out of range.");
246 // We still return true because we did, in fact, parse an integer.
247 }
248 *output = value;
249 input_->Next();
250 return true;
251 } else {
252 AddError(error);
253 return false;
254 }
255 }
256
ConsumeSignedInteger(int * output,const char * error)257 bool Parser::ConsumeSignedInteger(int* output, const char* error) {
258 bool is_negative = false;
259 uint64_t max_value = std::numeric_limits<int32_t>::max();
260 if (TryConsume("-")) {
261 is_negative = true;
262 max_value += 1;
263 }
264 uint64_t value = 0;
265 DO(ConsumeInteger64(max_value, &value, error));
266 if (is_negative) value *= -1;
267 *output = value;
268 return true;
269 }
270
ConsumeInteger64(uint64_t max_value,uint64_t * output,const char * error)271 bool Parser::ConsumeInteger64(uint64_t max_value, uint64_t* output,
272 const char* error) {
273 if (LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
274 if (!io::Tokenizer::ParseInteger(input_->current().text, max_value,
275 output)) {
276 AddError("Integer out of range.");
277 // We still return true because we did, in fact, parse an integer.
278 *output = 0;
279 }
280 input_->Next();
281 return true;
282 } else {
283 AddError(error);
284 return false;
285 }
286 }
287
ConsumeNumber(double * output,const char * error)288 bool Parser::ConsumeNumber(double* output, const char* error) {
289 if (LookingAtType(io::Tokenizer::TYPE_FLOAT)) {
290 *output = io::Tokenizer::ParseFloat(input_->current().text);
291 input_->Next();
292 return true;
293 } else if (LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
294 // Also accept integers.
295 uint64_t value = 0;
296 if (!io::Tokenizer::ParseInteger(input_->current().text,
297 std::numeric_limits<uint64_t>::max(),
298 &value)) {
299 AddError("Integer out of range.");
300 // We still return true because we did, in fact, parse a number.
301 }
302 *output = value;
303 input_->Next();
304 return true;
305 } else if (LookingAt("inf")) {
306 *output = std::numeric_limits<double>::infinity();
307 input_->Next();
308 return true;
309 } else if (LookingAt("nan")) {
310 *output = std::numeric_limits<double>::quiet_NaN();
311 input_->Next();
312 return true;
313 } else {
314 AddError(error);
315 return false;
316 }
317 }
318
ConsumeString(std::string * output,const char * error)319 bool Parser::ConsumeString(std::string* output, const char* error) {
320 if (LookingAtType(io::Tokenizer::TYPE_STRING)) {
321 io::Tokenizer::ParseString(input_->current().text, output);
322 input_->Next();
323 // Allow C++ like concatenation of adjacent string tokens.
324 while (LookingAtType(io::Tokenizer::TYPE_STRING)) {
325 io::Tokenizer::ParseStringAppend(input_->current().text, output);
326 input_->Next();
327 }
328 return true;
329 } else {
330 AddError(error);
331 return false;
332 }
333 }
334
TryConsumeEndOfDeclaration(const char * text,const LocationRecorder * location)335 bool Parser::TryConsumeEndOfDeclaration(const char* text,
336 const LocationRecorder* location) {
337 if (LookingAt(text)) {
338 std::string leading, trailing;
339 std::vector<std::string> detached;
340 input_->NextWithComments(&trailing, &detached, &leading);
341
342 // Save the leading comments for next time, and recall the leading comments
343 // from last time.
344 leading.swap(upcoming_doc_comments_);
345
346 if (location != nullptr) {
347 upcoming_detached_comments_.swap(detached);
348 location->AttachComments(&leading, &trailing, &detached);
349 } else if (strcmp(text, "}") == 0) {
350 // If the current location is null and we are finishing the current scope,
351 // drop pending upcoming detached comments.
352 upcoming_detached_comments_.swap(detached);
353 } else {
354 // Otherwise, append the new detached comments to the existing upcoming
355 // detached comments.
356 upcoming_detached_comments_.insert(upcoming_detached_comments_.end(),
357 detached.begin(), detached.end());
358 }
359
360 return true;
361 } else {
362 return false;
363 }
364 }
365
ConsumeEndOfDeclaration(const char * text,const LocationRecorder * location)366 bool Parser::ConsumeEndOfDeclaration(const char* text,
367 const LocationRecorder* location) {
368 if (TryConsumeEndOfDeclaration(text, location)) {
369 return true;
370 } else {
371 AddError("Expected \"" + std::string(text) + "\".");
372 return false;
373 }
374 }
375
376 // -------------------------------------------------------------------
377
AddError(int line,int column,const std::string & error)378 void Parser::AddError(int line, int column, const std::string& error) {
379 if (error_collector_ != nullptr) {
380 error_collector_->AddError(line, column, error);
381 }
382 had_errors_ = true;
383 }
384
AddError(const std::string & error)385 void Parser::AddError(const std::string& error) {
386 AddError(input_->current().line, input_->current().column, error);
387 }
388
AddWarning(const std::string & warning)389 void Parser::AddWarning(const std::string& warning) {
390 if (error_collector_ != nullptr) {
391 error_collector_->AddWarning(input_->current().line,
392 input_->current().column, warning);
393 }
394 }
395
396 // -------------------------------------------------------------------
397
LocationRecorder(Parser * parser)398 Parser::LocationRecorder::LocationRecorder(Parser* parser)
399 : parser_(parser),
400 source_code_info_(parser->source_code_info_),
401 location_(parser_->source_code_info_->add_location()) {
402 location_->add_span(parser_->input_->current().line);
403 location_->add_span(parser_->input_->current().column);
404 }
405
LocationRecorder(const LocationRecorder & parent)406 Parser::LocationRecorder::LocationRecorder(const LocationRecorder& parent) {
407 Init(parent, parent.source_code_info_);
408 }
409
LocationRecorder(const LocationRecorder & parent,int path1,SourceCodeInfo * source_code_info)410 Parser::LocationRecorder::LocationRecorder(const LocationRecorder& parent,
411 int path1,
412 SourceCodeInfo* source_code_info) {
413 Init(parent, source_code_info);
414 AddPath(path1);
415 }
416
LocationRecorder(const LocationRecorder & parent,int path1)417 Parser::LocationRecorder::LocationRecorder(const LocationRecorder& parent,
418 int path1) {
419 Init(parent, parent.source_code_info_);
420 AddPath(path1);
421 }
422
LocationRecorder(const LocationRecorder & parent,int path1,int path2)423 Parser::LocationRecorder::LocationRecorder(const LocationRecorder& parent,
424 int path1, int path2) {
425 Init(parent, parent.source_code_info_);
426 AddPath(path1);
427 AddPath(path2);
428 }
429
Init(const LocationRecorder & parent,SourceCodeInfo * source_code_info)430 void Parser::LocationRecorder::Init(const LocationRecorder& parent,
431 SourceCodeInfo* source_code_info) {
432 parser_ = parent.parser_;
433 source_code_info_ = source_code_info;
434
435 location_ = source_code_info_->add_location();
436 location_->mutable_path()->CopyFrom(parent.location_->path());
437
438 location_->add_span(parser_->input_->current().line);
439 location_->add_span(parser_->input_->current().column);
440 }
441
~LocationRecorder()442 Parser::LocationRecorder::~LocationRecorder() {
443 if (location_->span_size() <= 2) {
444 EndAt(parser_->input_->previous());
445 }
446 }
447
AddPath(int path_component)448 void Parser::LocationRecorder::AddPath(int path_component) {
449 location_->add_path(path_component);
450 }
451
StartAt(const io::Tokenizer::Token & token)452 void Parser::LocationRecorder::StartAt(const io::Tokenizer::Token& token) {
453 location_->set_span(0, token.line);
454 location_->set_span(1, token.column);
455 }
456
StartAt(const LocationRecorder & other)457 void Parser::LocationRecorder::StartAt(const LocationRecorder& other) {
458 location_->set_span(0, other.location_->span(0));
459 location_->set_span(1, other.location_->span(1));
460 }
461
EndAt(const io::Tokenizer::Token & token)462 void Parser::LocationRecorder::EndAt(const io::Tokenizer::Token& token) {
463 if (token.line != location_->span(0)) {
464 location_->add_span(token.line);
465 }
466 location_->add_span(token.end_column);
467 }
468
RecordLegacyLocation(const Message * descriptor,DescriptorPool::ErrorCollector::ErrorLocation location)469 void Parser::LocationRecorder::RecordLegacyLocation(
470 const Message* descriptor,
471 DescriptorPool::ErrorCollector::ErrorLocation location) {
472 if (parser_->source_location_table_ != nullptr) {
473 parser_->source_location_table_->Add(
474 descriptor, location, location_->span(0), location_->span(1));
475 }
476 }
477
RecordLegacyImportLocation(const Message * descriptor,const std::string & name)478 void Parser::LocationRecorder::RecordLegacyImportLocation(
479 const Message* descriptor, const std::string& name) {
480 if (parser_->source_location_table_ != nullptr) {
481 parser_->source_location_table_->AddImport(
482 descriptor, name, location_->span(0), location_->span(1));
483 }
484 }
485
CurrentPathSize() const486 int Parser::LocationRecorder::CurrentPathSize() const {
487 return location_->path_size();
488 }
489
AttachComments(std::string * leading,std::string * trailing,std::vector<std::string> * detached_comments) const490 void Parser::LocationRecorder::AttachComments(
491 std::string* leading, std::string* trailing,
492 std::vector<std::string>* detached_comments) const {
493 GOOGLE_CHECK(!location_->has_leading_comments());
494 GOOGLE_CHECK(!location_->has_trailing_comments());
495
496 if (!leading->empty()) {
497 location_->mutable_leading_comments()->swap(*leading);
498 }
499 if (!trailing->empty()) {
500 location_->mutable_trailing_comments()->swap(*trailing);
501 }
502 for (int i = 0; i < detached_comments->size(); ++i) {
503 location_->add_leading_detached_comments()->swap((*detached_comments)[i]);
504 }
505 detached_comments->clear();
506 }
507
508 // -------------------------------------------------------------------
509
SkipStatement()510 void Parser::SkipStatement() {
511 while (true) {
512 if (AtEnd()) {
513 return;
514 } else if (LookingAtType(io::Tokenizer::TYPE_SYMBOL)) {
515 if (TryConsumeEndOfDeclaration(";", nullptr)) {
516 return;
517 } else if (TryConsume("{")) {
518 SkipRestOfBlock();
519 return;
520 } else if (LookingAt("}")) {
521 return;
522 }
523 }
524 input_->Next();
525 }
526 }
527
SkipRestOfBlock()528 void Parser::SkipRestOfBlock() {
529 while (true) {
530 if (AtEnd()) {
531 return;
532 } else if (LookingAtType(io::Tokenizer::TYPE_SYMBOL)) {
533 if (TryConsumeEndOfDeclaration("}", nullptr)) {
534 return;
535 } else if (TryConsume("{")) {
536 SkipRestOfBlock();
537 }
538 }
539 input_->Next();
540 }
541 }
542
543 // ===================================================================
544
ValidateEnum(const EnumDescriptorProto * proto)545 bool Parser::ValidateEnum(const EnumDescriptorProto* proto) {
546 bool has_allow_alias = false;
547 bool allow_alias = false;
548
549 for (int i = 0; i < proto->options().uninterpreted_option_size(); i++) {
550 const UninterpretedOption option = proto->options().uninterpreted_option(i);
551 if (option.name_size() > 1) {
552 continue;
553 }
554 if (!option.name(0).is_extension() &&
555 option.name(0).name_part() == "allow_alias") {
556 has_allow_alias = true;
557 if (option.identifier_value() == "true") {
558 allow_alias = true;
559 }
560 break;
561 }
562 }
563
564 if (has_allow_alias && !allow_alias) {
565 std::string error =
566 "\"" + proto->name() +
567 "\" declares 'option allow_alias = false;' which has no effect. "
568 "Please remove the declaration.";
569 // This needlessly clutters declarations with nops.
570 AddError(error);
571 return false;
572 }
573
574 std::set<int> used_values;
575 bool has_duplicates = false;
576 for (int i = 0; i < proto->value_size(); ++i) {
577 const EnumValueDescriptorProto& enum_value = proto->value(i);
578 if (used_values.find(enum_value.number()) != used_values.end()) {
579 has_duplicates = true;
580 break;
581 } else {
582 used_values.insert(enum_value.number());
583 }
584 }
585 if (allow_alias && !has_duplicates) {
586 std::string error =
587 "\"" + proto->name() +
588 "\" declares support for enum aliases but no enum values share field "
589 "numbers. Please remove the unnecessary 'option allow_alias = true;' "
590 "declaration.";
591 // Generate an error if an enum declares support for duplicate enum values
592 // and does not use it protect future authors.
593 AddError(error);
594 return false;
595 }
596
597 // Enforce that enum constants must be UPPER_CASE except in case of
598 // enum_alias.
599 if (!allow_alias) {
600 for (const auto& enum_value : proto->value()) {
601 if (!IsUpperUnderscore(enum_value.name())) {
602 AddWarning(
603 "Enum constant should be in UPPER_CASE. Found: " +
604 enum_value.name() +
605 ". See https://developers.google.com/protocol-buffers/docs/style");
606 }
607 }
608 }
609
610 return true;
611 }
612
Parse(io::Tokenizer * input,FileDescriptorProto * file)613 bool Parser::Parse(io::Tokenizer* input, FileDescriptorProto* file) {
614 input_ = input;
615 had_errors_ = false;
616 syntax_identifier_.clear();
617
618 // Note that |file| could be NULL at this point if
619 // stop_after_syntax_identifier_ is true. So, we conservatively allocate
620 // SourceCodeInfo on the stack, then swap it into the FileDescriptorProto
621 // later on.
622 SourceCodeInfo source_code_info;
623 source_code_info_ = &source_code_info;
624
625 if (LookingAtType(io::Tokenizer::TYPE_START)) {
626 // Advance to first token.
627 input_->NextWithComments(nullptr, &upcoming_detached_comments_,
628 &upcoming_doc_comments_);
629 }
630
631 {
632 LocationRecorder root_location(this);
633 root_location.RecordLegacyLocation(file,
634 DescriptorPool::ErrorCollector::OTHER);
635
636 if (require_syntax_identifier_ || LookingAt("syntax")) {
637 if (!ParseSyntaxIdentifier(root_location)) {
638 // Don't attempt to parse the file if we didn't recognize the syntax
639 // identifier.
640 return false;
641 }
642 // Store the syntax into the file.
643 if (file != nullptr) file->set_syntax(syntax_identifier_);
644 } else if (!stop_after_syntax_identifier_) {
645 GOOGLE_LOG(WARNING) << "No syntax specified for the proto file: " << file->name()
646 << ". Please use 'syntax = \"proto2\";' "
647 << "or 'syntax = \"proto3\";' to specify a syntax "
648 << "version. (Defaulted to proto2 syntax.)";
649 syntax_identifier_ = "proto2";
650 }
651
652 if (stop_after_syntax_identifier_) return !had_errors_;
653
654 // Repeatedly parse statements until we reach the end of the file.
655 while (!AtEnd()) {
656 if (!ParseTopLevelStatement(file, root_location)) {
657 // This statement failed to parse. Skip it, but keep looping to parse
658 // other statements.
659 SkipStatement();
660
661 if (LookingAt("}")) {
662 AddError("Unmatched \"}\".");
663 input_->NextWithComments(nullptr, &upcoming_detached_comments_,
664 &upcoming_doc_comments_);
665 }
666 }
667 }
668 }
669
670 input_ = nullptr;
671 source_code_info_ = nullptr;
672 assert(file != nullptr);
673 source_code_info.Swap(file->mutable_source_code_info());
674 return !had_errors_;
675 }
676
ParseSyntaxIdentifier(const LocationRecorder & parent)677 bool Parser::ParseSyntaxIdentifier(const LocationRecorder& parent) {
678 LocationRecorder syntax_location(parent,
679 FileDescriptorProto::kSyntaxFieldNumber);
680 DO(Consume(
681 "syntax",
682 "File must begin with a syntax statement, e.g. 'syntax = \"proto2\";'."));
683 DO(Consume("="));
684 io::Tokenizer::Token syntax_token = input_->current();
685 std::string syntax;
686 DO(ConsumeString(&syntax, "Expected syntax identifier."));
687 DO(ConsumeEndOfDeclaration(";", &syntax_location));
688
689 syntax_identifier_ = syntax;
690
691 if (syntax != "proto2" && syntax != "proto3" &&
692 !stop_after_syntax_identifier_) {
693 AddError(syntax_token.line, syntax_token.column,
694 "Unrecognized syntax identifier \"" + syntax +
695 "\". This parser "
696 "only recognizes \"proto2\" and \"proto3\".");
697 return false;
698 }
699
700 return true;
701 }
702
ParseTopLevelStatement(FileDescriptorProto * file,const LocationRecorder & root_location)703 bool Parser::ParseTopLevelStatement(FileDescriptorProto* file,
704 const LocationRecorder& root_location) {
705 if (TryConsumeEndOfDeclaration(";", nullptr)) {
706 // empty statement; ignore
707 return true;
708 } else if (LookingAt("message")) {
709 LocationRecorder location(root_location,
710 FileDescriptorProto::kMessageTypeFieldNumber,
711 file->message_type_size());
712 return ParseMessageDefinition(file->add_message_type(), location, file);
713 } else if (LookingAt("enum")) {
714 LocationRecorder location(root_location,
715 FileDescriptorProto::kEnumTypeFieldNumber,
716 file->enum_type_size());
717 return ParseEnumDefinition(file->add_enum_type(), location, file);
718 } else if (LookingAt("service")) {
719 LocationRecorder location(root_location,
720 FileDescriptorProto::kServiceFieldNumber,
721 file->service_size());
722 return ParseServiceDefinition(file->add_service(), location, file);
723 } else if (LookingAt("extend")) {
724 LocationRecorder location(root_location,
725 FileDescriptorProto::kExtensionFieldNumber);
726 return ParseExtend(
727 file->mutable_extension(), file->mutable_message_type(), root_location,
728 FileDescriptorProto::kMessageTypeFieldNumber, location, file);
729 } else if (LookingAt("import")) {
730 return ParseImport(file->mutable_dependency(),
731 file->mutable_public_dependency(),
732 file->mutable_weak_dependency(), root_location, file);
733 } else if (LookingAt("package")) {
734 return ParsePackage(file, root_location, file);
735 } else if (LookingAt("option")) {
736 LocationRecorder location(root_location,
737 FileDescriptorProto::kOptionsFieldNumber);
738 return ParseOption(file->mutable_options(), location, file,
739 OPTION_STATEMENT);
740 } else {
741 AddError("Expected top-level statement (e.g. \"message\").");
742 return false;
743 }
744 }
745
746 // -------------------------------------------------------------------
747 // Messages
748
ParseMessageDefinition(DescriptorProto * message,const LocationRecorder & message_location,const FileDescriptorProto * containing_file)749 bool Parser::ParseMessageDefinition(
750 DescriptorProto* message, const LocationRecorder& message_location,
751 const FileDescriptorProto* containing_file) {
752 DO(Consume("message"));
753 {
754 LocationRecorder location(message_location,
755 DescriptorProto::kNameFieldNumber);
756 location.RecordLegacyLocation(message,
757 DescriptorPool::ErrorCollector::NAME);
758 DO(ConsumeIdentifier(message->mutable_name(), "Expected message name."));
759 if (!IsUpperCamelCase(message->name())) {
760 AddWarning(
761 "Message name should be in UpperCamelCase. Found: " +
762 message->name() +
763 ". See https://developers.google.com/protocol-buffers/docs/style");
764 }
765 }
766 DO(ParseMessageBlock(message, message_location, containing_file));
767
768 if (syntax_identifier_ == "proto3") {
769 // Add synthetic one-field oneofs for optional fields, except messages which
770 // already have presence in proto3.
771 //
772 // We have to make sure the oneof names don't conflict with any other
773 // field or oneof.
774 std::unordered_set<std::string> names;
775 for (const auto& field : message->field()) {
776 names.insert(field.name());
777 }
778 for (const auto& oneof : message->oneof_decl()) {
779 names.insert(oneof.name());
780 }
781
782 for (auto& field : *message->mutable_field()) {
783 if (field.proto3_optional()) {
784 std::string oneof_name = field.name();
785
786 // Prepend 'XXXXX_' until we are no longer conflicting.
787 // Avoid prepending a double-underscore because such names are
788 // reserved in C++.
789 if (oneof_name.empty() || oneof_name[0] != '_') {
790 oneof_name = '_' + oneof_name;
791 }
792 while (names.count(oneof_name) > 0) {
793 oneof_name = 'X' + oneof_name;
794 }
795
796 names.insert(oneof_name);
797 field.set_oneof_index(message->oneof_decl_size());
798 OneofDescriptorProto* oneof = message->add_oneof_decl();
799 oneof->set_name(oneof_name);
800 }
801 }
802 }
803
804 return true;
805 }
806
807 namespace {
808
// Placeholder end value of a range. The Adjust*RangesWithMaxEndNumber()
// helpers replace it with the actual maximum.
constexpr int kMaxRangeSentinel = -1;
810
IsMessageSetWireFormatMessage(const DescriptorProto & message)811 bool IsMessageSetWireFormatMessage(const DescriptorProto& message) {
812 const MessageOptions& options = message.options();
813 for (int i = 0; i < options.uninterpreted_option_size(); ++i) {
814 const UninterpretedOption& uninterpreted = options.uninterpreted_option(i);
815 if (uninterpreted.name_size() == 1 &&
816 uninterpreted.name(0).name_part() == "message_set_wire_format" &&
817 uninterpreted.identifier_value() == "true") {
818 return true;
819 }
820 }
821 return false;
822 }
823
824 // Modifies any extension ranges that specified 'max' as the end of the
825 // extension range, and sets them to the type-specific maximum. The actual max
826 // tag number can only be determined after all options have been parsed.
AdjustExtensionRangesWithMaxEndNumber(DescriptorProto * message)827 void AdjustExtensionRangesWithMaxEndNumber(DescriptorProto* message) {
828 const bool is_message_set = IsMessageSetWireFormatMessage(*message);
829 const int max_extension_number = is_message_set
830 ? std::numeric_limits<int32_t>::max()
831 : FieldDescriptor::kMaxNumber + 1;
832 for (int i = 0; i < message->extension_range_size(); ++i) {
833 if (message->extension_range(i).end() == kMaxRangeSentinel) {
834 message->mutable_extension_range(i)->set_end(max_extension_number);
835 }
836 }
837 }
838
839 // Modifies any reserved ranges that specified 'max' as the end of the
840 // reserved range, and sets them to the type-specific maximum. The actual max
841 // tag number can only be determined after all options have been parsed.
AdjustReservedRangesWithMaxEndNumber(DescriptorProto * message)842 void AdjustReservedRangesWithMaxEndNumber(DescriptorProto* message) {
843 const bool is_message_set = IsMessageSetWireFormatMessage(*message);
844 const int max_field_number = is_message_set
845 ? std::numeric_limits<int32_t>::max()
846 : FieldDescriptor::kMaxNumber + 1;
847 for (int i = 0; i < message->reserved_range_size(); ++i) {
848 if (message->reserved_range(i).end() == kMaxRangeSentinel) {
849 message->mutable_reserved_range(i)->set_end(max_field_number);
850 }
851 }
852 }
853
854 } // namespace
855
ParseMessageBlock(DescriptorProto * message,const LocationRecorder & message_location,const FileDescriptorProto * containing_file)856 bool Parser::ParseMessageBlock(DescriptorProto* message,
857 const LocationRecorder& message_location,
858 const FileDescriptorProto* containing_file) {
859 DO(ConsumeEndOfDeclaration("{", &message_location));
860
861 while (!TryConsumeEndOfDeclaration("}", nullptr)) {
862 if (AtEnd()) {
863 AddError("Reached end of input in message definition (missing '}').");
864 return false;
865 }
866
867 if (!ParseMessageStatement(message, message_location, containing_file)) {
868 // This statement failed to parse. Skip it, but keep looping to parse
869 // other statements.
870 SkipStatement();
871 }
872 }
873
874 if (message->extension_range_size() > 0) {
875 AdjustExtensionRangesWithMaxEndNumber(message);
876 }
877 if (message->reserved_range_size() > 0) {
878 AdjustReservedRangesWithMaxEndNumber(message);
879 }
880 return true;
881 }
882
ParseMessageStatement(DescriptorProto * message,const LocationRecorder & message_location,const FileDescriptorProto * containing_file)883 bool Parser::ParseMessageStatement(DescriptorProto* message,
884 const LocationRecorder& message_location,
885 const FileDescriptorProto* containing_file) {
886 if (TryConsumeEndOfDeclaration(";", nullptr)) {
887 // empty statement; ignore
888 return true;
889 } else if (LookingAt("message")) {
890 LocationRecorder location(message_location,
891 DescriptorProto::kNestedTypeFieldNumber,
892 message->nested_type_size());
893 return ParseMessageDefinition(message->add_nested_type(), location,
894 containing_file);
895 } else if (LookingAt("enum")) {
896 LocationRecorder location(message_location,
897 DescriptorProto::kEnumTypeFieldNumber,
898 message->enum_type_size());
899 return ParseEnumDefinition(message->add_enum_type(), location,
900 containing_file);
901 } else if (LookingAt("extensions")) {
902 LocationRecorder location(message_location,
903 DescriptorProto::kExtensionRangeFieldNumber);
904 return ParseExtensions(message, location, containing_file);
905 } else if (LookingAt("reserved")) {
906 return ParseReserved(message, message_location);
907 } else if (LookingAt("extend")) {
908 LocationRecorder location(message_location,
909 DescriptorProto::kExtensionFieldNumber);
910 return ParseExtend(message->mutable_extension(),
911 message->mutable_nested_type(), message_location,
912 DescriptorProto::kNestedTypeFieldNumber, location,
913 containing_file);
914 } else if (LookingAt("option")) {
915 LocationRecorder location(message_location,
916 DescriptorProto::kOptionsFieldNumber);
917 return ParseOption(message->mutable_options(), location, containing_file,
918 OPTION_STATEMENT);
919 } else if (LookingAt("oneof")) {
920 int oneof_index = message->oneof_decl_size();
921 LocationRecorder oneof_location(
922 message_location, DescriptorProto::kOneofDeclFieldNumber, oneof_index);
923
924 return ParseOneof(message->add_oneof_decl(), message, oneof_index,
925 oneof_location, message_location, containing_file);
926 } else {
927 LocationRecorder location(message_location,
928 DescriptorProto::kFieldFieldNumber,
929 message->field_size());
930 return ParseMessageField(
931 message->add_field(), message->mutable_nested_type(), message_location,
932 DescriptorProto::kNestedTypeFieldNumber, location, containing_file);
933 }
934 }
935
ParseMessageField(FieldDescriptorProto * field,RepeatedPtrField<DescriptorProto> * messages,const LocationRecorder & parent_location,int location_field_number_for_nested_type,const LocationRecorder & field_location,const FileDescriptorProto * containing_file)936 bool Parser::ParseMessageField(FieldDescriptorProto* field,
937 RepeatedPtrField<DescriptorProto>* messages,
938 const LocationRecorder& parent_location,
939 int location_field_number_for_nested_type,
940 const LocationRecorder& field_location,
941 const FileDescriptorProto* containing_file) {
942 {
943 FieldDescriptorProto::Label label;
944 if (ParseLabel(&label, field_location)) {
945 field->set_label(label);
946 if (label == FieldDescriptorProto::LABEL_OPTIONAL &&
947 syntax_identifier_ == "proto3") {
948 field->set_proto3_optional(true);
949 }
950 }
951 }
952
953 return ParseMessageFieldNoLabel(field, messages, parent_location,
954 location_field_number_for_nested_type,
955 field_location, containing_file);
956 }
957
ParseMessageFieldNoLabel(FieldDescriptorProto * field,RepeatedPtrField<DescriptorProto> * messages,const LocationRecorder & parent_location,int location_field_number_for_nested_type,const LocationRecorder & field_location,const FileDescriptorProto * containing_file)958 bool Parser::ParseMessageFieldNoLabel(
959 FieldDescriptorProto* field, RepeatedPtrField<DescriptorProto>* messages,
960 const LocationRecorder& parent_location,
961 int location_field_number_for_nested_type,
962 const LocationRecorder& field_location,
963 const FileDescriptorProto* containing_file) {
964 MapField map_field;
965 // Parse type.
966 {
967 LocationRecorder location(field_location); // add path later
968 location.RecordLegacyLocation(field, DescriptorPool::ErrorCollector::TYPE);
969
970 bool type_parsed = false;
971 FieldDescriptorProto::Type type = FieldDescriptorProto::TYPE_INT32;
972 std::string type_name;
973
974 // Special case map field. We only treat the field as a map field if the
975 // field type name starts with the word "map" with a following "<".
976 if (TryConsume("map")) {
977 if (LookingAt("<")) {
978 map_field.is_map_field = true;
979 } else {
980 // False positive
981 type_parsed = true;
982 type_name = "map";
983 }
984 }
985 if (map_field.is_map_field) {
986 if (field->has_oneof_index()) {
987 AddError("Map fields are not allowed in oneofs.");
988 return false;
989 }
990 if (field->has_label()) {
991 AddError(
992 "Field labels (required/optional/repeated) are not allowed on "
993 "map fields.");
994 return false;
995 }
996 if (field->has_extendee()) {
997 AddError("Map fields are not allowed to be extensions.");
998 return false;
999 }
1000 field->set_label(FieldDescriptorProto::LABEL_REPEATED);
1001 DO(Consume("<"));
1002 DO(ParseType(&map_field.key_type, &map_field.key_type_name));
1003 DO(Consume(","));
1004 DO(ParseType(&map_field.value_type, &map_field.value_type_name));
1005 DO(Consume(">"));
1006 // Defer setting of the type name of the map field until the
1007 // field name is parsed. Add the source location though.
1008 location.AddPath(FieldDescriptorProto::kTypeNameFieldNumber);
1009 } else {
1010 // Handle the case where no explicit label is given for a non-map field.
1011 if (!field->has_label() && DefaultToOptionalFields()) {
1012 field->set_label(FieldDescriptorProto::LABEL_OPTIONAL);
1013 }
1014 if (!field->has_label()) {
1015 AddError("Expected \"required\", \"optional\", or \"repeated\".");
1016 // We can actually reasonably recover here by just assuming the user
1017 // forgot the label altogether.
1018 field->set_label(FieldDescriptorProto::LABEL_OPTIONAL);
1019 }
1020
1021 // Handle the case where the actual type is a message or enum named "map",
1022 // which we already consumed in the code above.
1023 if (!type_parsed) {
1024 DO(ParseType(&type, &type_name));
1025 }
1026 if (type_name.empty()) {
1027 location.AddPath(FieldDescriptorProto::kTypeFieldNumber);
1028 field->set_type(type);
1029 } else {
1030 location.AddPath(FieldDescriptorProto::kTypeNameFieldNumber);
1031 field->set_type_name(type_name);
1032 }
1033 }
1034 }
1035
1036 // Parse name and '='.
1037 io::Tokenizer::Token name_token = input_->current();
1038 {
1039 LocationRecorder location(field_location,
1040 FieldDescriptorProto::kNameFieldNumber);
1041 location.RecordLegacyLocation(field, DescriptorPool::ErrorCollector::NAME);
1042 DO(ConsumeIdentifier(field->mutable_name(), "Expected field name."));
1043
1044 if (!IsLowerUnderscore(field->name())) {
1045 AddWarning(
1046 "Field name should be lowercase. Found: " + field->name() +
1047 ". See: https://developers.google.com/protocol-buffers/docs/style");
1048 }
1049 if (IsNumberFollowUnderscore(field->name())) {
1050 AddWarning(
1051 "Number should not come right after an underscore. Found: " +
1052 field->name() +
1053 ". See: https://developers.google.com/protocol-buffers/docs/style");
1054 }
1055 }
1056 DO(Consume("=", "Missing field number."));
1057
1058 // Parse field number.
1059 {
1060 LocationRecorder location(field_location,
1061 FieldDescriptorProto::kNumberFieldNumber);
1062 location.RecordLegacyLocation(field,
1063 DescriptorPool::ErrorCollector::NUMBER);
1064 int number;
1065 DO(ConsumeInteger(&number, "Expected field number."));
1066 field->set_number(number);
1067 }
1068
1069 // Parse options.
1070 DO(ParseFieldOptions(field, field_location, containing_file));
1071
1072 // Deal with groups.
1073 if (field->has_type() && field->type() == FieldDescriptorProto::TYPE_GROUP) {
1074 // Awkward: Since a group declares both a message type and a field, we
1075 // have to create overlapping locations.
1076 LocationRecorder group_location(parent_location);
1077 group_location.StartAt(field_location);
1078 group_location.AddPath(location_field_number_for_nested_type);
1079 group_location.AddPath(messages->size());
1080
1081 DescriptorProto* group = messages->Add();
1082 group->set_name(field->name());
1083
1084 // Record name location to match the field name's location.
1085 {
1086 LocationRecorder location(group_location,
1087 DescriptorProto::kNameFieldNumber);
1088 location.StartAt(name_token);
1089 location.EndAt(name_token);
1090 location.RecordLegacyLocation(group,
1091 DescriptorPool::ErrorCollector::NAME);
1092 }
1093
1094 // The field's type_name also comes from the name. Confusing!
1095 {
1096 LocationRecorder location(field_location,
1097 FieldDescriptorProto::kTypeNameFieldNumber);
1098 location.StartAt(name_token);
1099 location.EndAt(name_token);
1100 }
1101
1102 // As a hack for backwards-compatibility, we force the group name to start
1103 // with a capital letter and lower-case the field name. New code should
1104 // not use groups; it should use nested messages.
1105 if (group->name()[0] < 'A' || 'Z' < group->name()[0]) {
1106 AddError(name_token.line, name_token.column,
1107 "Group names must start with a capital letter.");
1108 }
1109 LowerString(field->mutable_name());
1110
1111 field->set_type_name(group->name());
1112 if (LookingAt("{")) {
1113 DO(ParseMessageBlock(group, group_location, containing_file));
1114 } else {
1115 AddError("Missing group body.");
1116 return false;
1117 }
1118 } else {
1119 DO(ConsumeEndOfDeclaration(";", &field_location));
1120 }
1121
1122 // Create a map entry type if this is a map field.
1123 if (map_field.is_map_field) {
1124 GenerateMapEntry(map_field, field, messages);
1125 }
1126
1127 return true;
1128 }
1129
GenerateMapEntry(const MapField & map_field,FieldDescriptorProto * field,RepeatedPtrField<DescriptorProto> * messages)1130 void Parser::GenerateMapEntry(const MapField& map_field,
1131 FieldDescriptorProto* field,
1132 RepeatedPtrField<DescriptorProto>* messages) {
1133 DescriptorProto* entry = messages->Add();
1134 std::string entry_name = MapEntryName(field->name());
1135 field->set_type_name(entry_name);
1136 entry->set_name(entry_name);
1137 entry->mutable_options()->set_map_entry(true);
1138 FieldDescriptorProto* key_field = entry->add_field();
1139 key_field->set_name("key");
1140 key_field->set_label(FieldDescriptorProto::LABEL_OPTIONAL);
1141 key_field->set_number(1);
1142 if (map_field.key_type_name.empty()) {
1143 key_field->set_type(map_field.key_type);
1144 } else {
1145 key_field->set_type_name(map_field.key_type_name);
1146 }
1147 FieldDescriptorProto* value_field = entry->add_field();
1148 value_field->set_name("value");
1149 value_field->set_label(FieldDescriptorProto::LABEL_OPTIONAL);
1150 value_field->set_number(2);
1151 if (map_field.value_type_name.empty()) {
1152 value_field->set_type(map_field.value_type);
1153 } else {
1154 value_field->set_type_name(map_field.value_type_name);
1155 }
1156 // Propagate the "enforce_utf8" option to key and value fields if they
1157 // are strings. This helps simplify the implementation of code generators
1158 // and also reflection-based parsing code.
1159 //
1160 // The following definition:
1161 // message Foo {
1162 // map<string, string> value = 1 [enforce_utf8 = false];
1163 // }
1164 // will be interpreted as:
1165 // message Foo {
1166 // message ValueEntry {
1167 // option map_entry = true;
1168 // string key = 1 [enforce_utf8 = false];
1169 // string value = 2 [enforce_utf8 = false];
1170 // }
1171 // repeated ValueEntry value = 1 [enforce_utf8 = false];
1172 // }
1173 //
1174 // TODO(xiaofeng): Remove this when the "enforce_utf8" option is removed
1175 // from protocol compiler.
1176 for (int i = 0; i < field->options().uninterpreted_option_size(); ++i) {
1177 const UninterpretedOption& option =
1178 field->options().uninterpreted_option(i);
1179 if (option.name_size() == 1 &&
1180 option.name(0).name_part() == "enforce_utf8" &&
1181 !option.name(0).is_extension()) {
1182 if (key_field->type() == FieldDescriptorProto::TYPE_STRING) {
1183 key_field->mutable_options()->add_uninterpreted_option()->CopyFrom(
1184 option);
1185 }
1186 if (value_field->type() == FieldDescriptorProto::TYPE_STRING) {
1187 value_field->mutable_options()->add_uninterpreted_option()->CopyFrom(
1188 option);
1189 }
1190 }
1191 }
1192 }
1193
ParseFieldOptions(FieldDescriptorProto * field,const LocationRecorder & field_location,const FileDescriptorProto * containing_file)1194 bool Parser::ParseFieldOptions(FieldDescriptorProto* field,
1195 const LocationRecorder& field_location,
1196 const FileDescriptorProto* containing_file) {
1197 if (!LookingAt("[")) return true;
1198
1199 LocationRecorder location(field_location,
1200 FieldDescriptorProto::kOptionsFieldNumber);
1201
1202 DO(Consume("["));
1203
1204 // Parse field options.
1205 do {
1206 if (LookingAt("default")) {
1207 // We intentionally pass field_location rather than location here, since
1208 // the default value is not actually an option.
1209 DO(ParseDefaultAssignment(field, field_location, containing_file));
1210 } else if (LookingAt("json_name")) {
1211 // Like default value, this "json_name" is not an actual option.
1212 DO(ParseJsonName(field, field_location, containing_file));
1213 } else {
1214 DO(ParseOption(field->mutable_options(), location, containing_file,
1215 OPTION_ASSIGNMENT));
1216 }
1217 } while (TryConsume(","));
1218
1219 DO(Consume("]"));
1220 return true;
1221 }
1222
ParseDefaultAssignment(FieldDescriptorProto * field,const LocationRecorder & field_location,const FileDescriptorProto * containing_file)1223 bool Parser::ParseDefaultAssignment(
1224 FieldDescriptorProto* field, const LocationRecorder& field_location,
1225 const FileDescriptorProto* containing_file) {
1226 if (field->has_default_value()) {
1227 AddError("Already set option \"default\".");
1228 field->clear_default_value();
1229 }
1230
1231 DO(Consume("default"));
1232 DO(Consume("="));
1233
1234 LocationRecorder location(field_location,
1235 FieldDescriptorProto::kDefaultValueFieldNumber);
1236 location.RecordLegacyLocation(field,
1237 DescriptorPool::ErrorCollector::DEFAULT_VALUE);
1238 std::string* default_value = field->mutable_default_value();
1239
1240 if (!field->has_type()) {
1241 // The field has a type name, but we don't know if it is a message or an
1242 // enum yet. (If it were a primitive type, |field| would have a type set
1243 // already.) In this case, simply take the current string as the default
1244 // value; we will catch the error later if it is not a valid enum value.
1245 // (N.B. that we do not check whether the current token is an identifier:
1246 // doing so throws strange errors when the user mistypes a primitive
1247 // typename and we assume it's an enum. E.g.: "optional int foo = 1 [default
1248 // = 42]". In such a case the fundamental error is really that "int" is not
1249 // a type, not that "42" is not an identifier. See b/12533582.)
1250 *default_value = input_->current().text;
1251 input_->Next();
1252 return true;
1253 }
1254
1255 switch (field->type()) {
1256 case FieldDescriptorProto::TYPE_INT32:
1257 case FieldDescriptorProto::TYPE_INT64:
1258 case FieldDescriptorProto::TYPE_SINT32:
1259 case FieldDescriptorProto::TYPE_SINT64:
1260 case FieldDescriptorProto::TYPE_SFIXED32:
1261 case FieldDescriptorProto::TYPE_SFIXED64: {
1262 uint64_t max_value = std::numeric_limits<int64_t>::max();
1263 if (field->type() == FieldDescriptorProto::TYPE_INT32 ||
1264 field->type() == FieldDescriptorProto::TYPE_SINT32 ||
1265 field->type() == FieldDescriptorProto::TYPE_SFIXED32) {
1266 max_value = std::numeric_limits<int32_t>::max();
1267 }
1268
1269 // These types can be negative.
1270 if (TryConsume("-")) {
1271 default_value->append("-");
1272 // Two's complement always has one more negative value than positive.
1273 ++max_value;
1274 }
1275 // Parse the integer to verify that it is not out-of-range.
1276 uint64_t value;
1277 DO(ConsumeInteger64(max_value, &value,
1278 "Expected integer for field default value."));
1279 // And stringify it again.
1280 default_value->append(StrCat(value));
1281 break;
1282 }
1283
1284 case FieldDescriptorProto::TYPE_UINT32:
1285 case FieldDescriptorProto::TYPE_UINT64:
1286 case FieldDescriptorProto::TYPE_FIXED32:
1287 case FieldDescriptorProto::TYPE_FIXED64: {
1288 uint64_t max_value = std::numeric_limits<uint64_t>::max();
1289 if (field->type() == FieldDescriptorProto::TYPE_UINT32 ||
1290 field->type() == FieldDescriptorProto::TYPE_FIXED32) {
1291 max_value = std::numeric_limits<uint32_t>::max();
1292 }
1293
1294 // Numeric, not negative.
1295 if (TryConsume("-")) {
1296 AddError("Unsigned field can't have negative default value.");
1297 }
1298 // Parse the integer to verify that it is not out-of-range.
1299 uint64_t value;
1300 DO(ConsumeInteger64(max_value, &value,
1301 "Expected integer for field default value."));
1302 // And stringify it again.
1303 default_value->append(StrCat(value));
1304 break;
1305 }
1306
1307 case FieldDescriptorProto::TYPE_FLOAT:
1308 case FieldDescriptorProto::TYPE_DOUBLE:
1309 // These types can be negative.
1310 if (TryConsume("-")) {
1311 default_value->append("-");
1312 }
1313 // Parse the integer because we have to convert hex integers to decimal
1314 // floats.
1315 double value;
1316 DO(ConsumeNumber(&value, "Expected number."));
1317 // And stringify it again.
1318 default_value->append(SimpleDtoa(value));
1319 break;
1320
1321 case FieldDescriptorProto::TYPE_BOOL:
1322 if (TryConsume("true")) {
1323 default_value->assign("true");
1324 } else if (TryConsume("false")) {
1325 default_value->assign("false");
1326 } else {
1327 AddError("Expected \"true\" or \"false\".");
1328 return false;
1329 }
1330 break;
1331
1332 case FieldDescriptorProto::TYPE_STRING:
1333 // Note: When file option java_string_check_utf8 is true, if a
1334 // non-string representation (eg byte[]) is later supported, it must
1335 // be checked for UTF-8-ness.
1336 DO(ConsumeString(default_value,
1337 "Expected string for field default "
1338 "value."));
1339 break;
1340
1341 case FieldDescriptorProto::TYPE_BYTES:
1342 DO(ConsumeString(default_value, "Expected string."));
1343 *default_value = CEscape(*default_value);
1344 break;
1345
1346 case FieldDescriptorProto::TYPE_ENUM:
1347 DO(ConsumeIdentifier(default_value,
1348 "Expected enum identifier for field "
1349 "default value."));
1350 break;
1351
1352 case FieldDescriptorProto::TYPE_MESSAGE:
1353 case FieldDescriptorProto::TYPE_GROUP:
1354 AddError("Messages can't have default values.");
1355 return false;
1356 }
1357
1358 return true;
1359 }
1360
ParseJsonName(FieldDescriptorProto * field,const LocationRecorder & field_location,const FileDescriptorProto * containing_file)1361 bool Parser::ParseJsonName(FieldDescriptorProto* field,
1362 const LocationRecorder& field_location,
1363 const FileDescriptorProto* containing_file) {
1364 if (field->has_json_name()) {
1365 AddError("Already set option \"json_name\".");
1366 field->clear_json_name();
1367 }
1368
1369 LocationRecorder location(field_location,
1370 FieldDescriptorProto::kJsonNameFieldNumber);
1371 location.RecordLegacyLocation(field,
1372 DescriptorPool::ErrorCollector::OPTION_NAME);
1373
1374 DO(Consume("json_name"));
1375 DO(Consume("="));
1376
1377 LocationRecorder value_location(location);
1378 value_location.RecordLegacyLocation(
1379 field, DescriptorPool::ErrorCollector::OPTION_VALUE);
1380
1381 DO(ConsumeString(field->mutable_json_name(),
1382 "Expected string for JSON name."));
1383 return true;
1384 }
1385
ParseOptionNamePart(UninterpretedOption * uninterpreted_option,const LocationRecorder & part_location,const FileDescriptorProto * containing_file)1386 bool Parser::ParseOptionNamePart(UninterpretedOption* uninterpreted_option,
1387 const LocationRecorder& part_location,
1388 const FileDescriptorProto* containing_file) {
1389 UninterpretedOption::NamePart* name = uninterpreted_option->add_name();
1390 std::string identifier; // We parse identifiers into this string.
1391 if (LookingAt("(")) { // This is an extension.
1392 DO(Consume("("));
1393
1394 {
1395 LocationRecorder location(
1396 part_location, UninterpretedOption::NamePart::kNamePartFieldNumber);
1397 // An extension name consists of dot-separated identifiers, and may begin
1398 // with a dot.
1399 if (LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) {
1400 DO(ConsumeIdentifier(&identifier, "Expected identifier."));
1401 name->mutable_name_part()->append(identifier);
1402 }
1403 while (LookingAt(".")) {
1404 DO(Consume("."));
1405 name->mutable_name_part()->append(".");
1406 DO(ConsumeIdentifier(&identifier, "Expected identifier."));
1407 name->mutable_name_part()->append(identifier);
1408 }
1409 }
1410
1411 DO(Consume(")"));
1412 name->set_is_extension(true);
1413 } else { // This is a regular field.
1414 LocationRecorder location(
1415 part_location, UninterpretedOption::NamePart::kNamePartFieldNumber);
1416 DO(ConsumeIdentifier(&identifier, "Expected identifier."));
1417 name->mutable_name_part()->append(identifier);
1418 name->set_is_extension(false);
1419 }
1420 return true;
1421 }
1422
ParseUninterpretedBlock(std::string * value)1423 bool Parser::ParseUninterpretedBlock(std::string* value) {
1424 // Note that enclosing braces are not added to *value.
1425 // We do NOT use ConsumeEndOfStatement for this brace because it's delimiting
1426 // an expression, not a block of statements.
1427 DO(Consume("{"));
1428 int brace_depth = 1;
1429 while (!AtEnd()) {
1430 if (LookingAt("{")) {
1431 brace_depth++;
1432 } else if (LookingAt("}")) {
1433 brace_depth--;
1434 if (brace_depth == 0) {
1435 input_->Next();
1436 return true;
1437 }
1438 }
1439 // TODO(sanjay): Interpret line/column numbers to preserve formatting
1440 if (!value->empty()) value->push_back(' ');
1441 value->append(input_->current().text);
1442 input_->Next();
1443 }
1444 AddError("Unexpected end of stream while parsing aggregate value.");
1445 return false;
1446 }
1447
1448 // We don't interpret the option here. Instead we store it in an
1449 // UninterpretedOption, to be interpreted later.
ParseOption(Message * options,const LocationRecorder & options_location,const FileDescriptorProto * containing_file,OptionStyle style)1450 bool Parser::ParseOption(Message* options,
1451 const LocationRecorder& options_location,
1452 const FileDescriptorProto* containing_file,
1453 OptionStyle style) {
1454 // Create an entry in the uninterpreted_option field.
1455 const FieldDescriptor* uninterpreted_option_field =
1456 options->GetDescriptor()->FindFieldByName("uninterpreted_option");
1457 GOOGLE_CHECK(uninterpreted_option_field != nullptr)
1458 << "No field named \"uninterpreted_option\" in the Options proto.";
1459
1460 const Reflection* reflection = options->GetReflection();
1461
1462 LocationRecorder location(
1463 options_location, uninterpreted_option_field->number(),
1464 reflection->FieldSize(*options, uninterpreted_option_field));
1465
1466 if (style == OPTION_STATEMENT) {
1467 DO(Consume("option"));
1468 }
1469
1470 UninterpretedOption* uninterpreted_option =
1471 down_cast<UninterpretedOption*>(options->GetReflection()->AddMessage(
1472 options, uninterpreted_option_field));
1473
1474 // Parse dot-separated name.
1475 {
1476 LocationRecorder name_location(location,
1477 UninterpretedOption::kNameFieldNumber);
1478 name_location.RecordLegacyLocation(
1479 uninterpreted_option, DescriptorPool::ErrorCollector::OPTION_NAME);
1480
1481 {
1482 LocationRecorder part_location(name_location,
1483 uninterpreted_option->name_size());
1484 DO(ParseOptionNamePart(uninterpreted_option, part_location,
1485 containing_file));
1486 }
1487
1488 while (LookingAt(".")) {
1489 DO(Consume("."));
1490 LocationRecorder part_location(name_location,
1491 uninterpreted_option->name_size());
1492 DO(ParseOptionNamePart(uninterpreted_option, part_location,
1493 containing_file));
1494 }
1495 }
1496
1497 DO(Consume("="));
1498
1499 {
1500 LocationRecorder value_location(location);
1501 value_location.RecordLegacyLocation(
1502 uninterpreted_option, DescriptorPool::ErrorCollector::OPTION_VALUE);
1503
1504 // All values are a single token, except for negative numbers, which consist
1505 // of a single '-' symbol, followed by a positive number.
1506 bool is_negative = TryConsume("-");
1507
1508 switch (input_->current().type) {
1509 case io::Tokenizer::TYPE_START:
1510 GOOGLE_LOG(FATAL) << "Trying to read value before any tokens have been read.";
1511 return false;
1512
1513 case io::Tokenizer::TYPE_END:
1514 AddError("Unexpected end of stream while parsing option value.");
1515 return false;
1516
1517 case io::Tokenizer::TYPE_WHITESPACE:
1518 case io::Tokenizer::TYPE_NEWLINE:
1519 GOOGLE_CHECK(!input_->report_whitespace() && !input_->report_newlines())
1520 << "Whitespace tokens were not requested.";
1521 GOOGLE_LOG(FATAL) << "Tokenizer reported whitespace.";
1522 return false;
1523
1524 case io::Tokenizer::TYPE_IDENTIFIER: {
1525 value_location.AddPath(
1526 UninterpretedOption::kIdentifierValueFieldNumber);
1527 if (is_negative) {
1528 AddError("Invalid '-' symbol before identifier.");
1529 return false;
1530 }
1531 std::string value;
1532 DO(ConsumeIdentifier(&value, "Expected identifier."));
1533 uninterpreted_option->set_identifier_value(value);
1534 break;
1535 }
1536
1537 case io::Tokenizer::TYPE_INTEGER: {
1538 uint64_t value;
1539 uint64_t max_value =
1540 is_negative
1541 ? static_cast<uint64_t>(std::numeric_limits<int64_t>::max()) + 1
1542 : std::numeric_limits<uint64_t>::max();
1543 DO(ConsumeInteger64(max_value, &value, "Expected integer."));
1544 if (is_negative) {
1545 value_location.AddPath(
1546 UninterpretedOption::kNegativeIntValueFieldNumber);
1547 uninterpreted_option->set_negative_int_value(
1548 static_cast<int64_t>(0 - value));
1549 } else {
1550 value_location.AddPath(
1551 UninterpretedOption::kPositiveIntValueFieldNumber);
1552 uninterpreted_option->set_positive_int_value(value);
1553 }
1554 break;
1555 }
1556
1557 case io::Tokenizer::TYPE_FLOAT: {
1558 value_location.AddPath(UninterpretedOption::kDoubleValueFieldNumber);
1559 double value;
1560 DO(ConsumeNumber(&value, "Expected number."));
1561 uninterpreted_option->set_double_value(is_negative ? -value : value);
1562 break;
1563 }
1564
1565 case io::Tokenizer::TYPE_STRING: {
1566 value_location.AddPath(UninterpretedOption::kStringValueFieldNumber);
1567 if (is_negative) {
1568 AddError("Invalid '-' symbol before string.");
1569 return false;
1570 }
1571 std::string value;
1572 DO(ConsumeString(&value, "Expected string."));
1573 uninterpreted_option->set_string_value(value);
1574 break;
1575 }
1576
1577 case io::Tokenizer::TYPE_SYMBOL:
1578 if (LookingAt("{")) {
1579 value_location.AddPath(
1580 UninterpretedOption::kAggregateValueFieldNumber);
1581 DO(ParseUninterpretedBlock(
1582 uninterpreted_option->mutable_aggregate_value()));
1583 } else {
1584 AddError("Expected option value.");
1585 return false;
1586 }
1587 break;
1588 }
1589 }
1590
1591 if (style == OPTION_STATEMENT) {
1592 DO(ConsumeEndOfDeclaration(";", &location));
1593 }
1594
1595 return true;
1596 }
1597
ParseExtensions(DescriptorProto * message,const LocationRecorder & extensions_location,const FileDescriptorProto * containing_file)1598 bool Parser::ParseExtensions(DescriptorProto* message,
1599 const LocationRecorder& extensions_location,
1600 const FileDescriptorProto* containing_file) {
1601 // Parse the declaration.
1602 DO(Consume("extensions"));
1603
1604 int old_range_size = message->extension_range_size();
1605
1606 do {
1607 // Note that kExtensionRangeFieldNumber was already pushed by the parent.
1608 LocationRecorder location(extensions_location,
1609 message->extension_range_size());
1610
1611 DescriptorProto::ExtensionRange* range = message->add_extension_range();
1612 location.RecordLegacyLocation(range,
1613 DescriptorPool::ErrorCollector::NUMBER);
1614
1615 int start, end;
1616 io::Tokenizer::Token start_token;
1617
1618 {
1619 LocationRecorder start_location(
1620 location, DescriptorProto::ExtensionRange::kStartFieldNumber);
1621 start_token = input_->current();
1622 DO(ConsumeInteger(&start, "Expected field number range."));
1623 }
1624
1625 if (TryConsume("to")) {
1626 LocationRecorder end_location(
1627 location, DescriptorProto::ExtensionRange::kEndFieldNumber);
1628 if (TryConsume("max")) {
1629 // Set to the sentinel value - 1 since we increment the value below.
1630 // The actual value of the end of the range should be set with
1631 // AdjustExtensionRangesWithMaxEndNumber.
1632 end = kMaxRangeSentinel - 1;
1633 } else {
1634 DO(ConsumeInteger(&end, "Expected integer."));
1635 }
1636 } else {
1637 LocationRecorder end_location(
1638 location, DescriptorProto::ExtensionRange::kEndFieldNumber);
1639 end_location.StartAt(start_token);
1640 end_location.EndAt(start_token);
1641 end = start;
1642 }
1643
1644 // Users like to specify inclusive ranges, but in code we like the end
1645 // number to be exclusive.
1646 ++end;
1647
1648 range->set_start(start);
1649 range->set_end(end);
1650 } while (TryConsume(","));
1651
1652 if (LookingAt("[")) {
1653 int range_number_index = extensions_location.CurrentPathSize();
1654 SourceCodeInfo info;
1655
1656 // Parse extension range options in the first range.
1657 ExtensionRangeOptions* options =
1658 message->mutable_extension_range(old_range_size)->mutable_options();
1659
1660 {
1661 LocationRecorder index_location(
1662 extensions_location, 0 /* we fill this in w/ actual index below */,
1663 &info);
1664 LocationRecorder location(
1665 index_location, DescriptorProto::ExtensionRange::kOptionsFieldNumber);
1666 DO(Consume("["));
1667
1668 do {
1669 DO(ParseOption(options, location, containing_file, OPTION_ASSIGNMENT));
1670 } while (TryConsume(","));
1671
1672 DO(Consume("]"));
1673 }
1674
1675 // Then copy the extension range options to all of the other ranges we've
1676 // parsed.
1677 for (int i = old_range_size + 1; i < message->extension_range_size(); i++) {
1678 message->mutable_extension_range(i)->mutable_options()->CopyFrom(
1679 *options);
1680 }
1681 // and copy source locations to the other ranges, too
1682 for (int i = old_range_size; i < message->extension_range_size(); i++) {
1683 for (int j = 0; j < info.location_size(); j++) {
1684 if (info.location(j).path_size() == range_number_index + 1) {
1685 // this location's path is up to the extension range index, but
1686 // doesn't include options; so it's redundant with location above
1687 continue;
1688 }
1689 SourceCodeInfo_Location* dest = source_code_info_->add_location();
1690 *dest = info.location(j);
1691 dest->set_path(range_number_index, i);
1692 }
1693 }
1694 }
1695
1696 DO(ConsumeEndOfDeclaration(";", &extensions_location));
1697 return true;
1698 }
1699
1700 // This is similar to extension range parsing, except that it accepts field
1701 // name literals.
ParseReserved(DescriptorProto * message,const LocationRecorder & message_location)1702 bool Parser::ParseReserved(DescriptorProto* message,
1703 const LocationRecorder& message_location) {
1704 io::Tokenizer::Token start_token = input_->current();
1705 // Parse the declaration.
1706 DO(Consume("reserved"));
1707 if (LookingAtType(io::Tokenizer::TYPE_STRING)) {
1708 LocationRecorder location(message_location,
1709 DescriptorProto::kReservedNameFieldNumber);
1710 location.StartAt(start_token);
1711 return ParseReservedNames(message, location);
1712 } else {
1713 LocationRecorder location(message_location,
1714 DescriptorProto::kReservedRangeFieldNumber);
1715 location.StartAt(start_token);
1716 return ParseReservedNumbers(message, location);
1717 }
1718 }
1719
ParseReservedNames(DescriptorProto * message,const LocationRecorder & parent_location)1720 bool Parser::ParseReservedNames(DescriptorProto* message,
1721 const LocationRecorder& parent_location) {
1722 do {
1723 LocationRecorder location(parent_location, message->reserved_name_size());
1724 DO(ConsumeString(message->add_reserved_name(), "Expected field name."));
1725 } while (TryConsume(","));
1726 DO(ConsumeEndOfDeclaration(";", &parent_location));
1727 return true;
1728 }
1729
ParseReservedNumbers(DescriptorProto * message,const LocationRecorder & parent_location)1730 bool Parser::ParseReservedNumbers(DescriptorProto* message,
1731 const LocationRecorder& parent_location) {
1732 bool first = true;
1733 do {
1734 LocationRecorder location(parent_location, message->reserved_range_size());
1735
1736 DescriptorProto::ReservedRange* range = message->add_reserved_range();
1737 int start, end;
1738 io::Tokenizer::Token start_token;
1739 {
1740 LocationRecorder start_location(
1741 location, DescriptorProto::ReservedRange::kStartFieldNumber);
1742 start_token = input_->current();
1743 DO(ConsumeInteger(&start, (first ? "Expected field name or number range."
1744 : "Expected field number range.")));
1745 }
1746
1747 if (TryConsume("to")) {
1748 LocationRecorder end_location(
1749 location, DescriptorProto::ReservedRange::kEndFieldNumber);
1750 if (TryConsume("max")) {
1751 // Set to the sentinel value - 1 since we increment the value below.
1752 // The actual value of the end of the range should be set with
1753 // AdjustExtensionRangesWithMaxEndNumber.
1754 end = kMaxRangeSentinel - 1;
1755 } else {
1756 DO(ConsumeInteger(&end, "Expected integer."));
1757 }
1758 } else {
1759 LocationRecorder end_location(
1760 location, DescriptorProto::ReservedRange::kEndFieldNumber);
1761 end_location.StartAt(start_token);
1762 end_location.EndAt(start_token);
1763 end = start;
1764 }
1765
1766 // Users like to specify inclusive ranges, but in code we like the end
1767 // number to be exclusive.
1768 ++end;
1769
1770 range->set_start(start);
1771 range->set_end(end);
1772 first = false;
1773 } while (TryConsume(","));
1774
1775 DO(ConsumeEndOfDeclaration(";", &parent_location));
1776 return true;
1777 }
1778
ParseReserved(EnumDescriptorProto * message,const LocationRecorder & message_location)1779 bool Parser::ParseReserved(EnumDescriptorProto* message,
1780 const LocationRecorder& message_location) {
1781 io::Tokenizer::Token start_token = input_->current();
1782 // Parse the declaration.
1783 DO(Consume("reserved"));
1784 if (LookingAtType(io::Tokenizer::TYPE_STRING)) {
1785 LocationRecorder location(message_location,
1786 EnumDescriptorProto::kReservedNameFieldNumber);
1787 location.StartAt(start_token);
1788 return ParseReservedNames(message, location);
1789 } else {
1790 LocationRecorder location(message_location,
1791 EnumDescriptorProto::kReservedRangeFieldNumber);
1792 location.StartAt(start_token);
1793 return ParseReservedNumbers(message, location);
1794 }
1795 }
1796
ParseReservedNames(EnumDescriptorProto * message,const LocationRecorder & parent_location)1797 bool Parser::ParseReservedNames(EnumDescriptorProto* message,
1798 const LocationRecorder& parent_location) {
1799 do {
1800 LocationRecorder location(parent_location, message->reserved_name_size());
1801 DO(ConsumeString(message->add_reserved_name(), "Expected enum value."));
1802 } while (TryConsume(","));
1803 DO(ConsumeEndOfDeclaration(";", &parent_location));
1804 return true;
1805 }
1806
ParseReservedNumbers(EnumDescriptorProto * message,const LocationRecorder & parent_location)1807 bool Parser::ParseReservedNumbers(EnumDescriptorProto* message,
1808 const LocationRecorder& parent_location) {
1809 bool first = true;
1810 do {
1811 LocationRecorder location(parent_location, message->reserved_range_size());
1812
1813 EnumDescriptorProto::EnumReservedRange* range =
1814 message->add_reserved_range();
1815 int start, end;
1816 io::Tokenizer::Token start_token;
1817 {
1818 LocationRecorder start_location(
1819 location, EnumDescriptorProto::EnumReservedRange::kStartFieldNumber);
1820 start_token = input_->current();
1821 DO(ConsumeSignedInteger(&start,
1822 (first ? "Expected enum value or number range."
1823 : "Expected enum number range.")));
1824 }
1825
1826 if (TryConsume("to")) {
1827 LocationRecorder end_location(
1828 location, EnumDescriptorProto::EnumReservedRange::kEndFieldNumber);
1829 if (TryConsume("max")) {
1830 // This is in the enum descriptor path, which doesn't have the message
1831 // set duality to fix up, so it doesn't integrate with the sentinel.
1832 end = INT_MAX;
1833 } else {
1834 DO(ConsumeSignedInteger(&end, "Expected integer."));
1835 }
1836 } else {
1837 LocationRecorder end_location(
1838 location, EnumDescriptorProto::EnumReservedRange::kEndFieldNumber);
1839 end_location.StartAt(start_token);
1840 end_location.EndAt(start_token);
1841 end = start;
1842 }
1843
1844 range->set_start(start);
1845 range->set_end(end);
1846 first = false;
1847 } while (TryConsume(","));
1848
1849 DO(ConsumeEndOfDeclaration(";", &parent_location));
1850 return true;
1851 }
1852
ParseExtend(RepeatedPtrField<FieldDescriptorProto> * extensions,RepeatedPtrField<DescriptorProto> * messages,const LocationRecorder & parent_location,int location_field_number_for_nested_type,const LocationRecorder & extend_location,const FileDescriptorProto * containing_file)1853 bool Parser::ParseExtend(RepeatedPtrField<FieldDescriptorProto>* extensions,
1854 RepeatedPtrField<DescriptorProto>* messages,
1855 const LocationRecorder& parent_location,
1856 int location_field_number_for_nested_type,
1857 const LocationRecorder& extend_location,
1858 const FileDescriptorProto* containing_file) {
1859 DO(Consume("extend"));
1860
1861 // Parse the extendee type.
1862 io::Tokenizer::Token extendee_start = input_->current();
1863 std::string extendee;
1864 DO(ParseUserDefinedType(&extendee));
1865 io::Tokenizer::Token extendee_end = input_->previous();
1866
1867 // Parse the block.
1868 DO(ConsumeEndOfDeclaration("{", &extend_location));
1869
1870 bool is_first = true;
1871
1872 do {
1873 if (AtEnd()) {
1874 AddError("Reached end of input in extend definition (missing '}').");
1875 return false;
1876 }
1877
1878 // Note that kExtensionFieldNumber was already pushed by the parent.
1879 LocationRecorder location(extend_location, extensions->size());
1880
1881 FieldDescriptorProto* field = extensions->Add();
1882
1883 {
1884 LocationRecorder extendee_location(
1885 location, FieldDescriptorProto::kExtendeeFieldNumber);
1886 extendee_location.StartAt(extendee_start);
1887 extendee_location.EndAt(extendee_end);
1888
1889 if (is_first) {
1890 extendee_location.RecordLegacyLocation(
1891 field, DescriptorPool::ErrorCollector::EXTENDEE);
1892 is_first = false;
1893 }
1894 }
1895
1896 field->set_extendee(extendee);
1897
1898 if (!ParseMessageField(field, messages, parent_location,
1899 location_field_number_for_nested_type, location,
1900 containing_file)) {
1901 // This statement failed to parse. Skip it, but keep looping to parse
1902 // other statements.
1903 SkipStatement();
1904 }
1905 } while (!TryConsumeEndOfDeclaration("}", nullptr));
1906
1907 return true;
1908 }
1909
ParseOneof(OneofDescriptorProto * oneof_decl,DescriptorProto * containing_type,int oneof_index,const LocationRecorder & oneof_location,const LocationRecorder & containing_type_location,const FileDescriptorProto * containing_file)1910 bool Parser::ParseOneof(OneofDescriptorProto* oneof_decl,
1911 DescriptorProto* containing_type, int oneof_index,
1912 const LocationRecorder& oneof_location,
1913 const LocationRecorder& containing_type_location,
1914 const FileDescriptorProto* containing_file) {
1915 DO(Consume("oneof"));
1916
1917 {
1918 LocationRecorder name_location(oneof_location,
1919 OneofDescriptorProto::kNameFieldNumber);
1920 DO(ConsumeIdentifier(oneof_decl->mutable_name(), "Expected oneof name."));
1921 }
1922
1923 DO(ConsumeEndOfDeclaration("{", &oneof_location));
1924
1925 do {
1926 if (AtEnd()) {
1927 AddError("Reached end of input in oneof definition (missing '}').");
1928 return false;
1929 }
1930
1931 if (LookingAt("option")) {
1932 LocationRecorder option_location(
1933 oneof_location, OneofDescriptorProto::kOptionsFieldNumber);
1934 if (!ParseOption(oneof_decl->mutable_options(), option_location,
1935 containing_file, OPTION_STATEMENT)) {
1936 return false;
1937 }
1938 continue;
1939 }
1940
1941 // Print a nice error if the user accidentally tries to place a label
1942 // on an individual member of a oneof.
1943 if (LookingAt("required") || LookingAt("optional") ||
1944 LookingAt("repeated")) {
1945 AddError(
1946 "Fields in oneofs must not have labels (required / optional "
1947 "/ repeated).");
1948 // We can continue parsing here because we understand what the user
1949 // meant. The error report will still make parsing fail overall.
1950 input_->Next();
1951 }
1952
1953 LocationRecorder field_location(containing_type_location,
1954 DescriptorProto::kFieldFieldNumber,
1955 containing_type->field_size());
1956
1957 FieldDescriptorProto* field = containing_type->add_field();
1958 field->set_label(FieldDescriptorProto::LABEL_OPTIONAL);
1959 field->set_oneof_index(oneof_index);
1960
1961 if (!ParseMessageFieldNoLabel(field, containing_type->mutable_nested_type(),
1962 containing_type_location,
1963 DescriptorProto::kNestedTypeFieldNumber,
1964 field_location, containing_file)) {
1965 // This statement failed to parse. Skip it, but keep looping to parse
1966 // other statements.
1967 SkipStatement();
1968 }
1969 } while (!TryConsumeEndOfDeclaration("}", nullptr));
1970
1971 return true;
1972 }
1973
1974 // -------------------------------------------------------------------
1975 // Enums
1976
ParseEnumDefinition(EnumDescriptorProto * enum_type,const LocationRecorder & enum_location,const FileDescriptorProto * containing_file)1977 bool Parser::ParseEnumDefinition(EnumDescriptorProto* enum_type,
1978 const LocationRecorder& enum_location,
1979 const FileDescriptorProto* containing_file) {
1980 DO(Consume("enum"));
1981
1982 {
1983 LocationRecorder location(enum_location,
1984 EnumDescriptorProto::kNameFieldNumber);
1985 location.RecordLegacyLocation(enum_type,
1986 DescriptorPool::ErrorCollector::NAME);
1987 DO(ConsumeIdentifier(enum_type->mutable_name(), "Expected enum name."));
1988 }
1989
1990 DO(ParseEnumBlock(enum_type, enum_location, containing_file));
1991
1992 DO(ValidateEnum(enum_type));
1993
1994 return true;
1995 }
1996
ParseEnumBlock(EnumDescriptorProto * enum_type,const LocationRecorder & enum_location,const FileDescriptorProto * containing_file)1997 bool Parser::ParseEnumBlock(EnumDescriptorProto* enum_type,
1998 const LocationRecorder& enum_location,
1999 const FileDescriptorProto* containing_file) {
2000 DO(ConsumeEndOfDeclaration("{", &enum_location));
2001
2002 while (!TryConsumeEndOfDeclaration("}", nullptr)) {
2003 if (AtEnd()) {
2004 AddError("Reached end of input in enum definition (missing '}').");
2005 return false;
2006 }
2007
2008 if (!ParseEnumStatement(enum_type, enum_location, containing_file)) {
2009 // This statement failed to parse. Skip it, but keep looping to parse
2010 // other statements.
2011 SkipStatement();
2012 }
2013 }
2014
2015 return true;
2016 }
2017
ParseEnumStatement(EnumDescriptorProto * enum_type,const LocationRecorder & enum_location,const FileDescriptorProto * containing_file)2018 bool Parser::ParseEnumStatement(EnumDescriptorProto* enum_type,
2019 const LocationRecorder& enum_location,
2020 const FileDescriptorProto* containing_file) {
2021 if (TryConsumeEndOfDeclaration(";", nullptr)) {
2022 // empty statement; ignore
2023 return true;
2024 } else if (LookingAt("option")) {
2025 LocationRecorder location(enum_location,
2026 EnumDescriptorProto::kOptionsFieldNumber);
2027 return ParseOption(enum_type->mutable_options(), location, containing_file,
2028 OPTION_STATEMENT);
2029 } else if (LookingAt("reserved")) {
2030 return ParseReserved(enum_type, enum_location);
2031 } else {
2032 LocationRecorder location(enum_location,
2033 EnumDescriptorProto::kValueFieldNumber,
2034 enum_type->value_size());
2035 return ParseEnumConstant(enum_type->add_value(), location, containing_file);
2036 }
2037 }
2038
ParseEnumConstant(EnumValueDescriptorProto * enum_value,const LocationRecorder & enum_value_location,const FileDescriptorProto * containing_file)2039 bool Parser::ParseEnumConstant(EnumValueDescriptorProto* enum_value,
2040 const LocationRecorder& enum_value_location,
2041 const FileDescriptorProto* containing_file) {
2042 // Parse name.
2043 {
2044 LocationRecorder location(enum_value_location,
2045 EnumValueDescriptorProto::kNameFieldNumber);
2046 location.RecordLegacyLocation(enum_value,
2047 DescriptorPool::ErrorCollector::NAME);
2048 DO(ConsumeIdentifier(enum_value->mutable_name(),
2049 "Expected enum constant name."));
2050 }
2051
2052 DO(Consume("=", "Missing numeric value for enum constant."));
2053
2054 // Parse value.
2055 {
2056 LocationRecorder location(enum_value_location,
2057 EnumValueDescriptorProto::kNumberFieldNumber);
2058 location.RecordLegacyLocation(enum_value,
2059 DescriptorPool::ErrorCollector::NUMBER);
2060
2061 int number;
2062 DO(ConsumeSignedInteger(&number, "Expected integer."));
2063 enum_value->set_number(number);
2064 }
2065
2066 DO(ParseEnumConstantOptions(enum_value, enum_value_location,
2067 containing_file));
2068
2069 DO(ConsumeEndOfDeclaration(";", &enum_value_location));
2070
2071 return true;
2072 }
2073
ParseEnumConstantOptions(EnumValueDescriptorProto * value,const LocationRecorder & enum_value_location,const FileDescriptorProto * containing_file)2074 bool Parser::ParseEnumConstantOptions(
2075 EnumValueDescriptorProto* value,
2076 const LocationRecorder& enum_value_location,
2077 const FileDescriptorProto* containing_file) {
2078 if (!LookingAt("[")) return true;
2079
2080 LocationRecorder location(enum_value_location,
2081 EnumValueDescriptorProto::kOptionsFieldNumber);
2082
2083 DO(Consume("["));
2084
2085 do {
2086 DO(ParseOption(value->mutable_options(), location, containing_file,
2087 OPTION_ASSIGNMENT));
2088 } while (TryConsume(","));
2089
2090 DO(Consume("]"));
2091 return true;
2092 }
2093
2094 // -------------------------------------------------------------------
2095 // Services
2096
ParseServiceDefinition(ServiceDescriptorProto * service,const LocationRecorder & service_location,const FileDescriptorProto * containing_file)2097 bool Parser::ParseServiceDefinition(
2098 ServiceDescriptorProto* service, const LocationRecorder& service_location,
2099 const FileDescriptorProto* containing_file) {
2100 DO(Consume("service"));
2101
2102 {
2103 LocationRecorder location(service_location,
2104 ServiceDescriptorProto::kNameFieldNumber);
2105 location.RecordLegacyLocation(service,
2106 DescriptorPool::ErrorCollector::NAME);
2107 DO(ConsumeIdentifier(service->mutable_name(), "Expected service name."));
2108 }
2109
2110 DO(ParseServiceBlock(service, service_location, containing_file));
2111 return true;
2112 }
2113
ParseServiceBlock(ServiceDescriptorProto * service,const LocationRecorder & service_location,const FileDescriptorProto * containing_file)2114 bool Parser::ParseServiceBlock(ServiceDescriptorProto* service,
2115 const LocationRecorder& service_location,
2116 const FileDescriptorProto* containing_file) {
2117 DO(ConsumeEndOfDeclaration("{", &service_location));
2118
2119 while (!TryConsumeEndOfDeclaration("}", nullptr)) {
2120 if (AtEnd()) {
2121 AddError("Reached end of input in service definition (missing '}').");
2122 return false;
2123 }
2124
2125 if (!ParseServiceStatement(service, service_location, containing_file)) {
2126 // This statement failed to parse. Skip it, but keep looping to parse
2127 // other statements.
2128 SkipStatement();
2129 }
2130 }
2131
2132 return true;
2133 }
2134
ParseServiceStatement(ServiceDescriptorProto * service,const LocationRecorder & service_location,const FileDescriptorProto * containing_file)2135 bool Parser::ParseServiceStatement(ServiceDescriptorProto* service,
2136 const LocationRecorder& service_location,
2137 const FileDescriptorProto* containing_file) {
2138 if (TryConsumeEndOfDeclaration(";", nullptr)) {
2139 // empty statement; ignore
2140 return true;
2141 } else if (LookingAt("option")) {
2142 LocationRecorder location(service_location,
2143 ServiceDescriptorProto::kOptionsFieldNumber);
2144 return ParseOption(service->mutable_options(), location, containing_file,
2145 OPTION_STATEMENT);
2146 } else {
2147 LocationRecorder location(service_location,
2148 ServiceDescriptorProto::kMethodFieldNumber,
2149 service->method_size());
2150 return ParseServiceMethod(service->add_method(), location, containing_file);
2151 }
2152 }
2153
ParseServiceMethod(MethodDescriptorProto * method,const LocationRecorder & method_location,const FileDescriptorProto * containing_file)2154 bool Parser::ParseServiceMethod(MethodDescriptorProto* method,
2155 const LocationRecorder& method_location,
2156 const FileDescriptorProto* containing_file) {
2157 DO(Consume("rpc"));
2158
2159 {
2160 LocationRecorder location(method_location,
2161 MethodDescriptorProto::kNameFieldNumber);
2162 location.RecordLegacyLocation(method, DescriptorPool::ErrorCollector::NAME);
2163 DO(ConsumeIdentifier(method->mutable_name(), "Expected method name."));
2164 }
2165
2166 // Parse input type.
2167 DO(Consume("("));
2168 {
2169 if (LookingAt("stream")) {
2170 LocationRecorder location(
2171 method_location, MethodDescriptorProto::kClientStreamingFieldNumber);
2172 location.RecordLegacyLocation(method,
2173 DescriptorPool::ErrorCollector::OTHER);
2174 method->set_client_streaming(true);
2175 DO(Consume("stream"));
2176 }
2177 LocationRecorder location(method_location,
2178 MethodDescriptorProto::kInputTypeFieldNumber);
2179 location.RecordLegacyLocation(method,
2180 DescriptorPool::ErrorCollector::INPUT_TYPE);
2181 DO(ParseUserDefinedType(method->mutable_input_type()));
2182 }
2183 DO(Consume(")"));
2184
2185 // Parse output type.
2186 DO(Consume("returns"));
2187 DO(Consume("("));
2188 {
2189 if (LookingAt("stream")) {
2190 LocationRecorder location(
2191 method_location, MethodDescriptorProto::kServerStreamingFieldNumber);
2192 location.RecordLegacyLocation(method,
2193 DescriptorPool::ErrorCollector::OTHER);
2194 DO(Consume("stream"));
2195 method->set_server_streaming(true);
2196 }
2197 LocationRecorder location(method_location,
2198 MethodDescriptorProto::kOutputTypeFieldNumber);
2199 location.RecordLegacyLocation(method,
2200 DescriptorPool::ErrorCollector::OUTPUT_TYPE);
2201 DO(ParseUserDefinedType(method->mutable_output_type()));
2202 }
2203 DO(Consume(")"));
2204
2205 if (LookingAt("{")) {
2206 // Options!
2207 DO(ParseMethodOptions(method_location, containing_file,
2208 MethodDescriptorProto::kOptionsFieldNumber,
2209 method->mutable_options()));
2210 } else {
2211 DO(ConsumeEndOfDeclaration(";", &method_location));
2212 }
2213
2214 return true;
2215 }
2216
ParseMethodOptions(const LocationRecorder & parent_location,const FileDescriptorProto * containing_file,const int optionsFieldNumber,Message * mutable_options)2217 bool Parser::ParseMethodOptions(const LocationRecorder& parent_location,
2218 const FileDescriptorProto* containing_file,
2219 const int optionsFieldNumber,
2220 Message* mutable_options) {
2221 // Options!
2222 ConsumeEndOfDeclaration("{", &parent_location);
2223 while (!TryConsumeEndOfDeclaration("}", nullptr)) {
2224 if (AtEnd()) {
2225 AddError("Reached end of input in method options (missing '}').");
2226 return false;
2227 }
2228
2229 if (TryConsumeEndOfDeclaration(";", nullptr)) {
2230 // empty statement; ignore
2231 } else {
2232 LocationRecorder location(parent_location, optionsFieldNumber);
2233 if (!ParseOption(mutable_options, location, containing_file,
2234 OPTION_STATEMENT)) {
2235 // This statement failed to parse. Skip it, but keep looping to
2236 // parse other statements.
2237 SkipStatement();
2238 }
2239 }
2240 }
2241
2242 return true;
2243 }
2244
2245 // -------------------------------------------------------------------
2246
ParseLabel(FieldDescriptorProto::Label * label,const LocationRecorder & field_location)2247 bool Parser::ParseLabel(FieldDescriptorProto::Label* label,
2248 const LocationRecorder& field_location) {
2249 if (!LookingAt("optional") && !LookingAt("repeated") &&
2250 !LookingAt("required")) {
2251 return false;
2252 }
2253 LocationRecorder location(field_location,
2254 FieldDescriptorProto::kLabelFieldNumber);
2255 if (TryConsume("optional")) {
2256 *label = FieldDescriptorProto::LABEL_OPTIONAL;
2257 } else if (TryConsume("repeated")) {
2258 *label = FieldDescriptorProto::LABEL_REPEATED;
2259 } else {
2260 Consume("required");
2261 *label = FieldDescriptorProto::LABEL_REQUIRED;
2262 }
2263 return true;
2264 }
2265
ParseType(FieldDescriptorProto::Type * type,std::string * type_name)2266 bool Parser::ParseType(FieldDescriptorProto::Type* type,
2267 std::string* type_name) {
2268 TypeNameMap::const_iterator iter = kTypeNames.find(input_->current().text);
2269 if (iter != kTypeNames.end()) {
2270 *type = iter->second;
2271 input_->Next();
2272 } else {
2273 DO(ParseUserDefinedType(type_name));
2274 }
2275 return true;
2276 }
2277
ParseUserDefinedType(std::string * type_name)2278 bool Parser::ParseUserDefinedType(std::string* type_name) {
2279 type_name->clear();
2280
2281 TypeNameMap::const_iterator iter = kTypeNames.find(input_->current().text);
2282 if (iter != kTypeNames.end()) {
2283 // Note: The only place enum types are allowed is for field types, but
2284 // if we are parsing a field type then we would not get here because
2285 // primitives are allowed there as well. So this error message doesn't
2286 // need to account for enums.
2287 AddError("Expected message type.");
2288
2289 // Pretend to accept this type so that we can go on parsing.
2290 *type_name = input_->current().text;
2291 input_->Next();
2292 return true;
2293 }
2294
2295 // A leading "." means the name is fully-qualified.
2296 if (TryConsume(".")) type_name->append(".");
2297
2298 // Consume the first part of the name.
2299 std::string identifier;
2300 DO(ConsumeIdentifier(&identifier, "Expected type name."));
2301 type_name->append(identifier);
2302
2303 // Consume more parts.
2304 while (TryConsume(".")) {
2305 type_name->append(".");
2306 DO(ConsumeIdentifier(&identifier, "Expected identifier."));
2307 type_name->append(identifier);
2308 }
2309
2310 return true;
2311 }
2312
2313 // ===================================================================
2314
ParsePackage(FileDescriptorProto * file,const LocationRecorder & root_location,const FileDescriptorProto * containing_file)2315 bool Parser::ParsePackage(FileDescriptorProto* file,
2316 const LocationRecorder& root_location,
2317 const FileDescriptorProto* containing_file) {
2318 if (file->has_package()) {
2319 AddError("Multiple package definitions.");
2320 // Don't append the new package to the old one. Just replace it. Not
2321 // that it really matters since this is an error anyway.
2322 file->clear_package();
2323 }
2324
2325 LocationRecorder location(root_location,
2326 FileDescriptorProto::kPackageFieldNumber);
2327 location.RecordLegacyLocation(file, DescriptorPool::ErrorCollector::NAME);
2328
2329 DO(Consume("package"));
2330
2331 while (true) {
2332 std::string identifier;
2333 DO(ConsumeIdentifier(&identifier, "Expected identifier."));
2334 file->mutable_package()->append(identifier);
2335 if (!TryConsume(".")) break;
2336 file->mutable_package()->append(".");
2337 }
2338
2339 DO(ConsumeEndOfDeclaration(";", &location));
2340
2341 return true;
2342 }
2343
ParseImport(RepeatedPtrField<std::string> * dependency,RepeatedField<int32_t> * public_dependency,RepeatedField<int32_t> * weak_dependency,const LocationRecorder & root_location,const FileDescriptorProto * containing_file)2344 bool Parser::ParseImport(RepeatedPtrField<std::string>* dependency,
2345 RepeatedField<int32_t>* public_dependency,
2346 RepeatedField<int32_t>* weak_dependency,
2347 const LocationRecorder& root_location,
2348 const FileDescriptorProto* containing_file) {
2349 LocationRecorder location(root_location,
2350 FileDescriptorProto::kDependencyFieldNumber,
2351 dependency->size());
2352
2353 DO(Consume("import"));
2354
2355 if (LookingAt("public")) {
2356 LocationRecorder public_location(
2357 root_location, FileDescriptorProto::kPublicDependencyFieldNumber,
2358 public_dependency->size());
2359 DO(Consume("public"));
2360 *public_dependency->Add() = dependency->size();
2361 } else if (LookingAt("weak")) {
2362 LocationRecorder weak_location(
2363 root_location, FileDescriptorProto::kWeakDependencyFieldNumber,
2364 weak_dependency->size());
2365 weak_location.RecordLegacyImportLocation(containing_file, "weak");
2366 DO(Consume("weak"));
2367 *weak_dependency->Add() = dependency->size();
2368 }
2369
2370 std::string import_file;
2371 DO(ConsumeString(&import_file,
2372 "Expected a string naming the file to import."));
2373 *dependency->Add() = import_file;
2374 location.RecordLegacyImportLocation(containing_file, import_file);
2375
2376 DO(ConsumeEndOfDeclaration(";", &location));
2377
2378 return true;
2379 }
2380
2381 // ===================================================================
2382
SourceLocationTable()2383 SourceLocationTable::SourceLocationTable() {}
~SourceLocationTable()2384 SourceLocationTable::~SourceLocationTable() {}
2385
Find(const Message * descriptor,DescriptorPool::ErrorCollector::ErrorLocation location,int * line,int * column) const2386 bool SourceLocationTable::Find(
2387 const Message* descriptor,
2388 DescriptorPool::ErrorCollector::ErrorLocation location, int* line,
2389 int* column) const {
2390 const std::pair<int, int>* result =
2391 FindOrNull(location_map_, std::make_pair(descriptor, location));
2392 if (result == nullptr) {
2393 *line = -1;
2394 *column = 0;
2395 return false;
2396 } else {
2397 *line = result->first;
2398 *column = result->second;
2399 return true;
2400 }
2401 }
2402
FindImport(const Message * descriptor,const std::string & name,int * line,int * column) const2403 bool SourceLocationTable::FindImport(const Message* descriptor,
2404 const std::string& name, int* line,
2405 int* column) const {
2406 const std::pair<int, int>* result =
2407 FindOrNull(import_location_map_, std::make_pair(descriptor, name));
2408 if (result == nullptr) {
2409 *line = -1;
2410 *column = 0;
2411 return false;
2412 } else {
2413 *line = result->first;
2414 *column = result->second;
2415 return true;
2416 }
2417 }
2418
Add(const Message * descriptor,DescriptorPool::ErrorCollector::ErrorLocation location,int line,int column)2419 void SourceLocationTable::Add(
2420 const Message* descriptor,
2421 DescriptorPool::ErrorCollector::ErrorLocation location, int line,
2422 int column) {
2423 location_map_[std::make_pair(descriptor, location)] =
2424 std::make_pair(line, column);
2425 }
2426
AddImport(const Message * descriptor,const std::string & name,int line,int column)2427 void SourceLocationTable::AddImport(const Message* descriptor,
2428 const std::string& name, int line,
2429 int column) {
2430 import_location_map_[std::make_pair(descriptor, name)] =
2431 std::make_pair(line, column);
2432 }
2433
Clear()2434 void SourceLocationTable::Clear() { location_map_.clear(); }
2435
2436 } // namespace compiler
2437 } // namespace protobuf
2438 } // namespace google
2439