• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc.  All rights reserved.
3 // https://developers.google.com/protocol-buffers/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 //     * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 //     * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 //     * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 
31 // Author: kenton@google.com (Kenton Varda)
32 //  Based on original Protocol Buffers design by
33 //  Sanjay Ghemawat, Jeff Dean, and others.
34 //
35 // Recursive descent FTW.
36 
37 #include <google/protobuf/compiler/parser.h>
38 
39 #include <float.h>
40 
41 #include <limits>
42 #include <unordered_map>
43 #include <unordered_set>
44 
45 #include <google/protobuf/stubs/casts.h>
46 #include <google/protobuf/stubs/logging.h>
47 #include <google/protobuf/stubs/common.h>
48 #include <google/protobuf/descriptor.pb.h>
49 #include <google/protobuf/io/tokenizer.h>
50 #include <google/protobuf/descriptor.h>
51 #include <google/protobuf/wire_format.h>
52 #include <google/protobuf/stubs/strutil.h>
53 #include <google/protobuf/stubs/map_util.h>
54 #include <google/protobuf/stubs/hash.h>
55 
56 namespace google {
57 namespace protobuf {
58 namespace compiler {
59 
60 using internal::WireFormat;
61 
62 namespace {
63 
64 typedef std::unordered_map<std::string, FieldDescriptorProto::Type> TypeNameMap;
65 
MakeTypeNameTable()66 TypeNameMap MakeTypeNameTable() {
67   TypeNameMap result;
68 
69   result["double"] = FieldDescriptorProto::TYPE_DOUBLE;
70   result["float"] = FieldDescriptorProto::TYPE_FLOAT;
71   result["uint64"] = FieldDescriptorProto::TYPE_UINT64;
72   result["fixed64"] = FieldDescriptorProto::TYPE_FIXED64;
73   result["fixed32"] = FieldDescriptorProto::TYPE_FIXED32;
74   result["bool"] = FieldDescriptorProto::TYPE_BOOL;
75   result["string"] = FieldDescriptorProto::TYPE_STRING;
76   result["group"] = FieldDescriptorProto::TYPE_GROUP;
77 
78   result["bytes"] = FieldDescriptorProto::TYPE_BYTES;
79   result["uint32"] = FieldDescriptorProto::TYPE_UINT32;
80   result["sfixed32"] = FieldDescriptorProto::TYPE_SFIXED32;
81   result["sfixed64"] = FieldDescriptorProto::TYPE_SFIXED64;
82   result["int32"] = FieldDescriptorProto::TYPE_INT32;
83   result["int64"] = FieldDescriptorProto::TYPE_INT64;
84   result["sint32"] = FieldDescriptorProto::TYPE_SINT32;
85   result["sint64"] = FieldDescriptorProto::TYPE_SINT64;
86 
87   return result;
88 }
89 
90 const TypeNameMap kTypeNames = MakeTypeNameTable();
91 
// Returns the name of the generated map entry type for a map field: the field
// name is camel-cased (underscores are dropped and the character following
// each underscore, as well as the first character, is upper-cased) and
// "Entry" is appended.
// e.g. map<KeyType, ValueType> foo_map => FooMapEntry
std::string MapEntryName(const std::string& field_name) {
  static const char kSuffix[] = "Entry";
  std::string result;
  result.reserve(field_name.size() + sizeof(kSuffix));
  bool cap_next = true;
  // Range-for avoids comparing a signed index against the unsigned size().
  for (const char c : field_name) {
    if (c == '_') {
      cap_next = true;
      continue;
    }
    // Note: Do not use ctype.h due to locales.
    if (cap_next && 'a' <= c && c <= 'z') {
      result.push_back(static_cast<char>(c - 'a' + 'A'));
    } else {
      result.push_back(c);
    }
    cap_next = false;
  }
  result.append(kSuffix);
  return result;
}
117 
// True iff |c| is an ASCII upper-case letter ('A'..'Z').
bool IsUppercase(char c) {
  return 'A' <= c && c <= 'Z';
}
119 
// True iff |c| is an ASCII lower-case letter ('a'..'z').
bool IsLowercase(char c) {
  return 'a' <= c && c <= 'z';
}
121 
// True iff |c| is an ASCII decimal digit ('0'..'9').
bool IsNumber(char c) {
  return '0' <= c && c <= '9';
}
123 
IsUpperCamelCase(const std::string & name)124 bool IsUpperCamelCase(const std::string& name) {
125   if (name.empty()) {
126     return true;
127   }
128   // Name must start with an upper case character.
129   if (!IsUppercase(name[0])) {
130     return false;
131   }
132   // Must not contains underscore.
133   for (int i = 1; i < name.length(); i++) {
134     if (name[i] == '_') {
135       return false;
136     }
137   }
138   return true;
139 }
140 
IsUpperUnderscore(const std::string & name)141 bool IsUpperUnderscore(const std::string& name) {
142   for (int i = 0; i < name.length(); i++) {
143     const char c = name[i];
144     if (!IsUppercase(c) && c != '_' && !IsNumber(c)) {
145       return false;
146     }
147   }
148   return true;
149 }
150 
IsLowerUnderscore(const std::string & name)151 bool IsLowerUnderscore(const std::string& name) {
152   for (int i = 0; i < name.length(); i++) {
153     const char c = name[i];
154     if (!IsLowercase(c) && c != '_' && !IsNumber(c)) {
155       return false;
156     }
157   }
158   return true;
159 }
160 
IsNumberFollowUnderscore(const std::string & name)161 bool IsNumberFollowUnderscore(const std::string& name) {
162   for (int i = 1; i < name.length(); i++) {
163     const char c = name[i];
164     if (IsNumber(c) && name[i - 1] == '_') {
165       return true;
166     }
167   }
168   return false;
169 }
170 
171 }  // anonymous namespace
172 
// Shorthand for propagating failure.  "DO(foo);" evaluates foo and, when it
// yields false, makes the enclosing function return false immediately;
// otherwise execution continues with the next statement.  The do/while
// wrapper makes the macro behave as a single statement that needs a
// trailing semicolon.
#define DO(STATEMENT)   \
  do {                  \
    if (!(STATEMENT)) { \
      return false;     \
    }                   \
  } while (false)
180 
181 // ===================================================================
182 
Parser()183 Parser::Parser()
184     : input_(NULL),
185       error_collector_(NULL),
186       source_location_table_(NULL),
187       had_errors_(false),
188       require_syntax_identifier_(false),
189       stop_after_syntax_identifier_(false) {
190 }
191 
~Parser()192 Parser::~Parser() {}
193 
194 // ===================================================================
195 
LookingAt(const char * text)196 inline bool Parser::LookingAt(const char* text) {
197   return input_->current().text == text;
198 }
199 
LookingAtType(io::Tokenizer::TokenType token_type)200 inline bool Parser::LookingAtType(io::Tokenizer::TokenType token_type) {
201   return input_->current().type == token_type;
202 }
203 
AtEnd()204 inline bool Parser::AtEnd() { return LookingAtType(io::Tokenizer::TYPE_END); }
205 
TryConsume(const char * text)206 bool Parser::TryConsume(const char* text) {
207   if (LookingAt(text)) {
208     input_->Next();
209     return true;
210   } else {
211     return false;
212   }
213 }
214 
Consume(const char * text,const char * error)215 bool Parser::Consume(const char* text, const char* error) {
216   if (TryConsume(text)) {
217     return true;
218   } else {
219     AddError(error);
220     return false;
221   }
222 }
223 
Consume(const char * text)224 bool Parser::Consume(const char* text) {
225   if (TryConsume(text)) {
226     return true;
227   } else {
228     AddError("Expected \"" + std::string(text) + "\".");
229     return false;
230   }
231 }
232 
ConsumeIdentifier(std::string * output,const char * error)233 bool Parser::ConsumeIdentifier(std::string* output, const char* error) {
234   if (LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) {
235     *output = input_->current().text;
236     input_->Next();
237     return true;
238   } else {
239     AddError(error);
240     return false;
241   }
242 }
243 
ConsumeInteger(int * output,const char * error)244 bool Parser::ConsumeInteger(int* output, const char* error) {
245   if (LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
246     uint64 value = 0;
247     if (!io::Tokenizer::ParseInteger(input_->current().text, kint32max,
248                                      &value)) {
249       AddError("Integer out of range.");
250       // We still return true because we did, in fact, parse an integer.
251     }
252     *output = value;
253     input_->Next();
254     return true;
255   } else {
256     AddError(error);
257     return false;
258   }
259 }
260 
ConsumeSignedInteger(int * output,const char * error)261 bool Parser::ConsumeSignedInteger(int* output, const char* error) {
262   bool is_negative = false;
263   uint64 max_value = kint32max;
264   if (TryConsume("-")) {
265     is_negative = true;
266     max_value += 1;
267   }
268   uint64 value = 0;
269   DO(ConsumeInteger64(max_value, &value, error));
270   if (is_negative) value *= -1;
271   *output = value;
272   return true;
273 }
274 
ConsumeInteger64(uint64 max_value,uint64 * output,const char * error)275 bool Parser::ConsumeInteger64(uint64 max_value, uint64* output,
276                               const char* error) {
277   if (LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
278     if (!io::Tokenizer::ParseInteger(input_->current().text, max_value,
279                                      output)) {
280       AddError("Integer out of range.");
281       // We still return true because we did, in fact, parse an integer.
282       *output = 0;
283     }
284     input_->Next();
285     return true;
286   } else {
287     AddError(error);
288     return false;
289   }
290 }
291 
ConsumeNumber(double * output,const char * error)292 bool Parser::ConsumeNumber(double* output, const char* error) {
293   if (LookingAtType(io::Tokenizer::TYPE_FLOAT)) {
294     *output = io::Tokenizer::ParseFloat(input_->current().text);
295     input_->Next();
296     return true;
297   } else if (LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
298     // Also accept integers.
299     uint64 value = 0;
300     if (!io::Tokenizer::ParseInteger(input_->current().text, kuint64max,
301                                      &value)) {
302       AddError("Integer out of range.");
303       // We still return true because we did, in fact, parse a number.
304     }
305     *output = value;
306     input_->Next();
307     return true;
308   } else if (LookingAt("inf")) {
309     *output = std::numeric_limits<double>::infinity();
310     input_->Next();
311     return true;
312   } else if (LookingAt("nan")) {
313     *output = std::numeric_limits<double>::quiet_NaN();
314     input_->Next();
315     return true;
316   } else {
317     AddError(error);
318     return false;
319   }
320 }
321 
ConsumeString(std::string * output,const char * error)322 bool Parser::ConsumeString(std::string* output, const char* error) {
323   if (LookingAtType(io::Tokenizer::TYPE_STRING)) {
324     io::Tokenizer::ParseString(input_->current().text, output);
325     input_->Next();
326     // Allow C++ like concatenation of adjacent string tokens.
327     while (LookingAtType(io::Tokenizer::TYPE_STRING)) {
328       io::Tokenizer::ParseStringAppend(input_->current().text, output);
329       input_->Next();
330     }
331     return true;
332   } else {
333     AddError(error);
334     return false;
335   }
336 }
337 
TryConsumeEndOfDeclaration(const char * text,const LocationRecorder * location)338 bool Parser::TryConsumeEndOfDeclaration(const char* text,
339                                         const LocationRecorder* location) {
340   if (LookingAt(text)) {
341     std::string leading, trailing;
342     std::vector<std::string> detached;
343     input_->NextWithComments(&trailing, &detached, &leading);
344 
345     // Save the leading comments for next time, and recall the leading comments
346     // from last time.
347     leading.swap(upcoming_doc_comments_);
348 
349     if (location != NULL) {
350       upcoming_detached_comments_.swap(detached);
351       location->AttachComments(&leading, &trailing, &detached);
352     } else if (strcmp(text, "}") == 0) {
353       // If the current location is null and we are finishing the current scope,
354       // drop pending upcoming detached comments.
355       upcoming_detached_comments_.swap(detached);
356     } else {
357       // Otherwise, append the new detached comments to the existing upcoming
358       // detached comments.
359       upcoming_detached_comments_.insert(upcoming_detached_comments_.end(),
360                                          detached.begin(), detached.end());
361     }
362 
363     return true;
364   } else {
365     return false;
366   }
367 }
368 
ConsumeEndOfDeclaration(const char * text,const LocationRecorder * location)369 bool Parser::ConsumeEndOfDeclaration(const char* text,
370                                      const LocationRecorder* location) {
371   if (TryConsumeEndOfDeclaration(text, location)) {
372     return true;
373   } else {
374     AddError("Expected \"" + std::string(text) + "\".");
375     return false;
376   }
377 }
378 
379 // -------------------------------------------------------------------
380 
AddError(int line,int column,const std::string & error)381 void Parser::AddError(int line, int column, const std::string& error) {
382   if (error_collector_ != NULL) {
383     error_collector_->AddError(line, column, error);
384   }
385   had_errors_ = true;
386 }
387 
AddError(const std::string & error)388 void Parser::AddError(const std::string& error) {
389   AddError(input_->current().line, input_->current().column, error);
390 }
391 
AddWarning(const std::string & warning)392 void Parser::AddWarning(const std::string& warning) {
393   if (error_collector_ != nullptr) {
394     error_collector_->AddWarning(input_->current().line,
395                                  input_->current().column, warning);
396   }
397 }
398 
399 // -------------------------------------------------------------------
400 
LocationRecorder(Parser * parser)401 Parser::LocationRecorder::LocationRecorder(Parser* parser)
402     : parser_(parser),
403       source_code_info_(parser->source_code_info_),
404       location_(parser_->source_code_info_->add_location()) {
405   location_->add_span(parser_->input_->current().line);
406   location_->add_span(parser_->input_->current().column);
407 }
408 
LocationRecorder(const LocationRecorder & parent)409 Parser::LocationRecorder::LocationRecorder(const LocationRecorder& parent) {
410   Init(parent, parent.source_code_info_);
411 }
412 
LocationRecorder(const LocationRecorder & parent,int path1,SourceCodeInfo * source_code_info)413 Parser::LocationRecorder::LocationRecorder(const LocationRecorder& parent,
414                                            int path1,
415                                            SourceCodeInfo* source_code_info) {
416   Init(parent, source_code_info);
417   AddPath(path1);
418 }
419 
LocationRecorder(const LocationRecorder & parent,int path1)420 Parser::LocationRecorder::LocationRecorder(const LocationRecorder& parent,
421                                            int path1) {
422   Init(parent, parent.source_code_info_);
423   AddPath(path1);
424 }
425 
LocationRecorder(const LocationRecorder & parent,int path1,int path2)426 Parser::LocationRecorder::LocationRecorder(const LocationRecorder& parent,
427                                            int path1, int path2) {
428   Init(parent, parent.source_code_info_);
429   AddPath(path1);
430   AddPath(path2);
431 }
432 
Init(const LocationRecorder & parent,SourceCodeInfo * source_code_info)433 void Parser::LocationRecorder::Init(const LocationRecorder& parent,
434                                     SourceCodeInfo* source_code_info) {
435   parser_ = parent.parser_;
436   source_code_info_ = source_code_info;
437 
438   location_ = source_code_info_->add_location();
439   location_->mutable_path()->CopyFrom(parent.location_->path());
440 
441   location_->add_span(parser_->input_->current().line);
442   location_->add_span(parser_->input_->current().column);
443 }
444 
~LocationRecorder()445 Parser::LocationRecorder::~LocationRecorder() {
446   if (location_->span_size() <= 2) {
447     EndAt(parser_->input_->previous());
448   }
449 }
450 
AddPath(int path_component)451 void Parser::LocationRecorder::AddPath(int path_component) {
452   location_->add_path(path_component);
453 }
454 
StartAt(const io::Tokenizer::Token & token)455 void Parser::LocationRecorder::StartAt(const io::Tokenizer::Token& token) {
456   location_->set_span(0, token.line);
457   location_->set_span(1, token.column);
458 }
459 
StartAt(const LocationRecorder & other)460 void Parser::LocationRecorder::StartAt(const LocationRecorder& other) {
461   location_->set_span(0, other.location_->span(0));
462   location_->set_span(1, other.location_->span(1));
463 }
464 
EndAt(const io::Tokenizer::Token & token)465 void Parser::LocationRecorder::EndAt(const io::Tokenizer::Token& token) {
466   if (token.line != location_->span(0)) {
467     location_->add_span(token.line);
468   }
469   location_->add_span(token.end_column);
470 }
471 
RecordLegacyLocation(const Message * descriptor,DescriptorPool::ErrorCollector::ErrorLocation location)472 void Parser::LocationRecorder::RecordLegacyLocation(
473     const Message* descriptor,
474     DescriptorPool::ErrorCollector::ErrorLocation location) {
475   if (parser_->source_location_table_ != NULL) {
476     parser_->source_location_table_->Add(
477         descriptor, location, location_->span(0), location_->span(1));
478   }
479 }
480 
RecordLegacyImportLocation(const Message * descriptor,const std::string & name)481 void Parser::LocationRecorder::RecordLegacyImportLocation(
482     const Message* descriptor, const std::string& name) {
483   if (parser_->source_location_table_ != nullptr) {
484     parser_->source_location_table_->AddImport(
485         descriptor, name, location_->span(0), location_->span(1));
486   }
487 }
488 
CurrentPathSize() const489 int Parser::LocationRecorder::CurrentPathSize() const {
490   return location_->path_size();
491 }
492 
AttachComments(std::string * leading,std::string * trailing,std::vector<std::string> * detached_comments) const493 void Parser::LocationRecorder::AttachComments(
494     std::string* leading, std::string* trailing,
495     std::vector<std::string>* detached_comments) const {
496   GOOGLE_CHECK(!location_->has_leading_comments());
497   GOOGLE_CHECK(!location_->has_trailing_comments());
498 
499   if (!leading->empty()) {
500     location_->mutable_leading_comments()->swap(*leading);
501   }
502   if (!trailing->empty()) {
503     location_->mutable_trailing_comments()->swap(*trailing);
504   }
505   for (int i = 0; i < detached_comments->size(); ++i) {
506     location_->add_leading_detached_comments()->swap((*detached_comments)[i]);
507   }
508   detached_comments->clear();
509 }
510 
511 // -------------------------------------------------------------------
512 
SkipStatement()513 void Parser::SkipStatement() {
514   while (true) {
515     if (AtEnd()) {
516       return;
517     } else if (LookingAtType(io::Tokenizer::TYPE_SYMBOL)) {
518       if (TryConsumeEndOfDeclaration(";", NULL)) {
519         return;
520       } else if (TryConsume("{")) {
521         SkipRestOfBlock();
522         return;
523       } else if (LookingAt("}")) {
524         return;
525       }
526     }
527     input_->Next();
528   }
529 }
530 
SkipRestOfBlock()531 void Parser::SkipRestOfBlock() {
532   while (true) {
533     if (AtEnd()) {
534       return;
535     } else if (LookingAtType(io::Tokenizer::TYPE_SYMBOL)) {
536       if (TryConsumeEndOfDeclaration("}", NULL)) {
537         return;
538       } else if (TryConsume("{")) {
539         SkipRestOfBlock();
540       }
541     }
542     input_->Next();
543   }
544 }
545 
546 // ===================================================================
547 
ValidateEnum(const EnumDescriptorProto * proto)548 bool Parser::ValidateEnum(const EnumDescriptorProto* proto) {
549   bool has_allow_alias = false;
550   bool allow_alias = false;
551 
552   for (int i = 0; i < proto->options().uninterpreted_option_size(); i++) {
553     const UninterpretedOption option = proto->options().uninterpreted_option(i);
554     if (option.name_size() > 1) {
555       continue;
556     }
557     if (!option.name(0).is_extension() &&
558         option.name(0).name_part() == "allow_alias") {
559       has_allow_alias = true;
560       if (option.identifier_value() == "true") {
561         allow_alias = true;
562       }
563       break;
564     }
565   }
566 
567   if (has_allow_alias && !allow_alias) {
568     std::string error =
569         "\"" + proto->name() +
570         "\" declares 'option allow_alias = false;' which has no effect. "
571         "Please remove the declaration.";
572     // This needlessly clutters declarations with nops.
573     AddError(error);
574     return false;
575   }
576 
577   std::set<int> used_values;
578   bool has_duplicates = false;
579   for (int i = 0; i < proto->value_size(); ++i) {
580     const EnumValueDescriptorProto& enum_value = proto->value(i);
581     if (used_values.find(enum_value.number()) != used_values.end()) {
582       has_duplicates = true;
583       break;
584     } else {
585       used_values.insert(enum_value.number());
586     }
587   }
588   if (allow_alias && !has_duplicates) {
589     std::string error =
590         "\"" + proto->name() +
591         "\" declares support for enum aliases but no enum values share field "
592         "numbers. Please remove the unnecessary 'option allow_alias = true;' "
593         "declaration.";
594     // Generate an error if an enum declares support for duplicate enum values
595     // and does not use it protect future authors.
596     AddError(error);
597     return false;
598   }
599 
600   // Enforce that enum constants must be UPPER_CASE except in case of
601   // enum_alias.
602   if (!allow_alias) {
603     for (const auto& enum_value : proto->value()) {
604       if (!IsUpperUnderscore(enum_value.name())) {
605         AddWarning(
606             "Enum constant should be in UPPER_CASE. Found: " +
607             enum_value.name() +
608             ". See https://developers.google.com/protocol-buffers/docs/style");
609       }
610     }
611   }
612 
613   return true;
614 }
615 
Parse(io::Tokenizer * input,FileDescriptorProto * file)616 bool Parser::Parse(io::Tokenizer* input, FileDescriptorProto* file) {
617   input_ = input;
618   had_errors_ = false;
619   syntax_identifier_.clear();
620 
621   // Note that |file| could be NULL at this point if
622   // stop_after_syntax_identifier_ is true.  So, we conservatively allocate
623   // SourceCodeInfo on the stack, then swap it into the FileDescriptorProto
624   // later on.
625   SourceCodeInfo source_code_info;
626   source_code_info_ = &source_code_info;
627 
628   if (LookingAtType(io::Tokenizer::TYPE_START)) {
629     // Advance to first token.
630     input_->NextWithComments(NULL, &upcoming_detached_comments_,
631                              &upcoming_doc_comments_);
632   }
633 
634   {
635     LocationRecorder root_location(this);
636     root_location.RecordLegacyLocation(file,
637                                        DescriptorPool::ErrorCollector::OTHER);
638 
639     if (require_syntax_identifier_ || LookingAt("syntax")) {
640       if (!ParseSyntaxIdentifier(root_location)) {
641         // Don't attempt to parse the file if we didn't recognize the syntax
642         // identifier.
643         return false;
644       }
645       // Store the syntax into the file.
646       if (file != NULL) file->set_syntax(syntax_identifier_);
647     } else if (!stop_after_syntax_identifier_) {
648       GOOGLE_LOG(WARNING) << "No syntax specified for the proto file: " << file->name()
649                    << ". Please use 'syntax = \"proto2\";' "
650                    << "or 'syntax = \"proto3\";' to specify a syntax "
651                    << "version. (Defaulted to proto2 syntax.)";
652       syntax_identifier_ = "proto2";
653     }
654 
655     if (stop_after_syntax_identifier_) return !had_errors_;
656 
657     // Repeatedly parse statements until we reach the end of the file.
658     while (!AtEnd()) {
659       if (!ParseTopLevelStatement(file, root_location)) {
660         // This statement failed to parse.  Skip it, but keep looping to parse
661         // other statements.
662         SkipStatement();
663 
664         if (LookingAt("}")) {
665           AddError("Unmatched \"}\".");
666           input_->NextWithComments(NULL, &upcoming_detached_comments_,
667                                    &upcoming_doc_comments_);
668         }
669       }
670     }
671   }
672 
673   input_ = NULL;
674   source_code_info_ = NULL;
675   assert(file != NULL);
676   source_code_info.Swap(file->mutable_source_code_info());
677   return !had_errors_;
678 }
679 
ParseSyntaxIdentifier(const LocationRecorder & parent)680 bool Parser::ParseSyntaxIdentifier(const LocationRecorder& parent) {
681   LocationRecorder syntax_location(parent,
682                                    FileDescriptorProto::kSyntaxFieldNumber);
683   DO(Consume(
684       "syntax",
685       "File must begin with a syntax statement, e.g. 'syntax = \"proto2\";'."));
686   DO(Consume("="));
687   io::Tokenizer::Token syntax_token = input_->current();
688   std::string syntax;
689   DO(ConsumeString(&syntax, "Expected syntax identifier."));
690   DO(ConsumeEndOfDeclaration(";", &syntax_location));
691 
692   syntax_identifier_ = syntax;
693 
694   if (syntax != "proto2" && syntax != "proto3" &&
695       !stop_after_syntax_identifier_) {
696     AddError(syntax_token.line, syntax_token.column,
697              "Unrecognized syntax identifier \"" + syntax +
698                  "\".  This parser "
699                  "only recognizes \"proto2\" and \"proto3\".");
700     return false;
701   }
702 
703   return true;
704 }
705 
ParseTopLevelStatement(FileDescriptorProto * file,const LocationRecorder & root_location)706 bool Parser::ParseTopLevelStatement(FileDescriptorProto* file,
707                                     const LocationRecorder& root_location) {
708   if (TryConsumeEndOfDeclaration(";", NULL)) {
709     // empty statement; ignore
710     return true;
711   } else if (LookingAt("message")) {
712     LocationRecorder location(root_location,
713                               FileDescriptorProto::kMessageTypeFieldNumber,
714                               file->message_type_size());
715     return ParseMessageDefinition(file->add_message_type(), location, file);
716   } else if (LookingAt("enum")) {
717     LocationRecorder location(root_location,
718                               FileDescriptorProto::kEnumTypeFieldNumber,
719                               file->enum_type_size());
720     return ParseEnumDefinition(file->add_enum_type(), location, file);
721   } else if (LookingAt("service")) {
722     LocationRecorder location(root_location,
723                               FileDescriptorProto::kServiceFieldNumber,
724                               file->service_size());
725     return ParseServiceDefinition(file->add_service(), location, file);
726   } else if (LookingAt("extend")) {
727     LocationRecorder location(root_location,
728                               FileDescriptorProto::kExtensionFieldNumber);
729     return ParseExtend(
730         file->mutable_extension(), file->mutable_message_type(), root_location,
731         FileDescriptorProto::kMessageTypeFieldNumber, location, file);
732   } else if (LookingAt("import")) {
733     return ParseImport(file->mutable_dependency(),
734                        file->mutable_public_dependency(),
735                        file->mutable_weak_dependency(), root_location, file);
736   } else if (LookingAt("package")) {
737     return ParsePackage(file, root_location, file);
738   } else if (LookingAt("option")) {
739     LocationRecorder location(root_location,
740                               FileDescriptorProto::kOptionsFieldNumber);
741     return ParseOption(file->mutable_options(), location, file,
742                        OPTION_STATEMENT);
743   } else {
744     AddError("Expected top-level statement (e.g. \"message\").");
745     return false;
746   }
747 }
748 
749 // -------------------------------------------------------------------
750 // Messages
751 
ParseMessageDefinition(DescriptorProto * message,const LocationRecorder & message_location,const FileDescriptorProto * containing_file)752 bool Parser::ParseMessageDefinition(
753     DescriptorProto* message, const LocationRecorder& message_location,
754     const FileDescriptorProto* containing_file) {
755   DO(Consume("message"));
756   {
757     LocationRecorder location(message_location,
758                               DescriptorProto::kNameFieldNumber);
759     location.RecordLegacyLocation(message,
760                                   DescriptorPool::ErrorCollector::NAME);
761     DO(ConsumeIdentifier(message->mutable_name(), "Expected message name."));
762     if (!IsUpperCamelCase(message->name())) {
763       AddWarning(
764           "Message name should be in UpperCamelCase. Found: " +
765           message->name() +
766           ". See https://developers.google.com/protocol-buffers/docs/style");
767     }
768   }
769   DO(ParseMessageBlock(message, message_location, containing_file));
770 
771   if (syntax_identifier_ == "proto3") {
772     // Add synthetic one-field oneofs for optional fields, except messages which
773     // already have presence in proto3.
774     //
775     // We have to make sure the oneof names don't conflict with any other
776     // field or oneof.
777     std::unordered_set<std::string> names;
778     for (const auto& field : message->field()) {
779       names.insert(field.name());
780     }
781     for (const auto& oneof : message->oneof_decl()) {
782       names.insert(oneof.name());
783     }
784 
785     for (auto& field : *message->mutable_field()) {
786       if (field.proto3_optional()) {
787         std::string oneof_name = field.name();
788 
789         // Prepend 'XXXXX_' until we are no longer conflicting.
790         // Avoid prepending a double-underscore because such names are
791         // reserved in C++.
792         if (oneof_name.empty() || oneof_name[0] != '_') {
793           oneof_name = '_' + oneof_name;
794         }
795         while (names.count(oneof_name) > 0) {
796           oneof_name = 'X' + oneof_name;
797         }
798 
799         names.insert(oneof_name);
800         field.set_oneof_index(message->oneof_decl_size());
801         OneofDescriptorProto* oneof = message->add_oneof_decl();
802         oneof->set_name(oneof_name);
803       }
804     }
805   }
806 
807   return true;
808 }
809 
810 namespace {
811 
// Placeholder stored as the end of a range until the real maximum is known;
// the Adjust*RangesWithMaxEndNumber() helpers replace it.
constexpr int kMaxRangeSentinel = -1;
813 
IsMessageSetWireFormatMessage(const DescriptorProto & message)814 bool IsMessageSetWireFormatMessage(const DescriptorProto& message) {
815   const MessageOptions& options = message.options();
816   for (int i = 0; i < options.uninterpreted_option_size(); ++i) {
817     const UninterpretedOption& uninterpreted = options.uninterpreted_option(i);
818     if (uninterpreted.name_size() == 1 &&
819         uninterpreted.name(0).name_part() == "message_set_wire_format" &&
820         uninterpreted.identifier_value() == "true") {
821       return true;
822     }
823   }
824   return false;
825 }
826 
827 // Modifies any extension ranges that specified 'max' as the end of the
828 // extension range, and sets them to the type-specific maximum. The actual max
829 // tag number can only be determined after all options have been parsed.
AdjustExtensionRangesWithMaxEndNumber(DescriptorProto * message)830 void AdjustExtensionRangesWithMaxEndNumber(DescriptorProto* message) {
831   const bool is_message_set = IsMessageSetWireFormatMessage(*message);
832   const int max_extension_number =
833       is_message_set ? kint32max : FieldDescriptor::kMaxNumber + 1;
834   for (int i = 0; i < message->extension_range_size(); ++i) {
835     if (message->extension_range(i).end() == kMaxRangeSentinel) {
836       message->mutable_extension_range(i)->set_end(max_extension_number);
837     }
838   }
839 }
840 
841 // Modifies any reserved ranges that specified 'max' as the end of the
842 // reserved range, and sets them to the type-specific maximum. The actual max
843 // tag number can only be determined after all options have been parsed.
AdjustReservedRangesWithMaxEndNumber(DescriptorProto * message)844 void AdjustReservedRangesWithMaxEndNumber(DescriptorProto* message) {
845   const bool is_message_set = IsMessageSetWireFormatMessage(*message);
846   const int max_field_number =
847       is_message_set ? kint32max : FieldDescriptor::kMaxNumber + 1;
848   for (int i = 0; i < message->reserved_range_size(); ++i) {
849     if (message->reserved_range(i).end() == kMaxRangeSentinel) {
850       message->mutable_reserved_range(i)->set_end(max_field_number);
851     }
852   }
853 }
854 
855 }  // namespace
856 
ParseMessageBlock(DescriptorProto * message,const LocationRecorder & message_location,const FileDescriptorProto * containing_file)857 bool Parser::ParseMessageBlock(DescriptorProto* message,
858                                const LocationRecorder& message_location,
859                                const FileDescriptorProto* containing_file) {
860   DO(ConsumeEndOfDeclaration("{", &message_location));
861 
862   while (!TryConsumeEndOfDeclaration("}", NULL)) {
863     if (AtEnd()) {
864       AddError("Reached end of input in message definition (missing '}').");
865       return false;
866     }
867 
868     if (!ParseMessageStatement(message, message_location, containing_file)) {
869       // This statement failed to parse.  Skip it, but keep looping to parse
870       // other statements.
871       SkipStatement();
872     }
873   }
874 
875   if (message->extension_range_size() > 0) {
876     AdjustExtensionRangesWithMaxEndNumber(message);
877   }
878   if (message->reserved_range_size() > 0) {
879     AdjustReservedRangesWithMaxEndNumber(message);
880   }
881   return true;
882 }
883 
ParseMessageStatement(DescriptorProto * message,const LocationRecorder & message_location,const FileDescriptorProto * containing_file)884 bool Parser::ParseMessageStatement(DescriptorProto* message,
885                                    const LocationRecorder& message_location,
886                                    const FileDescriptorProto* containing_file) {
887   if (TryConsumeEndOfDeclaration(";", NULL)) {
888     // empty statement; ignore
889     return true;
890   } else if (LookingAt("message")) {
891     LocationRecorder location(message_location,
892                               DescriptorProto::kNestedTypeFieldNumber,
893                               message->nested_type_size());
894     return ParseMessageDefinition(message->add_nested_type(), location,
895                                   containing_file);
896   } else if (LookingAt("enum")) {
897     LocationRecorder location(message_location,
898                               DescriptorProto::kEnumTypeFieldNumber,
899                               message->enum_type_size());
900     return ParseEnumDefinition(message->add_enum_type(), location,
901                                containing_file);
902   } else if (LookingAt("extensions")) {
903     LocationRecorder location(message_location,
904                               DescriptorProto::kExtensionRangeFieldNumber);
905     return ParseExtensions(message, location, containing_file);
906   } else if (LookingAt("reserved")) {
907     return ParseReserved(message, message_location);
908   } else if (LookingAt("extend")) {
909     LocationRecorder location(message_location,
910                               DescriptorProto::kExtensionFieldNumber);
911     return ParseExtend(message->mutable_extension(),
912                        message->mutable_nested_type(), message_location,
913                        DescriptorProto::kNestedTypeFieldNumber, location,
914                        containing_file);
915   } else if (LookingAt("option")) {
916     LocationRecorder location(message_location,
917                               DescriptorProto::kOptionsFieldNumber);
918     return ParseOption(message->mutable_options(), location, containing_file,
919                        OPTION_STATEMENT);
920   } else if (LookingAt("oneof")) {
921     int oneof_index = message->oneof_decl_size();
922     LocationRecorder oneof_location(
923         message_location, DescriptorProto::kOneofDeclFieldNumber, oneof_index);
924 
925     return ParseOneof(message->add_oneof_decl(), message, oneof_index,
926                       oneof_location, message_location, containing_file);
927   } else {
928     LocationRecorder location(message_location,
929                               DescriptorProto::kFieldFieldNumber,
930                               message->field_size());
931     return ParseMessageField(
932         message->add_field(), message->mutable_nested_type(), message_location,
933         DescriptorProto::kNestedTypeFieldNumber, location, containing_file);
934   }
935 }
936 
ParseMessageField(FieldDescriptorProto * field,RepeatedPtrField<DescriptorProto> * messages,const LocationRecorder & parent_location,int location_field_number_for_nested_type,const LocationRecorder & field_location,const FileDescriptorProto * containing_file)937 bool Parser::ParseMessageField(FieldDescriptorProto* field,
938                                RepeatedPtrField<DescriptorProto>* messages,
939                                const LocationRecorder& parent_location,
940                                int location_field_number_for_nested_type,
941                                const LocationRecorder& field_location,
942                                const FileDescriptorProto* containing_file) {
943   {
944     FieldDescriptorProto::Label label;
945     if (ParseLabel(&label, field_location, containing_file)) {
946       field->set_label(label);
947       if (label == FieldDescriptorProto::LABEL_OPTIONAL &&
948           syntax_identifier_ == "proto3") {
949         field->set_proto3_optional(true);
950       }
951     }
952   }
953 
954   return ParseMessageFieldNoLabel(field, messages, parent_location,
955                                   location_field_number_for_nested_type,
956                                   field_location, containing_file);
957 }
958 
ParseMessageFieldNoLabel(FieldDescriptorProto * field,RepeatedPtrField<DescriptorProto> * messages,const LocationRecorder & parent_location,int location_field_number_for_nested_type,const LocationRecorder & field_location,const FileDescriptorProto * containing_file)959 bool Parser::ParseMessageFieldNoLabel(
960     FieldDescriptorProto* field, RepeatedPtrField<DescriptorProto>* messages,
961     const LocationRecorder& parent_location,
962     int location_field_number_for_nested_type,
963     const LocationRecorder& field_location,
964     const FileDescriptorProto* containing_file) {
965   MapField map_field;
966   // Parse type.
967   {
968     LocationRecorder location(field_location);  // add path later
969     location.RecordLegacyLocation(field, DescriptorPool::ErrorCollector::TYPE);
970 
971     bool type_parsed = false;
972     FieldDescriptorProto::Type type = FieldDescriptorProto::TYPE_INT32;
973     std::string type_name;
974 
975     // Special case map field. We only treat the field as a map field if the
976     // field type name starts with the word "map" with a following "<".
977     if (TryConsume("map")) {
978       if (LookingAt("<")) {
979         map_field.is_map_field = true;
980       } else {
981         // False positive
982         type_parsed = true;
983         type_name = "map";
984       }
985     }
986     if (map_field.is_map_field) {
987       if (field->has_oneof_index()) {
988         AddError("Map fields are not allowed in oneofs.");
989         return false;
990       }
991       if (field->has_label()) {
992         AddError(
993             "Field labels (required/optional/repeated) are not allowed on "
994             "map fields.");
995         return false;
996       }
997       if (field->has_extendee()) {
998         AddError("Map fields are not allowed to be extensions.");
999         return false;
1000       }
1001       field->set_label(FieldDescriptorProto::LABEL_REPEATED);
1002       DO(Consume("<"));
1003       DO(ParseType(&map_field.key_type, &map_field.key_type_name));
1004       DO(Consume(","));
1005       DO(ParseType(&map_field.value_type, &map_field.value_type_name));
1006       DO(Consume(">"));
1007       // Defer setting of the type name of the map field until the
1008       // field name is parsed. Add the source location though.
1009       location.AddPath(FieldDescriptorProto::kTypeNameFieldNumber);
1010     } else {
1011       // Handle the case where no explicit label is given for a non-map field.
1012       if (!field->has_label() && DefaultToOptionalFields()) {
1013         field->set_label(FieldDescriptorProto::LABEL_OPTIONAL);
1014       }
1015       if (!field->has_label()) {
1016         AddError("Expected \"required\", \"optional\", or \"repeated\".");
1017         // We can actually reasonably recover here by just assuming the user
1018         // forgot the label altogether.
1019         field->set_label(FieldDescriptorProto::LABEL_OPTIONAL);
1020       }
1021 
1022       // Handle the case where the actual type is a message or enum named "map",
1023       // which we already consumed in the code above.
1024       if (!type_parsed) {
1025         DO(ParseType(&type, &type_name));
1026       }
1027       if (type_name.empty()) {
1028         location.AddPath(FieldDescriptorProto::kTypeFieldNumber);
1029         field->set_type(type);
1030       } else {
1031         location.AddPath(FieldDescriptorProto::kTypeNameFieldNumber);
1032         field->set_type_name(type_name);
1033       }
1034     }
1035   }
1036 
1037   // Parse name and '='.
1038   io::Tokenizer::Token name_token = input_->current();
1039   {
1040     LocationRecorder location(field_location,
1041                               FieldDescriptorProto::kNameFieldNumber);
1042     location.RecordLegacyLocation(field, DescriptorPool::ErrorCollector::NAME);
1043     DO(ConsumeIdentifier(field->mutable_name(), "Expected field name."));
1044 
1045     if (!IsLowerUnderscore(field->name())) {
1046       AddWarning(
1047           "Field name should be lowercase. Found: " + field->name() +
1048           ". See: https://developers.google.com/protocol-buffers/docs/style");
1049     }
1050     if (IsNumberFollowUnderscore(field->name())) {
1051       AddWarning(
1052           "Number should not come right after an underscore. Found: " +
1053           field->name() +
1054           ". See: https://developers.google.com/protocol-buffers/docs/style");
1055     }
1056   }
1057   DO(Consume("=", "Missing field number."));
1058 
1059   // Parse field number.
1060   {
1061     LocationRecorder location(field_location,
1062                               FieldDescriptorProto::kNumberFieldNumber);
1063     location.RecordLegacyLocation(field,
1064                                   DescriptorPool::ErrorCollector::NUMBER);
1065     int number;
1066     DO(ConsumeInteger(&number, "Expected field number."));
1067     field->set_number(number);
1068   }
1069 
1070   // Parse options.
1071   DO(ParseFieldOptions(field, field_location, containing_file));
1072 
1073   // Deal with groups.
1074   if (field->has_type() && field->type() == FieldDescriptorProto::TYPE_GROUP) {
1075     // Awkward:  Since a group declares both a message type and a field, we
1076     //   have to create overlapping locations.
1077     LocationRecorder group_location(parent_location);
1078     group_location.StartAt(field_location);
1079     group_location.AddPath(location_field_number_for_nested_type);
1080     group_location.AddPath(messages->size());
1081 
1082     DescriptorProto* group = messages->Add();
1083     group->set_name(field->name());
1084 
1085     // Record name location to match the field name's location.
1086     {
1087       LocationRecorder location(group_location,
1088                                 DescriptorProto::kNameFieldNumber);
1089       location.StartAt(name_token);
1090       location.EndAt(name_token);
1091       location.RecordLegacyLocation(group,
1092                                     DescriptorPool::ErrorCollector::NAME);
1093     }
1094 
1095     // The field's type_name also comes from the name.  Confusing!
1096     {
1097       LocationRecorder location(field_location,
1098                                 FieldDescriptorProto::kTypeNameFieldNumber);
1099       location.StartAt(name_token);
1100       location.EndAt(name_token);
1101     }
1102 
1103     // As a hack for backwards-compatibility, we force the group name to start
1104     // with a capital letter and lower-case the field name.  New code should
1105     // not use groups; it should use nested messages.
1106     if (group->name()[0] < 'A' || 'Z' < group->name()[0]) {
1107       AddError(name_token.line, name_token.column,
1108                "Group names must start with a capital letter.");
1109     }
1110     LowerString(field->mutable_name());
1111 
1112     field->set_type_name(group->name());
1113     if (LookingAt("{")) {
1114       DO(ParseMessageBlock(group, group_location, containing_file));
1115     } else {
1116       AddError("Missing group body.");
1117       return false;
1118     }
1119   } else {
1120     DO(ConsumeEndOfDeclaration(";", &field_location));
1121   }
1122 
1123   // Create a map entry type if this is a map field.
1124   if (map_field.is_map_field) {
1125     GenerateMapEntry(map_field, field, messages);
1126   }
1127 
1128   return true;
1129 }
1130 
GenerateMapEntry(const MapField & map_field,FieldDescriptorProto * field,RepeatedPtrField<DescriptorProto> * messages)1131 void Parser::GenerateMapEntry(const MapField& map_field,
1132                               FieldDescriptorProto* field,
1133                               RepeatedPtrField<DescriptorProto>* messages) {
1134   DescriptorProto* entry = messages->Add();
1135   std::string entry_name = MapEntryName(field->name());
1136   field->set_type_name(entry_name);
1137   entry->set_name(entry_name);
1138   entry->mutable_options()->set_map_entry(true);
1139   FieldDescriptorProto* key_field = entry->add_field();
1140   key_field->set_name("key");
1141   key_field->set_label(FieldDescriptorProto::LABEL_OPTIONAL);
1142   key_field->set_number(1);
1143   if (map_field.key_type_name.empty()) {
1144     key_field->set_type(map_field.key_type);
1145   } else {
1146     key_field->set_type_name(map_field.key_type_name);
1147   }
1148   FieldDescriptorProto* value_field = entry->add_field();
1149   value_field->set_name("value");
1150   value_field->set_label(FieldDescriptorProto::LABEL_OPTIONAL);
1151   value_field->set_number(2);
1152   if (map_field.value_type_name.empty()) {
1153     value_field->set_type(map_field.value_type);
1154   } else {
1155     value_field->set_type_name(map_field.value_type_name);
1156   }
1157   // Propagate the "enforce_utf8" option to key and value fields if they
1158   // are strings. This helps simplify the implementation of code generators
1159   // and also reflection-based parsing code.
1160   //
1161   // The following definition:
1162   //   message Foo {
1163   //     map<string, string> value = 1 [enforce_utf8 = false];
1164   //   }
1165   // will be interpreted as:
1166   //   message Foo {
1167   //     message ValueEntry {
1168   //       option map_entry = true;
1169   //       string key = 1 [enforce_utf8 = false];
1170   //       string value = 2 [enforce_utf8 = false];
1171   //     }
1172   //     repeated ValueEntry value = 1 [enforce_utf8 = false];
1173   //  }
1174   //
1175   // TODO(xiaofeng): Remove this when the "enforce_utf8" option is removed
1176   // from protocol compiler.
1177   for (int i = 0; i < field->options().uninterpreted_option_size(); ++i) {
1178     const UninterpretedOption& option =
1179         field->options().uninterpreted_option(i);
1180     if (option.name_size() == 1 &&
1181         option.name(0).name_part() == "enforce_utf8" &&
1182         !option.name(0).is_extension()) {
1183       if (key_field->type() == FieldDescriptorProto::TYPE_STRING) {
1184         key_field->mutable_options()->add_uninterpreted_option()->CopyFrom(
1185             option);
1186       }
1187       if (value_field->type() == FieldDescriptorProto::TYPE_STRING) {
1188         value_field->mutable_options()->add_uninterpreted_option()->CopyFrom(
1189             option);
1190       }
1191     }
1192   }
1193 }
1194 
ParseFieldOptions(FieldDescriptorProto * field,const LocationRecorder & field_location,const FileDescriptorProto * containing_file)1195 bool Parser::ParseFieldOptions(FieldDescriptorProto* field,
1196                                const LocationRecorder& field_location,
1197                                const FileDescriptorProto* containing_file) {
1198   if (!LookingAt("[")) return true;
1199 
1200   LocationRecorder location(field_location,
1201                             FieldDescriptorProto::kOptionsFieldNumber);
1202 
1203   DO(Consume("["));
1204 
1205   // Parse field options.
1206   do {
1207     if (LookingAt("default")) {
1208       // We intentionally pass field_location rather than location here, since
1209       // the default value is not actually an option.
1210       DO(ParseDefaultAssignment(field, field_location, containing_file));
1211     } else if (LookingAt("json_name")) {
1212       // Like default value, this "json_name" is not an actual option.
1213       DO(ParseJsonName(field, field_location, containing_file));
1214     } else {
1215       DO(ParseOption(field->mutable_options(), location, containing_file,
1216                      OPTION_ASSIGNMENT));
1217     }
1218   } while (TryConsume(","));
1219 
1220   DO(Consume("]"));
1221   return true;
1222 }
1223 
ParseDefaultAssignment(FieldDescriptorProto * field,const LocationRecorder & field_location,const FileDescriptorProto * containing_file)1224 bool Parser::ParseDefaultAssignment(
1225     FieldDescriptorProto* field, const LocationRecorder& field_location,
1226     const FileDescriptorProto* containing_file) {
1227   if (field->has_default_value()) {
1228     AddError("Already set option \"default\".");
1229     field->clear_default_value();
1230   }
1231 
1232   DO(Consume("default"));
1233   DO(Consume("="));
1234 
1235   LocationRecorder location(field_location,
1236                             FieldDescriptorProto::kDefaultValueFieldNumber);
1237   location.RecordLegacyLocation(field,
1238                                 DescriptorPool::ErrorCollector::DEFAULT_VALUE);
1239   std::string* default_value = field->mutable_default_value();
1240 
1241   if (!field->has_type()) {
1242     // The field has a type name, but we don't know if it is a message or an
1243     // enum yet. (If it were a primitive type, |field| would have a type set
1244     // already.) In this case, simply take the current string as the default
1245     // value; we will catch the error later if it is not a valid enum value.
1246     // (N.B. that we do not check whether the current token is an identifier:
1247     // doing so throws strange errors when the user mistypes a primitive
1248     // typename and we assume it's an enum. E.g.: "optional int foo = 1 [default
1249     // = 42]". In such a case the fundamental error is really that "int" is not
1250     // a type, not that "42" is not an identifier. See b/12533582.)
1251     *default_value = input_->current().text;
1252     input_->Next();
1253     return true;
1254   }
1255 
1256   switch (field->type()) {
1257     case FieldDescriptorProto::TYPE_INT32:
1258     case FieldDescriptorProto::TYPE_INT64:
1259     case FieldDescriptorProto::TYPE_SINT32:
1260     case FieldDescriptorProto::TYPE_SINT64:
1261     case FieldDescriptorProto::TYPE_SFIXED32:
1262     case FieldDescriptorProto::TYPE_SFIXED64: {
1263       uint64 max_value = kint64max;
1264       if (field->type() == FieldDescriptorProto::TYPE_INT32 ||
1265           field->type() == FieldDescriptorProto::TYPE_SINT32 ||
1266           field->type() == FieldDescriptorProto::TYPE_SFIXED32) {
1267         max_value = kint32max;
1268       }
1269 
1270       // These types can be negative.
1271       if (TryConsume("-")) {
1272         default_value->append("-");
1273         // Two's complement always has one more negative value than positive.
1274         ++max_value;
1275       }
1276       // Parse the integer to verify that it is not out-of-range.
1277       uint64 value;
1278       DO(ConsumeInteger64(max_value, &value,
1279                           "Expected integer for field default value."));
1280       // And stringify it again.
1281       default_value->append(StrCat(value));
1282       break;
1283     }
1284 
1285     case FieldDescriptorProto::TYPE_UINT32:
1286     case FieldDescriptorProto::TYPE_UINT64:
1287     case FieldDescriptorProto::TYPE_FIXED32:
1288     case FieldDescriptorProto::TYPE_FIXED64: {
1289       uint64 max_value = kuint64max;
1290       if (field->type() == FieldDescriptorProto::TYPE_UINT32 ||
1291           field->type() == FieldDescriptorProto::TYPE_FIXED32) {
1292         max_value = kuint32max;
1293       }
1294 
1295       // Numeric, not negative.
1296       if (TryConsume("-")) {
1297         AddError("Unsigned field can't have negative default value.");
1298       }
1299       // Parse the integer to verify that it is not out-of-range.
1300       uint64 value;
1301       DO(ConsumeInteger64(max_value, &value,
1302                           "Expected integer for field default value."));
1303       // And stringify it again.
1304       default_value->append(StrCat(value));
1305       break;
1306     }
1307 
1308     case FieldDescriptorProto::TYPE_FLOAT:
1309     case FieldDescriptorProto::TYPE_DOUBLE:
1310       // These types can be negative.
1311       if (TryConsume("-")) {
1312         default_value->append("-");
1313       }
1314       // Parse the integer because we have to convert hex integers to decimal
1315       // floats.
1316       double value;
1317       DO(ConsumeNumber(&value, "Expected number."));
1318       // And stringify it again.
1319       default_value->append(SimpleDtoa(value));
1320       break;
1321 
1322     case FieldDescriptorProto::TYPE_BOOL:
1323       if (TryConsume("true")) {
1324         default_value->assign("true");
1325       } else if (TryConsume("false")) {
1326         default_value->assign("false");
1327       } else {
1328         AddError("Expected \"true\" or \"false\".");
1329         return false;
1330       }
1331       break;
1332 
1333     case FieldDescriptorProto::TYPE_STRING:
1334       // Note: When file option java_string_check_utf8 is true, if a
1335       // non-string representation (eg byte[]) is later supported, it must
1336       // be checked for UTF-8-ness.
1337       DO(ConsumeString(default_value,
1338                        "Expected string for field default "
1339                        "value."));
1340       break;
1341 
1342     case FieldDescriptorProto::TYPE_BYTES:
1343       DO(ConsumeString(default_value, "Expected string."));
1344       *default_value = CEscape(*default_value);
1345       break;
1346 
1347     case FieldDescriptorProto::TYPE_ENUM:
1348       DO(ConsumeIdentifier(default_value,
1349                            "Expected enum identifier for field "
1350                            "default value."));
1351       break;
1352 
1353     case FieldDescriptorProto::TYPE_MESSAGE:
1354     case FieldDescriptorProto::TYPE_GROUP:
1355       AddError("Messages can't have default values.");
1356       return false;
1357   }
1358 
1359   return true;
1360 }
1361 
ParseJsonName(FieldDescriptorProto * field,const LocationRecorder & field_location,const FileDescriptorProto * containing_file)1362 bool Parser::ParseJsonName(FieldDescriptorProto* field,
1363                            const LocationRecorder& field_location,
1364                            const FileDescriptorProto* containing_file) {
1365   if (field->has_json_name()) {
1366     AddError("Already set option \"json_name\".");
1367     field->clear_json_name();
1368   }
1369 
1370   LocationRecorder location(field_location,
1371                             FieldDescriptorProto::kJsonNameFieldNumber);
1372   location.RecordLegacyLocation(field,
1373                                 DescriptorPool::ErrorCollector::OPTION_NAME);
1374 
1375   DO(Consume("json_name"));
1376   DO(Consume("="));
1377 
1378   LocationRecorder value_location(location);
1379   value_location.RecordLegacyLocation(
1380       field, DescriptorPool::ErrorCollector::OPTION_VALUE);
1381 
1382   DO(ConsumeString(field->mutable_json_name(),
1383                    "Expected string for JSON name."));
1384   return true;
1385 }
1386 
ParseOptionNamePart(UninterpretedOption * uninterpreted_option,const LocationRecorder & part_location,const FileDescriptorProto * containing_file)1387 bool Parser::ParseOptionNamePart(UninterpretedOption* uninterpreted_option,
1388                                  const LocationRecorder& part_location,
1389                                  const FileDescriptorProto* containing_file) {
1390   UninterpretedOption::NamePart* name = uninterpreted_option->add_name();
1391   std::string identifier;  // We parse identifiers into this string.
1392   if (LookingAt("(")) {    // This is an extension.
1393     DO(Consume("("));
1394 
1395     {
1396       LocationRecorder location(
1397           part_location, UninterpretedOption::NamePart::kNamePartFieldNumber);
1398       // An extension name consists of dot-separated identifiers, and may begin
1399       // with a dot.
1400       if (LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) {
1401         DO(ConsumeIdentifier(&identifier, "Expected identifier."));
1402         name->mutable_name_part()->append(identifier);
1403       }
1404       while (LookingAt(".")) {
1405         DO(Consume("."));
1406         name->mutable_name_part()->append(".");
1407         DO(ConsumeIdentifier(&identifier, "Expected identifier."));
1408         name->mutable_name_part()->append(identifier);
1409       }
1410     }
1411 
1412     DO(Consume(")"));
1413     name->set_is_extension(true);
1414   } else {  // This is a regular field.
1415     LocationRecorder location(
1416         part_location, UninterpretedOption::NamePart::kNamePartFieldNumber);
1417     DO(ConsumeIdentifier(&identifier, "Expected identifier."));
1418     name->mutable_name_part()->append(identifier);
1419     name->set_is_extension(false);
1420   }
1421   return true;
1422 }
1423 
ParseUninterpretedBlock(std::string * value)1424 bool Parser::ParseUninterpretedBlock(std::string* value) {
1425   // Note that enclosing braces are not added to *value.
1426   // We do NOT use ConsumeEndOfStatement for this brace because it's delimiting
1427   // an expression, not a block of statements.
1428   DO(Consume("{"));
1429   int brace_depth = 1;
1430   while (!AtEnd()) {
1431     if (LookingAt("{")) {
1432       brace_depth++;
1433     } else if (LookingAt("}")) {
1434       brace_depth--;
1435       if (brace_depth == 0) {
1436         input_->Next();
1437         return true;
1438       }
1439     }
1440     // TODO(sanjay): Interpret line/column numbers to preserve formatting
1441     if (!value->empty()) value->push_back(' ');
1442     value->append(input_->current().text);
1443     input_->Next();
1444   }
1445   AddError("Unexpected end of stream while parsing aggregate value.");
1446   return false;
1447 }
1448 
1449 // We don't interpret the option here. Instead we store it in an
1450 // UninterpretedOption, to be interpreted later.
ParseOption(Message * options,const LocationRecorder & options_location,const FileDescriptorProto * containing_file,OptionStyle style)1451 bool Parser::ParseOption(Message* options,
1452                          const LocationRecorder& options_location,
1453                          const FileDescriptorProto* containing_file,
1454                          OptionStyle style) {
1455   // Create an entry in the uninterpreted_option field.
1456   const FieldDescriptor* uninterpreted_option_field =
1457       options->GetDescriptor()->FindFieldByName("uninterpreted_option");
1458   GOOGLE_CHECK(uninterpreted_option_field != NULL)
1459       << "No field named \"uninterpreted_option\" in the Options proto.";
1460 
1461   const Reflection* reflection = options->GetReflection();
1462 
1463   LocationRecorder location(
1464       options_location, uninterpreted_option_field->number(),
1465       reflection->FieldSize(*options, uninterpreted_option_field));
1466 
1467   if (style == OPTION_STATEMENT) {
1468     DO(Consume("option"));
1469   }
1470 
1471   UninterpretedOption* uninterpreted_option =
1472       down_cast<UninterpretedOption*>(options->GetReflection()->AddMessage(
1473           options, uninterpreted_option_field));
1474 
1475   // Parse dot-separated name.
1476   {
1477     LocationRecorder name_location(location,
1478                                    UninterpretedOption::kNameFieldNumber);
1479     name_location.RecordLegacyLocation(
1480         uninterpreted_option, DescriptorPool::ErrorCollector::OPTION_NAME);
1481 
1482     {
1483       LocationRecorder part_location(name_location,
1484                                      uninterpreted_option->name_size());
1485       DO(ParseOptionNamePart(uninterpreted_option, part_location,
1486                              containing_file));
1487     }
1488 
1489     while (LookingAt(".")) {
1490       DO(Consume("."));
1491       LocationRecorder part_location(name_location,
1492                                      uninterpreted_option->name_size());
1493       DO(ParseOptionNamePart(uninterpreted_option, part_location,
1494                              containing_file));
1495     }
1496   }
1497 
1498   DO(Consume("="));
1499 
1500   {
1501     LocationRecorder value_location(location);
1502     value_location.RecordLegacyLocation(
1503         uninterpreted_option, DescriptorPool::ErrorCollector::OPTION_VALUE);
1504 
1505     // All values are a single token, except for negative numbers, which consist
1506     // of a single '-' symbol, followed by a positive number.
1507     bool is_negative = TryConsume("-");
1508 
1509     switch (input_->current().type) {
1510       case io::Tokenizer::TYPE_START:
1511         GOOGLE_LOG(FATAL) << "Trying to read value before any tokens have been read.";
1512         return false;
1513 
1514       case io::Tokenizer::TYPE_END:
1515         AddError("Unexpected end of stream while parsing option value.");
1516         return false;
1517 
1518       case io::Tokenizer::TYPE_IDENTIFIER: {
1519         value_location.AddPath(
1520             UninterpretedOption::kIdentifierValueFieldNumber);
1521         if (is_negative) {
1522           AddError("Invalid '-' symbol before identifier.");
1523           return false;
1524         }
1525         std::string value;
1526         DO(ConsumeIdentifier(&value, "Expected identifier."));
1527         uninterpreted_option->set_identifier_value(value);
1528         break;
1529       }
1530 
1531       case io::Tokenizer::TYPE_INTEGER: {
1532         uint64 value;
1533         uint64 max_value =
1534             is_negative ? static_cast<uint64>(kint64max) + 1 : kuint64max;
1535         DO(ConsumeInteger64(max_value, &value, "Expected integer."));
1536         if (is_negative) {
1537           value_location.AddPath(
1538               UninterpretedOption::kNegativeIntValueFieldNumber);
1539           uninterpreted_option->set_negative_int_value(
1540               static_cast<int64>(-value));
1541         } else {
1542           value_location.AddPath(
1543               UninterpretedOption::kPositiveIntValueFieldNumber);
1544           uninterpreted_option->set_positive_int_value(value);
1545         }
1546         break;
1547       }
1548 
1549       case io::Tokenizer::TYPE_FLOAT: {
1550         value_location.AddPath(UninterpretedOption::kDoubleValueFieldNumber);
1551         double value;
1552         DO(ConsumeNumber(&value, "Expected number."));
1553         uninterpreted_option->set_double_value(is_negative ? -value : value);
1554         break;
1555       }
1556 
1557       case io::Tokenizer::TYPE_STRING: {
1558         value_location.AddPath(UninterpretedOption::kStringValueFieldNumber);
1559         if (is_negative) {
1560           AddError("Invalid '-' symbol before string.");
1561           return false;
1562         }
1563         std::string value;
1564         DO(ConsumeString(&value, "Expected string."));
1565         uninterpreted_option->set_string_value(value);
1566         break;
1567       }
1568 
1569       case io::Tokenizer::TYPE_SYMBOL:
1570         if (LookingAt("{")) {
1571           value_location.AddPath(
1572               UninterpretedOption::kAggregateValueFieldNumber);
1573           DO(ParseUninterpretedBlock(
1574               uninterpreted_option->mutable_aggregate_value()));
1575         } else {
1576           AddError("Expected option value.");
1577           return false;
1578         }
1579         break;
1580     }
1581   }
1582 
1583   if (style == OPTION_STATEMENT) {
1584     DO(ConsumeEndOfDeclaration(";", &location));
1585   }
1586 
1587   return true;
1588 }
1589 
ParseExtensions(DescriptorProto * message,const LocationRecorder & extensions_location,const FileDescriptorProto * containing_file)1590 bool Parser::ParseExtensions(DescriptorProto* message,
1591                              const LocationRecorder& extensions_location,
1592                              const FileDescriptorProto* containing_file) {
1593   // Parse the declaration.
1594   DO(Consume("extensions"));
1595 
1596   int old_range_size = message->extension_range_size();
1597 
1598   do {
1599     // Note that kExtensionRangeFieldNumber was already pushed by the parent.
1600     LocationRecorder location(extensions_location,
1601                               message->extension_range_size());
1602 
1603     DescriptorProto::ExtensionRange* range = message->add_extension_range();
1604     location.RecordLegacyLocation(range,
1605                                   DescriptorPool::ErrorCollector::NUMBER);
1606 
1607     int start, end;
1608     io::Tokenizer::Token start_token;
1609 
1610     {
1611       LocationRecorder start_location(
1612           location, DescriptorProto::ExtensionRange::kStartFieldNumber);
1613       start_token = input_->current();
1614       DO(ConsumeInteger(&start, "Expected field number range."));
1615     }
1616 
1617     if (TryConsume("to")) {
1618       LocationRecorder end_location(
1619           location, DescriptorProto::ExtensionRange::kEndFieldNumber);
1620       if (TryConsume("max")) {
1621         // Set to the sentinel value - 1 since we increment the value below.
1622         // The actual value of the end of the range should be set with
1623         // AdjustExtensionRangesWithMaxEndNumber.
1624         end = kMaxRangeSentinel - 1;
1625       } else {
1626         DO(ConsumeInteger(&end, "Expected integer."));
1627       }
1628     } else {
1629       LocationRecorder end_location(
1630           location, DescriptorProto::ExtensionRange::kEndFieldNumber);
1631       end_location.StartAt(start_token);
1632       end_location.EndAt(start_token);
1633       end = start;
1634     }
1635 
1636     // Users like to specify inclusive ranges, but in code we like the end
1637     // number to be exclusive.
1638     ++end;
1639 
1640     range->set_start(start);
1641     range->set_end(end);
1642   } while (TryConsume(","));
1643 
1644   if (LookingAt("[")) {
1645     int range_number_index = extensions_location.CurrentPathSize();
1646     SourceCodeInfo info;
1647 
1648     // Parse extension range options in the first range.
1649     ExtensionRangeOptions* options =
1650         message->mutable_extension_range(old_range_size)->mutable_options();
1651 
1652     {
1653       LocationRecorder index_location(
1654           extensions_location, 0 /* we fill this in w/ actual index below */,
1655           &info);
1656       LocationRecorder location(
1657           index_location, DescriptorProto::ExtensionRange::kOptionsFieldNumber);
1658       DO(Consume("["));
1659 
1660       do {
1661         DO(ParseOption(options, location, containing_file, OPTION_ASSIGNMENT));
1662       } while (TryConsume(","));
1663 
1664       DO(Consume("]"));
1665     }
1666 
1667     // Then copy the extension range options to all of the other ranges we've
1668     // parsed.
1669     for (int i = old_range_size + 1; i < message->extension_range_size(); i++) {
1670       message->mutable_extension_range(i)->mutable_options()->CopyFrom(
1671           *options);
1672     }
1673     // and copy source locations to the other ranges, too
1674     for (int i = old_range_size; i < message->extension_range_size(); i++) {
1675       for (int j = 0; j < info.location_size(); j++) {
1676         if (info.location(j).path_size() == range_number_index + 1) {
1677           // this location's path is up to the extension range index, but
1678           // doesn't include options; so it's redundant with location above
1679           continue;
1680         }
1681         SourceCodeInfo_Location* dest = source_code_info_->add_location();
1682         *dest = info.location(j);
1683         dest->set_path(range_number_index, i);
1684       }
1685     }
1686   }
1687 
1688   DO(ConsumeEndOfDeclaration(";", &extensions_location));
1689   return true;
1690 }
1691 
1692 // This is similar to extension range parsing, except that it accepts field
1693 // name literals.
ParseReserved(DescriptorProto * message,const LocationRecorder & message_location)1694 bool Parser::ParseReserved(DescriptorProto* message,
1695                            const LocationRecorder& message_location) {
1696   io::Tokenizer::Token start_token = input_->current();
1697   // Parse the declaration.
1698   DO(Consume("reserved"));
1699   if (LookingAtType(io::Tokenizer::TYPE_STRING)) {
1700     LocationRecorder location(message_location,
1701                               DescriptorProto::kReservedNameFieldNumber);
1702     location.StartAt(start_token);
1703     return ParseReservedNames(message, location);
1704   } else {
1705     LocationRecorder location(message_location,
1706                               DescriptorProto::kReservedRangeFieldNumber);
1707     location.StartAt(start_token);
1708     return ParseReservedNumbers(message, location);
1709   }
1710 }
1711 
ParseReservedNames(DescriptorProto * message,const LocationRecorder & parent_location)1712 bool Parser::ParseReservedNames(DescriptorProto* message,
1713                                 const LocationRecorder& parent_location) {
1714   do {
1715     LocationRecorder location(parent_location, message->reserved_name_size());
1716     DO(ConsumeString(message->add_reserved_name(), "Expected field name."));
1717   } while (TryConsume(","));
1718   DO(ConsumeEndOfDeclaration(";", &parent_location));
1719   return true;
1720 }
1721 
ParseReservedNumbers(DescriptorProto * message,const LocationRecorder & parent_location)1722 bool Parser::ParseReservedNumbers(DescriptorProto* message,
1723                                   const LocationRecorder& parent_location) {
1724   bool first = true;
1725   do {
1726     LocationRecorder location(parent_location, message->reserved_range_size());
1727 
1728     DescriptorProto::ReservedRange* range = message->add_reserved_range();
1729     int start, end;
1730     io::Tokenizer::Token start_token;
1731     {
1732       LocationRecorder start_location(
1733           location, DescriptorProto::ReservedRange::kStartFieldNumber);
1734       start_token = input_->current();
1735       DO(ConsumeInteger(&start, (first ? "Expected field name or number range."
1736                                        : "Expected field number range.")));
1737     }
1738 
1739     if (TryConsume("to")) {
1740       LocationRecorder end_location(
1741           location, DescriptorProto::ReservedRange::kEndFieldNumber);
1742       if (TryConsume("max")) {
1743         // Set to the sentinel value - 1 since we increment the value below.
1744         // The actual value of the end of the range should be set with
1745         // AdjustExtensionRangesWithMaxEndNumber.
1746         end = kMaxRangeSentinel - 1;
1747       } else {
1748         DO(ConsumeInteger(&end, "Expected integer."));
1749       }
1750     } else {
1751       LocationRecorder end_location(
1752           location, DescriptorProto::ReservedRange::kEndFieldNumber);
1753       end_location.StartAt(start_token);
1754       end_location.EndAt(start_token);
1755       end = start;
1756     }
1757 
1758     // Users like to specify inclusive ranges, but in code we like the end
1759     // number to be exclusive.
1760     ++end;
1761 
1762     range->set_start(start);
1763     range->set_end(end);
1764     first = false;
1765   } while (TryConsume(","));
1766 
1767   DO(ConsumeEndOfDeclaration(";", &parent_location));
1768   return true;
1769 }
1770 
ParseReserved(EnumDescriptorProto * message,const LocationRecorder & message_location)1771 bool Parser::ParseReserved(EnumDescriptorProto* message,
1772                            const LocationRecorder& message_location) {
1773   io::Tokenizer::Token start_token = input_->current();
1774   // Parse the declaration.
1775   DO(Consume("reserved"));
1776   if (LookingAtType(io::Tokenizer::TYPE_STRING)) {
1777     LocationRecorder location(message_location,
1778                               DescriptorProto::kReservedNameFieldNumber);
1779     location.StartAt(start_token);
1780     return ParseReservedNames(message, location);
1781   } else {
1782     LocationRecorder location(message_location,
1783                               DescriptorProto::kReservedRangeFieldNumber);
1784     location.StartAt(start_token);
1785     return ParseReservedNumbers(message, location);
1786   }
1787 }
1788 
ParseReservedNames(EnumDescriptorProto * message,const LocationRecorder & parent_location)1789 bool Parser::ParseReservedNames(EnumDescriptorProto* message,
1790                                 const LocationRecorder& parent_location) {
1791   do {
1792     LocationRecorder location(parent_location, message->reserved_name_size());
1793     DO(ConsumeString(message->add_reserved_name(), "Expected enum value."));
1794   } while (TryConsume(","));
1795   DO(ConsumeEndOfDeclaration(";", &parent_location));
1796   return true;
1797 }
1798 
ParseReservedNumbers(EnumDescriptorProto * message,const LocationRecorder & parent_location)1799 bool Parser::ParseReservedNumbers(EnumDescriptorProto* message,
1800                                   const LocationRecorder& parent_location) {
1801   bool first = true;
1802   do {
1803     LocationRecorder location(parent_location, message->reserved_range_size());
1804 
1805     EnumDescriptorProto::EnumReservedRange* range =
1806         message->add_reserved_range();
1807     int start, end;
1808     io::Tokenizer::Token start_token;
1809     {
1810       LocationRecorder start_location(
1811           location, EnumDescriptorProto::EnumReservedRange::kStartFieldNumber);
1812       start_token = input_->current();
1813       DO(ConsumeSignedInteger(&start,
1814                               (first ? "Expected enum value or number range."
1815                                      : "Expected enum number range.")));
1816     }
1817 
1818     if (TryConsume("to")) {
1819       LocationRecorder end_location(
1820           location, EnumDescriptorProto::EnumReservedRange::kEndFieldNumber);
1821       if (TryConsume("max")) {
1822         // This is in the enum descriptor path, which doesn't have the message
1823         // set duality to fix up, so it doesn't integrate with the sentinel.
1824         end = INT_MAX;
1825       } else {
1826         DO(ConsumeSignedInteger(&end, "Expected integer."));
1827       }
1828     } else {
1829       LocationRecorder end_location(
1830           location, EnumDescriptorProto::EnumReservedRange::kEndFieldNumber);
1831       end_location.StartAt(start_token);
1832       end_location.EndAt(start_token);
1833       end = start;
1834     }
1835 
1836     range->set_start(start);
1837     range->set_end(end);
1838     first = false;
1839   } while (TryConsume(","));
1840 
1841   DO(ConsumeEndOfDeclaration(";", &parent_location));
1842   return true;
1843 }
1844 
ParseExtend(RepeatedPtrField<FieldDescriptorProto> * extensions,RepeatedPtrField<DescriptorProto> * messages,const LocationRecorder & parent_location,int location_field_number_for_nested_type,const LocationRecorder & extend_location,const FileDescriptorProto * containing_file)1845 bool Parser::ParseExtend(RepeatedPtrField<FieldDescriptorProto>* extensions,
1846                          RepeatedPtrField<DescriptorProto>* messages,
1847                          const LocationRecorder& parent_location,
1848                          int location_field_number_for_nested_type,
1849                          const LocationRecorder& extend_location,
1850                          const FileDescriptorProto* containing_file) {
1851   DO(Consume("extend"));
1852 
1853   // Parse the extendee type.
1854   io::Tokenizer::Token extendee_start = input_->current();
1855   std::string extendee;
1856   DO(ParseUserDefinedType(&extendee));
1857   io::Tokenizer::Token extendee_end = input_->previous();
1858 
1859   // Parse the block.
1860   DO(ConsumeEndOfDeclaration("{", &extend_location));
1861 
1862   bool is_first = true;
1863 
1864   do {
1865     if (AtEnd()) {
1866       AddError("Reached end of input in extend definition (missing '}').");
1867       return false;
1868     }
1869 
1870     // Note that kExtensionFieldNumber was already pushed by the parent.
1871     LocationRecorder location(extend_location, extensions->size());
1872 
1873     FieldDescriptorProto* field = extensions->Add();
1874 
1875     {
1876       LocationRecorder extendee_location(
1877           location, FieldDescriptorProto::kExtendeeFieldNumber);
1878       extendee_location.StartAt(extendee_start);
1879       extendee_location.EndAt(extendee_end);
1880 
1881       if (is_first) {
1882         extendee_location.RecordLegacyLocation(
1883             field, DescriptorPool::ErrorCollector::EXTENDEE);
1884         is_first = false;
1885       }
1886     }
1887 
1888     field->set_extendee(extendee);
1889 
1890     if (!ParseMessageField(field, messages, parent_location,
1891                            location_field_number_for_nested_type, location,
1892                            containing_file)) {
1893       // This statement failed to parse.  Skip it, but keep looping to parse
1894       // other statements.
1895       SkipStatement();
1896     }
1897   } while (!TryConsumeEndOfDeclaration("}", NULL));
1898 
1899   return true;
1900 }
1901 
ParseOneof(OneofDescriptorProto * oneof_decl,DescriptorProto * containing_type,int oneof_index,const LocationRecorder & oneof_location,const LocationRecorder & containing_type_location,const FileDescriptorProto * containing_file)1902 bool Parser::ParseOneof(OneofDescriptorProto* oneof_decl,
1903                         DescriptorProto* containing_type, int oneof_index,
1904                         const LocationRecorder& oneof_location,
1905                         const LocationRecorder& containing_type_location,
1906                         const FileDescriptorProto* containing_file) {
1907   DO(Consume("oneof"));
1908 
1909   {
1910     LocationRecorder name_location(oneof_location,
1911                                    OneofDescriptorProto::kNameFieldNumber);
1912     DO(ConsumeIdentifier(oneof_decl->mutable_name(), "Expected oneof name."));
1913   }
1914 
1915   DO(ConsumeEndOfDeclaration("{", &oneof_location));
1916 
1917   do {
1918     if (AtEnd()) {
1919       AddError("Reached end of input in oneof definition (missing '}').");
1920       return false;
1921     }
1922 
1923     if (LookingAt("option")) {
1924       LocationRecorder option_location(
1925           oneof_location, OneofDescriptorProto::kOptionsFieldNumber);
1926       if (!ParseOption(oneof_decl->mutable_options(), option_location,
1927                        containing_file, OPTION_STATEMENT)) {
1928         return false;
1929       }
1930       continue;
1931     }
1932 
1933     // Print a nice error if the user accidentally tries to place a label
1934     // on an individual member of a oneof.
1935     if (LookingAt("required") || LookingAt("optional") ||
1936         LookingAt("repeated")) {
1937       AddError(
1938           "Fields in oneofs must not have labels (required / optional "
1939           "/ repeated).");
1940       // We can continue parsing here because we understand what the user
1941       // meant.  The error report will still make parsing fail overall.
1942       input_->Next();
1943     }
1944 
1945     LocationRecorder field_location(containing_type_location,
1946                                     DescriptorProto::kFieldFieldNumber,
1947                                     containing_type->field_size());
1948 
1949     FieldDescriptorProto* field = containing_type->add_field();
1950     field->set_label(FieldDescriptorProto::LABEL_OPTIONAL);
1951     field->set_oneof_index(oneof_index);
1952 
1953     if (!ParseMessageFieldNoLabel(field, containing_type->mutable_nested_type(),
1954                                   containing_type_location,
1955                                   DescriptorProto::kNestedTypeFieldNumber,
1956                                   field_location, containing_file)) {
1957       // This statement failed to parse.  Skip it, but keep looping to parse
1958       // other statements.
1959       SkipStatement();
1960     }
1961   } while (!TryConsumeEndOfDeclaration("}", NULL));
1962 
1963   return true;
1964 }
1965 
1966 // -------------------------------------------------------------------
1967 // Enums
1968 
ParseEnumDefinition(EnumDescriptorProto * enum_type,const LocationRecorder & enum_location,const FileDescriptorProto * containing_file)1969 bool Parser::ParseEnumDefinition(EnumDescriptorProto* enum_type,
1970                                  const LocationRecorder& enum_location,
1971                                  const FileDescriptorProto* containing_file) {
1972   DO(Consume("enum"));
1973 
1974   {
1975     LocationRecorder location(enum_location,
1976                               EnumDescriptorProto::kNameFieldNumber);
1977     location.RecordLegacyLocation(enum_type,
1978                                   DescriptorPool::ErrorCollector::NAME);
1979     DO(ConsumeIdentifier(enum_type->mutable_name(), "Expected enum name."));
1980   }
1981 
1982   DO(ParseEnumBlock(enum_type, enum_location, containing_file));
1983 
1984   DO(ValidateEnum(enum_type));
1985 
1986   return true;
1987 }
1988 
ParseEnumBlock(EnumDescriptorProto * enum_type,const LocationRecorder & enum_location,const FileDescriptorProto * containing_file)1989 bool Parser::ParseEnumBlock(EnumDescriptorProto* enum_type,
1990                             const LocationRecorder& enum_location,
1991                             const FileDescriptorProto* containing_file) {
1992   DO(ConsumeEndOfDeclaration("{", &enum_location));
1993 
1994   while (!TryConsumeEndOfDeclaration("}", NULL)) {
1995     if (AtEnd()) {
1996       AddError("Reached end of input in enum definition (missing '}').");
1997       return false;
1998     }
1999 
2000     if (!ParseEnumStatement(enum_type, enum_location, containing_file)) {
2001       // This statement failed to parse.  Skip it, but keep looping to parse
2002       // other statements.
2003       SkipStatement();
2004     }
2005   }
2006 
2007   return true;
2008 }
2009 
ParseEnumStatement(EnumDescriptorProto * enum_type,const LocationRecorder & enum_location,const FileDescriptorProto * containing_file)2010 bool Parser::ParseEnumStatement(EnumDescriptorProto* enum_type,
2011                                 const LocationRecorder& enum_location,
2012                                 const FileDescriptorProto* containing_file) {
2013   if (TryConsumeEndOfDeclaration(";", NULL)) {
2014     // empty statement; ignore
2015     return true;
2016   } else if (LookingAt("option")) {
2017     LocationRecorder location(enum_location,
2018                               EnumDescriptorProto::kOptionsFieldNumber);
2019     return ParseOption(enum_type->mutable_options(), location, containing_file,
2020                        OPTION_STATEMENT);
2021   } else if (LookingAt("reserved")) {
2022     return ParseReserved(enum_type, enum_location);
2023   } else {
2024     LocationRecorder location(enum_location,
2025                               EnumDescriptorProto::kValueFieldNumber,
2026                               enum_type->value_size());
2027     return ParseEnumConstant(enum_type->add_value(), location, containing_file);
2028   }
2029 }
2030 
ParseEnumConstant(EnumValueDescriptorProto * enum_value,const LocationRecorder & enum_value_location,const FileDescriptorProto * containing_file)2031 bool Parser::ParseEnumConstant(EnumValueDescriptorProto* enum_value,
2032                                const LocationRecorder& enum_value_location,
2033                                const FileDescriptorProto* containing_file) {
2034   // Parse name.
2035   {
2036     LocationRecorder location(enum_value_location,
2037                               EnumValueDescriptorProto::kNameFieldNumber);
2038     location.RecordLegacyLocation(enum_value,
2039                                   DescriptorPool::ErrorCollector::NAME);
2040     DO(ConsumeIdentifier(enum_value->mutable_name(),
2041                          "Expected enum constant name."));
2042   }
2043 
2044   DO(Consume("=", "Missing numeric value for enum constant."));
2045 
2046   // Parse value.
2047   {
2048     LocationRecorder location(enum_value_location,
2049                               EnumValueDescriptorProto::kNumberFieldNumber);
2050     location.RecordLegacyLocation(enum_value,
2051                                   DescriptorPool::ErrorCollector::NUMBER);
2052 
2053     int number;
2054     DO(ConsumeSignedInteger(&number, "Expected integer."));
2055     enum_value->set_number(number);
2056   }
2057 
2058   DO(ParseEnumConstantOptions(enum_value, enum_value_location,
2059                               containing_file));
2060 
2061   DO(ConsumeEndOfDeclaration(";", &enum_value_location));
2062 
2063   return true;
2064 }
2065 
ParseEnumConstantOptions(EnumValueDescriptorProto * value,const LocationRecorder & enum_value_location,const FileDescriptorProto * containing_file)2066 bool Parser::ParseEnumConstantOptions(
2067     EnumValueDescriptorProto* value,
2068     const LocationRecorder& enum_value_location,
2069     const FileDescriptorProto* containing_file) {
2070   if (!LookingAt("[")) return true;
2071 
2072   LocationRecorder location(enum_value_location,
2073                             EnumValueDescriptorProto::kOptionsFieldNumber);
2074 
2075   DO(Consume("["));
2076 
2077   do {
2078     DO(ParseOption(value->mutable_options(), location, containing_file,
2079                    OPTION_ASSIGNMENT));
2080   } while (TryConsume(","));
2081 
2082   DO(Consume("]"));
2083   return true;
2084 }
2085 
2086 // -------------------------------------------------------------------
2087 // Services
2088 
ParseServiceDefinition(ServiceDescriptorProto * service,const LocationRecorder & service_location,const FileDescriptorProto * containing_file)2089 bool Parser::ParseServiceDefinition(
2090     ServiceDescriptorProto* service, const LocationRecorder& service_location,
2091     const FileDescriptorProto* containing_file) {
2092   DO(Consume("service"));
2093 
2094   {
2095     LocationRecorder location(service_location,
2096                               ServiceDescriptorProto::kNameFieldNumber);
2097     location.RecordLegacyLocation(service,
2098                                   DescriptorPool::ErrorCollector::NAME);
2099     DO(ConsumeIdentifier(service->mutable_name(), "Expected service name."));
2100   }
2101 
2102   DO(ParseServiceBlock(service, service_location, containing_file));
2103   return true;
2104 }
2105 
ParseServiceBlock(ServiceDescriptorProto * service,const LocationRecorder & service_location,const FileDescriptorProto * containing_file)2106 bool Parser::ParseServiceBlock(ServiceDescriptorProto* service,
2107                                const LocationRecorder& service_location,
2108                                const FileDescriptorProto* containing_file) {
2109   DO(ConsumeEndOfDeclaration("{", &service_location));
2110 
2111   while (!TryConsumeEndOfDeclaration("}", NULL)) {
2112     if (AtEnd()) {
2113       AddError("Reached end of input in service definition (missing '}').");
2114       return false;
2115     }
2116 
2117     if (!ParseServiceStatement(service, service_location, containing_file)) {
2118       // This statement failed to parse.  Skip it, but keep looping to parse
2119       // other statements.
2120       SkipStatement();
2121     }
2122   }
2123 
2124   return true;
2125 }
2126 
ParseServiceStatement(ServiceDescriptorProto * service,const LocationRecorder & service_location,const FileDescriptorProto * containing_file)2127 bool Parser::ParseServiceStatement(ServiceDescriptorProto* service,
2128                                    const LocationRecorder& service_location,
2129                                    const FileDescriptorProto* containing_file) {
2130   if (TryConsumeEndOfDeclaration(";", NULL)) {
2131     // empty statement; ignore
2132     return true;
2133   } else if (LookingAt("option")) {
2134     LocationRecorder location(service_location,
2135                               ServiceDescriptorProto::kOptionsFieldNumber);
2136     return ParseOption(service->mutable_options(), location, containing_file,
2137                        OPTION_STATEMENT);
2138   } else {
2139     LocationRecorder location(service_location,
2140                               ServiceDescriptorProto::kMethodFieldNumber,
2141                               service->method_size());
2142     return ParseServiceMethod(service->add_method(), location, containing_file);
2143   }
2144 }
2145 
ParseServiceMethod(MethodDescriptorProto * method,const LocationRecorder & method_location,const FileDescriptorProto * containing_file)2146 bool Parser::ParseServiceMethod(MethodDescriptorProto* method,
2147                                 const LocationRecorder& method_location,
2148                                 const FileDescriptorProto* containing_file) {
2149   DO(Consume("rpc"));
2150 
2151   {
2152     LocationRecorder location(method_location,
2153                               MethodDescriptorProto::kNameFieldNumber);
2154     location.RecordLegacyLocation(method, DescriptorPool::ErrorCollector::NAME);
2155     DO(ConsumeIdentifier(method->mutable_name(), "Expected method name."));
2156   }
2157 
2158   // Parse input type.
2159   DO(Consume("("));
2160   {
2161     if (LookingAt("stream")) {
2162       LocationRecorder location(
2163           method_location, MethodDescriptorProto::kClientStreamingFieldNumber);
2164       location.RecordLegacyLocation(method,
2165                                     DescriptorPool::ErrorCollector::OTHER);
2166       method->set_client_streaming(true);
2167       DO(Consume("stream"));
2168 
2169     }
2170     LocationRecorder location(method_location,
2171                               MethodDescriptorProto::kInputTypeFieldNumber);
2172     location.RecordLegacyLocation(method,
2173                                   DescriptorPool::ErrorCollector::INPUT_TYPE);
2174     DO(ParseUserDefinedType(method->mutable_input_type()));
2175   }
2176   DO(Consume(")"));
2177 
2178   // Parse output type.
2179   DO(Consume("returns"));
2180   DO(Consume("("));
2181   {
2182     if (LookingAt("stream")) {
2183       LocationRecorder location(
2184           method_location, MethodDescriptorProto::kServerStreamingFieldNumber);
2185       location.RecordLegacyLocation(method,
2186                                     DescriptorPool::ErrorCollector::OTHER);
2187       DO(Consume("stream"));
2188       method->set_server_streaming(true);
2189 
2190     }
2191     LocationRecorder location(method_location,
2192                               MethodDescriptorProto::kOutputTypeFieldNumber);
2193     location.RecordLegacyLocation(method,
2194                                   DescriptorPool::ErrorCollector::OUTPUT_TYPE);
2195     DO(ParseUserDefinedType(method->mutable_output_type()));
2196   }
2197   DO(Consume(")"));
2198 
2199   if (LookingAt("{")) {
2200     // Options!
2201     DO(ParseMethodOptions(method_location, containing_file,
2202                           MethodDescriptorProto::kOptionsFieldNumber,
2203                           method->mutable_options()));
2204   } else {
2205     DO(ConsumeEndOfDeclaration(";", &method_location));
2206   }
2207 
2208   return true;
2209 }
2210 
2211 
ParseMethodOptions(const LocationRecorder & parent_location,const FileDescriptorProto * containing_file,const int optionsFieldNumber,Message * mutable_options)2212 bool Parser::ParseMethodOptions(const LocationRecorder& parent_location,
2213                                 const FileDescriptorProto* containing_file,
2214                                 const int optionsFieldNumber,
2215                                 Message* mutable_options) {
2216   // Options!
2217   ConsumeEndOfDeclaration("{", &parent_location);
2218   while (!TryConsumeEndOfDeclaration("}", NULL)) {
2219     if (AtEnd()) {
2220       AddError("Reached end of input in method options (missing '}').");
2221       return false;
2222     }
2223 
2224     if (TryConsumeEndOfDeclaration(";", NULL)) {
2225       // empty statement; ignore
2226     } else {
2227       LocationRecorder location(parent_location, optionsFieldNumber);
2228       if (!ParseOption(mutable_options, location, containing_file,
2229                        OPTION_STATEMENT)) {
2230         // This statement failed to parse.  Skip it, but keep looping to
2231         // parse other statements.
2232         SkipStatement();
2233       }
2234     }
2235   }
2236 
2237   return true;
2238 }
2239 
2240 // -------------------------------------------------------------------
2241 
ParseLabel(FieldDescriptorProto::Label * label,const LocationRecorder & field_location,const FileDescriptorProto * containing_file)2242 bool Parser::ParseLabel(FieldDescriptorProto::Label* label,
2243                         const LocationRecorder& field_location,
2244                         const FileDescriptorProto* containing_file) {
2245   if (!LookingAt("optional") && !LookingAt("repeated") &&
2246       !LookingAt("required")) {
2247     return false;
2248   }
2249   LocationRecorder location(field_location,
2250                             FieldDescriptorProto::kLabelFieldNumber);
2251   if (TryConsume("optional")) {
2252     *label = FieldDescriptorProto::LABEL_OPTIONAL;
2253   } else if (TryConsume("repeated")) {
2254     *label = FieldDescriptorProto::LABEL_REPEATED;
2255   } else {
2256     Consume("required");
2257     *label = FieldDescriptorProto::LABEL_REQUIRED;
2258   }
2259   return true;
2260 }
2261 
ParseType(FieldDescriptorProto::Type * type,std::string * type_name)2262 bool Parser::ParseType(FieldDescriptorProto::Type* type,
2263                        std::string* type_name) {
2264   TypeNameMap::const_iterator iter = kTypeNames.find(input_->current().text);
2265   if (iter != kTypeNames.end()) {
2266     *type = iter->second;
2267     input_->Next();
2268   } else {
2269     DO(ParseUserDefinedType(type_name));
2270   }
2271   return true;
2272 }
2273 
ParseUserDefinedType(std::string * type_name)2274 bool Parser::ParseUserDefinedType(std::string* type_name) {
2275   type_name->clear();
2276 
2277   TypeNameMap::const_iterator iter = kTypeNames.find(input_->current().text);
2278   if (iter != kTypeNames.end()) {
2279     // Note:  The only place enum types are allowed is for field types, but
2280     //   if we are parsing a field type then we would not get here because
2281     //   primitives are allowed there as well.  So this error message doesn't
2282     //   need to account for enums.
2283     AddError("Expected message type.");
2284 
2285     // Pretend to accept this type so that we can go on parsing.
2286     *type_name = input_->current().text;
2287     input_->Next();
2288     return true;
2289   }
2290 
2291   // A leading "." means the name is fully-qualified.
2292   if (TryConsume(".")) type_name->append(".");
2293 
2294   // Consume the first part of the name.
2295   std::string identifier;
2296   DO(ConsumeIdentifier(&identifier, "Expected type name."));
2297   type_name->append(identifier);
2298 
2299   // Consume more parts.
2300   while (TryConsume(".")) {
2301     type_name->append(".");
2302     DO(ConsumeIdentifier(&identifier, "Expected identifier."));
2303     type_name->append(identifier);
2304   }
2305 
2306   return true;
2307 }
2308 
2309 // ===================================================================
2310 
ParsePackage(FileDescriptorProto * file,const LocationRecorder & root_location,const FileDescriptorProto * containing_file)2311 bool Parser::ParsePackage(FileDescriptorProto* file,
2312                           const LocationRecorder& root_location,
2313                           const FileDescriptorProto* containing_file) {
2314   if (file->has_package()) {
2315     AddError("Multiple package definitions.");
2316     // Don't append the new package to the old one.  Just replace it.  Not
2317     // that it really matters since this is an error anyway.
2318     file->clear_package();
2319   }
2320 
2321   LocationRecorder location(root_location,
2322                             FileDescriptorProto::kPackageFieldNumber);
2323   location.RecordLegacyLocation(file, DescriptorPool::ErrorCollector::NAME);
2324 
2325   DO(Consume("package"));
2326 
2327   while (true) {
2328     std::string identifier;
2329     DO(ConsumeIdentifier(&identifier, "Expected identifier."));
2330     file->mutable_package()->append(identifier);
2331     if (!TryConsume(".")) break;
2332     file->mutable_package()->append(".");
2333   }
2334 
2335   DO(ConsumeEndOfDeclaration(";", &location));
2336 
2337   return true;
2338 }
2339 
ParseImport(RepeatedPtrField<std::string> * dependency,RepeatedField<int32> * public_dependency,RepeatedField<int32> * weak_dependency,const LocationRecorder & root_location,const FileDescriptorProto * containing_file)2340 bool Parser::ParseImport(RepeatedPtrField<std::string>* dependency,
2341                          RepeatedField<int32>* public_dependency,
2342                          RepeatedField<int32>* weak_dependency,
2343                          const LocationRecorder& root_location,
2344                          const FileDescriptorProto* containing_file) {
2345   LocationRecorder location(root_location,
2346                             FileDescriptorProto::kDependencyFieldNumber,
2347                             dependency->size());
2348 
2349   DO(Consume("import"));
2350 
2351   if (LookingAt("public")) {
2352     LocationRecorder public_location(
2353         root_location, FileDescriptorProto::kPublicDependencyFieldNumber,
2354         public_dependency->size());
2355     DO(Consume("public"));
2356     *public_dependency->Add() = dependency->size();
2357   } else if (LookingAt("weak")) {
2358     LocationRecorder weak_location(
2359         root_location, FileDescriptorProto::kWeakDependencyFieldNumber,
2360         weak_dependency->size());
2361     weak_location.RecordLegacyImportLocation(containing_file, "weak");
2362     DO(Consume("weak"));
2363     *weak_dependency->Add() = dependency->size();
2364   }
2365 
2366   std::string import_file;
2367   DO(ConsumeString(&import_file,
2368                    "Expected a string naming the file to import."));
2369   *dependency->Add() = import_file;
2370   location.RecordLegacyImportLocation(containing_file, import_file);
2371 
2372   DO(ConsumeEndOfDeclaration(";", &location));
2373 
2374   return true;
2375 }
2376 
2377 // ===================================================================
2378 
SourceLocationTable()2379 SourceLocationTable::SourceLocationTable() {}
~SourceLocationTable()2380 SourceLocationTable::~SourceLocationTable() {}
2381 
Find(const Message * descriptor,DescriptorPool::ErrorCollector::ErrorLocation location,int * line,int * column) const2382 bool SourceLocationTable::Find(
2383     const Message* descriptor,
2384     DescriptorPool::ErrorCollector::ErrorLocation location, int* line,
2385     int* column) const {
2386   const std::pair<int, int>* result =
2387       FindOrNull(location_map_, std::make_pair(descriptor, location));
2388   if (result == NULL) {
2389     *line = -1;
2390     *column = 0;
2391     return false;
2392   } else {
2393     *line = result->first;
2394     *column = result->second;
2395     return true;
2396   }
2397 }
2398 
FindImport(const Message * descriptor,const std::string & name,int * line,int * column) const2399 bool SourceLocationTable::FindImport(const Message* descriptor,
2400                                      const std::string& name, int* line,
2401                                      int* column) const {
2402   const std::pair<int, int>* result =
2403       FindOrNull(import_location_map_, std::make_pair(descriptor, name));
2404   if (result == nullptr) {
2405     *line = -1;
2406     *column = 0;
2407     return false;
2408   } else {
2409     *line = result->first;
2410     *column = result->second;
2411     return true;
2412   }
2413 }
2414 
Add(const Message * descriptor,DescriptorPool::ErrorCollector::ErrorLocation location,int line,int column)2415 void SourceLocationTable::Add(
2416     const Message* descriptor,
2417     DescriptorPool::ErrorCollector::ErrorLocation location, int line,
2418     int column) {
2419   location_map_[std::make_pair(descriptor, location)] =
2420       std::make_pair(line, column);
2421 }
2422 
AddImport(const Message * descriptor,const std::string & name,int line,int column)2423 void SourceLocationTable::AddImport(const Message* descriptor,
2424                                     const std::string& name, int line,
2425                                     int column) {
2426   import_location_map_[std::make_pair(descriptor, name)] =
2427       std::make_pair(line, column);
2428 }
2429 
Clear()2430 void SourceLocationTable::Clear() { location_map_.clear(); }
2431 
2432 }  // namespace compiler
2433 }  // namespace protobuf
2434 }  // namespace google
2435