// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc.  All rights reserved.
//
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file or at
// https://developers.google.com/open-source/licenses/bsd

// Author: kenton@google.com (Kenton Varda)
//  Based on original Protocol Buffers design by
//  Sanjay Ghemawat, Jeff Dean, and others.
//
// Recursive descent FTW.
13 
14 #include "google/protobuf/compiler/parser.h"
15 
16 #include <float.h>
17 
18 #include <cstddef>
19 #include <cstdint>
20 #include <limits>
21 #include <string>
22 #include <tuple>
23 #include <utility>
24 #include <vector>
25 
26 #include "absl/base/casts.h"
27 #include "absl/cleanup/cleanup.h"
28 #include "absl/container/flat_hash_map.h"
29 #include "absl/container/flat_hash_set.h"
30 #include "absl/log/absl_check.h"
31 #include "absl/log/absl_log.h"
32 #include "absl/strings/ascii.h"
33 #include "absl/strings/escaping.h"
34 #include "absl/strings/str_cat.h"
35 #include "absl/strings/str_format.h"
36 #include "absl/strings/string_view.h"
37 #include "google/protobuf/descriptor.h"
38 #include "google/protobuf/descriptor.pb.h"
39 #include "google/protobuf/io/strtod.h"
40 #include "google/protobuf/io/tokenizer.h"
41 #include "google/protobuf/message_lite.h"
42 #include "google/protobuf/port.h"
43 #include "google/protobuf/wire_format.h"
44 
45 // Must be included last.
46 #include "google/protobuf/port_def.inc"
47 
48 namespace google {
49 namespace protobuf {
50 namespace compiler {
51 namespace {
52 
53 using TypeNameMap =
54     absl::flat_hash_map<absl::string_view, FieldDescriptorProto::Type>;
55 
GetTypeNameTable()56 const TypeNameMap& GetTypeNameTable() {
57   static auto* table = new auto([]() {
58     TypeNameMap result;
59 
60     result["double"] = FieldDescriptorProto::TYPE_DOUBLE;
61     result["float"] = FieldDescriptorProto::TYPE_FLOAT;
62     result["uint64"] = FieldDescriptorProto::TYPE_UINT64;
63     result["fixed64"] = FieldDescriptorProto::TYPE_FIXED64;
64     result["fixed32"] = FieldDescriptorProto::TYPE_FIXED32;
65     result["bool"] = FieldDescriptorProto::TYPE_BOOL;
66     result["string"] = FieldDescriptorProto::TYPE_STRING;
67     result["group"] = FieldDescriptorProto::TYPE_GROUP;
68 
69     result["bytes"] = FieldDescriptorProto::TYPE_BYTES;
70     result["uint32"] = FieldDescriptorProto::TYPE_UINT32;
71     result["sfixed32"] = FieldDescriptorProto::TYPE_SFIXED32;
72     result["sfixed64"] = FieldDescriptorProto::TYPE_SFIXED64;
73     result["int32"] = FieldDescriptorProto::TYPE_INT32;
74     result["int64"] = FieldDescriptorProto::TYPE_INT64;
75     result["sint32"] = FieldDescriptorProto::TYPE_SINT32;
76     result["sint64"] = FieldDescriptorProto::TYPE_SINT64;
77 
78     return result;
79   }());
80   return *table;
81 }
82 
83 // Camel-case the field name and append "Entry" for generated map entry name.
84 // e.g. map<KeyType, ValueType> foo_map => FooMapEntry
MapEntryName(absl::string_view field_name)85 std::string MapEntryName(absl::string_view field_name) {
86   std::string result;
87   static const char kSuffix[] = "Entry";
88   result.reserve(field_name.size() + sizeof(kSuffix));
89   bool cap_next = true;
90   for (const char field_name_char : field_name) {
91     if (field_name_char == '_') {
92       cap_next = true;
93     } else if (cap_next) {
94       // Note: Do not use ctype.h due to locales.
95       if ('a' <= field_name_char && field_name_char <= 'z') {
96         result.push_back(field_name_char - 'a' + 'A');
97       } else {
98         result.push_back(field_name_char);
99       }
100       cap_next = false;
101     } else {
102       result.push_back(field_name_char);
103     }
104   }
105   result.append(kSuffix);
106   return result;
107 }
108 
// True iff `c` is an ASCII upper-case letter.
bool IsUppercase(char c) {
  return 'A' <= c && c <= 'Z';
}
110 
// True iff `c` is an ASCII lower-case letter.
bool IsLowercase(char c) {
  return 'a' <= c && c <= 'z';
}
112 
// True iff `c` is an ASCII decimal digit.
bool IsNumber(char c) {
  return '0' <= c && c <= '9';
}
114 
IsUpperCamelCase(absl::string_view name)115 bool IsUpperCamelCase(absl::string_view name) {
116   if (name.empty()) {
117     return true;
118   }
119   // Name must start with an upper case character.
120   if (!IsUppercase(name[0])) {
121     return false;
122   }
123   // Must not contains underscore.
124   for (const char c : name) {
125     if (c == '_') {
126       return false;
127     }
128   }
129   return true;
130 }
131 
IsUpperUnderscore(absl::string_view name)132 bool IsUpperUnderscore(absl::string_view name) {
133   for (const char c : name) {
134     if (!IsUppercase(c) && c != '_' && !IsNumber(c)) {
135       return false;
136     }
137   }
138   return true;
139 }
140 
IsLowerUnderscore(absl::string_view name)141 bool IsLowerUnderscore(absl::string_view name) {
142   for (const char c : name) {
143     if (!IsLowercase(c) && c != '_' && !IsNumber(c)) {
144       return false;
145     }
146   }
147   return true;
148 }
149 
IsNumberFollowUnderscore(absl::string_view name)150 bool IsNumberFollowUnderscore(absl::string_view name) {
151   for (int i = 1; i < name.length(); i++) {
152     const char c = name[i];
153     if (IsNumber(c) && name[i - 1] == '_') {
154       return true;
155     }
156   }
157   return false;
158 }
159 
160 }  // anonymous namespace
161 
// DO(foo) means "run foo; if it evaluates to false, return false from the
// enclosing function".  The empty-then / else form lets the macro be followed
// by a semicolon and used safely as the body of an unbraced if/else.
#define DO(STATEMENT)  \
  if (STATEMENT) {     \
  } else               \
    return false
169 
170 // ===================================================================
171 
Parser()172 Parser::Parser()
173     : input_(nullptr),
174       error_collector_(nullptr),
175       source_location_table_(nullptr),
176       had_errors_(false),
177       require_syntax_identifier_(false),
178       stop_after_syntax_identifier_(false) {
179 }
180 
181 Parser::~Parser() = default;
182 // ===================================================================
183 
LookingAt(absl::string_view text)184 inline bool Parser::LookingAt(absl::string_view text) {
185   return input_->current().text == text;
186 }
187 
LookingAtType(io::Tokenizer::TokenType token_type)188 inline bool Parser::LookingAtType(io::Tokenizer::TokenType token_type) {
189   return input_->current().type == token_type;
190 }
191 
AtEnd()192 inline bool Parser::AtEnd() { return LookingAtType(io::Tokenizer::TYPE_END); }
193 
TryConsume(absl::string_view text)194 bool Parser::TryConsume(absl::string_view text) {
195   if (LookingAt(text)) {
196     input_->Next();
197     return true;
198   } else {
199     return false;
200   }
201 }
202 
Consume(absl::string_view text,ErrorMaker error)203 bool Parser::Consume(absl::string_view text, ErrorMaker error) {
204   if (TryConsume(text)) {
205     return true;
206   } else {
207     RecordError(error);
208     return false;
209   }
210 }
211 
Consume(absl::string_view text)212 bool Parser::Consume(absl::string_view text) {
213   return Consume(text,
214                  [&] { return absl::StrCat("Expected \"", text, "\"."); });
215 }
216 
ConsumeIdentifier(std::string * output,ErrorMaker error)217 bool Parser::ConsumeIdentifier(std::string* output, ErrorMaker error) {
218   if (LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) {
219     *output = input_->current().text;
220     input_->Next();
221     return true;
222   } else {
223     RecordError(error);
224     return false;
225   }
226 }
227 
ConsumeInteger(int * output,ErrorMaker error)228 bool Parser::ConsumeInteger(int* output, ErrorMaker error) {
229   if (LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
230     uint64_t value = 0;
231     if (!io::Tokenizer::ParseInteger(input_->current().text,
232                                      std::numeric_limits<int32_t>::max(),
233                                      &value)) {
234       RecordError("Integer out of range.");
235       // We still return true because we did, in fact, parse an integer.
236     }
237     *output = value;
238     input_->Next();
239     return true;
240   } else {
241     RecordError(error);
242     return false;
243   }
244 }
245 
ConsumeSignedInteger(int * output,ErrorMaker error)246 bool Parser::ConsumeSignedInteger(int* output, ErrorMaker error) {
247   bool is_negative = false;
248   uint64_t max_value = std::numeric_limits<int32_t>::max();
249   if (TryConsume("-")) {
250     is_negative = true;
251     max_value += 1;
252   }
253   uint64_t value = 0;
254   DO(ConsumeInteger64(max_value, &value, error));
255   if (is_negative) value *= -1;
256   *output = value;
257   return true;
258 }
259 
ConsumeInteger64(uint64_t max_value,uint64_t * output,ErrorMaker error)260 bool Parser::ConsumeInteger64(uint64_t max_value, uint64_t* output,
261                               ErrorMaker error) {
262   if (LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
263     if (!io::Tokenizer::ParseInteger(input_->current().text, max_value,
264                                      output)) {
265       RecordError("Integer out of range.");
266       // We still return true because we did, in fact, parse an integer.
267       *output = 0;
268     }
269     input_->Next();
270     return true;
271   } else {
272     RecordError(error);
273     return false;
274   }
275 }
276 
TryConsumeInteger64(uint64_t max_value,uint64_t * output)277 bool Parser::TryConsumeInteger64(uint64_t max_value, uint64_t* output) {
278   if (LookingAtType(io::Tokenizer::TYPE_INTEGER) &&
279       io::Tokenizer::ParseInteger(input_->current().text, max_value, output)) {
280     input_->Next();
281     return true;
282   }
283   return false;
284 }
285 
ConsumeNumber(double * output,ErrorMaker error)286 bool Parser::ConsumeNumber(double* output, ErrorMaker error) {
287   if (LookingAtType(io::Tokenizer::TYPE_FLOAT)) {
288     *output = io::Tokenizer::ParseFloat(input_->current().text);
289     input_->Next();
290     return true;
291   } else if (LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
292     // Also accept integers.
293     uint64_t value = 0;
294     if (io::Tokenizer::ParseInteger(input_->current().text,
295                                     std::numeric_limits<uint64_t>::max(),
296                                     &value)) {
297       *output = value;
298     } else if (input_->current().text[0] == '0') {
299       // octal or hexadecimal; don't bother parsing as float
300       RecordError("Integer out of range.");
301       // We still return true because we did, in fact, parse a number.
302     } else if (!io::Tokenizer::TryParseFloat(input_->current().text, output)) {
303       // out of int range, and not valid float? ��
304       RecordError("Integer out of range.");
305       // We still return true because we did, in fact, parse a number.
306     }
307     input_->Next();
308     return true;
309   } else if (LookingAt("inf")) {
310     *output = std::numeric_limits<double>::infinity();
311     input_->Next();
312     return true;
313   } else if (LookingAt("nan")) {
314     *output = std::numeric_limits<double>::quiet_NaN();
315     input_->Next();
316     return true;
317   } else {
318     RecordError(error);
319     return false;
320   }
321 }
322 
ConsumeString(std::string * output,ErrorMaker error)323 bool Parser::ConsumeString(std::string* output, ErrorMaker error) {
324   if (LookingAtType(io::Tokenizer::TYPE_STRING)) {
325     io::Tokenizer::ParseString(input_->current().text, output);
326     input_->Next();
327     // Allow C++ like concatenation of adjacent string tokens.
328     while (LookingAtType(io::Tokenizer::TYPE_STRING)) {
329       io::Tokenizer::ParseStringAppend(input_->current().text, output);
330       input_->Next();
331     }
332     return true;
333   } else {
334     RecordError(error);
335     return false;
336   }
337 }
338 
TryConsumeEndOfDeclaration(absl::string_view text,const LocationRecorder * location)339 bool Parser::TryConsumeEndOfDeclaration(absl::string_view text,
340                                         const LocationRecorder* location) {
341   if (LookingAt(text)) {
342     std::string leading, trailing;
343     std::vector<std::string> detached;
344     input_->NextWithComments(&trailing, &detached, &leading);
345 
346     // Save the leading comments for next time, and recall the leading comments
347     // from last time.
348     leading.swap(upcoming_doc_comments_);
349 
350     if (location != nullptr) {
351       upcoming_detached_comments_.swap(detached);
352       location->AttachComments(&leading, &trailing, &detached);
353     } else if (text == "}") {
354       // If the current location is null and we are finishing the current scope,
355       // drop pending upcoming detached comments.
356       upcoming_detached_comments_.swap(detached);
357     } else {
358       // Otherwise, append the new detached comments to the existing upcoming
359       // detached comments.
360       upcoming_detached_comments_.insert(upcoming_detached_comments_.end(),
361                                          detached.begin(), detached.end());
362     }
363 
364     return true;
365   } else {
366     return false;
367   }
368 }
369 
ConsumeEndOfDeclaration(absl::string_view text,const LocationRecorder * location)370 bool Parser::ConsumeEndOfDeclaration(absl::string_view text,
371                                      const LocationRecorder* location) {
372   if (TryConsumeEndOfDeclaration(text, location)) {
373     return true;
374   } else {
375     RecordError([&] { return absl::StrCat("Expected \"", text, "\"."); });
376     return false;
377   }
378 }
379 
380 // -------------------------------------------------------------------
381 
RecordError(int line,int column,ErrorMaker error)382 void Parser::RecordError(int line, int column, ErrorMaker error) {
383   if (error_collector_ != nullptr) {
384     error_collector_->RecordError(line, column, error.get());
385   }
386   had_errors_ = true;
387 }
388 
RecordError(ErrorMaker error)389 void Parser::RecordError(ErrorMaker error) {
390   RecordError(input_->current().line, input_->current().column, error);
391 }
392 
RecordWarning(int line,int column,ErrorMaker error)393 void Parser::RecordWarning(int line, int column, ErrorMaker error) {
394   if (error_collector_ != nullptr) {
395     error_collector_->RecordWarning(line, column, error.get());
396   }
397 }
398 
399 // Invokes error_collector_->RecordWarning() with the line and column number
400 // of the current token.
RecordWarning(ErrorMaker error)401 void Parser::RecordWarning(ErrorMaker error) {
402   RecordWarning(input_->current().line, input_->current().column, error);
403 }
404 
405 // -------------------------------------------------------------------
406 
LocationRecorder(Parser * parser)407 Parser::LocationRecorder::LocationRecorder(Parser* parser)
408     : parser_(parser),
409       source_code_info_(parser->source_code_info_),
410       location_(parser_->source_code_info_->add_location()) {
411   location_->add_span(parser_->input_->current().line);
412   location_->add_span(parser_->input_->current().column);
413 }
414 
LocationRecorder(const LocationRecorder & parent)415 Parser::LocationRecorder::LocationRecorder(const LocationRecorder& parent) {
416   Init(parent, parent.source_code_info_);
417 }
418 
LocationRecorder(const LocationRecorder & parent,int path1,SourceCodeInfo * source_code_info)419 Parser::LocationRecorder::LocationRecorder(const LocationRecorder& parent,
420                                            int path1,
421                                            SourceCodeInfo* source_code_info) {
422   Init(parent, source_code_info);
423   AddPath(path1);
424 }
425 
LocationRecorder(const LocationRecorder & parent,int path1)426 Parser::LocationRecorder::LocationRecorder(const LocationRecorder& parent,
427                                            int path1) {
428   Init(parent, parent.source_code_info_);
429   AddPath(path1);
430 }
431 
LocationRecorder(const LocationRecorder & parent,int path1,int path2)432 Parser::LocationRecorder::LocationRecorder(const LocationRecorder& parent,
433                                            int path1, int path2) {
434   Init(parent, parent.source_code_info_);
435   AddPath(path1);
436   AddPath(path2);
437 }
438 
Init(const LocationRecorder & parent,SourceCodeInfo * source_code_info)439 void Parser::LocationRecorder::Init(const LocationRecorder& parent,
440                                     SourceCodeInfo* source_code_info) {
441   parser_ = parent.parser_;
442   source_code_info_ = source_code_info;
443 
444   location_ = source_code_info_->add_location();
445   location_->mutable_path()->CopyFrom(parent.location_->path());
446 
447   location_->add_span(parser_->input_->current().line);
448   location_->add_span(parser_->input_->current().column);
449 }
450 
~LocationRecorder()451 Parser::LocationRecorder::~LocationRecorder() {
452   if (location_->span_size() <= 2) {
453     EndAt(parser_->input_->previous());
454   }
455 }
456 
AddPath(int path_component)457 void Parser::LocationRecorder::AddPath(int path_component) {
458   location_->add_path(path_component);
459 }
460 
StartAt(const io::Tokenizer::Token & token)461 void Parser::LocationRecorder::StartAt(const io::Tokenizer::Token& token) {
462   location_->set_span(0, token.line);
463   location_->set_span(1, token.column);
464 }
465 
StartAt(const LocationRecorder & other)466 void Parser::LocationRecorder::StartAt(const LocationRecorder& other) {
467   location_->set_span(0, other.location_->span(0));
468   location_->set_span(1, other.location_->span(1));
469 }
470 
EndAt(const io::Tokenizer::Token & token)471 void Parser::LocationRecorder::EndAt(const io::Tokenizer::Token& token) {
472   if (token.line != location_->span(0)) {
473     location_->add_span(token.line);
474   }
475   location_->add_span(token.end_column);
476 }
477 
RecordLegacyLocation(const Message * descriptor,DescriptorPool::ErrorCollector::ErrorLocation location)478 void Parser::LocationRecorder::RecordLegacyLocation(
479     const Message* descriptor,
480     DescriptorPool::ErrorCollector::ErrorLocation location) {
481   if (parser_->source_location_table_ != nullptr) {
482     parser_->source_location_table_->Add(
483         descriptor, location, location_->span(0), location_->span(1));
484   }
485 }
486 
RecordLegacyImportLocation(const Message * descriptor,const std::string & name)487 void Parser::LocationRecorder::RecordLegacyImportLocation(
488     const Message* descriptor, const std::string& name) {
489   if (parser_->source_location_table_ != nullptr) {
490     parser_->source_location_table_->AddImport(
491         descriptor, name, location_->span(0), location_->span(1));
492   }
493 }
494 
CurrentPathSize() const495 int Parser::LocationRecorder::CurrentPathSize() const {
496   return location_->path_size();
497 }
498 
AttachComments(std::string * leading,std::string * trailing,std::vector<std::string> * detached_comments) const499 void Parser::LocationRecorder::AttachComments(
500     std::string* leading, std::string* trailing,
501     std::vector<std::string>* detached_comments) const {
502   ABSL_CHECK(!location_->has_leading_comments());
503   ABSL_CHECK(!location_->has_trailing_comments());
504 
505   if (!leading->empty()) {
506     location_->mutable_leading_comments()->swap(*leading);
507   }
508   if (!trailing->empty()) {
509     location_->mutable_trailing_comments()->swap(*trailing);
510   }
511   for (int i = 0; i < detached_comments->size(); ++i) {
512     location_->add_leading_detached_comments()->swap((*detached_comments)[i]);
513   }
514   detached_comments->clear();
515 }
516 
517 // -------------------------------------------------------------------
518 
SkipStatement()519 void Parser::SkipStatement() {
520   while (true) {
521     if (AtEnd()) {
522       return;
523     } else if (LookingAtType(io::Tokenizer::TYPE_SYMBOL)) {
524       if (TryConsumeEndOfDeclaration(";", nullptr)) {
525         return;
526       } else if (TryConsume("{")) {
527         SkipRestOfBlock();
528         return;
529       } else if (LookingAt("}")) {
530         return;
531       }
532     }
533     input_->Next();
534   }
535 }
536 
SkipRestOfBlock()537 void Parser::SkipRestOfBlock() {
538   size_t block_count = 1;
539   while (true) {
540     if (AtEnd()) {
541       return;
542     } else if (LookingAtType(io::Tokenizer::TYPE_SYMBOL)) {
543       if (TryConsumeEndOfDeclaration("}", nullptr)) {
544         if (--block_count == 0) break;
545       } else if (TryConsume("{")) {
546         ++block_count;
547       }
548     }
549     input_->Next();
550   }
551 }
552 
553 // ===================================================================
554 
ValidateMessage(const DescriptorProto * proto)555 bool Parser::ValidateMessage(const DescriptorProto* proto) {
556   for (int i = 0; i < proto->options().uninterpreted_option_size(); i++) {
557     const UninterpretedOption& option =
558         proto->options().uninterpreted_option(i);
559     if (option.name_size() > 0 && !option.name(0).is_extension() &&
560         option.name(0).name_part() == "map_entry") {
561       int line = -1, col = 0;  // indicates line and column not known
562       if (source_location_table_ != nullptr) {
563         source_location_table_->Find(
564             &option, DescriptorPool::ErrorCollector::OPTION_NAME, &line, &col);
565       }
566       RecordError(line, col,
567                   "map_entry should not be set explicitly. "
568                   "Use map<KeyType, ValueType> instead.");
569       return false;
570     }
571   }
572   return true;
573 }
574 
ValidateEnum(const EnumDescriptorProto * proto)575 bool Parser::ValidateEnum(const EnumDescriptorProto* proto) {
576   bool has_allow_alias = false;
577   bool allow_alias = false;
578 
579   for (int i = 0; i < proto->options().uninterpreted_option_size(); i++) {
580     const UninterpretedOption option = proto->options().uninterpreted_option(i);
581     if (option.name_size() > 1) {
582       continue;
583     }
584     if (!option.name(0).is_extension() &&
585         option.name(0).name_part() == "allow_alias") {
586       has_allow_alias = true;
587       if (option.identifier_value() == "true") {
588         allow_alias = true;
589       }
590       break;
591     }
592   }
593 
594   if (has_allow_alias && !allow_alias) {
595     // This needlessly clutters declarations with nops.
596     RecordError([=] {
597       return absl::StrCat(
598           "\"", proto->name(),
599           "\" declares 'option allow_alias = false;' which has no effect. "
600           "Please remove the declaration.");
601     });
602     return false;
603   }
604 
605   absl::flat_hash_set<int> used_values;
606   bool has_duplicates = false;
607   for (int i = 0; i < proto->value_size(); ++i) {
608     const EnumValueDescriptorProto& enum_value = proto->value(i);
609     if (used_values.find(enum_value.number()) != used_values.end()) {
610       has_duplicates = true;
611       break;
612     } else {
613       used_values.insert(enum_value.number());
614     }
615   }
616   if (allow_alias && !has_duplicates) {
617     // Generate an error if an enum declares support for duplicate enum values
618     // and does not use it protect future authors.
619     RecordError([=] {
620       return absl::StrCat(
621           "\"", proto->name(),
622           "\" declares support for enum aliases but no enum values share field "
623           "numbers. Please remove the unnecessary 'option allow_alias = true;' "
624           "declaration.");
625     });
626     return false;
627   }
628 
629   // Enforce that enum constants must be UPPER_CASE except in case of
630   // enum_alias.
631   if (!allow_alias) {
632     for (const auto& enum_value : proto->value()) {
633       if (!IsUpperUnderscore(enum_value.name())) {
634         RecordWarning([&] {
635           return absl::StrCat(
636               "Enum constant should be in UPPER_CASE. Found: ",
637               enum_value.name(),
638               ". See "
639               "https://developers.google.com/protocol-buffers/docs/style");
640         });
641       }
642     }
643   }
644 
645   return true;
646 }
647 
Parse(io::Tokenizer * input,FileDescriptorProto * file)648 bool Parser::Parse(io::Tokenizer* input, FileDescriptorProto* file) {
649   input_ = input;
650   had_errors_ = false;
651   syntax_identifier_.clear();
652 
653   // Note that |file| could be NULL at this point if
654   // stop_after_syntax_identifier_ is true.  So, we conservatively allocate
655   // SourceCodeInfo on the stack, then swap it into the FileDescriptorProto
656   // later on.
657   SourceCodeInfo source_code_info;
658   source_code_info_ = &source_code_info;
659 
660   if (LookingAtType(io::Tokenizer::TYPE_START)) {
661     // Advance to first token.
662     input_->NextWithComments(nullptr, &upcoming_detached_comments_,
663                              &upcoming_doc_comments_);
664   }
665 
666   {
667     LocationRecorder root_location(this);
668     root_location.RecordLegacyLocation(file,
669                                        DescriptorPool::ErrorCollector::OTHER);
670 
671     if (require_syntax_identifier_ || LookingAt("syntax") ||
672         LookingAt("edition")) {
673       if (!ParseSyntaxIdentifier(file, root_location)) {
674         // Don't attempt to parse the file if we didn't recognize the syntax
675         // identifier.
676         return false;
677       }
678       // Store the syntax into the file.
679       if (file != nullptr) {
680         file->set_syntax(syntax_identifier_);
681         if (syntax_identifier_ == "editions") {
682           file->set_edition(edition_);
683         }
684       }
685     } else if (!stop_after_syntax_identifier_) {
686       ABSL_LOG(WARNING) << "No syntax specified for the proto file: "
687                         << file->name()
688                         << ". Please use 'syntax = \"proto2\";' "
689                         << "or 'syntax = \"proto3\";' to specify a syntax "
690                         << "version. (Defaulted to proto2 syntax.)";
691       syntax_identifier_ = "proto2";
692     }
693 
694     if (stop_after_syntax_identifier_) return !had_errors_;
695 
696     // Repeatedly parse statements until we reach the end of the file.
697     while (!AtEnd()) {
698       if (!ParseTopLevelStatement(file, root_location)) {
699         // This statement failed to parse.  Skip it, but keep looping to parse
700         // other statements.
701         SkipStatement();
702 
703         if (LookingAt("}")) {
704           RecordError("Unmatched \"}\".");
705           input_->NextWithComments(nullptr, &upcoming_detached_comments_,
706                                    &upcoming_doc_comments_);
707         }
708       }
709     }
710   }
711 
712   input_ = nullptr;
713   source_code_info_ = nullptr;
714   assert(file != nullptr);
715   source_code_info.Swap(file->mutable_source_code_info());
716   return !had_errors_;
717 }
718 
ParseSyntaxIdentifier(const FileDescriptorProto * file,const LocationRecorder & parent)719 bool Parser::ParseSyntaxIdentifier(const FileDescriptorProto* file,
720                                    const LocationRecorder& parent) {
721   LocationRecorder syntax_location(parent,
722                                    FileDescriptorProto::kSyntaxFieldNumber);
723   syntax_location.RecordLegacyLocation(
724       file, DescriptorPool::ErrorCollector::EDITIONS);
725   bool has_edition = false;
726   if (TryConsume("edition")) {
727     has_edition = true;
728   } else {
729     DO(Consume("syntax",
730                "File must begin with a syntax statement, e.g. 'syntax = "
731                "\"proto2\";'."));
732   }
733 
734   DO(Consume("="));
735   io::Tokenizer::Token syntax_token = input_->current();
736   std::string syntax;
737   DO(ConsumeString(&syntax, "Expected syntax identifier."));
738   DO(ConsumeEndOfDeclaration(";", &syntax_location));
739 
740   if (has_edition) {
741     if (!Edition_Parse(absl::StrCat("EDITION_", syntax), &edition_) ||
742         edition_ == Edition::EDITION_PROTO2 ||
743         edition_ == Edition::EDITION_PROTO3 ||
744         edition_ == Edition::EDITION_UNKNOWN) {
745       RecordError(syntax_token.line, syntax_token.column, [&] {
746         return absl::StrCat("Unknown edition \"", syntax, "\".");
747       });
748       return false;
749     }
750     syntax_identifier_ = "editions";
751     return true;
752   }
753 
754   syntax_identifier_ = syntax;
755   if (syntax != "proto2" && syntax != "proto3" &&
756       !stop_after_syntax_identifier_) {
757     RecordError(syntax_token.line, syntax_token.column, [&] {
758       return absl::StrCat("Unrecognized syntax identifier \"", syntax,
759                           "\".  This parser "
760                           "only recognizes \"proto2\" and \"proto3\".");
761     });
762     return false;
763   }
764 
765   return true;
766 }
767 
ParseTopLevelStatement(FileDescriptorProto * file,const LocationRecorder & root_location)768 bool Parser::ParseTopLevelStatement(FileDescriptorProto* file,
769                                     const LocationRecorder& root_location) {
770   if (TryConsumeEndOfDeclaration(";", nullptr)) {
771     // empty statement; ignore
772     return true;
773   } else if (LookingAt("message")) {
774     LocationRecorder location(root_location,
775                               FileDescriptorProto::kMessageTypeFieldNumber,
776                               file->message_type_size());
777     // Maximum depth allowed by the DescriptorPool.
778     recursion_depth_ = internal::cpp::MaxMessageDeclarationNestingDepth();
779     return ParseMessageDefinition(file->add_message_type(), location, file);
780   } else if (LookingAt("enum")) {
781     LocationRecorder location(root_location,
782                               FileDescriptorProto::kEnumTypeFieldNumber,
783                               file->enum_type_size());
784     return ParseEnumDefinition(file->add_enum_type(), location, file);
785   } else if (LookingAt("service")) {
786     LocationRecorder location(root_location,
787                               FileDescriptorProto::kServiceFieldNumber,
788                               file->service_size());
789     return ParseServiceDefinition(file->add_service(), location, file);
790   } else if (LookingAt("extend")) {
791     LocationRecorder location(root_location,
792                               FileDescriptorProto::kExtensionFieldNumber);
793     return ParseExtend(
794         file->mutable_extension(), file->mutable_message_type(), root_location,
795         FileDescriptorProto::kMessageTypeFieldNumber, location, file);
796   } else if (LookingAt("import")) {
797     return ParseImport(file->mutable_dependency(),
798                        file->mutable_public_dependency(),
799                        file->mutable_weak_dependency(), root_location, file);
800   } else if (LookingAt("package")) {
801     return ParsePackage(file, root_location, file);
802   } else if (LookingAt("option")) {
803     LocationRecorder location(root_location,
804                               FileDescriptorProto::kOptionsFieldNumber);
805     return ParseOption(file->mutable_options(), location, file,
806                        OPTION_STATEMENT);
807   } else {
808     RecordError("Expected top-level statement (e.g. \"message\").");
809     return false;
810   }
811 }
812 
813 // -------------------------------------------------------------------
814 // Messages
815 
GenerateSyntheticOneofs(DescriptorProto * message)816 PROTOBUF_NOINLINE static void GenerateSyntheticOneofs(
817     DescriptorProto* message) {
818   // Add synthetic one-field oneofs for optional fields, except messages which
819   // already have presence in proto3.
820   //
821   // We have to make sure the oneof names don't conflict with any other
822   // field or oneof.
823   absl::flat_hash_set<std::string> names;
824   for (const auto& field : message->field()) {
825     names.insert(field.name());
826   }
827   for (const auto& oneof : message->oneof_decl()) {
828     names.insert(oneof.name());
829   }
830 
831   for (auto& field : *message->mutable_field()) {
832     if (field.proto3_optional()) {
833       std::string oneof_name = field.name();
834 
835       // Prepend 'XXXXX_' until we are no longer conflicting.
836       // Avoid prepending a double-underscore because such names are
837       // reserved in C++.
838       if (oneof_name.empty() || oneof_name[0] != '_') {
839         oneof_name.insert(0, "_");
840       }
841       while (names.count(oneof_name) > 0) {
842         oneof_name.insert(0, "X");
843       }
844 
845       names.insert(oneof_name);
846       field.set_oneof_index(message->oneof_decl_size());
847       OneofDescriptorProto* oneof = message->add_oneof_decl();
848       oneof->set_name(std::move(oneof_name));
849     }
850   }
851 }
852 
ParseMessageDefinition(DescriptorProto * message,const LocationRecorder & message_location,const FileDescriptorProto * containing_file)853 bool Parser::ParseMessageDefinition(
854     DescriptorProto* message, const LocationRecorder& message_location,
855     const FileDescriptorProto* containing_file) {
856   const auto undo_depth = absl::MakeCleanup([&] { ++recursion_depth_; });
857   if (--recursion_depth_ <= 0) {
858     RecordError("Reached maximum recursion limit for nested messages.");
859     return false;
860   }
861 
862   DO(Consume("message"));
863   {
864     LocationRecorder location(message_location,
865                               DescriptorProto::kNameFieldNumber);
866     location.RecordLegacyLocation(message,
867                                   DescriptorPool::ErrorCollector::NAME);
868     DO(ConsumeIdentifier(message->mutable_name(), "Expected message name."));
869     if (!IsUpperCamelCase(message->name())) {
870       RecordWarning([=] {
871         return absl::StrCat(
872             "Message name should be in UpperCamelCase. Found: ",
873             message->name(),
874             ". See https://developers.google.com/protocol-buffers/docs/style");
875       });
876     }
877   }
878   DO(ParseMessageBlock(message, message_location, containing_file));
879 
880   if (syntax_identifier_ == "proto3") {
881     GenerateSyntheticOneofs(message);
882   }
883 
884   return true;
885 }
886 
887 namespace {
888 
// Placeholder stored as the `end` of an extension or reserved range; the
// Adjust*RangesWithMaxEndNumber helpers in this namespace later overwrite it
// with the real maximum.
constexpr int kMaxRangeSentinel = -1;
890 
IsMessageSetWireFormatMessage(const DescriptorProto & message)891 bool IsMessageSetWireFormatMessage(const DescriptorProto& message) {
892   const MessageOptions& options = message.options();
893   for (int i = 0; i < options.uninterpreted_option_size(); ++i) {
894     const UninterpretedOption& uninterpreted = options.uninterpreted_option(i);
895     if (uninterpreted.name_size() == 1 &&
896         !uninterpreted.name(0).is_extension() &&
897         uninterpreted.name(0).name_part() == "message_set_wire_format" &&
898         uninterpreted.identifier_value() == "true") {
899       return true;
900     }
901   }
902   return false;
903 }
904 
905 // Modifies any extension ranges that specified 'max' as the end of the
906 // extension range, and sets them to the type-specific maximum. The actual max
907 // tag number can only be determined after all options have been parsed.
AdjustExtensionRangesWithMaxEndNumber(DescriptorProto * message)908 void AdjustExtensionRangesWithMaxEndNumber(DescriptorProto* message) {
909   const bool is_message_set = IsMessageSetWireFormatMessage(*message);
910   const int max_extension_number = is_message_set
911                                        ? std::numeric_limits<int32_t>::max()
912                                        : FieldDescriptor::kMaxNumber + 1;
913   for (int i = 0; i < message->extension_range_size(); ++i) {
914     if (message->extension_range(i).end() == kMaxRangeSentinel) {
915       message->mutable_extension_range(i)->set_end(max_extension_number);
916     }
917   }
918 }
919 
920 // Modifies any reserved ranges that specified 'max' as the end of the
921 // reserved range, and sets them to the type-specific maximum. The actual max
922 // tag number can only be determined after all options have been parsed.
AdjustReservedRangesWithMaxEndNumber(DescriptorProto * message)923 void AdjustReservedRangesWithMaxEndNumber(DescriptorProto* message) {
924   const bool is_message_set = IsMessageSetWireFormatMessage(*message);
925   const int max_field_number = is_message_set
926                                    ? std::numeric_limits<int32_t>::max()
927                                    : FieldDescriptor::kMaxNumber + 1;
928   for (int i = 0; i < message->reserved_range_size(); ++i) {
929     if (message->reserved_range(i).end() == kMaxRangeSentinel) {
930       message->mutable_reserved_range(i)->set_end(max_field_number);
931     }
932   }
933 }
934 
935 }  // namespace
936 
ParseMessageBlock(DescriptorProto * message,const LocationRecorder & message_location,const FileDescriptorProto * containing_file)937 bool Parser::ParseMessageBlock(DescriptorProto* message,
938                                const LocationRecorder& message_location,
939                                const FileDescriptorProto* containing_file) {
940   DO(ConsumeEndOfDeclaration("{", &message_location));
941 
942   while (!TryConsumeEndOfDeclaration("}", nullptr)) {
943     if (AtEnd()) {
944       RecordError("Reached end of input in message definition (missing '}').");
945       return false;
946     }
947 
948     if (!ParseMessageStatement(message, message_location, containing_file)) {
949       // This statement failed to parse.  Skip it, but keep looping to parse
950       // other statements.
951       SkipStatement();
952     }
953   }
954 
955   if (message->extension_range_size() > 0) {
956     AdjustExtensionRangesWithMaxEndNumber(message);
957   }
958   if (message->reserved_range_size() > 0) {
959     AdjustReservedRangesWithMaxEndNumber(message);
960   }
961 
962   DO(ValidateMessage(message));
963 
964   return true;
965 }
966 
ParseMessageStatement(DescriptorProto * message,const LocationRecorder & message_location,const FileDescriptorProto * containing_file)967 bool Parser::ParseMessageStatement(DescriptorProto* message,
968                                    const LocationRecorder& message_location,
969                                    const FileDescriptorProto* containing_file) {
970   if (TryConsumeEndOfDeclaration(";", nullptr)) {
971     // empty statement; ignore
972     return true;
973   } else if (LookingAt("message")) {
974     LocationRecorder location(message_location,
975                               DescriptorProto::kNestedTypeFieldNumber,
976                               message->nested_type_size());
977     return ParseMessageDefinition(message->add_nested_type(), location,
978                                   containing_file);
979   } else if (LookingAt("enum")) {
980     LocationRecorder location(message_location,
981                               DescriptorProto::kEnumTypeFieldNumber,
982                               message->enum_type_size());
983     return ParseEnumDefinition(message->add_enum_type(), location,
984                                containing_file);
985   } else if (LookingAt("extensions")) {
986     LocationRecorder location(message_location,
987                               DescriptorProto::kExtensionRangeFieldNumber);
988     return ParseExtensions(message, location, containing_file);
989   } else if (LookingAt("reserved")) {
990     return ParseReserved(message, message_location);
991   } else if (LookingAt("extend")) {
992     LocationRecorder location(message_location,
993                               DescriptorProto::kExtensionFieldNumber);
994     return ParseExtend(message->mutable_extension(),
995                        message->mutable_nested_type(), message_location,
996                        DescriptorProto::kNestedTypeFieldNumber, location,
997                        containing_file);
998   } else if (LookingAt("option")) {
999     LocationRecorder location(message_location,
1000                               DescriptorProto::kOptionsFieldNumber);
1001     return ParseOption(message->mutable_options(), location, containing_file,
1002                        OPTION_STATEMENT);
1003   } else if (LookingAt("oneof")) {
1004     int oneof_index = message->oneof_decl_size();
1005     LocationRecorder oneof_location(
1006         message_location, DescriptorProto::kOneofDeclFieldNumber, oneof_index);
1007 
1008     return ParseOneof(message->add_oneof_decl(), message, oneof_index,
1009                       oneof_location, message_location, containing_file);
1010   } else {
1011     LocationRecorder location(message_location,
1012                               DescriptorProto::kFieldFieldNumber,
1013                               message->field_size());
1014     return ParseMessageField(
1015         message->add_field(), message->mutable_nested_type(), message_location,
1016         DescriptorProto::kNestedTypeFieldNumber, location, containing_file);
1017   }
1018 }
1019 
ParseMessageField(FieldDescriptorProto * field,RepeatedPtrField<DescriptorProto> * messages,const LocationRecorder & parent_location,int location_field_number_for_nested_type,const LocationRecorder & field_location,const FileDescriptorProto * containing_file)1020 bool Parser::ParseMessageField(FieldDescriptorProto* field,
1021                                RepeatedPtrField<DescriptorProto>* messages,
1022                                const LocationRecorder& parent_location,
1023                                int location_field_number_for_nested_type,
1024                                const LocationRecorder& field_location,
1025                                const FileDescriptorProto* containing_file) {
1026   {
1027     FieldDescriptorProto::Label label;
1028     if (ParseLabel(&label, field_location)) {
1029       field->set_label(label);
1030       if (label == FieldDescriptorProto::LABEL_OPTIONAL &&
1031           syntax_identifier_ == "proto3") {
1032         field->set_proto3_optional(true);
1033       }
1034     }
1035   }
1036 
1037   return ParseMessageFieldNoLabel(field, messages, parent_location,
1038                                   location_field_number_for_nested_type,
1039                                   field_location, containing_file);
1040 }
1041 
ParseMessageFieldNoLabel(FieldDescriptorProto * field,RepeatedPtrField<DescriptorProto> * messages,const LocationRecorder & parent_location,int location_field_number_for_nested_type,const LocationRecorder & field_location,const FileDescriptorProto * containing_file)1042 bool Parser::ParseMessageFieldNoLabel(
1043     FieldDescriptorProto* field, RepeatedPtrField<DescriptorProto>* messages,
1044     const LocationRecorder& parent_location,
1045     int location_field_number_for_nested_type,
1046     const LocationRecorder& field_location,
1047     const FileDescriptorProto* containing_file) {
1048   MapField map_field;
1049   // Parse type.
1050   {
1051     LocationRecorder location(field_location);  // add path later
1052     location.RecordLegacyLocation(field, DescriptorPool::ErrorCollector::TYPE);
1053 
1054     bool type_parsed = false;
1055     FieldDescriptorProto::Type type = FieldDescriptorProto::TYPE_INT32;
1056     std::string type_name;
1057 
1058     // Special case map field. We only treat the field as a map field if the
1059     // field type name starts with the word "map" with a following "<".
1060     if (TryConsume("map")) {
1061       if (LookingAt("<")) {
1062         map_field.is_map_field = true;
1063         DO(ParseMapType(&map_field, field, location));
1064       } else {
1065         // False positive
1066         type_parsed = true;
1067         type_name = "map";
1068       }
1069     }
1070     if (!map_field.is_map_field) {
1071       // Handle the case where no explicit label is given for a non-map field.
1072       if (!field->has_label() && DefaultToOptionalFields()) {
1073         field->set_label(FieldDescriptorProto::LABEL_OPTIONAL);
1074       }
1075       if (!field->has_label()) {
1076         RecordError("Expected \"required\", \"optional\", or \"repeated\".");
1077         // We can actually reasonably recover here by just assuming the user
1078         // forgot the label altogether.
1079         field->set_label(FieldDescriptorProto::LABEL_OPTIONAL);
1080       }
1081 
1082       // Handle the case where the actual type is a message or enum named
1083       // "map", which we already consumed in the code above.
1084       if (!type_parsed) {
1085         DO(ParseType(&type, &type_name));
1086       }
1087       if (type_name.empty()) {
1088         location.AddPath(FieldDescriptorProto::kTypeFieldNumber);
1089         field->set_type(type);
1090       } else {
1091         location.AddPath(FieldDescriptorProto::kTypeNameFieldNumber);
1092         field->set_type_name(type_name);
1093       }
1094     }
1095   }
1096 
1097   // Parse name and '='.
1098   io::Tokenizer::Token name_token = input_->current();
1099   {
1100     LocationRecorder location(field_location,
1101                               FieldDescriptorProto::kNameFieldNumber);
1102     location.RecordLegacyLocation(field, DescriptorPool::ErrorCollector::NAME);
1103     DO(ConsumeIdentifier(field->mutable_name(), "Expected field name."));
1104 
1105     if (!IsLowerUnderscore(field->name())) {
1106       RecordWarning([=] {
1107         return absl::StrCat(
1108             "Field name should be lowercase. Found: ", field->name(),
1109             ". See: https://developers.google.com/protocol-buffers/docs/style");
1110       });
1111     }
1112     if (IsNumberFollowUnderscore(field->name())) {
1113       RecordWarning([=] {
1114         return absl::StrCat(
1115             "Number should not come right after an underscore. Found: ",
1116             field->name(),
1117             ". See: https://developers.google.com/protocol-buffers/docs/style");
1118       });
1119     }
1120   }
1121   DO(Consume("=", "Missing field number."));
1122 
1123   // Parse field number.
1124   {
1125     LocationRecorder location(field_location,
1126                               FieldDescriptorProto::kNumberFieldNumber);
1127     location.RecordLegacyLocation(field,
1128                                   DescriptorPool::ErrorCollector::NUMBER);
1129     int number;
1130     DO(ConsumeInteger(&number, "Expected field number."));
1131     field->set_number(number);
1132   }
1133 
1134   // Parse options.
1135   DO(ParseFieldOptions(field, field_location, containing_file));
1136 
1137   // Deal with groups.
1138   if (field->has_type() && field->type() == FieldDescriptorProto::TYPE_GROUP) {
1139     // Awkward:  Since a group declares both a message type and a field, we
1140     //   have to create overlapping locations.
1141     LocationRecorder group_location(parent_location);
1142     group_location.StartAt(field_location);
1143     group_location.AddPath(location_field_number_for_nested_type);
1144     group_location.AddPath(messages->size());
1145 
1146     DescriptorProto* group = messages->Add();
1147     group->set_name(field->name());
1148 
1149     // Record name location to match the field name's location.
1150     {
1151       LocationRecorder location(group_location,
1152                                 DescriptorProto::kNameFieldNumber);
1153       location.StartAt(name_token);
1154       location.EndAt(name_token);
1155       location.RecordLegacyLocation(group,
1156                                     DescriptorPool::ErrorCollector::NAME);
1157     }
1158 
1159     // The field's type_name also comes from the name.  Confusing!
1160     {
1161       LocationRecorder location(field_location,
1162                                 FieldDescriptorProto::kTypeNameFieldNumber);
1163       location.StartAt(name_token);
1164       location.EndAt(name_token);
1165     }
1166 
1167     // As a hack for backwards-compatibility, we force the group name to start
1168     // with a capital letter and lower-case the field name.  New code should
1169     // not use groups; it should use nested messages.
1170     if (group->name()[0] < 'A' || 'Z' < group->name()[0]) {
1171       RecordError(name_token.line, name_token.column,
1172                   "Group names must start with a capital letter.");
1173     }
1174     absl::AsciiStrToLower(field->mutable_name());
1175 
1176     field->set_type_name(group->name());
1177     if (LookingAt("{")) {
1178       DO(ParseMessageBlock(group, group_location, containing_file));
1179     } else {
1180       RecordError("Missing group body.");
1181       return false;
1182     }
1183   } else {
1184     DO(ConsumeEndOfDeclaration(";", &field_location));
1185   }
1186 
1187   // Create a map entry type if this is a map field.
1188   if (map_field.is_map_field) {
1189     GenerateMapEntry(map_field, field, messages);
1190   }
1191 
1192   return true;
1193 }
1194 
ParseMapType(MapField * map_field,FieldDescriptorProto * field,LocationRecorder & type_name_location)1195 bool Parser::ParseMapType(MapField* map_field, FieldDescriptorProto* field,
1196                           LocationRecorder& type_name_location) {
1197   if (field->has_oneof_index()) {
1198     RecordError("Map fields are not allowed in oneofs.");
1199     return false;
1200   }
1201   if (field->has_label()) {
1202     RecordError(
1203         "Field labels (required/optional/repeated) are not allowed on "
1204         "map fields.");
1205     return false;
1206   }
1207   if (field->has_extendee()) {
1208     RecordError("Map fields are not allowed to be extensions.");
1209     return false;
1210   }
1211   field->set_label(FieldDescriptorProto::LABEL_REPEATED);
1212   DO(Consume("<"));
1213   DO(ParseType(&map_field->key_type, &map_field->key_type_name));
1214   DO(Consume(","));
1215   DO(ParseType(&map_field->value_type, &map_field->value_type_name));
1216   DO(Consume(">"));
1217   // Defer setting of the type name of the map field until the
1218   // field name is parsed. Add the source location though.
1219   type_name_location.AddPath(FieldDescriptorProto::kTypeNameFieldNumber);
1220   return true;
1221 }
1222 
GenerateMapEntry(const MapField & map_field,FieldDescriptorProto * field,RepeatedPtrField<DescriptorProto> * messages)1223 void Parser::GenerateMapEntry(const MapField& map_field,
1224                               FieldDescriptorProto* field,
1225                               RepeatedPtrField<DescriptorProto>* messages) {
1226   DescriptorProto* entry = messages->Add();
1227   std::string entry_name = MapEntryName(field->name());
1228   field->set_type_name(entry_name);
1229   entry->set_name(entry_name);
1230   entry->mutable_options()->set_map_entry(true);
1231   FieldDescriptorProto* key_field = entry->add_field();
1232   key_field->set_name("key");
1233   key_field->set_label(FieldDescriptorProto::LABEL_OPTIONAL);
1234   key_field->set_number(1);
1235   if (map_field.key_type_name.empty()) {
1236     key_field->set_type(map_field.key_type);
1237   } else {
1238     key_field->set_type_name(map_field.key_type_name);
1239   }
1240   FieldDescriptorProto* value_field = entry->add_field();
1241   value_field->set_name("value");
1242   value_field->set_label(FieldDescriptorProto::LABEL_OPTIONAL);
1243   value_field->set_number(2);
1244   if (map_field.value_type_name.empty()) {
1245     value_field->set_type(map_field.value_type);
1246   } else {
1247     value_field->set_type_name(map_field.value_type_name);
1248   }
1249   // Propagate all features to the generated key and value fields. This helps
1250   // simplify the implementation of code generators and also reflection-based
1251   // parsing code. Instead of having to implement complex inheritance rules
1252   // special-casing maps, we can just copy them at generation time.
1253   //
1254   // The following definition:
1255   //   message Foo {
1256   //     map<string, string> value = 1 [features.some_feature = VALUE];
1257   //   }
1258   // will be interpreted as:
1259   //   message Foo {
1260   //     message ValueEntry {
1261   //       option map_entry = true;
1262   //       string key = 1 [features.some_feature = VALUE];
1263   //       string value = 2 [features.some_feature = VALUE];
1264   //     }
1265   //     repeated ValueEntry value = 1 [features.some_feature = VALUE];
1266   //  }
1267   for (int i = 0; i < field->options().uninterpreted_option_size(); ++i) {
1268     const UninterpretedOption& option =
1269         field->options().uninterpreted_option(i);
1270     // Legacy handling for the `enforce_utf8` option, which bears a striking
1271     // similarity to features in many respects.
1272     // TODO Delete this once proto2/proto3 have been turned down.
1273     if (option.name_size() == 1 &&
1274         option.name(0).name_part() == "enforce_utf8" &&
1275         !option.name(0).is_extension()) {
1276       if (key_field->type() == FieldDescriptorProto::TYPE_STRING) {
1277         *key_field->mutable_options()->add_uninterpreted_option() = option;
1278       }
1279       if (value_field->type() == FieldDescriptorProto::TYPE_STRING) {
1280         *value_field->mutable_options()->add_uninterpreted_option() = option;
1281       }
1282     }
1283     if (option.name(0).name_part() == "features" &&
1284         !option.name(0).is_extension()) {
1285       *key_field->mutable_options()->add_uninterpreted_option() = option;
1286       *value_field->mutable_options()->add_uninterpreted_option() = option;
1287     }
1288   }
1289 }
1290 
ParseFieldOptions(FieldDescriptorProto * field,const LocationRecorder & field_location,const FileDescriptorProto * containing_file)1291 bool Parser::ParseFieldOptions(FieldDescriptorProto* field,
1292                                const LocationRecorder& field_location,
1293                                const FileDescriptorProto* containing_file) {
1294   if (!LookingAt("[")) return true;
1295 
1296   LocationRecorder location(field_location,
1297                             FieldDescriptorProto::kOptionsFieldNumber);
1298 
1299   DO(Consume("["));
1300 
1301   // Parse field options.
1302   do {
1303     if (LookingAt("default")) {
1304       // We intentionally pass field_location rather than location here, since
1305       // the default value is not actually an option.
1306       DO(ParseDefaultAssignment(field, field_location, containing_file));
1307     } else if (LookingAt("json_name")) {
1308       // Like default value, this "json_name" is not an actual option.
1309       DO(ParseJsonName(field, field_location, containing_file));
1310     } else {
1311       DO(ParseOption(field->mutable_options(), location, containing_file,
1312                      OPTION_ASSIGNMENT));
1313     }
1314   } while (TryConsume(","));
1315 
1316   DO(Consume("]"));
1317   return true;
1318 }
1319 
ParseDefaultAssignment(FieldDescriptorProto * field,const LocationRecorder & field_location,const FileDescriptorProto * containing_file)1320 bool Parser::ParseDefaultAssignment(
1321     FieldDescriptorProto* field, const LocationRecorder& field_location,
1322     const FileDescriptorProto* containing_file) {
1323   if (field->has_default_value()) {
1324     RecordError("Already set option \"default\".");
1325     field->clear_default_value();
1326   }
1327 
1328   DO(Consume("default"));
1329   DO(Consume("="));
1330 
1331   LocationRecorder location(field_location,
1332                             FieldDescriptorProto::kDefaultValueFieldNumber);
1333   location.RecordLegacyLocation(field,
1334                                 DescriptorPool::ErrorCollector::DEFAULT_VALUE);
1335   std::string* default_value = field->mutable_default_value();
1336 
1337   if (!field->has_type()) {
1338     // The field has a type name, but we don't know if it is a message or an
1339     // enum yet. (If it were a primitive type, |field| would have a type set
1340     // already.) In this case, simply take the current string as the default
1341     // value; we will catch the error later if it is not a valid enum value.
1342     // (N.B. that we do not check whether the current token is an identifier:
1343     // doing so throws strange errors when the user mistypes a primitive
1344     // typename and we assume it's an enum. E.g.: "optional int foo = 1 [default
1345     // = 42]". In such a case the fundamental error is really that "int" is not
1346     // a type, not that "42" is not an identifier. See b/12533582.)
1347     *default_value = input_->current().text;
1348     input_->Next();
1349     return true;
1350   }
1351 
1352   switch (field->type()) {
1353     case FieldDescriptorProto::TYPE_INT32:
1354     case FieldDescriptorProto::TYPE_INT64:
1355     case FieldDescriptorProto::TYPE_SINT32:
1356     case FieldDescriptorProto::TYPE_SINT64:
1357     case FieldDescriptorProto::TYPE_SFIXED32:
1358     case FieldDescriptorProto::TYPE_SFIXED64: {
1359       uint64_t max_value = std::numeric_limits<int64_t>::max();
1360       if (field->type() == FieldDescriptorProto::TYPE_INT32 ||
1361           field->type() == FieldDescriptorProto::TYPE_SINT32 ||
1362           field->type() == FieldDescriptorProto::TYPE_SFIXED32) {
1363         max_value = std::numeric_limits<int32_t>::max();
1364       }
1365 
1366       // These types can be negative.
1367       if (TryConsume("-")) {
1368         default_value->append("-");
1369         // Two's complement always has one more negative value than positive.
1370         ++max_value;
1371       }
1372       // Parse the integer to verify that it is not out-of-range.
1373       uint64_t value;
1374       DO(ConsumeInteger64(max_value, &value,
1375                           "Expected integer for field default value."));
1376       // And stringify it again.
1377       default_value->append(absl::StrCat(value));
1378       break;
1379     }
1380 
1381     case FieldDescriptorProto::TYPE_UINT32:
1382     case FieldDescriptorProto::TYPE_UINT64:
1383     case FieldDescriptorProto::TYPE_FIXED32:
1384     case FieldDescriptorProto::TYPE_FIXED64: {
1385       uint64_t max_value = std::numeric_limits<uint64_t>::max();
1386       if (field->type() == FieldDescriptorProto::TYPE_UINT32 ||
1387           field->type() == FieldDescriptorProto::TYPE_FIXED32) {
1388         max_value = std::numeric_limits<uint32_t>::max();
1389       }
1390 
1391       // Numeric, not negative.
1392       if (TryConsume("-")) {
1393         RecordError("Unsigned field can't have negative default value.");
1394       }
1395       // Parse the integer to verify that it is not out-of-range.
1396       uint64_t value;
1397       DO(ConsumeInteger64(max_value, &value,
1398                           "Expected integer for field default value."));
1399       // And stringify it again.
1400       default_value->append(absl::StrCat(value));
1401       break;
1402     }
1403 
1404     case FieldDescriptorProto::TYPE_FLOAT:
1405     case FieldDescriptorProto::TYPE_DOUBLE: {
1406       // These types can be negative.
1407       if (TryConsume("-")) {
1408         default_value->append("-");
1409       }
1410       // Parse the integer because we have to convert hex integers to decimal
1411       // floats.
1412       double value = 0.0;
1413       DO(ConsumeNumber(&value, "Expected number."));
1414       // And stringify it again.
1415       default_value->append(io::SimpleDtoa(value));
1416       break;
1417     }
1418     case FieldDescriptorProto::TYPE_BOOL:
1419       if (TryConsume("true")) {
1420         default_value->assign("true");
1421       } else if (TryConsume("false")) {
1422         default_value->assign("false");
1423       } else {
1424         RecordError("Expected \"true\" or \"false\".");
1425         return false;
1426       }
1427       break;
1428 
1429     case FieldDescriptorProto::TYPE_STRING:
1430       // Note: When file option java_string_check_utf8 is true, if a
1431       // non-string representation (eg byte[]) is later supported, it must
1432       // be checked for UTF-8-ness.
1433       DO(ConsumeString(default_value,
1434                        "Expected string for field default "
1435                        "value."));
1436       break;
1437 
1438     case FieldDescriptorProto::TYPE_BYTES:
1439       DO(ConsumeString(default_value, "Expected string."));
1440       *default_value = absl::CEscape(*default_value);
1441       break;
1442 
1443     case FieldDescriptorProto::TYPE_ENUM:
1444       DO(ConsumeIdentifier(default_value,
1445                            "Expected enum identifier for field "
1446                            "default value."));
1447       break;
1448 
1449     case FieldDescriptorProto::TYPE_MESSAGE:
1450     case FieldDescriptorProto::TYPE_GROUP:
1451       RecordError("Messages can't have default values.");
1452       return false;
1453   }
1454 
1455   return true;
1456 }
1457 
ParseJsonName(FieldDescriptorProto * field,const LocationRecorder & field_location,const FileDescriptorProto * containing_file)1458 bool Parser::ParseJsonName(FieldDescriptorProto* field,
1459                            const LocationRecorder& field_location,
1460                            const FileDescriptorProto* containing_file) {
1461   if (field->has_json_name()) {
1462     RecordError("Already set option \"json_name\".");
1463     field->clear_json_name();
1464   }
1465 
1466   LocationRecorder location(field_location,
1467                             FieldDescriptorProto::kJsonNameFieldNumber);
1468   location.RecordLegacyLocation(field,
1469                                 DescriptorPool::ErrorCollector::OPTION_NAME);
1470 
1471   DO(Consume("json_name"));
1472   DO(Consume("="));
1473 
1474   LocationRecorder value_location(location);
1475   value_location.RecordLegacyLocation(
1476       field, DescriptorPool::ErrorCollector::OPTION_VALUE);
1477 
1478   DO(ConsumeString(field->mutable_json_name(),
1479                    "Expected string for JSON name."));
1480   return true;
1481 }
1482 
ParseOptionNamePart(UninterpretedOption * uninterpreted_option,const LocationRecorder & part_location,const FileDescriptorProto * containing_file)1483 bool Parser::ParseOptionNamePart(UninterpretedOption* uninterpreted_option,
1484                                  const LocationRecorder& part_location,
1485                                  const FileDescriptorProto* containing_file) {
1486   UninterpretedOption::NamePart* name = uninterpreted_option->add_name();
1487   std::string identifier;  // We parse identifiers into this string.
1488   if (LookingAt("(")) {    // This is an extension.
1489     DO(Consume("("));
1490 
1491     {
1492       LocationRecorder location(
1493           part_location, UninterpretedOption::NamePart::kNamePartFieldNumber);
1494       // An extension name consists of dot-separated identifiers, and may begin
1495       // with a dot.
1496       if (LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) {
1497         DO(ConsumeIdentifier(&identifier, "Expected identifier."));
1498         name->mutable_name_part()->append(identifier);
1499       }
1500       while (LookingAt(".")) {
1501         DO(Consume("."));
1502         name->mutable_name_part()->append(".");
1503         DO(ConsumeIdentifier(&identifier, "Expected identifier."));
1504         name->mutable_name_part()->append(identifier);
1505       }
1506     }
1507 
1508     DO(Consume(")"));
1509     name->set_is_extension(true);
1510   } else {  // This is a regular field.
1511     LocationRecorder location(
1512         part_location, UninterpretedOption::NamePart::kNamePartFieldNumber);
1513     DO(ConsumeIdentifier(&identifier, "Expected identifier."));
1514     name->mutable_name_part()->append(identifier);
1515     name->set_is_extension(false);
1516   }
1517   return true;
1518 }
1519 
ParseUninterpretedBlock(std::string * value)1520 bool Parser::ParseUninterpretedBlock(std::string* value) {
1521   // Note that enclosing braces are not added to *value.
1522   // We do NOT use ConsumeEndOfStatement for this brace because it's delimiting
1523   // an expression, not a block of statements.
1524   DO(Consume("{"));
1525   int brace_depth = 1;
1526   while (!AtEnd()) {
1527     if (LookingAt("{")) {
1528       brace_depth++;
1529     } else if (LookingAt("}")) {
1530       brace_depth--;
1531       if (brace_depth == 0) {
1532         input_->Next();
1533         return true;
1534       }
1535     }
1536     // TODO: Interpret line/column numbers to preserve formatting
1537     if (!value->empty()) value->push_back(' ');
1538     value->append(input_->current().text);
1539     input_->Next();
1540   }
1541   RecordError("Unexpected end of stream while parsing aggregate value.");
1542   return false;
1543 }
1544 
1545 // We don't interpret the option here. Instead we store it in an
1546 // UninterpretedOption, to be interpreted later.
ParseOption(Message * options,const LocationRecorder & options_location,const FileDescriptorProto * containing_file,OptionStyle style)1547 bool Parser::ParseOption(Message* options,
1548                          const LocationRecorder& options_location,
1549                          const FileDescriptorProto* containing_file,
1550                          OptionStyle style) {
1551   // Create an entry in the uninterpreted_option field.
1552   const FieldDescriptor* uninterpreted_option_field =
1553       options->GetDescriptor()->FindFieldByName("uninterpreted_option");
1554   ABSL_CHECK(uninterpreted_option_field != nullptr)
1555       << "No field named \"uninterpreted_option\" in the Options proto.";
1556 
1557   const Reflection* reflection = options->GetReflection();
1558 
1559   LocationRecorder location(
1560       options_location, uninterpreted_option_field->number(),
1561       reflection->FieldSize(*options, uninterpreted_option_field));
1562 
1563   if (style == OPTION_STATEMENT) {
1564     DO(Consume("option"));
1565   }
1566 
1567   UninterpretedOption* uninterpreted_option =
1568       DownCastMessage<UninterpretedOption>(options->GetReflection()->AddMessage(
1569           options, uninterpreted_option_field));
1570 
1571   // Parse dot-separated name.
1572   {
1573     LocationRecorder name_location(location,
1574                                    UninterpretedOption::kNameFieldNumber);
1575     name_location.RecordLegacyLocation(
1576         uninterpreted_option, DescriptorPool::ErrorCollector::OPTION_NAME);
1577 
1578     {
1579       LocationRecorder part_location(name_location,
1580                                      uninterpreted_option->name_size());
1581       DO(ParseOptionNamePart(uninterpreted_option, part_location,
1582                              containing_file));
1583     }
1584 
1585     while (LookingAt(".")) {
1586       DO(Consume("."));
1587       LocationRecorder part_location(name_location,
1588                                      uninterpreted_option->name_size());
1589       DO(ParseOptionNamePart(uninterpreted_option, part_location,
1590                              containing_file));
1591     }
1592   }
1593 
1594   DO(Consume("="));
1595 
1596   {
1597     LocationRecorder value_location(location);
1598     value_location.RecordLegacyLocation(
1599         uninterpreted_option, DescriptorPool::ErrorCollector::OPTION_VALUE);
1600 
1601     // All values are a single token, except for negative numbers, which consist
1602     // of a single '-' symbol, followed by a positive number.
1603     bool is_negative = TryConsume("-");
1604 
1605     switch (input_->current().type) {
1606       case io::Tokenizer::TYPE_START:
1607         ABSL_LOG(FATAL)
1608             << "Trying to read value before any tokens have been read.";
1609         return false;
1610 
1611       case io::Tokenizer::TYPE_END:
1612         RecordError("Unexpected end of stream while parsing option value.");
1613         return false;
1614 
1615       case io::Tokenizer::TYPE_WHITESPACE:
1616       case io::Tokenizer::TYPE_NEWLINE:
1617         ABSL_CHECK(!input_->report_whitespace() && !input_->report_newlines())
1618             << "Whitespace tokens were not requested.";
1619         ABSL_LOG(FATAL) << "Tokenizer reported whitespace.";
1620         return false;
1621 
1622       case io::Tokenizer::TYPE_IDENTIFIER: {
1623         value_location.AddPath(
1624             UninterpretedOption::kIdentifierValueFieldNumber);
1625         std::string value;
1626         DO(ConsumeIdentifier(&value, "Expected identifier."));
1627         if (is_negative) {
1628           if (value == "inf") {
1629             uninterpreted_option->set_double_value(
1630                 -std::numeric_limits<double>::infinity());
1631           } else if (value == "nan") {
1632             uninterpreted_option->set_double_value(
1633                 std::numeric_limits<double>::quiet_NaN());
1634           } else {
1635             RecordError("Identifier after '-' symbol must be inf or nan.");
1636             return false;
1637           }
1638           break;
1639         }
1640         uninterpreted_option->set_identifier_value(value);
1641         break;
1642       }
1643 
1644       case io::Tokenizer::TYPE_INTEGER: {
1645         uint64_t value;
1646         uint64_t max_value =
1647             is_negative
1648                 ? static_cast<uint64_t>(std::numeric_limits<int64_t>::max()) + 1
1649                 : std::numeric_limits<uint64_t>::max();
1650         if (TryConsumeInteger64(max_value, &value)) {
1651           if (is_negative) {
1652             value_location.AddPath(
1653                 UninterpretedOption::kNegativeIntValueFieldNumber);
1654             uninterpreted_option->set_negative_int_value(
1655                 static_cast<int64_t>(0 - value));
1656           } else {
1657             value_location.AddPath(
1658                 UninterpretedOption::kPositiveIntValueFieldNumber);
1659             uninterpreted_option->set_positive_int_value(value);
1660           }
1661           break;
1662         }
1663         // value too large for an integer; fall through below to treat as
1664         // floating point
1665         ABSL_FALLTHROUGH_INTENDED;
1666       }
1667 
1668       case io::Tokenizer::TYPE_FLOAT: {
1669         value_location.AddPath(UninterpretedOption::kDoubleValueFieldNumber);
1670         double value = 0.0;
1671         DO(ConsumeNumber(&value, "Expected number."));
1672         uninterpreted_option->set_double_value(is_negative ? -value : value);
1673         break;
1674       }
1675 
1676       case io::Tokenizer::TYPE_STRING: {
1677         value_location.AddPath(UninterpretedOption::kStringValueFieldNumber);
1678         if (is_negative) {
1679           RecordError("Invalid '-' symbol before string.");
1680           return false;
1681         }
1682         std::string value;
1683         DO(ConsumeString(&value, "Expected string."));
1684         uninterpreted_option->set_string_value(value);
1685         break;
1686       }
1687 
1688       case io::Tokenizer::TYPE_SYMBOL:
1689         if (LookingAt("{")) {
1690           value_location.AddPath(
1691               UninterpretedOption::kAggregateValueFieldNumber);
1692           DO(ParseUninterpretedBlock(
1693               uninterpreted_option->mutable_aggregate_value()));
1694         } else {
1695           RecordError("Expected option value.");
1696           return false;
1697         }
1698         break;
1699     }
1700   }
1701 
1702   if (style == OPTION_STATEMENT) {
1703     DO(ConsumeEndOfDeclaration(";", &location));
1704   }
1705 
1706   return true;
1707 }
1708 
ParseExtensions(DescriptorProto * message,const LocationRecorder & extensions_location,const FileDescriptorProto * containing_file)1709 bool Parser::ParseExtensions(DescriptorProto* message,
1710                              const LocationRecorder& extensions_location,
1711                              const FileDescriptorProto* containing_file) {
1712   // Parse the declaration.
1713   DO(Consume("extensions"));
1714 
1715   int old_range_size = message->extension_range_size();
1716 
1717   do {
1718     // Note that kExtensionRangeFieldNumber was already pushed by the parent.
1719     LocationRecorder location(extensions_location,
1720                               message->extension_range_size());
1721 
1722     DescriptorProto::ExtensionRange* range = message->add_extension_range();
1723     location.RecordLegacyLocation(range,
1724                                   DescriptorPool::ErrorCollector::NUMBER);
1725 
1726     int start, end;
1727     io::Tokenizer::Token start_token;
1728 
1729     {
1730       LocationRecorder start_location(
1731           location, DescriptorProto::ExtensionRange::kStartFieldNumber);
1732       start_token = input_->current();
1733       DO(ConsumeInteger(&start, "Expected field number range."));
1734 
1735       if (start == std::numeric_limits<int>::max()) {
1736         RecordError("Field number out of bounds.");
1737         return false;
1738       }
1739     }
1740 
1741     if (TryConsume("to")) {
1742       LocationRecorder end_location(
1743           location, DescriptorProto::ExtensionRange::kEndFieldNumber);
1744       if (TryConsume("max")) {
1745         // Set to the sentinel value - 1 since we increment the value below.
1746         // The actual value of the end of the range should be set with
1747         // AdjustExtensionRangesWithMaxEndNumber.
1748         end = kMaxRangeSentinel - 1;
1749       } else {
1750         DO(ConsumeInteger(&end, "Expected integer."));
1751 
1752         if (end == std::numeric_limits<int>::max()) {
1753           RecordError("Field number out of bounds.");
1754           return false;
1755         }
1756       }
1757     } else {
1758       LocationRecorder end_location(
1759           location, DescriptorProto::ExtensionRange::kEndFieldNumber);
1760       end_location.StartAt(start_token);
1761       end_location.EndAt(start_token);
1762       end = start;
1763     }
1764 
1765     // Users like to specify inclusive ranges, but in code we like the end
1766     // number to be exclusive.
1767     ++end;
1768 
1769     range->set_start(start);
1770     range->set_end(end);
1771   } while (TryConsume(","));
1772 
1773   if (LookingAt("[")) {
1774     int range_number_index = extensions_location.CurrentPathSize();
1775     SourceCodeInfo info;
1776 
1777     // Parse extension range options in the first range.
1778     ExtensionRangeOptions* options =
1779         message->mutable_extension_range(old_range_size)->mutable_options();
1780 
1781     {
1782       LocationRecorder index_location(
1783           extensions_location, 0 /* we fill this in w/ actual index below */,
1784           &info);
1785       LocationRecorder location(
1786           index_location, DescriptorProto::ExtensionRange::kOptionsFieldNumber);
1787       DO(Consume("["));
1788 
1789       do {
1790         DO(ParseOption(options, location, containing_file, OPTION_ASSIGNMENT));
1791       } while (TryConsume(","));
1792 
1793       DO(Consume("]"));
1794     }
1795 
1796     // Then copy the extension range options to all of the other ranges we've
1797     // parsed.
1798     for (int i = old_range_size + 1; i < message->extension_range_size(); i++) {
1799       *message->mutable_extension_range(i)->mutable_options() = *options;
1800     }
1801     // and copy source locations to the other ranges, too
1802     for (int i = old_range_size; i < message->extension_range_size(); i++) {
1803       for (int j = 0; j < info.location_size(); j++) {
1804         if (info.location(j).path_size() == range_number_index + 1) {
1805           // this location's path is up to the extension range index, but
1806           // doesn't include options; so it's redundant with location above
1807           continue;
1808         }
1809         SourceCodeInfo_Location* dest = source_code_info_->add_location();
1810         *dest = info.location(j);
1811         dest->set_path(range_number_index, i);
1812       }
1813     }
1814   }
1815 
1816   DO(ConsumeEndOfDeclaration(";", &extensions_location));
1817   return true;
1818 }
1819 
1820 // This is similar to extension range parsing, except that it accepts field
1821 // name literals.
ParseReserved(DescriptorProto * message,const LocationRecorder & message_location)1822 bool Parser::ParseReserved(DescriptorProto* message,
1823                            const LocationRecorder& message_location) {
1824   io::Tokenizer::Token start_token = input_->current();
1825   // Parse the declaration.
1826   DO(Consume("reserved"));
1827   if (LookingAtType(io::Tokenizer::TYPE_STRING)) {
1828     if (syntax_identifier_ == "editions") {
1829       RecordError(
1830           "Reserved names must be identifiers in editions, not string "
1831           "literals.");
1832       return false;
1833     }
1834     LocationRecorder location(message_location,
1835                               DescriptorProto::kReservedNameFieldNumber);
1836     location.StartAt(start_token);
1837     return ParseReservedNames(message, location);
1838   } else if (LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) {
1839     if (syntax_identifier_ != "editions") {
1840       RecordError(
1841           "Reserved names must be string literals. (Only editions supports "
1842           "identifiers.)");
1843       return false;
1844     }
1845     LocationRecorder location(message_location,
1846                               DescriptorProto::kReservedNameFieldNumber);
1847     location.StartAt(start_token);
1848     return ParseReservedIdentifiers(message, location);
1849   } else {
1850     LocationRecorder location(message_location,
1851                               DescriptorProto::kReservedRangeFieldNumber);
1852     location.StartAt(start_token);
1853     return ParseReservedNumbers(message, location);
1854   }
1855 }
1856 
ParseReservedName(std::string * name,ErrorMaker error_message)1857 bool Parser::ParseReservedName(std::string* name, ErrorMaker error_message) {
1858   // Capture the position of the token, in case we have to report an
1859   // error after it is consumed.
1860   int line = input_->current().line;
1861   int col = input_->current().column;
1862   DO(ConsumeString(name, error_message));
1863   if (!io::Tokenizer::IsIdentifier(*name)) {
1864     RecordWarning(line, col, [=] {
1865       return absl::StrFormat("Reserved name \"%s\" is not a valid identifier.",
1866                              *name);
1867     });
1868   }
1869   return true;
1870 }
1871 
ParseReservedNames(DescriptorProto * message,const LocationRecorder & parent_location)1872 bool Parser::ParseReservedNames(DescriptorProto* message,
1873                                 const LocationRecorder& parent_location) {
1874   do {
1875     LocationRecorder location(parent_location, message->reserved_name_size());
1876     DO(ParseReservedName(message->add_reserved_name(),
1877                          "Expected field name string literal."));
1878   } while (TryConsume(","));
1879   DO(ConsumeEndOfDeclaration(";", &parent_location));
1880   return true;
1881 }
1882 
ParseReservedIdentifier(std::string * name,ErrorMaker error_message)1883 bool Parser::ParseReservedIdentifier(std::string* name,
1884                                      ErrorMaker error_message) {
1885   DO(ConsumeIdentifier(name, error_message));
1886   return true;
1887 }
1888 
ParseReservedIdentifiers(DescriptorProto * message,const LocationRecorder & parent_location)1889 bool Parser::ParseReservedIdentifiers(DescriptorProto* message,
1890                                       const LocationRecorder& parent_location) {
1891   do {
1892     LocationRecorder location(parent_location, message->reserved_name_size());
1893     DO(ParseReservedIdentifier(message->add_reserved_name(),
1894                                "Expected field name identifier."));
1895   } while (TryConsume(","));
1896   DO(ConsumeEndOfDeclaration(";", &parent_location));
1897   return true;
1898 }
1899 
ParseReservedNumbers(DescriptorProto * message,const LocationRecorder & parent_location)1900 bool Parser::ParseReservedNumbers(DescriptorProto* message,
1901                                   const LocationRecorder& parent_location) {
1902   bool first = true;
1903   do {
1904     LocationRecorder location(parent_location, message->reserved_range_size());
1905 
1906     DescriptorProto::ReservedRange* range = message->add_reserved_range();
1907     location.RecordLegacyLocation(range,
1908                                   DescriptorPool::ErrorCollector::NUMBER);
1909     int start, end;
1910     io::Tokenizer::Token start_token;
1911     {
1912       LocationRecorder start_location(
1913           location, DescriptorProto::ReservedRange::kStartFieldNumber);
1914       start_token = input_->current();
1915       DO(ConsumeInteger(&start, (first ? "Expected field name or number range."
1916                                        : "Expected field number range.")));
1917     }
1918 
1919     if (TryConsume("to")) {
1920       LocationRecorder end_location(
1921           location, DescriptorProto::ReservedRange::kEndFieldNumber);
1922       if (TryConsume("max")) {
1923         // Set to the sentinel value - 1 since we increment the value below.
1924         // The actual value of the end of the range should be set with
1925         // AdjustExtensionRangesWithMaxEndNumber.
1926         end = kMaxRangeSentinel - 1;
1927       } else {
1928         DO(ConsumeInteger(&end, "Expected integer."));
1929       }
1930     } else {
1931       LocationRecorder end_location(
1932           location, DescriptorProto::ReservedRange::kEndFieldNumber);
1933       end_location.StartAt(start_token);
1934       end_location.EndAt(start_token);
1935       end = start;
1936     }
1937 
1938     // Users like to specify inclusive ranges, but in code we like the end
1939     // number to be exclusive.
1940     ++end;
1941 
1942     range->set_start(start);
1943     range->set_end(end);
1944     first = false;
1945   } while (TryConsume(","));
1946 
1947   DO(ConsumeEndOfDeclaration(";", &parent_location));
1948   return true;
1949 }
1950 
ParseReserved(EnumDescriptorProto * proto,const LocationRecorder & enum_location)1951 bool Parser::ParseReserved(EnumDescriptorProto* proto,
1952                            const LocationRecorder& enum_location) {
1953   io::Tokenizer::Token start_token = input_->current();
1954   // Parse the declaration.
1955   DO(Consume("reserved"));
1956   if (LookingAtType(io::Tokenizer::TYPE_STRING)) {
1957     if (syntax_identifier_ == "editions") {
1958       RecordError(
1959           "Reserved names must be identifiers in editions, not string "
1960           "literals.");
1961       return false;
1962     }
1963     LocationRecorder location(enum_location,
1964                               EnumDescriptorProto::kReservedNameFieldNumber);
1965     location.StartAt(start_token);
1966     return ParseReservedNames(proto, location);
1967   } else if (LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) {
1968     if (syntax_identifier_ != "editions") {
1969       RecordError(
1970           "Reserved names must be string literals. (Only editions supports "
1971           "identifiers.)");
1972       return false;
1973     }
1974     LocationRecorder location(enum_location,
1975                               EnumDescriptorProto::kReservedNameFieldNumber);
1976     location.StartAt(start_token);
1977     return ParseReservedIdentifiers(proto, location);
1978   } else {
1979     LocationRecorder location(enum_location,
1980                               EnumDescriptorProto::kReservedRangeFieldNumber);
1981     location.StartAt(start_token);
1982     return ParseReservedNumbers(proto, location);
1983   }
1984 }
1985 
ParseReservedNames(EnumDescriptorProto * proto,const LocationRecorder & parent_location)1986 bool Parser::ParseReservedNames(EnumDescriptorProto* proto,
1987                                 const LocationRecorder& parent_location) {
1988   do {
1989     LocationRecorder location(parent_location, proto->reserved_name_size());
1990     DO(ParseReservedName(proto->add_reserved_name(),
1991                          "Expected enum value string literal."));
1992   } while (TryConsume(","));
1993   DO(ConsumeEndOfDeclaration(";", &parent_location));
1994   return true;
1995 }
1996 
ParseReservedIdentifiers(EnumDescriptorProto * proto,const LocationRecorder & parent_location)1997 bool Parser::ParseReservedIdentifiers(EnumDescriptorProto* proto,
1998                                       const LocationRecorder& parent_location) {
1999   do {
2000     LocationRecorder location(parent_location, proto->reserved_name_size());
2001     DO(ParseReservedIdentifier(proto->add_reserved_name(),
2002                                "Expected enum value identifier."));
2003   } while (TryConsume(","));
2004   DO(ConsumeEndOfDeclaration(";", &parent_location));
2005   return true;
2006 }
2007 
ParseReservedNumbers(EnumDescriptorProto * proto,const LocationRecorder & parent_location)2008 bool Parser::ParseReservedNumbers(EnumDescriptorProto* proto,
2009                                   const LocationRecorder& parent_location) {
2010   bool first = true;
2011   do {
2012     LocationRecorder location(parent_location, proto->reserved_range_size());
2013 
2014     EnumDescriptorProto::EnumReservedRange* range = proto->add_reserved_range();
2015     location.RecordLegacyLocation(range,
2016                                   DescriptorPool::ErrorCollector::NUMBER);
2017     int start, end;
2018     io::Tokenizer::Token start_token;
2019     {
2020       LocationRecorder start_location(
2021           location, EnumDescriptorProto::EnumReservedRange::kStartFieldNumber);
2022       start_token = input_->current();
2023       DO(ConsumeSignedInteger(&start,
2024                               (first ? "Expected enum value or number range."
2025                                      : "Expected enum number range.")));
2026     }
2027 
2028     if (TryConsume("to")) {
2029       LocationRecorder end_location(
2030           location, EnumDescriptorProto::EnumReservedRange::kEndFieldNumber);
2031       if (TryConsume("max")) {
2032         // This is in the enum descriptor path, which doesn't have the message
2033         // set duality to fix up, so it doesn't integrate with the sentinel.
2034         end = INT_MAX;
2035       } else {
2036         DO(ConsumeSignedInteger(&end, "Expected integer."));
2037       }
2038     } else {
2039       LocationRecorder end_location(
2040           location, EnumDescriptorProto::EnumReservedRange::kEndFieldNumber);
2041       end_location.StartAt(start_token);
2042       end_location.EndAt(start_token);
2043       end = start;
2044     }
2045 
2046     range->set_start(start);
2047     range->set_end(end);
2048     first = false;
2049   } while (TryConsume(","));
2050 
2051   DO(ConsumeEndOfDeclaration(";", &parent_location));
2052   return true;
2053 }
2054 
ParseExtend(RepeatedPtrField<FieldDescriptorProto> * extensions,RepeatedPtrField<DescriptorProto> * messages,const LocationRecorder & parent_location,int location_field_number_for_nested_type,const LocationRecorder & extend_location,const FileDescriptorProto * containing_file)2055 bool Parser::ParseExtend(RepeatedPtrField<FieldDescriptorProto>* extensions,
2056                          RepeatedPtrField<DescriptorProto>* messages,
2057                          const LocationRecorder& parent_location,
2058                          int location_field_number_for_nested_type,
2059                          const LocationRecorder& extend_location,
2060                          const FileDescriptorProto* containing_file) {
2061   DO(Consume("extend"));
2062 
2063   // Parse the extendee type.
2064   io::Tokenizer::Token extendee_start = input_->current();
2065   std::string extendee;
2066   DO(ParseUserDefinedType(&extendee));
2067   io::Tokenizer::Token extendee_end = input_->previous();
2068 
2069   // Parse the block.
2070   DO(ConsumeEndOfDeclaration("{", &extend_location));
2071 
2072   bool is_first = true;
2073 
2074   do {
2075     if (AtEnd()) {
2076       RecordError("Reached end of input in extend definition (missing '}').");
2077       return false;
2078     }
2079 
2080     // Note that kExtensionFieldNumber was already pushed by the parent.
2081     LocationRecorder location(extend_location, extensions->size());
2082 
2083     FieldDescriptorProto* field = extensions->Add();
2084 
2085     {
2086       LocationRecorder extendee_location(
2087           location, FieldDescriptorProto::kExtendeeFieldNumber);
2088       extendee_location.StartAt(extendee_start);
2089       extendee_location.EndAt(extendee_end);
2090 
2091       if (is_first) {
2092         extendee_location.RecordLegacyLocation(
2093             field, DescriptorPool::ErrorCollector::EXTENDEE);
2094         is_first = false;
2095       }
2096     }
2097 
2098     field->set_extendee(extendee);
2099 
2100     if (!ParseMessageField(field, messages, parent_location,
2101                            location_field_number_for_nested_type, location,
2102                            containing_file)) {
2103       // This statement failed to parse.  Skip it, but keep looping to parse
2104       // other statements.
2105       SkipStatement();
2106     }
2107   } while (!TryConsumeEndOfDeclaration("}", nullptr));
2108 
2109   return true;
2110 }
2111 
ParseOneof(OneofDescriptorProto * oneof_decl,DescriptorProto * containing_type,int oneof_index,const LocationRecorder & oneof_location,const LocationRecorder & containing_type_location,const FileDescriptorProto * containing_file)2112 bool Parser::ParseOneof(OneofDescriptorProto* oneof_decl,
2113                         DescriptorProto* containing_type, int oneof_index,
2114                         const LocationRecorder& oneof_location,
2115                         const LocationRecorder& containing_type_location,
2116                         const FileDescriptorProto* containing_file) {
2117   DO(Consume("oneof"));
2118 
2119   {
2120     LocationRecorder name_location(oneof_location,
2121                                    OneofDescriptorProto::kNameFieldNumber);
2122     DO(ConsumeIdentifier(oneof_decl->mutable_name(), "Expected oneof name."));
2123   }
2124 
2125   DO(ConsumeEndOfDeclaration("{", &oneof_location));
2126 
2127   do {
2128     if (AtEnd()) {
2129       RecordError("Reached end of input in oneof definition (missing '}').");
2130       return false;
2131     }
2132 
2133     if (LookingAt("option")) {
2134       LocationRecorder option_location(
2135           oneof_location, OneofDescriptorProto::kOptionsFieldNumber);
2136       if (!ParseOption(oneof_decl->mutable_options(), option_location,
2137                        containing_file, OPTION_STATEMENT)) {
2138         return false;
2139       }
2140       continue;
2141     }
2142 
2143     // Print a nice error if the user accidentally tries to place a label
2144     // on an individual member of a oneof.
2145     if (LookingAt("required") || LookingAt("optional") ||
2146         LookingAt("repeated")) {
2147       RecordError(
2148           "Fields in oneofs must not have labels (required / optional "
2149           "/ repeated).");
2150       // We can continue parsing here because we understand what the user
2151       // meant.  The error report will still make parsing fail overall.
2152       input_->Next();
2153     }
2154 
2155     LocationRecorder field_location(containing_type_location,
2156                                     DescriptorProto::kFieldFieldNumber,
2157                                     containing_type->field_size());
2158 
2159     FieldDescriptorProto* field = containing_type->add_field();
2160     field->set_label(FieldDescriptorProto::LABEL_OPTIONAL);
2161     field->set_oneof_index(oneof_index);
2162 
2163     if (!ParseMessageFieldNoLabel(field, containing_type->mutable_nested_type(),
2164                                   containing_type_location,
2165                                   DescriptorProto::kNestedTypeFieldNumber,
2166                                   field_location, containing_file)) {
2167       // This statement failed to parse.  Skip it, but keep looping to parse
2168       // other statements.
2169       SkipStatement();
2170     }
2171   } while (!TryConsumeEndOfDeclaration("}", nullptr));
2172 
2173   return true;
2174 }
2175 
2176 // -------------------------------------------------------------------
2177 // Enums
2178 
ParseEnumDefinition(EnumDescriptorProto * enum_type,const LocationRecorder & enum_location,const FileDescriptorProto * containing_file)2179 bool Parser::ParseEnumDefinition(EnumDescriptorProto* enum_type,
2180                                  const LocationRecorder& enum_location,
2181                                  const FileDescriptorProto* containing_file) {
2182   DO(Consume("enum"));
2183 
2184   {
2185     LocationRecorder location(enum_location,
2186                               EnumDescriptorProto::kNameFieldNumber);
2187     location.RecordLegacyLocation(enum_type,
2188                                   DescriptorPool::ErrorCollector::NAME);
2189     DO(ConsumeIdentifier(enum_type->mutable_name(), "Expected enum name."));
2190   }
2191 
2192   DO(ParseEnumBlock(enum_type, enum_location, containing_file));
2193 
2194   DO(ValidateEnum(enum_type));
2195 
2196   return true;
2197 }
2198 
ParseEnumBlock(EnumDescriptorProto * enum_type,const LocationRecorder & enum_location,const FileDescriptorProto * containing_file)2199 bool Parser::ParseEnumBlock(EnumDescriptorProto* enum_type,
2200                             const LocationRecorder& enum_location,
2201                             const FileDescriptorProto* containing_file) {
2202   DO(ConsumeEndOfDeclaration("{", &enum_location));
2203 
2204   while (!TryConsumeEndOfDeclaration("}", nullptr)) {
2205     if (AtEnd()) {
2206       RecordError("Reached end of input in enum definition (missing '}').");
2207       return false;
2208     }
2209 
2210     if (!ParseEnumStatement(enum_type, enum_location, containing_file)) {
2211       // This statement failed to parse.  Skip it, but keep looping to parse
2212       // other statements.
2213       SkipStatement();
2214     }
2215   }
2216 
2217   return true;
2218 }
2219 
ParseEnumStatement(EnumDescriptorProto * enum_type,const LocationRecorder & enum_location,const FileDescriptorProto * containing_file)2220 bool Parser::ParseEnumStatement(EnumDescriptorProto* enum_type,
2221                                 const LocationRecorder& enum_location,
2222                                 const FileDescriptorProto* containing_file) {
2223   if (TryConsumeEndOfDeclaration(";", nullptr)) {
2224     // empty statement; ignore
2225     return true;
2226   } else if (LookingAt("option")) {
2227     LocationRecorder location(enum_location,
2228                               EnumDescriptorProto::kOptionsFieldNumber);
2229     return ParseOption(enum_type->mutable_options(), location, containing_file,
2230                        OPTION_STATEMENT);
2231   } else if (LookingAt("reserved")) {
2232     return ParseReserved(enum_type, enum_location);
2233   } else {
2234     LocationRecorder location(enum_location,
2235                               EnumDescriptorProto::kValueFieldNumber,
2236                               enum_type->value_size());
2237     return ParseEnumConstant(enum_type->add_value(), location, containing_file);
2238   }
2239 }
2240 
ParseEnumConstant(EnumValueDescriptorProto * enum_value,const LocationRecorder & enum_value_location,const FileDescriptorProto * containing_file)2241 bool Parser::ParseEnumConstant(EnumValueDescriptorProto* enum_value,
2242                                const LocationRecorder& enum_value_location,
2243                                const FileDescriptorProto* containing_file) {
2244   // Parse name.
2245   {
2246     LocationRecorder location(enum_value_location,
2247                               EnumValueDescriptorProto::kNameFieldNumber);
2248     location.RecordLegacyLocation(enum_value,
2249                                   DescriptorPool::ErrorCollector::NAME);
2250     DO(ConsumeIdentifier(enum_value->mutable_name(),
2251                          "Expected enum constant name."));
2252   }
2253 
2254   DO(Consume("=", "Missing numeric value for enum constant."));
2255 
2256   // Parse value.
2257   {
2258     LocationRecorder location(enum_value_location,
2259                               EnumValueDescriptorProto::kNumberFieldNumber);
2260     location.RecordLegacyLocation(enum_value,
2261                                   DescriptorPool::ErrorCollector::NUMBER);
2262 
2263     int number;
2264     DO(ConsumeSignedInteger(&number, "Expected integer."));
2265     enum_value->set_number(number);
2266   }
2267 
2268   DO(ParseEnumConstantOptions(enum_value, enum_value_location,
2269                               containing_file));
2270 
2271   DO(ConsumeEndOfDeclaration(";", &enum_value_location));
2272 
2273   return true;
2274 }
2275 
ParseEnumConstantOptions(EnumValueDescriptorProto * value,const LocationRecorder & enum_value_location,const FileDescriptorProto * containing_file)2276 bool Parser::ParseEnumConstantOptions(
2277     EnumValueDescriptorProto* value,
2278     const LocationRecorder& enum_value_location,
2279     const FileDescriptorProto* containing_file) {
2280   if (!LookingAt("[")) return true;
2281 
2282   LocationRecorder location(enum_value_location,
2283                             EnumValueDescriptorProto::kOptionsFieldNumber);
2284 
2285   DO(Consume("["));
2286 
2287   do {
2288     DO(ParseOption(value->mutable_options(), location, containing_file,
2289                    OPTION_ASSIGNMENT));
2290   } while (TryConsume(","));
2291 
2292   DO(Consume("]"));
2293   return true;
2294 }
2295 
2296 // -------------------------------------------------------------------
2297 // Services
2298 
ParseServiceDefinition(ServiceDescriptorProto * service,const LocationRecorder & service_location,const FileDescriptorProto * containing_file)2299 bool Parser::ParseServiceDefinition(
2300     ServiceDescriptorProto* service, const LocationRecorder& service_location,
2301     const FileDescriptorProto* containing_file) {
2302   DO(Consume("service"));
2303 
2304   {
2305     LocationRecorder location(service_location,
2306                               ServiceDescriptorProto::kNameFieldNumber);
2307     location.RecordLegacyLocation(service,
2308                                   DescriptorPool::ErrorCollector::NAME);
2309     DO(ConsumeIdentifier(service->mutable_name(), "Expected service name."));
2310   }
2311 
2312   DO(ParseServiceBlock(service, service_location, containing_file));
2313   return true;
2314 }
2315 
ParseServiceBlock(ServiceDescriptorProto * service,const LocationRecorder & service_location,const FileDescriptorProto * containing_file)2316 bool Parser::ParseServiceBlock(ServiceDescriptorProto* service,
2317                                const LocationRecorder& service_location,
2318                                const FileDescriptorProto* containing_file) {
2319   DO(ConsumeEndOfDeclaration("{", &service_location));
2320 
2321   while (!TryConsumeEndOfDeclaration("}", nullptr)) {
2322     if (AtEnd()) {
2323       RecordError("Reached end of input in service definition (missing '}').");
2324       return false;
2325     }
2326 
2327     if (!ParseServiceStatement(service, service_location, containing_file)) {
2328       // This statement failed to parse.  Skip it, but keep looping to parse
2329       // other statements.
2330       SkipStatement();
2331     }
2332   }
2333 
2334   return true;
2335 }
2336 
ParseServiceStatement(ServiceDescriptorProto * service,const LocationRecorder & service_location,const FileDescriptorProto * containing_file)2337 bool Parser::ParseServiceStatement(ServiceDescriptorProto* service,
2338                                    const LocationRecorder& service_location,
2339                                    const FileDescriptorProto* containing_file) {
2340   if (TryConsumeEndOfDeclaration(";", nullptr)) {
2341     // empty statement; ignore
2342     return true;
2343   } else if (LookingAt("option")) {
2344     LocationRecorder location(service_location,
2345                               ServiceDescriptorProto::kOptionsFieldNumber);
2346     return ParseOption(service->mutable_options(), location, containing_file,
2347                        OPTION_STATEMENT);
2348   } else {
2349     LocationRecorder location(service_location,
2350                               ServiceDescriptorProto::kMethodFieldNumber,
2351                               service->method_size());
2352     return ParseServiceMethod(service->add_method(), location, containing_file);
2353   }
2354 }
2355 
ParseServiceMethod(MethodDescriptorProto * method,const LocationRecorder & method_location,const FileDescriptorProto * containing_file)2356 bool Parser::ParseServiceMethod(MethodDescriptorProto* method,
2357                                 const LocationRecorder& method_location,
2358                                 const FileDescriptorProto* containing_file) {
2359   DO(Consume("rpc"));
2360 
2361   {
2362     LocationRecorder location(method_location,
2363                               MethodDescriptorProto::kNameFieldNumber);
2364     location.RecordLegacyLocation(method, DescriptorPool::ErrorCollector::NAME);
2365     DO(ConsumeIdentifier(method->mutable_name(), "Expected method name."));
2366   }
2367 
2368   // Parse input type.
2369   DO(Consume("("));
2370   {
2371     if (LookingAt("stream")) {
2372       LocationRecorder location(
2373           method_location, MethodDescriptorProto::kClientStreamingFieldNumber);
2374       location.RecordLegacyLocation(method,
2375                                     DescriptorPool::ErrorCollector::OTHER);
2376       method->set_client_streaming(true);
2377       DO(Consume("stream"));
2378     }
2379     LocationRecorder location(method_location,
2380                               MethodDescriptorProto::kInputTypeFieldNumber);
2381     location.RecordLegacyLocation(method,
2382                                   DescriptorPool::ErrorCollector::INPUT_TYPE);
2383     DO(ParseUserDefinedType(method->mutable_input_type()));
2384   }
2385   DO(Consume(")"));
2386 
2387   // Parse output type.
2388   DO(Consume("returns"));
2389   DO(Consume("("));
2390   {
2391     if (LookingAt("stream")) {
2392       LocationRecorder location(
2393           method_location, MethodDescriptorProto::kServerStreamingFieldNumber);
2394       location.RecordLegacyLocation(method,
2395                                     DescriptorPool::ErrorCollector::OTHER);
2396       DO(Consume("stream"));
2397       method->set_server_streaming(true);
2398     }
2399     LocationRecorder location(method_location,
2400                               MethodDescriptorProto::kOutputTypeFieldNumber);
2401     location.RecordLegacyLocation(method,
2402                                   DescriptorPool::ErrorCollector::OUTPUT_TYPE);
2403     DO(ParseUserDefinedType(method->mutable_output_type()));
2404   }
2405   DO(Consume(")"));
2406 
2407   if (LookingAt("{")) {
2408     // Options!
2409     DO(ParseMethodOptions(method_location, containing_file,
2410                           MethodDescriptorProto::kOptionsFieldNumber,
2411                           method->mutable_options()));
2412   } else {
2413     DO(ConsumeEndOfDeclaration(";", &method_location));
2414   }
2415 
2416   return true;
2417 }
2418 
ParseMethodOptions(const LocationRecorder & parent_location,const FileDescriptorProto * containing_file,const int optionsFieldNumber,Message * mutable_options)2419 bool Parser::ParseMethodOptions(const LocationRecorder& parent_location,
2420                                 const FileDescriptorProto* containing_file,
2421                                 const int optionsFieldNumber,
2422                                 Message* mutable_options) {
2423   // Options!
2424   ConsumeEndOfDeclaration("{", &parent_location);
2425   while (!TryConsumeEndOfDeclaration("}", nullptr)) {
2426     if (AtEnd()) {
2427       RecordError("Reached end of input in method options (missing '}').");
2428       return false;
2429     }
2430 
2431     if (TryConsumeEndOfDeclaration(";", nullptr)) {
2432       // empty statement; ignore
2433     } else {
2434       LocationRecorder location(parent_location, optionsFieldNumber);
2435       if (!ParseOption(mutable_options, location, containing_file,
2436                        OPTION_STATEMENT)) {
2437         // This statement failed to parse.  Skip it, but keep looping to
2438         // parse other statements.
2439         SkipStatement();
2440       }
2441     }
2442   }
2443 
2444   return true;
2445 }
2446 
2447 // -------------------------------------------------------------------
2448 
ParseLabel(FieldDescriptorProto::Label * label,const LocationRecorder & field_location)2449 bool Parser::ParseLabel(FieldDescriptorProto::Label* label,
2450                         const LocationRecorder& field_location) {
2451   if (!LookingAt("optional") && !LookingAt("repeated") &&
2452       !LookingAt("required")) {
2453     return false;
2454   }
2455   if (LookingAt("optional") && syntax_identifier_ == "editions") {
2456     RecordError(
2457         "Label \"optional\" is not supported in editions. By default, all "
2458         "singular fields have presence unless features.field_presence is set.");
2459   }
2460   if (LookingAt("required") && syntax_identifier_ == "editions") {
2461     RecordError(
2462         "Label \"required\" is not supported in editions, use "
2463         "features.field_presence = LEGACY_REQUIRED.");
2464   }
2465 
2466   LocationRecorder location(field_location,
2467                             FieldDescriptorProto::kLabelFieldNumber);
2468   if (TryConsume("optional")) {
2469     *label = FieldDescriptorProto::LABEL_OPTIONAL;
2470   } else if (TryConsume("repeated")) {
2471     *label = FieldDescriptorProto::LABEL_REPEATED;
2472   } else {
2473     Consume("required");
2474     *label = FieldDescriptorProto::LABEL_REQUIRED;
2475   }
2476   return true;
2477 }
2478 
ParseType(FieldDescriptorProto::Type * type,std::string * type_name)2479 bool Parser::ParseType(FieldDescriptorProto::Type* type,
2480                        std::string* type_name) {
2481   const auto& type_names_table = GetTypeNameTable();
2482   auto iter = type_names_table.find(input_->current().text);
2483   if (iter != type_names_table.end()) {
2484     if (syntax_identifier_ == "editions" &&
2485         iter->second == FieldDescriptorProto::TYPE_GROUP) {
2486       RecordError(
2487           "Group syntax is no longer supported in editions. To get group "
2488           "behavior you can specify features.message_encoding = DELIMITED on a "
2489           "message field.");
2490     }
2491     *type = iter->second;
2492     input_->Next();
2493   } else {
2494     DO(ParseUserDefinedType(type_name));
2495   }
2496   return true;
2497 }
2498 
ParseUserDefinedType(std::string * type_name)2499 bool Parser::ParseUserDefinedType(std::string* type_name) {
2500   type_name->clear();
2501 
2502   const auto& type_names_table = GetTypeNameTable();
2503   auto iter = type_names_table.find(input_->current().text);
2504   if (iter != type_names_table.end()) {
2505     // Note:  The only place enum types are allowed is for field types, but
2506     //   if we are parsing a field type then we would not get here because
2507     //   primitives are allowed there as well.  So this error message doesn't
2508     //   need to account for enums.
2509     RecordError("Expected message type.");
2510 
2511     // Pretend to accept this type so that we can go on parsing.
2512     *type_name = input_->current().text;
2513     input_->Next();
2514     return true;
2515   }
2516 
2517   // A leading "." means the name is fully-qualified.
2518   if (TryConsume(".")) type_name->append(".");
2519 
2520   // Consume the first part of the name.
2521   std::string identifier;
2522   DO(ConsumeIdentifier(&identifier, "Expected type name."));
2523   type_name->append(identifier);
2524 
2525   // Consume more parts.
2526   while (TryConsume(".")) {
2527     type_name->append(".");
2528     DO(ConsumeIdentifier(&identifier, "Expected identifier."));
2529     type_name->append(identifier);
2530   }
2531 
2532   return true;
2533 }
2534 
2535 // ===================================================================
2536 
ParsePackage(FileDescriptorProto * file,const LocationRecorder & root_location,const FileDescriptorProto * containing_file)2537 bool Parser::ParsePackage(FileDescriptorProto* file,
2538                           const LocationRecorder& root_location,
2539                           const FileDescriptorProto* containing_file) {
2540   if (file->has_package()) {
2541     RecordError("Multiple package definitions.");
2542     // Don't append the new package to the old one.  Just replace it.  Not
2543     // that it really matters since this is an error anyway.
2544     file->clear_package();
2545   }
2546 
2547   LocationRecorder location(root_location,
2548                             FileDescriptorProto::kPackageFieldNumber);
2549   location.RecordLegacyLocation(file, DescriptorPool::ErrorCollector::NAME);
2550 
2551   DO(Consume("package"));
2552 
2553   while (true) {
2554     std::string identifier;
2555     DO(ConsumeIdentifier(&identifier, "Expected identifier."));
2556     file->mutable_package()->append(identifier);
2557     if (!TryConsume(".")) break;
2558     file->mutable_package()->append(".");
2559   }
2560 
2561   DO(ConsumeEndOfDeclaration(";", &location));
2562 
2563   return true;
2564 }
2565 
ParseImport(RepeatedPtrField<std::string> * dependency,RepeatedField<int32_t> * public_dependency,RepeatedField<int32_t> * weak_dependency,const LocationRecorder & root_location,const FileDescriptorProto * containing_file)2566 bool Parser::ParseImport(RepeatedPtrField<std::string>* dependency,
2567                          RepeatedField<int32_t>* public_dependency,
2568                          RepeatedField<int32_t>* weak_dependency,
2569                          const LocationRecorder& root_location,
2570                          const FileDescriptorProto* containing_file) {
2571   LocationRecorder location(root_location,
2572                             FileDescriptorProto::kDependencyFieldNumber,
2573                             dependency->size());
2574 
2575   DO(Consume("import"));
2576 
2577   if (LookingAt("public")) {
2578     LocationRecorder public_location(
2579         root_location, FileDescriptorProto::kPublicDependencyFieldNumber,
2580         public_dependency->size());
2581     DO(Consume("public"));
2582     *public_dependency->Add() = dependency->size();
2583   } else if (LookingAt("weak")) {
2584     LocationRecorder weak_location(
2585         root_location, FileDescriptorProto::kWeakDependencyFieldNumber,
2586         weak_dependency->size());
2587     weak_location.RecordLegacyImportLocation(containing_file, "weak");
2588     DO(Consume("weak"));
2589     *weak_dependency->Add() = dependency->size();
2590   }
2591 
2592   std::string import_file;
2593   DO(ConsumeString(&import_file,
2594                    "Expected a string naming the file to import."));
2595   *dependency->Add() = import_file;
2596   location.RecordLegacyImportLocation(containing_file, import_file);
2597 
2598   DO(ConsumeEndOfDeclaration(";", &location));
2599 
2600   return true;
2601 }
2602 
2603 // ===================================================================
2604 
SourceLocationTable()2605 SourceLocationTable::SourceLocationTable() {}
~SourceLocationTable()2606 SourceLocationTable::~SourceLocationTable() {}
2607 
Find(const Message * descriptor,DescriptorPool::ErrorCollector::ErrorLocation location,int * line,int * column) const2608 bool SourceLocationTable::Find(
2609     const Message* descriptor,
2610     DescriptorPool::ErrorCollector::ErrorLocation location, int* line,
2611     int* column) const {
2612   auto it = location_map_.find({descriptor, location});
2613   if (it == location_map_.end()) {
2614     *line = -1;
2615     *column = 0;
2616     return false;
2617   }
2618   std::tie(*line, *column) = it->second;
2619   return true;
2620 }
2621 
FindImport(const Message * descriptor,absl::string_view name,int * line,int * column) const2622 bool SourceLocationTable::FindImport(const Message* descriptor,
2623                                      absl::string_view name, int* line,
2624                                      int* column) const {
2625   auto it = import_location_map_.find({descriptor, std::string(name)});
2626   if (it == import_location_map_.end()) {
2627     *line = -1;
2628     *column = 0;
2629     return false;
2630   }
2631   std::tie(*line, *column) = it->second;
2632   return true;
2633 }
2634 
Add(const Message * descriptor,DescriptorPool::ErrorCollector::ErrorLocation location,int line,int column)2635 void SourceLocationTable::Add(
2636     const Message* descriptor,
2637     DescriptorPool::ErrorCollector::ErrorLocation location, int line,
2638     int column) {
2639   location_map_[std::make_pair(descriptor, location)] =
2640       std::make_pair(line, column);
2641 }
2642 
AddImport(const Message * descriptor,const std::string & name,int line,int column)2643 void SourceLocationTable::AddImport(const Message* descriptor,
2644                                     const std::string& name, int line,
2645                                     int column) {
2646   import_location_map_[std::make_pair(descriptor, name)] =
2647       std::make_pair(line, column);
2648 }
2649 
Clear()2650 void SourceLocationTable::Clear() { location_map_.clear(); }
2651 
2652 }  // namespace compiler
2653 }  // namespace protobuf
2654 }  // namespace google
2655 
2656 #include "google/protobuf/port_undef.inc"
2657