1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc. All rights reserved.
3 //
4 // Use of this source code is governed by a BSD-style
5 // license that can be found in the LICENSE file or at
6 // https://developers.google.com/open-source/licenses/bsd
7
8 // Author: kenton@google.com (Kenton Varda)
9 // Based on original Protocol Buffers design by
10 // Sanjay Ghemawat, Jeff Dean, and others.
11 //
12 // Recursive descent FTW.
13
14 #include "google/protobuf/compiler/parser.h"
15
16 #include <float.h>
17
18 #include <cstddef>
19 #include <cstdint>
20 #include <limits>
21 #include <string>
22 #include <tuple>
23 #include <utility>
24 #include <vector>
25
26 #include "absl/base/casts.h"
27 #include "absl/cleanup/cleanup.h"
28 #include "absl/container/flat_hash_map.h"
29 #include "absl/container/flat_hash_set.h"
30 #include "absl/log/absl_check.h"
31 #include "absl/log/absl_log.h"
32 #include "absl/strings/ascii.h"
33 #include "absl/strings/escaping.h"
34 #include "absl/strings/str_cat.h"
35 #include "absl/strings/str_format.h"
36 #include "absl/strings/string_view.h"
37 #include "google/protobuf/descriptor.h"
38 #include "google/protobuf/descriptor.pb.h"
39 #include "google/protobuf/io/strtod.h"
40 #include "google/protobuf/io/tokenizer.h"
41 #include "google/protobuf/message_lite.h"
42 #include "google/protobuf/port.h"
43 #include "google/protobuf/wire_format.h"
44
45 // Must be included last.
46 #include "google/protobuf/port_def.inc"
47
48 namespace google {
49 namespace protobuf {
50 namespace compiler {
51 namespace {
52
// Lookup table type mapping a type keyword as spelled in a .proto file (for
// example "int32") to the matching FieldDescriptorProto::Type value.
using TypeNameMap =
    absl::flat_hash_map<absl::string_view, FieldDescriptorProto::Type>;
55
GetTypeNameTable()56 const TypeNameMap& GetTypeNameTable() {
57 static auto* table = new auto([]() {
58 TypeNameMap result;
59
60 result["double"] = FieldDescriptorProto::TYPE_DOUBLE;
61 result["float"] = FieldDescriptorProto::TYPE_FLOAT;
62 result["uint64"] = FieldDescriptorProto::TYPE_UINT64;
63 result["fixed64"] = FieldDescriptorProto::TYPE_FIXED64;
64 result["fixed32"] = FieldDescriptorProto::TYPE_FIXED32;
65 result["bool"] = FieldDescriptorProto::TYPE_BOOL;
66 result["string"] = FieldDescriptorProto::TYPE_STRING;
67 result["group"] = FieldDescriptorProto::TYPE_GROUP;
68
69 result["bytes"] = FieldDescriptorProto::TYPE_BYTES;
70 result["uint32"] = FieldDescriptorProto::TYPE_UINT32;
71 result["sfixed32"] = FieldDescriptorProto::TYPE_SFIXED32;
72 result["sfixed64"] = FieldDescriptorProto::TYPE_SFIXED64;
73 result["int32"] = FieldDescriptorProto::TYPE_INT32;
74 result["int64"] = FieldDescriptorProto::TYPE_INT64;
75 result["sint32"] = FieldDescriptorProto::TYPE_SINT32;
76 result["sint64"] = FieldDescriptorProto::TYPE_SINT64;
77
78 return result;
79 }());
80 return *table;
81 }
82
83 // Camel-case the field name and append "Entry" for generated map entry name.
84 // e.g. map<KeyType, ValueType> foo_map => FooMapEntry
MapEntryName(absl::string_view field_name)85 std::string MapEntryName(absl::string_view field_name) {
86 std::string result;
87 static const char kSuffix[] = "Entry";
88 result.reserve(field_name.size() + sizeof(kSuffix));
89 bool cap_next = true;
90 for (const char field_name_char : field_name) {
91 if (field_name_char == '_') {
92 cap_next = true;
93 } else if (cap_next) {
94 // Note: Do not use ctype.h due to locales.
95 if ('a' <= field_name_char && field_name_char <= 'z') {
96 result.push_back(field_name_char - 'a' + 'A');
97 } else {
98 result.push_back(field_name_char);
99 }
100 cap_next = false;
101 } else {
102 result.push_back(field_name_char);
103 }
104 }
105 result.append(kSuffix);
106 return result;
107 }
108
// True iff `c` lies in the range 'A'..'Z'.
bool IsUppercase(char c) {
  return 'A' <= c && c <= 'Z';
}
110
// True iff `c` lies in the range 'a'..'z'.
bool IsLowercase(char c) {
  return 'a' <= c && c <= 'z';
}
112
// True iff `c` lies in the range '0'..'9'.
bool IsNumber(char c) {
  return '0' <= c && c <= '9';
}
114
IsUpperCamelCase(absl::string_view name)115 bool IsUpperCamelCase(absl::string_view name) {
116 if (name.empty()) {
117 return true;
118 }
119 // Name must start with an upper case character.
120 if (!IsUppercase(name[0])) {
121 return false;
122 }
123 // Must not contains underscore.
124 for (const char c : name) {
125 if (c == '_') {
126 return false;
127 }
128 }
129 return true;
130 }
131
IsUpperUnderscore(absl::string_view name)132 bool IsUpperUnderscore(absl::string_view name) {
133 for (const char c : name) {
134 if (!IsUppercase(c) && c != '_' && !IsNumber(c)) {
135 return false;
136 }
137 }
138 return true;
139 }
140
IsLowerUnderscore(absl::string_view name)141 bool IsLowerUnderscore(absl::string_view name) {
142 for (const char c : name) {
143 if (!IsLowercase(c) && c != '_' && !IsNumber(c)) {
144 return false;
145 }
146 }
147 return true;
148 }
149
IsNumberFollowUnderscore(absl::string_view name)150 bool IsNumberFollowUnderscore(absl::string_view name) {
151 for (int i = 1; i < name.length(); i++) {
152 const char c = name[i];
153 if (IsNumber(c) && name[i - 1] == '_') {
154 return true;
155 }
156 }
157 return false;
158 }
159
160 } // anonymous namespace
161
// Makes code slightly more readable.  The meaning of "DO(foo)" is
// "Execute foo and fail if it fails.", where failure is indicated by
// returning false.
//
// The macro expands to an if/else whose else-branch is `return false`, with
// the terminating semicolon supplied at the call site.  It can therefore only
// be used as a statement inside a function whose return type accepts `false`.
#define DO(STATEMENT) \
  if (STATEMENT) {    \
  } else              \
    return false
169
170 // ===================================================================
171
// Creates a parser with no tokenizer, error collector or source location
// table attached, no errors recorded, and both syntax-identifier options
// (require / stop-after) switched off.
Parser::Parser()
    : input_(nullptr),
      error_collector_(nullptr),
      source_location_table_(nullptr),
      had_errors_(false),
      require_syntax_identifier_(false),
      stop_after_syntax_identifier_(false) {
}

// Member-wise destruction is sufficient.
Parser::~Parser() = default;
182 // ===================================================================
183
LookingAt(absl::string_view text)184 inline bool Parser::LookingAt(absl::string_view text) {
185 return input_->current().text == text;
186 }
187
LookingAtType(io::Tokenizer::TokenType token_type)188 inline bool Parser::LookingAtType(io::Tokenizer::TokenType token_type) {
189 return input_->current().type == token_type;
190 }
191
AtEnd()192 inline bool Parser::AtEnd() { return LookingAtType(io::Tokenizer::TYPE_END); }
193
TryConsume(absl::string_view text)194 bool Parser::TryConsume(absl::string_view text) {
195 if (LookingAt(text)) {
196 input_->Next();
197 return true;
198 } else {
199 return false;
200 }
201 }
202
Consume(absl::string_view text,ErrorMaker error)203 bool Parser::Consume(absl::string_view text, ErrorMaker error) {
204 if (TryConsume(text)) {
205 return true;
206 } else {
207 RecordError(error);
208 return false;
209 }
210 }
211
Consume(absl::string_view text)212 bool Parser::Consume(absl::string_view text) {
213 return Consume(text,
214 [&] { return absl::StrCat("Expected \"", text, "\"."); });
215 }
216
ConsumeIdentifier(std::string * output,ErrorMaker error)217 bool Parser::ConsumeIdentifier(std::string* output, ErrorMaker error) {
218 if (LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) {
219 *output = input_->current().text;
220 input_->Next();
221 return true;
222 } else {
223 RecordError(error);
224 return false;
225 }
226 }
227
ConsumeInteger(int * output,ErrorMaker error)228 bool Parser::ConsumeInteger(int* output, ErrorMaker error) {
229 if (LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
230 uint64_t value = 0;
231 if (!io::Tokenizer::ParseInteger(input_->current().text,
232 std::numeric_limits<int32_t>::max(),
233 &value)) {
234 RecordError("Integer out of range.");
235 // We still return true because we did, in fact, parse an integer.
236 }
237 *output = value;
238 input_->Next();
239 return true;
240 } else {
241 RecordError(error);
242 return false;
243 }
244 }
245
ConsumeSignedInteger(int * output,ErrorMaker error)246 bool Parser::ConsumeSignedInteger(int* output, ErrorMaker error) {
247 bool is_negative = false;
248 uint64_t max_value = std::numeric_limits<int32_t>::max();
249 if (TryConsume("-")) {
250 is_negative = true;
251 max_value += 1;
252 }
253 uint64_t value = 0;
254 DO(ConsumeInteger64(max_value, &value, error));
255 if (is_negative) value *= -1;
256 *output = value;
257 return true;
258 }
259
ConsumeInteger64(uint64_t max_value,uint64_t * output,ErrorMaker error)260 bool Parser::ConsumeInteger64(uint64_t max_value, uint64_t* output,
261 ErrorMaker error) {
262 if (LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
263 if (!io::Tokenizer::ParseInteger(input_->current().text, max_value,
264 output)) {
265 RecordError("Integer out of range.");
266 // We still return true because we did, in fact, parse an integer.
267 *output = 0;
268 }
269 input_->Next();
270 return true;
271 } else {
272 RecordError(error);
273 return false;
274 }
275 }
276
TryConsumeInteger64(uint64_t max_value,uint64_t * output)277 bool Parser::TryConsumeInteger64(uint64_t max_value, uint64_t* output) {
278 if (LookingAtType(io::Tokenizer::TYPE_INTEGER) &&
279 io::Tokenizer::ParseInteger(input_->current().text, max_value, output)) {
280 input_->Next();
281 return true;
282 }
283 return false;
284 }
285
ConsumeNumber(double * output,ErrorMaker error)286 bool Parser::ConsumeNumber(double* output, ErrorMaker error) {
287 if (LookingAtType(io::Tokenizer::TYPE_FLOAT)) {
288 *output = io::Tokenizer::ParseFloat(input_->current().text);
289 input_->Next();
290 return true;
291 } else if (LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
292 // Also accept integers.
293 uint64_t value = 0;
294 if (io::Tokenizer::ParseInteger(input_->current().text,
295 std::numeric_limits<uint64_t>::max(),
296 &value)) {
297 *output = value;
298 } else if (input_->current().text[0] == '0') {
299 // octal or hexadecimal; don't bother parsing as float
300 RecordError("Integer out of range.");
301 // We still return true because we did, in fact, parse a number.
302 } else if (!io::Tokenizer::TryParseFloat(input_->current().text, output)) {
303 // out of int range, and not valid float?
304 RecordError("Integer out of range.");
305 // We still return true because we did, in fact, parse a number.
306 }
307 input_->Next();
308 return true;
309 } else if (LookingAt("inf")) {
310 *output = std::numeric_limits<double>::infinity();
311 input_->Next();
312 return true;
313 } else if (LookingAt("nan")) {
314 *output = std::numeric_limits<double>::quiet_NaN();
315 input_->Next();
316 return true;
317 } else {
318 RecordError(error);
319 return false;
320 }
321 }
322
ConsumeString(std::string * output,ErrorMaker error)323 bool Parser::ConsumeString(std::string* output, ErrorMaker error) {
324 if (LookingAtType(io::Tokenizer::TYPE_STRING)) {
325 io::Tokenizer::ParseString(input_->current().text, output);
326 input_->Next();
327 // Allow C++ like concatenation of adjacent string tokens.
328 while (LookingAtType(io::Tokenizer::TYPE_STRING)) {
329 io::Tokenizer::ParseStringAppend(input_->current().text, output);
330 input_->Next();
331 }
332 return true;
333 } else {
334 RecordError(error);
335 return false;
336 }
337 }
338
TryConsumeEndOfDeclaration(absl::string_view text,const LocationRecorder * location)339 bool Parser::TryConsumeEndOfDeclaration(absl::string_view text,
340 const LocationRecorder* location) {
341 if (LookingAt(text)) {
342 std::string leading, trailing;
343 std::vector<std::string> detached;
344 input_->NextWithComments(&trailing, &detached, &leading);
345
346 // Save the leading comments for next time, and recall the leading comments
347 // from last time.
348 leading.swap(upcoming_doc_comments_);
349
350 if (location != nullptr) {
351 upcoming_detached_comments_.swap(detached);
352 location->AttachComments(&leading, &trailing, &detached);
353 } else if (text == "}") {
354 // If the current location is null and we are finishing the current scope,
355 // drop pending upcoming detached comments.
356 upcoming_detached_comments_.swap(detached);
357 } else {
358 // Otherwise, append the new detached comments to the existing upcoming
359 // detached comments.
360 upcoming_detached_comments_.insert(upcoming_detached_comments_.end(),
361 detached.begin(), detached.end());
362 }
363
364 return true;
365 } else {
366 return false;
367 }
368 }
369
ConsumeEndOfDeclaration(absl::string_view text,const LocationRecorder * location)370 bool Parser::ConsumeEndOfDeclaration(absl::string_view text,
371 const LocationRecorder* location) {
372 if (TryConsumeEndOfDeclaration(text, location)) {
373 return true;
374 } else {
375 RecordError([&] { return absl::StrCat("Expected \"", text, "\"."); });
376 return false;
377 }
378 }
379
380 // -------------------------------------------------------------------
381
RecordError(int line,int column,ErrorMaker error)382 void Parser::RecordError(int line, int column, ErrorMaker error) {
383 if (error_collector_ != nullptr) {
384 error_collector_->RecordError(line, column, error.get());
385 }
386 had_errors_ = true;
387 }
388
RecordError(ErrorMaker error)389 void Parser::RecordError(ErrorMaker error) {
390 RecordError(input_->current().line, input_->current().column, error);
391 }
392
RecordWarning(int line,int column,ErrorMaker error)393 void Parser::RecordWarning(int line, int column, ErrorMaker error) {
394 if (error_collector_ != nullptr) {
395 error_collector_->RecordWarning(line, column, error.get());
396 }
397 }
398
399 // Invokes error_collector_->RecordWarning() with the line and column number
400 // of the current token.
RecordWarning(ErrorMaker error)401 void Parser::RecordWarning(ErrorMaker error) {
402 RecordWarning(input_->current().line, input_->current().column, error);
403 }
404
405 // -------------------------------------------------------------------
406
// Creates a root recorder: adds a new location (with an empty path) to the
// parser's SourceCodeInfo and starts its span at the line and column of the
// current token.
Parser::LocationRecorder::LocationRecorder(Parser* parser)
    : parser_(parser),
      source_code_info_(parser->source_code_info_),
      location_(parser_->source_code_info_->add_location()) {
  // span[0] / span[1] hold the start line / start column.
  location_->add_span(parser_->input_->current().line);
  location_->add_span(parser_->input_->current().column);
}
414
LocationRecorder(const LocationRecorder & parent)415 Parser::LocationRecorder::LocationRecorder(const LocationRecorder& parent) {
416 Init(parent, parent.source_code_info_);
417 }
418
LocationRecorder(const LocationRecorder & parent,int path1,SourceCodeInfo * source_code_info)419 Parser::LocationRecorder::LocationRecorder(const LocationRecorder& parent,
420 int path1,
421 SourceCodeInfo* source_code_info) {
422 Init(parent, source_code_info);
423 AddPath(path1);
424 }
425
LocationRecorder(const LocationRecorder & parent,int path1)426 Parser::LocationRecorder::LocationRecorder(const LocationRecorder& parent,
427 int path1) {
428 Init(parent, parent.source_code_info_);
429 AddPath(path1);
430 }
431
LocationRecorder(const LocationRecorder & parent,int path1,int path2)432 Parser::LocationRecorder::LocationRecorder(const LocationRecorder& parent,
433 int path1, int path2) {
434 Init(parent, parent.source_code_info_);
435 AddPath(path1);
436 AddPath(path2);
437 }
438
Init(const LocationRecorder & parent,SourceCodeInfo * source_code_info)439 void Parser::LocationRecorder::Init(const LocationRecorder& parent,
440 SourceCodeInfo* source_code_info) {
441 parser_ = parent.parser_;
442 source_code_info_ = source_code_info;
443
444 location_ = source_code_info_->add_location();
445 location_->mutable_path()->CopyFrom(parent.location_->path());
446
447 location_->add_span(parser_->input_->current().line);
448 location_->add_span(parser_->input_->current().column);
449 }
450
~LocationRecorder()451 Parser::LocationRecorder::~LocationRecorder() {
452 if (location_->span_size() <= 2) {
453 EndAt(parser_->input_->previous());
454 }
455 }
456
AddPath(int path_component)457 void Parser::LocationRecorder::AddPath(int path_component) {
458 location_->add_path(path_component);
459 }
460
StartAt(const io::Tokenizer::Token & token)461 void Parser::LocationRecorder::StartAt(const io::Tokenizer::Token& token) {
462 location_->set_span(0, token.line);
463 location_->set_span(1, token.column);
464 }
465
StartAt(const LocationRecorder & other)466 void Parser::LocationRecorder::StartAt(const LocationRecorder& other) {
467 location_->set_span(0, other.location_->span(0));
468 location_->set_span(1, other.location_->span(1));
469 }
470
EndAt(const io::Tokenizer::Token & token)471 void Parser::LocationRecorder::EndAt(const io::Tokenizer::Token& token) {
472 if (token.line != location_->span(0)) {
473 location_->add_span(token.line);
474 }
475 location_->add_span(token.end_column);
476 }
477
RecordLegacyLocation(const Message * descriptor,DescriptorPool::ErrorCollector::ErrorLocation location)478 void Parser::LocationRecorder::RecordLegacyLocation(
479 const Message* descriptor,
480 DescriptorPool::ErrorCollector::ErrorLocation location) {
481 if (parser_->source_location_table_ != nullptr) {
482 parser_->source_location_table_->Add(
483 descriptor, location, location_->span(0), location_->span(1));
484 }
485 }
486
RecordLegacyImportLocation(const Message * descriptor,const std::string & name)487 void Parser::LocationRecorder::RecordLegacyImportLocation(
488 const Message* descriptor, const std::string& name) {
489 if (parser_->source_location_table_ != nullptr) {
490 parser_->source_location_table_->AddImport(
491 descriptor, name, location_->span(0), location_->span(1));
492 }
493 }
494
CurrentPathSize() const495 int Parser::LocationRecorder::CurrentPathSize() const {
496 return location_->path_size();
497 }
498
AttachComments(std::string * leading,std::string * trailing,std::vector<std::string> * detached_comments) const499 void Parser::LocationRecorder::AttachComments(
500 std::string* leading, std::string* trailing,
501 std::vector<std::string>* detached_comments) const {
502 ABSL_CHECK(!location_->has_leading_comments());
503 ABSL_CHECK(!location_->has_trailing_comments());
504
505 if (!leading->empty()) {
506 location_->mutable_leading_comments()->swap(*leading);
507 }
508 if (!trailing->empty()) {
509 location_->mutable_trailing_comments()->swap(*trailing);
510 }
511 for (int i = 0; i < detached_comments->size(); ++i) {
512 location_->add_leading_detached_comments()->swap((*detached_comments)[i]);
513 }
514 detached_comments->clear();
515 }
516
517 // -------------------------------------------------------------------
518
SkipStatement()519 void Parser::SkipStatement() {
520 while (true) {
521 if (AtEnd()) {
522 return;
523 } else if (LookingAtType(io::Tokenizer::TYPE_SYMBOL)) {
524 if (TryConsumeEndOfDeclaration(";", nullptr)) {
525 return;
526 } else if (TryConsume("{")) {
527 SkipRestOfBlock();
528 return;
529 } else if (LookingAt("}")) {
530 return;
531 }
532 }
533 input_->Next();
534 }
535 }
536
SkipRestOfBlock()537 void Parser::SkipRestOfBlock() {
538 size_t block_count = 1;
539 while (true) {
540 if (AtEnd()) {
541 return;
542 } else if (LookingAtType(io::Tokenizer::TYPE_SYMBOL)) {
543 if (TryConsumeEndOfDeclaration("}", nullptr)) {
544 if (--block_count == 0) break;
545 } else if (TryConsume("{")) {
546 ++block_count;
547 }
548 }
549 input_->Next();
550 }
551 }
552
553 // ===================================================================
554
ValidateMessage(const DescriptorProto * proto)555 bool Parser::ValidateMessage(const DescriptorProto* proto) {
556 for (int i = 0; i < proto->options().uninterpreted_option_size(); i++) {
557 const UninterpretedOption& option =
558 proto->options().uninterpreted_option(i);
559 if (option.name_size() > 0 && !option.name(0).is_extension() &&
560 option.name(0).name_part() == "map_entry") {
561 int line = -1, col = 0; // indicates line and column not known
562 if (source_location_table_ != nullptr) {
563 source_location_table_->Find(
564 &option, DescriptorPool::ErrorCollector::OPTION_NAME, &line, &col);
565 }
566 RecordError(line, col,
567 "map_entry should not be set explicitly. "
568 "Use map<KeyType, ValueType> instead.");
569 return false;
570 }
571 }
572 return true;
573 }
574
ValidateEnum(const EnumDescriptorProto * proto)575 bool Parser::ValidateEnum(const EnumDescriptorProto* proto) {
576 bool has_allow_alias = false;
577 bool allow_alias = false;
578
579 for (int i = 0; i < proto->options().uninterpreted_option_size(); i++) {
580 const UninterpretedOption option = proto->options().uninterpreted_option(i);
581 if (option.name_size() > 1) {
582 continue;
583 }
584 if (!option.name(0).is_extension() &&
585 option.name(0).name_part() == "allow_alias") {
586 has_allow_alias = true;
587 if (option.identifier_value() == "true") {
588 allow_alias = true;
589 }
590 break;
591 }
592 }
593
594 if (has_allow_alias && !allow_alias) {
595 // This needlessly clutters declarations with nops.
596 RecordError([=] {
597 return absl::StrCat(
598 "\"", proto->name(),
599 "\" declares 'option allow_alias = false;' which has no effect. "
600 "Please remove the declaration.");
601 });
602 return false;
603 }
604
605 absl::flat_hash_set<int> used_values;
606 bool has_duplicates = false;
607 for (int i = 0; i < proto->value_size(); ++i) {
608 const EnumValueDescriptorProto& enum_value = proto->value(i);
609 if (used_values.find(enum_value.number()) != used_values.end()) {
610 has_duplicates = true;
611 break;
612 } else {
613 used_values.insert(enum_value.number());
614 }
615 }
616 if (allow_alias && !has_duplicates) {
617 // Generate an error if an enum declares support for duplicate enum values
618 // and does not use it protect future authors.
619 RecordError([=] {
620 return absl::StrCat(
621 "\"", proto->name(),
622 "\" declares support for enum aliases but no enum values share field "
623 "numbers. Please remove the unnecessary 'option allow_alias = true;' "
624 "declaration.");
625 });
626 return false;
627 }
628
629 // Enforce that enum constants must be UPPER_CASE except in case of
630 // enum_alias.
631 if (!allow_alias) {
632 for (const auto& enum_value : proto->value()) {
633 if (!IsUpperUnderscore(enum_value.name())) {
634 RecordWarning([&] {
635 return absl::StrCat(
636 "Enum constant should be in UPPER_CASE. Found: ",
637 enum_value.name(),
638 ". See "
639 "https://developers.google.com/protocol-buffers/docs/style");
640 });
641 }
642 }
643 }
644
645 return true;
646 }
647
Parse(io::Tokenizer * input,FileDescriptorProto * file)648 bool Parser::Parse(io::Tokenizer* input, FileDescriptorProto* file) {
649 input_ = input;
650 had_errors_ = false;
651 syntax_identifier_.clear();
652
653 // Note that |file| could be NULL at this point if
654 // stop_after_syntax_identifier_ is true. So, we conservatively allocate
655 // SourceCodeInfo on the stack, then swap it into the FileDescriptorProto
656 // later on.
657 SourceCodeInfo source_code_info;
658 source_code_info_ = &source_code_info;
659
660 if (LookingAtType(io::Tokenizer::TYPE_START)) {
661 // Advance to first token.
662 input_->NextWithComments(nullptr, &upcoming_detached_comments_,
663 &upcoming_doc_comments_);
664 }
665
666 {
667 LocationRecorder root_location(this);
668 root_location.RecordLegacyLocation(file,
669 DescriptorPool::ErrorCollector::OTHER);
670
671 if (require_syntax_identifier_ || LookingAt("syntax") ||
672 LookingAt("edition")) {
673 if (!ParseSyntaxIdentifier(file, root_location)) {
674 // Don't attempt to parse the file if we didn't recognize the syntax
675 // identifier.
676 return false;
677 }
678 // Store the syntax into the file.
679 if (file != nullptr) {
680 file->set_syntax(syntax_identifier_);
681 if (syntax_identifier_ == "editions") {
682 file->set_edition(edition_);
683 }
684 }
685 } else if (!stop_after_syntax_identifier_) {
686 ABSL_LOG(WARNING) << "No syntax specified for the proto file: "
687 << file->name()
688 << ". Please use 'syntax = \"proto2\";' "
689 << "or 'syntax = \"proto3\";' to specify a syntax "
690 << "version. (Defaulted to proto2 syntax.)";
691 syntax_identifier_ = "proto2";
692 }
693
694 if (stop_after_syntax_identifier_) return !had_errors_;
695
696 // Repeatedly parse statements until we reach the end of the file.
697 while (!AtEnd()) {
698 if (!ParseTopLevelStatement(file, root_location)) {
699 // This statement failed to parse. Skip it, but keep looping to parse
700 // other statements.
701 SkipStatement();
702
703 if (LookingAt("}")) {
704 RecordError("Unmatched \"}\".");
705 input_->NextWithComments(nullptr, &upcoming_detached_comments_,
706 &upcoming_doc_comments_);
707 }
708 }
709 }
710 }
711
712 input_ = nullptr;
713 source_code_info_ = nullptr;
714 assert(file != nullptr);
715 source_code_info.Swap(file->mutable_source_code_info());
716 return !had_errors_;
717 }
718
ParseSyntaxIdentifier(const FileDescriptorProto * file,const LocationRecorder & parent)719 bool Parser::ParseSyntaxIdentifier(const FileDescriptorProto* file,
720 const LocationRecorder& parent) {
721 LocationRecorder syntax_location(parent,
722 FileDescriptorProto::kSyntaxFieldNumber);
723 syntax_location.RecordLegacyLocation(
724 file, DescriptorPool::ErrorCollector::EDITIONS);
725 bool has_edition = false;
726 if (TryConsume("edition")) {
727 has_edition = true;
728 } else {
729 DO(Consume("syntax",
730 "File must begin with a syntax statement, e.g. 'syntax = "
731 "\"proto2\";'."));
732 }
733
734 DO(Consume("="));
735 io::Tokenizer::Token syntax_token = input_->current();
736 std::string syntax;
737 DO(ConsumeString(&syntax, "Expected syntax identifier."));
738 DO(ConsumeEndOfDeclaration(";", &syntax_location));
739
740 if (has_edition) {
741 if (!Edition_Parse(absl::StrCat("EDITION_", syntax), &edition_) ||
742 edition_ == Edition::EDITION_PROTO2 ||
743 edition_ == Edition::EDITION_PROTO3 ||
744 edition_ == Edition::EDITION_UNKNOWN) {
745 RecordError(syntax_token.line, syntax_token.column, [&] {
746 return absl::StrCat("Unknown edition \"", syntax, "\".");
747 });
748 return false;
749 }
750 syntax_identifier_ = "editions";
751 return true;
752 }
753
754 syntax_identifier_ = syntax;
755 if (syntax != "proto2" && syntax != "proto3" &&
756 !stop_after_syntax_identifier_) {
757 RecordError(syntax_token.line, syntax_token.column, [&] {
758 return absl::StrCat("Unrecognized syntax identifier \"", syntax,
759 "\". This parser "
760 "only recognizes \"proto2\" and \"proto3\".");
761 });
762 return false;
763 }
764
765 return true;
766 }
767
ParseTopLevelStatement(FileDescriptorProto * file,const LocationRecorder & root_location)768 bool Parser::ParseTopLevelStatement(FileDescriptorProto* file,
769 const LocationRecorder& root_location) {
770 if (TryConsumeEndOfDeclaration(";", nullptr)) {
771 // empty statement; ignore
772 return true;
773 } else if (LookingAt("message")) {
774 LocationRecorder location(root_location,
775 FileDescriptorProto::kMessageTypeFieldNumber,
776 file->message_type_size());
777 // Maximum depth allowed by the DescriptorPool.
778 recursion_depth_ = internal::cpp::MaxMessageDeclarationNestingDepth();
779 return ParseMessageDefinition(file->add_message_type(), location, file);
780 } else if (LookingAt("enum")) {
781 LocationRecorder location(root_location,
782 FileDescriptorProto::kEnumTypeFieldNumber,
783 file->enum_type_size());
784 return ParseEnumDefinition(file->add_enum_type(), location, file);
785 } else if (LookingAt("service")) {
786 LocationRecorder location(root_location,
787 FileDescriptorProto::kServiceFieldNumber,
788 file->service_size());
789 return ParseServiceDefinition(file->add_service(), location, file);
790 } else if (LookingAt("extend")) {
791 LocationRecorder location(root_location,
792 FileDescriptorProto::kExtensionFieldNumber);
793 return ParseExtend(
794 file->mutable_extension(), file->mutable_message_type(), root_location,
795 FileDescriptorProto::kMessageTypeFieldNumber, location, file);
796 } else if (LookingAt("import")) {
797 return ParseImport(file->mutable_dependency(),
798 file->mutable_public_dependency(),
799 file->mutable_weak_dependency(), root_location, file);
800 } else if (LookingAt("package")) {
801 return ParsePackage(file, root_location, file);
802 } else if (LookingAt("option")) {
803 LocationRecorder location(root_location,
804 FileDescriptorProto::kOptionsFieldNumber);
805 return ParseOption(file->mutable_options(), location, file,
806 OPTION_STATEMENT);
807 } else {
808 RecordError("Expected top-level statement (e.g. \"message\").");
809 return false;
810 }
811 }
812
813 // -------------------------------------------------------------------
814 // Messages
815
GenerateSyntheticOneofs(DescriptorProto * message)816 PROTOBUF_NOINLINE static void GenerateSyntheticOneofs(
817 DescriptorProto* message) {
818 // Add synthetic one-field oneofs for optional fields, except messages which
819 // already have presence in proto3.
820 //
821 // We have to make sure the oneof names don't conflict with any other
822 // field or oneof.
823 absl::flat_hash_set<std::string> names;
824 for (const auto& field : message->field()) {
825 names.insert(field.name());
826 }
827 for (const auto& oneof : message->oneof_decl()) {
828 names.insert(oneof.name());
829 }
830
831 for (auto& field : *message->mutable_field()) {
832 if (field.proto3_optional()) {
833 std::string oneof_name = field.name();
834
835 // Prepend 'XXXXX_' until we are no longer conflicting.
836 // Avoid prepending a double-underscore because such names are
837 // reserved in C++.
838 if (oneof_name.empty() || oneof_name[0] != '_') {
839 oneof_name.insert(0, "_");
840 }
841 while (names.count(oneof_name) > 0) {
842 oneof_name.insert(0, "X");
843 }
844
845 names.insert(oneof_name);
846 field.set_oneof_index(message->oneof_decl_size());
847 OneofDescriptorProto* oneof = message->add_oneof_decl();
848 oneof->set_name(std::move(oneof_name));
849 }
850 }
851 }
852
ParseMessageDefinition(DescriptorProto * message,const LocationRecorder & message_location,const FileDescriptorProto * containing_file)853 bool Parser::ParseMessageDefinition(
854 DescriptorProto* message, const LocationRecorder& message_location,
855 const FileDescriptorProto* containing_file) {
856 const auto undo_depth = absl::MakeCleanup([&] { ++recursion_depth_; });
857 if (--recursion_depth_ <= 0) {
858 RecordError("Reached maximum recursion limit for nested messages.");
859 return false;
860 }
861
862 DO(Consume("message"));
863 {
864 LocationRecorder location(message_location,
865 DescriptorProto::kNameFieldNumber);
866 location.RecordLegacyLocation(message,
867 DescriptorPool::ErrorCollector::NAME);
868 DO(ConsumeIdentifier(message->mutable_name(), "Expected message name."));
869 if (!IsUpperCamelCase(message->name())) {
870 RecordWarning([=] {
871 return absl::StrCat(
872 "Message name should be in UpperCamelCase. Found: ",
873 message->name(),
874 ". See https://developers.google.com/protocol-buffers/docs/style");
875 });
876 }
877 }
878 DO(ParseMessageBlock(message, message_location, containing_file));
879
880 if (syntax_identifier_ == "proto3") {
881 GenerateSyntheticOneofs(message);
882 }
883
884 return true;
885 }
886
887 namespace {
888
// Placeholder end value of an extension or reserved range; the
// Adjust*RangesWithMaxEndNumber() helpers below replace it with the actual
// maximum number.
const int kMaxRangeSentinel = -1;
890
IsMessageSetWireFormatMessage(const DescriptorProto & message)891 bool IsMessageSetWireFormatMessage(const DescriptorProto& message) {
892 const MessageOptions& options = message.options();
893 for (int i = 0; i < options.uninterpreted_option_size(); ++i) {
894 const UninterpretedOption& uninterpreted = options.uninterpreted_option(i);
895 if (uninterpreted.name_size() == 1 &&
896 !uninterpreted.name(0).is_extension() &&
897 uninterpreted.name(0).name_part() == "message_set_wire_format" &&
898 uninterpreted.identifier_value() == "true") {
899 return true;
900 }
901 }
902 return false;
903 }
904
905 // Modifies any extension ranges that specified 'max' as the end of the
906 // extension range, and sets them to the type-specific maximum. The actual max
907 // tag number can only be determined after all options have been parsed.
AdjustExtensionRangesWithMaxEndNumber(DescriptorProto * message)908 void AdjustExtensionRangesWithMaxEndNumber(DescriptorProto* message) {
909 const bool is_message_set = IsMessageSetWireFormatMessage(*message);
910 const int max_extension_number = is_message_set
911 ? std::numeric_limits<int32_t>::max()
912 : FieldDescriptor::kMaxNumber + 1;
913 for (int i = 0; i < message->extension_range_size(); ++i) {
914 if (message->extension_range(i).end() == kMaxRangeSentinel) {
915 message->mutable_extension_range(i)->set_end(max_extension_number);
916 }
917 }
918 }
919
920 // Modifies any reserved ranges that specified 'max' as the end of the
921 // reserved range, and sets them to the type-specific maximum. The actual max
922 // tag number can only be determined after all options have been parsed.
AdjustReservedRangesWithMaxEndNumber(DescriptorProto * message)923 void AdjustReservedRangesWithMaxEndNumber(DescriptorProto* message) {
924 const bool is_message_set = IsMessageSetWireFormatMessage(*message);
925 const int max_field_number = is_message_set
926 ? std::numeric_limits<int32_t>::max()
927 : FieldDescriptor::kMaxNumber + 1;
928 for (int i = 0; i < message->reserved_range_size(); ++i) {
929 if (message->reserved_range(i).end() == kMaxRangeSentinel) {
930 message->mutable_reserved_range(i)->set_end(max_field_number);
931 }
932 }
933 }
934
935 } // namespace
936
ParseMessageBlock(DescriptorProto * message,const LocationRecorder & message_location,const FileDescriptorProto * containing_file)937 bool Parser::ParseMessageBlock(DescriptorProto* message,
938 const LocationRecorder& message_location,
939 const FileDescriptorProto* containing_file) {
940 DO(ConsumeEndOfDeclaration("{", &message_location));
941
942 while (!TryConsumeEndOfDeclaration("}", nullptr)) {
943 if (AtEnd()) {
944 RecordError("Reached end of input in message definition (missing '}').");
945 return false;
946 }
947
948 if (!ParseMessageStatement(message, message_location, containing_file)) {
949 // This statement failed to parse. Skip it, but keep looping to parse
950 // other statements.
951 SkipStatement();
952 }
953 }
954
955 if (message->extension_range_size() > 0) {
956 AdjustExtensionRangesWithMaxEndNumber(message);
957 }
958 if (message->reserved_range_size() > 0) {
959 AdjustReservedRangesWithMaxEndNumber(message);
960 }
961
962 DO(ValidateMessage(message));
963
964 return true;
965 }
966
ParseMessageStatement(DescriptorProto * message,const LocationRecorder & message_location,const FileDescriptorProto * containing_file)967 bool Parser::ParseMessageStatement(DescriptorProto* message,
968 const LocationRecorder& message_location,
969 const FileDescriptorProto* containing_file) {
970 if (TryConsumeEndOfDeclaration(";", nullptr)) {
971 // empty statement; ignore
972 return true;
973 } else if (LookingAt("message")) {
974 LocationRecorder location(message_location,
975 DescriptorProto::kNestedTypeFieldNumber,
976 message->nested_type_size());
977 return ParseMessageDefinition(message->add_nested_type(), location,
978 containing_file);
979 } else if (LookingAt("enum")) {
980 LocationRecorder location(message_location,
981 DescriptorProto::kEnumTypeFieldNumber,
982 message->enum_type_size());
983 return ParseEnumDefinition(message->add_enum_type(), location,
984 containing_file);
985 } else if (LookingAt("extensions")) {
986 LocationRecorder location(message_location,
987 DescriptorProto::kExtensionRangeFieldNumber);
988 return ParseExtensions(message, location, containing_file);
989 } else if (LookingAt("reserved")) {
990 return ParseReserved(message, message_location);
991 } else if (LookingAt("extend")) {
992 LocationRecorder location(message_location,
993 DescriptorProto::kExtensionFieldNumber);
994 return ParseExtend(message->mutable_extension(),
995 message->mutable_nested_type(), message_location,
996 DescriptorProto::kNestedTypeFieldNumber, location,
997 containing_file);
998 } else if (LookingAt("option")) {
999 LocationRecorder location(message_location,
1000 DescriptorProto::kOptionsFieldNumber);
1001 return ParseOption(message->mutable_options(), location, containing_file,
1002 OPTION_STATEMENT);
1003 } else if (LookingAt("oneof")) {
1004 int oneof_index = message->oneof_decl_size();
1005 LocationRecorder oneof_location(
1006 message_location, DescriptorProto::kOneofDeclFieldNumber, oneof_index);
1007
1008 return ParseOneof(message->add_oneof_decl(), message, oneof_index,
1009 oneof_location, message_location, containing_file);
1010 } else {
1011 LocationRecorder location(message_location,
1012 DescriptorProto::kFieldFieldNumber,
1013 message->field_size());
1014 return ParseMessageField(
1015 message->add_field(), message->mutable_nested_type(), message_location,
1016 DescriptorProto::kNestedTypeFieldNumber, location, containing_file);
1017 }
1018 }
1019
ParseMessageField(FieldDescriptorProto * field,RepeatedPtrField<DescriptorProto> * messages,const LocationRecorder & parent_location,int location_field_number_for_nested_type,const LocationRecorder & field_location,const FileDescriptorProto * containing_file)1020 bool Parser::ParseMessageField(FieldDescriptorProto* field,
1021 RepeatedPtrField<DescriptorProto>* messages,
1022 const LocationRecorder& parent_location,
1023 int location_field_number_for_nested_type,
1024 const LocationRecorder& field_location,
1025 const FileDescriptorProto* containing_file) {
1026 {
1027 FieldDescriptorProto::Label label;
1028 if (ParseLabel(&label, field_location)) {
1029 field->set_label(label);
1030 if (label == FieldDescriptorProto::LABEL_OPTIONAL &&
1031 syntax_identifier_ == "proto3") {
1032 field->set_proto3_optional(true);
1033 }
1034 }
1035 }
1036
1037 return ParseMessageFieldNoLabel(field, messages, parent_location,
1038 location_field_number_for_nested_type,
1039 field_location, containing_file);
1040 }
1041
// Parses the remainder of a field declaration after any label has been
// handled: the type (including `map<K, V>`), the name, "=", the field number,
// bracketed options, and finally either a group body or the terminating ";".
//
// `messages` receives any message type synthesized along the way: the nested
// type backing a group field, or the entry type backing a map field.
// `parent_location` and `location_field_number_for_nested_type` are used to
// build the source location of a synthesized group type.
//
// Returns false if a required part of the declaration could not be parsed.
bool Parser::ParseMessageFieldNoLabel(
    FieldDescriptorProto* field, RepeatedPtrField<DescriptorProto>* messages,
    const LocationRecorder& parent_location,
    int location_field_number_for_nested_type,
    const LocationRecorder& field_location,
    const FileDescriptorProto* containing_file) {
  MapField map_field;
  // Parse type.
  {
    // The path component (type vs. type_name) is only known once the type has
    // been parsed, so it is appended to this recorder further down.
    LocationRecorder location(field_location);  // add path later
    location.RecordLegacyLocation(field, DescriptorPool::ErrorCollector::TYPE);

    bool type_parsed = false;
    FieldDescriptorProto::Type type = FieldDescriptorProto::TYPE_INT32;
    std::string type_name;

    // Special case map field.  We only treat the field as a map field if the
    // field type name starts with the word "map" with a following "<".
    if (TryConsume("map")) {
      if (LookingAt("<")) {
        map_field.is_map_field = true;
        DO(ParseMapType(&map_field, field, location));
      } else {
        // False positive: "map" was an ordinary type name. The token is
        // already consumed, so remember it instead of calling ParseType().
        type_parsed = true;
        type_name = "map";
      }
    }
    if (!map_field.is_map_field) {
      // Handle the case where no explicit label is given for a non-map field.
      if (!field->has_label() && DefaultToOptionalFields()) {
        field->set_label(FieldDescriptorProto::LABEL_OPTIONAL);
      }
      if (!field->has_label()) {
        RecordError("Expected \"required\", \"optional\", or \"repeated\".");
        // We can actually reasonably recover here by just assuming the user
        // forgot the label altogether.
        field->set_label(FieldDescriptorProto::LABEL_OPTIONAL);
      }

      // Handle the case where the actual type is a message or enum named
      // "map", which we already consumed in the code above.
      if (!type_parsed) {
        DO(ParseType(&type, &type_name));
      }
      // An empty type_name means a built-in type was parsed into `type`;
      // otherwise the field refers to a named type.
      if (type_name.empty()) {
        location.AddPath(FieldDescriptorProto::kTypeFieldNumber);
        field->set_type(type);
      } else {
        location.AddPath(FieldDescriptorProto::kTypeNameFieldNumber);
        field->set_type_name(type_name);
      }
    }
  }

  // Parse name and '='.
  // Capture the token before it is consumed: for groups, the same token is
  // reused below as the location of the group's name and type_name.
  io::Tokenizer::Token name_token = input_->current();
  {
    LocationRecorder location(field_location,
                              FieldDescriptorProto::kNameFieldNumber);
    location.RecordLegacyLocation(field, DescriptorPool::ErrorCollector::NAME);
    DO(ConsumeIdentifier(field->mutable_name(), "Expected field name."));

    // Style violations are reported as warnings only; parsing continues.
    if (!IsLowerUnderscore(field->name())) {
      RecordWarning([=] {
        return absl::StrCat(
            "Field name should be lowercase. Found: ", field->name(),
            ". See: https://developers.google.com/protocol-buffers/docs/style");
      });
    }
    if (IsNumberFollowUnderscore(field->name())) {
      RecordWarning([=] {
        return absl::StrCat(
            "Number should not come right after an underscore. Found: ",
            field->name(),
            ". See: https://developers.google.com/protocol-buffers/docs/style");
      });
    }
  }
  DO(Consume("=", "Missing field number."));

  // Parse field number.
  {
    LocationRecorder location(field_location,
                              FieldDescriptorProto::kNumberFieldNumber);
    location.RecordLegacyLocation(field,
                                  DescriptorPool::ErrorCollector::NUMBER);
    int number;
    DO(ConsumeInteger(&number, "Expected field number."));
    field->set_number(number);
  }

  // Parse options.
  DO(ParseFieldOptions(field, field_location, containing_file));

  // Deal with groups.
  if (field->has_type() && field->type() == FieldDescriptorProto::TYPE_GROUP) {
    // Awkward:  Since a group declares both a message type and a field, we
    //   have to create overlapping locations.
    LocationRecorder group_location(parent_location);
    group_location.StartAt(field_location);
    group_location.AddPath(location_field_number_for_nested_type);
    // The index is read before Add() below so that it refers to the new
    // element.
    group_location.AddPath(messages->size());

    DescriptorProto* group = messages->Add();
    group->set_name(field->name());

    // Record name location to match the field name's location.
    {
      LocationRecorder location(group_location,
                                DescriptorProto::kNameFieldNumber);
      location.StartAt(name_token);
      location.EndAt(name_token);
      location.RecordLegacyLocation(group,
                                    DescriptorPool::ErrorCollector::NAME);
    }

    // The field's type_name also comes from the name.  Confusing!
    // NOTE(review): this recorder has no other use in the block; presumably
    // its span is recorded when it goes out of scope -- confirm against
    // LocationRecorder.
    {
      LocationRecorder location(field_location,
                                FieldDescriptorProto::kTypeNameFieldNumber);
      location.StartAt(name_token);
      location.EndAt(name_token);
    }

    // As a hack for backwards-compatibility, we force the group name to start
    // with a capital letter and lower-case the field name.  New code should
    // not use groups; it should use nested messages.
    // The error is recorded but parsing continues with the name as written.
    if (group->name()[0] < 'A' || 'Z' < group->name()[0]) {
      RecordError(name_token.line, name_token.column,
                  "Group names must start with a capital letter.");
    }
    absl::AsciiStrToLower(field->mutable_name());

    // The group type keeps the name as written; only the field name was
    // lower-cased above.
    field->set_type_name(group->name());
    if (LookingAt("{")) {
      DO(ParseMessageBlock(group, group_location, containing_file));
    } else {
      RecordError("Missing group body.");
      return false;
    }
  } else {
    DO(ConsumeEndOfDeclaration(";", &field_location));
  }

  // Create a map entry type if this is a map field.
  if (map_field.is_map_field) {
    GenerateMapEntry(map_field, field, messages);
  }

  return true;
}
1194
ParseMapType(MapField * map_field,FieldDescriptorProto * field,LocationRecorder & type_name_location)1195 bool Parser::ParseMapType(MapField* map_field, FieldDescriptorProto* field,
1196 LocationRecorder& type_name_location) {
1197 if (field->has_oneof_index()) {
1198 RecordError("Map fields are not allowed in oneofs.");
1199 return false;
1200 }
1201 if (field->has_label()) {
1202 RecordError(
1203 "Field labels (required/optional/repeated) are not allowed on "
1204 "map fields.");
1205 return false;
1206 }
1207 if (field->has_extendee()) {
1208 RecordError("Map fields are not allowed to be extensions.");
1209 return false;
1210 }
1211 field->set_label(FieldDescriptorProto::LABEL_REPEATED);
1212 DO(Consume("<"));
1213 DO(ParseType(&map_field->key_type, &map_field->key_type_name));
1214 DO(Consume(","));
1215 DO(ParseType(&map_field->value_type, &map_field->value_type_name));
1216 DO(Consume(">"));
1217 // Defer setting of the type name of the map field until the
1218 // field name is parsed. Add the source location though.
1219 type_name_location.AddPath(FieldDescriptorProto::kTypeNameFieldNumber);
1220 return true;
1221 }
1222
GenerateMapEntry(const MapField & map_field,FieldDescriptorProto * field,RepeatedPtrField<DescriptorProto> * messages)1223 void Parser::GenerateMapEntry(const MapField& map_field,
1224 FieldDescriptorProto* field,
1225 RepeatedPtrField<DescriptorProto>* messages) {
1226 DescriptorProto* entry = messages->Add();
1227 std::string entry_name = MapEntryName(field->name());
1228 field->set_type_name(entry_name);
1229 entry->set_name(entry_name);
1230 entry->mutable_options()->set_map_entry(true);
1231 FieldDescriptorProto* key_field = entry->add_field();
1232 key_field->set_name("key");
1233 key_field->set_label(FieldDescriptorProto::LABEL_OPTIONAL);
1234 key_field->set_number(1);
1235 if (map_field.key_type_name.empty()) {
1236 key_field->set_type(map_field.key_type);
1237 } else {
1238 key_field->set_type_name(map_field.key_type_name);
1239 }
1240 FieldDescriptorProto* value_field = entry->add_field();
1241 value_field->set_name("value");
1242 value_field->set_label(FieldDescriptorProto::LABEL_OPTIONAL);
1243 value_field->set_number(2);
1244 if (map_field.value_type_name.empty()) {
1245 value_field->set_type(map_field.value_type);
1246 } else {
1247 value_field->set_type_name(map_field.value_type_name);
1248 }
1249 // Propagate all features to the generated key and value fields. This helps
1250 // simplify the implementation of code generators and also reflection-based
1251 // parsing code. Instead of having to implement complex inheritance rules
1252 // special-casing maps, we can just copy them at generation time.
1253 //
1254 // The following definition:
1255 // message Foo {
1256 // map<string, string> value = 1 [features.some_feature = VALUE];
1257 // }
1258 // will be interpreted as:
1259 // message Foo {
1260 // message ValueEntry {
1261 // option map_entry = true;
1262 // string key = 1 [features.some_feature = VALUE];
1263 // string value = 2 [features.some_feature = VALUE];
1264 // }
1265 // repeated ValueEntry value = 1 [features.some_feature = VALUE];
1266 // }
1267 for (int i = 0; i < field->options().uninterpreted_option_size(); ++i) {
1268 const UninterpretedOption& option =
1269 field->options().uninterpreted_option(i);
1270 // Legacy handling for the `enforce_utf8` option, which bears a striking
1271 // similarity to features in many respects.
1272 // TODO Delete this once proto2/proto3 have been turned down.
1273 if (option.name_size() == 1 &&
1274 option.name(0).name_part() == "enforce_utf8" &&
1275 !option.name(0).is_extension()) {
1276 if (key_field->type() == FieldDescriptorProto::TYPE_STRING) {
1277 *key_field->mutable_options()->add_uninterpreted_option() = option;
1278 }
1279 if (value_field->type() == FieldDescriptorProto::TYPE_STRING) {
1280 *value_field->mutable_options()->add_uninterpreted_option() = option;
1281 }
1282 }
1283 if (option.name(0).name_part() == "features" &&
1284 !option.name(0).is_extension()) {
1285 *key_field->mutable_options()->add_uninterpreted_option() = option;
1286 *value_field->mutable_options()->add_uninterpreted_option() = option;
1287 }
1288 }
1289 }
1290
ParseFieldOptions(FieldDescriptorProto * field,const LocationRecorder & field_location,const FileDescriptorProto * containing_file)1291 bool Parser::ParseFieldOptions(FieldDescriptorProto* field,
1292 const LocationRecorder& field_location,
1293 const FileDescriptorProto* containing_file) {
1294 if (!LookingAt("[")) return true;
1295
1296 LocationRecorder location(field_location,
1297 FieldDescriptorProto::kOptionsFieldNumber);
1298
1299 DO(Consume("["));
1300
1301 // Parse field options.
1302 do {
1303 if (LookingAt("default")) {
1304 // We intentionally pass field_location rather than location here, since
1305 // the default value is not actually an option.
1306 DO(ParseDefaultAssignment(field, field_location, containing_file));
1307 } else if (LookingAt("json_name")) {
1308 // Like default value, this "json_name" is not an actual option.
1309 DO(ParseJsonName(field, field_location, containing_file));
1310 } else {
1311 DO(ParseOption(field->mutable_options(), location, containing_file,
1312 OPTION_ASSIGNMENT));
1313 }
1314 } while (TryConsume(","));
1315
1316 DO(Consume("]"));
1317 return true;
1318 }
1319
ParseDefaultAssignment(FieldDescriptorProto * field,const LocationRecorder & field_location,const FileDescriptorProto * containing_file)1320 bool Parser::ParseDefaultAssignment(
1321 FieldDescriptorProto* field, const LocationRecorder& field_location,
1322 const FileDescriptorProto* containing_file) {
1323 if (field->has_default_value()) {
1324 RecordError("Already set option \"default\".");
1325 field->clear_default_value();
1326 }
1327
1328 DO(Consume("default"));
1329 DO(Consume("="));
1330
1331 LocationRecorder location(field_location,
1332 FieldDescriptorProto::kDefaultValueFieldNumber);
1333 location.RecordLegacyLocation(field,
1334 DescriptorPool::ErrorCollector::DEFAULT_VALUE);
1335 std::string* default_value = field->mutable_default_value();
1336
1337 if (!field->has_type()) {
1338 // The field has a type name, but we don't know if it is a message or an
1339 // enum yet. (If it were a primitive type, |field| would have a type set
1340 // already.) In this case, simply take the current string as the default
1341 // value; we will catch the error later if it is not a valid enum value.
1342 // (N.B. that we do not check whether the current token is an identifier:
1343 // doing so throws strange errors when the user mistypes a primitive
1344 // typename and we assume it's an enum. E.g.: "optional int foo = 1 [default
1345 // = 42]". In such a case the fundamental error is really that "int" is not
1346 // a type, not that "42" is not an identifier. See b/12533582.)
1347 *default_value = input_->current().text;
1348 input_->Next();
1349 return true;
1350 }
1351
1352 switch (field->type()) {
1353 case FieldDescriptorProto::TYPE_INT32:
1354 case FieldDescriptorProto::TYPE_INT64:
1355 case FieldDescriptorProto::TYPE_SINT32:
1356 case FieldDescriptorProto::TYPE_SINT64:
1357 case FieldDescriptorProto::TYPE_SFIXED32:
1358 case FieldDescriptorProto::TYPE_SFIXED64: {
1359 uint64_t max_value = std::numeric_limits<int64_t>::max();
1360 if (field->type() == FieldDescriptorProto::TYPE_INT32 ||
1361 field->type() == FieldDescriptorProto::TYPE_SINT32 ||
1362 field->type() == FieldDescriptorProto::TYPE_SFIXED32) {
1363 max_value = std::numeric_limits<int32_t>::max();
1364 }
1365
1366 // These types can be negative.
1367 if (TryConsume("-")) {
1368 default_value->append("-");
1369 // Two's complement always has one more negative value than positive.
1370 ++max_value;
1371 }
1372 // Parse the integer to verify that it is not out-of-range.
1373 uint64_t value;
1374 DO(ConsumeInteger64(max_value, &value,
1375 "Expected integer for field default value."));
1376 // And stringify it again.
1377 default_value->append(absl::StrCat(value));
1378 break;
1379 }
1380
1381 case FieldDescriptorProto::TYPE_UINT32:
1382 case FieldDescriptorProto::TYPE_UINT64:
1383 case FieldDescriptorProto::TYPE_FIXED32:
1384 case FieldDescriptorProto::TYPE_FIXED64: {
1385 uint64_t max_value = std::numeric_limits<uint64_t>::max();
1386 if (field->type() == FieldDescriptorProto::TYPE_UINT32 ||
1387 field->type() == FieldDescriptorProto::TYPE_FIXED32) {
1388 max_value = std::numeric_limits<uint32_t>::max();
1389 }
1390
1391 // Numeric, not negative.
1392 if (TryConsume("-")) {
1393 RecordError("Unsigned field can't have negative default value.");
1394 }
1395 // Parse the integer to verify that it is not out-of-range.
1396 uint64_t value;
1397 DO(ConsumeInteger64(max_value, &value,
1398 "Expected integer for field default value."));
1399 // And stringify it again.
1400 default_value->append(absl::StrCat(value));
1401 break;
1402 }
1403
1404 case FieldDescriptorProto::TYPE_FLOAT:
1405 case FieldDescriptorProto::TYPE_DOUBLE: {
1406 // These types can be negative.
1407 if (TryConsume("-")) {
1408 default_value->append("-");
1409 }
1410 // Parse the integer because we have to convert hex integers to decimal
1411 // floats.
1412 double value = 0.0;
1413 DO(ConsumeNumber(&value, "Expected number."));
1414 // And stringify it again.
1415 default_value->append(io::SimpleDtoa(value));
1416 break;
1417 }
1418 case FieldDescriptorProto::TYPE_BOOL:
1419 if (TryConsume("true")) {
1420 default_value->assign("true");
1421 } else if (TryConsume("false")) {
1422 default_value->assign("false");
1423 } else {
1424 RecordError("Expected \"true\" or \"false\".");
1425 return false;
1426 }
1427 break;
1428
1429 case FieldDescriptorProto::TYPE_STRING:
1430 // Note: When file option java_string_check_utf8 is true, if a
1431 // non-string representation (eg byte[]) is later supported, it must
1432 // be checked for UTF-8-ness.
1433 DO(ConsumeString(default_value,
1434 "Expected string for field default "
1435 "value."));
1436 break;
1437
1438 case FieldDescriptorProto::TYPE_BYTES:
1439 DO(ConsumeString(default_value, "Expected string."));
1440 *default_value = absl::CEscape(*default_value);
1441 break;
1442
1443 case FieldDescriptorProto::TYPE_ENUM:
1444 DO(ConsumeIdentifier(default_value,
1445 "Expected enum identifier for field "
1446 "default value."));
1447 break;
1448
1449 case FieldDescriptorProto::TYPE_MESSAGE:
1450 case FieldDescriptorProto::TYPE_GROUP:
1451 RecordError("Messages can't have default values.");
1452 return false;
1453 }
1454
1455 return true;
1456 }
1457
ParseJsonName(FieldDescriptorProto * field,const LocationRecorder & field_location,const FileDescriptorProto * containing_file)1458 bool Parser::ParseJsonName(FieldDescriptorProto* field,
1459 const LocationRecorder& field_location,
1460 const FileDescriptorProto* containing_file) {
1461 if (field->has_json_name()) {
1462 RecordError("Already set option \"json_name\".");
1463 field->clear_json_name();
1464 }
1465
1466 LocationRecorder location(field_location,
1467 FieldDescriptorProto::kJsonNameFieldNumber);
1468 location.RecordLegacyLocation(field,
1469 DescriptorPool::ErrorCollector::OPTION_NAME);
1470
1471 DO(Consume("json_name"));
1472 DO(Consume("="));
1473
1474 LocationRecorder value_location(location);
1475 value_location.RecordLegacyLocation(
1476 field, DescriptorPool::ErrorCollector::OPTION_VALUE);
1477
1478 DO(ConsumeString(field->mutable_json_name(),
1479 "Expected string for JSON name."));
1480 return true;
1481 }
1482
ParseOptionNamePart(UninterpretedOption * uninterpreted_option,const LocationRecorder & part_location,const FileDescriptorProto * containing_file)1483 bool Parser::ParseOptionNamePart(UninterpretedOption* uninterpreted_option,
1484 const LocationRecorder& part_location,
1485 const FileDescriptorProto* containing_file) {
1486 UninterpretedOption::NamePart* name = uninterpreted_option->add_name();
1487 std::string identifier; // We parse identifiers into this string.
1488 if (LookingAt("(")) { // This is an extension.
1489 DO(Consume("("));
1490
1491 {
1492 LocationRecorder location(
1493 part_location, UninterpretedOption::NamePart::kNamePartFieldNumber);
1494 // An extension name consists of dot-separated identifiers, and may begin
1495 // with a dot.
1496 if (LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) {
1497 DO(ConsumeIdentifier(&identifier, "Expected identifier."));
1498 name->mutable_name_part()->append(identifier);
1499 }
1500 while (LookingAt(".")) {
1501 DO(Consume("."));
1502 name->mutable_name_part()->append(".");
1503 DO(ConsumeIdentifier(&identifier, "Expected identifier."));
1504 name->mutable_name_part()->append(identifier);
1505 }
1506 }
1507
1508 DO(Consume(")"));
1509 name->set_is_extension(true);
1510 } else { // This is a regular field.
1511 LocationRecorder location(
1512 part_location, UninterpretedOption::NamePart::kNamePartFieldNumber);
1513 DO(ConsumeIdentifier(&identifier, "Expected identifier."));
1514 name->mutable_name_part()->append(identifier);
1515 name->set_is_extension(false);
1516 }
1517 return true;
1518 }
1519
ParseUninterpretedBlock(std::string * value)1520 bool Parser::ParseUninterpretedBlock(std::string* value) {
1521 // Note that enclosing braces are not added to *value.
1522 // We do NOT use ConsumeEndOfStatement for this brace because it's delimiting
1523 // an expression, not a block of statements.
1524 DO(Consume("{"));
1525 int brace_depth = 1;
1526 while (!AtEnd()) {
1527 if (LookingAt("{")) {
1528 brace_depth++;
1529 } else if (LookingAt("}")) {
1530 brace_depth--;
1531 if (brace_depth == 0) {
1532 input_->Next();
1533 return true;
1534 }
1535 }
1536 // TODO: Interpret line/column numbers to preserve formatting
1537 if (!value->empty()) value->push_back(' ');
1538 value->append(input_->current().text);
1539 input_->Next();
1540 }
1541 RecordError("Unexpected end of stream while parsing aggregate value.");
1542 return false;
1543 }
1544
// Parses a single option, either as a full statement (`option name = value;`,
// style OPTION_STATEMENT) or as a bare assignment (`name = value`, any other
// style), and appends it to the "uninterpreted_option" field of `options`.
//
// We don't interpret the option here. Instead we store it in an
// UninterpretedOption, to be interpreted later.
//
// `options` must be a message with a field named "uninterpreted_option"; this
// is enforced with ABSL_CHECK. Returns false if the option cannot be parsed.
// The UninterpretedOption entry may already have been appended to `options`
// when that happens.
bool Parser::ParseOption(Message* options,
                         const LocationRecorder& options_location,
                         const FileDescriptorProto* containing_file,
                         OptionStyle style) {
  // Create an entry in the uninterpreted_option field.
  const FieldDescriptor* uninterpreted_option_field =
      options->GetDescriptor()->FindFieldByName("uninterpreted_option");
  ABSL_CHECK(uninterpreted_option_field != nullptr)
      << "No field named \"uninterpreted_option\" in the Options proto.";

  const Reflection* reflection = options->GetReflection();

  // The location index is the current element count, i.e. the index the new
  // entry will have once it is added below.
  LocationRecorder location(
      options_location, uninterpreted_option_field->number(),
      reflection->FieldSize(*options, uninterpreted_option_field));

  if (style == OPTION_STATEMENT) {
    DO(Consume("option"));
  }

  UninterpretedOption* uninterpreted_option =
      DownCastMessage<UninterpretedOption>(options->GetReflection()->AddMessage(
          options, uninterpreted_option_field));

  // Parse dot-separated name.
  {
    LocationRecorder name_location(location,
                                   UninterpretedOption::kNameFieldNumber);
    name_location.RecordLegacyLocation(
        uninterpreted_option, DescriptorPool::ErrorCollector::OPTION_NAME);

    // The first name part is mandatory.
    {
      LocationRecorder part_location(name_location,
                                     uninterpreted_option->name_size());
      DO(ParseOptionNamePart(uninterpreted_option, part_location,
                             containing_file));
    }

    // Any further parts are each introduced by a ".".
    while (LookingAt(".")) {
      DO(Consume("."));
      LocationRecorder part_location(name_location,
                                     uninterpreted_option->name_size());
      DO(ParseOptionNamePart(uninterpreted_option, part_location,
                             containing_file));
    }
  }

  DO(Consume("="));

  {
    // The path component for the value depends on its kind, so it is appended
    // inside the switch below.
    LocationRecorder value_location(location);
    value_location.RecordLegacyLocation(
        uninterpreted_option, DescriptorPool::ErrorCollector::OPTION_VALUE);

    // All values are a single token, except for negative numbers, which consist
    // of a single '-' symbol, followed by a positive number.
    bool is_negative = TryConsume("-");

    switch (input_->current().type) {
      case io::Tokenizer::TYPE_START:
        ABSL_LOG(FATAL)
            << "Trying to read value before any tokens have been read.";
        return false;

      case io::Tokenizer::TYPE_END:
        RecordError("Unexpected end of stream while parsing option value.");
        return false;

      case io::Tokenizer::TYPE_WHITESPACE:
      case io::Tokenizer::TYPE_NEWLINE:
        ABSL_CHECK(!input_->report_whitespace() && !input_->report_newlines())
            << "Whitespace tokens were not requested.";
        ABSL_LOG(FATAL) << "Tokenizer reported whitespace.";
        return false;

      case io::Tokenizer::TYPE_IDENTIFIER: {
        value_location.AddPath(
            UninterpretedOption::kIdentifierValueFieldNumber);
        std::string value;
        DO(ConsumeIdentifier(&value, "Expected identifier."));
        if (is_negative) {
          // After '-', only "inf" and "nan" are accepted, and they are stored
          // as a double rather than as an identifier. The sign is applied to
          // inf only; "-nan" is stored as a plain quiet NaN.
          if (value == "inf") {
            uninterpreted_option->set_double_value(
                -std::numeric_limits<double>::infinity());
          } else if (value == "nan") {
            uninterpreted_option->set_double_value(
                std::numeric_limits<double>::quiet_NaN());
          } else {
            RecordError("Identifier after '-' symbol must be inf or nan.");
            return false;
          }
          break;
        }
        uninterpreted_option->set_identifier_value(value);
        break;
      }

      case io::Tokenizer::TYPE_INTEGER: {
        uint64_t value;
        // A negative literal may have a magnitude of up to 2^63 (the magnitude
        // of the int64 minimum); a positive one may use the full uint64 range.
        uint64_t max_value =
            is_negative
                ? static_cast<uint64_t>(std::numeric_limits<int64_t>::max()) + 1
                : std::numeric_limits<uint64_t>::max();
        if (TryConsumeInteger64(max_value, &value)) {
          if (is_negative) {
            value_location.AddPath(
                UninterpretedOption::kNegativeIntValueFieldNumber);
            // Negate in unsigned arithmetic so that a magnitude of 2^63 does
            // not overflow before the conversion to int64.
            uninterpreted_option->set_negative_int_value(
                static_cast<int64_t>(0 - value));
          } else {
            value_location.AddPath(
                UninterpretedOption::kPositiveIntValueFieldNumber);
            uninterpreted_option->set_positive_int_value(value);
          }
          break;
        }
        // value too large for an integer; fall through below to treat as
        // floating point
        ABSL_FALLTHROUGH_INTENDED;
      }

      case io::Tokenizer::TYPE_FLOAT: {
        value_location.AddPath(UninterpretedOption::kDoubleValueFieldNumber);
        double value = 0.0;
        DO(ConsumeNumber(&value, "Expected number."));
        uninterpreted_option->set_double_value(is_negative ? -value : value);
        break;
      }

      case io::Tokenizer::TYPE_STRING: {
        value_location.AddPath(UninterpretedOption::kStringValueFieldNumber);
        if (is_negative) {
          RecordError("Invalid '-' symbol before string.");
          return false;
        }
        std::string value;
        DO(ConsumeString(&value, "Expected string."));
        uninterpreted_option->set_string_value(value);
        break;
      }

      case io::Tokenizer::TYPE_SYMBOL:
        // The only symbol that can start a value is "{", which opens an
        // aggregate value.
        // NOTE(review): is_negative is not checked in this branch, so a '-'
        // directly before '{' appears to be accepted and dropped -- confirm
        // whether that is intended.
        if (LookingAt("{")) {
          value_location.AddPath(
              UninterpretedOption::kAggregateValueFieldNumber);
          DO(ParseUninterpretedBlock(
              uninterpreted_option->mutable_aggregate_value()));
        } else {
          RecordError("Expected option value.");
          return false;
        }
        break;
    }
  }

  // Only the statement form is terminated by ";".
  if (style == OPTION_STATEMENT) {
    DO(ConsumeEndOfDeclaration(";", &location));
  }

  return true;
}
1708
// Parses an `extensions` declaration inside a message: one or more
// comma-separated field-number ranges, an optional bracketed option list, and
// the closing ";". One ExtensionRange is appended to `message` per range.
//
// `extensions_location` is the recorder for the extension_range field of the
// message; `containing_file` is forwarded unchanged to ParseOption.
//
// Returns false when an out-of-bounds number is seen (an error is recorded) or
// when one of the DO(...)-wrapped steps fails. NOTE(review): DO is defined
// outside this chunk; it is presumed to return false from the enclosing
// function when its argument is false.
bool Parser::ParseExtensions(DescriptorProto* message,
                             const LocationRecorder& extensions_location,
                             const FileDescriptorProto* containing_file) {
  // Parse the declaration.
  DO(Consume("extensions"));

  // Index of the first range this declaration adds. The option list, if any,
  // is parsed into that range and then copied to the ones after it.
  int old_range_size = message->extension_range_size();

  do {
    // Note that kExtensionRangeFieldNumber was already pushed by the parent.
    LocationRecorder location(extensions_location,
                              message->extension_range_size());

    DescriptorProto::ExtensionRange* range = message->add_extension_range();
    location.RecordLegacyLocation(range,
                                  DescriptorPool::ErrorCollector::NUMBER);

    int start, end;
    io::Tokenizer::Token start_token;

    {
      LocationRecorder start_location(
          location, DescriptorProto::ExtensionRange::kStartFieldNumber);
      // Kept so that a single-number range can reuse this token as the span
      // of its end field below.
      start_token = input_->current();
      DO(ConsumeInteger(&start, "Expected field number range."));

      // A single-number range sets end = start and then increments it, so the
      // maximum int must be rejected here to keep that increment in range.
      if (start == std::numeric_limits<int>::max()) {
        RecordError("Field number out of bounds.");
        return false;
      }
    }

    if (TryConsume("to")) {
      LocationRecorder end_location(
          location, DescriptorProto::ExtensionRange::kEndFieldNumber);
      if (TryConsume("max")) {
        // Set to the sentinel value - 1 since we increment the value below.
        // The actual value of the end of the range should be set with
        // AdjustExtensionRangesWithMaxEndNumber.
        end = kMaxRangeSentinel - 1;
      } else {
        DO(ConsumeInteger(&end, "Expected integer."));

        // Same reason as for `start`: `end` is incremented below.
        if (end == std::numeric_limits<int>::max()) {
          RecordError("Field number out of bounds.");
          return false;
        }
      }
    } else {
      // No explicit end was written: the range covers just `start`, and the
      // end field's source span is pinned to the start token.
      LocationRecorder end_location(
          location, DescriptorProto::ExtensionRange::kEndFieldNumber);
      end_location.StartAt(start_token);
      end_location.EndAt(start_token);
      end = start;
    }

    // Users like to specify inclusive ranges, but in code we like the end
    // number to be exclusive.
    ++end;

    range->set_start(start);
    range->set_end(end);
  } while (TryConsume(","));

  if (LookingAt("[")) {
    // Position within a location path at which the range index is stored;
    // used below to rewrite that index for each copied location.
    int range_number_index = extensions_location.CurrentPathSize();
    // Scratch SourceCodeInfo: the option locations are recorded here first
    // (with a placeholder range index) and replicated per range afterwards.
    SourceCodeInfo info;

    // Parse extension range options in the first range.
    ExtensionRangeOptions* options =
        message->mutable_extension_range(old_range_size)->mutable_options();

    {
      LocationRecorder index_location(
          extensions_location, 0 /* we fill this in w/ actual index below */,
          &info);
      LocationRecorder location(
          index_location, DescriptorProto::ExtensionRange::kOptionsFieldNumber);
      DO(Consume("["));

      do {
        DO(ParseOption(options, location, containing_file, OPTION_ASSIGNMENT));
      } while (TryConsume(","));

      DO(Consume("]"));
    }

    // Then copy the extension range options to all of the other ranges we've
    // parsed.
    for (int i = old_range_size + 1; i < message->extension_range_size(); i++) {
      *message->mutable_extension_range(i)->mutable_options() = *options;
    }
    // and copy source locations to the other ranges, too
    for (int i = old_range_size; i < message->extension_range_size(); i++) {
      for (int j = 0; j < info.location_size(); j++) {
        if (info.location(j).path_size() == range_number_index + 1) {
          // this location's path is up to the extension range index, but
          // doesn't include options; so it's redundant with location above
          continue;
        }
        SourceCodeInfo_Location* dest = source_code_info_->add_location();
        *dest = info.location(j);
        // Replace the placeholder index with this range's real index.
        dest->set_path(range_number_index, i);
      }
    }
  }

  DO(ConsumeEndOfDeclaration(";", &extensions_location));
  return true;
}
1819
1820 // This is similar to extension range parsing, except that it accepts field
1821 // name literals.
ParseReserved(DescriptorProto * message,const LocationRecorder & message_location)1822 bool Parser::ParseReserved(DescriptorProto* message,
1823 const LocationRecorder& message_location) {
1824 io::Tokenizer::Token start_token = input_->current();
1825 // Parse the declaration.
1826 DO(Consume("reserved"));
1827 if (LookingAtType(io::Tokenizer::TYPE_STRING)) {
1828 if (syntax_identifier_ == "editions") {
1829 RecordError(
1830 "Reserved names must be identifiers in editions, not string "
1831 "literals.");
1832 return false;
1833 }
1834 LocationRecorder location(message_location,
1835 DescriptorProto::kReservedNameFieldNumber);
1836 location.StartAt(start_token);
1837 return ParseReservedNames(message, location);
1838 } else if (LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) {
1839 if (syntax_identifier_ != "editions") {
1840 RecordError(
1841 "Reserved names must be string literals. (Only editions supports "
1842 "identifiers.)");
1843 return false;
1844 }
1845 LocationRecorder location(message_location,
1846 DescriptorProto::kReservedNameFieldNumber);
1847 location.StartAt(start_token);
1848 return ParseReservedIdentifiers(message, location);
1849 } else {
1850 LocationRecorder location(message_location,
1851 DescriptorProto::kReservedRangeFieldNumber);
1852 location.StartAt(start_token);
1853 return ParseReservedNumbers(message, location);
1854 }
1855 }
1856
ParseReservedName(std::string * name,ErrorMaker error_message)1857 bool Parser::ParseReservedName(std::string* name, ErrorMaker error_message) {
1858 // Capture the position of the token, in case we have to report an
1859 // error after it is consumed.
1860 int line = input_->current().line;
1861 int col = input_->current().column;
1862 DO(ConsumeString(name, error_message));
1863 if (!io::Tokenizer::IsIdentifier(*name)) {
1864 RecordWarning(line, col, [=] {
1865 return absl::StrFormat("Reserved name \"%s\" is not a valid identifier.",
1866 *name);
1867 });
1868 }
1869 return true;
1870 }
1871
ParseReservedNames(DescriptorProto * message,const LocationRecorder & parent_location)1872 bool Parser::ParseReservedNames(DescriptorProto* message,
1873 const LocationRecorder& parent_location) {
1874 do {
1875 LocationRecorder location(parent_location, message->reserved_name_size());
1876 DO(ParseReservedName(message->add_reserved_name(),
1877 "Expected field name string literal."));
1878 } while (TryConsume(","));
1879 DO(ConsumeEndOfDeclaration(";", &parent_location));
1880 return true;
1881 }
1882
ParseReservedIdentifier(std::string * name,ErrorMaker error_message)1883 bool Parser::ParseReservedIdentifier(std::string* name,
1884 ErrorMaker error_message) {
1885 DO(ConsumeIdentifier(name, error_message));
1886 return true;
1887 }
1888
ParseReservedIdentifiers(DescriptorProto * message,const LocationRecorder & parent_location)1889 bool Parser::ParseReservedIdentifiers(DescriptorProto* message,
1890 const LocationRecorder& parent_location) {
1891 do {
1892 LocationRecorder location(parent_location, message->reserved_name_size());
1893 DO(ParseReservedIdentifier(message->add_reserved_name(),
1894 "Expected field name identifier."));
1895 } while (TryConsume(","));
1896 DO(ConsumeEndOfDeclaration(";", &parent_location));
1897 return true;
1898 }
1899
ParseReservedNumbers(DescriptorProto * message,const LocationRecorder & parent_location)1900 bool Parser::ParseReservedNumbers(DescriptorProto* message,
1901 const LocationRecorder& parent_location) {
1902 bool first = true;
1903 do {
1904 LocationRecorder location(parent_location, message->reserved_range_size());
1905
1906 DescriptorProto::ReservedRange* range = message->add_reserved_range();
1907 location.RecordLegacyLocation(range,
1908 DescriptorPool::ErrorCollector::NUMBER);
1909 int start, end;
1910 io::Tokenizer::Token start_token;
1911 {
1912 LocationRecorder start_location(
1913 location, DescriptorProto::ReservedRange::kStartFieldNumber);
1914 start_token = input_->current();
1915 DO(ConsumeInteger(&start, (first ? "Expected field name or number range."
1916 : "Expected field number range.")));
1917 }
1918
1919 if (TryConsume("to")) {
1920 LocationRecorder end_location(
1921 location, DescriptorProto::ReservedRange::kEndFieldNumber);
1922 if (TryConsume("max")) {
1923 // Set to the sentinel value - 1 since we increment the value below.
1924 // The actual value of the end of the range should be set with
1925 // AdjustExtensionRangesWithMaxEndNumber.
1926 end = kMaxRangeSentinel - 1;
1927 } else {
1928 DO(ConsumeInteger(&end, "Expected integer."));
1929 }
1930 } else {
1931 LocationRecorder end_location(
1932 location, DescriptorProto::ReservedRange::kEndFieldNumber);
1933 end_location.StartAt(start_token);
1934 end_location.EndAt(start_token);
1935 end = start;
1936 }
1937
1938 // Users like to specify inclusive ranges, but in code we like the end
1939 // number to be exclusive.
1940 ++end;
1941
1942 range->set_start(start);
1943 range->set_end(end);
1944 first = false;
1945 } while (TryConsume(","));
1946
1947 DO(ConsumeEndOfDeclaration(";", &parent_location));
1948 return true;
1949 }
1950
ParseReserved(EnumDescriptorProto * proto,const LocationRecorder & enum_location)1951 bool Parser::ParseReserved(EnumDescriptorProto* proto,
1952 const LocationRecorder& enum_location) {
1953 io::Tokenizer::Token start_token = input_->current();
1954 // Parse the declaration.
1955 DO(Consume("reserved"));
1956 if (LookingAtType(io::Tokenizer::TYPE_STRING)) {
1957 if (syntax_identifier_ == "editions") {
1958 RecordError(
1959 "Reserved names must be identifiers in editions, not string "
1960 "literals.");
1961 return false;
1962 }
1963 LocationRecorder location(enum_location,
1964 EnumDescriptorProto::kReservedNameFieldNumber);
1965 location.StartAt(start_token);
1966 return ParseReservedNames(proto, location);
1967 } else if (LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) {
1968 if (syntax_identifier_ != "editions") {
1969 RecordError(
1970 "Reserved names must be string literals. (Only editions supports "
1971 "identifiers.)");
1972 return false;
1973 }
1974 LocationRecorder location(enum_location,
1975 EnumDescriptorProto::kReservedNameFieldNumber);
1976 location.StartAt(start_token);
1977 return ParseReservedIdentifiers(proto, location);
1978 } else {
1979 LocationRecorder location(enum_location,
1980 EnumDescriptorProto::kReservedRangeFieldNumber);
1981 location.StartAt(start_token);
1982 return ParseReservedNumbers(proto, location);
1983 }
1984 }
1985
ParseReservedNames(EnumDescriptorProto * proto,const LocationRecorder & parent_location)1986 bool Parser::ParseReservedNames(EnumDescriptorProto* proto,
1987 const LocationRecorder& parent_location) {
1988 do {
1989 LocationRecorder location(parent_location, proto->reserved_name_size());
1990 DO(ParseReservedName(proto->add_reserved_name(),
1991 "Expected enum value string literal."));
1992 } while (TryConsume(","));
1993 DO(ConsumeEndOfDeclaration(";", &parent_location));
1994 return true;
1995 }
1996
ParseReservedIdentifiers(EnumDescriptorProto * proto,const LocationRecorder & parent_location)1997 bool Parser::ParseReservedIdentifiers(EnumDescriptorProto* proto,
1998 const LocationRecorder& parent_location) {
1999 do {
2000 LocationRecorder location(parent_location, proto->reserved_name_size());
2001 DO(ParseReservedIdentifier(proto->add_reserved_name(),
2002 "Expected enum value identifier."));
2003 } while (TryConsume(","));
2004 DO(ConsumeEndOfDeclaration(";", &parent_location));
2005 return true;
2006 }
2007
ParseReservedNumbers(EnumDescriptorProto * proto,const LocationRecorder & parent_location)2008 bool Parser::ParseReservedNumbers(EnumDescriptorProto* proto,
2009 const LocationRecorder& parent_location) {
2010 bool first = true;
2011 do {
2012 LocationRecorder location(parent_location, proto->reserved_range_size());
2013
2014 EnumDescriptorProto::EnumReservedRange* range = proto->add_reserved_range();
2015 location.RecordLegacyLocation(range,
2016 DescriptorPool::ErrorCollector::NUMBER);
2017 int start, end;
2018 io::Tokenizer::Token start_token;
2019 {
2020 LocationRecorder start_location(
2021 location, EnumDescriptorProto::EnumReservedRange::kStartFieldNumber);
2022 start_token = input_->current();
2023 DO(ConsumeSignedInteger(&start,
2024 (first ? "Expected enum value or number range."
2025 : "Expected enum number range.")));
2026 }
2027
2028 if (TryConsume("to")) {
2029 LocationRecorder end_location(
2030 location, EnumDescriptorProto::EnumReservedRange::kEndFieldNumber);
2031 if (TryConsume("max")) {
2032 // This is in the enum descriptor path, which doesn't have the message
2033 // set duality to fix up, so it doesn't integrate with the sentinel.
2034 end = INT_MAX;
2035 } else {
2036 DO(ConsumeSignedInteger(&end, "Expected integer."));
2037 }
2038 } else {
2039 LocationRecorder end_location(
2040 location, EnumDescriptorProto::EnumReservedRange::kEndFieldNumber);
2041 end_location.StartAt(start_token);
2042 end_location.EndAt(start_token);
2043 end = start;
2044 }
2045
2046 range->set_start(start);
2047 range->set_end(end);
2048 first = false;
2049 } while (TryConsume(","));
2050
2051 DO(ConsumeEndOfDeclaration(";", &parent_location));
2052 return true;
2053 }
2054
ParseExtend(RepeatedPtrField<FieldDescriptorProto> * extensions,RepeatedPtrField<DescriptorProto> * messages,const LocationRecorder & parent_location,int location_field_number_for_nested_type,const LocationRecorder & extend_location,const FileDescriptorProto * containing_file)2055 bool Parser::ParseExtend(RepeatedPtrField<FieldDescriptorProto>* extensions,
2056 RepeatedPtrField<DescriptorProto>* messages,
2057 const LocationRecorder& parent_location,
2058 int location_field_number_for_nested_type,
2059 const LocationRecorder& extend_location,
2060 const FileDescriptorProto* containing_file) {
2061 DO(Consume("extend"));
2062
2063 // Parse the extendee type.
2064 io::Tokenizer::Token extendee_start = input_->current();
2065 std::string extendee;
2066 DO(ParseUserDefinedType(&extendee));
2067 io::Tokenizer::Token extendee_end = input_->previous();
2068
2069 // Parse the block.
2070 DO(ConsumeEndOfDeclaration("{", &extend_location));
2071
2072 bool is_first = true;
2073
2074 do {
2075 if (AtEnd()) {
2076 RecordError("Reached end of input in extend definition (missing '}').");
2077 return false;
2078 }
2079
2080 // Note that kExtensionFieldNumber was already pushed by the parent.
2081 LocationRecorder location(extend_location, extensions->size());
2082
2083 FieldDescriptorProto* field = extensions->Add();
2084
2085 {
2086 LocationRecorder extendee_location(
2087 location, FieldDescriptorProto::kExtendeeFieldNumber);
2088 extendee_location.StartAt(extendee_start);
2089 extendee_location.EndAt(extendee_end);
2090
2091 if (is_first) {
2092 extendee_location.RecordLegacyLocation(
2093 field, DescriptorPool::ErrorCollector::EXTENDEE);
2094 is_first = false;
2095 }
2096 }
2097
2098 field->set_extendee(extendee);
2099
2100 if (!ParseMessageField(field, messages, parent_location,
2101 location_field_number_for_nested_type, location,
2102 containing_file)) {
2103 // This statement failed to parse. Skip it, but keep looping to parse
2104 // other statements.
2105 SkipStatement();
2106 }
2107 } while (!TryConsumeEndOfDeclaration("}", nullptr));
2108
2109 return true;
2110 }
2111
ParseOneof(OneofDescriptorProto * oneof_decl,DescriptorProto * containing_type,int oneof_index,const LocationRecorder & oneof_location,const LocationRecorder & containing_type_location,const FileDescriptorProto * containing_file)2112 bool Parser::ParseOneof(OneofDescriptorProto* oneof_decl,
2113 DescriptorProto* containing_type, int oneof_index,
2114 const LocationRecorder& oneof_location,
2115 const LocationRecorder& containing_type_location,
2116 const FileDescriptorProto* containing_file) {
2117 DO(Consume("oneof"));
2118
2119 {
2120 LocationRecorder name_location(oneof_location,
2121 OneofDescriptorProto::kNameFieldNumber);
2122 DO(ConsumeIdentifier(oneof_decl->mutable_name(), "Expected oneof name."));
2123 }
2124
2125 DO(ConsumeEndOfDeclaration("{", &oneof_location));
2126
2127 do {
2128 if (AtEnd()) {
2129 RecordError("Reached end of input in oneof definition (missing '}').");
2130 return false;
2131 }
2132
2133 if (LookingAt("option")) {
2134 LocationRecorder option_location(
2135 oneof_location, OneofDescriptorProto::kOptionsFieldNumber);
2136 if (!ParseOption(oneof_decl->mutable_options(), option_location,
2137 containing_file, OPTION_STATEMENT)) {
2138 return false;
2139 }
2140 continue;
2141 }
2142
2143 // Print a nice error if the user accidentally tries to place a label
2144 // on an individual member of a oneof.
2145 if (LookingAt("required") || LookingAt("optional") ||
2146 LookingAt("repeated")) {
2147 RecordError(
2148 "Fields in oneofs must not have labels (required / optional "
2149 "/ repeated).");
2150 // We can continue parsing here because we understand what the user
2151 // meant. The error report will still make parsing fail overall.
2152 input_->Next();
2153 }
2154
2155 LocationRecorder field_location(containing_type_location,
2156 DescriptorProto::kFieldFieldNumber,
2157 containing_type->field_size());
2158
2159 FieldDescriptorProto* field = containing_type->add_field();
2160 field->set_label(FieldDescriptorProto::LABEL_OPTIONAL);
2161 field->set_oneof_index(oneof_index);
2162
2163 if (!ParseMessageFieldNoLabel(field, containing_type->mutable_nested_type(),
2164 containing_type_location,
2165 DescriptorProto::kNestedTypeFieldNumber,
2166 field_location, containing_file)) {
2167 // This statement failed to parse. Skip it, but keep looping to parse
2168 // other statements.
2169 SkipStatement();
2170 }
2171 } while (!TryConsumeEndOfDeclaration("}", nullptr));
2172
2173 return true;
2174 }
2175
2176 // -------------------------------------------------------------------
2177 // Enums
2178
ParseEnumDefinition(EnumDescriptorProto * enum_type,const LocationRecorder & enum_location,const FileDescriptorProto * containing_file)2179 bool Parser::ParseEnumDefinition(EnumDescriptorProto* enum_type,
2180 const LocationRecorder& enum_location,
2181 const FileDescriptorProto* containing_file) {
2182 DO(Consume("enum"));
2183
2184 {
2185 LocationRecorder location(enum_location,
2186 EnumDescriptorProto::kNameFieldNumber);
2187 location.RecordLegacyLocation(enum_type,
2188 DescriptorPool::ErrorCollector::NAME);
2189 DO(ConsumeIdentifier(enum_type->mutable_name(), "Expected enum name."));
2190 }
2191
2192 DO(ParseEnumBlock(enum_type, enum_location, containing_file));
2193
2194 DO(ValidateEnum(enum_type));
2195
2196 return true;
2197 }
2198
ParseEnumBlock(EnumDescriptorProto * enum_type,const LocationRecorder & enum_location,const FileDescriptorProto * containing_file)2199 bool Parser::ParseEnumBlock(EnumDescriptorProto* enum_type,
2200 const LocationRecorder& enum_location,
2201 const FileDescriptorProto* containing_file) {
2202 DO(ConsumeEndOfDeclaration("{", &enum_location));
2203
2204 while (!TryConsumeEndOfDeclaration("}", nullptr)) {
2205 if (AtEnd()) {
2206 RecordError("Reached end of input in enum definition (missing '}').");
2207 return false;
2208 }
2209
2210 if (!ParseEnumStatement(enum_type, enum_location, containing_file)) {
2211 // This statement failed to parse. Skip it, but keep looping to parse
2212 // other statements.
2213 SkipStatement();
2214 }
2215 }
2216
2217 return true;
2218 }
2219
ParseEnumStatement(EnumDescriptorProto * enum_type,const LocationRecorder & enum_location,const FileDescriptorProto * containing_file)2220 bool Parser::ParseEnumStatement(EnumDescriptorProto* enum_type,
2221 const LocationRecorder& enum_location,
2222 const FileDescriptorProto* containing_file) {
2223 if (TryConsumeEndOfDeclaration(";", nullptr)) {
2224 // empty statement; ignore
2225 return true;
2226 } else if (LookingAt("option")) {
2227 LocationRecorder location(enum_location,
2228 EnumDescriptorProto::kOptionsFieldNumber);
2229 return ParseOption(enum_type->mutable_options(), location, containing_file,
2230 OPTION_STATEMENT);
2231 } else if (LookingAt("reserved")) {
2232 return ParseReserved(enum_type, enum_location);
2233 } else {
2234 LocationRecorder location(enum_location,
2235 EnumDescriptorProto::kValueFieldNumber,
2236 enum_type->value_size());
2237 return ParseEnumConstant(enum_type->add_value(), location, containing_file);
2238 }
2239 }
2240
ParseEnumConstant(EnumValueDescriptorProto * enum_value,const LocationRecorder & enum_value_location,const FileDescriptorProto * containing_file)2241 bool Parser::ParseEnumConstant(EnumValueDescriptorProto* enum_value,
2242 const LocationRecorder& enum_value_location,
2243 const FileDescriptorProto* containing_file) {
2244 // Parse name.
2245 {
2246 LocationRecorder location(enum_value_location,
2247 EnumValueDescriptorProto::kNameFieldNumber);
2248 location.RecordLegacyLocation(enum_value,
2249 DescriptorPool::ErrorCollector::NAME);
2250 DO(ConsumeIdentifier(enum_value->mutable_name(),
2251 "Expected enum constant name."));
2252 }
2253
2254 DO(Consume("=", "Missing numeric value for enum constant."));
2255
2256 // Parse value.
2257 {
2258 LocationRecorder location(enum_value_location,
2259 EnumValueDescriptorProto::kNumberFieldNumber);
2260 location.RecordLegacyLocation(enum_value,
2261 DescriptorPool::ErrorCollector::NUMBER);
2262
2263 int number;
2264 DO(ConsumeSignedInteger(&number, "Expected integer."));
2265 enum_value->set_number(number);
2266 }
2267
2268 DO(ParseEnumConstantOptions(enum_value, enum_value_location,
2269 containing_file));
2270
2271 DO(ConsumeEndOfDeclaration(";", &enum_value_location));
2272
2273 return true;
2274 }
2275
ParseEnumConstantOptions(EnumValueDescriptorProto * value,const LocationRecorder & enum_value_location,const FileDescriptorProto * containing_file)2276 bool Parser::ParseEnumConstantOptions(
2277 EnumValueDescriptorProto* value,
2278 const LocationRecorder& enum_value_location,
2279 const FileDescriptorProto* containing_file) {
2280 if (!LookingAt("[")) return true;
2281
2282 LocationRecorder location(enum_value_location,
2283 EnumValueDescriptorProto::kOptionsFieldNumber);
2284
2285 DO(Consume("["));
2286
2287 do {
2288 DO(ParseOption(value->mutable_options(), location, containing_file,
2289 OPTION_ASSIGNMENT));
2290 } while (TryConsume(","));
2291
2292 DO(Consume("]"));
2293 return true;
2294 }
2295
2296 // -------------------------------------------------------------------
2297 // Services
2298
ParseServiceDefinition(ServiceDescriptorProto * service,const LocationRecorder & service_location,const FileDescriptorProto * containing_file)2299 bool Parser::ParseServiceDefinition(
2300 ServiceDescriptorProto* service, const LocationRecorder& service_location,
2301 const FileDescriptorProto* containing_file) {
2302 DO(Consume("service"));
2303
2304 {
2305 LocationRecorder location(service_location,
2306 ServiceDescriptorProto::kNameFieldNumber);
2307 location.RecordLegacyLocation(service,
2308 DescriptorPool::ErrorCollector::NAME);
2309 DO(ConsumeIdentifier(service->mutable_name(), "Expected service name."));
2310 }
2311
2312 DO(ParseServiceBlock(service, service_location, containing_file));
2313 return true;
2314 }
2315
ParseServiceBlock(ServiceDescriptorProto * service,const LocationRecorder & service_location,const FileDescriptorProto * containing_file)2316 bool Parser::ParseServiceBlock(ServiceDescriptorProto* service,
2317 const LocationRecorder& service_location,
2318 const FileDescriptorProto* containing_file) {
2319 DO(ConsumeEndOfDeclaration("{", &service_location));
2320
2321 while (!TryConsumeEndOfDeclaration("}", nullptr)) {
2322 if (AtEnd()) {
2323 RecordError("Reached end of input in service definition (missing '}').");
2324 return false;
2325 }
2326
2327 if (!ParseServiceStatement(service, service_location, containing_file)) {
2328 // This statement failed to parse. Skip it, but keep looping to parse
2329 // other statements.
2330 SkipStatement();
2331 }
2332 }
2333
2334 return true;
2335 }
2336
ParseServiceStatement(ServiceDescriptorProto * service,const LocationRecorder & service_location,const FileDescriptorProto * containing_file)2337 bool Parser::ParseServiceStatement(ServiceDescriptorProto* service,
2338 const LocationRecorder& service_location,
2339 const FileDescriptorProto* containing_file) {
2340 if (TryConsumeEndOfDeclaration(";", nullptr)) {
2341 // empty statement; ignore
2342 return true;
2343 } else if (LookingAt("option")) {
2344 LocationRecorder location(service_location,
2345 ServiceDescriptorProto::kOptionsFieldNumber);
2346 return ParseOption(service->mutable_options(), location, containing_file,
2347 OPTION_STATEMENT);
2348 } else {
2349 LocationRecorder location(service_location,
2350 ServiceDescriptorProto::kMethodFieldNumber,
2351 service->method_size());
2352 return ParseServiceMethod(service->add_method(), location, containing_file);
2353 }
2354 }
2355
ParseServiceMethod(MethodDescriptorProto * method,const LocationRecorder & method_location,const FileDescriptorProto * containing_file)2356 bool Parser::ParseServiceMethod(MethodDescriptorProto* method,
2357 const LocationRecorder& method_location,
2358 const FileDescriptorProto* containing_file) {
2359 DO(Consume("rpc"));
2360
2361 {
2362 LocationRecorder location(method_location,
2363 MethodDescriptorProto::kNameFieldNumber);
2364 location.RecordLegacyLocation(method, DescriptorPool::ErrorCollector::NAME);
2365 DO(ConsumeIdentifier(method->mutable_name(), "Expected method name."));
2366 }
2367
2368 // Parse input type.
2369 DO(Consume("("));
2370 {
2371 if (LookingAt("stream")) {
2372 LocationRecorder location(
2373 method_location, MethodDescriptorProto::kClientStreamingFieldNumber);
2374 location.RecordLegacyLocation(method,
2375 DescriptorPool::ErrorCollector::OTHER);
2376 method->set_client_streaming(true);
2377 DO(Consume("stream"));
2378 }
2379 LocationRecorder location(method_location,
2380 MethodDescriptorProto::kInputTypeFieldNumber);
2381 location.RecordLegacyLocation(method,
2382 DescriptorPool::ErrorCollector::INPUT_TYPE);
2383 DO(ParseUserDefinedType(method->mutable_input_type()));
2384 }
2385 DO(Consume(")"));
2386
2387 // Parse output type.
2388 DO(Consume("returns"));
2389 DO(Consume("("));
2390 {
2391 if (LookingAt("stream")) {
2392 LocationRecorder location(
2393 method_location, MethodDescriptorProto::kServerStreamingFieldNumber);
2394 location.RecordLegacyLocation(method,
2395 DescriptorPool::ErrorCollector::OTHER);
2396 DO(Consume("stream"));
2397 method->set_server_streaming(true);
2398 }
2399 LocationRecorder location(method_location,
2400 MethodDescriptorProto::kOutputTypeFieldNumber);
2401 location.RecordLegacyLocation(method,
2402 DescriptorPool::ErrorCollector::OUTPUT_TYPE);
2403 DO(ParseUserDefinedType(method->mutable_output_type()));
2404 }
2405 DO(Consume(")"));
2406
2407 if (LookingAt("{")) {
2408 // Options!
2409 DO(ParseMethodOptions(method_location, containing_file,
2410 MethodDescriptorProto::kOptionsFieldNumber,
2411 method->mutable_options()));
2412 } else {
2413 DO(ConsumeEndOfDeclaration(";", &method_location));
2414 }
2415
2416 return true;
2417 }
2418
ParseMethodOptions(const LocationRecorder & parent_location,const FileDescriptorProto * containing_file,const int optionsFieldNumber,Message * mutable_options)2419 bool Parser::ParseMethodOptions(const LocationRecorder& parent_location,
2420 const FileDescriptorProto* containing_file,
2421 const int optionsFieldNumber,
2422 Message* mutable_options) {
2423 // Options!
2424 ConsumeEndOfDeclaration("{", &parent_location);
2425 while (!TryConsumeEndOfDeclaration("}", nullptr)) {
2426 if (AtEnd()) {
2427 RecordError("Reached end of input in method options (missing '}').");
2428 return false;
2429 }
2430
2431 if (TryConsumeEndOfDeclaration(";", nullptr)) {
2432 // empty statement; ignore
2433 } else {
2434 LocationRecorder location(parent_location, optionsFieldNumber);
2435 if (!ParseOption(mutable_options, location, containing_file,
2436 OPTION_STATEMENT)) {
2437 // This statement failed to parse. Skip it, but keep looping to
2438 // parse other statements.
2439 SkipStatement();
2440 }
2441 }
2442 }
2443
2444 return true;
2445 }
2446
2447 // -------------------------------------------------------------------
2448
ParseLabel(FieldDescriptorProto::Label * label,const LocationRecorder & field_location)2449 bool Parser::ParseLabel(FieldDescriptorProto::Label* label,
2450 const LocationRecorder& field_location) {
2451 if (!LookingAt("optional") && !LookingAt("repeated") &&
2452 !LookingAt("required")) {
2453 return false;
2454 }
2455 if (LookingAt("optional") && syntax_identifier_ == "editions") {
2456 RecordError(
2457 "Label \"optional\" is not supported in editions. By default, all "
2458 "singular fields have presence unless features.field_presence is set.");
2459 }
2460 if (LookingAt("required") && syntax_identifier_ == "editions") {
2461 RecordError(
2462 "Label \"required\" is not supported in editions, use "
2463 "features.field_presence = LEGACY_REQUIRED.");
2464 }
2465
2466 LocationRecorder location(field_location,
2467 FieldDescriptorProto::kLabelFieldNumber);
2468 if (TryConsume("optional")) {
2469 *label = FieldDescriptorProto::LABEL_OPTIONAL;
2470 } else if (TryConsume("repeated")) {
2471 *label = FieldDescriptorProto::LABEL_REPEATED;
2472 } else {
2473 Consume("required");
2474 *label = FieldDescriptorProto::LABEL_REQUIRED;
2475 }
2476 return true;
2477 }
2478
ParseType(FieldDescriptorProto::Type * type,std::string * type_name)2479 bool Parser::ParseType(FieldDescriptorProto::Type* type,
2480 std::string* type_name) {
2481 const auto& type_names_table = GetTypeNameTable();
2482 auto iter = type_names_table.find(input_->current().text);
2483 if (iter != type_names_table.end()) {
2484 if (syntax_identifier_ == "editions" &&
2485 iter->second == FieldDescriptorProto::TYPE_GROUP) {
2486 RecordError(
2487 "Group syntax is no longer supported in editions. To get group "
2488 "behavior you can specify features.message_encoding = DELIMITED on a "
2489 "message field.");
2490 }
2491 *type = iter->second;
2492 input_->Next();
2493 } else {
2494 DO(ParseUserDefinedType(type_name));
2495 }
2496 return true;
2497 }
2498
ParseUserDefinedType(std::string * type_name)2499 bool Parser::ParseUserDefinedType(std::string* type_name) {
2500 type_name->clear();
2501
2502 const auto& type_names_table = GetTypeNameTable();
2503 auto iter = type_names_table.find(input_->current().text);
2504 if (iter != type_names_table.end()) {
2505 // Note: The only place enum types are allowed is for field types, but
2506 // if we are parsing a field type then we would not get here because
2507 // primitives are allowed there as well. So this error message doesn't
2508 // need to account for enums.
2509 RecordError("Expected message type.");
2510
2511 // Pretend to accept this type so that we can go on parsing.
2512 *type_name = input_->current().text;
2513 input_->Next();
2514 return true;
2515 }
2516
2517 // A leading "." means the name is fully-qualified.
2518 if (TryConsume(".")) type_name->append(".");
2519
2520 // Consume the first part of the name.
2521 std::string identifier;
2522 DO(ConsumeIdentifier(&identifier, "Expected type name."));
2523 type_name->append(identifier);
2524
2525 // Consume more parts.
2526 while (TryConsume(".")) {
2527 type_name->append(".");
2528 DO(ConsumeIdentifier(&identifier, "Expected identifier."));
2529 type_name->append(identifier);
2530 }
2531
2532 return true;
2533 }
2534
2535 // ===================================================================
2536
ParsePackage(FileDescriptorProto * file,const LocationRecorder & root_location,const FileDescriptorProto * containing_file)2537 bool Parser::ParsePackage(FileDescriptorProto* file,
2538 const LocationRecorder& root_location,
2539 const FileDescriptorProto* containing_file) {
2540 if (file->has_package()) {
2541 RecordError("Multiple package definitions.");
2542 // Don't append the new package to the old one. Just replace it. Not
2543 // that it really matters since this is an error anyway.
2544 file->clear_package();
2545 }
2546
2547 LocationRecorder location(root_location,
2548 FileDescriptorProto::kPackageFieldNumber);
2549 location.RecordLegacyLocation(file, DescriptorPool::ErrorCollector::NAME);
2550
2551 DO(Consume("package"));
2552
2553 while (true) {
2554 std::string identifier;
2555 DO(ConsumeIdentifier(&identifier, "Expected identifier."));
2556 file->mutable_package()->append(identifier);
2557 if (!TryConsume(".")) break;
2558 file->mutable_package()->append(".");
2559 }
2560
2561 DO(ConsumeEndOfDeclaration(";", &location));
2562
2563 return true;
2564 }
2565
ParseImport(RepeatedPtrField<std::string> * dependency,RepeatedField<int32_t> * public_dependency,RepeatedField<int32_t> * weak_dependency,const LocationRecorder & root_location,const FileDescriptorProto * containing_file)2566 bool Parser::ParseImport(RepeatedPtrField<std::string>* dependency,
2567 RepeatedField<int32_t>* public_dependency,
2568 RepeatedField<int32_t>* weak_dependency,
2569 const LocationRecorder& root_location,
2570 const FileDescriptorProto* containing_file) {
2571 LocationRecorder location(root_location,
2572 FileDescriptorProto::kDependencyFieldNumber,
2573 dependency->size());
2574
2575 DO(Consume("import"));
2576
2577 if (LookingAt("public")) {
2578 LocationRecorder public_location(
2579 root_location, FileDescriptorProto::kPublicDependencyFieldNumber,
2580 public_dependency->size());
2581 DO(Consume("public"));
2582 *public_dependency->Add() = dependency->size();
2583 } else if (LookingAt("weak")) {
2584 LocationRecorder weak_location(
2585 root_location, FileDescriptorProto::kWeakDependencyFieldNumber,
2586 weak_dependency->size());
2587 weak_location.RecordLegacyImportLocation(containing_file, "weak");
2588 DO(Consume("weak"));
2589 *weak_dependency->Add() = dependency->size();
2590 }
2591
2592 std::string import_file;
2593 DO(ConsumeString(&import_file,
2594 "Expected a string naming the file to import."));
2595 *dependency->Add() = import_file;
2596 location.RecordLegacyImportLocation(containing_file, import_file);
2597
2598 DO(ConsumeEndOfDeclaration(";", &location));
2599
2600 return true;
2601 }
2602
2603 // ===================================================================
2604
SourceLocationTable()2605 SourceLocationTable::SourceLocationTable() {}
~SourceLocationTable()2606 SourceLocationTable::~SourceLocationTable() {}
2607
Find(const Message * descriptor,DescriptorPool::ErrorCollector::ErrorLocation location,int * line,int * column) const2608 bool SourceLocationTable::Find(
2609 const Message* descriptor,
2610 DescriptorPool::ErrorCollector::ErrorLocation location, int* line,
2611 int* column) const {
2612 auto it = location_map_.find({descriptor, location});
2613 if (it == location_map_.end()) {
2614 *line = -1;
2615 *column = 0;
2616 return false;
2617 }
2618 std::tie(*line, *column) = it->second;
2619 return true;
2620 }
2621
FindImport(const Message * descriptor,absl::string_view name,int * line,int * column) const2622 bool SourceLocationTable::FindImport(const Message* descriptor,
2623 absl::string_view name, int* line,
2624 int* column) const {
2625 auto it = import_location_map_.find({descriptor, std::string(name)});
2626 if (it == import_location_map_.end()) {
2627 *line = -1;
2628 *column = 0;
2629 return false;
2630 }
2631 std::tie(*line, *column) = it->second;
2632 return true;
2633 }
2634
Add(const Message * descriptor,DescriptorPool::ErrorCollector::ErrorLocation location,int line,int column)2635 void SourceLocationTable::Add(
2636 const Message* descriptor,
2637 DescriptorPool::ErrorCollector::ErrorLocation location, int line,
2638 int column) {
2639 location_map_[std::make_pair(descriptor, location)] =
2640 std::make_pair(line, column);
2641 }
2642
AddImport(const Message * descriptor,const std::string & name,int line,int column)2643 void SourceLocationTable::AddImport(const Message* descriptor,
2644 const std::string& name, int line,
2645 int column) {
2646 import_location_map_[std::make_pair(descriptor, name)] =
2647 std::make_pair(line, column);
2648 }
2649
Clear()2650 void SourceLocationTable::Clear() { location_map_.clear(); }
2651
2652 } // namespace compiler
2653 } // namespace protobuf
2654 } // namespace google
2655
2656 #include "google/protobuf/port_undef.inc"
2657