1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc. All rights reserved.
3 //
4 // Use of this source code is governed by a BSD-style
5 // license that can be found in the LICENSE file or at
6 // https://developers.google.com/open-source/licenses/bsd
7
8 // Author: kenton@google.com (Kenton Varda)
9 // Based on original Protocol Buffers design by
10 // Sanjay Ghemawat, Jeff Dean, and others.
11
12 #include "google/protobuf/descriptor.h"
13
14 #include <fcntl.h>
15 #include <limits.h>
16
17 #include <algorithm>
18 #include <array>
19 #include <atomic>
20 #include <cstdint>
21 #include <cstdlib>
22 #include <cstring>
23 #include <functional>
24 #include <initializer_list>
25 #include <iterator>
26 #include <limits>
27 #include <memory>
28 #include <new> // IWYU pragma: keep
29 #include <sstream>
30 #include <string>
31 #include <tuple>
32 #include <type_traits>
33 #include <utility>
34 #include <vector>
35
36 #include "absl/base/attributes.h"
37 #include "absl/base/call_once.h"
38 #include "absl/base/casts.h"
39 #include "absl/base/const_init.h"
40 #include "absl/base/dynamic_annotations.h"
41 #include "absl/base/thread_annotations.h"
42 #include "absl/cleanup/cleanup.h"
43 #include "absl/container/btree_map.h"
44 #include "absl/container/flat_hash_map.h"
45 #include "absl/container/flat_hash_set.h"
46 #include "absl/functional/function_ref.h"
47 #include "absl/hash/hash.h"
48 #include "absl/log/absl_check.h"
49 #include "absl/log/absl_log.h"
50 #include "absl/memory/memory.h"
51 #include "absl/status/status.h"
52 #include "absl/status/statusor.h"
53 #include "absl/strings/ascii.h"
54 #include "absl/strings/escaping.h"
55 #include "absl/strings/match.h"
56 #include "absl/strings/str_cat.h"
57 #include "absl/strings/str_format.h"
58 #include "absl/strings/str_join.h"
59 #include "absl/strings/str_split.h"
60 #include "absl/strings/string_view.h"
61 #include "absl/strings/strip.h"
62 #include "absl/strings/substitute.h"
63 #include "absl/synchronization/mutex.h"
64 #include "absl/types/optional.h"
65 #include "absl/types/span.h"
66 #include "google/protobuf/any.h"
67 #include "google/protobuf/cpp_edition_defaults.h"
68 #include "google/protobuf/cpp_features.pb.h"
69 #include "google/protobuf/descriptor.pb.h"
70 #include "google/protobuf/descriptor_database.h"
71 #include "google/protobuf/descriptor_lite.h"
72 #include "google/protobuf/descriptor_visitor.h"
73 #include "google/protobuf/dynamic_message.h"
74 #include "google/protobuf/feature_resolver.h"
75 #include "google/protobuf/generated_message_util.h"
76 #include "google/protobuf/io/strtod.h"
77 #include "google/protobuf/io/tokenizer.h"
78 #include "google/protobuf/message.h"
79 #include "google/protobuf/message_lite.h"
80 #include "google/protobuf/parse_context.h"
81 #include "google/protobuf/port.h"
82 #include "google/protobuf/repeated_ptr_field.h"
83 #include "google/protobuf/text_format.h"
84 #include "google/protobuf/unknown_field_set.h"
85
86
87 // Must be included last.
88 #include "google/protobuf/port_def.inc"
89
90 namespace google {
91 namespace protobuf {
92 namespace {
93
// Numeric limit of 100 related to packages.
// NOTE(review): the use site is not visible in this section; judging by the
// name it presumably caps package name nesting -- confirm where it is used.
const int kPackageLimit = 100;
95
96
ToCamelCase(const absl::string_view input,bool lower_first)97 std::string ToCamelCase(const absl::string_view input, bool lower_first) {
98 bool capitalize_next = !lower_first;
99 std::string result;
100 result.reserve(input.size());
101
102 for (char character : input) {
103 if (character == '_') {
104 capitalize_next = true;
105 } else if (capitalize_next) {
106 result.push_back(absl::ascii_toupper(character));
107 capitalize_next = false;
108 } else {
109 result.push_back(character);
110 }
111 }
112
113 // Lower-case the first letter.
114 if (lower_first && !result.empty()) {
115 result[0] = absl::ascii_tolower(result[0]);
116 }
117
118 return result;
119 }
120
ToJsonName(const absl::string_view input)121 std::string ToJsonName(const absl::string_view input) {
122 bool capitalize_next = false;
123 std::string result;
124 result.reserve(input.size());
125
126 for (char character : input) {
127 if (character == '_') {
128 capitalize_next = true;
129 } else if (capitalize_next) {
130 result.push_back(absl::ascii_toupper(character));
131 capitalize_next = false;
132 } else {
133 result.push_back(character);
134 }
135 }
136
137 return result;
138 }
139
// Returns `options.deprecated_legacy_json_field_conflicts()`.
//
// `OptionsT` is any options type exposing that accessor. The pragmas suppress
// -Wdeprecated-declarations around the call on GCC-compatible compilers
// (those defining __GNUC__) and restore the previous diagnostic state
// afterwards.
template <typename OptionsT>
bool IsLegacyJsonFieldConflictEnabled(const OptionsT& options) {
#ifdef __GNUC__
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
#endif
  return options.deprecated_legacy_json_field_conflicts();
#ifdef __GNUC__
#pragma GCC diagnostic pop
#endif
}
151
// Backport of fold expressions for the comma operator to C++11.
// Usage: Fold({expr...});
// Guaranteed to evaluate left-to-right
//
// ExpressionEater is implicitly constructible from a value of any type and
// discards it, so each expanded expression becomes one element of the
// initializer list passed to Fold. Fold itself does nothing; only the side
// effects of evaluating the list elements matter.
struct ExpressionEater {
  template <typename T>
  ExpressionEater(T&&) {}  // NOLINT
};
void Fold(std::initializer_list<ExpressionEater>) {}
160
// Rounds `n` up to the nearest multiple of `R` and returns the result.
//
// `R` must be a positive power of two; this is enforced at compile time.
// Values of `n` that are already a multiple of `R` are returned unchanged.
template <int R>
constexpr size_t RoundUpTo(size_t n) {
  // `(R & (R - 1)) == 0` alone is also satisfied by R == 0, for which the
  // mask below would be all zeros and every input would round to 0, so the
  // sign/zero check is made explicit. Checking `R > 0` first also keeps
  // `R - 1` from being evaluated for non-positive values.
  static_assert(R > 0 && (R & (R - 1)) == 0, "Must be power of two");
  return (n + (R - 1)) & ~(R - 1);
}
166
// Returns the largest of one or more `size_t` values; usable in constant
// expressions.
//
// The single-argument overload is the base case that lets a pack expansion
// such as `Max(alignof(T)...)` compile when the pack holds exactly one
// element. Without it the variadic overload would recurse into `Max()`,
// which has no viable candidate.
//
// Arguments are expected to be `size_t`, so that the non-template overloads
// win overload resolution and terminate the recursion.
constexpr size_t Max(size_t a) { return a; }
constexpr size_t Max(size_t a, size_t b) { return a > b ? a : b; }
template <typename T, typename... Ts>
constexpr size_t Max(T a, Ts... b) {
  // Reduce the tail first, then compare it with the head.
  return Max(a, Max(b...));
}
172
// Alignment to use for `T` when ordering blocks by alignment.
//
// `char` is special in that it is reported as 8-aligned: the `char` block is
// where the trivial structs are dropped. Every other type reports its
// natural `alignof`.
template <typename T>
constexpr size_t EffectiveAlignof() {
  return !std::is_same<T, char>::value ? alignof(T) : 8;
}
179
// Type-list helper: yields the list `T...` with `U` appended when the
// effective alignment of `U` equals `align`, and the unchanged list `T...`
// otherwise. Lists are encoded as function pointer types `void (*)(T...)`.
template <int align, typename U, typename... T>
using AppendIfAlign =
    typename std::conditional<EffectiveAlignof<U>() == align, void (*)(T..., U),
                              void (*)(T...)>::type;
184
// Metafunction to sort types in descending order of alignment.
// Useful for the flat allocator to ensure proper alignment of all elements
// without having to add padding.
// Instead of implementing a proper sort metafunction we just do a
// filter+merge, which is much simpler to write as a metafunction.
// We have a fixed set of alignments we can filter on.
// For simplicity we use a function pointer as a type list.
//
// `In` is the list still to be processed; `T16`..`T1` are the buckets
// accumulated so far for effective alignments 16, 8, 4, 2 and 1.
template <typename In, typename T16, typename T8, typename T4, typename T2,
          typename T1>
struct TypeListSortImpl;

// Base case: the input list is empty, so concatenate the buckets from the
// largest alignment to the smallest.
template <typename... T16, typename... T8, typename... T4, typename... T2,
          typename... T1>
struct TypeListSortImpl<void (*)(), void (*)(T16...), void (*)(T8...),
                        void (*)(T4...), void (*)(T2...), void (*)(T1...)> {
  using type = void (*)(T16..., T8..., T4..., T2..., T1...);
};

// Recursive case: offer `First` to every bucket (AppendIfAlign adds it only
// to the bucket matching its effective alignment) and recurse on `Rest...`.
template <typename First, typename... Rest, typename... T16, typename... T8,
          typename... T4, typename... T2, typename... T1>
struct TypeListSortImpl<void (*)(First, Rest...), void (*)(T16...),
                        void (*)(T8...), void (*)(T4...), void (*)(T2...),
                        void (*)(T1...)> {
  using type = typename TypeListSortImpl<
      void (*)(Rest...), AppendIfAlign<16, First, T16...>,
      AppendIfAlign<8, First, T8...>, AppendIfAlign<4, First, T4...>,
      AppendIfAlign<2, First, T2...>, AppendIfAlign<1, First, T1...>>::type;
};
213
// The type list `void (*)(...)` holding `T...` reordered by descending
// effective alignment. Starts TypeListSortImpl with five empty buckets.
template <typename... T>
using SortByAlignment =
    typename TypeListSortImpl<void (*)(T...), void (*)(), void (*)(),
                              void (*)(), void (*)(), void (*)()>::type;
218
// Maps a function-pointer type list `void (*)(T...)` to `C<T...>`.
// Declared only, with no definition in this section -- presumably meant to
// be used in unevaluated contexts such as `decltype`; confirm at use sites.
template <template <typename...> class C, typename... T>
auto ApplyTypeList(void (*)(T...)) -> C<T...>;
221
// Returns the zero-based position of `T` within the type list that follows
// it in the template argument list, or -1 when `T` does not appear there.
//
// Example: FindTypeIndex<int, char, int>() == 1,
//          FindTypeIndex<float, char, int>() == -1.
template <typename T>
constexpr int FindTypeIndex() {
  // Base case: the list is exhausted without finding `T`.
  return -1;
}

template <typename T, typename T1, typename... Ts>
constexpr int FindTypeIndex() {
  // The "not found" sentinel from the tail is propagated unchanged. Adding 1
  // to it at every level would turn a miss into `list size - 1`, which is
  // indistinguishable from a real match on the last element.
  return std::is_same<T, T1>::value       ? 0
         : FindTypeIndex<T, Ts...>() < 0 ? -1
                                          : FindTypeIndex<T, Ts...>() + 1;
}
231
// A map from types to values. The permitted keys are the types listed in
// `Keys...`, and the value stored for key `K` has type `ValueT<K>`.
// All values are value-initialized when the map is constructed.
template <template <typename> class ValueT, typename... Keys>
class TypeMap {
  // One slot per key. Every slot is a distinct base class of `Storage`, so
  // converting the storage to `Slot<K>` selects the value for key `K`.
  template <typename K>
  struct Slot {
    ValueT<K> value{};
  };
  struct Storage : Slot<Keys>... {};

 public:
  // Mutable access to the value stored for key `K`.
  template <typename K>
  ValueT<K>& Get() {
    Slot<K>& slot = storage_;
    return slot.value;
  }

  // Read-only access to the value stored for key `K`.
  template <typename K>
  const ValueT<K>& Get() const {
    const Slot<K>& slot = storage_;
    return slot.value;
  }

 private:
  Storage storage_;
};
255
// Value templates for TypeMap.
// IntT<T> is `int` regardless of `T` (used for per-type counts and offsets).
template <typename T>
using IntT = int;
// PointerT<T> is `T*` (used for per-type array head pointers).
template <typename T>
using PointerT = T*;
260
// Manages an allocation of sequential arrays of type `T...`.
// It is more space efficient than storing N (ptr, size) pairs, by storing only
// the pointer to the head and the boundaries between the arrays.
//
// The arrays live in the same memory block as the FlatAllocation object,
// starting `RoundUpTo<kMaxAlign>(sizeof(FlatAllocation))` bytes after `this`.
// All offsets held in `ends_` are byte offsets from `this`.
template <typename... T>
class FlatAllocation {
 public:
  // The largest `alignof` among the element types.
  static constexpr size_t kMaxAlign = Max(alignof(T)...);

  // `ends` gives, per type, the byte offset at which that type's array ends,
  // measured from the start of the first array. The constructor rebases the
  // offsets onto `this` and then constructs the elements in place.
  explicit FlatAllocation(const TypeMap<IntT, T...>& ends) : ends_(ends) {
    // The arrays start just after FlatAllocation, so adjust the ends.
    Fold({(ends_.template Get<T>() +=
           RoundUpTo<kMaxAlign>(sizeof(FlatAllocation)))...});
    Fold({Init<T>()...});
  }

  // Runs the destructors of all non-trivially-destructible elements and then
  // releases the whole block (header plus arrays) with a sized delete.
  void Destroy() {
    Fold({Destroy<T>()...});
    internal::SizedDelete(this, total_bytes());
  }

  // The I-th element type of `T...`.
  template <int I>
  using type = typename std::tuple_element<I, std::tuple<T...>>::type;

  // Gets a tuple of the head pointers for the arrays
  // (nullptr for arrays that are empty, see Begin()).
  TypeMap<PointerT, T...> Pointers() const {
    TypeMap<PointerT, T...> out;
    Fold({(out.template Get<T>() = Begin<T>())...});
    return out;
  }


 private:
  // Total number of bytes used by all arrays.
  // Because the ends were rebased onto `this` in the constructor, the value
  // also includes the (rounded-up) size of the FlatAllocation header.
  int total_bytes() const {
    // Get the last end.
    return ends_.template Get<typename std::tuple_element<
        sizeof...(T) - 1, std::tuple<T...>>::type>();
  }


  // Byte offset from `this` at which the array of `U` begins: the end of the
  // previous type's array, or the end of the header for the first type.
  template <typename U>
  int BeginOffset() const {
    constexpr int type_index = FindTypeIndex<U, T...>();
    // Avoid a negative value here to keep it compiling when type_index == 0
    constexpr int prev_type_index = type_index == 0 ? 0 : type_index - 1;
    using PrevType =
        typename std::tuple_element<prev_type_index, std::tuple<T...>>::type;
    // Ensure the types are properly aligned.
    static_assert(EffectiveAlignof<PrevType>() >= EffectiveAlignof<U>(), "");
    return type_index == 0 ? RoundUpTo<kMaxAlign>(sizeof(FlatAllocation))
                           : ends_.template Get<PrevType>();
  }

  // Byte offset from `this` at which the array of `U` ends.
  template <typename U>
  int EndOffset() const {
    return ends_.template Get<U>();
  }

  // Avoid the reinterpret_cast if the array is empty.
  // Clang's Control Flow Integrity does not like the cast pointing to memory
  // that is not yet initialized to be of that type.
  // (from -fsanitize=cfi-unrelated-cast)
  template <typename U>
  U* Begin() const {
    int begin = BeginOffset<U>(), end = EndOffset<U>();
    if (begin == end) return nullptr;
    return reinterpret_cast<U*>(data() + begin);
  }

  // One-past-the-end pointer of the array of `U`, or nullptr when the array
  // is empty (so that Begin() == End() still holds).
  template <typename U>
  U* End() const {
    int begin = BeginOffset<U>(), end = EndOffset<U>();
    if (begin == end) return nullptr;
    return reinterpret_cast<U*>(data() + end);
  }

  // Value-initializes every element of the array of `U` with placement new.
  // Always returns true; the return value exists only so the call can be
  // used as an expression inside Fold().
  template <typename U>
  bool Init() {
    // Skip for the `char` block. No need to zero initialize it.
    if (std::is_same<U, char>::value) return true;
    for (char *p = data() + BeginOffset<U>(), *end = data() + EndOffset<U>();
         p != end; p += sizeof(U)) {
      ::new (p) U{};
    }
    return true;
  }

  // Runs the destructor of every element of the array of `U`; a no-op for
  // trivially destructible types. Always returns true (see Init()).
  template <typename U>
  bool Destroy() {
    if (std::is_trivially_destructible<U>::value) return true;
    for (U *it = Begin<U>(), *end = End<U>(); it != end; ++it) {
      it->~U();
    }
    return true;
  }

  // `this` viewed as a mutable byte pointer; the base for all offsets.
  char* data() const {
    return const_cast<char*>(reinterpret_cast<const char*>(this));
  }

  // Per-type end offsets in bytes, relative to `this`.
  TypeMap<IntT, T...> ends_;
};
363
// Converts per-type element counts into per-type end offsets in bytes.
//
// Walks `T...` in order, accumulating `sizeof(T) * count` into a running
// total, and stores the running total after each type as that type's end.
// The offsets are relative to the start of the first array.
template <typename... T>
TypeMap<IntT, T...> CalculateEnds(const TypeMap<IntT, T...>& sizes) {
  int total = 0;
  TypeMap<IntT, T...> out;
  // The braced list guarantees left-to-right evaluation, which the running
  // total depends on.
  Fold({(out.template Get<T>() = total +=
         sizeof(T) * sizes.template Get<T>())...});
  return out;
}
372
// The implementation for FlatAllocator below.
// This separate class template makes it easier to have methods that fold on
// `T...`.
//
// Usage is two-phase. First the Plan* methods record how many elements of
// each type will be needed; FinalizePlanning() then obtains the memory; after
// that the Allocate* methods hand out slices of it. Every Allocate* call must
// be mirrored by an equivalent Plan* call, and ExpectConsumed() checks that
// exactly the planned amount was used.
template <typename... T>
class FlatAllocatorImpl {
 public:
  using Allocation = FlatAllocation<T...>;

  // Reserves room for `array_size` elements of type `U`.
  // Trivially destructible types are all pooled into the `char` block, with
  // each request rounded up to a multiple of 8 bytes; other types are counted
  // per element in their own block.
  template <typename U>
  void PlanArray(int array_size) {
    // We can't call PlanArray after FinalizePlanning has been called.
    ABSL_CHECK(!has_allocated());
    if (std::is_trivially_destructible<U>::value) {
      // Trivial types are aligned to 8 bytes.
      static_assert(alignof(U) <= 8, "");
      total_.template Get<char>() += RoundUpTo<8>(array_size * sizeof(U));
    } else {
      // Since we can't use `if constexpr`, just make the expression compile
      // when this path is not taken.
      using TypeToUse =
          typename std::conditional<std::is_trivially_destructible<U>::value,
                                    char, U>::type;
      total_.template Get<TypeToUse>() += array_size;
    }
  }

  // Returns a pointer to `array_size` elements of `U` carved out of the
  // planned storage, using the same accounting as PlanArray. Checks that the
  // amount used does not exceed the amount planned.
  template <typename U>
  U* AllocateArray(int array_size) {
    constexpr bool trivial = std::is_trivially_destructible<U>::value;
    using TypeToUse = typename std::conditional<trivial, char, U>::type;

    // We can only allocate after FinalizePlanning has been called.
    ABSL_CHECK(has_allocated());

    TypeToUse*& data = pointers_.template Get<TypeToUse>();
    int& used = used_.template Get<TypeToUse>();
    U* res = reinterpret_cast<U*>(data + used);
    // `used` counts bytes for the char block and elements otherwise.
    used += trivial ? RoundUpTo<8>(array_size * sizeof(U)) : array_size;
    ABSL_CHECK_LE(used, total_.template Get<TypeToUse>());
    return res;
  }

  // Allocates one std::string per argument, assigns each argument (converted
  // to std::string) to its slot in order, and returns the first slot.
  template <typename... In>
  const std::string* AllocateStrings(In&&... in) {
    std::string* strings = AllocateArray<std::string>(sizeof...(in));
    std::string* res = strings;
    Fold({(*strings++ = std::string(std::forward<In>(in)))...});
    return res;
  }

  // Allocate all 5 names of the field:
  // name, full name, lowercase, camelcase and json.
  // It will dedup the strings when possible.
  // The resulting array contains `name` at index 0, `full_name` at index 1
  // and the other 3 indices are specified in the result.
  //
  // PlanFieldNames is the planning half: it reserves exactly as many strings
  // as AllocateFieldNames will later create for the same inputs.
  void PlanFieldNames(const std::string& name,
                      const std::string* opt_json_name) {
    ABSL_CHECK(!has_allocated());

    // Fast path for snake_case names, which follow the style guide.
    if (opt_json_name == nullptr) {
      switch (GetFieldNameCase(name)) {
        case FieldNameCase::kAllLower:
          // Case 1: they are all the same.
          return PlanArray<std::string>(2);
        case FieldNameCase::kSnakeCase:
          // Case 2: name==lower, camel==json
          return PlanArray<std::string>(3);
        default:
          break;
      }
    }

    std::string lowercase_name = name;
    absl::AsciiStrToLower(&lowercase_name);

    std::string camelcase_name = ToCamelCase(name, /* lower_first = */ true);
    std::string json_name =
        opt_json_name != nullptr ? *opt_json_name : ToJsonName(name);

    // Count the distinct strings among the four variants.
    absl::string_view all_names[] = {name, lowercase_name, camelcase_name,
                                     json_name};
    std::sort(all_names, all_names + 4);
    int unique =
        static_cast<int>(std::unique(all_names, all_names + 4) - all_names);

    // One extra slot for the full name, which is never deduplicated.
    PlanArray<std::string>(unique + 1);
  }

  // Result of AllocateFieldNames: `array[0]` is the name, `array[1]` the full
  // name, and the three indices locate the other variants within `array`.
  struct FieldNamesResult {
    const std::string* array;
    int lowercase_index;
    int camelcase_index;
    int json_index;
  };
  // Allocation half of PlanFieldNames. `scope`, when non-empty, is prepended
  // to `name` with a '.' to form the full name. `opt_json_name`, when not
  // null, overrides the JSON name derived from `name`.
  FieldNamesResult AllocateFieldNames(const absl::string_view name,
                                      const absl::string_view scope,
                                      const std::string* opt_json_name) {
    ABSL_CHECK(has_allocated());

    std::string full_name =
        scope.empty() ? std::string(name) : absl::StrCat(scope, ".", name);

    // Fast path for snake_case names, which follow the style guide.
    if (opt_json_name == nullptr) {
      switch (GetFieldNameCase(name)) {
        case FieldNameCase::kAllLower:
          // Case 1: they are all the same.
          return {AllocateStrings(name, std::move(full_name)), 0, 0, 0};
        case FieldNameCase::kSnakeCase:
          // Case 2: name==lower, camel==json
          return {AllocateStrings(name, std::move(full_name),
                                  ToCamelCase(name, /* lower_first = */ true)),
                  0, 2, 2};
        default:
          break;
      }
    }

    std::vector<std::string> names;
    names.emplace_back(name);
    names.push_back(std::move(full_name));

    // Returns the index of `new_name` in `names`, appending it if no equal
    // entry (other than the full name) exists yet.
    const auto push_name = [&](std::string new_name) {
      for (size_t i = 0; i < names.size(); ++i) {
        // Do not compare the full_name. It is unlikely to match, except in
        // custom json_name. We are not taking this into account in
        // PlanFieldNames so better to not try it.
        if (i == 1) continue;
        if (names[i] == new_name) return i;
      }
      names.push_back(std::move(new_name));
      return names.size() - 1;
    };

    FieldNamesResult result{nullptr, 0, 0, 0};

    std::string lowercase_name = std::string(name);
    absl::AsciiStrToLower(&lowercase_name);
    result.lowercase_index = push_name(std::move(lowercase_name));
    result.camelcase_index =
        push_name(ToCamelCase(name, /* lower_first = */ true));
    result.json_index =
        push_name(opt_json_name != nullptr ? *opt_json_name : ToJsonName(name));

    // Move the collected names into the planned storage.
    std::string* all_names = AllocateArray<std::string>(names.size());
    result.array = all_names;
    std::move(names.begin(), names.end(), all_names);

    return result;
  }

  // Ends the planning phase: asks `alloc` for a flat allocation sized by the
  // planned totals and records the head pointer of each block.
  template <typename Alloc>
  void FinalizePlanning(Alloc& alloc) {
    ABSL_CHECK(!has_allocated());

    pointers_ = alloc->CreateFlatAlloc(total_)->Pointers();

    ABSL_CHECK(has_allocated());
  }

  void ExpectConsumed() const {
    // We verify that we consumed all the memory requested if there was no
    // error in processing.
    Fold({ExpectConsumed<T>()...});
  }

 private:
  // The allocation is considered done once the `char` block pointer is set.
  bool has_allocated() const {
    return pointers_.template Get<char>() != nullptr;
  }

  static bool IsLower(char c) { return 'a' <= c && c <= 'z'; }
  static bool IsDigit(char c) { return '0' <= c && c <= '9'; }
  static bool IsLowerOrDigit(char c) { return IsLower(c) || IsDigit(c); }

  // Classification of a field name used to pick the fast paths above:
  //   kAllLower  - only [a-z0-9], starting with a lowercase letter
  //                (an empty name is also classified as kAllLower);
  //   kSnakeCase - as above, but containing at least one '_';
  //   kOther     - anything else.
  enum class FieldNameCase { kAllLower, kSnakeCase, kOther };
  FieldNameCase GetFieldNameCase(const absl::string_view name) {
    if (!name.empty() && !IsLower(name[0])) return FieldNameCase::kOther;
    FieldNameCase best = FieldNameCase::kAllLower;
    for (char c : name) {
      if (IsLowerOrDigit(c)) {
        // nothing to do
      } else if (c == '_') {
        best = FieldNameCase::kSnakeCase;
      } else {
        return FieldNameCase::kOther;
      }
    }
    return best;
  }

  // Checks that the planned and used amounts match for the block of `U`.
  // Always returns true so the call can be used inside Fold().
  template <typename U>
  bool ExpectConsumed() const {
    ABSL_CHECK_EQ(total_.template Get<U>(), used_.template Get<U>());
    return true;
  }

  // Head pointer of each block; all null until FinalizePlanning().
  TypeMap<PointerT, T...> pointers_;
  // Planned amount per block (bytes for `char`, elements otherwise).
  TypeMap<IntT, T...> total_;
  // Amount handed out so far per block, in the same units as `total_`.
  TypeMap<IntT, T...> used_;
};
575
// Allows us to disable tracking in the current thread while certain build steps
// are happening.
//
// Returns a mutable reference to the flag, which starts out as `true`. The
// flag is a function-local static declared with PROTOBUF_THREAD_LOCAL.
bool& is_tracking_enabled() {
  static PROTOBUF_THREAD_LOCAL bool value = true;
  return value;
}
582
DisableTracking()583 auto DisableTracking() {
584 bool old_value = is_tracking_enabled();
585 is_tracking_enabled() = false;
586 return absl::MakeCleanup([=] { is_tracking_enabled() = old_value; });
587 }
588
589 } // namespace
590
// A type-tagged, non-owning pointer to one of the descriptor kinds.
// The tag is not stored in Symbol itself but in the pointed-to object's
// internal::SymbolBase::symbol_type_ field, so a Symbol is a single pointer.
class Symbol {
 public:
  enum Type {
    NULL_SYMBOL,
    MESSAGE,
    FIELD,
    ONEOF,
    ENUM,
    ENUM_VALUE,
    ENUM_VALUE_OTHER_PARENT,
    SERVICE,
    METHOD,
    FULL_PACKAGE,
    SUB_PACKAGE,
  };

  // Constructs the null symbol.
  Symbol() {
    static constexpr internal::SymbolBase null_symbol{};
    static_assert(null_symbol.symbol_type_ == NULL_SYMBOL, "");
    // Initialize with a sentinel to make sure `ptr_` is never null.
    ptr_ = &null_symbol;
  }

  // Every object we store derives from internal::SymbolBase, where we store the
  // symbol type enum.
  // Storing in the object can be done without using more space in most cases,
  // while storing it in the Symbol type would require 8 bytes.
  //
  // For each descriptor type the macro defines:
  //  - an explicit constructor that tags `value` with TYPE_CONSTANT, and
  //  - an accessor FIELD() returning the typed pointer, or nullptr when the
  //    symbol holds a different type.
#define DEFINE_MEMBERS(TYPE, TYPE_CONSTANT, FIELD)                             \
  explicit Symbol(TYPE* value) : ptr_(value) {                                 \
    value->symbol_type_ = TYPE_CONSTANT;                                       \
  }                                                                            \
  const TYPE* FIELD() const {                                                  \
    return type() == TYPE_CONSTANT ? static_cast<const TYPE*>(ptr_) : nullptr; \
  }

  DEFINE_MEMBERS(Descriptor, MESSAGE, descriptor)
  DEFINE_MEMBERS(FieldDescriptor, FIELD, field_descriptor)
  DEFINE_MEMBERS(OneofDescriptor, ONEOF, oneof_descriptor)
  DEFINE_MEMBERS(EnumDescriptor, ENUM, enum_descriptor)
  DEFINE_MEMBERS(ServiceDescriptor, SERVICE, service_descriptor)
  DEFINE_MEMBERS(MethodDescriptor, METHOD, method_descriptor)
  DEFINE_MEMBERS(FileDescriptor, FULL_PACKAGE, file_descriptor)

  // We use a special node for subpackage FileDescriptor.
  // It is potentially added to the table with multiple different names, so we
  // need a separate place to put the name.
  struct Subpackage : internal::SymbolBase {
    // Length of the prefix of `file->package()` that names this subpackage.
    int name_size;
    const FileDescriptor* file;
  };
  DEFINE_MEMBERS(Subpackage, SUB_PACKAGE, sub_package_file_descriptor)

  // Enum values have two different parents.
  // We use two different identities for the same object to determine the two
  // different insertions in the map.
  // `n == 0` selects the ENUM_VALUE identity (SymbolBaseN<0> base); any other
  // `n` selects ENUM_VALUE_OTHER_PARENT (SymbolBaseN<1> base).
  static Symbol EnumValue(EnumValueDescriptor* value, int n) {
    Symbol s;
    internal::SymbolBase* ptr;
    if (n == 0) {
      ptr = static_cast<internal::SymbolBaseN<0>*>(value);
      ptr->symbol_type_ = ENUM_VALUE;
    } else {
      ptr = static_cast<internal::SymbolBaseN<1>*>(value);
      ptr->symbol_type_ = ENUM_VALUE_OTHER_PARENT;
    }
    s.ptr_ = ptr;
    return s;
  }

  // Returns the enum value for either of its two identities, casting back
  // through the matching SymbolBaseN base; nullptr for any other type.
  const EnumValueDescriptor* enum_value_descriptor() const {
    return type() == ENUM_VALUE
               ? static_cast<const EnumValueDescriptor*>(
                     static_cast<const internal::SymbolBaseN<0>*>(ptr_))
           : type() == ENUM_VALUE_OTHER_PARENT
               ? static_cast<const EnumValueDescriptor*>(
                     static_cast<const internal::SymbolBaseN<1>*>(ptr_))
               : nullptr;
  }

#undef DEFINE_MEMBERS

  Type type() const { return static_cast<Type>(ptr_->symbol_type_); }
  bool IsNull() const { return type() == NULL_SYMBOL; }
  bool IsType() const { return type() == MESSAGE || type() == ENUM; }
  bool IsAggregate() const {
    return IsType() || IsPackage() || type() == SERVICE;
  }
  bool IsPackage() const {
    return type() == FULL_PACKAGE || type() == SUB_PACKAGE;
  }

  // Returns the file the symbol belongs to. Types without a case below
  // (NULL_SYMBOL and ENUM_VALUE_OTHER_PARENT) yield nullptr.
  const FileDescriptor* GetFile() const {
    switch (type()) {
      case MESSAGE:
        return descriptor()->file();
      case FIELD:
        return field_descriptor()->file();
      case ONEOF:
        return oneof_descriptor()->containing_type()->file();
      case ENUM:
        return enum_descriptor()->file();
      case ENUM_VALUE:
        return enum_value_descriptor()->type()->file();
      case SERVICE:
        return service_descriptor()->file();
      case METHOD:
        return method_descriptor()->service()->file();
      case FULL_PACKAGE:
        return file_descriptor();
      case SUB_PACKAGE:
        return sub_package_file_descriptor()->file;
      default:
        return nullptr;
    }
  }

  // Returns the fully-qualified name of the symbol. For packages this is the
  // package name (for a subpackage, the first `name_size` characters of the
  // file's package). Types without a case below hit ABSL_CHECK(false).
  absl::string_view full_name() const {
    switch (type()) {
      case MESSAGE:
        return descriptor()->full_name();
      case FIELD:
        return field_descriptor()->full_name();
      case ONEOF:
        return oneof_descriptor()->full_name();
      case ENUM:
        return enum_descriptor()->full_name();
      case ENUM_VALUE:
        return enum_value_descriptor()->full_name();
      case SERVICE:
        return service_descriptor()->full_name();
      case METHOD:
        return method_descriptor()->full_name();
      case FULL_PACKAGE:
        return file_descriptor()->package();
      case SUB_PACKAGE:
        return absl::string_view(sub_package_file_descriptor()->file->package())
            .substr(0, sub_package_file_descriptor()->name_size);
      default:
        ABSL_CHECK(false);
    }
    return "";
  }

  // Returns the (parent, name) key of the symbol. The parent is the
  // containing descriptor, falling back to the file (via `or_file`) where the
  // containing descriptor is null. Types without a case below (packages and
  // the null symbol) hit ABSL_CHECK(false).
  std::pair<const void*, absl::string_view> parent_name_key() const {
    const auto or_file = [&](const void* p) { return p ? p : GetFile(); };
    switch (type()) {
      case MESSAGE:
        return {or_file(descriptor()->containing_type()), descriptor()->name()};
      case FIELD: {
        auto* field = field_descriptor();
        return {or_file(field->is_extension() ? field->extension_scope()
                                              : field->containing_type()),
                field->name()};
      }
      case ONEOF:
        return {oneof_descriptor()->containing_type(),
                oneof_descriptor()->name()};
      case ENUM:
        return {or_file(enum_descriptor()->containing_type()),
                enum_descriptor()->name()};
      case ENUM_VALUE:
        // First identity: keyed under the scope that contains the enum.
        return {or_file(enum_value_descriptor()->type()->containing_type()),
                enum_value_descriptor()->name()};
      case ENUM_VALUE_OTHER_PARENT:
        // Second identity: keyed under the enum itself.
        return {enum_value_descriptor()->type(),
                enum_value_descriptor()->name()};
      case SERVICE:
        return {GetFile(), service_descriptor()->name()};
      case METHOD:
        return {method_descriptor()->service(), method_descriptor()->name()};
      default:
        ABSL_CHECK(false);
    }
    return {};
  }

 private:
  // Never null: points either at a tagged descriptor or at the null sentinel.
  const internal::SymbolBase* ptr_;
};
770
// Maps each field type (used as the array index) to its C++ type category.
// Index 0 is not a valid type and is reserved for errors.
const FieldDescriptor::CppType
    FieldDescriptor::kTypeToCppTypeMap[MAX_TYPE + 1] = {
        static_cast<CppType>(0),  // 0 is reserved for errors

        CPPTYPE_DOUBLE,   // TYPE_DOUBLE
        CPPTYPE_FLOAT,    // TYPE_FLOAT
        CPPTYPE_INT64,    // TYPE_INT64
        CPPTYPE_UINT64,   // TYPE_UINT64
        CPPTYPE_INT32,    // TYPE_INT32
        CPPTYPE_UINT64,   // TYPE_FIXED64
        CPPTYPE_UINT32,   // TYPE_FIXED32
        CPPTYPE_BOOL,     // TYPE_BOOL
        CPPTYPE_STRING,   // TYPE_STRING
        CPPTYPE_MESSAGE,  // TYPE_GROUP
        CPPTYPE_MESSAGE,  // TYPE_MESSAGE
        CPPTYPE_STRING,   // TYPE_BYTES
        CPPTYPE_UINT32,   // TYPE_UINT32
        CPPTYPE_ENUM,     // TYPE_ENUM
        CPPTYPE_INT32,    // TYPE_SFIXED32
        CPPTYPE_INT64,    // TYPE_SFIXED64
        CPPTYPE_INT32,    // TYPE_SINT32
        CPPTYPE_INT64,    // TYPE_SINT64
};
794
// Maps each field type (used as the array index) to its lowercase name.
// Index 0 is not a valid type and is reserved for errors.
const char* const FieldDescriptor::kTypeToName[MAX_TYPE + 1] = {
    "ERROR",  // 0 is reserved for errors

    "double",    // TYPE_DOUBLE
    "float",     // TYPE_FLOAT
    "int64",     // TYPE_INT64
    "uint64",    // TYPE_UINT64
    "int32",     // TYPE_INT32
    "fixed64",   // TYPE_FIXED64
    "fixed32",   // TYPE_FIXED32
    "bool",      // TYPE_BOOL
    "string",    // TYPE_STRING
    "group",     // TYPE_GROUP
    "message",   // TYPE_MESSAGE
    "bytes",     // TYPE_BYTES
    "uint32",    // TYPE_UINT32
    "enum",      // TYPE_ENUM
    "sfixed32",  // TYPE_SFIXED32
    "sfixed64",  // TYPE_SFIXED64
    "sint32",    // TYPE_SINT32
    "sint64",    // TYPE_SINT64
};
817
// Maps each C++ type category (used as the array index) to its lowercase
// name. Index 0 is not a valid category and is reserved for errors.
const char* const FieldDescriptor::kCppTypeToName[MAX_CPPTYPE + 1] = {
    "ERROR",  // 0 is reserved for errors

    "int32",    // CPPTYPE_INT32
    "int64",    // CPPTYPE_INT64
    "uint32",   // CPPTYPE_UINT32
    "uint64",   // CPPTYPE_UINT64
    "double",   // CPPTYPE_DOUBLE
    "float",    // CPPTYPE_FLOAT
    "bool",     // CPPTYPE_BOOL
    "enum",     // CPPTYPE_ENUM
    "string",   // CPPTYPE_STRING
    "message",  // CPPTYPE_MESSAGE
};
832
// Maps each field label (used as the array index) to its lowercase keyword.
// Index 0 is not a valid label and is reserved for errors.
const char* const FieldDescriptor::kLabelToName[MAX_LABEL + 1] = {
    "ERROR",  // 0 is reserved for errors

    "optional",  // LABEL_OPTIONAL
    "required",  // LABEL_REQUIRED
    "repeated",  // LABEL_REPEATED
};
840
// Full name of the message type used as a replacement for weak messages that
// are not linked in.
// NOTE(review): the use site is not visible in this section; the description
// above is inferred from the identifier -- confirm where it is used.
static const char* const kNonLinkedWeakMessageReplacementName = "google.protobuf.Empty";
842
// Out-of-class definitions of FieldDescriptor's static `const int` members.
// They are emitted for every non-MSVC compiler and for MSVC versions in the
// range [1900, 1912); other MSVC versions skip them.
// NOTE(review): presumably those MSVC versions reject or do not need the
// redundant definitions -- confirm before changing the version bounds.
#if !defined(_MSC_VER) || (_MSC_VER >= 1900 && _MSC_VER < 1912)
const int FieldDescriptor::kMaxNumber;
const int FieldDescriptor::kFirstReservedNumber;
const int FieldDescriptor::kLastReservedNumber;
#endif
848
849 namespace {
850
EnumValueToPascalCase(const std::string & input)851 std::string EnumValueToPascalCase(const std::string& input) {
852 bool next_upper = true;
853 std::string result;
854 result.reserve(input.size());
855
856 for (char character : input) {
857 if (character == '_') {
858 next_upper = true;
859 } else {
860 if (next_upper) {
861 result.push_back(absl::ascii_toupper(character));
862 } else {
863 result.push_back(absl::ascii_tolower(character));
864 }
865 next_upper = false;
866 }
867 }
868
869 return result;
870 }
871
872 // Class to remove an enum prefix from enum values.
873 class PrefixRemover {
874 public:
PrefixRemover(absl::string_view prefix)875 explicit PrefixRemover(absl::string_view prefix) {
876 // Strip underscores and lower-case the prefix.
877 for (char character : prefix) {
878 if (character != '_') {
879 prefix_ += absl::ascii_tolower(character);
880 }
881 }
882 }
883
884 // Tries to remove the enum prefix from this enum value.
885 // If this is not possible, returns the input verbatim.
MaybeRemove(absl::string_view str)886 std::string MaybeRemove(absl::string_view str) {
887 // We can't just lowercase and strip str and look for a prefix.
888 // We need to properly recognize the difference between:
889 //
890 // enum Foo {
891 // FOO_BAR_BAZ = 0;
892 // FOO_BARBAZ = 1;
893 // }
894 //
895 // This is acceptable (though perhaps not advisable) because even when
896 // we PascalCase, these two will still be distinct (BarBaz vs. Barbaz).
897 size_t i, j;
898
899 // Skip past prefix_ in str if we can.
900 for (i = 0, j = 0; i < str.size() && j < prefix_.size(); i++) {
901 if (str[i] == '_') {
902 continue;
903 }
904
905 if (absl::ascii_tolower(str[i]) != prefix_[j++]) {
906 return std::string(str);
907 }
908 }
909
910 // If we didn't make it through the prefix, we've failed to strip the
911 // prefix.
912 if (j < prefix_.size()) {
913 return std::string(str);
914 }
915
916 // Skip underscores between prefix and further characters.
917 while (i < str.size() && str[i] == '_') {
918 i++;
919 }
920
921 // Enum label can't be the empty string.
922 if (i == str.size()) {
923 return std::string(str);
924 }
925
926 // We successfully stripped the prefix.
927 str.remove_prefix(i);
928 return std::string(str);
929 }
930
931 private:
932 std::string prefix_;
933 };
934
935 // A DescriptorPool contains a bunch of hash-maps to implement the
936 // various Find*By*() methods. Since hashtable lookups are O(1), it's
937 // most efficient to construct a fixed set of large hash-maps used by
938 // all objects in the pool rather than construct one or more small
939 // hash-maps for each object.
940 //
941 // The keys to these hash-maps are (parent, name) or (parent, number) pairs.
// Lookup key wrapping a full name. It exposes the same `full_name()`
// accessor as Symbol, so the transparent hash/equality functors below can
// treat a query and a stored Symbol uniformly.
struct FullNameQuery {
  absl::string_view query;
  absl::string_view full_name() const { return query; }
};
946 struct SymbolByFullNameHash {
947 using is_transparent = void;
948
949 template <typename T>
operator ()google::protobuf::__anona654feba0511::SymbolByFullNameHash950 size_t operator()(const T& s) const {
951 return absl::HashOf(s.full_name());
952 }
953 };
// Transparent equality functor: two objects exposing `full_name()` are equal
// when their full names compare equal.
struct SymbolByFullNameEq {
  using is_transparent = void;

  template <typename T, typename U>
  bool operator()(const T& a, const U& b) const {
    const auto& lhs = a.full_name();
    const auto& rhs = b.full_name();
    return lhs == rhs;
  }
};
// Set of Symbols hashed and compared by full_name(); the transparent functors
// allow lookups with any type exposing full_name() (e.g. FullNameQuery).
using SymbolsByNameSet =
    absl::flat_hash_set<Symbol, SymbolByFullNameHash, SymbolByFullNameEq>;
964
965 struct ParentNameQuery {
966 std::pair<const void*, absl::string_view> query;
parent_name_keygoogle::protobuf::__anona654feba0511::ParentNameQuery967 std::pair<const void*, absl::string_view> parent_name_key() const {
968 return query;
969 }
970 };
971 struct SymbolByParentHash {
972 using is_transparent = void;
973
974 template <typename T>
operator ()google::protobuf::__anona654feba0511::SymbolByParentHash975 size_t operator()(const T& s) const {
976 return absl::HashOf(s.parent_name_key());
977 }
978 };
// Transparent equality: two arguments are equal when their parent_name_key()
// results compare equal.
struct SymbolByParentEq {
  using is_transparent = void;

  template <typename Lhs, typename Rhs>
  bool operator()(const Lhs& lhs, const Rhs& rhs) const {
    const auto& lhs_key = lhs.parent_name_key();
    const auto& rhs_key = rhs.parent_name_key();
    return lhs_key == rhs_key;
  }
};
// Set of Symbols hashed and compared by parent_name_key(); the transparent
// functors allow lookups with any type exposing parent_name_key() (e.g.
// ParentNameQuery).
using SymbolsByParentSet =
    absl::flat_hash_set<Symbol, SymbolByParentHash, SymbolByParentEq>;
989
990 template <typename DescriptorT>
991 struct DescriptorsByNameHash {
992 using is_transparent = void;
993
operator ()google::protobuf::__anona654feba0511::DescriptorsByNameHash994 size_t operator()(absl::string_view name) const { return absl::HashOf(name); }
995
operator ()google::protobuf::__anona654feba0511::DescriptorsByNameHash996 size_t operator()(const DescriptorT* file) const {
997 return absl::HashOf(file->name());
998 }
999 };
1000
1001 template <typename DescriptorT>
1002 struct DescriptorsByNameEq {
1003 using is_transparent = void;
1004
operator ()google::protobuf::__anona654feba0511::DescriptorsByNameEq1005 bool operator()(absl::string_view lhs, absl::string_view rhs) const {
1006 return lhs == rhs;
1007 }
operator ()google::protobuf::__anona654feba0511::DescriptorsByNameEq1008 bool operator()(absl::string_view lhs, const DescriptorT* rhs) const {
1009 return lhs == rhs->name();
1010 }
operator ()google::protobuf::__anona654feba0511::DescriptorsByNameEq1011 bool operator()(const DescriptorT* lhs, absl::string_view rhs) const {
1012 return lhs->name() == rhs;
1013 }
operator ()google::protobuf::__anona654feba0511::DescriptorsByNameEq1014 bool operator()(const DescriptorT* lhs, const DescriptorT* rhs) const {
1015 return lhs == rhs || lhs->name() == rhs->name();
1016 }
1017 };
1018
// Set of descriptor pointers hashed and compared through name(); the
// transparent functors also accept an absl::string_view as the lookup key.
template <typename DescriptorT>
using DescriptorsByNameSet =
    absl::flat_hash_set<const DescriptorT*, DescriptorsByNameHash<DescriptorT>,
                        DescriptorsByNameEq<DescriptorT>>;
1023
// Maps a (parent pointer, name) pair to a field. The key stores only a view
// of the name, not a copy of its characters.
using FieldsByNameMap =
    absl::flat_hash_map<std::pair<const void*, absl::string_view>,
                        const FieldDescriptor*>;
1027
// Lookup-only key holding a (parent pointer, number) pair. The
// ObjectToParentNumber overload for this type returns `query` as-is, which is
// what ParentNumberHash and ParentNumberEq operate on.
struct ParentNumberQuery {
  std::pair<const void*, int> query;
};
ObjectToParentNumber(const FieldDescriptor * field)1031 std::pair<const void*, int> ObjectToParentNumber(const FieldDescriptor* field) {
1032 return {field->containing_type(), field->number()};
1033 }
ObjectToParentNumber(const EnumValueDescriptor * enum_value)1034 std::pair<const void*, int> ObjectToParentNumber(
1035 const EnumValueDescriptor* enum_value) {
1036 return {enum_value->type(), enum_value->number()};
1037 }
ObjectToParentNumber(ParentNumberQuery query)1038 std::pair<const void*, int> ObjectToParentNumber(ParentNumberQuery query) {
1039 return query.query;
1040 }
1041 struct ParentNumberHash {
1042 using is_transparent = void;
1043
1044 template <typename T>
operator ()google::protobuf::__anona654feba0511::ParentNumberHash1045 size_t operator()(const T& t) const {
1046 return absl::HashOf(ObjectToParentNumber(t));
1047 }
1048 };
// Transparent equality: arguments are equal when ObjectToParentNumber yields
// the same (parent, number) key for both.
struct ParentNumberEq {
  using is_transparent = void;

  template <typename Lhs, typename Rhs>
  bool operator()(const Lhs& lhs, const Rhs& rhs) const {
    const std::pair<const void*, int> lhs_key = ObjectToParentNumber(lhs);
    const std::pair<const void*, int> rhs_key = ObjectToParentNumber(rhs);
    return lhs_key == rhs_key;
  }
};
// Sets of fields / enum values keyed by the (parent, number) pair that
// ObjectToParentNumber derives from each element; a ParentNumberQuery can be
// used as the lookup key.
using FieldsByNumberSet = absl::flat_hash_set<const FieldDescriptor*,
                                              ParentNumberHash, ParentNumberEq>;
using EnumValuesByNumberSet =
    absl::flat_hash_set<const EnumValueDescriptor*, ParentNumberHash,
                        ParentNumberEq>;
1062
// This is a map rather than a hash-map, since we use it to iterate
// through all the extensions that extend a given Descriptor, and an
// ordered data structure that implements lower_bound is convenient
// for that.
// Keys are (extended Descriptor, int) pairs, so entries sharing the same
// Descriptor are adjacent in iteration order.
using ExtensionsGroupedByDescriptorMap =
    absl::btree_map<std::pair<const Descriptor*, int>, const FieldDescriptor*>;
// Maps a std::string path key to a SourceCodeInfo_Location.
using LocationsByPathMap =
    absl::flat_hash_map<std::string, const SourceCodeInfo_Location*>;
1071
NewAllowedProto3Extendee()1072 absl::flat_hash_set<std::string>* NewAllowedProto3Extendee() {
1073 const char* kOptionNames[] = {
1074 "FileOptions", "MessageOptions", "FieldOptions",
1075 "EnumOptions", "EnumValueOptions", "ServiceOptions",
1076 "MethodOptions", "OneofOptions", "ExtensionRangeOptions"};
1077 auto allowed_proto3_extendees = new absl::flat_hash_set<std::string>();
1078 allowed_proto3_extendees->reserve(sizeof(kOptionNames) /
1079 sizeof(kOptionNames[0]));
1080
1081 for (const char* option_name : kOptionNames) {
1082 // descriptor.proto has a different package name in opensource. We allow
1083 // both so the opensource protocol compiler can also compile internal
1084 // proto3 files with custom options. See: b/27567912
1085 allowed_proto3_extendees->insert(std::string("google.protobuf.") +
1086 option_name);
1087 // Split the word to trick the opensource processing scripts so they
1088 // will keep the original package name.
1089 allowed_proto3_extendees->insert(std::string("proto2.") + option_name);
1090 }
1091 return allowed_proto3_extendees;
1092 }
1093
1094 // Checks whether the extendee type is allowed in proto3.
1095 // Only extensions to descriptor options are allowed. We use name comparison
1096 // instead of comparing the descriptor directly because the extensions may be
1097 // defined in a different pool.
AllowedExtendeeInProto3(const absl::string_view name)1098 bool AllowedExtendeeInProto3(const absl::string_view name) {
1099 static auto allowed_proto3_extendees =
1100 internal::OnShutdownDelete(NewAllowedProto3Extendee());
1101 return allowed_proto3_extendees->find(name) !=
1102 allowed_proto3_extendees->end();
1103 }
1104
GetCppFeatureSetDefaults()1105 const FeatureSetDefaults& GetCppFeatureSetDefaults() {
1106 static const FeatureSetDefaults* default_spec =
1107 internal::OnShutdownDelete([] {
1108 auto* defaults = new FeatureSetDefaults();
1109 internal::ParseNoReflection(
1110 absl::string_view{
1111 PROTOBUF_INTERNAL_CPP_EDITION_DEFAULTS,
1112 sizeof(PROTOBUF_INTERNAL_CPP_EDITION_DEFAULTS) - 1},
1113 *defaults);
1114 return defaults;
1115 }());
1116 return *default_spec;
1117 }
1118
1119 template <typename ProtoT>
RestoreFeaturesToOptions(const FeatureSet * features,ProtoT * proto)1120 void RestoreFeaturesToOptions(const FeatureSet* features, ProtoT* proto) {
1121 if (features != &FeatureSet::default_instance()) {
1122 *proto->mutable_options()->mutable_features() = *features;
1123 }
1124 }
1125
1126 template <typename DescriptorT>
GetFullName(const DescriptorT & desc)1127 absl::string_view GetFullName(const DescriptorT& desc) {
1128 return desc.full_name();
1129 }
1130
GetFullName(const FileDescriptor & desc)1131 absl::string_view GetFullName(const FileDescriptor& desc) {
1132 return desc.name();
1133 }
1134
1135 template <typename DescriptorT>
GetFile(const DescriptorT & desc)1136 const FileDescriptor* GetFile(const DescriptorT& desc) {
1137 return desc.file();
1138 }
1139
// A FileDescriptor is its own file.
const FileDescriptor* GetFile(const FileDescriptor& desc) {
  const FileDescriptor* self = &desc;
  return self;
}
1141
GetParentFeatures(const FileDescriptor * file)1142 const FeatureSet& GetParentFeatures(const FileDescriptor* file) {
1143 return FeatureSet::default_instance();
1144 }
1145
GetParentFeatures(const Descriptor * message)1146 const FeatureSet& GetParentFeatures(const Descriptor* message) {
1147 if (message->containing_type() == nullptr) {
1148 return internal::InternalFeatureHelper::GetFeatures(*message->file());
1149 }
1150 return internal::InternalFeatureHelper::GetFeatures(
1151 *message->containing_type());
1152 }
1153
GetParentFeatures(const OneofDescriptor * oneof)1154 const FeatureSet& GetParentFeatures(const OneofDescriptor* oneof) {
1155 return internal::InternalFeatureHelper::GetFeatures(
1156 *oneof->containing_type());
1157 }
1158
GetParentFeatures(const Descriptor::ExtensionRange * range)1159 const FeatureSet& GetParentFeatures(const Descriptor::ExtensionRange* range) {
1160 return internal::InternalFeatureHelper::GetFeatures(
1161 *range->containing_type());
1162 }
1163
GetParentFeatures(const FieldDescriptor * field)1164 const FeatureSet& GetParentFeatures(const FieldDescriptor* field) {
1165 if (field->containing_oneof() != nullptr) {
1166 return internal::InternalFeatureHelper::GetFeatures(
1167 *field->containing_oneof());
1168 } else if (field->is_extension()) {
1169 if (field->extension_scope() == nullptr) {
1170 return internal::InternalFeatureHelper::GetFeatures(*field->file());
1171 }
1172 return internal::InternalFeatureHelper::GetFeatures(
1173 *field->extension_scope());
1174 }
1175 return internal::InternalFeatureHelper::GetFeatures(
1176 *field->containing_type());
1177 }
1178
GetParentFeatures(const EnumDescriptor * enm)1179 const FeatureSet& GetParentFeatures(const EnumDescriptor* enm) {
1180 if (enm->containing_type() == nullptr) {
1181 return internal::InternalFeatureHelper::GetFeatures(*enm->file());
1182 }
1183 return internal::InternalFeatureHelper::GetFeatures(*enm->containing_type());
1184 }
1185
GetParentFeatures(const EnumValueDescriptor * value)1186 const FeatureSet& GetParentFeatures(const EnumValueDescriptor* value) {
1187 return internal::InternalFeatureHelper::GetFeatures(*value->type());
1188 }
1189
GetParentFeatures(const ServiceDescriptor * service)1190 const FeatureSet& GetParentFeatures(const ServiceDescriptor* service) {
1191 return internal::InternalFeatureHelper::GetFeatures(*service->file());
1192 }
1193
GetParentFeatures(const MethodDescriptor * method)1194 const FeatureSet& GetParentFeatures(const MethodDescriptor* method) {
1195 return internal::InternalFeatureHelper::GetFeatures(*method->service());
1196 }
1197
IsLegacyEdition(Edition edition)1198 bool IsLegacyEdition(Edition edition) {
1199 return edition < Edition::EDITION_2023;
1200 }
1201
1202 } // anonymous namespace
1203
1204 // Contains tables specific to a particular file. These tables are not
1205 // modified once the file has been constructed, so they need not be
1206 // protected by a mutex. This makes operations that depend only on the
1207 // contents of a single file -- e.g. Descriptor::FindFieldByName() --
1208 // lock-free.
1209 //
1210 // For historical reasons, the definitions of the methods of
1211 // FileDescriptorTables and DescriptorPool::Tables are interleaved below.
1212 // These used to be a single class.
class FileDescriptorTables {
 public:
  FileDescriptorTables();
  ~FileDescriptorTables();

  // Empty table, used with placeholder files.
  inline static const FileDescriptorTables& GetEmptyInstance();

  // -----------------------------------------------------------------
  // Finding items.

  // Returns a null Symbol (symbol.IsNull() is true) if not found.
  // TODO: All callers to this function know the type they are looking
  // for. If we propagate that information statically we can make the query
  // faster.
  inline Symbol FindNestedSymbol(const void* parent,
                                 absl::string_view name) const;

  // These return nullptr if not found.
  inline const FieldDescriptor* FindFieldByNumber(const Descriptor* parent,
                                                  int number) const;
  inline const FieldDescriptor* FindFieldByLowercaseName(
      const void* parent, absl::string_view lowercase_name) const;
  inline const FieldDescriptor* FindFieldByCamelcaseName(
      const void* parent, absl::string_view camelcase_name) const;
  inline const EnumValueDescriptor* FindEnumValueByNumber(
      const EnumDescriptor* parent, int number) const;
  // This creates a new EnumValueDescriptor if not found, in a thread-safe way.
  inline const EnumValueDescriptor* FindEnumValueByNumberCreatingIfUnknown(
      const EnumDescriptor* parent, int number) const;

  // -----------------------------------------------------------------
  // Adding items.

  // These add items to the corresponding tables.  They return false if
  // the key already exists in the table.
  bool AddAliasUnderParent(const void* parent, absl::string_view name,
                           Symbol symbol);
  bool AddFieldByNumber(FieldDescriptor* field);
  bool AddEnumValueByNumber(EnumValueDescriptor* value);

  // Populates p->first->locations_by_path_ from p->second.
  // Unusual signature dictated by absl::call_once.
  static void BuildLocationsByPath(
      std::pair<const FileDescriptorTables*, const SourceCodeInfo*>* p);

  // Returns the location denoted by the specified path through info,
  // or nullptr if not found.
  // The value of info must be that of the corresponding FileDescriptor.
  // (Conceptually a pure function, but stateful as an optimisation.)
  const SourceCodeInfo_Location* GetSourceLocation(
      const std::vector<int>& path, const SourceCodeInfo* info) const;

  // Must be called after BuildFileImpl(), even if the build failed and
  // we are going to roll back to the last checkpoint.
  void FinalizeTables();

 private:
  // NOTE(review): the helpers below are only declared here; judging by their
  // names they back the lazily built lowercase/camelcase name maps -- confirm
  // against their definitions.
  const void* FindParentForFieldsByMap(const FieldDescriptor* field) const;
  static void FieldsByLowercaseNamesLazyInitStatic(
      const FileDescriptorTables* tables);
  void FieldsByLowercaseNamesLazyInitInternal() const;
  static void FieldsByCamelcaseNamesLazyInitStatic(
      const FileDescriptorTables* tables);
  void FieldsByCamelcaseNamesLazyInitInternal() const;

  // Symbols keyed by (parent, name); see SymbolsByParentSet.
  SymbolsByParentSet symbols_by_parent_;
  // Mutable so the const Find* methods can use them; presumably each flag
  // guards one-time construction of the matching map below.
  mutable absl::once_flag fields_by_lowercase_name_once_;
  mutable absl::once_flag fields_by_camelcase_name_once_;
  // Make these fields atomic to avoid race conditions with
  // GetEstimatedOwnedMemoryBytesSize. Once the pointer is set the map won't
  // change anymore.
  // Both start out null and are deleted by the destructor.
  mutable std::atomic<const FieldsByNameMap*> fields_by_lowercase_name_{};
  mutable std::atomic<const FieldsByNameMap*> fields_by_camelcase_name_{};
  FieldsByNumberSet fields_by_number_;  // Not including extensions.
  EnumValuesByNumberSet enum_values_by_number_;
  mutable EnumValuesByNumberSet unknown_enum_values_by_number_
      ABSL_GUARDED_BY(unknown_enum_values_mu_);

  // Populated on first request to save space, hence constness games.
  mutable absl::once_flag locations_by_path_once_;
  mutable LocationsByPathMap locations_by_path_;

  // Mutex to protect the unknown-enum-value map due to dynamic
  // EnumValueDescriptor creation on unknown values.
  mutable absl::Mutex unknown_enum_values_mu_;
};
1300
1301 namespace internal {
1302
// Small sequential allocator to be used within a single file.
// Most of the memory for a single FileDescriptor and everything under it is
// allocated in a single block of memory, with the FlatAllocator giving it out
// in parts later.
// The code first plans the total number of bytes needed by calling PlanArray
// with all the allocations that will happen afterwards, then calls
// FinalizePlanning passing the underlying allocator (the DescriptorPool::Tables
// instance), and then proceeds to get the memory via
// `AllocateArray`/`AllocateString` calls.
// The calls to PlanArray and to `AllocateArray`/`AllocateString` have to match
// between planning and allocating, though not necessarily in the same order.
//
// The base class is FlatAllocatorImpl instantiated over the type list below,
// after SortByAlignment has reordered it.
class FlatAllocator
    : public decltype(ApplyTypeList<FlatAllocatorImpl>(
          SortByAlignment<char, std::string, SourceCodeInfo,
                          FileDescriptorTables, FeatureSet,
                          // Option types
                          MessageOptions, FieldOptions, EnumOptions,
                          EnumValueOptions, ExtensionRangeOptions, OneofOptions,
                          ServiceOptions, MethodOptions, FileOptions>())) {};
1322
1323 } // namespace internal
1324
1325 // ===================================================================
1326 // DescriptorPool::DeferredValidation
1327
1328 // This class stores information required to defer validation until we're
1329 // outside the mutex lock. These are reflective checks that also require us to
1330 // acquire the lock.
1331 class DescriptorPool::DeferredValidation {
1332 public:
DeferredValidation(const DescriptorPool * pool,ErrorCollector * error_collector)1333 DeferredValidation(const DescriptorPool* pool,
1334 ErrorCollector* error_collector)
1335 : pool_(pool), error_collector_(error_collector) {}
DeferredValidation(const DescriptorPool * pool)1336 explicit DeferredValidation(const DescriptorPool* pool)
1337 : pool_(pool), error_collector_(pool->default_error_collector_) {}
1338
1339 DeferredValidation(const DeferredValidation&) = delete;
1340 DeferredValidation& operator=(const DeferredValidation&) = delete;
1341 DeferredValidation(DeferredValidation&&) = delete;
1342 DeferredValidation& operator=(DeferredValidation&&) = delete;
1343
~DeferredValidation()1344 ~DeferredValidation() {
1345 ABSL_CHECK(lifetimes_info_map_.empty())
1346 << "DeferredValidation destroyed with unvalidated features";
1347 }
1348
1349 struct LifetimesInfo {
1350 const FeatureSet* proto_features;
1351 const Message* proto;
1352 absl::string_view full_name;
1353 absl::string_view filename;
1354 };
ValidateFeatureLifetimes(const FileDescriptor * file,LifetimesInfo info)1355 void ValidateFeatureLifetimes(const FileDescriptor* file,
1356 LifetimesInfo info) {
1357 lifetimes_info_map_[file].emplace_back(std::move(info));
1358 }
1359
RollbackFile(const FileDescriptor * file)1360 void RollbackFile(const FileDescriptor* file) {
1361 lifetimes_info_map_.erase(file);
1362 }
1363
1364 // Create a new file proto with an extended lifetime for deferred error
1365 // reporting. If any temporary file protos don't outlive this object, the
1366 // reported errors won't be able to safely reference a location in the
1367 // original proto file.
CreateProto()1368 FileDescriptorProto& CreateProto() {
1369 owned_protos_.push_back(Arena::Create<FileDescriptorProto>(&arena_));
1370 return *owned_protos_.back();
1371 }
1372
Validate()1373 bool Validate() {
1374 if (lifetimes_info_map_.empty()) return true;
1375
1376 static absl::string_view feature_set_name = "google.protobuf.FeatureSet";
1377 const Descriptor* feature_set =
1378 pool_->FindMessageTypeByName(feature_set_name);
1379
1380 bool has_errors = false;
1381 for (const auto& it : lifetimes_info_map_) {
1382 const FileDescriptor* file = it.first;
1383
1384 for (const auto& info : it.second) {
1385 auto results = FeatureResolver::ValidateFeatureLifetimes(
1386 file->edition(), *info.proto_features, feature_set);
1387 for (const auto& error : results.errors) {
1388 has_errors = true;
1389 if (error_collector_ == nullptr) {
1390 ABSL_LOG(ERROR)
1391 << info.filename << " " << info.full_name << ": " << error;
1392 } else {
1393 error_collector_->RecordError(
1394 info.filename, info.full_name, info.proto,
1395 DescriptorPool::ErrorCollector::NAME, error);
1396 }
1397 }
1398 if (pool_->direct_input_files_.find(file->name()) !=
1399 pool_->direct_input_files_.end()) {
1400 for (const auto& warning : results.warnings) {
1401 if (error_collector_ == nullptr) {
1402 ABSL_LOG(WARNING)
1403 << info.filename << " " << info.full_name << ": " << warning;
1404 } else {
1405 error_collector_->RecordWarning(
1406 info.filename, info.full_name, info.proto,
1407 DescriptorPool::ErrorCollector::NAME, warning);
1408 }
1409 }
1410 }
1411 }
1412 }
1413 lifetimes_info_map_.clear();
1414 return !has_errors;
1415 }
1416
1417 private:
1418 Arena arena_;
1419 const DescriptorPool* pool_;
1420 ErrorCollector* error_collector_;
1421 absl::flat_hash_map<const FileDescriptor*, std::vector<LifetimesInfo>>
1422 lifetimes_info_map_;
1423 std::vector<FileDescriptorProto*> owned_protos_;
1424 };
1425
1426 // ===================================================================
1427 // DescriptorPool::Tables
1428
1429 class DescriptorPool::Tables {
1430 public:
1431 Tables();
1432 ~Tables();
1433
1434 // Record the current state of the tables to the stack of checkpoints.
1435 // Each call to AddCheckpoint() must be paired with exactly one call to either
1436 // ClearLastCheckpoint() or RollbackToLastCheckpoint().
1437 //
1438 // This is used when building files, since some kinds of validation errors
1439 // cannot be detected until the file's descriptors have already been added to
1440 // the tables.
1441 //
1442 // This supports recursive checkpoints, since building a file may trigger
1443 // recursive building of other files. Note that recursive checkpoints are not
1444 // normally necessary; explicit dependencies are built prior to checkpointing.
1445 // So although we recursively build transitive imports, there is at most one
1446 // checkpoint in the stack during dependency building.
1447 //
1448 // Recursive checkpoints only arise during cross-linking of the descriptors.
1449 // Symbol references must be resolved, via DescriptorBuilder::FindSymbol and
1450 // friends. If the pending file references an unknown symbol
1451 // (e.g., it is not defined in the pending file's explicit dependencies), and
1452 // the pool is using a fallback database, and that database contains a file
1453 // defining that symbol, and that file has not yet been built by the pool,
1454 // the pool builds the file during cross-linking, leading to another
1455 // checkpoint.
1456 void AddCheckpoint();
1457
1458 // Mark the last checkpoint as having cleared successfully, removing it from
1459 // the stack. If the stack is empty, all pending symbols will be committed.
1460 //
1461 // Note that this does not guarantee that the symbols added since the last
1462 // checkpoint won't be rolled back: if a checkpoint gets rolled back,
1463 // everything past that point gets rolled back, including symbols added after
1464 // checkpoints that were pushed onto the stack after it and marked as cleared.
1465 void ClearLastCheckpoint();
1466
1467 // Roll back the Tables to the state of the checkpoint at the top of the
1468 // stack, removing everything that was added after that point.
1469 void RollbackToLastCheckpoint(DeferredValidation& deferred_validation);
1470
1471 // The stack of files which are currently being built. Used to detect
1472 // cyclic dependencies when loading files from a DescriptorDatabase. Not
1473 // used when fallback_database_ == nullptr.
1474 std::vector<std::string> pending_files_;
1475
1476 // A set of files which we have tried to load from the fallback database
1477 // and encountered errors. We will not attempt to load them again during
1478 // execution of the current public API call, but for compatibility with
1479 // legacy clients, this is cleared at the beginning of each public API call.
1480 // Not used when fallback_database_ == nullptr.
1481 absl::flat_hash_set<std::string> known_bad_files_;
1482
1483 // A set of symbols which we have tried to load from the fallback database
1484 // and encountered errors. We will not attempt to load them again during
1485 // execution of the current public API call, but for compatibility with
1486 // legacy clients, this is cleared at the beginning of each public API call.
1487 absl::flat_hash_set<std::string> known_bad_symbols_;
1488
1489 // The set of descriptors for which we've already loaded the full
1490 // set of extensions numbers from fallback_database_.
1491 absl::flat_hash_set<const Descriptor*> extensions_loaded_from_db_;
1492
1493 // Maps type name to Descriptor::WellKnownType. This is logically global
1494 // and const, but we make it a member here to simplify its construction and
1495 // destruction. This only has 20-ish entries and is one per DescriptorPool,
1496 // so the overhead is small.
1497 absl::flat_hash_map<std::string, Descriptor::WellKnownType> well_known_types_;
1498
1499 // -----------------------------------------------------------------
1500 // Finding items.
1501
1502 // Find symbols. This returns a null Symbol (symbol.IsNull() is true)
1503 // if not found.
1504 inline Symbol FindSymbol(absl::string_view key) const;
1505
1506 // This implements the body of DescriptorPool::Find*ByName(). It should
1507 // really be a private method of DescriptorPool, but that would require
1508 // declaring Symbol in descriptor.h, which would drag all kinds of other
1509 // stuff into the header. Yay C++.
1510 Symbol FindByNameHelper(const DescriptorPool* pool, absl::string_view name);
1511
1512 // These return nullptr if not found.
1513 inline const FileDescriptor* FindFile(absl::string_view key) const;
1514 inline const FieldDescriptor* FindExtension(const Descriptor* extendee,
1515 int number) const;
1516 inline void FindAllExtensions(const Descriptor* extendee,
1517 std::vector<const FieldDescriptor*>* out) const;
1518
1519 // -----------------------------------------------------------------
1520 // Adding items.
1521
1522 // These add items to the corresponding tables. They return false if
1523 // the key already exists in the table. For AddSymbol(), the string passed
1524 // in must be one that was constructed using AllocateString(), as it will
1525 // be used as a key in the symbols_by_name_ map without copying.
1526 bool AddSymbol(absl::string_view full_name, Symbol symbol);
1527 bool AddFile(const FileDescriptor* file);
1528 bool AddExtension(const FieldDescriptor* field);
1529
1530 // Caches a feature set and returns a stable reference to the cached
1531 // allocation owned by the pool.
1532 const FeatureSet* InternFeatureSet(FeatureSet&& features);
1533
1534 // -----------------------------------------------------------------
1535 // Allocating memory.
1536
1537 // Allocate an object which will be reclaimed when the pool is
1538 // destroyed. Note that the object's destructor will never be called,
1539 // so its fields must be plain old data (primitive data types and
1540 // pointers). All of the descriptor types are such objects.
1541 template <typename Type>
1542 Type* Allocate();
1543
1544 // Allocate some bytes which will be reclaimed when the pool is
1545 // destroyed. Memory is aligned to 8 bytes.
1546 void* AllocateBytes(int size);
1547
1548 // Create a FlatAllocation for the corresponding sizes.
1549 // All objects within it will be default constructed.
1550 // The whole allocation, including the non-trivial objects within, will be
1551 // destroyed with the pool.
1552 template <typename... T>
1553 internal::FlatAllocator::Allocation* CreateFlatAlloc(
1554 const TypeMap<IntT, T...>& sizes);
1555
1556
1557 private:
1558 // All memory allocated in the pool. Must be first as other objects can
1559 // point into these.
1560 struct MiscDeleter {
operator ()google::protobuf::DescriptorPool::Tables::MiscDeleter1561 void operator()(int* p) const { internal::SizedDelete(p, *p + 8); }
1562 };
1563 // Miscellaneous allocations are length prefixed. The paylaod is 8 bytes after
1564 // the `int` that contains the size. This keeps the payload aligned.
1565 std::vector<std::unique_ptr<int, MiscDeleter>> misc_allocs_;
1566 struct FlatAllocDeleter {
operator ()google::protobuf::DescriptorPool::Tables::FlatAllocDeleter1567 void operator()(internal::FlatAllocator::Allocation* p) const {
1568 p->Destroy();
1569 }
1570 };
1571 std::vector<
1572 std::unique_ptr<internal::FlatAllocator::Allocation, FlatAllocDeleter>>
1573 flat_allocs_;
1574
1575 SymbolsByNameSet symbols_by_name_;
1576 DescriptorsByNameSet<FileDescriptor> files_by_name_;
1577 ExtensionsGroupedByDescriptorMap extensions_;
1578
1579 // A cache of all unique feature sets seen. Since we expect this number to be
1580 // relatively low compared to descriptors, it's significantly cheaper to share
1581 // these within the pool than have each file create its own feature sets.
1582 absl::flat_hash_map<std::string, std::unique_ptr<FeatureSet>>
1583 feature_set_cache_;
1584
1585 struct CheckPoint {
CheckPointgoogle::protobuf::DescriptorPool::Tables::CheckPoint1586 explicit CheckPoint(const Tables* tables)
1587 : flat_allocations_before_checkpoint(
1588 static_cast<int>(tables->flat_allocs_.size())),
1589 misc_allocations_before_checkpoint(
1590 static_cast<int>(tables->misc_allocs_.size())),
1591 pending_symbols_before_checkpoint(
1592 tables->symbols_after_checkpoint_.size()),
1593 pending_files_before_checkpoint(
1594 tables->files_after_checkpoint_.size()),
1595 pending_extensions_before_checkpoint(
1596 tables->extensions_after_checkpoint_.size()) {}
1597 int flat_allocations_before_checkpoint;
1598 int misc_allocations_before_checkpoint;
1599 int pending_symbols_before_checkpoint;
1600 int pending_files_before_checkpoint;
1601 int pending_extensions_before_checkpoint;
1602 };
1603 std::vector<CheckPoint> checkpoints_;
1604 std::vector<Symbol> symbols_after_checkpoint_;
1605 std::vector<const FileDescriptor*> files_after_checkpoint_;
1606 std::vector<std::pair<const Descriptor*, int>> extensions_after_checkpoint_;
1607 };
1608
Tables()1609 DescriptorPool::Tables::Tables() {
1610 well_known_types_.insert({
1611 {"google.protobuf.DoubleValue", Descriptor::WELLKNOWNTYPE_DOUBLEVALUE},
1612 {"google.protobuf.FloatValue", Descriptor::WELLKNOWNTYPE_FLOATVALUE},
1613 {"google.protobuf.Int64Value", Descriptor::WELLKNOWNTYPE_INT64VALUE},
1614 {"google.protobuf.UInt64Value", Descriptor::WELLKNOWNTYPE_UINT64VALUE},
1615 {"google.protobuf.Int32Value", Descriptor::WELLKNOWNTYPE_INT32VALUE},
1616 {"google.protobuf.UInt32Value", Descriptor::WELLKNOWNTYPE_UINT32VALUE},
1617 {"google.protobuf.StringValue", Descriptor::WELLKNOWNTYPE_STRINGVALUE},
1618 {"google.protobuf.BytesValue", Descriptor::WELLKNOWNTYPE_BYTESVALUE},
1619 {"google.protobuf.BoolValue", Descriptor::WELLKNOWNTYPE_BOOLVALUE},
1620 {"google.protobuf.Any", Descriptor::WELLKNOWNTYPE_ANY},
1621 {"google.protobuf.FieldMask", Descriptor::WELLKNOWNTYPE_FIELDMASK},
1622 {"google.protobuf.Duration", Descriptor::WELLKNOWNTYPE_DURATION},
1623 {"google.protobuf.Timestamp", Descriptor::WELLKNOWNTYPE_TIMESTAMP},
1624 {"google.protobuf.Value", Descriptor::WELLKNOWNTYPE_VALUE},
1625 {"google.protobuf.ListValue", Descriptor::WELLKNOWNTYPE_LISTVALUE},
1626 {"google.protobuf.Struct", Descriptor::WELLKNOWNTYPE_STRUCT},
1627 });
1628 }
1629
~Tables()1630 DescriptorPool::Tables::~Tables() { ABSL_DCHECK(checkpoints_.empty()); }
1631
// Defaulted: every member relies on its own default or in-class initializer.
FileDescriptorTables::FileDescriptorTables() = default;
1633
~FileDescriptorTables()1634 FileDescriptorTables::~FileDescriptorTables() {
1635 delete fields_by_lowercase_name_.load(std::memory_order_acquire);
1636 delete fields_by_camelcase_name_.load(std::memory_order_acquire);
1637 }
1638
GetEmptyInstance()1639 inline const FileDescriptorTables& FileDescriptorTables::GetEmptyInstance() {
1640 static auto file_descriptor_tables =
1641 internal::OnShutdownDelete(new FileDescriptorTables());
1642 return *file_descriptor_tables;
1643 }
1644
AddCheckpoint()1645 void DescriptorPool::Tables::AddCheckpoint() {
1646 checkpoints_.emplace_back(this);
1647 }
1648
ClearLastCheckpoint()1649 void DescriptorPool::Tables::ClearLastCheckpoint() {
1650 ABSL_DCHECK(!checkpoints_.empty());
1651 checkpoints_.pop_back();
1652 if (checkpoints_.empty()) {
1653 // All checkpoints have been cleared: we can now commit all of the pending
1654 // data.
1655 symbols_after_checkpoint_.clear();
1656 files_after_checkpoint_.clear();
1657 extensions_after_checkpoint_.clear();
1658 }
1659 }
1660
RollbackToLastCheckpoint(DeferredValidation & deferred_validation)1661 void DescriptorPool::Tables::RollbackToLastCheckpoint(
1662 DeferredValidation& deferred_validation) {
1663 ABSL_DCHECK(!checkpoints_.empty());
1664 const CheckPoint& checkpoint = checkpoints_.back();
1665
1666 for (size_t i = checkpoint.pending_symbols_before_checkpoint;
1667 i < symbols_after_checkpoint_.size(); i++) {
1668 symbols_by_name_.erase(symbols_after_checkpoint_[i]);
1669 }
1670 for (size_t i = checkpoint.pending_files_before_checkpoint;
1671 i < files_after_checkpoint_.size(); i++) {
1672 deferred_validation.RollbackFile(files_after_checkpoint_[i]);
1673 files_by_name_.erase(files_after_checkpoint_[i]);
1674 }
1675 for (size_t i = checkpoint.pending_extensions_before_checkpoint;
1676 i < extensions_after_checkpoint_.size(); i++) {
1677 extensions_.erase(extensions_after_checkpoint_[i]);
1678 }
1679
1680 symbols_after_checkpoint_.resize(
1681 checkpoint.pending_symbols_before_checkpoint);
1682 files_after_checkpoint_.resize(checkpoint.pending_files_before_checkpoint);
1683 extensions_after_checkpoint_.resize(
1684 checkpoint.pending_extensions_before_checkpoint);
1685
1686 flat_allocs_.resize(checkpoint.flat_allocations_before_checkpoint);
1687 misc_allocs_.resize(checkpoint.misc_allocations_before_checkpoint);
1688 checkpoints_.pop_back();
1689 }
1690
1691 // -------------------------------------------------------------------
1692
FindSymbol(absl::string_view key) const1693 inline Symbol DescriptorPool::Tables::FindSymbol(absl::string_view key) const {
1694 auto it = symbols_by_name_.find(FullNameQuery{key});
1695 return it == symbols_by_name_.end() ? Symbol() : *it;
1696 }
1697
FindNestedSymbol(const void * parent,absl::string_view name) const1698 inline Symbol FileDescriptorTables::FindNestedSymbol(
1699 const void* parent, absl::string_view name) const {
1700 auto it = symbols_by_parent_.find(ParentNameQuery{{parent, name}});
1701 return it == symbols_by_parent_.end() ? Symbol() : *it;
1702 }
1703
FindByNameHelper(const DescriptorPool * pool,absl::string_view name)1704 Symbol DescriptorPool::Tables::FindByNameHelper(const DescriptorPool* pool,
1705 absl::string_view name) {
1706 if (pool->mutex_ != nullptr) {
1707 // Fast path: the Symbol is already cached. This is just a hash lookup.
1708 absl::ReaderMutexLock lock(pool->mutex_);
1709 if (known_bad_symbols_.empty() && known_bad_files_.empty()) {
1710 Symbol result = FindSymbol(name);
1711 if (!result.IsNull()) return result;
1712 }
1713 }
1714 DescriptorPool::DeferredValidation deferred_validation(pool);
1715 Symbol result;
1716 {
1717 absl::MutexLockMaybe lock(pool->mutex_);
1718 if (pool->fallback_database_ != nullptr) {
1719 known_bad_symbols_.clear();
1720 known_bad_files_.clear();
1721 }
1722 result = FindSymbol(name);
1723
1724 if (result.IsNull() && pool->underlay_ != nullptr) {
1725 // Symbol not found; check the underlay.
1726 result =
1727 pool->underlay_->tables_->FindByNameHelper(pool->underlay_, name);
1728 }
1729
1730 if (result.IsNull()) {
1731 // Symbol still not found, so check fallback database.
1732 if (pool->TryFindSymbolInFallbackDatabase(name, deferred_validation)) {
1733 result = FindSymbol(name);
1734 }
1735 }
1736 }
1737
1738 if (!deferred_validation.Validate()) {
1739 return Symbol();
1740 }
1741 return result;
1742 }
1743
FindFile(absl::string_view key) const1744 inline const FileDescriptor* DescriptorPool::Tables::FindFile(
1745 absl::string_view key) const {
1746 auto it = files_by_name_.find(key);
1747 if (it == files_by_name_.end()) return nullptr;
1748 return *it;
1749 }
1750
FindFieldByNumber(const Descriptor * parent,int number) const1751 inline const FieldDescriptor* FileDescriptorTables::FindFieldByNumber(
1752 const Descriptor* parent, int number) const {
1753 // If `number` is within the sequential range, just index into the parent
1754 // without doing a table lookup.
1755 if (parent != nullptr && //
1756 1 <= number && number <= parent->sequential_field_limit_) {
1757 return parent->field(number - 1);
1758 }
1759
1760 auto it = fields_by_number_.find(ParentNumberQuery{{parent, number}});
1761 return it == fields_by_number_.end() ? nullptr : *it;
1762 }
1763
FindParentForFieldsByMap(const FieldDescriptor * field) const1764 const void* FileDescriptorTables::FindParentForFieldsByMap(
1765 const FieldDescriptor* field) const {
1766 if (field->is_extension()) {
1767 if (field->extension_scope() == nullptr) {
1768 return field->file();
1769 } else {
1770 return field->extension_scope();
1771 }
1772 } else {
1773 return field->containing_type();
1774 }
1775 }
1776
FieldsByLowercaseNamesLazyInitStatic(const FileDescriptorTables * tables)1777 void FileDescriptorTables::FieldsByLowercaseNamesLazyInitStatic(
1778 const FileDescriptorTables* tables) {
1779 tables->FieldsByLowercaseNamesLazyInitInternal();
1780 }
1781
FieldsByLowercaseNamesLazyInitInternal() const1782 void FileDescriptorTables::FieldsByLowercaseNamesLazyInitInternal() const {
1783 auto* map = new FieldsByNameMap;
1784 for (Symbol symbol : symbols_by_parent_) {
1785 const FieldDescriptor* field = symbol.field_descriptor();
1786 if (!field) continue;
1787 (*map)[{FindParentForFieldsByMap(field), field->lowercase_name()}] = field;
1788 }
1789 fields_by_lowercase_name_.store(map, std::memory_order_release);
1790 }
1791
FindFieldByLowercaseName(const void * parent,absl::string_view lowercase_name) const1792 inline const FieldDescriptor* FileDescriptorTables::FindFieldByLowercaseName(
1793 const void* parent, absl::string_view lowercase_name) const {
1794 absl::call_once(fields_by_lowercase_name_once_,
1795 &FileDescriptorTables::FieldsByLowercaseNamesLazyInitStatic,
1796 this);
1797 const auto* fields =
1798 fields_by_lowercase_name_.load(std::memory_order_acquire);
1799 auto it = fields->find({parent, lowercase_name});
1800 if (it == fields->end()) return nullptr;
1801 return it->second;
1802 }
1803
FieldsByCamelcaseNamesLazyInitStatic(const FileDescriptorTables * tables)1804 void FileDescriptorTables::FieldsByCamelcaseNamesLazyInitStatic(
1805 const FileDescriptorTables* tables) {
1806 tables->FieldsByCamelcaseNamesLazyInitInternal();
1807 }
1808
FieldsByCamelcaseNamesLazyInitInternal() const1809 void FileDescriptorTables::FieldsByCamelcaseNamesLazyInitInternal() const {
1810 auto* map = new FieldsByNameMap;
1811 for (Symbol symbol : symbols_by_parent_) {
1812 const FieldDescriptor* field = symbol.field_descriptor();
1813 if (!field) continue;
1814 const void* parent = FindParentForFieldsByMap(field);
1815 // If we already have a field with this camelCase name, keep the field with
1816 // the smallest field number. This way we get a deterministic mapping.
1817 const FieldDescriptor*& found = (*map)[{parent, field->camelcase_name()}];
1818 if (found == nullptr || found->number() > field->number()) {
1819 found = field;
1820 }
1821 }
1822 fields_by_camelcase_name_.store(map, std::memory_order_release);
1823 }
1824
FindFieldByCamelcaseName(const void * parent,absl::string_view camelcase_name) const1825 inline const FieldDescriptor* FileDescriptorTables::FindFieldByCamelcaseName(
1826 const void* parent, absl::string_view camelcase_name) const {
1827 absl::call_once(fields_by_camelcase_name_once_,
1828 FileDescriptorTables::FieldsByCamelcaseNamesLazyInitStatic,
1829 this);
1830 auto* fields = fields_by_camelcase_name_.load(std::memory_order_acquire);
1831 auto it = fields->find({parent, camelcase_name});
1832 if (it == fields->end()) return nullptr;
1833 return it->second;
1834 }
1835
FindEnumValueByNumber(const EnumDescriptor * parent,int number) const1836 inline const EnumValueDescriptor* FileDescriptorTables::FindEnumValueByNumber(
1837 const EnumDescriptor* parent, int number) const {
1838 // If `number` is within the sequential range, just index into the parent
1839 // without doing a table lookup.
1840 const int base = parent->value(0)->number();
1841 if (base <= number &&
1842 number <= static_cast<int64_t>(base) + parent->sequential_value_limit_) {
1843 return parent->value(number - base);
1844 }
1845
1846 auto it = enum_values_by_number_.find(ParentNumberQuery{{parent, number}});
1847 return it == enum_values_by_number_.end() ? nullptr : *it;
1848 }
1849
1850 inline const EnumValueDescriptor*
FindEnumValueByNumberCreatingIfUnknown(const EnumDescriptor * parent,int number) const1851 FileDescriptorTables::FindEnumValueByNumberCreatingIfUnknown(
1852 const EnumDescriptor* parent, int number) const {
1853 // First try, with map of compiled-in values.
1854 {
1855 const auto* value = FindEnumValueByNumber(parent, number);
1856 if (value != nullptr) {
1857 return value;
1858 }
1859 }
1860
1861 const ParentNumberQuery query{{parent, number}};
1862
1863 // Second try, with reader lock held on unknown enum values: common case.
1864 {
1865 absl::ReaderMutexLock l(&unknown_enum_values_mu_);
1866 auto it = unknown_enum_values_by_number_.find(query);
1867 if (it != unknown_enum_values_by_number_.end()) {
1868 return *it;
1869 }
1870 }
1871 // If not found, try again with writer lock held, and create new descriptor if
1872 // necessary.
1873 {
1874 absl::WriterMutexLock l(&unknown_enum_values_mu_);
1875 auto it = unknown_enum_values_by_number_.find(query);
1876 if (it != unknown_enum_values_by_number_.end()) {
1877 return *it;
1878 }
1879
1880 // Create an EnumValueDescriptor dynamically. We don't insert it into the
1881 // EnumDescriptor (it's not a part of the enum as originally defined), but
1882 // we do insert it into the table so that we can return the same pointer
1883 // later.
1884 std::string enum_value_name =
1885 absl::StrFormat("UNKNOWN_ENUM_VALUE_%s_%d", parent->name(), number);
1886 auto* pool = DescriptorPool::generated_pool();
1887 auto* tables = const_cast<DescriptorPool::Tables*>(pool->tables_.get());
1888 internal::FlatAllocator alloc;
1889 alloc.PlanArray<EnumValueDescriptor>(1);
1890 alloc.PlanArray<std::string>(2);
1891
1892 {
1893 // Must lock the pool because we will do allocations in the shared arena.
1894 absl::MutexLockMaybe l2(pool->mutex_);
1895 alloc.FinalizePlanning(tables);
1896 }
1897 EnumValueDescriptor* result = alloc.AllocateArray<EnumValueDescriptor>(1);
1898 result->all_names_ = alloc.AllocateStrings(
1899 enum_value_name,
1900 absl::StrCat(parent->full_name(), ".", enum_value_name));
1901 result->number_ = number;
1902 result->type_ = parent;
1903 result->options_ = &EnumValueOptions::default_instance();
1904 unknown_enum_values_by_number_.insert(result);
1905 return result;
1906 }
1907 }
1908
FindExtension(const Descriptor * extendee,int number) const1909 inline const FieldDescriptor* DescriptorPool::Tables::FindExtension(
1910 const Descriptor* extendee, int number) const {
1911 auto it = extensions_.find({extendee, number});
1912 if (it == extensions_.end()) return nullptr;
1913 return it->second;
1914 }
1915
FindAllExtensions(const Descriptor * extendee,std::vector<const FieldDescriptor * > * out) const1916 inline void DescriptorPool::Tables::FindAllExtensions(
1917 const Descriptor* extendee,
1918 std::vector<const FieldDescriptor*>* out) const {
1919 ExtensionsGroupedByDescriptorMap::const_iterator it =
1920 extensions_.lower_bound(std::make_pair(extendee, 0));
1921 for (; it != extensions_.end() && it->first.first == extendee; ++it) {
1922 out->push_back(it->second);
1923 }
1924 }
1925
1926 // -------------------------------------------------------------------
1927
AddSymbol(absl::string_view full_name,Symbol symbol)1928 bool DescriptorPool::Tables::AddSymbol(absl::string_view full_name,
1929 Symbol symbol) {
1930 ABSL_DCHECK_EQ(full_name, symbol.full_name());
1931 if (symbols_by_name_.insert(symbol).second) {
1932 symbols_after_checkpoint_.push_back(symbol);
1933 return true;
1934 } else {
1935 return false;
1936 }
1937 }
1938
AddAliasUnderParent(const void * parent,absl::string_view name,Symbol symbol)1939 bool FileDescriptorTables::AddAliasUnderParent(const void* parent,
1940 absl::string_view name,
1941 Symbol symbol) {
1942 ABSL_DCHECK_EQ(name, symbol.parent_name_key().second);
1943 ABSL_DCHECK_EQ(parent, symbol.parent_name_key().first);
1944 return symbols_by_parent_.insert(symbol).second;
1945 }
1946
AddFile(const FileDescriptor * file)1947 bool DescriptorPool::Tables::AddFile(const FileDescriptor* file) {
1948 if (files_by_name_.insert(file).second) {
1949 files_after_checkpoint_.push_back(file);
1950 return true;
1951 } else {
1952 return false;
1953 }
1954 }
1955
FinalizeTables()1956 void FileDescriptorTables::FinalizeTables() {}
1957
AddFieldByNumber(FieldDescriptor * field)1958 bool FileDescriptorTables::AddFieldByNumber(FieldDescriptor* field) {
1959 // Skip fields that are at the start of the sequence.
1960 if (field->containing_type() != nullptr && field->number() >= 1 &&
1961 field->number() <= field->containing_type()->sequential_field_limit_) {
1962 if (field->is_extension()) {
1963 // Conflicts with the field that already exists in the sequential range.
1964 return false;
1965 }
1966 // Only return true if the field at that index matches. Otherwise it
1967 // conflicts with the existing field in the sequential range.
1968 return field->containing_type()->field(field->number() - 1) == field;
1969 }
1970
1971 return fields_by_number_.insert(field).second;
1972 }
1973
AddEnumValueByNumber(EnumValueDescriptor * value)1974 bool FileDescriptorTables::AddEnumValueByNumber(EnumValueDescriptor* value) {
1975 // Skip values that are at the start of the sequence.
1976 const int base = value->type()->value(0)->number();
1977 if (base <= value->number() &&
1978 value->number() <=
1979 static_cast<int64_t>(base) + value->type()->sequential_value_limit_)
1980 return true;
1981 return enum_values_by_number_.insert(value).second;
1982 }
1983
AddExtension(const FieldDescriptor * field)1984 bool DescriptorPool::Tables::AddExtension(const FieldDescriptor* field) {
1985 auto it_inserted =
1986 extensions_.insert({{field->containing_type(), field->number()}, field});
1987 if (it_inserted.second) {
1988 extensions_after_checkpoint_.push_back(it_inserted.first->first);
1989 return true;
1990 } else {
1991 return false;
1992 }
1993 }
1994
InternFeatureSet(FeatureSet && features)1995 const FeatureSet* DescriptorPool::Tables::InternFeatureSet(
1996 FeatureSet&& features) {
1997 // Use the serialized feature set as the cache key. If multiple equivalent
1998 // feature sets serialize to different strings, that just bloats the cache a
1999 // little.
2000 auto& result = feature_set_cache_[features.SerializeAsString()];
2001 if (result == nullptr) {
2002 result = absl::make_unique<FeatureSet>(std::move(features));
2003 }
2004 return result.get();
2005 }
2006
2007 // -------------------------------------------------------------------
2008
2009 template <typename Type>
Allocate()2010 Type* DescriptorPool::Tables::Allocate() {
2011 static_assert(std::is_trivially_destructible<Type>::value, "");
2012 static_assert(alignof(Type) <= 8, "");
2013 return ::new (AllocateBytes(sizeof(Type))) Type{};
2014 }
2015
AllocateBytes(int size)2016 void* DescriptorPool::Tables::AllocateBytes(int size) {
2017 if (size == 0) return nullptr;
2018 void* p = ::operator new(size + RoundUpTo<8>(sizeof(int)));
2019 int* sizep = static_cast<int*>(p);
2020 misc_allocs_.emplace_back(sizep);
2021 *sizep = size;
2022 return static_cast<char*>(p) + RoundUpTo<8>(sizeof(int));
2023 }
2024
2025 template <typename... T>
CreateFlatAlloc(const TypeMap<IntT,T...> & sizes)2026 internal::FlatAllocator::Allocation* DescriptorPool::Tables::CreateFlatAlloc(
2027 const TypeMap<IntT, T...>& sizes) {
2028 auto ends = CalculateEnds(sizes);
2029 using FlatAlloc = internal::FlatAllocator::Allocation;
2030
2031 int last_end = ends.template Get<
2032 typename std::tuple_element<sizeof...(T) - 1, std::tuple<T...>>::type>();
2033 size_t total_size =
2034 last_end + RoundUpTo<FlatAlloc::kMaxAlign>(sizeof(FlatAlloc));
2035 char* data = static_cast<char*>(::operator new(total_size));
2036 auto* res = ::new (data) FlatAlloc(ends);
2037 flat_allocs_.emplace_back(res);
2038
2039 return res;
2040 }
2041
BuildLocationsByPath(std::pair<const FileDescriptorTables *,const SourceCodeInfo * > * p)2042 void FileDescriptorTables::BuildLocationsByPath(
2043 std::pair<const FileDescriptorTables*, const SourceCodeInfo*>* p) {
2044 for (int i = 0, len = p->second->location_size(); i < len; ++i) {
2045 const SourceCodeInfo_Location* loc = &p->second->location().Get(i);
2046 p->first->locations_by_path_[absl::StrJoin(loc->path(), ",")] = loc;
2047 }
2048 }
2049
GetSourceLocation(const std::vector<int> & path,const SourceCodeInfo * info) const2050 const SourceCodeInfo_Location* FileDescriptorTables::GetSourceLocation(
2051 const std::vector<int>& path, const SourceCodeInfo* info) const {
2052 std::pair<const FileDescriptorTables*, const SourceCodeInfo*> p(
2053 std::make_pair(this, info));
2054 absl::call_once(locations_by_path_once_,
2055 FileDescriptorTables::BuildLocationsByPath, &p);
2056 auto it = locations_by_path_.find(absl::StrJoin(path, ","));
2057 if (it == locations_by_path_.end()) return nullptr;
2058 return it->second;
2059 }
2060
2061 // ===================================================================
2062 // DescriptorPool
2063
// Out-of-line defaulted destructor.
DescriptorPool::ErrorCollector::~ErrorCollector() = default;
2065
ErrorLocationName(ErrorLocation location)2066 absl::string_view DescriptorPool::ErrorCollector::ErrorLocationName(
2067 ErrorLocation location) {
2068 switch (location) {
2069 case NAME:
2070 return "NAME";
2071 case NUMBER:
2072 return "NUMBER";
2073 case TYPE:
2074 return "TYPE";
2075 case EXTENDEE:
2076 return "EXTENDEE";
2077 case DEFAULT_VALUE:
2078 return "DEFAULT_VALUE";
2079 case OPTION_NAME:
2080 return "OPTION_NAME";
2081 case OPTION_VALUE:
2082 return "OPTION_VALUE";
2083 case INPUT_TYPE:
2084 return "INPUT_TYPE";
2085 case OUTPUT_TYPE:
2086 return "OUTPUT_TYPE";
2087 case IMPORT:
2088 return "IMPORT";
2089 case EDITIONS:
2090 return "EDITIONS";
2091 case OTHER:
2092 return "OTHER";
2093 }
2094 return "UNKNOWN";
2095 }
2096
// Creates a standalone pool: no mutex, no fallback database, no default error
// collector and no underlay. A fresh Tables object is allocated. Dependency
// enforcement starts enabled; every other flag starts disabled.
DescriptorPool::DescriptorPool()
    : mutex_(nullptr),
      fallback_database_(nullptr),
      default_error_collector_(nullptr),
      underlay_(nullptr),
      tables_(new Tables),
      enforce_dependencies_(true),
      lazily_build_dependencies_(false),
      allow_unknown_(false),
      enforce_weak_(false),
      enforce_extension_declarations_(false),
      disallow_enforce_utf8_(false),
      deprecated_legacy_json_field_conflicts_(false) {}
2110
// Creates a pool backed by `fallback_database`, with `error_collector` stored
// as the default error collector. This is the only constructor here that
// allocates a mutex. No underlay is set. Dependency enforcement starts
// enabled; every other flag starts disabled.
DescriptorPool::DescriptorPool(DescriptorDatabase* fallback_database,
                               ErrorCollector* error_collector)
    : mutex_(new absl::Mutex),
      fallback_database_(fallback_database),
      default_error_collector_(error_collector),
      underlay_(nullptr),
      tables_(new Tables),
      enforce_dependencies_(true),
      lazily_build_dependencies_(false),
      allow_unknown_(false),
      enforce_weak_(false),
      enforce_extension_declarations_(false),
      disallow_enforce_utf8_(false),
      deprecated_legacy_json_field_conflicts_(false) {}
2125
// Creates a pool layered on top of `underlay`: no mutex, no fallback database
// and no default error collector. A fresh Tables object is allocated.
// Dependency enforcement starts enabled; every other flag starts disabled.
DescriptorPool::DescriptorPool(const DescriptorPool* underlay)
    : mutex_(nullptr),
      fallback_database_(nullptr),
      default_error_collector_(nullptr),
      underlay_(underlay),
      tables_(new Tables),
      enforce_dependencies_(true),
      lazily_build_dependencies_(false),
      allow_unknown_(false),
      enforce_weak_(false),
      enforce_extension_declarations_(false),
      disallow_enforce_utf8_(false),
      deprecated_legacy_json_field_conflicts_(false) {}
2139
~DescriptorPool()2140 DescriptorPool::~DescriptorPool() {
2141 if (mutex_ != nullptr) delete mutex_;
2142 }
2143
2144 // DescriptorPool::BuildFile() defined later.
2145 // DescriptorPool::BuildFileCollectingErrors() defined later.
2146
InternalDontEnforceDependencies()2147 void DescriptorPool::InternalDontEnforceDependencies() {
2148 enforce_dependencies_ = false;
2149 }
2150
AddDirectInputFile(absl::string_view file_name,bool is_error)2151 void DescriptorPool::AddDirectInputFile(absl::string_view file_name,
2152 bool is_error) {
2153 direct_input_files_[file_name] = is_error;
2154 }
2155
IsReadyForCheckingDescriptorExtDecl(absl::string_view message_name) const2156 bool DescriptorPool::IsReadyForCheckingDescriptorExtDecl(
2157 absl::string_view message_name) const {
2158 static const auto& kDescriptorTypes = *new absl::flat_hash_set<std::string>({
2159 "google.protobuf.EnumOptions",
2160 "google.protobuf.EnumValueOptions",
2161 "google.protobuf.ExtensionRangeOptions",
2162 "google.protobuf.FieldOptions",
2163 "google.protobuf.FileOptions",
2164 "google.protobuf.MessageOptions",
2165 "google.protobuf.MethodOptions",
2166 "google.protobuf.OneofOptions",
2167 "google.protobuf.ServiceOptions",
2168 "google.protobuf.StreamOptions",
2169 });
2170 return kDescriptorTypes.contains(message_name);
2171 }
2172
2173
ClearDirectInputFiles()2174 void DescriptorPool::ClearDirectInputFiles() { direct_input_files_.clear(); }
2175
InternalIsFileLoaded(absl::string_view filename) const2176 bool DescriptorPool::InternalIsFileLoaded(absl::string_view filename) const {
2177 absl::MutexLockMaybe lock(mutex_);
2178 return tables_->FindFile(filename) != nullptr;
2179 }
2180
2181 // generated_pool ====================================================
2182
2183 namespace {
2184
2185
GeneratedDatabase()2186 EncodedDescriptorDatabase* GeneratedDatabase() {
2187 static auto generated_database =
2188 internal::OnShutdownDelete(new EncodedDescriptorDatabase());
2189 return generated_database;
2190 }
2191
NewGeneratedPool()2192 DescriptorPool* NewGeneratedPool() {
2193 auto generated_pool = new DescriptorPool(GeneratedDatabase());
2194 generated_pool->InternalSetLazilyBuildDependencies();
2195 return generated_pool;
2196 }
2197
2198 } // anonymous namespace
2199
internal_generated_database()2200 DescriptorDatabase* DescriptorPool::internal_generated_database() {
2201 return GeneratedDatabase();
2202 }
2203
internal_generated_pool()2204 DescriptorPool* DescriptorPool::internal_generated_pool() {
2205 static DescriptorPool* generated_pool =
2206 internal::OnShutdownDelete(NewGeneratedPool());
2207 return generated_pool;
2208 }
2209
generated_pool()2210 const DescriptorPool* DescriptorPool::generated_pool() {
2211 const DescriptorPool* pool = internal_generated_pool();
2212 // Ensure that descriptor.proto and cpp_features.proto get registered in the
2213 // generated pool. They're special cases because they're included in the full
2214 // runtime. We have to avoid registering it pre-main, because we need to
2215 // ensure that the linker --gc-sections step can strip out the full runtime if
2216 // it is unused.
2217 DescriptorProto::descriptor();
2218 pb::CppFeatures::descriptor();
2219 return pool;
2220 }
2221
2222
InternalAddGeneratedFile(const void * encoded_file_descriptor,int size)2223 void DescriptorPool::InternalAddGeneratedFile(
2224 const void* encoded_file_descriptor, int size) {
2225 // So, this function is called in the process of initializing the
2226 // descriptors for generated proto classes. Each generated .pb.cc file
2227 // has an internal procedure called AddDescriptors() which is called at
2228 // process startup, and that function calls this one in order to register
2229 // the raw bytes of the FileDescriptorProto representing the file.
2230 //
2231 // We do not actually construct the descriptor objects right away. We just
2232 // hang on to the bytes until they are actually needed. We actually construct
2233 // the descriptor the first time one of the following things happens:
2234 // * Someone calls a method like descriptor(), GetDescriptor(), or
2235 // GetReflection() on the generated types, which requires returning the
2236 // descriptor or an object based on it.
2237 // * Someone looks up the descriptor in DescriptorPool::generated_pool().
2238 //
2239 // Once one of these happens, the DescriptorPool actually parses the
2240 // FileDescriptorProto and generates a FileDescriptor (and all its children)
2241 // based on it.
2242 //
2243 // Note that FileDescriptorProto is itself a generated protocol message.
2244 // Therefore, when we parse one, we have to be very careful to avoid using
2245 // any descriptor-based operations, since this might cause infinite recursion
2246 // or deadlock.
2247 absl::MutexLockMaybe lock(internal_generated_pool()->mutex_);
2248 ABSL_CHECK(GeneratedDatabase()->Add(encoded_file_descriptor, size));
2249 }
2250
2251
2252 // Find*By* methods ==================================================
2253
2254 // TODO: There's a lot of repeated code here, but I'm not sure if
2255 // there's any good way to factor it out. Think about this some time when
2256 // there's nothing more important to do (read: never).
2257
FindFileByName(absl::string_view name) const2258 const FileDescriptor* DescriptorPool::FindFileByName(
2259 absl::string_view name) const {
2260 DeferredValidation deferred_validation(this);
2261 const FileDescriptor* result = nullptr;
2262 {
2263 absl::MutexLockMaybe lock(mutex_);
2264 if (fallback_database_ != nullptr) {
2265 tables_->known_bad_symbols_.clear();
2266 tables_->known_bad_files_.clear();
2267 }
2268 result = tables_->FindFile(name);
2269 if (result != nullptr) return result;
2270 if (underlay_ != nullptr) {
2271 result = underlay_->FindFileByName(name);
2272 if (result != nullptr) return result;
2273 }
2274 if (TryFindFileInFallbackDatabase(name, deferred_validation)) {
2275 result = tables_->FindFile(name);
2276 }
2277 }
2278 if (!deferred_validation.Validate()) {
2279 return nullptr;
2280 }
2281 return result;
2282 }
2283
FindFileContainingSymbol(absl::string_view symbol_name) const2284 const FileDescriptor* DescriptorPool::FindFileContainingSymbol(
2285 absl::string_view symbol_name) const {
2286 const FileDescriptor* file_result = nullptr;
2287 DeferredValidation deferred_validation(this);
2288 {
2289 absl::MutexLockMaybe lock(mutex_);
2290 if (fallback_database_ != nullptr) {
2291 tables_->known_bad_symbols_.clear();
2292 tables_->known_bad_files_.clear();
2293 }
2294 Symbol result = tables_->FindSymbol(symbol_name);
2295 if (!result.IsNull()) return result.GetFile();
2296 if (underlay_ != nullptr) {
2297 file_result = underlay_->FindFileContainingSymbol(symbol_name);
2298 if (file_result != nullptr) return file_result;
2299 }
2300 if (TryFindSymbolInFallbackDatabase(symbol_name, deferred_validation)) {
2301 result = tables_->FindSymbol(symbol_name);
2302 if (!result.IsNull()) file_result = result.GetFile();
2303 }
2304 }
2305 if (!deferred_validation.Validate()) {
2306 return nullptr;
2307 }
2308 return file_result;
2309 }
2310
FindMessageTypeByName(absl::string_view name) const2311 const Descriptor* DescriptorPool::FindMessageTypeByName(
2312 absl::string_view name) const {
2313 return tables_->FindByNameHelper(this, name).descriptor();
2314 }
2315
FindFieldByName(absl::string_view name) const2316 const FieldDescriptor* DescriptorPool::FindFieldByName(
2317 absl::string_view name) const {
2318 if (const FieldDescriptor* field =
2319 tables_->FindByNameHelper(this, name).field_descriptor()) {
2320 if (!field->is_extension()) {
2321 return field;
2322 }
2323 }
2324 return nullptr;
2325 }
2326
FindExtensionByName(absl::string_view name) const2327 const FieldDescriptor* DescriptorPool::FindExtensionByName(
2328 absl::string_view name) const {
2329 if (const FieldDescriptor* field =
2330 tables_->FindByNameHelper(this, name).field_descriptor()) {
2331 if (field->is_extension()) {
2332 return field;
2333 }
2334 }
2335 return nullptr;
2336 }
2337
FindOneofByName(absl::string_view name) const2338 const OneofDescriptor* DescriptorPool::FindOneofByName(
2339 absl::string_view name) const {
2340 return tables_->FindByNameHelper(this, name).oneof_descriptor();
2341 }
2342
FindEnumTypeByName(absl::string_view name) const2343 const EnumDescriptor* DescriptorPool::FindEnumTypeByName(
2344 absl::string_view name) const {
2345 return tables_->FindByNameHelper(this, name).enum_descriptor();
2346 }
2347
FindEnumValueByName(absl::string_view name) const2348 const EnumValueDescriptor* DescriptorPool::FindEnumValueByName(
2349 absl::string_view name) const {
2350 return tables_->FindByNameHelper(this, name).enum_value_descriptor();
2351 }
2352
FindServiceByName(absl::string_view name) const2353 const ServiceDescriptor* DescriptorPool::FindServiceByName(
2354 absl::string_view name) const {
2355 return tables_->FindByNameHelper(this, name).service_descriptor();
2356 }
2357
FindMethodByName(absl::string_view name) const2358 const MethodDescriptor* DescriptorPool::FindMethodByName(
2359 absl::string_view name) const {
2360 return tables_->FindByNameHelper(this, name).method_descriptor();
2361 }
2362
FindExtensionByNumber(const Descriptor * extendee,int number) const2363 const FieldDescriptor* DescriptorPool::FindExtensionByNumber(
2364 const Descriptor* extendee, int number) const {
2365 if (extendee->extension_range_count() == 0) return nullptr;
2366 // A faster path to reduce lock contention in finding extensions, assuming
2367 // most extensions will be cache hit.
2368 if (mutex_ != nullptr) {
2369 absl::ReaderMutexLock lock(mutex_);
2370 const FieldDescriptor* result = tables_->FindExtension(extendee, number);
2371 if (result != nullptr) {
2372 return result;
2373 }
2374 }
2375 const FieldDescriptor* result = nullptr;
2376 DeferredValidation deferred_validation(this);
2377 {
2378 absl::MutexLockMaybe lock(mutex_);
2379 if (fallback_database_ != nullptr) {
2380 tables_->known_bad_symbols_.clear();
2381 tables_->known_bad_files_.clear();
2382 }
2383 result = tables_->FindExtension(extendee, number);
2384 if (result != nullptr) {
2385 return result;
2386 }
2387 if (underlay_ != nullptr) {
2388 result = underlay_->FindExtensionByNumber(extendee, number);
2389 if (result != nullptr) return result;
2390 }
2391 if (TryFindExtensionInFallbackDatabase(extendee, number,
2392 deferred_validation)) {
2393 result = tables_->FindExtension(extendee, number);
2394 }
2395 }
2396 if (!deferred_validation.Validate()) {
2397 return nullptr;
2398 }
2399 return result;
2400 }
2401
InternalFindExtensionByNumberNoLock(const Descriptor * extendee,int number) const2402 const FieldDescriptor* DescriptorPool::InternalFindExtensionByNumberNoLock(
2403 const Descriptor* extendee, int number) const {
2404 if (extendee->extension_range_count() == 0) return nullptr;
2405
2406 const FieldDescriptor* result = tables_->FindExtension(extendee, number);
2407 if (result != nullptr) {
2408 return result;
2409 }
2410
2411 if (underlay_ != nullptr) {
2412 result = underlay_->InternalFindExtensionByNumberNoLock(extendee, number);
2413 if (result != nullptr) return result;
2414 }
2415
2416 return nullptr;
2417 }
2418
FindExtensionByPrintableName(const Descriptor * extendee,absl::string_view printable_name) const2419 const FieldDescriptor* DescriptorPool::FindExtensionByPrintableName(
2420 const Descriptor* extendee, absl::string_view printable_name) const {
2421 if (extendee->extension_range_count() == 0) return nullptr;
2422 const FieldDescriptor* result = FindExtensionByName(printable_name);
2423 if (result != nullptr && result->containing_type() == extendee) {
2424 return result;
2425 }
2426 if (extendee->options().message_set_wire_format()) {
2427 // MessageSet extensions may be identified by type name.
2428 const Descriptor* type = FindMessageTypeByName(printable_name);
2429 if (type != nullptr) {
2430 // Look for a matching extension in the foreign type's scope.
2431 const int type_extension_count = type->extension_count();
2432 for (int i = 0; i < type_extension_count; i++) {
2433 const FieldDescriptor* extension = type->extension(i);
2434 if (extension->containing_type() == extendee &&
2435 extension->type() == FieldDescriptor::TYPE_MESSAGE &&
2436 extension->is_optional() && extension->message_type() == type) {
2437 // Found it.
2438 return extension;
2439 }
2440 }
2441 }
2442 }
2443 return nullptr;
2444 }
2445
FindAllExtensions(const Descriptor * extendee,std::vector<const FieldDescriptor * > * out) const2446 void DescriptorPool::FindAllExtensions(
2447 const Descriptor* extendee,
2448 std::vector<const FieldDescriptor*>* out) const {
2449 DeferredValidation deferred_validation(this);
2450 std::vector<const FieldDescriptor*> extensions;
2451 {
2452 absl::MutexLockMaybe lock(mutex_);
2453 if (fallback_database_ != nullptr) {
2454 tables_->known_bad_symbols_.clear();
2455 tables_->known_bad_files_.clear();
2456 }
2457
2458 // Initialize tables_->extensions_ from the fallback database first
2459 // (but do this only once per descriptor).
2460 if (fallback_database_ != nullptr &&
2461 tables_->extensions_loaded_from_db_.count(extendee) == 0) {
2462 std::vector<int> numbers;
2463 if (fallback_database_->FindAllExtensionNumbers(
2464 std::string(extendee->full_name()), &numbers)) {
2465 for (int number : numbers) {
2466 if (tables_->FindExtension(extendee, number) == nullptr) {
2467 TryFindExtensionInFallbackDatabase(extendee, number,
2468 deferred_validation);
2469 }
2470 }
2471 tables_->extensions_loaded_from_db_.insert(extendee);
2472 }
2473 }
2474
2475 tables_->FindAllExtensions(extendee, &extensions);
2476 if (underlay_ != nullptr) {
2477 underlay_->FindAllExtensions(extendee, &extensions);
2478 }
2479 }
2480 if (deferred_validation.Validate()) {
2481 out->insert(out->end(), extensions.begin(), extensions.end());
2482 }
2483 }
2484
2485
2486 // -------------------------------------------------------------------
2487
FindFieldByNumber(int number) const2488 const FieldDescriptor* Descriptor::FindFieldByNumber(int number) const {
2489 const FieldDescriptor* result =
2490 file()->tables_->FindFieldByNumber(this, number);
2491 if (result == nullptr || result->is_extension()) {
2492 return nullptr;
2493 } else {
2494 return result;
2495 }
2496 }
2497
FindFieldByLowercaseName(absl::string_view lowercase_name) const2498 const FieldDescriptor* Descriptor::FindFieldByLowercaseName(
2499 absl::string_view lowercase_name) const {
2500 const FieldDescriptor* result =
2501 file()->tables_->FindFieldByLowercaseName(this, lowercase_name);
2502 if (result == nullptr || result->is_extension()) {
2503 return nullptr;
2504 } else {
2505 return result;
2506 }
2507 }
2508
FindFieldByCamelcaseName(absl::string_view camelcase_name) const2509 const FieldDescriptor* Descriptor::FindFieldByCamelcaseName(
2510 absl::string_view camelcase_name) const {
2511 const FieldDescriptor* result =
2512 file()->tables_->FindFieldByCamelcaseName(this, camelcase_name);
2513 if (result == nullptr || result->is_extension()) {
2514 return nullptr;
2515 } else {
2516 return result;
2517 }
2518 }
2519
FindFieldByName(absl::string_view name) const2520 const FieldDescriptor* Descriptor::FindFieldByName(
2521 absl::string_view name) const {
2522 const FieldDescriptor* field =
2523 file()->tables_->FindNestedSymbol(this, name).field_descriptor();
2524 return field != nullptr && !field->is_extension() ? field : nullptr;
2525 }
2526
FindOneofByName(absl::string_view name) const2527 const OneofDescriptor* Descriptor::FindOneofByName(
2528 absl::string_view name) const {
2529 return file()->tables_->FindNestedSymbol(this, name).oneof_descriptor();
2530 }
2531
FindExtensionByName(absl::string_view name) const2532 const FieldDescriptor* Descriptor::FindExtensionByName(
2533 absl::string_view name) const {
2534 const FieldDescriptor* field =
2535 file()->tables_->FindNestedSymbol(this, name).field_descriptor();
2536 return field != nullptr && field->is_extension() ? field : nullptr;
2537 }
2538
FindExtensionByLowercaseName(absl::string_view name) const2539 const FieldDescriptor* Descriptor::FindExtensionByLowercaseName(
2540 absl::string_view name) const {
2541 const FieldDescriptor* result =
2542 file()->tables_->FindFieldByLowercaseName(this, name);
2543 if (result == nullptr || !result->is_extension()) {
2544 return nullptr;
2545 } else {
2546 return result;
2547 }
2548 }
2549
FindExtensionByCamelcaseName(absl::string_view name) const2550 const FieldDescriptor* Descriptor::FindExtensionByCamelcaseName(
2551 absl::string_view name) const {
2552 const FieldDescriptor* result =
2553 file()->tables_->FindFieldByCamelcaseName(this, name);
2554 if (result == nullptr || !result->is_extension()) {
2555 return nullptr;
2556 } else {
2557 return result;
2558 }
2559 }
2560
FindNestedTypeByName(absl::string_view name) const2561 const Descriptor* Descriptor::FindNestedTypeByName(
2562 absl::string_view name) const {
2563 return file()->tables_->FindNestedSymbol(this, name).descriptor();
2564 }
2565
FindEnumTypeByName(absl::string_view name) const2566 const EnumDescriptor* Descriptor::FindEnumTypeByName(
2567 absl::string_view name) const {
2568 return file()->tables_->FindNestedSymbol(this, name).enum_descriptor();
2569 }
2570
FindEnumValueByName(absl::string_view name) const2571 const EnumValueDescriptor* Descriptor::FindEnumValueByName(
2572 absl::string_view name) const {
2573 return file()->tables_->FindNestedSymbol(this, name).enum_value_descriptor();
2574 }
2575
map_key() const2576 const FieldDescriptor* Descriptor::map_key() const {
2577 if (!options().map_entry()) return nullptr;
2578 ABSL_DCHECK_EQ(field_count(), 2);
2579 return field(0);
2580 }
2581
map_value() const2582 const FieldDescriptor* Descriptor::map_value() const {
2583 if (!options().map_entry()) return nullptr;
2584 ABSL_DCHECK_EQ(field_count(), 2);
2585 return field(1);
2586 }
2587
FindValueByName(absl::string_view name) const2588 const EnumValueDescriptor* EnumDescriptor::FindValueByName(
2589 absl::string_view name) const {
2590 return file()->tables_->FindNestedSymbol(this, name).enum_value_descriptor();
2591 }
2592
FindValueByNumber(int number) const2593 const EnumValueDescriptor* EnumDescriptor::FindValueByNumber(int number) const {
2594 return file()->tables_->FindEnumValueByNumber(this, number);
2595 }
2596
FindValueByNumberCreatingIfUnknown(int number) const2597 const EnumValueDescriptor* EnumDescriptor::FindValueByNumberCreatingIfUnknown(
2598 int number) const {
2599 return file()->tables_->FindEnumValueByNumberCreatingIfUnknown(this, number);
2600 }
2601
FindMethodByName(absl::string_view name) const2602 const MethodDescriptor* ServiceDescriptor::FindMethodByName(
2603 absl::string_view name) const {
2604 return file()->tables_->FindNestedSymbol(this, name).method_descriptor();
2605 }
2606
FindMessageTypeByName(absl::string_view name) const2607 const Descriptor* FileDescriptor::FindMessageTypeByName(
2608 absl::string_view name) const {
2609 return tables_->FindNestedSymbol(this, name).descriptor();
2610 }
2611
FindEnumTypeByName(absl::string_view name) const2612 const EnumDescriptor* FileDescriptor::FindEnumTypeByName(
2613 absl::string_view name) const {
2614 return tables_->FindNestedSymbol(this, name).enum_descriptor();
2615 }
2616
FindEnumValueByName(absl::string_view name) const2617 const EnumValueDescriptor* FileDescriptor::FindEnumValueByName(
2618 absl::string_view name) const {
2619 return tables_->FindNestedSymbol(this, name).enum_value_descriptor();
2620 }
2621
FindServiceByName(absl::string_view name) const2622 const ServiceDescriptor* FileDescriptor::FindServiceByName(
2623 absl::string_view name) const {
2624 return tables_->FindNestedSymbol(this, name).service_descriptor();
2625 }
2626
FindExtensionByName(absl::string_view name) const2627 const FieldDescriptor* FileDescriptor::FindExtensionByName(
2628 absl::string_view name) const {
2629 const FieldDescriptor* field =
2630 tables_->FindNestedSymbol(this, name).field_descriptor();
2631 return field != nullptr && field->is_extension() ? field : nullptr;
2632 }
2633
FindExtensionByLowercaseName(absl::string_view name) const2634 const FieldDescriptor* FileDescriptor::FindExtensionByLowercaseName(
2635 absl::string_view name) const {
2636 const FieldDescriptor* result = tables_->FindFieldByLowercaseName(this, name);
2637 if (result == nullptr || !result->is_extension()) {
2638 return nullptr;
2639 } else {
2640 return result;
2641 }
2642 }
2643
FindExtensionByCamelcaseName(absl::string_view name) const2644 const FieldDescriptor* FileDescriptor::FindExtensionByCamelcaseName(
2645 absl::string_view name) const {
2646 const FieldDescriptor* result = tables_->FindFieldByCamelcaseName(this, name);
2647 if (result == nullptr || !result->is_extension()) {
2648 return nullptr;
2649 } else {
2650 return result;
2651 }
2652 }
2653
CopyTo(DescriptorProto_ExtensionRange * proto) const2654 void Descriptor::ExtensionRange::CopyTo(
2655 DescriptorProto_ExtensionRange* proto) const {
2656 proto->set_start(start_);
2657 proto->set_end(end_);
2658 if (options_ != &ExtensionRangeOptions::default_instance()) {
2659 *proto->mutable_options() = *options_;
2660 }
2661 RestoreFeaturesToOptions(proto_features_, proto);
2662 }
2663
2664 const Descriptor::ExtensionRange*
FindExtensionRangeContainingNumber(int number) const2665 Descriptor::FindExtensionRangeContainingNumber(int number) const {
2666 // Linear search should be fine because we don't expect a message to have
2667 // more than a couple extension ranges.
2668 for (int i = 0; i < extension_range_count(); i++) {
2669 if (number >= extension_range(i)->start_number() &&
2670 number < extension_range(i)->end_number()) {
2671 return extension_range(i);
2672 }
2673 }
2674 return nullptr;
2675 }
2676
FindReservedRangeContainingNumber(int number) const2677 const Descriptor::ReservedRange* Descriptor::FindReservedRangeContainingNumber(
2678 int number) const {
2679 // TODO: Consider a non-linear search.
2680 for (int i = 0; i < reserved_range_count(); i++) {
2681 if (number >= reserved_range(i)->start && number < reserved_range(i)->end) {
2682 return reserved_range(i);
2683 }
2684 }
2685 return nullptr;
2686 }
2687
2688 const EnumDescriptor::ReservedRange*
FindReservedRangeContainingNumber(int number) const2689 EnumDescriptor::FindReservedRangeContainingNumber(int number) const {
2690 // TODO: Consider a non-linear search.
2691 for (int i = 0; i < reserved_range_count(); i++) {
2692 if (number >= reserved_range(i)->start &&
2693 number <= reserved_range(i)->end) {
2694 return reserved_range(i);
2695 }
2696 }
2697 return nullptr;
2698 }
2699
2700 // -------------------------------------------------------------------
2701
TryFindFileInFallbackDatabase(absl::string_view name,DeferredValidation & deferred_validation) const2702 bool DescriptorPool::TryFindFileInFallbackDatabase(
2703 absl::string_view name, DeferredValidation& deferred_validation) const {
2704 if (fallback_database_ == nullptr) return false;
2705
2706 if (tables_->known_bad_files_.contains(name)) return false;
2707
2708 // NOINLINE to reduce the stack cost of the operation in the caller.
2709 const auto find_file = [](DescriptorDatabase& database,
2710 absl::string_view filename,
2711 FileDescriptorProto& output) PROTOBUF_NOINLINE {
2712 return database.FindFileByName(std::string(filename), &output);
2713 };
2714
2715 auto& file_proto = deferred_validation.CreateProto();
2716 if (!find_file(*fallback_database_, name, file_proto) ||
2717 BuildFileFromDatabase(file_proto, deferred_validation) == nullptr) {
2718 tables_->known_bad_files_.emplace(name);
2719 return false;
2720 }
2721 return true;
2722 }
2723
IsSubSymbolOfBuiltType(absl::string_view name) const2724 bool DescriptorPool::IsSubSymbolOfBuiltType(absl::string_view name) const {
2725 for (size_t pos = name.find('.'); pos != name.npos;
2726 pos = name.find('.', pos + 1)) {
2727 auto prefix = name.substr(0, pos);
2728 Symbol symbol = tables_->FindSymbol(prefix);
2729 if (symbol.IsNull()) {
2730 break;
2731 }
2732 if (!symbol.IsPackage()) {
2733 // If the symbol type is anything other than PACKAGE, then its complete
2734 // definition is already known.
2735 return true;
2736 }
2737 }
2738 if (underlay_ != nullptr) {
2739 // Check to see if any prefix of this symbol exists in the underlay.
2740 return underlay_->IsSubSymbolOfBuiltType(name);
2741 }
2742 return false;
2743 }
2744
TryFindSymbolInFallbackDatabase(absl::string_view name,DeferredValidation & deferred_validation) const2745 bool DescriptorPool::TryFindSymbolInFallbackDatabase(
2746 absl::string_view name, DeferredValidation& deferred_validation) const {
2747 if (fallback_database_ == nullptr) return false;
2748
2749 if (tables_->known_bad_symbols_.contains(name)) return false;
2750
2751 std::string name_string(name);
2752 auto& file_proto = deferred_validation.CreateProto();
2753 if ( // We skip looking in the fallback database if the name is a sub-symbol
2754 // of any descriptor that already exists in the descriptor pool (except
2755 // for package descriptors). This is valid because all symbols except
2756 // for packages are defined in a single file, so if the symbol exists
2757 // then we should already have its definition.
2758 //
2759 // The other reason to do this is to support "overriding" type
2760 // definitions by merging two databases that define the same type. (Yes,
2761 // people do this.) The main difficulty with making this work is that
2762 // FindFileContainingSymbol() is allowed to return both false positives
2763 // (e.g., SimpleDescriptorDatabase, UpgradedDescriptorDatabase) and
2764 // false negatives (e.g. ProtoFileParser, SourceTreeDescriptorDatabase).
2765 // When two such databases are merged, looking up a non-existent
2766 // sub-symbol of a type that already exists in the descriptor pool can
2767 // result in an attempt to load multiple definitions of the same type.
2768 // The check below avoids this.
2769 IsSubSymbolOfBuiltType(name)
2770
2771 // Look up file containing this symbol in fallback database.
2772 || !fallback_database_->FindFileContainingSymbol(name_string, &file_proto)
2773
2774 // Check if we've already built this file. If so, it apparently doesn't
2775 // contain the symbol we're looking for. Some DescriptorDatabases
2776 // return false positives.
2777 || tables_->FindFile(file_proto.name()) != nullptr
2778
2779 // Build the file.
2780 || BuildFileFromDatabase(file_proto, deferred_validation) == nullptr) {
2781 tables_->known_bad_symbols_.insert(std::move(name_string));
2782 return false;
2783 }
2784
2785 return true;
2786 }
2787
TryFindExtensionInFallbackDatabase(const Descriptor * containing_type,int field_number,DeferredValidation & deferred_validation) const2788 bool DescriptorPool::TryFindExtensionInFallbackDatabase(
2789 const Descriptor* containing_type, int field_number,
2790 DeferredValidation& deferred_validation) const {
2791 if (fallback_database_ == nullptr) return false;
2792
2793 auto& file_proto = deferred_validation.CreateProto();
2794 if (!fallback_database_->FindFileContainingExtension(
2795 std::string(containing_type->full_name()), field_number,
2796 &file_proto)) {
2797 return false;
2798 }
2799
2800 if (tables_->FindFile(file_proto.name()) != nullptr) {
2801 // We've already loaded this file, and it apparently doesn't contain the
2802 // extension we're looking for. Some DescriptorDatabases return false
2803 // positives.
2804 return false;
2805 }
2806
2807 if (BuildFileFromDatabase(file_proto, deferred_validation) == nullptr) {
2808 return false;
2809 }
2810
2811 return true;
2812 }
2813
2814 // ===================================================================
2815
is_map_message_type() const2816 bool FieldDescriptor::is_map_message_type() const {
2817 return message_type()->options().map_entry();
2818 }
2819
DefaultValueAsString(bool quote_string_type) const2820 std::string FieldDescriptor::DefaultValueAsString(
2821 bool quote_string_type) const {
2822 ABSL_CHECK(has_default_value()) << "No default value";
2823 switch (cpp_type()) {
2824 case CPPTYPE_INT32:
2825 return absl::StrCat(default_value_int32_t());
2826 case CPPTYPE_INT64:
2827 return absl::StrCat(default_value_int64_t());
2828 case CPPTYPE_UINT32:
2829 return absl::StrCat(default_value_uint32_t());
2830 case CPPTYPE_UINT64:
2831 return absl::StrCat(default_value_uint64_t());
2832 case CPPTYPE_FLOAT:
2833 return io::SimpleFtoa(default_value_float());
2834 case CPPTYPE_DOUBLE:
2835 return io::SimpleDtoa(default_value_double());
2836 case CPPTYPE_BOOL:
2837 return default_value_bool() ? "true" : "false";
2838 case CPPTYPE_STRING:
2839 if (quote_string_type) {
2840 return absl::StrCat("\"", absl::CEscape(default_value_string()), "\"");
2841 } else {
2842 if (type() == TYPE_BYTES) {
2843 return absl::CEscape(default_value_string());
2844 } else {
2845 return std::string(default_value_string());
2846 }
2847 }
2848 case CPPTYPE_ENUM:
2849 return std::string(default_value_enum()->name());
2850 case CPPTYPE_MESSAGE:
2851 ABSL_DLOG(FATAL) << "Messages can't have default values!";
2852 break;
2853 }
2854 ABSL_LOG(FATAL) << "Can't get here: failed to get default value as string";
2855 return "";
2856 }
2857
// Out-of-line constructor definitions ==============================
// When using constructor type homing in Clang, debug info for a type
// is only emitted when a constructor definition is emitted, as an
// optimization.  These constructors are never called, so we define them
// out of line to make sure the debug info is emitted somewhere.

Descriptor::Descriptor() = default;
// NOTE(review): unlike its siblings, this constructor has an empty
// user-provided body instead of `= default` -- presumably intentional;
// confirm before unifying the style.
FieldDescriptor::FieldDescriptor() {}
OneofDescriptor::OneofDescriptor() = default;
EnumDescriptor::EnumDescriptor() = default;
EnumValueDescriptor::EnumValueDescriptor() = default;
ServiceDescriptor::ServiceDescriptor() = default;
MethodDescriptor::MethodDescriptor() = default;
FileDescriptor::FileDescriptor() = default;
2872
2873 // CopyTo methods ====================================================
2874
CopyTo(FileDescriptorProto * proto) const2875 void FileDescriptor::CopyTo(FileDescriptorProto* proto) const {
2876 CopyHeadingTo(proto);
2877
2878 for (int i = 0; i < dependency_count(); i++) {
2879 proto->add_dependency(dependency(i)->name());
2880 }
2881
2882 for (int i = 0; i < public_dependency_count(); i++) {
2883 proto->add_public_dependency(public_dependencies_[i]);
2884 }
2885
2886 for (int i = 0; i < weak_dependency_count(); i++) {
2887 proto->add_weak_dependency(weak_dependencies_[i]);
2888 }
2889
2890 for (int i = 0; i < message_type_count(); i++) {
2891 message_type(i)->CopyTo(proto->add_message_type());
2892 }
2893 for (int i = 0; i < enum_type_count(); i++) {
2894 enum_type(i)->CopyTo(proto->add_enum_type());
2895 }
2896 for (int i = 0; i < service_count(); i++) {
2897 service(i)->CopyTo(proto->add_service());
2898 }
2899 for (int i = 0; i < extension_count(); i++) {
2900 extension(i)->CopyTo(proto->add_extension());
2901 }
2902 }
2903
CopyHeadingTo(FileDescriptorProto * proto) const2904 void FileDescriptor::CopyHeadingTo(FileDescriptorProto* proto) const {
2905 proto->set_name(name());
2906 if (!package().empty()) {
2907 proto->set_package(package());
2908 }
2909
2910 if (edition() == Edition::EDITION_PROTO3) {
2911 proto->set_syntax("proto3");
2912 } else if (!IsLegacyEdition(edition())) {
2913 proto->set_syntax("editions");
2914 proto->set_edition(edition());
2915 }
2916
2917 if (&options() != &FileOptions::default_instance()) {
2918 *proto->mutable_options() = options();
2919 }
2920 RestoreFeaturesToOptions(proto_features_, proto);
2921 }
2922
CopyJsonNameTo(FileDescriptorProto * proto) const2923 void FileDescriptor::CopyJsonNameTo(FileDescriptorProto* proto) const {
2924 if (message_type_count() != proto->message_type_size() ||
2925 extension_count() != proto->extension_size()) {
2926 ABSL_LOG(ERROR) << "Cannot copy json_name to a proto of a different size.";
2927 return;
2928 }
2929 for (int i = 0; i < message_type_count(); i++) {
2930 message_type(i)->CopyJsonNameTo(proto->mutable_message_type(i));
2931 }
2932 for (int i = 0; i < extension_count(); i++) {
2933 extension(i)->CopyJsonNameTo(proto->mutable_extension(i));
2934 }
2935 }
2936
CopySourceCodeInfoTo(FileDescriptorProto * proto) const2937 void FileDescriptor::CopySourceCodeInfoTo(FileDescriptorProto* proto) const {
2938 if (source_code_info_ &&
2939 source_code_info_ != &SourceCodeInfo::default_instance()) {
2940 *proto->mutable_source_code_info() = *source_code_info_;
2941 }
2942 }
2943
CopyTo(DescriptorProto * proto) const2944 void Descriptor::CopyTo(DescriptorProto* proto) const {
2945 CopyHeadingTo(proto);
2946
2947 for (int i = 0; i < field_count(); i++) {
2948 field(i)->CopyTo(proto->add_field());
2949 }
2950 for (int i = 0; i < oneof_decl_count(); i++) {
2951 oneof_decl(i)->CopyTo(proto->add_oneof_decl());
2952 }
2953 for (int i = 0; i < nested_type_count(); i++) {
2954 nested_type(i)->CopyTo(proto->add_nested_type());
2955 }
2956 for (int i = 0; i < enum_type_count(); i++) {
2957 enum_type(i)->CopyTo(proto->add_enum_type());
2958 }
2959 for (int i = 0; i < extension_range_count(); i++) {
2960 extension_range(i)->CopyTo(proto->add_extension_range());
2961 }
2962 for (int i = 0; i < extension_count(); i++) {
2963 extension(i)->CopyTo(proto->add_extension());
2964 }
2965 }
2966
CopyHeadingTo(DescriptorProto * proto) const2967 void Descriptor::CopyHeadingTo(DescriptorProto* proto) const {
2968 proto->set_name(name());
2969
2970 for (int i = 0; i < reserved_range_count(); i++) {
2971 DescriptorProto::ReservedRange* range = proto->add_reserved_range();
2972 range->set_start(reserved_range(i)->start);
2973 range->set_end(reserved_range(i)->end);
2974 }
2975 for (int i = 0; i < reserved_name_count(); i++) {
2976 proto->add_reserved_name(reserved_name(i));
2977 }
2978
2979 if (&options() != &MessageOptions::default_instance()) {
2980 *proto->mutable_options() = options();
2981 }
2982
2983 RestoreFeaturesToOptions(proto_features_, proto);
2984 }
2985
CopyJsonNameTo(DescriptorProto * proto) const2986 void Descriptor::CopyJsonNameTo(DescriptorProto* proto) const {
2987 if (field_count() != proto->field_size() ||
2988 nested_type_count() != proto->nested_type_size() ||
2989 extension_count() != proto->extension_size()) {
2990 ABSL_LOG(ERROR) << "Cannot copy json_name to a proto of a different size.";
2991 return;
2992 }
2993 for (int i = 0; i < field_count(); i++) {
2994 field(i)->CopyJsonNameTo(proto->mutable_field(i));
2995 }
2996 for (int i = 0; i < nested_type_count(); i++) {
2997 nested_type(i)->CopyJsonNameTo(proto->mutable_nested_type(i));
2998 }
2999 for (int i = 0; i < extension_count(); i++) {
3000 extension(i)->CopyJsonNameTo(proto->mutable_extension(i));
3001 }
3002 }
3003
CopyTo(FieldDescriptorProto * proto) const3004 void FieldDescriptor::CopyTo(FieldDescriptorProto* proto) const {
3005 proto->set_name(name());
3006 proto->set_number(number());
3007 if (has_json_name_) {
3008 proto->set_json_name(json_name());
3009 }
3010 if (proto3_optional_) {
3011 proto->set_proto3_optional(true);
3012 }
3013 // Some compilers do not allow static_cast directly between two enum types,
3014 // so we must cast to int first.
3015 if (is_required() && !IsLegacyEdition(file()->edition())) {
3016 // Editions files have no required keyword, and we only set this label
3017 // during descriptor build.
3018 proto->set_label(static_cast<FieldDescriptorProto::Label>(
3019 absl::implicit_cast<int>(LABEL_OPTIONAL)));
3020 } else {
3021 proto->set_label(static_cast<FieldDescriptorProto::Label>(
3022 absl::implicit_cast<int>(label())));
3023 }
3024 if (type() == TYPE_GROUP && !IsLegacyEdition(file()->edition())) {
3025 // Editions files have no group keyword, and we only set this label
3026 // during descriptor build.
3027 proto->set_type(static_cast<FieldDescriptorProto::Type>(
3028 absl::implicit_cast<int>(TYPE_MESSAGE)));
3029 } else {
3030 proto->set_type(static_cast<FieldDescriptorProto::Type>(
3031 absl::implicit_cast<int>(type())));
3032 }
3033
3034 if (is_extension()) {
3035 if (!containing_type()->is_unqualified_placeholder_) {
3036 proto->set_extendee(".");
3037 }
3038 proto->mutable_extendee()->append(containing_type()->full_name());
3039 }
3040
3041 if (cpp_type() == CPPTYPE_MESSAGE) {
3042 if (message_type()->is_placeholder_) {
3043 // We don't actually know if the type is a message type. It could be
3044 // an enum.
3045 proto->clear_type();
3046 }
3047
3048 if (!message_type()->is_unqualified_placeholder_) {
3049 proto->set_type_name(".");
3050 }
3051 proto->mutable_type_name()->append(message_type()->full_name());
3052 } else if (cpp_type() == CPPTYPE_ENUM) {
3053 if (!enum_type()->is_unqualified_placeholder_) {
3054 proto->set_type_name(".");
3055 }
3056 proto->mutable_type_name()->append(enum_type()->full_name());
3057 }
3058
3059 if (has_default_value()) {
3060 proto->set_default_value(DefaultValueAsString(false));
3061 }
3062
3063 if (containing_oneof() != nullptr && !is_extension()) {
3064 proto->set_oneof_index(containing_oneof()->index());
3065 }
3066
3067 if (&options() != &FieldOptions::default_instance()) {
3068 *proto->mutable_options() = options();
3069 if (proto_features_->GetExtension(pb::cpp).has_string_type()) {
3070 // ctype must have been set in InferLegacyProtoFeatures so avoid copying.
3071 proto->mutable_options()->clear_ctype();
3072 }
3073 }
3074
3075 RestoreFeaturesToOptions(proto_features_, proto);
3076 }
3077
CopyJsonNameTo(FieldDescriptorProto * proto) const3078 void FieldDescriptor::CopyJsonNameTo(FieldDescriptorProto* proto) const {
3079 proto->set_json_name(json_name());
3080 }
3081
CopyTo(OneofDescriptorProto * proto) const3082 void OneofDescriptor::CopyTo(OneofDescriptorProto* proto) const {
3083 proto->set_name(name());
3084 if (&options() != &OneofOptions::default_instance()) {
3085 *proto->mutable_options() = options();
3086 }
3087 RestoreFeaturesToOptions(proto_features_, proto);
3088 }
3089
CopyTo(EnumDescriptorProto * proto) const3090 void EnumDescriptor::CopyTo(EnumDescriptorProto* proto) const {
3091 proto->set_name(name());
3092
3093 for (int i = 0; i < value_count(); i++) {
3094 value(i)->CopyTo(proto->add_value());
3095 }
3096 for (int i = 0; i < reserved_range_count(); i++) {
3097 EnumDescriptorProto::EnumReservedRange* range = proto->add_reserved_range();
3098 range->set_start(reserved_range(i)->start);
3099 range->set_end(reserved_range(i)->end);
3100 }
3101 for (int i = 0; i < reserved_name_count(); i++) {
3102 proto->add_reserved_name(reserved_name(i));
3103 }
3104
3105 if (&options() != &EnumOptions::default_instance()) {
3106 *proto->mutable_options() = options();
3107 }
3108 RestoreFeaturesToOptions(proto_features_, proto);
3109 }
3110
CopyTo(EnumValueDescriptorProto * proto) const3111 void EnumValueDescriptor::CopyTo(EnumValueDescriptorProto* proto) const {
3112 proto->set_name(name());
3113 proto->set_number(number());
3114
3115 if (&options() != &EnumValueOptions::default_instance()) {
3116 *proto->mutable_options() = options();
3117 }
3118 RestoreFeaturesToOptions(proto_features_, proto);
3119 }
3120
CopyTo(ServiceDescriptorProto * proto) const3121 void ServiceDescriptor::CopyTo(ServiceDescriptorProto* proto) const {
3122 proto->set_name(name());
3123
3124 for (int i = 0; i < method_count(); i++) {
3125 method(i)->CopyTo(proto->add_method());
3126 }
3127
3128 if (&options() != &ServiceOptions::default_instance()) {
3129 *proto->mutable_options() = options();
3130 }
3131 RestoreFeaturesToOptions(proto_features_, proto);
3132 }
3133
CopyTo(MethodDescriptorProto * proto) const3134 void MethodDescriptor::CopyTo(MethodDescriptorProto* proto) const {
3135 proto->set_name(name());
3136
3137 if (!input_type()->is_unqualified_placeholder_) {
3138 proto->set_input_type(".");
3139 }
3140 proto->mutable_input_type()->append(input_type()->full_name());
3141
3142 if (!output_type()->is_unqualified_placeholder_) {
3143 proto->set_output_type(".");
3144 }
3145 proto->mutable_output_type()->append(output_type()->full_name());
3146
3147 if (&options() != &MethodOptions::default_instance()) {
3148 *proto->mutable_options() = options();
3149 }
3150
3151 if (client_streaming_) {
3152 proto->set_client_streaming(true);
3153 }
3154 if (server_streaming_) {
3155 proto->set_server_streaming(true);
3156 }
3157 RestoreFeaturesToOptions(proto_features_, proto);
3158 }
3159
3160 // DebugString methods ===============================================
3161
3162 namespace {
3163
IsGroupSyntax(Edition edition,const FieldDescriptor * desc)3164 bool IsGroupSyntax(Edition edition, const FieldDescriptor* desc) {
3165 return IsLegacyEdition(edition) &&
3166 desc->type() == FieldDescriptor::TYPE_GROUP;
3167 }
3168
3169 template <typename OptionsT>
CopyFeaturesToOptions(const FeatureSet * features,OptionsT * options)3170 void CopyFeaturesToOptions(const FeatureSet* features, OptionsT* options) {
3171 if (features != &FeatureSet::default_instance()) {
3172 *options->mutable_features() = *features;
3173 }
3174 }
3175
RetrieveOptionsAssumingRightPool(int depth,const Message & options,std::vector<std::string> * option_entries)3176 bool RetrieveOptionsAssumingRightPool(
3177 int depth, const Message& options,
3178 std::vector<std::string>* option_entries) {
3179 option_entries->clear();
3180 const Reflection* reflection = options.GetReflection();
3181 std::vector<const FieldDescriptor*> fields;
3182 reflection->ListFields(options, &fields);
3183 for (const FieldDescriptor* field : fields) {
3184 int count = 1;
3185 bool repeated = false;
3186 if (field->is_repeated()) {
3187 count = reflection->FieldSize(options, field);
3188 repeated = true;
3189 }
3190 for (int j = 0; j < count; j++) {
3191 std::string fieldval;
3192 if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
3193 std::string tmp;
3194 TextFormat::Printer printer;
3195 printer.SetExpandAny(true);
3196 printer.SetInitialIndentLevel(depth + 1);
3197 printer.PrintFieldValueToString(options, field, repeated ? j : -1,
3198 &tmp);
3199 fieldval.append("{\n");
3200 fieldval.append(tmp);
3201 fieldval.append(depth * 2, ' ');
3202 fieldval.append("}");
3203 } else {
3204 TextFormat::PrintFieldValueToString(options, field, repeated ? j : -1,
3205 &fieldval);
3206 }
3207 std::string name;
3208 if (field->is_extension()) {
3209 name = absl::StrCat("(.", field->full_name(), ")");
3210 } else {
3211 name = field->name();
3212 }
3213 option_entries->push_back(absl::StrCat(name, " = ", fieldval));
3214 }
3215 }
3216 return !option_entries->empty();
3217 }
3218
3219 // Used by each of the option formatters.
RetrieveOptions(int depth,const Message & options,const DescriptorPool * pool,std::vector<std::string> * option_entries)3220 bool RetrieveOptions(int depth, const Message& options,
3221 const DescriptorPool* pool,
3222 std::vector<std::string>* option_entries) {
3223 // When printing custom options for a descriptor, we must use an options
3224 // message built on top of the same DescriptorPool where the descriptor
3225 // is coming from. This is to ensure we are interpreting custom options
3226 // against the right pool.
3227 if (options.GetDescriptor()->file()->pool() == pool) {
3228 return RetrieveOptionsAssumingRightPool(depth, options, option_entries);
3229 } else {
3230 const Descriptor* option_descriptor =
3231 pool->FindMessageTypeByName(options.GetDescriptor()->full_name());
3232 if (option_descriptor == nullptr) {
3233 // descriptor.proto is not in the pool. This means no custom options are
3234 // used so we are safe to proceed with the compiled options message type.
3235 return RetrieveOptionsAssumingRightPool(depth, options, option_entries);
3236 }
3237 DynamicMessageFactory factory;
3238 std::unique_ptr<Message> dynamic_options(
3239 factory.GetPrototype(option_descriptor)->New());
3240 std::string serialized = options.SerializeAsString();
3241 io::CodedInputStream input(
3242 reinterpret_cast<const uint8_t*>(serialized.data()), serialized.size());
3243 input.SetExtensionRegistry(pool, &factory);
3244 if (dynamic_options->ParseFromCodedStream(&input)) {
3245 return RetrieveOptionsAssumingRightPool(depth, *dynamic_options,
3246 option_entries);
3247 } else {
3248 ABSL_LOG(ERROR) << "Found invalid proto option data for: "
3249 << options.GetDescriptor()->full_name();
3250 return RetrieveOptionsAssumingRightPool(depth, options, option_entries);
3251 }
3252 }
3253 }
3254
3255 // Formats options that all appear together in brackets. Does not include
3256 // brackets.
FormatBracketedOptions(int depth,const Message & options,const DescriptorPool * pool,std::string * output)3257 bool FormatBracketedOptions(int depth, const Message& options,
3258 const DescriptorPool* pool, std::string* output) {
3259 std::vector<std::string> all_options;
3260 if (RetrieveOptions(depth, options, pool, &all_options)) {
3261 output->append(absl::StrJoin(all_options, ", "));
3262 }
3263 return !all_options.empty();
3264 }
3265
3266 // Formats options one per line
FormatLineOptions(int depth,const Message & options,const DescriptorPool * pool,std::string * output)3267 bool FormatLineOptions(int depth, const Message& options,
3268 const DescriptorPool* pool, std::string* output) {
3269 std::string prefix(depth * 2, ' ');
3270 std::vector<std::string> all_options;
3271 if (RetrieveOptions(depth, options, pool, &all_options)) {
3272 for (const std::string& option : all_options) {
3273 absl::SubstituteAndAppend(output, "$0option $1;\n", prefix, option);
3274 }
3275 }
3276 return !all_options.empty();
3277 }
3278
GetLegacySyntaxName(Edition edition)3279 static std::string GetLegacySyntaxName(Edition edition) {
3280 if (edition == Edition::EDITION_PROTO3) {
3281 return "proto3";
3282 }
3283 return "proto2";
3284 }
3285
3286
3287 class SourceLocationCommentPrinter {
3288 public:
3289 template <typename DescType>
SourceLocationCommentPrinter(const DescType * desc,const std::string & prefix,const DebugStringOptions & options)3290 SourceLocationCommentPrinter(const DescType* desc, const std::string& prefix,
3291 const DebugStringOptions& options)
3292 : options_(options), prefix_(prefix) {
3293 // Perform the SourceLocation lookup only if we're including user comments,
3294 // because the lookup is fairly expensive.
3295 have_source_loc_ =
3296 options.include_comments && desc->GetSourceLocation(&source_loc_);
3297 }
SourceLocationCommentPrinter(const FileDescriptor * file,const std::vector<int> & path,const std::string & prefix,const DebugStringOptions & options)3298 SourceLocationCommentPrinter(const FileDescriptor* file,
3299 const std::vector<int>& path,
3300 const std::string& prefix,
3301 const DebugStringOptions& options)
3302 : options_(options), prefix_(prefix) {
3303 // Perform the SourceLocation lookup only if we're including user comments,
3304 // because the lookup is fairly expensive.
3305 have_source_loc_ =
3306 options.include_comments && file->GetSourceLocation(path, &source_loc_);
3307 }
AddPreComment(std::string * output)3308 void AddPreComment(std::string* output) {
3309 if (have_source_loc_) {
3310 // Detached leading comments.
3311 for (const std::string& leading_detached_comment :
3312 source_loc_.leading_detached_comments) {
3313 absl::StrAppend(output, FormatComment(leading_detached_comment), "\n");
3314 }
3315 // Attached leading comments.
3316 if (!source_loc_.leading_comments.empty()) {
3317 absl::StrAppend(output, FormatComment(source_loc_.leading_comments));
3318 }
3319 }
3320 }
AddPostComment(std::string * output)3321 void AddPostComment(std::string* output) {
3322 if (have_source_loc_ && source_loc_.trailing_comments.size() > 0) {
3323 absl::StrAppend(output, FormatComment(source_loc_.trailing_comments));
3324 }
3325 }
3326
3327 // Format comment such that each line becomes a full-line C++-style comment in
3328 // the DebugString() output.
FormatComment(const std::string & comment_text)3329 std::string FormatComment(const std::string& comment_text) {
3330 std::string stripped_comment = comment_text;
3331 absl::StripAsciiWhitespace(&stripped_comment);
3332 std::string output;
3333 for (absl::string_view line : absl::StrSplit(stripped_comment, '\n')) {
3334 absl::SubstituteAndAppend(&output, "$0// $1\n", prefix_, line);
3335 }
3336 return output;
3337 }
3338
3339 private:
3340
3341 bool have_source_loc_;
3342 SourceLocation source_loc_;
3343 DebugStringOptions options_;
3344 std::string prefix_;
3345 };
3346
3347 } // anonymous namespace
3348
DebugString() const3349 std::string FileDescriptor::DebugString() const {
3350 DebugStringOptions options; // default options
3351 return DebugStringWithOptions(options);
3352 }
3353
DebugStringWithOptions(const DebugStringOptions & debug_string_options) const3354 std::string FileDescriptor::DebugStringWithOptions(
3355 const DebugStringOptions& debug_string_options) const {
3356 std::string contents;
3357 {
3358 std::vector<int> path;
3359 path.push_back(FileDescriptorProto::kSyntaxFieldNumber);
3360 SourceLocationCommentPrinter syntax_comment(this, path, "",
3361 debug_string_options);
3362 syntax_comment.AddPreComment(&contents);
3363 if (IsLegacyEdition(edition())) {
3364 absl::SubstituteAndAppend(&contents, "syntax = \"$0\";\n\n",
3365 GetLegacySyntaxName(edition()));
3366 } else {
3367 absl::SubstituteAndAppend(&contents, "edition = \"$0\";\n\n", edition());
3368 }
3369 syntax_comment.AddPostComment(&contents);
3370 }
3371
3372 SourceLocationCommentPrinter comment_printer(this, "", debug_string_options);
3373 comment_printer.AddPreComment(&contents);
3374
3375 absl::flat_hash_set<int> public_dependencies(
3376 public_dependencies_, public_dependencies_ + public_dependency_count_);
3377 absl::flat_hash_set<int> weak_dependencies(
3378 weak_dependencies_, weak_dependencies_ + weak_dependency_count_);
3379
3380 for (int i = 0; i < dependency_count(); i++) {
3381 if (public_dependencies.contains(i)) {
3382 absl::SubstituteAndAppend(&contents, "import public \"$0\";\n",
3383 dependency(i)->name());
3384 } else if (weak_dependencies.contains(i)) {
3385 absl::SubstituteAndAppend(&contents, "import weak \"$0\";\n",
3386 dependency(i)->name());
3387 } else {
3388 absl::SubstituteAndAppend(&contents, "import \"$0\";\n",
3389 dependency(i)->name());
3390 }
3391 }
3392
3393 if (!package().empty()) {
3394 std::vector<int> path;
3395 path.push_back(FileDescriptorProto::kPackageFieldNumber);
3396 SourceLocationCommentPrinter package_comment(this, path, "",
3397 debug_string_options);
3398 package_comment.AddPreComment(&contents);
3399 absl::SubstituteAndAppend(&contents, "package $0;\n\n", package());
3400 package_comment.AddPostComment(&contents);
3401 }
3402
3403 FileOptions full_options = options();
3404 CopyFeaturesToOptions(proto_features_, &full_options);
3405 if (FormatLineOptions(0, full_options, pool(), &contents)) {
3406 contents.append("\n"); // add some space if we had options
3407 }
3408
3409 for (int i = 0; i < enum_type_count(); i++) {
3410 enum_type(i)->DebugString(0, &contents, debug_string_options);
3411 contents.append("\n");
3412 }
3413
3414 // Find all the 'group' type extensions; we will not output their nested
3415 // definitions (those will be done with their group field descriptor).
3416 absl::flat_hash_set<const Descriptor*> groups;
3417 for (int i = 0; i < extension_count(); i++) {
3418 if (IsGroupSyntax(edition(), extension(i))) {
3419 groups.insert(extension(i)->message_type());
3420 }
3421 }
3422
3423 for (int i = 0; i < message_type_count(); i++) {
3424 if (!groups.contains(message_type(i))) {
3425 message_type(i)->DebugString(0, &contents, debug_string_options,
3426 /* include_opening_clause */ true);
3427 contents.append("\n");
3428 }
3429 }
3430
3431 for (int i = 0; i < service_count(); i++) {
3432 service(i)->DebugString(&contents, debug_string_options);
3433 contents.append("\n");
3434 }
3435
3436 const Descriptor* containing_type = nullptr;
3437 for (int i = 0; i < extension_count(); i++) {
3438 if (extension(i)->containing_type() != containing_type) {
3439 if (i > 0) contents.append("}\n\n");
3440 containing_type = extension(i)->containing_type();
3441 absl::SubstituteAndAppend(&contents, "extend .$0 {\n",
3442 containing_type->full_name());
3443 }
3444 extension(i)->DebugString(1, &contents, debug_string_options);
3445 }
3446 if (extension_count() > 0) contents.append("}\n\n");
3447
3448 comment_printer.AddPostComment(&contents);
3449
3450 return contents;
3451 }
3452
DebugString() const3453 std::string Descriptor::DebugString() const {
3454 DebugStringOptions options; // default options
3455 return DebugStringWithOptions(options);
3456 }
3457
DebugStringWithOptions(const DebugStringOptions & options) const3458 std::string Descriptor::DebugStringWithOptions(
3459 const DebugStringOptions& options) const {
3460 std::string contents;
3461 DebugString(0, &contents, options, /* include_opening_clause */ true);
3462 return contents;
3463 }
3464
DebugString(int depth,std::string * contents,const DebugStringOptions & debug_string_options,bool include_opening_clause) const3465 void Descriptor::DebugString(int depth, std::string* contents,
3466 const DebugStringOptions& debug_string_options,
3467 bool include_opening_clause) const {
3468 if (options().map_entry()) {
3469 // Do not generate debug string for auto-generated map-entry type.
3470 return;
3471 }
3472 std::string prefix(depth * 2, ' ');
3473 ++depth;
3474
3475 SourceLocationCommentPrinter comment_printer(this, prefix,
3476 debug_string_options);
3477 comment_printer.AddPreComment(contents);
3478
3479 if (include_opening_clause) {
3480 absl::SubstituteAndAppend(contents, "$0message $1", prefix, name());
3481 }
3482 contents->append(" {\n");
3483
3484 MessageOptions full_options = options();
3485 CopyFeaturesToOptions(proto_features_, &full_options);
3486 FormatLineOptions(depth, full_options, file()->pool(), contents);
3487
3488 // Find all the 'group' types for fields and extensions; we will not output
3489 // their nested definitions (those will be done with their group field
3490 // descriptor).
3491 absl::flat_hash_set<const Descriptor*> groups;
3492 for (int i = 0; i < field_count(); i++) {
3493 if (IsGroupSyntax(file()->edition(), field(i))) {
3494 groups.insert(field(i)->message_type());
3495 }
3496 }
3497 for (int i = 0; i < extension_count(); i++) {
3498 if (IsGroupSyntax(file()->edition(), extension(i))) {
3499 groups.insert(extension(i)->message_type());
3500 }
3501 }
3502
3503 for (int i = 0; i < nested_type_count(); i++) {
3504 if (!groups.contains(nested_type(i))) {
3505 nested_type(i)->DebugString(depth, contents, debug_string_options,
3506 /* include_opening_clause */ true);
3507 }
3508 }
3509 for (int i = 0; i < enum_type_count(); i++) {
3510 enum_type(i)->DebugString(depth, contents, debug_string_options);
3511 }
3512 for (int i = 0; i < field_count(); i++) {
3513 if (field(i)->real_containing_oneof() == nullptr) {
3514 field(i)->DebugString(depth, contents, debug_string_options);
3515 } else if (field(i)->containing_oneof()->field(0) == field(i)) {
3516 // This is the first field in this oneof, so print the whole oneof.
3517 field(i)->containing_oneof()->DebugString(depth, contents,
3518 debug_string_options);
3519 }
3520 }
3521
3522 for (int i = 0; i < extension_range_count(); i++) {
3523 absl::SubstituteAndAppend(contents, "$0 extensions $1", prefix,
3524 extension_range(i)->start_number());
3525 if (extension_range(i)->end_number() >
3526 extension_range(i)->start_number() + 1) {
3527 absl::SubstituteAndAppend(contents, " to $0",
3528 extension_range(i)->end_number() - 1);
3529 }
3530 ExtensionRangeOptions range_options = extension_range(i)->options();
3531 CopyFeaturesToOptions(extension_range(i)->proto_features_, &range_options);
3532 std::string formatted_options;
3533 if (FormatBracketedOptions(depth, range_options, file()->pool(),
3534 &formatted_options)) {
3535 absl::StrAppend(contents, " [", formatted_options, "]");
3536 }
3537 absl::StrAppend(contents, ";\n");
3538 }
3539
3540 // Group extensions by what they extend, so they can be printed out together.
3541 const Descriptor* containing_type = nullptr;
3542 for (int i = 0; i < extension_count(); i++) {
3543 if (extension(i)->containing_type() != containing_type) {
3544 if (i > 0) absl::SubstituteAndAppend(contents, "$0 }\n", prefix);
3545 containing_type = extension(i)->containing_type();
3546 absl::SubstituteAndAppend(contents, "$0 extend .$1 {\n", prefix,
3547 containing_type->full_name());
3548 }
3549 extension(i)->DebugString(depth + 1, contents, debug_string_options);
3550 }
3551 if (extension_count() > 0)
3552 absl::SubstituteAndAppend(contents, "$0 }\n", prefix);
3553
3554 if (reserved_range_count() > 0) {
3555 absl::SubstituteAndAppend(contents, "$0 reserved ", prefix);
3556 for (int i = 0; i < reserved_range_count(); i++) {
3557 const Descriptor::ReservedRange* range = reserved_range(i);
3558 if (range->end == range->start + 1) {
3559 absl::SubstituteAndAppend(contents, "$0, ", range->start);
3560 } else if (range->end > FieldDescriptor::kMaxNumber) {
3561 absl::SubstituteAndAppend(contents, "$0 to max, ", range->start);
3562 } else {
3563 absl::SubstituteAndAppend(contents, "$0 to $1, ", range->start,
3564 range->end - 1);
3565 }
3566 }
3567 contents->replace(contents->size() - 2, 2, ";\n");
3568 }
3569
3570 if (reserved_name_count() > 0) {
3571 absl::SubstituteAndAppend(contents, "$0 reserved ", prefix);
3572 for (int i = 0; i < reserved_name_count(); i++) {
3573 absl::SubstituteAndAppend(
3574 contents,
3575 file()->edition() < Edition::EDITION_2023 ? "\"$0\", " : "$0, ",
3576 absl::CEscape(reserved_name(i)));
3577 }
3578 contents->replace(contents->size() - 2, 2, ";\n");
3579 }
3580
3581 absl::SubstituteAndAppend(contents, "$0}\n", prefix);
3582 comment_printer.AddPostComment(contents);
3583 }
3584
DebugString() const3585 std::string FieldDescriptor::DebugString() const {
3586 DebugStringOptions options; // default options
3587 return DebugStringWithOptions(options);
3588 }
3589
DebugStringWithOptions(const DebugStringOptions & debug_string_options) const3590 std::string FieldDescriptor::DebugStringWithOptions(
3591 const DebugStringOptions& debug_string_options) const {
3592 std::string contents;
3593 int depth = 0;
3594 if (is_extension()) {
3595 absl::SubstituteAndAppend(&contents, "extend .$0 {\n",
3596 containing_type()->full_name());
3597 depth = 1;
3598 }
3599 DebugString(depth, &contents, debug_string_options);
3600 if (is_extension()) {
3601 contents.append("}\n");
3602 }
3603 return contents;
3604 }
3605
3606 // The field type string used in FieldDescriptor::DebugString()
FieldTypeNameDebugString() const3607 std::string FieldDescriptor::FieldTypeNameDebugString() const {
3608 switch (type()) {
3609 case TYPE_MESSAGE:
3610 case TYPE_GROUP:
3611 if (IsGroupSyntax(file()->edition(), this)) {
3612 return kTypeToName[type()];
3613 }
3614 return absl::StrCat(".", message_type()->full_name());
3615 case TYPE_ENUM:
3616 return absl::StrCat(".", enum_type()->full_name());
3617 default:
3618 return kTypeToName[type()];
3619 }
3620 }
3621
DebugString(int depth,std::string * contents,const DebugStringOptions & debug_string_options) const3622 void FieldDescriptor::DebugString(
3623 int depth, std::string* contents,
3624 const DebugStringOptions& debug_string_options) const {
3625 std::string prefix(depth * 2, ' ');
3626 std::string field_type;
3627
3628 // Special case map fields.
3629 if (is_map()) {
3630 absl::SubstituteAndAppend(
3631 &field_type, "map<$0, $1>",
3632 message_type()->field(0)->FieldTypeNameDebugString(),
3633 message_type()->field(1)->FieldTypeNameDebugString());
3634 } else {
3635 field_type = FieldTypeNameDebugString();
3636 }
3637
3638 std::string label = absl::StrCat(kLabelToName[this->label()], " ");
3639
3640 // Label is omitted for maps, oneof, and plain proto3 fields.
3641 if (is_map() || real_containing_oneof() ||
3642 (is_optional() && !has_optional_keyword())) {
3643 label.clear();
3644 }
3645 // Label is omitted for optional and required fields under editions.
3646 if ((is_optional() || is_required()) && !IsLegacyEdition(file()->edition())) {
3647 label.clear();
3648 }
3649
3650 SourceLocationCommentPrinter comment_printer(this, prefix,
3651 debug_string_options);
3652 comment_printer.AddPreComment(contents);
3653
3654 absl::SubstituteAndAppend(
3655 contents, "$0$1$2 $3 = $4", prefix, label, field_type,
3656 IsGroupSyntax(file()->edition(), this) ? message_type()->name() : name(),
3657 number());
3658
3659 bool bracketed = false;
3660 if (has_default_value()) {
3661 bracketed = true;
3662 absl::SubstituteAndAppend(contents, " [default = $0",
3663 DefaultValueAsString(true));
3664 }
3665 if (has_json_name_) {
3666 if (!bracketed) {
3667 bracketed = true;
3668 contents->append(" [");
3669 } else {
3670 contents->append(", ");
3671 }
3672 contents->append("json_name = \"");
3673 contents->append(absl::CEscape(json_name()));
3674 contents->append("\"");
3675 }
3676
3677 FieldOptions full_options = options();
3678 CopyFeaturesToOptions(proto_features_, &full_options);
3679 std::string formatted_options;
3680 if (FormatBracketedOptions(depth, full_options, file()->pool(),
3681 &formatted_options)) {
3682 contents->append(bracketed ? ", " : " [");
3683 bracketed = true;
3684 contents->append(formatted_options);
3685 }
3686
3687 if (bracketed) {
3688 contents->append("]");
3689 }
3690
3691 if (IsGroupSyntax(file()->edition(), this)) {
3692 if (debug_string_options.elide_group_body) {
3693 contents->append(" { ... };\n");
3694 } else {
3695 message_type()->DebugString(depth, contents, debug_string_options,
3696 /* include_opening_clause */ false);
3697 }
3698 } else {
3699 contents->append(";\n");
3700 }
3701
3702 comment_printer.AddPostComment(contents);
3703 }
3704
DebugString() const3705 std::string OneofDescriptor::DebugString() const {
3706 DebugStringOptions options; // default values
3707 return DebugStringWithOptions(options);
3708 }
3709
DebugStringWithOptions(const DebugStringOptions & options) const3710 std::string OneofDescriptor::DebugStringWithOptions(
3711 const DebugStringOptions& options) const {
3712 std::string contents;
3713 DebugString(0, &contents, options);
3714 return contents;
3715 }
3716
DebugString(int depth,std::string * contents,const DebugStringOptions & debug_string_options) const3717 void OneofDescriptor::DebugString(
3718 int depth, std::string* contents,
3719 const DebugStringOptions& debug_string_options) const {
3720 std::string prefix(depth * 2, ' ');
3721 ++depth;
3722 SourceLocationCommentPrinter comment_printer(this, prefix,
3723 debug_string_options);
3724 comment_printer.AddPreComment(contents);
3725 absl::SubstituteAndAppend(contents, "$0oneof $1 {", prefix, name());
3726
3727 OneofOptions full_options = options();
3728 CopyFeaturesToOptions(proto_features_, &full_options);
3729 FormatLineOptions(depth, full_options, containing_type()->file()->pool(),
3730 contents);
3731
3732 if (debug_string_options.elide_oneof_body) {
3733 contents->append(" ... }\n");
3734 } else {
3735 contents->append("\n");
3736 for (int i = 0; i < field_count(); i++) {
3737 field(i)->DebugString(depth, contents, debug_string_options);
3738 }
3739 absl::SubstituteAndAppend(contents, "$0}\n", prefix);
3740 }
3741 comment_printer.AddPostComment(contents);
3742 }
3743
DebugString() const3744 std::string EnumDescriptor::DebugString() const {
3745 DebugStringOptions options; // default values
3746 return DebugStringWithOptions(options);
3747 }
3748
DebugStringWithOptions(const DebugStringOptions & options) const3749 std::string EnumDescriptor::DebugStringWithOptions(
3750 const DebugStringOptions& options) const {
3751 std::string contents;
3752 DebugString(0, &contents, options);
3753 return contents;
3754 }
3755
DebugString(int depth,std::string * contents,const DebugStringOptions & debug_string_options) const3756 void EnumDescriptor::DebugString(
3757 int depth, std::string* contents,
3758 const DebugStringOptions& debug_string_options) const {
3759 std::string prefix(depth * 2, ' ');
3760 ++depth;
3761
3762 SourceLocationCommentPrinter comment_printer(this, prefix,
3763 debug_string_options);
3764 comment_printer.AddPreComment(contents);
3765
3766 absl::SubstituteAndAppend(contents, "$0enum $1 {\n", prefix, name());
3767
3768 EnumOptions full_options = options();
3769 CopyFeaturesToOptions(proto_features_, &full_options);
3770 FormatLineOptions(depth, full_options, file()->pool(), contents);
3771
3772 for (int i = 0; i < value_count(); i++) {
3773 value(i)->DebugString(depth, contents, debug_string_options);
3774 }
3775
3776 if (reserved_range_count() > 0) {
3777 absl::SubstituteAndAppend(contents, "$0 reserved ", prefix);
3778 for (int i = 0; i < reserved_range_count(); i++) {
3779 const EnumDescriptor::ReservedRange* range = reserved_range(i);
3780 if (range->end == range->start) {
3781 absl::SubstituteAndAppend(contents, "$0, ", range->start);
3782 } else if (range->end == INT_MAX) {
3783 absl::SubstituteAndAppend(contents, "$0 to max, ", range->start);
3784 } else {
3785 absl::SubstituteAndAppend(contents, "$0 to $1, ", range->start,
3786 range->end);
3787 }
3788 }
3789 contents->replace(contents->size() - 2, 2, ";\n");
3790 }
3791
3792 if (reserved_name_count() > 0) {
3793 absl::SubstituteAndAppend(contents, "$0 reserved ", prefix);
3794 for (int i = 0; i < reserved_name_count(); i++) {
3795 absl::SubstituteAndAppend(
3796 contents,
3797 file()->edition() < Edition::EDITION_2023 ? "\"$0\", " : "$0, ",
3798 absl::CEscape(reserved_name(i)));
3799 }
3800 contents->replace(contents->size() - 2, 2, ";\n");
3801 }
3802
3803 absl::SubstituteAndAppend(contents, "$0}\n", prefix);
3804
3805 comment_printer.AddPostComment(contents);
3806 }
3807
DebugString() const3808 std::string EnumValueDescriptor::DebugString() const {
3809 DebugStringOptions options; // default values
3810 return DebugStringWithOptions(options);
3811 }
3812
DebugStringWithOptions(const DebugStringOptions & options) const3813 std::string EnumValueDescriptor::DebugStringWithOptions(
3814 const DebugStringOptions& options) const {
3815 std::string contents;
3816 DebugString(0, &contents, options);
3817 return contents;
3818 }
3819
DebugString(int depth,std::string * contents,const DebugStringOptions & debug_string_options) const3820 void EnumValueDescriptor::DebugString(
3821 int depth, std::string* contents,
3822 const DebugStringOptions& debug_string_options) const {
3823 std::string prefix(depth * 2, ' ');
3824
3825 SourceLocationCommentPrinter comment_printer(this, prefix,
3826 debug_string_options);
3827 comment_printer.AddPreComment(contents);
3828
3829 absl::SubstituteAndAppend(contents, "$0$1 = $2", prefix, name(), number());
3830
3831 EnumValueOptions full_options = options();
3832 CopyFeaturesToOptions(proto_features_, &full_options);
3833 std::string formatted_options;
3834 if (FormatBracketedOptions(depth, full_options, type()->file()->pool(),
3835 &formatted_options)) {
3836 absl::SubstituteAndAppend(contents, " [$0]", formatted_options);
3837 }
3838 contents->append(";\n");
3839
3840 comment_printer.AddPostComment(contents);
3841 }
3842
DebugString() const3843 std::string ServiceDescriptor::DebugString() const {
3844 DebugStringOptions options; // default values
3845 return DebugStringWithOptions(options);
3846 }
3847
DebugStringWithOptions(const DebugStringOptions & options) const3848 std::string ServiceDescriptor::DebugStringWithOptions(
3849 const DebugStringOptions& options) const {
3850 std::string contents;
3851 DebugString(&contents, options);
3852 return contents;
3853 }
3854
DebugString(std::string * contents,const DebugStringOptions & debug_string_options) const3855 void ServiceDescriptor::DebugString(
3856 std::string* contents,
3857 const DebugStringOptions& debug_string_options) const {
3858 SourceLocationCommentPrinter comment_printer(this, /* prefix */ "",
3859 debug_string_options);
3860 comment_printer.AddPreComment(contents);
3861
3862 absl::SubstituteAndAppend(contents, "service $0 {\n", name());
3863
3864 ServiceOptions full_options = options();
3865 CopyFeaturesToOptions(proto_features_, &full_options);
3866 FormatLineOptions(1, full_options, file()->pool(), contents);
3867
3868 for (int i = 0; i < method_count(); i++) {
3869 method(i)->DebugString(1, contents, debug_string_options);
3870 }
3871
3872 contents->append("}\n");
3873
3874 comment_printer.AddPostComment(contents);
3875 }
3876
DebugString() const3877 std::string MethodDescriptor::DebugString() const {
3878 DebugStringOptions options; // default values
3879 return DebugStringWithOptions(options);
3880 }
3881
DebugStringWithOptions(const DebugStringOptions & options) const3882 std::string MethodDescriptor::DebugStringWithOptions(
3883 const DebugStringOptions& options) const {
3884 std::string contents;
3885 DebugString(0, &contents, options);
3886 return contents;
3887 }
3888
DebugString(int depth,std::string * contents,const DebugStringOptions & debug_string_options) const3889 void MethodDescriptor::DebugString(
3890 int depth, std::string* contents,
3891 const DebugStringOptions& debug_string_options) const {
3892 std::string prefix(depth * 2, ' ');
3893 ++depth;
3894
3895 SourceLocationCommentPrinter comment_printer(this, prefix,
3896 debug_string_options);
3897 comment_printer.AddPreComment(contents);
3898
3899 absl::SubstituteAndAppend(
3900 contents, "$0rpc $1($4.$2) returns ($5.$3)", prefix, name(),
3901 input_type()->full_name(), output_type()->full_name(),
3902 client_streaming() ? "stream " : "", server_streaming() ? "stream " : "");
3903
3904 MethodOptions full_options = options();
3905 CopyFeaturesToOptions(proto_features_, &full_options);
3906 std::string formatted_options;
3907 if (FormatLineOptions(depth, full_options, service()->file()->pool(),
3908 &formatted_options)) {
3909 absl::SubstituteAndAppend(contents, " {\n$0$1}\n", formatted_options,
3910 prefix);
3911 } else {
3912 contents->append(";\n");
3913 }
3914
3915 comment_printer.AddPostComment(contents);
3916 }
3917
3918 // Feature methods ===============================================
3919
is_closed() const3920 bool EnumDescriptor::is_closed() const {
3921 return features().enum_type() == FeatureSet::CLOSED;
3922 }
3923
is_packed() const3924 bool FieldDescriptor::is_packed() const {
3925 if (!is_packable()) return false;
3926 return features().repeated_field_encoding() == FeatureSet::PACKED;
3927 }
3928
IsStrictUtf8(const FieldDescriptor * field)3929 static bool IsStrictUtf8(const FieldDescriptor* field) {
3930 return internal::InternalFeatureHelper::GetFeatures(*field)
3931 .utf8_validation() == FeatureSet::VERIFY;
3932 }
3933
requires_utf8_validation() const3934 bool FieldDescriptor::requires_utf8_validation() const {
3935 return type() == TYPE_STRING && IsStrictUtf8(this);
3936 }
3937
has_presence() const3938 bool FieldDescriptor::has_presence() const {
3939 if (is_repeated()) return false;
3940 return cpp_type() == CPPTYPE_MESSAGE || is_extension() ||
3941 containing_oneof() ||
3942 features().field_presence() != FeatureSet::IMPLICIT;
3943 }
3944
is_required() const3945 bool FieldDescriptor::is_required() const {
3946 return features().field_presence() == FeatureSet::LEGACY_REQUIRED;
3947 }
3948
legacy_enum_field_treated_as_closed() const3949 bool FieldDescriptor::legacy_enum_field_treated_as_closed() const {
3950 return type() == TYPE_ENUM &&
3951 (features().GetExtension(pb::cpp).legacy_closed_enum() ||
3952 enum_type()->is_closed());
3953 }
3954
has_optional_keyword() const3955 bool FieldDescriptor::has_optional_keyword() const {
3956 return proto3_optional_ || (file()->edition() == Edition::EDITION_PROTO2 &&
3957 is_optional() && !containing_oneof());
3958 }
3959
cpp_string_type() const3960 FieldDescriptor::CppStringType FieldDescriptor::cpp_string_type() const {
3961 ABSL_DCHECK(cpp_type() == FieldDescriptor::CPPTYPE_STRING);
3962 switch (features().GetExtension(pb::cpp).string_type()) {
3963 case pb::CppFeatures::VIEW:
3964 return CppStringType::kView;
3965 case pb::CppFeatures::CORD:
3966 // In open-source, protobuf CORD is only supported for singular bytes
3967 // fields.
3968 if (type() != FieldDescriptor::TYPE_BYTES || is_repeated() ||
3969 is_extension()) {
3970 return CppStringType::kString;
3971 }
3972 return CppStringType::kCord;
3973 case pb::CppFeatures::STRING:
3974 return CppStringType::kString;
3975 default:
3976 // If features haven't been resolved, this is a dynamic build not for C++
3977 // codegen. Just use string type.
3978 ABSL_DCHECK(!features().GetExtension(pb::cpp).has_string_type());
3979 return CppStringType::kString;
3980 }
3981 }
3982
3983 // Location methods ===============================================
3984
GetSourceLocation(const std::vector<int> & path,SourceLocation * out_location) const3985 bool FileDescriptor::GetSourceLocation(const std::vector<int>& path,
3986 SourceLocation* out_location) const {
3987 ABSL_CHECK(out_location != nullptr);
3988 if (source_code_info_) {
3989 if (const SourceCodeInfo_Location* loc =
3990 tables_->GetSourceLocation(path, source_code_info_)) {
3991 const RepeatedField<int32_t>& span = loc->span();
3992 if (span.size() == 3 || span.size() == 4) {
3993 out_location->start_line = span.Get(0);
3994 out_location->start_column = span.Get(1);
3995 out_location->end_line = span.Get(span.size() == 3 ? 0 : 2);
3996 out_location->end_column = span.Get(span.size() - 1);
3997
3998 out_location->leading_comments = loc->leading_comments();
3999 out_location->trailing_comments = loc->trailing_comments();
4000 out_location->leading_detached_comments.assign(
4001 loc->leading_detached_comments().begin(),
4002 loc->leading_detached_comments().end());
4003 return true;
4004 }
4005 }
4006 }
4007 return false;
4008 }
4009
GetSourceLocation(SourceLocation * out_location) const4010 bool FileDescriptor::GetSourceLocation(SourceLocation* out_location) const {
4011 std::vector<int> path; // empty path for root FileDescriptor
4012 return GetSourceLocation(path, out_location);
4013 }
4014
GetSourceLocation(SourceLocation * out_location) const4015 bool Descriptor::GetSourceLocation(SourceLocation* out_location) const {
4016 std::vector<int> path;
4017 GetLocationPath(&path);
4018 return file()->GetSourceLocation(path, out_location);
4019 }
4020
GetSourceLocation(SourceLocation * out_location) const4021 bool FieldDescriptor::GetSourceLocation(SourceLocation* out_location) const {
4022 std::vector<int> path;
4023 GetLocationPath(&path);
4024 return file()->GetSourceLocation(path, out_location);
4025 }
4026
GetSourceLocation(SourceLocation * out_location) const4027 bool OneofDescriptor::GetSourceLocation(SourceLocation* out_location) const {
4028 std::vector<int> path;
4029 GetLocationPath(&path);
4030 return containing_type()->file()->GetSourceLocation(path, out_location);
4031 }
4032
GetSourceLocation(SourceLocation * out_location) const4033 bool EnumDescriptor::GetSourceLocation(SourceLocation* out_location) const {
4034 std::vector<int> path;
4035 GetLocationPath(&path);
4036 return file()->GetSourceLocation(path, out_location);
4037 }
4038
GetSourceLocation(SourceLocation * out_location) const4039 bool MethodDescriptor::GetSourceLocation(SourceLocation* out_location) const {
4040 std::vector<int> path;
4041 GetLocationPath(&path);
4042 return service()->file()->GetSourceLocation(path, out_location);
4043 }
4044
GetSourceLocation(SourceLocation * out_location) const4045 bool ServiceDescriptor::GetSourceLocation(SourceLocation* out_location) const {
4046 std::vector<int> path;
4047 GetLocationPath(&path);
4048 return file()->GetSourceLocation(path, out_location);
4049 }
4050
GetSourceLocation(SourceLocation * out_location) const4051 bool EnumValueDescriptor::GetSourceLocation(
4052 SourceLocation* out_location) const {
4053 std::vector<int> path;
4054 GetLocationPath(&path);
4055 return type()->file()->GetSourceLocation(path, out_location);
4056 }
4057
GetLocationPath(std::vector<int> * output) const4058 void Descriptor::GetLocationPath(std::vector<int>* output) const {
4059 if (containing_type()) {
4060 containing_type()->GetLocationPath(output);
4061 output->push_back(DescriptorProto::kNestedTypeFieldNumber);
4062 output->push_back(index());
4063 } else {
4064 output->push_back(FileDescriptorProto::kMessageTypeFieldNumber);
4065 output->push_back(index());
4066 }
4067 }
4068
GetLocationPath(std::vector<int> * output) const4069 void FieldDescriptor::GetLocationPath(std::vector<int>* output) const {
4070 if (is_extension()) {
4071 if (extension_scope() == nullptr) {
4072 output->push_back(FileDescriptorProto::kExtensionFieldNumber);
4073 output->push_back(index());
4074 } else {
4075 extension_scope()->GetLocationPath(output);
4076 output->push_back(DescriptorProto::kExtensionFieldNumber);
4077 output->push_back(index());
4078 }
4079 } else {
4080 containing_type()->GetLocationPath(output);
4081 output->push_back(DescriptorProto::kFieldFieldNumber);
4082 output->push_back(index());
4083 }
4084 }
4085
GetLocationPath(std::vector<int> * output) const4086 void OneofDescriptor::GetLocationPath(std::vector<int>* output) const {
4087 containing_type()->GetLocationPath(output);
4088 output->push_back(DescriptorProto::kOneofDeclFieldNumber);
4089 output->push_back(index());
4090 }
4091
GetLocationPath(std::vector<int> * output) const4092 void Descriptor::ExtensionRange::GetLocationPath(
4093 std::vector<int>* output) const {
4094 containing_type()->GetLocationPath(output);
4095 output->push_back(DescriptorProto::kExtensionRangeFieldNumber);
4096 output->push_back(index());
4097 }
4098
GetLocationPath(std::vector<int> * output) const4099 void EnumDescriptor::GetLocationPath(std::vector<int>* output) const {
4100 if (containing_type()) {
4101 containing_type()->GetLocationPath(output);
4102 output->push_back(DescriptorProto::kEnumTypeFieldNumber);
4103 output->push_back(index());
4104 } else {
4105 output->push_back(FileDescriptorProto::kEnumTypeFieldNumber);
4106 output->push_back(index());
4107 }
4108 }
4109
GetLocationPath(std::vector<int> * output) const4110 void EnumValueDescriptor::GetLocationPath(std::vector<int>* output) const {
4111 type()->GetLocationPath(output);
4112 output->push_back(EnumDescriptorProto::kValueFieldNumber);
4113 output->push_back(index());
4114 }
4115
GetLocationPath(std::vector<int> * output) const4116 void ServiceDescriptor::GetLocationPath(std::vector<int>* output) const {
4117 output->push_back(FileDescriptorProto::kServiceFieldNumber);
4118 output->push_back(index());
4119 }
4120
GetLocationPath(std::vector<int> * output) const4121 void MethodDescriptor::GetLocationPath(std::vector<int>* output) const {
4122 service()->GetLocationPath(output);
4123 output->push_back(ServiceDescriptorProto::kMethodFieldNumber);
4124 output->push_back(index());
4125 }
4126
4127 // ===================================================================
4128
4129 namespace {
4130
// Represents an options message to interpret. Extension names in the option
// name are resolved relative to name_scope. element_name and orig_opt are
// used only for error reporting (since the parser records locations against
// pointers in the original options, not the mutable copy). The Message must be
// one of the Options messages in descriptor.proto.
struct OptionsToInterpret {
  // Copies `ns`, `el` and the elements of `path` into members owned by this
  // struct. `orig_opt` and `opt` are stored as raw pointers; nothing in this
  // struct deletes them.
  OptionsToInterpret(absl::string_view ns, absl::string_view el,
                     absl::Span<const int> path, const Message* orig_opt,
                     Message* opt)
      : name_scope(ns),
        element_name(el),
        element_path(path.begin(), path.end()),
        original_options(orig_opt),
        options(opt) {}
  // Scope against which extension names in option names are resolved.
  std::string name_scope;
  // Name of the element the options belong to; used for error reporting.
  std::string element_name;
  // Copy of the path passed to the constructor.
  std::vector<int> element_path;
  // The original (immutable) options message; used for error reporting.
  const Message* original_options;
  // The mutable copy of the options message.
  Message* options;
};
4151
4152 } // namespace
4153
4154 class DescriptorBuilder {
4155 public:
New(const DescriptorPool * pool,DescriptorPool::Tables * tables,DescriptorPool::DeferredValidation & deferred_validation,DescriptorPool::ErrorCollector * error_collector)4156 static std::unique_ptr<DescriptorBuilder> New(
4157 const DescriptorPool* pool, DescriptorPool::Tables* tables,
4158 DescriptorPool::DeferredValidation& deferred_validation,
4159 DescriptorPool::ErrorCollector* error_collector) {
4160 return std::unique_ptr<DescriptorBuilder>(new DescriptorBuilder(
4161 pool, tables, deferred_validation, error_collector));
4162 }
4163
4164 ~DescriptorBuilder();
4165
4166 const FileDescriptor* BuildFile(const FileDescriptorProto& proto);
4167
4168 private:
4169 DescriptorBuilder(const DescriptorPool* pool, DescriptorPool::Tables* tables,
4170 DescriptorPool::DeferredValidation& deferred_validation,
4171 DescriptorPool::ErrorCollector* error_collector);
4172
4173 friend class OptionInterpreter;
4174
4175 // Non-recursive part of BuildFile functionality.
4176 FileDescriptor* BuildFileImpl(const FileDescriptorProto& proto,
4177 internal::FlatAllocator& alloc);
4178
4179 const DescriptorPool* pool_;
4180 DescriptorPool::Tables* tables_; // for convenience
4181 DescriptorPool::DeferredValidation& deferred_validation_;
4182 DescriptorPool::ErrorCollector* error_collector_;
4183
4184 absl::optional<FeatureResolver> feature_resolver_ = absl::nullopt;
4185
4186 // As we build descriptors we store copies of the options messages in
4187 // them. We put pointers to those copies in this vector, as we build, so we
4188 // can later (after cross-linking) interpret those options.
4189 std::vector<OptionsToInterpret> options_to_interpret_;
4190
4191 bool had_errors_;
4192 std::string filename_;
4193 FileDescriptor* file_;
4194 FileDescriptorTables* file_tables_;
4195 absl::flat_hash_set<const FileDescriptor*> dependencies_;
4196
4197 struct MessageHints {
4198 int fields_to_suggest = 0;
4199 const Message* first_reason = nullptr;
4200 DescriptorPool::ErrorCollector::ErrorLocation first_reason_location =
4201 DescriptorPool::ErrorCollector::ErrorLocation::OTHER;
4202
RequestHintOnFieldNumbersgoogle::protobuf::DescriptorBuilder::MessageHints4203 void RequestHintOnFieldNumbers(
4204 const Message& reason,
4205 DescriptorPool::ErrorCollector::ErrorLocation reason_location,
4206 int range_start = 0, int range_end = 1) {
4207 auto fit = [](int value) {
4208 return std::min(std::max(value, 0), FieldDescriptor::kMaxNumber);
4209 };
4210 fields_to_suggest =
4211 fit(fields_to_suggest + fit(fit(range_end) - fit(range_start)));
4212 if (first_reason) return;
4213 first_reason = &reason;
4214 first_reason_location = reason_location;
4215 }
4216 };
4217
4218 absl::flat_hash_map<const Descriptor*, MessageHints> message_hints_;
4219
4220 // unused_dependency_ is used to record the unused imported files.
4221 // Note: public import is not considered.
4222 absl::flat_hash_set<const FileDescriptor*> unused_dependency_;
4223
4224 // If LookupSymbol() finds a symbol that is in a file which is not a declared
4225 // dependency of this file, it will fail, but will set
4226 // possible_undeclared_dependency_ to point at that file. This is only used
4227 // by AddNotDefinedError() to report a more useful error message.
4228 // possible_undeclared_dependency_name_ is the name of the symbol that was
4229 // actually found in possible_undeclared_dependency_, which may be a parent
4230 // of the symbol actually looked for.
4231 const FileDescriptor* possible_undeclared_dependency_;
4232 std::string possible_undeclared_dependency_name_;
4233
4234 // If LookupSymbol() could resolve a symbol which is not defined,
4235 // record the resolved name. This is only used by AddNotDefinedError()
4236 // to report a more useful error message.
4237 std::string undefine_resolved_name_;
4238
4239 // Tracker for current recursion depth to implement recursion protection.
4240 //
4241 // Counts down to 0 when there is no depth remaining.
4242 //
4243 // Maximum recursion depth corresponds to 32 nested message declarations.
4244 int recursion_depth_ = internal::cpp::MaxMessageDeclarationNestingDepth();
4245
4246 // Note: Both AddError and AddWarning functions are extremely sensitive to
4247 // the *caller* stack space used. We call these functions many times in
4248 // complex code paths that are hot and likely to be inlined heavily. However,
4249 // these calls themselves are cold error paths. But stack space used by the
4250 // code that sets up the call in many cases is paid for even when the call
4251 // isn't reached. To optimize this, we use `const std::string &` to reuse
4252 // string objects where possible for the inputs and for the error message
4253 // itself we use a closure to build the error message inside these routines.
4254 // The routines themselves are marked to prevent inlining and this lets us
4255 // move the large code sometimes required to produce a useful error message
4256 // entirely into a helper closure rather than the immediate caller.
4257 //
4258 // The `const char*` overload should only be used for string literal messages
4259 // where this is a frustrating amount of overhead and there is no harm in
4260 // directly using the literal.
4261 void AddError(absl::string_view element_name, const Message& descriptor,
4262 DescriptorPool::ErrorCollector::ErrorLocation location,
4263 absl::FunctionRef<std::string()> make_error);
4264 void AddError(absl::string_view element_name, const Message& descriptor,
4265 DescriptorPool::ErrorCollector::ErrorLocation location,
4266 const char* error);
4267 void AddRecursiveImportError(const FileDescriptorProto& proto, int from_here);
4268 void AddTwiceListedError(const FileDescriptorProto& proto, int index);
4269 void AddImportError(const FileDescriptorProto& proto, int index);
4270
4271 // Adds an error indicating that undefined_symbol was not defined. Must
4272 // only be called after LookupSymbol() fails.
4273 void AddNotDefinedError(
4274 absl::string_view element_name, const Message& descriptor,
4275 DescriptorPool::ErrorCollector::ErrorLocation location,
4276 absl::string_view undefined_symbol);
4277
4278 void AddWarning(absl::string_view element_name, const Message& descriptor,
4279 DescriptorPool::ErrorCollector::ErrorLocation location,
4280 absl::FunctionRef<std::string()> make_error);
4281 void AddWarning(absl::string_view element_name, const Message& descriptor,
4282 DescriptorPool::ErrorCollector::ErrorLocation location,
4283 const char* error);
4284
4285 // Silly helper which determines if the given file is in the given package.
4286 // I.e., either file->package() == package_name or file->package() is a
4287 // nested package within package_name.
4288 bool IsInPackage(const FileDescriptor* file, absl::string_view package_name);
4289
4290 // Helper function which finds all public dependencies of the given file, and
  // stores them in the dependencies_ set in the builder.
4292 void RecordPublicDependencies(const FileDescriptor* file);
4293
4294 // Like tables_->FindSymbol(), but additionally:
4295 // - Search the pool's underlay if not found in tables_.
  //   - Ensure that the resulting Symbol is from one of the file's declared
4297 // dependencies.
4298 Symbol FindSymbol(absl::string_view name, bool build_it = true);
4299
4300 // Like FindSymbol() but does not require that the symbol is in one of the
4301 // file's declared dependencies.
4302 Symbol FindSymbolNotEnforcingDeps(absl::string_view name,
4303 bool build_it = true);
4304
4305 // This implements the body of FindSymbolNotEnforcingDeps().
4306 Symbol FindSymbolNotEnforcingDepsHelper(const DescriptorPool* pool,
4307 absl::string_view name,
4308 bool build_it = true);
4309
4310 // Like FindSymbol(), but looks up the name relative to some other symbol
4311 // name. This first searches siblings of relative_to, then siblings of its
4312 // parents, etc. For example, LookupSymbol("foo.bar", "baz.moo.corge") makes
4313 // the following calls, returning the first non-null result:
4314 // FindSymbol("baz.moo.foo.bar"), FindSymbol("baz.foo.bar"),
4315 // FindSymbol("foo.bar"). If AllowUnknownDependencies() has been called
4316 // on the DescriptorPool, this will generate a placeholder type if
4317 // the name is not found (unless the name itself is malformed). The
4318 // placeholder_type parameter indicates what kind of placeholder should be
4319 // constructed in this case. The resolve_mode parameter determines whether
4320 // any symbol is returned, or only symbols that are types. Note, however,
4321 // that LookupSymbol may still return a non-type symbol in LOOKUP_TYPES mode,
4322 // if it believes that's all it could refer to. The caller should always
4323 // check that it receives the type of symbol it was expecting.
4324 enum ResolveMode { LOOKUP_ALL, LOOKUP_TYPES };
4325 Symbol LookupSymbol(absl::string_view name, absl::string_view relative_to,
4326 DescriptorPool::PlaceholderType placeholder_type =
4327 DescriptorPool::PLACEHOLDER_MESSAGE,
4328 ResolveMode resolve_mode = LOOKUP_ALL,
4329 bool build_it = true);
4330
4331 // Like LookupSymbol() but will not return a placeholder even if
4332 // AllowUnknownDependencies() has been used.
4333 Symbol LookupSymbolNoPlaceholder(absl::string_view name,
4334 absl::string_view relative_to,
4335 ResolveMode resolve_mode = LOOKUP_ALL,
4336 bool build_it = true);
4337
4338 // Calls tables_->AddSymbol() and records an error if it fails. Returns
4339 // true if successful or false if failed, though most callers can ignore
4340 // the return value since an error has already been recorded.
4341 bool AddSymbol(absl::string_view full_name, const void* parent,
4342 absl::string_view name, const Message& proto, Symbol symbol);
4343
4344 // Like AddSymbol(), but succeeds if the symbol is already defined as long
4345 // as the existing definition is also a package (because it's OK to define
4346 // the same package in two different files). Also adds all parents of the
4347 // package to the symbol table (e.g. AddPackage("foo.bar", ...) will add
4348 // "foo.bar" and "foo" to the table).
4349 void AddPackage(absl::string_view name, const Message& proto,
4350 FileDescriptor* file, bool toplevel);
4351
4352 // Checks that the symbol name contains only alphanumeric characters and
4353 // underscores. Records an error otherwise.
4354 void ValidateSymbolName(absl::string_view name, absl::string_view full_name,
4355 const Message& proto);
4356
4357 // Allocates a copy of orig_options in tables_ and stores it in the
4358 // descriptor. Remembers its uninterpreted options, to be interpreted
4359 // later. DescriptorT must be one of the Descriptor messages from
4360 // descriptor.proto.
4361 template <class DescriptorT>
4362 void AllocateOptions(const typename DescriptorT::Proto& proto,
4363 DescriptorT* descriptor, int options_field_tag,
4364 absl::string_view option_name,
4365 internal::FlatAllocator& alloc);
4366 // Specialization for FileOptions.
4367 void AllocateOptions(const FileDescriptorProto& proto,
4368 FileDescriptor* descriptor,
4369 internal::FlatAllocator& alloc);
4370
4371 // Implementation for AllocateOptions(). Don't call this directly.
4372 template <class DescriptorT>
4373 const typename DescriptorT::OptionsType* AllocateOptionsImpl(
4374 absl::string_view name_scope, absl::string_view element_name,
4375 const typename DescriptorT::Proto& proto,
4376 absl::Span<const int> options_path, absl::string_view option_name,
4377 internal::FlatAllocator& alloc);
4378
4379 // Allocates and resolves any feature sets that need to be owned by a given
4380 // descriptor. This also strips features out of the mutable options message to
4381 // prevent leaking of unresolved features.
4382 // Note: This must be used during a pre-order traversal of the
4383 // descriptor tree, so that each descriptor's parent has a fully resolved
4384 // feature set already.
4385 template <class DescriptorT>
4386 void ResolveFeatures(const typename DescriptorT::Proto& proto,
4387 DescriptorT* descriptor,
4388 typename DescriptorT::OptionsType* options,
4389 internal::FlatAllocator& alloc);
4390 void ResolveFeatures(const FileDescriptorProto& proto,
4391 FileDescriptor* descriptor, FileOptions* options,
4392 internal::FlatAllocator& alloc);
4393 template <class DescriptorT>
4394 void ResolveFeaturesImpl(
4395 Edition edition, const typename DescriptorT::Proto& proto,
4396 DescriptorT* descriptor, typename DescriptorT::OptionsType* options,
4397 internal::FlatAllocator& alloc,
4398 DescriptorPool::ErrorCollector::ErrorLocation error_location,
4399 bool force_merge = false);
4400
4401 void PostProcessFieldFeatures(FieldDescriptor& field,
4402 const FieldDescriptorProto& proto);
4403
4404 // Allocates an array of two strings, the first one is a copy of
4405 // `proto_name`, and the second one is the full name. Full proto name is
4406 // "scope.proto_name" if scope is non-empty and "proto_name" otherwise.
4407 const std::string* AllocateNameStrings(absl::string_view scope,
4408 absl::string_view proto_name,
4409 internal::FlatAllocator& alloc);
4410
4411 // These methods all have the same signature for the sake of the BUILD_ARRAY
4412 // macro, below.
4413 void BuildMessage(const DescriptorProto& proto, const Descriptor* parent,
4414 Descriptor* result, internal::FlatAllocator& alloc);
4415 void BuildFieldOrExtension(const FieldDescriptorProto& proto,
4416 Descriptor* parent, FieldDescriptor* result,
4417 bool is_extension, internal::FlatAllocator& alloc);
BuildField(const FieldDescriptorProto & proto,Descriptor * parent,FieldDescriptor * result,internal::FlatAllocator & alloc)4418 void BuildField(const FieldDescriptorProto& proto, Descriptor* parent,
4419 FieldDescriptor* result, internal::FlatAllocator& alloc) {
4420 BuildFieldOrExtension(proto, parent, result, false, alloc);
4421 }
BuildExtension(const FieldDescriptorProto & proto,Descriptor * parent,FieldDescriptor * result,internal::FlatAllocator & alloc)4422 void BuildExtension(const FieldDescriptorProto& proto, Descriptor* parent,
4423 FieldDescriptor* result, internal::FlatAllocator& alloc) {
4424 BuildFieldOrExtension(proto, parent, result, true, alloc);
4425 }
4426 void BuildExtensionRange(const DescriptorProto::ExtensionRange& proto,
4427 const Descriptor* parent,
4428 Descriptor::ExtensionRange* result,
4429 internal::FlatAllocator& alloc);
4430 void BuildReservedRange(const DescriptorProto::ReservedRange& proto,
4431 const Descriptor* parent,
4432 Descriptor::ReservedRange* result,
4433 internal::FlatAllocator& alloc);
4434 void BuildReservedRange(const EnumDescriptorProto::EnumReservedRange& proto,
4435 const EnumDescriptor* parent,
4436 EnumDescriptor::ReservedRange* result,
4437 internal::FlatAllocator& alloc);
4438 void BuildOneof(const OneofDescriptorProto& proto, Descriptor* parent,
4439 OneofDescriptor* result, internal::FlatAllocator& alloc);
4440 void BuildEnum(const EnumDescriptorProto& proto, const Descriptor* parent,
4441 EnumDescriptor* result, internal::FlatAllocator& alloc);
4442 void BuildEnumValue(const EnumValueDescriptorProto& proto,
4443 const EnumDescriptor* parent, EnumValueDescriptor* result,
4444 internal::FlatAllocator& alloc);
4445 void BuildService(const ServiceDescriptorProto& proto, const void* dummy,
4446 ServiceDescriptor* result, internal::FlatAllocator& alloc);
4447 void BuildMethod(const MethodDescriptorProto& proto,
4448 const ServiceDescriptor* parent, MethodDescriptor* result,
4449 internal::FlatAllocator& alloc);
4450
4451 void CheckFieldJsonNameUniqueness(const DescriptorProto& proto,
4452 const Descriptor* result);
4453 void CheckFieldJsonNameUniqueness(absl::string_view message_name,
4454 const DescriptorProto& message,
4455 const Descriptor* descriptor,
4456 bool use_custom_names);
4457 void CheckEnumValueUniqueness(const EnumDescriptorProto& proto,
4458 const EnumDescriptor* result);
4459
4460 void LogUnusedDependency(const FileDescriptorProto& proto,
4461 const FileDescriptor* result);
4462
4463 // Must be run only after building.
4464 //
4465 // NOTE: Options will not be available during cross-linking, as they
4466 // have not yet been interpreted. Defer any handling of options to the
4467 // Validate*Options methods.
4468 void CrossLinkFile(FileDescriptor* file, const FileDescriptorProto& proto);
4469 void CrossLinkMessage(Descriptor* message, const DescriptorProto& proto);
4470 void CrossLinkField(FieldDescriptor* field,
4471 const FieldDescriptorProto& proto);
4472 void CrossLinkService(ServiceDescriptor* service,
4473 const ServiceDescriptorProto& proto);
4474 void CrossLinkMethod(MethodDescriptor* method,
4475 const MethodDescriptorProto& proto);
4476 void SuggestFieldNumbers(FileDescriptor* file,
4477 const FileDescriptorProto& proto);
4478
4479 // Checks that the extension field matches what is declared.
4480 void CheckExtensionDeclaration(const FieldDescriptor& field,
4481 const FieldDescriptorProto& proto,
4482 absl::string_view declared_full_name,
4483 absl::string_view declared_type_name,
4484 bool is_repeated);
4485 // Checks that the extension field type matches the declared type. It also
4486 // handles message types that look like non-message types such as "fixed64" vs
4487 // ".fixed64".
4488 void CheckExtensionDeclarationFieldType(const FieldDescriptor& field,
4489 const FieldDescriptorProto& proto,
4490 absl::string_view type);
4491
4492 // A helper class for interpreting options.
4493 class OptionInterpreter {
4494 public:
4495 // Creates an interpreter that operates in the context of the pool of the
4496 // specified builder, which must not be nullptr. We don't take ownership of
4497 // the builder.
4498 explicit OptionInterpreter(DescriptorBuilder* builder);
4499 OptionInterpreter(const OptionInterpreter&) = delete;
4500 OptionInterpreter& operator=(const OptionInterpreter&) = delete;
4501
4502 ~OptionInterpreter();
4503
4504 // Interprets the uninterpreted options in the specified Options message.
4505 // On error, calls AddError() on the underlying builder and returns false.
4506 // Otherwise returns true.
4507 bool InterpretOptionExtensions(OptionsToInterpret* options_to_interpret);
4508
4509 // Interprets the uninterpreted feature options in the specified Options
4510 // message. On error, calls AddError() on the underlying builder and returns
4511 // false. Otherwise returns true.
4512 bool InterpretNonExtensionOptions(OptionsToInterpret* options_to_interpret);
4513
4514 // Updates the given source code info by re-writing uninterpreted option
4515 // locations to refer to the corresponding interpreted option.
4516 void UpdateSourceCodeInfo(SourceCodeInfo* info);
4517
4518 class AggregateOptionFinder;
4519
4520 private:
4521 bool InterpretOptionsImpl(OptionsToInterpret* options_to_interpret,
4522 bool skip_extensions);
4523
4524 // Interprets uninterpreted_option_ on the specified message, which
4525 // must be the mutable copy of the original options message to which
4526 // uninterpreted_option_ belongs. The given src_path is the source
4527 // location path to the uninterpreted option, and options_path is the
4528 // source location path to the options message. The location paths are
4529 // recorded and then used in UpdateSourceCodeInfo.
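    // The skip_extensions boolean selects which kind of option this call
    // leaves untouched; presumably extension options are skipped when it is
    // true and non-extension options when it is false (TODO: confirm against
    // the implementation).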
4532 bool InterpretSingleOption(Message* options,
4533 const std::vector<int>& src_path,
4534 const std::vector<int>& options_path,
4535 bool skip_extensions);
4536
4537 // Adds the uninterpreted_option to the given options message verbatim.
4538 // Used when AllowUnknownDependencies() is in effect and we can't find
4539 // the option's definition.
4540 void AddWithoutInterpreting(const UninterpretedOption& uninterpreted_option,
4541 Message* options);
4542
4543 // A recursive helper function that drills into the intermediate fields
4544 // in unknown_fields to check if field innermost_field is set on the
4545 // innermost message. Returns false and sets an error if so.
4546 bool ExamineIfOptionIsSet(
4547 std::vector<const FieldDescriptor*>::const_iterator
4548 intermediate_fields_iter,
4549 std::vector<const FieldDescriptor*>::const_iterator
4550 intermediate_fields_end,
4551 const FieldDescriptor* innermost_field,
4552 const std::string& debug_msg_name,
4553 const UnknownFieldSet& unknown_fields);
4554
4555 // Validates the value for the option field of the currently interpreted
4556 // option and then sets it on the unknown_field.
4557 bool SetOptionValue(const FieldDescriptor* option_field,
4558 UnknownFieldSet* unknown_fields);
4559
4560 // Parses an aggregate value for a CPPTYPE_MESSAGE option and
4561 // saves it into *unknown_fields.
4562 bool SetAggregateOption(const FieldDescriptor* option_field,
4563 UnknownFieldSet* unknown_fields);
4564
4565 // Convenience functions to set an int field the right way, depending on
4566 // its wire type (a single int CppType can represent multiple wire types).
4567 void SetInt32(int number, int32_t value, FieldDescriptor::Type type,
4568 UnknownFieldSet* unknown_fields);
4569 void SetInt64(int number, int64_t value, FieldDescriptor::Type type,
4570 UnknownFieldSet* unknown_fields);
4571 void SetUInt32(int number, uint32_t value, FieldDescriptor::Type type,
4572 UnknownFieldSet* unknown_fields);
4573 void SetUInt64(int number, uint64_t value, FieldDescriptor::Type type,
4574 UnknownFieldSet* unknown_fields);
4575
4576 // A helper function that adds an error at the specified location of the
4577 // option we're currently interpreting, and returns false.
AddOptionError(DescriptorPool::ErrorCollector::ErrorLocation location,absl::FunctionRef<std::string ()> make_error)4578 bool AddOptionError(DescriptorPool::ErrorCollector::ErrorLocation location,
4579 absl::FunctionRef<std::string()> make_error) {
4580 builder_->AddError(options_to_interpret_->element_name,
4581 *uninterpreted_option_, location, make_error);
4582 return false;
4583 }
4584
4585 // A helper function that adds an error at the location of the option name
4586 // and returns false.
AddNameError(absl::FunctionRef<std::string ()> make_error)4587 bool AddNameError(absl::FunctionRef<std::string()> make_error) {
4588 #ifdef PROTOBUF_INTERNAL_IGNORE_FIELD_NAME_ERRORS_
4589 return true;
4590 #else // PROTOBUF_INTERNAL_IGNORE_FIELD_NAME_ERRORS_
4591 return AddOptionError(DescriptorPool::ErrorCollector::OPTION_NAME,
4592 make_error);
4593 #endif // PROTOBUF_INTERNAL_IGNORE_FIELD_NAME_ERRORS_
4594 }
4595
4596 // A helper function that adds an error at the location of the option name
4597 // and returns false.
AddValueError(absl::FunctionRef<std::string ()> make_error)4598 bool AddValueError(absl::FunctionRef<std::string()> make_error) {
4599 return AddOptionError(DescriptorPool::ErrorCollector::OPTION_VALUE,
4600 make_error);
4601 }
4602
4603 // We interpret against this builder's pool. Is never nullptr. We don't own
4604 // this pointer.
4605 DescriptorBuilder* builder_;
4606
4607 // The options we're currently interpreting, or nullptr if we're not in a
4608 // call to InterpretOptions.
4609 const OptionsToInterpret* options_to_interpret_;
4610
4611 // The option we're currently interpreting within options_to_interpret_, or
4612 // nullptr if we're not in a call to InterpretOptions(). This points to a
4613 // submessage of the original option, not the mutable copy. Therefore we
4614 // can use it to find locations recorded by the parser.
4615 const UninterpretedOption* uninterpreted_option_;
4616
4617 // This maps the element path of uninterpreted options to the element path
4618 // of the resulting interpreted option. This is used to modify a file's
4619 // source code info to account for option interpretation.
4620 absl::flat_hash_map<std::vector<int>, std::vector<int>> interpreted_paths_;
4621
4622 // This maps the path to a repeated option field to the known number of
    // elements the field contains. This is used to compute the index portion
    // of the element path when interpreting a single option.
4625 absl::flat_hash_map<std::vector<int>, int> repeated_option_counts_;
4626
4627 // Factory used to create the dynamic messages we need to parse
4628 // any aggregate option values we encounter.
4629 DynamicMessageFactory dynamic_factory_;
4630 };
4631
4632 // Work-around for broken compilers: According to the C++ standard,
4633 // OptionInterpreter should have access to the private members of any class
4634 // which has declared DescriptorBuilder as a friend. Unfortunately some old
4635 // versions of GCC and other compilers do not implement this correctly. So,
4636 // we have to have these intermediate methods to provide access. We also
4637 // redundantly declare OptionInterpreter a friend just to make things extra
4638 // clear for these bad compilers.
4639 friend class OptionInterpreter;
4640 friend class OptionInterpreter::AggregateOptionFinder;
4641
get_allow_unknown(const DescriptorPool * pool)4642 static inline bool get_allow_unknown(const DescriptorPool* pool) {
4643 return pool->allow_unknown_;
4644 }
get_enforce_weak(const DescriptorPool * pool)4645 static inline bool get_enforce_weak(const DescriptorPool* pool) {
4646 return pool->enforce_weak_;
4647 }
get_is_placeholder(const Descriptor * descriptor)4648 static inline bool get_is_placeholder(const Descriptor* descriptor) {
4649 return descriptor != nullptr && descriptor->is_placeholder_;
4650 }
assert_mutex_held(const DescriptorPool * pool)4651 static inline void assert_mutex_held(const DescriptorPool* pool) {
4652 if (pool->mutex_ != nullptr) {
4653 pool->mutex_->AssertHeld();
4654 }
4655 }
4656
4657 // Must be run only after options have been interpreted.
4658 //
4659 // NOTE: Validation code must only reference the options in the mutable
4660 // descriptors, which are the ones that have been interpreted. The const
4661 // proto references are passed in only so they can be provided to calls to
4662 // AddError(). Do not look at their options, which have not been interpreted.
4663 void ValidateOptions(const FileDescriptor* file,
4664 const FileDescriptorProto& proto);
4665 void ValidateFileFeatures(const FileDescriptor* file,
4666 const FileDescriptorProto& proto);
4667 void ValidateOptions(const Descriptor* message, const DescriptorProto& proto);
4668 void ValidateOptions(const OneofDescriptor* oneof,
4669 const OneofDescriptorProto& proto);
4670 void ValidateOptions(const FieldDescriptor* field,
4671 const FieldDescriptorProto& proto);
4672 void ValidateFieldFeatures(const FieldDescriptor* field,
4673 const FieldDescriptorProto& proto);
4674 void ValidateOptions(const EnumDescriptor* enm,
4675 const EnumDescriptorProto& proto);
4676 void ValidateOptions(const EnumValueDescriptor* enum_value,
4677 const EnumValueDescriptorProto& proto);
ValidateOptions(const Descriptor::ExtensionRange * range,const DescriptorProto::ExtensionRange & proto)4678 void ValidateOptions(const Descriptor::ExtensionRange* range,
4679 const DescriptorProto::ExtensionRange& proto) {}
4680 void ValidateExtensionRangeOptions(const DescriptorProto& proto,
4681 const Descriptor& message);
4682 void ValidateExtensionDeclaration(
4683 absl::string_view full_name,
4684 const RepeatedPtrField<ExtensionRangeOptions_Declaration>& declarations,
4685 const DescriptorProto_ExtensionRange& proto,
4686 absl::flat_hash_set<absl::string_view>& full_name_set);
4687 void ValidateOptions(const ServiceDescriptor* service,
4688 const ServiceDescriptorProto& proto);
4689 void ValidateOptions(const MethodDescriptor* method,
4690 const MethodDescriptorProto& proto);
4691 void ValidateProto3(const FileDescriptor* file,
4692 const FileDescriptorProto& proto);
4693 void ValidateProto3Message(const Descriptor* message,
4694 const DescriptorProto& proto);
4695 void ValidateProto3Field(const FieldDescriptor* field,
4696 const FieldDescriptorProto& proto);
4697
4698 // Returns true if the map entry message is compatible with the
4699 // auto-generated entry message from map fields syntax.
4700 bool ValidateMapEntry(const FieldDescriptor* field,
4701 const FieldDescriptorProto& proto);
4702
4703 // Recursively detects naming conflicts with map entry types for a
4704 // better error message.
4705 void DetectMapConflicts(const Descriptor* message,
4706 const DescriptorProto& proto);
4707
4708 void ValidateJSType(const FieldDescriptor* field,
4709 const FieldDescriptorProto& proto);
4710 };
4711
BuildFile(const FileDescriptorProto & proto)4712 const FileDescriptor* DescriptorPool::BuildFile(
4713 const FileDescriptorProto& proto) {
4714 return BuildFileCollectingErrors(proto, nullptr);
4715 }
4716
BuildFileCollectingErrors(const FileDescriptorProto & proto,ErrorCollector * error_collector)4717 const FileDescriptor* DescriptorPool::BuildFileCollectingErrors(
4718 const FileDescriptorProto& proto, ErrorCollector* error_collector) {
4719 ABSL_CHECK(fallback_database_ == nullptr)
4720 << "Cannot call BuildFile on a DescriptorPool that uses a "
4721 "DescriptorDatabase. You must instead find a way to get your file "
4722 "into the underlying database.";
4723 ABSL_CHECK(mutex_ == nullptr); // Implied by the above ABSL_CHECK.
4724 tables_->known_bad_symbols_.clear();
4725 tables_->known_bad_files_.clear();
4726 build_started_ = true;
4727 DeferredValidation deferred_validation(this, error_collector);
4728 const FileDescriptor* file =
4729 DescriptorBuilder::New(this, tables_.get(), deferred_validation,
4730 error_collector)
4731 ->BuildFile(proto);
4732 if (deferred_validation.Validate()) {
4733 return file;
4734 }
4735 return nullptr;
4736 }
4737
BuildFileFromDatabase(const FileDescriptorProto & proto,DeferredValidation & deferred_validation) const4738 const FileDescriptor* DescriptorPool::BuildFileFromDatabase(
4739 const FileDescriptorProto& proto,
4740 DeferredValidation& deferred_validation) const {
4741 mutex_->AssertHeld();
4742 build_started_ = true;
4743 if (tables_->known_bad_files_.contains(proto.name())) {
4744 return nullptr;
4745 }
4746 const FileDescriptor* result;
4747 const auto build_file = [&] {
4748 result = DescriptorBuilder::New(this, tables_.get(), deferred_validation,
4749 default_error_collector_)
4750 ->BuildFile(proto);
4751 };
4752 if (dispatcher_ != nullptr) {
4753 (*dispatcher_)(build_file);
4754 } else {
4755 build_file();
4756 }
4757 if (result == nullptr) {
4758 tables_->known_bad_files_.insert(proto.name());
4759 }
4760 return result;
4761 }
4762
SetFeatureSetDefaults(FeatureSetDefaults spec)4763 absl::Status DescriptorPool::SetFeatureSetDefaults(FeatureSetDefaults spec) {
4764 if (build_started_) {
4765 return absl::FailedPreconditionError(
4766 "Feature set defaults can't be changed once the pool has started "
4767 "building.");
4768 }
4769 if (spec.minimum_edition() > spec.maximum_edition()) {
4770 return absl::InvalidArgumentError(
4771 absl::StrCat("Invalid edition range ", spec.minimum_edition(), " to ",
4772 spec.maximum_edition(), "."));
4773 }
4774 Edition prev_edition = EDITION_UNKNOWN;
4775 for (const auto& edition_default : spec.defaults()) {
4776 if (edition_default.edition() == EDITION_UNKNOWN) {
4777 return absl::InvalidArgumentError(absl::StrCat(
4778 "Invalid edition ", edition_default.edition(), " specified."));
4779 }
4780 if (edition_default.edition() <= prev_edition) {
4781 return absl::InvalidArgumentError(absl::StrCat(
4782 "Feature set defaults are not strictly increasing. Edition ",
4783 prev_edition, " is greater than or equal to edition ",
4784 edition_default.edition(), "."));
4785 }
4786 prev_edition = edition_default.edition();
4787 }
4788 feature_set_defaults_spec_ =
4789 absl::make_unique<FeatureSetDefaults>(std::move(spec));
4790 return absl::OkStatus();
4791 }
4792
// Constructs a builder bound to `pool` and `tables`, reporting through
// `error_collector` and `deferred_validation`. The builder starts with no
// errors recorded, no suspected undeclared dependency, and an empty
// "resolved but undefined" name.
DescriptorBuilder::DescriptorBuilder(
    const DescriptorPool* pool, DescriptorPool::Tables* tables,
    DescriptorPool::DeferredValidation& deferred_validation,
    DescriptorPool::ErrorCollector* error_collector)
    : pool_(pool),
      tables_(tables),
      deferred_validation_(deferred_validation),
      error_collector_(error_collector),
      had_errors_(false),
      possible_undeclared_dependency_(nullptr),
      undefine_resolved_name_("") {
  // Ensure that any lazily loaded static initializers from the generated pool
  // (e.g. from bootstrapped protos) are run before building any descriptors. We
  // have to avoid registering these pre-main, because we need to ensure that
  // the linker --gc-sections step can strip out the full runtime if it is
  // unused.
  //
  // The initializer is a comma expression: the registration call is evaluated
  // for its side effect and the variable itself just receives
  // std::true_type{}. Being a function-local static, the initializer runs
  // only the first time a builder is constructed.
  PROTOBUF_UNUSED static std::true_type lazy_register =
      (internal::ExtensionSet::RegisterMessageExtension(
           &FeatureSet::default_instance(), pb::cpp.number(),
           FieldDescriptor::TYPE_MESSAGE, false, false,
           &pb::CppFeatures::default_instance(),
           nullptr,
           internal::LazyAnnotation::kUndefined),
       std::true_type{});
}
4818
// Defaulted destructor, defined out of line in this translation unit.
DescriptorBuilder::~DescriptorBuilder() = default;
4820
AddError(const absl::string_view element_name,const Message & descriptor,DescriptorPool::ErrorCollector::ErrorLocation location,absl::FunctionRef<std::string ()> make_error)4821 PROTOBUF_NOINLINE void DescriptorBuilder::AddError(
4822 const absl::string_view element_name, const Message& descriptor,
4823 DescriptorPool::ErrorCollector::ErrorLocation location,
4824 absl::FunctionRef<std::string()> make_error) {
4825 std::string error = make_error();
4826 if (error_collector_ == nullptr) {
4827 if (!had_errors_) {
4828 ABSL_LOG(ERROR) << "Invalid proto descriptor for file \"" << filename_
4829 << "\":";
4830 }
4831 ABSL_LOG(ERROR) << " " << element_name << ": " << error;
4832 } else {
4833 error_collector_->RecordError(filename_, element_name, &descriptor,
4834 location, error);
4835 }
4836 had_errors_ = true;
4837 }
4838
AddError(const absl::string_view element_name,const Message & descriptor,DescriptorPool::ErrorCollector::ErrorLocation location,const char * error)4839 PROTOBUF_NOINLINE void DescriptorBuilder::AddError(
4840 const absl::string_view element_name, const Message& descriptor,
4841 DescriptorPool::ErrorCollector::ErrorLocation location, const char* error) {
4842 AddError(element_name, descriptor, location, [error] { return error; });
4843 }
4844
AddNotDefinedError(const absl::string_view element_name,const Message & descriptor,DescriptorPool::ErrorCollector::ErrorLocation location,const absl::string_view undefined_symbol)4845 PROTOBUF_NOINLINE void DescriptorBuilder::AddNotDefinedError(
4846 const absl::string_view element_name, const Message& descriptor,
4847 DescriptorPool::ErrorCollector::ErrorLocation location,
4848 const absl::string_view undefined_symbol) {
4849 if (possible_undeclared_dependency_ == nullptr &&
4850 undefine_resolved_name_.empty()) {
4851 AddError(element_name, descriptor, location, [&] {
4852 return absl::StrCat("\"", undefined_symbol, "\" is not defined.");
4853 });
4854 } else {
4855 if (possible_undeclared_dependency_ != nullptr) {
4856 AddError(element_name, descriptor, location, [&] {
4857 return absl::StrCat("\"", possible_undeclared_dependency_name_,
4858 "\" seems to be defined in \"",
4859 possible_undeclared_dependency_->name(),
4860 "\", which is not "
4861 "imported by \"",
4862 filename_,
4863 "\". To use it here, please "
4864 "add the necessary import.");
4865 });
4866 }
4867 if (!undefine_resolved_name_.empty()) {
4868 AddError(element_name, descriptor, location, [&] {
4869 return absl::StrCat(
4870 "\"", undefined_symbol, "\" is resolved to \"",
4871 undefine_resolved_name_,
4872 "\", which is not defined. "
4873 "The innermost scope is searched first in name resolution. "
4874 "Consider using a leading '.'(i.e., \".",
4875 undefined_symbol, "\") to start from the outermost scope.");
4876 });
4877 }
4878 }
4879 }
4880
AddWarning(const absl::string_view element_name,const Message & descriptor,DescriptorPool::ErrorCollector::ErrorLocation location,absl::FunctionRef<std::string ()> make_error)4881 PROTOBUF_NOINLINE void DescriptorBuilder::AddWarning(
4882 const absl::string_view element_name, const Message& descriptor,
4883 DescriptorPool::ErrorCollector::ErrorLocation location,
4884 absl::FunctionRef<std::string()> make_error) {
4885 std::string error = make_error();
4886 if (error_collector_ == nullptr) {
4887 ABSL_LOG(WARNING) << filename_ << " " << element_name << ": " << error;
4888 } else {
4889 error_collector_->RecordWarning(filename_, element_name, &descriptor,
4890 location, error);
4891 }
4892 }
4893
AddWarning(const absl::string_view element_name,const Message & descriptor,DescriptorPool::ErrorCollector::ErrorLocation location,const char * error)4894 PROTOBUF_NOINLINE void DescriptorBuilder::AddWarning(
4895 const absl::string_view element_name, const Message& descriptor,
4896 DescriptorPool::ErrorCollector::ErrorLocation location, const char* error) {
4897 AddWarning(element_name, descriptor, location,
4898 [error]() -> std::string { return error; });
4899 }
4900
IsInPackage(const FileDescriptor * file,absl::string_view package_name)4901 bool DescriptorBuilder::IsInPackage(const FileDescriptor* file,
4902 absl::string_view package_name) {
4903 return absl::StartsWith(file->package(), package_name) &&
4904 (file->package().size() == package_name.size() ||
4905 file->package()[package_name.size()] == '.');
4906 }
4907
RecordPublicDependencies(const FileDescriptor * file)4908 void DescriptorBuilder::RecordPublicDependencies(const FileDescriptor* file) {
4909 if (file == nullptr || !dependencies_.insert(file).second) return;
4910 for (int i = 0; file != nullptr && i < file->public_dependency_count(); i++) {
4911 RecordPublicDependencies(file->public_dependency(i));
4912 }
4913 }
4914
FindSymbolNotEnforcingDepsHelper(const DescriptorPool * pool,const absl::string_view name,bool build_it)4915 Symbol DescriptorBuilder::FindSymbolNotEnforcingDepsHelper(
4916 const DescriptorPool* pool, const absl::string_view name, bool build_it) {
4917 // If we are looking at an underlay, we must lock its mutex_, since we are
4918 // accessing the underlay's tables_ directly.
4919 absl::MutexLockMaybe lock((pool == pool_) ? nullptr : pool->mutex_);
4920
4921 Symbol result = pool->tables_->FindSymbol(name);
4922 if (result.IsNull() && pool->underlay_ != nullptr) {
4923 // Symbol not found; check the underlay.
4924 result = FindSymbolNotEnforcingDepsHelper(pool->underlay_, name);
4925 }
4926
4927 if (result.IsNull()) {
4928 // With lazily_build_dependencies_, a symbol lookup at cross link time is
4929 // not guaranteed to be successful. In most cases, build_it will be false,
4930 // which intentionally prevents us from building an import until it's
4931 // actually needed. In some cases, like registering an extension, we want
4932 // to build the file containing the symbol, and build_it will be set.
4933 // Also, build_it will be true when !lazily_build_dependencies_, to provide
4934 // better error reporting of missing dependencies.
4935 if (build_it &&
4936 pool->TryFindSymbolInFallbackDatabase(name, deferred_validation_)) {
4937 result = pool->tables_->FindSymbol(name);
4938 }
4939 }
4940
4941 return result;
4942 }
4943
FindSymbolNotEnforcingDeps(const absl::string_view name,bool build_it)4944 Symbol DescriptorBuilder::FindSymbolNotEnforcingDeps(
4945 const absl::string_view name, bool build_it) {
4946 Symbol result = FindSymbolNotEnforcingDepsHelper(pool_, name, build_it);
4947 // Only find symbols which were defined in this file or one of its
4948 // dependencies.
4949 const FileDescriptor* file = result.GetFile();
4950 if ((file == file_ || dependencies_.contains(file)) && !result.IsPackage()) {
4951 unused_dependency_.erase(file);
4952 }
4953 return result;
4954 }
4955
FindSymbol(const absl::string_view name,bool build_it)4956 Symbol DescriptorBuilder::FindSymbol(const absl::string_view name,
4957 bool build_it) {
4958 Symbol result = FindSymbolNotEnforcingDeps(name, build_it);
4959
4960 if (result.IsNull()) return result;
4961
4962 if (!pool_->enforce_dependencies_) {
4963 // Hack for CompilerUpgrader, and also used for lazily_build_dependencies_
4964 return result;
4965 }
4966
4967 // Only find symbols which were defined in this file or one of its
4968 // dependencies.
4969 const FileDescriptor* file = result.GetFile();
4970 if (file == file_ || dependencies_.contains(file)) {
4971 return result;
4972 }
4973
4974 if (result.IsPackage()) {
4975 // Arg, this is overcomplicated. The symbol is a package name. It could
4976 // be that the package was defined in multiple files. result.GetFile()
4977 // returns the first file we saw that used this package. We've determined
4978 // that that file is not a direct dependency of the file we are currently
4979 // building, but it could be that some other file which *is* a direct
4980 // dependency also defines the same package. We can't really rule out this
4981 // symbol unless none of the dependencies define it.
4982 if (IsInPackage(file_, name)) return result;
4983 for (const auto* dep : dependencies_) {
4984 // Note: A dependency may be nullptr if it was not found or had errors.
4985 if (dep != nullptr && IsInPackage(dep, name)) return result;
4986 }
4987 }
4988
4989 possible_undeclared_dependency_ = file;
4990 possible_undeclared_dependency_name_ = std::string(name);
4991 return Symbol();
4992 }
4993
LookupSymbolNoPlaceholder(const absl::string_view name,const absl::string_view relative_to,ResolveMode resolve_mode,bool build_it)4994 Symbol DescriptorBuilder::LookupSymbolNoPlaceholder(
4995 const absl::string_view name, const absl::string_view relative_to,
4996 ResolveMode resolve_mode, bool build_it) {
4997 possible_undeclared_dependency_ = nullptr;
4998 undefine_resolved_name_.clear();
4999
5000 if (!name.empty() && name[0] == '.') {
5001 // Fully-qualified name.
5002 return FindSymbol(name.substr(1), build_it);
5003 }
5004
5005 // If name is something like "Foo.Bar.baz", and symbols named "Foo" are
5006 // defined in multiple parent scopes, we only want to find "Bar.baz" in the
5007 // innermost one. E.g., the following should produce an error:
5008 // message Bar { message Baz {} }
5009 // message Foo {
5010 // message Bar {
5011 // }
5012 // optional Bar.Baz baz = 1;
5013 // }
5014 // So, we look for just "Foo" first, then look for "Bar.baz" within it if
5015 // found.
5016 std::string::size_type name_dot_pos = name.find_first_of('.');
5017 absl::string_view first_part_of_name;
5018 if (name_dot_pos == std::string::npos) {
5019 first_part_of_name = name;
5020 } else {
5021 first_part_of_name = name.substr(0, name_dot_pos);
5022 }
5023
5024 std::string scope_to_try(relative_to);
5025
5026 while (true) {
5027 // Chop off the last component of the scope.
5028 std::string::size_type dot_pos = scope_to_try.find_last_of('.');
5029 if (dot_pos == std::string::npos) {
5030 return FindSymbol(name, build_it);
5031 } else {
5032 scope_to_try.erase(dot_pos);
5033 }
5034
5035 // Append ".first_part_of_name" and try to find.
5036 std::string::size_type old_size = scope_to_try.size();
5037 absl::StrAppend(&scope_to_try, ".", first_part_of_name);
5038 Symbol result = FindSymbol(scope_to_try, build_it);
5039 if (!result.IsNull()) {
5040 if (first_part_of_name.size() < name.size()) {
5041 // name is a compound symbol, of which we only found the first part.
5042 // Now try to look up the rest of it.
5043 if (result.IsAggregate()) {
5044 absl::StrAppend(&scope_to_try,
5045 name.substr(first_part_of_name.size()));
5046 result = FindSymbol(scope_to_try, build_it);
5047 if (result.IsNull()) {
5048 undefine_resolved_name_ = scope_to_try;
5049 }
5050 return result;
5051 } else {
5052 // We found a symbol but it's not an aggregate. Continue the loop.
5053 }
5054 } else {
5055 if (resolve_mode == LOOKUP_TYPES && !result.IsType()) {
5056 // We found a symbol but it's not a type. Continue the loop.
5057 } else {
5058 return result;
5059 }
5060 }
5061 }
5062
5063 // Not found. Remove the name so we can try again.
5064 scope_to_try.erase(old_size);
5065 }
5066 }
5067
LookupSymbol(const absl::string_view name,const absl::string_view relative_to,DescriptorPool::PlaceholderType placeholder_type,ResolveMode resolve_mode,bool build_it)5068 Symbol DescriptorBuilder::LookupSymbol(
5069 const absl::string_view name, const absl::string_view relative_to,
5070 DescriptorPool::PlaceholderType placeholder_type, ResolveMode resolve_mode,
5071 bool build_it) {
5072 Symbol result =
5073 LookupSymbolNoPlaceholder(name, relative_to, resolve_mode, build_it);
5074 if (result.IsNull() && pool_->allow_unknown_) {
5075 // Not found, but AllowUnknownDependencies() is enabled. Return a
5076 // placeholder instead.
5077 result = pool_->NewPlaceholderWithMutexHeld(name, placeholder_type);
5078 }
5079 return result;
5080 }
5081
ValidateQualifiedName(absl::string_view name)5082 static bool ValidateQualifiedName(absl::string_view name) {
5083 bool last_was_period = false;
5084
5085 for (char character : name) {
5086 // I don't trust isalnum() due to locales. :(
5087 if (('a' <= character && character <= 'z') ||
5088 ('A' <= character && character <= 'Z') ||
5089 ('0' <= character && character <= '9') || (character == '_')) {
5090 last_was_period = false;
5091 } else if (character == '.') {
5092 if (last_was_period) return false;
5093 last_was_period = true;
5094 } else {
5095 return false;
5096 }
5097 }
5098
5099 return !name.empty() && !last_was_period;
5100 }
5101
NewPlaceholder(absl::string_view name,PlaceholderType placeholder_type) const5102 Symbol DescriptorPool::NewPlaceholder(absl::string_view name,
5103 PlaceholderType placeholder_type) const {
5104 absl::MutexLockMaybe lock(mutex_);
5105 return NewPlaceholderWithMutexHeld(name, placeholder_type);
5106 }
5107
// Creates a placeholder enum or message named `name`, housed in its own
// placeholder file called "<full name>.placeholder.proto".
//
// `name` may carry a leading '.', which is stripped to form the full name and
// is remembered through is_unqualified_placeholder_. Everything before the
// last '.' of the full name becomes the placeholder file's package.
//
// Returns a default-constructed Symbol when `name` fails
// ValidateQualifiedName(). Otherwise returns a Symbol for the new enum
// (PLACEHOLDER_ENUM) or message (any other type). For
// PLACEHOLDER_EXTENDABLE_MESSAGE the message also gets a single extension
// range covering [1, FieldDescriptor::kMaxNumber].
//
// If the pool has a mutex_, it must already be held (asserted).
Symbol DescriptorPool::NewPlaceholderWithMutexHeld(
    absl::string_view name, PlaceholderType placeholder_type) const {
  if (mutex_) {
    mutex_->AssertHeld();
  }
  // Compute names.
  absl::string_view placeholder_full_name;
  absl::string_view placeholder_name;
  const std::string* placeholder_package;

  // Validation rejects an empty name, so name[0] below is always in range.
  if (!ValidateQualifiedName(name)) return Symbol();
  if (name[0] == '.') {
    // Fully-qualified.
    placeholder_full_name = name.substr(1);
  } else {
    placeholder_full_name = name;
  }

  // Create the placeholders.
  // The planned counts below mirror the allocations made further down in this
  // function and in NewPlaceholderFileWithMutexHeld().
  internal::FlatAllocator alloc;
  alloc.PlanArray<FileDescriptor>(1);
  alloc.PlanArray<std::string>(2);  // file name and package.
  if (placeholder_type == PLACEHOLDER_ENUM) {
    alloc.PlanArray<EnumDescriptor>(1);
    alloc.PlanArray<EnumValueDescriptor>(1);
    alloc.PlanArray<std::string>(2);  // names for the descriptor.
    alloc.PlanArray<std::string>(2);  // names for the value.
  } else {
    alloc.PlanArray<Descriptor>(1);
    alloc.PlanArray<std::string>(2);  // names for the descriptor.
    if (placeholder_type == PLACEHOLDER_EXTENDABLE_MESSAGE) {
      alloc.PlanArray<Descriptor::ExtensionRange>(1);
    }
  }
  alloc.FinalizePlanning(tables_);

  // Split the full name at its last '.' into package and simple name.
  const std::string::size_type dotpos = placeholder_full_name.find_last_of('.');
  if (dotpos != std::string::npos) {
    placeholder_package =
        alloc.AllocateStrings(placeholder_full_name.substr(0, dotpos));
    placeholder_name = placeholder_full_name.substr(dotpos + 1);
  } else {
    placeholder_package = alloc.AllocateStrings("");
    placeholder_name = placeholder_full_name;
  }

  FileDescriptor* placeholder_file = NewPlaceholderFileWithMutexHeld(
      absl::StrCat(placeholder_full_name, ".placeholder.proto"), alloc);
  // Replace the package the helper assigned with the one computed above.
  placeholder_file->package_ = placeholder_package;

  if (placeholder_type == PLACEHOLDER_ENUM) {
    placeholder_file->enum_type_count_ = 1;
    placeholder_file->enum_types_ = alloc.AllocateArray<EnumDescriptor>(1);

    EnumDescriptor* placeholder_enum = &placeholder_file->enum_types_[0];
    // Zero every field first; only the non-zero ones are assigned below.
    memset(static_cast<void*>(placeholder_enum), 0, sizeof(*placeholder_enum));

    placeholder_enum->all_names_ =
        alloc.AllocateStrings(placeholder_name, placeholder_full_name);
    placeholder_enum->file_ = placeholder_file;
    placeholder_enum->options_ = &EnumOptions::default_instance();
    placeholder_enum->proto_features_ = &FeatureSet::default_instance();
    placeholder_enum->merged_features_ = &FeatureSet::default_instance();
    placeholder_enum->is_placeholder_ = true;
    placeholder_enum->is_unqualified_placeholder_ = (name[0] != '.');

    // Enums must have at least one value.
    placeholder_enum->value_count_ = 1;
    placeholder_enum->values_ = alloc.AllocateArray<EnumValueDescriptor>(1);
    // Disable fast-path lookup for this enum.
    placeholder_enum->sequential_value_limit_ = -1;

    EnumValueDescriptor* placeholder_value = &placeholder_enum->values_[0];
    memset(static_cast<void*>(placeholder_value), 0,
           sizeof(*placeholder_value));

    // Note that enum value names are siblings of their type, not children.
    placeholder_value->all_names_ = alloc.AllocateStrings(
        "PLACEHOLDER_VALUE",
        placeholder_package->empty()
            ? "PLACEHOLDER_VALUE"
            : absl::StrCat(*placeholder_package, ".PLACEHOLDER_VALUE"));

    placeholder_value->number_ = 0;
    placeholder_value->type_ = placeholder_enum;
    placeholder_value->options_ = &EnumValueOptions::default_instance();

    return Symbol(placeholder_enum);
  } else {
    placeholder_file->message_type_count_ = 1;
    placeholder_file->message_types_ = alloc.AllocateArray<Descriptor>(1);

    Descriptor* placeholder_message = &placeholder_file->message_types_[0];
    // Zero every field first; only the non-zero ones are assigned below.
    memset(static_cast<void*>(placeholder_message), 0,
           sizeof(*placeholder_message));

    placeholder_message->all_names_ =
        alloc.AllocateStrings(placeholder_name, placeholder_full_name);
    placeholder_message->file_ = placeholder_file;
    placeholder_message->options_ = &MessageOptions::default_instance();
    placeholder_message->proto_features_ = &FeatureSet::default_instance();
    placeholder_message->merged_features_ = &FeatureSet::default_instance();
    placeholder_message->is_placeholder_ = true;
    placeholder_message->is_unqualified_placeholder_ = (name[0] != '.');

    if (placeholder_type == PLACEHOLDER_EXTENDABLE_MESSAGE) {
      placeholder_message->extension_range_count_ = 1;
      placeholder_message->extension_ranges_ =
          alloc.AllocateArray<Descriptor::ExtensionRange>(1);
      placeholder_message->extension_ranges_[0].start_ = 1;
      // kMaxNumber + 1 because ExtensionRange::end is exclusive.
      placeholder_message->extension_ranges_[0].end_ =
          FieldDescriptor::kMaxNumber + 1;
      placeholder_message->extension_ranges_[0].options_ = nullptr;
      placeholder_message->extension_ranges_[0].proto_features_ =
          &FeatureSet::default_instance();
      placeholder_message->extension_ranges_[0].merged_features_ =
          &FeatureSet::default_instance();
    }

    return Symbol(placeholder_message);
  }
}
5231
NewPlaceholderFile(const absl::string_view name) const5232 FileDescriptor* DescriptorPool::NewPlaceholderFile(
5233 const absl::string_view name) const {
5234 absl::MutexLockMaybe lock(mutex_);
5235 internal::FlatAllocator alloc;
5236 alloc.PlanArray<FileDescriptor>(1);
5237 alloc.PlanArray<std::string>(1);
5238 alloc.FinalizePlanning(tables_);
5239
5240 return NewPlaceholderFileWithMutexHeld(name, alloc);
5241 }
5242
NewPlaceholderFileWithMutexHeld(const absl::string_view name,internal::FlatAllocator & alloc) const5243 FileDescriptor* DescriptorPool::NewPlaceholderFileWithMutexHeld(
5244 const absl::string_view name, internal::FlatAllocator& alloc) const {
5245 if (mutex_) {
5246 mutex_->AssertHeld();
5247 }
5248 FileDescriptor* placeholder = alloc.AllocateArray<FileDescriptor>(1);
5249 memset(static_cast<void*>(placeholder), 0, sizeof(*placeholder));
5250
5251 placeholder->name_ = alloc.AllocateStrings(name);
5252 placeholder->package_ = &internal::GetEmptyString();
5253 placeholder->pool_ = this;
5254 placeholder->options_ = &FileOptions::default_instance();
5255 placeholder->proto_features_ = &FeatureSet::default_instance();
5256 placeholder->merged_features_ = &FeatureSet::default_instance();
5257 placeholder->tables_ = &FileDescriptorTables::GetEmptyInstance();
5258 placeholder->source_code_info_ = &SourceCodeInfo::default_instance();
5259 placeholder->is_placeholder_ = true;
5260 placeholder->finished_building_ = true;
5261 // All other fields are zero or nullptr.
5262
5263 return placeholder;
5264 }
5265
AddSymbol(const absl::string_view full_name,const void * parent,const absl::string_view name,const Message & proto,Symbol symbol)5266 bool DescriptorBuilder::AddSymbol(const absl::string_view full_name,
5267 const void* parent,
5268 const absl::string_view name,
5269 const Message& proto, Symbol symbol) {
5270 // If the caller passed nullptr for the parent, the symbol is at file scope.
5271 // Use its file as the parent instead.
5272 if (parent == nullptr) parent = file_;
5273
5274 if (absl::StrContains(full_name, '\0')) {
5275 AddError(full_name, proto, DescriptorPool::ErrorCollector::NAME, [&] {
5276 return absl::StrCat("\"", full_name, "\" contains null character.");
5277 });
5278 return false;
5279 }
5280 if (tables_->AddSymbol(full_name, symbol)) {
5281 if (!file_tables_->AddAliasUnderParent(parent, name, symbol)) {
5282 // This is only possible if there was already an error adding something of
5283 // the same name.
5284 if (!had_errors_) {
5285 ABSL_DLOG(FATAL) << "\"" << full_name
5286 << "\" not previously defined in "
5287 "symbols_by_name_, but was defined in "
5288 "symbols_by_parent_; this shouldn't be possible.";
5289 }
5290 return false;
5291 }
5292 return true;
5293 } else {
5294 const FileDescriptor* other_file = tables_->FindSymbol(full_name).GetFile();
5295 if (other_file == file_) {
5296 std::string::size_type dot_pos = full_name.find_last_of('.');
5297 if (dot_pos == std::string::npos) {
5298 AddError(full_name, proto, DescriptorPool::ErrorCollector::NAME, [&] {
5299 return absl::StrCat("\"", full_name, "\" is already defined.");
5300 });
5301 } else {
5302 AddError(full_name, proto, DescriptorPool::ErrorCollector::NAME, [&] {
5303 return absl::StrCat("\"", full_name.substr(dot_pos + 1),
5304 "\" is already defined in \"",
5305 full_name.substr(0, dot_pos), "\".");
5306 });
5307 }
5308 } else {
5309 // Symbol seems to have been defined in a different file.
5310 AddError(full_name, proto, DescriptorPool::ErrorCollector::NAME, [&] {
5311 return absl::StrCat(
5312 "\"", full_name, "\" is already defined in file \"",
5313 (other_file == nullptr ? "null" : other_file->name()), "\".");
5314 });
5315 }
5316 return false;
5317 }
5318 }
5319
AddPackage(const absl::string_view name,const Message & proto,FileDescriptor * file,bool toplevel)5320 void DescriptorBuilder::AddPackage(const absl::string_view name,
5321 const Message& proto, FileDescriptor* file,
5322 bool toplevel) {
5323 if (absl::StrContains(name, '\0')) {
5324 AddError(name, proto, DescriptorPool::ErrorCollector::NAME, [&] {
5325 return absl::StrCat("\"", name, "\" contains null character.");
5326 });
5327 return;
5328 }
5329
5330 Symbol existing_symbol = tables_->FindSymbol(name);
5331 // It's OK to redefine a package.
5332 if (existing_symbol.IsNull()) {
5333 if (toplevel) {
5334 // It is the toplevel package name, so insert the descriptor directly.
5335 tables_->AddSymbol(file->package(), Symbol(file));
5336 } else {
5337 auto* package = tables_->Allocate<Symbol::Subpackage>();
5338 // If the name is the package name, then it is already in the arena.
5339 // If not, copy it there. It came from the call to AddPackage below.
5340 package->name_size = static_cast<int>(name.size());
5341 package->file = file;
5342 tables_->AddSymbol(name, Symbol(package));
5343 }
5344 // Also add parent package, if any.
5345 std::string::size_type dot_pos = name.find_last_of('.');
5346 if (dot_pos == std::string::npos) {
5347 // No parents.
5348 ValidateSymbolName(name, name, proto);
5349 } else {
5350 // Has parent.
5351 AddPackage(name.substr(0, dot_pos), proto, file, false);
5352 ValidateSymbolName(name.substr(dot_pos + 1), name, proto);
5353 }
5354 } else if (!existing_symbol.IsPackage()) {
5355 // Symbol seems to have been defined in a different file.
5356 const FileDescriptor* other_file = existing_symbol.GetFile();
5357 AddError(name, proto, DescriptorPool::ErrorCollector::NAME, [&] {
5358 return absl::StrCat("\"", name,
5359 "\" is already defined (as something other than "
5360 "a package) in file \"",
5361 (other_file == nullptr ? "null" : other_file->name()),
5362 "\".");
5363 });
5364 }
5365 }
5366
ValidateSymbolName(const absl::string_view name,const absl::string_view full_name,const Message & proto)5367 void DescriptorBuilder::ValidateSymbolName(const absl::string_view name,
5368 const absl::string_view full_name,
5369 const Message& proto) {
5370 if (name.empty()) {
5371 AddError(full_name, proto, DescriptorPool::ErrorCollector::NAME,
5372 "Missing name.");
5373 } else {
5374 for (char character : name) {
5375 // I don't trust isalnum() due to locales. :(
5376 if ((character < 'a' || 'z' < character) &&
5377 (character < 'A' || 'Z' < character) &&
5378 (character < '0' || '9' < character) && (character != '_')) {
5379 AddError(full_name, proto, DescriptorPool::ErrorCollector::NAME, [&] {
5380 return absl::StrCat("\"", name, "\" is not a valid identifier.");
5381 });
5382 return;
5383 }
5384 }
5385 }
5386 }
5387
5388 // -------------------------------------------------------------------
5389
5390 // This generic implementation is good for all descriptors except
5391 // FileDescriptor.
5392 template <class DescriptorT>
AllocateOptions(const typename DescriptorT::Proto & proto,DescriptorT * descriptor,int options_field_tag,absl::string_view option_name,internal::FlatAllocator & alloc)5393 void DescriptorBuilder::AllocateOptions(
5394 const typename DescriptorT::Proto& proto, DescriptorT* descriptor,
5395 int options_field_tag, absl::string_view option_name,
5396 internal::FlatAllocator& alloc) {
5397 std::vector<int> options_path;
5398 descriptor->GetLocationPath(&options_path);
5399 options_path.push_back(options_field_tag);
5400 auto options = AllocateOptionsImpl<DescriptorT>(
5401 descriptor->full_name(), descriptor->full_name(), proto, options_path,
5402 option_name, alloc);
5403 descriptor->options_ = options;
5404 descriptor->proto_features_ = &FeatureSet::default_instance();
5405 descriptor->merged_features_ = &FeatureSet::default_instance();
5406 }
5407
5408 // We specialize for FileDescriptor.
AllocateOptions(const FileDescriptorProto & proto,FileDescriptor * descriptor,internal::FlatAllocator & alloc)5409 void DescriptorBuilder::AllocateOptions(const FileDescriptorProto& proto,
5410 FileDescriptor* descriptor,
5411 internal::FlatAllocator& alloc) {
5412 std::vector<int> options_path;
5413 options_path.push_back(FileDescriptorProto::kOptionsFieldNumber);
5414 // We add the dummy token so that LookupSymbol does the right thing.
5415 auto options = AllocateOptionsImpl<FileDescriptor>(
5416 absl::StrCat(descriptor->package(), ".dummy"), descriptor->name(), proto,
5417 options_path, "google.protobuf.FileOptions", alloc);
5418 descriptor->options_ = options;
5419 descriptor->proto_features_ = &FeatureSet::default_instance();
5420 descriptor->merged_features_ = &FeatureSet::default_instance();
5421 }
5422
// Produces the options object for a descriptor of type DescriptorT.
//
// Returns the shared default options instance when `proto` has no options, or
// when its options are not initialized (after reporting an error). Otherwise
// returns a freshly allocated copy of the options, made by serializing the
// originals and parsing them back without reflection.
//
// Side effects:
//   - If the copy has uninterpreted options, an entry is queued on
//     options_to_interpret_.
//   - For every unknown field of the original options that matches a known
//     extension of the message named `option_name`, the file defining that
//     extension is removed from unused_dependency_.
template <class DescriptorT>
const typename DescriptorT::OptionsType* DescriptorBuilder::AllocateOptionsImpl(
    absl::string_view name_scope, absl::string_view element_name,
    const typename DescriptorT::Proto& proto,
    absl::Span<const int> options_path, absl::string_view option_name,
    internal::FlatAllocator& alloc) {
  if (!proto.has_options()) {
    return &DescriptorT::OptionsType::default_instance();
  }
  const typename DescriptorT::OptionsType& orig_options = proto.options();

  // NOTE(review): the allocation happens before the early return below, so it
  // is made even on the error path; presumably this keeps the allocator's
  // planned counts consumed -- confirm before reordering.
  auto* options = alloc.AllocateArray<typename DescriptorT::OptionsType>(1);

  if (!orig_options.IsInitialized()) {
    AddError(absl::StrCat(name_scope, ".", element_name), orig_options,
             DescriptorPool::ErrorCollector::OPTION_NAME,
             "Uninterpreted option is missing name or value.");
    return &DescriptorT::OptionsType::default_instance();
  }

  // Copy via a serialize/parse round trip that avoids reflection.
  const bool parse_success =
      internal::ParseNoReflection(orig_options.SerializeAsString(), *options);
  ABSL_DCHECK(parse_success);

  // Don't add to options_to_interpret_ unless there were uninterpreted
  // options. This not only avoids unnecessary work, but prevents a
  // bootstrapping problem when building descriptors for descriptor.proto.
  // descriptor.proto does not contain any uninterpreted options, but
  // attempting to interpret options anyway will cause
  // OptionsType::GetDescriptor() to be called which may then deadlock since
  // we're still trying to build it.
  if (options->uninterpreted_option_size() > 0) {
    options_to_interpret_.push_back(OptionsToInterpret(
        name_scope, element_name, options_path, &orig_options, options));
  }

  // If the custom option is in unknown fields, no need to interpret it.
  // Remove the dependency file from unused_dependency.
  const UnknownFieldSet& unknown_fields = orig_options.unknown_fields();
  if (!unknown_fields.empty()) {
    // Can not use options->GetDescriptor() which may cause deadlock.
    Symbol msg_symbol = tables_->FindSymbol(option_name);
    if (msg_symbol.type() == Symbol::MESSAGE) {
      for (int i = 0; i < unknown_fields.field_count(); ++i) {
        assert_mutex_held(pool_);
        const FieldDescriptor* field =
            pool_->InternalFindExtensionByNumberNoLock(
                msg_symbol.descriptor(), unknown_fields.field(i).number());
        if (field) {
          unused_dependency_.erase(field->file());
        }
      }
    }
  }
  return options;
}
5479
5480 template <class ProtoT, class OptionsT>
InferLegacyProtoFeatures(const ProtoT & proto,const OptionsT & options,Edition edition,FeatureSet & features)5481 static void InferLegacyProtoFeatures(const ProtoT& proto,
5482 const OptionsT& options, Edition edition,
5483 FeatureSet& features) {}
5484
InferLegacyProtoFeatures(const FieldDescriptorProto & proto,const FieldOptions & options,Edition edition,FeatureSet & features)5485 static void InferLegacyProtoFeatures(const FieldDescriptorProto& proto,
5486 const FieldOptions& options,
5487 Edition edition, FeatureSet& features) {
5488 if (!features.MutableExtension(pb::cpp)->has_string_type()) {
5489 if (options.ctype() == FieldOptions::CORD) {
5490 features.MutableExtension(pb::cpp)->set_string_type(
5491 pb::CppFeatures::CORD);
5492 }
5493 }
5494
5495 // Everything below is specifically for proto2/proto.
5496 if (!IsLegacyEdition(edition)) return;
5497
5498 if (proto.label() == FieldDescriptorProto::LABEL_REQUIRED) {
5499 features.set_field_presence(FeatureSet::LEGACY_REQUIRED);
5500 }
5501 if (proto.type() == FieldDescriptorProto::TYPE_GROUP) {
5502 features.set_message_encoding(FeatureSet::DELIMITED);
5503 }
5504 if (options.packed()) {
5505 features.set_repeated_field_encoding(FeatureSet::PACKED);
5506 }
5507 if (edition == Edition::EDITION_PROTO3) {
5508 if (options.has_packed() && !options.packed()) {
5509 features.set_repeated_field_encoding(FeatureSet::EXPANDED);
5510 }
5511 }
5512 }
5513
5514 // TODO: we should update proto code to not need ctype to be set
5515 // when string_type is set.
EnforceCTypeStringTypeConsistency(Edition edition,FieldDescriptor::CppType type,const pb::CppFeatures & cpp_features,FieldOptions & options)5516 static void EnforceCTypeStringTypeConsistency(
5517 Edition edition, FieldDescriptor::CppType type,
5518 const pb::CppFeatures& cpp_features, FieldOptions& options) {
5519 if (&options == &FieldOptions::default_instance()) return;
5520 if (type == FieldDescriptor::CPPTYPE_STRING) {
5521 switch (cpp_features.string_type()) {
5522 case pb::CppFeatures::CORD:
5523 options.set_ctype(FieldOptions::CORD);
5524 break;
5525 default:
5526 break;
5527 }
5528 }
5529 }
5530
5531 template <class DescriptorT>
ResolveFeaturesImpl(Edition edition,const typename DescriptorT::Proto & proto,DescriptorT * descriptor,typename DescriptorT::OptionsType * options,internal::FlatAllocator & alloc,DescriptorPool::ErrorCollector::ErrorLocation error_location,bool force_merge)5532 void DescriptorBuilder::ResolveFeaturesImpl(
5533 Edition edition, const typename DescriptorT::Proto& proto,
5534 DescriptorT* descriptor, typename DescriptorT::OptionsType* options,
5535 internal::FlatAllocator& alloc,
5536 DescriptorPool::ErrorCollector::ErrorLocation error_location,
5537 bool force_merge) {
5538 const FeatureSet& parent_features = GetParentFeatures(descriptor);
5539 descriptor->proto_features_ = &FeatureSet::default_instance();
5540 descriptor->merged_features_ = &FeatureSet::default_instance();
5541
5542 ABSL_CHECK(feature_resolver_.has_value());
5543
5544 if (options->has_features()) {
5545 // Remove the features from the child's options proto to avoid leaking
5546 // internal details.
5547 descriptor->proto_features_ =
5548 tables_->InternFeatureSet(std::move(*options->mutable_features()));
5549 options->clear_features();
5550 }
5551
5552 FeatureSet base_features = *descriptor->proto_features_;
5553
5554 // Handle feature inference from proto2/proto3.
5555 if (IsLegacyEdition(edition)) {
5556 if (descriptor->proto_features_ != &FeatureSet::default_instance()) {
5557 AddError(descriptor->name(), proto, error_location,
5558 "Features are only valid under editions.");
5559 }
5560 }
5561 InferLegacyProtoFeatures(proto, *options, edition, base_features);
5562
5563 if (base_features.ByteSizeLong() == 0 && !force_merge) {
5564 // Nothing to merge, and we aren't forcing it.
5565 descriptor->merged_features_ = &parent_features;
5566 return;
5567 }
5568
5569 // Calculate the merged features for this target.
5570 absl::StatusOr<FeatureSet> merged =
5571 feature_resolver_->MergeFeatures(parent_features, base_features);
5572 if (!merged.ok()) {
5573 AddError(descriptor->name(), proto, error_location,
5574 [&] { return std::string(merged.status().message()); });
5575 return;
5576 }
5577
5578 descriptor->merged_features_ = tables_->InternFeatureSet(*std::move(merged));
5579 }
5580
5581 template <class DescriptorT>
ResolveFeatures(const typename DescriptorT::Proto & proto,DescriptorT * descriptor,typename DescriptorT::OptionsType * options,internal::FlatAllocator & alloc)5582 void DescriptorBuilder::ResolveFeatures(
5583 const typename DescriptorT::Proto& proto, DescriptorT* descriptor,
5584 typename DescriptorT::OptionsType* options,
5585 internal::FlatAllocator& alloc) {
5586 ResolveFeaturesImpl(descriptor->file()->edition(), proto, descriptor, options,
5587 alloc, DescriptorPool::ErrorCollector::NAME);
5588 }
5589
ResolveFeatures(const FileDescriptorProto & proto,FileDescriptor * descriptor,FileOptions * options,internal::FlatAllocator & alloc)5590 void DescriptorBuilder::ResolveFeatures(const FileDescriptorProto& proto,
5591 FileDescriptor* descriptor,
5592 FileOptions* options,
5593 internal::FlatAllocator& alloc) {
5594 // File descriptors always need their own merged feature set, even without
5595 // any explicit features.
5596 ResolveFeaturesImpl(descriptor->edition(), proto, descriptor, options, alloc,
5597 DescriptorPool::ErrorCollector::EDITIONS,
5598 /*force_merge=*/true);
5599 }
5600
PostProcessFieldFeatures(FieldDescriptor & field,const FieldDescriptorProto & proto)5601 void DescriptorBuilder::PostProcessFieldFeatures(
5602 FieldDescriptor& field, const FieldDescriptorProto& proto) {
5603 // TODO This can be replace by a runtime check in `is_required`
5604 // once the `label` getter is hidden.
5605 if (field.features().field_presence() == FeatureSet::LEGACY_REQUIRED &&
5606 field.label_ == FieldDescriptor::LABEL_OPTIONAL) {
5607 field.label_ = FieldDescriptor::LABEL_REQUIRED;
5608 }
5609 // TODO This can be replace by a runtime check of `is_delimited`
5610 // once the `TYPE_GROUP` value is removed.
5611 if (field.type_ == FieldDescriptor::TYPE_MESSAGE &&
5612 !field.containing_type()->options().map_entry() &&
5613 field.features().message_encoding() == FeatureSet::DELIMITED) {
5614 Symbol type =
5615 LookupSymbol(proto.type_name(), field.full_name(),
5616 DescriptorPool::PLACEHOLDER_MESSAGE, LOOKUP_TYPES, false);
5617 if (type.descriptor() == nullptr ||
5618 !type.descriptor()->options().map_entry()) {
5619 field.type_ = FieldDescriptor::TYPE_GROUP;
5620 }
5621 }
5622 }
5623
// A common pattern: We want to convert a repeated field in the descriptor
// to an array of values, calling some method to build each value.
//
// Arguments:
//   INPUT  - expression providing NAME_size() and NAME(i) accessors.
//   OUTPUT - pointer whose NAME_count_ and NAMEs_ members are assigned.
//   NAME   - base name pasted into the accessor and member names above.
//   METHOD - callable invoked once per element as
//            METHOD(INPUT.NAME(i), PARENT, OUTPUT->NAMEs_ + i, alloc).
//   PARENT - forwarded unchanged as the second argument of METHOD.
//
// The expansion refers to a variable named `alloc`, which must be in scope at
// the point of use; the output array is obtained from alloc.AllocateArray.
// The macro expands to several statements (it is not wrapped in a
// do/while), so it must not be used as the unbraced body of an `if` or loop.
#define BUILD_ARRAY(INPUT, OUTPUT, NAME, METHOD, PARENT)               \
  OUTPUT->NAME##_count_ = INPUT.NAME##_size();                         \
  OUTPUT->NAME##s_ = alloc.AllocateArray<                              \
      typename std::remove_pointer<decltype(OUTPUT->NAME##s_)>::type>( \
      INPUT.NAME##_size());                                            \
  for (int i = 0; i < INPUT.NAME##_size(); i++) {                      \
    METHOD(INPUT.NAME(i), PARENT, OUTPUT->NAME##s_ + i, alloc);        \
  }
5634
AddRecursiveImportError(const FileDescriptorProto & proto,int from_here)5635 PROTOBUF_NOINLINE void DescriptorBuilder::AddRecursiveImportError(
5636 const FileDescriptorProto& proto, int from_here) {
5637 auto make_error = [&] {
5638 std::string error_message("File recursively imports itself: ");
5639 for (size_t i = from_here; i < tables_->pending_files_.size(); i++) {
5640 error_message.append(tables_->pending_files_[i]);
5641 error_message.append(" -> ");
5642 }
5643 error_message.append(proto.name());
5644 return error_message;
5645 };
5646
5647 if (static_cast<size_t>(from_here) < tables_->pending_files_.size() - 1) {
5648 AddError(tables_->pending_files_[from_here + 1], proto,
5649 DescriptorPool::ErrorCollector::IMPORT, make_error);
5650 } else {
5651 AddError(proto.name(), proto, DescriptorPool::ErrorCollector::IMPORT,
5652 make_error);
5653 }
5654 }
5655
AddTwiceListedError(const FileDescriptorProto & proto,int index)5656 void DescriptorBuilder::AddTwiceListedError(const FileDescriptorProto& proto,
5657 int index) {
5658 AddError(proto.dependency(index), proto,
5659 DescriptorPool::ErrorCollector::IMPORT, [&] {
5660 return absl::StrCat("Import \"", proto.dependency(index),
5661 "\" was listed twice.");
5662 });
5663 }
5664
AddImportError(const FileDescriptorProto & proto,int index)5665 void DescriptorBuilder::AddImportError(const FileDescriptorProto& proto,
5666 int index) {
5667 auto make_error = [&] {
5668 if (pool_->fallback_database_ == nullptr) {
5669 return absl::StrCat("Import \"", proto.dependency(index),
5670 "\" has not been loaded.");
5671 }
5672 return absl::StrCat("Import \"", proto.dependency(index),
5673 "\" was not found or had errors.");
5674 };
5675 AddError(proto.dependency(index), proto,
5676 DescriptorPool::ErrorCollector::IMPORT, make_error);
5677 }
5678
ExistingFileMatchesProto(Edition edition,const FileDescriptor * existing_file,const FileDescriptorProto & proto)5679 PROTOBUF_NOINLINE static bool ExistingFileMatchesProto(
5680 Edition edition, const FileDescriptor* existing_file,
5681 const FileDescriptorProto& proto) {
5682 FileDescriptorProto existing_proto;
5683 existing_file->CopyTo(&existing_proto);
5684 if (edition == Edition::EDITION_PROTO2 && proto.has_syntax()) {
5685 existing_proto.set_syntax("proto2");
5686 }
5687
5688 return existing_proto.SerializeAsString() == proto.SerializeAsString();
5689 }
5690
5691 // These PlanAllocationSize functions will gather into the FlatAllocator all the
5692 // necessary memory allocations that BuildXXX functions below will do on the
5693 // Tables object.
5694 // They *must* be kept in sync. If we miss some PlanArray call we won't have
5695 // enough memory and will ABSL_CHECK-fail.
PlanAllocationSize(const RepeatedPtrField<EnumValueDescriptorProto> & values,internal::FlatAllocator & alloc)5696 static void PlanAllocationSize(
5697 const RepeatedPtrField<EnumValueDescriptorProto>& values,
5698 internal::FlatAllocator& alloc) {
5699 alloc.PlanArray<EnumValueDescriptor>(values.size());
5700 alloc.PlanArray<std::string>(2 * values.size()); // name + full_name
5701 for (const auto& v : values) {
5702 if (v.has_options()) alloc.PlanArray<EnumValueOptions>(1);
5703 }
5704 }
5705
PlanAllocationSize(const RepeatedPtrField<EnumDescriptorProto> & enums,internal::FlatAllocator & alloc)5706 static void PlanAllocationSize(
5707 const RepeatedPtrField<EnumDescriptorProto>& enums,
5708 internal::FlatAllocator& alloc) {
5709 alloc.PlanArray<EnumDescriptor>(enums.size());
5710 alloc.PlanArray<std::string>(2 * enums.size()); // name + full_name
5711 for (const auto& e : enums) {
5712 if (e.has_options()) alloc.PlanArray<EnumOptions>(1);
5713 PlanAllocationSize(e.value(), alloc);
5714 alloc.PlanArray<EnumDescriptor::ReservedRange>(e.reserved_range_size());
5715 alloc.PlanArray<const std::string*>(e.reserved_name_size());
5716 alloc.PlanArray<std::string>(e.reserved_name_size());
5717 }
5718 }
5719
PlanAllocationSize(const RepeatedPtrField<OneofDescriptorProto> & oneofs,internal::FlatAllocator & alloc)5720 static void PlanAllocationSize(
5721 const RepeatedPtrField<OneofDescriptorProto>& oneofs,
5722 internal::FlatAllocator& alloc) {
5723 alloc.PlanArray<OneofDescriptor>(oneofs.size());
5724 alloc.PlanArray<std::string>(2 * oneofs.size()); // name + full_name
5725 for (const auto& oneof : oneofs) {
5726 if (oneof.has_options()) alloc.PlanArray<OneofOptions>(1);
5727 }
5728 }
5729
PlanAllocationSize(const RepeatedPtrField<FieldDescriptorProto> & fields,internal::FlatAllocator & alloc)5730 static void PlanAllocationSize(
5731 const RepeatedPtrField<FieldDescriptorProto>& fields,
5732 internal::FlatAllocator& alloc) {
5733 alloc.PlanArray<FieldDescriptor>(fields.size());
5734 for (const auto& field : fields) {
5735 if (field.has_options()) alloc.PlanArray<FieldOptions>(1);
5736 alloc.PlanFieldNames(field.name(),
5737 field.has_json_name() ? &field.json_name() : nullptr);
5738 if (field.has_default_value() && field.has_type() &&
5739 (field.type() == FieldDescriptorProto::TYPE_STRING ||
5740 field.type() == FieldDescriptorProto::TYPE_BYTES)) {
5741 // For the default string value.
5742 alloc.PlanArray<std::string>(1);
5743 }
5744 }
5745 }
5746
PlanAllocationSize(const RepeatedPtrField<DescriptorProto::ExtensionRange> & ranges,internal::FlatAllocator & alloc)5747 static void PlanAllocationSize(
5748 const RepeatedPtrField<DescriptorProto::ExtensionRange>& ranges,
5749 internal::FlatAllocator& alloc) {
5750 alloc.PlanArray<Descriptor::ExtensionRange>(ranges.size());
5751 for (const auto& r : ranges) {
5752 if (r.has_options()) alloc.PlanArray<ExtensionRangeOptions>(1);
5753 }
5754 }
5755
PlanAllocationSize(const RepeatedPtrField<DescriptorProto> & messages,internal::FlatAllocator & alloc)5756 static void PlanAllocationSize(
5757 const RepeatedPtrField<DescriptorProto>& messages,
5758 internal::FlatAllocator& alloc) {
5759 alloc.PlanArray<Descriptor>(messages.size());
5760 alloc.PlanArray<std::string>(2 * messages.size()); // name + full_name
5761
5762 for (const auto& message : messages) {
5763 if (message.has_options()) alloc.PlanArray<MessageOptions>(1);
5764 PlanAllocationSize(message.nested_type(), alloc);
5765 PlanAllocationSize(message.field(), alloc);
5766 PlanAllocationSize(message.extension(), alloc);
5767 PlanAllocationSize(message.extension_range(), alloc);
5768 alloc.PlanArray<Descriptor::ReservedRange>(message.reserved_range_size());
5769 alloc.PlanArray<const std::string*>(message.reserved_name_size());
5770 alloc.PlanArray<std::string>(message.reserved_name_size());
5771 PlanAllocationSize(message.enum_type(), alloc);
5772 PlanAllocationSize(message.oneof_decl(), alloc);
5773 }
5774 }
5775
PlanAllocationSize(const RepeatedPtrField<MethodDescriptorProto> & methods,internal::FlatAllocator & alloc)5776 static void PlanAllocationSize(
5777 const RepeatedPtrField<MethodDescriptorProto>& methods,
5778 internal::FlatAllocator& alloc) {
5779 alloc.PlanArray<MethodDescriptor>(methods.size());
5780 alloc.PlanArray<std::string>(2 * methods.size()); // name + full_name
5781 for (const auto& m : methods) {
5782 if (m.has_options()) alloc.PlanArray<MethodOptions>(1);
5783 }
5784 }
5785
PlanAllocationSize(const RepeatedPtrField<ServiceDescriptorProto> & services,internal::FlatAllocator & alloc)5786 static void PlanAllocationSize(
5787 const RepeatedPtrField<ServiceDescriptorProto>& services,
5788 internal::FlatAllocator& alloc) {
5789 alloc.PlanArray<ServiceDescriptor>(services.size());
5790 alloc.PlanArray<std::string>(2 * services.size()); // name + full_name
5791 for (const auto& service : services) {
5792 if (service.has_options()) alloc.PlanArray<ServiceOptions>(1);
5793 PlanAllocationSize(service.method(), alloc);
5794 }
5795 }
5796
PlanAllocationSize(const FileDescriptorProto & proto,internal::FlatAllocator & alloc)5797 static void PlanAllocationSize(const FileDescriptorProto& proto,
5798 internal::FlatAllocator& alloc) {
5799 alloc.PlanArray<FileDescriptor>(1);
5800 alloc.PlanArray<FileDescriptorTables>(1);
5801 alloc.PlanArray<std::string>(2); // name + package
5802 if (proto.has_options()) alloc.PlanArray<FileOptions>(1);
5803 if (proto.has_source_code_info()) alloc.PlanArray<SourceCodeInfo>(1);
5804
5805 PlanAllocationSize(proto.service(), alloc);
5806 PlanAllocationSize(proto.message_type(), alloc);
5807 PlanAllocationSize(proto.enum_type(), alloc);
5808 PlanAllocationSize(proto.extension(), alloc);
5809
5810 alloc.PlanArray<int>(proto.weak_dependency_size());
5811 alloc.PlanArray<int>(proto.public_dependency_size());
5812 alloc.PlanArray<const FileDescriptor*>(proto.dependency_size());
5813 }
5814
BuildFile(const FileDescriptorProto & proto)5815 const FileDescriptor* DescriptorBuilder::BuildFile(
5816 const FileDescriptorProto& proto) {
5817 // Ensure the generated pool has been lazily initialized. This is most
5818 // important for protos that use C++-specific features, since that extension
5819 // is only registered lazily and we always parse options into the generated
5820 // pool.
5821 if (pool_ != DescriptorPool::internal_generated_pool()) {
5822 DescriptorPool::generated_pool();
5823 }
5824
5825 filename_ = proto.name();
5826
5827 // Check if the file already exists and is identical to the one being built.
5828 // Note: This only works if the input is canonical -- that is, it
5829 // fully-qualifies all type names, has no UninterpretedOptions, etc.
5830 // This is fine, because this idempotency "feature" really only exists to
5831 // accommodate one hack in the proto1->proto2 migration layer.
5832 const FileDescriptor* existing_file = tables_->FindFile(filename_);
5833 if (existing_file != nullptr) {
5834 // File already in pool. Compare the existing one to the input.
5835 if (ExistingFileMatchesProto(existing_file->edition(), existing_file,
5836 proto)) {
5837 // They're identical. Return the existing descriptor.
5838 return existing_file;
5839 }
5840
5841 // Not a match. The error will be detected and handled later.
5842 }
5843
5844 // Check to see if this file is already on the pending files list.
5845 // TODO: Allow recursive imports? It may not work with some
5846 // (most?) programming languages. E.g., in C++, a forward declaration
5847 // of a type is not sufficient to allow it to be used even in a
5848 // generated header file due to inlining. This could perhaps be
5849 // worked around using tricks involving inserting #include statements
5850 // mid-file, but that's pretty ugly, and I'm pretty sure there are
5851 // some languages out there that do not allow recursive dependencies
5852 // at all.
5853 for (size_t i = 0; i < tables_->pending_files_.size(); i++) {
5854 if (tables_->pending_files_[i] == proto.name()) {
5855 AddRecursiveImportError(proto, i);
5856 return nullptr;
5857 }
5858 }
5859
5860 static const int kMaximumPackageLength = 511;
5861 if (proto.package().size() > kMaximumPackageLength) {
5862 AddError(proto.package(), proto, DescriptorPool::ErrorCollector::NAME,
5863 "Package name is too long");
5864 return nullptr;
5865 }
5866
5867 // If we have a fallback_database_, and we aren't doing lazy import building,
5868 // attempt to load all dependencies now, before checkpointing tables_. This
5869 // avoids confusion with recursive checkpoints.
5870 if (!pool_->lazily_build_dependencies_) {
5871 if (pool_->fallback_database_ != nullptr) {
5872 tables_->pending_files_.push_back(proto.name());
5873 for (int i = 0; i < proto.dependency_size(); i++) {
5874 if (tables_->FindFile(proto.dependency(i)) == nullptr &&
5875 (pool_->underlay_ == nullptr ||
5876 pool_->underlay_->FindFileByName(proto.dependency(i)) ==
5877 nullptr)) {
5878 // We don't care what this returns since we'll find out below anyway.
5879 pool_->TryFindFileInFallbackDatabase(proto.dependency(i),
5880 deferred_validation_);
5881 }
5882 }
5883 tables_->pending_files_.pop_back();
5884 }
5885 }
5886
5887 // Checkpoint the tables so that we can roll back if something goes wrong.
5888 tables_->AddCheckpoint();
5889
5890 auto alloc = absl::make_unique<internal::FlatAllocator>();
5891 PlanAllocationSize(proto, *alloc);
5892 alloc->FinalizePlanning(tables_);
5893 FileDescriptor* result = BuildFileImpl(proto, *alloc);
5894
5895 file_tables_->FinalizeTables();
5896 if (result) {
5897 tables_->ClearLastCheckpoint();
5898 result->finished_building_ = true;
5899 alloc->ExpectConsumed();
5900 } else {
5901 tables_->RollbackToLastCheckpoint(deferred_validation_);
5902 }
5903
5904 return result;
5905 }
5906
BuildFileImpl(const FileDescriptorProto & proto,internal::FlatAllocator & alloc)5907 FileDescriptor* DescriptorBuilder::BuildFileImpl(
5908 const FileDescriptorProto& proto, internal::FlatAllocator& alloc) {
5909 FileDescriptor* result = alloc.AllocateArray<FileDescriptor>(1);
5910 file_ = result;
5911
5912 if (proto.has_edition()) {
5913 file_->edition_ = proto.edition();
5914 } else if (proto.syntax().empty() || proto.syntax() == "proto2") {
5915 file_->edition_ = Edition::EDITION_PROTO2;
5916 } else if (proto.syntax() == "proto3") {
5917 file_->edition_ = Edition::EDITION_PROTO3;
5918 } else {
5919 file_->edition_ = Edition::EDITION_UNKNOWN;
5920 AddError(proto.name(), proto, DescriptorPool::ErrorCollector::OTHER, [&] {
5921 return absl::StrCat("Unrecognized syntax: ", proto.syntax());
5922 });
5923 }
5924
5925 const FeatureSetDefaults& defaults =
5926 pool_->feature_set_defaults_spec_ == nullptr
5927 ? GetCppFeatureSetDefaults()
5928 : *pool_->feature_set_defaults_spec_;
5929
5930 absl::StatusOr<FeatureResolver> feature_resolver =
5931 FeatureResolver::Create(file_->edition_, defaults);
5932 if (!feature_resolver.ok()) {
5933 AddError(proto.name(), proto, DescriptorPool::ErrorCollector::EDITIONS,
5934 [&] { return std::string(feature_resolver.status().message()); });
5935 } else {
5936 feature_resolver_.emplace(std::move(feature_resolver).value());
5937 }
5938
5939 result->is_placeholder_ = false;
5940 result->finished_building_ = false;
5941 SourceCodeInfo* info = nullptr;
5942 if (proto.has_source_code_info()) {
5943 info = alloc.AllocateArray<SourceCodeInfo>(1);
5944 *info = proto.source_code_info();
5945 result->source_code_info_ = info;
5946 } else {
5947 result->source_code_info_ = &SourceCodeInfo::default_instance();
5948 }
5949
5950 file_tables_ = alloc.AllocateArray<FileDescriptorTables>(1);
5951 file_->tables_ = file_tables_;
5952
5953 if (!proto.has_name()) {
5954 AddError("", proto, DescriptorPool::ErrorCollector::OTHER,
5955 "Missing field: FileDescriptorProto.name.");
5956 }
5957
5958 result->name_ = alloc.AllocateStrings(proto.name());
5959 if (proto.has_package()) {
5960 result->package_ = alloc.AllocateStrings(proto.package());
5961 } else {
5962 // We cannot rely on proto.package() returning a valid string if
5963 // proto.has_package() is false, because we might be running at static
5964 // initialization time, in which case default values have not yet been
5965 // initialized.
5966 result->package_ = alloc.AllocateStrings("");
5967 }
5968 result->pool_ = pool_;
5969
5970 if (absl::StrContains(result->name(), '\0')) {
5971 AddError(result->name(), proto, DescriptorPool::ErrorCollector::NAME, [&] {
5972 return absl::StrCat("\"", result->name(), "\" contains null character.");
5973 });
5974 return nullptr;
5975 }
5976
5977 // Add to tables.
5978 if (!tables_->AddFile(result)) {
5979 AddError(proto.name(), proto, DescriptorPool::ErrorCollector::OTHER,
5980 "A file with this name is already in the pool.");
5981 // Bail out early so that if this is actually the exact same file, we
5982 // don't end up reporting that every single symbol is already defined.
5983 return nullptr;
5984 }
5985 if (!result->package().empty()) {
5986 if (std::count(result->package().begin(), result->package().end(), '.') >
5987 kPackageLimit) {
5988 AddError(result->package(), proto, DescriptorPool::ErrorCollector::NAME,
5989 "Exceeds Maximum Package Depth");
5990 return nullptr;
5991 }
5992 AddPackage(result->package(), proto, result, true);
5993 }
5994
5995 // Make sure all dependencies are loaded.
5996 absl::flat_hash_set<absl::string_view> seen_dependencies;
5997 result->dependency_count_ = proto.dependency_size();
5998 result->dependencies_ =
5999 alloc.AllocateArray<const FileDescriptor*>(proto.dependency_size());
6000 result->dependencies_once_ = nullptr;
6001 unused_dependency_.clear();
6002 absl::flat_hash_set<int> weak_deps;
6003 for (int i = 0; i < proto.weak_dependency_size(); ++i) {
6004 weak_deps.insert(proto.weak_dependency(i));
6005 }
6006
6007 bool need_lazy_deps = false;
6008 for (int i = 0; i < proto.dependency_size(); i++) {
6009 if (!seen_dependencies.insert(proto.dependency(i)).second) {
6010 AddTwiceListedError(proto, i);
6011 }
6012
6013 const FileDescriptor* dependency = tables_->FindFile(proto.dependency(i));
6014 if (dependency == nullptr && pool_->underlay_ != nullptr) {
6015 dependency = pool_->underlay_->FindFileByName(proto.dependency(i));
6016 }
6017
6018 if (dependency == result) {
6019 // Recursive import. dependency/result is not fully initialized, and it's
6020 // dangerous to try to do anything with it. The recursive import error
6021 // will be detected and reported in DescriptorBuilder::BuildFile().
6022 return nullptr;
6023 }
6024
6025 if (dependency == nullptr) {
6026 if (!pool_->lazily_build_dependencies_) {
6027 if (pool_->allow_unknown_ ||
6028 (!pool_->enforce_weak_ && weak_deps.contains(i))) {
6029 internal::FlatAllocator lazy_dep_alloc;
6030 lazy_dep_alloc.PlanArray<FileDescriptor>(1);
6031 lazy_dep_alloc.PlanArray<std::string>(1);
6032 lazy_dep_alloc.FinalizePlanning(tables_);
6033 dependency = pool_->NewPlaceholderFileWithMutexHeld(
6034 proto.dependency(i), lazy_dep_alloc);
6035 } else {
6036 AddImportError(proto, i);
6037 }
6038 }
6039 } else {
6040 // Add to unused_dependency_ to track unused imported files.
6041 // Note: do not track unused imported files for public import.
6042 if (pool_->enforce_dependencies_ &&
6043 (pool_->direct_input_files_.find(proto.name()) !=
6044 pool_->direct_input_files_.end()) &&
6045 (dependency->public_dependency_count() == 0)) {
6046 unused_dependency_.insert(dependency);
6047 }
6048 }
6049
6050 result->dependencies_[i] = dependency;
6051 if (pool_->lazily_build_dependencies_ && !dependency) {
6052 need_lazy_deps = true;
6053 }
6054 }
6055 if (need_lazy_deps) {
6056 int total_char_size = 0;
6057 for (int i = 0; i < proto.dependency_size(); i++) {
6058 if (result->dependencies_[i] == nullptr) {
6059 total_char_size += static_cast<int>(proto.dependency(i).size());
6060 }
6061 ++total_char_size; // For NUL char
6062 }
6063
6064 void* data = tables_->AllocateBytes(
6065 static_cast<int>(sizeof(absl::once_flag)) + total_char_size);
6066 result->dependencies_once_ = ::new (data) absl::once_flag{};
6067 char* name_data = reinterpret_cast<char*>(result->dependencies_once_ + 1);
6068
6069 for (int i = 0; i < proto.dependency_size(); i++) {
6070 if (result->dependencies_[i] == nullptr) {
6071 memcpy(name_data, proto.dependency(i).data(),
6072 proto.dependency(i).size());
6073 name_data += proto.dependency(i).size();
6074 }
6075 *name_data++ = '\0';
6076 }
6077 }
6078
6079 // Check public dependencies.
6080 int public_dependency_count = 0;
6081 result->public_dependencies_ =
6082 alloc.AllocateArray<int>(proto.public_dependency_size());
6083 for (int i = 0; i < proto.public_dependency_size(); i++) {
6084 // Only put valid public dependency indexes.
6085 int index = proto.public_dependency(i);
6086 if (index >= 0 && index < proto.dependency_size()) {
6087 result->public_dependencies_[public_dependency_count++] = index;
6088 // Do not track unused imported files for public import.
6089 // Calling dependency(i) builds that file when doing lazy imports,
6090 // need to avoid doing this. Unused dependency detection isn't done
6091 // when building lazily, anyways.
6092 if (!pool_->lazily_build_dependencies_) {
6093 unused_dependency_.erase(result->dependency(index));
6094 }
6095 } else {
6096 AddError(proto.name(), proto, DescriptorPool::ErrorCollector::OTHER,
6097 "Invalid public dependency index.");
6098 }
6099 }
6100 result->public_dependency_count_ = public_dependency_count;
6101
6102 // Build dependency set
6103 dependencies_.clear();
6104 // We don't/can't do proper dependency error checking when
6105 // lazily_build_dependencies_, and calling dependency(i) will force
6106 // a dependency to be built, which we don't want.
6107 if (!pool_->lazily_build_dependencies_) {
6108 for (int i = 0; i < result->dependency_count(); i++) {
6109 RecordPublicDependencies(result->dependency(i));
6110 }
6111 }
6112
6113 // Check weak dependencies.
6114 int weak_dependency_count = 0;
6115 result->weak_dependencies_ =
6116 alloc.AllocateArray<int>(proto.weak_dependency_size());
6117 for (int i = 0; i < proto.weak_dependency_size(); i++) {
6118 int index = proto.weak_dependency(i);
6119 if (index >= 0 && index < proto.dependency_size()) {
6120 result->weak_dependencies_[weak_dependency_count++] = index;
6121 } else {
6122 AddError(proto.name(), proto, DescriptorPool::ErrorCollector::OTHER,
6123 "Invalid weak dependency index.");
6124 }
6125 }
6126 result->weak_dependency_count_ = weak_dependency_count;
6127
6128 // Convert children.
6129 BUILD_ARRAY(proto, result, message_type, BuildMessage, nullptr);
6130 BUILD_ARRAY(proto, result, enum_type, BuildEnum, nullptr);
6131 BUILD_ARRAY(proto, result, service, BuildService, nullptr);
6132 BUILD_ARRAY(proto, result, extension, BuildExtension, nullptr);
6133
6134 // Copy options.
6135 AllocateOptions(proto, result, alloc);
6136
6137 // Note that the following steps must occur in exactly the specified order.
6138
6139 // Cross-link.
6140 CrossLinkFile(result, proto);
6141
6142 if (!message_hints_.empty()) {
6143 SuggestFieldNumbers(result, proto);
6144 }
6145
6146 // Interpret only the non-extension options first, including features and
6147 // their extensions. This has to be done in two passes, since option
6148 // extensions defined in this file may have features attached to them.
6149 if (!had_errors_) {
6150 OptionInterpreter option_interpreter(this);
6151 for (std::vector<OptionsToInterpret>::iterator iter =
6152 options_to_interpret_.begin();
6153 iter != options_to_interpret_.end(); ++iter) {
6154 option_interpreter.InterpretNonExtensionOptions(&(*iter));
6155 }
6156
6157 // TODO: move this check back to generator.cc once we no longer
6158 // need to set both ctype and string_type internally.
6159 internal::VisitDescriptors(
6160 *result, proto,
6161 [&](const FieldDescriptor& field, const FieldDescriptorProto& proto) {
6162 if (field.options_->has_ctype() && field.options_->features()
6163 .GetExtension(pb::cpp)
6164 .has_string_type()) {
6165 AddError(field.full_name(), proto,
6166 DescriptorPool::ErrorCollector::TYPE, [&] {
6167 return absl::StrFormat(
6168 "Field %s specifies both string_type and ctype "
6169 "which is not supported.",
6170 field.full_name());
6171 });
6172 }
6173 });
6174
6175 // Handle feature resolution. This must occur after option interpretation,
6176 // but before validation.
6177 {
6178 auto cleanup = DisableTracking();
6179 internal::VisitDescriptors(
6180 *result, proto, [&](const auto& descriptor, const auto& proto) {
6181 using OptionsT =
6182 typename std::remove_const<typename std::remove_pointer<
6183 decltype(descriptor.options_)>::type>::type;
6184 using DescriptorT =
6185 typename std::remove_const<typename std::remove_reference<
6186 decltype(descriptor)>::type>::type;
6187
6188 ResolveFeatures(
6189 proto, const_cast<DescriptorT*>(&descriptor),
6190 const_cast< // NOLINT(google3-runtime-proto-const-cast)
6191 OptionsT*>(descriptor.options_),
6192 alloc);
6193 });
6194 }
6195
6196 internal::VisitDescriptors(*result, [&](const FieldDescriptor& field) {
6197 if (result->edition() >= Edition::EDITION_2024 &&
6198 field.options().has_ctype()) {
6199 // "ctype" is no longer supported in edition 2024 and beyond.
6200 AddError(
6201 field.full_name(), proto, DescriptorPool::ErrorCollector::NAME,
6202 "ctype option is not allowed under edition 2024 and beyond. Use "
6203 "the feature string_type = VIEW|CORD|STRING|... instead.");
6204 }
6205 EnforceCTypeStringTypeConsistency(
6206 field.file()->edition(), field.cpp_type(),
6207 field.merged_features_->GetExtension(pb::cpp),
6208 const_cast< // NOLINT(google3-runtime-proto-const-cast)
6209 FieldOptions&>(*field.options_));
6210 });
6211
6212 // Post-process cleanup for field features.
6213 internal::VisitDescriptors(
6214 *result, proto,
6215 [&](const FieldDescriptor& field, const FieldDescriptorProto& proto) {
6216 PostProcessFieldFeatures(const_cast<FieldDescriptor&>(field), proto);
6217 });
6218
6219 // Interpret any remaining uninterpreted options gathered into
6220 // options_to_interpret_ during descriptor building. Cross-linking has made
6221 // extension options known, so all interpretations should now succeed.
6222 for (std::vector<OptionsToInterpret>::iterator iter =
6223 options_to_interpret_.begin();
6224 iter != options_to_interpret_.end(); ++iter) {
6225 option_interpreter.InterpretOptionExtensions(&(*iter));
6226 }
6227 options_to_interpret_.clear();
6228 if (info != nullptr) {
6229 option_interpreter.UpdateSourceCodeInfo(info);
6230 }
6231 }
6232
6233 // Validate options. See comments at InternalSetLazilyBuildDependencies about
6234 // error checking and lazy import building.
6235 if (!had_errors_ && !pool_->lazily_build_dependencies_) {
6236 internal::VisitDescriptors(
6237 *result, proto, [&](const auto& descriptor, const auto& desc_proto) {
6238 ValidateOptions(&descriptor, desc_proto);
6239 });
6240 }
6241
6242 // Additional naming conflict check for map entry types. Only need to check
6243 // this if there are already errors.
6244 if (had_errors_) {
6245 for (int i = 0; i < proto.message_type_size(); ++i) {
6246 DetectMapConflicts(result->message_type(i), proto.message_type(i));
6247 }
6248 }
6249
6250
6251 // Again, see comments at InternalSetLazilyBuildDependencies about error
6252 // checking. Also, don't log unused dependencies if there were previous
6253 // errors, since the results might be inaccurate.
6254 if (!had_errors_ && !unused_dependency_.empty() &&
6255 !pool_->lazily_build_dependencies_) {
6256 LogUnusedDependency(proto, result);
6257 }
6258
6259 // Store feature information for deferred validation outside of the database
6260 // mutex.
6261 if (!had_errors_ && !pool_->lazily_build_dependencies_) {
6262 internal::VisitDescriptors(
6263 *result, proto, [&](const auto& descriptor, const auto& desc_proto) {
6264 if (descriptor.proto_features_ != &FeatureSet::default_instance()) {
6265 deferred_validation_.ValidateFeatureLifetimes(
6266 GetFile(descriptor), {descriptor.proto_features_, &desc_proto,
6267 GetFullName(descriptor), proto.name()});
6268 }
6269 });
6270 }
6271
6272 if (had_errors_) {
6273 return nullptr;
6274 } else {
6275 return result;
6276 }
6277 }
6278
6279
AllocateNameStrings(const absl::string_view scope,const absl::string_view proto_name,internal::FlatAllocator & alloc)6280 const std::string* DescriptorBuilder::AllocateNameStrings(
6281 const absl::string_view scope, const absl::string_view proto_name,
6282 internal::FlatAllocator& alloc) {
6283 if (scope.empty()) {
6284 return alloc.AllocateStrings(proto_name, proto_name);
6285 } else {
6286 return alloc.AllocateStrings(proto_name,
6287 absl::StrCat(scope, ".", proto_name));
6288 }
6289 }
6290
6291 namespace {
6292
// Scope guard used by BuildMessage below: adds one to the referenced counter
// when the guard is destroyed. It is deliberately left as a plain aggregate
// (no constructor) so callers can brace-initialize it with the counter.
struct IncrementWhenDestroyed {
  int& to_increment;

  ~IncrementWhenDestroyed() { to_increment += 1; }
};
6298
6299 } // namespace
6300
6301 namespace {
IsNonMessageType(absl::string_view type)6302 bool IsNonMessageType(absl::string_view type) {
6303 static const auto* non_message_types =
6304 new absl::flat_hash_set<absl::string_view>(
6305 {"double", "float", "int64", "uint64", "int32", "fixed32", "fixed64",
6306 "bool", "string", "bytes", "uint32", "enum", "sfixed32", "sfixed64",
6307 "sint32", "sint64"});
6308 return non_message_types->contains(type);
6309 }
6310 } // namespace
6311
6312
// Populates the message descriptor `result` from `proto`.
//
// `parent` is the enclosing message, or nullptr for a message declared at
// file level (in which case the file's package is used as the naming scope).
// All storage for names and child arrays comes from `alloc`.
//
// Besides building the children (oneofs, fields, enums, extension ranges,
// extensions, reserved ranges, nested messages) and registering the message
// via AddSymbol, this reports errors for:
//   * exceeding the nested-message recursion limit,
//   * overlapping reserved ranges and names reserved more than once,
//   * fields whose number falls inside an extension range or reserved range,
//   * fields whose name is reserved,
//   * extension ranges that overlap each other or a reserved range.
//
// Ranges are compared as half-open intervals (end is exclusive), which is why
// the error messages print `end - 1`.
void DescriptorBuilder::BuildMessage(const DescriptorProto& proto,
                                     const Descriptor* parent,
                                     Descriptor* result,
                                     internal::FlatAllocator& alloc) {
  // Top-level messages are scoped by the file's package; nested ones by the
  // full name of the enclosing message.
  const absl::string_view scope =
      (parent == nullptr) ? file_->package() : parent->full_name();
  result->all_names_ = AllocateNameStrings(scope, proto.name(), alloc);
  ValidateSymbolName(proto.name(), result->full_name(), proto);

  result->file_ = file_;
  result->containing_type_ = parent;
  result->is_placeholder_ = false;
  result->is_unqualified_placeholder_ = false;
  result->well_known_type_ = Descriptor::WELLKNOWNTYPE_UNSPECIFIED;
  result->options_ = nullptr;  // Set to default_instance later if necessary.

  // Tag the message if its full name appears in the pool's well-known-type
  // table.
  auto it = pool_->tables_->well_known_types_.find(result->full_name());
  if (it != pool_->tables_->well_known_types_.end()) {
    result->well_known_type_ = it->second;
  }

  // Calculate the continuous sequence of fields.
  // These can be fast-path'd during lookup and don't need to be added to the
  // tables.
  // We use uint16_t to save space for sequential_field_limit_, so stop before
  // overflowing it. Worst case, we are not taking full advantage on huge
  // messages, but it is unlikely.
  result->sequential_field_limit_ = 0;
  // Counts the leading fields whose numbers are exactly 1, 2, 3, ... in
  // declaration order; stops at the first field that breaks the sequence.
  for (int i = 0; i < std::numeric_limits<uint16_t>::max() &&
                  i < proto.field_size() && proto.field(i).number() == i + 1;
       ++i) {
    result->sequential_field_limit_ = i + 1;
  }

  // Build oneofs first so that fields and extension ranges can refer to them.
  BUILD_ARRAY(proto, result, oneof_decl, BuildOneof, result);
  BUILD_ARRAY(proto, result, field, BuildField, result);
  BUILD_ARRAY(proto, result, enum_type, BuildEnum, result);
  BUILD_ARRAY(proto, result, extension_range, BuildExtensionRange, result);
  BUILD_ARRAY(proto, result, extension, BuildExtension, result);
  BUILD_ARRAY(proto, result, reserved_range, BuildReservedRange, result);

  // Copy options.
  AllocateOptions(proto, result, DescriptorProto::kOptionsFieldNumber,
                  "google.protobuf.MessageOptions", alloc);

  // Before building submessages, check recursion limit.
  // The guard restores recursion_depth_ on every exit path from here on,
  // including the early return below.
  --recursion_depth_;
  IncrementWhenDestroyed revert{recursion_depth_};
  if (recursion_depth_ <= 0) {
    AddError(result->full_name(), proto, DescriptorPool::ErrorCollector::OTHER,
             "Reached maximum recursion limit for nested messages.");
    // Leave the descriptor with an empty nested-type list; the remaining
    // steps (reserved names, symbol registration, range checks) are skipped.
    result->nested_types_ = nullptr;
    result->nested_type_count_ = 0;
    return;
  }
  BUILD_ARRAY(proto, result, nested_type, BuildMessage, result);

  // Copy reserved names.
  int reserved_name_count = proto.reserved_name_size();
  result->reserved_name_count_ = reserved_name_count;
  result->reserved_names_ =
      alloc.AllocateArray<const std::string*>(reserved_name_count);
  for (int i = 0; i < reserved_name_count; ++i) {
    result->reserved_names_[i] = alloc.AllocateStrings(proto.reserved_name(i));
  }

  AddSymbol(result->full_name(), parent, result->name(), proto, Symbol(result));

  // Pairwise check that no two reserved ranges overlap. The error is attached
  // to the earlier range (index i) and describes the later one (index j) as
  // the offender.
  for (int i = 0; i < proto.reserved_range_size(); i++) {
    const DescriptorProto_ReservedRange& range1 = proto.reserved_range(i);
    for (int j = i + 1; j < proto.reserved_range_size(); j++) {
      const DescriptorProto_ReservedRange& range2 = proto.reserved_range(j);
      if (range1.end() > range2.start() && range2.end() > range1.start()) {
        AddError(result->full_name(), proto.reserved_range(i),
                 DescriptorPool::ErrorCollector::NUMBER, [&] {
                   return absl::Substitute(
                       "Reserved range $0 to $1 overlaps with "
                       "already-defined range $2 to $3.",
                       range2.start(), range2.end() - 1, range1.start(),
                       range1.end() - 1);
                 });
      }
    }
  }

  // Collect reserved names, flagging any name that is listed more than once.
  // The set holds views into `proto`'s strings.
  absl::flat_hash_set<absl::string_view> reserved_name_set;
  for (const std::string& name : proto.reserved_name()) {
    if (!reserved_name_set.insert(name).second) {
      AddError(name, proto, DescriptorPool::ErrorCollector::NAME, [&] {
        return absl::Substitute("Field name \"$0\" is reserved multiple times.",
                                name);
      });
    }
  }
  // Check that fields aren't using reserved names or numbers and that they
  // aren't using extension numbers.
  for (int i = 0; i < result->field_count(); i++) {
    const FieldDescriptor* field = result->field(i);
    // Field number inside one of this message's extension ranges.
    for (int j = 0; j < result->extension_range_count(); j++) {
      const Descriptor::ExtensionRange* range = result->extension_range(j);
      if (range->start_number() <= field->number() &&
          field->number() < range->end_number()) {
        message_hints_[result].RequestHintOnFieldNumbers(
            proto.extension_range(j), DescriptorPool::ErrorCollector::NUMBER);
        AddError(field->full_name(), proto.extension_range(j),
                 DescriptorPool::ErrorCollector::NUMBER, [&] {
                   return absl::Substitute(
                       "Extension range $0 to $1 includes field \"$2\" ($3).",
                       range->start_number(), range->end_number() - 1,
                       field->name(), field->number());
                 });
      }
    }
    // Field number inside one of this message's reserved ranges.
    for (int j = 0; j < result->reserved_range_count(); j++) {
      const Descriptor::ReservedRange* range = result->reserved_range(j);
      if (range->start <= field->number() && field->number() < range->end) {
        message_hints_[result].RequestHintOnFieldNumbers(
            proto.reserved_range(j), DescriptorPool::ErrorCollector::NUMBER);
        AddError(field->full_name(), proto.reserved_range(j),
                 DescriptorPool::ErrorCollector::NUMBER, [&] {
                   return absl::Substitute(
                       "Field \"$0\" uses reserved number $1.", field->name(),
                       field->number());
                 });
      }
    }
    // Field name matches a reserved name.
    if (reserved_name_set.contains(field->name())) {
      AddError(field->full_name(), proto.field(i),
               DescriptorPool::ErrorCollector::NAME, [&] {
                 return absl::Substitute("Field name \"$0\" is reserved.",
                                         field->name());
               });
    }
  }

  // Check that extension ranges don't overlap and don't include
  // reserved field numbers or names.
  for (int i = 0; i < result->extension_range_count(); i++) {
    const Descriptor::ExtensionRange* range1 = result->extension_range(i);
    // Extension range vs. every reserved range.
    for (int j = 0; j < result->reserved_range_count(); j++) {
      const Descriptor::ReservedRange* range2 = result->reserved_range(j);
      if (range1->end_number() > range2->start &&
          range2->end > range1->start_number()) {
        AddError(result->full_name(), proto.extension_range(i),
                 DescriptorPool::ErrorCollector::NUMBER, [&] {
                   return absl::Substitute(
                       "Extension range $0 to $1 overlaps with "
                       "reserved range $2 to $3.",
                       range1->start_number(), range1->end_number() - 1,
                       range2->start, range2->end - 1);
                 });
      }
    }
    // Extension range vs. every later extension range (each pair is visited
    // once).
    for (int j = i + 1; j < result->extension_range_count(); j++) {
      const Descriptor::ExtensionRange* range2 = result->extension_range(j);
      if (range1->end_number() > range2->start_number() &&
          range2->end_number() > range1->start_number()) {
        AddError(result->full_name(), proto.extension_range(i),
                 DescriptorPool::ErrorCollector::NUMBER, [&] {
                   return absl::Substitute(
                       "Extension range $0 to $1 overlaps with "
                       "already-defined range $2 to $3.",
                       range2->start_number(), range2->end_number() - 1,
                       range1->start_number(), range1->end_number() - 1);
                 });
      }
    }
  }
}
6483
CheckFieldJsonNameUniqueness(const DescriptorProto & proto,const Descriptor * result)6484 void DescriptorBuilder::CheckFieldJsonNameUniqueness(
6485 const DescriptorProto& proto, const Descriptor* result) {
6486 const absl::string_view message_name = result->full_name();
6487 if (!pool_->deprecated_legacy_json_field_conflicts_ &&
6488 !IsLegacyJsonFieldConflictEnabled(result->options())) {
6489 // Check both with and without taking json_name into consideration. This is
6490 // needed for field masks, which don't use json_name.
6491 CheckFieldJsonNameUniqueness(message_name, proto, result, false);
6492 CheckFieldJsonNameUniqueness(message_name, proto, result, true);
6493 }
6494 }
6495
6496 namespace {
6497 // Helpers for function below
6498
// The JSON name selected for a single field while looking for JSON-name
// conflicts within a message. Produced by GetJsonNameDetails and
// brace-initialized positionally, so the member order is significant.
struct JsonNameDetails {
  // The field proto this entry was computed from (not owned).
  const FieldDescriptorProto* field;
  // The JSON name under consideration: either the field's explicit json_name
  // or the default name derived from the field name via ToJsonName.
  std::string orig_name;
  // True only when `orig_name` is an explicit json_name that differs from the
  // derived default.
  bool is_custom;
};
6504
GetJsonNameDetails(const FieldDescriptorProto * field,bool use_custom)6505 JsonNameDetails GetJsonNameDetails(const FieldDescriptorProto* field,
6506 bool use_custom) {
6507 std::string default_json_name = ToJsonName(field->name());
6508 if (use_custom && field->has_json_name() &&
6509 field->json_name() != default_json_name) {
6510 return {field, field->json_name(), true};
6511 }
6512 return {field, std::move(default_json_name), false};
6513 }
6514
// Returns true when `name` is non-empty, starts with '[' and ends with ']'.
// A single "[" or "]" does not qualify, while "[]" does.
//
// The name is taken by const reference: only the first and last characters
// are inspected, so there is no reason to copy the string on every call.
bool JsonNameLooksLikeExtension(const std::string& name) {
  return !name.empty() && name.front() == '[' && name.back() == ']';
}
6518
6519 } // namespace
6520
CheckFieldJsonNameUniqueness(const absl::string_view message_name,const DescriptorProto & message,const Descriptor * descriptor,bool use_custom_names)6521 void DescriptorBuilder::CheckFieldJsonNameUniqueness(
6522 const absl::string_view message_name, const DescriptorProto& message,
6523 const Descriptor* descriptor, bool use_custom_names) {
6524 absl::flat_hash_map<std::string, JsonNameDetails> name_to_field;
6525 for (const FieldDescriptorProto& field : message.field()) {
6526 JsonNameDetails details = GetJsonNameDetails(&field, use_custom_names);
6527 if (details.is_custom && JsonNameLooksLikeExtension(details.orig_name)) {
6528 auto make_error = [&] {
6529 return absl::StrFormat(
6530 "The custom JSON name of field \"%s\" (\"%s\") is invalid: "
6531 "JSON names may not start with '[' and end with ']'.",
6532 field.name(), details.orig_name);
6533 };
6534 AddError(message_name, field, DescriptorPool::ErrorCollector::NAME,
6535 make_error);
6536 continue;
6537 }
6538 auto it_inserted = name_to_field.try_emplace(details.orig_name, details);
6539 if (it_inserted.second) {
6540 continue;
6541 }
6542 JsonNameDetails& match = it_inserted.first->second;
6543 if (use_custom_names && !details.is_custom && !match.is_custom) {
6544 // if this pass is considering custom JSON names, but neither of the
6545 // names involved in the conflict are custom, don't bother with a
6546 // message. That will have been reported from other pass (non-custom
6547 // JSON names).
6548 continue;
6549 }
6550 auto make_error = [&] {
6551 absl::string_view this_type = details.is_custom ? "custom" : "default";
6552 absl::string_view existing_type = match.is_custom ? "custom" : "default";
6553 // If the matched name differs (which it can only differ in case), include
6554 // it in the error message, for maximum clarity to user.
6555 std::string name_suffix = "";
6556 if (details.orig_name != match.orig_name) {
6557 name_suffix = absl::StrCat(" (\"", match.orig_name, "\")");
6558 }
6559 return absl::StrFormat(
6560 "The %s JSON name of field \"%s\" (\"%s\") conflicts "
6561 "with the %s JSON name of field \"%s\"%s.",
6562 this_type, field.name(), details.orig_name, existing_type,
6563 match.field->name(), name_suffix);
6564 };
6565
6566 bool involves_default = !details.is_custom || !match.is_custom;
6567 if (descriptor->features().json_format() ==
6568 FeatureSet::LEGACY_BEST_EFFORT &&
6569 involves_default) {
6570 // TODO Upgrade this to an error once downstream protos
6571 // have been fixed.
6572 AddWarning(message_name, field, DescriptorPool::ErrorCollector::NAME,
6573 make_error);
6574 } else {
6575 AddError(message_name, field, DescriptorPool::ErrorCollector::NAME,
6576 make_error);
6577 }
6578 }
6579 }
6580
BuildFieldOrExtension(const FieldDescriptorProto & proto,Descriptor * parent,FieldDescriptor * result,bool is_extension,internal::FlatAllocator & alloc)6581 void DescriptorBuilder::BuildFieldOrExtension(const FieldDescriptorProto& proto,
6582 Descriptor* parent,
6583 FieldDescriptor* result,
6584 bool is_extension,
6585 internal::FlatAllocator& alloc) {
6586 const absl::string_view scope =
6587 (parent == nullptr) ? file_->package() : parent->full_name();
6588
6589 // We allocate all names in a single array, and dedup them.
6590 // We remember the indices for the potentially deduped values.
6591 auto all_names = alloc.AllocateFieldNames(
6592 proto.name(), scope,
6593 proto.has_json_name() ? &proto.json_name() : nullptr);
6594 result->all_names_ = all_names.array;
6595 result->lowercase_name_index_ = all_names.lowercase_index;
6596 result->camelcase_name_index_ = all_names.camelcase_index;
6597 result->json_name_index_ = all_names.json_index;
6598
6599 ValidateSymbolName(proto.name(), result->full_name(), proto);
6600
6601 result->file_ = file_;
6602 result->number_ = proto.number();
6603 result->is_extension_ = is_extension;
6604 result->is_oneof_ = false;
6605 result->in_real_oneof_ = false;
6606 result->proto3_optional_ = proto.proto3_optional();
6607
6608 if (proto.proto3_optional() && file_->edition() != Edition::EDITION_PROTO3) {
6609 AddError(result->full_name(), proto, DescriptorPool::ErrorCollector::TYPE,
6610 [&] {
6611 return absl::StrCat(
6612 "The [proto3_optional=true] option may only be set on proto3"
6613 "fields, not ",
6614 result->full_name());
6615 });
6616 }
6617
6618 result->has_json_name_ = proto.has_json_name();
6619
6620 result->type_ = proto.type();
6621 result->label_ = proto.label();
6622 result->is_repeated_ = result->label_ == FieldDescriptor::LABEL_REPEATED;
6623
6624 if (result->label() == FieldDescriptor::LABEL_REQUIRED) {
6625 // An extension cannot have a required field (b/13365836).
6626 if (result->is_extension_) {
6627 AddError(result->full_name(), proto,
6628 // Error location `TYPE`: we would really like to indicate
6629 // `LABEL`, but the `ErrorLocation` enum has no entry for this,
6630 // and we don't necessarily know about all implementations of the
6631 // `ErrorCollector` interface to extend them to handle the new
6632 // error location type properly.
6633 DescriptorPool::ErrorCollector::TYPE, [&] {
6634 return absl::StrCat("The extension ", result->full_name(),
6635 " cannot be required.");
6636 });
6637 }
6638 }
6639
6640 // Some of these may be filled in when cross-linking.
6641 result->containing_type_ = nullptr;
6642 result->type_once_ = nullptr;
6643 result->default_value_enum_ = nullptr;
6644
6645 result->has_default_value_ = proto.has_default_value();
6646 if (proto.has_default_value() && result->is_repeated()) {
6647 AddError(result->full_name(), proto,
6648 DescriptorPool::ErrorCollector::DEFAULT_VALUE,
6649 "Repeated fields can't have default values.");
6650 }
6651
6652 if (proto.has_type()) {
6653 if (proto.has_default_value()) {
6654 char* end_pos = nullptr;
6655 switch (result->cpp_type()) {
6656 case FieldDescriptor::CPPTYPE_INT32:
6657 result->default_value_int32_t_ =
6658 std::strtol(proto.default_value().c_str(), &end_pos, 0);
6659 break;
6660 case FieldDescriptor::CPPTYPE_INT64:
6661 static_assert(sizeof(int64_t) == sizeof(long long), // NOLINT
6662 "sizeof int64_t is not sizeof long long");
6663 result->default_value_int64_t_ =
6664 std::strtoll(proto.default_value().c_str(), &end_pos, 0);
6665 break;
6666 case FieldDescriptor::CPPTYPE_UINT32:
6667 result->default_value_uint32_t_ =
6668 std::strtoul(proto.default_value().c_str(), &end_pos, 0);
6669 break;
6670 case FieldDescriptor::CPPTYPE_UINT64:
6671 static_assert(
6672 sizeof(uint64_t) == sizeof(unsigned long long), // NOLINT
6673 "sizeof uint64_t is not sizeof unsigned long long");
6674 result->default_value_uint64_t_ =
6675 std::strtoull(proto.default_value().c_str(), &end_pos, 0);
6676 break;
6677 case FieldDescriptor::CPPTYPE_FLOAT:
6678 if (proto.default_value() == "inf") {
6679 result->default_value_float_ =
6680 std::numeric_limits<float>::infinity();
6681 } else if (proto.default_value() == "-inf") {
6682 result->default_value_float_ =
6683 -std::numeric_limits<float>::infinity();
6684 } else if (proto.default_value() == "nan") {
6685 result->default_value_float_ =
6686 std::numeric_limits<float>::quiet_NaN();
6687 } else {
6688 result->default_value_float_ = io::SafeDoubleToFloat(
6689 io::NoLocaleStrtod(proto.default_value().c_str(), &end_pos));
6690 }
6691 break;
6692 case FieldDescriptor::CPPTYPE_DOUBLE:
6693 if (proto.default_value() == "inf") {
6694 result->default_value_double_ =
6695 std::numeric_limits<double>::infinity();
6696 } else if (proto.default_value() == "-inf") {
6697 result->default_value_double_ =
6698 -std::numeric_limits<double>::infinity();
6699 } else if (proto.default_value() == "nan") {
6700 result->default_value_double_ =
6701 std::numeric_limits<double>::quiet_NaN();
6702 } else {
6703 result->default_value_double_ =
6704 io::NoLocaleStrtod(proto.default_value().c_str(), &end_pos);
6705 }
6706 break;
6707 case FieldDescriptor::CPPTYPE_BOOL:
6708 if (proto.default_value() == "true") {
6709 result->default_value_bool_ = true;
6710 } else if (proto.default_value() == "false") {
6711 result->default_value_bool_ = false;
6712 } else {
6713 AddError(result->full_name(), proto,
6714 DescriptorPool::ErrorCollector::DEFAULT_VALUE,
6715 "Boolean default must be true or false.");
6716 }
6717 break;
6718 case FieldDescriptor::CPPTYPE_ENUM:
6719 // This will be filled in when cross-linking.
6720 result->default_value_enum_ = nullptr;
6721 break;
6722 case FieldDescriptor::CPPTYPE_STRING:
6723 if (result->type() == FieldDescriptor::TYPE_BYTES) {
6724 std::string value;
6725 if (absl::CUnescape(proto.default_value(), &value)) {
6726 result->default_value_string_ = alloc.AllocateStrings(value);
6727 } else {
6728 AddError(result->full_name(), proto,
6729 DescriptorPool::ErrorCollector::DEFAULT_VALUE,
6730 "Invalid escaping in default value.");
6731 }
6732 } else {
6733 result->default_value_string_ =
6734 alloc.AllocateStrings(proto.default_value());
6735 }
6736 break;
6737 case FieldDescriptor::CPPTYPE_MESSAGE:
6738 AddError(result->full_name(), proto,
6739 DescriptorPool::ErrorCollector::DEFAULT_VALUE,
6740 "Messages can't have default values.");
6741 result->has_default_value_ = false;
6742 result->default_generated_instance_ = nullptr;
6743 break;
6744 }
6745
6746 if (end_pos != nullptr) {
6747 // end_pos is only set non-null by the parsers for numeric types,
6748 // above. This checks that the default was non-empty and had no extra
6749 // junk after the end of the number.
6750 if (proto.default_value().empty() || *end_pos != '\0') {
6751 AddError(result->full_name(), proto,
6752 DescriptorPool::ErrorCollector::DEFAULT_VALUE, [&] {
6753 return absl::StrCat("Couldn't parse default value \"",
6754 proto.default_value(), "\".");
6755 });
6756 }
6757 }
6758 } else {
6759 // No explicit default value
6760 switch (result->cpp_type()) {
6761 case FieldDescriptor::CPPTYPE_INT32:
6762 result->default_value_int32_t_ = 0;
6763 break;
6764 case FieldDescriptor::CPPTYPE_INT64:
6765 result->default_value_int64_t_ = 0;
6766 break;
6767 case FieldDescriptor::CPPTYPE_UINT32:
6768 result->default_value_uint32_t_ = 0;
6769 break;
6770 case FieldDescriptor::CPPTYPE_UINT64:
6771 result->default_value_uint64_t_ = 0;
6772 break;
6773 case FieldDescriptor::CPPTYPE_FLOAT:
6774 result->default_value_float_ = 0.0f;
6775 break;
6776 case FieldDescriptor::CPPTYPE_DOUBLE:
6777 result->default_value_double_ = 0.0;
6778 break;
6779 case FieldDescriptor::CPPTYPE_BOOL:
6780 result->default_value_bool_ = false;
6781 break;
6782 case FieldDescriptor::CPPTYPE_ENUM:
6783 // This will be filled in when cross-linking.
6784 result->default_value_enum_ = nullptr;
6785 break;
6786 case FieldDescriptor::CPPTYPE_STRING:
6787 result->default_value_string_ = &internal::GetEmptyString();
6788 break;
6789 case FieldDescriptor::CPPTYPE_MESSAGE:
6790 result->default_generated_instance_ = nullptr;
6791 break;
6792 }
6793 }
6794 }
6795
6796 if (result->number() <= 0) {
6797 message_hints_[parent].RequestHintOnFieldNumbers(
6798 proto, DescriptorPool::ErrorCollector::NUMBER);
6799 AddError(result->full_name(), proto, DescriptorPool::ErrorCollector::NUMBER,
6800 "Field numbers must be positive integers.");
6801 } else if (!is_extension && result->number() > FieldDescriptor::kMaxNumber) {
6802 // Only validate that the number is within the valid field range if it is
6803 // not an extension. Since extension numbers are validated with the
6804 // extendee's valid set of extension numbers, and those are in turn
6805 // validated against the max allowed number, the check is unnecessary for
6806 // extension fields.
6807 // This avoids cross-linking issues that arise when attempting to check if
6808 // the extendee is a message_set_wire_format message, which has a higher max
6809 // on extension numbers.
6810 message_hints_[parent].RequestHintOnFieldNumbers(
6811 proto, DescriptorPool::ErrorCollector::NUMBER);
6812 AddError(result->full_name(), proto, DescriptorPool::ErrorCollector::NUMBER,
6813 [&] {
6814 return absl::Substitute(
6815 "Field numbers cannot be greater than $0.",
6816 FieldDescriptor::kMaxNumber);
6817 });
6818 }
6819
6820 if (is_extension) {
6821 if (!proto.has_extendee()) {
6822 AddError(result->full_name(), proto,
6823 DescriptorPool::ErrorCollector::EXTENDEE,
6824 "FieldDescriptorProto.extendee not set for extension field.");
6825 }
6826
6827 result->scope_.extension_scope = parent;
6828
6829 if (proto.has_oneof_index()) {
6830 AddError(result->full_name(), proto, DescriptorPool::ErrorCollector::TYPE,
6831 "FieldDescriptorProto.oneof_index should not be set for "
6832 "extensions.");
6833 }
6834 } else {
6835 if (proto.has_extendee()) {
6836 AddError(result->full_name(), proto,
6837 DescriptorPool::ErrorCollector::EXTENDEE,
6838 "FieldDescriptorProto.extendee set for non-extension field.");
6839 }
6840
6841 result->containing_type_ = parent;
6842
6843 if (proto.has_oneof_index()) {
6844 if (proto.oneof_index() < 0 ||
6845 proto.oneof_index() >= parent->oneof_decl_count()) {
6846 AddError(result->full_name(), proto,
6847 DescriptorPool::ErrorCollector::TYPE, [&] {
6848 return absl::Substitute(
6849 "FieldDescriptorProto.oneof_index $0 is "
6850 "out of range for type \"$1\".",
6851 proto.oneof_index(), parent->name());
6852 });
6853 } else {
6854 result->is_oneof_ = true;
6855 result->scope_.containing_oneof =
6856 parent->oneof_decl(proto.oneof_index());
6857 result->in_real_oneof_ = !result->proto3_optional_;
6858 }
6859 }
6860 }
6861
6862 // Copy options.
6863 AllocateOptions(proto, result, FieldDescriptorProto::kOptionsFieldNumber,
6864 "google.protobuf.FieldOptions", alloc);
6865
6866 AddSymbol(result->full_name(), parent, result->name(), proto, Symbol(result));
6867 }
6868
BuildExtensionRange(const DescriptorProto::ExtensionRange & proto,const Descriptor * parent,Descriptor::ExtensionRange * result,internal::FlatAllocator & alloc)6869 void DescriptorBuilder::BuildExtensionRange(
6870 const DescriptorProto::ExtensionRange& proto, const Descriptor* parent,
6871 Descriptor::ExtensionRange* result, internal::FlatAllocator& alloc) {
6872 result->start_ = proto.start();
6873 result->end_ = proto.end();
6874 result->containing_type_ = parent;
6875
6876 if (result->start_number() <= 0) {
6877 message_hints_[parent].RequestHintOnFieldNumbers(
6878 proto, DescriptorPool::ErrorCollector::NUMBER, result->start_number(),
6879 result->end_number());
6880 AddError(parent->full_name(), proto, DescriptorPool::ErrorCollector::NUMBER,
6881 "Extension numbers must be positive integers.");
6882 }
6883
6884 // Checking of the upper bound of the extension range is deferred until after
6885 // options interpreting. This allows messages with message_set_wire_format to
6886 // have extensions beyond FieldDescriptor::kMaxNumber, since the extension
6887 // numbers are actually used as int32s in the message_set_wire_format.
6888
6889 if (result->start_number() >= result->end_number()) {
6890 AddError(parent->full_name(), proto, DescriptorPool::ErrorCollector::NUMBER,
6891 "Extension range end number must be greater than start number.");
6892 }
6893
6894 // Copy options
6895 AllocateOptions(proto, result,
6896 DescriptorProto_ExtensionRange::kOptionsFieldNumber,
6897 "google.protobuf.ExtensionRangeOptions", alloc);
6898 }
6899
BuildReservedRange(const DescriptorProto::ReservedRange & proto,const Descriptor * parent,Descriptor::ReservedRange * result,internal::FlatAllocator &)6900 void DescriptorBuilder::BuildReservedRange(
6901 const DescriptorProto::ReservedRange& proto, const Descriptor* parent,
6902 Descriptor::ReservedRange* result, internal::FlatAllocator&) {
6903 result->start = proto.start();
6904 result->end = proto.end();
6905 if (result->start <= 0) {
6906 message_hints_[parent].RequestHintOnFieldNumbers(
6907 proto, DescriptorPool::ErrorCollector::NUMBER, result->start,
6908 result->end);
6909 AddError(parent->full_name(), proto, DescriptorPool::ErrorCollector::NUMBER,
6910 "Reserved numbers must be positive integers.");
6911 }
6912 if (result->start >= result->end) {
6913 AddError(parent->full_name(), proto, DescriptorPool::ErrorCollector::NUMBER,
6914 "Reserved range end number must be greater than start number.");
6915 }
6916 }
6917
BuildReservedRange(const EnumDescriptorProto::EnumReservedRange & proto,const EnumDescriptor * parent,EnumDescriptor::ReservedRange * result,internal::FlatAllocator &)6918 void DescriptorBuilder::BuildReservedRange(
6919 const EnumDescriptorProto::EnumReservedRange& proto,
6920 const EnumDescriptor* parent, EnumDescriptor::ReservedRange* result,
6921 internal::FlatAllocator&) {
6922 result->start = proto.start();
6923 result->end = proto.end();
6924
6925 if (result->start > result->end) {
6926 AddError(parent->full_name(), proto, DescriptorPool::ErrorCollector::NUMBER,
6927 "Reserved range end number must be greater than start number.");
6928 }
6929 }
6930
BuildOneof(const OneofDescriptorProto & proto,Descriptor * parent,OneofDescriptor * result,internal::FlatAllocator & alloc)6931 void DescriptorBuilder::BuildOneof(const OneofDescriptorProto& proto,
6932 Descriptor* parent, OneofDescriptor* result,
6933 internal::FlatAllocator& alloc) {
6934 result->all_names_ =
6935 AllocateNameStrings(parent->full_name(), proto.name(), alloc);
6936 ValidateSymbolName(proto.name(), result->full_name(), proto);
6937
6938 result->containing_type_ = parent;
6939
6940 // We need to fill these in later.
6941 result->field_count_ = 0;
6942 result->fields_ = nullptr;
6943
6944 // Copy options.
6945 AllocateOptions(proto, result, OneofDescriptorProto::kOptionsFieldNumber,
6946 "google.protobuf.OneofOptions", alloc);
6947
6948 AddSymbol(result->full_name(), parent, result->name(), proto, Symbol(result));
6949 }
6950
CheckEnumValueUniqueness(const EnumDescriptorProto & proto,const EnumDescriptor * result)6951 void DescriptorBuilder::CheckEnumValueUniqueness(
6952 const EnumDescriptorProto& proto, const EnumDescriptor* result) {
6953
6954 // Check that enum labels are still unique when we remove the enum prefix from
6955 // values that have it.
6956 //
6957 // This will fail for something like:
6958 //
6959 // enum MyEnum {
6960 // MY_ENUM_FOO = 0;
6961 // FOO = 1;
6962 // }
6963 //
6964 // By enforcing this reasonable constraint, we allow code generators to strip
6965 // the prefix and/or PascalCase it without creating conflicts. This can lead
6966 // to much nicer language-specific enums like:
6967 //
6968 // enum NameType {
6969 // FirstName = 1,
6970 // LastName = 2,
6971 // }
6972 //
6973 // Instead of:
6974 //
6975 // enum NameType {
6976 // NAME_TYPE_FIRST_NAME = 1,
6977 // NAME_TYPE_LAST_NAME = 2,
6978 // }
6979 PrefixRemover remover(result->name());
6980 absl::flat_hash_map<std::string, const EnumValueDescriptor*> values;
6981 for (int i = 0; i < result->value_count(); i++) {
6982 const EnumValueDescriptor* value = result->value(i);
6983 std::string stripped =
6984 EnumValueToPascalCase(remover.MaybeRemove(value->name()));
6985 auto insert_result = values.try_emplace(stripped, value);
6986 bool inserted = insert_result.second;
6987
6988 // We don't throw the error if the two conflicting symbols are identical, or
6989 // if they map to the same number. In the former case, the normal symbol
6990 // duplication error will fire so we don't need to (and its error message
6991 // will make more sense). We allow the latter case so users can create
6992 // aliases which add or remove the prefix (code generators that do prefix
6993 // stripping should de-dup the labels in this case).
6994 if (!inserted && insert_result.first->second->name() != value->name() &&
6995 insert_result.first->second->number() != value->number()) {
6996 auto make_error = [&] {
6997 return absl::StrFormat(
6998 "Enum name %s has the same name as %s if you ignore case and strip "
6999 "out the enum name prefix (if any). (If you are using allow_alias, "
7000 "please assign the same number to each enum value name.)",
7001 value->name(), insert_result.first->second->name());
7002 };
7003 // There are proto2 enums out there with conflicting names, so to preserve
7004 // compatibility we issue only a warning for proto2.
7005 if ((pool_->deprecated_legacy_json_field_conflicts_ ||
7006 IsLegacyJsonFieldConflictEnabled(result->options())) &&
7007 result->file()->edition() == Edition::EDITION_PROTO2) {
7008 AddWarning(value->full_name(), proto.value(i),
7009 DescriptorPool::ErrorCollector::NAME, make_error);
7010 continue;
7011 }
7012 AddError(value->full_name(), proto.value(i),
7013 DescriptorPool::ErrorCollector::NAME, make_error);
7014 }
7015 }
7016 }
7017
BuildEnum(const EnumDescriptorProto & proto,const Descriptor * parent,EnumDescriptor * result,internal::FlatAllocator & alloc)7018 void DescriptorBuilder::BuildEnum(const EnumDescriptorProto& proto,
7019 const Descriptor* parent,
7020 EnumDescriptor* result,
7021 internal::FlatAllocator& alloc) {
7022 const absl::string_view scope =
7023 (parent == nullptr) ? file_->package() : parent->full_name();
7024
7025 result->all_names_ = AllocateNameStrings(scope, proto.name(), alloc);
7026 ValidateSymbolName(proto.name(), result->full_name(), proto);
7027 result->file_ = file_;
7028 result->containing_type_ = parent;
7029 result->is_placeholder_ = false;
7030 result->is_unqualified_placeholder_ = false;
7031
7032 if (proto.value_size() == 0) {
7033 // We cannot allow enums with no values because this would mean there
7034 // would be no valid default value for fields of this type.
7035 AddError(result->full_name(), proto, DescriptorPool::ErrorCollector::NAME,
7036 "Enums must contain at least one value.");
7037 }
7038
7039 // Calculate the continuous sequence of the labels.
7040 // These can be fast-path'd during lookup and don't need to be added to the
7041 // tables.
7042 // We use uint16_t to save space for sequential_value_limit_, so stop before
7043 // overflowing it. Worst case, we are not taking full advantage on huge
7044 // enums, but it is unlikely.
7045 for (int i = 0;
7046 i < std::numeric_limits<uint16_t>::max() && i < proto.value_size() &&
7047 // We do the math in int64_t to avoid overflows.
7048 proto.value(i).number() ==
7049 static_cast<int64_t>(i) + proto.value(0).number();
7050 ++i) {
7051 result->sequential_value_limit_ = i;
7052 }
7053
7054 BUILD_ARRAY(proto, result, value, BuildEnumValue, result);
7055 BUILD_ARRAY(proto, result, reserved_range, BuildReservedRange, result);
7056
7057 // Copy reserved names.
7058 int reserved_name_count = proto.reserved_name_size();
7059 result->reserved_name_count_ = reserved_name_count;
7060 result->reserved_names_ =
7061 alloc.AllocateArray<const std::string*>(reserved_name_count);
7062 for (int i = 0; i < reserved_name_count; ++i) {
7063 result->reserved_names_[i] = alloc.AllocateStrings(proto.reserved_name(i));
7064 }
7065
7066 // Copy options.
7067 AllocateOptions(proto, result, EnumDescriptorProto::kOptionsFieldNumber,
7068 "google.protobuf.EnumOptions", alloc);
7069
7070 AddSymbol(result->full_name(), parent, result->name(), proto, Symbol(result));
7071
7072 for (int i = 0; i < proto.reserved_range_size(); i++) {
7073 const EnumDescriptorProto_EnumReservedRange& range1 =
7074 proto.reserved_range(i);
7075 for (int j = i + 1; j < proto.reserved_range_size(); j++) {
7076 const EnumDescriptorProto_EnumReservedRange& range2 =
7077 proto.reserved_range(j);
7078 if (range1.end() >= range2.start() && range2.end() >= range1.start()) {
7079 AddError(result->full_name(), proto.reserved_range(i),
7080 DescriptorPool::ErrorCollector::NUMBER, [&] {
7081 return absl::Substitute(
7082 "Reserved range $0 to $1 overlaps with "
7083 "already-defined range $2 to $3.",
7084 range2.start(), range2.end(), range1.start(),
7085 range1.end());
7086 });
7087 }
7088 }
7089 }
7090
7091 absl::flat_hash_set<absl::string_view> reserved_name_set;
7092 for (const std::string& name : proto.reserved_name()) {
7093 if (!reserved_name_set.insert(name).second) {
7094 AddError(name, proto, DescriptorPool::ErrorCollector::NAME, [&] {
7095 return absl::Substitute("Enum value \"$0\" is reserved multiple times.",
7096 name);
7097 });
7098 }
7099 }
7100
7101 for (int i = 0; i < result->value_count(); i++) {
7102 const EnumValueDescriptor* value = result->value(i);
7103 for (int j = 0; j < result->reserved_range_count(); j++) {
7104 const EnumDescriptor::ReservedRange* range = result->reserved_range(j);
7105 if (range->start <= value->number() && value->number() <= range->end) {
7106 AddError(value->full_name(), proto.reserved_range(j),
7107 DescriptorPool::ErrorCollector::NUMBER, [&] {
7108 return absl::Substitute(
7109 "Enum value \"$0\" uses reserved number $1.",
7110 value->name(), value->number());
7111 });
7112 }
7113 }
7114 if (reserved_name_set.contains(value->name())) {
7115 AddError(value->full_name(), proto.value(i),
7116 DescriptorPool::ErrorCollector::NAME, [&] {
7117 return absl::Substitute("Enum value \"$0\" is reserved.",
7118 value->name());
7119 });
7120 }
7121 }
7122 }
7123
BuildEnumValue(const EnumValueDescriptorProto & proto,const EnumDescriptor * parent,EnumValueDescriptor * result,internal::FlatAllocator & alloc)7124 void DescriptorBuilder::BuildEnumValue(const EnumValueDescriptorProto& proto,
7125 const EnumDescriptor* parent,
7126 EnumValueDescriptor* result,
7127 internal::FlatAllocator& alloc) {
7128 // Note: full_name for enum values is a sibling to the parent's name, not a
7129 // child of it.
7130 std::string full_name;
7131 size_t scope_len = parent->full_name().size() - parent->name().size();
7132 full_name.reserve(scope_len + proto.name().size());
7133 full_name.append(parent->full_name().data(), scope_len);
7134 full_name.append(proto.name());
7135
7136 result->all_names_ =
7137 alloc.AllocateStrings(proto.name(), std::move(full_name));
7138 result->number_ = proto.number();
7139 result->type_ = parent;
7140
7141 ValidateSymbolName(proto.name(), result->full_name(), proto);
7142
7143 // Copy options.
7144 AllocateOptions(proto, result, EnumValueDescriptorProto::kOptionsFieldNumber,
7145 "google.protobuf.EnumValueOptions", alloc);
7146
7147 // Again, enum values are weird because we makes them appear as siblings
7148 // of the enum type instead of children of it. So, we use
7149 // parent->containing_type() as the value's parent.
7150 bool added_to_outer_scope =
7151 AddSymbol(result->full_name(), parent->containing_type(), result->name(),
7152 proto, Symbol::EnumValue(result, 0));
7153
7154 // However, we also want to be able to search for values within a single
7155 // enum type, so we add it as a child of the enum type itself, too.
7156 // Note: This could fail, but if it does, the error has already been
7157 // reported by the above AddSymbol() call, so we ignore the return code.
7158 bool added_to_inner_scope = file_tables_->AddAliasUnderParent(
7159 parent, result->name(), Symbol::EnumValue(result, 1));
7160
7161 if (added_to_inner_scope && !added_to_outer_scope) {
7162 // This value did not conflict with any values defined in the same enum,
7163 // but it did conflict with some other symbol defined in the enum type's
7164 // scope. Let's print an additional error to explain this.
7165 std::string outer_scope;
7166 if (parent->containing_type() == nullptr) {
7167 outer_scope = file_->package();
7168 } else {
7169 outer_scope = parent->containing_type()->full_name();
7170 }
7171
7172 if (outer_scope.empty()) {
7173 outer_scope = "the global scope";
7174 } else {
7175 outer_scope = absl::StrCat("\"", outer_scope, "\"");
7176 }
7177
7178 AddError(
7179 result->full_name(), proto, DescriptorPool::ErrorCollector::NAME, [&] {
7180 return absl::StrCat(
7181 "Note that enum values use C++ scoping rules, meaning that "
7182 "enum values are siblings of their type, not children of it. "
7183 "Therefore, \"",
7184 result->name(), "\" must be unique within ", outer_scope,
7185 ", not just within \"", parent->name(), "\".");
7186 });
7187 }
7188
7189 // An enum is allowed to define two numbers that refer to the same value.
7190 // FindValueByNumber() should return the first such value, so we simply
7191 // ignore AddEnumValueByNumber()'s return code.
7192 file_tables_->AddEnumValueByNumber(result);
7193 }
7194
BuildService(const ServiceDescriptorProto & proto,const void *,ServiceDescriptor * result,internal::FlatAllocator & alloc)7195 void DescriptorBuilder::BuildService(const ServiceDescriptorProto& proto,
7196 const void* /* dummy */,
7197 ServiceDescriptor* result,
7198 internal::FlatAllocator& alloc) {
7199 result->all_names_ =
7200 AllocateNameStrings(file_->package(), proto.name(), alloc);
7201 result->file_ = file_;
7202 ValidateSymbolName(proto.name(), result->full_name(), proto);
7203
7204 BUILD_ARRAY(proto, result, method, BuildMethod, result);
7205
7206 // Copy options.
7207 AllocateOptions(proto, result, ServiceDescriptorProto::kOptionsFieldNumber,
7208 "google.protobuf.ServiceOptions", alloc);
7209
7210 AddSymbol(result->full_name(), nullptr, result->name(), proto,
7211 Symbol(result));
7212 }
7213
BuildMethod(const MethodDescriptorProto & proto,const ServiceDescriptor * parent,MethodDescriptor * result,internal::FlatAllocator & alloc)7214 void DescriptorBuilder::BuildMethod(const MethodDescriptorProto& proto,
7215 const ServiceDescriptor* parent,
7216 MethodDescriptor* result,
7217 internal::FlatAllocator& alloc) {
7218 result->service_ = parent;
7219 result->all_names_ =
7220 AllocateNameStrings(parent->full_name(), proto.name(), alloc);
7221
7222 ValidateSymbolName(proto.name(), result->full_name(), proto);
7223
7224 // These will be filled in when cross-linking.
7225 result->input_type_.Init();
7226 result->output_type_.Init();
7227
7228 // Copy options.
7229 AllocateOptions(proto, result, MethodDescriptorProto::kOptionsFieldNumber,
7230 "google.protobuf.MethodOptions", alloc);
7231
7232 result->client_streaming_ = proto.client_streaming();
7233 result->server_streaming_ = proto.server_streaming();
7234
7235 AddSymbol(result->full_name(), parent, result->name(), proto, Symbol(result));
7236 }
7237
7238 #undef BUILD_ARRAY
7239
7240 // -------------------------------------------------------------------
7241
CrossLinkFile(FileDescriptor * file,const FileDescriptorProto & proto)7242 void DescriptorBuilder::CrossLinkFile(FileDescriptor* file,
7243 const FileDescriptorProto& proto) {
7244 for (int i = 0; i < file->message_type_count(); i++) {
7245 CrossLinkMessage(&file->message_types_[i], proto.message_type(i));
7246 }
7247
7248 for (int i = 0; i < file->extension_count(); i++) {
7249 CrossLinkField(&file->extensions_[i], proto.extension(i));
7250 }
7251
7252 for (int i = 0; i < file->service_count(); i++) {
7253 CrossLinkService(&file->services_[i], proto.service(i));
7254 }
7255 }
7256
CrossLinkMessage(Descriptor * message,const DescriptorProto & proto)7257 void DescriptorBuilder::CrossLinkMessage(Descriptor* message,
7258 const DescriptorProto& proto) {
7259 for (int i = 0; i < message->nested_type_count(); i++) {
7260 CrossLinkMessage(&message->nested_types_[i], proto.nested_type(i));
7261 }
7262
7263 for (int i = 0; i < message->field_count(); i++) {
7264 CrossLinkField(&message->fields_[i], proto.field(i));
7265 }
7266
7267 for (int i = 0; i < message->extension_count(); i++) {
7268 CrossLinkField(&message->extensions_[i], proto.extension(i));
7269 }
7270
7271 // Set up field array for each oneof.
7272
7273 // First count the number of fields per oneof.
7274 for (int i = 0; i < message->field_count(); i++) {
7275 const OneofDescriptor* oneof_decl = message->field(i)->containing_oneof();
7276 if (oneof_decl != nullptr) {
7277 // Make sure fields belonging to the same oneof are defined consecutively.
7278 // This enables optimizations in codegens and reflection libraries to
7279 // skip fields in the oneof group, as only one of the field can be set.
7280 // Note that field_count() returns how many fields in this oneof we have
7281 // seen so far. field_count() > 0 guarantees that i > 0, so field(i-1) is
7282 // safe.
7283 if (oneof_decl->field_count() > 0 &&
7284 message->field(i - 1)->containing_oneof() != oneof_decl) {
7285 AddError(
7286 absl::StrCat(message->full_name(), ".",
7287 message->field(i - 1)->name()),
7288 proto.field(i - 1), DescriptorPool::ErrorCollector::TYPE, [&] {
7289 return absl::Substitute(
7290 "Fields in the same oneof must be defined consecutively. "
7291 "\"$0\" cannot be defined before the completion of the "
7292 "\"$1\" oneof definition.",
7293 message->field(i - 1)->name(), oneof_decl->name());
7294 });
7295 }
7296 // Must go through oneof_decls_ array to get a non-const version of the
7297 // OneofDescriptor.
7298 auto& out_oneof_decl = message->oneof_decls_[oneof_decl->index()];
7299 if (out_oneof_decl.field_count_ == 0) {
7300 out_oneof_decl.fields_ = message->field(i);
7301 }
7302
7303 if (!had_errors_) {
7304 // Verify that they are contiguous.
7305 // This is assumed by OneofDescriptor::field(i).
7306 // But only if there are no errors.
7307 ABSL_CHECK_EQ(out_oneof_decl.fields_ + out_oneof_decl.field_count_,
7308 message->field(i));
7309 }
7310 ++out_oneof_decl.field_count_;
7311 }
7312 }
7313
7314 // Then verify the sizes.
7315 for (int i = 0; i < message->oneof_decl_count(); i++) {
7316 OneofDescriptor* oneof_decl = &message->oneof_decls_[i];
7317
7318 if (oneof_decl->field_count() == 0) {
7319 AddError(absl::StrCat(message->full_name(), ".", oneof_decl->name()),
7320 proto.oneof_decl(i), DescriptorPool::ErrorCollector::NAME,
7321 "Oneof must have at least one field.");
7322 }
7323 }
7324
7325 for (int i = 0; i < message->field_count(); i++) {
7326 const FieldDescriptor* field = message->field(i);
7327 if (field->proto3_optional_) {
7328 if (!field->containing_oneof() ||
7329 !field->containing_oneof()->is_synthetic()) {
7330 AddError(message->full_name(), proto.field(i),
7331 DescriptorPool::ErrorCollector::OTHER,
7332 "Fields with proto3_optional set must be "
7333 "a member of a one-field oneof");
7334 }
7335 }
7336 }
7337
7338 // Synthetic oneofs must be last.
7339 int first_synthetic = -1;
7340 for (int i = 0; i < message->oneof_decl_count(); i++) {
7341 if (message->oneof_decl(i)->is_synthetic()) {
7342 if (first_synthetic == -1) {
7343 first_synthetic = i;
7344 }
7345 } else {
7346 if (first_synthetic != -1) {
7347 AddError(message->full_name(), proto.oneof_decl(i),
7348 DescriptorPool::ErrorCollector::OTHER,
7349 "Synthetic oneofs must be after all other oneofs");
7350 }
7351 }
7352 }
7353
7354 if (first_synthetic == -1) {
7355 message->real_oneof_decl_count_ = message->oneof_decl_count_;
7356 } else {
7357 message->real_oneof_decl_count_ = first_synthetic;
7358 }
7359 }
7360
CheckExtensionDeclarationFieldType(const FieldDescriptor & field,const FieldDescriptorProto & proto,absl::string_view type)7361 void DescriptorBuilder::CheckExtensionDeclarationFieldType(
7362 const FieldDescriptor& field, const FieldDescriptorProto& proto,
7363 absl::string_view type) {
7364 if (had_errors_) return;
7365 std::string actual_type = field.type_name();
7366 std::string expected_type(type);
7367 if (field.message_type() || field.enum_type()) {
7368 // Field message type descriptor can be in a partial state which will cause
7369 // segmentation fault if it is being accessed.
7370 if (had_errors_) return;
7371 absl::string_view full_name = field.message_type() != nullptr
7372 ? field.message_type()->full_name()
7373 : field.enum_type()->full_name();
7374 actual_type = absl::StrCat(".", full_name);
7375 }
7376 if (!IsNonMessageType(type) && !absl::StartsWith(type, ".")) {
7377 expected_type = absl::StrCat(".", type);
7378 }
7379 if (expected_type != actual_type) {
7380 AddError(field.full_name(), proto, DescriptorPool::ErrorCollector::EXTENDEE,
7381 [&] {
7382 return absl::Substitute(
7383 "\"$0\" extension field $1 is expected to be type "
7384 "\"$2\", not \"$3\".",
7385 field.containing_type()->full_name(), field.number(),
7386 expected_type, actual_type);
7387 });
7388 }
7389 }
7390
7391
CheckExtensionDeclaration(const FieldDescriptor & field,const FieldDescriptorProto & proto,absl::string_view declared_full_name,absl::string_view declared_type_name,bool is_repeated)7392 void DescriptorBuilder::CheckExtensionDeclaration(
7393 const FieldDescriptor& field, const FieldDescriptorProto& proto,
7394 absl::string_view declared_full_name, absl::string_view declared_type_name,
7395 bool is_repeated) {
7396 if (!declared_type_name.empty()) {
7397 CheckExtensionDeclarationFieldType(field, proto, declared_type_name);
7398 }
7399 if (!declared_full_name.empty()) {
7400 std::string actual_full_name = absl::StrCat(".", field.full_name());
7401 if (declared_full_name != actual_full_name) {
7402 AddError(field.full_name(), proto,
7403 DescriptorPool::ErrorCollector::EXTENDEE, [&] {
7404 return absl::Substitute(
7405 "\"$0\" extension field $1 is expected to have field name "
7406 "\"$2\", not \"$3\".",
7407 field.containing_type()->full_name(), field.number(),
7408 declared_full_name, actual_full_name);
7409 });
7410 }
7411 }
7412
7413 if (is_repeated != field.is_repeated()) {
7414 AddError(field.full_name(), proto, DescriptorPool::ErrorCollector::EXTENDEE,
7415 [&] {
7416 return absl::Substitute(
7417 "\"$0\" extension field $1 is expected to be $2.",
7418 field.containing_type()->full_name(), field.number(),
7419 is_repeated ? "repeated" : "optional");
7420 });
7421 }
7422 }
7423
// Cross-links a single field (regular field or extension) with the symbols
// it refers to.
//
// Steps, in order:
//   1. If `proto` names an extendee, resolve it, store it as the field's
//      containing type and check that the field number lies inside one of
//      the extendee's extension ranges.
//   2. If the field belongs to a oneof, require LABEL_OPTIONAL.
//   3. If `proto` has a type_name, resolve it to a message or enum and wire
//      up the type descriptor and, for enums, the default value. In lazy mode
//      an unresolved type is recorded by name for later lookup instead.
//   4. Register the field in the by-number table and, for extensions, in the
//      pool's extension table, reporting number conflicts.
//
// Several failures return early; in those cases the field is NOT registered
// in the by-number or extension tables.
void DescriptorBuilder::CrossLinkField(FieldDescriptor* field,
                                       const FieldDescriptorProto& proto) {
  if (proto.has_extendee()) {
    Symbol extendee =
        LookupSymbol(proto.extendee(), field->full_name(),
                     DescriptorPool::PLACEHOLDER_EXTENDABLE_MESSAGE);
    if (extendee.IsNull()) {
      AddNotDefinedError(field->full_name(), proto,
                         DescriptorPool::ErrorCollector::EXTENDEE,
                         proto.extendee());
      return;
    } else if (extendee.type() != Symbol::MESSAGE) {
      AddError(field->full_name(), proto,
               DescriptorPool::ErrorCollector::EXTENDEE, [&] {
                 return absl::StrCat("\"", proto.extendee(),
                                     "\" is not a message type.");
               });
      return;
    }
    // Extensions only learn their containing type here.
    field->containing_type_ = extendee.descriptor();

    const Descriptor::ExtensionRange* extension_range =
        field->containing_type()->FindExtensionRangeContainingNumber(
            field->number());

    // Not fatal for this function: the error is recorded and linking goes on.
    if (extension_range == nullptr) {
      AddError(field->full_name(), proto,
               DescriptorPool::ErrorCollector::NUMBER, [&] {
                 return absl::Substitute(
                     "\"$0\" does not declare $1 as an "
                     "extension number.",
                     field->containing_type()->full_name(), field->number());
               });
    }
  }

  if (field->containing_oneof() != nullptr) {
    if (field->label() != FieldDescriptor::LABEL_OPTIONAL) {
      // Note that this error will never happen when parsing .proto files.
      // It can only happen if you manually construct a FileDescriptorProto
      // that is incorrect.
      AddError(field->full_name(), proto, DescriptorPool::ErrorCollector::NAME,
               "Fields of oneofs must themselves have label LABEL_OPTIONAL.");
    }
  }

  if (proto.has_type_name()) {
    // Assume we are expecting a message type unless the proto contains some
    // evidence that it expects an enum type.  This only makes a difference if
    // we end up creating a placeholder.
    bool expecting_enum = (proto.type() == FieldDescriptorProto::TYPE_ENUM) ||
                          proto.has_default_value();

    // In case of weak fields we force building the dependency. We need to know
    // if the type exists or not. If it doesn't exist we substitute Empty which
    // should only be done if the type can't be found in the generated pool.
    // TODO Ideally we should query the database directly to check
    // if weak fields exist or not so that we don't need to force building
    // weak dependencies. However the name lookup rules for symbols are
    // somewhat complicated, so I defer it to another CL.
    bool is_weak = !pool_->enforce_weak_ && proto.options().weak();
    bool is_lazy = pool_->lazily_build_dependencies_ && !is_weak;

    // The last argument is !is_lazy: the lookup is only asked to build the
    // dependency when this field is not being linked lazily.
    Symbol type =
        LookupSymbol(proto.type_name(), field->full_name(),
                     expecting_enum ? DescriptorPool::PLACEHOLDER_ENUM
                                    : DescriptorPool::PLACEHOLDER_MESSAGE,
                     LOOKUP_TYPES, !is_lazy);

    if (type.IsNull()) {
      if (is_lazy) {
        ABSL_CHECK(field->type_ == FieldDescriptor::TYPE_MESSAGE ||
                   field->type_ == FieldDescriptor::TYPE_GROUP ||
                   field->type_ == FieldDescriptor::TYPE_ENUM)
            << proto;
        // Save the symbol names for later for lookup, and allocate the once
        // object needed for the accessors.
        const std::string& name = proto.type_name();

        // Room for both strings, each with its terminating NUL.
        int name_sizes = static_cast<int>(name.size() + 1 +
                                          proto.default_value().size() + 1);

        // One allocation holds the once_flag immediately followed by the two
        // NUL-terminated strings: the type name, then the default value.
        field->type_once_ = ::new (tables_->AllocateBytes(
            static_cast<int>(sizeof(absl::once_flag)) + name_sizes))
            absl::once_flag{};
        char* names = reinterpret_cast<char*>(field->type_once_ + 1);

        memcpy(names, name.c_str(), name.size() + 1);
        memcpy(names + name.size() + 1, proto.default_value().c_str(),
               proto.default_value().size() + 1);

        // AddFieldByNumber and AddExtension are done later in this function,
        // and can/must be done if the field type was not found. The related
        // error checking is not necessary when in lazily_build_dependencies_
        // mode, and can't be done without building the type's descriptor,
        // which we don't want to do.
        file_tables_->AddFieldByNumber(field);
        if (field->is_extension()) {
          tables_->AddExtension(field);
        }
        return;
      } else {
        // If the type is a weak type, we change the type to a google.protobuf.Empty
        // field.
        if (is_weak) {
          type = FindSymbol(kNonLinkedWeakMessageReplacementName);
        }
        if (type.IsNull()) {
          AddNotDefinedError(field->full_name(), proto,
                             DescriptorPool::ErrorCollector::TYPE,
                             proto.type_name());
          return;
        }
      }
    }

    if (!proto.has_type()) {
      // Choose field type based on symbol.
      if (type.type() == Symbol::MESSAGE) {
        field->type_ = FieldDescriptor::TYPE_MESSAGE;
      } else if (type.type() == Symbol::ENUM) {
        field->type_ = FieldDescriptor::TYPE_ENUM;
      } else {
        AddError(field->full_name(), proto,
                 DescriptorPool::ErrorCollector::TYPE, [&] {
                   return absl::StrCat("\"", proto.type_name(),
                                       "\" is not a type.");
                 });
        return;
      }
    }

    if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
      // A null result here means the resolved symbol is not a message.
      field->type_descriptor_.message_type = type.descriptor();
      if (field->type_descriptor_.message_type == nullptr) {
        AddError(field->full_name(), proto,
                 DescriptorPool::ErrorCollector::TYPE, [&] {
                   return absl::StrCat("\"", proto.type_name(),
                                       "\" is not a message type.");
                 });
        return;
      }

      if (field->has_default_value()) {
        AddError(field->full_name(), proto,
                 DescriptorPool::ErrorCollector::DEFAULT_VALUE,
                 "Messages can't have default values.");
      }
    } else if (field->cpp_type() == FieldDescriptor::CPPTYPE_ENUM) {
      // A null result here means the resolved symbol is not an enum.
      field->type_descriptor_.enum_type = type.enum_descriptor();
      if (field->type_descriptor_.enum_type == nullptr) {
        AddError(field->full_name(), proto,
                 DescriptorPool::ErrorCollector::TYPE, [&] {
                   return absl::StrCat("\"", proto.type_name(),
                                       "\" is not an enum type.");
                 });
        return;
      }

      if (field->enum_type()->is_placeholder_) {
        // We can't look up default values for placeholder types.  We'll have
        // to just drop them.
        field->has_default_value_ = false;
      }

      if (field->has_default_value()) {
        // Ensure that the default value is an identifier. Parser cannot always
        // verify this because it does not have complete type information.
        // N.B. that this check yields better error messages but is not
        // necessary for correctness (an enum symbol must be a valid identifier
        // anyway), only for better errors.
        if (!io::Tokenizer::IsIdentifier(proto.default_value())) {
          AddError(field->full_name(), proto,
                   DescriptorPool::ErrorCollector::DEFAULT_VALUE,
                   "Default value for an enum field must be an identifier.");
        } else {
          // We can't just use field->enum_type()->FindValueByName() here
          // because that locks the pool's mutex, which we have already locked
          // at this point.
          const EnumValueDescriptor* default_value =
              LookupSymbolNoPlaceholder(proto.default_value(),
                                        field->enum_type()->full_name())
                  .enum_value_descriptor();

          // The symbol found must be a value of this very enum, not merely a
          // value with the right name from some other enum.
          if (default_value != nullptr &&
              default_value->type() == field->enum_type()) {
            field->default_value_enum_ = default_value;
          } else {
            AddError(field->full_name(), proto,
                     DescriptorPool::ErrorCollector::DEFAULT_VALUE, [&] {
                       return absl::StrCat("Enum type \"",
                                           field->enum_type()->full_name(),
                                           "\" has no value named \"",
                                           proto.default_value(), "\".");
                     });
          }
        }
      } else if (field->enum_type()->value_count() > 0) {
        // All enums must have at least one value, or we would have reported
        // an error elsewhere.  We use the first defined value as the default
        // if a default is not explicitly defined.
        field->default_value_enum_ = field->enum_type()->value(0);
      }
    } else {
      AddError(field->full_name(), proto, DescriptorPool::ErrorCollector::TYPE,
               "Field with primitive type has type_name.");
    }
  } else {
    if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE ||
        field->cpp_type() == FieldDescriptor::CPPTYPE_ENUM) {
      AddError(field->full_name(), proto, DescriptorPool::ErrorCollector::TYPE,
               "Field with message or enum type missing type_name.");
    }
  }

  // Add the field to the fields-by-number table.
  // Note:  We have to do this *after* cross-linking because extensions do not
  // know their containing type until now. If we're in
  // lazily_build_dependencies_ mode, we're guaranteed there's no errors, so no
  // risk to calling containing_type() or other accessors that will build
  // dependencies.
  if (!file_tables_->AddFieldByNumber(field)) {
    // The number is already taken in this file's tables; name the field that
    // holds it in the error message.
    const FieldDescriptor* conflicting_field = file_tables_->FindFieldByNumber(
        field->containing_type(), field->number());
    const absl::string_view containing_type_name =
        field->containing_type() == nullptr
            ? absl::string_view("unknown")
            : field->containing_type()->full_name();
    if (field->is_extension()) {
      AddError(field->full_name(), proto,
               DescriptorPool::ErrorCollector::NUMBER, [&] {
                 return absl::Substitute(
                     "Extension number $0 has already been used "
                     "in \"$1\" by extension \"$2\".",
                     field->number(), containing_type_name,
                     conflicting_field->full_name());
               });
    } else {
      AddError(field->full_name(), proto,
               DescriptorPool::ErrorCollector::NUMBER, [&] {
                 return absl::Substitute(
                     "Field number $0 has already been used in "
                     "\"$1\" by field \"$2\".",
                     field->number(), containing_type_name,
                     conflicting_field->name());
               });
    }
  } else {
    if (field->is_extension()) {
      // The by-number insert succeeded, but the extension table in tables_
      // may still hold a conflicting extension; the message names the file
      // that defines it.
      if (!tables_->AddExtension(field)) {
        auto make_error = [&] {
          const FieldDescriptor* conflicting_field =
              tables_->FindExtension(field->containing_type(), field->number());
          const absl::string_view containing_type_name =
              field->containing_type() == nullptr
                  ? absl::string_view("unknown")
                  : field->containing_type()->full_name();
          return absl::Substitute(
              "Extension number $0 has already been used in \"$1\" by "
              "extension "
              "\"$2\" defined in $3.",
              field->number(), containing_type_name,
              conflicting_field->full_name(),
              conflicting_field->file()->name());
        };
        // Conflicting extension numbers should be an error. However, before
        // turning this into an error we need to fix all existing broken
        // protos first.
        // TODO: Change this to an error.
        AddWarning(field->full_name(), proto,
                   DescriptorPool::ErrorCollector::NUMBER, make_error);
      }
    }
  }
}
7699
CrossLinkService(ServiceDescriptor * service,const ServiceDescriptorProto & proto)7700 void DescriptorBuilder::CrossLinkService(ServiceDescriptor* service,
7701 const ServiceDescriptorProto& proto) {
7702 for (int i = 0; i < service->method_count(); i++) {
7703 CrossLinkMethod(&service->methods_[i], proto.method(i));
7704 }
7705 }
7706
CrossLinkMethod(MethodDescriptor * method,const MethodDescriptorProto & proto)7707 void DescriptorBuilder::CrossLinkMethod(MethodDescriptor* method,
7708 const MethodDescriptorProto& proto) {
7709 Symbol input_type =
7710 LookupSymbol(proto.input_type(), method->full_name(),
7711 DescriptorPool::PLACEHOLDER_MESSAGE, LOOKUP_ALL,
7712 !pool_->lazily_build_dependencies_);
7713 if (input_type.IsNull()) {
7714 if (!pool_->lazily_build_dependencies_) {
7715 AddNotDefinedError(method->full_name(), proto,
7716 DescriptorPool::ErrorCollector::INPUT_TYPE,
7717 proto.input_type());
7718 } else {
7719 method->input_type_.SetLazy(proto.input_type(), file_);
7720 }
7721 } else if (input_type.type() != Symbol::MESSAGE) {
7722 AddError(method->full_name(), proto,
7723 DescriptorPool::ErrorCollector::INPUT_TYPE, [&] {
7724 return absl::StrCat("\"", proto.input_type(),
7725 "\" is not a message type.");
7726 });
7727 } else {
7728 method->input_type_.Set(input_type.descriptor());
7729 }
7730
7731 Symbol output_type =
7732 LookupSymbol(proto.output_type(), method->full_name(),
7733 DescriptorPool::PLACEHOLDER_MESSAGE, LOOKUP_ALL,
7734 !pool_->lazily_build_dependencies_);
7735 if (output_type.IsNull()) {
7736 if (!pool_->lazily_build_dependencies_) {
7737 AddNotDefinedError(method->full_name(), proto,
7738 DescriptorPool::ErrorCollector::OUTPUT_TYPE,
7739 proto.output_type());
7740 } else {
7741 method->output_type_.SetLazy(proto.output_type(), file_);
7742 }
7743 } else if (output_type.type() != Symbol::MESSAGE) {
7744 AddError(method->full_name(), proto,
7745 DescriptorPool::ErrorCollector::OUTPUT_TYPE, [&] {
7746 return absl::StrCat("\"", proto.output_type(),
7747 "\" is not a message type.");
7748 });
7749 } else {
7750 method->output_type_.Set(output_type.descriptor());
7751 }
7752 }
7753
SuggestFieldNumbers(FileDescriptor * file,const FileDescriptorProto & proto)7754 void DescriptorBuilder::SuggestFieldNumbers(FileDescriptor* file,
7755 const FileDescriptorProto& proto) {
7756 for (int message_index = 0; message_index < file->message_type_count();
7757 message_index++) {
7758 const Descriptor* message = &file->message_types_[message_index];
7759 auto hints_it = message_hints_.find(message);
7760 if (hints_it == message_hints_.end()) continue;
7761 auto* hints = &hints_it->second;
7762 constexpr int kMaxSuggestions = 3;
7763 int fields_to_suggest = std::min(kMaxSuggestions, hints->fields_to_suggest);
7764 if (fields_to_suggest <= 0) continue;
7765 struct Range {
7766 int from;
7767 int to;
7768 };
7769 std::vector<Range> used_ordinals;
7770 auto add_ordinal = [&](int ordinal) {
7771 if (ordinal <= 0 || ordinal > FieldDescriptor::kMaxNumber) return;
7772 if (!used_ordinals.empty() && ordinal == used_ordinals.back().to) {
7773 used_ordinals.back().to = ordinal + 1;
7774 } else {
7775 used_ordinals.push_back({ordinal, ordinal + 1});
7776 }
7777 };
7778 auto add_range = [&](int from, int to) {
7779 from = std::max(0, std::min(FieldDescriptor::kMaxNumber + 1, from));
7780 to = std::max(0, std::min(FieldDescriptor::kMaxNumber + 1, to));
7781 if (from >= to) return;
7782 used_ordinals.push_back({from, to});
7783 };
7784 for (int i = 0; i < message->field_count(); i++) {
7785 add_ordinal(message->field(i)->number());
7786 }
7787 for (int i = 0; i < message->extension_count(); i++) {
7788 add_ordinal(message->extension(i)->number());
7789 }
7790 for (int i = 0; i < message->reserved_range_count(); i++) {
7791 auto range = message->reserved_range(i);
7792 add_range(range->start, range->end);
7793 }
7794 for (int i = 0; i < message->extension_range_count(); i++) {
7795 auto range = message->extension_range(i);
7796 add_range(range->start_number(), range->end_number());
7797 }
7798 used_ordinals.push_back(
7799 {FieldDescriptor::kMaxNumber, FieldDescriptor::kMaxNumber + 1});
7800 used_ordinals.push_back({FieldDescriptor::kFirstReservedNumber,
7801 FieldDescriptor::kLastReservedNumber});
7802 std::sort(used_ordinals.begin(), used_ordinals.end(),
7803 [](Range lhs, Range rhs) {
7804 return std::tie(lhs.from, lhs.to) < std::tie(rhs.from, rhs.to);
7805 });
7806 int current_ordinal = 1;
7807 if (hints->first_reason) {
7808 auto make_error = [&] {
7809 std::stringstream id_list;
7810 id_list << "Suggested field numbers for " << message->full_name()
7811 << ": ";
7812 const char* separator = "";
7813 for (auto& current_range : used_ordinals) {
7814 while (current_ordinal < current_range.from &&
7815 fields_to_suggest > 0) {
7816 id_list << separator << current_ordinal++;
7817 separator = ", ";
7818 fields_to_suggest--;
7819 }
7820 if (fields_to_suggest == 0) break;
7821 current_ordinal = std::max(current_ordinal, current_range.to);
7822 }
7823 return id_list.str();
7824 };
7825 AddError(message->full_name(), *hints->first_reason,
7826 hints->first_reason_location, make_error);
7827 }
7828 }
7829 }
7830
7831 // -------------------------------------------------------------------
7832
7833 // Determine if the file uses optimize_for = LITE_RUNTIME, being careful to
7834 // avoid problems that exist at init time.
IsLite(const FileDescriptor * file)7835 static bool IsLite(const FileDescriptor* file) {
7836 // TODO: I don't even remember how many of these conditions are
7837 // actually possible. I'm just being super-safe.
7838 return file != nullptr &&
7839 &file->options() != &FileOptions::default_instance() &&
7840 file->options().optimize_for() == FileOptions::LITE_RUNTIME;
7841 }
7842
ValidateOptions(const FileDescriptor * file,const FileDescriptorProto & proto)7843 void DescriptorBuilder::ValidateOptions(const FileDescriptor* file,
7844 const FileDescriptorProto& proto) {
7845 ValidateFileFeatures(file, proto);
7846
7847 // Lite files can only be imported by other Lite files.
7848 if (!IsLite(file)) {
7849 for (int i = 0; i < file->dependency_count(); i++) {
7850 if (IsLite(file->dependency(i))) {
7851 AddError(file->dependency(i)->name(), proto,
7852 DescriptorPool::ErrorCollector::IMPORT, [&] {
7853 return absl::StrCat(
7854 "Files that do not use optimize_for = LITE_RUNTIME "
7855 "cannot import files which do use this option. This "
7856 "file is not lite, but it imports \"",
7857 file->dependency(i)->name(), "\" which is.");
7858 });
7859 break;
7860 }
7861 }
7862 }
7863 if (file->edition() == Edition::EDITION_PROTO3) {
7864 ValidateProto3(file, proto);
7865 }
7866 }
7867
ValidateProto3(const FileDescriptor * file,const FileDescriptorProto & proto)7868 void DescriptorBuilder::ValidateProto3(const FileDescriptor* file,
7869 const FileDescriptorProto& proto) {
7870 for (int i = 0; i < file->extension_count(); ++i) {
7871 ValidateProto3Field(file->extensions_ + i, proto.extension(i));
7872 }
7873 for (int i = 0; i < file->message_type_count(); ++i) {
7874 ValidateProto3Message(file->message_types_ + i, proto.message_type(i));
7875 }
7876 }
7877
ValidateProto3Message(const Descriptor * message,const DescriptorProto & proto)7878 void DescriptorBuilder::ValidateProto3Message(const Descriptor* message,
7879 const DescriptorProto& proto) {
7880 for (int i = 0; i < message->nested_type_count(); ++i) {
7881 ValidateProto3Message(message->nested_types_ + i, proto.nested_type(i));
7882 }
7883 for (int i = 0; i < message->field_count(); ++i) {
7884 ValidateProto3Field(message->fields_ + i, proto.field(i));
7885 }
7886 for (int i = 0; i < message->extension_count(); ++i) {
7887 ValidateProto3Field(message->extensions_ + i, proto.extension(i));
7888 }
7889 if (message->extension_range_count() > 0) {
7890 AddError(message->full_name(), proto.extension_range(0),
7891 DescriptorPool::ErrorCollector::NUMBER,
7892 "Extension ranges are not allowed in proto3.");
7893 }
7894 if (message->options().message_set_wire_format()) {
7895 // Using MessageSet doesn't make sense since we disallow extensions.
7896 AddError(message->full_name(), proto, DescriptorPool::ErrorCollector::NAME,
7897 "MessageSet is not supported in proto3.");
7898 }
7899 }
7900
ValidateProto3Field(const FieldDescriptor * field,const FieldDescriptorProto & proto)7901 void DescriptorBuilder::ValidateProto3Field(const FieldDescriptor* field,
7902 const FieldDescriptorProto& proto) {
7903 if (field->is_extension() &&
7904 !AllowedExtendeeInProto3(field->containing_type()->full_name())) {
7905 AddError(field->full_name(), proto,
7906 DescriptorPool::ErrorCollector::EXTENDEE,
7907 "Extensions in proto3 are only allowed for defining options.");
7908 }
7909 if (field->is_required()) {
7910 AddError(field->full_name(), proto, DescriptorPool::ErrorCollector::TYPE,
7911 "Required fields are not allowed in proto3.");
7912 }
7913 if (field->has_default_value()) {
7914 AddError(field->full_name(), proto,
7915 DescriptorPool::ErrorCollector::DEFAULT_VALUE,
7916 "Explicit default values are not allowed in proto3.");
7917 }
7918 if (field->cpp_type() == FieldDescriptor::CPPTYPE_ENUM &&
7919 field->enum_type() && field->enum_type()->is_closed()) {
7920 // Proto3 messages can only use open enum types; otherwise we can't
7921 // guarantee that the default value is zero.
7922 AddError(
7923 field->full_name(), proto, DescriptorPool::ErrorCollector::TYPE, [&] {
7924 return absl::StrCat("Enum type \"", field->enum_type()->full_name(),
7925 "\" is not an open enum, but is used in \"",
7926 field->containing_type()->full_name(),
7927 "\" which is a proto3 message type.");
7928 });
7929 }
7930 if (field->type() == FieldDescriptor::TYPE_GROUP) {
7931 AddError(field->full_name(), proto, DescriptorPool::ErrorCollector::TYPE,
7932 "Groups are not supported in proto3 syntax.");
7933 }
7934 }
7935
ValidateOptions(const Descriptor * message,const DescriptorProto & proto)7936 void DescriptorBuilder::ValidateOptions(const Descriptor* message,
7937 const DescriptorProto& proto) {
7938 CheckFieldJsonNameUniqueness(proto, message);
7939 ValidateExtensionRangeOptions(proto, *message);
7940 }
7941
// Oneof-level option validation. No checks are performed for oneofs, so both
// parameters are unused and the body is empty.
void DescriptorBuilder::ValidateOptions(const OneofDescriptor* /*oneof*/,
                                        const OneofDescriptorProto& /*proto*/) {
  // Nothing to do so far.
}
7945
7946
// Field-level option validation. Applies to regular fields and extensions.
//
// Checks, in order: field features, lazy / packed applicability, MessageSet
// constraints, lite/non-lite extension rules, map entry shape, jstype,
// json_name restrictions and, for extensions, conformance with the extension
// declarations on the extendee's extension range.
//
// Every violation is reported through AddError against `proto`. The function
// returns nothing. The early returns in the extension-declaration section at
// the end only skip the remaining declaration checks.
void DescriptorBuilder::ValidateOptions(const FieldDescriptor* field,
                                        const FieldDescriptorProto& proto) {
  // When dependencies are built lazily, skip validation entirely for a null
  // field or a field with no message type.
  if (pool_->lazily_build_dependencies_ && (!field || !field->message_type())) {
    return;
  }

  ValidateFieldFeatures(field, proto);

  // Only message type fields may be lazy.
  if (field->options().lazy() || field->options().unverified_lazy()) {
    if (field->type() != FieldDescriptor::TYPE_MESSAGE) {
      AddError(field->full_name(), proto, DescriptorPool::ErrorCollector::TYPE,
               "[lazy = true] can only be specified for submessage fields.");
    }
  }

  // Only repeated primitive fields may be packed.
  if (field->options().packed() && !field->is_packable()) {
    AddError(
        field->full_name(), proto, DescriptorPool::ErrorCollector::TYPE,
        "[packed = true] can only be specified for repeated primitive fields.");
  }

  // Note: Default instance may not yet be initialized here, so we have to
  // avoid reading from it.
  // Hence the address comparison against the default instance before any
  // option of the containing type is read.
  if (field->containing_type_ != nullptr &&
      &field->containing_type()->options() !=
          &MessageOptions::default_instance() &&
      field->containing_type()->options().message_set_wire_format()) {
    if (field->is_extension()) {
      if (!field->is_optional() ||
          field->type() != FieldDescriptor::TYPE_MESSAGE) {
        AddError(field->full_name(), proto,
                 DescriptorPool::ErrorCollector::TYPE,
                 "Extensions of MessageSets must be optional messages.");
      }
    } else {
      AddError(field->full_name(), proto, DescriptorPool::ErrorCollector::NAME,
               "MessageSets cannot have fields, only extensions.");
    }
  }

  // Lite extensions can only be of Lite types.
  // The error fires when this field's file is lite but the containing type's
  // file is not.
  if (IsLite(field->file()) && field->containing_type_ != nullptr &&
      !IsLite(field->containing_type()->file())) {
    AddError(field->full_name(), proto,
             DescriptorPool::ErrorCollector::EXTENDEE,
             "Extensions to non-lite types can only be declared in non-lite "
             "files. Note that you cannot extend a non-lite type to contain "
             "a lite type, but the reverse is allowed.");
  }

  // Validate map types.
  // ValidateMapEntry returns false when the entry message does not have the
  // expected generated shape. It may also add its own errors.
  if (field->is_map()) {
    if (!ValidateMapEntry(field, proto)) {
      AddError(field->full_name(), proto, DescriptorPool::ErrorCollector::TYPE,
               "map_entry should not be set explicitly. Use map<KeyType, "
               "ValueType> instead.");
    }
  }

  ValidateJSType(field, proto);

  // json_name option is not allowed on extension fields. Note that the
  // json_name field in FieldDescriptorProto is always populated by protoc
  // when it sends descriptor data to plugins (calculated from field name if
  // the option is not explicitly set) so we can't rely on its presence to
  // determine whether the json_name option is set on the field. Here we
  // compare it against the default calculated json_name value and consider
  // the option set if they are different. This won't catch the case when
  // a user explicitly sets json_name to the default value, but should be
  // good enough to catch common misuses.
  if (field->is_extension() &&
      (field->has_json_name() &&
       field->json_name() != ToJsonName(field->name()))) {
    AddError(field->full_name(), proto,
             DescriptorPool::ErrorCollector::OPTION_NAME,
             "option json_name is not allowed on extension fields.");
  }

  // Applies to every field, not only to extensions.
  if (absl::StrContains(field->json_name(), '\0')) {
    AddError(field->full_name(), proto,
             DescriptorPool::ErrorCollector::OPTION_NAME,
             "json_name cannot have embedded null characters.");
  }


  // If this is a declared extension, validate that the actual name and type
  // match the declaration.
  if (field->is_extension()) {
    // NOTE(review): declaration checks are skipped when this predicate is
    // true for the extendee's full name. The predicate is defined elsewhere;
    // confirm its exact meaning there.
    if (pool_->IsReadyForCheckingDescriptorExtDecl(
            field->containing_type()->full_name())) {
      return;
    }
    // NOTE(review): the lookup result is dereferenced without a null check;
    // presumably earlier stages guarantee the extension number lies inside
    // one of the extendee's ranges - confirm.
    const Descriptor::ExtensionRange* extension_range =
        field->containing_type()->FindExtensionRangeContainingNumber(
            field->number());

    // A range without options carries no declarations to enforce.
    if (extension_range->options_ == nullptr) {
      return;
    }

    if (pool_->enforce_extension_declarations_) {
      // Find the declaration for this extension number, if there is one.
      for (const auto& declaration : extension_range->options_->declaration()) {
        if (declaration.number() != field->number()) continue;
        if (declaration.reserved()) {
          AddError(
              field->full_name(), proto,
              DescriptorPool::ErrorCollector::EXTENDEE, [&] {
                return absl::Substitute(
                    "Cannot use number $0 for extension field $1, as it is "
                    "reserved in the extension declarations for message $2.",
                    field->number(), field->full_name(),
                    field->containing_type()->full_name());
              });
          return;
        }
        // A matching, non-reserved declaration: compare name, type and
        // repeatedness against the actual field.
        CheckExtensionDeclaration(*field, proto, declaration.full_name(),
                                  declaration.type(), declaration.repeated());
        return;
      }

      // Either no declarations, or there are but no matches. If there are no
      // declarations, we check its verification state. If there are other
      // non-matching declarations, we enforce that this extension must also be
      // declared.
      if (!extension_range->options_->declaration().empty() ||
          (extension_range->options_->verification() ==
           ExtensionRangeOptions::DECLARATION)) {
        AddError(
            field->full_name(), proto, DescriptorPool::ErrorCollector::EXTENDEE,
            [&] {
              return absl::Substitute(
                  "Missing extension declaration for field $0 with number $1 "
                  "in extendee message $2. An extension range must declare for "
                  "all extension fields if its verification state is "
                  "DECLARATION or there's any declaration in the range "
                  "already. Otherwise, consider splitting up the range.",
                  field->full_name(), field->number(),
                  field->containing_type()->full_name());
            });
        return;
      }
    }
  }
}
8093
IsStringMapType(const FieldDescriptor & field)8094 static bool IsStringMapType(const FieldDescriptor& field) {
8095 if (!field.is_map()) return false;
8096 for (int i = 0; i < field.message_type()->field_count(); ++i) {
8097 if (field.message_type()->field(i)->type() ==
8098 FieldDescriptor::TYPE_STRING) {
8099 return true;
8100 }
8101 }
8102 return false;
8103 }
8104
ValidateFileFeatures(const FileDescriptor * file,const FileDescriptorProto & proto)8105 void DescriptorBuilder::ValidateFileFeatures(const FileDescriptor* file,
8106 const FileDescriptorProto& proto) {
8107 // Rely on our legacy validation for proto2/proto3 files.
8108 if (IsLegacyEdition(file->edition())) {
8109 return;
8110 }
8111
8112 if (file->features().field_presence() == FeatureSet::LEGACY_REQUIRED) {
8113 AddError(file->name(), proto, DescriptorPool::ErrorCollector::EDITIONS,
8114 "Required presence can't be specified by default.");
8115 }
8116 if (file->options().java_string_check_utf8()) {
8117 AddError(
8118 file->name(), proto, DescriptorPool::ErrorCollector::EDITIONS,
8119 "File option java_string_check_utf8 is not allowed under editions. Use "
8120 "the (pb.java).utf8_validation feature to control this behavior.");
8121 }
8122 }
8123
ValidateFieldFeatures(const FieldDescriptor * field,const FieldDescriptorProto & proto)8124 void DescriptorBuilder::ValidateFieldFeatures(
8125 const FieldDescriptor* field, const FieldDescriptorProto& proto) {
8126 // Rely on our legacy validation for proto2/proto3 files.
8127 if (field->file()->edition() < Edition::EDITION_2023) {
8128 return;
8129 }
8130
8131 // Double check proto descriptors in editions. These would usually be caught
8132 // by the parser, but may not be for dynamically built descriptors.
8133 if (proto.label() == FieldDescriptorProto::LABEL_REQUIRED) {
8134 AddError(field->full_name(), proto, DescriptorPool::ErrorCollector::NAME,
8135 "Required label is not allowed under editions. Use the feature "
8136 "field_presence = LEGACY_REQUIRED to control this behavior.");
8137 }
8138 if (proto.type() == FieldDescriptorProto::TYPE_GROUP) {
8139 AddError(field->full_name(), proto, DescriptorPool::ErrorCollector::NAME,
8140 "Group types are not allowed under editions. Use the feature "
8141 "message_encoding = DELIMITED to control this behavior.");
8142 }
8143
8144 auto& field_options = field->options();
8145 // Validate legacy options that have been migrated to features.
8146 if (field_options.has_packed()) {
8147 AddError(field->full_name(), proto, DescriptorPool::ErrorCollector::NAME,
8148 "Field option packed is not allowed under editions. Use the "
8149 "repeated_field_encoding feature to control this behavior.");
8150 }
8151
8152 // Validate fully resolved features.
8153 if (!field->is_repeated() && !field->has_presence()) {
8154 if (field->has_default_value()) {
8155 AddError(field->full_name(), proto, DescriptorPool::ErrorCollector::NAME,
8156 "Implicit presence fields can't specify defaults.");
8157 }
8158 if (field->enum_type() != nullptr &&
8159 field->enum_type()->features().enum_type() != FeatureSet::OPEN) {
8160 AddError(field->full_name(), proto, DescriptorPool::ErrorCollector::NAME,
8161 "Implicit presence enum fields must always be open.");
8162 }
8163 }
8164 if (field->is_extension() &&
8165 field->features().field_presence() == FeatureSet::LEGACY_REQUIRED) {
8166 AddError(field->full_name(), proto, DescriptorPool::ErrorCollector::NAME,
8167 "Extensions can't be required.");
8168 }
8169
8170 if (field->containing_type() != nullptr &&
8171 field->containing_type()->options().map_entry()) {
8172 // Skip validation of explicit features on generated map fields. These will
8173 // be blindly propagated from the original map field, and may violate a lot
8174 // of these conditions. Note: we do still validate the user-specified map
8175 // field.
8176 return;
8177 }
8178
8179 // Validate explicitly specified features on the field proto.
8180 if (field->proto_features_->has_field_presence()) {
8181 if (field->containing_oneof() != nullptr) {
8182 AddError(field->full_name(), proto, DescriptorPool::ErrorCollector::NAME,
8183 "Oneof fields can't specify field presence.");
8184 } else if (field->is_repeated()) {
8185 AddError(field->full_name(), proto, DescriptorPool::ErrorCollector::NAME,
8186 "Repeated fields can't specify field presence.");
8187 } else if (field->is_extension() &&
8188 field->proto_features_->field_presence() !=
8189 FeatureSet::LEGACY_REQUIRED) {
8190 // Note: required extensions will fail elsewhere, so we skip reporting a
8191 // second error here.
8192 AddError(field->full_name(), proto, DescriptorPool::ErrorCollector::NAME,
8193 "Extensions can't specify field presence.");
8194 } else if (field->message_type() != nullptr &&
8195 field->proto_features_->field_presence() ==
8196 FeatureSet::IMPLICIT) {
8197 AddError(field->full_name(), proto, DescriptorPool::ErrorCollector::NAME,
8198 "Message fields can't specify implicit presence.");
8199 }
8200 }
8201 if (!field->is_repeated() &&
8202 field->proto_features_->has_repeated_field_encoding()) {
8203 AddError(field->full_name(), proto, DescriptorPool::ErrorCollector::NAME,
8204 "Only repeated fields can specify repeated field encoding.");
8205 }
8206 if (field->type() != FieldDescriptor::TYPE_STRING &&
8207 !IsStringMapType(*field) &&
8208 field->proto_features_->has_utf8_validation()) {
8209 AddError(field->full_name(), proto, DescriptorPool::ErrorCollector::NAME,
8210 "Only string fields can specify utf8 validation.");
8211 }
8212 if (!field->is_packable() &&
8213 field->proto_features_->repeated_field_encoding() == FeatureSet::PACKED) {
8214 AddError(field->full_name(), proto, DescriptorPool::ErrorCollector::NAME,
8215 "Only repeated primitive fields can specify PACKED repeated field "
8216 "encoding.");
8217 }
8218 if ((field->cpp_type() != FieldDescriptor::CPPTYPE_MESSAGE ||
8219 field->is_map_message_type()) &&
8220 field->proto_features_->has_message_encoding()) {
8221 AddError(field->full_name(), proto, DescriptorPool::ErrorCollector::NAME,
8222 "Only message fields can specify message encoding.");
8223 }
8224 }
8225
ValidateOptions(const EnumDescriptor * enm,const EnumDescriptorProto & proto)8226 void DescriptorBuilder::ValidateOptions(const EnumDescriptor* enm,
8227 const EnumDescriptorProto& proto) {
8228 CheckEnumValueUniqueness(proto, enm);
8229
8230 if (!enm->is_closed() && enm->value_count() > 0 &&
8231 enm->value(0)->number() != 0) {
8232 AddError(enm->full_name(), proto.value(0),
8233 DescriptorPool::ErrorCollector::NUMBER,
8234 "The first enum value must be zero for open enums.");
8235 }
8236
8237 if (!enm->options().has_allow_alias() || !enm->options().allow_alias()) {
8238 absl::flat_hash_map<int, std::string> used_values;
8239 for (int i = 0; i < enm->value_count(); ++i) {
8240 const EnumValueDescriptor* enum_value = enm->value(i);
8241 auto insert_result =
8242 used_values.emplace(enum_value->number(), enum_value->full_name());
8243 bool inserted = insert_result.second;
8244 if (!inserted) {
8245 if (!enm->options().allow_alias()) {
8246 // Generate error if duplicated enum values are explicitly disallowed.
8247 auto make_error = [&] {
8248 // Find the next free number.
8249 absl::flat_hash_set<int64_t> used;
8250 for (int j = 0; j < enm->value_count(); ++j) {
8251 used.insert(enm->value(j)->number());
8252 }
8253 int64_t next_value = static_cast<int64_t>(enum_value->number()) + 1;
8254 while (used.contains(next_value)) ++next_value;
8255
8256 std::string error = absl::StrCat(
8257 "\"", enum_value->full_name(),
8258 "\" uses the same enum value as \"",
8259 insert_result.first->second,
8260 "\". If this is intended, set "
8261 "'option allow_alias = true;' to the enum definition.");
8262 if (next_value < std::numeric_limits<int32_t>::max()) {
8263 absl::StrAppend(&error, " The next available enum value is ",
8264 next_value, ".");
8265 }
8266 return error;
8267 };
8268 AddError(enm->full_name(), proto.value(i),
8269 DescriptorPool::ErrorCollector::NUMBER, make_error);
8270 }
8271 }
8272 }
8273 }
8274 }
8275
// Enum-value-level option validation. No checks are performed for enum
// values, so both parameters are unused.
void DescriptorBuilder::ValidateOptions(
    const EnumValueDescriptor* /* enum_value */,
    const EnumValueDescriptorProto& /* proto */) {
  // Nothing to do so far.
}
8281
8282 namespace {
8283 // Validates that a fully-qualified symbol for extension declaration must
8284 // have a leading dot and valid identifiers.
ValidateSymbolForDeclaration(absl::string_view symbol)8285 absl::optional<std::string> ValidateSymbolForDeclaration(
8286 absl::string_view symbol) {
8287 if (!absl::StartsWith(symbol, ".")) {
8288 return absl::StrCat("\"", symbol,
8289 "\" must have a leading dot to indicate the "
8290 "fully-qualified scope.");
8291 }
8292 if (!ValidateQualifiedName(symbol)) {
8293 return absl::StrCat("\"", symbol, "\" contains invalid identifiers.");
8294 }
8295 return absl::nullopt;
8296 }
8297 } // namespace
8298
8299
ValidateExtensionDeclaration(const absl::string_view full_name,const RepeatedPtrField<ExtensionRangeOptions_Declaration> & declarations,const DescriptorProto_ExtensionRange & proto,absl::flat_hash_set<absl::string_view> & full_name_set)8300 void DescriptorBuilder::ValidateExtensionDeclaration(
8301 const absl::string_view full_name,
8302 const RepeatedPtrField<ExtensionRangeOptions_Declaration>& declarations,
8303 const DescriptorProto_ExtensionRange& proto,
8304 absl::flat_hash_set<absl::string_view>& full_name_set) {
8305 absl::flat_hash_set<int> extension_number_set;
8306 for (const auto& declaration : declarations) {
8307 if (declaration.number() < proto.start() ||
8308 declaration.number() >= proto.end()) {
8309 AddError(full_name, proto, DescriptorPool::ErrorCollector::NUMBER, [&] {
8310 return absl::Substitute(
8311 "Extension declaration number $0 is not in the "
8312 "extension range.",
8313 declaration.number());
8314 });
8315 }
8316
8317 if (!extension_number_set.insert(declaration.number()).second) {
8318 AddError(full_name, proto, DescriptorPool::ErrorCollector::NUMBER, [&] {
8319 return absl::Substitute(
8320 "Extension declaration number $0 is declared multiple times.",
8321 declaration.number());
8322 });
8323 }
8324
8325 // Both full_name and type should be present. If none of them is set,
8326 // add an error unless reserved is set to true. If only one of them is set,
8327 // add an error whether or not reserved is set to true.
8328 if (!declaration.has_full_name() || !declaration.has_type()) {
8329 if (declaration.has_full_name() != declaration.has_type() ||
8330 !declaration.reserved()) {
8331 AddError(full_name, proto, DescriptorPool::ErrorCollector::EXTENDEE,
8332 [&] {
8333 return absl::StrCat(
8334 "Extension declaration #", declaration.number(),
8335 " should have both \"full_name\" and \"type\" set.");
8336 });
8337 }
8338 } else {
8339 if (!full_name_set.insert(declaration.full_name()).second) {
8340 AddError(
8341 declaration.full_name(), proto,
8342 DescriptorPool::ErrorCollector::NAME, [&] {
8343 return absl::Substitute(
8344 "Extension field name \"$0\" is declared multiple times.",
8345 declaration.full_name());
8346 });
8347 return;
8348 }
8349 absl::optional<std::string> err =
8350 ValidateSymbolForDeclaration(declaration.full_name());
8351 if (err.has_value()) {
8352 AddError(full_name, proto, DescriptorPool::ErrorCollector::NAME,
8353 [err] { return *err; });
8354 }
8355 if (!IsNonMessageType(declaration.type())) {
8356 err = ValidateSymbolForDeclaration(declaration.type());
8357 if (err.has_value()) {
8358 AddError(full_name, proto, DescriptorPool::ErrorCollector::NAME,
8359 [err] { return *err; });
8360 }
8361 }
8362 }
8363 }
8364 }
8365
ValidateExtensionRangeOptions(const DescriptorProto & proto,const Descriptor & message)8366 void DescriptorBuilder::ValidateExtensionRangeOptions(
8367 const DescriptorProto& proto, const Descriptor& message) {
8368 const int64_t max_extension_range =
8369 static_cast<int64_t>(message.options().message_set_wire_format()
8370 ? std::numeric_limits<int32_t>::max()
8371 : FieldDescriptor::kMaxNumber);
8372
8373 size_t num_declarations = 0;
8374 for (int i = 0; i < message.extension_range_count(); i++) {
8375 if (message.extension_range(i)->options_ == nullptr) continue;
8376 num_declarations +=
8377 message.extension_range(i)->options_->declaration_size();
8378 }
8379
8380 // Contains the full names from both "declaration" and "metadata".
8381 absl::flat_hash_set<absl::string_view> declaration_full_name_set;
8382 declaration_full_name_set.reserve(num_declarations);
8383
8384 for (int i = 0; i < message.extension_range_count(); i++) {
8385 const auto& range = *message.extension_range(i);
8386 if (range.end_number() > max_extension_range + 1) {
8387 AddError(message.full_name(), proto,
8388 DescriptorPool::ErrorCollector::NUMBER, [&] {
8389 return absl::Substitute(
8390 "Extension numbers cannot be greater than $0.",
8391 max_extension_range);
8392 });
8393 }
8394 const auto& range_options = *range.options_;
8395
8396
8397 if (!range_options.declaration().empty()) {
8398 // TODO: remove the "has_verification" check once the default
8399 // is flipped to DECLARATION.
8400 if (range_options.has_verification() &&
8401 range_options.verification() == ExtensionRangeOptions::UNVERIFIED) {
8402 AddError(message.full_name(), proto.extension_range(i),
8403 DescriptorPool::ErrorCollector::EXTENDEE, [&] {
8404 return "Cannot mark the extension range as UNVERIFIED when "
8405 "it has extension(s) declared.";
8406 });
8407 return;
8408 }
8409 ValidateExtensionDeclaration(
8410 message.full_name(), range_options.declaration(),
8411 proto.extension_range(i), declaration_full_name_set);
8412 }
8413 }
8414 }
8415
ValidateOptions(const ServiceDescriptor * service,const ServiceDescriptorProto & proto)8416 void DescriptorBuilder::ValidateOptions(const ServiceDescriptor* service,
8417 const ServiceDescriptorProto& proto) {
8418 if (IsLite(service->file()) &&
8419 (service->file()->options().cc_generic_services() ||
8420 service->file()->options().java_generic_services())) {
8421 AddError(service->full_name(), proto, DescriptorPool::ErrorCollector::NAME,
8422 "Files with optimize_for = LITE_RUNTIME cannot define services "
8423 "unless you set both options cc_generic_services and "
8424 "java_generic_services to false.");
8425 }
8426 }
8427
// Method-level option validation. No checks are performed for methods, so
// both parameters are unused.
void DescriptorBuilder::ValidateOptions(
    const MethodDescriptor* /* method */,
    const MethodDescriptorProto& /* proto */) {
  // Nothing to do so far.
}
8433
ValidateMapEntry(const FieldDescriptor * field,const FieldDescriptorProto & proto)8434 bool DescriptorBuilder::ValidateMapEntry(const FieldDescriptor* field,
8435 const FieldDescriptorProto& proto) {
8436 const Descriptor* message = field->message_type();
8437 if ( // Must not contain extensions, extension range or nested message or
8438 // enums
8439 message->extension_count() != 0 ||
8440 field->label() != FieldDescriptor::LABEL_REPEATED ||
8441 message->extension_range_count() != 0 ||
8442 message->nested_type_count() != 0 || message->enum_type_count() != 0 ||
8443 // Must contain exactly two fields
8444 message->field_count() != 2 ||
8445 // Field name and message name must match
8446 message->name() !=
8447 absl::StrCat(ToCamelCase(field->name(), false), "Entry") ||
8448 // Entry message must be in the same containing type of the field.
8449 field->containing_type() != message->containing_type()) {
8450 return false;
8451 }
8452
8453 const FieldDescriptor* key = message->map_key();
8454 const FieldDescriptor* value = message->map_value();
8455 if (key->label() != FieldDescriptor::LABEL_OPTIONAL || key->number() != 1 ||
8456 key->name() != "key") {
8457 return false;
8458 }
8459 if (value->label() != FieldDescriptor::LABEL_OPTIONAL ||
8460 value->number() != 2 || value->name() != "value") {
8461 return false;
8462 }
8463
8464 // Check key types are legal.
8465 switch (key->type()) {
8466 case FieldDescriptor::TYPE_ENUM:
8467 AddError(field->full_name(), proto, DescriptorPool::ErrorCollector::TYPE,
8468 "Key in map fields cannot be enum types.");
8469 break;
8470 case FieldDescriptor::TYPE_FLOAT:
8471 case FieldDescriptor::TYPE_DOUBLE:
8472 case FieldDescriptor::TYPE_MESSAGE:
8473 case FieldDescriptor::TYPE_GROUP:
8474 case FieldDescriptor::TYPE_BYTES:
8475 AddError(
8476 field->full_name(), proto, DescriptorPool::ErrorCollector::TYPE,
8477 "Key in map fields cannot be float/double, bytes or message types.");
8478 break;
8479 case FieldDescriptor::TYPE_BOOL:
8480 case FieldDescriptor::TYPE_INT32:
8481 case FieldDescriptor::TYPE_INT64:
8482 case FieldDescriptor::TYPE_SINT32:
8483 case FieldDescriptor::TYPE_SINT64:
8484 case FieldDescriptor::TYPE_STRING:
8485 case FieldDescriptor::TYPE_UINT32:
8486 case FieldDescriptor::TYPE_UINT64:
8487 case FieldDescriptor::TYPE_FIXED32:
8488 case FieldDescriptor::TYPE_FIXED64:
8489 case FieldDescriptor::TYPE_SFIXED32:
8490 case FieldDescriptor::TYPE_SFIXED64:
8491 // Legal cases
8492 break;
8493 // Do not add a default, so that the compiler will complain when new types
8494 // are added.
8495 }
8496
8497 if (value->type() == FieldDescriptor::TYPE_ENUM) {
8498 if (value->enum_type()->value(0)->number() != 0) {
8499 AddError(field->full_name(), proto, DescriptorPool::ErrorCollector::TYPE,
8500 "Enum value in map must define 0 as the first value.");
8501 }
8502 }
8503
8504 return true;
8505 }
8506
DetectMapConflicts(const Descriptor * message,const DescriptorProto & proto)8507 void DescriptorBuilder::DetectMapConflicts(const Descriptor* message,
8508 const DescriptorProto& proto) {
8509 DescriptorsByNameSet<Descriptor> seen_types;
8510 for (int i = 0; i < message->nested_type_count(); ++i) {
8511 const Descriptor* nested = message->nested_type(i);
8512 auto insert_result = seen_types.insert(nested);
8513 bool inserted = insert_result.second;
8514 if (!inserted) {
8515 if ((*insert_result.first)->options().map_entry() ||
8516 nested->options().map_entry()) {
8517 AddError(message->full_name(), proto,
8518 DescriptorPool::ErrorCollector::NAME, [&] {
8519 return absl::StrCat(
8520 "Expanded map entry type ", nested->name(),
8521 " conflicts with an existing nested message type.");
8522 });
8523 break;
8524 }
8525 }
8526 // Recursively test on the nested types.
8527 DetectMapConflicts(message->nested_type(i), proto.nested_type(i));
8528 }
8529 // Check for conflicted field names.
8530 for (int i = 0; i < message->field_count(); ++i) {
8531 const FieldDescriptor* field = message->field(i);
8532 auto iter = seen_types.find(field->name());
8533 if (iter != seen_types.end() && (*iter)->options().map_entry()) {
8534 AddError(message->full_name(), proto,
8535 DescriptorPool::ErrorCollector::NAME, [&] {
8536 return absl::StrCat("Expanded map entry type ",
8537 (*iter)->name(),
8538 " conflicts with an existing field.");
8539 });
8540 }
8541 }
8542 // Check for conflicted enum names.
8543 for (int i = 0; i < message->enum_type_count(); ++i) {
8544 const EnumDescriptor* enum_desc = message->enum_type(i);
8545 auto iter = seen_types.find(enum_desc->name());
8546 if (iter != seen_types.end() && (*iter)->options().map_entry()) {
8547 AddError(message->full_name(), proto,
8548 DescriptorPool::ErrorCollector::NAME, [&] {
8549 return absl::StrCat("Expanded map entry type ",
8550 (*iter)->name(),
8551 " conflicts with an existing enum type.");
8552 });
8553 }
8554 }
8555 // Check for conflicted oneof names.
8556 for (int i = 0; i < message->oneof_decl_count(); ++i) {
8557 const OneofDescriptor* oneof_desc = message->oneof_decl(i);
8558 auto iter = seen_types.find(oneof_desc->name());
8559 if (iter != seen_types.end() && (*iter)->options().map_entry()) {
8560 AddError(message->full_name(), proto,
8561 DescriptorPool::ErrorCollector::NAME, [&] {
8562 return absl::StrCat("Expanded map entry type ",
8563 (*iter)->name(),
8564 " conflicts with an existing oneof type.");
8565 });
8566 }
8567 }
8568 }
8569
ValidateJSType(const FieldDescriptor * field,const FieldDescriptorProto & proto)8570 void DescriptorBuilder::ValidateJSType(const FieldDescriptor* field,
8571 const FieldDescriptorProto& proto) {
8572 FieldOptions::JSType jstype = field->options().jstype();
8573 // The default is always acceptable.
8574 if (jstype == FieldOptions::JS_NORMAL) {
8575 return;
8576 }
8577
8578 switch (field->type()) {
8579 // Integral 64-bit types may be represented as JavaScript numbers or
8580 // strings.
8581 case FieldDescriptor::TYPE_UINT64:
8582 case FieldDescriptor::TYPE_INT64:
8583 case FieldDescriptor::TYPE_SINT64:
8584 case FieldDescriptor::TYPE_FIXED64:
8585 case FieldDescriptor::TYPE_SFIXED64:
8586 if (jstype == FieldOptions::JS_STRING ||
8587 jstype == FieldOptions::JS_NUMBER) {
8588 return;
8589 }
8590 AddError(field->full_name(), proto, DescriptorPool::ErrorCollector::TYPE,
8591 [&] {
8592 return absl::StrCat(
8593 "Illegal jstype for int64, uint64, sint64, fixed64 "
8594 "or sfixed64 field: ",
8595 FieldOptions_JSType_descriptor()->value(jstype)->name());
8596 });
8597 break;
8598
8599 // No other types permit a jstype option.
8600 default:
8601 AddError(field->full_name(), proto, DescriptorPool::ErrorCollector::TYPE,
8602 "jstype is only allowed on int64, uint64, sint64, fixed64 "
8603 "or sfixed64 fields.");
8604 break;
8605 }
8606 }
8607
8608 // -------------------------------------------------------------------
8609
OptionInterpreter(DescriptorBuilder * builder)8610 DescriptorBuilder::OptionInterpreter::OptionInterpreter(
8611 DescriptorBuilder* builder)
8612 : builder_(builder) {
8613 ABSL_CHECK(builder_);
8614 }
8615
8616 DescriptorBuilder::OptionInterpreter::~OptionInterpreter() = default;
8617
InterpretOptionExtensions(OptionsToInterpret * options_to_interpret)8618 bool DescriptorBuilder::OptionInterpreter::InterpretOptionExtensions(
8619 OptionsToInterpret* options_to_interpret) {
8620 return InterpretOptionsImpl(options_to_interpret, /*skip_extensions=*/false);
8621 }
InterpretNonExtensionOptions(OptionsToInterpret * options_to_interpret)8622 bool DescriptorBuilder::OptionInterpreter::InterpretNonExtensionOptions(
8623 OptionsToInterpret* options_to_interpret) {
8624 return InterpretOptionsImpl(options_to_interpret, /*skip_extensions=*/true);
8625 }
InterpretOptionsImpl(OptionsToInterpret * options_to_interpret,bool skip_extensions)8626 bool DescriptorBuilder::OptionInterpreter::InterpretOptionsImpl(
8627 OptionsToInterpret* options_to_interpret, bool skip_extensions) {
8628 // Note that these may be in different pools, so we can't use the same
8629 // descriptor and reflection objects on both.
8630 Message* options = options_to_interpret->options;
8631 const Message* original_options = options_to_interpret->original_options;
8632
8633 bool failed = false;
8634 options_to_interpret_ = options_to_interpret;
8635
8636 // Find the uninterpreted_option field in the mutable copy of the options
8637 // and clear them, since we're about to interpret them.
8638 const FieldDescriptor* uninterpreted_options_field =
8639 options->GetDescriptor()->FindFieldByName("uninterpreted_option");
8640 ABSL_CHECK(uninterpreted_options_field != nullptr)
8641 << "No field named \"uninterpreted_option\" in the Options proto.";
8642 options->GetReflection()->ClearField(options, uninterpreted_options_field);
8643
8644 std::vector<int> src_path = options_to_interpret->element_path;
8645 src_path.push_back(uninterpreted_options_field->number());
8646
8647 // Find the uninterpreted_option field in the original options.
8648 const FieldDescriptor* original_uninterpreted_options_field =
8649 original_options->GetDescriptor()->FindFieldByName(
8650 "uninterpreted_option");
8651 ABSL_CHECK(original_uninterpreted_options_field != nullptr)
8652 << "No field named \"uninterpreted_option\" in the Options proto.";
8653
8654 const int num_uninterpreted_options =
8655 original_options->GetReflection()->FieldSize(
8656 *original_options, original_uninterpreted_options_field);
8657 for (int i = 0; i < num_uninterpreted_options; ++i) {
8658 src_path.push_back(i);
8659 uninterpreted_option_ = DownCastMessage<UninterpretedOption>(
8660 &original_options->GetReflection()->GetRepeatedMessage(
8661 *original_options, original_uninterpreted_options_field, i));
8662 if (!InterpretSingleOption(options, src_path,
8663 options_to_interpret->element_path,
8664 skip_extensions)) {
8665 // Error already added by InterpretSingleOption().
8666 failed = true;
8667 break;
8668 }
8669 src_path.pop_back();
8670 }
8671 // Reset these, so we don't have any dangling pointers.
8672 uninterpreted_option_ = nullptr;
8673 options_to_interpret_ = nullptr;
8674
8675 if (!failed) {
8676 // InterpretSingleOption() added the interpreted options in the
8677 // UnknownFieldSet, in case the option isn't yet known to us. Now we
8678 // serialize the options message and deserialize it back. That way, any
8679 // option fields that we do happen to know about will get moved from the
8680 // UnknownFieldSet into the real fields, and thus be available right away.
8681 // If they are not known, that's OK too. They will get reparsed into the
8682 // UnknownFieldSet and wait there until the message is parsed by something
8683 // that does know about the options.
8684
8685 // Keep the unparsed options around in case the reparsing fails.
8686 std::unique_ptr<Message> unparsed_options(options->New());
8687 options->GetReflection()->Swap(unparsed_options.get(), options);
8688
8689 std::string buf;
8690 if (!unparsed_options->AppendToString(&buf) ||
8691 !options->ParseFromString(buf)) {
8692 builder_->AddError(
8693 options_to_interpret->element_name, *original_options,
8694 DescriptorPool::ErrorCollector::OTHER, [&] {
8695 return absl::StrCat(
8696 "Some options could not be correctly parsed using the proto "
8697 "descriptors compiled into this binary.\n"
8698 "Unparsed options: ",
8699 unparsed_options->ShortDebugString(),
8700 "\n"
8701 "Parsing attempt: ",
8702 options->ShortDebugString());
8703 });
8704 // Restore the unparsed options.
8705 options->GetReflection()->Swap(unparsed_options.get(), options);
8706 }
8707 }
8708
8709 return !failed;
8710 }
8711
InterpretSingleOption(Message * options,const std::vector<int> & src_path,const std::vector<int> & options_path,bool skip_extensions)8712 bool DescriptorBuilder::OptionInterpreter::InterpretSingleOption(
8713 Message* options, const std::vector<int>& src_path,
8714 const std::vector<int>& options_path, bool skip_extensions) {
8715 // First do some basic validation.
8716 if (uninterpreted_option_->name_size() == 0) {
8717 // This should never happen unless the parser has gone seriously awry or
8718 // someone has manually created the uninterpreted option badly.
8719 if (skip_extensions) {
8720 // Come back to it later.
8721 return true;
8722 }
8723 return AddNameError(
8724 []() -> std::string { return "Option must have a name."; });
8725 }
8726 if (uninterpreted_option_->name(0).name_part() == "uninterpreted_option") {
8727 if (skip_extensions) {
8728 // Come back to it later.
8729 return true;
8730 }
8731 return AddNameError([]() -> std::string {
8732 return "Option must not use reserved name \"uninterpreted_option\".";
8733 });
8734 }
8735
8736 if (skip_extensions == uninterpreted_option_->name(0).is_extension()) {
8737 // Allow feature and option interpretation to occur in two phases. This is
8738 // necessary because features *are* options and need to be interpreted
8739 // before resolving them. However, options can also *have* features
8740 // attached to them.
8741 return true;
8742 }
8743
8744 const Descriptor* options_descriptor = nullptr;
8745 // Get the options message's descriptor from the builder's pool, so that we
8746 // get the version that knows about any extension options declared in the file
8747 // we're currently building. The descriptor should be there as long as the
8748 // file we're building imported descriptor.proto.
8749
8750 // Note that we use DescriptorBuilder::FindSymbolNotEnforcingDeps(), not
8751 // DescriptorPool::FindMessageTypeByName() because we're already holding the
8752 // pool's mutex, and the latter method locks it again. We don't use
8753 // FindSymbol() because files that use custom options only need to depend on
8754 // the file that defines the option, not descriptor.proto itself.
8755 Symbol symbol = builder_->FindSymbolNotEnforcingDeps(
8756 options->GetDescriptor()->full_name());
8757 options_descriptor = symbol.descriptor();
8758 if (options_descriptor == nullptr) {
8759 // The options message's descriptor was not in the builder's pool, so use
8760 // the standard version from the generated pool. We're not holding the
8761 // generated pool's mutex, so we can search it the straightforward way.
8762 options_descriptor = options->GetDescriptor();
8763 }
8764 ABSL_CHECK(options_descriptor);
8765
8766 // We iterate over the name parts to drill into the submessages until we find
8767 // the leaf field for the option. As we drill down we remember the current
8768 // submessage's descriptor in |descriptor| and the next field in that
8769 // submessage in |field|. We also track the fields we're drilling down
8770 // through in |intermediate_fields|. As we go, we reconstruct the full option
8771 // name in |debug_msg_name|, for use in error messages.
8772 const Descriptor* descriptor = options_descriptor;
8773 const FieldDescriptor* field = nullptr;
8774 std::vector<const FieldDescriptor*> intermediate_fields;
8775 std::string debug_msg_name = "";
8776
8777 std::vector<int> dest_path = options_path;
8778
8779 for (int i = 0; i < uninterpreted_option_->name_size(); ++i) {
8780 builder_->undefine_resolved_name_.clear();
8781 const std::string& name_part = uninterpreted_option_->name(i).name_part();
8782 if (!debug_msg_name.empty()) {
8783 absl::StrAppend(&debug_msg_name, ".");
8784 }
8785 if (uninterpreted_option_->name(i).is_extension()) {
8786 absl::StrAppend(&debug_msg_name, "(", name_part, ")");
8787 // Search for the extension's descriptor as an extension in the builder's
8788 // pool. Note that we use DescriptorBuilder::LookupSymbol(), not
8789 // DescriptorPool::FindExtensionByName(), for two reasons: 1) It allows
8790 // relative lookups, and 2) because we're already holding the pool's
8791 // mutex, and the latter method locks it again.
8792 symbol =
8793 builder_->LookupSymbol(name_part, options_to_interpret_->name_scope);
8794 field = symbol.field_descriptor();
8795 // If we don't find the field then the field's descriptor was not in the
8796 // builder's pool, but there's no point in looking in the generated
8797 // pool. We require that you import the file that defines any extensions
8798 // you use, so they must be present in the builder's pool.
8799 } else {
8800 absl::StrAppend(&debug_msg_name, name_part);
8801 // Search for the field's descriptor as a regular field.
8802 field = descriptor->FindFieldByName(name_part);
8803 }
8804
8805 if (field == nullptr) {
8806 if (get_allow_unknown(builder_->pool_)) {
8807 // We can't find the option, but AllowUnknownDependencies() is enabled,
8808 // so we will just leave it as uninterpreted.
8809 AddWithoutInterpreting(*uninterpreted_option_, options);
8810 return true;
8811 } else if (!(builder_->undefine_resolved_name_).empty()) {
8812 // Option is resolved to a name which is not defined.
8813 return AddNameError([&] {
8814 return absl::StrCat(
8815 "Option \"", debug_msg_name, "\" is resolved to \"(",
8816 builder_->undefine_resolved_name_,
8817 ")\", which is not defined. The innermost scope is searched "
8818 "first "
8819 "in name resolution. Consider using a leading '.'(i.e., \"(.",
8820 debug_msg_name.substr(1),
8821 "\") to start from the outermost scope.");
8822 });
8823 } else {
8824 return AddNameError([&] {
8825 return absl::StrCat(
8826 "Option \"", debug_msg_name, "\" unknown. Ensure that your proto",
8827 " definition file imports the proto which defines the option.");
8828 });
8829 }
8830 } else if (field->containing_type() != descriptor) {
8831 if (get_is_placeholder(field->containing_type())) {
8832 // The field is an extension of a placeholder type, so we can't
8833 // reliably verify whether it is a valid extension to use here (e.g.
8834 // we don't know if it is an extension of the correct *Options message,
8835 // or if it has a valid field number, etc.). Just leave it as
8836 // uninterpreted instead.
8837 AddWithoutInterpreting(*uninterpreted_option_, options);
8838 return true;
8839 } else {
8840 // This can only happen if, due to some insane misconfiguration of the
8841 // pools, we find the options message in one pool but the field in
8842 // another. This would probably imply a hefty bug somewhere.
8843 return AddNameError([&] {
8844 return absl::StrCat("Option field \"", debug_msg_name,
8845 "\" is not a field or extension of message \"",
8846 descriptor->name(), "\".");
8847 });
8848 }
8849 } else {
8850 // accumulate field numbers to form path to interpreted option
8851 dest_path.push_back(field->number());
8852
8853 // Special handling to prevent feature use in the same file as the
8854 // definition.
8855 // TODO Add proper support for cases where this can work.
8856 if (field->file() == builder_->file_ &&
8857 uninterpreted_option_->name(0).name_part() == "features" &&
8858 !uninterpreted_option_->name(0).is_extension()) {
8859 return AddNameError([&] {
8860 return absl::StrCat(
8861 "Feature \"", debug_msg_name,
8862 "\" can't be used in the same file it's defined in.");
8863 });
8864 }
8865
8866 if (i < uninterpreted_option_->name_size() - 1) {
8867 if (field->cpp_type() != FieldDescriptor::CPPTYPE_MESSAGE) {
8868 return AddNameError([&] {
8869 return absl::StrCat("Option \"", debug_msg_name,
8870 "\" is an atomic type, not a message.");
8871 });
8872 } else if (field->is_repeated()) {
8873 return AddNameError([&] {
8874 return absl::StrCat("Option field \"", debug_msg_name,
8875 "\" is a repeated message. Repeated message "
8876 "options must be initialized using an "
8877 "aggregate value.");
8878 });
8879 } else {
8880 // Drill down into the submessage.
8881 intermediate_fields.push_back(field);
8882 descriptor = field->message_type();
8883 }
8884 }
8885 }
8886 }
8887
8888 // We've found the leaf field. Now we use UnknownFieldSets to set its value
8889 // on the options message. We do so because the message may not yet know
8890 // about its extension fields, so we may not be able to set the fields
8891 // directly. But the UnknownFieldSets will serialize to the same wire-format
8892 // message, so reading that message back in once the extension fields are
8893 // known will populate them correctly.
8894
8895 // First see if the option is already set.
8896 if (!field->is_repeated() &&
8897 !ExamineIfOptionIsSet(
8898 intermediate_fields.begin(), intermediate_fields.end(), field,
8899 debug_msg_name,
8900 options->GetReflection()->GetUnknownFields(*options))) {
8901 return false; // ExamineIfOptionIsSet() already added the error.
8902 }
8903
8904 // First set the value on the UnknownFieldSet corresponding to the
8905 // innermost message.
8906 std::unique_ptr<UnknownFieldSet> unknown_fields(new UnknownFieldSet());
8907 if (!SetOptionValue(field, unknown_fields.get())) {
8908 return false; // SetOptionValue() already added the error.
8909 }
8910
8911 // Now wrap the UnknownFieldSet with UnknownFieldSets corresponding to all
8912 // the intermediate messages.
8913 for (std::vector<const FieldDescriptor*>::reverse_iterator iter =
8914 intermediate_fields.rbegin();
8915 iter != intermediate_fields.rend(); ++iter) {
8916 std::unique_ptr<UnknownFieldSet> parent_unknown_fields(
8917 new UnknownFieldSet());
8918 switch ((*iter)->type()) {
8919 case FieldDescriptor::TYPE_MESSAGE: {
8920 std::string outstr;
8921 ABSL_CHECK(unknown_fields->SerializeToString(&outstr))
8922 << "Unexpected failure while serializing option submessage "
8923 << debug_msg_name << "\".";
8924 parent_unknown_fields->AddLengthDelimited((*iter)->number(),
8925 std::move(outstr));
8926 break;
8927 }
8928
8929 case FieldDescriptor::TYPE_GROUP: {
8930 parent_unknown_fields->AddGroup((*iter)->number())
8931 ->MergeFrom(*unknown_fields);
8932 break;
8933 }
8934
8935 default:
8936 ABSL_LOG(FATAL) << "Invalid wire type for CPPTYPE_MESSAGE: "
8937 << (*iter)->type();
8938 return false;
8939 }
8940 unknown_fields = std::move(parent_unknown_fields);
8941 }
8942
8943 // Now merge the UnknownFieldSet corresponding to the top-level message into
8944 // the options message.
8945 options->GetReflection()->MutableUnknownFields(options)->MergeFrom(
8946 *unknown_fields);
8947
8948 // record the element path of the interpreted option
8949 if (field->is_repeated()) {
8950 int index = repeated_option_counts_[dest_path]++;
8951 dest_path.push_back(index);
8952 }
8953 interpreted_paths_[src_path] = dest_path;
8954
8955 return true;
8956 }
8957
UpdateSourceCodeInfo(SourceCodeInfo * info)8958 void DescriptorBuilder::OptionInterpreter::UpdateSourceCodeInfo(
8959 SourceCodeInfo* info) {
8960 if (interpreted_paths_.empty()) {
8961 // nothing to do!
8962 return;
8963 }
8964
8965 // We find locations that match keys in interpreted_paths_ and
8966 // 1) replace the path with the corresponding value in interpreted_paths_
8967 // 2) remove any subsequent sub-locations (sub-location is one whose path
8968 // has the parent path as a prefix)
8969 //
8970 // To avoid quadratic behavior of removing interior rows as we go,
8971 // we keep a copy. But we don't actually copy anything until we've
8972 // found the first match (so if the source code info has no locations
8973 // that need to be changed, there is zero copy overhead).
8974
8975 RepeatedPtrField<SourceCodeInfo_Location>* locs = info->mutable_location();
8976 RepeatedPtrField<SourceCodeInfo_Location> new_locs;
8977 bool copying = false;
8978
8979 std::vector<int> pathv;
8980 bool matched = false;
8981
8982 for (RepeatedPtrField<SourceCodeInfo_Location>::iterator loc = locs->begin();
8983 loc != locs->end(); loc++) {
8984 if (matched) {
8985 // see if this location is in the range to remove
8986 bool loc_matches = true;
8987 if (loc->path_size() < static_cast<int64_t>(pathv.size())) {
8988 loc_matches = false;
8989 } else {
8990 for (size_t j = 0; j < pathv.size(); j++) {
8991 if (loc->path(j) != pathv[j]) {
8992 loc_matches = false;
8993 break;
8994 }
8995 }
8996 }
8997
8998 if (loc_matches) {
8999 // don't copy this row since it is a sub-location that we're removing
9000 continue;
9001 }
9002
9003 matched = false;
9004 }
9005
9006 pathv.clear();
9007 for (int j = 0; j < loc->path_size(); j++) {
9008 pathv.push_back(loc->path(j));
9009 }
9010
9011 auto entry = interpreted_paths_.find(pathv);
9012
9013 if (entry == interpreted_paths_.end()) {
9014 // not a match
9015 if (copying) {
9016 *new_locs.Add() = *loc;
9017 }
9018 continue;
9019 }
9020
9021 matched = true;
9022
9023 if (!copying) {
9024 // initialize the copy we are building
9025 copying = true;
9026 new_locs.Reserve(locs->size());
9027 for (RepeatedPtrField<SourceCodeInfo_Location>::iterator it =
9028 locs->begin();
9029 it != loc; it++) {
9030 *new_locs.Add() = *it;
9031 }
9032 }
9033
9034 // add replacement and update its path
9035 SourceCodeInfo_Location* replacement = new_locs.Add();
9036 *replacement = *loc;
9037 replacement->clear_path();
9038 for (std::vector<int>::iterator rit = entry->second.begin();
9039 rit != entry->second.end(); rit++) {
9040 replacement->add_path(*rit);
9041 }
9042 }
9043
9044 // if we made a changed copy, put it in place
9045 if (copying) {
9046 *locs = std::move(new_locs);
9047 }
9048 }
9049
AddWithoutInterpreting(const UninterpretedOption & uninterpreted_option,Message * options)9050 void DescriptorBuilder::OptionInterpreter::AddWithoutInterpreting(
9051 const UninterpretedOption& uninterpreted_option, Message* options) {
9052 const FieldDescriptor* field =
9053 options->GetDescriptor()->FindFieldByName("uninterpreted_option");
9054 ABSL_CHECK(field != nullptr);
9055
9056 options->GetReflection()
9057 ->AddMessage(options, field)
9058 ->CopyFrom(uninterpreted_option);
9059 }
9060
ExamineIfOptionIsSet(std::vector<const FieldDescriptor * >::const_iterator intermediate_fields_iter,std::vector<const FieldDescriptor * >::const_iterator intermediate_fields_end,const FieldDescriptor * innermost_field,const std::string & debug_msg_name,const UnknownFieldSet & unknown_fields)9061 bool DescriptorBuilder::OptionInterpreter::ExamineIfOptionIsSet(
9062 std::vector<const FieldDescriptor*>::const_iterator
9063 intermediate_fields_iter,
9064 std::vector<const FieldDescriptor*>::const_iterator intermediate_fields_end,
9065 const FieldDescriptor* innermost_field, const std::string& debug_msg_name,
9066 const UnknownFieldSet& unknown_fields) {
9067 // We do linear searches of the UnknownFieldSet and its sub-groups. This
9068 // should be fine since it's unlikely that any one options structure will
9069 // contain more than a handful of options.
9070
9071 if (intermediate_fields_iter == intermediate_fields_end) {
9072 // We're at the innermost submessage.
9073 for (int i = 0; i < unknown_fields.field_count(); i++) {
9074 if (unknown_fields.field(i).number() == innermost_field->number()) {
9075 return AddNameError([&] {
9076 return absl::StrCat("Option \"", debug_msg_name,
9077 "\" was already set.");
9078 });
9079 }
9080 }
9081 return true;
9082 }
9083
9084 for (int i = 0; i < unknown_fields.field_count(); i++) {
9085 if (unknown_fields.field(i).number() ==
9086 (*intermediate_fields_iter)->number()) {
9087 const UnknownField* unknown_field = &unknown_fields.field(i);
9088 FieldDescriptor::Type type = (*intermediate_fields_iter)->type();
9089 // Recurse into the next submessage.
9090 switch (type) {
9091 case FieldDescriptor::TYPE_MESSAGE:
9092 if (unknown_field->type() == UnknownField::TYPE_LENGTH_DELIMITED) {
9093 UnknownFieldSet intermediate_unknown_fields;
9094 if (intermediate_unknown_fields.ParseFromString(
9095 unknown_field->length_delimited()) &&
9096 !ExamineIfOptionIsSet(intermediate_fields_iter + 1,
9097 intermediate_fields_end, innermost_field,
9098 debug_msg_name,
9099 intermediate_unknown_fields)) {
9100 return false; // Error already added.
9101 }
9102 }
9103 break;
9104
9105 case FieldDescriptor::TYPE_GROUP:
9106 if (unknown_field->type() == UnknownField::TYPE_GROUP) {
9107 if (!ExamineIfOptionIsSet(intermediate_fields_iter + 1,
9108 intermediate_fields_end, innermost_field,
9109 debug_msg_name, unknown_field->group())) {
9110 return false; // Error already added.
9111 }
9112 }
9113 break;
9114
9115 default:
9116 ABSL_LOG(FATAL) << "Invalid wire type for CPPTYPE_MESSAGE: " << type;
9117 return false;
9118 }
9119 }
9120 }
9121 return true;
9122 }
9123
9124 namespace {
9125 // Helpers for method below
9126
9127 template <typename T>
ValueOutOfRange(absl::string_view type_name,absl::string_view option_name)9128 std::string ValueOutOfRange(absl::string_view type_name,
9129 absl::string_view option_name) {
9130 return absl::StrFormat("Value out of range, %d to %d, for %s option \"%s\".",
9131 std::numeric_limits<T>::min(),
9132 std::numeric_limits<T>::max(), type_name, option_name);
9133 }
9134
9135 template <typename T>
ValueMustBeInt(absl::string_view type_name,absl::string_view option_name)9136 std::string ValueMustBeInt(absl::string_view type_name,
9137 absl::string_view option_name) {
9138 return absl::StrFormat(
9139 "Value must be integer, from %d to %d, for %s option \"%s\".",
9140 std::numeric_limits<T>::min(), std::numeric_limits<T>::max(), type_name,
9141 option_name);
9142 }
9143
9144 } // namespace
9145
SetOptionValue(const FieldDescriptor * option_field,UnknownFieldSet * unknown_fields)9146 bool DescriptorBuilder::OptionInterpreter::SetOptionValue(
9147 const FieldDescriptor* option_field, UnknownFieldSet* unknown_fields) {
9148 // We switch on the CppType to validate.
9149 switch (option_field->cpp_type()) {
9150 case FieldDescriptor::CPPTYPE_INT32:
9151 if (uninterpreted_option_->has_positive_int_value()) {
9152 if (uninterpreted_option_->positive_int_value() >
9153 static_cast<uint64_t>(std::numeric_limits<int32_t>::max())) {
9154 return AddValueError([&] {
9155 return ValueOutOfRange<int32_t>("int32", option_field->full_name());
9156 });
9157 } else {
9158 SetInt32(option_field->number(),
9159 uninterpreted_option_->positive_int_value(),
9160 option_field->type(), unknown_fields);
9161 }
9162 } else if (uninterpreted_option_->has_negative_int_value()) {
9163 if (uninterpreted_option_->negative_int_value() <
9164 static_cast<int64_t>(std::numeric_limits<int32_t>::min())) {
9165 return AddValueError([&] {
9166 return ValueOutOfRange<int32_t>("int32", option_field->full_name());
9167 });
9168 } else {
9169 SetInt32(option_field->number(),
9170 uninterpreted_option_->negative_int_value(),
9171 option_field->type(), unknown_fields);
9172 }
9173 } else {
9174 return AddValueError([&] {
9175 return ValueMustBeInt<int32_t>("int32", option_field->full_name());
9176 });
9177 }
9178 break;
9179
9180 case FieldDescriptor::CPPTYPE_INT64:
9181 if (uninterpreted_option_->has_positive_int_value()) {
9182 if (uninterpreted_option_->positive_int_value() >
9183 static_cast<uint64_t>(std::numeric_limits<int64_t>::max())) {
9184 return AddValueError([&] {
9185 return ValueOutOfRange<int64_t>("int64", option_field->full_name());
9186 });
9187 } else {
9188 SetInt64(option_field->number(),
9189 uninterpreted_option_->positive_int_value(),
9190 option_field->type(), unknown_fields);
9191 }
9192 } else if (uninterpreted_option_->has_negative_int_value()) {
9193 SetInt64(option_field->number(),
9194 uninterpreted_option_->negative_int_value(),
9195 option_field->type(), unknown_fields);
9196 } else {
9197 return AddValueError([&] {
9198 return ValueMustBeInt<int64_t>("int64", option_field->full_name());
9199 });
9200 }
9201 break;
9202
9203 case FieldDescriptor::CPPTYPE_UINT32:
9204 if (uninterpreted_option_->has_positive_int_value()) {
9205 if (uninterpreted_option_->positive_int_value() >
9206 std::numeric_limits<uint32_t>::max()) {
9207 return AddValueError([&] {
9208 return ValueOutOfRange<uint32_t>("uint32",
9209 option_field->full_name());
9210 });
9211 } else {
9212 SetUInt32(option_field->number(),
9213 uninterpreted_option_->positive_int_value(),
9214 option_field->type(), unknown_fields);
9215 }
9216 } else {
9217 return AddValueError([&] {
9218 return ValueMustBeInt<uint32_t>("uint32", option_field->full_name());
9219 });
9220 }
9221 break;
9222
9223 case FieldDescriptor::CPPTYPE_UINT64:
9224 if (uninterpreted_option_->has_positive_int_value()) {
9225 SetUInt64(option_field->number(),
9226 uninterpreted_option_->positive_int_value(),
9227 option_field->type(), unknown_fields);
9228 } else {
9229 return AddValueError([&] {
9230 return ValueMustBeInt<uint64_t>("uint64", option_field->full_name());
9231 });
9232 }
9233 break;
9234
9235 case FieldDescriptor::CPPTYPE_FLOAT: {
9236 float value;
9237 if (uninterpreted_option_->has_double_value()) {
9238 value = uninterpreted_option_->double_value();
9239 } else if (uninterpreted_option_->has_positive_int_value()) {
9240 value = uninterpreted_option_->positive_int_value();
9241 } else if (uninterpreted_option_->has_negative_int_value()) {
9242 value = uninterpreted_option_->negative_int_value();
9243 } else if (uninterpreted_option_->identifier_value() == "inf") {
9244 value = std::numeric_limits<float>::infinity();
9245 } else if (uninterpreted_option_->identifier_value() == "nan") {
9246 value = std::numeric_limits<float>::quiet_NaN();
9247 } else {
9248 return AddValueError([&] {
9249 return absl::StrCat("Value must be number for float option \"",
9250 option_field->full_name(), "\".");
9251 });
9252 }
9253 unknown_fields->AddFixed32(option_field->number(),
9254 internal::WireFormatLite::EncodeFloat(value));
9255 break;
9256 }
9257
9258 case FieldDescriptor::CPPTYPE_DOUBLE: {
9259 double value;
9260 if (uninterpreted_option_->has_double_value()) {
9261 value = uninterpreted_option_->double_value();
9262 } else if (uninterpreted_option_->has_positive_int_value()) {
9263 value = uninterpreted_option_->positive_int_value();
9264 } else if (uninterpreted_option_->has_negative_int_value()) {
9265 value = uninterpreted_option_->negative_int_value();
9266 } else if (uninterpreted_option_->identifier_value() == "inf") {
9267 value = std::numeric_limits<double>::infinity();
9268 } else if (uninterpreted_option_->identifier_value() == "nan") {
9269 value = std::numeric_limits<double>::quiet_NaN();
9270 } else {
9271 return AddValueError([&] {
9272 return absl::StrCat("Value must be number for double option \"",
9273 option_field->full_name(), "\".");
9274 });
9275 }
9276 unknown_fields->AddFixed64(option_field->number(),
9277 internal::WireFormatLite::EncodeDouble(value));
9278 break;
9279 }
9280
9281 case FieldDescriptor::CPPTYPE_BOOL:
9282 uint64_t value;
9283 if (!uninterpreted_option_->has_identifier_value()) {
9284 return AddValueError([&] {
9285 return absl::StrCat("Value must be identifier for boolean option \"",
9286 option_field->full_name(), "\".");
9287 });
9288 }
9289 if (uninterpreted_option_->identifier_value() == "true") {
9290 value = 1;
9291 } else if (uninterpreted_option_->identifier_value() == "false") {
9292 value = 0;
9293 } else {
9294 return AddValueError([&] {
9295 return absl::StrCat(
9296 "Value must be \"true\" or \"false\" for boolean option \"",
9297 option_field->full_name(), "\".");
9298 });
9299 }
9300 unknown_fields->AddVarint(option_field->number(), value);
9301 break;
9302
9303 case FieldDescriptor::CPPTYPE_ENUM: {
9304 if (!uninterpreted_option_->has_identifier_value()) {
9305 return AddValueError([&] {
9306 return absl::StrCat(
9307 "Value must be identifier for enum-valued option \"",
9308 option_field->full_name(), "\".");
9309 });
9310 }
9311 const EnumDescriptor* enum_type = option_field->enum_type();
9312 const std::string& value_name = uninterpreted_option_->identifier_value();
9313 const EnumValueDescriptor* enum_value = nullptr;
9314
9315 if (enum_type->file()->pool() != DescriptorPool::generated_pool()) {
9316 // Note that the enum value's fully-qualified name is a sibling of the
9317 // enum's name, not a child of it.
9318 std::string fully_qualified_name = std::string(enum_type->full_name());
9319 fully_qualified_name.resize(fully_qualified_name.size() -
9320 enum_type->name().size());
9321 fully_qualified_name += value_name;
9322
9323 // Search for the enum value's descriptor in the builder's pool. Note
9324 // that we use DescriptorBuilder::FindSymbolNotEnforcingDeps(), not
9325 // DescriptorPool::FindEnumValueByName() because we're already holding
9326 // the pool's mutex, and the latter method locks it again.
9327 Symbol symbol =
9328 builder_->FindSymbolNotEnforcingDeps(fully_qualified_name);
9329 if (auto* candidate_descriptor = symbol.enum_value_descriptor()) {
9330 if (candidate_descriptor->type() != enum_type) {
9331 return AddValueError([&] {
9332 return absl::StrCat(
9333 "Enum type \"", enum_type->full_name(),
9334 "\" has no value named \"", value_name, "\" for option \"",
9335 option_field->full_name(),
9336 "\". This appears to be a value from a sibling type.");
9337 });
9338 } else {
9339 enum_value = candidate_descriptor;
9340 }
9341 }
9342 } else {
9343 // The enum type is in the generated pool, so we can search for the
9344 // value there.
9345 enum_value = enum_type->FindValueByName(value_name);
9346 }
9347
9348 if (enum_value == nullptr) {
9349 return AddValueError([&] {
9350 return absl::StrCat(
9351 "Enum type \"", option_field->enum_type()->full_name(),
9352 "\" has no value named \"", value_name, "\" for option \"",
9353 option_field->full_name(), "\".");
9354 });
9355 } else {
9356 // Sign-extension is not a problem, since we cast directly from int32_t
9357 // to uint64_t, without first going through uint32_t.
9358 unknown_fields->AddVarint(
9359 option_field->number(),
9360 static_cast<uint64_t>(static_cast<int64_t>(enum_value->number())));
9361 }
9362 break;
9363 }
9364
9365 case FieldDescriptor::CPPTYPE_STRING:
9366 if (!uninterpreted_option_->has_string_value()) {
9367 return AddValueError([&] {
9368 return absl::StrCat(
9369 "Value must be quoted string for string option \"",
9370 option_field->full_name(), "\".");
9371 });
9372 }
9373 // The string has already been unquoted and unescaped by the parser.
9374 unknown_fields->AddLengthDelimited(option_field->number(),
9375 uninterpreted_option_->string_value());
9376 break;
9377
9378 case FieldDescriptor::CPPTYPE_MESSAGE:
9379 if (!SetAggregateOption(option_field, unknown_fields)) {
9380 return false;
9381 }
9382 break;
9383 }
9384
9385 return true;
9386 }
9387
9388 class DescriptorBuilder::OptionInterpreter::AggregateOptionFinder
9389 : public TextFormat::Finder {
9390 public:
9391 DescriptorBuilder* builder_;
9392
FindAnyType(const Message &,const std::string & prefix,const std::string & name) const9393 const Descriptor* FindAnyType(const Message& /*message*/,
9394 const std::string& prefix,
9395 const std::string& name) const override {
9396 if (prefix != internal::kTypeGoogleApisComPrefix &&
9397 prefix != internal::kTypeGoogleProdComPrefix) {
9398 return nullptr;
9399 }
9400 assert_mutex_held(builder_->pool_);
9401 return builder_->FindSymbol(name).descriptor();
9402 }
9403
FindExtension(Message * message,const std::string & name) const9404 const FieldDescriptor* FindExtension(Message* message,
9405 const std::string& name) const override {
9406 assert_mutex_held(builder_->pool_);
9407 const Descriptor* descriptor = message->GetDescriptor();
9408 Symbol result =
9409 builder_->LookupSymbolNoPlaceholder(name, descriptor->full_name());
9410 if (auto* field = result.field_descriptor()) {
9411 return field;
9412 } else if (result.type() == Symbol::MESSAGE &&
9413 descriptor->options().message_set_wire_format()) {
9414 const Descriptor* foreign_type = result.descriptor();
9415 // The text format allows MessageSet items to be specified using
9416 // the type name, rather than the extension identifier. If the symbol
9417 // lookup returned a Message, and the enclosing Message has
9418 // message_set_wire_format = true, then return the message set
9419 // extension, if one exists.
9420 for (int i = 0; i < foreign_type->extension_count(); i++) {
9421 const FieldDescriptor* extension = foreign_type->extension(i);
9422 if (extension->containing_type() == descriptor &&
9423 extension->type() == FieldDescriptor::TYPE_MESSAGE &&
9424 extension->is_optional() &&
9425 extension->message_type() == foreign_type) {
9426 // Found it.
9427 return extension;
9428 }
9429 }
9430 }
9431 return nullptr;
9432 }
9433 };
9434
9435 // A custom error collector to record any text-format parsing errors
9436 namespace {
9437 class AggregateErrorCollector : public io::ErrorCollector {
9438 public:
9439 std::string error_;
9440
RecordError(int,int,const absl::string_view message)9441 void RecordError(int /* line */, int /* column */,
9442 const absl::string_view message) override {
9443 if (!error_.empty()) {
9444 absl::StrAppend(&error_, "; ");
9445 }
9446 absl::StrAppend(&error_, message);
9447 }
9448
RecordWarning(int,int,const absl::string_view)9449 void RecordWarning(int /* line */, int /* column */,
9450 const absl::string_view /* message */) override {
9451 // Ignore warnings
9452 }
9453 };
9454 } // namespace
9455
9456 // We construct a dynamic message of the type corresponding to
9457 // option_field, parse the supplied text-format string into this
9458 // message, and serialize the resulting message to produce the value.
SetAggregateOption(const FieldDescriptor * option_field,UnknownFieldSet * unknown_fields)9459 bool DescriptorBuilder::OptionInterpreter::SetAggregateOption(
9460 const FieldDescriptor* option_field, UnknownFieldSet* unknown_fields) {
9461 if (!uninterpreted_option_->has_aggregate_value()) {
9462 return AddValueError([&] {
9463 return absl::StrCat("Option \"", option_field->full_name(),
9464 "\" is a message. "
9465 "To set the entire message, use syntax like \"",
9466 option_field->name(),
9467 " = { <proto text format> }\". "
9468 "To set fields within it, use syntax like \"",
9469 option_field->name(), ".foo = value\".");
9470 });
9471 }
9472
9473 const Descriptor* type = option_field->message_type();
9474 std::unique_ptr<Message> dynamic(dynamic_factory_.GetPrototype(type)->New());
9475 ABSL_CHECK(dynamic.get() != nullptr)
9476 << "Could not create an instance of " << option_field->DebugString();
9477
9478 AggregateErrorCollector collector;
9479 AggregateOptionFinder finder;
9480 finder.builder_ = builder_;
9481 TextFormat::Parser parser;
9482 parser.RecordErrorsTo(&collector);
9483 parser.SetFinder(&finder);
9484 if (!parser.ParseFromString(uninterpreted_option_->aggregate_value(),
9485 dynamic.get())) {
9486 AddValueError([&] {
9487 return absl::StrCat("Error while parsing option value for \"",
9488 option_field->name(), "\": ", collector.error_);
9489 });
9490 return false;
9491 } else {
9492 std::string serial;
9493 dynamic->SerializeToString(&serial); // Never fails
9494 if (option_field->type() == FieldDescriptor::TYPE_MESSAGE) {
9495 unknown_fields->AddLengthDelimited(option_field->number(), serial);
9496 } else {
9497 ABSL_CHECK_EQ(option_field->type(), FieldDescriptor::TYPE_GROUP);
9498 UnknownFieldSet* group = unknown_fields->AddGroup(option_field->number());
9499 group->ParseFromString(serial);
9500 }
9501 return true;
9502 }
9503 }
9504
SetInt32(int number,int32_t value,FieldDescriptor::Type type,UnknownFieldSet * unknown_fields)9505 void DescriptorBuilder::OptionInterpreter::SetInt32(
9506 int number, int32_t value, FieldDescriptor::Type type,
9507 UnknownFieldSet* unknown_fields) {
9508 switch (type) {
9509 case FieldDescriptor::TYPE_INT32:
9510 unknown_fields->AddVarint(
9511 number, static_cast<uint64_t>(static_cast<int64_t>(value)));
9512 break;
9513
9514 case FieldDescriptor::TYPE_SFIXED32:
9515 unknown_fields->AddFixed32(number, static_cast<uint32_t>(value));
9516 break;
9517
9518 case FieldDescriptor::TYPE_SINT32:
9519 unknown_fields->AddVarint(
9520 number, internal::WireFormatLite::ZigZagEncode32(value));
9521 break;
9522
9523 default:
9524 ABSL_LOG(FATAL) << "Invalid wire type for CPPTYPE_INT32: " << type;
9525 break;
9526 }
9527 }
9528
SetInt64(int number,int64_t value,FieldDescriptor::Type type,UnknownFieldSet * unknown_fields)9529 void DescriptorBuilder::OptionInterpreter::SetInt64(
9530 int number, int64_t value, FieldDescriptor::Type type,
9531 UnknownFieldSet* unknown_fields) {
9532 switch (type) {
9533 case FieldDescriptor::TYPE_INT64:
9534 unknown_fields->AddVarint(number, static_cast<uint64_t>(value));
9535 break;
9536
9537 case FieldDescriptor::TYPE_SFIXED64:
9538 unknown_fields->AddFixed64(number, static_cast<uint64_t>(value));
9539 break;
9540
9541 case FieldDescriptor::TYPE_SINT64:
9542 unknown_fields->AddVarint(
9543 number, internal::WireFormatLite::ZigZagEncode64(value));
9544 break;
9545
9546 default:
9547 ABSL_LOG(FATAL) << "Invalid wire type for CPPTYPE_INT64: " << type;
9548 break;
9549 }
9550 }
9551
SetUInt32(int number,uint32_t value,FieldDescriptor::Type type,UnknownFieldSet * unknown_fields)9552 void DescriptorBuilder::OptionInterpreter::SetUInt32(
9553 int number, uint32_t value, FieldDescriptor::Type type,
9554 UnknownFieldSet* unknown_fields) {
9555 switch (type) {
9556 case FieldDescriptor::TYPE_UINT32:
9557 unknown_fields->AddVarint(number, static_cast<uint64_t>(value));
9558 break;
9559
9560 case FieldDescriptor::TYPE_FIXED32:
9561 unknown_fields->AddFixed32(number, static_cast<uint32_t>(value));
9562 break;
9563
9564 default:
9565 ABSL_LOG(FATAL) << "Invalid wire type for CPPTYPE_UINT32: " << type;
9566 break;
9567 }
9568 }
9569
SetUInt64(int number,uint64_t value,FieldDescriptor::Type type,UnknownFieldSet * unknown_fields)9570 void DescriptorBuilder::OptionInterpreter::SetUInt64(
9571 int number, uint64_t value, FieldDescriptor::Type type,
9572 UnknownFieldSet* unknown_fields) {
9573 switch (type) {
9574 case FieldDescriptor::TYPE_UINT64:
9575 unknown_fields->AddVarint(number, value);
9576 break;
9577
9578 case FieldDescriptor::TYPE_FIXED64:
9579 unknown_fields->AddFixed64(number, value);
9580 break;
9581
9582 default:
9583 ABSL_LOG(FATAL) << "Invalid wire type for CPPTYPE_UINT64: " << type;
9584 break;
9585 }
9586 }
9587
LogUnusedDependency(const FileDescriptorProto & proto,const FileDescriptor * result)9588 void DescriptorBuilder::LogUnusedDependency(const FileDescriptorProto& proto,
9589 const FileDescriptor* result) {
9590 (void)result; // Parameter is used by Google-internal code.
9591
9592 if (!unused_dependency_.empty()) {
9593 auto itr = pool_->direct_input_files_.find(proto.name());
9594 bool is_error = itr != pool_->direct_input_files_.end() && itr->second;
9595 for (const auto* unused : unused_dependency_) {
9596 auto make_error = [&] {
9597 return absl::StrCat("Import ", unused->name(), " is unused.");
9598 };
9599 if (is_error) {
9600 AddError(unused->name(), proto, DescriptorPool::ErrorCollector::IMPORT,
9601 make_error);
9602 } else {
9603 AddWarning(unused->name(), proto,
9604 DescriptorPool::ErrorCollector::IMPORT, make_error);
9605 }
9606 }
9607 }
9608 }
9609
CrossLinkOnDemandHelper(absl::string_view name,bool expecting_enum) const9610 Symbol DescriptorPool::CrossLinkOnDemandHelper(absl::string_view name,
9611 bool expecting_enum) const {
9612 (void)expecting_enum; // Parameter is used by Google-internal code.
9613 auto lookup_name = std::string(name);
9614 if (!lookup_name.empty() && lookup_name[0] == '.') {
9615 lookup_name = lookup_name.substr(1);
9616 }
9617 Symbol result = tables_->FindByNameHelper(this, lookup_name);
9618 return result;
9619 }
9620
9621 // Handle the lazy import building for a message field whose type wasn't built
9622 // at cross link time. If that was the case, we saved the name of the type to
9623 // be looked up when the accessor for the type was called. Set type_,
9624 // enum_type_, message_type_, and default_value_enum_ appropriately.
InternalTypeOnceInit() const9625 void FieldDescriptor::InternalTypeOnceInit() const {
9626 ABSL_CHECK(file()->finished_building_ == true);
9627 const EnumDescriptor* enum_type = nullptr;
9628 const char* lazy_type_name = reinterpret_cast<const char*>(type_once_ + 1);
9629 const char* lazy_default_value_enum_name =
9630 lazy_type_name + strlen(lazy_type_name) + 1;
9631 Symbol result = file()->pool()->CrossLinkOnDemandHelper(
9632 lazy_type_name, type_ == FieldDescriptor::TYPE_ENUM);
9633 if (result.type() == Symbol::MESSAGE) {
9634 ABSL_CHECK(type_ == FieldDescriptor::TYPE_MESSAGE ||
9635 type_ == FieldDescriptor::TYPE_GROUP);
9636 type_descriptor_.message_type = result.descriptor();
9637 } else if (result.type() == Symbol::ENUM) {
9638 ABSL_CHECK(type_ == FieldDescriptor::TYPE_ENUM);
9639 enum_type = type_descriptor_.enum_type = result.enum_descriptor();
9640 }
9641
9642 if (enum_type) {
9643 if (lazy_default_value_enum_name[0] != '\0') {
9644 // Have to build the full name now instead of at CrossLink time,
9645 // because enum_type may not be known at the time.
9646 std::string name = std::string(enum_type->full_name());
9647 // Enum values reside in the same scope as the enum type.
9648 std::string::size_type last_dot = name.find_last_of('.');
9649 if (last_dot != std::string::npos) {
9650 name = absl::StrCat(name.substr(0, last_dot), ".",
9651 lazy_default_value_enum_name);
9652 } else {
9653 name = lazy_default_value_enum_name;
9654 }
9655 Symbol result_enum = file()->pool()->CrossLinkOnDemandHelper(name, true);
9656 default_value_enum_ = result_enum.enum_value_descriptor();
9657 } else {
9658 default_value_enum_ = nullptr;
9659 }
9660 if (!default_value_enum_) {
9661 // We use the first defined value as the default
9662 // if a default is not explicitly defined.
9663 ABSL_CHECK(enum_type->value_count());
9664 default_value_enum_ = enum_type->value(0);
9665 }
9666 }
9667 }
9668
// Static trampoline that runs the lazy type initialization on `to_init`.
// It is the callable handed to absl::call_once by the type accessors.
void FieldDescriptor::TypeOnceInit(const FieldDescriptor* to_init) {
  to_init->InternalTypeOnceInit();
}
9672
9673 // message_type(), enum_type(), default_value_enum(), and type()
9674 // all share the same absl::call_once init path to do lazy
9675 // import building and cross linking of a field of a message.
message_type() const9676 const Descriptor* FieldDescriptor::message_type() const {
9677 if (type_ == TYPE_MESSAGE || type_ == TYPE_GROUP) {
9678 if (type_once_) {
9679 absl::call_once(*type_once_, FieldDescriptor::TypeOnceInit, this);
9680 }
9681 return type_descriptor_.message_type;
9682 }
9683 return nullptr;
9684 }
9685
enum_type() const9686 const EnumDescriptor* FieldDescriptor::enum_type() const {
9687 if (type_ == TYPE_ENUM) {
9688 if (type_once_) {
9689 absl::call_once(*type_once_, FieldDescriptor::TypeOnceInit, this);
9690 }
9691 return type_descriptor_.enum_type;
9692 }
9693 return nullptr;
9694 }
9695
default_value_enum() const9696 const EnumValueDescriptor* FieldDescriptor::default_value_enum() const {
9697 if (type_once_) {
9698 absl::call_once(*type_once_, FieldDescriptor::TypeOnceInit, this);
9699 }
9700 return default_value_enum_;
9701 }
9702
PrintableNameForExtension() const9703 internal::DescriptorStringView FieldDescriptor::PrintableNameForExtension()
9704 const {
9705 const bool is_message_set_extension =
9706 is_extension() &&
9707 containing_type()->options().message_set_wire_format() &&
9708 type() == FieldDescriptor::TYPE_MESSAGE && is_optional() &&
9709 extension_scope() == message_type();
9710 return is_message_set_extension ? message_type()->full_name() : full_name();
9711 }
9712
InternalDependenciesOnceInit() const9713 void FileDescriptor::InternalDependenciesOnceInit() const {
9714 ABSL_CHECK(finished_building_ == true);
9715 const char* names_ptr = reinterpret_cast<const char*>(dependencies_once_ + 1);
9716 for (int i = 0; i < dependency_count(); i++) {
9717 const char* name = names_ptr;
9718 names_ptr += strlen(name) + 1;
9719 if (name[0] != '\0') {
9720 dependencies_[i] = pool_->FindFileByName(name);
9721 }
9722 }
9723 }
9724
// Static trampoline that runs the lazy dependency initialization on
// `to_init`. It is the callable handed to absl::call_once by dependency().
void FileDescriptor::DependenciesOnceInit(const FileDescriptor* to_init) {
  to_init->InternalDependenciesOnceInit();
}
9728
dependency(int index) const9729 const FileDescriptor* FileDescriptor::dependency(int index) const {
9730 if (dependencies_once_) {
9731 // Do once init for all indices, as it's unlikely only a single index would
9732 // be called, and saves on absl::call_once allocations.
9733 absl::call_once(*dependencies_once_, FileDescriptor::DependenciesOnceInit,
9734 this);
9735 }
9736 return dependencies_[index];
9737 }
9738
// Returns the method's input message type, obtained from input_type_ with the
// owning service passed as context.
const Descriptor* MethodDescriptor::input_type() const {
  return input_type_.Get(service());
}
9742
// Returns the method's output message type, obtained from output_type_ with
// the owning service passed as context.
const Descriptor* MethodDescriptor::output_type() const {
  return output_type_.Get(service());
}
9746
9747 namespace internal {
// Stores an already-resolved descriptor. Must not be called once a lazy name
// has been registered (i.e. once_ must still be unset).
void LazyDescriptor::Set(const Descriptor* descriptor) {
  ABSL_CHECK(!once_);
  descriptor_ = descriptor;
}
9752
SetLazy(absl::string_view name,const FileDescriptor * file)9753 void LazyDescriptor::SetLazy(absl::string_view name,
9754 const FileDescriptor* file) {
9755 // verify Init() has been called and Set hasn't been called yet.
9756 ABSL_CHECK(!descriptor_);
9757 ABSL_CHECK(!once_);
9758 ABSL_CHECK(file && file->pool_);
9759 ABSL_CHECK(file->pool_->lazily_build_dependencies_);
9760 ABSL_CHECK(!file->finished_building_);
9761 once_ = ::new (file->pool_->tables_->AllocateBytes(static_cast<int>(
9762 sizeof(absl::once_flag) + name.size() + 1))) absl::once_flag{};
9763 char* lazy_name = reinterpret_cast<char*>(once_ + 1);
9764 memcpy(lazy_name, name.data(), name.size());
9765 lazy_name[name.size()] = 0;
9766 }
9767
Once(const ServiceDescriptor * service)9768 void LazyDescriptor::Once(const ServiceDescriptor* service) {
9769 if (once_) {
9770 absl::call_once(*once_, [&] {
9771 auto* file = service->file();
9772 ABSL_CHECK(file->finished_building_);
9773 const char* lazy_name = reinterpret_cast<const char*>(once_ + 1);
9774 descriptor_ =
9775 file->pool_->CrossLinkOnDemandHelper(lazy_name, false).descriptor();
9776 });
9777 }
9778 }
9779
ParseNoReflection(absl::string_view from,google::protobuf::MessageLite & to)9780 bool ParseNoReflection(absl::string_view from, google::protobuf::MessageLite& to) {
9781 auto cleanup = DisableTracking();
9782
9783 to.Clear();
9784 const char* ptr;
9785 internal::ParseContext ctx(io::CodedInputStream::GetDefaultRecursionLimit(),
9786 false, &ptr, from);
9787 ptr = to._InternalParse(ptr, &ctx);
9788 if (ptr == nullptr || !ctx.EndedAtLimit()) return false;
9789 return to.IsInitializedWithErrors();
9790 }
9791
9792 namespace cpp {
HasPreservingUnknownEnumSemantics(const FieldDescriptor * field)9793 bool HasPreservingUnknownEnumSemantics(const FieldDescriptor* field) {
9794 if (field->legacy_enum_field_treated_as_closed()) {
9795 return false;
9796 }
9797
9798 return field->enum_type() != nullptr && !field->enum_type()->is_closed();
9799 }
9800
HasHasbit(const FieldDescriptor * field)9801 bool HasHasbit(const FieldDescriptor* field) {
9802 return field->has_presence() && !field->real_containing_oneof() &&
9803 !field->options().weak();
9804 }
9805
// Returns whether UTF-8 should be verified: true for every non-lite build,
// false for lite. `field` does not influence the result.
static bool IsVerifyUtf8(const FieldDescriptor* field, bool is_lite) {
  return !is_lite;
}
9810
9811 // Which level of UTF-8 enforcemant is placed on this file.
GetUtf8CheckMode(const FieldDescriptor * field,bool is_lite)9812 Utf8CheckMode GetUtf8CheckMode(const FieldDescriptor* field, bool is_lite) {
9813 if (field->type() == FieldDescriptor::TYPE_STRING ||
9814 (field->is_map() && (field->message_type()->map_key()->type() ==
9815 FieldDescriptor::TYPE_STRING ||
9816 field->message_type()->map_value()->type() ==
9817 FieldDescriptor::TYPE_STRING))) {
9818 if (IsStrictUtf8(field)) {
9819 return Utf8CheckMode::kStrict;
9820 } else if (IsVerifyUtf8(field, is_lite)) {
9821 return Utf8CheckMode::kVerify;
9822 }
9823 }
9824 return Utf8CheckMode::kNone;
9825 }
9826
IsGroupLike(const FieldDescriptor & field)9827 bool IsGroupLike(const FieldDescriptor& field) {
9828 // Groups are always tag-delimited, currently specified by a TYPE_GROUP type.
9829 if (field.type() != FieldDescriptor::TYPE_GROUP) return false;
9830 // Group fields always are always the lowercase type name.
9831 if (field.name() != absl::AsciiStrToLower(field.message_type()->name())) {
9832 return false;
9833 }
9834
9835 if (field.message_type()->file() != field.file()) return false;
9836
9837 // Group messages are always defined in the same scope as the field. File
9838 // level extensions will compare NULL == NULL here, which is why the file
9839 // comparison above is necessary to ensure both come from the same file.
9840 return field.is_extension() ? field.message_type()->containing_type() ==
9841 field.extension_scope()
9842 : field.message_type()->containing_type() ==
9843 field.containing_type();
9844 }
9845
IsLazilyInitializedFile(absl::string_view filename)9846 bool IsLazilyInitializedFile(absl::string_view filename) {
9847 if (filename == "third_party/protobuf/cpp_features.proto" ||
9848 filename == "google/protobuf/cpp_features.proto") {
9849 return true;
9850 }
9851 return filename == "net/proto2/proto/descriptor.proto" ||
9852 filename == "google/protobuf/descriptor.proto";
9853 }
9854
// Thin wrapper that forwards to is_tracking_enabled().
bool IsTrackingEnabled() { return is_tracking_enabled(); }
9856
9857 } // namespace cpp
9858 } // namespace internal
9859
// Returns the edition stored for this file.
Edition FileDescriptor::edition() const { return edition_; }
9861
9862 namespace internal {
// Returns the enum name of `edition` with a leading "EDITION_" removed, if
// present.
absl::string_view ShortEditionName(Edition edition) {
  return absl::StripPrefix(Edition_Name(edition), "EDITION_");
}
// Returns the edition of `desc` by forwarding to FileDescriptor::edition().
Edition InternalFeatureHelper::GetEdition(const FileDescriptor& desc) {
  return desc.edition();
}
9869 } // namespace internal
9870
9871 } // namespace protobuf
9872 } // namespace google
9873
9874 #include "google/protobuf/port_undef.inc"
9875