• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc.  All rights reserved.
3 //
4 // Use of this source code is governed by a BSD-style
5 // license that can be found in the LICENSE file or at
6 // https://developers.google.com/open-source/licenses/bsd
7 
8 // Author: kenton@google.com (Kenton Varda)
9 //  Based on original Protocol Buffers design by
10 //  Sanjay Ghemawat, Jeff Dean, and others.
11 
12 #include "google/protobuf/descriptor.h"
13 
14 #include <fcntl.h>
15 #include <limits.h>
16 
17 #include <algorithm>
18 #include <array>
19 #include <atomic>
20 #include <cstdint>
21 #include <cstdlib>
22 #include <cstring>
23 #include <functional>
24 #include <initializer_list>
25 #include <iterator>
26 #include <limits>
27 #include <memory>
28 #include <new>  // IWYU pragma: keep
29 #include <sstream>
30 #include <string>
31 #include <tuple>
32 #include <type_traits>
33 #include <utility>
34 #include <vector>
35 
36 #include "absl/base/attributes.h"
37 #include "absl/base/call_once.h"
38 #include "absl/base/casts.h"
39 #include "absl/base/const_init.h"
40 #include "absl/base/dynamic_annotations.h"
41 #include "absl/base/thread_annotations.h"
42 #include "absl/cleanup/cleanup.h"
43 #include "absl/container/btree_map.h"
44 #include "absl/container/flat_hash_map.h"
45 #include "absl/container/flat_hash_set.h"
46 #include "absl/functional/function_ref.h"
47 #include "absl/hash/hash.h"
48 #include "absl/log/absl_check.h"
49 #include "absl/log/absl_log.h"
50 #include "absl/memory/memory.h"
51 #include "absl/status/status.h"
52 #include "absl/status/statusor.h"
53 #include "absl/strings/ascii.h"
54 #include "absl/strings/escaping.h"
55 #include "absl/strings/match.h"
56 #include "absl/strings/str_cat.h"
57 #include "absl/strings/str_format.h"
58 #include "absl/strings/str_join.h"
59 #include "absl/strings/str_split.h"
60 #include "absl/strings/string_view.h"
61 #include "absl/strings/strip.h"
62 #include "absl/strings/substitute.h"
63 #include "absl/synchronization/mutex.h"
64 #include "absl/types/optional.h"
65 #include "absl/types/span.h"
66 #include "google/protobuf/any.h"
67 #include "google/protobuf/cpp_edition_defaults.h"
68 #include "google/protobuf/cpp_features.pb.h"
69 #include "google/protobuf/descriptor.pb.h"
70 #include "google/protobuf/descriptor_database.h"
71 #include "google/protobuf/descriptor_lite.h"
72 #include "google/protobuf/descriptor_visitor.h"
73 #include "google/protobuf/dynamic_message.h"
74 #include "google/protobuf/feature_resolver.h"
75 #include "google/protobuf/generated_message_util.h"
76 #include "google/protobuf/io/strtod.h"
77 #include "google/protobuf/io/tokenizer.h"
78 #include "google/protobuf/message.h"
79 #include "google/protobuf/message_lite.h"
80 #include "google/protobuf/parse_context.h"
81 #include "google/protobuf/port.h"
82 #include "google/protobuf/repeated_ptr_field.h"
83 #include "google/protobuf/text_format.h"
84 #include "google/protobuf/unknown_field_set.h"
85 
86 
87 // Must be included last.
88 #include "google/protobuf/port_def.inc"
89 
90 namespace google {
91 namespace protobuf {
92 namespace {
93 
// Numeric limit related to packages. Its use is outside this part of the file
// (NOTE(review): presumably a cap on package nesting -- confirm at use site).
constexpr int kPackageLimit = 100;
95 
96 
ToCamelCase(const absl::string_view input,bool lower_first)97 std::string ToCamelCase(const absl::string_view input, bool lower_first) {
98   bool capitalize_next = !lower_first;
99   std::string result;
100   result.reserve(input.size());
101 
102   for (char character : input) {
103     if (character == '_') {
104       capitalize_next = true;
105     } else if (capitalize_next) {
106       result.push_back(absl::ascii_toupper(character));
107       capitalize_next = false;
108     } else {
109       result.push_back(character);
110     }
111   }
112 
113   // Lower-case the first letter.
114   if (lower_first && !result.empty()) {
115     result[0] = absl::ascii_tolower(result[0]);
116   }
117 
118   return result;
119 }
120 
ToJsonName(const absl::string_view input)121 std::string ToJsonName(const absl::string_view input) {
122   bool capitalize_next = false;
123   std::string result;
124   result.reserve(input.size());
125 
126   for (char character : input) {
127     if (character == '_') {
128       capitalize_next = true;
129     } else if (capitalize_next) {
130       result.push_back(absl::ascii_toupper(character));
131       capitalize_next = false;
132     } else {
133       result.push_back(character);
134     }
135   }
136 
137   return result;
138 }
139 
// Returns `options.deprecated_legacy_json_field_conflicts()`. The accessor is
// deprecated, so the deprecation warning is silenced around the call on
// GCC-compatible compilers.
template <typename OptionsT>
bool IsLegacyJsonFieldConflictEnabled(const OptionsT& options) {
#ifdef __GNUC__
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
#endif
  const bool enabled = options.deprecated_legacy_json_field_conflicts();
#ifdef __GNUC__
#pragma GCC diagnostic pop
#endif
  return enabled;
}
151 
// C++11 stand-in for a comma-operator fold expression.
// Usage:  Fold({expr...});
// The expressions are elements of a braced initializer list, so they are
// evaluated strictly left-to-right; their values are then discarded.
struct ExpressionEater {
  // Accepts (and ignores) a value of any type.
  template <typename Expr>
  ExpressionEater(Expr&&) {}  // NOLINT
};
void Fold(std::initializer_list<ExpressionEater> /*evaluated*/) {}
160 
// Rounds `n` up to the next multiple of `R`. `R` must be a power of two, which
// is enforced at compile time.
template <int R>
constexpr size_t RoundUpTo(size_t n) {
  static_assert((R & (R - 1)) == 0, "Must be power of two");
  // For a power of two, R - 1 is the mask of the low bits to clear.
  constexpr int kLowBits = R - 1;
  return (n + kLowBits) & ~kLowBits;
}
166 
// Returns the larger of two sizes.
constexpr size_t Max(size_t a, size_t b) { return b < a ? a : b; }
// Variadic form: reduces the arguments pairwise from the right.
template <typename Head, typename... Tail>
constexpr size_t Max(Head a, Tail... b) {
  return Max(a, Max(b...));
}
172 
// Alignment used when laying out blocks of `T`. Every type reports its natural
// `alignof`, except `char`, which is treated as 8-byte aligned because the
// `char` block is where the trivial structs are placed.
template <typename T>
constexpr size_t EffectiveAlignof() {
  return !std::is_same<T, char>::value ? alignof(T) : 8;
}
179 
180 template <int align, typename U, typename... T>
181 using AppendIfAlign =
182     typename std::conditional<EffectiveAlignof<U>() == align, void (*)(T..., U),
183                               void (*)(T...)>::type;
184 
185 // Metafunction to sort types in descending order of alignment.
186 // Useful for the flat allocator to ensure proper alignment of all elements
187 // without having to add padding.
188 // Instead of implementing a proper sort metafunction we just do a
189 // filter+merge, which is much simpler to write as a metafunction.
190 // We have a fixed set of alignments we can filter on.
191 // For simplicity we use a function pointer as a type list.
192 template <typename In, typename T16, typename T8, typename T4, typename T2,
193           typename T1>
194 struct TypeListSortImpl;
195 
196 template <typename... T16, typename... T8, typename... T4, typename... T2,
197           typename... T1>
198 struct TypeListSortImpl<void (*)(), void (*)(T16...), void (*)(T8...),
199                         void (*)(T4...), void (*)(T2...), void (*)(T1...)> {
200   using type = void (*)(T16..., T8..., T4..., T2..., T1...);
201 };
202 
203 template <typename First, typename... Rest, typename... T16, typename... T8,
204           typename... T4, typename... T2, typename... T1>
205 struct TypeListSortImpl<void (*)(First, Rest...), void (*)(T16...),
206                         void (*)(T8...), void (*)(T4...), void (*)(T2...),
207                         void (*)(T1...)> {
208   using type = typename TypeListSortImpl<
209       void (*)(Rest...), AppendIfAlign<16, First, T16...>,
210       AppendIfAlign<8, First, T8...>, AppendIfAlign<4, First, T4...>,
211       AppendIfAlign<2, First, T2...>, AppendIfAlign<1, First, T1...>>::type;
212 };
213 
214 template <typename... T>
215 using SortByAlignment =
216     typename TypeListSortImpl<void (*)(T...), void (*)(), void (*)(),
217                               void (*)(), void (*)(), void (*)()>::type;
218 
// Declaration only (never defined): maps a function-pointer type list
// `void (*)(T...)` to `C<T...>`. Intended for use inside `decltype`.
template <template <typename...> class C, typename... T>
C<T...> ApplyTypeList(void (*)(T...));
221 
// Base case: the candidate list is exhausted, so `T` was not found.
template <typename T>
constexpr int FindTypeIndex() {
  return -1;
}

// Returns the zero-based position of `T` within `T1, Ts...`, or -1 when `T`
// is not in the list.
//
// The not-found sentinel has to be propagated explicitly: simply adding one to
// the recursive result would turn the -1 from the base case into the index of
// the last type in the list.
template <typename T, typename T1, typename... Ts>
constexpr int FindTypeIndex() {
  return std::is_same<T, T1>::value          ? 0
         : FindTypeIndex<T, Ts...>() < 0     ? -1
                                             : FindTypeIndex<T, Ts...>() + 1;
}
231 
// A map from types to values. The set of keys is fixed by `Keys...`, and the
// value stored for a key `K` has type `ValueT<K>`.
// Every value starts out value-initialized.
template <template <typename> class ValueT, typename... Keys>
class TypeMap {
 private:
  // Holds the value for one key. Each key gets its own distinct base class of
  // `Storage`, which is what makes lookup by type possible.
  template <typename K>
  struct Slot {
    ValueT<K> value{};
  };
  struct Storage : Slot<Keys>... {};

 public:
  // Returns the value associated with key `K`.
  template <typename K>
  ValueT<K>& Get() {
    Slot<K>& slot = storage_;
    return slot.value;
  }

  template <typename K>
  const ValueT<K>& Get() const {
    const Slot<K>& slot = storage_;
    return slot.value;
  }

 private:
  Storage storage_;
};
255 
// Value-type selectors for TypeMap: `IntT<K>` is `int` for every key, and
// `PointerT<K>` is a pointer to the key type.
template <typename Unused>
using IntT = int;
template <typename Pointee>
using PointerT = Pointee*;
260 
261 // Manages an allocation of sequential arrays of type `T...`.
262 // It is more space efficient than storing N (ptr, size) pairs, by storing only
263 // the pointer to the head and the boundaries between the arrays.
264 template <typename... T>
265 class FlatAllocation {
266  public:
267   static constexpr size_t kMaxAlign = Max(alignof(T)...);
268 
FlatAllocation(const TypeMap<IntT,T...> & ends)269   explicit FlatAllocation(const TypeMap<IntT, T...>& ends) : ends_(ends) {
270     // The arrays start just after FlatAllocation, so adjust the ends.
271     Fold({(ends_.template Get<T>() +=
272            RoundUpTo<kMaxAlign>(sizeof(FlatAllocation)))...});
273     Fold({Init<T>()...});
274   }
275 
Destroy()276   void Destroy() {
277     Fold({Destroy<T>()...});
278     internal::SizedDelete(this, total_bytes());
279   }
280 
281   template <int I>
282   using type = typename std::tuple_element<I, std::tuple<T...>>::type;
283 
284   // Gets a tuple of the head pointers for the arrays
Pointers() const285   TypeMap<PointerT, T...> Pointers() const {
286     TypeMap<PointerT, T...> out;
287     Fold({(out.template Get<T>() = Begin<T>())...});
288     return out;
289   }
290 
291 
292  private:
293   // Total number of bytes used by all arrays.
total_bytes() const294   int total_bytes() const {
295     // Get the last end.
296     return ends_.template Get<typename std::tuple_element<
297         sizeof...(T) - 1, std::tuple<T...>>::type>();
298   }
299 
300 
301   template <typename U>
BeginOffset() const302   int BeginOffset() const {
303     constexpr int type_index = FindTypeIndex<U, T...>();
304     // Avoid a negative value here to keep it compiling when type_index == 0
305     constexpr int prev_type_index = type_index == 0 ? 0 : type_index - 1;
306     using PrevType =
307         typename std::tuple_element<prev_type_index, std::tuple<T...>>::type;
308     // Ensure the types are properly aligned.
309     static_assert(EffectiveAlignof<PrevType>() >= EffectiveAlignof<U>(), "");
310     return type_index == 0 ? RoundUpTo<kMaxAlign>(sizeof(FlatAllocation))
311                            : ends_.template Get<PrevType>();
312   }
313 
314   template <typename U>
EndOffset() const315   int EndOffset() const {
316     return ends_.template Get<U>();
317   }
318 
319   // Avoid the reinterpret_cast if the array is empty.
320   // Clang's Control Flow Integrity does not like the cast pointing to memory
321   // that is not yet initialized to be of that type.
322   // (from -fsanitize=cfi-unrelated-cast)
323   template <typename U>
Begin() const324   U* Begin() const {
325     int begin = BeginOffset<U>(), end = EndOffset<U>();
326     if (begin == end) return nullptr;
327     return reinterpret_cast<U*>(data() + begin);
328   }
329 
330   template <typename U>
End() const331   U* End() const {
332     int begin = BeginOffset<U>(), end = EndOffset<U>();
333     if (begin == end) return nullptr;
334     return reinterpret_cast<U*>(data() + end);
335   }
336 
337   template <typename U>
Init()338   bool Init() {
339     // Skip for the `char` block. No need to zero initialize it.
340     if (std::is_same<U, char>::value) return true;
341     for (char *p = data() + BeginOffset<U>(), *end = data() + EndOffset<U>();
342          p != end; p += sizeof(U)) {
343       ::new (p) U{};
344     }
345     return true;
346   }
347 
348   template <typename U>
Destroy()349   bool Destroy() {
350     if (std::is_trivially_destructible<U>::value) return true;
351     for (U *it = Begin<U>(), *end = End<U>(); it != end; ++it) {
352       it->~U();
353     }
354     return true;
355   }
356 
data() const357   char* data() const {
358     return const_cast<char*>(reinterpret_cast<const char*>(this));
359   }
360 
361   TypeMap<IntT, T...> ends_;
362 };
363 
364 template <typename... T>
CalculateEnds(const TypeMap<IntT,T...> & sizes)365 TypeMap<IntT, T...> CalculateEnds(const TypeMap<IntT, T...>& sizes) {
366   int total = 0;
367   TypeMap<IntT, T...> out;
368   Fold({(out.template Get<T>() = total +=
369          sizeof(T) * sizes.template Get<T>())...});
370   return out;
371 }
372 
373 // The implementation for FlatAllocator below.
374 // This separate class template makes it easier to have methods that fold on
375 // `T...`.
376 template <typename... T>
377 class FlatAllocatorImpl {
378  public:
379   using Allocation = FlatAllocation<T...>;
380 
381   template <typename U>
PlanArray(int array_size)382   void PlanArray(int array_size) {
383     // We can't call PlanArray after FinalizePlanning has been called.
384     ABSL_CHECK(!has_allocated());
385     if (std::is_trivially_destructible<U>::value) {
386       // Trivial types are aligned to 8 bytes.
387       static_assert(alignof(U) <= 8, "");
388       total_.template Get<char>() += RoundUpTo<8>(array_size * sizeof(U));
389     } else {
390       // Since we can't use `if constexpr`, just make the expression compile
391       // when this path is not taken.
392       using TypeToUse =
393           typename std::conditional<std::is_trivially_destructible<U>::value,
394                                     char, U>::type;
395       total_.template Get<TypeToUse>() += array_size;
396     }
397   }
398 
399   template <typename U>
AllocateArray(int array_size)400   U* AllocateArray(int array_size) {
401     constexpr bool trivial = std::is_trivially_destructible<U>::value;
402     using TypeToUse = typename std::conditional<trivial, char, U>::type;
403 
404     // We can only allocate after FinalizePlanning has been called.
405     ABSL_CHECK(has_allocated());
406 
407     TypeToUse*& data = pointers_.template Get<TypeToUse>();
408     int& used = used_.template Get<TypeToUse>();
409     U* res = reinterpret_cast<U*>(data + used);
410     used += trivial ? RoundUpTo<8>(array_size * sizeof(U)) : array_size;
411     ABSL_CHECK_LE(used, total_.template Get<TypeToUse>());
412     return res;
413   }
414 
415   template <typename... In>
AllocateStrings(In &&...in)416   const std::string* AllocateStrings(In&&... in) {
417     std::string* strings = AllocateArray<std::string>(sizeof...(in));
418     std::string* res = strings;
419     Fold({(*strings++ = std::string(std::forward<In>(in)))...});
420     return res;
421   }
422 
423   // Allocate all 5 names of the field:
424   // name, full name, lowercase, camelcase and json.
425   // It will dedup the strings when possible.
426   // The resulting array contains `name` at index 0, `full_name` at index 1
427   // and the other 3 indices are specified in the result.
PlanFieldNames(const std::string & name,const std::string * opt_json_name)428   void PlanFieldNames(const std::string& name,
429                       const std::string* opt_json_name) {
430     ABSL_CHECK(!has_allocated());
431 
432     // Fast path for snake_case names, which follow the style guide.
433     if (opt_json_name == nullptr) {
434       switch (GetFieldNameCase(name)) {
435         case FieldNameCase::kAllLower:
436           // Case 1: they are all the same.
437           return PlanArray<std::string>(2);
438         case FieldNameCase::kSnakeCase:
439           // Case 2: name==lower, camel==json
440           return PlanArray<std::string>(3);
441         default:
442           break;
443       }
444     }
445 
446     std::string lowercase_name = name;
447     absl::AsciiStrToLower(&lowercase_name);
448 
449     std::string camelcase_name = ToCamelCase(name, /* lower_first = */ true);
450     std::string json_name =
451         opt_json_name != nullptr ? *opt_json_name : ToJsonName(name);
452 
453     absl::string_view all_names[] = {name, lowercase_name, camelcase_name,
454                                      json_name};
455     std::sort(all_names, all_names + 4);
456     int unique =
457         static_cast<int>(std::unique(all_names, all_names + 4) - all_names);
458 
459     PlanArray<std::string>(unique + 1);
460   }
461 
462   struct FieldNamesResult {
463     const std::string* array;
464     int lowercase_index;
465     int camelcase_index;
466     int json_index;
467   };
AllocateFieldNames(const absl::string_view name,const absl::string_view scope,const std::string * opt_json_name)468   FieldNamesResult AllocateFieldNames(const absl::string_view name,
469                                       const absl::string_view scope,
470                                       const std::string* opt_json_name) {
471     ABSL_CHECK(has_allocated());
472 
473     std::string full_name =
474         scope.empty() ? std::string(name) : absl::StrCat(scope, ".", name);
475 
476     // Fast path for snake_case names, which follow the style guide.
477     if (opt_json_name == nullptr) {
478       switch (GetFieldNameCase(name)) {
479         case FieldNameCase::kAllLower:
480           // Case 1: they are all the same.
481           return {AllocateStrings(name, std::move(full_name)), 0, 0, 0};
482         case FieldNameCase::kSnakeCase:
483           // Case 2: name==lower, camel==json
484           return {AllocateStrings(name, std::move(full_name),
485                                   ToCamelCase(name, /* lower_first = */ true)),
486                   0, 2, 2};
487         default:
488           break;
489       }
490     }
491 
492     std::vector<std::string> names;
493     names.emplace_back(name);
494     names.push_back(std::move(full_name));
495 
496     const auto push_name = [&](std::string new_name) {
497       for (size_t i = 0; i < names.size(); ++i) {
498         // Do not compare the full_name. It is unlikely to match, except in
499         // custom json_name. We are not taking this into account in
500         // PlanFieldNames so better to not try it.
501         if (i == 1) continue;
502         if (names[i] == new_name) return i;
503       }
504       names.push_back(std::move(new_name));
505       return names.size() - 1;
506     };
507 
508     FieldNamesResult result{nullptr, 0, 0, 0};
509 
510     std::string lowercase_name = std::string(name);
511     absl::AsciiStrToLower(&lowercase_name);
512     result.lowercase_index = push_name(std::move(lowercase_name));
513     result.camelcase_index =
514         push_name(ToCamelCase(name, /* lower_first = */ true));
515     result.json_index =
516         push_name(opt_json_name != nullptr ? *opt_json_name : ToJsonName(name));
517 
518     std::string* all_names = AllocateArray<std::string>(names.size());
519     result.array = all_names;
520     std::move(names.begin(), names.end(), all_names);
521 
522     return result;
523   }
524 
525   template <typename Alloc>
FinalizePlanning(Alloc & alloc)526   void FinalizePlanning(Alloc& alloc) {
527     ABSL_CHECK(!has_allocated());
528 
529     pointers_ = alloc->CreateFlatAlloc(total_)->Pointers();
530 
531     ABSL_CHECK(has_allocated());
532   }
533 
ExpectConsumed() const534   void ExpectConsumed() const {
535     // We verify that we consumed all the memory requested if there was no
536     // error in processing.
537     Fold({ExpectConsumed<T>()...});
538   }
539 
540  private:
has_allocated() const541   bool has_allocated() const {
542     return pointers_.template Get<char>() != nullptr;
543   }
544 
IsLower(char c)545   static bool IsLower(char c) { return 'a' <= c && c <= 'z'; }
IsDigit(char c)546   static bool IsDigit(char c) { return '0' <= c && c <= '9'; }
IsLowerOrDigit(char c)547   static bool IsLowerOrDigit(char c) { return IsLower(c) || IsDigit(c); }
548 
549   enum class FieldNameCase { kAllLower, kSnakeCase, kOther };
GetFieldNameCase(const absl::string_view name)550   FieldNameCase GetFieldNameCase(const absl::string_view name) {
551     if (!name.empty() && !IsLower(name[0])) return FieldNameCase::kOther;
552     FieldNameCase best = FieldNameCase::kAllLower;
553     for (char c : name) {
554       if (IsLowerOrDigit(c)) {
555         // nothing to do
556       } else if (c == '_') {
557         best = FieldNameCase::kSnakeCase;
558       } else {
559         return FieldNameCase::kOther;
560       }
561     }
562     return best;
563   }
564 
565   template <typename U>
ExpectConsumed() const566   bool ExpectConsumed() const {
567     ABSL_CHECK_EQ(total_.template Get<U>(), used_.template Get<U>());
568     return true;
569   }
570 
571   TypeMap<PointerT, T...> pointers_;
572   TypeMap<IntT, T...> total_;
573   TypeMap<IntT, T...> used_;
574 };
575 
576 // Allows us to disable tracking in the current thread while certain build steps
577 // are happening.
is_tracking_enabled()578 bool& is_tracking_enabled() {
579   static PROTOBUF_THREAD_LOCAL bool value = true;
580   return value;
581 }
582 
DisableTracking()583 auto DisableTracking() {
584   bool old_value = is_tracking_enabled();
585   is_tracking_enabled() = false;
586   return absl::MakeCleanup([=] { is_tracking_enabled() = old_value; });
587 }
588 
589 }  // namespace
590 
591 class Symbol {
592  public:
593   enum Type {
594     NULL_SYMBOL,
595     MESSAGE,
596     FIELD,
597     ONEOF,
598     ENUM,
599     ENUM_VALUE,
600     ENUM_VALUE_OTHER_PARENT,
601     SERVICE,
602     METHOD,
603     FULL_PACKAGE,
604     SUB_PACKAGE,
605   };
606 
Symbol()607   Symbol() {
608     static constexpr internal::SymbolBase null_symbol{};
609     static_assert(null_symbol.symbol_type_ == NULL_SYMBOL, "");
610     // Initialize with a sentinel to make sure `ptr_` is never null.
611     ptr_ = &null_symbol;
612   }
613 
614   // Every object we store derives from internal::SymbolBase, where we store the
615   // symbol type enum.
616   // Storing in the object can be done without using more space in most cases,
617   // while storing it in the Symbol type would require 8 bytes.
618 #define DEFINE_MEMBERS(TYPE, TYPE_CONSTANT, FIELD)                             \
619   explicit Symbol(TYPE* value) : ptr_(value) {                                 \
620     value->symbol_type_ = TYPE_CONSTANT;                                       \
621   }                                                                            \
622   const TYPE* FIELD() const {                                                  \
623     return type() == TYPE_CONSTANT ? static_cast<const TYPE*>(ptr_) : nullptr; \
624   }
625 
626   DEFINE_MEMBERS(Descriptor, MESSAGE, descriptor)
627   DEFINE_MEMBERS(FieldDescriptor, FIELD, field_descriptor)
628   DEFINE_MEMBERS(OneofDescriptor, ONEOF, oneof_descriptor)
629   DEFINE_MEMBERS(EnumDescriptor, ENUM, enum_descriptor)
630   DEFINE_MEMBERS(ServiceDescriptor, SERVICE, service_descriptor)
631   DEFINE_MEMBERS(MethodDescriptor, METHOD, method_descriptor)
632   DEFINE_MEMBERS(FileDescriptor, FULL_PACKAGE, file_descriptor)
633 
634   // We use a special node for subpackage FileDescriptor.
635   // It is potentially added to the table with multiple different names, so we
636   // need a separate place to put the name.
637   struct Subpackage : internal::SymbolBase {
638     int name_size;
639     const FileDescriptor* file;
640   };
DEFINE_MEMBERS(Subpackage,SUB_PACKAGE,sub_package_file_descriptor)641   DEFINE_MEMBERS(Subpackage, SUB_PACKAGE, sub_package_file_descriptor)
642 
643   // Enum values have two different parents.
644   // We use two different identitied for the same object to determine the two
645   // different insertions in the map.
646   static Symbol EnumValue(EnumValueDescriptor* value, int n) {
647     Symbol s;
648     internal::SymbolBase* ptr;
649     if (n == 0) {
650       ptr = static_cast<internal::SymbolBaseN<0>*>(value);
651       ptr->symbol_type_ = ENUM_VALUE;
652     } else {
653       ptr = static_cast<internal::SymbolBaseN<1>*>(value);
654       ptr->symbol_type_ = ENUM_VALUE_OTHER_PARENT;
655     }
656     s.ptr_ = ptr;
657     return s;
658   }
659 
enum_value_descriptor() const660   const EnumValueDescriptor* enum_value_descriptor() const {
661     return type() == ENUM_VALUE
662                ? static_cast<const EnumValueDescriptor*>(
663                      static_cast<const internal::SymbolBaseN<0>*>(ptr_))
664            : type() == ENUM_VALUE_OTHER_PARENT
665                ? static_cast<const EnumValueDescriptor*>(
666                      static_cast<const internal::SymbolBaseN<1>*>(ptr_))
667                : nullptr;
668   }
669 
670 #undef DEFINE_MEMBERS
671 
type() const672   Type type() const { return static_cast<Type>(ptr_->symbol_type_); }
IsNull() const673   bool IsNull() const { return type() == NULL_SYMBOL; }
IsType() const674   bool IsType() const { return type() == MESSAGE || type() == ENUM; }
IsAggregate() const675   bool IsAggregate() const {
676     return IsType() || IsPackage() || type() == SERVICE;
677   }
IsPackage() const678   bool IsPackage() const {
679     return type() == FULL_PACKAGE || type() == SUB_PACKAGE;
680   }
681 
GetFile() const682   const FileDescriptor* GetFile() const {
683     switch (type()) {
684       case MESSAGE:
685         return descriptor()->file();
686       case FIELD:
687         return field_descriptor()->file();
688       case ONEOF:
689         return oneof_descriptor()->containing_type()->file();
690       case ENUM:
691         return enum_descriptor()->file();
692       case ENUM_VALUE:
693         return enum_value_descriptor()->type()->file();
694       case SERVICE:
695         return service_descriptor()->file();
696       case METHOD:
697         return method_descriptor()->service()->file();
698       case FULL_PACKAGE:
699         return file_descriptor();
700       case SUB_PACKAGE:
701         return sub_package_file_descriptor()->file;
702       default:
703         return nullptr;
704     }
705   }
706 
full_name() const707   absl::string_view full_name() const {
708     switch (type()) {
709       case MESSAGE:
710         return descriptor()->full_name();
711       case FIELD:
712         return field_descriptor()->full_name();
713       case ONEOF:
714         return oneof_descriptor()->full_name();
715       case ENUM:
716         return enum_descriptor()->full_name();
717       case ENUM_VALUE:
718         return enum_value_descriptor()->full_name();
719       case SERVICE:
720         return service_descriptor()->full_name();
721       case METHOD:
722         return method_descriptor()->full_name();
723       case FULL_PACKAGE:
724         return file_descriptor()->package();
725       case SUB_PACKAGE:
726         return absl::string_view(sub_package_file_descriptor()->file->package())
727             .substr(0, sub_package_file_descriptor()->name_size);
728       default:
729         ABSL_CHECK(false);
730     }
731     return "";
732   }
733 
parent_name_key() const734   std::pair<const void*, absl::string_view> parent_name_key() const {
735     const auto or_file = [&](const void* p) { return p ? p : GetFile(); };
736     switch (type()) {
737       case MESSAGE:
738         return {or_file(descriptor()->containing_type()), descriptor()->name()};
739       case FIELD: {
740         auto* field = field_descriptor();
741         return {or_file(field->is_extension() ? field->extension_scope()
742                                               : field->containing_type()),
743                 field->name()};
744       }
745       case ONEOF:
746         return {oneof_descriptor()->containing_type(),
747                 oneof_descriptor()->name()};
748       case ENUM:
749         return {or_file(enum_descriptor()->containing_type()),
750                 enum_descriptor()->name()};
751       case ENUM_VALUE:
752         return {or_file(enum_value_descriptor()->type()->containing_type()),
753                 enum_value_descriptor()->name()};
754       case ENUM_VALUE_OTHER_PARENT:
755         return {enum_value_descriptor()->type(),
756                 enum_value_descriptor()->name()};
757       case SERVICE:
758         return {GetFile(), service_descriptor()->name()};
759       case METHOD:
760         return {method_descriptor()->service(), method_descriptor()->name()};
761       default:
762         ABSL_CHECK(false);
763     }
764     return {};
765   }
766 
767  private:
768   const internal::SymbolBase* ptr_;
769 };
770 
771 const FieldDescriptor::CppType
772     FieldDescriptor::kTypeToCppTypeMap[MAX_TYPE + 1] = {
773         static_cast<CppType>(0),  // 0 is reserved for errors
774 
775         CPPTYPE_DOUBLE,   // TYPE_DOUBLE
776         CPPTYPE_FLOAT,    // TYPE_FLOAT
777         CPPTYPE_INT64,    // TYPE_INT64
778         CPPTYPE_UINT64,   // TYPE_UINT64
779         CPPTYPE_INT32,    // TYPE_INT32
780         CPPTYPE_UINT64,   // TYPE_FIXED64
781         CPPTYPE_UINT32,   // TYPE_FIXED32
782         CPPTYPE_BOOL,     // TYPE_BOOL
783         CPPTYPE_STRING,   // TYPE_STRING
784         CPPTYPE_MESSAGE,  // TYPE_GROUP
785         CPPTYPE_MESSAGE,  // TYPE_MESSAGE
786         CPPTYPE_STRING,   // TYPE_BYTES
787         CPPTYPE_UINT32,   // TYPE_UINT32
788         CPPTYPE_ENUM,     // TYPE_ENUM
789         CPPTYPE_INT32,    // TYPE_SFIXED32
790         CPPTYPE_INT64,    // TYPE_SFIXED64
791         CPPTYPE_INT32,    // TYPE_SINT32
792         CPPTYPE_INT64,    // TYPE_SINT64
793 };
794 
795 const char* const FieldDescriptor::kTypeToName[MAX_TYPE + 1] = {
796     "ERROR",  // 0 is reserved for errors
797 
798     "double",    // TYPE_DOUBLE
799     "float",     // TYPE_FLOAT
800     "int64",     // TYPE_INT64
801     "uint64",    // TYPE_UINT64
802     "int32",     // TYPE_INT32
803     "fixed64",   // TYPE_FIXED64
804     "fixed32",   // TYPE_FIXED32
805     "bool",      // TYPE_BOOL
806     "string",    // TYPE_STRING
807     "group",     // TYPE_GROUP
808     "message",   // TYPE_MESSAGE
809     "bytes",     // TYPE_BYTES
810     "uint32",    // TYPE_UINT32
811     "enum",      // TYPE_ENUM
812     "sfixed32",  // TYPE_SFIXED32
813     "sfixed64",  // TYPE_SFIXED64
814     "sint32",    // TYPE_SINT32
815     "sint64",    // TYPE_SINT64
816 };
817 
818 const char* const FieldDescriptor::kCppTypeToName[MAX_CPPTYPE + 1] = {
819     "ERROR",  // 0 is reserved for errors
820 
821     "int32",    // CPPTYPE_INT32
822     "int64",    // CPPTYPE_INT64
823     "uint32",   // CPPTYPE_UINT32
824     "uint64",   // CPPTYPE_UINT64
825     "double",   // CPPTYPE_DOUBLE
826     "float",    // CPPTYPE_FLOAT
827     "bool",     // CPPTYPE_BOOL
828     "enum",     // CPPTYPE_ENUM
829     "string",   // CPPTYPE_STRING
830     "message",  // CPPTYPE_MESSAGE
831 };
832 
833 const char* const FieldDescriptor::kLabelToName[MAX_LABEL + 1] = {
834     "ERROR",  // 0 is reserved for errors
835 
836     "optional",  // LABEL_OPTIONAL
837     "required",  // LABEL_REQUIRED
838     "repeated",  // LABEL_REPEATED
839 };
840 
// Full name of the message type used as the replacement for non-linked weak
// messages (as the constant's name states; the use site is outside this
// part of the file).
static const char* const kNonLinkedWeakMessageReplacementName =
    "google.protobuf.Empty";
842 
// Namespace-scope definitions of FieldDescriptor's static const int members.
// They are compiled for every non-MSVC compiler, and for MSVC only when
// 1900 <= _MSC_VER < 1912; all other MSVC versions skip them.
// NOTE(review): presumably these exist so the constants can be ODR-used under
// pre-C++17 rules -- confirm before removing.
#if !defined(_MSC_VER) || (_MSC_VER >= 1900 && _MSC_VER < 1912)
const int FieldDescriptor::kMaxNumber;
const int FieldDescriptor::kFirstReservedNumber;
const int FieldDescriptor::kLastReservedNumber;
#endif
848 
849 namespace {
850 
EnumValueToPascalCase(const std::string & input)851 std::string EnumValueToPascalCase(const std::string& input) {
852   bool next_upper = true;
853   std::string result;
854   result.reserve(input.size());
855 
856   for (char character : input) {
857     if (character == '_') {
858       next_upper = true;
859     } else {
860       if (next_upper) {
861         result.push_back(absl::ascii_toupper(character));
862       } else {
863         result.push_back(absl::ascii_tolower(character));
864       }
865       next_upper = false;
866     }
867   }
868 
869   return result;
870 }
871 
872 // Class to remove an enum prefix from enum values.
873 class PrefixRemover {
874  public:
PrefixRemover(absl::string_view prefix)875   explicit PrefixRemover(absl::string_view prefix) {
876     // Strip underscores and lower-case the prefix.
877     for (char character : prefix) {
878       if (character != '_') {
879         prefix_ += absl::ascii_tolower(character);
880       }
881     }
882   }
883 
884   // Tries to remove the enum prefix from this enum value.
885   // If this is not possible, returns the input verbatim.
MaybeRemove(absl::string_view str)886   std::string MaybeRemove(absl::string_view str) {
887     // We can't just lowercase and strip str and look for a prefix.
888     // We need to properly recognize the difference between:
889     //
890     //   enum Foo {
891     //     FOO_BAR_BAZ = 0;
892     //     FOO_BARBAZ = 1;
893     //   }
894     //
895     // This is acceptable (though perhaps not advisable) because even when
896     // we PascalCase, these two will still be distinct (BarBaz vs. Barbaz).
897     size_t i, j;
898 
899     // Skip past prefix_ in str if we can.
900     for (i = 0, j = 0; i < str.size() && j < prefix_.size(); i++) {
901       if (str[i] == '_') {
902         continue;
903       }
904 
905       if (absl::ascii_tolower(str[i]) != prefix_[j++]) {
906         return std::string(str);
907       }
908     }
909 
910     // If we didn't make it through the prefix, we've failed to strip the
911     // prefix.
912     if (j < prefix_.size()) {
913       return std::string(str);
914     }
915 
916     // Skip underscores between prefix and further characters.
917     while (i < str.size() && str[i] == '_') {
918       i++;
919     }
920 
921     // Enum label can't be the empty string.
922     if (i == str.size()) {
923       return std::string(str);
924     }
925 
926     // We successfully stripped the prefix.
927     str.remove_prefix(i);
928     return std::string(str);
929   }
930 
931  private:
932   std::string prefix_;
933 };
934 
935 // A DescriptorPool contains a bunch of hash-maps to implement the
936 // various Find*By*() methods.  Since hashtable lookups are O(1), it's
937 // most efficient to construct a fixed set of large hash-maps used by
938 // all objects in the pool rather than construct one or more small
939 // hash-maps for each object.
940 //
941 // The keys to these hash-maps are (parent, name) or (parent, number) pairs.
942 struct FullNameQuery {
943   absl::string_view query;
full_namegoogle::protobuf::__anona654feba0511::FullNameQuery944   absl::string_view full_name() const { return query; }
945 };
946 struct SymbolByFullNameHash {
947   using is_transparent = void;
948 
949   template <typename T>
operator ()google::protobuf::__anona654feba0511::SymbolByFullNameHash950   size_t operator()(const T& s) const {
951     return absl::HashOf(s.full_name());
952   }
953 };
954 struct SymbolByFullNameEq {
955   using is_transparent = void;
956 
957   template <typename T, typename U>
operator ()google::protobuf::__anona654feba0511::SymbolByFullNameEq958   bool operator()(const T& a, const U& b) const {
959     return a.full_name() == b.full_name();
960   }
961 };
962 using SymbolsByNameSet =
963     absl::flat_hash_set<Symbol, SymbolByFullNameHash, SymbolByFullNameEq>;
964 
965 struct ParentNameQuery {
966   std::pair<const void*, absl::string_view> query;
parent_name_keygoogle::protobuf::__anona654feba0511::ParentNameQuery967   std::pair<const void*, absl::string_view> parent_name_key() const {
968     return query;
969   }
970 };
971 struct SymbolByParentHash {
972   using is_transparent = void;
973 
974   template <typename T>
operator ()google::protobuf::__anona654feba0511::SymbolByParentHash975   size_t operator()(const T& s) const {
976     return absl::HashOf(s.parent_name_key());
977   }
978 };
979 struct SymbolByParentEq {
980   using is_transparent = void;
981 
982   template <typename T, typename U>
operator ()google::protobuf::__anona654feba0511::SymbolByParentEq983   bool operator()(const T& a, const U& b) const {
984     return a.parent_name_key() == b.parent_name_key();
985   }
986 };
987 using SymbolsByParentSet =
988     absl::flat_hash_set<Symbol, SymbolByParentHash, SymbolByParentEq>;
989 
990 template <typename DescriptorT>
991 struct DescriptorsByNameHash {
992   using is_transparent = void;
993 
operator ()google::protobuf::__anona654feba0511::DescriptorsByNameHash994   size_t operator()(absl::string_view name) const { return absl::HashOf(name); }
995 
operator ()google::protobuf::__anona654feba0511::DescriptorsByNameHash996   size_t operator()(const DescriptorT* file) const {
997     return absl::HashOf(file->name());
998   }
999 };
1000 
1001 template <typename DescriptorT>
1002 struct DescriptorsByNameEq {
1003   using is_transparent = void;
1004 
operator ()google::protobuf::__anona654feba0511::DescriptorsByNameEq1005   bool operator()(absl::string_view lhs, absl::string_view rhs) const {
1006     return lhs == rhs;
1007   }
operator ()google::protobuf::__anona654feba0511::DescriptorsByNameEq1008   bool operator()(absl::string_view lhs, const DescriptorT* rhs) const {
1009     return lhs == rhs->name();
1010   }
operator ()google::protobuf::__anona654feba0511::DescriptorsByNameEq1011   bool operator()(const DescriptorT* lhs, absl::string_view rhs) const {
1012     return lhs->name() == rhs;
1013   }
operator ()google::protobuf::__anona654feba0511::DescriptorsByNameEq1014   bool operator()(const DescriptorT* lhs, const DescriptorT* rhs) const {
1015     return lhs == rhs || lhs->name() == rhs->name();
1016   }
1017 };
1018 
1019 template <typename DescriptorT>
1020 using DescriptorsByNameSet =
1021     absl::flat_hash_set<const DescriptorT*, DescriptorsByNameHash<DescriptorT>,
1022                         DescriptorsByNameEq<DescriptorT>>;
1023 
// Maps an (opaque parent pointer, name) pair to a field.  The name is held as
// a string_view, so the underlying characters must outlive the map.
using FieldsByNameMap =
    absl::flat_hash_map<std::pair<const void*, absl::string_view>,
                        const FieldDescriptor*>;
1027 
1028 struct ParentNumberQuery {
1029   std::pair<const void*, int> query;
1030 };
ObjectToParentNumber(const FieldDescriptor * field)1031 std::pair<const void*, int> ObjectToParentNumber(const FieldDescriptor* field) {
1032   return {field->containing_type(), field->number()};
1033 }
ObjectToParentNumber(const EnumValueDescriptor * enum_value)1034 std::pair<const void*, int> ObjectToParentNumber(
1035     const EnumValueDescriptor* enum_value) {
1036   return {enum_value->type(), enum_value->number()};
1037 }
ObjectToParentNumber(ParentNumberQuery query)1038 std::pair<const void*, int> ObjectToParentNumber(ParentNumberQuery query) {
1039   return query.query;
1040 }
1041 struct ParentNumberHash {
1042   using is_transparent = void;
1043 
1044   template <typename T>
operator ()google::protobuf::__anona654feba0511::ParentNumberHash1045   size_t operator()(const T& t) const {
1046     return absl::HashOf(ObjectToParentNumber(t));
1047   }
1048 };
1049 struct ParentNumberEq {
1050   using is_transparent = void;
1051 
1052   template <typename T, typename U>
operator ()google::protobuf::__anona654feba0511::ParentNumberEq1053   bool operator()(const T& a, const U& b) const {
1054     return ObjectToParentNumber(a) == ObjectToParentNumber(b);
1055   }
1056 };
1057 using FieldsByNumberSet = absl::flat_hash_set<const FieldDescriptor*,
1058                                               ParentNumberHash, ParentNumberEq>;
1059 using EnumValuesByNumberSet =
1060     absl::flat_hash_set<const EnumValueDescriptor*, ParentNumberHash,
1061                         ParentNumberEq>;
1062 
// This is a map rather than a hash-map, since we use it to iterate
// through all the extensions that extend a given Descriptor, and an
// ordered data structure that implements lower_bound is convenient
// for that.
// The key is an (extended Descriptor, int) pair, so all entries for one
// Descriptor are adjacent in iteration order.
using ExtensionsGroupedByDescriptorMap =
    absl::btree_map<std::pair<const Descriptor*, int>, const FieldDescriptor*>;
// Maps a std::string key to a source location.
// NOTE(review): the key is presumably an encoded form of the location path;
// the encoding is defined where the map is populated, outside this chunk.
using LocationsByPathMap =
    absl::flat_hash_map<std::string, const SourceCodeInfo_Location*>;
1071 
NewAllowedProto3Extendee()1072 absl::flat_hash_set<std::string>* NewAllowedProto3Extendee() {
1073   const char* kOptionNames[] = {
1074       "FileOptions",   "MessageOptions",   "FieldOptions",
1075       "EnumOptions",   "EnumValueOptions", "ServiceOptions",
1076       "MethodOptions", "OneofOptions",     "ExtensionRangeOptions"};
1077   auto allowed_proto3_extendees = new absl::flat_hash_set<std::string>();
1078   allowed_proto3_extendees->reserve(sizeof(kOptionNames) /
1079                                     sizeof(kOptionNames[0]));
1080 
1081   for (const char* option_name : kOptionNames) {
1082     // descriptor.proto has a different package name in opensource. We allow
1083     // both so the opensource protocol compiler can also compile internal
1084     // proto3 files with custom options. See: b/27567912
1085     allowed_proto3_extendees->insert(std::string("google.protobuf.") +
1086                                      option_name);
1087     // Split the word to trick the opensource processing scripts so they
1088     // will keep the original package name.
1089     allowed_proto3_extendees->insert(std::string("proto2.") + option_name);
1090   }
1091   return allowed_proto3_extendees;
1092 }
1093 
1094 // Checks whether the extendee type is allowed in proto3.
1095 // Only extensions to descriptor options are allowed. We use name comparison
1096 // instead of comparing the descriptor directly because the extensions may be
1097 // defined in a different pool.
AllowedExtendeeInProto3(const absl::string_view name)1098 bool AllowedExtendeeInProto3(const absl::string_view name) {
1099   static auto allowed_proto3_extendees =
1100       internal::OnShutdownDelete(NewAllowedProto3Extendee());
1101   return allowed_proto3_extendees->find(name) !=
1102          allowed_proto3_extendees->end();
1103 }
1104 
GetCppFeatureSetDefaults()1105 const FeatureSetDefaults& GetCppFeatureSetDefaults() {
1106   static const FeatureSetDefaults* default_spec =
1107       internal::OnShutdownDelete([] {
1108         auto* defaults = new FeatureSetDefaults();
1109         internal::ParseNoReflection(
1110             absl::string_view{
1111                 PROTOBUF_INTERNAL_CPP_EDITION_DEFAULTS,
1112                 sizeof(PROTOBUF_INTERNAL_CPP_EDITION_DEFAULTS) - 1},
1113             *defaults);
1114         return defaults;
1115       }());
1116   return *default_spec;
1117 }
1118 
1119 template <typename ProtoT>
RestoreFeaturesToOptions(const FeatureSet * features,ProtoT * proto)1120 void RestoreFeaturesToOptions(const FeatureSet* features, ProtoT* proto) {
1121   if (features != &FeatureSet::default_instance()) {
1122     *proto->mutable_options()->mutable_features() = *features;
1123   }
1124 }
1125 
1126 template <typename DescriptorT>
GetFullName(const DescriptorT & desc)1127 absl::string_view GetFullName(const DescriptorT& desc) {
1128   return desc.full_name();
1129 }
1130 
GetFullName(const FileDescriptor & desc)1131 absl::string_view GetFullName(const FileDescriptor& desc) {
1132   return desc.name();
1133 }
1134 
1135 template <typename DescriptorT>
GetFile(const DescriptorT & desc)1136 const FileDescriptor* GetFile(const DescriptorT& desc) {
1137   return desc.file();
1138 }
1139 
GetFile(const FileDescriptor & desc)1140 const FileDescriptor* GetFile(const FileDescriptor& desc) { return &desc; }
1141 
GetParentFeatures(const FileDescriptor * file)1142 const FeatureSet& GetParentFeatures(const FileDescriptor* file) {
1143   return FeatureSet::default_instance();
1144 }
1145 
GetParentFeatures(const Descriptor * message)1146 const FeatureSet& GetParentFeatures(const Descriptor* message) {
1147   if (message->containing_type() == nullptr) {
1148     return internal::InternalFeatureHelper::GetFeatures(*message->file());
1149   }
1150   return internal::InternalFeatureHelper::GetFeatures(
1151       *message->containing_type());
1152 }
1153 
GetParentFeatures(const OneofDescriptor * oneof)1154 const FeatureSet& GetParentFeatures(const OneofDescriptor* oneof) {
1155   return internal::InternalFeatureHelper::GetFeatures(
1156       *oneof->containing_type());
1157 }
1158 
GetParentFeatures(const Descriptor::ExtensionRange * range)1159 const FeatureSet& GetParentFeatures(const Descriptor::ExtensionRange* range) {
1160   return internal::InternalFeatureHelper::GetFeatures(
1161       *range->containing_type());
1162 }
1163 
GetParentFeatures(const FieldDescriptor * field)1164 const FeatureSet& GetParentFeatures(const FieldDescriptor* field) {
1165   if (field->containing_oneof() != nullptr) {
1166     return internal::InternalFeatureHelper::GetFeatures(
1167         *field->containing_oneof());
1168   } else if (field->is_extension()) {
1169     if (field->extension_scope() == nullptr) {
1170       return internal::InternalFeatureHelper::GetFeatures(*field->file());
1171     }
1172     return internal::InternalFeatureHelper::GetFeatures(
1173         *field->extension_scope());
1174   }
1175   return internal::InternalFeatureHelper::GetFeatures(
1176       *field->containing_type());
1177 }
1178 
GetParentFeatures(const EnumDescriptor * enm)1179 const FeatureSet& GetParentFeatures(const EnumDescriptor* enm) {
1180   if (enm->containing_type() == nullptr) {
1181     return internal::InternalFeatureHelper::GetFeatures(*enm->file());
1182   }
1183   return internal::InternalFeatureHelper::GetFeatures(*enm->containing_type());
1184 }
1185 
GetParentFeatures(const EnumValueDescriptor * value)1186 const FeatureSet& GetParentFeatures(const EnumValueDescriptor* value) {
1187   return internal::InternalFeatureHelper::GetFeatures(*value->type());
1188 }
1189 
GetParentFeatures(const ServiceDescriptor * service)1190 const FeatureSet& GetParentFeatures(const ServiceDescriptor* service) {
1191   return internal::InternalFeatureHelper::GetFeatures(*service->file());
1192 }
1193 
GetParentFeatures(const MethodDescriptor * method)1194 const FeatureSet& GetParentFeatures(const MethodDescriptor* method) {
1195   return internal::InternalFeatureHelper::GetFeatures(*method->service());
1196 }
1197 
IsLegacyEdition(Edition edition)1198 bool IsLegacyEdition(Edition edition) {
1199   return edition < Edition::EDITION_2023;
1200 }
1201 
1202 }  // anonymous namespace
1203 
1204 // Contains tables specific to a particular file.  These tables are not
1205 // modified once the file has been constructed, so they need not be
1206 // protected by a mutex.  This makes operations that depend only on the
1207 // contents of a single file -- e.g. Descriptor::FindFieldByName() --
1208 // lock-free.
1209 //
1210 // For historical reasons, the definitions of the methods of
1211 // FileDescriptorTables and DescriptorPool::Tables are interleaved below.
1212 // These used to be a single class.
class FileDescriptorTables {
 public:
  FileDescriptorTables();
  ~FileDescriptorTables();

  // Empty table, used with placeholder files.
  inline static const FileDescriptorTables& GetEmptyInstance();

  // -----------------------------------------------------------------
  // Finding items.

  // Returns a null Symbol (symbol.IsNull() is true) if not found.
  // TODO: All callers to this function know the type they are looking
  // for. If we propagate that information statically we can make the query
  // faster.
  inline Symbol FindNestedSymbol(const void* parent,
                                 absl::string_view name) const;

  // These return nullptr if not found.
  inline const FieldDescriptor* FindFieldByNumber(const Descriptor* parent,
                                                  int number) const;
  inline const FieldDescriptor* FindFieldByLowercaseName(
      const void* parent, absl::string_view lowercase_name) const;
  inline const FieldDescriptor* FindFieldByCamelcaseName(
      const void* parent, absl::string_view camelcase_name) const;
  inline const EnumValueDescriptor* FindEnumValueByNumber(
      const EnumDescriptor* parent, int number) const;
  // This creates a new EnumValueDescriptor if not found, in a thread-safe way.
  inline const EnumValueDescriptor* FindEnumValueByNumberCreatingIfUnknown(
      const EnumDescriptor* parent, int number) const;

  // -----------------------------------------------------------------
  // Adding items.

  // These add items to the corresponding tables.  They return false if
  // the key already exists in the table.
  bool AddAliasUnderParent(const void* parent, absl::string_view name,
                           Symbol symbol);
  bool AddFieldByNumber(FieldDescriptor* field);
  bool AddEnumValueByNumber(EnumValueDescriptor* value);

  // Populates p->first->locations_by_path_ from p->second.
  // Unusual signature dictated by absl::call_once.
  static void BuildLocationsByPath(
      std::pair<const FileDescriptorTables*, const SourceCodeInfo*>* p);

  // Returns the location denoted by the specified path through info,
  // or nullptr if not found.
  // The value of info must be that of the corresponding FileDescriptor.
  // (Conceptually a pure function, but stateful as an optimisation.)
  const SourceCodeInfo_Location* GetSourceLocation(
      const std::vector<int>& path, const SourceCodeInfo* info) const;

  // Must be called after BuildFileImpl(), even if the build failed and
  // we are going to roll back to the last checkpoint.
  void FinalizeTables();

 private:
  // NOTE(review): presumably returns the parent pointer under which `field`
  // is keyed in the FieldsByNameMap tables; the definition is outside this
  // chunk -- confirm there.
  const void* FindParentForFieldsByMap(const FieldDescriptor* field) const;
  // Lazy builders for the lowercase/camelcase name maps.  The *Static
  // variants take the table as an argument; NOTE(review): presumably they are
  // the entry points handed to absl::call_once with the matching once_flag
  // below -- confirm at the definitions.
  static void FieldsByLowercaseNamesLazyInitStatic(
      const FileDescriptorTables* tables);
  void FieldsByLowercaseNamesLazyInitInternal() const;
  static void FieldsByCamelcaseNamesLazyInitStatic(
      const FileDescriptorTables* tables);
  void FieldsByCamelcaseNamesLazyInitInternal() const;

  // Symbols keyed by (parent, name); see SymbolsByParentSet.
  SymbolsByParentSet symbols_by_parent_;
  mutable absl::once_flag fields_by_lowercase_name_once_;
  mutable absl::once_flag fields_by_camelcase_name_once_;
  // Make these fields atomic to avoid race conditions with
  // GetEstimatedOwnedMemoryBytesSize. Once the pointer is set the map won't
  // change anymore.
  mutable std::atomic<const FieldsByNameMap*> fields_by_lowercase_name_{};
  mutable std::atomic<const FieldsByNameMap*> fields_by_camelcase_name_{};
  FieldsByNumberSet fields_by_number_;  // Not including extensions.
  // Enum values keyed by (enum type, number); see EnumValuesByNumberSet.
  EnumValuesByNumberSet enum_values_by_number_;
  mutable EnumValuesByNumberSet unknown_enum_values_by_number_
      ABSL_GUARDED_BY(unknown_enum_values_mu_);

  // Populated on first request to save space, hence constness games.
  mutable absl::once_flag locations_by_path_once_;
  mutable LocationsByPathMap locations_by_path_;

  // Mutex to protect the unknown-enum-value map due to dynamic
  // EnumValueDescriptor creation on unknown values.
  mutable absl::Mutex unknown_enum_values_mu_;
};
1300 
1301 namespace internal {
1302 
// Small sequential allocator to be used within a single file.
// Most of the memory for a single FileDescriptor and everything under it is
// allocated in a single block of memory, with the FlatAllocator giving it out
// in parts later.
// The code first plans the total number of bytes needed by calling PlanArray
// with all the allocations that will happen afterwards, then calls
// FinalizePlanning passing the underlying allocator (the DescriptorPool::Tables
// instance), and then proceeds to get the memory via
// `AllocateArray`/`AllocateString` calls.
// The calls have to match between planning and allocating, though not
// necessarily in the same order.
//
// The class body is empty: everything is inherited from the FlatAllocatorImpl
// instantiation obtained by applying the type list below, after
// SortByAlignment has processed it.
class FlatAllocator
    : public decltype(ApplyTypeList<FlatAllocatorImpl>(
          SortByAlignment<char, std::string, SourceCodeInfo,
                          FileDescriptorTables, FeatureSet,
                          // Option types
                          MessageOptions, FieldOptions, EnumOptions,
                          EnumValueOptions, ExtensionRangeOptions, OneofOptions,
                          ServiceOptions, MethodOptions, FileOptions>())) {};
1322 
1323 }  // namespace internal
1324 
1325 // ===================================================================
1326 // DescriptorPool::DeferredValidation
1327 
1328 // This class stores information required to defer validation until we're
1329 // outside the mutex lock.  These are reflective checks that also require us to
1330 // acquire the lock.
1331 class DescriptorPool::DeferredValidation {
1332  public:
DeferredValidation(const DescriptorPool * pool,ErrorCollector * error_collector)1333   DeferredValidation(const DescriptorPool* pool,
1334                      ErrorCollector* error_collector)
1335       : pool_(pool), error_collector_(error_collector) {}
DeferredValidation(const DescriptorPool * pool)1336   explicit DeferredValidation(const DescriptorPool* pool)
1337       : pool_(pool), error_collector_(pool->default_error_collector_) {}
1338 
1339   DeferredValidation(const DeferredValidation&) = delete;
1340   DeferredValidation& operator=(const DeferredValidation&) = delete;
1341   DeferredValidation(DeferredValidation&&) = delete;
1342   DeferredValidation& operator=(DeferredValidation&&) = delete;
1343 
~DeferredValidation()1344   ~DeferredValidation() {
1345     ABSL_CHECK(lifetimes_info_map_.empty())
1346         << "DeferredValidation destroyed with unvalidated features";
1347   }
1348 
1349   struct LifetimesInfo {
1350     const FeatureSet* proto_features;
1351     const Message* proto;
1352     absl::string_view full_name;
1353     absl::string_view filename;
1354   };
ValidateFeatureLifetimes(const FileDescriptor * file,LifetimesInfo info)1355   void ValidateFeatureLifetimes(const FileDescriptor* file,
1356                                 LifetimesInfo info) {
1357     lifetimes_info_map_[file].emplace_back(std::move(info));
1358   }
1359 
RollbackFile(const FileDescriptor * file)1360   void RollbackFile(const FileDescriptor* file) {
1361     lifetimes_info_map_.erase(file);
1362   }
1363 
1364   // Create a new file proto with an extended lifetime for deferred error
1365   // reporting.  If any temporary file protos don't outlive this object, the
1366   // reported errors won't be able to safely reference a location in the
1367   // original proto file.
CreateProto()1368   FileDescriptorProto& CreateProto() {
1369     owned_protos_.push_back(Arena::Create<FileDescriptorProto>(&arena_));
1370     return *owned_protos_.back();
1371   }
1372 
Validate()1373   bool Validate() {
1374     if (lifetimes_info_map_.empty()) return true;
1375 
1376     static absl::string_view feature_set_name = "google.protobuf.FeatureSet";
1377     const Descriptor* feature_set =
1378         pool_->FindMessageTypeByName(feature_set_name);
1379 
1380     bool has_errors = false;
1381     for (const auto& it : lifetimes_info_map_) {
1382       const FileDescriptor* file = it.first;
1383 
1384       for (const auto& info : it.second) {
1385         auto results = FeatureResolver::ValidateFeatureLifetimes(
1386             file->edition(), *info.proto_features, feature_set);
1387         for (const auto& error : results.errors) {
1388           has_errors = true;
1389           if (error_collector_ == nullptr) {
1390             ABSL_LOG(ERROR)
1391                 << info.filename << " " << info.full_name << ": " << error;
1392           } else {
1393             error_collector_->RecordError(
1394                 info.filename, info.full_name, info.proto,
1395                 DescriptorPool::ErrorCollector::NAME, error);
1396           }
1397         }
1398         if (pool_->direct_input_files_.find(file->name()) !=
1399             pool_->direct_input_files_.end()) {
1400           for (const auto& warning : results.warnings) {
1401             if (error_collector_ == nullptr) {
1402               ABSL_LOG(WARNING)
1403                   << info.filename << " " << info.full_name << ": " << warning;
1404             } else {
1405               error_collector_->RecordWarning(
1406                   info.filename, info.full_name, info.proto,
1407                   DescriptorPool::ErrorCollector::NAME, warning);
1408             }
1409           }
1410         }
1411       }
1412     }
1413     lifetimes_info_map_.clear();
1414     return !has_errors;
1415   }
1416 
1417  private:
1418   Arena arena_;
1419   const DescriptorPool* pool_;
1420   ErrorCollector* error_collector_;
1421   absl::flat_hash_map<const FileDescriptor*, std::vector<LifetimesInfo>>
1422       lifetimes_info_map_;
1423   std::vector<FileDescriptorProto*> owned_protos_;
1424 };
1425 
1426 // ===================================================================
1427 // DescriptorPool::Tables
1428 
1429 class DescriptorPool::Tables {
1430  public:
1431   Tables();
1432   ~Tables();
1433 
1434   // Record the current state of the tables to the stack of checkpoints.
1435   // Each call to AddCheckpoint() must be paired with exactly one call to either
1436   // ClearLastCheckpoint() or RollbackToLastCheckpoint().
1437   //
1438   // This is used when building files, since some kinds of validation errors
1439   // cannot be detected until the file's descriptors have already been added to
1440   // the tables.
1441   //
1442   // This supports recursive checkpoints, since building a file may trigger
1443   // recursive building of other files. Note that recursive checkpoints are not
1444   // normally necessary; explicit dependencies are built prior to checkpointing.
1445   // So although we recursively build transitive imports, there is at most one
1446   // checkpoint in the stack during dependency building.
1447   //
1448   // Recursive checkpoints only arise during cross-linking of the descriptors.
1449   // Symbol references must be resolved, via DescriptorBuilder::FindSymbol and
1450   // friends. If the pending file references an unknown symbol
1451   // (e.g., it is not defined in the pending file's explicit dependencies), and
1452   // the pool is using a fallback database, and that database contains a file
1453   // defining that symbol, and that file has not yet been built by the pool,
1454   // the pool builds the file during cross-linking, leading to another
1455   // checkpoint.
1456   void AddCheckpoint();
1457 
1458   // Mark the last checkpoint as having cleared successfully, removing it from
1459   // the stack. If the stack is empty, all pending symbols will be committed.
1460   //
1461   // Note that this does not guarantee that the symbols added since the last
1462   // checkpoint won't be rolled back: if a checkpoint gets rolled back,
1463   // everything past that point gets rolled back, including symbols added after
1464   // checkpoints that were pushed onto the stack after it and marked as cleared.
1465   void ClearLastCheckpoint();
1466 
1467   // Roll back the Tables to the state of the checkpoint at the top of the
1468   // stack, removing everything that was added after that point.
1469   void RollbackToLastCheckpoint(DeferredValidation& deferred_validation);
1470 
1471   // The stack of files which are currently being built.  Used to detect
1472   // cyclic dependencies when loading files from a DescriptorDatabase.  Not
1473   // used when fallback_database_ == nullptr.
1474   std::vector<std::string> pending_files_;
1475 
1476   // A set of files which we have tried to load from the fallback database
1477   // and encountered errors.  We will not attempt to load them again during
1478   // execution of the current public API call, but for compatibility with
1479   // legacy clients, this is cleared at the beginning of each public API call.
1480   // Not used when fallback_database_ == nullptr.
1481   absl::flat_hash_set<std::string> known_bad_files_;
1482 
1483   // A set of symbols which we have tried to load from the fallback database
1484   // and encountered errors. We will not attempt to load them again during
1485   // execution of the current public API call, but for compatibility with
1486   // legacy clients, this is cleared at the beginning of each public API call.
1487   absl::flat_hash_set<std::string> known_bad_symbols_;
1488 
1489   // The set of descriptors for which we've already loaded the full
1490   // set of extensions numbers from fallback_database_.
1491   absl::flat_hash_set<const Descriptor*> extensions_loaded_from_db_;
1492 
1493   // Maps type name to Descriptor::WellKnownType.  This is logically global
1494   // and const, but we make it a member here to simplify its construction and
1495   // destruction.  This only has 20-ish entries and is one per DescriptorPool,
1496   // so the overhead is small.
1497   absl::flat_hash_map<std::string, Descriptor::WellKnownType> well_known_types_;
1498 
1499   // -----------------------------------------------------------------
1500   // Finding items.
1501 
1502   // Find symbols.  This returns a null Symbol (symbol.IsNull() is true)
1503   // if not found.
1504   inline Symbol FindSymbol(absl::string_view key) const;
1505 
1506   // This implements the body of DescriptorPool::Find*ByName().  It should
1507   // really be a private method of DescriptorPool, but that would require
1508   // declaring Symbol in descriptor.h, which would drag all kinds of other
1509   // stuff into the header.  Yay C++.
1510   Symbol FindByNameHelper(const DescriptorPool* pool, absl::string_view name);
1511 
1512   // These return nullptr if not found.
1513   inline const FileDescriptor* FindFile(absl::string_view key) const;
1514   inline const FieldDescriptor* FindExtension(const Descriptor* extendee,
1515                                               int number) const;
1516   inline void FindAllExtensions(const Descriptor* extendee,
1517                                 std::vector<const FieldDescriptor*>* out) const;
1518 
1519   // -----------------------------------------------------------------
1520   // Adding items.
1521 
1522   // These add items to the corresponding tables.  They return false if
1523   // the key already exists in the table.  For AddSymbol(), the string passed
1524   // in must be one that was constructed using AllocateString(), as it will
1525   // be used as a key in the symbols_by_name_ map without copying.
1526   bool AddSymbol(absl::string_view full_name, Symbol symbol);
1527   bool AddFile(const FileDescriptor* file);
1528   bool AddExtension(const FieldDescriptor* field);
1529 
1530   // Caches a feature set and returns a stable reference to the cached
1531   // allocation owned by the pool.
1532   const FeatureSet* InternFeatureSet(FeatureSet&& features);
1533 
1534   // -----------------------------------------------------------------
1535   // Allocating memory.
1536 
1537   // Allocate an object which will be reclaimed when the pool is
1538   // destroyed.  Note that the object's destructor will never be called,
1539   // so its fields must be plain old data (primitive data types and
1540   // pointers).  All of the descriptor types are such objects.
1541   template <typename Type>
1542   Type* Allocate();
1543 
1544   // Allocate some bytes which will be reclaimed when the pool is
1545   // destroyed. Memory is aligned to 8 bytes.
1546   void* AllocateBytes(int size);
1547 
1548   // Create a FlatAllocation for the corresponding sizes.
1549   // All objects within it will be default constructed.
1550   // The whole allocation, including the non-trivial objects within, will be
1551   // destroyed with the pool.
1552   template <typename... T>
1553   internal::FlatAllocator::Allocation* CreateFlatAlloc(
1554       const TypeMap<IntT, T...>& sizes);
1555 
1556 
1557  private:
1558   // All memory allocated in the pool.  Must be first as other objects can
1559   // point into these.
1560   struct MiscDeleter {
operator ()google::protobuf::DescriptorPool::Tables::MiscDeleter1561     void operator()(int* p) const { internal::SizedDelete(p, *p + 8); }
1562   };
1563   // Miscellaneous allocations are length prefixed. The paylaod is 8 bytes after
1564   // the `int` that contains the size. This keeps the payload aligned.
1565   std::vector<std::unique_ptr<int, MiscDeleter>> misc_allocs_;
1566   struct FlatAllocDeleter {
operator ()google::protobuf::DescriptorPool::Tables::FlatAllocDeleter1567     void operator()(internal::FlatAllocator::Allocation* p) const {
1568       p->Destroy();
1569     }
1570   };
1571   std::vector<
1572       std::unique_ptr<internal::FlatAllocator::Allocation, FlatAllocDeleter>>
1573       flat_allocs_;
1574 
1575   SymbolsByNameSet symbols_by_name_;
1576   DescriptorsByNameSet<FileDescriptor> files_by_name_;
1577   ExtensionsGroupedByDescriptorMap extensions_;
1578 
1579   // A cache of all unique feature sets seen.  Since we expect this number to be
1580   // relatively low compared to descriptors, it's significantly cheaper to share
1581   // these within the pool than have each file create its own feature sets.
1582   absl::flat_hash_map<std::string, std::unique_ptr<FeatureSet>>
1583       feature_set_cache_;
1584 
1585   struct CheckPoint {
CheckPointgoogle::protobuf::DescriptorPool::Tables::CheckPoint1586     explicit CheckPoint(const Tables* tables)
1587         : flat_allocations_before_checkpoint(
1588               static_cast<int>(tables->flat_allocs_.size())),
1589           misc_allocations_before_checkpoint(
1590               static_cast<int>(tables->misc_allocs_.size())),
1591           pending_symbols_before_checkpoint(
1592               tables->symbols_after_checkpoint_.size()),
1593           pending_files_before_checkpoint(
1594               tables->files_after_checkpoint_.size()),
1595           pending_extensions_before_checkpoint(
1596               tables->extensions_after_checkpoint_.size()) {}
1597     int flat_allocations_before_checkpoint;
1598     int misc_allocations_before_checkpoint;
1599     int pending_symbols_before_checkpoint;
1600     int pending_files_before_checkpoint;
1601     int pending_extensions_before_checkpoint;
1602   };
1603   std::vector<CheckPoint> checkpoints_;
1604   std::vector<Symbol> symbols_after_checkpoint_;
1605   std::vector<const FileDescriptor*> files_after_checkpoint_;
1606   std::vector<std::pair<const Descriptor*, int>> extensions_after_checkpoint_;
1607 };
1608 
Tables()1609 DescriptorPool::Tables::Tables() {
1610   well_known_types_.insert({
1611       {"google.protobuf.DoubleValue", Descriptor::WELLKNOWNTYPE_DOUBLEVALUE},
1612       {"google.protobuf.FloatValue", Descriptor::WELLKNOWNTYPE_FLOATVALUE},
1613       {"google.protobuf.Int64Value", Descriptor::WELLKNOWNTYPE_INT64VALUE},
1614       {"google.protobuf.UInt64Value", Descriptor::WELLKNOWNTYPE_UINT64VALUE},
1615       {"google.protobuf.Int32Value", Descriptor::WELLKNOWNTYPE_INT32VALUE},
1616       {"google.protobuf.UInt32Value", Descriptor::WELLKNOWNTYPE_UINT32VALUE},
1617       {"google.protobuf.StringValue", Descriptor::WELLKNOWNTYPE_STRINGVALUE},
1618       {"google.protobuf.BytesValue", Descriptor::WELLKNOWNTYPE_BYTESVALUE},
1619       {"google.protobuf.BoolValue", Descriptor::WELLKNOWNTYPE_BOOLVALUE},
1620       {"google.protobuf.Any", Descriptor::WELLKNOWNTYPE_ANY},
1621       {"google.protobuf.FieldMask", Descriptor::WELLKNOWNTYPE_FIELDMASK},
1622       {"google.protobuf.Duration", Descriptor::WELLKNOWNTYPE_DURATION},
1623       {"google.protobuf.Timestamp", Descriptor::WELLKNOWNTYPE_TIMESTAMP},
1624       {"google.protobuf.Value", Descriptor::WELLKNOWNTYPE_VALUE},
1625       {"google.protobuf.ListValue", Descriptor::WELLKNOWNTYPE_LISTVALUE},
1626       {"google.protobuf.Struct", Descriptor::WELLKNOWNTYPE_STRUCT},
1627   });
1628 }
1629 
~Tables()1630 DescriptorPool::Tables::~Tables() { ABSL_DCHECK(checkpoints_.empty()); }
1631 
// Defaulted: the compiler-generated constructor is used as-is.
FileDescriptorTables::FileDescriptorTables() = default;
1633 
~FileDescriptorTables()1634 FileDescriptorTables::~FileDescriptorTables() {
1635   delete fields_by_lowercase_name_.load(std::memory_order_acquire);
1636   delete fields_by_camelcase_name_.load(std::memory_order_acquire);
1637 }
1638 
GetEmptyInstance()1639 inline const FileDescriptorTables& FileDescriptorTables::GetEmptyInstance() {
1640   static auto file_descriptor_tables =
1641       internal::OnShutdownDelete(new FileDescriptorTables());
1642   return *file_descriptor_tables;
1643 }
1644 
AddCheckpoint()1645 void DescriptorPool::Tables::AddCheckpoint() {
1646   checkpoints_.emplace_back(this);
1647 }
1648 
ClearLastCheckpoint()1649 void DescriptorPool::Tables::ClearLastCheckpoint() {
1650   ABSL_DCHECK(!checkpoints_.empty());
1651   checkpoints_.pop_back();
1652   if (checkpoints_.empty()) {
1653     // All checkpoints have been cleared: we can now commit all of the pending
1654     // data.
1655     symbols_after_checkpoint_.clear();
1656     files_after_checkpoint_.clear();
1657     extensions_after_checkpoint_.clear();
1658   }
1659 }
1660 
RollbackToLastCheckpoint(DeferredValidation & deferred_validation)1661 void DescriptorPool::Tables::RollbackToLastCheckpoint(
1662     DeferredValidation& deferred_validation) {
1663   ABSL_DCHECK(!checkpoints_.empty());
1664   const CheckPoint& checkpoint = checkpoints_.back();
1665 
1666   for (size_t i = checkpoint.pending_symbols_before_checkpoint;
1667        i < symbols_after_checkpoint_.size(); i++) {
1668     symbols_by_name_.erase(symbols_after_checkpoint_[i]);
1669   }
1670   for (size_t i = checkpoint.pending_files_before_checkpoint;
1671        i < files_after_checkpoint_.size(); i++) {
1672     deferred_validation.RollbackFile(files_after_checkpoint_[i]);
1673     files_by_name_.erase(files_after_checkpoint_[i]);
1674   }
1675   for (size_t i = checkpoint.pending_extensions_before_checkpoint;
1676        i < extensions_after_checkpoint_.size(); i++) {
1677     extensions_.erase(extensions_after_checkpoint_[i]);
1678   }
1679 
1680   symbols_after_checkpoint_.resize(
1681       checkpoint.pending_symbols_before_checkpoint);
1682   files_after_checkpoint_.resize(checkpoint.pending_files_before_checkpoint);
1683   extensions_after_checkpoint_.resize(
1684       checkpoint.pending_extensions_before_checkpoint);
1685 
1686   flat_allocs_.resize(checkpoint.flat_allocations_before_checkpoint);
1687   misc_allocs_.resize(checkpoint.misc_allocations_before_checkpoint);
1688   checkpoints_.pop_back();
1689 }
1690 
1691 // -------------------------------------------------------------------
1692 
FindSymbol(absl::string_view key) const1693 inline Symbol DescriptorPool::Tables::FindSymbol(absl::string_view key) const {
1694   auto it = symbols_by_name_.find(FullNameQuery{key});
1695   return it == symbols_by_name_.end() ? Symbol() : *it;
1696 }
1697 
FindNestedSymbol(const void * parent,absl::string_view name) const1698 inline Symbol FileDescriptorTables::FindNestedSymbol(
1699     const void* parent, absl::string_view name) const {
1700   auto it = symbols_by_parent_.find(ParentNameQuery{{parent, name}});
1701   return it == symbols_by_parent_.end() ? Symbol() : *it;
1702 }
1703 
FindByNameHelper(const DescriptorPool * pool,absl::string_view name)1704 Symbol DescriptorPool::Tables::FindByNameHelper(const DescriptorPool* pool,
1705                                                 absl::string_view name) {
1706   if (pool->mutex_ != nullptr) {
1707     // Fast path: the Symbol is already cached.  This is just a hash lookup.
1708     absl::ReaderMutexLock lock(pool->mutex_);
1709     if (known_bad_symbols_.empty() && known_bad_files_.empty()) {
1710       Symbol result = FindSymbol(name);
1711       if (!result.IsNull()) return result;
1712     }
1713   }
1714   DescriptorPool::DeferredValidation deferred_validation(pool);
1715   Symbol result;
1716   {
1717     absl::MutexLockMaybe lock(pool->mutex_);
1718     if (pool->fallback_database_ != nullptr) {
1719       known_bad_symbols_.clear();
1720       known_bad_files_.clear();
1721     }
1722     result = FindSymbol(name);
1723 
1724     if (result.IsNull() && pool->underlay_ != nullptr) {
1725       // Symbol not found; check the underlay.
1726       result =
1727           pool->underlay_->tables_->FindByNameHelper(pool->underlay_, name);
1728     }
1729 
1730     if (result.IsNull()) {
1731       // Symbol still not found, so check fallback database.
1732       if (pool->TryFindSymbolInFallbackDatabase(name, deferred_validation)) {
1733         result = FindSymbol(name);
1734       }
1735     }
1736   }
1737 
1738   if (!deferred_validation.Validate()) {
1739     return Symbol();
1740   }
1741   return result;
1742 }
1743 
FindFile(absl::string_view key) const1744 inline const FileDescriptor* DescriptorPool::Tables::FindFile(
1745     absl::string_view key) const {
1746   auto it = files_by_name_.find(key);
1747   if (it == files_by_name_.end()) return nullptr;
1748   return *it;
1749 }
1750 
FindFieldByNumber(const Descriptor * parent,int number) const1751 inline const FieldDescriptor* FileDescriptorTables::FindFieldByNumber(
1752     const Descriptor* parent, int number) const {
1753   // If `number` is within the sequential range, just index into the parent
1754   // without doing a table lookup.
1755   if (parent != nullptr &&  //
1756       1 <= number && number <= parent->sequential_field_limit_) {
1757     return parent->field(number - 1);
1758   }
1759 
1760   auto it = fields_by_number_.find(ParentNumberQuery{{parent, number}});
1761   return it == fields_by_number_.end() ? nullptr : *it;
1762 }
1763 
FindParentForFieldsByMap(const FieldDescriptor * field) const1764 const void* FileDescriptorTables::FindParentForFieldsByMap(
1765     const FieldDescriptor* field) const {
1766   if (field->is_extension()) {
1767     if (field->extension_scope() == nullptr) {
1768       return field->file();
1769     } else {
1770       return field->extension_scope();
1771     }
1772   } else {
1773     return field->containing_type();
1774   }
1775 }
1776 
// Static entry point that forwards to the member initializer on `tables`.
// FindFieldByLowercaseName() passes this function to absl::call_once().
void FileDescriptorTables::FieldsByLowercaseNamesLazyInitStatic(
    const FileDescriptorTables* tables) {
  tables->FieldsByLowercaseNamesLazyInitInternal();
}
1781 
FieldsByLowercaseNamesLazyInitInternal() const1782 void FileDescriptorTables::FieldsByLowercaseNamesLazyInitInternal() const {
1783   auto* map = new FieldsByNameMap;
1784   for (Symbol symbol : symbols_by_parent_) {
1785     const FieldDescriptor* field = symbol.field_descriptor();
1786     if (!field) continue;
1787     (*map)[{FindParentForFieldsByMap(field), field->lowercase_name()}] = field;
1788   }
1789   fields_by_lowercase_name_.store(map, std::memory_order_release);
1790 }
1791 
FindFieldByLowercaseName(const void * parent,absl::string_view lowercase_name) const1792 inline const FieldDescriptor* FileDescriptorTables::FindFieldByLowercaseName(
1793     const void* parent, absl::string_view lowercase_name) const {
1794   absl::call_once(fields_by_lowercase_name_once_,
1795                   &FileDescriptorTables::FieldsByLowercaseNamesLazyInitStatic,
1796                   this);
1797   const auto* fields =
1798       fields_by_lowercase_name_.load(std::memory_order_acquire);
1799   auto it = fields->find({parent, lowercase_name});
1800   if (it == fields->end()) return nullptr;
1801   return it->second;
1802 }
1803 
// Static entry point that forwards to the member initializer on `tables`.
// FindFieldByCamelcaseName() passes this function to absl::call_once().
void FileDescriptorTables::FieldsByCamelcaseNamesLazyInitStatic(
    const FileDescriptorTables* tables) {
  tables->FieldsByCamelcaseNamesLazyInitInternal();
}
1808 
FieldsByCamelcaseNamesLazyInitInternal() const1809 void FileDescriptorTables::FieldsByCamelcaseNamesLazyInitInternal() const {
1810   auto* map = new FieldsByNameMap;
1811   for (Symbol symbol : symbols_by_parent_) {
1812     const FieldDescriptor* field = symbol.field_descriptor();
1813     if (!field) continue;
1814     const void* parent = FindParentForFieldsByMap(field);
1815     // If we already have a field with this camelCase name, keep the field with
1816     // the smallest field number. This way we get a deterministic mapping.
1817     const FieldDescriptor*& found = (*map)[{parent, field->camelcase_name()}];
1818     if (found == nullptr || found->number() > field->number()) {
1819       found = field;
1820     }
1821   }
1822   fields_by_camelcase_name_.store(map, std::memory_order_release);
1823 }
1824 
FindFieldByCamelcaseName(const void * parent,absl::string_view camelcase_name) const1825 inline const FieldDescriptor* FileDescriptorTables::FindFieldByCamelcaseName(
1826     const void* parent, absl::string_view camelcase_name) const {
1827   absl::call_once(fields_by_camelcase_name_once_,
1828                   FileDescriptorTables::FieldsByCamelcaseNamesLazyInitStatic,
1829                   this);
1830   auto* fields = fields_by_camelcase_name_.load(std::memory_order_acquire);
1831   auto it = fields->find({parent, camelcase_name});
1832   if (it == fields->end()) return nullptr;
1833   return it->second;
1834 }
1835 
FindEnumValueByNumber(const EnumDescriptor * parent,int number) const1836 inline const EnumValueDescriptor* FileDescriptorTables::FindEnumValueByNumber(
1837     const EnumDescriptor* parent, int number) const {
1838   // If `number` is within the sequential range, just index into the parent
1839   // without doing a table lookup.
1840   const int base = parent->value(0)->number();
1841   if (base <= number &&
1842       number <= static_cast<int64_t>(base) + parent->sequential_value_limit_) {
1843     return parent->value(number - base);
1844   }
1845 
1846   auto it = enum_values_by_number_.find(ParentNumberQuery{{parent, number}});
1847   return it == enum_values_by_number_.end() ? nullptr : *it;
1848 }
1849 
1850 inline const EnumValueDescriptor*
FindEnumValueByNumberCreatingIfUnknown(const EnumDescriptor * parent,int number) const1851 FileDescriptorTables::FindEnumValueByNumberCreatingIfUnknown(
1852     const EnumDescriptor* parent, int number) const {
1853   // First try, with map of compiled-in values.
1854   {
1855     const auto* value = FindEnumValueByNumber(parent, number);
1856     if (value != nullptr) {
1857       return value;
1858     }
1859   }
1860 
1861   const ParentNumberQuery query{{parent, number}};
1862 
1863   // Second try, with reader lock held on unknown enum values: common case.
1864   {
1865     absl::ReaderMutexLock l(&unknown_enum_values_mu_);
1866     auto it = unknown_enum_values_by_number_.find(query);
1867     if (it != unknown_enum_values_by_number_.end()) {
1868       return *it;
1869     }
1870   }
1871   // If not found, try again with writer lock held, and create new descriptor if
1872   // necessary.
1873   {
1874     absl::WriterMutexLock l(&unknown_enum_values_mu_);
1875     auto it = unknown_enum_values_by_number_.find(query);
1876     if (it != unknown_enum_values_by_number_.end()) {
1877       return *it;
1878     }
1879 
1880     // Create an EnumValueDescriptor dynamically. We don't insert it into the
1881     // EnumDescriptor (it's not a part of the enum as originally defined), but
1882     // we do insert it into the table so that we can return the same pointer
1883     // later.
1884     std::string enum_value_name =
1885         absl::StrFormat("UNKNOWN_ENUM_VALUE_%s_%d", parent->name(), number);
1886     auto* pool = DescriptorPool::generated_pool();
1887     auto* tables = const_cast<DescriptorPool::Tables*>(pool->tables_.get());
1888     internal::FlatAllocator alloc;
1889     alloc.PlanArray<EnumValueDescriptor>(1);
1890     alloc.PlanArray<std::string>(2);
1891 
1892     {
1893       // Must lock the pool because we will do allocations in the shared arena.
1894       absl::MutexLockMaybe l2(pool->mutex_);
1895       alloc.FinalizePlanning(tables);
1896     }
1897     EnumValueDescriptor* result = alloc.AllocateArray<EnumValueDescriptor>(1);
1898     result->all_names_ = alloc.AllocateStrings(
1899         enum_value_name,
1900         absl::StrCat(parent->full_name(), ".", enum_value_name));
1901     result->number_ = number;
1902     result->type_ = parent;
1903     result->options_ = &EnumValueOptions::default_instance();
1904     unknown_enum_values_by_number_.insert(result);
1905     return result;
1906   }
1907 }
1908 
FindExtension(const Descriptor * extendee,int number) const1909 inline const FieldDescriptor* DescriptorPool::Tables::FindExtension(
1910     const Descriptor* extendee, int number) const {
1911   auto it = extensions_.find({extendee, number});
1912   if (it == extensions_.end()) return nullptr;
1913   return it->second;
1914 }
1915 
FindAllExtensions(const Descriptor * extendee,std::vector<const FieldDescriptor * > * out) const1916 inline void DescriptorPool::Tables::FindAllExtensions(
1917     const Descriptor* extendee,
1918     std::vector<const FieldDescriptor*>* out) const {
1919   ExtensionsGroupedByDescriptorMap::const_iterator it =
1920       extensions_.lower_bound(std::make_pair(extendee, 0));
1921   for (; it != extensions_.end() && it->first.first == extendee; ++it) {
1922     out->push_back(it->second);
1923   }
1924 }
1925 
1926 // -------------------------------------------------------------------
1927 
AddSymbol(absl::string_view full_name,Symbol symbol)1928 bool DescriptorPool::Tables::AddSymbol(absl::string_view full_name,
1929                                        Symbol symbol) {
1930   ABSL_DCHECK_EQ(full_name, symbol.full_name());
1931   if (symbols_by_name_.insert(symbol).second) {
1932     symbols_after_checkpoint_.push_back(symbol);
1933     return true;
1934   } else {
1935     return false;
1936   }
1937 }
1938 
AddAliasUnderParent(const void * parent,absl::string_view name,Symbol symbol)1939 bool FileDescriptorTables::AddAliasUnderParent(const void* parent,
1940                                                absl::string_view name,
1941                                                Symbol symbol) {
1942   ABSL_DCHECK_EQ(name, symbol.parent_name_key().second);
1943   ABSL_DCHECK_EQ(parent, symbol.parent_name_key().first);
1944   return symbols_by_parent_.insert(symbol).second;
1945 }
1946 
AddFile(const FileDescriptor * file)1947 bool DescriptorPool::Tables::AddFile(const FileDescriptor* file) {
1948   if (files_by_name_.insert(file).second) {
1949     files_after_checkpoint_.push_back(file);
1950     return true;
1951   } else {
1952     return false;
1953   }
1954 }
1955 
// Currently a no-op: the body is empty.
void FileDescriptorTables::FinalizeTables() {}
1957 
AddFieldByNumber(FieldDescriptor * field)1958 bool FileDescriptorTables::AddFieldByNumber(FieldDescriptor* field) {
1959   // Skip fields that are at the start of the sequence.
1960   if (field->containing_type() != nullptr && field->number() >= 1 &&
1961       field->number() <= field->containing_type()->sequential_field_limit_) {
1962     if (field->is_extension()) {
1963       // Conflicts with the field that already exists in the sequential range.
1964       return false;
1965     }
1966     // Only return true if the field at that index matches. Otherwise it
1967     // conflicts with the existing field in the sequential range.
1968     return field->containing_type()->field(field->number() - 1) == field;
1969   }
1970 
1971   return fields_by_number_.insert(field).second;
1972 }
1973 
AddEnumValueByNumber(EnumValueDescriptor * value)1974 bool FileDescriptorTables::AddEnumValueByNumber(EnumValueDescriptor* value) {
1975   // Skip values that are at the start of the sequence.
1976   const int base = value->type()->value(0)->number();
1977   if (base <= value->number() &&
1978       value->number() <=
1979           static_cast<int64_t>(base) + value->type()->sequential_value_limit_)
1980     return true;
1981   return enum_values_by_number_.insert(value).second;
1982 }
1983 
AddExtension(const FieldDescriptor * field)1984 bool DescriptorPool::Tables::AddExtension(const FieldDescriptor* field) {
1985   auto it_inserted =
1986       extensions_.insert({{field->containing_type(), field->number()}, field});
1987   if (it_inserted.second) {
1988     extensions_after_checkpoint_.push_back(it_inserted.first->first);
1989     return true;
1990   } else {
1991     return false;
1992   }
1993 }
1994 
InternFeatureSet(FeatureSet && features)1995 const FeatureSet* DescriptorPool::Tables::InternFeatureSet(
1996     FeatureSet&& features) {
1997   // Use the serialized feature set as the cache key.  If multiple equivalent
1998   // feature sets serialize to different strings, that just bloats the cache a
1999   // little.
2000   auto& result = feature_set_cache_[features.SerializeAsString()];
2001   if (result == nullptr) {
2002     result = absl::make_unique<FeatureSet>(std::move(features));
2003   }
2004   return result.get();
2005 }
2006 
2007 // -------------------------------------------------------------------
2008 
2009 template <typename Type>
Allocate()2010 Type* DescriptorPool::Tables::Allocate() {
2011   static_assert(std::is_trivially_destructible<Type>::value, "");
2012   static_assert(alignof(Type) <= 8, "");
2013   return ::new (AllocateBytes(sizeof(Type))) Type{};
2014 }
2015 
AllocateBytes(int size)2016 void* DescriptorPool::Tables::AllocateBytes(int size) {
2017   if (size == 0) return nullptr;
2018   void* p = ::operator new(size + RoundUpTo<8>(sizeof(int)));
2019   int* sizep = static_cast<int*>(p);
2020   misc_allocs_.emplace_back(sizep);
2021   *sizep = size;
2022   return static_cast<char*>(p) + RoundUpTo<8>(sizeof(int));
2023 }
2024 
2025 template <typename... T>
CreateFlatAlloc(const TypeMap<IntT,T...> & sizes)2026 internal::FlatAllocator::Allocation* DescriptorPool::Tables::CreateFlatAlloc(
2027     const TypeMap<IntT, T...>& sizes) {
2028   auto ends = CalculateEnds(sizes);
2029   using FlatAlloc = internal::FlatAllocator::Allocation;
2030 
2031   int last_end = ends.template Get<
2032       typename std::tuple_element<sizeof...(T) - 1, std::tuple<T...>>::type>();
2033   size_t total_size =
2034       last_end + RoundUpTo<FlatAlloc::kMaxAlign>(sizeof(FlatAlloc));
2035   char* data = static_cast<char*>(::operator new(total_size));
2036   auto* res = ::new (data) FlatAlloc(ends);
2037   flat_allocs_.emplace_back(res);
2038 
2039   return res;
2040 }
2041 
BuildLocationsByPath(std::pair<const FileDescriptorTables *,const SourceCodeInfo * > * p)2042 void FileDescriptorTables::BuildLocationsByPath(
2043     std::pair<const FileDescriptorTables*, const SourceCodeInfo*>* p) {
2044   for (int i = 0, len = p->second->location_size(); i < len; ++i) {
2045     const SourceCodeInfo_Location* loc = &p->second->location().Get(i);
2046     p->first->locations_by_path_[absl::StrJoin(loc->path(), ",")] = loc;
2047   }
2048 }
2049 
GetSourceLocation(const std::vector<int> & path,const SourceCodeInfo * info) const2050 const SourceCodeInfo_Location* FileDescriptorTables::GetSourceLocation(
2051     const std::vector<int>& path, const SourceCodeInfo* info) const {
2052   std::pair<const FileDescriptorTables*, const SourceCodeInfo*> p(
2053       std::make_pair(this, info));
2054   absl::call_once(locations_by_path_once_,
2055                   FileDescriptorTables::BuildLocationsByPath, &p);
2056   auto it = locations_by_path_.find(absl::StrJoin(path, ","));
2057   if (it == locations_by_path_.end()) return nullptr;
2058   return it->second;
2059 }
2060 
2061 // ===================================================================
2062 // DescriptorPool
2063 
// Defaulted destructor, defined out of line in this file.
DescriptorPool::ErrorCollector::~ErrorCollector() = default;
2065 
ErrorLocationName(ErrorLocation location)2066 absl::string_view DescriptorPool::ErrorCollector::ErrorLocationName(
2067     ErrorLocation location) {
2068   switch (location) {
2069     case NAME:
2070       return "NAME";
2071     case NUMBER:
2072       return "NUMBER";
2073     case TYPE:
2074       return "TYPE";
2075     case EXTENDEE:
2076       return "EXTENDEE";
2077     case DEFAULT_VALUE:
2078       return "DEFAULT_VALUE";
2079     case OPTION_NAME:
2080       return "OPTION_NAME";
2081     case OPTION_VALUE:
2082       return "OPTION_VALUE";
2083     case INPUT_TYPE:
2084       return "INPUT_TYPE";
2085     case OUTPUT_TYPE:
2086       return "OUTPUT_TYPE";
2087     case IMPORT:
2088       return "IMPORT";
2089     case EDITIONS:
2090       return "EDITIONS";
2091     case OTHER:
2092       return "OTHER";
2093   }
2094   return "UNKNOWN";
2095 }
2096 
// Default constructor: no mutex, no fallback database, no default error
// collector and no underlay.  Starts with a fresh Tables instance;
// enforce_dependencies_ is the only flag that starts out true.
DescriptorPool::DescriptorPool()
    : mutex_(nullptr),
      fallback_database_(nullptr),
      default_error_collector_(nullptr),
      underlay_(nullptr),
      tables_(new Tables),
      enforce_dependencies_(true),
      lazily_build_dependencies_(false),
      allow_unknown_(false),
      enforce_weak_(false),
      enforce_extension_declarations_(false),
      disallow_enforce_utf8_(false),
      deprecated_legacy_json_field_conflicts_(false) {}
2110 
// Constructs a pool backed by `fallback_database`, with `error_collector` as
// its default error collector.  Unlike the other constructors, this one
// allocates a mutex (released in the destructor).  There is no underlay, and
// the flags get the same initial values as in the default constructor.
DescriptorPool::DescriptorPool(DescriptorDatabase* fallback_database,
                               ErrorCollector* error_collector)
    : mutex_(new absl::Mutex),
      fallback_database_(fallback_database),
      default_error_collector_(error_collector),
      underlay_(nullptr),
      tables_(new Tables),
      enforce_dependencies_(true),
      lazily_build_dependencies_(false),
      allow_unknown_(false),
      enforce_weak_(false),
      enforce_extension_declarations_(false),
      disallow_enforce_utf8_(false),
      deprecated_legacy_json_field_conflicts_(false) {}
2125 
// Constructs a pool layered on top of `underlay`.  No mutex, fallback
// database or default error collector is set, and the flags get the same
// initial values as in the default constructor.
DescriptorPool::DescriptorPool(const DescriptorPool* underlay)
    : mutex_(nullptr),
      fallback_database_(nullptr),
      default_error_collector_(nullptr),
      underlay_(underlay),
      tables_(new Tables),
      enforce_dependencies_(true),
      lazily_build_dependencies_(false),
      allow_unknown_(false),
      enforce_weak_(false),
      enforce_extension_declarations_(false),
      disallow_enforce_utf8_(false),
      deprecated_legacy_json_field_conflicts_(false) {}
2139 
~DescriptorPool()2140 DescriptorPool::~DescriptorPool() {
2141   if (mutex_ != nullptr) delete mutex_;
2142 }
2143 
2144 // DescriptorPool::BuildFile() defined later.
2145 // DescriptorPool::BuildFileCollectingErrors() defined later.
2146 
// Clears the enforce_dependencies_ flag, which the constructors set to true.
void DescriptorPool::InternalDontEnforceDependencies() {
  enforce_dependencies_ = false;
}
2150 
// Records `is_error` for `file_name` in direct_input_files_, overwriting any
// value previously stored under that name.
void DescriptorPool::AddDirectInputFile(absl::string_view file_name,
                                        bool is_error) {
  direct_input_files_[file_name] = is_error;
}
2155 
IsReadyForCheckingDescriptorExtDecl(absl::string_view message_name) const2156 bool DescriptorPool::IsReadyForCheckingDescriptorExtDecl(
2157     absl::string_view message_name) const {
2158   static const auto& kDescriptorTypes = *new absl::flat_hash_set<std::string>({
2159       "google.protobuf.EnumOptions",
2160       "google.protobuf.EnumValueOptions",
2161       "google.protobuf.ExtensionRangeOptions",
2162       "google.protobuf.FieldOptions",
2163       "google.protobuf.FileOptions",
2164       "google.protobuf.MessageOptions",
2165       "google.protobuf.MethodOptions",
2166       "google.protobuf.OneofOptions",
2167       "google.protobuf.ServiceOptions",
2168       "google.protobuf.StreamOptions",
2169   });
2170   return kDescriptorTypes.contains(message_name);
2171 }
2172 
2173 
// Removes every entry from direct_input_files_.
void DescriptorPool::ClearDirectInputFiles() { direct_input_files_.clear(); }
2175 
InternalIsFileLoaded(absl::string_view filename) const2176 bool DescriptorPool::InternalIsFileLoaded(absl::string_view filename) const {
2177   absl::MutexLockMaybe lock(mutex_);
2178   return tables_->FindFile(filename) != nullptr;
2179 }
2180 
2181 // generated_pool ====================================================
2182 
2183 namespace {
2184 
2185 
GeneratedDatabase()2186 EncodedDescriptorDatabase* GeneratedDatabase() {
2187   static auto generated_database =
2188       internal::OnShutdownDelete(new EncodedDescriptorDatabase());
2189   return generated_database;
2190 }
2191 
NewGeneratedPool()2192 DescriptorPool* NewGeneratedPool() {
2193   auto generated_pool = new DescriptorPool(GeneratedDatabase());
2194   generated_pool->InternalSetLazilyBuildDependencies();
2195   return generated_pool;
2196 }
2197 
2198 }  // anonymous namespace
2199 
// Returns the database from GeneratedDatabase() as a DescriptorDatabase*.
DescriptorDatabase* DescriptorPool::internal_generated_database() {
  return GeneratedDatabase();
}
2203 
internal_generated_pool()2204 DescriptorPool* DescriptorPool::internal_generated_pool() {
2205   static DescriptorPool* generated_pool =
2206       internal::OnShutdownDelete(NewGeneratedPool());
2207   return generated_pool;
2208 }
2209 
generated_pool()2210 const DescriptorPool* DescriptorPool::generated_pool() {
2211   const DescriptorPool* pool = internal_generated_pool();
2212   // Ensure that descriptor.proto and cpp_features.proto get registered in the
2213   // generated pool. They're special cases because they're included in the full
2214   // runtime. We have to avoid registering it pre-main, because we need to
2215   // ensure that the linker --gc-sections step can strip out the full runtime if
2216   // it is unused.
2217   DescriptorProto::descriptor();
2218   pb::CppFeatures::descriptor();
2219   return pool;
2220 }
2221 
2222 
// Registers the `size` bytes at `encoded_file_descriptor` with the generated
// database, under the generated pool's mutex.  The ABSL_CHECK fails if the
// database's Add() call returns false.
void DescriptorPool::InternalAddGeneratedFile(
    const void* encoded_file_descriptor, int size) {
  // So, this function is called in the process of initializing the
  // descriptors for generated proto classes.  Each generated .pb.cc file
  // has an internal procedure called AddDescriptors() which is called at
  // process startup, and that function calls this one in order to register
  // the raw bytes of the FileDescriptorProto representing the file.
  //
  // We do not actually construct the descriptor objects right away.  We just
  // hang on to the bytes until they are actually needed.  We actually construct
  // the descriptor the first time one of the following things happens:
  // * Someone calls a method like descriptor(), GetDescriptor(), or
  //   GetReflection() on the generated types, which requires returning the
  //   descriptor or an object based on it.
  // * Someone looks up the descriptor in DescriptorPool::generated_pool().
  //
  // Once one of these happens, the DescriptorPool actually parses the
  // FileDescriptorProto and generates a FileDescriptor (and all its children)
  // based on it.
  //
  // Note that FileDescriptorProto is itself a generated protocol message.
  // Therefore, when we parse one, we have to be very careful to avoid using
  // any descriptor-based operations, since this might cause infinite recursion
  // or deadlock.
  absl::MutexLockMaybe lock(internal_generated_pool()->mutex_);
  ABSL_CHECK(GeneratedDatabase()->Add(encoded_file_descriptor, size));
}
2250 
2251 
2252 // Find*By* methods ==================================================
2253 
2254 // TODO:  There's a lot of repeated code here, but I'm not sure if
2255 //   there's any good way to factor it out.  Think about this some time when
2256 //   there's nothing more important to do (read: never).
2257 
FindFileByName(absl::string_view name) const2258 const FileDescriptor* DescriptorPool::FindFileByName(
2259     absl::string_view name) const {
2260   DeferredValidation deferred_validation(this);
2261   const FileDescriptor* result = nullptr;
2262   {
2263     absl::MutexLockMaybe lock(mutex_);
2264     if (fallback_database_ != nullptr) {
2265       tables_->known_bad_symbols_.clear();
2266       tables_->known_bad_files_.clear();
2267     }
2268     result = tables_->FindFile(name);
2269     if (result != nullptr) return result;
2270     if (underlay_ != nullptr) {
2271       result = underlay_->FindFileByName(name);
2272       if (result != nullptr) return result;
2273     }
2274     if (TryFindFileInFallbackDatabase(name, deferred_validation)) {
2275       result = tables_->FindFile(name);
2276     }
2277   }
2278   if (!deferred_validation.Validate()) {
2279     return nullptr;
2280   }
2281   return result;
2282 }
2283 
FindFileContainingSymbol(absl::string_view symbol_name) const2284 const FileDescriptor* DescriptorPool::FindFileContainingSymbol(
2285     absl::string_view symbol_name) const {
2286   const FileDescriptor* file_result = nullptr;
2287   DeferredValidation deferred_validation(this);
2288   {
2289     absl::MutexLockMaybe lock(mutex_);
2290     if (fallback_database_ != nullptr) {
2291       tables_->known_bad_symbols_.clear();
2292       tables_->known_bad_files_.clear();
2293     }
2294     Symbol result = tables_->FindSymbol(symbol_name);
2295     if (!result.IsNull()) return result.GetFile();
2296     if (underlay_ != nullptr) {
2297       file_result = underlay_->FindFileContainingSymbol(symbol_name);
2298       if (file_result != nullptr) return file_result;
2299     }
2300     if (TryFindSymbolInFallbackDatabase(symbol_name, deferred_validation)) {
2301       result = tables_->FindSymbol(symbol_name);
2302       if (!result.IsNull()) file_result = result.GetFile();
2303     }
2304   }
2305   if (!deferred_validation.Validate()) {
2306     return nullptr;
2307   }
2308   return file_result;
2309 }
2310 
FindMessageTypeByName(absl::string_view name) const2311 const Descriptor* DescriptorPool::FindMessageTypeByName(
2312     absl::string_view name) const {
2313   return tables_->FindByNameHelper(this, name).descriptor();
2314 }
2315 
FindFieldByName(absl::string_view name) const2316 const FieldDescriptor* DescriptorPool::FindFieldByName(
2317     absl::string_view name) const {
2318   if (const FieldDescriptor* field =
2319           tables_->FindByNameHelper(this, name).field_descriptor()) {
2320     if (!field->is_extension()) {
2321       return field;
2322     }
2323   }
2324   return nullptr;
2325 }
2326 
FindExtensionByName(absl::string_view name) const2327 const FieldDescriptor* DescriptorPool::FindExtensionByName(
2328     absl::string_view name) const {
2329   if (const FieldDescriptor* field =
2330           tables_->FindByNameHelper(this, name).field_descriptor()) {
2331     if (field->is_extension()) {
2332       return field;
2333     }
2334   }
2335   return nullptr;
2336 }
2337 
FindOneofByName(absl::string_view name) const2338 const OneofDescriptor* DescriptorPool::FindOneofByName(
2339     absl::string_view name) const {
2340   return tables_->FindByNameHelper(this, name).oneof_descriptor();
2341 }
2342 
FindEnumTypeByName(absl::string_view name) const2343 const EnumDescriptor* DescriptorPool::FindEnumTypeByName(
2344     absl::string_view name) const {
2345   return tables_->FindByNameHelper(this, name).enum_descriptor();
2346 }
2347 
FindEnumValueByName(absl::string_view name) const2348 const EnumValueDescriptor* DescriptorPool::FindEnumValueByName(
2349     absl::string_view name) const {
2350   return tables_->FindByNameHelper(this, name).enum_value_descriptor();
2351 }
2352 
FindServiceByName(absl::string_view name) const2353 const ServiceDescriptor* DescriptorPool::FindServiceByName(
2354     absl::string_view name) const {
2355   return tables_->FindByNameHelper(this, name).service_descriptor();
2356 }
2357 
FindMethodByName(absl::string_view name) const2358 const MethodDescriptor* DescriptorPool::FindMethodByName(
2359     absl::string_view name) const {
2360   return tables_->FindByNameHelper(this, name).method_descriptor();
2361 }
2362 
FindExtensionByNumber(const Descriptor * extendee,int number) const2363 const FieldDescriptor* DescriptorPool::FindExtensionByNumber(
2364     const Descriptor* extendee, int number) const {
2365   if (extendee->extension_range_count() == 0) return nullptr;
2366   // A faster path to reduce lock contention in finding extensions, assuming
2367   // most extensions will be cache hit.
2368   if (mutex_ != nullptr) {
2369     absl::ReaderMutexLock lock(mutex_);
2370     const FieldDescriptor* result = tables_->FindExtension(extendee, number);
2371     if (result != nullptr) {
2372       return result;
2373     }
2374   }
2375   const FieldDescriptor* result = nullptr;
2376   DeferredValidation deferred_validation(this);
2377   {
2378     absl::MutexLockMaybe lock(mutex_);
2379     if (fallback_database_ != nullptr) {
2380       tables_->known_bad_symbols_.clear();
2381       tables_->known_bad_files_.clear();
2382     }
2383     result = tables_->FindExtension(extendee, number);
2384     if (result != nullptr) {
2385       return result;
2386     }
2387     if (underlay_ != nullptr) {
2388       result = underlay_->FindExtensionByNumber(extendee, number);
2389       if (result != nullptr) return result;
2390     }
2391     if (TryFindExtensionInFallbackDatabase(extendee, number,
2392                                            deferred_validation)) {
2393       result = tables_->FindExtension(extendee, number);
2394     }
2395   }
2396   if (!deferred_validation.Validate()) {
2397     return nullptr;
2398   }
2399   return result;
2400 }
2401 
InternalFindExtensionByNumberNoLock(const Descriptor * extendee,int number) const2402 const FieldDescriptor* DescriptorPool::InternalFindExtensionByNumberNoLock(
2403     const Descriptor* extendee, int number) const {
2404   if (extendee->extension_range_count() == 0) return nullptr;
2405 
2406   const FieldDescriptor* result = tables_->FindExtension(extendee, number);
2407   if (result != nullptr) {
2408     return result;
2409   }
2410 
2411   if (underlay_ != nullptr) {
2412     result = underlay_->InternalFindExtensionByNumberNoLock(extendee, number);
2413     if (result != nullptr) return result;
2414   }
2415 
2416   return nullptr;
2417 }
2418 
FindExtensionByPrintableName(const Descriptor * extendee,absl::string_view printable_name) const2419 const FieldDescriptor* DescriptorPool::FindExtensionByPrintableName(
2420     const Descriptor* extendee, absl::string_view printable_name) const {
2421   if (extendee->extension_range_count() == 0) return nullptr;
2422   const FieldDescriptor* result = FindExtensionByName(printable_name);
2423   if (result != nullptr && result->containing_type() == extendee) {
2424     return result;
2425   }
2426   if (extendee->options().message_set_wire_format()) {
2427     // MessageSet extensions may be identified by type name.
2428     const Descriptor* type = FindMessageTypeByName(printable_name);
2429     if (type != nullptr) {
2430       // Look for a matching extension in the foreign type's scope.
2431       const int type_extension_count = type->extension_count();
2432       for (int i = 0; i < type_extension_count; i++) {
2433         const FieldDescriptor* extension = type->extension(i);
2434         if (extension->containing_type() == extendee &&
2435             extension->type() == FieldDescriptor::TYPE_MESSAGE &&
2436             extension->is_optional() && extension->message_type() == type) {
2437           // Found it.
2438           return extension;
2439         }
2440       }
2441     }
2442   }
2443   return nullptr;
2444 }
2445 
FindAllExtensions(const Descriptor * extendee,std::vector<const FieldDescriptor * > * out) const2446 void DescriptorPool::FindAllExtensions(
2447     const Descriptor* extendee,
2448     std::vector<const FieldDescriptor*>* out) const {
2449   DeferredValidation deferred_validation(this);
2450   std::vector<const FieldDescriptor*> extensions;
2451   {
2452     absl::MutexLockMaybe lock(mutex_);
2453     if (fallback_database_ != nullptr) {
2454       tables_->known_bad_symbols_.clear();
2455       tables_->known_bad_files_.clear();
2456     }
2457 
2458     // Initialize tables_->extensions_ from the fallback database first
2459     // (but do this only once per descriptor).
2460     if (fallback_database_ != nullptr &&
2461         tables_->extensions_loaded_from_db_.count(extendee) == 0) {
2462       std::vector<int> numbers;
2463       if (fallback_database_->FindAllExtensionNumbers(
2464               std::string(extendee->full_name()), &numbers)) {
2465         for (int number : numbers) {
2466           if (tables_->FindExtension(extendee, number) == nullptr) {
2467             TryFindExtensionInFallbackDatabase(extendee, number,
2468                                                deferred_validation);
2469           }
2470         }
2471         tables_->extensions_loaded_from_db_.insert(extendee);
2472       }
2473     }
2474 
2475     tables_->FindAllExtensions(extendee, &extensions);
2476     if (underlay_ != nullptr) {
2477       underlay_->FindAllExtensions(extendee, &extensions);
2478     }
2479   }
2480   if (deferred_validation.Validate()) {
2481     out->insert(out->end(), extensions.begin(), extensions.end());
2482   }
2483 }
2484 
2485 
2486 // -------------------------------------------------------------------
2487 
FindFieldByNumber(int number) const2488 const FieldDescriptor* Descriptor::FindFieldByNumber(int number) const {
2489   const FieldDescriptor* result =
2490       file()->tables_->FindFieldByNumber(this, number);
2491   if (result == nullptr || result->is_extension()) {
2492     return nullptr;
2493   } else {
2494     return result;
2495   }
2496 }
2497 
FindFieldByLowercaseName(absl::string_view lowercase_name) const2498 const FieldDescriptor* Descriptor::FindFieldByLowercaseName(
2499     absl::string_view lowercase_name) const {
2500   const FieldDescriptor* result =
2501       file()->tables_->FindFieldByLowercaseName(this, lowercase_name);
2502   if (result == nullptr || result->is_extension()) {
2503     return nullptr;
2504   } else {
2505     return result;
2506   }
2507 }
2508 
FindFieldByCamelcaseName(absl::string_view camelcase_name) const2509 const FieldDescriptor* Descriptor::FindFieldByCamelcaseName(
2510     absl::string_view camelcase_name) const {
2511   const FieldDescriptor* result =
2512       file()->tables_->FindFieldByCamelcaseName(this, camelcase_name);
2513   if (result == nullptr || result->is_extension()) {
2514     return nullptr;
2515   } else {
2516     return result;
2517   }
2518 }
2519 
FindFieldByName(absl::string_view name) const2520 const FieldDescriptor* Descriptor::FindFieldByName(
2521     absl::string_view name) const {
2522   const FieldDescriptor* field =
2523       file()->tables_->FindNestedSymbol(this, name).field_descriptor();
2524   return field != nullptr && !field->is_extension() ? field : nullptr;
2525 }
2526 
FindOneofByName(absl::string_view name) const2527 const OneofDescriptor* Descriptor::FindOneofByName(
2528     absl::string_view name) const {
2529   return file()->tables_->FindNestedSymbol(this, name).oneof_descriptor();
2530 }
2531 
FindExtensionByName(absl::string_view name) const2532 const FieldDescriptor* Descriptor::FindExtensionByName(
2533     absl::string_view name) const {
2534   const FieldDescriptor* field =
2535       file()->tables_->FindNestedSymbol(this, name).field_descriptor();
2536   return field != nullptr && field->is_extension() ? field : nullptr;
2537 }
2538 
FindExtensionByLowercaseName(absl::string_view name) const2539 const FieldDescriptor* Descriptor::FindExtensionByLowercaseName(
2540     absl::string_view name) const {
2541   const FieldDescriptor* result =
2542       file()->tables_->FindFieldByLowercaseName(this, name);
2543   if (result == nullptr || !result->is_extension()) {
2544     return nullptr;
2545   } else {
2546     return result;
2547   }
2548 }
2549 
FindExtensionByCamelcaseName(absl::string_view name) const2550 const FieldDescriptor* Descriptor::FindExtensionByCamelcaseName(
2551     absl::string_view name) const {
2552   const FieldDescriptor* result =
2553       file()->tables_->FindFieldByCamelcaseName(this, name);
2554   if (result == nullptr || !result->is_extension()) {
2555     return nullptr;
2556   } else {
2557     return result;
2558   }
2559 }
2560 
FindNestedTypeByName(absl::string_view name) const2561 const Descriptor* Descriptor::FindNestedTypeByName(
2562     absl::string_view name) const {
2563   return file()->tables_->FindNestedSymbol(this, name).descriptor();
2564 }
2565 
FindEnumTypeByName(absl::string_view name) const2566 const EnumDescriptor* Descriptor::FindEnumTypeByName(
2567     absl::string_view name) const {
2568   return file()->tables_->FindNestedSymbol(this, name).enum_descriptor();
2569 }
2570 
FindEnumValueByName(absl::string_view name) const2571 const EnumValueDescriptor* Descriptor::FindEnumValueByName(
2572     absl::string_view name) const {
2573   return file()->tables_->FindNestedSymbol(this, name).enum_value_descriptor();
2574 }
2575 
map_key() const2576 const FieldDescriptor* Descriptor::map_key() const {
2577   if (!options().map_entry()) return nullptr;
2578   ABSL_DCHECK_EQ(field_count(), 2);
2579   return field(0);
2580 }
2581 
map_value() const2582 const FieldDescriptor* Descriptor::map_value() const {
2583   if (!options().map_entry()) return nullptr;
2584   ABSL_DCHECK_EQ(field_count(), 2);
2585   return field(1);
2586 }
2587 
FindValueByName(absl::string_view name) const2588 const EnumValueDescriptor* EnumDescriptor::FindValueByName(
2589     absl::string_view name) const {
2590   return file()->tables_->FindNestedSymbol(this, name).enum_value_descriptor();
2591 }
2592 
FindValueByNumber(int number) const2593 const EnumValueDescriptor* EnumDescriptor::FindValueByNumber(int number) const {
2594   return file()->tables_->FindEnumValueByNumber(this, number);
2595 }
2596 
FindValueByNumberCreatingIfUnknown(int number) const2597 const EnumValueDescriptor* EnumDescriptor::FindValueByNumberCreatingIfUnknown(
2598     int number) const {
2599   return file()->tables_->FindEnumValueByNumberCreatingIfUnknown(this, number);
2600 }
2601 
FindMethodByName(absl::string_view name) const2602 const MethodDescriptor* ServiceDescriptor::FindMethodByName(
2603     absl::string_view name) const {
2604   return file()->tables_->FindNestedSymbol(this, name).method_descriptor();
2605 }
2606 
FindMessageTypeByName(absl::string_view name) const2607 const Descriptor* FileDescriptor::FindMessageTypeByName(
2608     absl::string_view name) const {
2609   return tables_->FindNestedSymbol(this, name).descriptor();
2610 }
2611 
FindEnumTypeByName(absl::string_view name) const2612 const EnumDescriptor* FileDescriptor::FindEnumTypeByName(
2613     absl::string_view name) const {
2614   return tables_->FindNestedSymbol(this, name).enum_descriptor();
2615 }
2616 
FindEnumValueByName(absl::string_view name) const2617 const EnumValueDescriptor* FileDescriptor::FindEnumValueByName(
2618     absl::string_view name) const {
2619   return tables_->FindNestedSymbol(this, name).enum_value_descriptor();
2620 }
2621 
FindServiceByName(absl::string_view name) const2622 const ServiceDescriptor* FileDescriptor::FindServiceByName(
2623     absl::string_view name) const {
2624   return tables_->FindNestedSymbol(this, name).service_descriptor();
2625 }
2626 
FindExtensionByName(absl::string_view name) const2627 const FieldDescriptor* FileDescriptor::FindExtensionByName(
2628     absl::string_view name) const {
2629   const FieldDescriptor* field =
2630       tables_->FindNestedSymbol(this, name).field_descriptor();
2631   return field != nullptr && field->is_extension() ? field : nullptr;
2632 }
2633 
FindExtensionByLowercaseName(absl::string_view name) const2634 const FieldDescriptor* FileDescriptor::FindExtensionByLowercaseName(
2635     absl::string_view name) const {
2636   const FieldDescriptor* result = tables_->FindFieldByLowercaseName(this, name);
2637   if (result == nullptr || !result->is_extension()) {
2638     return nullptr;
2639   } else {
2640     return result;
2641   }
2642 }
2643 
FindExtensionByCamelcaseName(absl::string_view name) const2644 const FieldDescriptor* FileDescriptor::FindExtensionByCamelcaseName(
2645     absl::string_view name) const {
2646   const FieldDescriptor* result = tables_->FindFieldByCamelcaseName(this, name);
2647   if (result == nullptr || !result->is_extension()) {
2648     return nullptr;
2649   } else {
2650     return result;
2651   }
2652 }
2653 
CopyTo(DescriptorProto_ExtensionRange * proto) const2654 void Descriptor::ExtensionRange::CopyTo(
2655     DescriptorProto_ExtensionRange* proto) const {
2656   proto->set_start(start_);
2657   proto->set_end(end_);
2658   if (options_ != &ExtensionRangeOptions::default_instance()) {
2659     *proto->mutable_options() = *options_;
2660   }
2661   RestoreFeaturesToOptions(proto_features_, proto);
2662 }
2663 
2664 const Descriptor::ExtensionRange*
FindExtensionRangeContainingNumber(int number) const2665 Descriptor::FindExtensionRangeContainingNumber(int number) const {
2666   // Linear search should be fine because we don't expect a message to have
2667   // more than a couple extension ranges.
2668   for (int i = 0; i < extension_range_count(); i++) {
2669     if (number >= extension_range(i)->start_number() &&
2670         number < extension_range(i)->end_number()) {
2671       return extension_range(i);
2672     }
2673   }
2674   return nullptr;
2675 }
2676 
FindReservedRangeContainingNumber(int number) const2677 const Descriptor::ReservedRange* Descriptor::FindReservedRangeContainingNumber(
2678     int number) const {
2679   // TODO: Consider a non-linear search.
2680   for (int i = 0; i < reserved_range_count(); i++) {
2681     if (number >= reserved_range(i)->start && number < reserved_range(i)->end) {
2682       return reserved_range(i);
2683     }
2684   }
2685   return nullptr;
2686 }
2687 
2688 const EnumDescriptor::ReservedRange*
FindReservedRangeContainingNumber(int number) const2689 EnumDescriptor::FindReservedRangeContainingNumber(int number) const {
2690   // TODO: Consider a non-linear search.
2691   for (int i = 0; i < reserved_range_count(); i++) {
2692     if (number >= reserved_range(i)->start &&
2693         number <= reserved_range(i)->end) {
2694       return reserved_range(i);
2695     }
2696   }
2697   return nullptr;
2698 }
2699 
2700 // -------------------------------------------------------------------
2701 
TryFindFileInFallbackDatabase(absl::string_view name,DeferredValidation & deferred_validation) const2702 bool DescriptorPool::TryFindFileInFallbackDatabase(
2703     absl::string_view name, DeferredValidation& deferred_validation) const {
2704   if (fallback_database_ == nullptr) return false;
2705 
2706   if (tables_->known_bad_files_.contains(name)) return false;
2707 
2708   // NOINLINE to reduce the stack cost of the operation in the caller.
2709   const auto find_file = [](DescriptorDatabase& database,
2710                             absl::string_view filename,
2711                             FileDescriptorProto& output) PROTOBUF_NOINLINE {
2712     return database.FindFileByName(std::string(filename), &output);
2713   };
2714 
2715   auto& file_proto = deferred_validation.CreateProto();
2716   if (!find_file(*fallback_database_, name, file_proto) ||
2717       BuildFileFromDatabase(file_proto, deferred_validation) == nullptr) {
2718     tables_->known_bad_files_.emplace(name);
2719     return false;
2720   }
2721   return true;
2722 }
2723 
IsSubSymbolOfBuiltType(absl::string_view name) const2724 bool DescriptorPool::IsSubSymbolOfBuiltType(absl::string_view name) const {
2725   for (size_t pos = name.find('.'); pos != name.npos;
2726        pos = name.find('.', pos + 1)) {
2727     auto prefix = name.substr(0, pos);
2728     Symbol symbol = tables_->FindSymbol(prefix);
2729     if (symbol.IsNull()) {
2730       break;
2731     }
2732     if (!symbol.IsPackage()) {
2733       // If the symbol type is anything other than PACKAGE, then its complete
2734       // definition is already known.
2735       return true;
2736     }
2737   }
2738   if (underlay_ != nullptr) {
2739     // Check to see if any prefix of this symbol exists in the underlay.
2740     return underlay_->IsSubSymbolOfBuiltType(name);
2741   }
2742   return false;
2743 }
2744 
TryFindSymbolInFallbackDatabase(absl::string_view name,DeferredValidation & deferred_validation) const2745 bool DescriptorPool::TryFindSymbolInFallbackDatabase(
2746     absl::string_view name, DeferredValidation& deferred_validation) const {
2747   if (fallback_database_ == nullptr) return false;
2748 
2749   if (tables_->known_bad_symbols_.contains(name)) return false;
2750 
2751   std::string name_string(name);
2752   auto& file_proto = deferred_validation.CreateProto();
2753   if (  // We skip looking in the fallback database if the name is a sub-symbol
2754         // of any descriptor that already exists in the descriptor pool (except
2755         // for package descriptors).  This is valid because all symbols except
2756         // for packages are defined in a single file, so if the symbol exists
2757         // then we should already have its definition.
2758         //
2759         // The other reason to do this is to support "overriding" type
2760         // definitions by merging two databases that define the same type. (Yes,
2761         // people do this.)  The main difficulty with making this work is that
2762         // FindFileContainingSymbol() is allowed to return both false positives
2763         // (e.g., SimpleDescriptorDatabase, UpgradedDescriptorDatabase) and
2764         // false negatives (e.g. ProtoFileParser, SourceTreeDescriptorDatabase).
2765         // When two such databases are merged, looking up a non-existent
2766         // sub-symbol of a type that already exists in the descriptor pool can
2767         // result in an attempt to load multiple definitions of the same type.
2768         // The check below avoids this.
2769       IsSubSymbolOfBuiltType(name)
2770 
2771       // Look up file containing this symbol in fallback database.
2772       || !fallback_database_->FindFileContainingSymbol(name_string, &file_proto)
2773 
2774       // Check if we've already built this file. If so, it apparently doesn't
2775       // contain the symbol we're looking for.  Some DescriptorDatabases
2776       // return false positives.
2777       || tables_->FindFile(file_proto.name()) != nullptr
2778 
2779       // Build the file.
2780       || BuildFileFromDatabase(file_proto, deferred_validation) == nullptr) {
2781     tables_->known_bad_symbols_.insert(std::move(name_string));
2782     return false;
2783   }
2784 
2785   return true;
2786 }
2787 
TryFindExtensionInFallbackDatabase(const Descriptor * containing_type,int field_number,DeferredValidation & deferred_validation) const2788 bool DescriptorPool::TryFindExtensionInFallbackDatabase(
2789     const Descriptor* containing_type, int field_number,
2790     DeferredValidation& deferred_validation) const {
2791   if (fallback_database_ == nullptr) return false;
2792 
2793   auto& file_proto = deferred_validation.CreateProto();
2794   if (!fallback_database_->FindFileContainingExtension(
2795           std::string(containing_type->full_name()), field_number,
2796           &file_proto)) {
2797     return false;
2798   }
2799 
2800   if (tables_->FindFile(file_proto.name()) != nullptr) {
2801     // We've already loaded this file, and it apparently doesn't contain the
2802     // extension we're looking for.  Some DescriptorDatabases return false
2803     // positives.
2804     return false;
2805   }
2806 
2807   if (BuildFileFromDatabase(file_proto, deferred_validation) == nullptr) {
2808     return false;
2809   }
2810 
2811   return true;
2812 }
2813 
2814 // ===================================================================
2815 
is_map_message_type() const2816 bool FieldDescriptor::is_map_message_type() const {
2817   return message_type()->options().map_entry();
2818 }
2819 
DefaultValueAsString(bool quote_string_type) const2820 std::string FieldDescriptor::DefaultValueAsString(
2821     bool quote_string_type) const {
2822   ABSL_CHECK(has_default_value()) << "No default value";
2823   switch (cpp_type()) {
2824     case CPPTYPE_INT32:
2825       return absl::StrCat(default_value_int32_t());
2826     case CPPTYPE_INT64:
2827       return absl::StrCat(default_value_int64_t());
2828     case CPPTYPE_UINT32:
2829       return absl::StrCat(default_value_uint32_t());
2830     case CPPTYPE_UINT64:
2831       return absl::StrCat(default_value_uint64_t());
2832     case CPPTYPE_FLOAT:
2833       return io::SimpleFtoa(default_value_float());
2834     case CPPTYPE_DOUBLE:
2835       return io::SimpleDtoa(default_value_double());
2836     case CPPTYPE_BOOL:
2837       return default_value_bool() ? "true" : "false";
2838     case CPPTYPE_STRING:
2839       if (quote_string_type) {
2840         return absl::StrCat("\"", absl::CEscape(default_value_string()), "\"");
2841       } else {
2842         if (type() == TYPE_BYTES) {
2843           return absl::CEscape(default_value_string());
2844         } else {
2845           return std::string(default_value_string());
2846         }
2847       }
2848     case CPPTYPE_ENUM:
2849       return std::string(default_value_enum()->name());
2850     case CPPTYPE_MESSAGE:
2851       ABSL_DLOG(FATAL) << "Messages can't have default values!";
2852       break;
2853   }
2854   ABSL_LOG(FATAL) << "Can't get here: failed to get default value as string";
2855   return "";
2856 }
2857 
2858 // Out-of-line constructor definitions ==============================
2859 // When using constructor type homing in Clang, debug info for a type
2860 // is only emitted when a constructor definition is emitted, as an
2861 // optimization. These constructors are never called, so we define them
2862 // out of line to make sure the debug info is emitted somewhere.
2863 
2864 Descriptor::Descriptor() = default;
FieldDescriptor()2865 FieldDescriptor::FieldDescriptor() {}
2866 OneofDescriptor::OneofDescriptor() = default;
2867 EnumDescriptor::EnumDescriptor() = default;
2868 EnumValueDescriptor::EnumValueDescriptor() = default;
2869 ServiceDescriptor::ServiceDescriptor() = default;
2870 MethodDescriptor::MethodDescriptor() = default;
2871 FileDescriptor::FileDescriptor() = default;
2872 
2873 // CopyTo methods ====================================================
2874 
CopyTo(FileDescriptorProto * proto) const2875 void FileDescriptor::CopyTo(FileDescriptorProto* proto) const {
2876   CopyHeadingTo(proto);
2877 
2878   for (int i = 0; i < dependency_count(); i++) {
2879     proto->add_dependency(dependency(i)->name());
2880   }
2881 
2882   for (int i = 0; i < public_dependency_count(); i++) {
2883     proto->add_public_dependency(public_dependencies_[i]);
2884   }
2885 
2886   for (int i = 0; i < weak_dependency_count(); i++) {
2887     proto->add_weak_dependency(weak_dependencies_[i]);
2888   }
2889 
2890   for (int i = 0; i < message_type_count(); i++) {
2891     message_type(i)->CopyTo(proto->add_message_type());
2892   }
2893   for (int i = 0; i < enum_type_count(); i++) {
2894     enum_type(i)->CopyTo(proto->add_enum_type());
2895   }
2896   for (int i = 0; i < service_count(); i++) {
2897     service(i)->CopyTo(proto->add_service());
2898   }
2899   for (int i = 0; i < extension_count(); i++) {
2900     extension(i)->CopyTo(proto->add_extension());
2901   }
2902 }
2903 
CopyHeadingTo(FileDescriptorProto * proto) const2904 void FileDescriptor::CopyHeadingTo(FileDescriptorProto* proto) const {
2905   proto->set_name(name());
2906   if (!package().empty()) {
2907     proto->set_package(package());
2908   }
2909 
2910   if (edition() == Edition::EDITION_PROTO3) {
2911     proto->set_syntax("proto3");
2912   } else if (!IsLegacyEdition(edition())) {
2913     proto->set_syntax("editions");
2914     proto->set_edition(edition());
2915   }
2916 
2917   if (&options() != &FileOptions::default_instance()) {
2918     *proto->mutable_options() = options();
2919   }
2920   RestoreFeaturesToOptions(proto_features_, proto);
2921 }
2922 
CopyJsonNameTo(FileDescriptorProto * proto) const2923 void FileDescriptor::CopyJsonNameTo(FileDescriptorProto* proto) const {
2924   if (message_type_count() != proto->message_type_size() ||
2925       extension_count() != proto->extension_size()) {
2926     ABSL_LOG(ERROR) << "Cannot copy json_name to a proto of a different size.";
2927     return;
2928   }
2929   for (int i = 0; i < message_type_count(); i++) {
2930     message_type(i)->CopyJsonNameTo(proto->mutable_message_type(i));
2931   }
2932   for (int i = 0; i < extension_count(); i++) {
2933     extension(i)->CopyJsonNameTo(proto->mutable_extension(i));
2934   }
2935 }
2936 
CopySourceCodeInfoTo(FileDescriptorProto * proto) const2937 void FileDescriptor::CopySourceCodeInfoTo(FileDescriptorProto* proto) const {
2938   if (source_code_info_ &&
2939       source_code_info_ != &SourceCodeInfo::default_instance()) {
2940     *proto->mutable_source_code_info() = *source_code_info_;
2941   }
2942 }
2943 
CopyTo(DescriptorProto * proto) const2944 void Descriptor::CopyTo(DescriptorProto* proto) const {
2945   CopyHeadingTo(proto);
2946 
2947   for (int i = 0; i < field_count(); i++) {
2948     field(i)->CopyTo(proto->add_field());
2949   }
2950   for (int i = 0; i < oneof_decl_count(); i++) {
2951     oneof_decl(i)->CopyTo(proto->add_oneof_decl());
2952   }
2953   for (int i = 0; i < nested_type_count(); i++) {
2954     nested_type(i)->CopyTo(proto->add_nested_type());
2955   }
2956   for (int i = 0; i < enum_type_count(); i++) {
2957     enum_type(i)->CopyTo(proto->add_enum_type());
2958   }
2959   for (int i = 0; i < extension_range_count(); i++) {
2960     extension_range(i)->CopyTo(proto->add_extension_range());
2961   }
2962   for (int i = 0; i < extension_count(); i++) {
2963     extension(i)->CopyTo(proto->add_extension());
2964   }
2965 }
2966 
CopyHeadingTo(DescriptorProto * proto) const2967 void Descriptor::CopyHeadingTo(DescriptorProto* proto) const {
2968   proto->set_name(name());
2969 
2970   for (int i = 0; i < reserved_range_count(); i++) {
2971     DescriptorProto::ReservedRange* range = proto->add_reserved_range();
2972     range->set_start(reserved_range(i)->start);
2973     range->set_end(reserved_range(i)->end);
2974   }
2975   for (int i = 0; i < reserved_name_count(); i++) {
2976     proto->add_reserved_name(reserved_name(i));
2977   }
2978 
2979   if (&options() != &MessageOptions::default_instance()) {
2980     *proto->mutable_options() = options();
2981   }
2982 
2983   RestoreFeaturesToOptions(proto_features_, proto);
2984 }
2985 
CopyJsonNameTo(DescriptorProto * proto) const2986 void Descriptor::CopyJsonNameTo(DescriptorProto* proto) const {
2987   if (field_count() != proto->field_size() ||
2988       nested_type_count() != proto->nested_type_size() ||
2989       extension_count() != proto->extension_size()) {
2990     ABSL_LOG(ERROR) << "Cannot copy json_name to a proto of a different size.";
2991     return;
2992   }
2993   for (int i = 0; i < field_count(); i++) {
2994     field(i)->CopyJsonNameTo(proto->mutable_field(i));
2995   }
2996   for (int i = 0; i < nested_type_count(); i++) {
2997     nested_type(i)->CopyJsonNameTo(proto->mutable_nested_type(i));
2998   }
2999   for (int i = 0; i < extension_count(); i++) {
3000     extension(i)->CopyJsonNameTo(proto->mutable_extension(i));
3001   }
3002 }
3003 
CopyTo(FieldDescriptorProto * proto) const3004 void FieldDescriptor::CopyTo(FieldDescriptorProto* proto) const {
3005   proto->set_name(name());
3006   proto->set_number(number());
3007   if (has_json_name_) {
3008     proto->set_json_name(json_name());
3009   }
3010   if (proto3_optional_) {
3011     proto->set_proto3_optional(true);
3012   }
3013   // Some compilers do not allow static_cast directly between two enum types,
3014   // so we must cast to int first.
3015   if (is_required() && !IsLegacyEdition(file()->edition())) {
3016     // Editions files have no required keyword, and we only set this label
3017     // during descriptor build.
3018     proto->set_label(static_cast<FieldDescriptorProto::Label>(
3019         absl::implicit_cast<int>(LABEL_OPTIONAL)));
3020   } else {
3021     proto->set_label(static_cast<FieldDescriptorProto::Label>(
3022         absl::implicit_cast<int>(label())));
3023   }
3024   if (type() == TYPE_GROUP && !IsLegacyEdition(file()->edition())) {
3025     // Editions files have no group keyword, and we only set this label
3026     // during descriptor build.
3027     proto->set_type(static_cast<FieldDescriptorProto::Type>(
3028         absl::implicit_cast<int>(TYPE_MESSAGE)));
3029   } else {
3030     proto->set_type(static_cast<FieldDescriptorProto::Type>(
3031         absl::implicit_cast<int>(type())));
3032   }
3033 
3034   if (is_extension()) {
3035     if (!containing_type()->is_unqualified_placeholder_) {
3036       proto->set_extendee(".");
3037     }
3038     proto->mutable_extendee()->append(containing_type()->full_name());
3039   }
3040 
3041   if (cpp_type() == CPPTYPE_MESSAGE) {
3042     if (message_type()->is_placeholder_) {
3043       // We don't actually know if the type is a message type.  It could be
3044       // an enum.
3045       proto->clear_type();
3046     }
3047 
3048     if (!message_type()->is_unqualified_placeholder_) {
3049       proto->set_type_name(".");
3050     }
3051     proto->mutable_type_name()->append(message_type()->full_name());
3052   } else if (cpp_type() == CPPTYPE_ENUM) {
3053     if (!enum_type()->is_unqualified_placeholder_) {
3054       proto->set_type_name(".");
3055     }
3056     proto->mutable_type_name()->append(enum_type()->full_name());
3057   }
3058 
3059   if (has_default_value()) {
3060     proto->set_default_value(DefaultValueAsString(false));
3061   }
3062 
3063   if (containing_oneof() != nullptr && !is_extension()) {
3064     proto->set_oneof_index(containing_oneof()->index());
3065   }
3066 
3067   if (&options() != &FieldOptions::default_instance()) {
3068     *proto->mutable_options() = options();
3069     if (proto_features_->GetExtension(pb::cpp).has_string_type()) {
3070       // ctype must have been set in InferLegacyProtoFeatures so avoid copying.
3071       proto->mutable_options()->clear_ctype();
3072     }
3073   }
3074 
3075   RestoreFeaturesToOptions(proto_features_, proto);
3076 }
3077 
CopyJsonNameTo(FieldDescriptorProto * proto) const3078 void FieldDescriptor::CopyJsonNameTo(FieldDescriptorProto* proto) const {
3079   proto->set_json_name(json_name());
3080 }
3081 
CopyTo(OneofDescriptorProto * proto) const3082 void OneofDescriptor::CopyTo(OneofDescriptorProto* proto) const {
3083   proto->set_name(name());
3084   if (&options() != &OneofOptions::default_instance()) {
3085     *proto->mutable_options() = options();
3086   }
3087   RestoreFeaturesToOptions(proto_features_, proto);
3088 }
3089 
CopyTo(EnumDescriptorProto * proto) const3090 void EnumDescriptor::CopyTo(EnumDescriptorProto* proto) const {
3091   proto->set_name(name());
3092 
3093   for (int i = 0; i < value_count(); i++) {
3094     value(i)->CopyTo(proto->add_value());
3095   }
3096   for (int i = 0; i < reserved_range_count(); i++) {
3097     EnumDescriptorProto::EnumReservedRange* range = proto->add_reserved_range();
3098     range->set_start(reserved_range(i)->start);
3099     range->set_end(reserved_range(i)->end);
3100   }
3101   for (int i = 0; i < reserved_name_count(); i++) {
3102     proto->add_reserved_name(reserved_name(i));
3103   }
3104 
3105   if (&options() != &EnumOptions::default_instance()) {
3106     *proto->mutable_options() = options();
3107   }
3108   RestoreFeaturesToOptions(proto_features_, proto);
3109 }
3110 
CopyTo(EnumValueDescriptorProto * proto) const3111 void EnumValueDescriptor::CopyTo(EnumValueDescriptorProto* proto) const {
3112   proto->set_name(name());
3113   proto->set_number(number());
3114 
3115   if (&options() != &EnumValueOptions::default_instance()) {
3116     *proto->mutable_options() = options();
3117   }
3118   RestoreFeaturesToOptions(proto_features_, proto);
3119 }
3120 
CopyTo(ServiceDescriptorProto * proto) const3121 void ServiceDescriptor::CopyTo(ServiceDescriptorProto* proto) const {
3122   proto->set_name(name());
3123 
3124   for (int i = 0; i < method_count(); i++) {
3125     method(i)->CopyTo(proto->add_method());
3126   }
3127 
3128   if (&options() != &ServiceOptions::default_instance()) {
3129     *proto->mutable_options() = options();
3130   }
3131   RestoreFeaturesToOptions(proto_features_, proto);
3132 }
3133 
CopyTo(MethodDescriptorProto * proto) const3134 void MethodDescriptor::CopyTo(MethodDescriptorProto* proto) const {
3135   proto->set_name(name());
3136 
3137   if (!input_type()->is_unqualified_placeholder_) {
3138     proto->set_input_type(".");
3139   }
3140   proto->mutable_input_type()->append(input_type()->full_name());
3141 
3142   if (!output_type()->is_unqualified_placeholder_) {
3143     proto->set_output_type(".");
3144   }
3145   proto->mutable_output_type()->append(output_type()->full_name());
3146 
3147   if (&options() != &MethodOptions::default_instance()) {
3148     *proto->mutable_options() = options();
3149   }
3150 
3151   if (client_streaming_) {
3152     proto->set_client_streaming(true);
3153   }
3154   if (server_streaming_) {
3155     proto->set_server_streaming(true);
3156   }
3157   RestoreFeaturesToOptions(proto_features_, proto);
3158 }
3159 
3160 // DebugString methods ===============================================
3161 
3162 namespace {
3163 
IsGroupSyntax(Edition edition,const FieldDescriptor * desc)3164 bool IsGroupSyntax(Edition edition, const FieldDescriptor* desc) {
3165   return IsLegacyEdition(edition) &&
3166          desc->type() == FieldDescriptor::TYPE_GROUP;
3167 }
3168 
3169 template <typename OptionsT>
CopyFeaturesToOptions(const FeatureSet * features,OptionsT * options)3170 void CopyFeaturesToOptions(const FeatureSet* features, OptionsT* options) {
3171   if (features != &FeatureSet::default_instance()) {
3172     *options->mutable_features() = *features;
3173   }
3174 }
3175 
RetrieveOptionsAssumingRightPool(int depth,const Message & options,std::vector<std::string> * option_entries)3176 bool RetrieveOptionsAssumingRightPool(
3177     int depth, const Message& options,
3178     std::vector<std::string>* option_entries) {
3179   option_entries->clear();
3180   const Reflection* reflection = options.GetReflection();
3181   std::vector<const FieldDescriptor*> fields;
3182   reflection->ListFields(options, &fields);
3183   for (const FieldDescriptor* field : fields) {
3184     int count = 1;
3185     bool repeated = false;
3186     if (field->is_repeated()) {
3187       count = reflection->FieldSize(options, field);
3188       repeated = true;
3189     }
3190     for (int j = 0; j < count; j++) {
3191       std::string fieldval;
3192       if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
3193         std::string tmp;
3194         TextFormat::Printer printer;
3195         printer.SetExpandAny(true);
3196         printer.SetInitialIndentLevel(depth + 1);
3197         printer.PrintFieldValueToString(options, field, repeated ? j : -1,
3198                                         &tmp);
3199         fieldval.append("{\n");
3200         fieldval.append(tmp);
3201         fieldval.append(depth * 2, ' ');
3202         fieldval.append("}");
3203       } else {
3204         TextFormat::PrintFieldValueToString(options, field, repeated ? j : -1,
3205                                             &fieldval);
3206       }
3207       std::string name;
3208       if (field->is_extension()) {
3209         name = absl::StrCat("(.", field->full_name(), ")");
3210       } else {
3211         name = field->name();
3212       }
3213       option_entries->push_back(absl::StrCat(name, " = ", fieldval));
3214     }
3215   }
3216   return !option_entries->empty();
3217 }
3218 
3219 // Used by each of the option formatters.
RetrieveOptions(int depth,const Message & options,const DescriptorPool * pool,std::vector<std::string> * option_entries)3220 bool RetrieveOptions(int depth, const Message& options,
3221                      const DescriptorPool* pool,
3222                      std::vector<std::string>* option_entries) {
3223   // When printing custom options for a descriptor, we must use an options
3224   // message built on top of the same DescriptorPool where the descriptor
3225   // is coming from. This is to ensure we are interpreting custom options
3226   // against the right pool.
3227   if (options.GetDescriptor()->file()->pool() == pool) {
3228     return RetrieveOptionsAssumingRightPool(depth, options, option_entries);
3229   } else {
3230     const Descriptor* option_descriptor =
3231         pool->FindMessageTypeByName(options.GetDescriptor()->full_name());
3232     if (option_descriptor == nullptr) {
3233       // descriptor.proto is not in the pool. This means no custom options are
3234       // used so we are safe to proceed with the compiled options message type.
3235       return RetrieveOptionsAssumingRightPool(depth, options, option_entries);
3236     }
3237     DynamicMessageFactory factory;
3238     std::unique_ptr<Message> dynamic_options(
3239         factory.GetPrototype(option_descriptor)->New());
3240     std::string serialized = options.SerializeAsString();
3241     io::CodedInputStream input(
3242         reinterpret_cast<const uint8_t*>(serialized.data()), serialized.size());
3243     input.SetExtensionRegistry(pool, &factory);
3244     if (dynamic_options->ParseFromCodedStream(&input)) {
3245       return RetrieveOptionsAssumingRightPool(depth, *dynamic_options,
3246                                               option_entries);
3247     } else {
3248       ABSL_LOG(ERROR) << "Found invalid proto option data for: "
3249                       << options.GetDescriptor()->full_name();
3250       return RetrieveOptionsAssumingRightPool(depth, options, option_entries);
3251     }
3252   }
3253 }
3254 
3255 // Formats options that all appear together in brackets. Does not include
3256 // brackets.
FormatBracketedOptions(int depth,const Message & options,const DescriptorPool * pool,std::string * output)3257 bool FormatBracketedOptions(int depth, const Message& options,
3258                             const DescriptorPool* pool, std::string* output) {
3259   std::vector<std::string> all_options;
3260   if (RetrieveOptions(depth, options, pool, &all_options)) {
3261     output->append(absl::StrJoin(all_options, ", "));
3262   }
3263   return !all_options.empty();
3264 }
3265 
3266 // Formats options one per line
FormatLineOptions(int depth,const Message & options,const DescriptorPool * pool,std::string * output)3267 bool FormatLineOptions(int depth, const Message& options,
3268                        const DescriptorPool* pool, std::string* output) {
3269   std::string prefix(depth * 2, ' ');
3270   std::vector<std::string> all_options;
3271   if (RetrieveOptions(depth, options, pool, &all_options)) {
3272     for (const std::string& option : all_options) {
3273       absl::SubstituteAndAppend(output, "$0option $1;\n", prefix, option);
3274     }
3275   }
3276   return !all_options.empty();
3277 }
3278 
GetLegacySyntaxName(Edition edition)3279 static std::string GetLegacySyntaxName(Edition edition) {
3280   if (edition == Edition::EDITION_PROTO3) {
3281     return "proto3";
3282   }
3283   return "proto2";
3284 }
3285 
3286 
3287 class SourceLocationCommentPrinter {
3288  public:
3289   template <typename DescType>
SourceLocationCommentPrinter(const DescType * desc,const std::string & prefix,const DebugStringOptions & options)3290   SourceLocationCommentPrinter(const DescType* desc, const std::string& prefix,
3291                                const DebugStringOptions& options)
3292       : options_(options), prefix_(prefix) {
3293     // Perform the SourceLocation lookup only if we're including user comments,
3294     // because the lookup is fairly expensive.
3295     have_source_loc_ =
3296         options.include_comments && desc->GetSourceLocation(&source_loc_);
3297   }
SourceLocationCommentPrinter(const FileDescriptor * file,const std::vector<int> & path,const std::string & prefix,const DebugStringOptions & options)3298   SourceLocationCommentPrinter(const FileDescriptor* file,
3299                                const std::vector<int>& path,
3300                                const std::string& prefix,
3301                                const DebugStringOptions& options)
3302       : options_(options), prefix_(prefix) {
3303     // Perform the SourceLocation lookup only if we're including user comments,
3304     // because the lookup is fairly expensive.
3305     have_source_loc_ =
3306         options.include_comments && file->GetSourceLocation(path, &source_loc_);
3307   }
AddPreComment(std::string * output)3308   void AddPreComment(std::string* output) {
3309     if (have_source_loc_) {
3310       // Detached leading comments.
3311       for (const std::string& leading_detached_comment :
3312            source_loc_.leading_detached_comments) {
3313         absl::StrAppend(output, FormatComment(leading_detached_comment), "\n");
3314       }
3315       // Attached leading comments.
3316       if (!source_loc_.leading_comments.empty()) {
3317         absl::StrAppend(output, FormatComment(source_loc_.leading_comments));
3318       }
3319     }
3320   }
AddPostComment(std::string * output)3321   void AddPostComment(std::string* output) {
3322     if (have_source_loc_ && source_loc_.trailing_comments.size() > 0) {
3323       absl::StrAppend(output, FormatComment(source_loc_.trailing_comments));
3324     }
3325   }
3326 
3327   // Format comment such that each line becomes a full-line C++-style comment in
3328   // the DebugString() output.
FormatComment(const std::string & comment_text)3329   std::string FormatComment(const std::string& comment_text) {
3330     std::string stripped_comment = comment_text;
3331     absl::StripAsciiWhitespace(&stripped_comment);
3332     std::string output;
3333     for (absl::string_view line : absl::StrSplit(stripped_comment, '\n')) {
3334       absl::SubstituteAndAppend(&output, "$0// $1\n", prefix_, line);
3335     }
3336     return output;
3337   }
3338 
3339  private:
3340 
3341   bool have_source_loc_;
3342   SourceLocation source_loc_;
3343   DebugStringOptions options_;
3344   std::string prefix_;
3345 };
3346 
3347 }  // anonymous namespace
3348 
DebugString() const3349 std::string FileDescriptor::DebugString() const {
3350   DebugStringOptions options;  // default options
3351   return DebugStringWithOptions(options);
3352 }
3353 
// Builds and returns a textual rendering of this file. Sections are emitted
// in this order: syntax/edition line, import lines, package line, file-level
// options, top-level enums, top-level messages, services, and finally
// top-level extensions grouped into `extend` blocks. Source comments are
// interleaved through SourceLocationCommentPrinter, which only looks them up
// when `debug_string_options.include_comments` is set.
std::string FileDescriptor::DebugStringWithOptions(
    const DebugStringOptions& debug_string_options) const {
  std::string contents;
  {
    // Scoped so that the syntax-specific path and comment printer do not
    // outlive the syntax/edition line.
    std::vector<int> path;
    path.push_back(FileDescriptorProto::kSyntaxFieldNumber);
    SourceLocationCommentPrinter syntax_comment(this, path, "",
                                                debug_string_options);
    syntax_comment.AddPreComment(&contents);
    // Legacy editions are printed as `syntax = "..."`; all other editions are
    // printed as `edition = "..."`.
    if (IsLegacyEdition(edition())) {
      absl::SubstituteAndAppend(&contents, "syntax = \"$0\";\n\n",
                                GetLegacySyntaxName(edition()));
    } else {
      absl::SubstituteAndAppend(&contents, "edition = \"$0\";\n\n", edition());
    }
    syntax_comment.AddPostComment(&contents);
  }

  // Comments attached to the file itself; the trailing part is emitted at the
  // very end of this function.
  SourceLocationCommentPrinter comment_printer(this, "", debug_string_options);
  comment_printer.AddPreComment(&contents);

  // Index sets used below to decide which import keyword each dependency
  // index gets.
  absl::flat_hash_set<int> public_dependencies(
      public_dependencies_, public_dependencies_ + public_dependency_count_);
  absl::flat_hash_set<int> weak_dependencies(
      weak_dependencies_, weak_dependencies_ + weak_dependency_count_);

  // "public" is checked before "weak", so an index present in both sets is
  // printed as `import public`.
  for (int i = 0; i < dependency_count(); i++) {
    if (public_dependencies.contains(i)) {
      absl::SubstituteAndAppend(&contents, "import public \"$0\";\n",
                                dependency(i)->name());
    } else if (weak_dependencies.contains(i)) {
      absl::SubstituteAndAppend(&contents, "import weak \"$0\";\n",
                                dependency(i)->name());
    } else {
      absl::SubstituteAndAppend(&contents, "import \"$0\";\n",
                                dependency(i)->name());
    }
  }

  // The package line (with its own comments) is omitted for an empty package.
  if (!package().empty()) {
    std::vector<int> path;
    path.push_back(FileDescriptorProto::kPackageFieldNumber);
    SourceLocationCommentPrinter package_comment(this, path, "",
                                                 debug_string_options);
    package_comment.AddPreComment(&contents);
    absl::SubstituteAndAppend(&contents, "package $0;\n\n", package());
    package_comment.AddPostComment(&contents);
  }

  // Work on a copy of the options so the features can be merged in for
  // printing without touching the descriptor's own options.
  FileOptions full_options = options();
  CopyFeaturesToOptions(proto_features_, &full_options);
  if (FormatLineOptions(0, full_options, pool(), &contents)) {
    contents.append("\n");  // add some space if we had options
  }

  for (int i = 0; i < enum_type_count(); i++) {
    enum_type(i)->DebugString(0, &contents, debug_string_options);
    contents.append("\n");
  }

  // Find all the 'group' type extensions; we will not output their nested
  // definitions (those will be done with their group field descriptor).
  absl::flat_hash_set<const Descriptor*> groups;
  for (int i = 0; i < extension_count(); i++) {
    if (IsGroupSyntax(edition(), extension(i))) {
      groups.insert(extension(i)->message_type());
    }
  }

  for (int i = 0; i < message_type_count(); i++) {
    if (!groups.contains(message_type(i))) {
      message_type(i)->DebugString(0, &contents, debug_string_options,
                                   /* include_opening_clause */ true);
      contents.append("\n");
    }
  }

  for (int i = 0; i < service_count(); i++) {
    service(i)->DebugString(&contents, debug_string_options);
    contents.append("\n");
  }

  // Consecutive extensions of the same containing type share one `extend`
  // block: a new block is opened (and the previous one closed) whenever the
  // containing type differs from that of the previous extension.
  const Descriptor* containing_type = nullptr;
  for (int i = 0; i < extension_count(); i++) {
    if (extension(i)->containing_type() != containing_type) {
      if (i > 0) contents.append("}\n\n");
      containing_type = extension(i)->containing_type();
      absl::SubstituteAndAppend(&contents, "extend .$0 {\n",
                                containing_type->full_name());
    }
    extension(i)->DebugString(1, &contents, debug_string_options);
  }
  // Close the last open `extend` block, if any was opened.
  if (extension_count() > 0) contents.append("}\n\n");

  comment_printer.AddPostComment(&contents);

  return contents;
}
3452 
DebugString() const3453 std::string Descriptor::DebugString() const {
3454   DebugStringOptions options;  // default options
3455   return DebugStringWithOptions(options);
3456 }
3457 
DebugStringWithOptions(const DebugStringOptions & options) const3458 std::string Descriptor::DebugStringWithOptions(
3459     const DebugStringOptions& options) const {
3460   std::string contents;
3461   DebugString(0, &contents, options, /* include_opening_clause */ true);
3462   return contents;
3463 }
3464 
// Appends a textual rendering of this message to `contents`.
//
// `depth` is the nesting level; the message's own lines are indented by
// `depth * 2` spaces and its body one level deeper. When
// `include_opening_clause` is false the leading "message <name>" is not
// written and output starts at " {" (FieldDescriptor::DebugString passes
// false when printing a group's body after the group field itself).
// Auto-generated map-entry messages produce no output at all.
void Descriptor::DebugString(int depth, std::string* contents,
                             const DebugStringOptions& debug_string_options,
                             bool include_opening_clause) const {
  if (options().map_entry()) {
    // Do not generate debug string for auto-generated map-entry type.
    return;
  }
  // `prefix` is the indentation of the message's own lines; after the
  // increment, `depth` is the level used for everything inside the body.
  std::string prefix(depth * 2, ' ');
  ++depth;

  SourceLocationCommentPrinter comment_printer(this, prefix,
                                               debug_string_options);
  comment_printer.AddPreComment(contents);

  if (include_opening_clause) {
    absl::SubstituteAndAppend(contents, "$0message $1", prefix, name());
  }
  contents->append(" {\n");

  // Work on a copy of the options so the features can be merged in for
  // printing without touching the descriptor's own options.
  MessageOptions full_options = options();
  CopyFeaturesToOptions(proto_features_, &full_options);
  FormatLineOptions(depth, full_options, file()->pool(), contents);

  // Find all the 'group' types for fields and extensions; we will not output
  // their nested definitions (those will be done with their group field
  // descriptor).
  absl::flat_hash_set<const Descriptor*> groups;
  for (int i = 0; i < field_count(); i++) {
    if (IsGroupSyntax(file()->edition(), field(i))) {
      groups.insert(field(i)->message_type());
    }
  }
  for (int i = 0; i < extension_count(); i++) {
    if (IsGroupSyntax(file()->edition(), extension(i))) {
      groups.insert(extension(i)->message_type());
    }
  }

  for (int i = 0; i < nested_type_count(); i++) {
    if (!groups.contains(nested_type(i))) {
      nested_type(i)->DebugString(depth, contents, debug_string_options,
                                  /* include_opening_clause */ true);
    }
  }
  for (int i = 0; i < enum_type_count(); i++) {
    enum_type(i)->DebugString(depth, contents, debug_string_options);
  }
  // Fields outside a real oneof are printed individually. Fields inside one
  // are printed as part of the oneof, which is emitted once, at the position
  // of its first field; its remaining fields are skipped here.
  for (int i = 0; i < field_count(); i++) {
    if (field(i)->real_containing_oneof() == nullptr) {
      field(i)->DebugString(depth, contents, debug_string_options);
    } else if (field(i)->containing_oneof()->field(0) == field(i)) {
      // This is the first field in this oneof, so print the whole oneof.
      field(i)->containing_oneof()->DebugString(depth, contents,
                                                debug_string_options);
    }
  }

  // Extension ranges: "extensions N" for a single number, otherwise
  // "extensions N to M" where M is end_number() - 1, followed by any
  // bracketed options.
  for (int i = 0; i < extension_range_count(); i++) {
    absl::SubstituteAndAppend(contents, "$0  extensions $1", prefix,
                              extension_range(i)->start_number());
    if (extension_range(i)->end_number() >
        extension_range(i)->start_number() + 1) {
      absl::SubstituteAndAppend(contents, " to $0",
                                extension_range(i)->end_number() - 1);
    }
    ExtensionRangeOptions range_options = extension_range(i)->options();
    CopyFeaturesToOptions(extension_range(i)->proto_features_, &range_options);
    std::string formatted_options;
    if (FormatBracketedOptions(depth, range_options, file()->pool(),
                               &formatted_options)) {
      absl::StrAppend(contents, " [", formatted_options, "]");
    }
    absl::StrAppend(contents, ";\n");
  }

  // Group extensions by what they extend, so they can be printed out together.
  // A new `extend` block is opened (and the previous one closed) whenever the
  // containing type differs from that of the previous extension.
  const Descriptor* containing_type = nullptr;
  for (int i = 0; i < extension_count(); i++) {
    if (extension(i)->containing_type() != containing_type) {
      if (i > 0) absl::SubstituteAndAppend(contents, "$0  }\n", prefix);
      containing_type = extension(i)->containing_type();
      absl::SubstituteAndAppend(contents, "$0  extend .$1 {\n", prefix,
                                containing_type->full_name());
    }
    extension(i)->DebugString(depth + 1, contents, debug_string_options);
  }
  // Close the last open `extend` block, if any was opened.
  if (extension_count() > 0)
    absl::SubstituteAndAppend(contents, "$0  }\n", prefix);

  // Reserved numbers on a single line: "N" for a one-number range,
  // "N to max" when the end exceeds FieldDescriptor::kMaxNumber, otherwise
  // "N to M" where M is end - 1.
  if (reserved_range_count() > 0) {
    absl::SubstituteAndAppend(contents, "$0  reserved ", prefix);
    for (int i = 0; i < reserved_range_count(); i++) {
      const Descriptor::ReservedRange* range = reserved_range(i);
      if (range->end == range->start + 1) {
        absl::SubstituteAndAppend(contents, "$0, ", range->start);
      } else if (range->end > FieldDescriptor::kMaxNumber) {
        absl::SubstituteAndAppend(contents, "$0 to max, ", range->start);
      } else {
        absl::SubstituteAndAppend(contents, "$0 to $1, ", range->start,
                                  range->end - 1);
      }
    }
    // Every entry was written with a trailing ", "; swap the last one for the
    // statement terminator.
    contents->replace(contents->size() - 2, 2, ";\n");
  }

  // Reserved names on a single line: quoted for editions before
  // EDITION_2023, unquoted otherwise.
  if (reserved_name_count() > 0) {
    absl::SubstituteAndAppend(contents, "$0  reserved ", prefix);
    for (int i = 0; i < reserved_name_count(); i++) {
      absl::SubstituteAndAppend(
          contents,
          file()->edition() < Edition::EDITION_2023 ? "\"$0\", " : "$0, ",
          absl::CEscape(reserved_name(i)));
    }
    // Same trailing ", " replacement as for the reserved numbers above.
    contents->replace(contents->size() - 2, 2, ";\n");
  }

  absl::SubstituteAndAppend(contents, "$0}\n", prefix);
  comment_printer.AddPostComment(contents);
}
3584 
DebugString() const3585 std::string FieldDescriptor::DebugString() const {
3586   DebugStringOptions options;  // default options
3587   return DebugStringWithOptions(options);
3588 }
3589 
DebugStringWithOptions(const DebugStringOptions & debug_string_options) const3590 std::string FieldDescriptor::DebugStringWithOptions(
3591     const DebugStringOptions& debug_string_options) const {
3592   std::string contents;
3593   int depth = 0;
3594   if (is_extension()) {
3595     absl::SubstituteAndAppend(&contents, "extend .$0 {\n",
3596                               containing_type()->full_name());
3597     depth = 1;
3598   }
3599   DebugString(depth, &contents, debug_string_options);
3600   if (is_extension()) {
3601     contents.append("}\n");
3602   }
3603   return contents;
3604 }
3605 
3606 // The field type string used in FieldDescriptor::DebugString()
FieldTypeNameDebugString() const3607 std::string FieldDescriptor::FieldTypeNameDebugString() const {
3608   switch (type()) {
3609     case TYPE_MESSAGE:
3610     case TYPE_GROUP:
3611       if (IsGroupSyntax(file()->edition(), this)) {
3612         return kTypeToName[type()];
3613       }
3614       return absl::StrCat(".", message_type()->full_name());
3615     case TYPE_ENUM:
3616       return absl::StrCat(".", enum_type()->full_name());
3617     default:
3618       return kTypeToName[type()];
3619   }
3620 }
3621 
// Appends a textual rendering of this field to `contents`, indented by
// `depth * 2` spaces: "<label><type> <name> = <number>", followed by an
// optional bracketed list (default value, json_name, options) and then either
// ";" or, for fields printed with group syntax, the group's body.
void FieldDescriptor::DebugString(
    int depth, std::string* contents,
    const DebugStringOptions& debug_string_options) const {
  std::string prefix(depth * 2, ' ');
  std::string field_type;

  // Special case map fields.
  // The key and value type names come from fields 0 and 1 of the map's
  // message type.
  if (is_map()) {
    absl::SubstituteAndAppend(
        &field_type, "map<$0, $1>",
        message_type()->field(0)->FieldTypeNameDebugString(),
        message_type()->field(1)->FieldTypeNameDebugString());
  } else {
    field_type = FieldTypeNameDebugString();
  }

  // Start from the label keyword plus a separating space, then blank it out
  // in the cases below where no label is printed.
  std::string label = absl::StrCat(kLabelToName[this->label()], " ");

  // Label is omitted for maps, oneof, and plain proto3 fields.
  if (is_map() || real_containing_oneof() ||
      (is_optional() && !has_optional_keyword())) {
    label.clear();
  }
  // Label is omitted for optional and required fields under editions.
  if ((is_optional() || is_required()) && !IsLegacyEdition(file()->edition())) {
    label.clear();
  }

  SourceLocationCommentPrinter comment_printer(this, prefix,
                                               debug_string_options);
  comment_printer.AddPreComment(contents);

  // Fields printed with group syntax use the group message's name in place of
  // the field name.
  absl::SubstituteAndAppend(
      contents, "$0$1$2 $3 = $4", prefix, label, field_type,
      IsGroupSyntax(file()->edition(), this) ? message_type()->name() : name(),
      number());

  // `bracketed` records whether " [" has already been written, so that each
  // later item knows whether to open the bracket or to add a ", " separator.
  bool bracketed = false;
  if (has_default_value()) {
    bracketed = true;
    absl::SubstituteAndAppend(contents, " [default = $0",
                              DefaultValueAsString(true));
  }
  // json_name is printed only when has_json_name_ is set.
  if (has_json_name_) {
    if (!bracketed) {
      bracketed = true;
      contents->append(" [");
    } else {
      contents->append(", ");
    }
    contents->append("json_name = \"");
    contents->append(absl::CEscape(json_name()));
    contents->append("\"");
  }

  // Work on a copy of the options so the features can be merged in for
  // printing without touching the descriptor's own options.
  FieldOptions full_options = options();
  CopyFeaturesToOptions(proto_features_, &full_options);
  std::string formatted_options;
  if (FormatBracketedOptions(depth, full_options, file()->pool(),
                             &formatted_options)) {
    contents->append(bracketed ? ", " : " [");
    bracketed = true;
    contents->append(formatted_options);
  }

  if (bracketed) {
    contents->append("]");
  }

  // With group syntax the group's body follows the field, either elided or
  // printed in full without its own opening clause; any other field just ends
  // the statement.
  if (IsGroupSyntax(file()->edition(), this)) {
    if (debug_string_options.elide_group_body) {
      contents->append(" { ... };\n");
    } else {
      message_type()->DebugString(depth, contents, debug_string_options,
                                  /* include_opening_clause */ false);
    }
  } else {
    contents->append(";\n");
  }

  comment_printer.AddPostComment(contents);
}
3704 
DebugString() const3705 std::string OneofDescriptor::DebugString() const {
3706   DebugStringOptions options;  // default values
3707   return DebugStringWithOptions(options);
3708 }
3709 
DebugStringWithOptions(const DebugStringOptions & options) const3710 std::string OneofDescriptor::DebugStringWithOptions(
3711     const DebugStringOptions& options) const {
3712   std::string contents;
3713   DebugString(0, &contents, options);
3714   return contents;
3715 }
3716 
DebugString(int depth,std::string * contents,const DebugStringOptions & debug_string_options) const3717 void OneofDescriptor::DebugString(
3718     int depth, std::string* contents,
3719     const DebugStringOptions& debug_string_options) const {
3720   std::string prefix(depth * 2, ' ');
3721   ++depth;
3722   SourceLocationCommentPrinter comment_printer(this, prefix,
3723                                                debug_string_options);
3724   comment_printer.AddPreComment(contents);
3725   absl::SubstituteAndAppend(contents, "$0oneof $1 {", prefix, name());
3726 
3727   OneofOptions full_options = options();
3728   CopyFeaturesToOptions(proto_features_, &full_options);
3729   FormatLineOptions(depth, full_options, containing_type()->file()->pool(),
3730                     contents);
3731 
3732   if (debug_string_options.elide_oneof_body) {
3733     contents->append(" ... }\n");
3734   } else {
3735     contents->append("\n");
3736     for (int i = 0; i < field_count(); i++) {
3737       field(i)->DebugString(depth, contents, debug_string_options);
3738     }
3739     absl::SubstituteAndAppend(contents, "$0}\n", prefix);
3740   }
3741   comment_printer.AddPostComment(contents);
3742 }
3743 
DebugString() const3744 std::string EnumDescriptor::DebugString() const {
3745   DebugStringOptions options;  // default values
3746   return DebugStringWithOptions(options);
3747 }
3748 
DebugStringWithOptions(const DebugStringOptions & options) const3749 std::string EnumDescriptor::DebugStringWithOptions(
3750     const DebugStringOptions& options) const {
3751   std::string contents;
3752   DebugString(0, &contents, options);
3753   return contents;
3754 }
3755 
DebugString(int depth,std::string * contents,const DebugStringOptions & debug_string_options) const3756 void EnumDescriptor::DebugString(
3757     int depth, std::string* contents,
3758     const DebugStringOptions& debug_string_options) const {
3759   std::string prefix(depth * 2, ' ');
3760   ++depth;
3761 
3762   SourceLocationCommentPrinter comment_printer(this, prefix,
3763                                                debug_string_options);
3764   comment_printer.AddPreComment(contents);
3765 
3766   absl::SubstituteAndAppend(contents, "$0enum $1 {\n", prefix, name());
3767 
3768   EnumOptions full_options = options();
3769   CopyFeaturesToOptions(proto_features_, &full_options);
3770   FormatLineOptions(depth, full_options, file()->pool(), contents);
3771 
3772   for (int i = 0; i < value_count(); i++) {
3773     value(i)->DebugString(depth, contents, debug_string_options);
3774   }
3775 
3776   if (reserved_range_count() > 0) {
3777     absl::SubstituteAndAppend(contents, "$0  reserved ", prefix);
3778     for (int i = 0; i < reserved_range_count(); i++) {
3779       const EnumDescriptor::ReservedRange* range = reserved_range(i);
3780       if (range->end == range->start) {
3781         absl::SubstituteAndAppend(contents, "$0, ", range->start);
3782       } else if (range->end == INT_MAX) {
3783         absl::SubstituteAndAppend(contents, "$0 to max, ", range->start);
3784       } else {
3785         absl::SubstituteAndAppend(contents, "$0 to $1, ", range->start,
3786                                   range->end);
3787       }
3788     }
3789     contents->replace(contents->size() - 2, 2, ";\n");
3790   }
3791 
3792   if (reserved_name_count() > 0) {
3793     absl::SubstituteAndAppend(contents, "$0  reserved ", prefix);
3794     for (int i = 0; i < reserved_name_count(); i++) {
3795       absl::SubstituteAndAppend(
3796           contents,
3797           file()->edition() < Edition::EDITION_2023 ? "\"$0\", " : "$0, ",
3798           absl::CEscape(reserved_name(i)));
3799     }
3800     contents->replace(contents->size() - 2, 2, ";\n");
3801   }
3802 
3803   absl::SubstituteAndAppend(contents, "$0}\n", prefix);
3804 
3805   comment_printer.AddPostComment(contents);
3806 }
3807 
DebugString() const3808 std::string EnumValueDescriptor::DebugString() const {
3809   DebugStringOptions options;  // default values
3810   return DebugStringWithOptions(options);
3811 }
3812 
DebugStringWithOptions(const DebugStringOptions & options) const3813 std::string EnumValueDescriptor::DebugStringWithOptions(
3814     const DebugStringOptions& options) const {
3815   std::string contents;
3816   DebugString(0, &contents, options);
3817   return contents;
3818 }
3819 
DebugString(int depth,std::string * contents,const DebugStringOptions & debug_string_options) const3820 void EnumValueDescriptor::DebugString(
3821     int depth, std::string* contents,
3822     const DebugStringOptions& debug_string_options) const {
3823   std::string prefix(depth * 2, ' ');
3824 
3825   SourceLocationCommentPrinter comment_printer(this, prefix,
3826                                                debug_string_options);
3827   comment_printer.AddPreComment(contents);
3828 
3829   absl::SubstituteAndAppend(contents, "$0$1 = $2", prefix, name(), number());
3830 
3831   EnumValueOptions full_options = options();
3832   CopyFeaturesToOptions(proto_features_, &full_options);
3833   std::string formatted_options;
3834   if (FormatBracketedOptions(depth, full_options, type()->file()->pool(),
3835                              &formatted_options)) {
3836     absl::SubstituteAndAppend(contents, " [$0]", formatted_options);
3837   }
3838   contents->append(";\n");
3839 
3840   comment_printer.AddPostComment(contents);
3841 }
3842 
DebugString() const3843 std::string ServiceDescriptor::DebugString() const {
3844   DebugStringOptions options;  // default values
3845   return DebugStringWithOptions(options);
3846 }
3847 
DebugStringWithOptions(const DebugStringOptions & options) const3848 std::string ServiceDescriptor::DebugStringWithOptions(
3849     const DebugStringOptions& options) const {
3850   std::string contents;
3851   DebugString(&contents, options);
3852   return contents;
3853 }
3854 
DebugString(std::string * contents,const DebugStringOptions & debug_string_options) const3855 void ServiceDescriptor::DebugString(
3856     std::string* contents,
3857     const DebugStringOptions& debug_string_options) const {
3858   SourceLocationCommentPrinter comment_printer(this, /* prefix */ "",
3859                                                debug_string_options);
3860   comment_printer.AddPreComment(contents);
3861 
3862   absl::SubstituteAndAppend(contents, "service $0 {\n", name());
3863 
3864   ServiceOptions full_options = options();
3865   CopyFeaturesToOptions(proto_features_, &full_options);
3866   FormatLineOptions(1, full_options, file()->pool(), contents);
3867 
3868   for (int i = 0; i < method_count(); i++) {
3869     method(i)->DebugString(1, contents, debug_string_options);
3870   }
3871 
3872   contents->append("}\n");
3873 
3874   comment_printer.AddPostComment(contents);
3875 }
3876 
DebugString() const3877 std::string MethodDescriptor::DebugString() const {
3878   DebugStringOptions options;  // default values
3879   return DebugStringWithOptions(options);
3880 }
3881 
DebugStringWithOptions(const DebugStringOptions & options) const3882 std::string MethodDescriptor::DebugStringWithOptions(
3883     const DebugStringOptions& options) const {
3884   std::string contents;
3885   DebugString(0, &contents, options);
3886   return contents;
3887 }
3888 
DebugString(int depth,std::string * contents,const DebugStringOptions & debug_string_options) const3889 void MethodDescriptor::DebugString(
3890     int depth, std::string* contents,
3891     const DebugStringOptions& debug_string_options) const {
3892   std::string prefix(depth * 2, ' ');
3893   ++depth;
3894 
3895   SourceLocationCommentPrinter comment_printer(this, prefix,
3896                                                debug_string_options);
3897   comment_printer.AddPreComment(contents);
3898 
3899   absl::SubstituteAndAppend(
3900       contents, "$0rpc $1($4.$2) returns ($5.$3)", prefix, name(),
3901       input_type()->full_name(), output_type()->full_name(),
3902       client_streaming() ? "stream " : "", server_streaming() ? "stream " : "");
3903 
3904   MethodOptions full_options = options();
3905   CopyFeaturesToOptions(proto_features_, &full_options);
3906   std::string formatted_options;
3907   if (FormatLineOptions(depth, full_options, service()->file()->pool(),
3908                         &formatted_options)) {
3909     absl::SubstituteAndAppend(contents, " {\n$0$1}\n", formatted_options,
3910                               prefix);
3911   } else {
3912     contents->append(";\n");
3913   }
3914 
3915   comment_printer.AddPostComment(contents);
3916 }
3917 
3918 // Feature methods ===============================================
3919 
is_closed() const3920 bool EnumDescriptor::is_closed() const {
3921   return features().enum_type() == FeatureSet::CLOSED;
3922 }
3923 
is_packed() const3924 bool FieldDescriptor::is_packed() const {
3925   if (!is_packable()) return false;
3926   return features().repeated_field_encoding() == FeatureSet::PACKED;
3927 }
3928 
IsStrictUtf8(const FieldDescriptor * field)3929 static bool IsStrictUtf8(const FieldDescriptor* field) {
3930   return internal::InternalFeatureHelper::GetFeatures(*field)
3931              .utf8_validation() == FeatureSet::VERIFY;
3932 }
3933 
requires_utf8_validation() const3934 bool FieldDescriptor::requires_utf8_validation() const {
3935   return type() == TYPE_STRING && IsStrictUtf8(this);
3936 }
3937 
has_presence() const3938 bool FieldDescriptor::has_presence() const {
3939   if (is_repeated()) return false;
3940   return cpp_type() == CPPTYPE_MESSAGE || is_extension() ||
3941          containing_oneof() ||
3942          features().field_presence() != FeatureSet::IMPLICIT;
3943 }
3944 
is_required() const3945 bool FieldDescriptor::is_required() const {
3946   return features().field_presence() == FeatureSet::LEGACY_REQUIRED;
3947 }
3948 
legacy_enum_field_treated_as_closed() const3949 bool FieldDescriptor::legacy_enum_field_treated_as_closed() const {
3950   return type() == TYPE_ENUM &&
3951          (features().GetExtension(pb::cpp).legacy_closed_enum() ||
3952           enum_type()->is_closed());
3953 }
3954 
has_optional_keyword() const3955 bool FieldDescriptor::has_optional_keyword() const {
3956   return proto3_optional_ || (file()->edition() == Edition::EDITION_PROTO2 &&
3957                               is_optional() && !containing_oneof());
3958 }
3959 
cpp_string_type() const3960 FieldDescriptor::CppStringType FieldDescriptor::cpp_string_type() const {
3961   ABSL_DCHECK(cpp_type() == FieldDescriptor::CPPTYPE_STRING);
3962   switch (features().GetExtension(pb::cpp).string_type()) {
3963     case pb::CppFeatures::VIEW:
3964       return CppStringType::kView;
3965     case pb::CppFeatures::CORD:
3966       // In open-source, protobuf CORD is only supported for singular bytes
3967       // fields.
3968       if (type() != FieldDescriptor::TYPE_BYTES || is_repeated() ||
3969           is_extension()) {
3970         return CppStringType::kString;
3971       }
3972       return CppStringType::kCord;
3973     case pb::CppFeatures::STRING:
3974       return CppStringType::kString;
3975     default:
3976       // If features haven't been resolved, this is a dynamic build not for C++
3977       // codegen.  Just use string type.
3978       ABSL_DCHECK(!features().GetExtension(pb::cpp).has_string_type());
3979       return CppStringType::kString;
3980   }
3981 }
3982 
3983 // Location methods ===============================================
3984 
GetSourceLocation(const std::vector<int> & path,SourceLocation * out_location) const3985 bool FileDescriptor::GetSourceLocation(const std::vector<int>& path,
3986                                        SourceLocation* out_location) const {
3987   ABSL_CHECK(out_location != nullptr);
3988   if (source_code_info_) {
3989     if (const SourceCodeInfo_Location* loc =
3990             tables_->GetSourceLocation(path, source_code_info_)) {
3991       const RepeatedField<int32_t>& span = loc->span();
3992       if (span.size() == 3 || span.size() == 4) {
3993         out_location->start_line = span.Get(0);
3994         out_location->start_column = span.Get(1);
3995         out_location->end_line = span.Get(span.size() == 3 ? 0 : 2);
3996         out_location->end_column = span.Get(span.size() - 1);
3997 
3998         out_location->leading_comments = loc->leading_comments();
3999         out_location->trailing_comments = loc->trailing_comments();
4000         out_location->leading_detached_comments.assign(
4001             loc->leading_detached_comments().begin(),
4002             loc->leading_detached_comments().end());
4003         return true;
4004       }
4005     }
4006   }
4007   return false;
4008 }
4009 
GetSourceLocation(SourceLocation * out_location) const4010 bool FileDescriptor::GetSourceLocation(SourceLocation* out_location) const {
4011   std::vector<int> path;  // empty path for root FileDescriptor
4012   return GetSourceLocation(path, out_location);
4013 }
4014 
GetSourceLocation(SourceLocation * out_location) const4015 bool Descriptor::GetSourceLocation(SourceLocation* out_location) const {
4016   std::vector<int> path;
4017   GetLocationPath(&path);
4018   return file()->GetSourceLocation(path, out_location);
4019 }
4020 
GetSourceLocation(SourceLocation * out_location) const4021 bool FieldDescriptor::GetSourceLocation(SourceLocation* out_location) const {
4022   std::vector<int> path;
4023   GetLocationPath(&path);
4024   return file()->GetSourceLocation(path, out_location);
4025 }
4026 
GetSourceLocation(SourceLocation * out_location) const4027 bool OneofDescriptor::GetSourceLocation(SourceLocation* out_location) const {
4028   std::vector<int> path;
4029   GetLocationPath(&path);
4030   return containing_type()->file()->GetSourceLocation(path, out_location);
4031 }
4032 
GetSourceLocation(SourceLocation * out_location) const4033 bool EnumDescriptor::GetSourceLocation(SourceLocation* out_location) const {
4034   std::vector<int> path;
4035   GetLocationPath(&path);
4036   return file()->GetSourceLocation(path, out_location);
4037 }
4038 
GetSourceLocation(SourceLocation * out_location) const4039 bool MethodDescriptor::GetSourceLocation(SourceLocation* out_location) const {
4040   std::vector<int> path;
4041   GetLocationPath(&path);
4042   return service()->file()->GetSourceLocation(path, out_location);
4043 }
4044 
GetSourceLocation(SourceLocation * out_location) const4045 bool ServiceDescriptor::GetSourceLocation(SourceLocation* out_location) const {
4046   std::vector<int> path;
4047   GetLocationPath(&path);
4048   return file()->GetSourceLocation(path, out_location);
4049 }
4050 
GetSourceLocation(SourceLocation * out_location) const4051 bool EnumValueDescriptor::GetSourceLocation(
4052     SourceLocation* out_location) const {
4053   std::vector<int> path;
4054   GetLocationPath(&path);
4055   return type()->file()->GetSourceLocation(path, out_location);
4056 }
4057 
GetLocationPath(std::vector<int> * output) const4058 void Descriptor::GetLocationPath(std::vector<int>* output) const {
4059   if (containing_type()) {
4060     containing_type()->GetLocationPath(output);
4061     output->push_back(DescriptorProto::kNestedTypeFieldNumber);
4062     output->push_back(index());
4063   } else {
4064     output->push_back(FileDescriptorProto::kMessageTypeFieldNumber);
4065     output->push_back(index());
4066   }
4067 }
4068 
GetLocationPath(std::vector<int> * output) const4069 void FieldDescriptor::GetLocationPath(std::vector<int>* output) const {
4070   if (is_extension()) {
4071     if (extension_scope() == nullptr) {
4072       output->push_back(FileDescriptorProto::kExtensionFieldNumber);
4073       output->push_back(index());
4074     } else {
4075       extension_scope()->GetLocationPath(output);
4076       output->push_back(DescriptorProto::kExtensionFieldNumber);
4077       output->push_back(index());
4078     }
4079   } else {
4080     containing_type()->GetLocationPath(output);
4081     output->push_back(DescriptorProto::kFieldFieldNumber);
4082     output->push_back(index());
4083   }
4084 }
4085 
GetLocationPath(std::vector<int> * output) const4086 void OneofDescriptor::GetLocationPath(std::vector<int>* output) const {
4087   containing_type()->GetLocationPath(output);
4088   output->push_back(DescriptorProto::kOneofDeclFieldNumber);
4089   output->push_back(index());
4090 }
4091 
GetLocationPath(std::vector<int> * output) const4092 void Descriptor::ExtensionRange::GetLocationPath(
4093     std::vector<int>* output) const {
4094   containing_type()->GetLocationPath(output);
4095   output->push_back(DescriptorProto::kExtensionRangeFieldNumber);
4096   output->push_back(index());
4097 }
4098 
GetLocationPath(std::vector<int> * output) const4099 void EnumDescriptor::GetLocationPath(std::vector<int>* output) const {
4100   if (containing_type()) {
4101     containing_type()->GetLocationPath(output);
4102     output->push_back(DescriptorProto::kEnumTypeFieldNumber);
4103     output->push_back(index());
4104   } else {
4105     output->push_back(FileDescriptorProto::kEnumTypeFieldNumber);
4106     output->push_back(index());
4107   }
4108 }
4109 
GetLocationPath(std::vector<int> * output) const4110 void EnumValueDescriptor::GetLocationPath(std::vector<int>* output) const {
4111   type()->GetLocationPath(output);
4112   output->push_back(EnumDescriptorProto::kValueFieldNumber);
4113   output->push_back(index());
4114 }
4115 
GetLocationPath(std::vector<int> * output) const4116 void ServiceDescriptor::GetLocationPath(std::vector<int>* output) const {
4117   output->push_back(FileDescriptorProto::kServiceFieldNumber);
4118   output->push_back(index());
4119 }
4120 
GetLocationPath(std::vector<int> * output) const4121 void MethodDescriptor::GetLocationPath(std::vector<int>* output) const {
4122   service()->GetLocationPath(output);
4123   output->push_back(ServiceDescriptorProto::kMethodFieldNumber);
4124   output->push_back(index());
4125 }
4126 
4127 // ===================================================================
4128 
4129 namespace {
4130 
4131 // Represents an options message to interpret. Extension names in the option
4132 // name are resolved relative to name_scope. element_name and orig_opt are
4133 // used only for error reporting (since the parser records locations against
4134 // pointers in the original options, not the mutable copy). The Message must be
4135 // one of the Options messages in descriptor.proto.
4136 struct OptionsToInterpret {
OptionsToInterpretgoogle::protobuf::__anona654feba0a11::OptionsToInterpret4137   OptionsToInterpret(absl::string_view ns, absl::string_view el,
4138                      absl::Span<const int> path, const Message* orig_opt,
4139                      Message* opt)
4140       : name_scope(ns),
4141         element_name(el),
4142         element_path(path.begin(), path.end()),
4143         original_options(orig_opt),
4144         options(opt) {}
4145   std::string name_scope;
4146   std::string element_name;
4147   std::vector<int> element_path;
4148   const Message* original_options;
4149   Message* options;
4150 };
4151 
4152 }  // namespace
4153 
4154 class DescriptorBuilder {
4155  public:
New(const DescriptorPool * pool,DescriptorPool::Tables * tables,DescriptorPool::DeferredValidation & deferred_validation,DescriptorPool::ErrorCollector * error_collector)4156   static std::unique_ptr<DescriptorBuilder> New(
4157       const DescriptorPool* pool, DescriptorPool::Tables* tables,
4158       DescriptorPool::DeferredValidation& deferred_validation,
4159       DescriptorPool::ErrorCollector* error_collector) {
4160     return std::unique_ptr<DescriptorBuilder>(new DescriptorBuilder(
4161         pool, tables, deferred_validation, error_collector));
4162   }
4163 
4164   ~DescriptorBuilder();
4165 
4166   const FileDescriptor* BuildFile(const FileDescriptorProto& proto);
4167 
4168  private:
4169   DescriptorBuilder(const DescriptorPool* pool, DescriptorPool::Tables* tables,
4170                     DescriptorPool::DeferredValidation& deferred_validation,
4171                     DescriptorPool::ErrorCollector* error_collector);
4172 
4173   friend class OptionInterpreter;
4174 
4175   // Non-recursive part of BuildFile functionality.
4176   FileDescriptor* BuildFileImpl(const FileDescriptorProto& proto,
4177                                 internal::FlatAllocator& alloc);
4178 
4179   const DescriptorPool* pool_;
4180   DescriptorPool::Tables* tables_;  // for convenience
4181   DescriptorPool::DeferredValidation& deferred_validation_;
4182   DescriptorPool::ErrorCollector* error_collector_;
4183 
4184   absl::optional<FeatureResolver> feature_resolver_ = absl::nullopt;
4185 
4186   // As we build descriptors we store copies of the options messages in
4187   // them. We put pointers to those copies in this vector, as we build, so we
4188   // can later (after cross-linking) interpret those options.
4189   std::vector<OptionsToInterpret> options_to_interpret_;
4190 
4191   bool had_errors_;
4192   std::string filename_;
4193   FileDescriptor* file_;
4194   FileDescriptorTables* file_tables_;
4195   absl::flat_hash_set<const FileDescriptor*> dependencies_;
4196 
4197   struct MessageHints {
4198     int fields_to_suggest = 0;
4199     const Message* first_reason = nullptr;
4200     DescriptorPool::ErrorCollector::ErrorLocation first_reason_location =
4201         DescriptorPool::ErrorCollector::ErrorLocation::OTHER;
4202 
RequestHintOnFieldNumbersgoogle::protobuf::DescriptorBuilder::MessageHints4203     void RequestHintOnFieldNumbers(
4204         const Message& reason,
4205         DescriptorPool::ErrorCollector::ErrorLocation reason_location,
4206         int range_start = 0, int range_end = 1) {
4207       auto fit = [](int value) {
4208         return std::min(std::max(value, 0), FieldDescriptor::kMaxNumber);
4209       };
4210       fields_to_suggest =
4211           fit(fields_to_suggest + fit(fit(range_end) - fit(range_start)));
4212       if (first_reason) return;
4213       first_reason = &reason;
4214       first_reason_location = reason_location;
4215     }
4216   };
4217 
4218   absl::flat_hash_map<const Descriptor*, MessageHints> message_hints_;
4219 
4220   // unused_dependency_ is used to record the unused imported files.
4221   // Note: public import is not considered.
4222   absl::flat_hash_set<const FileDescriptor*> unused_dependency_;
4223 
4224   // If LookupSymbol() finds a symbol that is in a file which is not a declared
4225   // dependency of this file, it will fail, but will set
4226   // possible_undeclared_dependency_ to point at that file.  This is only used
4227   // by AddNotDefinedError() to report a more useful error message.
4228   // possible_undeclared_dependency_name_ is the name of the symbol that was
4229   // actually found in possible_undeclared_dependency_, which may be a parent
4230   // of the symbol actually looked for.
4231   const FileDescriptor* possible_undeclared_dependency_;
4232   std::string possible_undeclared_dependency_name_;
4233 
4234   // If LookupSymbol() could resolve a symbol which is not defined,
4235   // record the resolved name.  This is only used by AddNotDefinedError()
4236   // to report a more useful error message.
4237   std::string undefine_resolved_name_;
4238 
4239   // Tracker for current recursion depth to implement recursion protection.
4240   //
4241   // Counts down to 0 when there is no depth remaining.
4242   //
4243   // Maximum recursion depth corresponds to 32 nested message declarations.
4244   int recursion_depth_ = internal::cpp::MaxMessageDeclarationNestingDepth();
4245 
4246   // Note: Both AddError and AddWarning functions are extremely sensitive to
4247   // the *caller* stack space used. We call these functions many times in
4248   // complex code paths that are hot and likely to be inlined heavily. However,
4249   // these calls themselves are cold error paths. But stack space used by the
4250   // code that sets up the call in many cases is paid for even when the call
4251   // isn't reached. To optimize this, we use `const std::string &` to reuse
4252   // string objects where possible for the inputs and for the error message
4253   // itself we use a closure to build the error message inside these routines.
4254   // The routines themselves are marked to prevent inlining and this lets us
4255   // move the large code sometimes required to produce a useful error message
4256   // entirely into a helper closure rather than the immediate caller.
4257   //
4258   // The `const char*` overload should only be used for string literal messages
4259   // where this is a frustrating amount of overhead and there is no harm in
4260   // directly using the literal.
4261   void AddError(absl::string_view element_name, const Message& descriptor,
4262                 DescriptorPool::ErrorCollector::ErrorLocation location,
4263                 absl::FunctionRef<std::string()> make_error);
4264   void AddError(absl::string_view element_name, const Message& descriptor,
4265                 DescriptorPool::ErrorCollector::ErrorLocation location,
4266                 const char* error);
4267   void AddRecursiveImportError(const FileDescriptorProto& proto, int from_here);
4268   void AddTwiceListedError(const FileDescriptorProto& proto, int index);
4269   void AddImportError(const FileDescriptorProto& proto, int index);
4270 
4271   // Adds an error indicating that undefined_symbol was not defined.  Must
4272   // only be called after LookupSymbol() fails.
4273   void AddNotDefinedError(
4274       absl::string_view element_name, const Message& descriptor,
4275       DescriptorPool::ErrorCollector::ErrorLocation location,
4276       absl::string_view undefined_symbol);
4277 
4278   void AddWarning(absl::string_view element_name, const Message& descriptor,
4279                   DescriptorPool::ErrorCollector::ErrorLocation location,
4280                   absl::FunctionRef<std::string()> make_error);
4281   void AddWarning(absl::string_view element_name, const Message& descriptor,
4282                   DescriptorPool::ErrorCollector::ErrorLocation location,
4283                   const char* error);
4284 
4285   // Silly helper which determines if the given file is in the given package.
4286   // I.e., either file->package() == package_name or file->package() is a
4287   // nested package within package_name.
4288   bool IsInPackage(const FileDescriptor* file, absl::string_view package_name);
4289 
4290   // Helper function which finds all public dependencies of the given file, and
  // stores them in the dependencies_ set in the builder.
4292   void RecordPublicDependencies(const FileDescriptor* file);
4293 
4294   // Like tables_->FindSymbol(), but additionally:
4295   // - Search the pool's underlay if not found in tables_.
  // - Ensure that the resulting Symbol is from one of the file's declared
4297   //   dependencies.
4298   Symbol FindSymbol(absl::string_view name, bool build_it = true);
4299 
4300   // Like FindSymbol() but does not require that the symbol is in one of the
4301   // file's declared dependencies.
4302   Symbol FindSymbolNotEnforcingDeps(absl::string_view name,
4303                                     bool build_it = true);
4304 
4305   // This implements the body of FindSymbolNotEnforcingDeps().
4306   Symbol FindSymbolNotEnforcingDepsHelper(const DescriptorPool* pool,
4307                                           absl::string_view name,
4308                                           bool build_it = true);
4309 
4310   // Like FindSymbol(), but looks up the name relative to some other symbol
4311   // name.  This first searches siblings of relative_to, then siblings of its
4312   // parents, etc.  For example, LookupSymbol("foo.bar", "baz.moo.corge") makes
4313   // the following calls, returning the first non-null result:
4314   // FindSymbol("baz.moo.foo.bar"), FindSymbol("baz.foo.bar"),
4315   // FindSymbol("foo.bar").  If AllowUnknownDependencies() has been called
4316   // on the DescriptorPool, this will generate a placeholder type if
4317   // the name is not found (unless the name itself is malformed).  The
4318   // placeholder_type parameter indicates what kind of placeholder should be
4319   // constructed in this case.  The resolve_mode parameter determines whether
4320   // any symbol is returned, or only symbols that are types.  Note, however,
4321   // that LookupSymbol may still return a non-type symbol in LOOKUP_TYPES mode,
4322   // if it believes that's all it could refer to.  The caller should always
4323   // check that it receives the type of symbol it was expecting.
4324   enum ResolveMode { LOOKUP_ALL, LOOKUP_TYPES };
4325   Symbol LookupSymbol(absl::string_view name, absl::string_view relative_to,
4326                       DescriptorPool::PlaceholderType placeholder_type =
4327                           DescriptorPool::PLACEHOLDER_MESSAGE,
4328                       ResolveMode resolve_mode = LOOKUP_ALL,
4329                       bool build_it = true);
4330 
4331   // Like LookupSymbol() but will not return a placeholder even if
4332   // AllowUnknownDependencies() has been used.
4333   Symbol LookupSymbolNoPlaceholder(absl::string_view name,
4334                                    absl::string_view relative_to,
4335                                    ResolveMode resolve_mode = LOOKUP_ALL,
4336                                    bool build_it = true);
4337 
4338   // Calls tables_->AddSymbol() and records an error if it fails.  Returns
4339   // true if successful or false if failed, though most callers can ignore
4340   // the return value since an error has already been recorded.
4341   bool AddSymbol(absl::string_view full_name, const void* parent,
4342                  absl::string_view name, const Message& proto, Symbol symbol);
4343 
4344   // Like AddSymbol(), but succeeds if the symbol is already defined as long
4345   // as the existing definition is also a package (because it's OK to define
4346   // the same package in two different files).  Also adds all parents of the
4347   // package to the symbol table (e.g. AddPackage("foo.bar", ...) will add
4348   // "foo.bar" and "foo" to the table).
4349   void AddPackage(absl::string_view name, const Message& proto,
4350                   FileDescriptor* file, bool toplevel);
4351 
4352   // Checks that the symbol name contains only alphanumeric characters and
4353   // underscores.  Records an error otherwise.
4354   void ValidateSymbolName(absl::string_view name, absl::string_view full_name,
4355                           const Message& proto);
4356 
4357   // Allocates a copy of orig_options in tables_ and stores it in the
4358   // descriptor. Remembers its uninterpreted options, to be interpreted
4359   // later. DescriptorT must be one of the Descriptor messages from
4360   // descriptor.proto.
4361   template <class DescriptorT>
4362   void AllocateOptions(const typename DescriptorT::Proto& proto,
4363                        DescriptorT* descriptor, int options_field_tag,
4364                        absl::string_view option_name,
4365                        internal::FlatAllocator& alloc);
4366   // Specialization for FileOptions.
4367   void AllocateOptions(const FileDescriptorProto& proto,
4368                        FileDescriptor* descriptor,
4369                        internal::FlatAllocator& alloc);
4370 
4371   // Implementation for AllocateOptions(). Don't call this directly.
4372   template <class DescriptorT>
4373   const typename DescriptorT::OptionsType* AllocateOptionsImpl(
4374       absl::string_view name_scope, absl::string_view element_name,
4375       const typename DescriptorT::Proto& proto,
4376       absl::Span<const int> options_path, absl::string_view option_name,
4377       internal::FlatAllocator& alloc);
4378 
  // Allocates and resolves any feature sets that need to be owned by a given
  // descriptor. This also strips features out of the mutable options message to
  // prevent leaking of unresolved features.
  // Note: This must be used during a pre-order traversal of the
  // descriptor tree, so that each descriptor's parent has a fully resolved
  // feature set already.
  template <class DescriptorT>
  void ResolveFeatures(const typename DescriptorT::Proto& proto,
                       DescriptorT* descriptor,
                       typename DescriptorT::OptionsType* options,
                       internal::FlatAllocator& alloc);
  // Non-template overload for the file-level descriptor.
  void ResolveFeatures(const FileDescriptorProto& proto,
                       FileDescriptor* descriptor, FileOptions* options,
                       internal::FlatAllocator& alloc);
  // Variant that additionally takes the edition and the error location to
  // report against. `force_merge` defaults to false.
  // NOTE(review): presumably the ResolveFeatures() overloads above forward to
  // this -- confirm against the definitions.
  template <class DescriptorT>
  void ResolveFeaturesImpl(
      Edition edition, const typename DescriptorT::Proto& proto,
      DescriptorT* descriptor, typename DescriptorT::OptionsType* options,
      internal::FlatAllocator& alloc,
      DescriptorPool::ErrorCollector::ErrorLocation error_location,
      bool force_merge = false);
4400 
  // Takes the field by mutable reference together with the proto it was
  // built from.
  // NOTE(review): judging by the name, this adjusts a field's features after
  // they have been resolved -- confirm against the definition.
  void PostProcessFieldFeatures(FieldDescriptor& field,
                                const FieldDescriptorProto& proto);
4403 
  // Allocates an array of two strings: the first is a copy of `proto_name`
  // and the second is the full name. The full name is "scope.proto_name" if
  // `scope` is non-empty and "proto_name" otherwise.
  // NOTE(review): the returned pointer presumably addresses the first of the
  // two strings -- confirm against the definition.
  const std::string* AllocateNameStrings(absl::string_view scope,
                                         absl::string_view proto_name,
                                         internal::FlatAllocator& alloc);
4410 
  // These methods all have the same signature for the sake of the BUILD_ARRAY
  // macro, below.
  void BuildMessage(const DescriptorProto& proto, const Descriptor* parent,
                    Descriptor* result, internal::FlatAllocator& alloc);
  // Takes an extra `is_extension` flag compared to the other Build* methods;
  // BuildField() and BuildExtension() call it with false and true
  // respectively.
  void BuildFieldOrExtension(const FieldDescriptorProto& proto,
                             Descriptor* parent, FieldDescriptor* result,
                             bool is_extension, internal::FlatAllocator& alloc);
BuildField(const FieldDescriptorProto & proto,Descriptor * parent,FieldDescriptor * result,internal::FlatAllocator & alloc)4418   void BuildField(const FieldDescriptorProto& proto, Descriptor* parent,
4419                   FieldDescriptor* result, internal::FlatAllocator& alloc) {
4420     BuildFieldOrExtension(proto, parent, result, false, alloc);
4421   }
BuildExtension(const FieldDescriptorProto & proto,Descriptor * parent,FieldDescriptor * result,internal::FlatAllocator & alloc)4422   void BuildExtension(const FieldDescriptorProto& proto, Descriptor* parent,
4423                       FieldDescriptor* result, internal::FlatAllocator& alloc) {
4424     BuildFieldOrExtension(proto, parent, result, true, alloc);
4425   }
  // Builders for the remaining descriptor kinds. Each takes the source proto,
  // the parent, the descriptor to fill in (`result`) and the allocator.
  void BuildExtensionRange(const DescriptorProto::ExtensionRange& proto,
                           const Descriptor* parent,
                           Descriptor::ExtensionRange* result,
                           internal::FlatAllocator& alloc);
  // Reserved ranges exist for both messages and enums, hence two overloads.
  void BuildReservedRange(const DescriptorProto::ReservedRange& proto,
                          const Descriptor* parent,
                          Descriptor::ReservedRange* result,
                          internal::FlatAllocator& alloc);
  void BuildReservedRange(const EnumDescriptorProto::EnumReservedRange& proto,
                          const EnumDescriptor* parent,
                          EnumDescriptor::ReservedRange* result,
                          internal::FlatAllocator& alloc);
  void BuildOneof(const OneofDescriptorProto& proto, Descriptor* parent,
                  OneofDescriptor* result, internal::FlatAllocator& alloc);
  void BuildEnum(const EnumDescriptorProto& proto, const Descriptor* parent,
                 EnumDescriptor* result, internal::FlatAllocator& alloc);
  void BuildEnumValue(const EnumValueDescriptorProto& proto,
                      const EnumDescriptor* parent, EnumValueDescriptor* result,
                      internal::FlatAllocator& alloc);
  // NOTE(review): `dummy` sits in the parent position; presumably it exists
  // only so this method has the same parameter count as the other builders --
  // confirm against the definition.
  void BuildService(const ServiceDescriptorProto& proto, const void* dummy,
                    ServiceDescriptor* result, internal::FlatAllocator& alloc);
  void BuildMethod(const MethodDescriptorProto& proto,
                   const ServiceDescriptor* parent, MethodDescriptor* result,
                   internal::FlatAllocator& alloc);
4450 
  // Checks that take the source proto together with the built descriptor.
  // NOTE(review): judging by the names, these report clashing JSON field
  // names and clashing enum value names -- confirm against the definitions.
  void CheckFieldJsonNameUniqueness(const DescriptorProto& proto,
                                    const Descriptor* result);
  // Overload that additionally takes the message name and a
  // `use_custom_names` flag.
  void CheckFieldJsonNameUniqueness(absl::string_view message_name,
                                    const DescriptorProto& message,
                                    const Descriptor* descriptor,
                                    bool use_custom_names);
  void CheckEnumValueUniqueness(const EnumDescriptorProto& proto,
                                const EnumDescriptor* result);

  // NOTE(review): presumably reports imports of the file that were never
  // used -- confirm against the definition.
  void LogUnusedDependency(const FileDescriptorProto& proto,
                           const FileDescriptor* result);
4462 
  // Cross-linking passes. Must be run only after building.
  //
  // NOTE: Options will not be available during cross-linking, as they
  // have not yet been interpreted. Defer any handling of options to the
  // Validate*Options methods.
  void CrossLinkFile(FileDescriptor* file, const FileDescriptorProto& proto);
  void CrossLinkMessage(Descriptor* message, const DescriptorProto& proto);
  void CrossLinkField(FieldDescriptor* field,
                      const FieldDescriptorProto& proto);
  void CrossLinkService(ServiceDescriptor* service,
                        const ServiceDescriptorProto& proto);
  void CrossLinkMethod(MethodDescriptor* method,
                       const MethodDescriptorProto& proto);
  // NOTE(review): declared alongside the cross-linking passes; what it does
  // with the file is not visible from this declaration.
  void SuggestFieldNumbers(FileDescriptor* file,
                           const FileDescriptorProto& proto);
4478 
  // Checks that the extension field matches what is declared.
  // `declared_full_name` and `declared_type_name` carry the declared values.
  // NOTE(review): `is_repeated` presumably states whether the declaration
  // marks the extension as repeated -- confirm against the definition.
  void CheckExtensionDeclaration(const FieldDescriptor& field,
                                 const FieldDescriptorProto& proto,
                                 absl::string_view declared_full_name,
                                 absl::string_view declared_type_name,
                                 bool is_repeated);
  // Checks that the extension field type matches the declared type. It also
  // handles message types that look like non-message types such as "fixed64" vs
  // ".fixed64".
  void CheckExtensionDeclarationFieldType(const FieldDescriptor& field,
                                          const FieldDescriptorProto& proto,
                                          absl::string_view type);
4491 
  // A helper class for interpreting options.
  class OptionInterpreter {
   public:
    // Creates an interpreter that operates in the context of the pool of the
    // specified builder, which must not be nullptr. We don't take ownership of
    // the builder.
    explicit OptionInterpreter(DescriptorBuilder* builder);
    // Not copyable.
    OptionInterpreter(const OptionInterpreter&) = delete;
    OptionInterpreter& operator=(const OptionInterpreter&) = delete;

    ~OptionInterpreter();

    // Interprets the uninterpreted options in the specified Options message.
    // On error, calls AddError() on the underlying builder and returns false.
    // Otherwise returns true.
    bool InterpretOptionExtensions(OptionsToInterpret* options_to_interpret);

    // Interprets the uninterpreted feature options in the specified Options
    // message. On error, calls AddError() on the underlying builder and returns
    // false. Otherwise returns true.
    bool InterpretNonExtensionOptions(OptionsToInterpret* options_to_interpret);

    // Updates the given source code info by re-writing uninterpreted option
    // locations to refer to the corresponding interpreted option.
    void UpdateSourceCodeInfo(SourceCodeInfo* info);

    class AggregateOptionFinder;

   private:
    // NOTE(review): presumably the shared implementation behind the two
    // public Interpret* methods above, selected by `skip_extensions` --
    // confirm against the definition.
    bool InterpretOptionsImpl(OptionsToInterpret* options_to_interpret,
                              bool skip_extensions);

    // Interprets uninterpreted_option_ on the specified message, which
    // must be the mutable copy of the original options message to which
    // uninterpreted_option_ belongs. The given src_path is the source
    // location path to the uninterpreted option, and options_path is the
    // source location path to the options message. The location paths are
    // recorded and then used in UpdateSourceCodeInfo.
    // NOTE(review): this comment used to describe a "features boolean", but
    // the parameter is named `skip_extensions`; presumably it selects between
    // skipping extension options and interpreting only them -- confirm
    // against the definition.
    bool InterpretSingleOption(Message* options,
                               const std::vector<int>& src_path,
                               const std::vector<int>& options_path,
                               bool skip_extensions);

    // Adds the uninterpreted_option to the given options message verbatim.
    // Used when AllowUnknownDependencies() is in effect and we can't find
    // the option's definition.
    void AddWithoutInterpreting(const UninterpretedOption& uninterpreted_option,
                                Message* options);

    // A recursive helper function that drills into the intermediate fields
    // in unknown_fields to check if field innermost_field is set on the
    // innermost message. Returns false and sets an error if so.
    bool ExamineIfOptionIsSet(
        std::vector<const FieldDescriptor*>::const_iterator
            intermediate_fields_iter,
        std::vector<const FieldDescriptor*>::const_iterator
            intermediate_fields_end,
        const FieldDescriptor* innermost_field,
        const std::string& debug_msg_name,
        const UnknownFieldSet& unknown_fields);

    // Validates the value for the option field of the currently interpreted
    // option and then sets it on unknown_fields.
    bool SetOptionValue(const FieldDescriptor* option_field,
                        UnknownFieldSet* unknown_fields);

    // Parses an aggregate value for a CPPTYPE_MESSAGE option and
    // saves it into *unknown_fields.
    bool SetAggregateOption(const FieldDescriptor* option_field,
                            UnknownFieldSet* unknown_fields);

    // Convenience functions to set an int field the right way, depending on
    // its wire type (a single int CppType can represent multiple wire types).
    void SetInt32(int number, int32_t value, FieldDescriptor::Type type,
                  UnknownFieldSet* unknown_fields);
    void SetInt64(int number, int64_t value, FieldDescriptor::Type type,
                  UnknownFieldSet* unknown_fields);
    void SetUInt32(int number, uint32_t value, FieldDescriptor::Type type,
                   UnknownFieldSet* unknown_fields);
    void SetUInt64(int number, uint64_t value, FieldDescriptor::Type type,
                   UnknownFieldSet* unknown_fields);

    // A helper function that adds an error at the specified location of the
    // option we're currently interpreting, and returns false. The error is
    // reported through the builder against options_to_interpret_->element_name
    // and *uninterpreted_option_, so both must be set when this is called.
    bool AddOptionError(DescriptorPool::ErrorCollector::ErrorLocation location,
                        absl::FunctionRef<std::string()> make_error) {
      builder_->AddError(options_to_interpret_->element_name,
                         *uninterpreted_option_, location, make_error);
      return false;
    }

    // A helper function that adds an error at the location of the option name
    // and returns false. When PROTOBUF_INTERNAL_IGNORE_FIELD_NAME_ERRORS_ is
    // defined, no error is recorded and true is returned instead.
    bool AddNameError(absl::FunctionRef<std::string()> make_error) {
#ifdef PROTOBUF_INTERNAL_IGNORE_FIELD_NAME_ERRORS_
      return true;
#else   // PROTOBUF_INTERNAL_IGNORE_FIELD_NAME_ERRORS_
      return AddOptionError(DescriptorPool::ErrorCollector::OPTION_NAME,
                            make_error);
#endif  // PROTOBUF_INTERNAL_IGNORE_FIELD_NAME_ERRORS_
    }

    // A helper function that adds an error at the location of the option
    // value and returns false.
    bool AddValueError(absl::FunctionRef<std::string()> make_error) {
      return AddOptionError(DescriptorPool::ErrorCollector::OPTION_VALUE,
                            make_error);
    }

    // We interpret against this builder's pool. Is never nullptr. We don't own
    // this pointer.
    DescriptorBuilder* builder_;

    // The options we're currently interpreting, or nullptr if we're not in a
    // call to InterpretOptions.
    const OptionsToInterpret* options_to_interpret_;

    // The option we're currently interpreting within options_to_interpret_, or
    // nullptr if we're not in a call to InterpretOptions(). This points to a
    // submessage of the original option, not the mutable copy. Therefore we
    // can use it to find locations recorded by the parser.
    const UninterpretedOption* uninterpreted_option_;

    // This maps the element path of uninterpreted options to the element path
    // of the resulting interpreted option. This is used to modify a file's
    // source code info to account for option interpretation.
    absl::flat_hash_map<std::vector<int>, std::vector<int>> interpreted_paths_;

    // This maps the path to a repeated option field to the known number of
    // elements the field contains. This is used to compute the index portion
    // of the element path when interpreting a single option.
    absl::flat_hash_map<std::vector<int>, int> repeated_option_counts_;

    // Factory used to create the dynamic messages we need to parse
    // any aggregate option values we encounter.
    DynamicMessageFactory dynamic_factory_;
  };
4631 
  // Work-around for broken compilers:  According to the C++ standard,
  // OptionInterpreter should have access to the private members of any class
  // which has declared DescriptorBuilder as a friend.  Unfortunately some old
  // versions of GCC and other compilers do not implement this correctly.  So,
  // we have to have these intermediate methods (the static accessors declared
  // right after the friend declarations) to provide access.  We also
  // redundantly declare OptionInterpreter a friend just to make things extra
  // clear for these bad compilers.
  friend class OptionInterpreter;
  friend class OptionInterpreter::AggregateOptionFinder;
4641 
get_allow_unknown(const DescriptorPool * pool)4642   static inline bool get_allow_unknown(const DescriptorPool* pool) {
4643     return pool->allow_unknown_;
4644   }
get_enforce_weak(const DescriptorPool * pool)4645   static inline bool get_enforce_weak(const DescriptorPool* pool) {
4646     return pool->enforce_weak_;
4647   }
get_is_placeholder(const Descriptor * descriptor)4648   static inline bool get_is_placeholder(const Descriptor* descriptor) {
4649     return descriptor != nullptr && descriptor->is_placeholder_;
4650   }
assert_mutex_held(const DescriptorPool * pool)4651   static inline void assert_mutex_held(const DescriptorPool* pool) {
4652     if (pool->mutex_ != nullptr) {
4653       pool->mutex_->AssertHeld();
4654     }
4655   }
4656 
  // Validation passes. Must be run only after options have been interpreted.
  //
  // NOTE: Validation code must only reference the options in the mutable
  // descriptors, which are the ones that have been interpreted. The const
  // proto references are passed in only so they can be provided to calls to
  // AddError(). Do not look at their options, which have not been interpreted.
  //
  // ValidateOptions is overloaded per descriptor kind; each overload takes the
  // built descriptor and the proto it was built from.
  void ValidateOptions(const FileDescriptor* file,
                       const FileDescriptorProto& proto);
  void ValidateFileFeatures(const FileDescriptor* file,
                            const FileDescriptorProto& proto);
  void ValidateOptions(const Descriptor* message, const DescriptorProto& proto);
  void ValidateOptions(const OneofDescriptor* oneof,
                       const OneofDescriptorProto& proto);
  void ValidateOptions(const FieldDescriptor* field,
                       const FieldDescriptorProto& proto);
  void ValidateFieldFeatures(const FieldDescriptor* field,
                             const FieldDescriptorProto& proto);
  void ValidateOptions(const EnumDescriptor* enm,
                       const EnumDescriptorProto& proto);
  void ValidateOptions(const EnumValueDescriptor* enum_value,
                       const EnumValueDescriptorProto& proto);
ValidateOptions(const Descriptor::ExtensionRange * range,const DescriptorProto::ExtensionRange & proto)4678   void ValidateOptions(const Descriptor::ExtensionRange* range,
4679                        const DescriptorProto::ExtensionRange& proto) {}
  // Extension-range validation takes the message proto and the built message.
  void ValidateExtensionRangeOptions(const DescriptorProto& proto,
                                     const Descriptor& message);
  // NOTE(review): `full_name_set` is passed by mutable reference; presumably
  // it accumulates the declared full names seen so far so duplicates can be
  // detected -- confirm against the definition.
  void ValidateExtensionDeclaration(
      absl::string_view full_name,
      const RepeatedPtrField<ExtensionRangeOptions_Declaration>& declarations,
      const DescriptorProto_ExtensionRange& proto,
      absl::flat_hash_set<absl::string_view>& full_name_set);
  void ValidateOptions(const ServiceDescriptor* service,
                       const ServiceDescriptorProto& proto);
  void ValidateOptions(const MethodDescriptor* method,
                       const MethodDescriptorProto& proto);
  // proto3-specific validation at file, message and field level.
  void ValidateProto3(const FileDescriptor* file,
                      const FileDescriptorProto& proto);
  void ValidateProto3Message(const Descriptor* message,
                             const DescriptorProto& proto);
  void ValidateProto3Field(const FieldDescriptor* field,
                           const FieldDescriptorProto& proto);

  // Returns true if the map entry message is compatible with the
  // auto-generated entry message from map fields syntax.
  bool ValidateMapEntry(const FieldDescriptor* field,
                        const FieldDescriptorProto& proto);

  // Recursively detects naming conflicts with map entry types for a
  // better error message.
  void DetectMapConflicts(const Descriptor* message,
                          const DescriptorProto& proto);

  // NOTE(review): presumably validates the field's jstype option -- confirm
  // against the definition.
  void ValidateJSType(const FieldDescriptor* field,
                      const FieldDescriptorProto& proto);
4710 };
4711 
BuildFile(const FileDescriptorProto & proto)4712 const FileDescriptor* DescriptorPool::BuildFile(
4713     const FileDescriptorProto& proto) {
4714   return BuildFileCollectingErrors(proto, nullptr);
4715 }
4716 
BuildFileCollectingErrors(const FileDescriptorProto & proto,ErrorCollector * error_collector)4717 const FileDescriptor* DescriptorPool::BuildFileCollectingErrors(
4718     const FileDescriptorProto& proto, ErrorCollector* error_collector) {
4719   ABSL_CHECK(fallback_database_ == nullptr)
4720       << "Cannot call BuildFile on a DescriptorPool that uses a "
4721          "DescriptorDatabase.  You must instead find a way to get your file "
4722          "into the underlying database.";
4723   ABSL_CHECK(mutex_ == nullptr);  // Implied by the above ABSL_CHECK.
4724   tables_->known_bad_symbols_.clear();
4725   tables_->known_bad_files_.clear();
4726   build_started_ = true;
4727   DeferredValidation deferred_validation(this, error_collector);
4728   const FileDescriptor* file =
4729       DescriptorBuilder::New(this, tables_.get(), deferred_validation,
4730                              error_collector)
4731           ->BuildFile(proto);
4732   if (deferred_validation.Validate()) {
4733     return file;
4734   }
4735   return nullptr;
4736 }
4737 
BuildFileFromDatabase(const FileDescriptorProto & proto,DeferredValidation & deferred_validation) const4738 const FileDescriptor* DescriptorPool::BuildFileFromDatabase(
4739     const FileDescriptorProto& proto,
4740     DeferredValidation& deferred_validation) const {
4741   mutex_->AssertHeld();
4742   build_started_ = true;
4743   if (tables_->known_bad_files_.contains(proto.name())) {
4744     return nullptr;
4745   }
4746   const FileDescriptor* result;
4747   const auto build_file = [&] {
4748     result = DescriptorBuilder::New(this, tables_.get(), deferred_validation,
4749                                     default_error_collector_)
4750                  ->BuildFile(proto);
4751   };
4752   if (dispatcher_ != nullptr) {
4753     (*dispatcher_)(build_file);
4754   } else {
4755     build_file();
4756   }
4757   if (result == nullptr) {
4758     tables_->known_bad_files_.insert(proto.name());
4759   }
4760   return result;
4761 }
4762 
SetFeatureSetDefaults(FeatureSetDefaults spec)4763 absl::Status DescriptorPool::SetFeatureSetDefaults(FeatureSetDefaults spec) {
4764   if (build_started_) {
4765     return absl::FailedPreconditionError(
4766         "Feature set defaults can't be changed once the pool has started "
4767         "building.");
4768   }
4769   if (spec.minimum_edition() > spec.maximum_edition()) {
4770     return absl::InvalidArgumentError(
4771         absl::StrCat("Invalid edition range ", spec.minimum_edition(), " to ",
4772                      spec.maximum_edition(), "."));
4773   }
4774   Edition prev_edition = EDITION_UNKNOWN;
4775   for (const auto& edition_default : spec.defaults()) {
4776     if (edition_default.edition() == EDITION_UNKNOWN) {
4777       return absl::InvalidArgumentError(absl::StrCat(
4778           "Invalid edition ", edition_default.edition(), " specified."));
4779     }
4780     if (edition_default.edition() <= prev_edition) {
4781       return absl::InvalidArgumentError(absl::StrCat(
4782           "Feature set defaults are not strictly increasing.  Edition ",
4783           prev_edition, " is greater than or equal to edition ",
4784           edition_default.edition(), "."));
4785     }
4786     prev_edition = edition_default.edition();
4787   }
4788   feature_set_defaults_spec_ =
4789       absl::make_unique<FeatureSetDefaults>(std::move(spec));
4790   return absl::OkStatus();
4791 }
4792 
DescriptorBuilder(const DescriptorPool * pool,DescriptorPool::Tables * tables,DescriptorPool::DeferredValidation & deferred_validation,DescriptorPool::ErrorCollector * error_collector)4793 DescriptorBuilder::DescriptorBuilder(
4794     const DescriptorPool* pool, DescriptorPool::Tables* tables,
4795     DescriptorPool::DeferredValidation& deferred_validation,
4796     DescriptorPool::ErrorCollector* error_collector)
4797     : pool_(pool),
4798       tables_(tables),
4799       deferred_validation_(deferred_validation),
4800       error_collector_(error_collector),
4801       had_errors_(false),
4802       possible_undeclared_dependency_(nullptr),
4803       undefine_resolved_name_("") {
4804   // Ensure that any lazily loaded static initializers from the generated pool
4805   // (e.g. from bootstrapped protos) are run before building any descriptors. We
4806   // have to avoid registering these pre-main, because we need to ensure that
4807   // the linker --gc-sections step can strip out the full runtime if it is
4808   // unused.
4809   PROTOBUF_UNUSED static std::true_type lazy_register =
4810       (internal::ExtensionSet::RegisterMessageExtension(
4811            &FeatureSet::default_instance(), pb::cpp.number(),
4812            FieldDescriptor::TYPE_MESSAGE, false, false,
4813            &pb::CppFeatures::default_instance(),
4814            nullptr,
4815            internal::LazyAnnotation::kUndefined),
4816        std::true_type{});
4817 }
4818 
// Defaulted destructor, defined out of line.
DescriptorBuilder::~DescriptorBuilder() = default;
4820 
AddError(const absl::string_view element_name,const Message & descriptor,DescriptorPool::ErrorCollector::ErrorLocation location,absl::FunctionRef<std::string ()> make_error)4821 PROTOBUF_NOINLINE void DescriptorBuilder::AddError(
4822     const absl::string_view element_name, const Message& descriptor,
4823     DescriptorPool::ErrorCollector::ErrorLocation location,
4824     absl::FunctionRef<std::string()> make_error) {
4825   std::string error = make_error();
4826   if (error_collector_ == nullptr) {
4827     if (!had_errors_) {
4828       ABSL_LOG(ERROR) << "Invalid proto descriptor for file \"" << filename_
4829                       << "\":";
4830     }
4831     ABSL_LOG(ERROR) << "  " << element_name << ": " << error;
4832   } else {
4833     error_collector_->RecordError(filename_, element_name, &descriptor,
4834                                   location, error);
4835   }
4836   had_errors_ = true;
4837 }
4838 
AddError(const absl::string_view element_name,const Message & descriptor,DescriptorPool::ErrorCollector::ErrorLocation location,const char * error)4839 PROTOBUF_NOINLINE void DescriptorBuilder::AddError(
4840     const absl::string_view element_name, const Message& descriptor,
4841     DescriptorPool::ErrorCollector::ErrorLocation location, const char* error) {
4842   AddError(element_name, descriptor, location, [error] { return error; });
4843 }
4844 
AddNotDefinedError(const absl::string_view element_name,const Message & descriptor,DescriptorPool::ErrorCollector::ErrorLocation location,const absl::string_view undefined_symbol)4845 PROTOBUF_NOINLINE void DescriptorBuilder::AddNotDefinedError(
4846     const absl::string_view element_name, const Message& descriptor,
4847     DescriptorPool::ErrorCollector::ErrorLocation location,
4848     const absl::string_view undefined_symbol) {
4849   if (possible_undeclared_dependency_ == nullptr &&
4850       undefine_resolved_name_.empty()) {
4851     AddError(element_name, descriptor, location, [&] {
4852       return absl::StrCat("\"", undefined_symbol, "\" is not defined.");
4853     });
4854   } else {
4855     if (possible_undeclared_dependency_ != nullptr) {
4856       AddError(element_name, descriptor, location, [&] {
4857         return absl::StrCat("\"", possible_undeclared_dependency_name_,
4858                             "\" seems to be defined in \"",
4859                             possible_undeclared_dependency_->name(),
4860                             "\", which is not "
4861                             "imported by \"",
4862                             filename_,
4863                             "\".  To use it here, please "
4864                             "add the necessary import.");
4865       });
4866     }
4867     if (!undefine_resolved_name_.empty()) {
4868       AddError(element_name, descriptor, location, [&] {
4869         return absl::StrCat(
4870             "\"", undefined_symbol, "\" is resolved to \"",
4871             undefine_resolved_name_,
4872             "\", which is not defined. "
4873             "The innermost scope is searched first in name resolution. "
4874             "Consider using a leading '.'(i.e., \".",
4875             undefined_symbol, "\") to start from the outermost scope.");
4876       });
4877     }
4878   }
4879 }
4880 
AddWarning(const absl::string_view element_name,const Message & descriptor,DescriptorPool::ErrorCollector::ErrorLocation location,absl::FunctionRef<std::string ()> make_error)4881 PROTOBUF_NOINLINE void DescriptorBuilder::AddWarning(
4882     const absl::string_view element_name, const Message& descriptor,
4883     DescriptorPool::ErrorCollector::ErrorLocation location,
4884     absl::FunctionRef<std::string()> make_error) {
4885   std::string error = make_error();
4886   if (error_collector_ == nullptr) {
4887     ABSL_LOG(WARNING) << filename_ << " " << element_name << ": " << error;
4888   } else {
4889     error_collector_->RecordWarning(filename_, element_name, &descriptor,
4890                                     location, error);
4891   }
4892 }
4893 
AddWarning(const absl::string_view element_name,const Message & descriptor,DescriptorPool::ErrorCollector::ErrorLocation location,const char * error)4894 PROTOBUF_NOINLINE void DescriptorBuilder::AddWarning(
4895     const absl::string_view element_name, const Message& descriptor,
4896     DescriptorPool::ErrorCollector::ErrorLocation location, const char* error) {
4897   AddWarning(element_name, descriptor, location,
4898              [error]() -> std::string { return error; });
4899 }
4900 
IsInPackage(const FileDescriptor * file,absl::string_view package_name)4901 bool DescriptorBuilder::IsInPackage(const FileDescriptor* file,
4902                                     absl::string_view package_name) {
4903   return absl::StartsWith(file->package(), package_name) &&
4904          (file->package().size() == package_name.size() ||
4905           file->package()[package_name.size()] == '.');
4906 }
4907 
RecordPublicDependencies(const FileDescriptor * file)4908 void DescriptorBuilder::RecordPublicDependencies(const FileDescriptor* file) {
4909   if (file == nullptr || !dependencies_.insert(file).second) return;
4910   for (int i = 0; file != nullptr && i < file->public_dependency_count(); i++) {
4911     RecordPublicDependencies(file->public_dependency(i));
4912   }
4913 }
4914 
FindSymbolNotEnforcingDepsHelper(const DescriptorPool * pool,const absl::string_view name,bool build_it)4915 Symbol DescriptorBuilder::FindSymbolNotEnforcingDepsHelper(
4916     const DescriptorPool* pool, const absl::string_view name, bool build_it) {
4917   // If we are looking at an underlay, we must lock its mutex_, since we are
4918   // accessing the underlay's tables_ directly.
4919   absl::MutexLockMaybe lock((pool == pool_) ? nullptr : pool->mutex_);
4920 
4921   Symbol result = pool->tables_->FindSymbol(name);
4922   if (result.IsNull() && pool->underlay_ != nullptr) {
4923     // Symbol not found; check the underlay.
4924     result = FindSymbolNotEnforcingDepsHelper(pool->underlay_, name);
4925   }
4926 
4927   if (result.IsNull()) {
4928     // With lazily_build_dependencies_, a symbol lookup at cross link time is
4929     // not guaranteed to be successful. In most cases, build_it will be false,
4930     // which intentionally prevents us from building an import until it's
4931     // actually needed. In some cases, like registering an extension, we want
4932     // to build the file containing the symbol, and build_it will be set.
4933     // Also, build_it will be true when !lazily_build_dependencies_, to provide
4934     // better error reporting of missing dependencies.
4935     if (build_it &&
4936         pool->TryFindSymbolInFallbackDatabase(name, deferred_validation_)) {
4937       result = pool->tables_->FindSymbol(name);
4938     }
4939   }
4940 
4941   return result;
4942 }
4943 
FindSymbolNotEnforcingDeps(const absl::string_view name,bool build_it)4944 Symbol DescriptorBuilder::FindSymbolNotEnforcingDeps(
4945     const absl::string_view name, bool build_it) {
4946   Symbol result = FindSymbolNotEnforcingDepsHelper(pool_, name, build_it);
4947   // Only find symbols which were defined in this file or one of its
4948   // dependencies.
4949   const FileDescriptor* file = result.GetFile();
4950   if ((file == file_ || dependencies_.contains(file)) && !result.IsPackage()) {
4951     unused_dependency_.erase(file);
4952   }
4953   return result;
4954 }
4955 
FindSymbol(const absl::string_view name,bool build_it)4956 Symbol DescriptorBuilder::FindSymbol(const absl::string_view name,
4957                                      bool build_it) {
4958   Symbol result = FindSymbolNotEnforcingDeps(name, build_it);
4959 
4960   if (result.IsNull()) return result;
4961 
4962   if (!pool_->enforce_dependencies_) {
4963     // Hack for CompilerUpgrader, and also used for lazily_build_dependencies_
4964     return result;
4965   }
4966 
4967   // Only find symbols which were defined in this file or one of its
4968   // dependencies.
4969   const FileDescriptor* file = result.GetFile();
4970   if (file == file_ || dependencies_.contains(file)) {
4971     return result;
4972   }
4973 
4974   if (result.IsPackage()) {
4975     // Arg, this is overcomplicated.  The symbol is a package name.  It could
4976     // be that the package was defined in multiple files.  result.GetFile()
4977     // returns the first file we saw that used this package.  We've determined
4978     // that that file is not a direct dependency of the file we are currently
4979     // building, but it could be that some other file which *is* a direct
4980     // dependency also defines the same package.  We can't really rule out this
4981     // symbol unless none of the dependencies define it.
4982     if (IsInPackage(file_, name)) return result;
4983     for (const auto* dep : dependencies_) {
4984       // Note:  A dependency may be nullptr if it was not found or had errors.
4985       if (dep != nullptr && IsInPackage(dep, name)) return result;
4986     }
4987   }
4988 
4989   possible_undeclared_dependency_ = file;
4990   possible_undeclared_dependency_name_ = std::string(name);
4991   return Symbol();
4992 }
4993 
LookupSymbolNoPlaceholder(const absl::string_view name,const absl::string_view relative_to,ResolveMode resolve_mode,bool build_it)4994 Symbol DescriptorBuilder::LookupSymbolNoPlaceholder(
4995     const absl::string_view name, const absl::string_view relative_to,
4996     ResolveMode resolve_mode, bool build_it) {
4997   possible_undeclared_dependency_ = nullptr;
4998   undefine_resolved_name_.clear();
4999 
5000   if (!name.empty() && name[0] == '.') {
5001     // Fully-qualified name.
5002     return FindSymbol(name.substr(1), build_it);
5003   }
5004 
5005   // If name is something like "Foo.Bar.baz", and symbols named "Foo" are
5006   // defined in multiple parent scopes, we only want to find "Bar.baz" in the
5007   // innermost one.  E.g., the following should produce an error:
5008   //   message Bar { message Baz {} }
5009   //   message Foo {
5010   //     message Bar {
5011   //     }
5012   //     optional Bar.Baz baz = 1;
5013   //   }
5014   // So, we look for just "Foo" first, then look for "Bar.baz" within it if
5015   // found.
5016   std::string::size_type name_dot_pos = name.find_first_of('.');
5017   absl::string_view first_part_of_name;
5018   if (name_dot_pos == std::string::npos) {
5019     first_part_of_name = name;
5020   } else {
5021     first_part_of_name = name.substr(0, name_dot_pos);
5022   }
5023 
5024   std::string scope_to_try(relative_to);
5025 
5026   while (true) {
5027     // Chop off the last component of the scope.
5028     std::string::size_type dot_pos = scope_to_try.find_last_of('.');
5029     if (dot_pos == std::string::npos) {
5030       return FindSymbol(name, build_it);
5031     } else {
5032       scope_to_try.erase(dot_pos);
5033     }
5034 
5035     // Append ".first_part_of_name" and try to find.
5036     std::string::size_type old_size = scope_to_try.size();
5037     absl::StrAppend(&scope_to_try, ".", first_part_of_name);
5038     Symbol result = FindSymbol(scope_to_try, build_it);
5039     if (!result.IsNull()) {
5040       if (first_part_of_name.size() < name.size()) {
5041         // name is a compound symbol, of which we only found the first part.
5042         // Now try to look up the rest of it.
5043         if (result.IsAggregate()) {
5044           absl::StrAppend(&scope_to_try,
5045                           name.substr(first_part_of_name.size()));
5046           result = FindSymbol(scope_to_try, build_it);
5047           if (result.IsNull()) {
5048             undefine_resolved_name_ = scope_to_try;
5049           }
5050           return result;
5051         } else {
5052           // We found a symbol but it's not an aggregate.  Continue the loop.
5053         }
5054       } else {
5055         if (resolve_mode == LOOKUP_TYPES && !result.IsType()) {
5056           // We found a symbol but it's not a type.  Continue the loop.
5057         } else {
5058           return result;
5059         }
5060       }
5061     }
5062 
5063     // Not found.  Remove the name so we can try again.
5064     scope_to_try.erase(old_size);
5065   }
5066 }
5067 
LookupSymbol(const absl::string_view name,const absl::string_view relative_to,DescriptorPool::PlaceholderType placeholder_type,ResolveMode resolve_mode,bool build_it)5068 Symbol DescriptorBuilder::LookupSymbol(
5069     const absl::string_view name, const absl::string_view relative_to,
5070     DescriptorPool::PlaceholderType placeholder_type, ResolveMode resolve_mode,
5071     bool build_it) {
5072   Symbol result =
5073       LookupSymbolNoPlaceholder(name, relative_to, resolve_mode, build_it);
5074   if (result.IsNull() && pool_->allow_unknown_) {
5075     // Not found, but AllowUnknownDependencies() is enabled.  Return a
5076     // placeholder instead.
5077     result = pool_->NewPlaceholderWithMutexHeld(name, placeholder_type);
5078   }
5079   return result;
5080 }
5081 
ValidateQualifiedName(absl::string_view name)5082 static bool ValidateQualifiedName(absl::string_view name) {
5083   bool last_was_period = false;
5084 
5085   for (char character : name) {
5086     // I don't trust isalnum() due to locales.  :(
5087     if (('a' <= character && character <= 'z') ||
5088         ('A' <= character && character <= 'Z') ||
5089         ('0' <= character && character <= '9') || (character == '_')) {
5090       last_was_period = false;
5091     } else if (character == '.') {
5092       if (last_was_period) return false;
5093       last_was_period = true;
5094     } else {
5095       return false;
5096     }
5097   }
5098 
5099   return !name.empty() && !last_was_period;
5100 }
5101 
NewPlaceholder(absl::string_view name,PlaceholderType placeholder_type) const5102 Symbol DescriptorPool::NewPlaceholder(absl::string_view name,
5103                                       PlaceholderType placeholder_type) const {
5104   absl::MutexLockMaybe lock(mutex_);
5105   return NewPlaceholderWithMutexHeld(name, placeholder_type);
5106 }
5107 
NewPlaceholderWithMutexHeld(absl::string_view name,PlaceholderType placeholder_type) const5108 Symbol DescriptorPool::NewPlaceholderWithMutexHeld(
5109     absl::string_view name, PlaceholderType placeholder_type) const {
5110   if (mutex_) {
5111     mutex_->AssertHeld();
5112   }
5113   // Compute names.
5114   absl::string_view placeholder_full_name;
5115   absl::string_view placeholder_name;
5116   const std::string* placeholder_package;
5117 
5118   if (!ValidateQualifiedName(name)) return Symbol();
5119   if (name[0] == '.') {
5120     // Fully-qualified.
5121     placeholder_full_name = name.substr(1);
5122   } else {
5123     placeholder_full_name = name;
5124   }
5125 
5126   // Create the placeholders.
5127   internal::FlatAllocator alloc;
5128   alloc.PlanArray<FileDescriptor>(1);
5129   alloc.PlanArray<std::string>(2);
5130   if (placeholder_type == PLACEHOLDER_ENUM) {
5131     alloc.PlanArray<EnumDescriptor>(1);
5132     alloc.PlanArray<EnumValueDescriptor>(1);
5133     alloc.PlanArray<std::string>(2);  // names for the descriptor.
5134     alloc.PlanArray<std::string>(2);  // names for the value.
5135   } else {
5136     alloc.PlanArray<Descriptor>(1);
5137     alloc.PlanArray<std::string>(2);  // names for the descriptor.
5138     if (placeholder_type == PLACEHOLDER_EXTENDABLE_MESSAGE) {
5139       alloc.PlanArray<Descriptor::ExtensionRange>(1);
5140     }
5141   }
5142   alloc.FinalizePlanning(tables_);
5143 
5144   const std::string::size_type dotpos = placeholder_full_name.find_last_of('.');
5145   if (dotpos != std::string::npos) {
5146     placeholder_package =
5147         alloc.AllocateStrings(placeholder_full_name.substr(0, dotpos));
5148     placeholder_name = placeholder_full_name.substr(dotpos + 1);
5149   } else {
5150     placeholder_package = alloc.AllocateStrings("");
5151     placeholder_name = placeholder_full_name;
5152   }
5153 
5154   FileDescriptor* placeholder_file = NewPlaceholderFileWithMutexHeld(
5155       absl::StrCat(placeholder_full_name, ".placeholder.proto"), alloc);
5156   placeholder_file->package_ = placeholder_package;
5157 
5158   if (placeholder_type == PLACEHOLDER_ENUM) {
5159     placeholder_file->enum_type_count_ = 1;
5160     placeholder_file->enum_types_ = alloc.AllocateArray<EnumDescriptor>(1);
5161 
5162     EnumDescriptor* placeholder_enum = &placeholder_file->enum_types_[0];
5163     memset(static_cast<void*>(placeholder_enum), 0, sizeof(*placeholder_enum));
5164 
5165     placeholder_enum->all_names_ =
5166         alloc.AllocateStrings(placeholder_name, placeholder_full_name);
5167     placeholder_enum->file_ = placeholder_file;
5168     placeholder_enum->options_ = &EnumOptions::default_instance();
5169     placeholder_enum->proto_features_ = &FeatureSet::default_instance();
5170     placeholder_enum->merged_features_ = &FeatureSet::default_instance();
5171     placeholder_enum->is_placeholder_ = true;
5172     placeholder_enum->is_unqualified_placeholder_ = (name[0] != '.');
5173 
5174     // Enums must have at least one value.
5175     placeholder_enum->value_count_ = 1;
5176     placeholder_enum->values_ = alloc.AllocateArray<EnumValueDescriptor>(1);
5177     // Disable fast-path lookup for this enum.
5178     placeholder_enum->sequential_value_limit_ = -1;
5179 
5180     EnumValueDescriptor* placeholder_value = &placeholder_enum->values_[0];
5181     memset(static_cast<void*>(placeholder_value), 0,
5182            sizeof(*placeholder_value));
5183 
5184     // Note that enum value names are siblings of their type, not children.
5185     placeholder_value->all_names_ = alloc.AllocateStrings(
5186         "PLACEHOLDER_VALUE",
5187         placeholder_package->empty()
5188             ? "PLACEHOLDER_VALUE"
5189             : absl::StrCat(*placeholder_package, ".PLACEHOLDER_VALUE"));
5190 
5191     placeholder_value->number_ = 0;
5192     placeholder_value->type_ = placeholder_enum;
5193     placeholder_value->options_ = &EnumValueOptions::default_instance();
5194 
5195     return Symbol(placeholder_enum);
5196   } else {
5197     placeholder_file->message_type_count_ = 1;
5198     placeholder_file->message_types_ = alloc.AllocateArray<Descriptor>(1);
5199 
5200     Descriptor* placeholder_message = &placeholder_file->message_types_[0];
5201     memset(static_cast<void*>(placeholder_message), 0,
5202            sizeof(*placeholder_message));
5203 
5204     placeholder_message->all_names_ =
5205         alloc.AllocateStrings(placeholder_name, placeholder_full_name);
5206     placeholder_message->file_ = placeholder_file;
5207     placeholder_message->options_ = &MessageOptions::default_instance();
5208     placeholder_message->proto_features_ = &FeatureSet::default_instance();
5209     placeholder_message->merged_features_ = &FeatureSet::default_instance();
5210     placeholder_message->is_placeholder_ = true;
5211     placeholder_message->is_unqualified_placeholder_ = (name[0] != '.');
5212 
5213     if (placeholder_type == PLACEHOLDER_EXTENDABLE_MESSAGE) {
5214       placeholder_message->extension_range_count_ = 1;
5215       placeholder_message->extension_ranges_ =
5216           alloc.AllocateArray<Descriptor::ExtensionRange>(1);
5217       placeholder_message->extension_ranges_[0].start_ = 1;
5218       // kMaxNumber + 1 because ExtensionRange::end is exclusive.
5219       placeholder_message->extension_ranges_[0].end_ =
5220           FieldDescriptor::kMaxNumber + 1;
5221       placeholder_message->extension_ranges_[0].options_ = nullptr;
5222       placeholder_message->extension_ranges_[0].proto_features_ =
5223           &FeatureSet::default_instance();
5224       placeholder_message->extension_ranges_[0].merged_features_ =
5225           &FeatureSet::default_instance();
5226     }
5227 
5228     return Symbol(placeholder_message);
5229   }
5230 }
5231 
NewPlaceholderFile(const absl::string_view name) const5232 FileDescriptor* DescriptorPool::NewPlaceholderFile(
5233     const absl::string_view name) const {
5234   absl::MutexLockMaybe lock(mutex_);
5235   internal::FlatAllocator alloc;
5236   alloc.PlanArray<FileDescriptor>(1);
5237   alloc.PlanArray<std::string>(1);
5238   alloc.FinalizePlanning(tables_);
5239 
5240   return NewPlaceholderFileWithMutexHeld(name, alloc);
5241 }
5242 
NewPlaceholderFileWithMutexHeld(const absl::string_view name,internal::FlatAllocator & alloc) const5243 FileDescriptor* DescriptorPool::NewPlaceholderFileWithMutexHeld(
5244     const absl::string_view name, internal::FlatAllocator& alloc) const {
5245   if (mutex_) {
5246     mutex_->AssertHeld();
5247   }
5248   FileDescriptor* placeholder = alloc.AllocateArray<FileDescriptor>(1);
5249   memset(static_cast<void*>(placeholder), 0, sizeof(*placeholder));
5250 
5251   placeholder->name_ = alloc.AllocateStrings(name);
5252   placeholder->package_ = &internal::GetEmptyString();
5253   placeholder->pool_ = this;
5254   placeholder->options_ = &FileOptions::default_instance();
5255   placeholder->proto_features_ = &FeatureSet::default_instance();
5256   placeholder->merged_features_ = &FeatureSet::default_instance();
5257   placeholder->tables_ = &FileDescriptorTables::GetEmptyInstance();
5258   placeholder->source_code_info_ = &SourceCodeInfo::default_instance();
5259   placeholder->is_placeholder_ = true;
5260   placeholder->finished_building_ = true;
5261   // All other fields are zero or nullptr.
5262 
5263   return placeholder;
5264 }
5265 
AddSymbol(const absl::string_view full_name,const void * parent,const absl::string_view name,const Message & proto,Symbol symbol)5266 bool DescriptorBuilder::AddSymbol(const absl::string_view full_name,
5267                                   const void* parent,
5268                                   const absl::string_view name,
5269                                   const Message& proto, Symbol symbol) {
5270   // If the caller passed nullptr for the parent, the symbol is at file scope.
5271   // Use its file as the parent instead.
5272   if (parent == nullptr) parent = file_;
5273 
5274   if (absl::StrContains(full_name, '\0')) {
5275     AddError(full_name, proto, DescriptorPool::ErrorCollector::NAME, [&] {
5276       return absl::StrCat("\"", full_name, "\" contains null character.");
5277     });
5278     return false;
5279   }
5280   if (tables_->AddSymbol(full_name, symbol)) {
5281     if (!file_tables_->AddAliasUnderParent(parent, name, symbol)) {
5282       // This is only possible if there was already an error adding something of
5283       // the same name.
5284       if (!had_errors_) {
5285         ABSL_DLOG(FATAL) << "\"" << full_name
5286                          << "\" not previously defined in "
5287                             "symbols_by_name_, but was defined in "
5288                             "symbols_by_parent_; this shouldn't be possible.";
5289       }
5290       return false;
5291     }
5292     return true;
5293   } else {
5294     const FileDescriptor* other_file = tables_->FindSymbol(full_name).GetFile();
5295     if (other_file == file_) {
5296       std::string::size_type dot_pos = full_name.find_last_of('.');
5297       if (dot_pos == std::string::npos) {
5298         AddError(full_name, proto, DescriptorPool::ErrorCollector::NAME, [&] {
5299           return absl::StrCat("\"", full_name, "\" is already defined.");
5300         });
5301       } else {
5302         AddError(full_name, proto, DescriptorPool::ErrorCollector::NAME, [&] {
5303           return absl::StrCat("\"", full_name.substr(dot_pos + 1),
5304                               "\" is already defined in \"",
5305                               full_name.substr(0, dot_pos), "\".");
5306         });
5307       }
5308     } else {
5309       // Symbol seems to have been defined in a different file.
5310       AddError(full_name, proto, DescriptorPool::ErrorCollector::NAME, [&] {
5311         return absl::StrCat(
5312             "\"", full_name, "\" is already defined in file \"",
5313             (other_file == nullptr ? "null" : other_file->name()), "\".");
5314       });
5315     }
5316     return false;
5317   }
5318 }
5319 
AddPackage(const absl::string_view name,const Message & proto,FileDescriptor * file,bool toplevel)5320 void DescriptorBuilder::AddPackage(const absl::string_view name,
5321                                    const Message& proto, FileDescriptor* file,
5322                                    bool toplevel) {
5323   if (absl::StrContains(name, '\0')) {
5324     AddError(name, proto, DescriptorPool::ErrorCollector::NAME, [&] {
5325       return absl::StrCat("\"", name, "\" contains null character.");
5326     });
5327     return;
5328   }
5329 
5330   Symbol existing_symbol = tables_->FindSymbol(name);
5331   // It's OK to redefine a package.
5332   if (existing_symbol.IsNull()) {
5333     if (toplevel) {
5334       // It is the toplevel package name, so insert the descriptor directly.
5335       tables_->AddSymbol(file->package(), Symbol(file));
5336     } else {
5337       auto* package = tables_->Allocate<Symbol::Subpackage>();
5338       // If the name is the package name, then it is already in the arena.
5339       // If not, copy it there. It came from the call to AddPackage below.
5340       package->name_size = static_cast<int>(name.size());
5341       package->file = file;
5342       tables_->AddSymbol(name, Symbol(package));
5343     }
5344     // Also add parent package, if any.
5345     std::string::size_type dot_pos = name.find_last_of('.');
5346     if (dot_pos == std::string::npos) {
5347       // No parents.
5348       ValidateSymbolName(name, name, proto);
5349     } else {
5350       // Has parent.
5351       AddPackage(name.substr(0, dot_pos), proto, file, false);
5352       ValidateSymbolName(name.substr(dot_pos + 1), name, proto);
5353     }
5354   } else if (!existing_symbol.IsPackage()) {
5355     // Symbol seems to have been defined in a different file.
5356     const FileDescriptor* other_file = existing_symbol.GetFile();
5357     AddError(name, proto, DescriptorPool::ErrorCollector::NAME, [&] {
5358       return absl::StrCat("\"", name,
5359                           "\" is already defined (as something other than "
5360                           "a package) in file \"",
5361                           (other_file == nullptr ? "null" : other_file->name()),
5362                           "\".");
5363     });
5364   }
5365 }
5366 
ValidateSymbolName(const absl::string_view name,const absl::string_view full_name,const Message & proto)5367 void DescriptorBuilder::ValidateSymbolName(const absl::string_view name,
5368                                            const absl::string_view full_name,
5369                                            const Message& proto) {
5370   if (name.empty()) {
5371     AddError(full_name, proto, DescriptorPool::ErrorCollector::NAME,
5372              "Missing name.");
5373   } else {
5374     for (char character : name) {
5375       // I don't trust isalnum() due to locales.  :(
5376       if ((character < 'a' || 'z' < character) &&
5377           (character < 'A' || 'Z' < character) &&
5378           (character < '0' || '9' < character) && (character != '_')) {
5379         AddError(full_name, proto, DescriptorPool::ErrorCollector::NAME, [&] {
5380           return absl::StrCat("\"", name, "\" is not a valid identifier.");
5381         });
5382         return;
5383       }
5384     }
5385   }
5386 }
5387 
5388 // -------------------------------------------------------------------
5389 
5390 // This generic implementation is good for all descriptors except
5391 // FileDescriptor.
5392 template <class DescriptorT>
AllocateOptions(const typename DescriptorT::Proto & proto,DescriptorT * descriptor,int options_field_tag,absl::string_view option_name,internal::FlatAllocator & alloc)5393 void DescriptorBuilder::AllocateOptions(
5394     const typename DescriptorT::Proto& proto, DescriptorT* descriptor,
5395     int options_field_tag, absl::string_view option_name,
5396     internal::FlatAllocator& alloc) {
5397   std::vector<int> options_path;
5398   descriptor->GetLocationPath(&options_path);
5399   options_path.push_back(options_field_tag);
5400   auto options = AllocateOptionsImpl<DescriptorT>(
5401       descriptor->full_name(), descriptor->full_name(), proto, options_path,
5402       option_name, alloc);
5403   descriptor->options_ = options;
5404   descriptor->proto_features_ = &FeatureSet::default_instance();
5405   descriptor->merged_features_ = &FeatureSet::default_instance();
5406 }
5407 
5408 // We specialize for FileDescriptor.
AllocateOptions(const FileDescriptorProto & proto,FileDescriptor * descriptor,internal::FlatAllocator & alloc)5409 void DescriptorBuilder::AllocateOptions(const FileDescriptorProto& proto,
5410                                         FileDescriptor* descriptor,
5411                                         internal::FlatAllocator& alloc) {
5412   std::vector<int> options_path;
5413   options_path.push_back(FileDescriptorProto::kOptionsFieldNumber);
5414   // We add the dummy token so that LookupSymbol does the right thing.
5415   auto options = AllocateOptionsImpl<FileDescriptor>(
5416       absl::StrCat(descriptor->package(), ".dummy"), descriptor->name(), proto,
5417       options_path, "google.protobuf.FileOptions", alloc);
5418   descriptor->options_ = options;
5419   descriptor->proto_features_ = &FeatureSet::default_instance();
5420   descriptor->merged_features_ = &FeatureSet::default_instance();
5421 }
5422 
5423 template <class DescriptorT>
AllocateOptionsImpl(absl::string_view name_scope,absl::string_view element_name,const typename DescriptorT::Proto & proto,absl::Span<const int> options_path,absl::string_view option_name,internal::FlatAllocator & alloc)5424 const typename DescriptorT::OptionsType* DescriptorBuilder::AllocateOptionsImpl(
5425     absl::string_view name_scope, absl::string_view element_name,
5426     const typename DescriptorT::Proto& proto,
5427     absl::Span<const int> options_path, absl::string_view option_name,
5428     internal::FlatAllocator& alloc) {
5429   if (!proto.has_options()) {
5430     return &DescriptorT::OptionsType::default_instance();
5431   }
5432   const typename DescriptorT::OptionsType& orig_options = proto.options();
5433 
5434   auto* options = alloc.AllocateArray<typename DescriptorT::OptionsType>(1);
5435 
5436   if (!orig_options.IsInitialized()) {
5437     AddError(absl::StrCat(name_scope, ".", element_name), orig_options,
5438              DescriptorPool::ErrorCollector::OPTION_NAME,
5439              "Uninterpreted option is missing name or value.");
5440     return &DescriptorT::OptionsType::default_instance();
5441   }
5442 
5443   const bool parse_success =
5444       internal::ParseNoReflection(orig_options.SerializeAsString(), *options);
5445   ABSL_DCHECK(parse_success);
5446 
5447   // Don't add to options_to_interpret_ unless there were uninterpreted
5448   // options.  This not only avoids unnecessary work, but prevents a
5449   // bootstrapping problem when building descriptors for descriptor.proto.
5450   // descriptor.proto does not contain any uninterpreted options, but
5451   // attempting to interpret options anyway will cause
5452   // OptionsType::GetDescriptor() to be called which may then deadlock since
5453   // we're still trying to build it.
5454   if (options->uninterpreted_option_size() > 0) {
5455     options_to_interpret_.push_back(OptionsToInterpret(
5456         name_scope, element_name, options_path, &orig_options, options));
5457   }
5458 
5459   // If the custom option is in unknown fields, no need to interpret it.
5460   // Remove the dependency file from unused_dependency.
5461   const UnknownFieldSet& unknown_fields = orig_options.unknown_fields();
5462   if (!unknown_fields.empty()) {
5463     // Can not use options->GetDescriptor() which may case deadlock.
5464     Symbol msg_symbol = tables_->FindSymbol(option_name);
5465     if (msg_symbol.type() == Symbol::MESSAGE) {
5466       for (int i = 0; i < unknown_fields.field_count(); ++i) {
5467         assert_mutex_held(pool_);
5468         const FieldDescriptor* field =
5469             pool_->InternalFindExtensionByNumberNoLock(
5470                 msg_symbol.descriptor(), unknown_fields.field(i).number());
5471         if (field) {
5472           unused_dependency_.erase(field->file());
5473         }
5474       }
5475     }
5476   }
5477   return options;
5478 }
5479 
5480 template <class ProtoT, class OptionsT>
InferLegacyProtoFeatures(const ProtoT & proto,const OptionsT & options,Edition edition,FeatureSet & features)5481 static void InferLegacyProtoFeatures(const ProtoT& proto,
5482                                      const OptionsT& options, Edition edition,
5483                                      FeatureSet& features) {}
5484 
InferLegacyProtoFeatures(const FieldDescriptorProto & proto,const FieldOptions & options,Edition edition,FeatureSet & features)5485 static void InferLegacyProtoFeatures(const FieldDescriptorProto& proto,
5486                                      const FieldOptions& options,
5487                                      Edition edition, FeatureSet& features) {
5488   if (!features.MutableExtension(pb::cpp)->has_string_type()) {
5489     if (options.ctype() == FieldOptions::CORD) {
5490       features.MutableExtension(pb::cpp)->set_string_type(
5491           pb::CppFeatures::CORD);
5492     }
5493   }
5494 
5495   // Everything below is specifically for proto2/proto.
5496   if (!IsLegacyEdition(edition)) return;
5497 
5498   if (proto.label() == FieldDescriptorProto::LABEL_REQUIRED) {
5499     features.set_field_presence(FeatureSet::LEGACY_REQUIRED);
5500   }
5501   if (proto.type() == FieldDescriptorProto::TYPE_GROUP) {
5502     features.set_message_encoding(FeatureSet::DELIMITED);
5503   }
5504   if (options.packed()) {
5505     features.set_repeated_field_encoding(FeatureSet::PACKED);
5506   }
5507   if (edition == Edition::EDITION_PROTO3) {
5508     if (options.has_packed() && !options.packed()) {
5509       features.set_repeated_field_encoding(FeatureSet::EXPANDED);
5510     }
5511   }
5512 }
5513 
5514 // TODO: we should update proto code to not need ctype to be set
5515 // when string_type is set.
EnforceCTypeStringTypeConsistency(Edition edition,FieldDescriptor::CppType type,const pb::CppFeatures & cpp_features,FieldOptions & options)5516 static void EnforceCTypeStringTypeConsistency(
5517     Edition edition, FieldDescriptor::CppType type,
5518     const pb::CppFeatures& cpp_features, FieldOptions& options) {
5519   if (&options == &FieldOptions::default_instance()) return;
5520   if (type == FieldDescriptor::CPPTYPE_STRING) {
5521     switch (cpp_features.string_type()) {
5522       case pb::CppFeatures::CORD:
5523         options.set_ctype(FieldOptions::CORD);
5524         break;
5525       default:
5526         break;
5527     }
5528   }
5529 }
5530 
5531 template <class DescriptorT>
ResolveFeaturesImpl(Edition edition,const typename DescriptorT::Proto & proto,DescriptorT * descriptor,typename DescriptorT::OptionsType * options,internal::FlatAllocator & alloc,DescriptorPool::ErrorCollector::ErrorLocation error_location,bool force_merge)5532 void DescriptorBuilder::ResolveFeaturesImpl(
5533     Edition edition, const typename DescriptorT::Proto& proto,
5534     DescriptorT* descriptor, typename DescriptorT::OptionsType* options,
5535     internal::FlatAllocator& alloc,
5536     DescriptorPool::ErrorCollector::ErrorLocation error_location,
5537     bool force_merge) {
5538   const FeatureSet& parent_features = GetParentFeatures(descriptor);
5539   descriptor->proto_features_ = &FeatureSet::default_instance();
5540   descriptor->merged_features_ = &FeatureSet::default_instance();
5541 
5542   ABSL_CHECK(feature_resolver_.has_value());
5543 
5544   if (options->has_features()) {
5545     // Remove the features from the child's options proto to avoid leaking
5546     // internal details.
5547     descriptor->proto_features_ =
5548         tables_->InternFeatureSet(std::move(*options->mutable_features()));
5549     options->clear_features();
5550   }
5551 
5552   FeatureSet base_features = *descriptor->proto_features_;
5553 
5554   // Handle feature inference from proto2/proto3.
5555   if (IsLegacyEdition(edition)) {
5556     if (descriptor->proto_features_ != &FeatureSet::default_instance()) {
5557       AddError(descriptor->name(), proto, error_location,
5558                "Features are only valid under editions.");
5559     }
5560   }
5561   InferLegacyProtoFeatures(proto, *options, edition, base_features);
5562 
5563   if (base_features.ByteSizeLong() == 0 && !force_merge) {
5564     // Nothing to merge, and we aren't forcing it.
5565     descriptor->merged_features_ = &parent_features;
5566     return;
5567   }
5568 
5569   // Calculate the merged features for this target.
5570   absl::StatusOr<FeatureSet> merged =
5571       feature_resolver_->MergeFeatures(parent_features, base_features);
5572   if (!merged.ok()) {
5573     AddError(descriptor->name(), proto, error_location,
5574              [&] { return std::string(merged.status().message()); });
5575     return;
5576   }
5577 
5578   descriptor->merged_features_ = tables_->InternFeatureSet(*std::move(merged));
5579 }
5580 
5581 template <class DescriptorT>
ResolveFeatures(const typename DescriptorT::Proto & proto,DescriptorT * descriptor,typename DescriptorT::OptionsType * options,internal::FlatAllocator & alloc)5582 void DescriptorBuilder::ResolveFeatures(
5583     const typename DescriptorT::Proto& proto, DescriptorT* descriptor,
5584     typename DescriptorT::OptionsType* options,
5585     internal::FlatAllocator& alloc) {
5586   ResolveFeaturesImpl(descriptor->file()->edition(), proto, descriptor, options,
5587                       alloc, DescriptorPool::ErrorCollector::NAME);
5588 }
5589 
ResolveFeatures(const FileDescriptorProto & proto,FileDescriptor * descriptor,FileOptions * options,internal::FlatAllocator & alloc)5590 void DescriptorBuilder::ResolveFeatures(const FileDescriptorProto& proto,
5591                                         FileDescriptor* descriptor,
5592                                         FileOptions* options,
5593                                         internal::FlatAllocator& alloc) {
5594   // File descriptors always need their own merged feature set, even without
5595   // any explicit features.
5596   ResolveFeaturesImpl(descriptor->edition(), proto, descriptor, options, alloc,
5597                       DescriptorPool::ErrorCollector::EDITIONS,
5598                       /*force_merge=*/true);
5599 }
5600 
PostProcessFieldFeatures(FieldDescriptor & field,const FieldDescriptorProto & proto)5601 void DescriptorBuilder::PostProcessFieldFeatures(
5602     FieldDescriptor& field, const FieldDescriptorProto& proto) {
5603   // TODO This can be replace by a runtime check in `is_required`
5604   // once the `label` getter is hidden.
5605   if (field.features().field_presence() == FeatureSet::LEGACY_REQUIRED &&
5606       field.label_ == FieldDescriptor::LABEL_OPTIONAL) {
5607     field.label_ = FieldDescriptor::LABEL_REQUIRED;
5608   }
5609   // TODO This can be replace by a runtime check of `is_delimited`
5610   // once the `TYPE_GROUP` value is removed.
5611   if (field.type_ == FieldDescriptor::TYPE_MESSAGE &&
5612       !field.containing_type()->options().map_entry() &&
5613       field.features().message_encoding() == FeatureSet::DELIMITED) {
5614     Symbol type =
5615         LookupSymbol(proto.type_name(), field.full_name(),
5616                      DescriptorPool::PLACEHOLDER_MESSAGE, LOOKUP_TYPES, false);
5617     if (type.descriptor() == nullptr ||
5618         !type.descriptor()->options().map_entry()) {
5619       field.type_ = FieldDescriptor::TYPE_GROUP;
5620     }
5621   }
5622 }
5623 
// Recurring idiom in the BuildXXX functions: mirror the repeated field
// INPUT.NAME into the array OUTPUT->NAME##s_.  The element count is stored in
// OUTPUT->NAME##_count_, the array is taken from the `alloc` variable of the
// enclosing scope, and METHOD is invoked once per element with PARENT and the
// address of the slot to fill in.
#define BUILD_ARRAY(INPUT, OUTPUT, NAME, METHOD, PARENT)                 \
  OUTPUT->NAME##_count_ = INPUT.NAME##_size();                           \
  OUTPUT->NAME##s_ = alloc.AllocateArray<                                \
      std::remove_pointer_t<decltype(OUTPUT->NAME##s_)>>(                \
      INPUT.NAME##_size());                                              \
  for (int i = 0; i < INPUT.NAME##_size(); ++i) {                        \
    METHOD(INPUT.NAME(i), PARENT, OUTPUT->NAME##s_ + i, alloc);          \
  }
5634 
AddRecursiveImportError(const FileDescriptorProto & proto,int from_here)5635 PROTOBUF_NOINLINE void DescriptorBuilder::AddRecursiveImportError(
5636     const FileDescriptorProto& proto, int from_here) {
5637   auto make_error = [&] {
5638     std::string error_message("File recursively imports itself: ");
5639     for (size_t i = from_here; i < tables_->pending_files_.size(); i++) {
5640       error_message.append(tables_->pending_files_[i]);
5641       error_message.append(" -> ");
5642     }
5643     error_message.append(proto.name());
5644     return error_message;
5645   };
5646 
5647   if (static_cast<size_t>(from_here) < tables_->pending_files_.size() - 1) {
5648     AddError(tables_->pending_files_[from_here + 1], proto,
5649              DescriptorPool::ErrorCollector::IMPORT, make_error);
5650   } else {
5651     AddError(proto.name(), proto, DescriptorPool::ErrorCollector::IMPORT,
5652              make_error);
5653   }
5654 }
5655 
AddTwiceListedError(const FileDescriptorProto & proto,int index)5656 void DescriptorBuilder::AddTwiceListedError(const FileDescriptorProto& proto,
5657                                             int index) {
5658   AddError(proto.dependency(index), proto,
5659            DescriptorPool::ErrorCollector::IMPORT, [&] {
5660              return absl::StrCat("Import \"", proto.dependency(index),
5661                                  "\" was listed twice.");
5662            });
5663 }
5664 
AddImportError(const FileDescriptorProto & proto,int index)5665 void DescriptorBuilder::AddImportError(const FileDescriptorProto& proto,
5666                                        int index) {
5667   auto make_error = [&] {
5668     if (pool_->fallback_database_ == nullptr) {
5669       return absl::StrCat("Import \"", proto.dependency(index),
5670                           "\" has not been loaded.");
5671     }
5672     return absl::StrCat("Import \"", proto.dependency(index),
5673                         "\" was not found or had errors.");
5674   };
5675   AddError(proto.dependency(index), proto,
5676            DescriptorPool::ErrorCollector::IMPORT, make_error);
5677 }
5678 
ExistingFileMatchesProto(Edition edition,const FileDescriptor * existing_file,const FileDescriptorProto & proto)5679 PROTOBUF_NOINLINE static bool ExistingFileMatchesProto(
5680     Edition edition, const FileDescriptor* existing_file,
5681     const FileDescriptorProto& proto) {
5682   FileDescriptorProto existing_proto;
5683   existing_file->CopyTo(&existing_proto);
5684   if (edition == Edition::EDITION_PROTO2 && proto.has_syntax()) {
5685     existing_proto.set_syntax("proto2");
5686   }
5687 
5688   return existing_proto.SerializeAsString() == proto.SerializeAsString();
5689 }
5690 
5691 // These PlanAllocationSize functions will gather into the FlatAllocator all the
5692 // necessary memory allocations that BuildXXX functions below will do on the
5693 // Tables object.
5694 // They *must* be kept in sync. If we miss some PlanArray call we won't have
5695 // enough memory and will ABSL_CHECK-fail.
PlanAllocationSize(const RepeatedPtrField<EnumValueDescriptorProto> & values,internal::FlatAllocator & alloc)5696 static void PlanAllocationSize(
5697     const RepeatedPtrField<EnumValueDescriptorProto>& values,
5698     internal::FlatAllocator& alloc) {
5699   alloc.PlanArray<EnumValueDescriptor>(values.size());
5700   alloc.PlanArray<std::string>(2 * values.size());  // name + full_name
5701   for (const auto& v : values) {
5702     if (v.has_options()) alloc.PlanArray<EnumValueOptions>(1);
5703   }
5704 }
5705 
PlanAllocationSize(const RepeatedPtrField<EnumDescriptorProto> & enums,internal::FlatAllocator & alloc)5706 static void PlanAllocationSize(
5707     const RepeatedPtrField<EnumDescriptorProto>& enums,
5708     internal::FlatAllocator& alloc) {
5709   alloc.PlanArray<EnumDescriptor>(enums.size());
5710   alloc.PlanArray<std::string>(2 * enums.size());  // name + full_name
5711   for (const auto& e : enums) {
5712     if (e.has_options()) alloc.PlanArray<EnumOptions>(1);
5713     PlanAllocationSize(e.value(), alloc);
5714     alloc.PlanArray<EnumDescriptor::ReservedRange>(e.reserved_range_size());
5715     alloc.PlanArray<const std::string*>(e.reserved_name_size());
5716     alloc.PlanArray<std::string>(e.reserved_name_size());
5717   }
5718 }
5719 
PlanAllocationSize(const RepeatedPtrField<OneofDescriptorProto> & oneofs,internal::FlatAllocator & alloc)5720 static void PlanAllocationSize(
5721     const RepeatedPtrField<OneofDescriptorProto>& oneofs,
5722     internal::FlatAllocator& alloc) {
5723   alloc.PlanArray<OneofDescriptor>(oneofs.size());
5724   alloc.PlanArray<std::string>(2 * oneofs.size());  // name + full_name
5725   for (const auto& oneof : oneofs) {
5726     if (oneof.has_options()) alloc.PlanArray<OneofOptions>(1);
5727   }
5728 }
5729 
PlanAllocationSize(const RepeatedPtrField<FieldDescriptorProto> & fields,internal::FlatAllocator & alloc)5730 static void PlanAllocationSize(
5731     const RepeatedPtrField<FieldDescriptorProto>& fields,
5732     internal::FlatAllocator& alloc) {
5733   alloc.PlanArray<FieldDescriptor>(fields.size());
5734   for (const auto& field : fields) {
5735     if (field.has_options()) alloc.PlanArray<FieldOptions>(1);
5736     alloc.PlanFieldNames(field.name(),
5737                          field.has_json_name() ? &field.json_name() : nullptr);
5738     if (field.has_default_value() && field.has_type() &&
5739         (field.type() == FieldDescriptorProto::TYPE_STRING ||
5740          field.type() == FieldDescriptorProto::TYPE_BYTES)) {
5741       // For the default string value.
5742       alloc.PlanArray<std::string>(1);
5743     }
5744   }
5745 }
5746 
PlanAllocationSize(const RepeatedPtrField<DescriptorProto::ExtensionRange> & ranges,internal::FlatAllocator & alloc)5747 static void PlanAllocationSize(
5748     const RepeatedPtrField<DescriptorProto::ExtensionRange>& ranges,
5749     internal::FlatAllocator& alloc) {
5750   alloc.PlanArray<Descriptor::ExtensionRange>(ranges.size());
5751   for (const auto& r : ranges) {
5752     if (r.has_options()) alloc.PlanArray<ExtensionRangeOptions>(1);
5753   }
5754 }
5755 
PlanAllocationSize(const RepeatedPtrField<DescriptorProto> & messages,internal::FlatAllocator & alloc)5756 static void PlanAllocationSize(
5757     const RepeatedPtrField<DescriptorProto>& messages,
5758     internal::FlatAllocator& alloc) {
5759   alloc.PlanArray<Descriptor>(messages.size());
5760   alloc.PlanArray<std::string>(2 * messages.size());  // name + full_name
5761 
5762   for (const auto& message : messages) {
5763     if (message.has_options()) alloc.PlanArray<MessageOptions>(1);
5764     PlanAllocationSize(message.nested_type(), alloc);
5765     PlanAllocationSize(message.field(), alloc);
5766     PlanAllocationSize(message.extension(), alloc);
5767     PlanAllocationSize(message.extension_range(), alloc);
5768     alloc.PlanArray<Descriptor::ReservedRange>(message.reserved_range_size());
5769     alloc.PlanArray<const std::string*>(message.reserved_name_size());
5770     alloc.PlanArray<std::string>(message.reserved_name_size());
5771     PlanAllocationSize(message.enum_type(), alloc);
5772     PlanAllocationSize(message.oneof_decl(), alloc);
5773   }
5774 }
5775 
PlanAllocationSize(const RepeatedPtrField<MethodDescriptorProto> & methods,internal::FlatAllocator & alloc)5776 static void PlanAllocationSize(
5777     const RepeatedPtrField<MethodDescriptorProto>& methods,
5778     internal::FlatAllocator& alloc) {
5779   alloc.PlanArray<MethodDescriptor>(methods.size());
5780   alloc.PlanArray<std::string>(2 * methods.size());  // name + full_name
5781   for (const auto& m : methods) {
5782     if (m.has_options()) alloc.PlanArray<MethodOptions>(1);
5783   }
5784 }
5785 
PlanAllocationSize(const RepeatedPtrField<ServiceDescriptorProto> & services,internal::FlatAllocator & alloc)5786 static void PlanAllocationSize(
5787     const RepeatedPtrField<ServiceDescriptorProto>& services,
5788     internal::FlatAllocator& alloc) {
5789   alloc.PlanArray<ServiceDescriptor>(services.size());
5790   alloc.PlanArray<std::string>(2 * services.size());  // name + full_name
5791   for (const auto& service : services) {
5792     if (service.has_options()) alloc.PlanArray<ServiceOptions>(1);
5793     PlanAllocationSize(service.method(), alloc);
5794   }
5795 }
5796 
PlanAllocationSize(const FileDescriptorProto & proto,internal::FlatAllocator & alloc)5797 static void PlanAllocationSize(const FileDescriptorProto& proto,
5798                                internal::FlatAllocator& alloc) {
5799   alloc.PlanArray<FileDescriptor>(1);
5800   alloc.PlanArray<FileDescriptorTables>(1);
5801   alloc.PlanArray<std::string>(2);  // name + package
5802   if (proto.has_options()) alloc.PlanArray<FileOptions>(1);
5803   if (proto.has_source_code_info()) alloc.PlanArray<SourceCodeInfo>(1);
5804 
5805   PlanAllocationSize(proto.service(), alloc);
5806   PlanAllocationSize(proto.message_type(), alloc);
5807   PlanAllocationSize(proto.enum_type(), alloc);
5808   PlanAllocationSize(proto.extension(), alloc);
5809 
5810   alloc.PlanArray<int>(proto.weak_dependency_size());
5811   alloc.PlanArray<int>(proto.public_dependency_size());
5812   alloc.PlanArray<const FileDescriptor*>(proto.dependency_size());
5813 }
5814 
BuildFile(const FileDescriptorProto & proto)5815 const FileDescriptor* DescriptorBuilder::BuildFile(
5816     const FileDescriptorProto& proto) {
5817   // Ensure the generated pool has been lazily initialized.  This is most
5818   // important for protos that use C++-specific features, since that extension
5819   // is only registered lazily and we always parse options into the generated
5820   // pool.
5821   if (pool_ != DescriptorPool::internal_generated_pool()) {
5822     DescriptorPool::generated_pool();
5823   }
5824 
5825   filename_ = proto.name();
5826 
5827   // Check if the file already exists and is identical to the one being built.
5828   // Note:  This only works if the input is canonical -- that is, it
5829   //   fully-qualifies all type names, has no UninterpretedOptions, etc.
5830   //   This is fine, because this idempotency "feature" really only exists to
5831   //   accommodate one hack in the proto1->proto2 migration layer.
5832   const FileDescriptor* existing_file = tables_->FindFile(filename_);
5833   if (existing_file != nullptr) {
5834     // File already in pool.  Compare the existing one to the input.
5835     if (ExistingFileMatchesProto(existing_file->edition(), existing_file,
5836                                  proto)) {
5837       // They're identical.  Return the existing descriptor.
5838       return existing_file;
5839     }
5840 
5841     // Not a match.  The error will be detected and handled later.
5842   }
5843 
5844   // Check to see if this file is already on the pending files list.
5845   // TODO:  Allow recursive imports?  It may not work with some
5846   //   (most?) programming languages.  E.g., in C++, a forward declaration
5847   //   of a type is not sufficient to allow it to be used even in a
5848   //   generated header file due to inlining.  This could perhaps be
5849   //   worked around using tricks involving inserting #include statements
5850   //   mid-file, but that's pretty ugly, and I'm pretty sure there are
5851   //   some languages out there that do not allow recursive dependencies
5852   //   at all.
5853   for (size_t i = 0; i < tables_->pending_files_.size(); i++) {
5854     if (tables_->pending_files_[i] == proto.name()) {
5855       AddRecursiveImportError(proto, i);
5856       return nullptr;
5857     }
5858   }
5859 
5860   static const int kMaximumPackageLength = 511;
5861   if (proto.package().size() > kMaximumPackageLength) {
5862     AddError(proto.package(), proto, DescriptorPool::ErrorCollector::NAME,
5863              "Package name is too long");
5864     return nullptr;
5865   }
5866 
5867   // If we have a fallback_database_, and we aren't doing lazy import building,
5868   // attempt to load all dependencies now, before checkpointing tables_.  This
5869   // avoids confusion with recursive checkpoints.
5870   if (!pool_->lazily_build_dependencies_) {
5871     if (pool_->fallback_database_ != nullptr) {
5872       tables_->pending_files_.push_back(proto.name());
5873       for (int i = 0; i < proto.dependency_size(); i++) {
5874         if (tables_->FindFile(proto.dependency(i)) == nullptr &&
5875             (pool_->underlay_ == nullptr ||
5876              pool_->underlay_->FindFileByName(proto.dependency(i)) ==
5877                  nullptr)) {
5878           // We don't care what this returns since we'll find out below anyway.
5879           pool_->TryFindFileInFallbackDatabase(proto.dependency(i),
5880                                                deferred_validation_);
5881         }
5882       }
5883       tables_->pending_files_.pop_back();
5884     }
5885   }
5886 
5887   // Checkpoint the tables so that we can roll back if something goes wrong.
5888   tables_->AddCheckpoint();
5889 
5890   auto alloc = absl::make_unique<internal::FlatAllocator>();
5891   PlanAllocationSize(proto, *alloc);
5892   alloc->FinalizePlanning(tables_);
5893   FileDescriptor* result = BuildFileImpl(proto, *alloc);
5894 
5895   file_tables_->FinalizeTables();
5896   if (result) {
5897     tables_->ClearLastCheckpoint();
5898     result->finished_building_ = true;
5899     alloc->ExpectConsumed();
5900   } else {
5901     tables_->RollbackToLastCheckpoint(deferred_validation_);
5902   }
5903 
5904   return result;
5905 }
5906 
BuildFileImpl(const FileDescriptorProto & proto,internal::FlatAllocator & alloc)5907 FileDescriptor* DescriptorBuilder::BuildFileImpl(
5908     const FileDescriptorProto& proto, internal::FlatAllocator& alloc) {
5909   FileDescriptor* result = alloc.AllocateArray<FileDescriptor>(1);
5910   file_ = result;
5911 
5912   if (proto.has_edition()) {
5913     file_->edition_ = proto.edition();
5914   } else if (proto.syntax().empty() || proto.syntax() == "proto2") {
5915     file_->edition_ = Edition::EDITION_PROTO2;
5916   } else if (proto.syntax() == "proto3") {
5917     file_->edition_ = Edition::EDITION_PROTO3;
5918   } else {
5919     file_->edition_ = Edition::EDITION_UNKNOWN;
5920     AddError(proto.name(), proto, DescriptorPool::ErrorCollector::OTHER, [&] {
5921       return absl::StrCat("Unrecognized syntax: ", proto.syntax());
5922     });
5923   }
5924 
5925   const FeatureSetDefaults& defaults =
5926       pool_->feature_set_defaults_spec_ == nullptr
5927           ? GetCppFeatureSetDefaults()
5928           : *pool_->feature_set_defaults_spec_;
5929 
5930   absl::StatusOr<FeatureResolver> feature_resolver =
5931       FeatureResolver::Create(file_->edition_, defaults);
5932   if (!feature_resolver.ok()) {
5933     AddError(proto.name(), proto, DescriptorPool::ErrorCollector::EDITIONS,
5934              [&] { return std::string(feature_resolver.status().message()); });
5935   } else {
5936     feature_resolver_.emplace(std::move(feature_resolver).value());
5937   }
5938 
5939   result->is_placeholder_ = false;
5940   result->finished_building_ = false;
5941   SourceCodeInfo* info = nullptr;
5942   if (proto.has_source_code_info()) {
5943     info = alloc.AllocateArray<SourceCodeInfo>(1);
5944     *info = proto.source_code_info();
5945     result->source_code_info_ = info;
5946   } else {
5947     result->source_code_info_ = &SourceCodeInfo::default_instance();
5948   }
5949 
5950   file_tables_ = alloc.AllocateArray<FileDescriptorTables>(1);
5951   file_->tables_ = file_tables_;
5952 
5953   if (!proto.has_name()) {
5954     AddError("", proto, DescriptorPool::ErrorCollector::OTHER,
5955              "Missing field: FileDescriptorProto.name.");
5956   }
5957 
5958   result->name_ = alloc.AllocateStrings(proto.name());
5959   if (proto.has_package()) {
5960     result->package_ = alloc.AllocateStrings(proto.package());
5961   } else {
5962     // We cannot rely on proto.package() returning a valid string if
5963     // proto.has_package() is false, because we might be running at static
5964     // initialization time, in which case default values have not yet been
5965     // initialized.
5966     result->package_ = alloc.AllocateStrings("");
5967   }
5968   result->pool_ = pool_;
5969 
5970   if (absl::StrContains(result->name(), '\0')) {
5971     AddError(result->name(), proto, DescriptorPool::ErrorCollector::NAME, [&] {
5972       return absl::StrCat("\"", result->name(), "\" contains null character.");
5973     });
5974     return nullptr;
5975   }
5976 
5977   // Add to tables.
5978   if (!tables_->AddFile(result)) {
5979     AddError(proto.name(), proto, DescriptorPool::ErrorCollector::OTHER,
5980              "A file with this name is already in the pool.");
5981     // Bail out early so that if this is actually the exact same file, we
5982     // don't end up reporting that every single symbol is already defined.
5983     return nullptr;
5984   }
5985   if (!result->package().empty()) {
5986     if (std::count(result->package().begin(), result->package().end(), '.') >
5987         kPackageLimit) {
5988       AddError(result->package(), proto, DescriptorPool::ErrorCollector::NAME,
5989                "Exceeds Maximum Package Depth");
5990       return nullptr;
5991     }
5992     AddPackage(result->package(), proto, result, true);
5993   }
5994 
5995   // Make sure all dependencies are loaded.
5996   absl::flat_hash_set<absl::string_view> seen_dependencies;
5997   result->dependency_count_ = proto.dependency_size();
5998   result->dependencies_ =
5999       alloc.AllocateArray<const FileDescriptor*>(proto.dependency_size());
6000   result->dependencies_once_ = nullptr;
6001   unused_dependency_.clear();
6002   absl::flat_hash_set<int> weak_deps;
6003   for (int i = 0; i < proto.weak_dependency_size(); ++i) {
6004     weak_deps.insert(proto.weak_dependency(i));
6005   }
6006 
6007   bool need_lazy_deps = false;
6008   for (int i = 0; i < proto.dependency_size(); i++) {
6009     if (!seen_dependencies.insert(proto.dependency(i)).second) {
6010       AddTwiceListedError(proto, i);
6011     }
6012 
6013     const FileDescriptor* dependency = tables_->FindFile(proto.dependency(i));
6014     if (dependency == nullptr && pool_->underlay_ != nullptr) {
6015       dependency = pool_->underlay_->FindFileByName(proto.dependency(i));
6016     }
6017 
6018     if (dependency == result) {
6019       // Recursive import.  dependency/result is not fully initialized, and it's
6020       // dangerous to try to do anything with it.  The recursive import error
6021       // will be detected and reported in DescriptorBuilder::BuildFile().
6022       return nullptr;
6023     }
6024 
6025     if (dependency == nullptr) {
6026       if (!pool_->lazily_build_dependencies_) {
6027         if (pool_->allow_unknown_ ||
6028             (!pool_->enforce_weak_ && weak_deps.contains(i))) {
6029           internal::FlatAllocator lazy_dep_alloc;
6030           lazy_dep_alloc.PlanArray<FileDescriptor>(1);
6031           lazy_dep_alloc.PlanArray<std::string>(1);
6032           lazy_dep_alloc.FinalizePlanning(tables_);
6033           dependency = pool_->NewPlaceholderFileWithMutexHeld(
6034               proto.dependency(i), lazy_dep_alloc);
6035         } else {
6036           AddImportError(proto, i);
6037         }
6038       }
6039     } else {
6040       // Add to unused_dependency_ to track unused imported files.
6041       // Note: do not track unused imported files for public import.
6042       if (pool_->enforce_dependencies_ &&
6043           (pool_->direct_input_files_.find(proto.name()) !=
6044            pool_->direct_input_files_.end()) &&
6045           (dependency->public_dependency_count() == 0)) {
6046         unused_dependency_.insert(dependency);
6047       }
6048     }
6049 
6050     result->dependencies_[i] = dependency;
6051     if (pool_->lazily_build_dependencies_ && !dependency) {
6052       need_lazy_deps = true;
6053     }
6054   }
6055   if (need_lazy_deps) {
6056     int total_char_size = 0;
6057     for (int i = 0; i < proto.dependency_size(); i++) {
6058       if (result->dependencies_[i] == nullptr) {
6059         total_char_size += static_cast<int>(proto.dependency(i).size());
6060       }
6061       ++total_char_size;  // For NUL char
6062     }
6063 
6064     void* data = tables_->AllocateBytes(
6065         static_cast<int>(sizeof(absl::once_flag)) + total_char_size);
6066     result->dependencies_once_ = ::new (data) absl::once_flag{};
6067     char* name_data = reinterpret_cast<char*>(result->dependencies_once_ + 1);
6068 
6069     for (int i = 0; i < proto.dependency_size(); i++) {
6070       if (result->dependencies_[i] == nullptr) {
6071         memcpy(name_data, proto.dependency(i).data(),
6072                proto.dependency(i).size());
6073         name_data += proto.dependency(i).size();
6074       }
6075       *name_data++ = '\0';
6076     }
6077   }
6078 
6079   // Check public dependencies.
6080   int public_dependency_count = 0;
6081   result->public_dependencies_ =
6082       alloc.AllocateArray<int>(proto.public_dependency_size());
6083   for (int i = 0; i < proto.public_dependency_size(); i++) {
6084     // Only put valid public dependency indexes.
6085     int index = proto.public_dependency(i);
6086     if (index >= 0 && index < proto.dependency_size()) {
6087       result->public_dependencies_[public_dependency_count++] = index;
6088       // Do not track unused imported files for public import.
6089       // Calling dependency(i) builds that file when doing lazy imports,
6090       // need to avoid doing this. Unused dependency detection isn't done
6091       // when building lazily, anyways.
6092       if (!pool_->lazily_build_dependencies_) {
6093         unused_dependency_.erase(result->dependency(index));
6094       }
6095     } else {
6096       AddError(proto.name(), proto, DescriptorPool::ErrorCollector::OTHER,
6097                "Invalid public dependency index.");
6098     }
6099   }
6100   result->public_dependency_count_ = public_dependency_count;
6101 
6102   // Build dependency set
6103   dependencies_.clear();
6104   // We don't/can't do proper dependency error checking when
6105   // lazily_build_dependencies_, and calling dependency(i) will force
6106   // a dependency to be built, which we don't want.
6107   if (!pool_->lazily_build_dependencies_) {
6108     for (int i = 0; i < result->dependency_count(); i++) {
6109       RecordPublicDependencies(result->dependency(i));
6110     }
6111   }
6112 
6113   // Check weak dependencies.
6114   int weak_dependency_count = 0;
6115   result->weak_dependencies_ =
6116       alloc.AllocateArray<int>(proto.weak_dependency_size());
6117   for (int i = 0; i < proto.weak_dependency_size(); i++) {
6118     int index = proto.weak_dependency(i);
6119     if (index >= 0 && index < proto.dependency_size()) {
6120       result->weak_dependencies_[weak_dependency_count++] = index;
6121     } else {
6122       AddError(proto.name(), proto, DescriptorPool::ErrorCollector::OTHER,
6123                "Invalid weak dependency index.");
6124     }
6125   }
6126   result->weak_dependency_count_ = weak_dependency_count;
6127 
6128   // Convert children.
6129   BUILD_ARRAY(proto, result, message_type, BuildMessage, nullptr);
6130   BUILD_ARRAY(proto, result, enum_type, BuildEnum, nullptr);
6131   BUILD_ARRAY(proto, result, service, BuildService, nullptr);
6132   BUILD_ARRAY(proto, result, extension, BuildExtension, nullptr);
6133 
6134   // Copy options.
6135   AllocateOptions(proto, result, alloc);
6136 
6137   // Note that the following steps must occur in exactly the specified order.
6138 
6139   // Cross-link.
6140   CrossLinkFile(result, proto);
6141 
6142   if (!message_hints_.empty()) {
6143     SuggestFieldNumbers(result, proto);
6144   }
6145 
6146   // Interpret only the non-extension options first, including features and
6147   // their extensions.  This has to be done in two passes, since option
6148   // extensions defined in this file may have features attached to them.
6149   if (!had_errors_) {
6150     OptionInterpreter option_interpreter(this);
6151     for (std::vector<OptionsToInterpret>::iterator iter =
6152              options_to_interpret_.begin();
6153          iter != options_to_interpret_.end(); ++iter) {
6154       option_interpreter.InterpretNonExtensionOptions(&(*iter));
6155     }
6156 
6157     // TODO: move this check back to generator.cc once we no longer
6158     // need to set both ctype and string_type internally.
6159     internal::VisitDescriptors(
6160         *result, proto,
6161         [&](const FieldDescriptor& field, const FieldDescriptorProto& proto) {
6162           if (field.options_->has_ctype() && field.options_->features()
6163                                                  .GetExtension(pb::cpp)
6164                                                  .has_string_type()) {
6165             AddError(field.full_name(), proto,
6166                      DescriptorPool::ErrorCollector::TYPE, [&] {
6167                        return absl::StrFormat(
6168                            "Field %s specifies both string_type and ctype "
6169                            "which is not supported.",
6170                            field.full_name());
6171                      });
6172           }
6173         });
6174 
6175     // Handle feature resolution.  This must occur after option interpretation,
6176     // but before validation.
6177     {
6178       auto cleanup = DisableTracking();
6179       internal::VisitDescriptors(
6180           *result, proto, [&](const auto& descriptor, const auto& proto) {
6181             using OptionsT =
6182                 typename std::remove_const<typename std::remove_pointer<
6183                     decltype(descriptor.options_)>::type>::type;
6184             using DescriptorT =
6185                 typename std::remove_const<typename std::remove_reference<
6186                     decltype(descriptor)>::type>::type;
6187 
6188             ResolveFeatures(
6189                 proto, const_cast<DescriptorT*>(&descriptor),
6190                 const_cast<  // NOLINT(google3-runtime-proto-const-cast)
6191                     OptionsT*>(descriptor.options_),
6192                 alloc);
6193           });
6194     }
6195 
6196     internal::VisitDescriptors(*result, [&](const FieldDescriptor& field) {
6197       if (result->edition() >= Edition::EDITION_2024 &&
6198           field.options().has_ctype()) {
6199         // "ctype" is no longer supported in edition 2024 and beyond.
6200         AddError(
6201             field.full_name(), proto, DescriptorPool::ErrorCollector::NAME,
6202             "ctype option is not allowed under edition 2024 and beyond. Use "
6203             "the feature string_type = VIEW|CORD|STRING|... instead.");
6204       }
6205       EnforceCTypeStringTypeConsistency(
6206           field.file()->edition(), field.cpp_type(),
6207           field.merged_features_->GetExtension(pb::cpp),
6208           const_cast<  // NOLINT(google3-runtime-proto-const-cast)
6209               FieldOptions&>(*field.options_));
6210     });
6211 
6212     // Post-process cleanup for field features.
6213     internal::VisitDescriptors(
6214         *result, proto,
6215         [&](const FieldDescriptor& field, const FieldDescriptorProto& proto) {
6216           PostProcessFieldFeatures(const_cast<FieldDescriptor&>(field), proto);
6217         });
6218 
6219     // Interpret any remaining uninterpreted options gathered into
6220     // options_to_interpret_ during descriptor building.  Cross-linking has made
6221     // extension options known, so all interpretations should now succeed.
6222     for (std::vector<OptionsToInterpret>::iterator iter =
6223              options_to_interpret_.begin();
6224          iter != options_to_interpret_.end(); ++iter) {
6225       option_interpreter.InterpretOptionExtensions(&(*iter));
6226     }
6227     options_to_interpret_.clear();
6228     if (info != nullptr) {
6229       option_interpreter.UpdateSourceCodeInfo(info);
6230     }
6231   }
6232 
6233   // Validate options. See comments at InternalSetLazilyBuildDependencies about
6234   // error checking and lazy import building.
6235   if (!had_errors_ && !pool_->lazily_build_dependencies_) {
6236     internal::VisitDescriptors(
6237         *result, proto, [&](const auto& descriptor, const auto& desc_proto) {
6238           ValidateOptions(&descriptor, desc_proto);
6239         });
6240   }
6241 
6242   // Additional naming conflict check for map entry types. Only need to check
6243   // this if there are already errors.
6244   if (had_errors_) {
6245     for (int i = 0; i < proto.message_type_size(); ++i) {
6246       DetectMapConflicts(result->message_type(i), proto.message_type(i));
6247     }
6248   }
6249 
6250 
6251   // Again, see comments at InternalSetLazilyBuildDependencies about error
6252   // checking. Also, don't log unused dependencies if there were previous
6253   // errors, since the results might be inaccurate.
6254   if (!had_errors_ && !unused_dependency_.empty() &&
6255       !pool_->lazily_build_dependencies_) {
6256     LogUnusedDependency(proto, result);
6257   }
6258 
6259   // Store feature information for deferred validation outside of the database
6260   // mutex.
6261   if (!had_errors_ && !pool_->lazily_build_dependencies_) {
6262     internal::VisitDescriptors(
6263         *result, proto, [&](const auto& descriptor, const auto& desc_proto) {
6264           if (descriptor.proto_features_ != &FeatureSet::default_instance()) {
6265             deferred_validation_.ValidateFeatureLifetimes(
6266                 GetFile(descriptor), {descriptor.proto_features_, &desc_proto,
6267                                       GetFullName(descriptor), proto.name()});
6268           }
6269         });
6270   }
6271 
6272   if (had_errors_) {
6273     return nullptr;
6274   } else {
6275     return result;
6276   }
6277 }
6278 
6279 
AllocateNameStrings(const absl::string_view scope,const absl::string_view proto_name,internal::FlatAllocator & alloc)6280 const std::string* DescriptorBuilder::AllocateNameStrings(
6281     const absl::string_view scope, const absl::string_view proto_name,
6282     internal::FlatAllocator& alloc) {
6283   if (scope.empty()) {
6284     return alloc.AllocateStrings(proto_name, proto_name);
6285   } else {
6286     return alloc.AllocateStrings(proto_name,
6287                                  absl::StrCat(scope, ".", proto_name));
6288   }
6289 }
6290 
namespace {

// Helper for BuildMessage below: a scope guard that adds one to the
// referenced counter when the guard is destroyed.
struct IncrementWhenDestroyed {
  ~IncrementWhenDestroyed() { to_increment += 1; }
  int& to_increment;  // Counter to bump on destruction; not owned.
};

}  // namespace
6300 
6301 namespace {
IsNonMessageType(absl::string_view type)6302 bool IsNonMessageType(absl::string_view type) {
6303   static const auto* non_message_types =
6304       new absl::flat_hash_set<absl::string_view>(
6305           {"double", "float", "int64", "uint64", "int32", "fixed32", "fixed64",
6306            "bool", "string", "bytes", "uint32", "enum", "sfixed32", "sfixed64",
6307            "sint32", "sint64"});
6308   return non_message_types->contains(type);
6309 }
6310 }  // namespace
6311 
6312 
BuildMessage(const DescriptorProto & proto,const Descriptor * parent,Descriptor * result,internal::FlatAllocator & alloc)6313 void DescriptorBuilder::BuildMessage(const DescriptorProto& proto,
6314                                      const Descriptor* parent,
6315                                      Descriptor* result,
6316                                      internal::FlatAllocator& alloc) {
6317   const absl::string_view scope =
6318       (parent == nullptr) ? file_->package() : parent->full_name();
6319   result->all_names_ = AllocateNameStrings(scope, proto.name(), alloc);
6320   ValidateSymbolName(proto.name(), result->full_name(), proto);
6321 
6322   result->file_ = file_;
6323   result->containing_type_ = parent;
6324   result->is_placeholder_ = false;
6325   result->is_unqualified_placeholder_ = false;
6326   result->well_known_type_ = Descriptor::WELLKNOWNTYPE_UNSPECIFIED;
6327   result->options_ = nullptr;  // Set to default_instance later if necessary.
6328 
6329   auto it = pool_->tables_->well_known_types_.find(result->full_name());
6330   if (it != pool_->tables_->well_known_types_.end()) {
6331     result->well_known_type_ = it->second;
6332   }
6333 
6334   // Calculate the continuous sequence of fields.
6335   // These can be fast-path'd during lookup and don't need to be added to the
6336   // tables.
6337   // We use uint16_t to save space for sequential_field_limit_, so stop before
6338   // overflowing it. Worst case, we are not taking full advantage on huge
6339   // messages, but it is unlikely.
6340   result->sequential_field_limit_ = 0;
6341   for (int i = 0; i < std::numeric_limits<uint16_t>::max() &&
6342                   i < proto.field_size() && proto.field(i).number() == i + 1;
6343        ++i) {
6344     result->sequential_field_limit_ = i + 1;
6345   }
6346 
6347   // Build oneofs first so that fields and extension ranges can refer to them.
6348   BUILD_ARRAY(proto, result, oneof_decl, BuildOneof, result);
6349   BUILD_ARRAY(proto, result, field, BuildField, result);
6350   BUILD_ARRAY(proto, result, enum_type, BuildEnum, result);
6351   BUILD_ARRAY(proto, result, extension_range, BuildExtensionRange, result);
6352   BUILD_ARRAY(proto, result, extension, BuildExtension, result);
6353   BUILD_ARRAY(proto, result, reserved_range, BuildReservedRange, result);
6354 
6355   // Copy options.
6356   AllocateOptions(proto, result, DescriptorProto::kOptionsFieldNumber,
6357                   "google.protobuf.MessageOptions", alloc);
6358 
6359   // Before building submessages, check recursion limit.
6360   --recursion_depth_;
6361   IncrementWhenDestroyed revert{recursion_depth_};
6362   if (recursion_depth_ <= 0) {
6363     AddError(result->full_name(), proto, DescriptorPool::ErrorCollector::OTHER,
6364              "Reached maximum recursion limit for nested messages.");
6365     result->nested_types_ = nullptr;
6366     result->nested_type_count_ = 0;
6367     return;
6368   }
6369   BUILD_ARRAY(proto, result, nested_type, BuildMessage, result);
6370 
6371   // Copy reserved names.
6372   int reserved_name_count = proto.reserved_name_size();
6373   result->reserved_name_count_ = reserved_name_count;
6374   result->reserved_names_ =
6375       alloc.AllocateArray<const std::string*>(reserved_name_count);
6376   for (int i = 0; i < reserved_name_count; ++i) {
6377     result->reserved_names_[i] = alloc.AllocateStrings(proto.reserved_name(i));
6378   }
6379 
6380   AddSymbol(result->full_name(), parent, result->name(), proto, Symbol(result));
6381 
6382   for (int i = 0; i < proto.reserved_range_size(); i++) {
6383     const DescriptorProto_ReservedRange& range1 = proto.reserved_range(i);
6384     for (int j = i + 1; j < proto.reserved_range_size(); j++) {
6385       const DescriptorProto_ReservedRange& range2 = proto.reserved_range(j);
6386       if (range1.end() > range2.start() && range2.end() > range1.start()) {
6387         AddError(result->full_name(), proto.reserved_range(i),
6388                  DescriptorPool::ErrorCollector::NUMBER, [&] {
6389                    return absl::Substitute(
6390                        "Reserved range $0 to $1 overlaps with "
6391                        "already-defined range $2 to $3.",
6392                        range2.start(), range2.end() - 1, range1.start(),
6393                        range1.end() - 1);
6394                  });
6395       }
6396     }
6397   }
6398 
6399   absl::flat_hash_set<absl::string_view> reserved_name_set;
6400   for (const std::string& name : proto.reserved_name()) {
6401     if (!reserved_name_set.insert(name).second) {
6402       AddError(name, proto, DescriptorPool::ErrorCollector::NAME, [&] {
6403         return absl::Substitute("Field name \"$0\" is reserved multiple times.",
6404                                 name);
6405       });
6406     }
6407   }
6408   // Check that fields aren't using reserved names or numbers and that they
6409   // aren't using extension numbers.
6410   for (int i = 0; i < result->field_count(); i++) {
6411     const FieldDescriptor* field = result->field(i);
6412     for (int j = 0; j < result->extension_range_count(); j++) {
6413       const Descriptor::ExtensionRange* range = result->extension_range(j);
6414       if (range->start_number() <= field->number() &&
6415           field->number() < range->end_number()) {
6416         message_hints_[result].RequestHintOnFieldNumbers(
6417             proto.extension_range(j), DescriptorPool::ErrorCollector::NUMBER);
6418         AddError(field->full_name(), proto.extension_range(j),
6419                  DescriptorPool::ErrorCollector::NUMBER, [&] {
6420                    return absl::Substitute(
6421                        "Extension range $0 to $1 includes field \"$2\" ($3).",
6422                        range->start_number(), range->end_number() - 1,
6423                        field->name(), field->number());
6424                  });
6425       }
6426     }
6427     for (int j = 0; j < result->reserved_range_count(); j++) {
6428       const Descriptor::ReservedRange* range = result->reserved_range(j);
6429       if (range->start <= field->number() && field->number() < range->end) {
6430         message_hints_[result].RequestHintOnFieldNumbers(
6431             proto.reserved_range(j), DescriptorPool::ErrorCollector::NUMBER);
6432         AddError(field->full_name(), proto.reserved_range(j),
6433                  DescriptorPool::ErrorCollector::NUMBER, [&] {
6434                    return absl::Substitute(
6435                        "Field \"$0\" uses reserved number $1.", field->name(),
6436                        field->number());
6437                  });
6438       }
6439     }
6440     if (reserved_name_set.contains(field->name())) {
6441       AddError(field->full_name(), proto.field(i),
6442                DescriptorPool::ErrorCollector::NAME, [&] {
6443                  return absl::Substitute("Field name \"$0\" is reserved.",
6444                                          field->name());
6445                });
6446     }
6447   }
6448 
6449   // Check that extension ranges don't overlap and don't include
6450   // reserved field numbers or names.
6451   for (int i = 0; i < result->extension_range_count(); i++) {
6452     const Descriptor::ExtensionRange* range1 = result->extension_range(i);
6453     for (int j = 0; j < result->reserved_range_count(); j++) {
6454       const Descriptor::ReservedRange* range2 = result->reserved_range(j);
6455       if (range1->end_number() > range2->start &&
6456           range2->end > range1->start_number()) {
6457         AddError(result->full_name(), proto.extension_range(i),
6458                  DescriptorPool::ErrorCollector::NUMBER, [&] {
6459                    return absl::Substitute(
6460                        "Extension range $0 to $1 overlaps with "
6461                        "reserved range $2 to $3.",
6462                        range1->start_number(), range1->end_number() - 1,
6463                        range2->start, range2->end - 1);
6464                  });
6465       }
6466     }
6467     for (int j = i + 1; j < result->extension_range_count(); j++) {
6468       const Descriptor::ExtensionRange* range2 = result->extension_range(j);
6469       if (range1->end_number() > range2->start_number() &&
6470           range2->end_number() > range1->start_number()) {
6471         AddError(result->full_name(), proto.extension_range(i),
6472                  DescriptorPool::ErrorCollector::NUMBER, [&] {
6473                    return absl::Substitute(
6474                        "Extension range $0 to $1 overlaps with "
6475                        "already-defined range $2 to $3.",
6476                        range2->start_number(), range2->end_number() - 1,
6477                        range1->start_number(), range1->end_number() - 1);
6478                  });
6479       }
6480     }
6481   }
6482 }
6483 
CheckFieldJsonNameUniqueness(const DescriptorProto & proto,const Descriptor * result)6484 void DescriptorBuilder::CheckFieldJsonNameUniqueness(
6485     const DescriptorProto& proto, const Descriptor* result) {
6486   const absl::string_view message_name = result->full_name();
6487   if (!pool_->deprecated_legacy_json_field_conflicts_ &&
6488       !IsLegacyJsonFieldConflictEnabled(result->options())) {
6489     // Check both with and without taking json_name into consideration.  This is
6490     // needed for field masks, which don't use json_name.
6491     CheckFieldJsonNameUniqueness(message_name, proto, result, false);
6492     CheckFieldJsonNameUniqueness(message_name, proto, result, true);
6493   }
6494 }
6495 
6496 namespace {
6497 // Helpers for function below
6498 
6499 struct JsonNameDetails {
6500   const FieldDescriptorProto* field;
6501   std::string orig_name;
6502   bool is_custom;
6503 };
6504 
GetJsonNameDetails(const FieldDescriptorProto * field,bool use_custom)6505 JsonNameDetails GetJsonNameDetails(const FieldDescriptorProto* field,
6506                                    bool use_custom) {
6507   std::string default_json_name = ToJsonName(field->name());
6508   if (use_custom && field->has_json_name() &&
6509       field->json_name() != default_json_name) {
6510     return {field, field->json_name(), true};
6511   }
6512   return {field, std::move(default_json_name), false};
6513 }
6514 
// Returns true when `name` is non-empty, begins with '[' and ends with ']',
// i.e. it has the bracketed shape that the caller rejects for custom JSON
// names. Takes the string by const reference so no copy is made per call.
bool JsonNameLooksLikeExtension(const std::string& name) {
  return !name.empty() && name.front() == '[' && name.back() == ']';
}
6518 
6519 }  // namespace
6520 
CheckFieldJsonNameUniqueness(const absl::string_view message_name,const DescriptorProto & message,const Descriptor * descriptor,bool use_custom_names)6521 void DescriptorBuilder::CheckFieldJsonNameUniqueness(
6522     const absl::string_view message_name, const DescriptorProto& message,
6523     const Descriptor* descriptor, bool use_custom_names) {
6524   absl::flat_hash_map<std::string, JsonNameDetails> name_to_field;
6525   for (const FieldDescriptorProto& field : message.field()) {
6526     JsonNameDetails details = GetJsonNameDetails(&field, use_custom_names);
6527     if (details.is_custom && JsonNameLooksLikeExtension(details.orig_name)) {
6528       auto make_error = [&] {
6529         return absl::StrFormat(
6530             "The custom JSON name of field \"%s\" (\"%s\") is invalid: "
6531             "JSON names may not start with '[' and end with ']'.",
6532             field.name(), details.orig_name);
6533       };
6534       AddError(message_name, field, DescriptorPool::ErrorCollector::NAME,
6535                make_error);
6536       continue;
6537     }
6538     auto it_inserted = name_to_field.try_emplace(details.orig_name, details);
6539     if (it_inserted.second) {
6540       continue;
6541     }
6542     JsonNameDetails& match = it_inserted.first->second;
6543     if (use_custom_names && !details.is_custom && !match.is_custom) {
6544       // if this pass is considering custom JSON names, but neither of the
6545       // names involved in the conflict are custom, don't bother with a
6546       // message. That will have been reported from other pass (non-custom
6547       // JSON names).
6548       continue;
6549     }
6550     auto make_error = [&] {
6551       absl::string_view this_type = details.is_custom ? "custom" : "default";
6552       absl::string_view existing_type = match.is_custom ? "custom" : "default";
6553       // If the matched name differs (which it can only differ in case), include
6554       // it in the error message, for maximum clarity to user.
6555       std::string name_suffix = "";
6556       if (details.orig_name != match.orig_name) {
6557         name_suffix = absl::StrCat(" (\"", match.orig_name, "\")");
6558       }
6559       return absl::StrFormat(
6560           "The %s JSON name of field \"%s\" (\"%s\") conflicts "
6561           "with the %s JSON name of field \"%s\"%s.",
6562           this_type, field.name(), details.orig_name, existing_type,
6563           match.field->name(), name_suffix);
6564     };
6565 
6566     bool involves_default = !details.is_custom || !match.is_custom;
6567     if (descriptor->features().json_format() ==
6568             FeatureSet::LEGACY_BEST_EFFORT &&
6569         involves_default) {
6570       // TODO Upgrade this to an error once downstream protos
6571       // have been fixed.
6572       AddWarning(message_name, field, DescriptorPool::ErrorCollector::NAME,
6573                  make_error);
6574     } else {
6575       AddError(message_name, field, DescriptorPool::ErrorCollector::NAME,
6576                make_error);
6577     }
6578   }
6579 }
6580 
BuildFieldOrExtension(const FieldDescriptorProto & proto,Descriptor * parent,FieldDescriptor * result,bool is_extension,internal::FlatAllocator & alloc)6581 void DescriptorBuilder::BuildFieldOrExtension(const FieldDescriptorProto& proto,
6582                                               Descriptor* parent,
6583                                               FieldDescriptor* result,
6584                                               bool is_extension,
6585                                               internal::FlatAllocator& alloc) {
6586   const absl::string_view scope =
6587       (parent == nullptr) ? file_->package() : parent->full_name();
6588 
6589   // We allocate all names in a single array, and dedup them.
6590   // We remember the indices for the potentially deduped values.
6591   auto all_names = alloc.AllocateFieldNames(
6592       proto.name(), scope,
6593       proto.has_json_name() ? &proto.json_name() : nullptr);
6594   result->all_names_ = all_names.array;
6595   result->lowercase_name_index_ = all_names.lowercase_index;
6596   result->camelcase_name_index_ = all_names.camelcase_index;
6597   result->json_name_index_ = all_names.json_index;
6598 
6599   ValidateSymbolName(proto.name(), result->full_name(), proto);
6600 
6601   result->file_ = file_;
6602   result->number_ = proto.number();
6603   result->is_extension_ = is_extension;
6604   result->is_oneof_ = false;
6605   result->in_real_oneof_ = false;
6606   result->proto3_optional_ = proto.proto3_optional();
6607 
6608   if (proto.proto3_optional() && file_->edition() != Edition::EDITION_PROTO3) {
6609     AddError(result->full_name(), proto, DescriptorPool::ErrorCollector::TYPE,
6610              [&] {
6611                return absl::StrCat(
6612                    "The [proto3_optional=true] option may only be set on proto3"
6613                    "fields, not ",
6614                    result->full_name());
6615              });
6616   }
6617 
6618   result->has_json_name_ = proto.has_json_name();
6619 
6620   result->type_ = proto.type();
6621   result->label_ = proto.label();
6622   result->is_repeated_ = result->label_ == FieldDescriptor::LABEL_REPEATED;
6623 
6624   if (result->label() == FieldDescriptor::LABEL_REQUIRED) {
6625     // An extension cannot have a required field (b/13365836).
6626     if (result->is_extension_) {
6627       AddError(result->full_name(), proto,
6628                // Error location `TYPE`: we would really like to indicate
6629                // `LABEL`, but the `ErrorLocation` enum has no entry for this,
6630                // and we don't necessarily know about all implementations of the
6631                // `ErrorCollector` interface to extend them to handle the new
6632                // error location type properly.
6633                DescriptorPool::ErrorCollector::TYPE, [&] {
6634                  return absl::StrCat("The extension ", result->full_name(),
6635                                      " cannot be required.");
6636                });
6637     }
6638   }
6639 
6640   // Some of these may be filled in when cross-linking.
6641   result->containing_type_ = nullptr;
6642   result->type_once_ = nullptr;
6643   result->default_value_enum_ = nullptr;
6644 
6645   result->has_default_value_ = proto.has_default_value();
6646   if (proto.has_default_value() && result->is_repeated()) {
6647     AddError(result->full_name(), proto,
6648              DescriptorPool::ErrorCollector::DEFAULT_VALUE,
6649              "Repeated fields can't have default values.");
6650   }
6651 
6652   if (proto.has_type()) {
6653     if (proto.has_default_value()) {
6654       char* end_pos = nullptr;
6655       switch (result->cpp_type()) {
6656         case FieldDescriptor::CPPTYPE_INT32:
6657           result->default_value_int32_t_ =
6658               std::strtol(proto.default_value().c_str(), &end_pos, 0);
6659           break;
6660         case FieldDescriptor::CPPTYPE_INT64:
6661           static_assert(sizeof(int64_t) == sizeof(long long),  // NOLINT
6662                         "sizeof int64_t is not sizeof long long");
6663           result->default_value_int64_t_ =
6664               std::strtoll(proto.default_value().c_str(), &end_pos, 0);
6665           break;
6666         case FieldDescriptor::CPPTYPE_UINT32:
6667           result->default_value_uint32_t_ =
6668               std::strtoul(proto.default_value().c_str(), &end_pos, 0);
6669           break;
6670         case FieldDescriptor::CPPTYPE_UINT64:
6671           static_assert(
6672               sizeof(uint64_t) == sizeof(unsigned long long),  // NOLINT
6673               "sizeof uint64_t is not sizeof unsigned long long");
6674           result->default_value_uint64_t_ =
6675               std::strtoull(proto.default_value().c_str(), &end_pos, 0);
6676           break;
6677         case FieldDescriptor::CPPTYPE_FLOAT:
6678           if (proto.default_value() == "inf") {
6679             result->default_value_float_ =
6680                 std::numeric_limits<float>::infinity();
6681           } else if (proto.default_value() == "-inf") {
6682             result->default_value_float_ =
6683                 -std::numeric_limits<float>::infinity();
6684           } else if (proto.default_value() == "nan") {
6685             result->default_value_float_ =
6686                 std::numeric_limits<float>::quiet_NaN();
6687           } else {
6688             result->default_value_float_ = io::SafeDoubleToFloat(
6689                 io::NoLocaleStrtod(proto.default_value().c_str(), &end_pos));
6690           }
6691           break;
6692         case FieldDescriptor::CPPTYPE_DOUBLE:
6693           if (proto.default_value() == "inf") {
6694             result->default_value_double_ =
6695                 std::numeric_limits<double>::infinity();
6696           } else if (proto.default_value() == "-inf") {
6697             result->default_value_double_ =
6698                 -std::numeric_limits<double>::infinity();
6699           } else if (proto.default_value() == "nan") {
6700             result->default_value_double_ =
6701                 std::numeric_limits<double>::quiet_NaN();
6702           } else {
6703             result->default_value_double_ =
6704                 io::NoLocaleStrtod(proto.default_value().c_str(), &end_pos);
6705           }
6706           break;
6707         case FieldDescriptor::CPPTYPE_BOOL:
6708           if (proto.default_value() == "true") {
6709             result->default_value_bool_ = true;
6710           } else if (proto.default_value() == "false") {
6711             result->default_value_bool_ = false;
6712           } else {
6713             AddError(result->full_name(), proto,
6714                      DescriptorPool::ErrorCollector::DEFAULT_VALUE,
6715                      "Boolean default must be true or false.");
6716           }
6717           break;
6718         case FieldDescriptor::CPPTYPE_ENUM:
6719           // This will be filled in when cross-linking.
6720           result->default_value_enum_ = nullptr;
6721           break;
6722         case FieldDescriptor::CPPTYPE_STRING:
6723           if (result->type() == FieldDescriptor::TYPE_BYTES) {
6724             std::string value;
6725             if (absl::CUnescape(proto.default_value(), &value)) {
6726               result->default_value_string_ = alloc.AllocateStrings(value);
6727             } else {
6728               AddError(result->full_name(), proto,
6729                        DescriptorPool::ErrorCollector::DEFAULT_VALUE,
6730                        "Invalid escaping in default value.");
6731             }
6732           } else {
6733             result->default_value_string_ =
6734                 alloc.AllocateStrings(proto.default_value());
6735           }
6736           break;
6737         case FieldDescriptor::CPPTYPE_MESSAGE:
6738           AddError(result->full_name(), proto,
6739                    DescriptorPool::ErrorCollector::DEFAULT_VALUE,
6740                    "Messages can't have default values.");
6741           result->has_default_value_ = false;
6742           result->default_generated_instance_ = nullptr;
6743           break;
6744       }
6745 
6746       if (end_pos != nullptr) {
6747         // end_pos is only set non-null by the parsers for numeric types,
6748         // above. This checks that the default was non-empty and had no extra
6749         // junk after the end of the number.
6750         if (proto.default_value().empty() || *end_pos != '\0') {
6751           AddError(result->full_name(), proto,
6752                    DescriptorPool::ErrorCollector::DEFAULT_VALUE, [&] {
6753                      return absl::StrCat("Couldn't parse default value \"",
6754                                          proto.default_value(), "\".");
6755                    });
6756         }
6757       }
6758     } else {
6759       // No explicit default value
6760       switch (result->cpp_type()) {
6761         case FieldDescriptor::CPPTYPE_INT32:
6762           result->default_value_int32_t_ = 0;
6763           break;
6764         case FieldDescriptor::CPPTYPE_INT64:
6765           result->default_value_int64_t_ = 0;
6766           break;
6767         case FieldDescriptor::CPPTYPE_UINT32:
6768           result->default_value_uint32_t_ = 0;
6769           break;
6770         case FieldDescriptor::CPPTYPE_UINT64:
6771           result->default_value_uint64_t_ = 0;
6772           break;
6773         case FieldDescriptor::CPPTYPE_FLOAT:
6774           result->default_value_float_ = 0.0f;
6775           break;
6776         case FieldDescriptor::CPPTYPE_DOUBLE:
6777           result->default_value_double_ = 0.0;
6778           break;
6779         case FieldDescriptor::CPPTYPE_BOOL:
6780           result->default_value_bool_ = false;
6781           break;
6782         case FieldDescriptor::CPPTYPE_ENUM:
6783           // This will be filled in when cross-linking.
6784           result->default_value_enum_ = nullptr;
6785           break;
6786         case FieldDescriptor::CPPTYPE_STRING:
6787           result->default_value_string_ = &internal::GetEmptyString();
6788           break;
6789         case FieldDescriptor::CPPTYPE_MESSAGE:
6790           result->default_generated_instance_ = nullptr;
6791           break;
6792       }
6793     }
6794   }
6795 
6796   if (result->number() <= 0) {
6797     message_hints_[parent].RequestHintOnFieldNumbers(
6798         proto, DescriptorPool::ErrorCollector::NUMBER);
6799     AddError(result->full_name(), proto, DescriptorPool::ErrorCollector::NUMBER,
6800              "Field numbers must be positive integers.");
6801   } else if (!is_extension && result->number() > FieldDescriptor::kMaxNumber) {
6802     // Only validate that the number is within the valid field range if it is
6803     // not an extension. Since extension numbers are validated with the
6804     // extendee's valid set of extension numbers, and those are in turn
6805     // validated against the max allowed number, the check is unnecessary for
6806     // extension fields.
6807     // This avoids cross-linking issues that arise when attempting to check if
6808     // the extendee is a message_set_wire_format message, which has a higher max
6809     // on extension numbers.
6810     message_hints_[parent].RequestHintOnFieldNumbers(
6811         proto, DescriptorPool::ErrorCollector::NUMBER);
6812     AddError(result->full_name(), proto, DescriptorPool::ErrorCollector::NUMBER,
6813              [&] {
6814                return absl::Substitute(
6815                    "Field numbers cannot be greater than $0.",
6816                    FieldDescriptor::kMaxNumber);
6817              });
6818   }
6819 
6820   if (is_extension) {
6821     if (!proto.has_extendee()) {
6822       AddError(result->full_name(), proto,
6823                DescriptorPool::ErrorCollector::EXTENDEE,
6824                "FieldDescriptorProto.extendee not set for extension field.");
6825     }
6826 
6827     result->scope_.extension_scope = parent;
6828 
6829     if (proto.has_oneof_index()) {
6830       AddError(result->full_name(), proto, DescriptorPool::ErrorCollector::TYPE,
6831                "FieldDescriptorProto.oneof_index should not be set for "
6832                "extensions.");
6833     }
6834   } else {
6835     if (proto.has_extendee()) {
6836       AddError(result->full_name(), proto,
6837                DescriptorPool::ErrorCollector::EXTENDEE,
6838                "FieldDescriptorProto.extendee set for non-extension field.");
6839     }
6840 
6841     result->containing_type_ = parent;
6842 
6843     if (proto.has_oneof_index()) {
6844       if (proto.oneof_index() < 0 ||
6845           proto.oneof_index() >= parent->oneof_decl_count()) {
6846         AddError(result->full_name(), proto,
6847                  DescriptorPool::ErrorCollector::TYPE, [&] {
6848                    return absl::Substitute(
6849                        "FieldDescriptorProto.oneof_index $0 is "
6850                        "out of range for type \"$1\".",
6851                        proto.oneof_index(), parent->name());
6852                  });
6853       } else {
6854         result->is_oneof_ = true;
6855         result->scope_.containing_oneof =
6856             parent->oneof_decl(proto.oneof_index());
6857         result->in_real_oneof_ = !result->proto3_optional_;
6858       }
6859     }
6860   }
6861 
6862   // Copy options.
6863   AllocateOptions(proto, result, FieldDescriptorProto::kOptionsFieldNumber,
6864                   "google.protobuf.FieldOptions", alloc);
6865 
6866   AddSymbol(result->full_name(), parent, result->name(), proto, Symbol(result));
6867 }
6868 
BuildExtensionRange(const DescriptorProto::ExtensionRange & proto,const Descriptor * parent,Descriptor::ExtensionRange * result,internal::FlatAllocator & alloc)6869 void DescriptorBuilder::BuildExtensionRange(
6870     const DescriptorProto::ExtensionRange& proto, const Descriptor* parent,
6871     Descriptor::ExtensionRange* result, internal::FlatAllocator& alloc) {
6872   result->start_ = proto.start();
6873   result->end_ = proto.end();
6874   result->containing_type_ = parent;
6875 
6876   if (result->start_number() <= 0) {
6877     message_hints_[parent].RequestHintOnFieldNumbers(
6878         proto, DescriptorPool::ErrorCollector::NUMBER, result->start_number(),
6879         result->end_number());
6880     AddError(parent->full_name(), proto, DescriptorPool::ErrorCollector::NUMBER,
6881              "Extension numbers must be positive integers.");
6882   }
6883 
6884   // Checking of the upper bound of the extension range is deferred until after
6885   // options interpreting. This allows messages with message_set_wire_format to
6886   // have extensions beyond FieldDescriptor::kMaxNumber, since the extension
6887   // numbers are actually used as int32s in the message_set_wire_format.
6888 
6889   if (result->start_number() >= result->end_number()) {
6890     AddError(parent->full_name(), proto, DescriptorPool::ErrorCollector::NUMBER,
6891              "Extension range end number must be greater than start number.");
6892   }
6893 
6894   // Copy options
6895   AllocateOptions(proto, result,
6896                   DescriptorProto_ExtensionRange::kOptionsFieldNumber,
6897                   "google.protobuf.ExtensionRangeOptions", alloc);
6898 }
6899 
BuildReservedRange(const DescriptorProto::ReservedRange & proto,const Descriptor * parent,Descriptor::ReservedRange * result,internal::FlatAllocator &)6900 void DescriptorBuilder::BuildReservedRange(
6901     const DescriptorProto::ReservedRange& proto, const Descriptor* parent,
6902     Descriptor::ReservedRange* result, internal::FlatAllocator&) {
6903   result->start = proto.start();
6904   result->end = proto.end();
6905   if (result->start <= 0) {
6906     message_hints_[parent].RequestHintOnFieldNumbers(
6907         proto, DescriptorPool::ErrorCollector::NUMBER, result->start,
6908         result->end);
6909     AddError(parent->full_name(), proto, DescriptorPool::ErrorCollector::NUMBER,
6910              "Reserved numbers must be positive integers.");
6911   }
6912   if (result->start >= result->end) {
6913     AddError(parent->full_name(), proto, DescriptorPool::ErrorCollector::NUMBER,
6914              "Reserved range end number must be greater than start number.");
6915   }
6916 }
6917 
BuildReservedRange(const EnumDescriptorProto::EnumReservedRange & proto,const EnumDescriptor * parent,EnumDescriptor::ReservedRange * result,internal::FlatAllocator &)6918 void DescriptorBuilder::BuildReservedRange(
6919     const EnumDescriptorProto::EnumReservedRange& proto,
6920     const EnumDescriptor* parent, EnumDescriptor::ReservedRange* result,
6921     internal::FlatAllocator&) {
6922   result->start = proto.start();
6923   result->end = proto.end();
6924 
6925   if (result->start > result->end) {
6926     AddError(parent->full_name(), proto, DescriptorPool::ErrorCollector::NUMBER,
6927              "Reserved range end number must be greater than start number.");
6928   }
6929 }
6930 
BuildOneof(const OneofDescriptorProto & proto,Descriptor * parent,OneofDescriptor * result,internal::FlatAllocator & alloc)6931 void DescriptorBuilder::BuildOneof(const OneofDescriptorProto& proto,
6932                                    Descriptor* parent, OneofDescriptor* result,
6933                                    internal::FlatAllocator& alloc) {
6934   result->all_names_ =
6935       AllocateNameStrings(parent->full_name(), proto.name(), alloc);
6936   ValidateSymbolName(proto.name(), result->full_name(), proto);
6937 
6938   result->containing_type_ = parent;
6939 
6940   // We need to fill these in later.
6941   result->field_count_ = 0;
6942   result->fields_ = nullptr;
6943 
6944   // Copy options.
6945   AllocateOptions(proto, result, OneofDescriptorProto::kOptionsFieldNumber,
6946                   "google.protobuf.OneofOptions", alloc);
6947 
6948   AddSymbol(result->full_name(), parent, result->name(), proto, Symbol(result));
6949 }
6950 
CheckEnumValueUniqueness(const EnumDescriptorProto & proto,const EnumDescriptor * result)6951 void DescriptorBuilder::CheckEnumValueUniqueness(
6952     const EnumDescriptorProto& proto, const EnumDescriptor* result) {
6953 
6954   // Check that enum labels are still unique when we remove the enum prefix from
6955   // values that have it.
6956   //
6957   // This will fail for something like:
6958   //
6959   //   enum MyEnum {
6960   //     MY_ENUM_FOO = 0;
6961   //     FOO = 1;
6962   //   }
6963   //
6964   // By enforcing this reasonable constraint, we allow code generators to strip
6965   // the prefix and/or PascalCase it without creating conflicts.  This can lead
6966   // to much nicer language-specific enums like:
6967   //
6968   //   enum NameType {
6969   //     FirstName = 1,
6970   //     LastName = 2,
6971   //   }
6972   //
6973   // Instead of:
6974   //
6975   //   enum NameType {
6976   //     NAME_TYPE_FIRST_NAME = 1,
6977   //     NAME_TYPE_LAST_NAME = 2,
6978   //   }
6979   PrefixRemover remover(result->name());
6980   absl::flat_hash_map<std::string, const EnumValueDescriptor*> values;
6981   for (int i = 0; i < result->value_count(); i++) {
6982     const EnumValueDescriptor* value = result->value(i);
6983     std::string stripped =
6984         EnumValueToPascalCase(remover.MaybeRemove(value->name()));
6985     auto insert_result = values.try_emplace(stripped, value);
6986     bool inserted = insert_result.second;
6987 
6988     // We don't throw the error if the two conflicting symbols are identical, or
6989     // if they map to the same number.  In the former case, the normal symbol
6990     // duplication error will fire so we don't need to (and its error message
6991     // will make more sense). We allow the latter case so users can create
6992     // aliases which add or remove the prefix (code generators that do prefix
6993     // stripping should de-dup the labels in this case).
6994     if (!inserted && insert_result.first->second->name() != value->name() &&
6995         insert_result.first->second->number() != value->number()) {
6996       auto make_error = [&] {
6997         return absl::StrFormat(
6998             "Enum name %s has the same name as %s if you ignore case and strip "
6999             "out the enum name prefix (if any). (If you are using allow_alias, "
7000             "please assign the same number to each enum value name.)",
7001             value->name(), insert_result.first->second->name());
7002       };
7003       // There are proto2 enums out there with conflicting names, so to preserve
7004       // compatibility we issue only a warning for proto2.
7005       if ((pool_->deprecated_legacy_json_field_conflicts_ ||
7006            IsLegacyJsonFieldConflictEnabled(result->options())) &&
7007           result->file()->edition() == Edition::EDITION_PROTO2) {
7008         AddWarning(value->full_name(), proto.value(i),
7009                    DescriptorPool::ErrorCollector::NAME, make_error);
7010         continue;
7011       }
7012       AddError(value->full_name(), proto.value(i),
7013                DescriptorPool::ErrorCollector::NAME, make_error);
7014     }
7015   }
7016 }
7017 
BuildEnum(const EnumDescriptorProto & proto,const Descriptor * parent,EnumDescriptor * result,internal::FlatAllocator & alloc)7018 void DescriptorBuilder::BuildEnum(const EnumDescriptorProto& proto,
7019                                   const Descriptor* parent,
7020                                   EnumDescriptor* result,
7021                                   internal::FlatAllocator& alloc) {
7022   const absl::string_view scope =
7023       (parent == nullptr) ? file_->package() : parent->full_name();
7024 
7025   result->all_names_ = AllocateNameStrings(scope, proto.name(), alloc);
7026   ValidateSymbolName(proto.name(), result->full_name(), proto);
7027   result->file_ = file_;
7028   result->containing_type_ = parent;
7029   result->is_placeholder_ = false;
7030   result->is_unqualified_placeholder_ = false;
7031 
7032   if (proto.value_size() == 0) {
7033     // We cannot allow enums with no values because this would mean there
7034     // would be no valid default value for fields of this type.
7035     AddError(result->full_name(), proto, DescriptorPool::ErrorCollector::NAME,
7036              "Enums must contain at least one value.");
7037   }
7038 
7039   // Calculate the continuous sequence of the labels.
7040   // These can be fast-path'd during lookup and don't need to be added to the
7041   // tables.
7042   // We use uint16_t to save space for sequential_value_limit_, so stop before
7043   // overflowing it. Worst case, we are not taking full advantage on huge
7044   // enums, but it is unlikely.
7045   for (int i = 0;
7046        i < std::numeric_limits<uint16_t>::max() && i < proto.value_size() &&
7047        // We do the math in int64_t to avoid overflows.
7048        proto.value(i).number() ==
7049            static_cast<int64_t>(i) + proto.value(0).number();
7050        ++i) {
7051     result->sequential_value_limit_ = i;
7052   }
7053 
7054   BUILD_ARRAY(proto, result, value, BuildEnumValue, result);
7055   BUILD_ARRAY(proto, result, reserved_range, BuildReservedRange, result);
7056 
7057   // Copy reserved names.
7058   int reserved_name_count = proto.reserved_name_size();
7059   result->reserved_name_count_ = reserved_name_count;
7060   result->reserved_names_ =
7061       alloc.AllocateArray<const std::string*>(reserved_name_count);
7062   for (int i = 0; i < reserved_name_count; ++i) {
7063     result->reserved_names_[i] = alloc.AllocateStrings(proto.reserved_name(i));
7064   }
7065 
7066   // Copy options.
7067   AllocateOptions(proto, result, EnumDescriptorProto::kOptionsFieldNumber,
7068                   "google.protobuf.EnumOptions", alloc);
7069 
7070   AddSymbol(result->full_name(), parent, result->name(), proto, Symbol(result));
7071 
7072   for (int i = 0; i < proto.reserved_range_size(); i++) {
7073     const EnumDescriptorProto_EnumReservedRange& range1 =
7074         proto.reserved_range(i);
7075     for (int j = i + 1; j < proto.reserved_range_size(); j++) {
7076       const EnumDescriptorProto_EnumReservedRange& range2 =
7077           proto.reserved_range(j);
7078       if (range1.end() >= range2.start() && range2.end() >= range1.start()) {
7079         AddError(result->full_name(), proto.reserved_range(i),
7080                  DescriptorPool::ErrorCollector::NUMBER, [&] {
7081                    return absl::Substitute(
7082                        "Reserved range $0 to $1 overlaps with "
7083                        "already-defined range $2 to $3.",
7084                        range2.start(), range2.end(), range1.start(),
7085                        range1.end());
7086                  });
7087       }
7088     }
7089   }
7090 
7091   absl::flat_hash_set<absl::string_view> reserved_name_set;
7092   for (const std::string& name : proto.reserved_name()) {
7093     if (!reserved_name_set.insert(name).second) {
7094       AddError(name, proto, DescriptorPool::ErrorCollector::NAME, [&] {
7095         return absl::Substitute("Enum value \"$0\" is reserved multiple times.",
7096                                 name);
7097       });
7098     }
7099   }
7100 
7101   for (int i = 0; i < result->value_count(); i++) {
7102     const EnumValueDescriptor* value = result->value(i);
7103     for (int j = 0; j < result->reserved_range_count(); j++) {
7104       const EnumDescriptor::ReservedRange* range = result->reserved_range(j);
7105       if (range->start <= value->number() && value->number() <= range->end) {
7106         AddError(value->full_name(), proto.reserved_range(j),
7107                  DescriptorPool::ErrorCollector::NUMBER, [&] {
7108                    return absl::Substitute(
7109                        "Enum value \"$0\" uses reserved number $1.",
7110                        value->name(), value->number());
7111                  });
7112       }
7113     }
7114     if (reserved_name_set.contains(value->name())) {
7115       AddError(value->full_name(), proto.value(i),
7116                DescriptorPool::ErrorCollector::NAME, [&] {
7117                  return absl::Substitute("Enum value \"$0\" is reserved.",
7118                                          value->name());
7119                });
7120     }
7121   }
7122 }
7123 
BuildEnumValue(const EnumValueDescriptorProto & proto,const EnumDescriptor * parent,EnumValueDescriptor * result,internal::FlatAllocator & alloc)7124 void DescriptorBuilder::BuildEnumValue(const EnumValueDescriptorProto& proto,
7125                                        const EnumDescriptor* parent,
7126                                        EnumValueDescriptor* result,
7127                                        internal::FlatAllocator& alloc) {
7128   // Note:  full_name for enum values is a sibling to the parent's name, not a
7129   //   child of it.
7130   std::string full_name;
7131   size_t scope_len = parent->full_name().size() - parent->name().size();
7132   full_name.reserve(scope_len + proto.name().size());
7133   full_name.append(parent->full_name().data(), scope_len);
7134   full_name.append(proto.name());
7135 
7136   result->all_names_ =
7137       alloc.AllocateStrings(proto.name(), std::move(full_name));
7138   result->number_ = proto.number();
7139   result->type_ = parent;
7140 
7141   ValidateSymbolName(proto.name(), result->full_name(), proto);
7142 
7143   // Copy options.
7144   AllocateOptions(proto, result, EnumValueDescriptorProto::kOptionsFieldNumber,
7145                   "google.protobuf.EnumValueOptions", alloc);
7146 
7147   // Again, enum values are weird because we makes them appear as siblings
7148   // of the enum type instead of children of it.  So, we use
7149   // parent->containing_type() as the value's parent.
7150   bool added_to_outer_scope =
7151       AddSymbol(result->full_name(), parent->containing_type(), result->name(),
7152                 proto, Symbol::EnumValue(result, 0));
7153 
7154   // However, we also want to be able to search for values within a single
7155   // enum type, so we add it as a child of the enum type itself, too.
7156   // Note:  This could fail, but if it does, the error has already been
7157   //   reported by the above AddSymbol() call, so we ignore the return code.
7158   bool added_to_inner_scope = file_tables_->AddAliasUnderParent(
7159       parent, result->name(), Symbol::EnumValue(result, 1));
7160 
7161   if (added_to_inner_scope && !added_to_outer_scope) {
7162     // This value did not conflict with any values defined in the same enum,
7163     // but it did conflict with some other symbol defined in the enum type's
7164     // scope.  Let's print an additional error to explain this.
7165     std::string outer_scope;
7166     if (parent->containing_type() == nullptr) {
7167       outer_scope = file_->package();
7168     } else {
7169       outer_scope = parent->containing_type()->full_name();
7170     }
7171 
7172     if (outer_scope.empty()) {
7173       outer_scope = "the global scope";
7174     } else {
7175       outer_scope = absl::StrCat("\"", outer_scope, "\"");
7176     }
7177 
7178     AddError(
7179         result->full_name(), proto, DescriptorPool::ErrorCollector::NAME, [&] {
7180           return absl::StrCat(
7181               "Note that enum values use C++ scoping rules, meaning that "
7182               "enum values are siblings of their type, not children of it.  "
7183               "Therefore, \"",
7184               result->name(), "\" must be unique within ", outer_scope,
7185               ", not just within \"", parent->name(), "\".");
7186         });
7187   }
7188 
7189   // An enum is allowed to define two numbers that refer to the same value.
7190   // FindValueByNumber() should return the first such value, so we simply
7191   // ignore AddEnumValueByNumber()'s return code.
7192   file_tables_->AddEnumValueByNumber(result);
7193 }
7194 
BuildService(const ServiceDescriptorProto & proto,const void *,ServiceDescriptor * result,internal::FlatAllocator & alloc)7195 void DescriptorBuilder::BuildService(const ServiceDescriptorProto& proto,
7196                                      const void* /* dummy */,
7197                                      ServiceDescriptor* result,
7198                                      internal::FlatAllocator& alloc) {
7199   result->all_names_ =
7200       AllocateNameStrings(file_->package(), proto.name(), alloc);
7201   result->file_ = file_;
7202   ValidateSymbolName(proto.name(), result->full_name(), proto);
7203 
7204   BUILD_ARRAY(proto, result, method, BuildMethod, result);
7205 
7206   // Copy options.
7207   AllocateOptions(proto, result, ServiceDescriptorProto::kOptionsFieldNumber,
7208                   "google.protobuf.ServiceOptions", alloc);
7209 
7210   AddSymbol(result->full_name(), nullptr, result->name(), proto,
7211             Symbol(result));
7212 }
7213 
BuildMethod(const MethodDescriptorProto & proto,const ServiceDescriptor * parent,MethodDescriptor * result,internal::FlatAllocator & alloc)7214 void DescriptorBuilder::BuildMethod(const MethodDescriptorProto& proto,
7215                                     const ServiceDescriptor* parent,
7216                                     MethodDescriptor* result,
7217                                     internal::FlatAllocator& alloc) {
7218   result->service_ = parent;
7219   result->all_names_ =
7220       AllocateNameStrings(parent->full_name(), proto.name(), alloc);
7221 
7222   ValidateSymbolName(proto.name(), result->full_name(), proto);
7223 
7224   // These will be filled in when cross-linking.
7225   result->input_type_.Init();
7226   result->output_type_.Init();
7227 
7228   // Copy options.
7229   AllocateOptions(proto, result, MethodDescriptorProto::kOptionsFieldNumber,
7230                   "google.protobuf.MethodOptions", alloc);
7231 
7232   result->client_streaming_ = proto.client_streaming();
7233   result->server_streaming_ = proto.server_streaming();
7234 
7235   AddSymbol(result->full_name(), parent, result->name(), proto, Symbol(result));
7236 }
7237 
7238 #undef BUILD_ARRAY
7239 
7240 // -------------------------------------------------------------------
7241 
CrossLinkFile(FileDescriptor * file,const FileDescriptorProto & proto)7242 void DescriptorBuilder::CrossLinkFile(FileDescriptor* file,
7243                                       const FileDescriptorProto& proto) {
7244   for (int i = 0; i < file->message_type_count(); i++) {
7245     CrossLinkMessage(&file->message_types_[i], proto.message_type(i));
7246   }
7247 
7248   for (int i = 0; i < file->extension_count(); i++) {
7249     CrossLinkField(&file->extensions_[i], proto.extension(i));
7250   }
7251 
7252   for (int i = 0; i < file->service_count(); i++) {
7253     CrossLinkService(&file->services_[i], proto.service(i));
7254   }
7255 }
7256 
CrossLinkMessage(Descriptor * message,const DescriptorProto & proto)7257 void DescriptorBuilder::CrossLinkMessage(Descriptor* message,
7258                                          const DescriptorProto& proto) {
7259   for (int i = 0; i < message->nested_type_count(); i++) {
7260     CrossLinkMessage(&message->nested_types_[i], proto.nested_type(i));
7261   }
7262 
7263   for (int i = 0; i < message->field_count(); i++) {
7264     CrossLinkField(&message->fields_[i], proto.field(i));
7265   }
7266 
7267   for (int i = 0; i < message->extension_count(); i++) {
7268     CrossLinkField(&message->extensions_[i], proto.extension(i));
7269   }
7270 
7271   // Set up field array for each oneof.
7272 
7273   // First count the number of fields per oneof.
7274   for (int i = 0; i < message->field_count(); i++) {
7275     const OneofDescriptor* oneof_decl = message->field(i)->containing_oneof();
7276     if (oneof_decl != nullptr) {
7277       // Make sure fields belonging to the same oneof are defined consecutively.
7278       // This enables optimizations in codegens and reflection libraries to
7279       // skip fields in the oneof group, as only one of the field can be set.
7280       // Note that field_count() returns how many fields in this oneof we have
7281       // seen so far. field_count() > 0 guarantees that i > 0, so field(i-1) is
7282       // safe.
7283       if (oneof_decl->field_count() > 0 &&
7284           message->field(i - 1)->containing_oneof() != oneof_decl) {
7285         AddError(
7286             absl::StrCat(message->full_name(), ".",
7287                          message->field(i - 1)->name()),
7288             proto.field(i - 1), DescriptorPool::ErrorCollector::TYPE, [&] {
7289               return absl::Substitute(
7290                   "Fields in the same oneof must be defined consecutively. "
7291                   "\"$0\" cannot be defined before the completion of the "
7292                   "\"$1\" oneof definition.",
7293                   message->field(i - 1)->name(), oneof_decl->name());
7294             });
7295       }
7296       // Must go through oneof_decls_ array to get a non-const version of the
7297       // OneofDescriptor.
7298       auto& out_oneof_decl = message->oneof_decls_[oneof_decl->index()];
7299       if (out_oneof_decl.field_count_ == 0) {
7300         out_oneof_decl.fields_ = message->field(i);
7301       }
7302 
7303       if (!had_errors_) {
7304         // Verify that they are contiguous.
7305         // This is assumed by OneofDescriptor::field(i).
7306         // But only if there are no errors.
7307         ABSL_CHECK_EQ(out_oneof_decl.fields_ + out_oneof_decl.field_count_,
7308                       message->field(i));
7309       }
7310       ++out_oneof_decl.field_count_;
7311     }
7312   }
7313 
7314   // Then verify the sizes.
7315   for (int i = 0; i < message->oneof_decl_count(); i++) {
7316     OneofDescriptor* oneof_decl = &message->oneof_decls_[i];
7317 
7318     if (oneof_decl->field_count() == 0) {
7319       AddError(absl::StrCat(message->full_name(), ".", oneof_decl->name()),
7320                proto.oneof_decl(i), DescriptorPool::ErrorCollector::NAME,
7321                "Oneof must have at least one field.");
7322     }
7323   }
7324 
7325   for (int i = 0; i < message->field_count(); i++) {
7326     const FieldDescriptor* field = message->field(i);
7327     if (field->proto3_optional_) {
7328       if (!field->containing_oneof() ||
7329           !field->containing_oneof()->is_synthetic()) {
7330         AddError(message->full_name(), proto.field(i),
7331                  DescriptorPool::ErrorCollector::OTHER,
7332                  "Fields with proto3_optional set must be "
7333                  "a member of a one-field oneof");
7334       }
7335     }
7336   }
7337 
7338   // Synthetic oneofs must be last.
7339   int first_synthetic = -1;
7340   for (int i = 0; i < message->oneof_decl_count(); i++) {
7341     if (message->oneof_decl(i)->is_synthetic()) {
7342       if (first_synthetic == -1) {
7343         first_synthetic = i;
7344       }
7345     } else {
7346       if (first_synthetic != -1) {
7347         AddError(message->full_name(), proto.oneof_decl(i),
7348                  DescriptorPool::ErrorCollector::OTHER,
7349                  "Synthetic oneofs must be after all other oneofs");
7350       }
7351     }
7352   }
7353 
7354   if (first_synthetic == -1) {
7355     message->real_oneof_decl_count_ = message->oneof_decl_count_;
7356   } else {
7357     message->real_oneof_decl_count_ = first_synthetic;
7358   }
7359 }
7360 
CheckExtensionDeclarationFieldType(const FieldDescriptor & field,const FieldDescriptorProto & proto,absl::string_view type)7361 void DescriptorBuilder::CheckExtensionDeclarationFieldType(
7362     const FieldDescriptor& field, const FieldDescriptorProto& proto,
7363     absl::string_view type) {
7364   if (had_errors_) return;
7365   std::string actual_type = field.type_name();
7366   std::string expected_type(type);
7367   if (field.message_type() || field.enum_type()) {
7368     // Field message type descriptor can be in a partial state which will cause
7369     // segmentation fault if it is being accessed.
7370     if (had_errors_) return;
7371     absl::string_view full_name = field.message_type() != nullptr
7372                                       ? field.message_type()->full_name()
7373                                       : field.enum_type()->full_name();
7374     actual_type = absl::StrCat(".", full_name);
7375   }
7376   if (!IsNonMessageType(type) && !absl::StartsWith(type, ".")) {
7377     expected_type = absl::StrCat(".", type);
7378   }
7379   if (expected_type != actual_type) {
7380     AddError(field.full_name(), proto, DescriptorPool::ErrorCollector::EXTENDEE,
7381              [&] {
7382                return absl::Substitute(
7383                    "\"$0\" extension field $1 is expected to be type "
7384                    "\"$2\", not \"$3\".",
7385                    field.containing_type()->full_name(), field.number(),
7386                    expected_type, actual_type);
7387              });
7388   }
7389 }
7390 
7391 
CheckExtensionDeclaration(const FieldDescriptor & field,const FieldDescriptorProto & proto,absl::string_view declared_full_name,absl::string_view declared_type_name,bool is_repeated)7392 void DescriptorBuilder::CheckExtensionDeclaration(
7393     const FieldDescriptor& field, const FieldDescriptorProto& proto,
7394     absl::string_view declared_full_name, absl::string_view declared_type_name,
7395     bool is_repeated) {
7396   if (!declared_type_name.empty()) {
7397     CheckExtensionDeclarationFieldType(field, proto, declared_type_name);
7398   }
7399   if (!declared_full_name.empty()) {
7400     std::string actual_full_name = absl::StrCat(".", field.full_name());
7401     if (declared_full_name != actual_full_name) {
7402       AddError(field.full_name(), proto,
7403                DescriptorPool::ErrorCollector::EXTENDEE, [&] {
7404                  return absl::Substitute(
7405                      "\"$0\" extension field $1 is expected to have field name "
7406                      "\"$2\", not \"$3\".",
7407                      field.containing_type()->full_name(), field.number(),
7408                      declared_full_name, actual_full_name);
7409                });
7410     }
7411   }
7412 
7413   if (is_repeated != field.is_repeated()) {
7414     AddError(field.full_name(), proto, DescriptorPool::ErrorCollector::EXTENDEE,
7415              [&] {
7416                return absl::Substitute(
7417                    "\"$0\" extension field $1 is expected to be $2.",
7418                    field.containing_type()->full_name(), field.number(),
7419                    is_repeated ? "repeated" : "optional");
7420              });
7421   }
7422 }
7423 
CrossLinkField(FieldDescriptor * field,const FieldDescriptorProto & proto)7424 void DescriptorBuilder::CrossLinkField(FieldDescriptor* field,
7425                                        const FieldDescriptorProto& proto) {
7426   if (proto.has_extendee()) {
7427     Symbol extendee =
7428         LookupSymbol(proto.extendee(), field->full_name(),
7429                      DescriptorPool::PLACEHOLDER_EXTENDABLE_MESSAGE);
7430     if (extendee.IsNull()) {
7431       AddNotDefinedError(field->full_name(), proto,
7432                          DescriptorPool::ErrorCollector::EXTENDEE,
7433                          proto.extendee());
7434       return;
7435     } else if (extendee.type() != Symbol::MESSAGE) {
7436       AddError(field->full_name(), proto,
7437                DescriptorPool::ErrorCollector::EXTENDEE, [&] {
7438                  return absl::StrCat("\"", proto.extendee(),
7439                                      "\" is not a message type.");
7440                });
7441       return;
7442     }
7443     field->containing_type_ = extendee.descriptor();
7444 
7445     const Descriptor::ExtensionRange* extension_range =
7446         field->containing_type()->FindExtensionRangeContainingNumber(
7447             field->number());
7448 
7449     if (extension_range == nullptr) {
7450       AddError(field->full_name(), proto,
7451                DescriptorPool::ErrorCollector::NUMBER, [&] {
7452                  return absl::Substitute(
7453                      "\"$0\" does not declare $1 as an "
7454                      "extension number.",
7455                      field->containing_type()->full_name(), field->number());
7456                });
7457     }
7458   }
7459 
7460   if (field->containing_oneof() != nullptr) {
7461     if (field->label() != FieldDescriptor::LABEL_OPTIONAL) {
7462       // Note that this error will never happen when parsing .proto files.
7463       // It can only happen if you manually construct a FileDescriptorProto
7464       // that is incorrect.
7465       AddError(field->full_name(), proto, DescriptorPool::ErrorCollector::NAME,
7466                "Fields of oneofs must themselves have label LABEL_OPTIONAL.");
7467     }
7468   }
7469 
7470   if (proto.has_type_name()) {
7471     // Assume we are expecting a message type unless the proto contains some
7472     // evidence that it expects an enum type.  This only makes a difference if
7473     // we end up creating a placeholder.
7474     bool expecting_enum = (proto.type() == FieldDescriptorProto::TYPE_ENUM) ||
7475                           proto.has_default_value();
7476 
7477     // In case of weak fields we force building the dependency. We need to know
7478     // if the type exist or not. If it doesn't exist we substitute Empty which
7479     // should only be done if the type can't be found in the generated pool.
7480     // TODO Ideally we should query the database directly to check
7481     // if weak fields exist or not so that we don't need to force building
7482     // weak dependencies. However the name lookup rules for symbols are
7483     // somewhat complicated, so I defer it too another CL.
7484     bool is_weak = !pool_->enforce_weak_ && proto.options().weak();
7485     bool is_lazy = pool_->lazily_build_dependencies_ && !is_weak;
7486 
7487     Symbol type =
7488         LookupSymbol(proto.type_name(), field->full_name(),
7489                      expecting_enum ? DescriptorPool::PLACEHOLDER_ENUM
7490                                     : DescriptorPool::PLACEHOLDER_MESSAGE,
7491                      LOOKUP_TYPES, !is_lazy);
7492 
7493     if (type.IsNull()) {
7494       if (is_lazy) {
7495         ABSL_CHECK(field->type_ == FieldDescriptor::TYPE_MESSAGE ||
7496                    field->type_ == FieldDescriptor::TYPE_GROUP ||
7497                    field->type_ == FieldDescriptor::TYPE_ENUM)
7498             << proto;
7499         // Save the symbol names for later for lookup, and allocate the once
7500         // object needed for the accessors.
7501         const std::string& name = proto.type_name();
7502 
7503         int name_sizes = static_cast<int>(name.size() + 1 +
7504                                           proto.default_value().size() + 1);
7505 
7506         field->type_once_ = ::new (tables_->AllocateBytes(
7507             static_cast<int>(sizeof(absl::once_flag)) + name_sizes))
7508             absl::once_flag{};
7509         char* names = reinterpret_cast<char*>(field->type_once_ + 1);
7510 
7511         memcpy(names, name.c_str(), name.size() + 1);
7512         memcpy(names + name.size() + 1, proto.default_value().c_str(),
7513                proto.default_value().size() + 1);
7514 
7515         // AddFieldByNumber and AddExtension are done later in this function,
7516         // and can/must be done if the field type was not found. The related
7517         // error checking is not necessary when in lazily_build_dependencies_
7518         // mode, and can't be done without building the type's descriptor,
7519         // which we don't want to do.
7520         file_tables_->AddFieldByNumber(field);
7521         if (field->is_extension()) {
7522           tables_->AddExtension(field);
7523         }
7524         return;
7525       } else {
7526         // If the type is a weak type, we change the type to a google.protobuf.Empty
7527         // field.
7528         if (is_weak) {
7529           type = FindSymbol(kNonLinkedWeakMessageReplacementName);
7530         }
7531         if (type.IsNull()) {
7532           AddNotDefinedError(field->full_name(), proto,
7533                              DescriptorPool::ErrorCollector::TYPE,
7534                              proto.type_name());
7535           return;
7536         }
7537       }
7538     }
7539 
7540     if (!proto.has_type()) {
7541       // Choose field type based on symbol.
7542       if (type.type() == Symbol::MESSAGE) {
7543         field->type_ = FieldDescriptor::TYPE_MESSAGE;
7544       } else if (type.type() == Symbol::ENUM) {
7545         field->type_ = FieldDescriptor::TYPE_ENUM;
7546       } else {
7547         AddError(field->full_name(), proto,
7548                  DescriptorPool::ErrorCollector::TYPE, [&] {
7549                    return absl::StrCat("\"", proto.type_name(),
7550                                        "\" is not a type.");
7551                  });
7552         return;
7553       }
7554     }
7555 
7556     if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
7557       field->type_descriptor_.message_type = type.descriptor();
7558       if (field->type_descriptor_.message_type == nullptr) {
7559         AddError(field->full_name(), proto,
7560                  DescriptorPool::ErrorCollector::TYPE, [&] {
7561                    return absl::StrCat("\"", proto.type_name(),
7562                                        "\" is not a message type.");
7563                  });
7564         return;
7565       }
7566 
7567       if (field->has_default_value()) {
7568         AddError(field->full_name(), proto,
7569                  DescriptorPool::ErrorCollector::DEFAULT_VALUE,
7570                  "Messages can't have default values.");
7571       }
7572     } else if (field->cpp_type() == FieldDescriptor::CPPTYPE_ENUM) {
7573       field->type_descriptor_.enum_type = type.enum_descriptor();
7574       if (field->type_descriptor_.enum_type == nullptr) {
7575         AddError(field->full_name(), proto,
7576                  DescriptorPool::ErrorCollector::TYPE, [&] {
7577                    return absl::StrCat("\"", proto.type_name(),
7578                                        "\" is not an enum type.");
7579                  });
7580         return;
7581       }
7582 
7583       if (field->enum_type()->is_placeholder_) {
7584         // We can't look up default values for placeholder types.  We'll have
7585         // to just drop them.
7586         field->has_default_value_ = false;
7587       }
7588 
7589       if (field->has_default_value()) {
7590         // Ensure that the default value is an identifier. Parser cannot always
7591         // verify this because it does not have complete type information.
7592         // N.B. that this check yields better error messages but is not
7593         // necessary for correctness (an enum symbol must be a valid identifier
7594         // anyway), only for better errors.
7595         if (!io::Tokenizer::IsIdentifier(proto.default_value())) {
7596           AddError(field->full_name(), proto,
7597                    DescriptorPool::ErrorCollector::DEFAULT_VALUE,
7598                    "Default value for an enum field must be an identifier.");
7599         } else {
7600           // We can't just use field->enum_type()->FindValueByName() here
7601           // because that locks the pool's mutex, which we have already locked
7602           // at this point.
7603           const EnumValueDescriptor* default_value =
7604               LookupSymbolNoPlaceholder(proto.default_value(),
7605                                         field->enum_type()->full_name())
7606                   .enum_value_descriptor();
7607 
7608           if (default_value != nullptr &&
7609               default_value->type() == field->enum_type()) {
7610             field->default_value_enum_ = default_value;
7611           } else {
7612             AddError(field->full_name(), proto,
7613                      DescriptorPool::ErrorCollector::DEFAULT_VALUE, [&] {
7614                        return absl::StrCat("Enum type \"",
7615                                            field->enum_type()->full_name(),
7616                                            "\" has no value named \"",
7617                                            proto.default_value(), "\".");
7618                      });
7619           }
7620         }
7621       } else if (field->enum_type()->value_count() > 0) {
7622         // All enums must have at least one value, or we would have reported
7623         // an error elsewhere.  We use the first defined value as the default
7624         // if a default is not explicitly defined.
7625         field->default_value_enum_ = field->enum_type()->value(0);
7626       }
7627     } else {
7628       AddError(field->full_name(), proto, DescriptorPool::ErrorCollector::TYPE,
7629                "Field with primitive type has type_name.");
7630     }
7631   } else {
7632     if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE ||
7633         field->cpp_type() == FieldDescriptor::CPPTYPE_ENUM) {
7634       AddError(field->full_name(), proto, DescriptorPool::ErrorCollector::TYPE,
7635                "Field with message or enum type missing type_name.");
7636     }
7637   }
7638 
7639   // Add the field to the fields-by-number table.
7640   // Note:  We have to do this *after* cross-linking because extensions do not
7641   // know their containing type until now. If we're in
7642   // lazily_build_dependencies_ mode, we're guaranteed there's no errors, so no
7643   // risk to calling containing_type() or other accessors that will build
7644   // dependencies.
7645   if (!file_tables_->AddFieldByNumber(field)) {
7646     const FieldDescriptor* conflicting_field = file_tables_->FindFieldByNumber(
7647         field->containing_type(), field->number());
7648     const absl::string_view containing_type_name =
7649         field->containing_type() == nullptr
7650             ? absl::string_view("unknown")
7651             : field->containing_type()->full_name();
7652     if (field->is_extension()) {
7653       AddError(field->full_name(), proto,
7654                DescriptorPool::ErrorCollector::NUMBER, [&] {
7655                  return absl::Substitute(
7656                      "Extension number $0 has already been used "
7657                      "in \"$1\" by extension \"$2\".",
7658                      field->number(), containing_type_name,
7659                      conflicting_field->full_name());
7660                });
7661     } else {
7662       AddError(field->full_name(), proto,
7663                DescriptorPool::ErrorCollector::NUMBER, [&] {
7664                  return absl::Substitute(
7665                      "Field number $0 has already been used in "
7666                      "\"$1\" by field \"$2\".",
7667                      field->number(), containing_type_name,
7668                      conflicting_field->name());
7669                });
7670     }
7671   } else {
7672     if (field->is_extension()) {
7673       if (!tables_->AddExtension(field)) {
7674         auto make_error = [&] {
7675           const FieldDescriptor* conflicting_field =
7676               tables_->FindExtension(field->containing_type(), field->number());
7677           const absl::string_view containing_type_name =
7678               field->containing_type() == nullptr
7679                   ? absl::string_view("unknown")
7680                   : field->containing_type()->full_name();
7681           return absl::Substitute(
7682               "Extension number $0 has already been used in \"$1\" by "
7683               "extension "
7684               "\"$2\" defined in $3.",
7685               field->number(), containing_type_name,
7686               conflicting_field->full_name(),
7687               conflicting_field->file()->name());
7688         };
7689         // Conflicting extension numbers should be an error. However, before
7690         // turning this into an error we need to fix all existing broken
7691         // protos first.
7692         // TODO: Change this to an error.
7693         AddWarning(field->full_name(), proto,
7694                    DescriptorPool::ErrorCollector::NUMBER, make_error);
7695       }
7696     }
7697   }
7698 }
7699 
CrossLinkService(ServiceDescriptor * service,const ServiceDescriptorProto & proto)7700 void DescriptorBuilder::CrossLinkService(ServiceDescriptor* service,
7701                                          const ServiceDescriptorProto& proto) {
7702   for (int i = 0; i < service->method_count(); i++) {
7703     CrossLinkMethod(&service->methods_[i], proto.method(i));
7704   }
7705 }
7706 
CrossLinkMethod(MethodDescriptor * method,const MethodDescriptorProto & proto)7707 void DescriptorBuilder::CrossLinkMethod(MethodDescriptor* method,
7708                                         const MethodDescriptorProto& proto) {
7709   Symbol input_type =
7710       LookupSymbol(proto.input_type(), method->full_name(),
7711                    DescriptorPool::PLACEHOLDER_MESSAGE, LOOKUP_ALL,
7712                    !pool_->lazily_build_dependencies_);
7713   if (input_type.IsNull()) {
7714     if (!pool_->lazily_build_dependencies_) {
7715       AddNotDefinedError(method->full_name(), proto,
7716                          DescriptorPool::ErrorCollector::INPUT_TYPE,
7717                          proto.input_type());
7718     } else {
7719       method->input_type_.SetLazy(proto.input_type(), file_);
7720     }
7721   } else if (input_type.type() != Symbol::MESSAGE) {
7722     AddError(method->full_name(), proto,
7723              DescriptorPool::ErrorCollector::INPUT_TYPE, [&] {
7724                return absl::StrCat("\"", proto.input_type(),
7725                                    "\" is not a message type.");
7726              });
7727   } else {
7728     method->input_type_.Set(input_type.descriptor());
7729   }
7730 
7731   Symbol output_type =
7732       LookupSymbol(proto.output_type(), method->full_name(),
7733                    DescriptorPool::PLACEHOLDER_MESSAGE, LOOKUP_ALL,
7734                    !pool_->lazily_build_dependencies_);
7735   if (output_type.IsNull()) {
7736     if (!pool_->lazily_build_dependencies_) {
7737       AddNotDefinedError(method->full_name(), proto,
7738                          DescriptorPool::ErrorCollector::OUTPUT_TYPE,
7739                          proto.output_type());
7740     } else {
7741       method->output_type_.SetLazy(proto.output_type(), file_);
7742     }
7743   } else if (output_type.type() != Symbol::MESSAGE) {
7744     AddError(method->full_name(), proto,
7745              DescriptorPool::ErrorCollector::OUTPUT_TYPE, [&] {
7746                return absl::StrCat("\"", proto.output_type(),
7747                                    "\" is not a message type.");
7748              });
7749   } else {
7750     method->output_type_.Set(output_type.descriptor());
7751   }
7752 }
7753 
SuggestFieldNumbers(FileDescriptor * file,const FileDescriptorProto & proto)7754 void DescriptorBuilder::SuggestFieldNumbers(FileDescriptor* file,
7755                                             const FileDescriptorProto& proto) {
7756   for (int message_index = 0; message_index < file->message_type_count();
7757        message_index++) {
7758     const Descriptor* message = &file->message_types_[message_index];
7759     auto hints_it = message_hints_.find(message);
7760     if (hints_it == message_hints_.end()) continue;
7761     auto* hints = &hints_it->second;
7762     constexpr int kMaxSuggestions = 3;
7763     int fields_to_suggest = std::min(kMaxSuggestions, hints->fields_to_suggest);
7764     if (fields_to_suggest <= 0) continue;
7765     struct Range {
7766       int from;
7767       int to;
7768     };
7769     std::vector<Range> used_ordinals;
7770     auto add_ordinal = [&](int ordinal) {
7771       if (ordinal <= 0 || ordinal > FieldDescriptor::kMaxNumber) return;
7772       if (!used_ordinals.empty() && ordinal == used_ordinals.back().to) {
7773         used_ordinals.back().to = ordinal + 1;
7774       } else {
7775         used_ordinals.push_back({ordinal, ordinal + 1});
7776       }
7777     };
7778     auto add_range = [&](int from, int to) {
7779       from = std::max(0, std::min(FieldDescriptor::kMaxNumber + 1, from));
7780       to = std::max(0, std::min(FieldDescriptor::kMaxNumber + 1, to));
7781       if (from >= to) return;
7782       used_ordinals.push_back({from, to});
7783     };
7784     for (int i = 0; i < message->field_count(); i++) {
7785       add_ordinal(message->field(i)->number());
7786     }
7787     for (int i = 0; i < message->extension_count(); i++) {
7788       add_ordinal(message->extension(i)->number());
7789     }
7790     for (int i = 0; i < message->reserved_range_count(); i++) {
7791       auto range = message->reserved_range(i);
7792       add_range(range->start, range->end);
7793     }
7794     for (int i = 0; i < message->extension_range_count(); i++) {
7795       auto range = message->extension_range(i);
7796       add_range(range->start_number(), range->end_number());
7797     }
7798     used_ordinals.push_back(
7799         {FieldDescriptor::kMaxNumber, FieldDescriptor::kMaxNumber + 1});
7800     used_ordinals.push_back({FieldDescriptor::kFirstReservedNumber,
7801                              FieldDescriptor::kLastReservedNumber});
7802     std::sort(used_ordinals.begin(), used_ordinals.end(),
7803               [](Range lhs, Range rhs) {
7804                 return std::tie(lhs.from, lhs.to) < std::tie(rhs.from, rhs.to);
7805               });
7806     int current_ordinal = 1;
7807     if (hints->first_reason) {
7808       auto make_error = [&] {
7809         std::stringstream id_list;
7810         id_list << "Suggested field numbers for " << message->full_name()
7811                 << ": ";
7812         const char* separator = "";
7813         for (auto& current_range : used_ordinals) {
7814           while (current_ordinal < current_range.from &&
7815                  fields_to_suggest > 0) {
7816             id_list << separator << current_ordinal++;
7817             separator = ", ";
7818             fields_to_suggest--;
7819           }
7820           if (fields_to_suggest == 0) break;
7821           current_ordinal = std::max(current_ordinal, current_range.to);
7822         }
7823         return id_list.str();
7824       };
7825       AddError(message->full_name(), *hints->first_reason,
7826                hints->first_reason_location, make_error);
7827     }
7828   }
7829 }
7830 
7831 // -------------------------------------------------------------------
7832 
7833 // Determine if the file uses optimize_for = LITE_RUNTIME, being careful to
7834 // avoid problems that exist at init time.
IsLite(const FileDescriptor * file)7835 static bool IsLite(const FileDescriptor* file) {
7836   // TODO:  I don't even remember how many of these conditions are
7837   //   actually possible.  I'm just being super-safe.
7838   return file != nullptr &&
7839          &file->options() != &FileOptions::default_instance() &&
7840          file->options().optimize_for() == FileOptions::LITE_RUNTIME;
7841 }
7842 
ValidateOptions(const FileDescriptor * file,const FileDescriptorProto & proto)7843 void DescriptorBuilder::ValidateOptions(const FileDescriptor* file,
7844                                         const FileDescriptorProto& proto) {
7845   ValidateFileFeatures(file, proto);
7846 
7847   // Lite files can only be imported by other Lite files.
7848   if (!IsLite(file)) {
7849     for (int i = 0; i < file->dependency_count(); i++) {
7850       if (IsLite(file->dependency(i))) {
7851         AddError(file->dependency(i)->name(), proto,
7852                  DescriptorPool::ErrorCollector::IMPORT, [&] {
7853                    return absl::StrCat(
7854                        "Files that do not use optimize_for = LITE_RUNTIME "
7855                        "cannot import files which do use this option.  This "
7856                        "file is not lite, but it imports \"",
7857                        file->dependency(i)->name(), "\" which is.");
7858                  });
7859         break;
7860       }
7861     }
7862   }
7863   if (file->edition() == Edition::EDITION_PROTO3) {
7864     ValidateProto3(file, proto);
7865   }
7866 }
7867 
ValidateProto3(const FileDescriptor * file,const FileDescriptorProto & proto)7868 void DescriptorBuilder::ValidateProto3(const FileDescriptor* file,
7869                                        const FileDescriptorProto& proto) {
7870   for (int i = 0; i < file->extension_count(); ++i) {
7871     ValidateProto3Field(file->extensions_ + i, proto.extension(i));
7872   }
7873   for (int i = 0; i < file->message_type_count(); ++i) {
7874     ValidateProto3Message(file->message_types_ + i, proto.message_type(i));
7875   }
7876 }
7877 
ValidateProto3Message(const Descriptor * message,const DescriptorProto & proto)7878 void DescriptorBuilder::ValidateProto3Message(const Descriptor* message,
7879                                               const DescriptorProto& proto) {
7880   for (int i = 0; i < message->nested_type_count(); ++i) {
7881     ValidateProto3Message(message->nested_types_ + i, proto.nested_type(i));
7882   }
7883   for (int i = 0; i < message->field_count(); ++i) {
7884     ValidateProto3Field(message->fields_ + i, proto.field(i));
7885   }
7886   for (int i = 0; i < message->extension_count(); ++i) {
7887     ValidateProto3Field(message->extensions_ + i, proto.extension(i));
7888   }
7889   if (message->extension_range_count() > 0) {
7890     AddError(message->full_name(), proto.extension_range(0),
7891              DescriptorPool::ErrorCollector::NUMBER,
7892              "Extension ranges are not allowed in proto3.");
7893   }
7894   if (message->options().message_set_wire_format()) {
7895     // Using MessageSet doesn't make sense since we disallow extensions.
7896     AddError(message->full_name(), proto, DescriptorPool::ErrorCollector::NAME,
7897              "MessageSet is not supported in proto3.");
7898   }
7899 }
7900 
ValidateProto3Field(const FieldDescriptor * field,const FieldDescriptorProto & proto)7901 void DescriptorBuilder::ValidateProto3Field(const FieldDescriptor* field,
7902                                             const FieldDescriptorProto& proto) {
7903   if (field->is_extension() &&
7904       !AllowedExtendeeInProto3(field->containing_type()->full_name())) {
7905     AddError(field->full_name(), proto,
7906              DescriptorPool::ErrorCollector::EXTENDEE,
7907              "Extensions in proto3 are only allowed for defining options.");
7908   }
7909   if (field->is_required()) {
7910     AddError(field->full_name(), proto, DescriptorPool::ErrorCollector::TYPE,
7911              "Required fields are not allowed in proto3.");
7912   }
7913   if (field->has_default_value()) {
7914     AddError(field->full_name(), proto,
7915              DescriptorPool::ErrorCollector::DEFAULT_VALUE,
7916              "Explicit default values are not allowed in proto3.");
7917   }
7918   if (field->cpp_type() == FieldDescriptor::CPPTYPE_ENUM &&
7919       field->enum_type() && field->enum_type()->is_closed()) {
7920     // Proto3 messages can only use open enum types; otherwise we can't
7921     // guarantee that the default value is zero.
7922     AddError(
7923         field->full_name(), proto, DescriptorPool::ErrorCollector::TYPE, [&] {
7924           return absl::StrCat("Enum type \"", field->enum_type()->full_name(),
7925                               "\" is not an open enum, but is used in \"",
7926                               field->containing_type()->full_name(),
7927                               "\" which is a proto3 message type.");
7928         });
7929   }
7930   if (field->type() == FieldDescriptor::TYPE_GROUP) {
7931     AddError(field->full_name(), proto, DescriptorPool::ErrorCollector::TYPE,
7932              "Groups are not supported in proto3 syntax.");
7933   }
7934 }
7935 
// Validates message-level options of `message`.  Delegates, in this order, to
// CheckFieldJsonNameUniqueness and ValidateExtensionRangeOptions, passing the
// message together with the DescriptorProto it was built from.
void DescriptorBuilder::ValidateOptions(const Descriptor* message,
                                        const DescriptorProto& proto) {
  CheckFieldJsonNameUniqueness(proto, message);
  ValidateExtensionRangeOptions(proto, *message);
}
7941 
// Oneof overload of ValidateOptions.  The body is intentionally empty: no
// checks are performed for oneofs here, and both parameters are unused.
void DescriptorBuilder::ValidateOptions(const OneofDescriptor* /*oneof*/,
                                        const OneofDescriptorProto& /*proto*/) {
}
7945 
7946 
ValidateOptions(const FieldDescriptor * field,const FieldDescriptorProto & proto)7947 void DescriptorBuilder::ValidateOptions(const FieldDescriptor* field,
7948                                         const FieldDescriptorProto& proto) {
7949   if (pool_->lazily_build_dependencies_ && (!field || !field->message_type())) {
7950     return;
7951   }
7952 
7953   ValidateFieldFeatures(field, proto);
7954 
7955   // Only message type fields may be lazy.
7956   if (field->options().lazy() || field->options().unverified_lazy()) {
7957     if (field->type() != FieldDescriptor::TYPE_MESSAGE) {
7958       AddError(field->full_name(), proto, DescriptorPool::ErrorCollector::TYPE,
7959                "[lazy = true] can only be specified for submessage fields.");
7960     }
7961   }
7962 
7963   // Only repeated primitive fields may be packed.
7964   if (field->options().packed() && !field->is_packable()) {
7965     AddError(
7966         field->full_name(), proto, DescriptorPool::ErrorCollector::TYPE,
7967         "[packed = true] can only be specified for repeated primitive fields.");
7968   }
7969 
7970   // Note:  Default instance may not yet be initialized here, so we have to
7971   //   avoid reading from it.
7972   if (field->containing_type_ != nullptr &&
7973       &field->containing_type()->options() !=
7974           &MessageOptions::default_instance() &&
7975       field->containing_type()->options().message_set_wire_format()) {
7976     if (field->is_extension()) {
7977       if (!field->is_optional() ||
7978           field->type() != FieldDescriptor::TYPE_MESSAGE) {
7979         AddError(field->full_name(), proto,
7980                  DescriptorPool::ErrorCollector::TYPE,
7981                  "Extensions of MessageSets must be optional messages.");
7982       }
7983     } else {
7984       AddError(field->full_name(), proto, DescriptorPool::ErrorCollector::NAME,
7985                "MessageSets cannot have fields, only extensions.");
7986     }
7987   }
7988 
7989   // Lite extensions can only be of Lite types.
7990   if (IsLite(field->file()) && field->containing_type_ != nullptr &&
7991       !IsLite(field->containing_type()->file())) {
7992     AddError(field->full_name(), proto,
7993              DescriptorPool::ErrorCollector::EXTENDEE,
7994              "Extensions to non-lite types can only be declared in non-lite "
7995              "files.  Note that you cannot extend a non-lite type to contain "
7996              "a lite type, but the reverse is allowed.");
7997   }
7998 
7999   // Validate map types.
8000   if (field->is_map()) {
8001     if (!ValidateMapEntry(field, proto)) {
8002       AddError(field->full_name(), proto, DescriptorPool::ErrorCollector::TYPE,
8003                "map_entry should not be set explicitly. Use map<KeyType, "
8004                "ValueType> instead.");
8005     }
8006   }
8007 
8008   ValidateJSType(field, proto);
8009 
8010   // json_name option is not allowed on extension fields. Note that the
8011   // json_name field in FieldDescriptorProto is always populated by protoc
8012   // when it sends descriptor data to plugins (calculated from field name if
8013   // the option is not explicitly set) so we can't rely on its presence to
8014   // determine whether the json_name option is set on the field. Here we
8015   // compare it against the default calculated json_name value and consider
8016   // the option set if they are different. This won't catch the case when
8017   // a user explicitly sets json_name to the default value, but should be
8018   // good enough to catch common misuses.
8019   if (field->is_extension() &&
8020       (field->has_json_name() &&
8021        field->json_name() != ToJsonName(field->name()))) {
8022     AddError(field->full_name(), proto,
8023              DescriptorPool::ErrorCollector::OPTION_NAME,
8024              "option json_name is not allowed on extension fields.");
8025   }
8026 
8027   if (absl::StrContains(field->json_name(), '\0')) {
8028     AddError(field->full_name(), proto,
8029              DescriptorPool::ErrorCollector::OPTION_NAME,
8030              "json_name cannot have embedded null characters.");
8031   }
8032 
8033 
8034   // If this is a declared extension, validate that the actual name and type
8035   // match the declaration.
8036   if (field->is_extension()) {
8037     if (pool_->IsReadyForCheckingDescriptorExtDecl(
8038             field->containing_type()->full_name())) {
8039       return;
8040     }
8041     const Descriptor::ExtensionRange* extension_range =
8042         field->containing_type()->FindExtensionRangeContainingNumber(
8043             field->number());
8044 
8045     if (extension_range->options_ == nullptr) {
8046       return;
8047     }
8048 
8049     if (pool_->enforce_extension_declarations_) {
8050       for (const auto& declaration : extension_range->options_->declaration()) {
8051         if (declaration.number() != field->number()) continue;
8052         if (declaration.reserved()) {
8053           AddError(
8054               field->full_name(), proto,
8055               DescriptorPool::ErrorCollector::EXTENDEE, [&] {
8056                 return absl::Substitute(
8057                     "Cannot use number $0 for extension field $1, as it is "
8058                     "reserved in the extension declarations for message $2.",
8059                     field->number(), field->full_name(),
8060                     field->containing_type()->full_name());
8061               });
8062           return;
8063         }
8064         CheckExtensionDeclaration(*field, proto, declaration.full_name(),
8065                                   declaration.type(), declaration.repeated());
8066         return;
8067       }
8068 
8069       // Either no declarations, or there are but no matches. If there are no
8070       // declarations, we check its verification state. If there are other
8071       // non-matching declarations, we enforce that this extension must also be
8072       // declared.
8073       if (!extension_range->options_->declaration().empty() ||
8074           (extension_range->options_->verification() ==
8075            ExtensionRangeOptions::DECLARATION)) {
8076         AddError(
8077             field->full_name(), proto, DescriptorPool::ErrorCollector::EXTENDEE,
8078             [&] {
8079               return absl::Substitute(
8080                   "Missing extension declaration for field $0 with number $1 "
8081                   "in extendee message $2. An extension range must declare for "
8082                   "all extension fields if its verification state is "
8083                   "DECLARATION or there's any declaration in the range "
8084                   "already. Otherwise, consider splitting up the range.",
8085                   field->full_name(), field->number(),
8086                   field->containing_type()->full_name());
8087             });
8088         return;
8089       }
8090     }
8091   }
8092 }
8093 
IsStringMapType(const FieldDescriptor & field)8094 static bool IsStringMapType(const FieldDescriptor& field) {
8095   if (!field.is_map()) return false;
8096   for (int i = 0; i < field.message_type()->field_count(); ++i) {
8097     if (field.message_type()->field(i)->type() ==
8098         FieldDescriptor::TYPE_STRING) {
8099       return true;
8100     }
8101   }
8102   return false;
8103 }
8104 
ValidateFileFeatures(const FileDescriptor * file,const FileDescriptorProto & proto)8105 void DescriptorBuilder::ValidateFileFeatures(const FileDescriptor* file,
8106                                              const FileDescriptorProto& proto) {
8107   // Rely on our legacy validation for proto2/proto3 files.
8108   if (IsLegacyEdition(file->edition())) {
8109     return;
8110   }
8111 
8112   if (file->features().field_presence() == FeatureSet::LEGACY_REQUIRED) {
8113     AddError(file->name(), proto, DescriptorPool::ErrorCollector::EDITIONS,
8114              "Required presence can't be specified by default.");
8115   }
8116   if (file->options().java_string_check_utf8()) {
8117     AddError(
8118         file->name(), proto, DescriptorPool::ErrorCollector::EDITIONS,
8119         "File option java_string_check_utf8 is not allowed under editions. Use "
8120         "the (pb.java).utf8_validation feature to control this behavior.");
8121   }
8122 }
8123 
ValidateFieldFeatures(const FieldDescriptor * field,const FieldDescriptorProto & proto)8124 void DescriptorBuilder::ValidateFieldFeatures(
8125     const FieldDescriptor* field, const FieldDescriptorProto& proto) {
8126   // Rely on our legacy validation for proto2/proto3 files.
8127   if (field->file()->edition() < Edition::EDITION_2023) {
8128     return;
8129   }
8130 
8131   // Double check proto descriptors in editions.  These would usually be caught
8132   // by the parser, but may not be for dynamically built descriptors.
8133   if (proto.label() == FieldDescriptorProto::LABEL_REQUIRED) {
8134     AddError(field->full_name(), proto, DescriptorPool::ErrorCollector::NAME,
8135              "Required label is not allowed under editions.  Use the feature "
8136              "field_presence = LEGACY_REQUIRED to control this behavior.");
8137   }
8138   if (proto.type() == FieldDescriptorProto::TYPE_GROUP) {
8139     AddError(field->full_name(), proto, DescriptorPool::ErrorCollector::NAME,
8140              "Group types are not allowed under editions.  Use the feature "
8141              "message_encoding = DELIMITED to control this behavior.");
8142   }
8143 
8144   auto& field_options = field->options();
8145   // Validate legacy options that have been migrated to features.
8146   if (field_options.has_packed()) {
8147     AddError(field->full_name(), proto, DescriptorPool::ErrorCollector::NAME,
8148              "Field option packed is not allowed under editions.  Use the "
8149              "repeated_field_encoding feature to control this behavior.");
8150   }
8151 
8152   // Validate fully resolved features.
8153   if (!field->is_repeated() && !field->has_presence()) {
8154     if (field->has_default_value()) {
8155       AddError(field->full_name(), proto, DescriptorPool::ErrorCollector::NAME,
8156                "Implicit presence fields can't specify defaults.");
8157     }
8158     if (field->enum_type() != nullptr &&
8159         field->enum_type()->features().enum_type() != FeatureSet::OPEN) {
8160       AddError(field->full_name(), proto, DescriptorPool::ErrorCollector::NAME,
8161                "Implicit presence enum fields must always be open.");
8162     }
8163   }
8164   if (field->is_extension() &&
8165       field->features().field_presence() == FeatureSet::LEGACY_REQUIRED) {
8166     AddError(field->full_name(), proto, DescriptorPool::ErrorCollector::NAME,
8167              "Extensions can't be required.");
8168   }
8169 
8170   if (field->containing_type() != nullptr &&
8171       field->containing_type()->options().map_entry()) {
8172     // Skip validation of explicit features on generated map fields.  These will
8173     // be blindly propagated from the original map field, and may violate a lot
8174     // of these conditions.  Note: we do still validate the user-specified map
8175     // field.
8176     return;
8177   }
8178 
8179   // Validate explicitly specified features on the field proto.
8180   if (field->proto_features_->has_field_presence()) {
8181     if (field->containing_oneof() != nullptr) {
8182       AddError(field->full_name(), proto, DescriptorPool::ErrorCollector::NAME,
8183                "Oneof fields can't specify field presence.");
8184     } else if (field->is_repeated()) {
8185       AddError(field->full_name(), proto, DescriptorPool::ErrorCollector::NAME,
8186                "Repeated fields can't specify field presence.");
8187     } else if (field->is_extension() &&
8188                field->proto_features_->field_presence() !=
8189                    FeatureSet::LEGACY_REQUIRED) {
8190       // Note: required extensions will fail elsewhere, so we skip reporting a
8191       // second error here.
8192       AddError(field->full_name(), proto, DescriptorPool::ErrorCollector::NAME,
8193                "Extensions can't specify field presence.");
8194     } else if (field->message_type() != nullptr &&
8195                field->proto_features_->field_presence() ==
8196                    FeatureSet::IMPLICIT) {
8197       AddError(field->full_name(), proto, DescriptorPool::ErrorCollector::NAME,
8198                "Message fields can't specify implicit presence.");
8199     }
8200   }
8201   if (!field->is_repeated() &&
8202       field->proto_features_->has_repeated_field_encoding()) {
8203     AddError(field->full_name(), proto, DescriptorPool::ErrorCollector::NAME,
8204              "Only repeated fields can specify repeated field encoding.");
8205   }
8206   if (field->type() != FieldDescriptor::TYPE_STRING &&
8207       !IsStringMapType(*field) &&
8208       field->proto_features_->has_utf8_validation()) {
8209     AddError(field->full_name(), proto, DescriptorPool::ErrorCollector::NAME,
8210              "Only string fields can specify utf8 validation.");
8211   }
8212   if (!field->is_packable() &&
8213       field->proto_features_->repeated_field_encoding() == FeatureSet::PACKED) {
8214     AddError(field->full_name(), proto, DescriptorPool::ErrorCollector::NAME,
8215              "Only repeated primitive fields can specify PACKED repeated field "
8216              "encoding.");
8217   }
8218   if ((field->cpp_type() != FieldDescriptor::CPPTYPE_MESSAGE ||
8219        field->is_map_message_type()) &&
8220       field->proto_features_->has_message_encoding()) {
8221     AddError(field->full_name(), proto, DescriptorPool::ErrorCollector::NAME,
8222              "Only message fields can specify message encoding.");
8223   }
8224 }
8225 
ValidateOptions(const EnumDescriptor * enm,const EnumDescriptorProto & proto)8226 void DescriptorBuilder::ValidateOptions(const EnumDescriptor* enm,
8227                                         const EnumDescriptorProto& proto) {
8228   CheckEnumValueUniqueness(proto, enm);
8229 
8230   if (!enm->is_closed() && enm->value_count() > 0 &&
8231       enm->value(0)->number() != 0) {
8232     AddError(enm->full_name(), proto.value(0),
8233              DescriptorPool::ErrorCollector::NUMBER,
8234              "The first enum value must be zero for open enums.");
8235   }
8236 
8237   if (!enm->options().has_allow_alias() || !enm->options().allow_alias()) {
8238     absl::flat_hash_map<int, std::string> used_values;
8239     for (int i = 0; i < enm->value_count(); ++i) {
8240       const EnumValueDescriptor* enum_value = enm->value(i);
8241       auto insert_result =
8242           used_values.emplace(enum_value->number(), enum_value->full_name());
8243       bool inserted = insert_result.second;
8244       if (!inserted) {
8245         if (!enm->options().allow_alias()) {
8246           // Generate error if duplicated enum values are explicitly disallowed.
8247           auto make_error = [&] {
8248             // Find the next free number.
8249             absl::flat_hash_set<int64_t> used;
8250             for (int j = 0; j < enm->value_count(); ++j) {
8251               used.insert(enm->value(j)->number());
8252             }
8253             int64_t next_value = static_cast<int64_t>(enum_value->number()) + 1;
8254             while (used.contains(next_value)) ++next_value;
8255 
8256             std::string error = absl::StrCat(
8257                 "\"", enum_value->full_name(),
8258                 "\" uses the same enum value as \"",
8259                 insert_result.first->second,
8260                 "\". If this is intended, set "
8261                 "'option allow_alias = true;' to the enum definition.");
8262             if (next_value < std::numeric_limits<int32_t>::max()) {
8263               absl::StrAppend(&error, " The next available enum value is ",
8264                               next_value, ".");
8265             }
8266             return error;
8267           };
8268           AddError(enm->full_name(), proto.value(i),
8269                    DescriptorPool::ErrorCollector::NUMBER, make_error);
8270         }
8271       }
8272     }
8273   }
8274 }
8275 
ValidateOptions(const EnumValueDescriptor *,const EnumValueDescriptorProto &)8276 void DescriptorBuilder::ValidateOptions(
8277     const EnumValueDescriptor* /* enum_value */,
8278     const EnumValueDescriptorProto& /* proto */) {
8279   // Nothing to do so far.
8280 }
8281 
8282 namespace {
8283 // Validates that a fully-qualified symbol for extension declaration must
8284 // have a leading dot and valid identifiers.
ValidateSymbolForDeclaration(absl::string_view symbol)8285 absl::optional<std::string> ValidateSymbolForDeclaration(
8286     absl::string_view symbol) {
8287   if (!absl::StartsWith(symbol, ".")) {
8288     return absl::StrCat("\"", symbol,
8289                         "\" must have a leading dot to indicate the "
8290                         "fully-qualified scope.");
8291   }
8292   if (!ValidateQualifiedName(symbol)) {
8293     return absl::StrCat("\"", symbol, "\" contains invalid identifiers.");
8294   }
8295   return absl::nullopt;
8296 }
8297 }  // namespace
8298 
8299 
ValidateExtensionDeclaration(const absl::string_view full_name,const RepeatedPtrField<ExtensionRangeOptions_Declaration> & declarations,const DescriptorProto_ExtensionRange & proto,absl::flat_hash_set<absl::string_view> & full_name_set)8300 void DescriptorBuilder::ValidateExtensionDeclaration(
8301     const absl::string_view full_name,
8302     const RepeatedPtrField<ExtensionRangeOptions_Declaration>& declarations,
8303     const DescriptorProto_ExtensionRange& proto,
8304     absl::flat_hash_set<absl::string_view>& full_name_set) {
8305   absl::flat_hash_set<int> extension_number_set;
8306   for (const auto& declaration : declarations) {
8307     if (declaration.number() < proto.start() ||
8308         declaration.number() >= proto.end()) {
8309       AddError(full_name, proto, DescriptorPool::ErrorCollector::NUMBER, [&] {
8310         return absl::Substitute(
8311             "Extension declaration number $0 is not in the "
8312             "extension range.",
8313             declaration.number());
8314       });
8315     }
8316 
8317     if (!extension_number_set.insert(declaration.number()).second) {
8318       AddError(full_name, proto, DescriptorPool::ErrorCollector::NUMBER, [&] {
8319         return absl::Substitute(
8320             "Extension declaration number $0 is declared multiple times.",
8321             declaration.number());
8322       });
8323     }
8324 
8325     // Both full_name and type should be present. If none of them is set,
8326     // add an error unless reserved is set to true. If only one of them is set,
8327     // add an error whether or not reserved is set to true.
8328     if (!declaration.has_full_name() || !declaration.has_type()) {
8329       if (declaration.has_full_name() != declaration.has_type() ||
8330           !declaration.reserved()) {
8331         AddError(full_name, proto, DescriptorPool::ErrorCollector::EXTENDEE,
8332                  [&] {
8333                    return absl::StrCat(
8334                        "Extension declaration #", declaration.number(),
8335                        " should have both \"full_name\" and \"type\" set.");
8336                  });
8337       }
8338     } else {
8339       if (!full_name_set.insert(declaration.full_name()).second) {
8340         AddError(
8341             declaration.full_name(), proto,
8342             DescriptorPool::ErrorCollector::NAME, [&] {
8343               return absl::Substitute(
8344                   "Extension field name \"$0\" is declared multiple times.",
8345                   declaration.full_name());
8346             });
8347         return;
8348       }
8349       absl::optional<std::string> err =
8350           ValidateSymbolForDeclaration(declaration.full_name());
8351       if (err.has_value()) {
8352         AddError(full_name, proto, DescriptorPool::ErrorCollector::NAME,
8353                  [err] { return *err; });
8354       }
8355       if (!IsNonMessageType(declaration.type())) {
8356         err = ValidateSymbolForDeclaration(declaration.type());
8357         if (err.has_value()) {
8358           AddError(full_name, proto, DescriptorPool::ErrorCollector::NAME,
8359                    [err] { return *err; });
8360         }
8361       }
8362     }
8363   }
8364 }
8365 
ValidateExtensionRangeOptions(const DescriptorProto & proto,const Descriptor & message)8366 void DescriptorBuilder::ValidateExtensionRangeOptions(
8367     const DescriptorProto& proto, const Descriptor& message) {
8368   const int64_t max_extension_range =
8369       static_cast<int64_t>(message.options().message_set_wire_format()
8370                                ? std::numeric_limits<int32_t>::max()
8371                                : FieldDescriptor::kMaxNumber);
8372 
8373   size_t num_declarations = 0;
8374   for (int i = 0; i < message.extension_range_count(); i++) {
8375     if (message.extension_range(i)->options_ == nullptr) continue;
8376     num_declarations +=
8377         message.extension_range(i)->options_->declaration_size();
8378   }
8379 
8380   // Contains the full names from both "declaration" and "metadata".
8381   absl::flat_hash_set<absl::string_view> declaration_full_name_set;
8382   declaration_full_name_set.reserve(num_declarations);
8383 
8384   for (int i = 0; i < message.extension_range_count(); i++) {
8385     const auto& range = *message.extension_range(i);
8386     if (range.end_number() > max_extension_range + 1) {
8387       AddError(message.full_name(), proto,
8388                DescriptorPool::ErrorCollector::NUMBER, [&] {
8389                  return absl::Substitute(
8390                      "Extension numbers cannot be greater than $0.",
8391                      max_extension_range);
8392                });
8393     }
8394     const auto& range_options = *range.options_;
8395 
8396 
8397     if (!range_options.declaration().empty()) {
8398       // TODO: remove the "has_verification" check once the default
8399       // is flipped to DECLARATION.
8400       if (range_options.has_verification() &&
8401           range_options.verification() == ExtensionRangeOptions::UNVERIFIED) {
8402         AddError(message.full_name(), proto.extension_range(i),
8403                  DescriptorPool::ErrorCollector::EXTENDEE, [&] {
8404                    return "Cannot mark the extension range as UNVERIFIED when "
8405                           "it has extension(s) declared.";
8406                  });
8407         return;
8408       }
8409       ValidateExtensionDeclaration(
8410           message.full_name(), range_options.declaration(),
8411           proto.extension_range(i), declaration_full_name_set);
8412     }
8413   }
8414 }
8415 
ValidateOptions(const ServiceDescriptor * service,const ServiceDescriptorProto & proto)8416 void DescriptorBuilder::ValidateOptions(const ServiceDescriptor* service,
8417                                         const ServiceDescriptorProto& proto) {
8418   if (IsLite(service->file()) &&
8419       (service->file()->options().cc_generic_services() ||
8420        service->file()->options().java_generic_services())) {
8421     AddError(service->full_name(), proto, DescriptorPool::ErrorCollector::NAME,
8422              "Files with optimize_for = LITE_RUNTIME cannot define services "
8423              "unless you set both options cc_generic_services and "
8424              "java_generic_services to false.");
8425   }
8426 }
8427 
ValidateOptions(const MethodDescriptor *,const MethodDescriptorProto &)8428 void DescriptorBuilder::ValidateOptions(
8429     const MethodDescriptor* /* method */,
8430     const MethodDescriptorProto& /* proto */) {
8431   // Nothing to do so far.
8432 }
8433 
ValidateMapEntry(const FieldDescriptor * field,const FieldDescriptorProto & proto)8434 bool DescriptorBuilder::ValidateMapEntry(const FieldDescriptor* field,
8435                                          const FieldDescriptorProto& proto) {
8436   const Descriptor* message = field->message_type();
8437   if (  // Must not contain extensions, extension range or nested message or
8438         // enums
8439       message->extension_count() != 0 ||
8440       field->label() != FieldDescriptor::LABEL_REPEATED ||
8441       message->extension_range_count() != 0 ||
8442       message->nested_type_count() != 0 || message->enum_type_count() != 0 ||
8443       // Must contain exactly two fields
8444       message->field_count() != 2 ||
8445       // Field name and message name must match
8446       message->name() !=
8447           absl::StrCat(ToCamelCase(field->name(), false), "Entry") ||
8448       // Entry message must be in the same containing type of the field.
8449       field->containing_type() != message->containing_type()) {
8450     return false;
8451   }
8452 
8453   const FieldDescriptor* key = message->map_key();
8454   const FieldDescriptor* value = message->map_value();
8455   if (key->label() != FieldDescriptor::LABEL_OPTIONAL || key->number() != 1 ||
8456       key->name() != "key") {
8457     return false;
8458   }
8459   if (value->label() != FieldDescriptor::LABEL_OPTIONAL ||
8460       value->number() != 2 || value->name() != "value") {
8461     return false;
8462   }
8463 
8464   // Check key types are legal.
8465   switch (key->type()) {
8466     case FieldDescriptor::TYPE_ENUM:
8467       AddError(field->full_name(), proto, DescriptorPool::ErrorCollector::TYPE,
8468                "Key in map fields cannot be enum types.");
8469       break;
8470     case FieldDescriptor::TYPE_FLOAT:
8471     case FieldDescriptor::TYPE_DOUBLE:
8472     case FieldDescriptor::TYPE_MESSAGE:
8473     case FieldDescriptor::TYPE_GROUP:
8474     case FieldDescriptor::TYPE_BYTES:
8475       AddError(
8476           field->full_name(), proto, DescriptorPool::ErrorCollector::TYPE,
8477           "Key in map fields cannot be float/double, bytes or message types.");
8478       break;
8479     case FieldDescriptor::TYPE_BOOL:
8480     case FieldDescriptor::TYPE_INT32:
8481     case FieldDescriptor::TYPE_INT64:
8482     case FieldDescriptor::TYPE_SINT32:
8483     case FieldDescriptor::TYPE_SINT64:
8484     case FieldDescriptor::TYPE_STRING:
8485     case FieldDescriptor::TYPE_UINT32:
8486     case FieldDescriptor::TYPE_UINT64:
8487     case FieldDescriptor::TYPE_FIXED32:
8488     case FieldDescriptor::TYPE_FIXED64:
8489     case FieldDescriptor::TYPE_SFIXED32:
8490     case FieldDescriptor::TYPE_SFIXED64:
8491       // Legal cases
8492       break;
8493       // Do not add a default, so that the compiler will complain when new types
8494       // are added.
8495   }
8496 
8497   if (value->type() == FieldDescriptor::TYPE_ENUM) {
8498     if (value->enum_type()->value(0)->number() != 0) {
8499       AddError(field->full_name(), proto, DescriptorPool::ErrorCollector::TYPE,
8500                "Enum value in map must define 0 as the first value.");
8501     }
8502   }
8503 
8504   return true;
8505 }
8506 
DetectMapConflicts(const Descriptor * message,const DescriptorProto & proto)8507 void DescriptorBuilder::DetectMapConflicts(const Descriptor* message,
8508                                            const DescriptorProto& proto) {
8509   DescriptorsByNameSet<Descriptor> seen_types;
8510   for (int i = 0; i < message->nested_type_count(); ++i) {
8511     const Descriptor* nested = message->nested_type(i);
8512     auto insert_result = seen_types.insert(nested);
8513     bool inserted = insert_result.second;
8514     if (!inserted) {
8515       if ((*insert_result.first)->options().map_entry() ||
8516           nested->options().map_entry()) {
8517         AddError(message->full_name(), proto,
8518                  DescriptorPool::ErrorCollector::NAME, [&] {
8519                    return absl::StrCat(
8520                        "Expanded map entry type ", nested->name(),
8521                        " conflicts with an existing nested message type.");
8522                  });
8523         break;
8524       }
8525     }
8526     // Recursively test on the nested types.
8527     DetectMapConflicts(message->nested_type(i), proto.nested_type(i));
8528   }
8529   // Check for conflicted field names.
8530   for (int i = 0; i < message->field_count(); ++i) {
8531     const FieldDescriptor* field = message->field(i);
8532     auto iter = seen_types.find(field->name());
8533     if (iter != seen_types.end() && (*iter)->options().map_entry()) {
8534       AddError(message->full_name(), proto,
8535                DescriptorPool::ErrorCollector::NAME, [&] {
8536                  return absl::StrCat("Expanded map entry type ",
8537                                      (*iter)->name(),
8538                                      " conflicts with an existing field.");
8539                });
8540     }
8541   }
8542   // Check for conflicted enum names.
8543   for (int i = 0; i < message->enum_type_count(); ++i) {
8544     const EnumDescriptor* enum_desc = message->enum_type(i);
8545     auto iter = seen_types.find(enum_desc->name());
8546     if (iter != seen_types.end() && (*iter)->options().map_entry()) {
8547       AddError(message->full_name(), proto,
8548                DescriptorPool::ErrorCollector::NAME, [&] {
8549                  return absl::StrCat("Expanded map entry type ",
8550                                      (*iter)->name(),
8551                                      " conflicts with an existing enum type.");
8552                });
8553     }
8554   }
8555   // Check for conflicted oneof names.
8556   for (int i = 0; i < message->oneof_decl_count(); ++i) {
8557     const OneofDescriptor* oneof_desc = message->oneof_decl(i);
8558     auto iter = seen_types.find(oneof_desc->name());
8559     if (iter != seen_types.end() && (*iter)->options().map_entry()) {
8560       AddError(message->full_name(), proto,
8561                DescriptorPool::ErrorCollector::NAME, [&] {
8562                  return absl::StrCat("Expanded map entry type ",
8563                                      (*iter)->name(),
8564                                      " conflicts with an existing oneof type.");
8565                });
8566     }
8567   }
8568 }
8569 
ValidateJSType(const FieldDescriptor * field,const FieldDescriptorProto & proto)8570 void DescriptorBuilder::ValidateJSType(const FieldDescriptor* field,
8571                                        const FieldDescriptorProto& proto) {
8572   FieldOptions::JSType jstype = field->options().jstype();
8573   // The default is always acceptable.
8574   if (jstype == FieldOptions::JS_NORMAL) {
8575     return;
8576   }
8577 
8578   switch (field->type()) {
8579     // Integral 64-bit types may be represented as JavaScript numbers or
8580     // strings.
8581     case FieldDescriptor::TYPE_UINT64:
8582     case FieldDescriptor::TYPE_INT64:
8583     case FieldDescriptor::TYPE_SINT64:
8584     case FieldDescriptor::TYPE_FIXED64:
8585     case FieldDescriptor::TYPE_SFIXED64:
8586       if (jstype == FieldOptions::JS_STRING ||
8587           jstype == FieldOptions::JS_NUMBER) {
8588         return;
8589       }
8590       AddError(field->full_name(), proto, DescriptorPool::ErrorCollector::TYPE,
8591                [&] {
8592                  return absl::StrCat(
8593                      "Illegal jstype for int64, uint64, sint64, fixed64 "
8594                      "or sfixed64 field: ",
8595                      FieldOptions_JSType_descriptor()->value(jstype)->name());
8596                });
8597       break;
8598 
8599     // No other types permit a jstype option.
8600     default:
8601       AddError(field->full_name(), proto, DescriptorPool::ErrorCollector::TYPE,
8602                "jstype is only allowed on int64, uint64, sint64, fixed64 "
8603                "or sfixed64 fields.");
8604       break;
8605   }
8606 }
8607 
8608 // -------------------------------------------------------------------
8609 
OptionInterpreter(DescriptorBuilder * builder)8610 DescriptorBuilder::OptionInterpreter::OptionInterpreter(
8611     DescriptorBuilder* builder)
8612     : builder_(builder) {
8613   ABSL_CHECK(builder_);
8614 }
8615 
8616 DescriptorBuilder::OptionInterpreter::~OptionInterpreter() = default;
8617 
InterpretOptionExtensions(OptionsToInterpret * options_to_interpret)8618 bool DescriptorBuilder::OptionInterpreter::InterpretOptionExtensions(
8619     OptionsToInterpret* options_to_interpret) {
8620   return InterpretOptionsImpl(options_to_interpret, /*skip_extensions=*/false);
8621 }
InterpretNonExtensionOptions(OptionsToInterpret * options_to_interpret)8622 bool DescriptorBuilder::OptionInterpreter::InterpretNonExtensionOptions(
8623     OptionsToInterpret* options_to_interpret) {
8624   return InterpretOptionsImpl(options_to_interpret, /*skip_extensions=*/true);
8625 }
InterpretOptionsImpl(OptionsToInterpret * options_to_interpret,bool skip_extensions)8626 bool DescriptorBuilder::OptionInterpreter::InterpretOptionsImpl(
8627     OptionsToInterpret* options_to_interpret, bool skip_extensions) {
8628   // Note that these may be in different pools, so we can't use the same
8629   // descriptor and reflection objects on both.
8630   Message* options = options_to_interpret->options;
8631   const Message* original_options = options_to_interpret->original_options;
8632 
8633   bool failed = false;
8634   options_to_interpret_ = options_to_interpret;
8635 
8636   // Find the uninterpreted_option field in the mutable copy of the options
8637   // and clear them, since we're about to interpret them.
8638   const FieldDescriptor* uninterpreted_options_field =
8639       options->GetDescriptor()->FindFieldByName("uninterpreted_option");
8640   ABSL_CHECK(uninterpreted_options_field != nullptr)
8641       << "No field named \"uninterpreted_option\" in the Options proto.";
8642   options->GetReflection()->ClearField(options, uninterpreted_options_field);
8643 
8644   std::vector<int> src_path = options_to_interpret->element_path;
8645   src_path.push_back(uninterpreted_options_field->number());
8646 
8647   // Find the uninterpreted_option field in the original options.
8648   const FieldDescriptor* original_uninterpreted_options_field =
8649       original_options->GetDescriptor()->FindFieldByName(
8650           "uninterpreted_option");
8651   ABSL_CHECK(original_uninterpreted_options_field != nullptr)
8652       << "No field named \"uninterpreted_option\" in the Options proto.";
8653 
8654   const int num_uninterpreted_options =
8655       original_options->GetReflection()->FieldSize(
8656           *original_options, original_uninterpreted_options_field);
8657   for (int i = 0; i < num_uninterpreted_options; ++i) {
8658     src_path.push_back(i);
8659     uninterpreted_option_ = DownCastMessage<UninterpretedOption>(
8660         &original_options->GetReflection()->GetRepeatedMessage(
8661             *original_options, original_uninterpreted_options_field, i));
8662     if (!InterpretSingleOption(options, src_path,
8663                                options_to_interpret->element_path,
8664                                skip_extensions)) {
8665       // Error already added by InterpretSingleOption().
8666       failed = true;
8667       break;
8668     }
8669     src_path.pop_back();
8670   }
8671   // Reset these, so we don't have any dangling pointers.
8672   uninterpreted_option_ = nullptr;
8673   options_to_interpret_ = nullptr;
8674 
8675   if (!failed) {
8676     // InterpretSingleOption() added the interpreted options in the
8677     // UnknownFieldSet, in case the option isn't yet known to us.  Now we
8678     // serialize the options message and deserialize it back.  That way, any
8679     // option fields that we do happen to know about will get moved from the
8680     // UnknownFieldSet into the real fields, and thus be available right away.
8681     // If they are not known, that's OK too. They will get reparsed into the
8682     // UnknownFieldSet and wait there until the message is parsed by something
8683     // that does know about the options.
8684 
8685     // Keep the unparsed options around in case the reparsing fails.
8686     std::unique_ptr<Message> unparsed_options(options->New());
8687     options->GetReflection()->Swap(unparsed_options.get(), options);
8688 
8689     std::string buf;
8690     if (!unparsed_options->AppendToString(&buf) ||
8691         !options->ParseFromString(buf)) {
8692       builder_->AddError(
8693           options_to_interpret->element_name, *original_options,
8694           DescriptorPool::ErrorCollector::OTHER, [&] {
8695             return absl::StrCat(
8696                 "Some options could not be correctly parsed using the proto "
8697                 "descriptors compiled into this binary.\n"
8698                 "Unparsed options: ",
8699                 unparsed_options->ShortDebugString(),
8700                 "\n"
8701                 "Parsing attempt:  ",
8702                 options->ShortDebugString());
8703           });
8704       // Restore the unparsed options.
8705       options->GetReflection()->Swap(unparsed_options.get(), options);
8706     }
8707   }
8708 
8709   return !failed;
8710 }
8711 
InterpretSingleOption(Message * options,const std::vector<int> & src_path,const std::vector<int> & options_path,bool skip_extensions)8712 bool DescriptorBuilder::OptionInterpreter::InterpretSingleOption(
8713     Message* options, const std::vector<int>& src_path,
8714     const std::vector<int>& options_path, bool skip_extensions) {
8715   // First do some basic validation.
8716   if (uninterpreted_option_->name_size() == 0) {
8717     // This should never happen unless the parser has gone seriously awry or
8718     // someone has manually created the uninterpreted option badly.
8719     if (skip_extensions) {
8720       // Come back to it later.
8721       return true;
8722     }
8723     return AddNameError(
8724         []() -> std::string { return "Option must have a name."; });
8725   }
8726   if (uninterpreted_option_->name(0).name_part() == "uninterpreted_option") {
8727     if (skip_extensions) {
8728       // Come back to it later.
8729       return true;
8730     }
8731     return AddNameError([]() -> std::string {
8732       return "Option must not use reserved name \"uninterpreted_option\".";
8733     });
8734   }
8735 
8736   if (skip_extensions == uninterpreted_option_->name(0).is_extension()) {
8737     // Allow feature and option interpretation to occur in two phases.  This is
8738     // necessary because features *are* options and need to be interpreted
8739     // before resolving them.  However, options can also *have* features
8740     // attached to them.
8741     return true;
8742   }
8743 
8744   const Descriptor* options_descriptor = nullptr;
8745   // Get the options message's descriptor from the builder's pool, so that we
8746   // get the version that knows about any extension options declared in the file
8747   // we're currently building. The descriptor should be there as long as the
8748   // file we're building imported descriptor.proto.
8749 
8750   // Note that we use DescriptorBuilder::FindSymbolNotEnforcingDeps(), not
8751   // DescriptorPool::FindMessageTypeByName() because we're already holding the
8752   // pool's mutex, and the latter method locks it again.  We don't use
8753   // FindSymbol() because files that use custom options only need to depend on
8754   // the file that defines the option, not descriptor.proto itself.
8755   Symbol symbol = builder_->FindSymbolNotEnforcingDeps(
8756       options->GetDescriptor()->full_name());
8757   options_descriptor = symbol.descriptor();
8758   if (options_descriptor == nullptr) {
8759     // The options message's descriptor was not in the builder's pool, so use
8760     // the standard version from the generated pool. We're not holding the
8761     // generated pool's mutex, so we can search it the straightforward way.
8762     options_descriptor = options->GetDescriptor();
8763   }
8764   ABSL_CHECK(options_descriptor);
8765 
8766   // We iterate over the name parts to drill into the submessages until we find
8767   // the leaf field for the option. As we drill down we remember the current
8768   // submessage's descriptor in |descriptor| and the next field in that
8769   // submessage in |field|. We also track the fields we're drilling down
8770   // through in |intermediate_fields|. As we go, we reconstruct the full option
8771   // name in |debug_msg_name|, for use in error messages.
8772   const Descriptor* descriptor = options_descriptor;
8773   const FieldDescriptor* field = nullptr;
8774   std::vector<const FieldDescriptor*> intermediate_fields;
8775   std::string debug_msg_name = "";
8776 
8777   std::vector<int> dest_path = options_path;
8778 
8779   for (int i = 0; i < uninterpreted_option_->name_size(); ++i) {
8780     builder_->undefine_resolved_name_.clear();
8781     const std::string& name_part = uninterpreted_option_->name(i).name_part();
8782     if (!debug_msg_name.empty()) {
8783       absl::StrAppend(&debug_msg_name, ".");
8784     }
8785     if (uninterpreted_option_->name(i).is_extension()) {
8786       absl::StrAppend(&debug_msg_name, "(", name_part, ")");
8787       // Search for the extension's descriptor as an extension in the builder's
8788       // pool. Note that we use DescriptorBuilder::LookupSymbol(), not
8789       // DescriptorPool::FindExtensionByName(), for two reasons: 1) It allows
8790       // relative lookups, and 2) because we're already holding the pool's
8791       // mutex, and the latter method locks it again.
8792       symbol =
8793           builder_->LookupSymbol(name_part, options_to_interpret_->name_scope);
8794       field = symbol.field_descriptor();
8795       // If we don't find the field then the field's descriptor was not in the
8796       // builder's pool, but there's no point in looking in the generated
8797       // pool. We require that you import the file that defines any extensions
8798       // you use, so they must be present in the builder's pool.
8799     } else {
8800       absl::StrAppend(&debug_msg_name, name_part);
8801       // Search for the field's descriptor as a regular field.
8802       field = descriptor->FindFieldByName(name_part);
8803     }
8804 
8805     if (field == nullptr) {
8806       if (get_allow_unknown(builder_->pool_)) {
8807         // We can't find the option, but AllowUnknownDependencies() is enabled,
8808         // so we will just leave it as uninterpreted.
8809         AddWithoutInterpreting(*uninterpreted_option_, options);
8810         return true;
8811       } else if (!(builder_->undefine_resolved_name_).empty()) {
8812         // Option is resolved to a name which is not defined.
8813         return AddNameError([&] {
8814           return absl::StrCat(
8815               "Option \"", debug_msg_name, "\" is resolved to \"(",
8816               builder_->undefine_resolved_name_,
8817               ")\", which is not defined. The innermost scope is searched "
8818               "first "
8819               "in name resolution. Consider using a leading '.'(i.e., \"(.",
8820               debug_msg_name.substr(1),
8821               "\") to start from the outermost scope.");
8822         });
8823       } else {
8824         return AddNameError([&] {
8825           return absl::StrCat(
8826               "Option \"", debug_msg_name, "\" unknown. Ensure that your proto",
8827               " definition file imports the proto which defines the option.");
8828         });
8829       }
8830     } else if (field->containing_type() != descriptor) {
8831       if (get_is_placeholder(field->containing_type())) {
8832         // The field is an extension of a placeholder type, so we can't
8833         // reliably verify whether it is a valid extension to use here (e.g.
8834         // we don't know if it is an extension of the correct *Options message,
8835         // or if it has a valid field number, etc.).  Just leave it as
8836         // uninterpreted instead.
8837         AddWithoutInterpreting(*uninterpreted_option_, options);
8838         return true;
8839       } else {
8840         // This can only happen if, due to some insane misconfiguration of the
8841         // pools, we find the options message in one pool but the field in
8842         // another. This would probably imply a hefty bug somewhere.
8843         return AddNameError([&] {
8844           return absl::StrCat("Option field \"", debug_msg_name,
8845                               "\" is not a field or extension of message \"",
8846                               descriptor->name(), "\".");
8847         });
8848       }
8849     } else {
8850       // accumulate field numbers to form path to interpreted option
8851       dest_path.push_back(field->number());
8852 
8853       // Special handling to prevent feature use in the same file as the
8854       // definition.
8855       // TODO Add proper support for cases where this can work.
8856       if (field->file() == builder_->file_ &&
8857           uninterpreted_option_->name(0).name_part() == "features" &&
8858           !uninterpreted_option_->name(0).is_extension()) {
8859         return AddNameError([&] {
8860           return absl::StrCat(
8861               "Feature \"", debug_msg_name,
8862               "\" can't be used in the same file it's defined in.");
8863         });
8864       }
8865 
8866       if (i < uninterpreted_option_->name_size() - 1) {
8867         if (field->cpp_type() != FieldDescriptor::CPPTYPE_MESSAGE) {
8868           return AddNameError([&] {
8869             return absl::StrCat("Option \"", debug_msg_name,
8870                                 "\" is an atomic type, not a message.");
8871           });
8872         } else if (field->is_repeated()) {
8873           return AddNameError([&] {
8874             return absl::StrCat("Option field \"", debug_msg_name,
8875                                 "\" is a repeated message. Repeated message "
8876                                 "options must be initialized using an "
8877                                 "aggregate value.");
8878           });
8879         } else {
8880           // Drill down into the submessage.
8881           intermediate_fields.push_back(field);
8882           descriptor = field->message_type();
8883         }
8884       }
8885     }
8886   }
8887 
8888   // We've found the leaf field. Now we use UnknownFieldSets to set its value
8889   // on the options message. We do so because the message may not yet know
8890   // about its extension fields, so we may not be able to set the fields
8891   // directly. But the UnknownFieldSets will serialize to the same wire-format
8892   // message, so reading that message back in once the extension fields are
8893   // known will populate them correctly.
8894 
8895   // First see if the option is already set.
8896   if (!field->is_repeated() &&
8897       !ExamineIfOptionIsSet(
8898           intermediate_fields.begin(), intermediate_fields.end(), field,
8899           debug_msg_name,
8900           options->GetReflection()->GetUnknownFields(*options))) {
8901     return false;  // ExamineIfOptionIsSet() already added the error.
8902   }
8903 
8904   // First set the value on the UnknownFieldSet corresponding to the
8905   // innermost message.
8906   std::unique_ptr<UnknownFieldSet> unknown_fields(new UnknownFieldSet());
8907   if (!SetOptionValue(field, unknown_fields.get())) {
8908     return false;  // SetOptionValue() already added the error.
8909   }
8910 
8911   // Now wrap the UnknownFieldSet with UnknownFieldSets corresponding to all
8912   // the intermediate messages.
8913   for (std::vector<const FieldDescriptor*>::reverse_iterator iter =
8914            intermediate_fields.rbegin();
8915        iter != intermediate_fields.rend(); ++iter) {
8916     std::unique_ptr<UnknownFieldSet> parent_unknown_fields(
8917         new UnknownFieldSet());
8918     switch ((*iter)->type()) {
8919       case FieldDescriptor::TYPE_MESSAGE: {
8920         std::string outstr;
8921         ABSL_CHECK(unknown_fields->SerializeToString(&outstr))
8922             << "Unexpected failure while serializing option submessage "
8923             << debug_msg_name << "\".";
8924         parent_unknown_fields->AddLengthDelimited((*iter)->number(),
8925                                                   std::move(outstr));
8926         break;
8927       }
8928 
8929       case FieldDescriptor::TYPE_GROUP: {
8930         parent_unknown_fields->AddGroup((*iter)->number())
8931             ->MergeFrom(*unknown_fields);
8932         break;
8933       }
8934 
8935       default:
8936         ABSL_LOG(FATAL) << "Invalid wire type for CPPTYPE_MESSAGE: "
8937                         << (*iter)->type();
8938         return false;
8939     }
8940     unknown_fields = std::move(parent_unknown_fields);
8941   }
8942 
8943   // Now merge the UnknownFieldSet corresponding to the top-level message into
8944   // the options message.
8945   options->GetReflection()->MutableUnknownFields(options)->MergeFrom(
8946       *unknown_fields);
8947 
8948   // record the element path of the interpreted option
8949   if (field->is_repeated()) {
8950     int index = repeated_option_counts_[dest_path]++;
8951     dest_path.push_back(index);
8952   }
8953   interpreted_paths_[src_path] = dest_path;
8954 
8955   return true;
8956 }
8957 
UpdateSourceCodeInfo(SourceCodeInfo * info)8958 void DescriptorBuilder::OptionInterpreter::UpdateSourceCodeInfo(
8959     SourceCodeInfo* info) {
8960   if (interpreted_paths_.empty()) {
8961     // nothing to do!
8962     return;
8963   }
8964 
8965   // We find locations that match keys in interpreted_paths_ and
8966   // 1) replace the path with the corresponding value in interpreted_paths_
8967   // 2) remove any subsequent sub-locations (sub-location is one whose path
8968   //    has the parent path as a prefix)
8969   //
8970   // To avoid quadratic behavior of removing interior rows as we go,
8971   // we keep a copy. But we don't actually copy anything until we've
8972   // found the first match (so if the source code info has no locations
8973   // that need to be changed, there is zero copy overhead).
8974 
8975   RepeatedPtrField<SourceCodeInfo_Location>* locs = info->mutable_location();
8976   RepeatedPtrField<SourceCodeInfo_Location> new_locs;
8977   bool copying = false;
8978 
8979   std::vector<int> pathv;
8980   bool matched = false;
8981 
8982   for (RepeatedPtrField<SourceCodeInfo_Location>::iterator loc = locs->begin();
8983        loc != locs->end(); loc++) {
8984     if (matched) {
8985       // see if this location is in the range to remove
8986       bool loc_matches = true;
8987       if (loc->path_size() < static_cast<int64_t>(pathv.size())) {
8988         loc_matches = false;
8989       } else {
8990         for (size_t j = 0; j < pathv.size(); j++) {
8991           if (loc->path(j) != pathv[j]) {
8992             loc_matches = false;
8993             break;
8994           }
8995         }
8996       }
8997 
8998       if (loc_matches) {
8999         // don't copy this row since it is a sub-location that we're removing
9000         continue;
9001       }
9002 
9003       matched = false;
9004     }
9005 
9006     pathv.clear();
9007     for (int j = 0; j < loc->path_size(); j++) {
9008       pathv.push_back(loc->path(j));
9009     }
9010 
9011     auto entry = interpreted_paths_.find(pathv);
9012 
9013     if (entry == interpreted_paths_.end()) {
9014       // not a match
9015       if (copying) {
9016         *new_locs.Add() = *loc;
9017       }
9018       continue;
9019     }
9020 
9021     matched = true;
9022 
9023     if (!copying) {
9024       // initialize the copy we are building
9025       copying = true;
9026       new_locs.Reserve(locs->size());
9027       for (RepeatedPtrField<SourceCodeInfo_Location>::iterator it =
9028                locs->begin();
9029            it != loc; it++) {
9030         *new_locs.Add() = *it;
9031       }
9032     }
9033 
9034     // add replacement and update its path
9035     SourceCodeInfo_Location* replacement = new_locs.Add();
9036     *replacement = *loc;
9037     replacement->clear_path();
9038     for (std::vector<int>::iterator rit = entry->second.begin();
9039          rit != entry->second.end(); rit++) {
9040       replacement->add_path(*rit);
9041     }
9042   }
9043 
9044   // if we made a changed copy, put it in place
9045   if (copying) {
9046     *locs = std::move(new_locs);
9047   }
9048 }
9049 
AddWithoutInterpreting(const UninterpretedOption & uninterpreted_option,Message * options)9050 void DescriptorBuilder::OptionInterpreter::AddWithoutInterpreting(
9051     const UninterpretedOption& uninterpreted_option, Message* options) {
9052   const FieldDescriptor* field =
9053       options->GetDescriptor()->FindFieldByName("uninterpreted_option");
9054   ABSL_CHECK(field != nullptr);
9055 
9056   options->GetReflection()
9057       ->AddMessage(options, field)
9058       ->CopyFrom(uninterpreted_option);
9059 }
9060 
ExamineIfOptionIsSet(std::vector<const FieldDescriptor * >::const_iterator intermediate_fields_iter,std::vector<const FieldDescriptor * >::const_iterator intermediate_fields_end,const FieldDescriptor * innermost_field,const std::string & debug_msg_name,const UnknownFieldSet & unknown_fields)9061 bool DescriptorBuilder::OptionInterpreter::ExamineIfOptionIsSet(
9062     std::vector<const FieldDescriptor*>::const_iterator
9063         intermediate_fields_iter,
9064     std::vector<const FieldDescriptor*>::const_iterator intermediate_fields_end,
9065     const FieldDescriptor* innermost_field, const std::string& debug_msg_name,
9066     const UnknownFieldSet& unknown_fields) {
9067   // We do linear searches of the UnknownFieldSet and its sub-groups.  This
9068   // should be fine since it's unlikely that any one options structure will
9069   // contain more than a handful of options.
9070 
9071   if (intermediate_fields_iter == intermediate_fields_end) {
9072     // We're at the innermost submessage.
9073     for (int i = 0; i < unknown_fields.field_count(); i++) {
9074       if (unknown_fields.field(i).number() == innermost_field->number()) {
9075         return AddNameError([&] {
9076           return absl::StrCat("Option \"", debug_msg_name,
9077                               "\" was already set.");
9078         });
9079       }
9080     }
9081     return true;
9082   }
9083 
9084   for (int i = 0; i < unknown_fields.field_count(); i++) {
9085     if (unknown_fields.field(i).number() ==
9086         (*intermediate_fields_iter)->number()) {
9087       const UnknownField* unknown_field = &unknown_fields.field(i);
9088       FieldDescriptor::Type type = (*intermediate_fields_iter)->type();
9089       // Recurse into the next submessage.
9090       switch (type) {
9091         case FieldDescriptor::TYPE_MESSAGE:
9092           if (unknown_field->type() == UnknownField::TYPE_LENGTH_DELIMITED) {
9093             UnknownFieldSet intermediate_unknown_fields;
9094             if (intermediate_unknown_fields.ParseFromString(
9095                     unknown_field->length_delimited()) &&
9096                 !ExamineIfOptionIsSet(intermediate_fields_iter + 1,
9097                                       intermediate_fields_end, innermost_field,
9098                                       debug_msg_name,
9099                                       intermediate_unknown_fields)) {
9100               return false;  // Error already added.
9101             }
9102           }
9103           break;
9104 
9105         case FieldDescriptor::TYPE_GROUP:
9106           if (unknown_field->type() == UnknownField::TYPE_GROUP) {
9107             if (!ExamineIfOptionIsSet(intermediate_fields_iter + 1,
9108                                       intermediate_fields_end, innermost_field,
9109                                       debug_msg_name, unknown_field->group())) {
9110               return false;  // Error already added.
9111             }
9112           }
9113           break;
9114 
9115         default:
9116           ABSL_LOG(FATAL) << "Invalid wire type for CPPTYPE_MESSAGE: " << type;
9117           return false;
9118       }
9119     }
9120   }
9121   return true;
9122 }
9123 
9124 namespace {
9125 // Helpers for method below
9126 
9127 template <typename T>
ValueOutOfRange(absl::string_view type_name,absl::string_view option_name)9128 std::string ValueOutOfRange(absl::string_view type_name,
9129                             absl::string_view option_name) {
9130   return absl::StrFormat("Value out of range, %d to %d, for %s option \"%s\".",
9131                          std::numeric_limits<T>::min(),
9132                          std::numeric_limits<T>::max(), type_name, option_name);
9133 }
9134 
9135 template <typename T>
ValueMustBeInt(absl::string_view type_name,absl::string_view option_name)9136 std::string ValueMustBeInt(absl::string_view type_name,
9137                            absl::string_view option_name) {
9138   return absl::StrFormat(
9139       "Value must be integer, from %d to %d, for %s option \"%s\".",
9140       std::numeric_limits<T>::min(), std::numeric_limits<T>::max(), type_name,
9141       option_name);
9142 }
9143 
9144 }  // namespace
9145 
SetOptionValue(const FieldDescriptor * option_field,UnknownFieldSet * unknown_fields)9146 bool DescriptorBuilder::OptionInterpreter::SetOptionValue(
9147     const FieldDescriptor* option_field, UnknownFieldSet* unknown_fields) {
9148   // We switch on the CppType to validate.
9149   switch (option_field->cpp_type()) {
9150     case FieldDescriptor::CPPTYPE_INT32:
9151       if (uninterpreted_option_->has_positive_int_value()) {
9152         if (uninterpreted_option_->positive_int_value() >
9153             static_cast<uint64_t>(std::numeric_limits<int32_t>::max())) {
9154           return AddValueError([&] {
9155             return ValueOutOfRange<int32_t>("int32", option_field->full_name());
9156           });
9157         } else {
9158           SetInt32(option_field->number(),
9159                    uninterpreted_option_->positive_int_value(),
9160                    option_field->type(), unknown_fields);
9161         }
9162       } else if (uninterpreted_option_->has_negative_int_value()) {
9163         if (uninterpreted_option_->negative_int_value() <
9164             static_cast<int64_t>(std::numeric_limits<int32_t>::min())) {
9165           return AddValueError([&] {
9166             return ValueOutOfRange<int32_t>("int32", option_field->full_name());
9167           });
9168         } else {
9169           SetInt32(option_field->number(),
9170                    uninterpreted_option_->negative_int_value(),
9171                    option_field->type(), unknown_fields);
9172         }
9173       } else {
9174         return AddValueError([&] {
9175           return ValueMustBeInt<int32_t>("int32", option_field->full_name());
9176         });
9177       }
9178       break;
9179 
9180     case FieldDescriptor::CPPTYPE_INT64:
9181       if (uninterpreted_option_->has_positive_int_value()) {
9182         if (uninterpreted_option_->positive_int_value() >
9183             static_cast<uint64_t>(std::numeric_limits<int64_t>::max())) {
9184           return AddValueError([&] {
9185             return ValueOutOfRange<int64_t>("int64", option_field->full_name());
9186           });
9187         } else {
9188           SetInt64(option_field->number(),
9189                    uninterpreted_option_->positive_int_value(),
9190                    option_field->type(), unknown_fields);
9191         }
9192       } else if (uninterpreted_option_->has_negative_int_value()) {
9193         SetInt64(option_field->number(),
9194                  uninterpreted_option_->negative_int_value(),
9195                  option_field->type(), unknown_fields);
9196       } else {
9197         return AddValueError([&] {
9198           return ValueMustBeInt<int64_t>("int64", option_field->full_name());
9199         });
9200       }
9201       break;
9202 
9203     case FieldDescriptor::CPPTYPE_UINT32:
9204       if (uninterpreted_option_->has_positive_int_value()) {
9205         if (uninterpreted_option_->positive_int_value() >
9206             std::numeric_limits<uint32_t>::max()) {
9207           return AddValueError([&] {
9208             return ValueOutOfRange<uint32_t>("uint32",
9209                                              option_field->full_name());
9210           });
9211         } else {
9212           SetUInt32(option_field->number(),
9213                     uninterpreted_option_->positive_int_value(),
9214                     option_field->type(), unknown_fields);
9215         }
9216       } else {
9217         return AddValueError([&] {
9218           return ValueMustBeInt<uint32_t>("uint32", option_field->full_name());
9219         });
9220       }
9221       break;
9222 
9223     case FieldDescriptor::CPPTYPE_UINT64:
9224       if (uninterpreted_option_->has_positive_int_value()) {
9225         SetUInt64(option_field->number(),
9226                   uninterpreted_option_->positive_int_value(),
9227                   option_field->type(), unknown_fields);
9228       } else {
9229         return AddValueError([&] {
9230           return ValueMustBeInt<uint64_t>("uint64", option_field->full_name());
9231         });
9232       }
9233       break;
9234 
9235     case FieldDescriptor::CPPTYPE_FLOAT: {
9236       float value;
9237       if (uninterpreted_option_->has_double_value()) {
9238         value = uninterpreted_option_->double_value();
9239       } else if (uninterpreted_option_->has_positive_int_value()) {
9240         value = uninterpreted_option_->positive_int_value();
9241       } else if (uninterpreted_option_->has_negative_int_value()) {
9242         value = uninterpreted_option_->negative_int_value();
9243       } else if (uninterpreted_option_->identifier_value() == "inf") {
9244         value = std::numeric_limits<float>::infinity();
9245       } else if (uninterpreted_option_->identifier_value() == "nan") {
9246         value = std::numeric_limits<float>::quiet_NaN();
9247       } else {
9248         return AddValueError([&] {
9249           return absl::StrCat("Value must be number for float option \"",
9250                               option_field->full_name(), "\".");
9251         });
9252       }
9253       unknown_fields->AddFixed32(option_field->number(),
9254                                  internal::WireFormatLite::EncodeFloat(value));
9255       break;
9256     }
9257 
9258     case FieldDescriptor::CPPTYPE_DOUBLE: {
9259       double value;
9260       if (uninterpreted_option_->has_double_value()) {
9261         value = uninterpreted_option_->double_value();
9262       } else if (uninterpreted_option_->has_positive_int_value()) {
9263         value = uninterpreted_option_->positive_int_value();
9264       } else if (uninterpreted_option_->has_negative_int_value()) {
9265         value = uninterpreted_option_->negative_int_value();
9266       } else if (uninterpreted_option_->identifier_value() == "inf") {
9267         value = std::numeric_limits<double>::infinity();
9268       } else if (uninterpreted_option_->identifier_value() == "nan") {
9269         value = std::numeric_limits<double>::quiet_NaN();
9270       } else {
9271         return AddValueError([&] {
9272           return absl::StrCat("Value must be number for double option \"",
9273                               option_field->full_name(), "\".");
9274         });
9275       }
9276       unknown_fields->AddFixed64(option_field->number(),
9277                                  internal::WireFormatLite::EncodeDouble(value));
9278       break;
9279     }
9280 
9281     case FieldDescriptor::CPPTYPE_BOOL:
9282       uint64_t value;
9283       if (!uninterpreted_option_->has_identifier_value()) {
9284         return AddValueError([&] {
9285           return absl::StrCat("Value must be identifier for boolean option \"",
9286                               option_field->full_name(), "\".");
9287         });
9288       }
9289       if (uninterpreted_option_->identifier_value() == "true") {
9290         value = 1;
9291       } else if (uninterpreted_option_->identifier_value() == "false") {
9292         value = 0;
9293       } else {
9294         return AddValueError([&] {
9295           return absl::StrCat(
9296               "Value must be \"true\" or \"false\" for boolean option \"",
9297               option_field->full_name(), "\".");
9298         });
9299       }
9300       unknown_fields->AddVarint(option_field->number(), value);
9301       break;
9302 
9303     case FieldDescriptor::CPPTYPE_ENUM: {
9304       if (!uninterpreted_option_->has_identifier_value()) {
9305         return AddValueError([&] {
9306           return absl::StrCat(
9307               "Value must be identifier for enum-valued option \"",
9308               option_field->full_name(), "\".");
9309         });
9310       }
9311       const EnumDescriptor* enum_type = option_field->enum_type();
9312       const std::string& value_name = uninterpreted_option_->identifier_value();
9313       const EnumValueDescriptor* enum_value = nullptr;
9314 
9315       if (enum_type->file()->pool() != DescriptorPool::generated_pool()) {
9316         // Note that the enum value's fully-qualified name is a sibling of the
9317         // enum's name, not a child of it.
9318         std::string fully_qualified_name = std::string(enum_type->full_name());
9319         fully_qualified_name.resize(fully_qualified_name.size() -
9320                                     enum_type->name().size());
9321         fully_qualified_name += value_name;
9322 
9323         // Search for the enum value's descriptor in the builder's pool. Note
9324         // that we use DescriptorBuilder::FindSymbolNotEnforcingDeps(), not
9325         // DescriptorPool::FindEnumValueByName() because we're already holding
9326         // the pool's mutex, and the latter method locks it again.
9327         Symbol symbol =
9328             builder_->FindSymbolNotEnforcingDeps(fully_qualified_name);
9329         if (auto* candidate_descriptor = symbol.enum_value_descriptor()) {
9330           if (candidate_descriptor->type() != enum_type) {
9331             return AddValueError([&] {
9332               return absl::StrCat(
9333                   "Enum type \"", enum_type->full_name(),
9334                   "\" has no value named \"", value_name, "\" for option \"",
9335                   option_field->full_name(),
9336                   "\". This appears to be a value from a sibling type.");
9337             });
9338           } else {
9339             enum_value = candidate_descriptor;
9340           }
9341         }
9342       } else {
9343         // The enum type is in the generated pool, so we can search for the
9344         // value there.
9345         enum_value = enum_type->FindValueByName(value_name);
9346       }
9347 
9348       if (enum_value == nullptr) {
9349         return AddValueError([&] {
9350           return absl::StrCat(
9351               "Enum type \"", option_field->enum_type()->full_name(),
9352               "\" has no value named \"", value_name, "\" for option \"",
9353               option_field->full_name(), "\".");
9354         });
9355       } else {
9356         // Sign-extension is not a problem, since we cast directly from int32_t
9357         // to uint64_t, without first going through uint32_t.
9358         unknown_fields->AddVarint(
9359             option_field->number(),
9360             static_cast<uint64_t>(static_cast<int64_t>(enum_value->number())));
9361       }
9362       break;
9363     }
9364 
9365     case FieldDescriptor::CPPTYPE_STRING:
9366       if (!uninterpreted_option_->has_string_value()) {
9367         return AddValueError([&] {
9368           return absl::StrCat(
9369               "Value must be quoted string for string option \"",
9370               option_field->full_name(), "\".");
9371         });
9372       }
9373       // The string has already been unquoted and unescaped by the parser.
9374       unknown_fields->AddLengthDelimited(option_field->number(),
9375                                          uninterpreted_option_->string_value());
9376       break;
9377 
9378     case FieldDescriptor::CPPTYPE_MESSAGE:
9379       if (!SetAggregateOption(option_field, unknown_fields)) {
9380         return false;
9381       }
9382       break;
9383   }
9384 
9385   return true;
9386 }
9387 
9388 class DescriptorBuilder::OptionInterpreter::AggregateOptionFinder
9389     : public TextFormat::Finder {
9390  public:
9391   DescriptorBuilder* builder_;
9392 
FindAnyType(const Message &,const std::string & prefix,const std::string & name) const9393   const Descriptor* FindAnyType(const Message& /*message*/,
9394                                 const std::string& prefix,
9395                                 const std::string& name) const override {
9396     if (prefix != internal::kTypeGoogleApisComPrefix &&
9397         prefix != internal::kTypeGoogleProdComPrefix) {
9398       return nullptr;
9399     }
9400     assert_mutex_held(builder_->pool_);
9401     return builder_->FindSymbol(name).descriptor();
9402   }
9403 
FindExtension(Message * message,const std::string & name) const9404   const FieldDescriptor* FindExtension(Message* message,
9405                                        const std::string& name) const override {
9406     assert_mutex_held(builder_->pool_);
9407     const Descriptor* descriptor = message->GetDescriptor();
9408     Symbol result =
9409         builder_->LookupSymbolNoPlaceholder(name, descriptor->full_name());
9410     if (auto* field = result.field_descriptor()) {
9411       return field;
9412     } else if (result.type() == Symbol::MESSAGE &&
9413                descriptor->options().message_set_wire_format()) {
9414       const Descriptor* foreign_type = result.descriptor();
9415       // The text format allows MessageSet items to be specified using
9416       // the type name, rather than the extension identifier. If the symbol
9417       // lookup returned a Message, and the enclosing Message has
9418       // message_set_wire_format = true, then return the message set
9419       // extension, if one exists.
9420       for (int i = 0; i < foreign_type->extension_count(); i++) {
9421         const FieldDescriptor* extension = foreign_type->extension(i);
9422         if (extension->containing_type() == descriptor &&
9423             extension->type() == FieldDescriptor::TYPE_MESSAGE &&
9424             extension->is_optional() &&
9425             extension->message_type() == foreign_type) {
9426           // Found it.
9427           return extension;
9428         }
9429       }
9430     }
9431     return nullptr;
9432   }
9433 };
9434 
9435 // A custom error collector to record any text-format parsing errors
9436 namespace {
9437 class AggregateErrorCollector : public io::ErrorCollector {
9438  public:
9439   std::string error_;
9440 
RecordError(int,int,const absl::string_view message)9441   void RecordError(int /* line */, int /* column */,
9442                    const absl::string_view message) override {
9443     if (!error_.empty()) {
9444       absl::StrAppend(&error_, "; ");
9445     }
9446     absl::StrAppend(&error_, message);
9447   }
9448 
RecordWarning(int,int,const absl::string_view)9449   void RecordWarning(int /* line */, int /* column */,
9450                      const absl::string_view /* message */) override {
9451     // Ignore warnings
9452   }
9453 };
9454 }  // namespace
9455 
9456 // We construct a dynamic message of the type corresponding to
9457 // option_field, parse the supplied text-format string into this
9458 // message, and serialize the resulting message to produce the value.
SetAggregateOption(const FieldDescriptor * option_field,UnknownFieldSet * unknown_fields)9459 bool DescriptorBuilder::OptionInterpreter::SetAggregateOption(
9460     const FieldDescriptor* option_field, UnknownFieldSet* unknown_fields) {
9461   if (!uninterpreted_option_->has_aggregate_value()) {
9462     return AddValueError([&] {
9463       return absl::StrCat("Option \"", option_field->full_name(),
9464                           "\" is a message. "
9465                           "To set the entire message, use syntax like \"",
9466                           option_field->name(),
9467                           " = { <proto text format> }\". "
9468                           "To set fields within it, use syntax like \"",
9469                           option_field->name(), ".foo = value\".");
9470     });
9471   }
9472 
9473   const Descriptor* type = option_field->message_type();
9474   std::unique_ptr<Message> dynamic(dynamic_factory_.GetPrototype(type)->New());
9475   ABSL_CHECK(dynamic.get() != nullptr)
9476       << "Could not create an instance of " << option_field->DebugString();
9477 
9478   AggregateErrorCollector collector;
9479   AggregateOptionFinder finder;
9480   finder.builder_ = builder_;
9481   TextFormat::Parser parser;
9482   parser.RecordErrorsTo(&collector);
9483   parser.SetFinder(&finder);
9484   if (!parser.ParseFromString(uninterpreted_option_->aggregate_value(),
9485                               dynamic.get())) {
9486     AddValueError([&] {
9487       return absl::StrCat("Error while parsing option value for \"",
9488                           option_field->name(), "\": ", collector.error_);
9489     });
9490     return false;
9491   } else {
9492     std::string serial;
9493     dynamic->SerializeToString(&serial);  // Never fails
9494     if (option_field->type() == FieldDescriptor::TYPE_MESSAGE) {
9495       unknown_fields->AddLengthDelimited(option_field->number(), serial);
9496     } else {
9497       ABSL_CHECK_EQ(option_field->type(), FieldDescriptor::TYPE_GROUP);
9498       UnknownFieldSet* group = unknown_fields->AddGroup(option_field->number());
9499       group->ParseFromString(serial);
9500     }
9501     return true;
9502   }
9503 }
9504 
SetInt32(int number,int32_t value,FieldDescriptor::Type type,UnknownFieldSet * unknown_fields)9505 void DescriptorBuilder::OptionInterpreter::SetInt32(
9506     int number, int32_t value, FieldDescriptor::Type type,
9507     UnknownFieldSet* unknown_fields) {
9508   switch (type) {
9509     case FieldDescriptor::TYPE_INT32:
9510       unknown_fields->AddVarint(
9511           number, static_cast<uint64_t>(static_cast<int64_t>(value)));
9512       break;
9513 
9514     case FieldDescriptor::TYPE_SFIXED32:
9515       unknown_fields->AddFixed32(number, static_cast<uint32_t>(value));
9516       break;
9517 
9518     case FieldDescriptor::TYPE_SINT32:
9519       unknown_fields->AddVarint(
9520           number, internal::WireFormatLite::ZigZagEncode32(value));
9521       break;
9522 
9523     default:
9524       ABSL_LOG(FATAL) << "Invalid wire type for CPPTYPE_INT32: " << type;
9525       break;
9526   }
9527 }
9528 
SetInt64(int number,int64_t value,FieldDescriptor::Type type,UnknownFieldSet * unknown_fields)9529 void DescriptorBuilder::OptionInterpreter::SetInt64(
9530     int number, int64_t value, FieldDescriptor::Type type,
9531     UnknownFieldSet* unknown_fields) {
9532   switch (type) {
9533     case FieldDescriptor::TYPE_INT64:
9534       unknown_fields->AddVarint(number, static_cast<uint64_t>(value));
9535       break;
9536 
9537     case FieldDescriptor::TYPE_SFIXED64:
9538       unknown_fields->AddFixed64(number, static_cast<uint64_t>(value));
9539       break;
9540 
9541     case FieldDescriptor::TYPE_SINT64:
9542       unknown_fields->AddVarint(
9543           number, internal::WireFormatLite::ZigZagEncode64(value));
9544       break;
9545 
9546     default:
9547       ABSL_LOG(FATAL) << "Invalid wire type for CPPTYPE_INT64: " << type;
9548       break;
9549   }
9550 }
9551 
SetUInt32(int number,uint32_t value,FieldDescriptor::Type type,UnknownFieldSet * unknown_fields)9552 void DescriptorBuilder::OptionInterpreter::SetUInt32(
9553     int number, uint32_t value, FieldDescriptor::Type type,
9554     UnknownFieldSet* unknown_fields) {
9555   switch (type) {
9556     case FieldDescriptor::TYPE_UINT32:
9557       unknown_fields->AddVarint(number, static_cast<uint64_t>(value));
9558       break;
9559 
9560     case FieldDescriptor::TYPE_FIXED32:
9561       unknown_fields->AddFixed32(number, static_cast<uint32_t>(value));
9562       break;
9563 
9564     default:
9565       ABSL_LOG(FATAL) << "Invalid wire type for CPPTYPE_UINT32: " << type;
9566       break;
9567   }
9568 }
9569 
SetUInt64(int number,uint64_t value,FieldDescriptor::Type type,UnknownFieldSet * unknown_fields)9570 void DescriptorBuilder::OptionInterpreter::SetUInt64(
9571     int number, uint64_t value, FieldDescriptor::Type type,
9572     UnknownFieldSet* unknown_fields) {
9573   switch (type) {
9574     case FieldDescriptor::TYPE_UINT64:
9575       unknown_fields->AddVarint(number, value);
9576       break;
9577 
9578     case FieldDescriptor::TYPE_FIXED64:
9579       unknown_fields->AddFixed64(number, value);
9580       break;
9581 
9582     default:
9583       ABSL_LOG(FATAL) << "Invalid wire type for CPPTYPE_UINT64: " << type;
9584       break;
9585   }
9586 }
9587 
LogUnusedDependency(const FileDescriptorProto & proto,const FileDescriptor * result)9588 void DescriptorBuilder::LogUnusedDependency(const FileDescriptorProto& proto,
9589                                             const FileDescriptor* result) {
9590   (void)result;  // Parameter is used by Google-internal code.
9591 
9592   if (!unused_dependency_.empty()) {
9593     auto itr = pool_->direct_input_files_.find(proto.name());
9594     bool is_error = itr != pool_->direct_input_files_.end() && itr->second;
9595     for (const auto* unused : unused_dependency_) {
9596       auto make_error = [&] {
9597         return absl::StrCat("Import ", unused->name(), " is unused.");
9598       };
9599       if (is_error) {
9600         AddError(unused->name(), proto, DescriptorPool::ErrorCollector::IMPORT,
9601                  make_error);
9602       } else {
9603         AddWarning(unused->name(), proto,
9604                    DescriptorPool::ErrorCollector::IMPORT, make_error);
9605       }
9606     }
9607   }
9608 }
9609 
CrossLinkOnDemandHelper(absl::string_view name,bool expecting_enum) const9610 Symbol DescriptorPool::CrossLinkOnDemandHelper(absl::string_view name,
9611                                                bool expecting_enum) const {
9612   (void)expecting_enum;  // Parameter is used by Google-internal code.
9613   auto lookup_name = std::string(name);
9614   if (!lookup_name.empty() && lookup_name[0] == '.') {
9615     lookup_name = lookup_name.substr(1);
9616   }
9617   Symbol result = tables_->FindByNameHelper(this, lookup_name);
9618   return result;
9619 }
9620 
9621 // Handle the lazy import building for a message field whose type wasn't built
9622 // at cross link time. If that was the case, we saved the name of the type to
9623 // be looked up when the accessor for the type was called. Set type_,
9624 // enum_type_, message_type_, and default_value_enum_ appropriately.
InternalTypeOnceInit() const9625 void FieldDescriptor::InternalTypeOnceInit() const {
9626   ABSL_CHECK(file()->finished_building_ == true);
9627   const EnumDescriptor* enum_type = nullptr;
9628   const char* lazy_type_name = reinterpret_cast<const char*>(type_once_ + 1);
9629   const char* lazy_default_value_enum_name =
9630       lazy_type_name + strlen(lazy_type_name) + 1;
9631   Symbol result = file()->pool()->CrossLinkOnDemandHelper(
9632       lazy_type_name, type_ == FieldDescriptor::TYPE_ENUM);
9633   if (result.type() == Symbol::MESSAGE) {
9634     ABSL_CHECK(type_ == FieldDescriptor::TYPE_MESSAGE ||
9635                type_ == FieldDescriptor::TYPE_GROUP);
9636     type_descriptor_.message_type = result.descriptor();
9637   } else if (result.type() == Symbol::ENUM) {
9638     ABSL_CHECK(type_ == FieldDescriptor::TYPE_ENUM);
9639     enum_type = type_descriptor_.enum_type = result.enum_descriptor();
9640   }
9641 
9642   if (enum_type) {
9643     if (lazy_default_value_enum_name[0] != '\0') {
9644       // Have to build the full name now instead of at CrossLink time,
9645       // because enum_type may not be known at the time.
9646       std::string name = std::string(enum_type->full_name());
9647       // Enum values reside in the same scope as the enum type.
9648       std::string::size_type last_dot = name.find_last_of('.');
9649       if (last_dot != std::string::npos) {
9650         name = absl::StrCat(name.substr(0, last_dot), ".",
9651                             lazy_default_value_enum_name);
9652       } else {
9653         name = lazy_default_value_enum_name;
9654       }
9655       Symbol result_enum = file()->pool()->CrossLinkOnDemandHelper(name, true);
9656       default_value_enum_ = result_enum.enum_value_descriptor();
9657     } else {
9658       default_value_enum_ = nullptr;
9659     }
9660     if (!default_value_enum_) {
9661       // We use the first defined value as the default
9662       // if a default is not explicitly defined.
9663       ABSL_CHECK(enum_type->value_count());
9664       default_value_enum_ = enum_type->value(0);
9665     }
9666   }
9667 }
9668 
TypeOnceInit(const FieldDescriptor * to_init)9669 void FieldDescriptor::TypeOnceInit(const FieldDescriptor* to_init) {
9670   to_init->InternalTypeOnceInit();
9671 }
9672 
9673 // message_type(), enum_type(), default_value_enum(), and type()
9674 // all share the same absl::call_once init path to do lazy
9675 // import building and cross linking of a field of a message.
message_type() const9676 const Descriptor* FieldDescriptor::message_type() const {
9677   if (type_ == TYPE_MESSAGE || type_ == TYPE_GROUP) {
9678     if (type_once_) {
9679       absl::call_once(*type_once_, FieldDescriptor::TypeOnceInit, this);
9680     }
9681     return type_descriptor_.message_type;
9682   }
9683   return nullptr;
9684 }
9685 
enum_type() const9686 const EnumDescriptor* FieldDescriptor::enum_type() const {
9687   if (type_ == TYPE_ENUM) {
9688     if (type_once_) {
9689       absl::call_once(*type_once_, FieldDescriptor::TypeOnceInit, this);
9690     }
9691     return type_descriptor_.enum_type;
9692   }
9693   return nullptr;
9694 }
9695 
default_value_enum() const9696 const EnumValueDescriptor* FieldDescriptor::default_value_enum() const {
9697   if (type_once_) {
9698     absl::call_once(*type_once_, FieldDescriptor::TypeOnceInit, this);
9699   }
9700   return default_value_enum_;
9701 }
9702 
PrintableNameForExtension() const9703 internal::DescriptorStringView FieldDescriptor::PrintableNameForExtension()
9704     const {
9705   const bool is_message_set_extension =
9706       is_extension() &&
9707       containing_type()->options().message_set_wire_format() &&
9708       type() == FieldDescriptor::TYPE_MESSAGE && is_optional() &&
9709       extension_scope() == message_type();
9710   return is_message_set_extension ? message_type()->full_name() : full_name();
9711 }
9712 
InternalDependenciesOnceInit() const9713 void FileDescriptor::InternalDependenciesOnceInit() const {
9714   ABSL_CHECK(finished_building_ == true);
9715   const char* names_ptr = reinterpret_cast<const char*>(dependencies_once_ + 1);
9716   for (int i = 0; i < dependency_count(); i++) {
9717     const char* name = names_ptr;
9718     names_ptr += strlen(name) + 1;
9719     if (name[0] != '\0') {
9720       dependencies_[i] = pool_->FindFileByName(name);
9721     }
9722   }
9723 }
9724 
DependenciesOnceInit(const FileDescriptor * to_init)9725 void FileDescriptor::DependenciesOnceInit(const FileDescriptor* to_init) {
9726   to_init->InternalDependenciesOnceInit();
9727 }
9728 
dependency(int index) const9729 const FileDescriptor* FileDescriptor::dependency(int index) const {
9730   if (dependencies_once_) {
9731     // Do once init for all indices, as it's unlikely only a single index would
9732     // be called, and saves on absl::call_once allocations.
9733     absl::call_once(*dependencies_once_, FileDescriptor::DependenciesOnceInit,
9734                     this);
9735   }
9736   return dependencies_[index];
9737 }
9738 
input_type() const9739 const Descriptor* MethodDescriptor::input_type() const {
9740   return input_type_.Get(service());
9741 }
9742 
output_type() const9743 const Descriptor* MethodDescriptor::output_type() const {
9744   return output_type_.Get(service());
9745 }
9746 
9747 namespace internal {
Set(const Descriptor * descriptor)9748 void LazyDescriptor::Set(const Descriptor* descriptor) {
9749   ABSL_CHECK(!once_);
9750   descriptor_ = descriptor;
9751 }
9752 
SetLazy(absl::string_view name,const FileDescriptor * file)9753 void LazyDescriptor::SetLazy(absl::string_view name,
9754                              const FileDescriptor* file) {
9755   // verify Init() has been called and Set hasn't been called yet.
9756   ABSL_CHECK(!descriptor_);
9757   ABSL_CHECK(!once_);
9758   ABSL_CHECK(file && file->pool_);
9759   ABSL_CHECK(file->pool_->lazily_build_dependencies_);
9760   ABSL_CHECK(!file->finished_building_);
9761   once_ = ::new (file->pool_->tables_->AllocateBytes(static_cast<int>(
9762       sizeof(absl::once_flag) + name.size() + 1))) absl::once_flag{};
9763   char* lazy_name = reinterpret_cast<char*>(once_ + 1);
9764   memcpy(lazy_name, name.data(), name.size());
9765   lazy_name[name.size()] = 0;
9766 }
9767 
Once(const ServiceDescriptor * service)9768 void LazyDescriptor::Once(const ServiceDescriptor* service) {
9769   if (once_) {
9770     absl::call_once(*once_, [&] {
9771       auto* file = service->file();
9772       ABSL_CHECK(file->finished_building_);
9773       const char* lazy_name = reinterpret_cast<const char*>(once_ + 1);
9774       descriptor_ =
9775           file->pool_->CrossLinkOnDemandHelper(lazy_name, false).descriptor();
9776     });
9777   }
9778 }
9779 
ParseNoReflection(absl::string_view from,google::protobuf::MessageLite & to)9780 bool ParseNoReflection(absl::string_view from, google::protobuf::MessageLite& to) {
9781   auto cleanup = DisableTracking();
9782 
9783   to.Clear();
9784   const char* ptr;
9785   internal::ParseContext ctx(io::CodedInputStream::GetDefaultRecursionLimit(),
9786                              false, &ptr, from);
9787   ptr = to._InternalParse(ptr, &ctx);
9788   if (ptr == nullptr || !ctx.EndedAtLimit()) return false;
9789   return to.IsInitializedWithErrors();
9790 }
9791 
9792 namespace cpp {
HasPreservingUnknownEnumSemantics(const FieldDescriptor * field)9793 bool HasPreservingUnknownEnumSemantics(const FieldDescriptor* field) {
9794   if (field->legacy_enum_field_treated_as_closed()) {
9795     return false;
9796   }
9797 
9798   return field->enum_type() != nullptr && !field->enum_type()->is_closed();
9799 }
9800 
HasHasbit(const FieldDescriptor * field)9801 bool HasHasbit(const FieldDescriptor* field) {
9802   return field->has_presence() && !field->real_containing_oneof() &&
9803          !field->options().weak();
9804 }
9805 
// Returns whether UTF-8 verification applies: always for non-lite, never for
// lite. `field` is not consulted here.
static bool IsVerifyUtf8(const FieldDescriptor* field, bool is_lite) {
  return !is_lite;
}
9810 
9811 // Which level of UTF-8 enforcemant is placed on this file.
GetUtf8CheckMode(const FieldDescriptor * field,bool is_lite)9812 Utf8CheckMode GetUtf8CheckMode(const FieldDescriptor* field, bool is_lite) {
9813   if (field->type() == FieldDescriptor::TYPE_STRING ||
9814       (field->is_map() && (field->message_type()->map_key()->type() ==
9815                                FieldDescriptor::TYPE_STRING ||
9816                            field->message_type()->map_value()->type() ==
9817                                FieldDescriptor::TYPE_STRING))) {
9818     if (IsStrictUtf8(field)) {
9819       return Utf8CheckMode::kStrict;
9820     } else if (IsVerifyUtf8(field, is_lite)) {
9821       return Utf8CheckMode::kVerify;
9822     }
9823   }
9824   return Utf8CheckMode::kNone;
9825 }
9826 
IsGroupLike(const FieldDescriptor & field)9827 bool IsGroupLike(const FieldDescriptor& field) {
9828   // Groups are always tag-delimited, currently specified by a TYPE_GROUP type.
9829   if (field.type() != FieldDescriptor::TYPE_GROUP) return false;
9830   // Group fields always are always the lowercase type name.
9831   if (field.name() != absl::AsciiStrToLower(field.message_type()->name())) {
9832     return false;
9833   }
9834 
9835   if (field.message_type()->file() != field.file()) return false;
9836 
9837   // Group messages are always defined in the same scope as the field.  File
9838   // level extensions will compare NULL == NULL here, which is why the file
9839   // comparison above is necessary to ensure both come from the same file.
9840   return field.is_extension() ? field.message_type()->containing_type() ==
9841                                     field.extension_scope()
9842                               : field.message_type()->containing_type() ==
9843                                     field.containing_type();
9844 }
9845 
IsLazilyInitializedFile(absl::string_view filename)9846 bool IsLazilyInitializedFile(absl::string_view filename) {
9847   if (filename == "third_party/protobuf/cpp_features.proto" ||
9848       filename == "google/protobuf/cpp_features.proto") {
9849     return true;
9850   }
9851   return filename == "net/proto2/proto/descriptor.proto" ||
9852          filename == "google/protobuf/descriptor.proto";
9853 }
9854 
IsTrackingEnabled()9855 bool IsTrackingEnabled() { return is_tracking_enabled(); }
9856 
9857 }  // namespace cpp
9858 }  // namespace internal
9859 
edition() const9860 Edition FileDescriptor::edition() const { return edition_; }
9861 
9862 namespace internal {
ShortEditionName(Edition edition)9863 absl::string_view ShortEditionName(Edition edition) {
9864   return absl::StripPrefix(Edition_Name(edition), "EDITION_");
9865 }
GetEdition(const FileDescriptor & desc)9866 Edition InternalFeatureHelper::GetEdition(const FileDescriptor& desc) {
9867   return desc.edition();
9868 }
9869 }  // namespace internal
9870 
9871 }  // namespace protobuf
9872 }  // namespace google
9873 
9874 #include "google/protobuf/port_undef.inc"
9875