• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc.  All rights reserved.
3 // https://developers.google.com/protocol-buffers/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 //     * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 //     * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 //     * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 
31 // Author: kenton@google.com (Kenton Varda)
32 //  Based on original Protocol Buffers design by
33 //  Sanjay Ghemawat, Jeff Dean, and others.
34 
35 #include <google/protobuf/descriptor.h>
36 
37 #include <algorithm>
38 #include <array>
39 #include <functional>
40 #include <limits>
41 #include <map>
42 #include <memory>
43 #include <set>
44 #include <sstream>
45 #include <string>
46 #include <type_traits>
47 #include <unordered_map>
48 #include <unordered_set>
49 #include <vector>
50 
51 #include <google/protobuf/stubs/common.h>
52 #include <google/protobuf/stubs/logging.h>
53 #include <google/protobuf/stubs/strutil.h>
54 #include <google/protobuf/stubs/once.h>
55 #include <google/protobuf/any.h>
56 #include <google/protobuf/descriptor.pb.h>
57 #include <google/protobuf/io/coded_stream.h>
58 #include <google/protobuf/io/tokenizer.h>
59 #include <google/protobuf/io/zero_copy_stream_impl.h>
60 #include <google/protobuf/stubs/casts.h>
61 #include <google/protobuf/stubs/stringprintf.h>
62 #include <google/protobuf/stubs/substitute.h>
63 #include <google/protobuf/descriptor_database.h>
64 #include <google/protobuf/dynamic_message.h>
65 #include <google/protobuf/generated_message_util.h>
66 #include <google/protobuf/io/strtod.h>
67 #include <google/protobuf/port.h>
68 #include <google/protobuf/text_format.h>
69 #include <google/protobuf/unknown_field_set.h>
70 #include <google/protobuf/stubs/map_util.h>
71 #include <google/protobuf/stubs/stl_util.h>
72 #include <google/protobuf/stubs/hash.h>
73 
74 #undef PACKAGE  // autoheader #defines this.  :(
75 
76 
77 // Must be included last.
78 #include <google/protobuf/port_def.inc>
79 
80 namespace google {
81 namespace protobuf {
82 
83 namespace {
// Numeric limit related to packages.
// NOTE(review): the use site is outside this chunk; presumably this caps the
// number of nested package components -- confirm where it is consumed.
constexpr int kPackageLimit = 100;
85 
// Note:  ctype.h is deliberately avoided because its results depend on the
// locale.
// Maps the ASCII letters 'a'..'z' to 'A'..'Z'; any other character is
// returned unchanged.
char ToUpper(char ch) {
  if (ch < 'a' || ch > 'z') return ch;
  return ch - 'a' + 'A';
}
90 
// Maps the ASCII letters 'A'..'Z' to 'a'..'z'; any other character is
// returned unchanged. Locale independent.
char ToLower(char ch) {
  if (ch < 'A' || ch > 'Z') return ch;
  return ch - 'A' + 'a';
}
94 
ToCamelCase(const std::string & input,bool lower_first)95 std::string ToCamelCase(const std::string& input, bool lower_first) {
96   bool capitalize_next = !lower_first;
97   std::string result;
98   result.reserve(input.size());
99 
100   for (char character : input) {
101     if (character == '_') {
102       capitalize_next = true;
103     } else if (capitalize_next) {
104       result.push_back(ToUpper(character));
105       capitalize_next = false;
106     } else {
107       result.push_back(character);
108     }
109   }
110 
111   // Lower-case the first letter.
112   if (lower_first && !result.empty()) {
113     result[0] = ToLower(result[0]);
114   }
115 
116   return result;
117 }
118 
ToJsonName(const std::string & input)119 std::string ToJsonName(const std::string& input) {
120   bool capitalize_next = false;
121   std::string result;
122   result.reserve(input.size());
123 
124   for (char character : input) {
125     if (character == '_') {
126       capitalize_next = true;
127     } else if (capitalize_next) {
128       result.push_back(ToUpper(character));
129       capitalize_next = false;
130     } else {
131       result.push_back(character);
132     }
133   }
134 
135   return result;
136 }
137 
// C++11 stand-in for a fold expression over the comma operator.
// Usage:  Fold({expr...});
// The expressions are the elements of a braced-init-list, so they are
// evaluated left-to-right. Each expression must yield a (non-void) value.
struct ExpressionEater {
  // Accepts and discards a value of any type.
  template <typename T>
  ExpressionEater(T&&) {}  // NOLINT
};
// Does nothing itself; all the work happens while the argument list is built.
void Fold(std::initializer_list<ExpressionEater>) {}
146 
// Rounds `n` up to the nearest multiple of `R`, which must be a power of two.
template <int R>
constexpr size_t RoundUpTo(size_t n) {
  static_assert((R & (R - 1)) == 0, "Must be power of two");
  // Add R-1 and clear the low bits.
  return (n + (R - 1)) & ~static_cast<size_t>(R - 1);
}
152 
// Returns the largest of its arguments as a size_t.
//
// Works for any number of arguments >= 1. The unary overload is the base case
// that lets a pack of exactly one element (e.g. `Max(alignof(T)...)` with a
// single `T`) compile. The variadic overload converts every argument to
// size_t first so that the recursion always lands on the non-template
// overloads, whatever integral type the caller passed.
constexpr size_t Max(size_t a) { return a; }
constexpr size_t Max(size_t a, size_t b) { return a > b ? a : b; }
template <typename T, typename... Ts>
constexpr size_t Max(T a, Ts... b) {
  return Max(static_cast<size_t>(a), Max(static_cast<size_t>(b)...));
}
158 
// Alignment to assume for `T` when laying out the flat allocation.
// `char` is special: it is treated as 8-aligned because the `char` block is
// where the trivial structs are dropped.
template <typename T>
constexpr size_t EffectiveAlignof() {
  return !std::is_same<T, char>::value ? alignof(T) : 8;
}
165 
166 template <int align, typename U, typename... T>
167 using AppendIfAlign =
168     typename std::conditional<EffectiveAlignof<U>() == align, void (*)(T..., U),
169                               void (*)(T...)>::type;
170 
171 // Metafunction to sort types in descending order of alignment.
172 // Useful for the flat allocator to ensure proper alignment of all elements
173 // without having to add padding.
174 // Instead of implementing a proper sort metafunction we just do a
175 // filter+merge, which is much simpler to write as a metafunction.
176 // We have a fixed set of alignments we can filter on.
177 // For simplicity we use a function pointer as a type list.
178 template <typename In, typename T16, typename T8, typename T4, typename T2,
179           typename T1>
180 struct TypeListSortImpl;
181 
182 template <typename... T16, typename... T8, typename... T4, typename... T2,
183           typename... T1>
184 struct TypeListSortImpl<void (*)(), void (*)(T16...), void (*)(T8...),
185                         void (*)(T4...), void (*)(T2...), void (*)(T1...)> {
186   using type = void (*)(T16..., T8..., T4..., T2..., T1...);
187 };
188 
189 template <typename First, typename... Rest, typename... T16, typename... T8,
190           typename... T4, typename... T2, typename... T1>
191 struct TypeListSortImpl<void (*)(First, Rest...), void (*)(T16...),
192                         void (*)(T8...), void (*)(T4...), void (*)(T2...),
193                         void (*)(T1...)> {
194   using type = typename TypeListSortImpl<
195       void (*)(Rest...), AppendIfAlign<16, First, T16...>,
196       AppendIfAlign<8, First, T8...>, AppendIfAlign<4, First, T4...>,
197       AppendIfAlign<2, First, T2...>, AppendIfAlign<1, First, T1...>>::type;
198 };
199 
200 template <typename... T>
201 using SortByAlignment =
202     typename TypeListSortImpl<void (*)(T...), void (*)(), void (*)(),
203                               void (*)(), void (*)(), void (*)()>::type;
204 
// Declaration-only helper, meant to be used inside `decltype`: given a type
// list encoded as `void (*)(T...)`, its return type is `C<T...>`.
template <template <typename...> class C, typename... T>
C<T...> ApplyTypeList(void (*)(T...));
207 
// Returns the zero-based position of the first occurrence of `T` within the
// type list that follows it, or -1 when `T` does not occur in the list.

// Base case: the list is empty, so `T` was not found.
template <typename T>
constexpr int FindTypeIndex() {
  return -1;
}

// Recursive case: either `T` is the head of the list, or its index is one
// more than its index in the tail. A "not found" result from the tail is
// propagated as -1 instead of being incremented, so the sentinel is never
// mistaken for a valid index.
template <typename T, typename T1, typename... Ts>
constexpr int FindTypeIndex() {
  return std::is_same<T, T1>::value          ? 0
         : FindTypeIndex<T, Ts...>() < 0     ? -1
                                             : FindTypeIndex<T, Ts...>() + 1;
}
217 
// A type-to-value map, where the possible keys are the types listed in
// `Keys...`. The value stored for key `K` has type `ValueT<K>` and is
// value-initialized.
template <template <typename> class ValueT, typename... Keys>
class TypeMap {
 private:
  // One slot per key. Each slot is a distinct base class of Payload, so the
  // slot for `K` is reached by converting the payload to `Base<K>`.
  template <typename K>
  struct Base {
    ValueT<K> value{};
  };
  struct Payload : Base<Keys>... {};

 public:
  // Mutable access to the value stored for key `K`.
  template <typename K>
  ValueT<K>& Get() {
    Base<K>& slot = payload_;
    return slot.value;
  }

  // Read-only access to the value stored for key `K`.
  template <typename K>
  const ValueT<K>& Get() const {
    const Base<K>& slot = payload_;
    return slot.value;
  }

 private:
  Payload payload_;
};
241 
// Value-type selectors for TypeMap.
// IntT<T> is `int` regardless of `T`.
template <typename T>
using IntT = int;
// PointerT<T> is a pointer to `T`.
template <typename T>
using PointerT = T*;
246 
247 // Manages an allocation of sequential arrays of type `T...`.
248 // It is more space efficient than storing N (ptr, size) pairs, by storing only
249 // the pointer to the head and the boundaries between the arrays.
250 template <typename... T>
251 class FlatAllocation {
252  public:
253   static constexpr size_t kMaxAlign = Max(alignof(T)...);
254 
FlatAllocation(const TypeMap<IntT,T...> & ends)255   FlatAllocation(const TypeMap<IntT, T...>& ends) : ends_(ends) {
256     // The arrays start just after FlatAllocation, so adjust the ends.
257     Fold({(ends_.template Get<T>() +=
258            RoundUpTo<kMaxAlign>(sizeof(FlatAllocation)))...});
259     Fold({Init<T>()...});
260   }
261 
Destroy()262   void Destroy() {
263     Fold({Destroy<T>()...});
264     internal::SizedDelete(this, total_bytes());
265   }
266 
267   template <int I>
268   using type = typename std::tuple_element<I, std::tuple<T...>>::type;
269 
270   // Gets a tuple of the head pointers for the arrays
Pointers() const271   TypeMap<PointerT, T...> Pointers() const {
272     TypeMap<PointerT, T...> out;
273     Fold({(out.template Get<T>() = Begin<T>())...});
274     return out;
275   }
276 
277 
278  private:
279   // Total number of bytes used by all arrays.
total_bytes() const280   int total_bytes() const {
281     // Get the last end.
282     return ends_.template Get<typename std::tuple_element<
283         sizeof...(T) - 1, std::tuple<T...>>::type>();
284   }
285 
286 
287   template <typename U>
BeginOffset() const288   int BeginOffset() const {
289     constexpr int type_index = FindTypeIndex<U, T...>();
290     // Avoid a negative value here to keep it compiling when type_index == 0
291     constexpr int prev_type_index = type_index == 0 ? 0 : type_index - 1;
292     using PrevType =
293         typename std::tuple_element<prev_type_index, std::tuple<T...>>::type;
294     // Ensure the types are properly aligned.
295     static_assert(EffectiveAlignof<PrevType>() >= EffectiveAlignof<U>(), "");
296     return type_index == 0 ? RoundUpTo<kMaxAlign>(sizeof(FlatAllocation))
297                            : ends_.template Get<PrevType>();
298   }
299 
300   template <typename U>
EndOffset() const301   int EndOffset() const {
302     return ends_.template Get<U>();
303   }
304 
305   // Avoid the reinterpret_cast if the array is empty.
306   // Clang's Control Flow Integrity does not like the cast pointing to memory
307   // that is not yet initialized to be of that type.
308   // (from -fsanitize=cfi-unrelated-cast)
309   template <typename U>
Begin() const310   U* Begin() const {
311     int begin = BeginOffset<U>(), end = EndOffset<U>();
312     if (begin == end) return nullptr;
313     return reinterpret_cast<U*>(data() + begin);
314   }
315 
316   template <typename U>
End() const317   U* End() const {
318     int begin = BeginOffset<U>(), end = EndOffset<U>();
319     if (begin == end) return nullptr;
320     return reinterpret_cast<U*>(data() + end);
321   }
322 
323   template <typename U>
Init()324   bool Init() {
325     // Skip for the `char` block. No need to zero initialize it.
326     if (std::is_same<U, char>::value) return true;
327     for (char *p = data() + BeginOffset<U>(), *end = data() + EndOffset<U>();
328          p != end; p += sizeof(U)) {
329       ::new (p) U{};
330     }
331     return true;
332   }
333 
334   template <typename U>
Destroy()335   bool Destroy() {
336     if (std::is_trivially_destructible<U>::value) return true;
337     for (U* it = Begin<U>(), *end = End<U>(); it != end; ++it) {
338       it->~U();
339     }
340     return true;
341   }
342 
data() const343   char* data() const {
344     return const_cast<char*>(reinterpret_cast<const char*>(this));
345   }
346 
347   TypeMap<IntT, T...> ends_;
348 };
349 
350 template <typename... T>
CalculateEnds(const TypeMap<IntT,T...> & sizes)351 TypeMap<IntT, T...> CalculateEnds(const TypeMap<IntT, T...>& sizes) {
352   int total = 0;
353   TypeMap<IntT, T...> out;
354   Fold({(out.template Get<T>() = total +=
355          sizeof(T) * sizes.template Get<T>())...});
356   return out;
357 }
358 
359 // The implementation for FlatAllocator below.
360 // This separate class template makes it easier to have methods that fold on
361 // `T...`.
362 template <typename... T>
363 class FlatAllocatorImpl {
364  public:
365   using Allocation = FlatAllocation<T...>;
366 
367   template <typename U>
PlanArray(int array_size)368   void PlanArray(int array_size) {
369     // We can't call PlanArray after FinalizePlanning has been called.
370     GOOGLE_CHECK(!has_allocated());
371     if (std::is_trivially_destructible<U>::value) {
372       // Trivial types are aligned to 8 bytes.
373       static_assert(alignof(U) <= 8, "");
374       total_.template Get<char>() += RoundUpTo<8>(array_size * sizeof(U));
375     } else {
376       // Since we can't use `if constexpr`, just make the expression compile
377       // when this path is not taken.
378       using TypeToUse =
379           typename std::conditional<std::is_trivially_destructible<U>::value,
380                                     char, U>::type;
381       total_.template Get<TypeToUse>() += array_size;
382     }
383   }
384 
385   template <typename U>
AllocateArray(int array_size)386   U* AllocateArray(int array_size) {
387     constexpr bool trivial = std::is_trivially_destructible<U>::value;
388     using TypeToUse = typename std::conditional<trivial, char, U>::type;
389 
390     // We can only allocate after FinalizePlanning has been called.
391     GOOGLE_CHECK(has_allocated());
392 
393     TypeToUse*& data = pointers_.template Get<TypeToUse>();
394     int& used = used_.template Get<TypeToUse>();
395     U* res = reinterpret_cast<U*>(data + used);
396     used += trivial ? RoundUpTo<8>(array_size * sizeof(U)) : array_size;
397     GOOGLE_CHECK_LE(used, total_.template Get<TypeToUse>());
398     return res;
399   }
400 
401   template <typename... In>
AllocateStrings(In &&...in)402   const std::string* AllocateStrings(In&&... in) {
403     std::string* strings = AllocateArray<std::string>(sizeof...(in));
404     std::string* res = strings;
405     Fold({(*strings++ = std::string(std::forward<In>(in)))...});
406     return res;
407   }
408 
409   // Allocate all 5 names of the field:
410   // name, full name, lowercase, camelcase and json.
411   // It will dedup the strings when possible.
412   // The resulting array contains `name` at index 0, `full_name` at index 1
413   // and the other 3 indices are specified in the result.
PlanFieldNames(const std::string & name,const std::string * opt_json_name)414   void PlanFieldNames(const std::string& name,
415                       const std::string* opt_json_name) {
416     GOOGLE_CHECK(!has_allocated());
417 
418     // Fast path for snake_case names, which follow the style guide.
419     if (opt_json_name == nullptr) {
420       switch (GetFieldNameCase(name)) {
421         case FieldNameCase::kAllLower:
422           // Case 1: they are all the same.
423           return PlanArray<std::string>(2);
424         case FieldNameCase::kSnakeCase:
425           // Case 2: name==lower, camel==json
426           return PlanArray<std::string>(3);
427         default:
428           break;
429       }
430     }
431 
432     std::string lowercase_name = name;
433     LowerString(&lowercase_name);
434 
435     std::string camelcase_name = ToCamelCase(name, /* lower_first = */ true);
436     std::string json_name =
437         opt_json_name != nullptr ? *opt_json_name : ToJsonName(name);
438 
439     StringPiece all_names[] = {name, lowercase_name, camelcase_name,
440                                      json_name};
441     std::sort(all_names, all_names + 4);
442     int unique =
443         static_cast<int>(std::unique(all_names, all_names + 4) - all_names);
444 
445     PlanArray<std::string>(unique + 1);
446   }
447 
448   struct FieldNamesResult {
449     const std::string* array;
450     int lowercase_index;
451     int camelcase_index;
452     int json_index;
453   };
AllocateFieldNames(const std::string & name,const std::string & scope,const std::string * opt_json_name)454   FieldNamesResult AllocateFieldNames(const std::string& name,
455                                       const std::string& scope,
456                                       const std::string* opt_json_name) {
457     GOOGLE_CHECK(has_allocated());
458 
459     std::string full_name =
460         scope.empty() ? name : StrCat(scope, ".", name);
461 
462     // Fast path for snake_case names, which follow the style guide.
463     if (opt_json_name == nullptr) {
464       switch (GetFieldNameCase(name)) {
465         case FieldNameCase::kAllLower:
466           // Case 1: they are all the same.
467           return {AllocateStrings(name, std::move(full_name)), 0, 0, 0};
468         case FieldNameCase::kSnakeCase:
469           // Case 2: name==lower, camel==json
470           return {AllocateStrings(name, std::move(full_name),
471                                   ToCamelCase(name, /* lower_first = */ true)),
472                   0, 2, 2};
473         default:
474           break;
475       }
476     }
477 
478     std::vector<std::string> names;
479     names.push_back(name);
480     names.push_back(std::move(full_name));
481 
482     const auto push_name = [&](std::string new_name) {
483       for (size_t i = 0; i < names.size(); ++i) {
484         // Do not compare the full_name. It is unlikely to match, except in
485         // custom json_name. We are not taking this into account in
486         // PlanFieldNames so better to not try it.
487         if (i == 1) continue;
488         if (names[i] == new_name) return i;
489       }
490       names.push_back(std::move(new_name));
491       return names.size() - 1;
492     };
493 
494     FieldNamesResult result{nullptr, 0, 0, 0};
495 
496     std::string lowercase_name = name;
497     LowerString(&lowercase_name);
498     result.lowercase_index = push_name(std::move(lowercase_name));
499     result.camelcase_index =
500         push_name(ToCamelCase(name, /* lower_first = */ true));
501     result.json_index =
502         push_name(opt_json_name != nullptr ? *opt_json_name : ToJsonName(name));
503 
504     std::string* all_names = AllocateArray<std::string>(names.size());
505     result.array = all_names;
506     std::move(names.begin(), names.end(), all_names);
507 
508     return result;
509   }
510 
511   template <typename Alloc>
FinalizePlanning(Alloc & alloc)512   void FinalizePlanning(Alloc& alloc) {
513     GOOGLE_CHECK(!has_allocated());
514 
515     pointers_ = alloc->CreateFlatAlloc(total_)->Pointers();
516 
517     GOOGLE_CHECK(has_allocated());
518   }
519 
ExpectConsumed() const520   void ExpectConsumed() const {
521     // We verify that we consumed all the memory requested if there was no
522     // error in processing.
523     Fold({ExpectConsumed<T>()...});
524   }
525 
526  private:
has_allocated() const527   bool has_allocated() const {
528     return pointers_.template Get<char>() != nullptr;
529   }
530 
IsLower(char c)531   static bool IsLower(char c) { return 'a' <= c && c <= 'z'; }
IsDigit(char c)532   static bool IsDigit(char c) { return '0' <= c && c <= '9'; }
IsLowerOrDigit(char c)533   static bool IsLowerOrDigit(char c) { return IsLower(c) || IsDigit(c); }
534 
535   enum class FieldNameCase { kAllLower, kSnakeCase, kOther };
GetFieldNameCase(const std::string & name)536   FieldNameCase GetFieldNameCase(const std::string& name) {
537     if (!IsLower(name[0])) return FieldNameCase::kOther;
538     FieldNameCase best = FieldNameCase::kAllLower;
539     for (char c : name) {
540       if (IsLowerOrDigit(c)) {
541         // nothing to do
542       } else if (c == '_') {
543         best = FieldNameCase::kSnakeCase;
544       } else {
545         return FieldNameCase::kOther;
546       }
547     }
548     return best;
549   }
550 
551   template <typename U>
ExpectConsumed() const552   bool ExpectConsumed() const {
553     GOOGLE_CHECK_EQ(total_.template Get<U>(), used_.template Get<U>());
554     return true;
555   }
556 
557   TypeMap<PointerT, T...> pointers_;
558   TypeMap<IntT, T...> total_;
559   TypeMap<IntT, T...> used_;
560 };
561 
562 }  // namespace
563 
564 class Symbol {
565  public:
566   enum Type {
567     NULL_SYMBOL,
568     MESSAGE,
569     FIELD,
570     ONEOF,
571     ENUM,
572     ENUM_VALUE,
573     ENUM_VALUE_OTHER_PARENT,
574     SERVICE,
575     METHOD,
576     FULL_PACKAGE,
577     SUB_PACKAGE,
578     QUERY_KEY
579   };
580 
Symbol()581   Symbol() {
582     static constexpr internal::SymbolBase null_symbol{};
583     static_assert(null_symbol.symbol_type_ == NULL_SYMBOL, "");
584     // Initialize with a sentinel to make sure `ptr_` is never null.
585     ptr_ = &null_symbol;
586   }
587 
588   // Every object we store derives from internal::SymbolBase, where we store the
589   // symbol type enum.
590   // Storing in the object can be done without using more space in most cases,
591   // while storing it in the Symbol type would require 8 bytes.
592 #define DEFINE_MEMBERS(TYPE, TYPE_CONSTANT, FIELD)                             \
593   explicit Symbol(TYPE* value) : ptr_(value) {                                 \
594     value->symbol_type_ = TYPE_CONSTANT;                                       \
595   }                                                                            \
596   const TYPE* FIELD() const {                                                  \
597     return type() == TYPE_CONSTANT ? static_cast<const TYPE*>(ptr_) : nullptr; \
598   }
599 
600   DEFINE_MEMBERS(Descriptor, MESSAGE, descriptor)
601   DEFINE_MEMBERS(FieldDescriptor, FIELD, field_descriptor)
602   DEFINE_MEMBERS(OneofDescriptor, ONEOF, oneof_descriptor)
603   DEFINE_MEMBERS(EnumDescriptor, ENUM, enum_descriptor)
604   DEFINE_MEMBERS(ServiceDescriptor, SERVICE, service_descriptor)
605   DEFINE_MEMBERS(MethodDescriptor, METHOD, method_descriptor)
606   DEFINE_MEMBERS(FileDescriptor, FULL_PACKAGE, file_descriptor)
607 
608   // We use a special node for subpackage FileDescriptor.
609   // It is potentially added to the table with multiple different names, so we
610   // need a separate place to put the name.
611   struct Subpackage : internal::SymbolBase {
612     int name_size;
613     const FileDescriptor* file;
614   };
DEFINE_MEMBERS(Subpackage,SUB_PACKAGE,sub_package_file_descriptor)615   DEFINE_MEMBERS(Subpackage, SUB_PACKAGE, sub_package_file_descriptor)
616 
617   // Enum values have two different parents.
618   // We use two different identitied for the same object to determine the two
619   // different insertions in the map.
620   static Symbol EnumValue(EnumValueDescriptor* value, int n) {
621     Symbol s;
622     internal::SymbolBase* ptr;
623     if (n == 0) {
624       ptr = static_cast<internal::SymbolBaseN<0>*>(value);
625       ptr->symbol_type_ = ENUM_VALUE;
626     } else {
627       ptr = static_cast<internal::SymbolBaseN<1>*>(value);
628       ptr->symbol_type_ = ENUM_VALUE_OTHER_PARENT;
629     }
630     s.ptr_ = ptr;
631     return s;
632   }
633 
enum_value_descriptor() const634   const EnumValueDescriptor* enum_value_descriptor() const {
635     return type() == ENUM_VALUE
636                ? static_cast<const EnumValueDescriptor*>(
637                      static_cast<const internal::SymbolBaseN<0>*>(ptr_))
638            : type() == ENUM_VALUE_OTHER_PARENT
639                ? static_cast<const EnumValueDescriptor*>(
640                      static_cast<const internal::SymbolBaseN<1>*>(ptr_))
641                : nullptr;
642   }
643 
644   // Not a real symbol.
645   // Only used for heterogeneous lookups and never actually inserted in the
646   // tables.
647   // TODO(b/215557658): If we templetize QueryKey on the expected object type we
648   // can skip the switches for the eq function altogether.
649   struct QueryKey : internal::SymbolBase {
650     StringPiece name;
651     const void* parent;
652     int field_number;
653 
654     // Adaptor functions to look like a Symbol to the comparators.
full_namegoogle::protobuf::Symbol::QueryKey655     StringPiece full_name() const { return name; }
parent_number_keygoogle::protobuf::Symbol::QueryKey656     std::pair<const void*, int> parent_number_key() const {
657       return {parent, field_number};
658     }
parent_name_keygoogle::protobuf::Symbol::QueryKey659     std::pair<const void*, StringPiece> parent_name_key() const {
660       return {parent, name};
661     }
662   };
663   // This constructor is implicit to allow for non-transparent lookups when
664   // necessary.
665   // For transparent lookup cases we query directly with the object without the
666   // type erasure layer.
Symbol(QueryKey & value)667   Symbol(QueryKey& value) : ptr_(&value) {  // NOLINT
668     value.symbol_type_ = QUERY_KEY;
669   }
query_key() const670   const QueryKey* query_key() const {
671     return type() == QUERY_KEY ? static_cast<const QueryKey*>(ptr_) : nullptr;
672   }
673 #undef DEFINE_MEMBERS
674 
type() const675   Type type() const { return static_cast<Type>(ptr_->symbol_type_); }
IsNull() const676   bool IsNull() const { return type() == NULL_SYMBOL; }
IsType() const677   bool IsType() const { return type() == MESSAGE || type() == ENUM; }
IsAggregate() const678   bool IsAggregate() const {
679     return IsType() || IsPackage() || type() == SERVICE;
680   }
IsPackage() const681   bool IsPackage() const {
682     return type() == FULL_PACKAGE || type() == SUB_PACKAGE;
683   }
684 
GetFile() const685   const FileDescriptor* GetFile() const {
686     switch (type()) {
687       case MESSAGE:
688         return descriptor()->file();
689       case FIELD:
690         return field_descriptor()->file();
691       case ONEOF:
692         return oneof_descriptor()->containing_type()->file();
693       case ENUM:
694         return enum_descriptor()->file();
695       case ENUM_VALUE:
696         return enum_value_descriptor()->type()->file();
697       case SERVICE:
698         return service_descriptor()->file();
699       case METHOD:
700         return method_descriptor()->service()->file();
701       case FULL_PACKAGE:
702         return file_descriptor();
703       case SUB_PACKAGE:
704         return sub_package_file_descriptor()->file;
705       default:
706         return nullptr;
707     }
708   }
709 
full_name() const710   StringPiece full_name() const {
711     switch (type()) {
712       case MESSAGE:
713         return descriptor()->full_name();
714       case FIELD:
715         return field_descriptor()->full_name();
716       case ONEOF:
717         return oneof_descriptor()->full_name();
718       case ENUM:
719         return enum_descriptor()->full_name();
720       case ENUM_VALUE:
721         return enum_value_descriptor()->full_name();
722       case SERVICE:
723         return service_descriptor()->full_name();
724       case METHOD:
725         return method_descriptor()->full_name();
726       case FULL_PACKAGE:
727         return file_descriptor()->package();
728       case SUB_PACKAGE:
729         return StringPiece(sub_package_file_descriptor()->file->package())
730             .substr(0, sub_package_file_descriptor()->name_size);
731       case QUERY_KEY:
732         return query_key()->full_name();
733       default:
734         GOOGLE_CHECK(false);
735     }
736     return "";
737   }
738 
parent_name_key() const739   std::pair<const void*, StringPiece> parent_name_key() const {
740     const auto or_file = [&](const void* p) { return p ? p : GetFile(); };
741     switch (type()) {
742       case MESSAGE:
743         return {or_file(descriptor()->containing_type()), descriptor()->name()};
744       case FIELD: {
745         auto* field = field_descriptor();
746         return {or_file(field->is_extension() ? field->extension_scope()
747                                               : field->containing_type()),
748                 field->name()};
749       }
750       case ONEOF:
751         return {oneof_descriptor()->containing_type(),
752                 oneof_descriptor()->name()};
753       case ENUM:
754         return {or_file(enum_descriptor()->containing_type()),
755                 enum_descriptor()->name()};
756       case ENUM_VALUE:
757         return {or_file(enum_value_descriptor()->type()->containing_type()),
758                 enum_value_descriptor()->name()};
759       case ENUM_VALUE_OTHER_PARENT:
760         return {enum_value_descriptor()->type(),
761                 enum_value_descriptor()->name()};
762       case SERVICE:
763         return {GetFile(), service_descriptor()->name()};
764       case METHOD:
765         return {method_descriptor()->service(), method_descriptor()->name()};
766       case QUERY_KEY:
767         return query_key()->parent_name_key();
768       default:
769         GOOGLE_CHECK(false);
770     }
771     return {};
772   }
773 
parent_number_key() const774   std::pair<const void*, int> parent_number_key() const {
775     switch (type()) {
776       case FIELD:
777         return {field_descriptor()->containing_type(),
778                 field_descriptor()->number()};
779       case ENUM_VALUE:
780         return {enum_value_descriptor()->type(),
781                 enum_value_descriptor()->number()};
782       case QUERY_KEY:
783         return query_key()->parent_number_key();
784       default:
785         GOOGLE_CHECK(false);
786     }
787     return {};
788   }
789 
790  private:
791   const internal::SymbolBase* ptr_;
792 };
793 
794 const FieldDescriptor::CppType
795     FieldDescriptor::kTypeToCppTypeMap[MAX_TYPE + 1] = {
796         static_cast<CppType>(0),  // 0 is reserved for errors
797 
798         CPPTYPE_DOUBLE,   // TYPE_DOUBLE
799         CPPTYPE_FLOAT,    // TYPE_FLOAT
800         CPPTYPE_INT64,    // TYPE_INT64
801         CPPTYPE_UINT64,   // TYPE_UINT64
802         CPPTYPE_INT32,    // TYPE_INT32
803         CPPTYPE_UINT64,   // TYPE_FIXED64
804         CPPTYPE_UINT32,   // TYPE_FIXED32
805         CPPTYPE_BOOL,     // TYPE_BOOL
806         CPPTYPE_STRING,   // TYPE_STRING
807         CPPTYPE_MESSAGE,  // TYPE_GROUP
808         CPPTYPE_MESSAGE,  // TYPE_MESSAGE
809         CPPTYPE_STRING,   // TYPE_BYTES
810         CPPTYPE_UINT32,   // TYPE_UINT32
811         CPPTYPE_ENUM,     // TYPE_ENUM
812         CPPTYPE_INT32,    // TYPE_SFIXED32
813         CPPTYPE_INT64,    // TYPE_SFIXED64
814         CPPTYPE_INT32,    // TYPE_SINT32
815         CPPTYPE_INT64,    // TYPE_SINT64
816 };
817 
// Lookup table from a FieldDescriptor::Type value (used as the array index)
// to its lowercase name string. Slot 0 holds "ERROR" because 0 is reserved
// for errors; the trailing comment on each row names the Type it belongs to.
const char* const FieldDescriptor::kTypeToName[MAX_TYPE + 1] = {
    "ERROR",  // 0 is reserved for errors

    "double",    // TYPE_DOUBLE
    "float",     // TYPE_FLOAT
    "int64",     // TYPE_INT64
    "uint64",    // TYPE_UINT64
    "int32",     // TYPE_INT32
    "fixed64",   // TYPE_FIXED64
    "fixed32",   // TYPE_FIXED32
    "bool",      // TYPE_BOOL
    "string",    // TYPE_STRING
    "group",     // TYPE_GROUP
    "message",   // TYPE_MESSAGE
    "bytes",     // TYPE_BYTES
    "uint32",    // TYPE_UINT32
    "enum",      // TYPE_ENUM
    "sfixed32",  // TYPE_SFIXED32
    "sfixed64",  // TYPE_SFIXED64
    "sint32",    // TYPE_SINT32
    "sint64",    // TYPE_SINT64
};
840 
// Lookup table from a FieldDescriptor::CppType value (used as the array
// index) to its lowercase name string. Slot 0 holds "ERROR" because 0 is
// reserved for errors; the trailing comment on each row names the CppType it
// belongs to.
const char* const FieldDescriptor::kCppTypeToName[MAX_CPPTYPE + 1] = {
    "ERROR",  // 0 is reserved for errors

    "int32",    // CPPTYPE_INT32
    "int64",    // CPPTYPE_INT64
    "uint32",   // CPPTYPE_UINT32
    "uint64",   // CPPTYPE_UINT64
    "double",   // CPPTYPE_DOUBLE
    "float",    // CPPTYPE_FLOAT
    "bool",     // CPPTYPE_BOOL
    "enum",     // CPPTYPE_ENUM
    "string",   // CPPTYPE_STRING
    "message",  // CPPTYPE_MESSAGE
};
855 
// Lookup table from a FieldDescriptor::Label value (used as the array index)
// to its keyword string. Slot 0 holds "ERROR" because 0 is reserved for
// errors.
const char* const FieldDescriptor::kLabelToName[MAX_LABEL + 1] = {
    "ERROR",  // 0 is reserved for errors

    "optional",  // LABEL_OPTIONAL
    "required",  // LABEL_REQUIRED
    "repeated",  // LABEL_REPEATED
};
863 
SyntaxName(FileDescriptor::Syntax syntax)864 const char* FileDescriptor::SyntaxName(FileDescriptor::Syntax syntax) {
865   switch (syntax) {
866     case SYNTAX_PROTO2:
867       return "proto2";
868     case SYNTAX_PROTO3:
869       return "proto3";
870     case SYNTAX_UNKNOWN:
871       return "unknown";
872   }
873   GOOGLE_LOG(FATAL) << "can't reach here.";
874   return nullptr;
875 }
876 
// Fully-qualified name of the message type substituted for a weak message
// that is not linked in.
// NOTE(review): meaning inferred from the identifier; confirm against the
// code that uses this constant.
static const char* const kNonLinkedWeakMessageReplacementName = "google.protobuf.Empty";

// Out-of-line definitions (no initializer here) of FieldDescriptor's static
// integer constants. They are compiled for every non-MSVC toolchain and for
// MSVC versions in the range [1900, 1912) only.
#if !defined(_MSC_VER) || (_MSC_VER >= 1900 && _MSC_VER < 1912)
const int FieldDescriptor::kMaxNumber;
const int FieldDescriptor::kFirstReservedNumber;
const int FieldDescriptor::kLastReservedNumber;
#endif
884 
885 namespace {
886 
EnumValueToPascalCase(const std::string & input)887 std::string EnumValueToPascalCase(const std::string& input) {
888   bool next_upper = true;
889   std::string result;
890   result.reserve(input.size());
891 
892   for (char character : input) {
893     if (character == '_') {
894       next_upper = true;
895     } else {
896       if (next_upper) {
897         result.push_back(ToUpper(character));
898       } else {
899         result.push_back(ToLower(character));
900       }
901       next_upper = false;
902     }
903   }
904 
905   return result;
906 }
907 
908 // Class to remove an enum prefix from enum values.
909 class PrefixRemover {
910  public:
PrefixRemover(StringPiece prefix)911   PrefixRemover(StringPiece prefix) {
912     // Strip underscores and lower-case the prefix.
913     for (char character : prefix) {
914       if (character != '_') {
915         prefix_ += ascii_tolower(character);
916       }
917     }
918   }
919 
920   // Tries to remove the enum prefix from this enum value.
921   // If this is not possible, returns the input verbatim.
MaybeRemove(StringPiece str)922   std::string MaybeRemove(StringPiece str) {
923     // We can't just lowercase and strip str and look for a prefix.
924     // We need to properly recognize the difference between:
925     //
926     //   enum Foo {
927     //     FOO_BAR_BAZ = 0;
928     //     FOO_BARBAZ = 1;
929     //   }
930     //
931     // This is acceptable (though perhaps not advisable) because even when
932     // we PascalCase, these two will still be distinct (BarBaz vs. Barbaz).
933     size_t i, j;
934 
935     // Skip past prefix_ in str if we can.
936     for (i = 0, j = 0; i < str.size() && j < prefix_.size(); i++) {
937       if (str[i] == '_') {
938         continue;
939       }
940 
941       if (ascii_tolower(str[i]) != prefix_[j++]) {
942         return std::string(str);
943       }
944     }
945 
946     // If we didn't make it through the prefix, we've failed to strip the
947     // prefix.
948     if (j < prefix_.size()) {
949       return std::string(str);
950     }
951 
952     // Skip underscores between prefix and further characters.
953     while (i < str.size() && str[i] == '_') {
954       i++;
955     }
956 
957     // Enum label can't be the empty string.
958     if (i == str.size()) {
959       return std::string(str);
960     }
961 
962     // We successfully stripped the prefix.
963     str.remove_prefix(i);
964     return std::string(str);
965   }
966 
967  private:
968   std::string prefix_;
969 };
970 
// A DescriptorPool contains a bunch of hash-maps to implement the
// various Find*By*() methods.  Since hashtable lookups are O(1), it's
// most efficient to construct a fixed set of large hash-maps used by
// all objects in the pool rather than construct one or more small
// hash-maps for each object.
//
// The keys to these hash-maps are (parent, name) or (parent, number) pairs.

// A (parent pointer, name) key. The name is a StringPiece, i.e. a view: the
// key does not own the characters it refers to.
typedef std::pair<const void*, StringPiece> PointerStringPair;

// A (message descriptor, number) key.
typedef std::pair<const Descriptor*, int> DescriptorIntPair;

// Shorthands for the hash containers and the hash functor template used by
// the table types defined below.
#define HASH_MAP std::unordered_map
#define HASH_SET std::unordered_set
#define HASH_FXN hash
986 
// Hash functor for (pointer, integer) pairs. Each half of the pair is
// multiplied by its own constant and the two products are XOR-ed together.
template <typename PairType>
struct PointerIntegerPairHash {
  size_t operator()(const PairType& p) const {
    constexpr size_t kPointerMultiplier = 16777499;
    constexpr size_t kIntegerMultiplier = 16777619;
    const size_t pointer_part =
        reinterpret_cast<size_t>(p.first) * kPointerMultiplier;
    const size_t integer_part =
        static_cast<size_t>(p.second) * kIntegerMultiplier;
    return pointer_part ^ integer_part;
  }

#ifdef _MSC_VER
  // Used only by MSVC and platforms where hash_map is not available.
  static const size_t bucket_size = 4;
  static const size_t min_buckets = 8;
#endif
  // Strict-weak-ordering overload: true when `a` sorts before `b` under the
  // pair's own operator<.
  inline bool operator()(const PairType& a, const PairType& b) const {
    return a < b;
  }
};
1005 
1006 struct PointerStringPairHash {
operator ()google::protobuf::__anon94551f7b0411::PointerStringPairHash1007   size_t operator()(const PointerStringPair& p) const {
1008     static const size_t prime = 16777619;
1009     hash<StringPiece> string_hash;
1010     return reinterpret_cast<size_t>(p.first) * prime ^
1011            static_cast<size_t>(string_hash(p.second));
1012   }
1013 
1014 #ifdef _MSC_VER
1015   // Used only by MSVC and platforms where hash_map is not available.
1016   static const size_t bucket_size = 4;
1017   static const size_t min_buckets = 8;
1018 #endif
operator ()google::protobuf::__anon94551f7b0411::PointerStringPairHash1019   inline bool operator()(const PointerStringPair& a,
1020                          const PointerStringPair& b) const {
1021     return a < b;
1022   }
1023 };
1024 
1025 
1026 struct SymbolByFullNameHash {
1027   using is_transparent = void;
1028 
1029   template <typename T>
operator ()google::protobuf::__anon94551f7b0411::SymbolByFullNameHash1030   size_t operator()(const T& s) const {
1031     return HASH_FXN<StringPiece>{}(s.full_name());
1032   }
1033 };
// Transparent equality functor: two objects (possibly of different types)
// are equal when their full_name() values compare equal.
struct SymbolByFullNameEq {
  using is_transparent = void;

  template <typename Lhs, typename Rhs>
  bool operator()(const Lhs& lhs, const Rhs& rhs) const {
    const auto& lhs_name = lhs.full_name();
    const auto& rhs_name = rhs.full_name();
    return lhs_name == rhs_name;
  }
};
// Hash set of Symbols keyed by full_name(). Both functors declare
// is_transparent, so their operator() templates accept any type exposing
// full_name(), not only Symbol.
using SymbolsByNameSet =
    HASH_SET<Symbol, SymbolByFullNameHash, SymbolByFullNameEq>;
1044 
1045 struct SymbolByParentHash {
1046   using is_transparent = void;
1047 
1048   template <typename T>
operator ()google::protobuf::__anon94551f7b0411::SymbolByParentHash1049   size_t operator()(const T& s) const {
1050     return PointerStringPairHash{}(s.parent_name_key());
1051   }
1052 };
// Transparent equality functor: two objects (possibly of different types)
// are equal when their parent_name_key() values compare equal.
struct SymbolByParentEq {
  using is_transparent = void;

  template <typename Lhs, typename Rhs>
  bool operator()(const Lhs& lhs, const Rhs& rhs) const {
    const auto& lhs_key = lhs.parent_name_key();
    const auto& rhs_key = rhs.parent_name_key();
    return lhs_key == rhs_key;
  }
};
// Hash set of Symbols keyed by parent_name_key(), i.e. by (parent, name).
using SymbolsByParentSet =
    HASH_SET<Symbol, SymbolByParentHash, SymbolByParentEq>;

// Maps a file name to its FileDescriptor. The key is a StringPiece, so the
// map does not own the name characters.
typedef HASH_MAP<StringPiece, const FileDescriptor*,
                 HASH_FXN<StringPiece>>
    FilesByNameMap;

// Maps a (parent, name) key to a FieldDescriptor.
typedef HASH_MAP<PointerStringPair, const FieldDescriptor*,
                 PointerStringPairHash>
    FieldsByNameMap;
1071 
1072 struct FieldsByNumberHash {
1073   using is_transparent = void;
1074 
1075   template <typename T>
operator ()google::protobuf::__anon94551f7b0411::FieldsByNumberHash1076   size_t operator()(const T& s) const {
1077     return PointerIntegerPairHash<std::pair<const void*, int>>{}(
1078         s.parent_number_key());
1079   }
1080 };
// Transparent equality functor: two objects (possibly of different types)
// are equal when their parent_number_key() values compare equal.
struct FieldsByNumberEq {
  using is_transparent = void;

  template <typename Lhs, typename Rhs>
  bool operator()(const Lhs& lhs, const Rhs& rhs) const {
    const auto& lhs_key = lhs.parent_number_key();
    const auto& rhs_key = rhs.parent_number_key();
    return lhs_key == rhs_key;
  }
};
// Hash set of Symbols keyed by parent_number_key(), i.e. by
// (parent, number).
using FieldsByNumberSet =
    HASH_SET<Symbol, FieldsByNumberHash, FieldsByNumberEq>;
// Enum values are keyed the same way, so they reuse the same set type.
using EnumValuesByNumberSet = FieldsByNumberSet;

// This is a map rather than a hash-map, since we use it to iterate
// through all the extensions that extend a given Descriptor, and an
// ordered data structure that implements lower_bound is convenient
// for that.
typedef std::map<DescriptorIntPair, const FieldDescriptor*>
    ExtensionsGroupedByDescriptorMap;
// Maps a path, encoded as a std::string, to a source location.
typedef HASH_MAP<std::string, const SourceCodeInfo_Location*>
    LocationsByPathMap;
1101 
// Builds the set of fully-qualified message names that proto3 files may
// extend: each descriptor option message, once under each of the two
// package names that descriptor.proto can have. The caller owns the
// returned heap-allocated set.
std::set<std::string>* NewAllowedProto3Extendee() {
  const char* const kOptionMessages[] = {
      "FileOptions",   "MessageOptions",   "FieldOptions",
      "EnumOptions",   "EnumValueOptions", "ServiceOptions",
      "MethodOptions", "OneofOptions",     "ExtensionRangeOptions"};

  // descriptor.proto has a different package name in opensource. Both are
  // accepted so the opensource protocol compiler can also compile internal
  // proto3 files with custom options. See: b/27567912
  const std::string opensource_package("google.protobuf.");
  // The word is deliberately split so that the opensource processing
  // scripts keep the original package name.
  const std::string original_package = std::string("proto") + "2.";

  auto* extendees = new std::set<std::string>;
  for (const char* message : kOptionMessages) {
    extendees->insert(opensource_package + message);
    extendees->insert(original_package + message);
  }
  return extendees;
}
1120 
1121 // Checks whether the extendee type is allowed in proto3.
1122 // Only extensions to descriptor options are allowed. We use name comparison
1123 // instead of comparing the descriptor directly because the extensions may be
1124 // defined in a different pool.
AllowedExtendeeInProto3(const std::string & name)1125 bool AllowedExtendeeInProto3(const std::string& name) {
1126   static auto allowed_proto3_extendees =
1127       internal::OnShutdownDelete(NewAllowedProto3Extendee());
1128   return allowed_proto3_extendees->find(name) !=
1129          allowed_proto3_extendees->end();
1130 }
1131 }  // anonymous namespace
1132 
// Contains tables specific to a particular file.  These tables are not
// modified once the file has been constructed, so they need not be
// protected by a mutex.  This makes operations that depend only on the
// contents of a single file -- e.g. Descriptor::FindFieldByName() --
// lock-free.
//
// Exceptions, visible in the member list below: the lowercase/camelcase
// name maps and the locations-by-path map are built lazily behind
// once_flags, and the unknown-enum-values set can grow after construction
// and is guarded by its own mutex.
//
// For historical reasons, the definitions of the methods of
// FileDescriptorTables and DescriptorPool::Tables are interleaved below.
// These used to be a single class.
class FileDescriptorTables {
 public:
  FileDescriptorTables();
  // Deletes the lazily-built lowercase and camelcase name maps.
  ~FileDescriptorTables();

  // Empty table, used with placeholder files.
  inline static const FileDescriptorTables& GetEmptyInstance();

  // -----------------------------------------------------------------
  // Finding items.

  // Looks up the symbol registered under (parent, name).
  // Returns a null Symbol (symbol.IsNull() is true) if not found.
  inline Symbol FindNestedSymbol(const void* parent,
                                 StringPiece name) const;

  // These return nullptr if not found.
  inline const FieldDescriptor* FindFieldByNumber(const Descriptor* parent,
                                                  int number) const;
  inline const FieldDescriptor* FindFieldByLowercaseName(
      const void* parent, StringPiece lowercase_name) const;
  inline const FieldDescriptor* FindFieldByCamelcaseName(
      const void* parent, StringPiece camelcase_name) const;
  inline const EnumValueDescriptor* FindEnumValueByNumber(
      const EnumDescriptor* parent, int number) const;
  // This creates a new EnumValueDescriptor if not found, in a thread-safe way.
  inline const EnumValueDescriptor* FindEnumValueByNumberCreatingIfUnknown(
      const EnumDescriptor* parent, int number) const;

  // -----------------------------------------------------------------
  // Adding items.

  // These add items to the corresponding tables.  They return false if
  // the key already exists in the table.
  bool AddAliasUnderParent(const void* parent, const std::string& name,
                           Symbol symbol);
  bool AddFieldByNumber(FieldDescriptor* field);
  bool AddEnumValueByNumber(EnumValueDescriptor* value);

  // Populates p->first->locations_by_path_ from p->second.
  // Unusual signature dictated by internal::call_once.
  static void BuildLocationsByPath(
      std::pair<const FileDescriptorTables*, const SourceCodeInfo*>* p);

  // Returns the location denoted by the specified path through info,
  // or nullptr if not found.
  // The value of info must be that of the corresponding FileDescriptor.
  // (Conceptually a pure function, but stateful as an optimisation.)
  const SourceCodeInfo_Location* GetSourceLocation(
      const std::vector<int>& path, const SourceCodeInfo* info) const;

  // Must be called after BuildFileImpl(), even if the build failed and
  // we are going to roll back to the last checkpoint.
  void FinalizeTables();

 private:
  // Returns the parent pointer under which `field` is keyed in the
  // by-name field maps.
  const void* FindParentForFieldsByMap(const FieldDescriptor* field) const;
  // Lazy builders for the lowercase / camelcase name maps. Each map has a
  // static entry point taking the tables pointer and a const member that
  // does the work.
  // NOTE(review): presumably the static forms are what gets passed to
  // call_once with the once_flags below -- confirm at the call sites.
  static void FieldsByLowercaseNamesLazyInitStatic(
      const FileDescriptorTables* tables);
  void FieldsByLowercaseNamesLazyInitInternal() const;
  static void FieldsByCamelcaseNamesLazyInitStatic(
      const FileDescriptorTables* tables);
  void FieldsByCamelcaseNamesLazyInitInternal() const;

  // Symbols keyed by (parent, name).
  SymbolsByParentSet symbols_by_parent_;
  mutable internal::once_flag fields_by_lowercase_name_once_;
  mutable internal::once_flag fields_by_camelcase_name_once_;
  // Make these fields atomic to avoid race conditions with
  // GetEstimatedOwnedMemoryBytesSize. Once the pointer is set the map won't
  // change anymore.
  mutable std::atomic<const FieldsByNameMap*> fields_by_lowercase_name_{};
  mutable std::atomic<const FieldsByNameMap*> fields_by_camelcase_name_{};
  // Fields keyed by (parent, number).
  FieldsByNumberSet fields_by_number_;  // Not including extensions.
  // Enum values keyed by (parent, number).
  EnumValuesByNumberSet enum_values_by_number_;
  // Enum values keyed by (parent, number) for numbers not in
  // enum_values_by_number_; mutable, and guarded by unknown_enum_values_mu_.
  mutable EnumValuesByNumberSet unknown_enum_values_by_number_
      PROTOBUF_GUARDED_BY(unknown_enum_values_mu_);

  // Populated on first request to save space, hence constness games.
  mutable internal::once_flag locations_by_path_once_;
  mutable LocationsByPathMap locations_by_path_;

  // Mutex to protect the unknown-enum-value map due to dynamic
  // EnumValueDescriptor creation on unknown values.
  mutable internal::WrappedMutex unknown_enum_values_mu_;
};
1226 
1227 namespace internal {
1228 
// Small sequential allocator to be used within a single file.
// Most of the memory for a single FileDescriptor and everything under it is
// allocated in a single block of memory, with the FlatAllocator giving it out
// in parts later.
// The code first plans the total number of bytes needed by calling PlanArray
// with all the allocations that will happen afterwards, then calls
// FinalizePlanning passing the underlying allocator (the DescriptorPool::Tables
// instance), and then proceeds to get the memory via
// `AllocateArray`/`AllocateString` calls. The planning calls and the
// allocating calls have to match each other, though not necessarily in the
// same order.
//
// The class body is empty: everything is inherited from the type produced by
// applying FlatAllocatorImpl to the list of types below after they have been
// passed through SortByAlignment.
class FlatAllocator
    : public decltype(ApplyTypeList<FlatAllocatorImpl>(
          SortByAlignment<char, std::string, SourceCodeInfo,
                          FileDescriptorTables,
                          // Option types
                          MessageOptions, FieldOptions, EnumOptions,
                          EnumValueOptions, ExtensionRangeOptions, OneofOptions,
                          ServiceOptions, MethodOptions, FileOptions>())) {};
1248 
1249 }  // namespace internal
1250 
1251 // ===================================================================
1252 // DescriptorPool::Tables
1253 
1254 class DescriptorPool::Tables {
1255  public:
1256   Tables();
1257   ~Tables();
1258 
1259   // Record the current state of the tables to the stack of checkpoints.
1260   // Each call to AddCheckpoint() must be paired with exactly one call to either
1261   // ClearLastCheckpoint() or RollbackToLastCheckpoint().
1262   //
1263   // This is used when building files, since some kinds of validation errors
1264   // cannot be detected until the file's descriptors have already been added to
1265   // the tables.
1266   //
1267   // This supports recursive checkpoints, since building a file may trigger
1268   // recursive building of other files. Note that recursive checkpoints are not
1269   // normally necessary; explicit dependencies are built prior to checkpointing.
1270   // So although we recursively build transitive imports, there is at most one
1271   // checkpoint in the stack during dependency building.
1272   //
1273   // Recursive checkpoints only arise during cross-linking of the descriptors.
1274   // Symbol references must be resolved, via DescriptorBuilder::FindSymbol and
1275   // friends. If the pending file references an unknown symbol
1276   // (e.g., it is not defined in the pending file's explicit dependencies), and
1277   // the pool is using a fallback database, and that database contains a file
1278   // defining that symbol, and that file has not yet been built by the pool,
1279   // the pool builds the file during cross-linking, leading to another
1280   // checkpoint.
1281   void AddCheckpoint();
1282 
1283   // Mark the last checkpoint as having cleared successfully, removing it from
1284   // the stack. If the stack is empty, all pending symbols will be committed.
1285   //
1286   // Note that this does not guarantee that the symbols added since the last
1287   // checkpoint won't be rolled back: if a checkpoint gets rolled back,
1288   // everything past that point gets rolled back, including symbols added after
1289   // checkpoints that were pushed onto the stack after it and marked as cleared.
1290   void ClearLastCheckpoint();
1291 
1292   // Roll back the Tables to the state of the checkpoint at the top of the
1293   // stack, removing everything that was added after that point.
1294   void RollbackToLastCheckpoint();
1295 
1296   // The stack of files which are currently being built.  Used to detect
1297   // cyclic dependencies when loading files from a DescriptorDatabase.  Not
1298   // used when fallback_database_ == nullptr.
1299   std::vector<std::string> pending_files_;
1300 
1301   // A set of files which we have tried to load from the fallback database
1302   // and encountered errors.  We will not attempt to load them again during
1303   // execution of the current public API call, but for compatibility with
1304   // legacy clients, this is cleared at the beginning of each public API call.
1305   // Not used when fallback_database_ == nullptr.
1306   HASH_SET<std::string> known_bad_files_;
1307 
1308   // A set of symbols which we have tried to load from the fallback database
1309   // and encountered errors. We will not attempt to load them again during
1310   // execution of the current public API call, but for compatibility with
1311   // legacy clients, this is cleared at the beginning of each public API call.
1312   HASH_SET<std::string> known_bad_symbols_;
1313 
1314   // The set of descriptors for which we've already loaded the full
1315   // set of extensions numbers from fallback_database_.
1316   HASH_SET<const Descriptor*> extensions_loaded_from_db_;
1317 
1318   // Maps type name to Descriptor::WellKnownType.  This is logically global
1319   // and const, but we make it a member here to simplify its construction and
1320   // destruction.  This only has 20-ish entries and is one per DescriptorPool,
1321   // so the overhead is small.
1322   HASH_MAP<std::string, Descriptor::WellKnownType> well_known_types_;
1323 
1324   // -----------------------------------------------------------------
1325   // Finding items.
1326 
1327   // Find symbols.  This returns a null Symbol (symbol.IsNull() is true)
1328   // if not found.
1329   inline Symbol FindSymbol(StringPiece key) const;
1330 
1331   // This implements the body of DescriptorPool::Find*ByName().  It should
1332   // really be a private method of DescriptorPool, but that would require
1333   // declaring Symbol in descriptor.h, which would drag all kinds of other
1334   // stuff into the header.  Yay C++.
1335   Symbol FindByNameHelper(const DescriptorPool* pool, StringPiece name);
1336 
1337   // These return nullptr if not found.
1338   inline const FileDescriptor* FindFile(StringPiece key) const;
1339   inline const FieldDescriptor* FindExtension(const Descriptor* extendee,
1340                                               int number) const;
1341   inline void FindAllExtensions(const Descriptor* extendee,
1342                                 std::vector<const FieldDescriptor*>* out) const;
1343 
1344   // -----------------------------------------------------------------
1345   // Adding items.
1346 
1347   // These add items to the corresponding tables.  They return false if
1348   // the key already exists in the table.  For AddSymbol(), the string passed
1349   // in must be one that was constructed using AllocateString(), as it will
1350   // be used as a key in the symbols_by_name_ map without copying.
1351   bool AddSymbol(const std::string& full_name, Symbol symbol);
1352   bool AddFile(const FileDescriptor* file);
1353   bool AddExtension(const FieldDescriptor* field);
1354 
1355   // -----------------------------------------------------------------
1356   // Allocating memory.
1357 
1358   // Allocate an object which will be reclaimed when the pool is
1359   // destroyed.  Note that the object's destructor will never be called,
1360   // so its fields must be plain old data (primitive data types and
1361   // pointers).  All of the descriptor types are such objects.
1362   template <typename Type>
1363   Type* Allocate();
1364 
1365   // Allocate some bytes which will be reclaimed when the pool is
1366   // destroyed. Memory is aligned to 8 bytes.
1367   void* AllocateBytes(int size);
1368 
1369   // Create a FlatAllocation for the corresponding sizes.
1370   // All objects within it will be default constructed.
1371   // The whole allocation, including the non-trivial objects within, will be
1372   // destroyed with the pool.
1373   template <typename... T>
1374   internal::FlatAllocator::Allocation* CreateFlatAlloc(
1375       const TypeMap<IntT, T...>& sizes);
1376 
1377 
1378  private:
1379   // All memory allocated in the pool.  Must be first as other objects can
1380   // point into these.
1381   struct MiscDeleter {
operator ()google::protobuf::DescriptorPool::Tables::MiscDeleter1382     void operator()(int* p) const { internal::SizedDelete(p, *p + 8); }
1383   };
1384   // Miscellaneous allocations are length prefixed. The paylaod is 8 bytes after
1385   // the `int` that contains the size. This keeps the payload aligned.
1386   std::vector<std::unique_ptr<int, MiscDeleter>> misc_allocs_;
1387   struct FlatAllocDeleter {
operator ()google::protobuf::DescriptorPool::Tables::FlatAllocDeleter1388     void operator()(internal::FlatAllocator::Allocation* p) const {
1389       p->Destroy();
1390     }
1391   };
1392   std::vector<
1393       std::unique_ptr<internal::FlatAllocator::Allocation, FlatAllocDeleter>>
1394       flat_allocs_;
1395 
1396   SymbolsByNameSet symbols_by_name_;
1397   FilesByNameMap files_by_name_;
1398   ExtensionsGroupedByDescriptorMap extensions_;
1399 
1400   struct CheckPoint {
CheckPointgoogle::protobuf::DescriptorPool::Tables::CheckPoint1401     explicit CheckPoint(const Tables* tables)
1402         : flat_allocations_before_checkpoint(
1403               static_cast<int>(tables->flat_allocs_.size())),
1404           misc_allocations_before_checkpoint(
1405               static_cast<int>(tables->misc_allocs_.size())),
1406           pending_symbols_before_checkpoint(
1407               tables->symbols_after_checkpoint_.size()),
1408           pending_files_before_checkpoint(
1409               tables->files_after_checkpoint_.size()),
1410           pending_extensions_before_checkpoint(
1411               tables->extensions_after_checkpoint_.size()) {}
1412     int flat_allocations_before_checkpoint;
1413     int misc_allocations_before_checkpoint;
1414     int pending_symbols_before_checkpoint;
1415     int pending_files_before_checkpoint;
1416     int pending_extensions_before_checkpoint;
1417   };
1418   std::vector<CheckPoint> checkpoints_;
1419   std::vector<Symbol> symbols_after_checkpoint_;
1420   std::vector<const FileDescriptor*> files_after_checkpoint_;
1421   std::vector<DescriptorIntPair> extensions_after_checkpoint_;
1422 };
1423 
Tables()1424 DescriptorPool::Tables::Tables() {
1425   well_known_types_.insert({
1426       {"google.protobuf.DoubleValue", Descriptor::WELLKNOWNTYPE_DOUBLEVALUE},
1427       {"google.protobuf.FloatValue", Descriptor::WELLKNOWNTYPE_FLOATVALUE},
1428       {"google.protobuf.Int64Value", Descriptor::WELLKNOWNTYPE_INT64VALUE},
1429       {"google.protobuf.UInt64Value", Descriptor::WELLKNOWNTYPE_UINT64VALUE},
1430       {"google.protobuf.Int32Value", Descriptor::WELLKNOWNTYPE_INT32VALUE},
1431       {"google.protobuf.UInt32Value", Descriptor::WELLKNOWNTYPE_UINT32VALUE},
1432       {"google.protobuf.StringValue", Descriptor::WELLKNOWNTYPE_STRINGVALUE},
1433       {"google.protobuf.BytesValue", Descriptor::WELLKNOWNTYPE_BYTESVALUE},
1434       {"google.protobuf.BoolValue", Descriptor::WELLKNOWNTYPE_BOOLVALUE},
1435       {"google.protobuf.Any", Descriptor::WELLKNOWNTYPE_ANY},
1436       {"google.protobuf.FieldMask", Descriptor::WELLKNOWNTYPE_FIELDMASK},
1437       {"google.protobuf.Duration", Descriptor::WELLKNOWNTYPE_DURATION},
1438       {"google.protobuf.Timestamp", Descriptor::WELLKNOWNTYPE_TIMESTAMP},
1439       {"google.protobuf.Value", Descriptor::WELLKNOWNTYPE_VALUE},
1440       {"google.protobuf.ListValue", Descriptor::WELLKNOWNTYPE_LISTVALUE},
1441       {"google.protobuf.Struct", Descriptor::WELLKNOWNTYPE_STRUCT},
1442   });
1443 }
1444 
~Tables()1445 DescriptorPool::Tables::~Tables() { GOOGLE_DCHECK(checkpoints_.empty()); }
1446 
FileDescriptorTables()1447 FileDescriptorTables::FileDescriptorTables() {}
1448 
~FileDescriptorTables()1449 FileDescriptorTables::~FileDescriptorTables() {
1450   delete fields_by_lowercase_name_.load(std::memory_order_acquire);
1451   delete fields_by_camelcase_name_.load(std::memory_order_acquire);
1452 }
1453 
GetEmptyInstance()1454 inline const FileDescriptorTables& FileDescriptorTables::GetEmptyInstance() {
1455   static auto file_descriptor_tables =
1456       internal::OnShutdownDelete(new FileDescriptorTables());
1457   return *file_descriptor_tables;
1458 }
1459 
AddCheckpoint()1460 void DescriptorPool::Tables::AddCheckpoint() {
1461   checkpoints_.push_back(CheckPoint(this));
1462 }
1463 
ClearLastCheckpoint()1464 void DescriptorPool::Tables::ClearLastCheckpoint() {
1465   GOOGLE_DCHECK(!checkpoints_.empty());
1466   checkpoints_.pop_back();
1467   if (checkpoints_.empty()) {
1468     // All checkpoints have been cleared: we can now commit all of the pending
1469     // data.
1470     symbols_after_checkpoint_.clear();
1471     files_after_checkpoint_.clear();
1472     extensions_after_checkpoint_.clear();
1473   }
1474 }
1475 
RollbackToLastCheckpoint()1476 void DescriptorPool::Tables::RollbackToLastCheckpoint() {
1477   GOOGLE_DCHECK(!checkpoints_.empty());
1478   const CheckPoint& checkpoint = checkpoints_.back();
1479 
1480   for (size_t i = checkpoint.pending_symbols_before_checkpoint;
1481        i < symbols_after_checkpoint_.size(); i++) {
1482     symbols_by_name_.erase(symbols_after_checkpoint_[i]);
1483   }
1484   for (size_t i = checkpoint.pending_files_before_checkpoint;
1485        i < files_after_checkpoint_.size(); i++) {
1486     files_by_name_.erase(files_after_checkpoint_[i]->name());
1487   }
1488   for (size_t i = checkpoint.pending_extensions_before_checkpoint;
1489        i < extensions_after_checkpoint_.size(); i++) {
1490     extensions_.erase(extensions_after_checkpoint_[i]);
1491   }
1492 
1493   symbols_after_checkpoint_.resize(
1494       checkpoint.pending_symbols_before_checkpoint);
1495   files_after_checkpoint_.resize(checkpoint.pending_files_before_checkpoint);
1496   extensions_after_checkpoint_.resize(
1497       checkpoint.pending_extensions_before_checkpoint);
1498 
1499   flat_allocs_.resize(checkpoint.flat_allocations_before_checkpoint);
1500   misc_allocs_.resize(checkpoint.misc_allocations_before_checkpoint);
1501   checkpoints_.pop_back();
1502 }
1503 
1504 // -------------------------------------------------------------------
1505 
FindSymbol(StringPiece key) const1506 inline Symbol DescriptorPool::Tables::FindSymbol(StringPiece key) const {
1507   Symbol::QueryKey name;
1508   name.name = key;
1509   auto it = symbols_by_name_.find(name);
1510   return it == symbols_by_name_.end() ? Symbol() : *it;
1511 }
1512 
FindNestedSymbol(const void * parent,StringPiece name) const1513 inline Symbol FileDescriptorTables::FindNestedSymbol(
1514     const void* parent, StringPiece name) const {
1515   Symbol::QueryKey query;
1516   query.name = name;
1517   query.parent = parent;
1518   auto it = symbols_by_parent_.find(query);
1519   return it == symbols_by_parent_.end() ? Symbol() : *it;
1520 }
1521 
FindByNameHelper(const DescriptorPool * pool,StringPiece name)1522 Symbol DescriptorPool::Tables::FindByNameHelper(const DescriptorPool* pool,
1523                                                 StringPiece name) {
1524   if (pool->mutex_ != nullptr) {
1525     // Fast path: the Symbol is already cached.  This is just a hash lookup.
1526     ReaderMutexLock lock(pool->mutex_);
1527     if (known_bad_symbols_.empty() && known_bad_files_.empty()) {
1528       Symbol result = FindSymbol(name);
1529       if (!result.IsNull()) return result;
1530     }
1531   }
1532   MutexLockMaybe lock(pool->mutex_);
1533   if (pool->fallback_database_ != nullptr) {
1534     known_bad_symbols_.clear();
1535     known_bad_files_.clear();
1536   }
1537   Symbol result = FindSymbol(name);
1538 
1539   if (result.IsNull() && pool->underlay_ != nullptr) {
1540     // Symbol not found; check the underlay.
1541     result = pool->underlay_->tables_->FindByNameHelper(pool->underlay_, name);
1542   }
1543 
1544   if (result.IsNull()) {
1545     // Symbol still not found, so check fallback database.
1546     if (pool->TryFindSymbolInFallbackDatabase(name)) {
1547       result = FindSymbol(name);
1548     }
1549   }
1550 
1551   return result;
1552 }
1553 
FindFile(StringPiece key) const1554 inline const FileDescriptor* DescriptorPool::Tables::FindFile(
1555     StringPiece key) const {
1556   return FindPtrOrNull(files_by_name_, key);
1557 }
1558 
FindFieldByNumber(const Descriptor * parent,int number) const1559 inline const FieldDescriptor* FileDescriptorTables::FindFieldByNumber(
1560     const Descriptor* parent, int number) const {
1561   // If `number` is within the sequential range, just index into the parent
1562   // without doing a table lookup.
1563   if (parent != nullptr &&  //
1564       1 <= number && number <= parent->sequential_field_limit_) {
1565     return parent->field(number - 1);
1566   }
1567 
1568   Symbol::QueryKey query;
1569   query.parent = parent;
1570   query.field_number = number;
1571 
1572   auto it = fields_by_number_.find(query);
1573   return it == fields_by_number_.end() ? nullptr : it->field_descriptor();
1574 }
1575 
FindParentForFieldsByMap(const FieldDescriptor * field) const1576 const void* FileDescriptorTables::FindParentForFieldsByMap(
1577     const FieldDescriptor* field) const {
1578   if (field->is_extension()) {
1579     if (field->extension_scope() == nullptr) {
1580       return field->file();
1581     } else {
1582       return field->extension_scope();
1583     }
1584   } else {
1585     return field->containing_type();
1586   }
1587 }
1588 
FieldsByLowercaseNamesLazyInitStatic(const FileDescriptorTables * tables)1589 void FileDescriptorTables::FieldsByLowercaseNamesLazyInitStatic(
1590     const FileDescriptorTables* tables) {
1591   tables->FieldsByLowercaseNamesLazyInitInternal();
1592 }
1593 
FieldsByLowercaseNamesLazyInitInternal() const1594 void FileDescriptorTables::FieldsByLowercaseNamesLazyInitInternal() const {
1595   auto* map = new FieldsByNameMap;
1596   for (Symbol symbol : symbols_by_parent_) {
1597     const FieldDescriptor* field = symbol.field_descriptor();
1598     if (!field) continue;
1599     (*map)[{FindParentForFieldsByMap(field), field->lowercase_name().c_str()}] =
1600         field;
1601   }
1602   fields_by_lowercase_name_.store(map, std::memory_order_release);
1603 }
1604 
FindFieldByLowercaseName(const void * parent,StringPiece lowercase_name) const1605 inline const FieldDescriptor* FileDescriptorTables::FindFieldByLowercaseName(
1606     const void* parent, StringPiece lowercase_name) const {
1607   internal::call_once(
1608       fields_by_lowercase_name_once_,
1609       &FileDescriptorTables::FieldsByLowercaseNamesLazyInitStatic, this);
1610   return FindPtrOrNull(
1611       *fields_by_lowercase_name_.load(std::memory_order_acquire),
1612       PointerStringPair(parent, lowercase_name));
1613 }
1614 
FieldsByCamelcaseNamesLazyInitStatic(const FileDescriptorTables * tables)1615 void FileDescriptorTables::FieldsByCamelcaseNamesLazyInitStatic(
1616     const FileDescriptorTables* tables) {
1617   tables->FieldsByCamelcaseNamesLazyInitInternal();
1618 }
1619 
FieldsByCamelcaseNamesLazyInitInternal() const1620 void FileDescriptorTables::FieldsByCamelcaseNamesLazyInitInternal() const {
1621   auto* map = new FieldsByNameMap;
1622   for (Symbol symbol : symbols_by_parent_) {
1623     const FieldDescriptor* field = symbol.field_descriptor();
1624     if (!field) continue;
1625     (*map)[{FindParentForFieldsByMap(field), field->camelcase_name().c_str()}] =
1626         field;
1627   }
1628   fields_by_camelcase_name_.store(map, std::memory_order_release);
1629 }
1630 
FindFieldByCamelcaseName(const void * parent,StringPiece camelcase_name) const1631 inline const FieldDescriptor* FileDescriptorTables::FindFieldByCamelcaseName(
1632     const void* parent, StringPiece camelcase_name) const {
1633   internal::call_once(
1634       fields_by_camelcase_name_once_,
1635       FileDescriptorTables::FieldsByCamelcaseNamesLazyInitStatic, this);
1636   return FindPtrOrNull(
1637       *fields_by_camelcase_name_.load(std::memory_order_acquire),
1638       PointerStringPair(parent, camelcase_name));
1639 }
1640 
FindEnumValueByNumber(const EnumDescriptor * parent,int number) const1641 inline const EnumValueDescriptor* FileDescriptorTables::FindEnumValueByNumber(
1642     const EnumDescriptor* parent, int number) const {
1643   // If `number` is within the sequential range, just index into the parent
1644   // without doing a table lookup.
1645   const int base = parent->value(0)->number();
1646   if (base <= number &&
1647       number <= static_cast<int64_t>(base) + parent->sequential_value_limit_) {
1648     return parent->value(number - base);
1649   }
1650 
1651   Symbol::QueryKey query;
1652   query.parent = parent;
1653   query.field_number = number;
1654 
1655   auto it = enum_values_by_number_.find(query);
1656   return it == enum_values_by_number_.end() ? nullptr
1657                                             : it->enum_value_descriptor();
1658 }
1659 
1660 inline const EnumValueDescriptor*
FindEnumValueByNumberCreatingIfUnknown(const EnumDescriptor * parent,int number) const1661 FileDescriptorTables::FindEnumValueByNumberCreatingIfUnknown(
1662     const EnumDescriptor* parent, int number) const {
1663   // First try, with map of compiled-in values.
1664   {
1665     const auto* value = FindEnumValueByNumber(parent, number);
1666     if (value != nullptr) {
1667       return value;
1668     }
1669   }
1670 
1671   Symbol::QueryKey query;
1672   query.parent = parent;
1673   query.field_number = number;
1674 
1675   // Second try, with reader lock held on unknown enum values: common case.
1676   {
1677     ReaderMutexLock l(&unknown_enum_values_mu_);
1678     auto it = unknown_enum_values_by_number_.find(query);
1679     if (it != unknown_enum_values_by_number_.end() &&
1680         it->enum_value_descriptor() != nullptr) {
1681       return it->enum_value_descriptor();
1682     }
1683   }
1684   // If not found, try again with writer lock held, and create new descriptor if
1685   // necessary.
1686   {
1687     WriterMutexLock l(&unknown_enum_values_mu_);
1688     auto it = unknown_enum_values_by_number_.find(query);
1689     if (it != unknown_enum_values_by_number_.end() &&
1690         it->enum_value_descriptor() != nullptr) {
1691       return it->enum_value_descriptor();
1692     }
1693 
1694     // Create an EnumValueDescriptor dynamically. We don't insert it into the
1695     // EnumDescriptor (it's not a part of the enum as originally defined), but
1696     // we do insert it into the table so that we can return the same pointer
1697     // later.
1698     std::string enum_value_name = StringPrintf(
1699         "UNKNOWN_ENUM_VALUE_%s_%d", parent->name().c_str(), number);
1700     auto* pool = DescriptorPool::generated_pool();
1701     auto* tables = const_cast<DescriptorPool::Tables*>(pool->tables_.get());
1702     internal::FlatAllocator alloc;
1703     alloc.PlanArray<EnumValueDescriptor>(1);
1704     alloc.PlanArray<std::string>(2);
1705 
1706     {
1707       // Must lock the pool because we will do allocations in the shared arena.
1708       MutexLockMaybe l2(pool->mutex_);
1709       alloc.FinalizePlanning(tables);
1710     }
1711     EnumValueDescriptor* result = alloc.AllocateArray<EnumValueDescriptor>(1);
1712     result->all_names_ = alloc.AllocateStrings(
1713         enum_value_name,
1714         StrCat(parent->full_name(), ".", enum_value_name));
1715     result->number_ = number;
1716     result->type_ = parent;
1717     result->options_ = &EnumValueOptions::default_instance();
1718     unknown_enum_values_by_number_.insert(Symbol::EnumValue(result, 0));
1719     return result;
1720   }
1721 }
1722 
FindExtension(const Descriptor * extendee,int number) const1723 inline const FieldDescriptor* DescriptorPool::Tables::FindExtension(
1724     const Descriptor* extendee, int number) const {
1725   return FindPtrOrNull(extensions_, std::make_pair(extendee, number));
1726 }
1727 
FindAllExtensions(const Descriptor * extendee,std::vector<const FieldDescriptor * > * out) const1728 inline void DescriptorPool::Tables::FindAllExtensions(
1729     const Descriptor* extendee,
1730     std::vector<const FieldDescriptor*>* out) const {
1731   ExtensionsGroupedByDescriptorMap::const_iterator it =
1732       extensions_.lower_bound(std::make_pair(extendee, 0));
1733   for (; it != extensions_.end() && it->first.first == extendee; ++it) {
1734     out->push_back(it->second);
1735   }
1736 }
1737 
1738 // -------------------------------------------------------------------
1739 
AddSymbol(const std::string & full_name,Symbol symbol)1740 bool DescriptorPool::Tables::AddSymbol(const std::string& full_name,
1741                                        Symbol symbol) {
1742   GOOGLE_DCHECK_EQ(full_name, symbol.full_name());
1743   if (symbols_by_name_.insert(symbol).second) {
1744     symbols_after_checkpoint_.push_back(symbol);
1745     return true;
1746   } else {
1747     return false;
1748   }
1749 }
1750 
AddAliasUnderParent(const void * parent,const std::string & name,Symbol symbol)1751 bool FileDescriptorTables::AddAliasUnderParent(const void* parent,
1752                                                const std::string& name,
1753                                                Symbol symbol) {
1754   GOOGLE_DCHECK_EQ(name, symbol.parent_name_key().second);
1755   GOOGLE_DCHECK_EQ(parent, symbol.parent_name_key().first);
1756   return symbols_by_parent_.insert(symbol).second;
1757 }
1758 
AddFile(const FileDescriptor * file)1759 bool DescriptorPool::Tables::AddFile(const FileDescriptor* file) {
1760   if (InsertIfNotPresent(&files_by_name_, file->name(), file)) {
1761     files_after_checkpoint_.push_back(file);
1762     return true;
1763   } else {
1764     return false;
1765   }
1766 }
1767 
FinalizeTables()1768 void FileDescriptorTables::FinalizeTables() {}
1769 
AddFieldByNumber(FieldDescriptor * field)1770 bool FileDescriptorTables::AddFieldByNumber(FieldDescriptor* field) {
1771   // Skip fields that are at the start of the sequence.
1772   if (field->containing_type() != nullptr && field->number() >= 1 &&
1773       field->number() <= field->containing_type()->sequential_field_limit_) {
1774     if (field->is_extension()) {
1775       // Conflicts with the field that already exists in the sequential range.
1776       return false;
1777     }
1778     // Only return true if the field at that index matches. Otherwise it
1779     // conflicts with the existing field in the sequential range.
1780     return field->containing_type()->field(field->number() - 1) == field;
1781   }
1782 
1783   return fields_by_number_.insert(Symbol(field)).second;
1784 }
1785 
AddEnumValueByNumber(EnumValueDescriptor * value)1786 bool FileDescriptorTables::AddEnumValueByNumber(EnumValueDescriptor* value) {
1787   // Skip values that are at the start of the sequence.
1788   const int base = value->type()->value(0)->number();
1789   if (base <= value->number() &&
1790       value->number() <=
1791           static_cast<int64_t>(base) + value->type()->sequential_value_limit_)
1792     return true;
1793   return enum_values_by_number_.insert(Symbol::EnumValue(value, 0)).second;
1794 }
1795 
AddExtension(const FieldDescriptor * field)1796 bool DescriptorPool::Tables::AddExtension(const FieldDescriptor* field) {
1797   DescriptorIntPair key(field->containing_type(), field->number());
1798   if (InsertIfNotPresent(&extensions_, key, field)) {
1799     extensions_after_checkpoint_.push_back(key);
1800     return true;
1801   } else {
1802     return false;
1803   }
1804 }
1805 
1806 // -------------------------------------------------------------------
1807 
1808 template <typename Type>
Allocate()1809 Type* DescriptorPool::Tables::Allocate() {
1810   static_assert(std::is_trivially_destructible<Type>::value, "");
1811   static_assert(alignof(Type) <= 8, "");
1812   return ::new (AllocateBytes(sizeof(Type))) Type{};
1813 }
1814 
AllocateBytes(int size)1815 void* DescriptorPool::Tables::AllocateBytes(int size) {
1816   if (size == 0) return nullptr;
1817   void* p = ::operator new(size + RoundUpTo<8>(sizeof(int)));
1818   int* sizep = static_cast<int*>(p);
1819   misc_allocs_.emplace_back(sizep);
1820   *sizep = size;
1821   return static_cast<char*>(p) + RoundUpTo<8>(sizeof(int));
1822 }
1823 
1824 template <typename... T>
CreateFlatAlloc(const TypeMap<IntT,T...> & sizes)1825 internal::FlatAllocator::Allocation* DescriptorPool::Tables::CreateFlatAlloc(
1826     const TypeMap<IntT, T...>& sizes) {
1827   auto ends = CalculateEnds(sizes);
1828   using FlatAlloc = internal::FlatAllocator::Allocation;
1829 
1830   int last_end = ends.template Get<
1831       typename std::tuple_element<sizeof...(T) - 1, std::tuple<T...>>::type>();
1832   size_t total_size =
1833       last_end + RoundUpTo<FlatAlloc::kMaxAlign>(sizeof(FlatAlloc));
1834   char* data = static_cast<char*>(::operator new(total_size));
1835   auto* res = ::new (data) FlatAlloc(ends);
1836   flat_allocs_.emplace_back(res);
1837 
1838   return res;
1839 }
1840 
BuildLocationsByPath(std::pair<const FileDescriptorTables *,const SourceCodeInfo * > * p)1841 void FileDescriptorTables::BuildLocationsByPath(
1842     std::pair<const FileDescriptorTables*, const SourceCodeInfo*>* p) {
1843   for (int i = 0, len = p->second->location_size(); i < len; ++i) {
1844     const SourceCodeInfo_Location* loc = &p->second->location().Get(i);
1845     p->first->locations_by_path_[Join(loc->path(), ",")] = loc;
1846   }
1847 }
1848 
GetSourceLocation(const std::vector<int> & path,const SourceCodeInfo * info) const1849 const SourceCodeInfo_Location* FileDescriptorTables::GetSourceLocation(
1850     const std::vector<int>& path, const SourceCodeInfo* info) const {
1851   std::pair<const FileDescriptorTables*, const SourceCodeInfo*> p(
1852       std::make_pair(this, info));
1853   internal::call_once(locations_by_path_once_,
1854                       FileDescriptorTables::BuildLocationsByPath, &p);
1855   return FindPtrOrNull(locations_by_path_, Join(path, ","));
1856 }
1857 
1858 // ===================================================================
1859 // DescriptorPool
1860 
~ErrorCollector()1861 DescriptorPool::ErrorCollector::~ErrorCollector() {}
1862 
DescriptorPool()1863 DescriptorPool::DescriptorPool()
1864     : mutex_(nullptr),
1865       fallback_database_(nullptr),
1866       default_error_collector_(nullptr),
1867       underlay_(nullptr),
1868       tables_(new Tables),
1869       enforce_dependencies_(true),
1870       lazily_build_dependencies_(false),
1871       allow_unknown_(false),
1872       enforce_weak_(false),
1873       disallow_enforce_utf8_(false) {}
1874 
DescriptorPool(DescriptorDatabase * fallback_database,ErrorCollector * error_collector)1875 DescriptorPool::DescriptorPool(DescriptorDatabase* fallback_database,
1876                                ErrorCollector* error_collector)
1877     : mutex_(new internal::WrappedMutex),
1878       fallback_database_(fallback_database),
1879       default_error_collector_(error_collector),
1880       underlay_(nullptr),
1881       tables_(new Tables),
1882       enforce_dependencies_(true),
1883       lazily_build_dependencies_(false),
1884       allow_unknown_(false),
1885       enforce_weak_(false),
1886       disallow_enforce_utf8_(false) {}
1887 
DescriptorPool(const DescriptorPool * underlay)1888 DescriptorPool::DescriptorPool(const DescriptorPool* underlay)
1889     : mutex_(nullptr),
1890       fallback_database_(nullptr),
1891       default_error_collector_(nullptr),
1892       underlay_(underlay),
1893       tables_(new Tables),
1894       enforce_dependencies_(true),
1895       lazily_build_dependencies_(false),
1896       allow_unknown_(false),
1897       enforce_weak_(false),
1898       disallow_enforce_utf8_(false) {}
1899 
~DescriptorPool()1900 DescriptorPool::~DescriptorPool() {
1901   if (mutex_ != nullptr) delete mutex_;
1902 }
1903 
1904 // DescriptorPool::BuildFile() defined later.
1905 // DescriptorPool::BuildFileCollectingErrors() defined later.
1906 
InternalDontEnforceDependencies()1907 void DescriptorPool::InternalDontEnforceDependencies() {
1908   enforce_dependencies_ = false;
1909 }
1910 
AddUnusedImportTrackFile(ConstStringParam file_name,bool is_error)1911 void DescriptorPool::AddUnusedImportTrackFile(ConstStringParam file_name,
1912                                               bool is_error) {
1913   unused_import_track_files_[std::string(file_name)] = is_error;
1914 }
1915 
ClearUnusedImportTrackFiles()1916 void DescriptorPool::ClearUnusedImportTrackFiles() {
1917   unused_import_track_files_.clear();
1918 }
1919 
InternalIsFileLoaded(ConstStringParam filename) const1920 bool DescriptorPool::InternalIsFileLoaded(ConstStringParam filename) const {
1921   MutexLockMaybe lock(mutex_);
1922   return tables_->FindFile(filename) != nullptr;
1923 }
1924 
1925 // generated_pool ====================================================
1926 
1927 namespace {
1928 
1929 
GeneratedDatabase()1930 EncodedDescriptorDatabase* GeneratedDatabase() {
1931   static auto generated_database =
1932       internal::OnShutdownDelete(new EncodedDescriptorDatabase());
1933   return generated_database;
1934 }
1935 
NewGeneratedPool()1936 DescriptorPool* NewGeneratedPool() {
1937   auto generated_pool = new DescriptorPool(GeneratedDatabase());
1938   generated_pool->InternalSetLazilyBuildDependencies();
1939   return generated_pool;
1940 }
1941 
1942 }  // anonymous namespace
1943 
internal_generated_database()1944 DescriptorDatabase* DescriptorPool::internal_generated_database() {
1945   return GeneratedDatabase();
1946 }
1947 
internal_generated_pool()1948 DescriptorPool* DescriptorPool::internal_generated_pool() {
1949   static DescriptorPool* generated_pool =
1950       internal::OnShutdownDelete(NewGeneratedPool());
1951   return generated_pool;
1952 }
1953 
generated_pool()1954 const DescriptorPool* DescriptorPool::generated_pool() {
1955   const DescriptorPool* pool = internal_generated_pool();
1956   // Ensure that descriptor.proto has been registered in the generated pool.
1957   DescriptorProto::descriptor();
1958   return pool;
1959 }
1960 
1961 
InternalAddGeneratedFile(const void * encoded_file_descriptor,int size)1962 void DescriptorPool::InternalAddGeneratedFile(
1963     const void* encoded_file_descriptor, int size) {
1964   // So, this function is called in the process of initializing the
1965   // descriptors for generated proto classes.  Each generated .pb.cc file
1966   // has an internal procedure called AddDescriptors() which is called at
1967   // process startup, and that function calls this one in order to register
1968   // the raw bytes of the FileDescriptorProto representing the file.
1969   //
1970   // We do not actually construct the descriptor objects right away.  We just
1971   // hang on to the bytes until they are actually needed.  We actually construct
1972   // the descriptor the first time one of the following things happens:
1973   // * Someone calls a method like descriptor(), GetDescriptor(), or
1974   //   GetReflection() on the generated types, which requires returning the
1975   //   descriptor or an object based on it.
1976   // * Someone looks up the descriptor in DescriptorPool::generated_pool().
1977   //
1978   // Once one of these happens, the DescriptorPool actually parses the
1979   // FileDescriptorProto and generates a FileDescriptor (and all its children)
1980   // based on it.
1981   //
1982   // Note that FileDescriptorProto is itself a generated protocol message.
1983   // Therefore, when we parse one, we have to be very careful to avoid using
1984   // any descriptor-based operations, since this might cause infinite recursion
1985   // or deadlock.
1986   GOOGLE_CHECK(GeneratedDatabase()->Add(encoded_file_descriptor, size));
1987 }
1988 
1989 
1990 // Find*By* methods ==================================================
1991 
1992 // TODO(kenton):  There's a lot of repeated code here, but I'm not sure if
1993 //   there's any good way to factor it out.  Think about this some time when
1994 //   there's nothing more important to do (read: never).
1995 
FindFileByName(ConstStringParam name) const1996 const FileDescriptor* DescriptorPool::FindFileByName(
1997     ConstStringParam name) const {
1998   MutexLockMaybe lock(mutex_);
1999   if (fallback_database_ != nullptr) {
2000     tables_->known_bad_symbols_.clear();
2001     tables_->known_bad_files_.clear();
2002   }
2003   const FileDescriptor* result = tables_->FindFile(name);
2004   if (result != nullptr) return result;
2005   if (underlay_ != nullptr) {
2006     result = underlay_->FindFileByName(name);
2007     if (result != nullptr) return result;
2008   }
2009   if (TryFindFileInFallbackDatabase(name)) {
2010     result = tables_->FindFile(name);
2011     if (result != nullptr) return result;
2012   }
2013   return nullptr;
2014 }
2015 
FindFileContainingSymbol(ConstStringParam symbol_name) const2016 const FileDescriptor* DescriptorPool::FindFileContainingSymbol(
2017     ConstStringParam symbol_name) const {
2018   MutexLockMaybe lock(mutex_);
2019   if (fallback_database_ != nullptr) {
2020     tables_->known_bad_symbols_.clear();
2021     tables_->known_bad_files_.clear();
2022   }
2023   Symbol result = tables_->FindSymbol(symbol_name);
2024   if (!result.IsNull()) return result.GetFile();
2025   if (underlay_ != nullptr) {
2026     const FileDescriptor* file_result =
2027         underlay_->FindFileContainingSymbol(symbol_name);
2028     if (file_result != nullptr) return file_result;
2029   }
2030   if (TryFindSymbolInFallbackDatabase(symbol_name)) {
2031     result = tables_->FindSymbol(symbol_name);
2032     if (!result.IsNull()) return result.GetFile();
2033   }
2034   return nullptr;
2035 }
2036 
FindMessageTypeByName(ConstStringParam name) const2037 const Descriptor* DescriptorPool::FindMessageTypeByName(
2038     ConstStringParam name) const {
2039   return tables_->FindByNameHelper(this, name).descriptor();
2040 }
2041 
FindFieldByName(ConstStringParam name) const2042 const FieldDescriptor* DescriptorPool::FindFieldByName(
2043     ConstStringParam name) const {
2044   if (const FieldDescriptor* field =
2045           tables_->FindByNameHelper(this, name).field_descriptor()) {
2046     if (!field->is_extension()) {
2047       return field;
2048     }
2049   }
2050   return nullptr;
2051 }
2052 
FindExtensionByName(ConstStringParam name) const2053 const FieldDescriptor* DescriptorPool::FindExtensionByName(
2054     ConstStringParam name) const {
2055   if (const FieldDescriptor* field =
2056           tables_->FindByNameHelper(this, name).field_descriptor()) {
2057     if (field->is_extension()) {
2058       return field;
2059     }
2060   }
2061   return nullptr;
2062 }
2063 
FindOneofByName(ConstStringParam name) const2064 const OneofDescriptor* DescriptorPool::FindOneofByName(
2065     ConstStringParam name) const {
2066   return tables_->FindByNameHelper(this, name).oneof_descriptor();
2067 }
2068 
FindEnumTypeByName(ConstStringParam name) const2069 const EnumDescriptor* DescriptorPool::FindEnumTypeByName(
2070     ConstStringParam name) const {
2071   return tables_->FindByNameHelper(this, name).enum_descriptor();
2072 }
2073 
FindEnumValueByName(ConstStringParam name) const2074 const EnumValueDescriptor* DescriptorPool::FindEnumValueByName(
2075     ConstStringParam name) const {
2076   return tables_->FindByNameHelper(this, name).enum_value_descriptor();
2077 }
2078 
FindServiceByName(ConstStringParam name) const2079 const ServiceDescriptor* DescriptorPool::FindServiceByName(
2080     ConstStringParam name) const {
2081   return tables_->FindByNameHelper(this, name).service_descriptor();
2082 }
2083 
FindMethodByName(ConstStringParam name) const2084 const MethodDescriptor* DescriptorPool::FindMethodByName(
2085     ConstStringParam name) const {
2086   return tables_->FindByNameHelper(this, name).method_descriptor();
2087 }
2088 
FindExtensionByNumber(const Descriptor * extendee,int number) const2089 const FieldDescriptor* DescriptorPool::FindExtensionByNumber(
2090     const Descriptor* extendee, int number) const {
2091   if (extendee->extension_range_count() == 0) return nullptr;
2092   // A faster path to reduce lock contention in finding extensions, assuming
2093   // most extensions will be cache hit.
2094   if (mutex_ != nullptr) {
2095     ReaderMutexLock lock(mutex_);
2096     const FieldDescriptor* result = tables_->FindExtension(extendee, number);
2097     if (result != nullptr) {
2098       return result;
2099     }
2100   }
2101   MutexLockMaybe lock(mutex_);
2102   if (fallback_database_ != nullptr) {
2103     tables_->known_bad_symbols_.clear();
2104     tables_->known_bad_files_.clear();
2105   }
2106   const FieldDescriptor* result = tables_->FindExtension(extendee, number);
2107   if (result != nullptr) {
2108     return result;
2109   }
2110   if (underlay_ != nullptr) {
2111     result = underlay_->FindExtensionByNumber(extendee, number);
2112     if (result != nullptr) return result;
2113   }
2114   if (TryFindExtensionInFallbackDatabase(extendee, number)) {
2115     result = tables_->FindExtension(extendee, number);
2116     if (result != nullptr) {
2117       return result;
2118     }
2119   }
2120   return nullptr;
2121 }
2122 
InternalFindExtensionByNumberNoLock(const Descriptor * extendee,int number) const2123 const FieldDescriptor* DescriptorPool::InternalFindExtensionByNumberNoLock(
2124     const Descriptor* extendee, int number) const {
2125   if (extendee->extension_range_count() == 0) return nullptr;
2126 
2127   const FieldDescriptor* result = tables_->FindExtension(extendee, number);
2128   if (result != nullptr) {
2129     return result;
2130   }
2131 
2132   if (underlay_ != nullptr) {
2133     result = underlay_->InternalFindExtensionByNumberNoLock(extendee, number);
2134     if (result != nullptr) return result;
2135   }
2136 
2137   return nullptr;
2138 }
2139 
FindExtensionByPrintableName(const Descriptor * extendee,ConstStringParam printable_name) const2140 const FieldDescriptor* DescriptorPool::FindExtensionByPrintableName(
2141     const Descriptor* extendee, ConstStringParam printable_name) const {
2142   if (extendee->extension_range_count() == 0) return nullptr;
2143   const FieldDescriptor* result = FindExtensionByName(printable_name);
2144   if (result != nullptr && result->containing_type() == extendee) {
2145     return result;
2146   }
2147   if (extendee->options().message_set_wire_format()) {
2148     // MessageSet extensions may be identified by type name.
2149     const Descriptor* type = FindMessageTypeByName(printable_name);
2150     if (type != nullptr) {
2151       // Look for a matching extension in the foreign type's scope.
2152       const int type_extension_count = type->extension_count();
2153       for (int i = 0; i < type_extension_count; i++) {
2154         const FieldDescriptor* extension = type->extension(i);
2155         if (extension->containing_type() == extendee &&
2156             extension->type() == FieldDescriptor::TYPE_MESSAGE &&
2157             extension->is_optional() && extension->message_type() == type) {
2158           // Found it.
2159           return extension;
2160         }
2161       }
2162     }
2163   }
2164   return nullptr;
2165 }
2166 
FindAllExtensions(const Descriptor * extendee,std::vector<const FieldDescriptor * > * out) const2167 void DescriptorPool::FindAllExtensions(
2168     const Descriptor* extendee,
2169     std::vector<const FieldDescriptor*>* out) const {
2170   MutexLockMaybe lock(mutex_);
2171   if (fallback_database_ != nullptr) {
2172     tables_->known_bad_symbols_.clear();
2173     tables_->known_bad_files_.clear();
2174   }
2175 
2176   // Initialize tables_->extensions_ from the fallback database first
2177   // (but do this only once per descriptor).
2178   if (fallback_database_ != nullptr &&
2179       tables_->extensions_loaded_from_db_.count(extendee) == 0) {
2180     std::vector<int> numbers;
2181     if (fallback_database_->FindAllExtensionNumbers(extendee->full_name(),
2182                                                     &numbers)) {
2183       for (int number : numbers) {
2184         if (tables_->FindExtension(extendee, number) == nullptr) {
2185           TryFindExtensionInFallbackDatabase(extendee, number);
2186         }
2187       }
2188       tables_->extensions_loaded_from_db_.insert(extendee);
2189     }
2190   }
2191 
2192   tables_->FindAllExtensions(extendee, out);
2193   if (underlay_ != nullptr) {
2194     underlay_->FindAllExtensions(extendee, out);
2195   }
2196 }
2197 
2198 
2199 // -------------------------------------------------------------------
2200 
FindFieldByNumber(int key) const2201 const FieldDescriptor* Descriptor::FindFieldByNumber(int key) const {
2202   const FieldDescriptor* result = file()->tables_->FindFieldByNumber(this, key);
2203   if (result == nullptr || result->is_extension()) {
2204     return nullptr;
2205   } else {
2206     return result;
2207   }
2208 }
2209 
FindFieldByLowercaseName(ConstStringParam key) const2210 const FieldDescriptor* Descriptor::FindFieldByLowercaseName(
2211     ConstStringParam key) const {
2212   const FieldDescriptor* result =
2213       file()->tables_->FindFieldByLowercaseName(this, key);
2214   if (result == nullptr || result->is_extension()) {
2215     return nullptr;
2216   } else {
2217     return result;
2218   }
2219 }
2220 
FindFieldByCamelcaseName(ConstStringParam key) const2221 const FieldDescriptor* Descriptor::FindFieldByCamelcaseName(
2222     ConstStringParam key) const {
2223   const FieldDescriptor* result =
2224       file()->tables_->FindFieldByCamelcaseName(this, key);
2225   if (result == nullptr || result->is_extension()) {
2226     return nullptr;
2227   } else {
2228     return result;
2229   }
2230 }
2231 
FindFieldByName(ConstStringParam key) const2232 const FieldDescriptor* Descriptor::FindFieldByName(ConstStringParam key) const {
2233   const FieldDescriptor* field =
2234       file()->tables_->FindNestedSymbol(this, key).field_descriptor();
2235   return field != nullptr && !field->is_extension() ? field : nullptr;
2236 }
2237 
FindOneofByName(ConstStringParam key) const2238 const OneofDescriptor* Descriptor::FindOneofByName(ConstStringParam key) const {
2239   return file()->tables_->FindNestedSymbol(this, key).oneof_descriptor();
2240 }
2241 
FindExtensionByName(ConstStringParam key) const2242 const FieldDescriptor* Descriptor::FindExtensionByName(
2243     ConstStringParam key) const {
2244   const FieldDescriptor* field =
2245       file()->tables_->FindNestedSymbol(this, key).field_descriptor();
2246   return field != nullptr && field->is_extension() ? field : nullptr;
2247 }
2248 
FindExtensionByLowercaseName(ConstStringParam key) const2249 const FieldDescriptor* Descriptor::FindExtensionByLowercaseName(
2250     ConstStringParam key) const {
2251   const FieldDescriptor* result =
2252       file()->tables_->FindFieldByLowercaseName(this, key);
2253   if (result == nullptr || !result->is_extension()) {
2254     return nullptr;
2255   } else {
2256     return result;
2257   }
2258 }
2259 
FindExtensionByCamelcaseName(ConstStringParam key) const2260 const FieldDescriptor* Descriptor::FindExtensionByCamelcaseName(
2261     ConstStringParam key) const {
2262   const FieldDescriptor* result =
2263       file()->tables_->FindFieldByCamelcaseName(this, key);
2264   if (result == nullptr || !result->is_extension()) {
2265     return nullptr;
2266   } else {
2267     return result;
2268   }
2269 }
2270 
FindNestedTypeByName(ConstStringParam key) const2271 const Descriptor* Descriptor::FindNestedTypeByName(ConstStringParam key) const {
2272   return file()->tables_->FindNestedSymbol(this, key).descriptor();
2273 }
2274 
FindEnumTypeByName(ConstStringParam key) const2275 const EnumDescriptor* Descriptor::FindEnumTypeByName(
2276     ConstStringParam key) const {
2277   return file()->tables_->FindNestedSymbol(this, key).enum_descriptor();
2278 }
2279 
FindEnumValueByName(ConstStringParam key) const2280 const EnumValueDescriptor* Descriptor::FindEnumValueByName(
2281     ConstStringParam key) const {
2282   return file()->tables_->FindNestedSymbol(this, key).enum_value_descriptor();
2283 }
2284 
map_key() const2285 const FieldDescriptor* Descriptor::map_key() const {
2286   if (!options().map_entry()) return nullptr;
2287   GOOGLE_DCHECK_EQ(field_count(), 2);
2288   return field(0);
2289 }
2290 
map_value() const2291 const FieldDescriptor* Descriptor::map_value() const {
2292   if (!options().map_entry()) return nullptr;
2293   GOOGLE_DCHECK_EQ(field_count(), 2);
2294   return field(1);
2295 }
2296 
FindValueByName(ConstStringParam key) const2297 const EnumValueDescriptor* EnumDescriptor::FindValueByName(
2298     ConstStringParam key) const {
2299   return file()->tables_->FindNestedSymbol(this, key).enum_value_descriptor();
2300 }
2301 
FindValueByNumber(int key) const2302 const EnumValueDescriptor* EnumDescriptor::FindValueByNumber(int key) const {
2303   return file()->tables_->FindEnumValueByNumber(this, key);
2304 }
2305 
FindValueByNumberCreatingIfUnknown(int key) const2306 const EnumValueDescriptor* EnumDescriptor::FindValueByNumberCreatingIfUnknown(
2307     int key) const {
2308   return file()->tables_->FindEnumValueByNumberCreatingIfUnknown(this, key);
2309 }
2310 
FindMethodByName(ConstStringParam key) const2311 const MethodDescriptor* ServiceDescriptor::FindMethodByName(
2312     ConstStringParam key) const {
2313   return file()->tables_->FindNestedSymbol(this, key).method_descriptor();
2314 }
2315 
FindMessageTypeByName(ConstStringParam key) const2316 const Descriptor* FileDescriptor::FindMessageTypeByName(
2317     ConstStringParam key) const {
2318   return tables_->FindNestedSymbol(this, key).descriptor();
2319 }
2320 
FindEnumTypeByName(ConstStringParam key) const2321 const EnumDescriptor* FileDescriptor::FindEnumTypeByName(
2322     ConstStringParam key) const {
2323   return tables_->FindNestedSymbol(this, key).enum_descriptor();
2324 }
2325 
FindEnumValueByName(ConstStringParam key) const2326 const EnumValueDescriptor* FileDescriptor::FindEnumValueByName(
2327     ConstStringParam key) const {
2328   return tables_->FindNestedSymbol(this, key).enum_value_descriptor();
2329 }
2330 
FindServiceByName(ConstStringParam key) const2331 const ServiceDescriptor* FileDescriptor::FindServiceByName(
2332     ConstStringParam key) const {
2333   return tables_->FindNestedSymbol(this, key).service_descriptor();
2334 }
2335 
FindExtensionByName(ConstStringParam key) const2336 const FieldDescriptor* FileDescriptor::FindExtensionByName(
2337     ConstStringParam key) const {
2338   const FieldDescriptor* field =
2339       tables_->FindNestedSymbol(this, key).field_descriptor();
2340   return field != nullptr && field->is_extension() ? field : nullptr;
2341 }
2342 
FindExtensionByLowercaseName(ConstStringParam key) const2343 const FieldDescriptor* FileDescriptor::FindExtensionByLowercaseName(
2344     ConstStringParam key) const {
2345   const FieldDescriptor* result = tables_->FindFieldByLowercaseName(this, key);
2346   if (result == nullptr || !result->is_extension()) {
2347     return nullptr;
2348   } else {
2349     return result;
2350   }
2351 }
2352 
FindExtensionByCamelcaseName(ConstStringParam key) const2353 const FieldDescriptor* FileDescriptor::FindExtensionByCamelcaseName(
2354     ConstStringParam key) const {
2355   const FieldDescriptor* result = tables_->FindFieldByCamelcaseName(this, key);
2356   if (result == nullptr || !result->is_extension()) {
2357     return nullptr;
2358   } else {
2359     return result;
2360   }
2361 }
2362 
CopyTo(DescriptorProto_ExtensionRange * proto) const2363 void Descriptor::ExtensionRange::CopyTo(
2364     DescriptorProto_ExtensionRange* proto) const {
2365   proto->set_start(this->start);
2366   proto->set_end(this->end);
2367   if (options_ != &ExtensionRangeOptions::default_instance()) {
2368     *proto->mutable_options() = *options_;
2369   }
2370 }
2371 
2372 const Descriptor::ExtensionRange*
FindExtensionRangeContainingNumber(int number) const2373 Descriptor::FindExtensionRangeContainingNumber(int number) const {
2374   // Linear search should be fine because we don't expect a message to have
2375   // more than a couple extension ranges.
2376   for (int i = 0; i < extension_range_count(); i++) {
2377     if (number >= extension_range(i)->start &&
2378         number < extension_range(i)->end) {
2379       return extension_range(i);
2380     }
2381   }
2382   return nullptr;
2383 }
2384 
FindReservedRangeContainingNumber(int number) const2385 const Descriptor::ReservedRange* Descriptor::FindReservedRangeContainingNumber(
2386     int number) const {
2387   // TODO(chrisn): Consider a non-linear search.
2388   for (int i = 0; i < reserved_range_count(); i++) {
2389     if (number >= reserved_range(i)->start && number < reserved_range(i)->end) {
2390       return reserved_range(i);
2391     }
2392   }
2393   return nullptr;
2394 }
2395 
2396 const EnumDescriptor::ReservedRange*
FindReservedRangeContainingNumber(int number) const2397 EnumDescriptor::FindReservedRangeContainingNumber(int number) const {
2398   // TODO(chrisn): Consider a non-linear search.
2399   for (int i = 0; i < reserved_range_count(); i++) {
2400     if (number >= reserved_range(i)->start &&
2401         number <= reserved_range(i)->end) {
2402       return reserved_range(i);
2403     }
2404   }
2405   return nullptr;
2406 }
2407 
2408 // -------------------------------------------------------------------
2409 
TryFindFileInFallbackDatabase(StringPiece name) const2410 bool DescriptorPool::TryFindFileInFallbackDatabase(
2411     StringPiece name) const {
2412   if (fallback_database_ == nullptr) return false;
2413 
2414   auto name_string = std::string(name);
2415   if (tables_->known_bad_files_.count(name_string) > 0) return false;
2416 
2417   FileDescriptorProto file_proto;
2418   if (!fallback_database_->FindFileByName(name_string, &file_proto) ||
2419       BuildFileFromDatabase(file_proto) == nullptr) {
2420     tables_->known_bad_files_.insert(std::move(name_string));
2421     return false;
2422   }
2423   return true;
2424 }
2425 
IsSubSymbolOfBuiltType(StringPiece name) const2426 bool DescriptorPool::IsSubSymbolOfBuiltType(StringPiece name) const {
2427   auto prefix = std::string(name);
2428   for (;;) {
2429     std::string::size_type dot_pos = prefix.find_last_of('.');
2430     if (dot_pos == std::string::npos) {
2431       break;
2432     }
2433     prefix = prefix.substr(0, dot_pos);
2434     Symbol symbol = tables_->FindSymbol(prefix);
2435     // If the symbol type is anything other than PACKAGE, then its complete
2436     // definition is already known.
2437     if (!symbol.IsNull() && !symbol.IsPackage()) {
2438       return true;
2439     }
2440   }
2441   if (underlay_ != nullptr) {
2442     // Check to see if any prefix of this symbol exists in the underlay.
2443     return underlay_->IsSubSymbolOfBuiltType(name);
2444   }
2445   return false;
2446 }
2447 
TryFindSymbolInFallbackDatabase(StringPiece name) const2448 bool DescriptorPool::TryFindSymbolInFallbackDatabase(
2449     StringPiece name) const {
2450   if (fallback_database_ == nullptr) return false;
2451 
2452   auto name_string = std::string(name);
2453   if (tables_->known_bad_symbols_.count(name_string) > 0) return false;
2454 
2455   FileDescriptorProto file_proto;
2456   if (  // We skip looking in the fallback database if the name is a sub-symbol
2457         // of any descriptor that already exists in the descriptor pool (except
2458         // for package descriptors).  This is valid because all symbols except
2459         // for packages are defined in a single file, so if the symbol exists
2460         // then we should already have its definition.
2461         //
2462         // The other reason to do this is to support "overriding" type
2463         // definitions by merging two databases that define the same type. (Yes,
2464         // people do this.)  The main difficulty with making this work is that
2465         // FindFileContainingSymbol() is allowed to return both false positives
2466         // (e.g., SimpleDescriptorDatabase, UpgradedDescriptorDatabase) and
2467         // false negatives (e.g. ProtoFileParser, SourceTreeDescriptorDatabase).
2468         // When two such databases are merged, looking up a non-existent
2469         // sub-symbol of a type that already exists in the descriptor pool can
2470         // result in an attempt to load multiple definitions of the same type.
2471         // The check below avoids this.
2472       IsSubSymbolOfBuiltType(name)
2473 
2474       // Look up file containing this symbol in fallback database.
2475       || !fallback_database_->FindFileContainingSymbol(name_string, &file_proto)
2476 
2477       // Check if we've already built this file. If so, it apparently doesn't
2478       // contain the symbol we're looking for.  Some DescriptorDatabases
2479       // return false positives.
2480       || tables_->FindFile(file_proto.name()) != nullptr
2481 
2482       // Build the file.
2483       || BuildFileFromDatabase(file_proto) == nullptr) {
2484     tables_->known_bad_symbols_.insert(std::move(name_string));
2485     return false;
2486   }
2487 
2488   return true;
2489 }
2490 
TryFindExtensionInFallbackDatabase(const Descriptor * containing_type,int field_number) const2491 bool DescriptorPool::TryFindExtensionInFallbackDatabase(
2492     const Descriptor* containing_type, int field_number) const {
2493   if (fallback_database_ == nullptr) return false;
2494 
2495   FileDescriptorProto file_proto;
2496   if (!fallback_database_->FindFileContainingExtension(
2497           containing_type->full_name(), field_number, &file_proto)) {
2498     return false;
2499   }
2500 
2501   if (tables_->FindFile(file_proto.name()) != nullptr) {
2502     // We've already loaded this file, and it apparently doesn't contain the
2503     // extension we're looking for.  Some DescriptorDatabases return false
2504     // positives.
2505     return false;
2506   }
2507 
2508   if (BuildFileFromDatabase(file_proto) == nullptr) {
2509     return false;
2510   }
2511 
2512   return true;
2513 }
2514 
2515 // ===================================================================
2516 
is_map_message_type() const2517 bool FieldDescriptor::is_map_message_type() const {
2518   return type_descriptor_.message_type->options().map_entry();
2519 }
2520 
DefaultValueAsString(bool quote_string_type) const2521 std::string FieldDescriptor::DefaultValueAsString(
2522     bool quote_string_type) const {
2523   GOOGLE_CHECK(has_default_value()) << "No default value";
2524   switch (cpp_type()) {
2525     case CPPTYPE_INT32:
2526       return StrCat(default_value_int32_t());
2527     case CPPTYPE_INT64:
2528       return StrCat(default_value_int64_t());
2529     case CPPTYPE_UINT32:
2530       return StrCat(default_value_uint32_t());
2531     case CPPTYPE_UINT64:
2532       return StrCat(default_value_uint64_t());
2533     case CPPTYPE_FLOAT:
2534       return SimpleFtoa(default_value_float());
2535     case CPPTYPE_DOUBLE:
2536       return SimpleDtoa(default_value_double());
2537     case CPPTYPE_BOOL:
2538       return default_value_bool() ? "true" : "false";
2539     case CPPTYPE_STRING:
2540       if (quote_string_type) {
2541         return "\"" + CEscape(default_value_string()) + "\"";
2542       } else {
2543         if (type() == TYPE_BYTES) {
2544           return CEscape(default_value_string());
2545         } else {
2546           return default_value_string();
2547         }
2548       }
2549     case CPPTYPE_ENUM:
2550       return default_value_enum()->name();
2551     case CPPTYPE_MESSAGE:
2552       GOOGLE_LOG(DFATAL) << "Messages can't have default values!";
2553       break;
2554   }
2555   GOOGLE_LOG(FATAL) << "Can't get here: failed to get default value as string";
2556   return "";
2557 }
2558 
2559 // CopyTo methods ====================================================
2560 
CopyTo(FileDescriptorProto * proto) const2561 void FileDescriptor::CopyTo(FileDescriptorProto* proto) const {
2562   proto->set_name(name());
2563   if (!package().empty()) proto->set_package(package());
2564   // TODO(liujisi): Also populate when syntax="proto2".
2565   if (syntax() == SYNTAX_PROTO3) proto->set_syntax(SyntaxName(syntax()));
2566 
2567   for (int i = 0; i < dependency_count(); i++) {
2568     proto->add_dependency(dependency(i)->name());
2569   }
2570 
2571   for (int i = 0; i < public_dependency_count(); i++) {
2572     proto->add_public_dependency(public_dependencies_[i]);
2573   }
2574 
2575   for (int i = 0; i < weak_dependency_count(); i++) {
2576     proto->add_weak_dependency(weak_dependencies_[i]);
2577   }
2578 
2579   for (int i = 0; i < message_type_count(); i++) {
2580     message_type(i)->CopyTo(proto->add_message_type());
2581   }
2582   for (int i = 0; i < enum_type_count(); i++) {
2583     enum_type(i)->CopyTo(proto->add_enum_type());
2584   }
2585   for (int i = 0; i < service_count(); i++) {
2586     service(i)->CopyTo(proto->add_service());
2587   }
2588   for (int i = 0; i < extension_count(); i++) {
2589     extension(i)->CopyTo(proto->add_extension());
2590   }
2591 
2592   if (&options() != &FileOptions::default_instance()) {
2593     proto->mutable_options()->CopyFrom(options());
2594   }
2595 }
2596 
CopyJsonNameTo(FileDescriptorProto * proto) const2597 void FileDescriptor::CopyJsonNameTo(FileDescriptorProto* proto) const {
2598   if (message_type_count() != proto->message_type_size() ||
2599       extension_count() != proto->extension_size()) {
2600     GOOGLE_LOG(ERROR) << "Cannot copy json_name to a proto of a different size.";
2601     return;
2602   }
2603   for (int i = 0; i < message_type_count(); i++) {
2604     message_type(i)->CopyJsonNameTo(proto->mutable_message_type(i));
2605   }
2606   for (int i = 0; i < extension_count(); i++) {
2607     extension(i)->CopyJsonNameTo(proto->mutable_extension(i));
2608   }
2609 }
2610 
CopySourceCodeInfoTo(FileDescriptorProto * proto) const2611 void FileDescriptor::CopySourceCodeInfoTo(FileDescriptorProto* proto) const {
2612   if (source_code_info_ &&
2613       source_code_info_ != &SourceCodeInfo::default_instance()) {
2614     proto->mutable_source_code_info()->CopyFrom(*source_code_info_);
2615   }
2616 }
2617 
CopyTo(DescriptorProto * proto) const2618 void Descriptor::CopyTo(DescriptorProto* proto) const {
2619   proto->set_name(name());
2620 
2621   for (int i = 0; i < field_count(); i++) {
2622     field(i)->CopyTo(proto->add_field());
2623   }
2624   for (int i = 0; i < oneof_decl_count(); i++) {
2625     oneof_decl(i)->CopyTo(proto->add_oneof_decl());
2626   }
2627   for (int i = 0; i < nested_type_count(); i++) {
2628     nested_type(i)->CopyTo(proto->add_nested_type());
2629   }
2630   for (int i = 0; i < enum_type_count(); i++) {
2631     enum_type(i)->CopyTo(proto->add_enum_type());
2632   }
2633   for (int i = 0; i < extension_range_count(); i++) {
2634     extension_range(i)->CopyTo(proto->add_extension_range());
2635   }
2636   for (int i = 0; i < extension_count(); i++) {
2637     extension(i)->CopyTo(proto->add_extension());
2638   }
2639   for (int i = 0; i < reserved_range_count(); i++) {
2640     DescriptorProto::ReservedRange* range = proto->add_reserved_range();
2641     range->set_start(reserved_range(i)->start);
2642     range->set_end(reserved_range(i)->end);
2643   }
2644   for (int i = 0; i < reserved_name_count(); i++) {
2645     proto->add_reserved_name(reserved_name(i));
2646   }
2647 
2648   if (&options() != &MessageOptions::default_instance()) {
2649     proto->mutable_options()->CopyFrom(options());
2650   }
2651 }
2652 
CopyJsonNameTo(DescriptorProto * proto) const2653 void Descriptor::CopyJsonNameTo(DescriptorProto* proto) const {
2654   if (field_count() != proto->field_size() ||
2655       nested_type_count() != proto->nested_type_size() ||
2656       extension_count() != proto->extension_size()) {
2657     GOOGLE_LOG(ERROR) << "Cannot copy json_name to a proto of a different size.";
2658     return;
2659   }
2660   for (int i = 0; i < field_count(); i++) {
2661     field(i)->CopyJsonNameTo(proto->mutable_field(i));
2662   }
2663   for (int i = 0; i < nested_type_count(); i++) {
2664     nested_type(i)->CopyJsonNameTo(proto->mutable_nested_type(i));
2665   }
2666   for (int i = 0; i < extension_count(); i++) {
2667     extension(i)->CopyJsonNameTo(proto->mutable_extension(i));
2668   }
2669 }
2670 
CopyTo(FieldDescriptorProto * proto) const2671 void FieldDescriptor::CopyTo(FieldDescriptorProto* proto) const {
2672   proto->set_name(name());
2673   proto->set_number(number());
2674   if (has_json_name_) {
2675     proto->set_json_name(json_name());
2676   }
2677   if (proto3_optional_) {
2678     proto->set_proto3_optional(true);
2679   }
2680   // Some compilers do not allow static_cast directly between two enum types,
2681   // so we must cast to int first.
2682   proto->set_label(static_cast<FieldDescriptorProto::Label>(
2683       implicit_cast<int>(label())));
2684   proto->set_type(static_cast<FieldDescriptorProto::Type>(
2685       implicit_cast<int>(type())));
2686 
2687   if (is_extension()) {
2688     if (!containing_type()->is_unqualified_placeholder_) {
2689       proto->set_extendee(".");
2690     }
2691     proto->mutable_extendee()->append(containing_type()->full_name());
2692   }
2693 
2694   if (cpp_type() == CPPTYPE_MESSAGE) {
2695     if (message_type()->is_placeholder_) {
2696       // We don't actually know if the type is a message type.  It could be
2697       // an enum.
2698       proto->clear_type();
2699     }
2700 
2701     if (!message_type()->is_unqualified_placeholder_) {
2702       proto->set_type_name(".");
2703     }
2704     proto->mutable_type_name()->append(message_type()->full_name());
2705   } else if (cpp_type() == CPPTYPE_ENUM) {
2706     if (!enum_type()->is_unqualified_placeholder_) {
2707       proto->set_type_name(".");
2708     }
2709     proto->mutable_type_name()->append(enum_type()->full_name());
2710   }
2711 
2712   if (has_default_value()) {
2713     proto->set_default_value(DefaultValueAsString(false));
2714   }
2715 
2716   if (containing_oneof() != nullptr && !is_extension()) {
2717     proto->set_oneof_index(containing_oneof()->index());
2718   }
2719 
2720   if (&options() != &FieldOptions::default_instance()) {
2721     proto->mutable_options()->CopyFrom(options());
2722   }
2723 }
2724 
CopyJsonNameTo(FieldDescriptorProto * proto) const2725 void FieldDescriptor::CopyJsonNameTo(FieldDescriptorProto* proto) const {
2726   proto->set_json_name(json_name());
2727 }
2728 
CopyTo(OneofDescriptorProto * proto) const2729 void OneofDescriptor::CopyTo(OneofDescriptorProto* proto) const {
2730   proto->set_name(name());
2731   if (&options() != &OneofOptions::default_instance()) {
2732     proto->mutable_options()->CopyFrom(options());
2733   }
2734 }
2735 
CopyTo(EnumDescriptorProto * proto) const2736 void EnumDescriptor::CopyTo(EnumDescriptorProto* proto) const {
2737   proto->set_name(name());
2738 
2739   for (int i = 0; i < value_count(); i++) {
2740     value(i)->CopyTo(proto->add_value());
2741   }
2742   for (int i = 0; i < reserved_range_count(); i++) {
2743     EnumDescriptorProto::EnumReservedRange* range = proto->add_reserved_range();
2744     range->set_start(reserved_range(i)->start);
2745     range->set_end(reserved_range(i)->end);
2746   }
2747   for (int i = 0; i < reserved_name_count(); i++) {
2748     proto->add_reserved_name(reserved_name(i));
2749   }
2750 
2751   if (&options() != &EnumOptions::default_instance()) {
2752     proto->mutable_options()->CopyFrom(options());
2753   }
2754 }
2755 
CopyTo(EnumValueDescriptorProto * proto) const2756 void EnumValueDescriptor::CopyTo(EnumValueDescriptorProto* proto) const {
2757   proto->set_name(name());
2758   proto->set_number(number());
2759 
2760   if (&options() != &EnumValueOptions::default_instance()) {
2761     proto->mutable_options()->CopyFrom(options());
2762   }
2763 }
2764 
CopyTo(ServiceDescriptorProto * proto) const2765 void ServiceDescriptor::CopyTo(ServiceDescriptorProto* proto) const {
2766   proto->set_name(name());
2767 
2768   for (int i = 0; i < method_count(); i++) {
2769     method(i)->CopyTo(proto->add_method());
2770   }
2771 
2772   if (&options() != &ServiceOptions::default_instance()) {
2773     proto->mutable_options()->CopyFrom(options());
2774   }
2775 }
2776 
CopyTo(MethodDescriptorProto * proto) const2777 void MethodDescriptor::CopyTo(MethodDescriptorProto* proto) const {
2778   proto->set_name(name());
2779 
2780   if (!input_type()->is_unqualified_placeholder_) {
2781     proto->set_input_type(".");
2782   }
2783   proto->mutable_input_type()->append(input_type()->full_name());
2784 
2785   if (!output_type()->is_unqualified_placeholder_) {
2786     proto->set_output_type(".");
2787   }
2788   proto->mutable_output_type()->append(output_type()->full_name());
2789 
2790   if (&options() != &MethodOptions::default_instance()) {
2791     proto->mutable_options()->CopyFrom(options());
2792   }
2793 
2794   if (client_streaming_) {
2795     proto->set_client_streaming(true);
2796   }
2797   if (server_streaming_) {
2798     proto->set_server_streaming(true);
2799   }
2800 }
2801 
2802 // DebugString methods ===============================================
2803 
2804 namespace {
2805 
RetrieveOptionsAssumingRightPool(int depth,const Message & options,std::vector<std::string> * option_entries)2806 bool RetrieveOptionsAssumingRightPool(
2807     int depth, const Message& options,
2808     std::vector<std::string>* option_entries) {
2809   option_entries->clear();
2810   const Reflection* reflection = options.GetReflection();
2811   std::vector<const FieldDescriptor*> fields;
2812   reflection->ListFields(options, &fields);
2813   for (const FieldDescriptor* field : fields) {
2814     int count = 1;
2815     bool repeated = false;
2816     if (field->is_repeated()) {
2817       count = reflection->FieldSize(options, field);
2818       repeated = true;
2819     }
2820     for (int j = 0; j < count; j++) {
2821       std::string fieldval;
2822       if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
2823         std::string tmp;
2824         TextFormat::Printer printer;
2825         printer.SetExpandAny(true);
2826         printer.SetInitialIndentLevel(depth + 1);
2827         printer.PrintFieldValueToString(options, field, repeated ? j : -1,
2828                                         &tmp);
2829         fieldval.append("{\n");
2830         fieldval.append(tmp);
2831         fieldval.append(depth * 2, ' ');
2832         fieldval.append("}");
2833       } else {
2834         TextFormat::PrintFieldValueToString(options, field, repeated ? j : -1,
2835                                             &fieldval);
2836       }
2837       std::string name;
2838       if (field->is_extension()) {
2839         name = "(." + field->full_name() + ")";
2840       } else {
2841         name = field->name();
2842       }
2843       option_entries->push_back(name + " = " + fieldval);
2844     }
2845   }
2846   return !option_entries->empty();
2847 }
2848 
2849 // Used by each of the option formatters.
RetrieveOptions(int depth,const Message & options,const DescriptorPool * pool,std::vector<std::string> * option_entries)2850 bool RetrieveOptions(int depth, const Message& options,
2851                      const DescriptorPool* pool,
2852                      std::vector<std::string>* option_entries) {
2853   // When printing custom options for a descriptor, we must use an options
2854   // message built on top of the same DescriptorPool where the descriptor
2855   // is coming from. This is to ensure we are interpreting custom options
2856   // against the right pool.
2857   if (options.GetDescriptor()->file()->pool() == pool) {
2858     return RetrieveOptionsAssumingRightPool(depth, options, option_entries);
2859   } else {
2860     const Descriptor* option_descriptor =
2861         pool->FindMessageTypeByName(options.GetDescriptor()->full_name());
2862     if (option_descriptor == nullptr) {
2863       // descriptor.proto is not in the pool. This means no custom options are
2864       // used so we are safe to proceed with the compiled options message type.
2865       return RetrieveOptionsAssumingRightPool(depth, options, option_entries);
2866     }
2867     DynamicMessageFactory factory;
2868     std::unique_ptr<Message> dynamic_options(
2869         factory.GetPrototype(option_descriptor)->New());
2870     std::string serialized = options.SerializeAsString();
2871     io::CodedInputStream input(
2872         reinterpret_cast<const uint8_t*>(serialized.c_str()),
2873         serialized.size());
2874     input.SetExtensionRegistry(pool, &factory);
2875     if (dynamic_options->ParseFromCodedStream(&input)) {
2876       return RetrieveOptionsAssumingRightPool(depth, *dynamic_options,
2877                                               option_entries);
2878     } else {
2879       GOOGLE_LOG(ERROR) << "Found invalid proto option data for: "
2880                  << options.GetDescriptor()->full_name();
2881       return RetrieveOptionsAssumingRightPool(depth, options, option_entries);
2882     }
2883   }
2884 }
2885 
2886 // Formats options that all appear together in brackets. Does not include
2887 // brackets.
FormatBracketedOptions(int depth,const Message & options,const DescriptorPool * pool,std::string * output)2888 bool FormatBracketedOptions(int depth, const Message& options,
2889                             const DescriptorPool* pool, std::string* output) {
2890   std::vector<std::string> all_options;
2891   if (RetrieveOptions(depth, options, pool, &all_options)) {
2892     output->append(Join(all_options, ", "));
2893   }
2894   return !all_options.empty();
2895 }
2896 
2897 // Formats options one per line
FormatLineOptions(int depth,const Message & options,const DescriptorPool * pool,std::string * output)2898 bool FormatLineOptions(int depth, const Message& options,
2899                        const DescriptorPool* pool, std::string* output) {
2900   std::string prefix(depth * 2, ' ');
2901   std::vector<std::string> all_options;
2902   if (RetrieveOptions(depth, options, pool, &all_options)) {
2903     for (const std::string& option : all_options) {
2904       strings::SubstituteAndAppend(output, "$0option $1;\n", prefix, option);
2905     }
2906   }
2907   return !all_options.empty();
2908 }
2909 
2910 class SourceLocationCommentPrinter {
2911  public:
2912   template <typename DescType>
SourceLocationCommentPrinter(const DescType * desc,const std::string & prefix,const DebugStringOptions & options)2913   SourceLocationCommentPrinter(const DescType* desc, const std::string& prefix,
2914                                const DebugStringOptions& options)
2915       : options_(options), prefix_(prefix) {
2916     // Perform the SourceLocation lookup only if we're including user comments,
2917     // because the lookup is fairly expensive.
2918     have_source_loc_ =
2919         options.include_comments && desc->GetSourceLocation(&source_loc_);
2920   }
SourceLocationCommentPrinter(const FileDescriptor * file,const std::vector<int> & path,const std::string & prefix,const DebugStringOptions & options)2921   SourceLocationCommentPrinter(const FileDescriptor* file,
2922                                const std::vector<int>& path,
2923                                const std::string& prefix,
2924                                const DebugStringOptions& options)
2925       : options_(options), prefix_(prefix) {
2926     // Perform the SourceLocation lookup only if we're including user comments,
2927     // because the lookup is fairly expensive.
2928     have_source_loc_ =
2929         options.include_comments && file->GetSourceLocation(path, &source_loc_);
2930   }
AddPreComment(std::string * output)2931   void AddPreComment(std::string* output) {
2932     if (have_source_loc_) {
2933       // Detached leading comments.
2934       for (const std::string& leading_detached_comment :
2935            source_loc_.leading_detached_comments) {
2936         *output += FormatComment(leading_detached_comment);
2937         *output += "\n";
2938       }
2939       // Attached leading comments.
2940       if (!source_loc_.leading_comments.empty()) {
2941         *output += FormatComment(source_loc_.leading_comments);
2942       }
2943     }
2944   }
AddPostComment(std::string * output)2945   void AddPostComment(std::string* output) {
2946     if (have_source_loc_ && source_loc_.trailing_comments.size() > 0) {
2947       *output += FormatComment(source_loc_.trailing_comments);
2948     }
2949   }
2950 
2951   // Format comment such that each line becomes a full-line C++-style comment in
2952   // the DebugString() output.
FormatComment(const std::string & comment_text)2953   std::string FormatComment(const std::string& comment_text) {
2954     std::string stripped_comment = comment_text;
2955     StripWhitespace(&stripped_comment);
2956     std::vector<std::string> lines = Split(stripped_comment, "\n");
2957     std::string output;
2958     for (const std::string& line : lines) {
2959       strings::SubstituteAndAppend(&output, "$0// $1\n", prefix_, line);
2960     }
2961     return output;
2962   }
2963 
2964  private:
2965 
2966   bool have_source_loc_;
2967   SourceLocation source_loc_;
2968   DebugStringOptions options_;
2969   std::string prefix_;
2970 };
2971 
2972 }  // anonymous namespace
2973 
DebugString() const2974 std::string FileDescriptor::DebugString() const {
2975   DebugStringOptions options;  // default options
2976   return DebugStringWithOptions(options);
2977 }
2978 
DebugStringWithOptions(const DebugStringOptions & debug_string_options) const2979 std::string FileDescriptor::DebugStringWithOptions(
2980     const DebugStringOptions& debug_string_options) const {
2981   std::string contents;
2982   {
2983     std::vector<int> path;
2984     path.push_back(FileDescriptorProto::kSyntaxFieldNumber);
2985     SourceLocationCommentPrinter syntax_comment(this, path, "",
2986                                                 debug_string_options);
2987     syntax_comment.AddPreComment(&contents);
2988     strings::SubstituteAndAppend(&contents, "syntax = \"$0\";\n\n",
2989                               SyntaxName(syntax()));
2990     syntax_comment.AddPostComment(&contents);
2991   }
2992 
2993   SourceLocationCommentPrinter comment_printer(this, "", debug_string_options);
2994   comment_printer.AddPreComment(&contents);
2995 
2996   std::set<int> public_dependencies;
2997   std::set<int> weak_dependencies;
2998   public_dependencies.insert(public_dependencies_,
2999                              public_dependencies_ + public_dependency_count_);
3000   weak_dependencies.insert(weak_dependencies_,
3001                            weak_dependencies_ + weak_dependency_count_);
3002 
3003   for (int i = 0; i < dependency_count(); i++) {
3004     if (public_dependencies.count(i) > 0) {
3005       strings::SubstituteAndAppend(&contents, "import public \"$0\";\n",
3006                                 dependency(i)->name());
3007     } else if (weak_dependencies.count(i) > 0) {
3008       strings::SubstituteAndAppend(&contents, "import weak \"$0\";\n",
3009                                 dependency(i)->name());
3010     } else {
3011       strings::SubstituteAndAppend(&contents, "import \"$0\";\n",
3012                                 dependency(i)->name());
3013     }
3014   }
3015 
3016   if (!package().empty()) {
3017     std::vector<int> path;
3018     path.push_back(FileDescriptorProto::kPackageFieldNumber);
3019     SourceLocationCommentPrinter package_comment(this, path, "",
3020                                                  debug_string_options);
3021     package_comment.AddPreComment(&contents);
3022     strings::SubstituteAndAppend(&contents, "package $0;\n\n", package());
3023     package_comment.AddPostComment(&contents);
3024   }
3025 
3026   if (FormatLineOptions(0, options(), pool(), &contents)) {
3027     contents.append("\n");  // add some space if we had options
3028   }
3029 
3030   for (int i = 0; i < enum_type_count(); i++) {
3031     enum_type(i)->DebugString(0, &contents, debug_string_options);
3032     contents.append("\n");
3033   }
3034 
3035   // Find all the 'group' type extensions; we will not output their nested
3036   // definitions (those will be done with their group field descriptor).
3037   std::set<const Descriptor*> groups;
3038   for (int i = 0; i < extension_count(); i++) {
3039     if (extension(i)->type() == FieldDescriptor::TYPE_GROUP) {
3040       groups.insert(extension(i)->message_type());
3041     }
3042   }
3043 
3044   for (int i = 0; i < message_type_count(); i++) {
3045     if (groups.count(message_type(i)) == 0) {
3046       message_type(i)->DebugString(0, &contents, debug_string_options,
3047                                    /* include_opening_clause */ true);
3048       contents.append("\n");
3049     }
3050   }
3051 
3052   for (int i = 0; i < service_count(); i++) {
3053     service(i)->DebugString(&contents, debug_string_options);
3054     contents.append("\n");
3055   }
3056 
3057   const Descriptor* containing_type = nullptr;
3058   for (int i = 0; i < extension_count(); i++) {
3059     if (extension(i)->containing_type() != containing_type) {
3060       if (i > 0) contents.append("}\n\n");
3061       containing_type = extension(i)->containing_type();
3062       strings::SubstituteAndAppend(&contents, "extend .$0 {\n",
3063                                 containing_type->full_name());
3064     }
3065     extension(i)->DebugString(1, &contents, debug_string_options);
3066   }
3067   if (extension_count() > 0) contents.append("}\n\n");
3068 
3069   comment_printer.AddPostComment(&contents);
3070 
3071   return contents;
3072 }
3073 
DebugString() const3074 std::string Descriptor::DebugString() const {
3075   DebugStringOptions options;  // default options
3076   return DebugStringWithOptions(options);
3077 }
3078 
DebugStringWithOptions(const DebugStringOptions & options) const3079 std::string Descriptor::DebugStringWithOptions(
3080     const DebugStringOptions& options) const {
3081   std::string contents;
3082   DebugString(0, &contents, options, /* include_opening_clause */ true);
3083   return contents;
3084 }
3085 
DebugString(int depth,std::string * contents,const DebugStringOptions & debug_string_options,bool include_opening_clause) const3086 void Descriptor::DebugString(int depth, std::string* contents,
3087                              const DebugStringOptions& debug_string_options,
3088                              bool include_opening_clause) const {
3089   if (options().map_entry()) {
3090     // Do not generate debug string for auto-generated map-entry type.
3091     return;
3092   }
3093   std::string prefix(depth * 2, ' ');
3094   ++depth;
3095 
3096   SourceLocationCommentPrinter comment_printer(this, prefix,
3097                                                debug_string_options);
3098   comment_printer.AddPreComment(contents);
3099 
3100   if (include_opening_clause) {
3101     strings::SubstituteAndAppend(contents, "$0message $1", prefix, name());
3102   }
3103   contents->append(" {\n");
3104 
3105   FormatLineOptions(depth, options(), file()->pool(), contents);
3106 
3107   // Find all the 'group' types for fields and extensions; we will not output
3108   // their nested definitions (those will be done with their group field
3109   // descriptor).
3110   std::set<const Descriptor*> groups;
3111   for (int i = 0; i < field_count(); i++) {
3112     if (field(i)->type() == FieldDescriptor::TYPE_GROUP) {
3113       groups.insert(field(i)->message_type());
3114     }
3115   }
3116   for (int i = 0; i < extension_count(); i++) {
3117     if (extension(i)->type() == FieldDescriptor::TYPE_GROUP) {
3118       groups.insert(extension(i)->message_type());
3119     }
3120   }
3121 
3122   for (int i = 0; i < nested_type_count(); i++) {
3123     if (groups.count(nested_type(i)) == 0) {
3124       nested_type(i)->DebugString(depth, contents, debug_string_options,
3125                                   /* include_opening_clause */ true);
3126     }
3127   }
3128   for (int i = 0; i < enum_type_count(); i++) {
3129     enum_type(i)->DebugString(depth, contents, debug_string_options);
3130   }
3131   for (int i = 0; i < field_count(); i++) {
3132     if (field(i)->real_containing_oneof() == nullptr) {
3133       field(i)->DebugString(depth, contents, debug_string_options);
3134     } else if (field(i)->containing_oneof()->field(0) == field(i)) {
3135       // This is the first field in this oneof, so print the whole oneof.
3136       field(i)->containing_oneof()->DebugString(depth, contents,
3137                                                 debug_string_options);
3138     }
3139   }
3140 
3141   for (int i = 0; i < extension_range_count(); i++) {
3142     strings::SubstituteAndAppend(contents, "$0  extensions $1 to $2;\n", prefix,
3143                               extension_range(i)->start,
3144                               extension_range(i)->end - 1);
3145   }
3146 
3147   // Group extensions by what they extend, so they can be printed out together.
3148   const Descriptor* containing_type = nullptr;
3149   for (int i = 0; i < extension_count(); i++) {
3150     if (extension(i)->containing_type() != containing_type) {
3151       if (i > 0) strings::SubstituteAndAppend(contents, "$0  }\n", prefix);
3152       containing_type = extension(i)->containing_type();
3153       strings::SubstituteAndAppend(contents, "$0  extend .$1 {\n", prefix,
3154                                 containing_type->full_name());
3155     }
3156     extension(i)->DebugString(depth + 1, contents, debug_string_options);
3157   }
3158   if (extension_count() > 0)
3159     strings::SubstituteAndAppend(contents, "$0  }\n", prefix);
3160 
3161   if (reserved_range_count() > 0) {
3162     strings::SubstituteAndAppend(contents, "$0  reserved ", prefix);
3163     for (int i = 0; i < reserved_range_count(); i++) {
3164       const Descriptor::ReservedRange* range = reserved_range(i);
3165       if (range->end == range->start + 1) {
3166         strings::SubstituteAndAppend(contents, "$0, ", range->start);
3167       } else if (range->end > FieldDescriptor::kMaxNumber) {
3168         strings::SubstituteAndAppend(contents, "$0 to max, ", range->start);
3169       } else {
3170         strings::SubstituteAndAppend(contents, "$0 to $1, ", range->start,
3171                                   range->end - 1);
3172       }
3173     }
3174     contents->replace(contents->size() - 2, 2, ";\n");
3175   }
3176 
3177   if (reserved_name_count() > 0) {
3178     strings::SubstituteAndAppend(contents, "$0  reserved ", prefix);
3179     for (int i = 0; i < reserved_name_count(); i++) {
3180       strings::SubstituteAndAppend(contents, "\"$0\", ",
3181                                 CEscape(reserved_name(i)));
3182     }
3183     contents->replace(contents->size() - 2, 2, ";\n");
3184   }
3185 
3186   strings::SubstituteAndAppend(contents, "$0}\n", prefix);
3187   comment_printer.AddPostComment(contents);
3188 }
3189 
DebugString() const3190 std::string FieldDescriptor::DebugString() const {
3191   DebugStringOptions options;  // default options
3192   return DebugStringWithOptions(options);
3193 }
3194 
DebugStringWithOptions(const DebugStringOptions & debug_string_options) const3195 std::string FieldDescriptor::DebugStringWithOptions(
3196     const DebugStringOptions& debug_string_options) const {
3197   std::string contents;
3198   int depth = 0;
3199   if (is_extension()) {
3200     strings::SubstituteAndAppend(&contents, "extend .$0 {\n",
3201                               containing_type()->full_name());
3202     depth = 1;
3203   }
3204   DebugString(depth, &contents, debug_string_options);
3205   if (is_extension()) {
3206     contents.append("}\n");
3207   }
3208   return contents;
3209 }
3210 
3211 // The field type string used in FieldDescriptor::DebugString()
FieldTypeNameDebugString() const3212 std::string FieldDescriptor::FieldTypeNameDebugString() const {
3213   switch (type()) {
3214     case TYPE_MESSAGE:
3215       return "." + message_type()->full_name();
3216     case TYPE_ENUM:
3217       return "." + enum_type()->full_name();
3218     default:
3219       return kTypeToName[type()];
3220   }
3221 }
3222 
DebugString(int depth,std::string * contents,const DebugStringOptions & debug_string_options) const3223 void FieldDescriptor::DebugString(
3224     int depth, std::string* contents,
3225     const DebugStringOptions& debug_string_options) const {
3226   std::string prefix(depth * 2, ' ');
3227   std::string field_type;
3228 
3229   // Special case map fields.
3230   if (is_map()) {
3231     strings::SubstituteAndAppend(
3232         &field_type, "map<$0, $1>",
3233         message_type()->field(0)->FieldTypeNameDebugString(),
3234         message_type()->field(1)->FieldTypeNameDebugString());
3235   } else {
3236     field_type = FieldTypeNameDebugString();
3237   }
3238 
3239   std::string label = StrCat(kLabelToName[this->label()], " ");
3240 
3241   // Label is omitted for maps, oneof, and plain proto3 fields.
3242   if (is_map() || real_containing_oneof() ||
3243       (is_optional() && !has_optional_keyword())) {
3244     label.clear();
3245   }
3246 
3247   SourceLocationCommentPrinter comment_printer(this, prefix,
3248                                                debug_string_options);
3249   comment_printer.AddPreComment(contents);
3250 
3251   strings::SubstituteAndAppend(
3252       contents, "$0$1$2 $3 = $4", prefix, label, field_type,
3253       type() == TYPE_GROUP ? message_type()->name() : name(), number());
3254 
3255   bool bracketed = false;
3256   if (has_default_value()) {
3257     bracketed = true;
3258     strings::SubstituteAndAppend(contents, " [default = $0",
3259                               DefaultValueAsString(true));
3260   }
3261   if (has_json_name_) {
3262     if (!bracketed) {
3263       bracketed = true;
3264       contents->append(" [");
3265     } else {
3266       contents->append(", ");
3267     }
3268     contents->append("json_name = \"");
3269     contents->append(CEscape(json_name()));
3270     contents->append("\"");
3271   }
3272 
3273   std::string formatted_options;
3274   if (FormatBracketedOptions(depth, options(), file()->pool(),
3275                              &formatted_options)) {
3276     contents->append(bracketed ? ", " : " [");
3277     bracketed = true;
3278     contents->append(formatted_options);
3279   }
3280 
3281   if (bracketed) {
3282     contents->append("]");
3283   }
3284 
3285   if (type() == TYPE_GROUP) {
3286     if (debug_string_options.elide_group_body) {
3287       contents->append(" { ... };\n");
3288     } else {
3289       message_type()->DebugString(depth, contents, debug_string_options,
3290                                   /* include_opening_clause */ false);
3291     }
3292   } else {
3293     contents->append(";\n");
3294   }
3295 
3296   comment_printer.AddPostComment(contents);
3297 }
3298 
DebugString() const3299 std::string OneofDescriptor::DebugString() const {
3300   DebugStringOptions options;  // default values
3301   return DebugStringWithOptions(options);
3302 }
3303 
DebugStringWithOptions(const DebugStringOptions & options) const3304 std::string OneofDescriptor::DebugStringWithOptions(
3305     const DebugStringOptions& options) const {
3306   std::string contents;
3307   DebugString(0, &contents, options);
3308   return contents;
3309 }
3310 
DebugString(int depth,std::string * contents,const DebugStringOptions & debug_string_options) const3311 void OneofDescriptor::DebugString(
3312     int depth, std::string* contents,
3313     const DebugStringOptions& debug_string_options) const {
3314   std::string prefix(depth * 2, ' ');
3315   ++depth;
3316   SourceLocationCommentPrinter comment_printer(this, prefix,
3317                                                debug_string_options);
3318   comment_printer.AddPreComment(contents);
3319   strings::SubstituteAndAppend(contents, "$0oneof $1 {", prefix, name());
3320 
3321   FormatLineOptions(depth, options(), containing_type()->file()->pool(),
3322                     contents);
3323 
3324   if (debug_string_options.elide_oneof_body) {
3325     contents->append(" ... }\n");
3326   } else {
3327     contents->append("\n");
3328     for (int i = 0; i < field_count(); i++) {
3329       field(i)->DebugString(depth, contents, debug_string_options);
3330     }
3331     strings::SubstituteAndAppend(contents, "$0}\n", prefix);
3332   }
3333   comment_printer.AddPostComment(contents);
3334 }
3335 
DebugString() const3336 std::string EnumDescriptor::DebugString() const {
3337   DebugStringOptions options;  // default values
3338   return DebugStringWithOptions(options);
3339 }
3340 
DebugStringWithOptions(const DebugStringOptions & options) const3341 std::string EnumDescriptor::DebugStringWithOptions(
3342     const DebugStringOptions& options) const {
3343   std::string contents;
3344   DebugString(0, &contents, options);
3345   return contents;
3346 }
3347 
DebugString(int depth,std::string * contents,const DebugStringOptions & debug_string_options) const3348 void EnumDescriptor::DebugString(
3349     int depth, std::string* contents,
3350     const DebugStringOptions& debug_string_options) const {
3351   std::string prefix(depth * 2, ' ');
3352   ++depth;
3353 
3354   SourceLocationCommentPrinter comment_printer(this, prefix,
3355                                                debug_string_options);
3356   comment_printer.AddPreComment(contents);
3357 
3358   strings::SubstituteAndAppend(contents, "$0enum $1 {\n", prefix, name());
3359 
3360   FormatLineOptions(depth, options(), file()->pool(), contents);
3361 
3362   for (int i = 0; i < value_count(); i++) {
3363     value(i)->DebugString(depth, contents, debug_string_options);
3364   }
3365 
3366   if (reserved_range_count() > 0) {
3367     strings::SubstituteAndAppend(contents, "$0  reserved ", prefix);
3368     for (int i = 0; i < reserved_range_count(); i++) {
3369       const EnumDescriptor::ReservedRange* range = reserved_range(i);
3370       if (range->end == range->start) {
3371         strings::SubstituteAndAppend(contents, "$0, ", range->start);
3372       } else if (range->end == INT_MAX) {
3373         strings::SubstituteAndAppend(contents, "$0 to max, ", range->start);
3374       } else {
3375         strings::SubstituteAndAppend(contents, "$0 to $1, ", range->start,
3376                                   range->end);
3377       }
3378     }
3379     contents->replace(contents->size() - 2, 2, ";\n");
3380   }
3381 
3382   if (reserved_name_count() > 0) {
3383     strings::SubstituteAndAppend(contents, "$0  reserved ", prefix);
3384     for (int i = 0; i < reserved_name_count(); i++) {
3385       strings::SubstituteAndAppend(contents, "\"$0\", ",
3386                                 CEscape(reserved_name(i)));
3387     }
3388     contents->replace(contents->size() - 2, 2, ";\n");
3389   }
3390 
3391   strings::SubstituteAndAppend(contents, "$0}\n", prefix);
3392 
3393   comment_printer.AddPostComment(contents);
3394 }
3395 
DebugString() const3396 std::string EnumValueDescriptor::DebugString() const {
3397   DebugStringOptions options;  // default values
3398   return DebugStringWithOptions(options);
3399 }
3400 
DebugStringWithOptions(const DebugStringOptions & options) const3401 std::string EnumValueDescriptor::DebugStringWithOptions(
3402     const DebugStringOptions& options) const {
3403   std::string contents;
3404   DebugString(0, &contents, options);
3405   return contents;
3406 }
3407 
DebugString(int depth,std::string * contents,const DebugStringOptions & debug_string_options) const3408 void EnumValueDescriptor::DebugString(
3409     int depth, std::string* contents,
3410     const DebugStringOptions& debug_string_options) const {
3411   std::string prefix(depth * 2, ' ');
3412 
3413   SourceLocationCommentPrinter comment_printer(this, prefix,
3414                                                debug_string_options);
3415   comment_printer.AddPreComment(contents);
3416 
3417   strings::SubstituteAndAppend(contents, "$0$1 = $2", prefix, name(), number());
3418 
3419   std::string formatted_options;
3420   if (FormatBracketedOptions(depth, options(), type()->file()->pool(),
3421                              &formatted_options)) {
3422     strings::SubstituteAndAppend(contents, " [$0]", formatted_options);
3423   }
3424   contents->append(";\n");
3425 
3426   comment_printer.AddPostComment(contents);
3427 }
3428 
DebugString() const3429 std::string ServiceDescriptor::DebugString() const {
3430   DebugStringOptions options;  // default values
3431   return DebugStringWithOptions(options);
3432 }
3433 
DebugStringWithOptions(const DebugStringOptions & options) const3434 std::string ServiceDescriptor::DebugStringWithOptions(
3435     const DebugStringOptions& options) const {
3436   std::string contents;
3437   DebugString(&contents, options);
3438   return contents;
3439 }
3440 
DebugString(std::string * contents,const DebugStringOptions & debug_string_options) const3441 void ServiceDescriptor::DebugString(
3442     std::string* contents,
3443     const DebugStringOptions& debug_string_options) const {
3444   SourceLocationCommentPrinter comment_printer(this, /* prefix */ "",
3445                                                debug_string_options);
3446   comment_printer.AddPreComment(contents);
3447 
3448   strings::SubstituteAndAppend(contents, "service $0 {\n", name());
3449 
3450   FormatLineOptions(1, options(), file()->pool(), contents);
3451 
3452   for (int i = 0; i < method_count(); i++) {
3453     method(i)->DebugString(1, contents, debug_string_options);
3454   }
3455 
3456   contents->append("}\n");
3457 
3458   comment_printer.AddPostComment(contents);
3459 }
3460 
DebugString() const3461 std::string MethodDescriptor::DebugString() const {
3462   DebugStringOptions options;  // default values
3463   return DebugStringWithOptions(options);
3464 }
3465 
DebugStringWithOptions(const DebugStringOptions & options) const3466 std::string MethodDescriptor::DebugStringWithOptions(
3467     const DebugStringOptions& options) const {
3468   std::string contents;
3469   DebugString(0, &contents, options);
3470   return contents;
3471 }
3472 
DebugString(int depth,std::string * contents,const DebugStringOptions & debug_string_options) const3473 void MethodDescriptor::DebugString(
3474     int depth, std::string* contents,
3475     const DebugStringOptions& debug_string_options) const {
3476   std::string prefix(depth * 2, ' ');
3477   ++depth;
3478 
3479   SourceLocationCommentPrinter comment_printer(this, prefix,
3480                                                debug_string_options);
3481   comment_printer.AddPreComment(contents);
3482 
3483   strings::SubstituteAndAppend(
3484       contents, "$0rpc $1($4.$2) returns ($5.$3)", prefix, name(),
3485       input_type()->full_name(), output_type()->full_name(),
3486       client_streaming() ? "stream " : "", server_streaming() ? "stream " : "");
3487 
3488   std::string formatted_options;
3489   if (FormatLineOptions(depth, options(), service()->file()->pool(),
3490                         &formatted_options)) {
3491     strings::SubstituteAndAppend(contents, " {\n$0$1}\n", formatted_options,
3492                               prefix);
3493   } else {
3494     contents->append(";\n");
3495   }
3496 
3497   comment_printer.AddPostComment(contents);
3498 }
3499 
3500 // Location methods ===============================================
3501 
GetSourceLocation(const std::vector<int> & path,SourceLocation * out_location) const3502 bool FileDescriptor::GetSourceLocation(const std::vector<int>& path,
3503                                        SourceLocation* out_location) const {
3504   GOOGLE_CHECK(out_location != nullptr);
3505   if (source_code_info_) {
3506     if (const SourceCodeInfo_Location* loc =
3507             tables_->GetSourceLocation(path, source_code_info_)) {
3508       const RepeatedField<int32_t>& span = loc->span();
3509       if (span.size() == 3 || span.size() == 4) {
3510         out_location->start_line = span.Get(0);
3511         out_location->start_column = span.Get(1);
3512         out_location->end_line = span.Get(span.size() == 3 ? 0 : 2);
3513         out_location->end_column = span.Get(span.size() - 1);
3514 
3515         out_location->leading_comments = loc->leading_comments();
3516         out_location->trailing_comments = loc->trailing_comments();
3517         out_location->leading_detached_comments.assign(
3518             loc->leading_detached_comments().begin(),
3519             loc->leading_detached_comments().end());
3520         return true;
3521       }
3522     }
3523   }
3524   return false;
3525 }
3526 
GetSourceLocation(SourceLocation * out_location) const3527 bool FileDescriptor::GetSourceLocation(SourceLocation* out_location) const {
3528   std::vector<int> path;  // empty path for root FileDescriptor
3529   return GetSourceLocation(path, out_location);
3530 }
3531 
is_packed() const3532 bool FieldDescriptor::is_packed() const {
3533   if (!is_packable()) return false;
3534   if (file_->syntax() == FileDescriptor::SYNTAX_PROTO2) {
3535     return (options_ != nullptr) && options_->packed();
3536   } else {
3537     return options_ == nullptr || !options_->has_packed() || options_->packed();
3538   }
3539 }
3540 
GetSourceLocation(SourceLocation * out_location) const3541 bool Descriptor::GetSourceLocation(SourceLocation* out_location) const {
3542   std::vector<int> path;
3543   GetLocationPath(&path);
3544   return file()->GetSourceLocation(path, out_location);
3545 }
3546 
GetSourceLocation(SourceLocation * out_location) const3547 bool FieldDescriptor::GetSourceLocation(SourceLocation* out_location) const {
3548   std::vector<int> path;
3549   GetLocationPath(&path);
3550   return file()->GetSourceLocation(path, out_location);
3551 }
3552 
GetSourceLocation(SourceLocation * out_location) const3553 bool OneofDescriptor::GetSourceLocation(SourceLocation* out_location) const {
3554   std::vector<int> path;
3555   GetLocationPath(&path);
3556   return containing_type()->file()->GetSourceLocation(path, out_location);
3557 }
3558 
GetSourceLocation(SourceLocation * out_location) const3559 bool EnumDescriptor::GetSourceLocation(SourceLocation* out_location) const {
3560   std::vector<int> path;
3561   GetLocationPath(&path);
3562   return file()->GetSourceLocation(path, out_location);
3563 }
3564 
GetSourceLocation(SourceLocation * out_location) const3565 bool MethodDescriptor::GetSourceLocation(SourceLocation* out_location) const {
3566   std::vector<int> path;
3567   GetLocationPath(&path);
3568   return service()->file()->GetSourceLocation(path, out_location);
3569 }
3570 
GetSourceLocation(SourceLocation * out_location) const3571 bool ServiceDescriptor::GetSourceLocation(SourceLocation* out_location) const {
3572   std::vector<int> path;
3573   GetLocationPath(&path);
3574   return file()->GetSourceLocation(path, out_location);
3575 }
3576 
GetSourceLocation(SourceLocation * out_location) const3577 bool EnumValueDescriptor::GetSourceLocation(
3578     SourceLocation* out_location) const {
3579   std::vector<int> path;
3580   GetLocationPath(&path);
3581   return type()->file()->GetSourceLocation(path, out_location);
3582 }
3583 
GetLocationPath(std::vector<int> * output) const3584 void Descriptor::GetLocationPath(std::vector<int>* output) const {
3585   if (containing_type()) {
3586     containing_type()->GetLocationPath(output);
3587     output->push_back(DescriptorProto::kNestedTypeFieldNumber);
3588     output->push_back(index());
3589   } else {
3590     output->push_back(FileDescriptorProto::kMessageTypeFieldNumber);
3591     output->push_back(index());
3592   }
3593 }
3594 
GetLocationPath(std::vector<int> * output) const3595 void FieldDescriptor::GetLocationPath(std::vector<int>* output) const {
3596   if (is_extension()) {
3597     if (extension_scope() == nullptr) {
3598       output->push_back(FileDescriptorProto::kExtensionFieldNumber);
3599       output->push_back(index());
3600     } else {
3601       extension_scope()->GetLocationPath(output);
3602       output->push_back(DescriptorProto::kExtensionFieldNumber);
3603       output->push_back(index());
3604     }
3605   } else {
3606     containing_type()->GetLocationPath(output);
3607     output->push_back(DescriptorProto::kFieldFieldNumber);
3608     output->push_back(index());
3609   }
3610 }
3611 
GetLocationPath(std::vector<int> * output) const3612 void OneofDescriptor::GetLocationPath(std::vector<int>* output) const {
3613   containing_type()->GetLocationPath(output);
3614   output->push_back(DescriptorProto::kOneofDeclFieldNumber);
3615   output->push_back(index());
3616 }
3617 
GetLocationPath(std::vector<int> * output) const3618 void EnumDescriptor::GetLocationPath(std::vector<int>* output) const {
3619   if (containing_type()) {
3620     containing_type()->GetLocationPath(output);
3621     output->push_back(DescriptorProto::kEnumTypeFieldNumber);
3622     output->push_back(index());
3623   } else {
3624     output->push_back(FileDescriptorProto::kEnumTypeFieldNumber);
3625     output->push_back(index());
3626   }
3627 }
3628 
GetLocationPath(std::vector<int> * output) const3629 void EnumValueDescriptor::GetLocationPath(std::vector<int>* output) const {
3630   type()->GetLocationPath(output);
3631   output->push_back(EnumDescriptorProto::kValueFieldNumber);
3632   output->push_back(index());
3633 }
3634 
GetLocationPath(std::vector<int> * output) const3635 void ServiceDescriptor::GetLocationPath(std::vector<int>* output) const {
3636   output->push_back(FileDescriptorProto::kServiceFieldNumber);
3637   output->push_back(index());
3638 }
3639 
GetLocationPath(std::vector<int> * output) const3640 void MethodDescriptor::GetLocationPath(std::vector<int>* output) const {
3641   service()->GetLocationPath(output);
3642   output->push_back(ServiceDescriptorProto::kMethodFieldNumber);
3643   output->push_back(index());
3644 }
3645 
3646 // ===================================================================
3647 
3648 namespace {
3649 
3650 // Represents an options message to interpret. Extension names in the option
3651 // name are resolved relative to name_scope. element_name and orig_opt are
3652 // used only for error reporting (since the parser records locations against
3653 // pointers in the original options, not the mutable copy). The Message must be
3654 // one of the Options messages in descriptor.proto.
3655 struct OptionsToInterpret {
OptionsToInterpretgoogle::protobuf::__anon94551f7b0711::OptionsToInterpret3656   OptionsToInterpret(const std::string& ns, const std::string& el,
3657                      const std::vector<int>& path, const Message* orig_opt,
3658                      Message* opt)
3659       : name_scope(ns),
3660         element_name(el),
3661         element_path(path),
3662         original_options(orig_opt),
3663         options(opt) {}
3664   std::string name_scope;
3665   std::string element_name;
3666   std::vector<int> element_path;
3667   const Message* original_options;
3668   Message* options;
3669 };
3670 
3671 }  // namespace
3672 
3673 class DescriptorBuilder {
3674  public:
3675   DescriptorBuilder(const DescriptorPool* pool, DescriptorPool::Tables* tables,
3676                     DescriptorPool::ErrorCollector* error_collector);
3677   ~DescriptorBuilder();
3678 
3679   const FileDescriptor* BuildFile(const FileDescriptorProto& proto);
3680 
3681  private:
3682   friend class OptionInterpreter;
3683 
3684   // Non-recursive part of BuildFile functionality.
3685   FileDescriptor* BuildFileImpl(const FileDescriptorProto& proto,
3686                                 internal::FlatAllocator& alloc);
3687 
3688   const DescriptorPool* pool_;
3689   DescriptorPool::Tables* tables_;  // for convenience
3690   DescriptorPool::ErrorCollector* error_collector_;
3691 
3692   // As we build descriptors we store copies of the options messages in
3693   // them. We put pointers to those copies in this vector, as we build, so we
3694   // can later (after cross-linking) interpret those options.
3695   std::vector<OptionsToInterpret> options_to_interpret_;
3696 
3697   bool had_errors_;
3698   std::string filename_;
3699   FileDescriptor* file_;
3700   FileDescriptorTables* file_tables_;
3701   std::set<const FileDescriptor*> dependencies_;
3702 
3703   struct MessageHints {
3704     int fields_to_suggest = 0;
3705     const Message* first_reason = nullptr;
3706     DescriptorPool::ErrorCollector::ErrorLocation first_reason_location =
3707         DescriptorPool::ErrorCollector::ErrorLocation::OTHER;
3708 
RequestHintOnFieldNumbersgoogle::protobuf::DescriptorBuilder::MessageHints3709     void RequestHintOnFieldNumbers(
3710         const Message& reason,
3711         DescriptorPool::ErrorCollector::ErrorLocation reason_location,
3712         int range_start = 0, int range_end = 1) {
3713       auto fit = [](int value) {
3714         return std::min(std::max(value, 0), FieldDescriptor::kMaxNumber);
3715       };
3716       fields_to_suggest =
3717           fit(fields_to_suggest + fit(fit(range_end) - fit(range_start)));
3718       if (first_reason) return;
3719       first_reason = &reason;
3720       first_reason_location = reason_location;
3721     }
3722   };
3723 
3724   std::unordered_map<const Descriptor*, MessageHints> message_hints_;
3725 
3726   // unused_dependency_ is used to record the unused imported files.
3727   // Note: public import is not considered.
3728   std::set<const FileDescriptor*> unused_dependency_;
3729 
3730   // If LookupSymbol() finds a symbol that is in a file which is not a declared
3731   // dependency of this file, it will fail, but will set
3732   // possible_undeclared_dependency_ to point at that file.  This is only used
3733   // by AddNotDefinedError() to report a more useful error message.
3734   // possible_undeclared_dependency_name_ is the name of the symbol that was
3735   // actually found in possible_undeclared_dependency_, which may be a parent
3736   // of the symbol actually looked for.
3737   const FileDescriptor* possible_undeclared_dependency_;
3738   std::string possible_undeclared_dependency_name_;
3739 
3740   // If LookupSymbol() could resolve a symbol which is not defined,
3741   // record the resolved name.  This is only used by AddNotDefinedError()
3742   // to report a more useful error message.
3743   std::string undefine_resolved_name_;
3744 
3745   // Tracker for current recursion depth to implement recursion protection.
3746   //
3747   // Counts down to 0 when there is no depth remaining.
3748   //
3749   // Maximum recursion depth corresponds to 32 nested message declarations.
3750   int recursion_depth_ = 32;
3751 
3752   void AddError(const std::string& element_name, const Message& descriptor,
3753                 DescriptorPool::ErrorCollector::ErrorLocation location,
3754                 const std::string& error);
3755   void AddError(const std::string& element_name, const Message& descriptor,
3756                 DescriptorPool::ErrorCollector::ErrorLocation location,
3757                 const char* error);
3758   void AddRecursiveImportError(const FileDescriptorProto& proto, int from_here);
3759   void AddTwiceListedError(const FileDescriptorProto& proto, int index);
3760   void AddImportError(const FileDescriptorProto& proto, int index);
3761 
3762   // Adds an error indicating that undefined_symbol was not defined.  Must
3763   // only be called after LookupSymbol() fails.
3764   void AddNotDefinedError(
3765       const std::string& element_name, const Message& descriptor,
3766       DescriptorPool::ErrorCollector::ErrorLocation location,
3767       const std::string& undefined_symbol);
3768 
3769   void AddWarning(const std::string& element_name, const Message& descriptor,
3770                   DescriptorPool::ErrorCollector::ErrorLocation location,
3771                   const std::string& error);
3772 
3773   // Silly helper which determines if the given file is in the given package.
3774   // I.e., either file->package() == package_name or file->package() is a
3775   // nested package within package_name.
3776   bool IsInPackage(const FileDescriptor* file, const std::string& package_name);
3777 
3778   // Helper function which finds all public dependencies of the given file, and
  // stores them in the dependencies_ set in the builder.
3780   void RecordPublicDependencies(const FileDescriptor* file);
3781 
3782   // Like tables_->FindSymbol(), but additionally:
3783   // - Search the pool's underlay if not found in tables_.
  // - Ensure that the resulting Symbol is from one of the file's declared
3785   //   dependencies.
3786   Symbol FindSymbol(const std::string& name, bool build_it = true);
3787 
3788   // Like FindSymbol() but does not require that the symbol is in one of the
3789   // file's declared dependencies.
3790   Symbol FindSymbolNotEnforcingDeps(const std::string& name,
3791                                     bool build_it = true);
3792 
3793   // This implements the body of FindSymbolNotEnforcingDeps().
3794   Symbol FindSymbolNotEnforcingDepsHelper(const DescriptorPool* pool,
3795                                           const std::string& name,
3796                                           bool build_it = true);
3797 
3798   // Like FindSymbol(), but looks up the name relative to some other symbol
3799   // name.  This first searches siblings of relative_to, then siblings of its
3800   // parents, etc.  For example, LookupSymbol("foo.bar", "baz.moo.corge") makes
3801   // the following calls, returning the first non-null result:
3802   // FindSymbol("baz.moo.foo.bar"), FindSymbol("baz.foo.bar"),
3803   // FindSymbol("foo.bar").  If AllowUnknownDependencies() has been called
3804   // on the DescriptorPool, this will generate a placeholder type if
3805   // the name is not found (unless the name itself is malformed).  The
3806   // placeholder_type parameter indicates what kind of placeholder should be
3807   // constructed in this case.  The resolve_mode parameter determines whether
3808   // any symbol is returned, or only symbols that are types.  Note, however,
3809   // that LookupSymbol may still return a non-type symbol in LOOKUP_TYPES mode,
3810   // if it believes that's all it could refer to.  The caller should always
3811   // check that it receives the type of symbol it was expecting.
3812   enum ResolveMode { LOOKUP_ALL, LOOKUP_TYPES };
3813   Symbol LookupSymbol(const std::string& name, const std::string& relative_to,
3814                       DescriptorPool::PlaceholderType placeholder_type =
3815                           DescriptorPool::PLACEHOLDER_MESSAGE,
3816                       ResolveMode resolve_mode = LOOKUP_ALL,
3817                       bool build_it = true);
3818 
3819   // Like LookupSymbol() but will not return a placeholder even if
3820   // AllowUnknownDependencies() has been used.
3821   Symbol LookupSymbolNoPlaceholder(const std::string& name,
3822                                    const std::string& relative_to,
3823                                    ResolveMode resolve_mode = LOOKUP_ALL,
3824                                    bool build_it = true);
3825 
3826   // Calls tables_->AddSymbol() and records an error if it fails.  Returns
3827   // true if successful or false if failed, though most callers can ignore
3828   // the return value since an error has already been recorded.
3829   bool AddSymbol(const std::string& full_name, const void* parent,
3830                  const std::string& name, const Message& proto, Symbol symbol);
3831 
3832   // Like AddSymbol(), but succeeds if the symbol is already defined as long
3833   // as the existing definition is also a package (because it's OK to define
3834   // the same package in two different files).  Also adds all parents of the
3835   // package to the symbol table (e.g. AddPackage("foo.bar", ...) will add
3836   // "foo.bar" and "foo" to the table).
3837   void AddPackage(const std::string& name, const Message& proto,
3838                   FileDescriptor* file);
3839 
3840   // Checks that the symbol name contains only alphanumeric characters and
3841   // underscores.  Records an error otherwise.
3842   void ValidateSymbolName(const std::string& name, const std::string& full_name,
3843                           const Message& proto);
3844 
3845   // Allocates a copy of orig_options in tables_ and stores it in the
3846   // descriptor. Remembers its uninterpreted options, to be interpreted
3847   // later. DescriptorT must be one of the Descriptor messages from
3848   // descriptor.proto.
3849   template <class DescriptorT>
3850   void AllocateOptions(const typename DescriptorT::OptionsType& orig_options,
3851                        DescriptorT* descriptor, int options_field_tag,
3852                        const std::string& option_name,
3853                        internal::FlatAllocator& alloc);
3854   // Specialization for FileOptions.
3855   void AllocateOptions(const FileOptions& orig_options,
3856                        FileDescriptor* descriptor,
3857                        internal::FlatAllocator& alloc);
3858 
3859   // Implementation for AllocateOptions(). Don't call this directly.
3860   template <class DescriptorT>
3861   void AllocateOptionsImpl(
3862       const std::string& name_scope, const std::string& element_name,
3863       const typename DescriptorT::OptionsType& orig_options,
3864       DescriptorT* descriptor, const std::vector<int>& options_path,
3865       const std::string& option_name, internal::FlatAllocator& alloc);
3866 
3867   // Allocates an array of two strings, the first one is a copy of `proto_name`,
3868   // and the second one is the full name.
3869   // Full proto name is "scope.proto_name" if scope is non-empty and
3870   // "proto_name" otherwise.
3871   const std::string* AllocateNameStrings(const std::string& scope,
3872                                          const std::string& proto_name,
3873                                          internal::FlatAllocator& alloc);
3874 
3875   // These methods all have the same signature for the sake of the BUILD_ARRAY
3876   // macro, below.
3877   void BuildMessage(const DescriptorProto& proto, const Descriptor* parent,
3878                     Descriptor* result, internal::FlatAllocator& alloc);
3879   void BuildFieldOrExtension(const FieldDescriptorProto& proto,
3880                              Descriptor* parent, FieldDescriptor* result,
3881                              bool is_extension, internal::FlatAllocator& alloc);
BuildField(const FieldDescriptorProto & proto,Descriptor * parent,FieldDescriptor * result,internal::FlatAllocator & alloc)3882   void BuildField(const FieldDescriptorProto& proto, Descriptor* parent,
3883                   FieldDescriptor* result, internal::FlatAllocator& alloc) {
3884     BuildFieldOrExtension(proto, parent, result, false, alloc);
3885   }
BuildExtension(const FieldDescriptorProto & proto,Descriptor * parent,FieldDescriptor * result,internal::FlatAllocator & alloc)3886   void BuildExtension(const FieldDescriptorProto& proto, Descriptor* parent,
3887                       FieldDescriptor* result, internal::FlatAllocator& alloc) {
3888     BuildFieldOrExtension(proto, parent, result, true, alloc);
3889   }
3890   void BuildExtensionRange(const DescriptorProto::ExtensionRange& proto,
3891                            const Descriptor* parent,
3892                            Descriptor::ExtensionRange* result,
3893                            internal::FlatAllocator& alloc);
3894   void BuildReservedRange(const DescriptorProto::ReservedRange& proto,
3895                           const Descriptor* parent,
3896                           Descriptor::ReservedRange* result,
3897                           internal::FlatAllocator& alloc);
3898   void BuildReservedRange(const EnumDescriptorProto::EnumReservedRange& proto,
3899                           const EnumDescriptor* parent,
3900                           EnumDescriptor::ReservedRange* result,
3901                           internal::FlatAllocator& alloc);
3902   void BuildOneof(const OneofDescriptorProto& proto, Descriptor* parent,
3903                   OneofDescriptor* result, internal::FlatAllocator& alloc);
3904   void CheckEnumValueUniqueness(const EnumDescriptorProto& proto,
3905                                 const EnumDescriptor* result);
3906   void BuildEnum(const EnumDescriptorProto& proto, const Descriptor* parent,
3907                  EnumDescriptor* result, internal::FlatAllocator& alloc);
3908   void BuildEnumValue(const EnumValueDescriptorProto& proto,
3909                       const EnumDescriptor* parent, EnumValueDescriptor* result,
3910                       internal::FlatAllocator& alloc);
3911   void BuildService(const ServiceDescriptorProto& proto, const void* dummy,
3912                     ServiceDescriptor* result, internal::FlatAllocator& alloc);
3913   void BuildMethod(const MethodDescriptorProto& proto,
3914                    const ServiceDescriptor* parent, MethodDescriptor* result,
3915                    internal::FlatAllocator& alloc);
3916 
3917   void LogUnusedDependency(const FileDescriptorProto& proto,
3918                            const FileDescriptor* result);
3919 
3920   // Must be run only after building.
3921   //
3922   // NOTE: Options will not be available during cross-linking, as they
3923   // have not yet been interpreted. Defer any handling of options to the
3924   // Validate*Options methods.
3925   void CrossLinkFile(FileDescriptor* file, const FileDescriptorProto& proto);
3926   void CrossLinkMessage(Descriptor* message, const DescriptorProto& proto);
3927   void CrossLinkField(FieldDescriptor* field,
3928                       const FieldDescriptorProto& proto);
3929   void CrossLinkExtensionRange(Descriptor::ExtensionRange* range,
3930                                const DescriptorProto::ExtensionRange& proto);
3931   void CrossLinkEnum(EnumDescriptor* enum_type,
3932                      const EnumDescriptorProto& proto);
3933   void CrossLinkEnumValue(EnumValueDescriptor* enum_value,
3934                           const EnumValueDescriptorProto& proto);
3935   void CrossLinkService(ServiceDescriptor* service,
3936                         const ServiceDescriptorProto& proto);
3937   void CrossLinkMethod(MethodDescriptor* method,
3938                        const MethodDescriptorProto& proto);
3939   void SuggestFieldNumbers(FileDescriptor* file,
3940                            const FileDescriptorProto& proto);
3941 
3942   // Must be run only after cross-linking.
3943   void InterpretOptions();
3944 
3945   // A helper class for interpreting options.
3946   class OptionInterpreter {
3947    public:
3948     // Creates an interpreter that operates in the context of the pool of the
3949     // specified builder, which must not be nullptr. We don't take ownership of
3950     // the builder.
3951     explicit OptionInterpreter(DescriptorBuilder* builder);
3952 
3953     ~OptionInterpreter();
3954 
3955     // Interprets the uninterpreted options in the specified Options message.
3956     // On error, calls AddError() on the underlying builder and returns false.
3957     // Otherwise returns true.
3958     bool InterpretOptions(OptionsToInterpret* options_to_interpret);
3959 
3960     // Updates the given source code info by re-writing uninterpreted option
3961     // locations to refer to the corresponding interpreted option.
3962     void UpdateSourceCodeInfo(SourceCodeInfo* info);
3963 
3964     class AggregateOptionFinder;
3965 
3966    private:
3967     // Interprets uninterpreted_option_ on the specified message, which
3968     // must be the mutable copy of the original options message to which
3969     // uninterpreted_option_ belongs. The given src_path is the source
3970     // location path to the uninterpreted option, and options_path is the
3971     // source location path to the options message. The location paths are
3972     // recorded and then used in UpdateSourceCodeInfo.
3973     bool InterpretSingleOption(Message* options,
3974                                const std::vector<int>& src_path,
3975                                const std::vector<int>& options_path);
3976 
3977     // Adds the uninterpreted_option to the given options message verbatim.
3978     // Used when AllowUnknownDependencies() is in effect and we can't find
3979     // the option's definition.
3980     void AddWithoutInterpreting(const UninterpretedOption& uninterpreted_option,
3981                                 Message* options);
3982 
3983     // A recursive helper function that drills into the intermediate fields
3984     // in unknown_fields to check if field innermost_field is set on the
3985     // innermost message. Returns false and sets an error if so.
3986     bool ExamineIfOptionIsSet(
3987         std::vector<const FieldDescriptor*>::const_iterator
3988             intermediate_fields_iter,
3989         std::vector<const FieldDescriptor*>::const_iterator
3990             intermediate_fields_end,
3991         const FieldDescriptor* innermost_field,
3992         const std::string& debug_msg_name,
3993         const UnknownFieldSet& unknown_fields);
3994 
3995     // Validates the value for the option field of the currently interpreted
3996     // option and then sets it on the unknown_field.
3997     bool SetOptionValue(const FieldDescriptor* option_field,
3998                         UnknownFieldSet* unknown_fields);
3999 
4000     // Parses an aggregate value for a CPPTYPE_MESSAGE option and
4001     // saves it into *unknown_fields.
4002     bool SetAggregateOption(const FieldDescriptor* option_field,
4003                             UnknownFieldSet* unknown_fields);
4004 
4005     // Convenience functions to set an int field the right way, depending on
4006     // its wire type (a single int CppType can represent multiple wire types).
4007     void SetInt32(int number, int32_t value, FieldDescriptor::Type type,
4008                   UnknownFieldSet* unknown_fields);
4009     void SetInt64(int number, int64_t value, FieldDescriptor::Type type,
4010                   UnknownFieldSet* unknown_fields);
4011     void SetUInt32(int number, uint32_t value, FieldDescriptor::Type type,
4012                    UnknownFieldSet* unknown_fields);
4013     void SetUInt64(int number, uint64_t value, FieldDescriptor::Type type,
4014                    UnknownFieldSet* unknown_fields);
4015 
4016     // A helper function that adds an error at the specified location of the
4017     // option we're currently interpreting, and returns false.
AddOptionError(DescriptorPool::ErrorCollector::ErrorLocation location,const std::string & msg)4018     bool AddOptionError(DescriptorPool::ErrorCollector::ErrorLocation location,
4019                         const std::string& msg) {
4020       builder_->AddError(options_to_interpret_->element_name,
4021                          *uninterpreted_option_, location, msg);
4022       return false;
4023     }
4024 
4025     // A helper function that adds an error at the location of the option name
4026     // and returns false.
AddNameError(const std::string & msg)4027     bool AddNameError(const std::string& msg) {
4028 #ifdef PROTOBUF_INTERNAL_IGNORE_FIELD_NAME_ERRORS_
4029       return true;
4030 #else   // PROTOBUF_INTERNAL_IGNORE_FIELD_NAME_ERRORS_
4031       return AddOptionError(DescriptorPool::ErrorCollector::OPTION_NAME, msg);
4032 #endif  // PROTOBUF_INTERNAL_IGNORE_FIELD_NAME_ERRORS_
4033     }
4034 
4035     // A helper function that adds an error at the location of the option name
4036     // and returns false.
AddValueError(const std::string & msg)4037     bool AddValueError(const std::string& msg) {
4038       return AddOptionError(DescriptorPool::ErrorCollector::OPTION_VALUE, msg);
4039     }
4040 
4041     // We interpret against this builder's pool. Is never nullptr. We don't own
4042     // this pointer.
4043     DescriptorBuilder* builder_;
4044 
4045     // The options we're currently interpreting, or nullptr if we're not in a
4046     // call to InterpretOptions.
4047     const OptionsToInterpret* options_to_interpret_;
4048 
4049     // The option we're currently interpreting within options_to_interpret_, or
4050     // nullptr if we're not in a call to InterpretOptions(). This points to a
4051     // submessage of the original option, not the mutable copy. Therefore we
4052     // can use it to find locations recorded by the parser.
4053     const UninterpretedOption* uninterpreted_option_;
4054 
4055     // This maps the element path of uninterpreted options to the element path
4056     // of the resulting interpreted option. This is used to modify a file's
4057     // source code info to account for option interpretation.
4058     std::map<std::vector<int>, std::vector<int>> interpreted_paths_;
4059 
4060     // This maps the path to a repeated option field to the known number of
4061     // elements the field contains. This is used to track the compute the
4062     // index portion of the element path when interpreting a single option.
4063     std::map<std::vector<int>, int> repeated_option_counts_;
4064 
4065     // Factory used to create the dynamic messages we need to parse
4066     // any aggregate option values we encounter.
4067     DynamicMessageFactory dynamic_factory_;
4068 
4069     GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(OptionInterpreter);
4070   };
4071 
4072   // Work-around for broken compilers:  According to the C++ standard,
4073   // OptionInterpreter should have access to the private members of any class
4074   // which has declared DescriptorBuilder as a friend.  Unfortunately some old
4075   // versions of GCC and other compilers do not implement this correctly.  So,
4076   // we have to have these intermediate methods to provide access.  We also
4077   // redundantly declare OptionInterpreter a friend just to make things extra
4078   // clear for these bad compilers.
4079   friend class OptionInterpreter;
4080   friend class OptionInterpreter::AggregateOptionFinder;
4081 
get_allow_unknown(const DescriptorPool * pool)4082   static inline bool get_allow_unknown(const DescriptorPool* pool) {
4083     return pool->allow_unknown_;
4084   }
get_enforce_weak(const DescriptorPool * pool)4085   static inline bool get_enforce_weak(const DescriptorPool* pool) {
4086     return pool->enforce_weak_;
4087   }
get_is_placeholder(const Descriptor * descriptor)4088   static inline bool get_is_placeholder(const Descriptor* descriptor) {
4089     return descriptor != nullptr && descriptor->is_placeholder_;
4090   }
assert_mutex_held(const DescriptorPool * pool)4091   static inline void assert_mutex_held(const DescriptorPool* pool) {
4092     if (pool->mutex_ != nullptr) {
4093       pool->mutex_->AssertHeld();
4094     }
4095   }
4096 
4097   // Must be run only after options have been interpreted.
4098   //
4099   // NOTE: Validation code must only reference the options in the mutable
4100   // descriptors, which are the ones that have been interpreted. The const
4101   // proto references are passed in only so they can be provided to calls to
4102   // AddError(). Do not look at their options, which have not been interpreted.
4103   void ValidateFileOptions(FileDescriptor* file,
4104                            const FileDescriptorProto& proto);
4105   void ValidateMessageOptions(Descriptor* message,
4106                               const DescriptorProto& proto);
4107   void ValidateFieldOptions(FieldDescriptor* field,
4108                             const FieldDescriptorProto& proto);
4109   void ValidateEnumOptions(EnumDescriptor* enm,
4110                            const EnumDescriptorProto& proto);
4111   void ValidateEnumValueOptions(EnumValueDescriptor* enum_value,
4112                                 const EnumValueDescriptorProto& proto);
4113   void ValidateExtensionRangeOptions(
4114       const std::string& full_name, Descriptor::ExtensionRange* extension_range,
4115       const DescriptorProto_ExtensionRange& proto);
4116   void ValidateServiceOptions(ServiceDescriptor* service,
4117                               const ServiceDescriptorProto& proto);
4118   void ValidateMethodOptions(MethodDescriptor* method,
4119                              const MethodDescriptorProto& proto);
4120   void ValidateProto3(FileDescriptor* file, const FileDescriptorProto& proto);
4121   void ValidateProto3Message(Descriptor* message, const DescriptorProto& proto);
4122   void ValidateProto3Field(FieldDescriptor* field,
4123                            const FieldDescriptorProto& proto);
4124   void ValidateProto3Enum(EnumDescriptor* enm,
4125                           const EnumDescriptorProto& proto);
4126 
4127   // Returns true if the map entry message is compatible with the
4128   // auto-generated entry message from map fields syntax.
4129   bool ValidateMapEntry(FieldDescriptor* field,
4130                         const FieldDescriptorProto& proto);
4131 
4132   // Recursively detects naming conflicts with map entry types for a
4133   // better error message.
4134   void DetectMapConflicts(const Descriptor* message,
4135                           const DescriptorProto& proto);
4136 
4137   void ValidateJSType(FieldDescriptor* field,
4138                       const FieldDescriptorProto& proto);
4139 };
4140 
BuildFile(const FileDescriptorProto & proto)4141 const FileDescriptor* DescriptorPool::BuildFile(
4142     const FileDescriptorProto& proto) {
4143   GOOGLE_CHECK(fallback_database_ == nullptr)
4144       << "Cannot call BuildFile on a DescriptorPool that uses a "
4145          "DescriptorDatabase.  You must instead find a way to get your file "
4146          "into the underlying database.";
4147   GOOGLE_CHECK(mutex_ == nullptr);  // Implied by the above GOOGLE_CHECK.
4148   tables_->known_bad_symbols_.clear();
4149   tables_->known_bad_files_.clear();
4150   return DescriptorBuilder(this, tables_.get(), nullptr).BuildFile(proto);
4151 }
4152 
BuildFileCollectingErrors(const FileDescriptorProto & proto,ErrorCollector * error_collector)4153 const FileDescriptor* DescriptorPool::BuildFileCollectingErrors(
4154     const FileDescriptorProto& proto, ErrorCollector* error_collector) {
4155   GOOGLE_CHECK(fallback_database_ == nullptr)
4156       << "Cannot call BuildFile on a DescriptorPool that uses a "
4157          "DescriptorDatabase.  You must instead find a way to get your file "
4158          "into the underlying database.";
4159   GOOGLE_CHECK(mutex_ == nullptr);  // Implied by the above GOOGLE_CHECK.
4160   tables_->known_bad_symbols_.clear();
4161   tables_->known_bad_files_.clear();
4162   return DescriptorBuilder(this, tables_.get(), error_collector)
4163       .BuildFile(proto);
4164 }
4165 
BuildFileFromDatabase(const FileDescriptorProto & proto) const4166 const FileDescriptor* DescriptorPool::BuildFileFromDatabase(
4167     const FileDescriptorProto& proto) const {
4168   mutex_->AssertHeld();
4169   if (tables_->known_bad_files_.count(proto.name()) > 0) {
4170     return nullptr;
4171   }
4172   const FileDescriptor* result =
4173       DescriptorBuilder(this, tables_.get(), default_error_collector_)
4174           .BuildFile(proto);
4175   if (result == nullptr) {
4176     tables_->known_bad_files_.insert(proto.name());
4177   }
4178   return result;
4179 }
4180 
DescriptorBuilder(const DescriptorPool * pool,DescriptorPool::Tables * tables,DescriptorPool::ErrorCollector * error_collector)4181 DescriptorBuilder::DescriptorBuilder(
4182     const DescriptorPool* pool, DescriptorPool::Tables* tables,
4183     DescriptorPool::ErrorCollector* error_collector)
4184     : pool_(pool),
4185       tables_(tables),
4186       error_collector_(error_collector),
4187       had_errors_(false),
4188       possible_undeclared_dependency_(nullptr),
4189       undefine_resolved_name_("") {}
4190 
~DescriptorBuilder()4191 DescriptorBuilder::~DescriptorBuilder() {}
4192 
AddError(const std::string & element_name,const Message & descriptor,DescriptorPool::ErrorCollector::ErrorLocation location,const std::string & error)4193 void DescriptorBuilder::AddError(
4194     const std::string& element_name, const Message& descriptor,
4195     DescriptorPool::ErrorCollector::ErrorLocation location,
4196     const std::string& error) {
4197   if (error_collector_ == nullptr) {
4198     if (!had_errors_) {
4199       GOOGLE_LOG(ERROR) << "Invalid proto descriptor for file \"" << filename_
4200                  << "\":";
4201     }
4202     GOOGLE_LOG(ERROR) << "  " << element_name << ": " << error;
4203   } else {
4204     error_collector_->AddError(filename_, element_name, &descriptor, location,
4205                                error);
4206   }
4207   had_errors_ = true;
4208 }
4209 
AddError(const std::string & element_name,const Message & descriptor,DescriptorPool::ErrorCollector::ErrorLocation location,const char * error)4210 void DescriptorBuilder::AddError(
4211     const std::string& element_name, const Message& descriptor,
4212     DescriptorPool::ErrorCollector::ErrorLocation location, const char* error) {
4213   AddError(element_name, descriptor, location, std::string(error));
4214 }
4215 
AddNotDefinedError(const std::string & element_name,const Message & descriptor,DescriptorPool::ErrorCollector::ErrorLocation location,const std::string & undefined_symbol)4216 void DescriptorBuilder::AddNotDefinedError(
4217     const std::string& element_name, const Message& descriptor,
4218     DescriptorPool::ErrorCollector::ErrorLocation location,
4219     const std::string& undefined_symbol) {
4220   if (possible_undeclared_dependency_ == nullptr &&
4221       undefine_resolved_name_.empty()) {
4222     AddError(element_name, descriptor, location,
4223              "\"" + undefined_symbol + "\" is not defined.");
4224   } else {
4225     if (possible_undeclared_dependency_ != nullptr) {
4226       AddError(element_name, descriptor, location,
4227                "\"" + possible_undeclared_dependency_name_ +
4228                    "\" seems to be defined in \"" +
4229                    possible_undeclared_dependency_->name() +
4230                    "\", which is not "
4231                    "imported by \"" +
4232                    filename_ +
4233                    "\".  To use it here, please "
4234                    "add the necessary import.");
4235     }
4236     if (!undefine_resolved_name_.empty()) {
4237       AddError(element_name, descriptor, location,
4238                "\"" + undefined_symbol + "\" is resolved to \"" +
4239                    undefine_resolved_name_ +
4240                    "\", which is not defined. "
4241                    "The innermost scope is searched first in name resolution. "
4242                    "Consider using a leading '.'(i.e., \"." +
4243                    undefined_symbol + "\") to start from the outermost scope.");
4244     }
4245   }
4246 }
4247 
AddWarning(const std::string & element_name,const Message & descriptor,DescriptorPool::ErrorCollector::ErrorLocation location,const std::string & error)4248 void DescriptorBuilder::AddWarning(
4249     const std::string& element_name, const Message& descriptor,
4250     DescriptorPool::ErrorCollector::ErrorLocation location,
4251     const std::string& error) {
4252   if (error_collector_ == nullptr) {
4253     GOOGLE_LOG(WARNING) << filename_ << " " << element_name << ": " << error;
4254   } else {
4255     error_collector_->AddWarning(filename_, element_name, &descriptor, location,
4256                                  error);
4257   }
4258 }
4259 
IsInPackage(const FileDescriptor * file,const std::string & package_name)4260 bool DescriptorBuilder::IsInPackage(const FileDescriptor* file,
4261                                     const std::string& package_name) {
4262   return HasPrefixString(file->package(), package_name) &&
4263          (file->package().size() == package_name.size() ||
4264           file->package()[package_name.size()] == '.');
4265 }
4266 
RecordPublicDependencies(const FileDescriptor * file)4267 void DescriptorBuilder::RecordPublicDependencies(const FileDescriptor* file) {
4268   if (file == nullptr || !dependencies_.insert(file).second) return;
4269   for (int i = 0; file != nullptr && i < file->public_dependency_count(); i++) {
4270     RecordPublicDependencies(file->public_dependency(i));
4271   }
4272 }
4273 
FindSymbolNotEnforcingDepsHelper(const DescriptorPool * pool,const std::string & name,bool build_it)4274 Symbol DescriptorBuilder::FindSymbolNotEnforcingDepsHelper(
4275     const DescriptorPool* pool, const std::string& name, bool build_it) {
4276   // If we are looking at an underlay, we must lock its mutex_, since we are
4277   // accessing the underlay's tables_ directly.
4278   MutexLockMaybe lock((pool == pool_) ? nullptr : pool->mutex_);
4279 
4280   Symbol result = pool->tables_->FindSymbol(name);
4281   if (result.IsNull() && pool->underlay_ != nullptr) {
4282     // Symbol not found; check the underlay.
4283     result = FindSymbolNotEnforcingDepsHelper(pool->underlay_, name);
4284   }
4285 
4286   if (result.IsNull()) {
4287     // With lazily_build_dependencies_, a symbol lookup at cross link time is
4288     // not guaranteed to be successful. In most cases, build_it will be false,
4289     // which intentionally prevents us from building an import until it's
4290     // actually needed. In some cases, like registering an extension, we want
4291     // to build the file containing the symbol, and build_it will be set.
4292     // Also, build_it will be true when !lazily_build_dependencies_, to provide
4293     // better error reporting of missing dependencies.
4294     if (build_it && pool->TryFindSymbolInFallbackDatabase(name)) {
4295       result = pool->tables_->FindSymbol(name);
4296     }
4297   }
4298 
4299   return result;
4300 }
4301 
FindSymbolNotEnforcingDeps(const std::string & name,bool build_it)4302 Symbol DescriptorBuilder::FindSymbolNotEnforcingDeps(const std::string& name,
4303                                                      bool build_it) {
4304   Symbol result = FindSymbolNotEnforcingDepsHelper(pool_, name, build_it);
4305   // Only find symbols which were defined in this file or one of its
4306   // dependencies.
4307   const FileDescriptor* file = result.GetFile();
4308   if (file == file_ || dependencies_.count(file) > 0) {
4309     unused_dependency_.erase(file);
4310   }
4311   return result;
4312 }
4313 
FindSymbol(const std::string & name,bool build_it)4314 Symbol DescriptorBuilder::FindSymbol(const std::string& name, bool build_it) {
4315   Symbol result = FindSymbolNotEnforcingDeps(name, build_it);
4316 
4317   if (result.IsNull()) return result;
4318 
4319   if (!pool_->enforce_dependencies_) {
4320     // Hack for CompilerUpgrader, and also used for lazily_build_dependencies_
4321     return result;
4322   }
4323 
4324   // Only find symbols which were defined in this file or one of its
4325   // dependencies.
4326   const FileDescriptor* file = result.GetFile();
4327   if (file == file_ || dependencies_.count(file) > 0) {
4328     return result;
4329   }
4330 
4331   if (result.IsPackage()) {
4332     // Arg, this is overcomplicated.  The symbol is a package name.  It could
4333     // be that the package was defined in multiple files.  result.GetFile()
4334     // returns the first file we saw that used this package.  We've determined
4335     // that that file is not a direct dependency of the file we are currently
4336     // building, but it could be that some other file which *is* a direct
4337     // dependency also defines the same package.  We can't really rule out this
4338     // symbol unless none of the dependencies define it.
4339     if (IsInPackage(file_, name)) return result;
4340     for (std::set<const FileDescriptor*>::const_iterator it =
4341              dependencies_.begin();
4342          it != dependencies_.end(); ++it) {
4343       // Note:  A dependency may be nullptr if it was not found or had errors.
4344       if (*it != nullptr && IsInPackage(*it, name)) return result;
4345     }
4346   }
4347 
4348   possible_undeclared_dependency_ = file;
4349   possible_undeclared_dependency_name_ = name;
4350   return Symbol();
4351 }
4352 
LookupSymbolNoPlaceholder(const std::string & name,const std::string & relative_to,ResolveMode resolve_mode,bool build_it)4353 Symbol DescriptorBuilder::LookupSymbolNoPlaceholder(
4354     const std::string& name, const std::string& relative_to,
4355     ResolveMode resolve_mode, bool build_it) {
4356   possible_undeclared_dependency_ = nullptr;
4357   undefine_resolved_name_.clear();
4358 
4359   if (!name.empty() && name[0] == '.') {
4360     // Fully-qualified name.
4361     return FindSymbol(name.substr(1), build_it);
4362   }
4363 
4364   // If name is something like "Foo.Bar.baz", and symbols named "Foo" are
4365   // defined in multiple parent scopes, we only want to find "Bar.baz" in the
4366   // innermost one.  E.g., the following should produce an error:
4367   //   message Bar { message Baz {} }
4368   //   message Foo {
4369   //     message Bar {
4370   //     }
4371   //     optional Bar.Baz baz = 1;
4372   //   }
4373   // So, we look for just "Foo" first, then look for "Bar.baz" within it if
4374   // found.
4375   std::string::size_type name_dot_pos = name.find_first_of('.');
4376   std::string first_part_of_name;
4377   if (name_dot_pos == std::string::npos) {
4378     first_part_of_name = name;
4379   } else {
4380     first_part_of_name = name.substr(0, name_dot_pos);
4381   }
4382 
4383   std::string scope_to_try(relative_to);
4384 
4385   while (true) {
4386     // Chop off the last component of the scope.
4387     std::string::size_type dot_pos = scope_to_try.find_last_of('.');
4388     if (dot_pos == std::string::npos) {
4389       return FindSymbol(name, build_it);
4390     } else {
4391       scope_to_try.erase(dot_pos);
4392     }
4393 
4394     // Append ".first_part_of_name" and try to find.
4395     std::string::size_type old_size = scope_to_try.size();
4396     scope_to_try.append(1, '.');
4397     scope_to_try.append(first_part_of_name);
4398     Symbol result = FindSymbol(scope_to_try, build_it);
4399     if (!result.IsNull()) {
4400       if (first_part_of_name.size() < name.size()) {
4401         // name is a compound symbol, of which we only found the first part.
4402         // Now try to look up the rest of it.
4403         if (result.IsAggregate()) {
4404           scope_to_try.append(name, first_part_of_name.size(),
4405                               name.size() - first_part_of_name.size());
4406           result = FindSymbol(scope_to_try, build_it);
4407           if (result.IsNull()) {
4408             undefine_resolved_name_ = scope_to_try;
4409           }
4410           return result;
4411         } else {
4412           // We found a symbol but it's not an aggregate.  Continue the loop.
4413         }
4414       } else {
4415         if (resolve_mode == LOOKUP_TYPES && !result.IsType()) {
4416           // We found a symbol but it's not a type.  Continue the loop.
4417         } else {
4418           return result;
4419         }
4420       }
4421     }
4422 
4423     // Not found.  Remove the name so we can try again.
4424     scope_to_try.erase(old_size);
4425   }
4426 }
4427 
LookupSymbol(const std::string & name,const std::string & relative_to,DescriptorPool::PlaceholderType placeholder_type,ResolveMode resolve_mode,bool build_it)4428 Symbol DescriptorBuilder::LookupSymbol(
4429     const std::string& name, const std::string& relative_to,
4430     DescriptorPool::PlaceholderType placeholder_type, ResolveMode resolve_mode,
4431     bool build_it) {
4432   Symbol result =
4433       LookupSymbolNoPlaceholder(name, relative_to, resolve_mode, build_it);
4434   if (result.IsNull() && pool_->allow_unknown_) {
4435     // Not found, but AllowUnknownDependencies() is enabled.  Return a
4436     // placeholder instead.
4437     result = pool_->NewPlaceholderWithMutexHeld(name, placeholder_type);
4438   }
4439   return result;
4440 }
4441 
ValidateQualifiedName(StringPiece name)4442 static bool ValidateQualifiedName(StringPiece name) {
4443   bool last_was_period = false;
4444 
4445   for (char character : name) {
4446     // I don't trust isalnum() due to locales.  :(
4447     if (('a' <= character && character <= 'z') ||
4448         ('A' <= character && character <= 'Z') ||
4449         ('0' <= character && character <= '9') || (character == '_')) {
4450       last_was_period = false;
4451     } else if (character == '.') {
4452       if (last_was_period) return false;
4453       last_was_period = true;
4454     } else {
4455       return false;
4456     }
4457   }
4458 
4459   return !name.empty() && !last_was_period;
4460 }
4461 
NewPlaceholder(StringPiece name,PlaceholderType placeholder_type) const4462 Symbol DescriptorPool::NewPlaceholder(StringPiece name,
4463                                       PlaceholderType placeholder_type) const {
4464   MutexLockMaybe lock(mutex_);
4465   return NewPlaceholderWithMutexHeld(name, placeholder_type);
4466 }
4467 
NewPlaceholderWithMutexHeld(StringPiece name,PlaceholderType placeholder_type) const4468 Symbol DescriptorPool::NewPlaceholderWithMutexHeld(
4469     StringPiece name, PlaceholderType placeholder_type) const {
4470   if (mutex_) {
4471     mutex_->AssertHeld();
4472   }
4473   // Compute names.
4474   StringPiece placeholder_full_name;
4475   StringPiece placeholder_name;
4476   const std::string* placeholder_package;
4477 
4478   if (!ValidateQualifiedName(name)) return Symbol();
4479   if (name[0] == '.') {
4480     // Fully-qualified.
4481     placeholder_full_name = name.substr(1);
4482   } else {
4483     placeholder_full_name = name;
4484   }
4485 
4486   // Create the placeholders.
4487   internal::FlatAllocator alloc;
4488   alloc.PlanArray<FileDescriptor>(1);
4489   alloc.PlanArray<std::string>(2);
4490   if (placeholder_type == PLACEHOLDER_ENUM) {
4491     alloc.PlanArray<EnumDescriptor>(1);
4492     alloc.PlanArray<EnumValueDescriptor>(1);
4493     alloc.PlanArray<std::string>(2);  // names for the descriptor.
4494     alloc.PlanArray<std::string>(2);  // names for the value.
4495   } else {
4496     alloc.PlanArray<Descriptor>(1);
4497     alloc.PlanArray<std::string>(2);  // names for the descriptor.
4498     if (placeholder_type == PLACEHOLDER_EXTENDABLE_MESSAGE) {
4499       alloc.PlanArray<Descriptor::ExtensionRange>(1);
4500     }
4501   }
4502   alloc.FinalizePlanning(tables_);
4503 
4504   const std::string::size_type dotpos = placeholder_full_name.find_last_of('.');
4505   if (dotpos != std::string::npos) {
4506     placeholder_package =
4507         alloc.AllocateStrings(placeholder_full_name.substr(0, dotpos));
4508     placeholder_name = placeholder_full_name.substr(dotpos + 1);
4509   } else {
4510     placeholder_package = alloc.AllocateStrings("");
4511     placeholder_name = placeholder_full_name;
4512   }
4513 
4514   FileDescriptor* placeholder_file = NewPlaceholderFileWithMutexHeld(
4515       StrCat(placeholder_full_name, ".placeholder.proto"), alloc);
4516   placeholder_file->package_ = placeholder_package;
4517 
4518   if (placeholder_type == PLACEHOLDER_ENUM) {
4519     placeholder_file->enum_type_count_ = 1;
4520     placeholder_file->enum_types_ = alloc.AllocateArray<EnumDescriptor>(1);
4521 
4522     EnumDescriptor* placeholder_enum = &placeholder_file->enum_types_[0];
4523     memset(static_cast<void*>(placeholder_enum), 0, sizeof(*placeholder_enum));
4524 
4525     placeholder_enum->all_names_ =
4526         alloc.AllocateStrings(placeholder_name, placeholder_full_name);
4527     placeholder_enum->file_ = placeholder_file;
4528     placeholder_enum->options_ = &EnumOptions::default_instance();
4529     placeholder_enum->is_placeholder_ = true;
4530     placeholder_enum->is_unqualified_placeholder_ = (name[0] != '.');
4531 
4532     // Enums must have at least one value.
4533     placeholder_enum->value_count_ = 1;
4534     placeholder_enum->values_ = alloc.AllocateArray<EnumValueDescriptor>(1);
4535     // Disable fast-path lookup for this enum.
4536     placeholder_enum->sequential_value_limit_ = -1;
4537 
4538     EnumValueDescriptor* placeholder_value = &placeholder_enum->values_[0];
4539     memset(static_cast<void*>(placeholder_value), 0,
4540            sizeof(*placeholder_value));
4541 
4542     // Note that enum value names are siblings of their type, not children.
4543     placeholder_value->all_names_ = alloc.AllocateStrings(
4544         "PLACEHOLDER_VALUE", placeholder_package->empty()
4545                                  ? "PLACEHOLDER_VALUE"
4546                                  : *placeholder_package + ".PLACEHOLDER_VALUE");
4547 
4548     placeholder_value->number_ = 0;
4549     placeholder_value->type_ = placeholder_enum;
4550     placeholder_value->options_ = &EnumValueOptions::default_instance();
4551 
4552     return Symbol(placeholder_enum);
4553   } else {
4554     placeholder_file->message_type_count_ = 1;
4555     placeholder_file->message_types_ = alloc.AllocateArray<Descriptor>(1);
4556 
4557     Descriptor* placeholder_message = &placeholder_file->message_types_[0];
4558     memset(static_cast<void*>(placeholder_message), 0,
4559            sizeof(*placeholder_message));
4560 
4561     placeholder_message->all_names_ =
4562         alloc.AllocateStrings(placeholder_name, placeholder_full_name);
4563     placeholder_message->file_ = placeholder_file;
4564     placeholder_message->options_ = &MessageOptions::default_instance();
4565     placeholder_message->is_placeholder_ = true;
4566     placeholder_message->is_unqualified_placeholder_ = (name[0] != '.');
4567 
4568     if (placeholder_type == PLACEHOLDER_EXTENDABLE_MESSAGE) {
4569       placeholder_message->extension_range_count_ = 1;
4570       placeholder_message->extension_ranges_ =
4571           alloc.AllocateArray<Descriptor::ExtensionRange>(1);
4572       placeholder_message->extension_ranges_[0].start = 1;
4573       // kMaxNumber + 1 because ExtensionRange::end is exclusive.
4574       placeholder_message->extension_ranges_[0].end =
4575           FieldDescriptor::kMaxNumber + 1;
4576       placeholder_message->extension_ranges_[0].options_ = nullptr;
4577     }
4578 
4579     return Symbol(placeholder_message);
4580   }
4581 }
4582 
NewPlaceholderFile(StringPiece name) const4583 FileDescriptor* DescriptorPool::NewPlaceholderFile(
4584     StringPiece name) const {
4585   MutexLockMaybe lock(mutex_);
4586   internal::FlatAllocator alloc;
4587   alloc.PlanArray<FileDescriptor>(1);
4588   alloc.PlanArray<std::string>(1);
4589   alloc.FinalizePlanning(tables_);
4590 
4591   return NewPlaceholderFileWithMutexHeld(name, alloc);
4592 }
4593 
NewPlaceholderFileWithMutexHeld(StringPiece name,internal::FlatAllocator & alloc) const4594 FileDescriptor* DescriptorPool::NewPlaceholderFileWithMutexHeld(
4595     StringPiece name, internal::FlatAllocator& alloc) const {
4596   if (mutex_) {
4597     mutex_->AssertHeld();
4598   }
4599   FileDescriptor* placeholder = alloc.AllocateArray<FileDescriptor>(1);
4600   memset(static_cast<void*>(placeholder), 0, sizeof(*placeholder));
4601 
4602   placeholder->name_ = alloc.AllocateStrings(name);
4603   placeholder->package_ = &internal::GetEmptyString();
4604   placeholder->pool_ = this;
4605   placeholder->options_ = &FileOptions::default_instance();
4606   placeholder->tables_ = &FileDescriptorTables::GetEmptyInstance();
4607   placeholder->source_code_info_ = &SourceCodeInfo::default_instance();
4608   placeholder->is_placeholder_ = true;
4609   placeholder->syntax_ = FileDescriptor::SYNTAX_UNKNOWN;
4610   placeholder->finished_building_ = true;
4611   // All other fields are zero or nullptr.
4612 
4613   return placeholder;
4614 }
4615 
AddSymbol(const std::string & full_name,const void * parent,const std::string & name,const Message & proto,Symbol symbol)4616 bool DescriptorBuilder::AddSymbol(const std::string& full_name,
4617                                   const void* parent, const std::string& name,
4618                                   const Message& proto, Symbol symbol) {
4619   // If the caller passed nullptr for the parent, the symbol is at file scope.
4620   // Use its file as the parent instead.
4621   if (parent == nullptr) parent = file_;
4622 
4623   if (full_name.find('\0') != std::string::npos) {
4624     AddError(full_name, proto, DescriptorPool::ErrorCollector::NAME,
4625              "\"" + full_name + "\" contains null character.");
4626     return false;
4627   }
4628   if (tables_->AddSymbol(full_name, symbol)) {
4629     if (!file_tables_->AddAliasUnderParent(parent, name, symbol)) {
4630       // This is only possible if there was already an error adding something of
4631       // the same name.
4632       if (!had_errors_) {
4633         GOOGLE_LOG(DFATAL) << "\"" << full_name
4634                     << "\" not previously defined in "
4635                        "symbols_by_name_, but was defined in "
4636                        "symbols_by_parent_; this shouldn't be possible.";
4637       }
4638       return false;
4639     }
4640     return true;
4641   } else {
4642     const FileDescriptor* other_file = tables_->FindSymbol(full_name).GetFile();
4643     if (other_file == file_) {
4644       std::string::size_type dot_pos = full_name.find_last_of('.');
4645       if (dot_pos == std::string::npos) {
4646         AddError(full_name, proto, DescriptorPool::ErrorCollector::NAME,
4647                  "\"" + full_name + "\" is already defined.");
4648       } else {
4649         AddError(full_name, proto, DescriptorPool::ErrorCollector::NAME,
4650                  "\"" + full_name.substr(dot_pos + 1) +
4651                      "\" is already defined in \"" +
4652                      full_name.substr(0, dot_pos) + "\".");
4653       }
4654     } else {
4655       // Symbol seems to have been defined in a different file.
4656       AddError(full_name, proto, DescriptorPool::ErrorCollector::NAME,
4657                "\"" + full_name + "\" is already defined in file \"" +
4658                    (other_file == nullptr ? "null" : other_file->name()) +
4659                    "\".");
4660     }
4661     return false;
4662   }
4663 }
4664 
AddPackage(const std::string & name,const Message & proto,FileDescriptor * file)4665 void DescriptorBuilder::AddPackage(const std::string& name,
4666                                    const Message& proto, FileDescriptor* file) {
4667   if (name.find('\0') != std::string::npos) {
4668     AddError(name, proto, DescriptorPool::ErrorCollector::NAME,
4669              "\"" + name + "\" contains null character.");
4670     return;
4671   }
4672 
4673   Symbol existing_symbol = tables_->FindSymbol(name);
4674   // It's OK to redefine a package.
4675   if (existing_symbol.IsNull()) {
4676     if (&name == &file->package()) {
4677       // It is the toplevel package name, so insert the descriptor directly.
4678       tables_->AddSymbol(file->package(), Symbol(file));
4679     } else {
4680       auto* package = tables_->Allocate<Symbol::Subpackage>();
4681       // If the name is the package name, then it is already in the arena.
4682       // If not, copy it there. It came from the call to AddPackage below.
4683       package->name_size = static_cast<int>(name.size());
4684       package->file = file;
4685       tables_->AddSymbol(name, Symbol(package));
4686     }
4687     // Also add parent package, if any.
4688     std::string::size_type dot_pos = name.find_last_of('.');
4689     if (dot_pos == std::string::npos) {
4690       // No parents.
4691       ValidateSymbolName(name, name, proto);
4692     } else {
4693       // Has parent.
4694       AddPackage(name.substr(0, dot_pos), proto, file);
4695       ValidateSymbolName(name.substr(dot_pos + 1), name, proto);
4696     }
4697   } else if (!existing_symbol.IsPackage()) {
4698     // Symbol seems to have been defined in a different file.
4699     const FileDescriptor* other_file = existing_symbol.GetFile();
4700     AddError(name, proto, DescriptorPool::ErrorCollector::NAME,
4701              "\"" + name +
4702                  "\" is already defined (as something other than "
4703                  "a package) in file \"" +
4704                  (other_file == nullptr ? "null" : other_file->name()) + "\".");
4705   }
4706 }
4707 
ValidateSymbolName(const std::string & name,const std::string & full_name,const Message & proto)4708 void DescriptorBuilder::ValidateSymbolName(const std::string& name,
4709                                            const std::string& full_name,
4710                                            const Message& proto) {
4711   if (name.empty()) {
4712     AddError(full_name, proto, DescriptorPool::ErrorCollector::NAME,
4713              "Missing name.");
4714   } else {
4715     for (char character : name) {
4716       // I don't trust isalnum() due to locales.  :(
4717       if ((character < 'a' || 'z' < character) &&
4718           (character < 'A' || 'Z' < character) &&
4719           (character < '0' || '9' < character) && (character != '_')) {
4720         AddError(full_name, proto, DescriptorPool::ErrorCollector::NAME,
4721                  "\"" + name + "\" is not a valid identifier.");
4722         return;
4723       }
4724     }
4725   }
4726 }
4727 
4728 // -------------------------------------------------------------------
4729 
4730 // This generic implementation is good for all descriptors except
4731 // FileDescriptor.
4732 template <class DescriptorT>
AllocateOptions(const typename DescriptorT::OptionsType & orig_options,DescriptorT * descriptor,int options_field_tag,const std::string & option_name,internal::FlatAllocator & alloc)4733 void DescriptorBuilder::AllocateOptions(
4734     const typename DescriptorT::OptionsType& orig_options,
4735     DescriptorT* descriptor, int options_field_tag,
4736     const std::string& option_name, internal::FlatAllocator& alloc) {
4737   std::vector<int> options_path;
4738   descriptor->GetLocationPath(&options_path);
4739   options_path.push_back(options_field_tag);
4740   AllocateOptionsImpl(descriptor->full_name(), descriptor->full_name(),
4741                       orig_options, descriptor, options_path, option_name,
4742                       alloc);
4743 }
4744 
4745 // We specialize for FileDescriptor.
AllocateOptions(const FileOptions & orig_options,FileDescriptor * descriptor,internal::FlatAllocator & alloc)4746 void DescriptorBuilder::AllocateOptions(const FileOptions& orig_options,
4747                                         FileDescriptor* descriptor,
4748                                         internal::FlatAllocator& alloc) {
4749   std::vector<int> options_path;
4750   options_path.push_back(FileDescriptorProto::kOptionsFieldNumber);
4751   // We add the dummy token so that LookupSymbol does the right thing.
4752   AllocateOptionsImpl(descriptor->package() + ".dummy", descriptor->name(),
4753                       orig_options, descriptor, options_path,
4754                       "google.protobuf.FileOptions", alloc);
4755 }
4756 
4757 template <class DescriptorT>
AllocateOptionsImpl(const std::string & name_scope,const std::string & element_name,const typename DescriptorT::OptionsType & orig_options,DescriptorT * descriptor,const std::vector<int> & options_path,const std::string & option_name,internal::FlatAllocator & alloc)4758 void DescriptorBuilder::AllocateOptionsImpl(
4759     const std::string& name_scope, const std::string& element_name,
4760     const typename DescriptorT::OptionsType& orig_options,
4761     DescriptorT* descriptor, const std::vector<int>& options_path,
4762     const std::string& option_name, internal::FlatAllocator& alloc) {
4763   auto* options = alloc.AllocateArray<typename DescriptorT::OptionsType>(1);
4764 
4765   if (!orig_options.IsInitialized()) {
4766     AddError(name_scope + "." + element_name, orig_options,
4767              DescriptorPool::ErrorCollector::OPTION_NAME,
4768              "Uninterpreted option is missing name or value.");
4769     return;
4770   }
4771 
4772   // Avoid using MergeFrom()/CopyFrom() in this class to make it -fno-rtti
4773   // friendly. Without RTTI, MergeFrom() and CopyFrom() will fallback to the
4774   // reflection based method, which requires the Descriptor. However, we are in
4775   // the middle of building the descriptors, thus the deadlock.
4776   options->ParseFromString(orig_options.SerializeAsString());
4777   descriptor->options_ = options;
4778 
4779   // Don't add to options_to_interpret_ unless there were uninterpreted
4780   // options.  This not only avoids unnecessary work, but prevents a
4781   // bootstrapping problem when building descriptors for descriptor.proto.
4782   // descriptor.proto does not contain any uninterpreted options, but
4783   // attempting to interpret options anyway will cause
4784   // OptionsType::GetDescriptor() to be called which may then deadlock since
4785   // we're still trying to build it.
4786   if (options->uninterpreted_option_size() > 0) {
4787     options_to_interpret_.push_back(OptionsToInterpret(
4788         name_scope, element_name, options_path, &orig_options, options));
4789   }
4790 
4791   // If the custom option is in unknown fields, no need to interpret it.
4792   // Remove the dependency file from unused_dependency.
4793   const UnknownFieldSet& unknown_fields = orig_options.unknown_fields();
4794   if (!unknown_fields.empty()) {
4795     // Can not use options->GetDescriptor() which may case deadlock.
4796     Symbol msg_symbol = tables_->FindSymbol(option_name);
4797     if (msg_symbol.type() == Symbol::MESSAGE) {
4798       for (int i = 0; i < unknown_fields.field_count(); ++i) {
4799         assert_mutex_held(pool_);
4800         const FieldDescriptor* field =
4801             pool_->InternalFindExtensionByNumberNoLock(
4802                 msg_symbol.descriptor(), unknown_fields.field(i).number());
4803         if (field) {
4804           unused_dependency_.erase(field->file());
4805         }
4806       }
4807     }
4808   }
4809 }
4810 
// A common pattern:  convert the repeated field NAME of the proto INPUT into
// an array stored on the descriptor OUTPUT, invoking METHOD once per element
// to build it.  The macro sets OUTPUT->NAME_count_, allocates OUTPUT->NAMEs_
// from the enclosing scope's `alloc`, and passes PARENT through to METHOD.
// It expands to several statements and declares a loop variable named `i`.
#define BUILD_ARRAY(INPUT, OUTPUT, NAME, METHOD, PARENT)               \
  OUTPUT->NAME##_count_ = INPUT.NAME##_size();                         \
  OUTPUT->NAME##s_ = alloc.AllocateArray<                              \
      typename std::remove_pointer<decltype(OUTPUT->NAME##s_)>::type>( \
      INPUT.NAME##_size());                                            \
  for (int i = 0; i < INPUT.NAME##_size(); ++i) {                      \
    METHOD(INPUT.NAME(i), PARENT, OUTPUT->NAME##s_ + i, alloc);        \
  }
4821 
AddRecursiveImportError(const FileDescriptorProto & proto,int from_here)4822 void DescriptorBuilder::AddRecursiveImportError(
4823     const FileDescriptorProto& proto, int from_here) {
4824   std::string error_message("File recursively imports itself: ");
4825   for (size_t i = from_here; i < tables_->pending_files_.size(); i++) {
4826     error_message.append(tables_->pending_files_[i]);
4827     error_message.append(" -> ");
4828   }
4829   error_message.append(proto.name());
4830 
4831   if (static_cast<size_t>(from_here) < tables_->pending_files_.size() - 1) {
4832     AddError(tables_->pending_files_[from_here + 1], proto,
4833              DescriptorPool::ErrorCollector::IMPORT, error_message);
4834   } else {
4835     AddError(proto.name(), proto, DescriptorPool::ErrorCollector::IMPORT,
4836              error_message);
4837   }
4838 }
4839 
AddTwiceListedError(const FileDescriptorProto & proto,int index)4840 void DescriptorBuilder::AddTwiceListedError(const FileDescriptorProto& proto,
4841                                             int index) {
4842   AddError(proto.dependency(index), proto,
4843            DescriptorPool::ErrorCollector::IMPORT,
4844            "Import \"" + proto.dependency(index) + "\" was listed twice.");
4845 }
4846 
AddImportError(const FileDescriptorProto & proto,int index)4847 void DescriptorBuilder::AddImportError(const FileDescriptorProto& proto,
4848                                        int index) {
4849   std::string message;
4850   if (pool_->fallback_database_ == nullptr) {
4851     message = "Import \"" + proto.dependency(index) + "\" has not been loaded.";
4852   } else {
4853     message = "Import \"" + proto.dependency(index) +
4854               "\" was not found or had errors.";
4855   }
4856   AddError(proto.dependency(index), proto,
4857            DescriptorPool::ErrorCollector::IMPORT, message);
4858 }
4859 
ExistingFileMatchesProto(const FileDescriptor * existing_file,const FileDescriptorProto & proto)4860 static bool ExistingFileMatchesProto(const FileDescriptor* existing_file,
4861                                      const FileDescriptorProto& proto) {
4862   FileDescriptorProto existing_proto;
4863   existing_file->CopyTo(&existing_proto);
4864   // TODO(liujisi): Remove it when CopyTo supports copying syntax params when
4865   // syntax="proto2".
4866   if (existing_file->syntax() == FileDescriptor::SYNTAX_PROTO2 &&
4867       proto.has_syntax()) {
4868     existing_proto.set_syntax(
4869         existing_file->SyntaxName(existing_file->syntax()));
4870   }
4871 
4872   return existing_proto.SerializeAsString() == proto.SerializeAsString();
4873 }
4874 
4875 // These PlanAllocationSize functions will gather into the FlatAllocator all the
4876 // necessary memory allocations that BuildXXX functions below will do on the
4877 // Tables object.
4878 // They *must* be kept in sync. If we miss some PlanArray call we won't have
4879 // enough memory and will GOOGLE_CHECK-fail.
PlanAllocationSize(const RepeatedPtrField<EnumValueDescriptorProto> & values,internal::FlatAllocator & alloc)4880 static void PlanAllocationSize(
4881     const RepeatedPtrField<EnumValueDescriptorProto>& values,
4882     internal::FlatAllocator& alloc) {
4883   alloc.PlanArray<EnumValueDescriptor>(values.size());
4884   alloc.PlanArray<std::string>(2 * values.size());  // name + full_name
4885   for (const auto& v : values) {
4886     if (v.has_options()) alloc.PlanArray<EnumValueOptions>(1);
4887   }
4888 }
4889 
PlanAllocationSize(const RepeatedPtrField<EnumDescriptorProto> & enums,internal::FlatAllocator & alloc)4890 static void PlanAllocationSize(
4891     const RepeatedPtrField<EnumDescriptorProto>& enums,
4892     internal::FlatAllocator& alloc) {
4893   alloc.PlanArray<EnumDescriptor>(enums.size());
4894   alloc.PlanArray<std::string>(2 * enums.size());  // name + full_name
4895   for (const auto& e : enums) {
4896     if (e.has_options()) alloc.PlanArray<EnumOptions>(1);
4897     PlanAllocationSize(e.value(), alloc);
4898     alloc.PlanArray<EnumDescriptor::ReservedRange>(e.reserved_range_size());
4899     alloc.PlanArray<const std::string*>(e.reserved_name_size());
4900     alloc.PlanArray<std::string>(e.reserved_name_size());
4901   }
4902 }
4903 
PlanAllocationSize(const RepeatedPtrField<OneofDescriptorProto> & oneofs,internal::FlatAllocator & alloc)4904 static void PlanAllocationSize(
4905     const RepeatedPtrField<OneofDescriptorProto>& oneofs,
4906     internal::FlatAllocator& alloc) {
4907   alloc.PlanArray<OneofDescriptor>(oneofs.size());
4908   alloc.PlanArray<std::string>(2 * oneofs.size());  // name + full_name
4909   for (const auto& oneof : oneofs) {
4910     if (oneof.has_options()) alloc.PlanArray<OneofOptions>(1);
4911   }
4912 }
4913 
PlanAllocationSize(const RepeatedPtrField<FieldDescriptorProto> & fields,internal::FlatAllocator & alloc)4914 static void PlanAllocationSize(
4915     const RepeatedPtrField<FieldDescriptorProto>& fields,
4916     internal::FlatAllocator& alloc) {
4917   alloc.PlanArray<FieldDescriptor>(fields.size());
4918   for (const auto& field : fields) {
4919     if (field.has_options()) alloc.PlanArray<FieldOptions>(1);
4920     alloc.PlanFieldNames(field.name(),
4921                          field.has_json_name() ? &field.json_name() : nullptr);
4922     if (field.has_default_value() && field.has_type() &&
4923         (field.type() == FieldDescriptorProto::TYPE_STRING ||
4924          field.type() == FieldDescriptorProto::TYPE_BYTES)) {
4925       // For the default string value.
4926       alloc.PlanArray<std::string>(1);
4927     }
4928   }
4929 }
4930 
PlanAllocationSize(const RepeatedPtrField<DescriptorProto::ExtensionRange> & ranges,internal::FlatAllocator & alloc)4931 static void PlanAllocationSize(
4932     const RepeatedPtrField<DescriptorProto::ExtensionRange>& ranges,
4933     internal::FlatAllocator& alloc) {
4934   alloc.PlanArray<Descriptor::ExtensionRange>(ranges.size());
4935   for (const auto& r : ranges) {
4936     if (r.has_options()) alloc.PlanArray<ExtensionRangeOptions>(1);
4937   }
4938 }
4939 
PlanAllocationSize(const RepeatedPtrField<DescriptorProto> & messages,internal::FlatAllocator & alloc)4940 static void PlanAllocationSize(
4941     const RepeatedPtrField<DescriptorProto>& messages,
4942     internal::FlatAllocator& alloc) {
4943   alloc.PlanArray<Descriptor>(messages.size());
4944   alloc.PlanArray<std::string>(2 * messages.size());  // name + full_name
4945 
4946   for (const auto& message : messages) {
4947     if (message.has_options()) alloc.PlanArray<MessageOptions>(1);
4948     PlanAllocationSize(message.nested_type(), alloc);
4949     PlanAllocationSize(message.field(), alloc);
4950     PlanAllocationSize(message.extension(), alloc);
4951     PlanAllocationSize(message.extension_range(), alloc);
4952     alloc.PlanArray<Descriptor::ReservedRange>(message.reserved_range_size());
4953     alloc.PlanArray<const std::string*>(message.reserved_name_size());
4954     alloc.PlanArray<std::string>(message.reserved_name_size());
4955     PlanAllocationSize(message.enum_type(), alloc);
4956     PlanAllocationSize(message.oneof_decl(), alloc);
4957   }
4958 }
4959 
PlanAllocationSize(const RepeatedPtrField<MethodDescriptorProto> & methods,internal::FlatAllocator & alloc)4960 static void PlanAllocationSize(
4961     const RepeatedPtrField<MethodDescriptorProto>& methods,
4962     internal::FlatAllocator& alloc) {
4963   alloc.PlanArray<MethodDescriptor>(methods.size());
4964   alloc.PlanArray<std::string>(2 * methods.size());  // name + full_name
4965   for (const auto& m : methods) {
4966     if (m.has_options()) alloc.PlanArray<MethodOptions>(1);
4967   }
4968 }
4969 
PlanAllocationSize(const RepeatedPtrField<ServiceDescriptorProto> & services,internal::FlatAllocator & alloc)4970 static void PlanAllocationSize(
4971     const RepeatedPtrField<ServiceDescriptorProto>& services,
4972     internal::FlatAllocator& alloc) {
4973   alloc.PlanArray<ServiceDescriptor>(services.size());
4974   alloc.PlanArray<std::string>(2 * services.size());  // name + full_name
4975   for (const auto& service : services) {
4976     if (service.has_options()) alloc.PlanArray<ServiceOptions>(1);
4977     PlanAllocationSize(service.method(), alloc);
4978   }
4979 }
4980 
PlanAllocationSize(const FileDescriptorProto & proto,internal::FlatAllocator & alloc)4981 static void PlanAllocationSize(const FileDescriptorProto& proto,
4982                                internal::FlatAllocator& alloc) {
4983   alloc.PlanArray<FileDescriptor>(1);
4984   alloc.PlanArray<FileDescriptorTables>(1);
4985   alloc.PlanArray<std::string>(2);  // name + package
4986   if (proto.has_options()) alloc.PlanArray<FileOptions>(1);
4987   if (proto.has_source_code_info()) alloc.PlanArray<SourceCodeInfo>(1);
4988 
4989   PlanAllocationSize(proto.service(), alloc);
4990   PlanAllocationSize(proto.message_type(), alloc);
4991   PlanAllocationSize(proto.enum_type(), alloc);
4992   PlanAllocationSize(proto.extension(), alloc);
4993 
4994   alloc.PlanArray<int>(proto.weak_dependency_size());
4995   alloc.PlanArray<int>(proto.public_dependency_size());
4996   alloc.PlanArray<const FileDescriptor*>(proto.dependency_size());
4997 }
4998 
BuildFile(const FileDescriptorProto & proto)4999 const FileDescriptor* DescriptorBuilder::BuildFile(
5000     const FileDescriptorProto& proto) {
5001   filename_ = proto.name();
5002 
5003   // Check if the file already exists and is identical to the one being built.
5004   // Note:  This only works if the input is canonical -- that is, it
5005   //   fully-qualifies all type names, has no UninterpretedOptions, etc.
5006   //   This is fine, because this idempotency "feature" really only exists to
5007   //   accommodate one hack in the proto1->proto2 migration layer.
5008   const FileDescriptor* existing_file = tables_->FindFile(filename_);
5009   if (existing_file != nullptr) {
5010     // File already in pool.  Compare the existing one to the input.
5011     if (ExistingFileMatchesProto(existing_file, proto)) {
5012       // They're identical.  Return the existing descriptor.
5013       return existing_file;
5014     }
5015 
5016     // Not a match.  The error will be detected and handled later.
5017   }
5018 
5019   // Check to see if this file is already on the pending files list.
5020   // TODO(kenton):  Allow recursive imports?  It may not work with some
5021   //   (most?) programming languages.  E.g., in C++, a forward declaration
5022   //   of a type is not sufficient to allow it to be used even in a
5023   //   generated header file due to inlining.  This could perhaps be
5024   //   worked around using tricks involving inserting #include statements
5025   //   mid-file, but that's pretty ugly, and I'm pretty sure there are
5026   //   some languages out there that do not allow recursive dependencies
5027   //   at all.
5028   for (size_t i = 0; i < tables_->pending_files_.size(); i++) {
5029     if (tables_->pending_files_[i] == proto.name()) {
5030       AddRecursiveImportError(proto, i);
5031       return nullptr;
5032     }
5033   }
5034 
5035   static const int kMaximumPackageLength = 511;
5036   if (proto.package().size() > kMaximumPackageLength) {
5037     AddError(proto.package(), proto, DescriptorPool::ErrorCollector::NAME,
5038              "Package name is too long");
5039     return nullptr;
5040   }
5041 
5042   // If we have a fallback_database_, and we aren't doing lazy import building,
5043   // attempt to load all dependencies now, before checkpointing tables_.  This
5044   // avoids confusion with recursive checkpoints.
5045   if (!pool_->lazily_build_dependencies_) {
5046     if (pool_->fallback_database_ != nullptr) {
5047       tables_->pending_files_.push_back(proto.name());
5048       for (int i = 0; i < proto.dependency_size(); i++) {
5049         if (tables_->FindFile(proto.dependency(i)) == nullptr &&
5050             (pool_->underlay_ == nullptr ||
5051              pool_->underlay_->FindFileByName(proto.dependency(i)) ==
5052                  nullptr)) {
5053           // We don't care what this returns since we'll find out below anyway.
5054           pool_->TryFindFileInFallbackDatabase(proto.dependency(i));
5055         }
5056       }
5057       tables_->pending_files_.pop_back();
5058     }
5059   }
5060 
5061   // Checkpoint the tables so that we can roll back if something goes wrong.
5062   tables_->AddCheckpoint();
5063 
5064   internal::FlatAllocator alloc;
5065   PlanAllocationSize(proto, alloc);
5066   alloc.FinalizePlanning(tables_);
5067   FileDescriptor* result = BuildFileImpl(proto, alloc);
5068 
5069   file_tables_->FinalizeTables();
5070   if (result) {
5071     tables_->ClearLastCheckpoint();
5072     result->finished_building_ = true;
5073     alloc.ExpectConsumed();
5074   } else {
5075     tables_->RollbackToLastCheckpoint();
5076   }
5077 
5078   return result;
5079 }
5080 
BuildFileImpl(const FileDescriptorProto & proto,internal::FlatAllocator & alloc)5081 FileDescriptor* DescriptorBuilder::BuildFileImpl(
5082     const FileDescriptorProto& proto, internal::FlatAllocator& alloc) {
5083   FileDescriptor* result = alloc.AllocateArray<FileDescriptor>(1);
5084   file_ = result;
5085 
5086   result->is_placeholder_ = false;
5087   result->finished_building_ = false;
5088   SourceCodeInfo* info = nullptr;
5089   if (proto.has_source_code_info()) {
5090     info = alloc.AllocateArray<SourceCodeInfo>(1);
5091     info->CopyFrom(proto.source_code_info());
5092     result->source_code_info_ = info;
5093   } else {
5094     result->source_code_info_ = &SourceCodeInfo::default_instance();
5095   }
5096 
5097   file_tables_ = alloc.AllocateArray<FileDescriptorTables>(1);
5098   file_->tables_ = file_tables_;
5099 
5100   if (!proto.has_name()) {
5101     AddError("", proto, DescriptorPool::ErrorCollector::OTHER,
5102              "Missing field: FileDescriptorProto.name.");
5103   }
5104 
5105   // TODO(liujisi): Report error when the syntax is empty after all the protos
5106   // have added the syntax statement.
5107   if (proto.syntax().empty() || proto.syntax() == "proto2") {
5108     file_->syntax_ = FileDescriptor::SYNTAX_PROTO2;
5109   } else if (proto.syntax() == "proto3") {
5110     file_->syntax_ = FileDescriptor::SYNTAX_PROTO3;
5111   } else {
5112     file_->syntax_ = FileDescriptor::SYNTAX_UNKNOWN;
5113     AddError(proto.name(), proto, DescriptorPool::ErrorCollector::OTHER,
5114              "Unrecognized syntax: " + proto.syntax());
5115   }
5116 
5117   result->name_ = alloc.AllocateStrings(proto.name());
5118   if (proto.has_package()) {
5119     result->package_ = alloc.AllocateStrings(proto.package());
5120   } else {
5121     // We cannot rely on proto.package() returning a valid string if
5122     // proto.has_package() is false, because we might be running at static
5123     // initialization time, in which case default values have not yet been
5124     // initialized.
5125     result->package_ = alloc.AllocateStrings("");
5126   }
5127   result->pool_ = pool_;
5128 
5129   if (result->name().find('\0') != std::string::npos) {
5130     AddError(result->name(), proto, DescriptorPool::ErrorCollector::NAME,
5131              "\"" + result->name() + "\" contains null character.");
5132     return nullptr;
5133   }
5134 
5135   // Add to tables.
5136   if (!tables_->AddFile(result)) {
5137     AddError(proto.name(), proto, DescriptorPool::ErrorCollector::OTHER,
5138              "A file with this name is already in the pool.");
5139     // Bail out early so that if this is actually the exact same file, we
5140     // don't end up reporting that every single symbol is already defined.
5141     return nullptr;
5142   }
5143   if (!result->package().empty()) {
5144     if (std::count(result->package().begin(), result->package().end(), '.') >
5145         kPackageLimit) {
5146       AddError(result->package(), proto, DescriptorPool::ErrorCollector::NAME,
5147                "Exceeds Maximum Package Depth");
5148       return nullptr;
5149     }
5150     AddPackage(result->package(), proto, result);
5151   }
5152 
5153   // Make sure all dependencies are loaded.
5154   std::set<std::string> seen_dependencies;
5155   result->dependency_count_ = proto.dependency_size();
5156   result->dependencies_ =
5157       alloc.AllocateArray<const FileDescriptor*>(proto.dependency_size());
5158   result->dependencies_once_ = nullptr;
5159   unused_dependency_.clear();
5160   std::set<int> weak_deps;
5161   for (int i = 0; i < proto.weak_dependency_size(); ++i) {
5162     weak_deps.insert(proto.weak_dependency(i));
5163   }
5164 
5165   bool need_lazy_deps = false;
5166   for (int i = 0; i < proto.dependency_size(); i++) {
5167     if (!seen_dependencies.insert(proto.dependency(i)).second) {
5168       AddTwiceListedError(proto, i);
5169     }
5170 
5171     const FileDescriptor* dependency = tables_->FindFile(proto.dependency(i));
5172     if (dependency == nullptr && pool_->underlay_ != nullptr) {
5173       dependency = pool_->underlay_->FindFileByName(proto.dependency(i));
5174     }
5175 
5176     if (dependency == result) {
5177       // Recursive import.  dependency/result is not fully initialized, and it's
5178       // dangerous to try to do anything with it.  The recursive import error
5179       // will be detected and reported in DescriptorBuilder::BuildFile().
5180       return nullptr;
5181     }
5182 
5183     if (dependency == nullptr) {
5184       if (!pool_->lazily_build_dependencies_) {
5185         if (pool_->allow_unknown_ ||
5186             (!pool_->enforce_weak_ && weak_deps.find(i) != weak_deps.end())) {
5187           internal::FlatAllocator lazy_dep_alloc;
5188           lazy_dep_alloc.PlanArray<FileDescriptor>(1);
5189           lazy_dep_alloc.PlanArray<std::string>(1);
5190           lazy_dep_alloc.FinalizePlanning(tables_);
5191           dependency = pool_->NewPlaceholderFileWithMutexHeld(
5192               proto.dependency(i), lazy_dep_alloc);
5193         } else {
5194           AddImportError(proto, i);
5195         }
5196       }
5197     } else {
5198       // Add to unused_dependency_ to track unused imported files.
5199       // Note: do not track unused imported files for public import.
5200       if (pool_->enforce_dependencies_ &&
5201           (pool_->unused_import_track_files_.find(proto.name()) !=
5202            pool_->unused_import_track_files_.end()) &&
5203           (dependency->public_dependency_count() == 0)) {
5204         unused_dependency_.insert(dependency);
5205       }
5206     }
5207 
5208     result->dependencies_[i] = dependency;
5209     if (pool_->lazily_build_dependencies_ && !dependency) {
5210       need_lazy_deps = true;
5211     }
5212   }
5213   if (need_lazy_deps) {
5214     int total_char_size = 0;
5215     for (int i = 0; i < proto.dependency_size(); i++) {
5216       if (result->dependencies_[i] == nullptr) {
5217         total_char_size += static_cast<int>(proto.dependency(i).size());
5218       }
5219       ++total_char_size;  // For NUL char
5220     }
5221 
5222     void* data = tables_->AllocateBytes(
5223         static_cast<int>(sizeof(internal::once_flag) + total_char_size));
5224     result->dependencies_once_ = ::new (data) internal::once_flag{};
5225     char* name_data = reinterpret_cast<char*>(result->dependencies_once_ + 1);
5226 
5227     for (int i = 0; i < proto.dependency_size(); i++) {
5228       if (result->dependencies_[i] == nullptr) {
5229         memcpy(name_data, proto.dependency(i).c_str(),
5230                proto.dependency(i).size());
5231         name_data += proto.dependency(i).size();
5232       }
5233       *name_data++ = '\0';
5234     }
5235   }
5236 
5237   // Check public dependencies.
5238   int public_dependency_count = 0;
5239   result->public_dependencies_ =
5240       alloc.AllocateArray<int>(proto.public_dependency_size());
5241   for (int i = 0; i < proto.public_dependency_size(); i++) {
5242     // Only put valid public dependency indexes.
5243     int index = proto.public_dependency(i);
5244     if (index >= 0 && index < proto.dependency_size()) {
5245       result->public_dependencies_[public_dependency_count++] = index;
5246       // Do not track unused imported files for public import.
5247       // Calling dependency(i) builds that file when doing lazy imports,
5248       // need to avoid doing this. Unused dependency detection isn't done
5249       // when building lazily, anyways.
5250       if (!pool_->lazily_build_dependencies_) {
5251         unused_dependency_.erase(result->dependency(index));
5252       }
5253     } else {
5254       AddError(proto.name(), proto, DescriptorPool::ErrorCollector::OTHER,
5255                "Invalid public dependency index.");
5256     }
5257   }
5258   result->public_dependency_count_ = public_dependency_count;
5259 
5260   // Build dependency set
5261   dependencies_.clear();
5262   // We don't/can't do proper dependency error checking when
5263   // lazily_build_dependencies_, and calling dependency(i) will force
5264   // a dependency to be built, which we don't want.
5265   if (!pool_->lazily_build_dependencies_) {
5266     for (int i = 0; i < result->dependency_count(); i++) {
5267       RecordPublicDependencies(result->dependency(i));
5268     }
5269   }
5270 
5271   // Check weak dependencies.
5272   int weak_dependency_count = 0;
5273   result->weak_dependencies_ =
5274       alloc.AllocateArray<int>(proto.weak_dependency_size());
5275   for (int i = 0; i < proto.weak_dependency_size(); i++) {
5276     int index = proto.weak_dependency(i);
5277     if (index >= 0 && index < proto.dependency_size()) {
5278       result->weak_dependencies_[weak_dependency_count++] = index;
5279     } else {
5280       AddError(proto.name(), proto, DescriptorPool::ErrorCollector::OTHER,
5281                "Invalid weak dependency index.");
5282     }
5283   }
5284   result->weak_dependency_count_ = weak_dependency_count;
5285 
5286   // Convert children.
5287   BUILD_ARRAY(proto, result, message_type, BuildMessage, nullptr);
5288   BUILD_ARRAY(proto, result, enum_type, BuildEnum, nullptr);
5289   BUILD_ARRAY(proto, result, service, BuildService, nullptr);
5290   BUILD_ARRAY(proto, result, extension, BuildExtension, nullptr);
5291 
5292   // Copy options.
5293   result->options_ = nullptr;  // Set to default_instance later if necessary.
5294   if (proto.has_options()) {
5295     AllocateOptions(proto.options(), result, alloc);
5296   }
5297 
5298   // Note that the following steps must occur in exactly the specified order.
5299 
5300   // Cross-link.
5301   CrossLinkFile(result, proto);
5302 
5303   if (!message_hints_.empty()) {
5304     SuggestFieldNumbers(result, proto);
5305   }
5306 
5307   // Interpret any remaining uninterpreted options gathered into
5308   // options_to_interpret_ during descriptor building.  Cross-linking has made
5309   // extension options known, so all interpretations should now succeed.
5310   if (!had_errors_) {
5311     OptionInterpreter option_interpreter(this);
5312     for (std::vector<OptionsToInterpret>::iterator iter =
5313              options_to_interpret_.begin();
5314          iter != options_to_interpret_.end(); ++iter) {
5315       option_interpreter.InterpretOptions(&(*iter));
5316     }
5317     options_to_interpret_.clear();
5318     if (info != nullptr) {
5319       option_interpreter.UpdateSourceCodeInfo(info);
5320     }
5321   }
5322 
5323   // Validate options. See comments at InternalSetLazilyBuildDependencies about
5324   // error checking and lazy import building.
5325   if (!had_errors_ && !pool_->lazily_build_dependencies_) {
5326     ValidateFileOptions(result, proto);
5327   }
5328 
5329   // Additional naming conflict check for map entry types. Only need to check
5330   // this if there are already errors.
5331   if (had_errors_) {
5332     for (int i = 0; i < proto.message_type_size(); ++i) {
5333       DetectMapConflicts(result->message_type(i), proto.message_type(i));
5334     }
5335   }
5336 
5337 
5338   // Again, see comments at InternalSetLazilyBuildDependencies about error
5339   // checking. Also, don't log unused dependencies if there were previous
5340   // errors, since the results might be inaccurate.
5341   if (!had_errors_ && !unused_dependency_.empty() &&
5342       !pool_->lazily_build_dependencies_) {
5343     LogUnusedDependency(proto, result);
5344   }
5345 
5346   if (had_errors_) {
5347     return nullptr;
5348   } else {
5349     return result;
5350   }
5351 }
5352 
5353 
AllocateNameStrings(const std::string & scope,const std::string & proto_name,internal::FlatAllocator & alloc)5354 const std::string* DescriptorBuilder::AllocateNameStrings(
5355     const std::string& scope, const std::string& proto_name,
5356     internal::FlatAllocator& alloc) {
5357   if (scope.empty()) {
5358     return alloc.AllocateStrings(proto_name, proto_name);
5359   } else {
5360     return alloc.AllocateStrings(proto_name,
5361                                  StrCat(scope, ".", proto_name));
5362   }
5363 }
5364 
namespace {

// Helper for BuildMessage below: adds one to the referenced integer when the
// helper object is destroyed. Initialize it with braces, e.g.
// `IncrementWhenDestroyed guard{counter};`.
struct IncrementWhenDestroyed {
  // The integer that is bumped by the destructor.
  int& to_increment;

  ~IncrementWhenDestroyed() { to_increment += 1; }
};

}  // namespace
5374 
BuildMessage(const DescriptorProto & proto,const Descriptor * parent,Descriptor * result,internal::FlatAllocator & alloc)5375 void DescriptorBuilder::BuildMessage(const DescriptorProto& proto,
5376                                      const Descriptor* parent,
5377                                      Descriptor* result,
5378                                      internal::FlatAllocator& alloc) {
5379   const std::string& scope =
5380       (parent == nullptr) ? file_->package() : parent->full_name();
5381   result->all_names_ = AllocateNameStrings(scope, proto.name(), alloc);
5382   ValidateSymbolName(proto.name(), result->full_name(), proto);
5383 
5384   result->file_ = file_;
5385   result->containing_type_ = parent;
5386   result->is_placeholder_ = false;
5387   result->is_unqualified_placeholder_ = false;
5388   result->well_known_type_ = Descriptor::WELLKNOWNTYPE_UNSPECIFIED;
5389   result->options_ = nullptr;  // Set to default_instance later if necessary.
5390 
5391   auto it = pool_->tables_->well_known_types_.find(result->full_name());
5392   if (it != pool_->tables_->well_known_types_.end()) {
5393     result->well_known_type_ = it->second;
5394   }
5395 
5396   // Calculate the continuous sequence of fields.
5397   // These can be fast-path'd during lookup and don't need to be added to the
5398   // tables.
5399   // We use uint16_t to save space for sequential_field_limit_, so stop before
5400   // overflowing it. Worst case, we are not taking full advantage on huge
5401   // messages, but it is unlikely.
5402   result->sequential_field_limit_ = 0;
5403   for (int i = 0; i < std::numeric_limits<uint16_t>::max() &&
5404                   i < proto.field_size() && proto.field(i).number() == i + 1;
5405        ++i) {
5406     result->sequential_field_limit_ = i + 1;
5407   }
5408 
5409   // Build oneofs first so that fields and extension ranges can refer to them.
5410   BUILD_ARRAY(proto, result, oneof_decl, BuildOneof, result);
5411   BUILD_ARRAY(proto, result, field, BuildField, result);
5412   BUILD_ARRAY(proto, result, enum_type, BuildEnum, result);
5413   BUILD_ARRAY(proto, result, extension_range, BuildExtensionRange, result);
5414   BUILD_ARRAY(proto, result, extension, BuildExtension, result);
5415   BUILD_ARRAY(proto, result, reserved_range, BuildReservedRange, result);
5416 
5417   // Before building submessages, check recursion limit.
5418   --recursion_depth_;
5419   IncrementWhenDestroyed revert{recursion_depth_};
5420   if (recursion_depth_ <= 0) {
5421     AddError(result->full_name(), proto, DescriptorPool::ErrorCollector::OTHER,
5422              "Reached maximum recursion limit for nested messages.");
5423     result->nested_types_ = nullptr;
5424     result->nested_type_count_ = 0;
5425     return;
5426   }
5427   BUILD_ARRAY(proto, result, nested_type, BuildMessage, result);
5428 
5429   // Copy reserved names.
5430   int reserved_name_count = proto.reserved_name_size();
5431   result->reserved_name_count_ = reserved_name_count;
5432   result->reserved_names_ =
5433       alloc.AllocateArray<const std::string*>(reserved_name_count);
5434   for (int i = 0; i < reserved_name_count; ++i) {
5435     result->reserved_names_[i] =
5436         alloc.AllocateStrings(proto.reserved_name(i));
5437   }
5438 
5439   // Copy options.
5440   if (proto.has_options()) {
5441     AllocateOptions(proto.options(), result,
5442                     DescriptorProto::kOptionsFieldNumber,
5443                     "google.protobuf.MessageOptions", alloc);
5444   }
5445 
5446   AddSymbol(result->full_name(), parent, result->name(), proto, Symbol(result));
5447 
5448   for (int i = 0; i < proto.reserved_range_size(); i++) {
5449     const DescriptorProto_ReservedRange& range1 = proto.reserved_range(i);
5450     for (int j = i + 1; j < proto.reserved_range_size(); j++) {
5451       const DescriptorProto_ReservedRange& range2 = proto.reserved_range(j);
5452       if (range1.end() > range2.start() && range2.end() > range1.start()) {
5453         AddError(result->full_name(), proto.reserved_range(i),
5454                  DescriptorPool::ErrorCollector::NUMBER,
5455                  strings::Substitute("Reserved range $0 to $1 overlaps with "
5456                                   "already-defined range $2 to $3.",
5457                                   range2.start(), range2.end() - 1,
5458                                   range1.start(), range1.end() - 1));
5459       }
5460     }
5461   }
5462 
5463   HASH_SET<std::string> reserved_name_set;
5464   for (int i = 0; i < proto.reserved_name_size(); i++) {
5465     const std::string& name = proto.reserved_name(i);
5466     if (reserved_name_set.find(name) == reserved_name_set.end()) {
5467       reserved_name_set.insert(name);
5468     } else {
5469       AddError(name, proto, DescriptorPool::ErrorCollector::NAME,
5470                strings::Substitute("Field name \"$0\" is reserved multiple times.",
5471                                 name));
5472     }
5473   }
5474 
5475 
5476   for (int i = 0; i < result->field_count(); i++) {
5477     const FieldDescriptor* field = result->field(i);
5478     for (int j = 0; j < result->extension_range_count(); j++) {
5479       const Descriptor::ExtensionRange* range = result->extension_range(j);
5480       if (range->start <= field->number() && field->number() < range->end) {
5481         message_hints_[result].RequestHintOnFieldNumbers(
5482             proto.extension_range(j), DescriptorPool::ErrorCollector::NUMBER);
5483         AddError(
5484             field->full_name(), proto.extension_range(j),
5485             DescriptorPool::ErrorCollector::NUMBER,
5486             strings::Substitute(
5487                 "Extension range $0 to $1 includes field \"$2\" ($3).",
5488                 range->start, range->end - 1, field->name(), field->number()));
5489       }
5490     }
5491     for (int j = 0; j < result->reserved_range_count(); j++) {
5492       const Descriptor::ReservedRange* range = result->reserved_range(j);
5493       if (range->start <= field->number() && field->number() < range->end) {
5494         message_hints_[result].RequestHintOnFieldNumbers(
5495             proto.reserved_range(j), DescriptorPool::ErrorCollector::NUMBER);
5496         AddError(field->full_name(), proto.reserved_range(j),
5497                  DescriptorPool::ErrorCollector::NUMBER,
5498                  strings::Substitute("Field \"$0\" uses reserved number $1.",
5499                                   field->name(), field->number()));
5500       }
5501     }
5502     if (reserved_name_set.find(field->name()) != reserved_name_set.end()) {
5503       AddError(
5504           field->full_name(), proto.field(i),
5505           DescriptorPool::ErrorCollector::NAME,
5506           strings::Substitute("Field name \"$0\" is reserved.", field->name()));
5507     }
5508 
5509   }
5510 
5511   // Check that extension ranges don't overlap and don't include
5512   // reserved field numbers or names.
5513   for (int i = 0; i < result->extension_range_count(); i++) {
5514     const Descriptor::ExtensionRange* range1 = result->extension_range(i);
5515     for (int j = 0; j < result->reserved_range_count(); j++) {
5516       const Descriptor::ReservedRange* range2 = result->reserved_range(j);
5517       if (range1->end > range2->start && range2->end > range1->start) {
5518         AddError(result->full_name(), proto.extension_range(i),
5519                  DescriptorPool::ErrorCollector::NUMBER,
5520                  strings::Substitute("Extension range $0 to $1 overlaps with "
5521                                   "reserved range $2 to $3.",
5522                                   range1->start, range1->end - 1, range2->start,
5523                                   range2->end - 1));
5524       }
5525     }
5526     for (int j = i + 1; j < result->extension_range_count(); j++) {
5527       const Descriptor::ExtensionRange* range2 = result->extension_range(j);
5528       if (range1->end > range2->start && range2->end > range1->start) {
5529         AddError(result->full_name(), proto.extension_range(i),
5530                  DescriptorPool::ErrorCollector::NUMBER,
5531                  strings::Substitute("Extension range $0 to $1 overlaps with "
5532                                   "already-defined range $2 to $3.",
5533                                   range2->start, range2->end - 1, range1->start,
5534                                   range1->end - 1));
5535       }
5536     }
5537   }
5538 }
5539 
BuildFieldOrExtension(const FieldDescriptorProto & proto,Descriptor * parent,FieldDescriptor * result,bool is_extension,internal::FlatAllocator & alloc)5540 void DescriptorBuilder::BuildFieldOrExtension(const FieldDescriptorProto& proto,
5541                                               Descriptor* parent,
5542                                               FieldDescriptor* result,
5543                                               bool is_extension,
5544                                               internal::FlatAllocator& alloc) {
5545   const std::string& scope =
5546       (parent == nullptr) ? file_->package() : parent->full_name();
5547 
5548   // We allocate all names in a single array, and dedup them.
5549   // We remember the indices for the potentially deduped values.
5550   auto all_names = alloc.AllocateFieldNames(
5551       proto.name(), scope,
5552       proto.has_json_name() ? &proto.json_name() : nullptr);
5553   result->all_names_ = all_names.array;
5554   result->lowercase_name_index_ = all_names.lowercase_index;
5555   result->camelcase_name_index_ = all_names.camelcase_index;
5556   result->json_name_index_ = all_names.json_index;
5557 
5558   ValidateSymbolName(proto.name(), result->full_name(), proto);
5559 
5560   result->file_ = file_;
5561   result->number_ = proto.number();
5562   result->is_extension_ = is_extension;
5563   result->is_oneof_ = false;
5564   result->proto3_optional_ = proto.proto3_optional();
5565 
5566   if (proto.proto3_optional() &&
5567       file_->syntax() != FileDescriptor::SYNTAX_PROTO3) {
5568     AddError(result->full_name(), proto, DescriptorPool::ErrorCollector::TYPE,
5569              "The [proto3_optional=true] option may only be set on proto3"
5570              "fields, not " +
5571                  result->full_name());
5572   }
5573 
5574   result->has_json_name_ = proto.has_json_name();
5575 
5576   // Some compilers do not allow static_cast directly between two enum types,
5577   // so we must cast to int first.
5578   result->type_ = static_cast<FieldDescriptor::Type>(
5579       implicit_cast<int>(proto.type()));
5580   result->label_ = static_cast<FieldDescriptor::Label>(
5581       implicit_cast<int>(proto.label()));
5582 
5583   if (result->label_ == FieldDescriptor::LABEL_REQUIRED) {
5584     // An extension cannot have a required field (b/13365836).
5585     if (result->is_extension_) {
5586       AddError(result->full_name(), proto,
5587                // Error location `TYPE`: we would really like to indicate
5588                // `LABEL`, but the `ErrorLocation` enum has no entry for this,
5589                // and we don't necessarily know about all implementations of the
5590                // `ErrorCollector` interface to extend them to handle the new
5591                // error location type properly.
5592                DescriptorPool::ErrorCollector::TYPE,
5593                "The extension " + result->full_name() + " cannot be required.");
5594     }
5595   }
5596 
5597   // Some of these may be filled in when cross-linking.
5598   result->containing_type_ = nullptr;
5599   result->type_once_ = nullptr;
5600   result->default_value_enum_ = nullptr;
5601 
5602   result->has_default_value_ = proto.has_default_value();
5603   if (proto.has_default_value() && result->is_repeated()) {
5604     AddError(result->full_name(), proto,
5605              DescriptorPool::ErrorCollector::DEFAULT_VALUE,
5606              "Repeated fields can't have default values.");
5607   }
5608 
5609   if (proto.has_type()) {
5610     if (proto.has_default_value()) {
5611       char* end_pos = nullptr;
5612       switch (result->cpp_type()) {
5613         case FieldDescriptor::CPPTYPE_INT32:
5614           result->default_value_int32_t_ =
5615               strtol(proto.default_value().c_str(), &end_pos, 0);
5616           break;
5617         case FieldDescriptor::CPPTYPE_INT64:
5618           result->default_value_int64_t_ =
5619               strto64(proto.default_value().c_str(), &end_pos, 0);
5620           break;
5621         case FieldDescriptor::CPPTYPE_UINT32:
5622           result->default_value_uint32_t_ =
5623               strtoul(proto.default_value().c_str(), &end_pos, 0);
5624           break;
5625         case FieldDescriptor::CPPTYPE_UINT64:
5626           result->default_value_uint64_t_ =
5627               strtou64(proto.default_value().c_str(), &end_pos, 0);
5628           break;
5629         case FieldDescriptor::CPPTYPE_FLOAT:
5630           if (proto.default_value() == "inf") {
5631             result->default_value_float_ =
5632                 std::numeric_limits<float>::infinity();
5633           } else if (proto.default_value() == "-inf") {
5634             result->default_value_float_ =
5635                 -std::numeric_limits<float>::infinity();
5636           } else if (proto.default_value() == "nan") {
5637             result->default_value_float_ =
5638                 std::numeric_limits<float>::quiet_NaN();
5639           } else {
5640             result->default_value_float_ = io::SafeDoubleToFloat(
5641                 io::NoLocaleStrtod(proto.default_value().c_str(), &end_pos));
5642           }
5643           break;
5644         case FieldDescriptor::CPPTYPE_DOUBLE:
5645           if (proto.default_value() == "inf") {
5646             result->default_value_double_ =
5647                 std::numeric_limits<double>::infinity();
5648           } else if (proto.default_value() == "-inf") {
5649             result->default_value_double_ =
5650                 -std::numeric_limits<double>::infinity();
5651           } else if (proto.default_value() == "nan") {
5652             result->default_value_double_ =
5653                 std::numeric_limits<double>::quiet_NaN();
5654           } else {
5655             result->default_value_double_ =
5656                 io::NoLocaleStrtod(proto.default_value().c_str(), &end_pos);
5657           }
5658           break;
5659         case FieldDescriptor::CPPTYPE_BOOL:
5660           if (proto.default_value() == "true") {
5661             result->default_value_bool_ = true;
5662           } else if (proto.default_value() == "false") {
5663             result->default_value_bool_ = false;
5664           } else {
5665             AddError(result->full_name(), proto,
5666                      DescriptorPool::ErrorCollector::DEFAULT_VALUE,
5667                      "Boolean default must be true or false.");
5668           }
5669           break;
5670         case FieldDescriptor::CPPTYPE_ENUM:
5671           // This will be filled in when cross-linking.
5672           result->default_value_enum_ = nullptr;
5673           break;
5674         case FieldDescriptor::CPPTYPE_STRING:
5675           if (result->type() == FieldDescriptor::TYPE_BYTES) {
5676             result->default_value_string_ = alloc.AllocateStrings(
5677                 UnescapeCEscapeString(proto.default_value()));
5678           } else {
5679             result->default_value_string_ =
5680                 alloc.AllocateStrings(proto.default_value());
5681           }
5682           break;
5683         case FieldDescriptor::CPPTYPE_MESSAGE:
5684           AddError(result->full_name(), proto,
5685                    DescriptorPool::ErrorCollector::DEFAULT_VALUE,
5686                    "Messages can't have default values.");
5687           result->has_default_value_ = false;
5688           result->default_generated_instance_ = nullptr;
5689           break;
5690       }
5691 
5692       if (end_pos != nullptr) {
5693         // end_pos is only set non-null by the parsers for numeric types,
5694         // above. This checks that the default was non-empty and had no extra
5695         // junk after the end of the number.
5696         if (proto.default_value().empty() || *end_pos != '\0') {
5697           AddError(result->full_name(), proto,
5698                    DescriptorPool::ErrorCollector::DEFAULT_VALUE,
5699                    "Couldn't parse default value \"" + proto.default_value() +
5700                        "\".");
5701         }
5702       }
5703     } else {
5704       // No explicit default value
5705       switch (result->cpp_type()) {
5706         case FieldDescriptor::CPPTYPE_INT32:
5707           result->default_value_int32_t_ = 0;
5708           break;
5709         case FieldDescriptor::CPPTYPE_INT64:
5710           result->default_value_int64_t_ = 0;
5711           break;
5712         case FieldDescriptor::CPPTYPE_UINT32:
5713           result->default_value_uint32_t_ = 0;
5714           break;
5715         case FieldDescriptor::CPPTYPE_UINT64:
5716           result->default_value_uint64_t_ = 0;
5717           break;
5718         case FieldDescriptor::CPPTYPE_FLOAT:
5719           result->default_value_float_ = 0.0f;
5720           break;
5721         case FieldDescriptor::CPPTYPE_DOUBLE:
5722           result->default_value_double_ = 0.0;
5723           break;
5724         case FieldDescriptor::CPPTYPE_BOOL:
5725           result->default_value_bool_ = false;
5726           break;
5727         case FieldDescriptor::CPPTYPE_ENUM:
5728           // This will be filled in when cross-linking.
5729           result->default_value_enum_ = nullptr;
5730           break;
5731         case FieldDescriptor::CPPTYPE_STRING:
5732           result->default_value_string_ = &internal::GetEmptyString();
5733           break;
5734         case FieldDescriptor::CPPTYPE_MESSAGE:
5735           result->default_generated_instance_ = nullptr;
5736           break;
5737       }
5738     }
5739   }
5740 
5741   if (result->number() <= 0) {
5742     message_hints_[parent].RequestHintOnFieldNumbers(
5743         proto, DescriptorPool::ErrorCollector::NUMBER);
5744     AddError(result->full_name(), proto, DescriptorPool::ErrorCollector::NUMBER,
5745              "Field numbers must be positive integers.");
5746   } else if (!is_extension && result->number() > FieldDescriptor::kMaxNumber) {
5747     // Only validate that the number is within the valid field range if it is
5748     // not an extension. Since extension numbers are validated with the
5749     // extendee's valid set of extension numbers, and those are in turn
5750     // validated against the max allowed number, the check is unnecessary for
5751     // extension fields.
5752     // This avoids cross-linking issues that arise when attempting to check if
5753     // the extendee is a message_set_wire_format message, which has a higher max
5754     // on extension numbers.
5755     message_hints_[parent].RequestHintOnFieldNumbers(
5756         proto, DescriptorPool::ErrorCollector::NUMBER);
5757     AddError(result->full_name(), proto, DescriptorPool::ErrorCollector::NUMBER,
5758              strings::Substitute("Field numbers cannot be greater than $0.",
5759                               FieldDescriptor::kMaxNumber));
5760   } else if (result->number() >= FieldDescriptor::kFirstReservedNumber &&
5761              result->number() <= FieldDescriptor::kLastReservedNumber) {
5762     message_hints_[parent].RequestHintOnFieldNumbers(
5763         proto, DescriptorPool::ErrorCollector::NUMBER);
5764     AddError(result->full_name(), proto, DescriptorPool::ErrorCollector::NUMBER,
5765              strings::Substitute(
5766                  "Field numbers $0 through $1 are reserved for the protocol "
5767                  "buffer library implementation.",
5768                  FieldDescriptor::kFirstReservedNumber,
5769                  FieldDescriptor::kLastReservedNumber));
5770   }
5771 
5772   if (is_extension) {
5773     if (!proto.has_extendee()) {
5774       AddError(result->full_name(), proto,
5775                DescriptorPool::ErrorCollector::EXTENDEE,
5776                "FieldDescriptorProto.extendee not set for extension field.");
5777     }
5778 
5779     result->scope_.extension_scope = parent;
5780 
5781     if (proto.has_oneof_index()) {
5782       AddError(result->full_name(), proto, DescriptorPool::ErrorCollector::TYPE,
5783                "FieldDescriptorProto.oneof_index should not be set for "
5784                "extensions.");
5785     }
5786   } else {
5787     if (proto.has_extendee()) {
5788       AddError(result->full_name(), proto,
5789                DescriptorPool::ErrorCollector::EXTENDEE,
5790                "FieldDescriptorProto.extendee set for non-extension field.");
5791     }
5792 
5793     result->containing_type_ = parent;
5794 
5795     if (proto.has_oneof_index()) {
5796       if (proto.oneof_index() < 0 ||
5797           proto.oneof_index() >= parent->oneof_decl_count()) {
5798         AddError(result->full_name(), proto,
5799                  DescriptorPool::ErrorCollector::TYPE,
5800                  strings::Substitute("FieldDescriptorProto.oneof_index $0 is "
5801                                   "out of range for type \"$1\".",
5802                                   proto.oneof_index(), parent->name()));
5803       } else {
5804         result->is_oneof_ = true;
5805         result->scope_.containing_oneof =
5806             parent->oneof_decl(proto.oneof_index());
5807       }
5808     }
5809   }
5810 
5811   // Copy options.
5812   result->options_ = nullptr;  // Set to default_instance later if necessary.
5813   if (proto.has_options()) {
5814     AllocateOptions(proto.options(), result,
5815                     FieldDescriptorProto::kOptionsFieldNumber,
5816                     "google.protobuf.FieldOptions", alloc);
5817   }
5818 
5819   AddSymbol(result->full_name(), parent, result->name(), proto, Symbol(result));
5820 }
5821 
BuildExtensionRange(const DescriptorProto::ExtensionRange & proto,const Descriptor * parent,Descriptor::ExtensionRange * result,internal::FlatAllocator & alloc)5822 void DescriptorBuilder::BuildExtensionRange(
5823     const DescriptorProto::ExtensionRange& proto, const Descriptor* parent,
5824     Descriptor::ExtensionRange* result, internal::FlatAllocator& alloc) {
5825   result->start = proto.start();
5826   result->end = proto.end();
5827   if (result->start <= 0) {
5828     message_hints_[parent].RequestHintOnFieldNumbers(
5829         proto, DescriptorPool::ErrorCollector::NUMBER, result->start,
5830         result->end);
5831     AddError(parent->full_name(), proto, DescriptorPool::ErrorCollector::NUMBER,
5832              "Extension numbers must be positive integers.");
5833   }
5834 
5835   // Checking of the upper bound of the extension range is deferred until after
5836   // options interpreting. This allows messages with message_set_wire_format to
5837   // have extensions beyond FieldDescriptor::kMaxNumber, since the extension
5838   // numbers are actually used as int32s in the message_set_wire_format.
5839 
5840   if (result->start >= result->end) {
5841     AddError(parent->full_name(), proto, DescriptorPool::ErrorCollector::NUMBER,
5842              "Extension range end number must be greater than start number.");
5843   }
5844 
5845   result->options_ = nullptr;  // Set to default_instance later if necessary.
5846   if (proto.has_options()) {
5847     std::vector<int> options_path;
5848     parent->GetLocationPath(&options_path);
5849     options_path.push_back(DescriptorProto::kExtensionRangeFieldNumber);
5850     // find index of this extension range in order to compute path
5851     int index;
5852     for (index = 0; parent->extension_ranges_ + index != result; index++) {
5853     }
5854     options_path.push_back(index);
5855     options_path.push_back(DescriptorProto_ExtensionRange::kOptionsFieldNumber);
5856     AllocateOptionsImpl(parent->full_name(), parent->full_name(),
5857                         proto.options(), result, options_path,
5858                         "google.protobuf.ExtensionRangeOptions", alloc);
5859   }
5860 }
5861 
BuildReservedRange(const DescriptorProto::ReservedRange & proto,const Descriptor * parent,Descriptor::ReservedRange * result,internal::FlatAllocator &)5862 void DescriptorBuilder::BuildReservedRange(
5863     const DescriptorProto::ReservedRange& proto, const Descriptor* parent,
5864     Descriptor::ReservedRange* result, internal::FlatAllocator&) {
5865   result->start = proto.start();
5866   result->end = proto.end();
5867   if (result->start <= 0) {
5868     message_hints_[parent].RequestHintOnFieldNumbers(
5869         proto, DescriptorPool::ErrorCollector::NUMBER, result->start,
5870         result->end);
5871     AddError(parent->full_name(), proto, DescriptorPool::ErrorCollector::NUMBER,
5872              "Reserved numbers must be positive integers.");
5873   }
5874 }
5875 
BuildReservedRange(const EnumDescriptorProto::EnumReservedRange & proto,const EnumDescriptor * parent,EnumDescriptor::ReservedRange * result,internal::FlatAllocator &)5876 void DescriptorBuilder::BuildReservedRange(
5877     const EnumDescriptorProto::EnumReservedRange& proto,
5878     const EnumDescriptor* parent, EnumDescriptor::ReservedRange* result,
5879     internal::FlatAllocator&) {
5880   result->start = proto.start();
5881   result->end = proto.end();
5882 
5883   if (result->start > result->end) {
5884     AddError(parent->full_name(), proto, DescriptorPool::ErrorCollector::NUMBER,
5885              "Reserved range end number must be greater than start number.");
5886   }
5887 }
5888 
BuildOneof(const OneofDescriptorProto & proto,Descriptor * parent,OneofDescriptor * result,internal::FlatAllocator & alloc)5889 void DescriptorBuilder::BuildOneof(const OneofDescriptorProto& proto,
5890                                    Descriptor* parent, OneofDescriptor* result,
5891                                    internal::FlatAllocator& alloc) {
5892   result->all_names_ =
5893       AllocateNameStrings(parent->full_name(), proto.name(), alloc);
5894   ValidateSymbolName(proto.name(), result->full_name(), proto);
5895 
5896   result->containing_type_ = parent;
5897 
5898   // We need to fill these in later.
5899   result->field_count_ = 0;
5900   result->fields_ = nullptr;
5901   result->options_ = nullptr;
5902 
5903   // Copy options.
5904   if (proto.has_options()) {
5905     AllocateOptions(proto.options(), result,
5906                     OneofDescriptorProto::kOptionsFieldNumber,
5907                     "google.protobuf.OneofOptions", alloc);
5908   }
5909 
5910   AddSymbol(result->full_name(), parent, result->name(), proto, Symbol(result));
5911 }
5912 
CheckEnumValueUniqueness(const EnumDescriptorProto & proto,const EnumDescriptor * result)5913 void DescriptorBuilder::CheckEnumValueUniqueness(
5914     const EnumDescriptorProto& proto, const EnumDescriptor* result) {
5915 
5916   // Check that enum labels are still unique when we remove the enum prefix from
5917   // values that have it.
5918   //
5919   // This will fail for something like:
5920   //
5921   //   enum MyEnum {
5922   //     MY_ENUM_FOO = 0;
5923   //     FOO = 1;
5924   //   }
5925   //
5926   // By enforcing this reasonable constraint, we allow code generators to strip
5927   // the prefix and/or PascalCase it without creating conflicts.  This can lead
5928   // to much nicer language-specific enums like:
5929   //
5930   //   enum NameType {
5931   //     FirstName = 1,
5932   //     LastName = 2,
5933   //   }
5934   //
5935   // Instead of:
5936   //
5937   //   enum NameType {
5938   //     NAME_TYPE_FIRST_NAME = 1,
5939   //     NAME_TYPE_LAST_NAME = 2,
5940   //   }
5941   PrefixRemover remover(result->name());
5942   std::map<std::string, const EnumValueDescriptor*> values;
5943   for (int i = 0; i < result->value_count(); i++) {
5944     const EnumValueDescriptor* value = result->value(i);
5945     std::string stripped =
5946         EnumValueToPascalCase(remover.MaybeRemove(value->name()));
5947     std::pair<std::map<std::string, const EnumValueDescriptor*>::iterator, bool>
5948         insert_result = values.insert(std::make_pair(stripped, value));
5949     bool inserted = insert_result.second;
5950 
5951     // We don't throw the error if the two conflicting symbols are identical, or
5952     // if they map to the same number.  In the former case, the normal symbol
5953     // duplication error will fire so we don't need to (and its error message
5954     // will make more sense). We allow the latter case so users can create
5955     // aliases which add or remove the prefix (code generators that do prefix
5956     // stripping should de-dup the labels in this case).
5957     if (!inserted && insert_result.first->second->name() != value->name() &&
5958         insert_result.first->second->number() != value->number()) {
5959       std::string error_message =
5960           "Enum name " + value->name() + " has the same name as " +
5961           values[stripped]->name() +
5962           " if you ignore case and strip out the enum name prefix (if any). "
5963           "This is error-prone and can lead to undefined behavior. "
5964           "Please avoid doing this. If you are using allow_alias, please "
5965           "assign the same numeric value to both enums.";
5966       // There are proto2 enums out there with conflicting names, so to preserve
5967       // compatibility we issue only a warning for proto2.
5968       if (result->file()->syntax() == FileDescriptor::SYNTAX_PROTO2) {
5969         AddWarning(value->full_name(), proto.value(i),
5970                    DescriptorPool::ErrorCollector::NAME, error_message);
5971       } else {
5972         AddError(value->full_name(), proto.value(i),
5973                  DescriptorPool::ErrorCollector::NAME, error_message);
5974       }
5975     }
5976   }
5977 }
5978 
BuildEnum(const EnumDescriptorProto & proto,const Descriptor * parent,EnumDescriptor * result,internal::FlatAllocator & alloc)5979 void DescriptorBuilder::BuildEnum(const EnumDescriptorProto& proto,
5980                                   const Descriptor* parent,
5981                                   EnumDescriptor* result,
5982                                   internal::FlatAllocator& alloc) {
5983   const std::string& scope =
5984       (parent == nullptr) ? file_->package() : parent->full_name();
5985 
5986   result->all_names_ = AllocateNameStrings(scope, proto.name(), alloc);
5987   ValidateSymbolName(proto.name(), result->full_name(), proto);
5988   result->file_ = file_;
5989   result->containing_type_ = parent;
5990   result->is_placeholder_ = false;
5991   result->is_unqualified_placeholder_ = false;
5992 
5993   if (proto.value_size() == 0) {
5994     // We cannot allow enums with no values because this would mean there
5995     // would be no valid default value for fields of this type.
5996     AddError(result->full_name(), proto, DescriptorPool::ErrorCollector::NAME,
5997              "Enums must contain at least one value.");
5998   }
5999 
6000   // Calculate the continuous sequence of the labels.
6001   // These can be fast-path'd during lookup and don't need to be added to the
6002   // tables.
6003   // We use uint16_t to save space for sequential_value_limit_, so stop before
6004   // overflowing it. Worst case, we are not taking full advantage on huge
6005   // enums, but it is unlikely.
6006   for (int i = 0;
6007        i < std::numeric_limits<uint16_t>::max() && i < proto.value_size() &&
6008        // We do the math in int64_t to avoid overflows.
6009        proto.value(i).number() ==
6010            static_cast<int64_t>(i) + proto.value(0).number();
6011        ++i) {
6012     result->sequential_value_limit_ = i;
6013   }
6014 
6015   BUILD_ARRAY(proto, result, value, BuildEnumValue, result);
6016   BUILD_ARRAY(proto, result, reserved_range, BuildReservedRange, result);
6017 
6018   // Copy reserved names.
6019   int reserved_name_count = proto.reserved_name_size();
6020   result->reserved_name_count_ = reserved_name_count;
6021   result->reserved_names_ =
6022       alloc.AllocateArray<const std::string*>(reserved_name_count);
6023   for (int i = 0; i < reserved_name_count; ++i) {
6024     result->reserved_names_[i] =
6025         alloc.AllocateStrings(proto.reserved_name(i));
6026   }
6027 
6028   CheckEnumValueUniqueness(proto, result);
6029 
6030   // Copy options.
6031   result->options_ = nullptr;  // Set to default_instance later if necessary.
6032   if (proto.has_options()) {
6033     AllocateOptions(proto.options(), result,
6034                     EnumDescriptorProto::kOptionsFieldNumber,
6035                     "google.protobuf.EnumOptions", alloc);
6036   }
6037 
6038   AddSymbol(result->full_name(), parent, result->name(), proto, Symbol(result));
6039 
6040   for (int i = 0; i < proto.reserved_range_size(); i++) {
6041     const EnumDescriptorProto_EnumReservedRange& range1 =
6042         proto.reserved_range(i);
6043     for (int j = i + 1; j < proto.reserved_range_size(); j++) {
6044       const EnumDescriptorProto_EnumReservedRange& range2 =
6045           proto.reserved_range(j);
6046       if (range1.end() >= range2.start() && range2.end() >= range1.start()) {
6047         AddError(result->full_name(), proto.reserved_range(i),
6048                  DescriptorPool::ErrorCollector::NUMBER,
6049                  strings::Substitute("Reserved range $0 to $1 overlaps with "
6050                                   "already-defined range $2 to $3.",
6051                                   range2.start(), range2.end(), range1.start(),
6052                                   range1.end()));
6053       }
6054     }
6055   }
6056 
6057   HASH_SET<std::string> reserved_name_set;
6058   for (int i = 0; i < proto.reserved_name_size(); i++) {
6059     const std::string& name = proto.reserved_name(i);
6060     if (reserved_name_set.find(name) == reserved_name_set.end()) {
6061       reserved_name_set.insert(name);
6062     } else {
6063       AddError(name, proto, DescriptorPool::ErrorCollector::NAME,
6064                strings::Substitute("Enum value \"$0\" is reserved multiple times.",
6065                                 name));
6066     }
6067   }
6068 
6069   for (int i = 0; i < result->value_count(); i++) {
6070     const EnumValueDescriptor* value = result->value(i);
6071     for (int j = 0; j < result->reserved_range_count(); j++) {
6072       const EnumDescriptor::ReservedRange* range = result->reserved_range(j);
6073       if (range->start <= value->number() && value->number() <= range->end) {
6074         AddError(value->full_name(), proto.reserved_range(j),
6075                  DescriptorPool::ErrorCollector::NUMBER,
6076                  strings::Substitute("Enum value \"$0\" uses reserved number $1.",
6077                                   value->name(), value->number()));
6078       }
6079     }
6080     if (reserved_name_set.find(value->name()) != reserved_name_set.end()) {
6081       AddError(
6082           value->full_name(), proto.value(i),
6083           DescriptorPool::ErrorCollector::NAME,
6084           strings::Substitute("Enum value \"$0\" is reserved.", value->name()));
6085     }
6086   }
6087 }
6088 
BuildEnumValue(const EnumValueDescriptorProto & proto,const EnumDescriptor * parent,EnumValueDescriptor * result,internal::FlatAllocator & alloc)6089 void DescriptorBuilder::BuildEnumValue(const EnumValueDescriptorProto& proto,
6090                                        const EnumDescriptor* parent,
6091                                        EnumValueDescriptor* result,
6092                                        internal::FlatAllocator& alloc) {
6093   // Note:  full_name for enum values is a sibling to the parent's name, not a
6094   //   child of it.
6095   std::string full_name;
6096   size_t scope_len = parent->full_name().size() - parent->name().size();
6097   full_name.reserve(scope_len + proto.name().size());
6098   full_name.append(parent->full_name().data(), scope_len);
6099   full_name.append(proto.name());
6100 
6101   result->all_names_ =
6102       alloc.AllocateStrings(proto.name(), std::move(full_name));
6103   result->number_ = proto.number();
6104   result->type_ = parent;
6105 
6106   ValidateSymbolName(proto.name(), result->full_name(), proto);
6107 
6108   // Copy options.
6109   result->options_ = nullptr;  // Set to default_instance later if necessary.
6110   if (proto.has_options()) {
6111     AllocateOptions(proto.options(), result,
6112                     EnumValueDescriptorProto::kOptionsFieldNumber,
6113                     "google.protobuf.EnumValueOptions", alloc);
6114   }
6115 
6116   // Again, enum values are weird because we makes them appear as siblings
6117   // of the enum type instead of children of it.  So, we use
6118   // parent->containing_type() as the value's parent.
6119   bool added_to_outer_scope =
6120       AddSymbol(result->full_name(), parent->containing_type(), result->name(),
6121                 proto, Symbol::EnumValue(result, 0));
6122 
6123   // However, we also want to be able to search for values within a single
6124   // enum type, so we add it as a child of the enum type itself, too.
6125   // Note:  This could fail, but if it does, the error has already been
6126   //   reported by the above AddSymbol() call, so we ignore the return code.
6127   bool added_to_inner_scope = file_tables_->AddAliasUnderParent(
6128       parent, result->name(), Symbol::EnumValue(result, 1));
6129 
6130   if (added_to_inner_scope && !added_to_outer_scope) {
6131     // This value did not conflict with any values defined in the same enum,
6132     // but it did conflict with some other symbol defined in the enum type's
6133     // scope.  Let's print an additional error to explain this.
6134     std::string outer_scope;
6135     if (parent->containing_type() == nullptr) {
6136       outer_scope = file_->package();
6137     } else {
6138       outer_scope = parent->containing_type()->full_name();
6139     }
6140 
6141     if (outer_scope.empty()) {
6142       outer_scope = "the global scope";
6143     } else {
6144       outer_scope = "\"" + outer_scope + "\"";
6145     }
6146 
6147     AddError(result->full_name(), proto, DescriptorPool::ErrorCollector::NAME,
6148              "Note that enum values use C++ scoping rules, meaning that "
6149              "enum values are siblings of their type, not children of it.  "
6150              "Therefore, \"" +
6151                  result->name() + "\" must be unique within " + outer_scope +
6152                  ", not just within \"" + parent->name() + "\".");
6153   }
6154 
6155   // An enum is allowed to define two numbers that refer to the same value.
6156   // FindValueByNumber() should return the first such value, so we simply
6157   // ignore AddEnumValueByNumber()'s return code.
6158   file_tables_->AddEnumValueByNumber(result);
6159 }
6160 
BuildService(const ServiceDescriptorProto & proto,const void *,ServiceDescriptor * result,internal::FlatAllocator & alloc)6161 void DescriptorBuilder::BuildService(const ServiceDescriptorProto& proto,
6162                                      const void* /* dummy */,
6163                                      ServiceDescriptor* result,
6164                                      internal::FlatAllocator& alloc) {
6165   result->all_names_ =
6166       AllocateNameStrings(file_->package(), proto.name(), alloc);
6167   result->file_ = file_;
6168   ValidateSymbolName(proto.name(), result->full_name(), proto);
6169 
6170   BUILD_ARRAY(proto, result, method, BuildMethod, result);
6171 
6172   // Copy options.
6173   result->options_ = nullptr;  // Set to default_instance later if necessary.
6174   if (proto.has_options()) {
6175     AllocateOptions(proto.options(), result,
6176                     ServiceDescriptorProto::kOptionsFieldNumber,
6177                     "google.protobuf.ServiceOptions", alloc);
6178   }
6179 
6180   AddSymbol(result->full_name(), nullptr, result->name(), proto,
6181             Symbol(result));
6182 }
6183 
BuildMethod(const MethodDescriptorProto & proto,const ServiceDescriptor * parent,MethodDescriptor * result,internal::FlatAllocator & alloc)6184 void DescriptorBuilder::BuildMethod(const MethodDescriptorProto& proto,
6185                                     const ServiceDescriptor* parent,
6186                                     MethodDescriptor* result,
6187                                     internal::FlatAllocator& alloc) {
6188   result->service_ = parent;
6189   result->all_names_ =
6190       AllocateNameStrings(parent->full_name(), proto.name(), alloc);
6191 
6192   ValidateSymbolName(proto.name(), result->full_name(), proto);
6193 
6194   // These will be filled in when cross-linking.
6195   result->input_type_.Init();
6196   result->output_type_.Init();
6197 
6198   // Copy options.
6199   result->options_ = nullptr;  // Set to default_instance later if necessary.
6200   if (proto.has_options()) {
6201     AllocateOptions(proto.options(), result,
6202                     MethodDescriptorProto::kOptionsFieldNumber,
6203                     "google.protobuf.MethodOptions", alloc);
6204   }
6205 
6206   result->client_streaming_ = proto.client_streaming();
6207   result->server_streaming_ = proto.server_streaming();
6208 
6209   AddSymbol(result->full_name(), parent, result->name(), proto, Symbol(result));
6210 }
6211 
6212 #undef BUILD_ARRAY
6213 
6214 // -------------------------------------------------------------------
6215 
CrossLinkFile(FileDescriptor * file,const FileDescriptorProto & proto)6216 void DescriptorBuilder::CrossLinkFile(FileDescriptor* file,
6217                                       const FileDescriptorProto& proto) {
6218   if (file->options_ == nullptr) {
6219     file->options_ = &FileOptions::default_instance();
6220   }
6221 
6222   for (int i = 0; i < file->message_type_count(); i++) {
6223     CrossLinkMessage(&file->message_types_[i], proto.message_type(i));
6224   }
6225 
6226   for (int i = 0; i < file->extension_count(); i++) {
6227     CrossLinkField(&file->extensions_[i], proto.extension(i));
6228   }
6229 
6230   for (int i = 0; i < file->enum_type_count(); i++) {
6231     CrossLinkEnum(&file->enum_types_[i], proto.enum_type(i));
6232   }
6233 
6234   for (int i = 0; i < file->service_count(); i++) {
6235     CrossLinkService(&file->services_[i], proto.service(i));
6236   }
6237 }
6238 
CrossLinkMessage(Descriptor * message,const DescriptorProto & proto)6239 void DescriptorBuilder::CrossLinkMessage(Descriptor* message,
6240                                          const DescriptorProto& proto) {
6241   if (message->options_ == nullptr) {
6242     message->options_ = &MessageOptions::default_instance();
6243   }
6244 
6245   for (int i = 0; i < message->nested_type_count(); i++) {
6246     CrossLinkMessage(&message->nested_types_[i], proto.nested_type(i));
6247   }
6248 
6249   for (int i = 0; i < message->enum_type_count(); i++) {
6250     CrossLinkEnum(&message->enum_types_[i], proto.enum_type(i));
6251   }
6252 
6253   for (int i = 0; i < message->field_count(); i++) {
6254     CrossLinkField(&message->fields_[i], proto.field(i));
6255   }
6256 
6257   for (int i = 0; i < message->extension_count(); i++) {
6258     CrossLinkField(&message->extensions_[i], proto.extension(i));
6259   }
6260 
6261   for (int i = 0; i < message->extension_range_count(); i++) {
6262     CrossLinkExtensionRange(&message->extension_ranges_[i],
6263                             proto.extension_range(i));
6264   }
6265 
6266   // Set up field array for each oneof.
6267 
6268   // First count the number of fields per oneof.
6269   for (int i = 0; i < message->field_count(); i++) {
6270     const OneofDescriptor* oneof_decl = message->field(i)->containing_oneof();
6271     if (oneof_decl != nullptr) {
6272       // Make sure fields belonging to the same oneof are defined consecutively.
6273       // This enables optimizations in codegens and reflection libraries to
6274       // skip fields in the oneof group, as only one of the field can be set.
6275       // Note that field_count() returns how many fields in this oneof we have
6276       // seen so far. field_count() > 0 guarantees that i > 0, so field(i-1) is
6277       // safe.
6278       if (oneof_decl->field_count() > 0 &&
6279           message->field(i - 1)->containing_oneof() != oneof_decl) {
6280         AddError(message->full_name() + "." + message->field(i - 1)->name(),
6281                  proto.field(i - 1), DescriptorPool::ErrorCollector::TYPE,
6282                  strings::Substitute(
6283                      "Fields in the same oneof must be defined consecutively. "
6284                      "\"$0\" cannot be defined before the completion of the "
6285                      "\"$1\" oneof definition.",
6286                      message->field(i - 1)->name(), oneof_decl->name()));
6287       }
6288       // Must go through oneof_decls_ array to get a non-const version of the
6289       // OneofDescriptor.
6290       auto& out_oneof_decl = message->oneof_decls_[oneof_decl->index()];
6291       if (out_oneof_decl.field_count_ == 0) {
6292         out_oneof_decl.fields_ = message->field(i);
6293       }
6294 
6295       if (!had_errors_) {
6296         // Verify that they are contiguous.
6297         // This is assumed by OneofDescriptor::field(i).
6298         // But only if there are no errors.
6299         GOOGLE_CHECK_EQ(out_oneof_decl.fields_ + out_oneof_decl.field_count_,
6300                  message->field(i));
6301       }
6302       ++out_oneof_decl.field_count_;
6303     }
6304   }
6305 
6306   // Then verify the sizes.
6307   for (int i = 0; i < message->oneof_decl_count(); i++) {
6308     OneofDescriptor* oneof_decl = &message->oneof_decls_[i];
6309 
6310     if (oneof_decl->field_count() == 0) {
6311       AddError(message->full_name() + "." + oneof_decl->name(),
6312                proto.oneof_decl(i), DescriptorPool::ErrorCollector::NAME,
6313                "Oneof must have at least one field.");
6314     }
6315 
6316     if (oneof_decl->options_ == nullptr) {
6317       oneof_decl->options_ = &OneofOptions::default_instance();
6318     }
6319   }
6320 
6321   for (int i = 0; i < message->field_count(); i++) {
6322     const FieldDescriptor* field = message->field(i);
6323     if (field->proto3_optional_) {
6324       if (!field->containing_oneof() ||
6325           !field->containing_oneof()->is_synthetic()) {
6326         AddError(message->full_name(), proto.field(i),
6327                  DescriptorPool::ErrorCollector::OTHER,
6328                  "Fields with proto3_optional set must be "
6329                  "a member of a one-field oneof");
6330       }
6331     }
6332   }
6333 
6334   // Synthetic oneofs must be last.
6335   int first_synthetic = -1;
6336   for (int i = 0; i < message->oneof_decl_count(); i++) {
6337     const OneofDescriptor* oneof = message->oneof_decl(i);
6338     if (oneof->is_synthetic()) {
6339       if (first_synthetic == -1) {
6340         first_synthetic = i;
6341       }
6342     } else {
6343       if (first_synthetic != -1) {
6344         AddError(message->full_name(), proto.oneof_decl(i),
6345                  DescriptorPool::ErrorCollector::OTHER,
6346                  "Synthetic oneofs must be after all other oneofs");
6347       }
6348     }
6349   }
6350 
6351   if (first_synthetic == -1) {
6352     message->real_oneof_decl_count_ = message->oneof_decl_count_;
6353   } else {
6354     message->real_oneof_decl_count_ = first_synthetic;
6355   }
6356 }
6357 
CrossLinkExtensionRange(Descriptor::ExtensionRange * range,const DescriptorProto::ExtensionRange &)6358 void DescriptorBuilder::CrossLinkExtensionRange(
6359     Descriptor::ExtensionRange* range,
6360     const DescriptorProto::ExtensionRange& /*proto*/) {
6361   if (range->options_ == nullptr) {
6362     range->options_ = &ExtensionRangeOptions::default_instance();
6363   }
6364 }
6365 
// Cross-links a single field or extension. In order, this:
//   1. points options_ at the default FieldOptions when none was set;
//   2. for extensions, resolves the extendee, stores it as the containing
//      type and checks that the field number lies in one of its extension
//      ranges;
//   3. checks that a oneof member has label LABEL_OPTIONAL;
//   4. resolves type_name to a message or enum descriptor (or, in
//      lazily_build_dependencies_ mode, stashes the names for a later
//      lookup), including the enum default value;
//   5. registers the field in the fields-by-number table and, for
//      extensions, in the extension table.
// Problems are reported through AddError / AddNotDefinedError / AddWarning.
// Several failure paths return early, in which case step 5 is skipped.
void DescriptorBuilder::CrossLinkField(FieldDescriptor* field,
                                       const FieldDescriptorProto& proto) {
  // Fall back to the shared default options instance when none was set.
  if (field->options_ == nullptr) {
    field->options_ = &FieldOptions::default_instance();
  }

  // Extensions name their extendee explicitly; resolve it and record it as
  // the containing type. Both failure paths below return immediately.
  if (proto.has_extendee()) {
    Symbol extendee =
        LookupSymbol(proto.extendee(), field->full_name(),
                     DescriptorPool::PLACEHOLDER_EXTENDABLE_MESSAGE);
    if (extendee.IsNull()) {
      AddNotDefinedError(field->full_name(), proto,
                         DescriptorPool::ErrorCollector::EXTENDEE,
                         proto.extendee());
      return;
    } else if (extendee.type() != Symbol::MESSAGE) {
      AddError(field->full_name(), proto,
               DescriptorPool::ErrorCollector::EXTENDEE,
               "\"" + proto.extendee() + "\" is not a message type.");
      return;
    }
    field->containing_type_ = extendee.descriptor();

    const Descriptor::ExtensionRange* extension_range =
        field->containing_type()->FindExtensionRangeContainingNumber(
            field->number());

    if (extension_range == nullptr) {
      // Set of valid extension numbers for MessageSet is different (< 2^32)
      // from other extendees (< 2^29). If unknown deps are allowed, we may not
      // have that information, and wrongly deem the extension as invalid.
      auto skip_check = get_allow_unknown(pool_) &&
                        proto.extendee() == "google.protobuf.bridge.MessageSet";
      if (!skip_check) {
        AddError(field->full_name(), proto,
                 DescriptorPool::ErrorCollector::NUMBER,
                 strings::Substitute("\"$0\" does not declare $1 as an "
                                  "extension number.",
                                  field->containing_type()->full_name(),
                                  field->number()));
      }
    }
  }

  if (field->containing_oneof() != nullptr) {
    if (field->label() != FieldDescriptor::LABEL_OPTIONAL) {
      // Note that this error will never happen when parsing .proto files.
      // It can only happen if you manually construct a FileDescriptorProto
      // that is incorrect.
      AddError(field->full_name(), proto, DescriptorPool::ErrorCollector::NAME,
               "Fields of oneofs must themselves have label LABEL_OPTIONAL.");
    }
  }

  if (proto.has_type_name()) {
    // Assume we are expecting a message type unless the proto contains some
    // evidence that it expects an enum type.  This only makes a difference if
    // we end up creating a placeholder.
    bool expecting_enum = (proto.type() == FieldDescriptorProto::TYPE_ENUM) ||
                          proto.has_default_value();

    // In case of weak fields we force building the dependency. We need to know
    // if the type exist or not. If it doesn't exist we substitute Empty which
    // should only be done if the type can't be found in the generated pool.
    // TODO(gerbens) Ideally we should query the database directly to check
    // if weak fields exist or not so that we don't need to force building
    // weak dependencies. However the name lookup rules for symbols are
    // somewhat complicated, so I defer it to another CL.
    bool is_weak = !pool_->enforce_weak_ && proto.options().weak();
    bool is_lazy = pool_->lazily_build_dependencies_ && !is_weak;

    // The last argument asks LookupSymbol to build the dependency now unless
    // the field is being resolved lazily.
    Symbol type =
        LookupSymbol(proto.type_name(), field->full_name(),
                     expecting_enum ? DescriptorPool::PLACEHOLDER_ENUM
                                    : DescriptorPool::PLACEHOLDER_MESSAGE,
                     LOOKUP_TYPES, !is_lazy);

    if (type.IsNull()) {
      if (is_lazy) {
        // Save the symbol names for later for lookup, and allocate the once
        // object needed for the accessors.
        const std::string& name = proto.type_name();

        // Room for both strings plus their terminating NUL bytes.
        int name_sizes = static_cast<int>(name.size() + 1 +
                                          proto.default_value().size() + 1);

        // One allocation holds the once_flag followed directly by the two
        // NUL-terminated strings (type name, then default value).
        field->type_once_ = ::new (tables_->AllocateBytes(static_cast<int>(
            sizeof(internal::once_flag) + name_sizes))) internal::once_flag{};
        char* names = reinterpret_cast<char*>(field->type_once_ + 1);

        memcpy(names, name.c_str(), name.size() + 1);
        memcpy(names + name.size() + 1, proto.default_value().c_str(),
               proto.default_value().size() + 1);

        // AddFieldByNumber and AddExtension are done later in this function,
        // and can/must be done if the field type was not found. The related
        // error checking is not necessary when in lazily_build_dependencies_
        // mode, and can't be done without building the type's descriptor,
        // which we don't want to do.
        file_tables_->AddFieldByNumber(field);
        if (field->is_extension()) {
          tables_->AddExtension(field);
        }
        return;
      } else {
        // If the type is a weak type, we change the type to a google.protobuf.Empty
        // field.
        if (is_weak) {
          type = FindSymbol(kNonLinkedWeakMessageReplacementName);
        }
        if (type.IsNull()) {
          AddNotDefinedError(field->full_name(), proto,
                             DescriptorPool::ErrorCollector::TYPE,
                             proto.type_name());
          return;
        }
      }
    }

    if (!proto.has_type()) {
      // Choose field type based on symbol.
      if (type.type() == Symbol::MESSAGE) {
        field->type_ = FieldDescriptor::TYPE_MESSAGE;
      } else if (type.type() == Symbol::ENUM) {
        field->type_ = FieldDescriptor::TYPE_ENUM;
      } else {
        AddError(field->full_name(), proto,
                 DescriptorPool::ErrorCollector::TYPE,
                 "\"" + proto.type_name() + "\" is not a type.");
        return;
      }
    }

    if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
      // The declared field type says "message"; the resolved symbol must
      // agree.
      field->type_descriptor_.message_type = type.descriptor();
      if (field->type_descriptor_.message_type == nullptr) {
        AddError(field->full_name(), proto,
                 DescriptorPool::ErrorCollector::TYPE,
                 "\"" + proto.type_name() + "\" is not a message type.");
        return;
      }

      if (field->has_default_value()) {
        AddError(field->full_name(), proto,
                 DescriptorPool::ErrorCollector::DEFAULT_VALUE,
                 "Messages can't have default values.");
      }
    } else if (field->cpp_type() == FieldDescriptor::CPPTYPE_ENUM) {
      // The declared field type says "enum"; the resolved symbol must agree.
      field->type_descriptor_.enum_type = type.enum_descriptor();
      if (field->type_descriptor_.enum_type == nullptr) {
        AddError(field->full_name(), proto,
                 DescriptorPool::ErrorCollector::TYPE,
                 "\"" + proto.type_name() + "\" is not an enum type.");
        return;
      }

      if (field->enum_type()->is_placeholder_) {
        // We can't look up default values for placeholder types.  We'll have
        // to just drop them.
        field->has_default_value_ = false;
      }

      if (field->has_default_value()) {
        // Ensure that the default value is an identifier. Parser cannot always
        // verify this because it does not have complete type information.
        // N.B. that this check yields better error messages but is not
        // necessary for correctness (an enum symbol must be a valid identifier
        // anyway), only for better errors.
        if (!io::Tokenizer::IsIdentifier(proto.default_value())) {
          AddError(field->full_name(), proto,
                   DescriptorPool::ErrorCollector::DEFAULT_VALUE,
                   "Default value for an enum field must be an identifier.");
        } else {
          // We can't just use field->enum_type()->FindValueByName() here
          // because that locks the pool's mutex, which we have already locked
          // at this point.
          const EnumValueDescriptor* default_value =
              LookupSymbolNoPlaceholder(proto.default_value(),
                                        field->enum_type()->full_name())
                  .enum_value_descriptor();

          // The lookup may resolve to a value of a different enum; accept it
          // only when it belongs to this field's enum type.
          if (default_value != nullptr &&
              default_value->type() == field->enum_type()) {
            field->default_value_enum_ = default_value;
          } else {
            AddError(field->full_name(), proto,
                     DescriptorPool::ErrorCollector::DEFAULT_VALUE,
                     "Enum type \"" + field->enum_type()->full_name() +
                         "\" has no value named \"" + proto.default_value() +
                         "\".");
          }
        }
      } else if (field->enum_type()->value_count() > 0) {
        // All enums must have at least one value, or we would have reported
        // an error elsewhere.  We use the first defined value as the default
        // if a default is not explicitly defined.
        field->default_value_enum_ = field->enum_type()->value(0);
      }
    } else {
      AddError(field->full_name(), proto, DescriptorPool::ErrorCollector::TYPE,
               "Field with primitive type has type_name.");
    }
  } else {
    // No type_name was given, which is only valid for primitive field types.
    if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE ||
        field->cpp_type() == FieldDescriptor::CPPTYPE_ENUM) {
      AddError(field->full_name(), proto, DescriptorPool::ErrorCollector::TYPE,
               "Field with message or enum type missing type_name.");
    }
  }

  // Add the field to the fields-by-number table.
  // Note:  We have to do this *after* cross-linking because extensions do not
  // know their containing type until now. If we're in
  // lazily_build_dependencies_ mode, we're guaranteed there's no errors, so no
  // risk to calling containing_type() or other accessors that will build
  // dependencies.
  if (!file_tables_->AddFieldByNumber(field)) {
    // The number is already taken within this containing type: report which
    // field or extension holds it.
    const FieldDescriptor* conflicting_field = file_tables_->FindFieldByNumber(
        field->containing_type(), field->number());
    std::string containing_type_name =
        field->containing_type() == nullptr
            ? "unknown"
            : field->containing_type()->full_name();
    if (field->is_extension()) {
      AddError(field->full_name(), proto,
               DescriptorPool::ErrorCollector::NUMBER,
               strings::Substitute("Extension number $0 has already been used "
                                "in \"$1\" by extension \"$2\".",
                                field->number(), containing_type_name,
                                conflicting_field->full_name()));
    } else {
      AddError(field->full_name(), proto,
               DescriptorPool::ErrorCollector::NUMBER,
               strings::Substitute("Field number $0 has already been used in "
                                "\"$1\" by field \"$2\".",
                                field->number(), containing_type_name,
                                conflicting_field->name()));
    }
  } else {
    if (field->is_extension()) {
      // Extensions are additionally registered in tables_; a clash there is
      // only reported as a warning (see the TODO below).
      if (!tables_->AddExtension(field)) {
        const FieldDescriptor* conflicting_field =
            tables_->FindExtension(field->containing_type(), field->number());
        std::string containing_type_name =
            field->containing_type() == nullptr
                ? "unknown"
                : field->containing_type()->full_name();
        std::string error_msg = strings::Substitute(
            "Extension number $0 has already been used in \"$1\" by extension "
            "\"$2\" defined in $3.",
            field->number(), containing_type_name,
            conflicting_field->full_name(), conflicting_field->file()->name());
        // Conflicting extension numbers should be an error. However, before
        // turning this into an error we need to fix all existing broken
        // protos first.
        // TODO(xiaofeng): Change this to an error.
        AddWarning(field->full_name(), proto,
                   DescriptorPool::ErrorCollector::NUMBER, error_msg);
      }
    }
  }
}
6628 
CrossLinkEnum(EnumDescriptor * enum_type,const EnumDescriptorProto & proto)6629 void DescriptorBuilder::CrossLinkEnum(EnumDescriptor* enum_type,
6630                                       const EnumDescriptorProto& proto) {
6631   if (enum_type->options_ == nullptr) {
6632     enum_type->options_ = &EnumOptions::default_instance();
6633   }
6634 
6635   for (int i = 0; i < enum_type->value_count(); i++) {
6636     CrossLinkEnumValue(&enum_type->values_[i], proto.value(i));
6637   }
6638 }
6639 
CrossLinkEnumValue(EnumValueDescriptor * enum_value,const EnumValueDescriptorProto &)6640 void DescriptorBuilder::CrossLinkEnumValue(
6641     EnumValueDescriptor* enum_value,
6642     const EnumValueDescriptorProto& /* proto */) {
6643   if (enum_value->options_ == nullptr) {
6644     enum_value->options_ = &EnumValueOptions::default_instance();
6645   }
6646 }
6647 
CrossLinkService(ServiceDescriptor * service,const ServiceDescriptorProto & proto)6648 void DescriptorBuilder::CrossLinkService(ServiceDescriptor* service,
6649                                          const ServiceDescriptorProto& proto) {
6650   if (service->options_ == nullptr) {
6651     service->options_ = &ServiceOptions::default_instance();
6652   }
6653 
6654   for (int i = 0; i < service->method_count(); i++) {
6655     CrossLinkMethod(&service->methods_[i], proto.method(i));
6656   }
6657 }
6658 
CrossLinkMethod(MethodDescriptor * method,const MethodDescriptorProto & proto)6659 void DescriptorBuilder::CrossLinkMethod(MethodDescriptor* method,
6660                                         const MethodDescriptorProto& proto) {
6661   if (method->options_ == nullptr) {
6662     method->options_ = &MethodOptions::default_instance();
6663   }
6664 
6665   Symbol input_type =
6666       LookupSymbol(proto.input_type(), method->full_name(),
6667                    DescriptorPool::PLACEHOLDER_MESSAGE, LOOKUP_ALL,
6668                    !pool_->lazily_build_dependencies_);
6669   if (input_type.IsNull()) {
6670     if (!pool_->lazily_build_dependencies_) {
6671       AddNotDefinedError(method->full_name(), proto,
6672                          DescriptorPool::ErrorCollector::INPUT_TYPE,
6673                          proto.input_type());
6674     } else {
6675       method->input_type_.SetLazy(proto.input_type(), file_);
6676     }
6677   } else if (input_type.type() != Symbol::MESSAGE) {
6678     AddError(method->full_name(), proto,
6679              DescriptorPool::ErrorCollector::INPUT_TYPE,
6680              "\"" + proto.input_type() + "\" is not a message type.");
6681   } else {
6682     method->input_type_.Set(input_type.descriptor());
6683   }
6684 
6685   Symbol output_type =
6686       LookupSymbol(proto.output_type(), method->full_name(),
6687                    DescriptorPool::PLACEHOLDER_MESSAGE, LOOKUP_ALL,
6688                    !pool_->lazily_build_dependencies_);
6689   if (output_type.IsNull()) {
6690     if (!pool_->lazily_build_dependencies_) {
6691       AddNotDefinedError(method->full_name(), proto,
6692                          DescriptorPool::ErrorCollector::OUTPUT_TYPE,
6693                          proto.output_type());
6694     } else {
6695       method->output_type_.SetLazy(proto.output_type(), file_);
6696     }
6697   } else if (output_type.type() != Symbol::MESSAGE) {
6698     AddError(method->full_name(), proto,
6699              DescriptorPool::ErrorCollector::OUTPUT_TYPE,
6700              "\"" + proto.output_type() + "\" is not a message type.");
6701   } else {
6702     method->output_type_.Set(output_type.descriptor());
6703   }
6704 }
6705 
SuggestFieldNumbers(FileDescriptor * file,const FileDescriptorProto & proto)6706 void DescriptorBuilder::SuggestFieldNumbers(FileDescriptor* file,
6707                                             const FileDescriptorProto& proto) {
6708   for (int message_index = 0; message_index < file->message_type_count();
6709        message_index++) {
6710     const Descriptor* message = &file->message_types_[message_index];
6711     auto* hints = FindOrNull(message_hints_, message);
6712     if (!hints) continue;
6713     constexpr int kMaxSuggestions = 3;
6714     int fields_to_suggest = std::min(kMaxSuggestions, hints->fields_to_suggest);
6715     if (fields_to_suggest <= 0) continue;
6716     struct Range {
6717       int from;
6718       int to;
6719     };
6720     std::vector<Range> used_ordinals;
6721     auto add_ordinal = [&](int ordinal) {
6722       if (ordinal <= 0 || ordinal > FieldDescriptor::kMaxNumber) return;
6723       if (!used_ordinals.empty() &&
6724           ordinal == used_ordinals.back().to) {
6725         used_ordinals.back().to = ordinal + 1;
6726       } else {
6727         used_ordinals.push_back({ordinal, ordinal + 1});
6728       }
6729     };
6730     auto add_range = [&](int from, int to) {
6731       from = std::max(0, std::min(FieldDescriptor::kMaxNumber + 1, from));
6732       to = std::max(0, std::min(FieldDescriptor::kMaxNumber + 1, to));
6733       if (from >= to) return;
6734       used_ordinals.push_back({from, to});
6735     };
6736     for (int i = 0; i < message->field_count(); i++) {
6737       add_ordinal(message->field(i)->number());
6738     }
6739     for (int i = 0; i < message->extension_count(); i++) {
6740       add_ordinal(message->extension(i)->number());
6741     }
6742     for (int i = 0; i < message->reserved_range_count(); i++) {
6743       auto range = message->reserved_range(i);
6744       add_range(range->start, range->end);
6745     }
6746     for (int i = 0; i < message->extension_range_count(); i++) {
6747       auto range = message->extension_range(i);
6748       add_range(range->start, range->end);
6749     }
6750     used_ordinals.push_back(
6751         {FieldDescriptor::kMaxNumber, FieldDescriptor::kMaxNumber + 1});
6752     used_ordinals.push_back({FieldDescriptor::kFirstReservedNumber,
6753                              FieldDescriptor::kLastReservedNumber});
6754     std::sort(used_ordinals.begin(), used_ordinals.end(),
6755               [](Range lhs, Range rhs) {
6756                 return std::tie(lhs.from, lhs.to) < std::tie(rhs.from, rhs.to);
6757               });
6758     int current_ordinal = 1;
6759     std::stringstream id_list;
6760     id_list << "Suggested field numbers for " << message->full_name() << ": ";
6761     const char* separator = "";
6762     for (auto& current_range : used_ordinals) {
6763       while (current_ordinal < current_range.from && fields_to_suggest > 0) {
6764         id_list << separator << current_ordinal++;
6765         separator = ", ";
6766         fields_to_suggest--;
6767       }
6768       if (fields_to_suggest == 0) break;
6769       current_ordinal = std::max(current_ordinal, current_range.to);
6770     }
6771     if (hints->first_reason) {
6772       AddError(message->full_name(), *hints->first_reason,
6773                hints->first_reason_location, id_list.str());
6774     }
6775   }
6776 }
6777 
6778 // -------------------------------------------------------------------
6779 
// Expands to a loop over descriptor->array_name##_count() elements that
// calls Validate<type>Options() on element i of descriptor->array_name##s_
// together with proto.array_name(i). The expansion refers to a variable
// named `proto`, which must be in scope wherever the macro is used.
#define VALIDATE_OPTIONS_FROM_ARRAY(descriptor, array_name, type) \
  for (int i = 0; i < descriptor->array_name##_count(); ++i) {    \
    Validate##type##Options(descriptor->array_name##s_ + i,       \
                            proto.array_name(i));                 \
  }
6785 
6786 // Determine if the file uses optimize_for = LITE_RUNTIME, being careful to
6787 // avoid problems that exist at init time.
IsLite(const FileDescriptor * file)6788 static bool IsLite(const FileDescriptor* file) {
6789   // TODO(kenton):  I don't even remember how many of these conditions are
6790   //   actually possible.  I'm just being super-safe.
6791   return file != nullptr &&
6792          &file->options() != &FileOptions::default_instance() &&
6793          file->options().optimize_for() == FileOptions::LITE_RUNTIME;
6794 }
6795 
ValidateFileOptions(FileDescriptor * file,const FileDescriptorProto & proto)6796 void DescriptorBuilder::ValidateFileOptions(FileDescriptor* file,
6797                                             const FileDescriptorProto& proto) {
6798   VALIDATE_OPTIONS_FROM_ARRAY(file, message_type, Message);
6799   VALIDATE_OPTIONS_FROM_ARRAY(file, enum_type, Enum);
6800   VALIDATE_OPTIONS_FROM_ARRAY(file, service, Service);
6801   VALIDATE_OPTIONS_FROM_ARRAY(file, extension, Field);
6802 
6803   // Lite files can only be imported by other Lite files.
6804   if (!IsLite(file)) {
6805     for (int i = 0; i < file->dependency_count(); i++) {
6806       if (IsLite(file->dependency(i))) {
6807         AddError(
6808             file->dependency(i)->name(), proto,
6809             DescriptorPool::ErrorCollector::IMPORT,
6810             "Files that do not use optimize_for = LITE_RUNTIME cannot import "
6811             "files which do use this option.  This file is not lite, but it "
6812             "imports \"" +
6813                 file->dependency(i)->name() + "\" which is.");
6814         break;
6815       }
6816     }
6817   }
6818   if (file->syntax() == FileDescriptor::SYNTAX_PROTO3) {
6819     ValidateProto3(file, proto);
6820   }
6821 }
6822 
ValidateProto3(FileDescriptor * file,const FileDescriptorProto & proto)6823 void DescriptorBuilder::ValidateProto3(FileDescriptor* file,
6824                                        const FileDescriptorProto& proto) {
6825   for (int i = 0; i < file->extension_count(); ++i) {
6826     ValidateProto3Field(file->extensions_ + i, proto.extension(i));
6827   }
6828   for (int i = 0; i < file->message_type_count(); ++i) {
6829     ValidateProto3Message(file->message_types_ + i, proto.message_type(i));
6830   }
6831   for (int i = 0; i < file->enum_type_count(); ++i) {
6832     ValidateProto3Enum(file->enum_types_ + i, proto.enum_type(i));
6833   }
6834 }
6835 
// Returns a copy of `name` with every '_' removed and the ASCII letters
// 'A'-'Z' folded to lowercase. All other characters are copied unchanged.
static std::string ToLowercaseWithoutUnderscores(const std::string& name) {
  std::string folded;
  for (std::string::size_type pos = 0; pos < name.size(); ++pos) {
    const char ch = name[pos];
    if (ch == '_') continue;
    const bool is_upper = ch >= 'A' && ch <= 'Z';
    folded.push_back(is_upper ? static_cast<char>(ch - 'A' + 'a') : ch);
  }
  return folded;
}
6849 
ValidateProto3Message(Descriptor * message,const DescriptorProto & proto)6850 void DescriptorBuilder::ValidateProto3Message(Descriptor* message,
6851                                               const DescriptorProto& proto) {
6852   for (int i = 0; i < message->nested_type_count(); ++i) {
6853     ValidateProto3Message(message->nested_types_ + i, proto.nested_type(i));
6854   }
6855   for (int i = 0; i < message->enum_type_count(); ++i) {
6856     ValidateProto3Enum(message->enum_types_ + i, proto.enum_type(i));
6857   }
6858   for (int i = 0; i < message->field_count(); ++i) {
6859     ValidateProto3Field(message->fields_ + i, proto.field(i));
6860   }
6861   for (int i = 0; i < message->extension_count(); ++i) {
6862     ValidateProto3Field(message->extensions_ + i, proto.extension(i));
6863   }
6864   if (message->extension_range_count() > 0) {
6865     AddError(message->full_name(), proto.extension_range(0),
6866              DescriptorPool::ErrorCollector::NUMBER,
6867              "Extension ranges are not allowed in proto3.");
6868   }
6869   if (message->options().message_set_wire_format()) {
6870     // Using MessageSet doesn't make sense since we disallow extensions.
6871     AddError(message->full_name(), proto, DescriptorPool::ErrorCollector::NAME,
6872              "MessageSet is not supported in proto3.");
6873   }
6874 
6875   // In proto3, we reject field names if they conflict in camelCase.
6876   // Note that we currently enforce a stricter rule: Field names must be
6877   // unique after being converted to lowercase with underscores removed.
6878   std::map<std::string, const FieldDescriptor*> name_to_field;
6879   for (int i = 0; i < message->field_count(); ++i) {
6880     std::string lowercase_name =
6881         ToLowercaseWithoutUnderscores(message->field(i)->name());
6882     if (name_to_field.find(lowercase_name) != name_to_field.end()) {
6883       AddError(message->full_name(), proto.field(i),
6884                DescriptorPool::ErrorCollector::NAME,
6885                "The JSON camel-case name of field \"" +
6886                    message->field(i)->name() + "\" conflicts with field \"" +
6887                    name_to_field[lowercase_name]->name() + "\". This is not " +
6888                    "allowed in proto3.");
6889     } else {
6890       name_to_field[lowercase_name] = message->field(i);
6891     }
6892   }
6893 }
6894 
ValidateProto3Field(FieldDescriptor * field,const FieldDescriptorProto & proto)6895 void DescriptorBuilder::ValidateProto3Field(FieldDescriptor* field,
6896                                             const FieldDescriptorProto& proto) {
6897   if (field->is_extension() &&
6898       !AllowedExtendeeInProto3(field->containing_type()->full_name())) {
6899     AddError(field->full_name(), proto,
6900              DescriptorPool::ErrorCollector::EXTENDEE,
6901              "Extensions in proto3 are only allowed for defining options.");
6902   }
6903   if (field->is_required()) {
6904     AddError(field->full_name(), proto, DescriptorPool::ErrorCollector::TYPE,
6905              "Required fields are not allowed in proto3.");
6906   }
6907   if (field->has_default_value()) {
6908     AddError(field->full_name(), proto,
6909              DescriptorPool::ErrorCollector::DEFAULT_VALUE,
6910              "Explicit default values are not allowed in proto3.");
6911   }
6912   if (field->cpp_type() == FieldDescriptor::CPPTYPE_ENUM &&
6913       field->enum_type() &&
6914       field->enum_type()->file()->syntax() != FileDescriptor::SYNTAX_PROTO3 &&
6915       field->enum_type()->file()->syntax() != FileDescriptor::SYNTAX_UNKNOWN) {
6916     // Proto3 messages can only use Proto3 enum types; otherwise we can't
6917     // guarantee that the default value is zero.
6918     AddError(field->full_name(), proto, DescriptorPool::ErrorCollector::TYPE,
6919              "Enum type \"" + field->enum_type()->full_name() +
6920                  "\" is not a proto3 enum, but is used in \"" +
6921                  field->containing_type()->full_name() +
6922                  "\" which is a proto3 message type.");
6923   }
6924   if (field->type() == FieldDescriptor::TYPE_GROUP) {
6925     AddError(field->full_name(), proto, DescriptorPool::ErrorCollector::TYPE,
6926              "Groups are not supported in proto3 syntax.");
6927   }
6928 }
6929 
ValidateProto3Enum(EnumDescriptor * enm,const EnumDescriptorProto & proto)6930 void DescriptorBuilder::ValidateProto3Enum(EnumDescriptor* enm,
6931                                            const EnumDescriptorProto& proto) {
6932   if (enm->value_count() > 0 && enm->value(0)->number() != 0) {
6933     AddError(enm->full_name(), proto.value(0),
6934              DescriptorPool::ErrorCollector::NUMBER,
6935              "The first enum value must be zero in proto3.");
6936   }
6937 }
6938 
ValidateMessageOptions(Descriptor * message,const DescriptorProto & proto)6939 void DescriptorBuilder::ValidateMessageOptions(Descriptor* message,
6940                                                const DescriptorProto& proto) {
6941   VALIDATE_OPTIONS_FROM_ARRAY(message, field, Field);
6942   VALIDATE_OPTIONS_FROM_ARRAY(message, nested_type, Message);
6943   VALIDATE_OPTIONS_FROM_ARRAY(message, enum_type, Enum);
6944   VALIDATE_OPTIONS_FROM_ARRAY(message, extension, Field);
6945 
6946   const int64_t max_extension_range =
6947       static_cast<int64_t>(message->options().message_set_wire_format()
6948                                ? std::numeric_limits<int32_t>::max()
6949                                : FieldDescriptor::kMaxNumber);
6950   for (int i = 0; i < message->extension_range_count(); ++i) {
6951     if (message->extension_range(i)->end > max_extension_range + 1) {
6952       AddError(message->full_name(), proto.extension_range(i),
6953                DescriptorPool::ErrorCollector::NUMBER,
6954                strings::Substitute("Extension numbers cannot be greater than $0.",
6955                                 max_extension_range));
6956     }
6957 
6958     ValidateExtensionRangeOptions(message->full_name(),
6959                                   message->extension_ranges_ + i,
6960                                   proto.extension_range(i));
6961   }
6962 }
6963 
6964 
ValidateFieldOptions(FieldDescriptor * field,const FieldDescriptorProto & proto)6965 void DescriptorBuilder::ValidateFieldOptions(
6966     FieldDescriptor* field, const FieldDescriptorProto& proto) {
6967   if (pool_->lazily_build_dependencies_ && (!field || !field->message_type())) {
6968     return;
6969   }
6970   // Only message type fields may be lazy.
6971   if (field->options().lazy() || field->options().unverified_lazy()) {
6972     if (field->type() != FieldDescriptor::TYPE_MESSAGE) {
6973       AddError(field->full_name(), proto, DescriptorPool::ErrorCollector::TYPE,
6974                "[lazy = true] can only be specified for submessage fields.");
6975     }
6976   }
6977 
6978   // Only repeated primitive fields may be packed.
6979   if (field->options().packed() && !field->is_packable()) {
6980     AddError(
6981         field->full_name(), proto, DescriptorPool::ErrorCollector::TYPE,
6982         "[packed = true] can only be specified for repeated primitive fields.");
6983   }
6984 
6985   // Note:  Default instance may not yet be initialized here, so we have to
6986   //   avoid reading from it.
6987   if (field->containing_type_ != nullptr &&
6988       &field->containing_type()->options() !=
6989           &MessageOptions::default_instance() &&
6990       field->containing_type()->options().message_set_wire_format()) {
6991     if (field->is_extension()) {
6992       if (!field->is_optional() ||
6993           field->type() != FieldDescriptor::TYPE_MESSAGE) {
6994         AddError(field->full_name(), proto,
6995                  DescriptorPool::ErrorCollector::TYPE,
6996                  "Extensions of MessageSets must be optional messages.");
6997       }
6998     } else {
6999       AddError(field->full_name(), proto, DescriptorPool::ErrorCollector::NAME,
7000                "MessageSets cannot have fields, only extensions.");
7001     }
7002   }
7003 
7004   // Lite extensions can only be of Lite types.
7005   if (IsLite(field->file()) && field->containing_type_ != nullptr &&
7006       !IsLite(field->containing_type()->file())) {
7007     AddError(field->full_name(), proto,
7008              DescriptorPool::ErrorCollector::EXTENDEE,
7009              "Extensions to non-lite types can only be declared in non-lite "
7010              "files.  Note that you cannot extend a non-lite type to contain "
7011              "a lite type, but the reverse is allowed.");
7012   }
7013 
7014   // Validate map types.
7015   if (field->is_map()) {
7016     if (!ValidateMapEntry(field, proto)) {
7017       AddError(field->full_name(), proto, DescriptorPool::ErrorCollector::TYPE,
7018                "map_entry should not be set explicitly. Use map<KeyType, "
7019                "ValueType> instead.");
7020     }
7021   }
7022 
7023   ValidateJSType(field, proto);
7024 
7025   // json_name option is not allowed on extension fields. Note that the
7026   // json_name field in FieldDescriptorProto is always populated by protoc
7027   // when it sends descriptor data to plugins (calculated from field name if
7028   // the option is not explicitly set) so we can't rely on its presence to
7029   // determine whether the json_name option is set on the field. Here we
7030   // compare it against the default calculated json_name value and consider
7031   // the option set if they are different. This won't catch the case when
7032   // an user explicitly sets json_name to the default value, but should be
7033   // good enough to catch common misuses.
7034   if (field->is_extension() &&
7035       (field->has_json_name() &&
7036        field->json_name() != ToJsonName(field->name()))) {
7037     AddError(field->full_name(), proto,
7038              DescriptorPool::ErrorCollector::OPTION_NAME,
7039              "option json_name is not allowed on extension fields.");
7040   }
7041 
7042 }
7043 
ValidateEnumOptions(EnumDescriptor * enm,const EnumDescriptorProto & proto)7044 void DescriptorBuilder::ValidateEnumOptions(EnumDescriptor* enm,
7045                                             const EnumDescriptorProto& proto) {
7046   VALIDATE_OPTIONS_FROM_ARRAY(enm, value, EnumValue);
7047   if (!enm->options().has_allow_alias() || !enm->options().allow_alias()) {
7048     std::map<int, std::string> used_values;
7049     for (int i = 0; i < enm->value_count(); ++i) {
7050       const EnumValueDescriptor* enum_value = enm->value(i);
7051       if (used_values.find(enum_value->number()) != used_values.end()) {
7052         std::string error =
7053             "\"" + enum_value->full_name() +
7054             "\" uses the same enum value as \"" +
7055             used_values[enum_value->number()] +
7056             "\". If this is intended, set "
7057             "'option allow_alias = true;' to the enum definition.";
7058         if (!enm->options().allow_alias()) {
7059           // Generate error if duplicated enum values are explicitly disallowed.
7060           AddError(enm->full_name(), proto.value(i),
7061                    DescriptorPool::ErrorCollector::NUMBER, error);
7062         }
7063       } else {
7064         used_values[enum_value->number()] = enum_value->full_name();
7065       }
7066     }
7067   }
7068 }
7069 
ValidateEnumValueOptions(EnumValueDescriptor *,const EnumValueDescriptorProto &)7070 void DescriptorBuilder::ValidateEnumValueOptions(
7071     EnumValueDescriptor* /* enum_value */,
7072     const EnumValueDescriptorProto& /* proto */) {
7073   // Nothing to do so far.
7074 }
7075 
ValidateExtensionRangeOptions(const std::string & full_name,Descriptor::ExtensionRange * extension_range,const DescriptorProto_ExtensionRange & proto)7076 void DescriptorBuilder::ValidateExtensionRangeOptions(
7077     const std::string& full_name, Descriptor::ExtensionRange* extension_range,
7078     const DescriptorProto_ExtensionRange& proto) {
7079   (void)full_name;        // Parameter is used by Google-internal code.
7080   (void)extension_range;  // Parameter is used by Google-internal code.
7081 }
7082 
ValidateServiceOptions(ServiceDescriptor * service,const ServiceDescriptorProto & proto)7083 void DescriptorBuilder::ValidateServiceOptions(
7084     ServiceDescriptor* service, const ServiceDescriptorProto& proto) {
7085   if (IsLite(service->file()) &&
7086       (service->file()->options().cc_generic_services() ||
7087        service->file()->options().java_generic_services())) {
7088     AddError(service->full_name(), proto, DescriptorPool::ErrorCollector::NAME,
7089              "Files with optimize_for = LITE_RUNTIME cannot define services "
7090              "unless you set both options cc_generic_services and "
7091              "java_generic_services to false.");
7092   }
7093 
7094   VALIDATE_OPTIONS_FROM_ARRAY(service, method, Method);
7095 }
7096 
ValidateMethodOptions(MethodDescriptor *,const MethodDescriptorProto &)7097 void DescriptorBuilder::ValidateMethodOptions(
7098     MethodDescriptor* /* method */, const MethodDescriptorProto& /* proto */) {
7099   // Nothing to do so far.
7100 }
7101 
ValidateMapEntry(FieldDescriptor * field,const FieldDescriptorProto & proto)7102 bool DescriptorBuilder::ValidateMapEntry(FieldDescriptor* field,
7103                                          const FieldDescriptorProto& proto) {
7104   const Descriptor* message = field->message_type();
7105   if (  // Must not contain extensions, extension range or nested message or
7106         // enums
7107       message->extension_count() != 0 ||
7108       field->label() != FieldDescriptor::LABEL_REPEATED ||
7109       message->extension_range_count() != 0 ||
7110       message->nested_type_count() != 0 || message->enum_type_count() != 0 ||
7111       // Must contain exactly two fields
7112       message->field_count() != 2 ||
7113       // Field name and message name must match
7114       message->name() != ToCamelCase(field->name(), false) + "Entry" ||
7115       // Entry message must be in the same containing type of the field.
7116       field->containing_type() != message->containing_type()) {
7117     return false;
7118   }
7119 
7120   const FieldDescriptor* key = message->map_key();
7121   const FieldDescriptor* value = message->map_value();
7122   if (key->label() != FieldDescriptor::LABEL_OPTIONAL || key->number() != 1 ||
7123       key->name() != "key") {
7124     return false;
7125   }
7126   if (value->label() != FieldDescriptor::LABEL_OPTIONAL ||
7127       value->number() != 2 || value->name() != "value") {
7128     return false;
7129   }
7130 
7131   // Check key types are legal.
7132   switch (key->type()) {
7133     case FieldDescriptor::TYPE_ENUM:
7134       AddError(field->full_name(), proto, DescriptorPool::ErrorCollector::TYPE,
7135                "Key in map fields cannot be enum types.");
7136       break;
7137     case FieldDescriptor::TYPE_FLOAT:
7138     case FieldDescriptor::TYPE_DOUBLE:
7139     case FieldDescriptor::TYPE_MESSAGE:
7140     case FieldDescriptor::TYPE_GROUP:
7141     case FieldDescriptor::TYPE_BYTES:
7142       AddError(
7143           field->full_name(), proto, DescriptorPool::ErrorCollector::TYPE,
7144           "Key in map fields cannot be float/double, bytes or message types.");
7145       break;
7146     case FieldDescriptor::TYPE_BOOL:
7147     case FieldDescriptor::TYPE_INT32:
7148     case FieldDescriptor::TYPE_INT64:
7149     case FieldDescriptor::TYPE_SINT32:
7150     case FieldDescriptor::TYPE_SINT64:
7151     case FieldDescriptor::TYPE_STRING:
7152     case FieldDescriptor::TYPE_UINT32:
7153     case FieldDescriptor::TYPE_UINT64:
7154     case FieldDescriptor::TYPE_FIXED32:
7155     case FieldDescriptor::TYPE_FIXED64:
7156     case FieldDescriptor::TYPE_SFIXED32:
7157     case FieldDescriptor::TYPE_SFIXED64:
7158       // Legal cases
7159       break;
7160       // Do not add a default, so that the compiler will complain when new types
7161       // are added.
7162   }
7163 
7164   if (value->type() == FieldDescriptor::TYPE_ENUM) {
7165     if (value->enum_type()->value(0)->number() != 0) {
7166       AddError(field->full_name(), proto, DescriptorPool::ErrorCollector::TYPE,
7167                "Enum value in map must define 0 as the first value.");
7168     }
7169   }
7170 
7171   return true;
7172 }
7173 
DetectMapConflicts(const Descriptor * message,const DescriptorProto & proto)7174 void DescriptorBuilder::DetectMapConflicts(const Descriptor* message,
7175                                            const DescriptorProto& proto) {
7176   std::map<std::string, const Descriptor*> seen_types;
7177   for (int i = 0; i < message->nested_type_count(); ++i) {
7178     const Descriptor* nested = message->nested_type(i);
7179     std::pair<std::map<std::string, const Descriptor*>::iterator, bool> result =
7180         seen_types.insert(std::make_pair(nested->name(), nested));
7181     if (!result.second) {
7182       if (result.first->second->options().map_entry() ||
7183           nested->options().map_entry()) {
7184         AddError(message->full_name(), proto,
7185                  DescriptorPool::ErrorCollector::NAME,
7186                  "Expanded map entry type " + nested->name() +
7187                      " conflicts with an existing nested message type.");
7188         break;
7189       }
7190     }
7191     // Recursively test on the nested types.
7192     DetectMapConflicts(message->nested_type(i), proto.nested_type(i));
7193   }
7194   // Check for conflicted field names.
7195   for (int i = 0; i < message->field_count(); ++i) {
7196     const FieldDescriptor* field = message->field(i);
7197     std::map<std::string, const Descriptor*>::iterator iter =
7198         seen_types.find(field->name());
7199     if (iter != seen_types.end() && iter->second->options().map_entry()) {
7200       AddError(message->full_name(), proto,
7201                DescriptorPool::ErrorCollector::NAME,
7202                "Expanded map entry type " + iter->second->name() +
7203                    " conflicts with an existing field.");
7204     }
7205   }
7206   // Check for conflicted enum names.
7207   for (int i = 0; i < message->enum_type_count(); ++i) {
7208     const EnumDescriptor* enum_desc = message->enum_type(i);
7209     std::map<std::string, const Descriptor*>::iterator iter =
7210         seen_types.find(enum_desc->name());
7211     if (iter != seen_types.end() && iter->second->options().map_entry()) {
7212       AddError(message->full_name(), proto,
7213                DescriptorPool::ErrorCollector::NAME,
7214                "Expanded map entry type " + iter->second->name() +
7215                    " conflicts with an existing enum type.");
7216     }
7217   }
7218   // Check for conflicted oneof names.
7219   for (int i = 0; i < message->oneof_decl_count(); ++i) {
7220     const OneofDescriptor* oneof_desc = message->oneof_decl(i);
7221     std::map<std::string, const Descriptor*>::iterator iter =
7222         seen_types.find(oneof_desc->name());
7223     if (iter != seen_types.end() && iter->second->options().map_entry()) {
7224       AddError(message->full_name(), proto,
7225                DescriptorPool::ErrorCollector::NAME,
7226                "Expanded map entry type " + iter->second->name() +
7227                    " conflicts with an existing oneof type.");
7228     }
7229   }
7230 }
7231 
ValidateJSType(FieldDescriptor * field,const FieldDescriptorProto & proto)7232 void DescriptorBuilder::ValidateJSType(FieldDescriptor* field,
7233                                        const FieldDescriptorProto& proto) {
7234   FieldOptions::JSType jstype = field->options().jstype();
7235   // The default is always acceptable.
7236   if (jstype == FieldOptions::JS_NORMAL) {
7237     return;
7238   }
7239 
7240   switch (field->type()) {
7241     // Integral 64-bit types may be represented as JavaScript numbers or
7242     // strings.
7243     case FieldDescriptor::TYPE_UINT64:
7244     case FieldDescriptor::TYPE_INT64:
7245     case FieldDescriptor::TYPE_SINT64:
7246     case FieldDescriptor::TYPE_FIXED64:
7247     case FieldDescriptor::TYPE_SFIXED64:
7248       if (jstype == FieldOptions::JS_STRING ||
7249           jstype == FieldOptions::JS_NUMBER) {
7250         return;
7251       }
7252       AddError(field->full_name(), proto, DescriptorPool::ErrorCollector::TYPE,
7253                "Illegal jstype for int64, uint64, sint64, fixed64 "
7254                "or sfixed64 field: " +
7255                    FieldOptions_JSType_descriptor()->value(jstype)->name());
7256       break;
7257 
7258     // No other types permit a jstype option.
7259     default:
7260       AddError(field->full_name(), proto, DescriptorPool::ErrorCollector::TYPE,
7261                "jstype is only allowed on int64, uint64, sint64, fixed64 "
7262                "or sfixed64 fields.");
7263       break;
7264   }
7265 }
7266 
7267 #undef VALIDATE_OPTIONS_FROM_ARRAY
7268 
7269 // -------------------------------------------------------------------
7270 
OptionInterpreter(DescriptorBuilder * builder)7271 DescriptorBuilder::OptionInterpreter::OptionInterpreter(
7272     DescriptorBuilder* builder)
7273     : builder_(builder) {
7274   GOOGLE_CHECK(builder_);
7275 }
7276 
~OptionInterpreter()7277 DescriptorBuilder::OptionInterpreter::~OptionInterpreter() {}
7278 
InterpretOptions(OptionsToInterpret * options_to_interpret)7279 bool DescriptorBuilder::OptionInterpreter::InterpretOptions(
7280     OptionsToInterpret* options_to_interpret) {
7281   // Note that these may be in different pools, so we can't use the same
7282   // descriptor and reflection objects on both.
7283   Message* options = options_to_interpret->options;
7284   const Message* original_options = options_to_interpret->original_options;
7285 
7286   bool failed = false;
7287   options_to_interpret_ = options_to_interpret;
7288 
7289   // Find the uninterpreted_option field in the mutable copy of the options
7290   // and clear them, since we're about to interpret them.
7291   const FieldDescriptor* uninterpreted_options_field =
7292       options->GetDescriptor()->FindFieldByName("uninterpreted_option");
7293   GOOGLE_CHECK(uninterpreted_options_field != nullptr)
7294       << "No field named \"uninterpreted_option\" in the Options proto.";
7295   options->GetReflection()->ClearField(options, uninterpreted_options_field);
7296 
7297   std::vector<int> src_path = options_to_interpret->element_path;
7298   src_path.push_back(uninterpreted_options_field->number());
7299 
7300   // Find the uninterpreted_option field in the original options.
7301   const FieldDescriptor* original_uninterpreted_options_field =
7302       original_options->GetDescriptor()->FindFieldByName(
7303           "uninterpreted_option");
7304   GOOGLE_CHECK(original_uninterpreted_options_field != nullptr)
7305       << "No field named \"uninterpreted_option\" in the Options proto.";
7306 
7307   const int num_uninterpreted_options =
7308       original_options->GetReflection()->FieldSize(
7309           *original_options, original_uninterpreted_options_field);
7310   for (int i = 0; i < num_uninterpreted_options; ++i) {
7311     src_path.push_back(i);
7312     uninterpreted_option_ = down_cast<const UninterpretedOption*>(
7313         &original_options->GetReflection()->GetRepeatedMessage(
7314             *original_options, original_uninterpreted_options_field, i));
7315     if (!InterpretSingleOption(options, src_path,
7316                                options_to_interpret->element_path)) {
7317       // Error already added by InterpretSingleOption().
7318       failed = true;
7319       break;
7320     }
7321     src_path.pop_back();
7322   }
7323   // Reset these, so we don't have any dangling pointers.
7324   uninterpreted_option_ = nullptr;
7325   options_to_interpret_ = nullptr;
7326 
7327   if (!failed) {
7328     // InterpretSingleOption() added the interpreted options in the
7329     // UnknownFieldSet, in case the option isn't yet known to us.  Now we
7330     // serialize the options message and deserialize it back.  That way, any
7331     // option fields that we do happen to know about will get moved from the
7332     // UnknownFieldSet into the real fields, and thus be available right away.
7333     // If they are not known, that's OK too. They will get reparsed into the
7334     // UnknownFieldSet and wait there until the message is parsed by something
7335     // that does know about the options.
7336 
7337     // Keep the unparsed options around in case the reparsing fails.
7338     std::unique_ptr<Message> unparsed_options(options->New());
7339     options->GetReflection()->Swap(unparsed_options.get(), options);
7340 
7341     std::string buf;
7342     if (!unparsed_options->AppendToString(&buf) ||
7343         !options->ParseFromString(buf)) {
7344       builder_->AddError(
7345           options_to_interpret->element_name, *original_options,
7346           DescriptorPool::ErrorCollector::OTHER,
7347           "Some options could not be correctly parsed using the proto "
7348           "descriptors compiled into this binary.\n"
7349           "Unparsed options: " +
7350               unparsed_options->ShortDebugString() +
7351               "\n"
7352               "Parsing attempt:  " +
7353               options->ShortDebugString());
7354       // Restore the unparsed options.
7355       options->GetReflection()->Swap(unparsed_options.get(), options);
7356     }
7357   }
7358 
7359   return !failed;
7360 }
7361 
InterpretSingleOption(Message * options,const std::vector<int> & src_path,const std::vector<int> & options_path)7362 bool DescriptorBuilder::OptionInterpreter::InterpretSingleOption(
7363     Message* options, const std::vector<int>& src_path,
7364     const std::vector<int>& options_path) {
7365   // First do some basic validation.
7366   if (uninterpreted_option_->name_size() == 0) {
7367     // This should never happen unless the parser has gone seriously awry or
7368     // someone has manually created the uninterpreted option badly.
7369     return AddNameError("Option must have a name.");
7370   }
7371   if (uninterpreted_option_->name(0).name_part() == "uninterpreted_option") {
7372     return AddNameError(
7373         "Option must not use reserved name "
7374         "\"uninterpreted_option\".");
7375   }
7376 
7377   const Descriptor* options_descriptor = nullptr;
7378   // Get the options message's descriptor from the builder's pool, so that we
7379   // get the version that knows about any extension options declared in the file
7380   // we're currently building. The descriptor should be there as long as the
7381   // file we're building imported descriptor.proto.
7382 
7383   // Note that we use DescriptorBuilder::FindSymbolNotEnforcingDeps(), not
7384   // DescriptorPool::FindMessageTypeByName() because we're already holding the
7385   // pool's mutex, and the latter method locks it again.  We don't use
7386   // FindSymbol() because files that use custom options only need to depend on
7387   // the file that defines the option, not descriptor.proto itself.
7388   Symbol symbol = builder_->FindSymbolNotEnforcingDeps(
7389       options->GetDescriptor()->full_name());
7390   options_descriptor = symbol.descriptor();
7391   if (options_descriptor == nullptr) {
7392     // The options message's descriptor was not in the builder's pool, so use
7393     // the standard version from the generated pool. We're not holding the
7394     // generated pool's mutex, so we can search it the straightforward way.
7395     options_descriptor = options->GetDescriptor();
7396   }
7397   GOOGLE_CHECK(options_descriptor);
7398 
7399   // We iterate over the name parts to drill into the submessages until we find
7400   // the leaf field for the option. As we drill down we remember the current
7401   // submessage's descriptor in |descriptor| and the next field in that
7402   // submessage in |field|. We also track the fields we're drilling down
7403   // through in |intermediate_fields|. As we go, we reconstruct the full option
7404   // name in |debug_msg_name|, for use in error messages.
7405   const Descriptor* descriptor = options_descriptor;
7406   const FieldDescriptor* field = nullptr;
7407   std::vector<const FieldDescriptor*> intermediate_fields;
7408   std::string debug_msg_name = "";
7409 
7410   std::vector<int> dest_path = options_path;
7411 
7412   for (int i = 0; i < uninterpreted_option_->name_size(); ++i) {
7413     builder_->undefine_resolved_name_.clear();
7414     const std::string& name_part = uninterpreted_option_->name(i).name_part();
7415     if (debug_msg_name.size() > 0) {
7416       debug_msg_name += ".";
7417     }
7418     if (uninterpreted_option_->name(i).is_extension()) {
7419       debug_msg_name += "(" + name_part + ")";
7420       // Search for the extension's descriptor as an extension in the builder's
7421       // pool. Note that we use DescriptorBuilder::LookupSymbol(), not
7422       // DescriptorPool::FindExtensionByName(), for two reasons: 1) It allows
7423       // relative lookups, and 2) because we're already holding the pool's
7424       // mutex, and the latter method locks it again.
7425       symbol =
7426           builder_->LookupSymbol(name_part, options_to_interpret_->name_scope);
7427       field = symbol.field_descriptor();
7428       // If we don't find the field then the field's descriptor was not in the
7429       // builder's pool, but there's no point in looking in the generated
7430       // pool. We require that you import the file that defines any extensions
7431       // you use, so they must be present in the builder's pool.
7432     } else {
7433       debug_msg_name += name_part;
7434       // Search for the field's descriptor as a regular field.
7435       field = descriptor->FindFieldByName(name_part);
7436     }
7437 
7438     if (field == nullptr) {
7439       if (get_allow_unknown(builder_->pool_)) {
7440         // We can't find the option, but AllowUnknownDependencies() is enabled,
7441         // so we will just leave it as uninterpreted.
7442         AddWithoutInterpreting(*uninterpreted_option_, options);
7443         return true;
7444       } else if (!(builder_->undefine_resolved_name_).empty()) {
7445         // Option is resolved to a name which is not defined.
7446         return AddNameError(
7447             "Option \"" + debug_msg_name + "\" is resolved to \"(" +
7448             builder_->undefine_resolved_name_ +
7449             ")\", which is not defined. The innermost scope is searched first "
7450             "in name resolution. Consider using a leading '.'(i.e., \"(." +
7451             debug_msg_name.substr(1) +
7452             "\") to start from the outermost scope.");
7453       } else {
7454         return AddNameError(
7455             "Option \"" + debug_msg_name +
7456             "\" unknown. Ensure that your proto" +
7457             " definition file imports the proto which defines the option.");
7458       }
7459     } else if (field->containing_type() != descriptor) {
7460       if (get_is_placeholder(field->containing_type())) {
7461         // The field is an extension of a placeholder type, so we can't
7462         // reliably verify whether it is a valid extension to use here (e.g.
7463         // we don't know if it is an extension of the correct *Options message,
7464         // or if it has a valid field number, etc.).  Just leave it as
7465         // uninterpreted instead.
7466         AddWithoutInterpreting(*uninterpreted_option_, options);
7467         return true;
7468       } else {
7469         // This can only happen if, due to some insane misconfiguration of the
7470         // pools, we find the options message in one pool but the field in
7471         // another. This would probably imply a hefty bug somewhere.
7472         return AddNameError("Option field \"" + debug_msg_name +
7473                             "\" is not a field or extension of message \"" +
7474                             descriptor->name() + "\".");
7475       }
7476     } else {
7477       // accumulate field numbers to form path to interpreted option
7478       dest_path.push_back(field->number());
7479 
7480       if (i < uninterpreted_option_->name_size() - 1) {
7481         if (field->cpp_type() != FieldDescriptor::CPPTYPE_MESSAGE) {
7482           return AddNameError("Option \"" + debug_msg_name +
7483                               "\" is an atomic type, not a message.");
7484         } else if (field->is_repeated()) {
7485           return AddNameError("Option field \"" + debug_msg_name +
7486                               "\" is a repeated message. Repeated message "
7487                               "options must be initialized using an "
7488                               "aggregate value.");
7489         } else {
7490           // Drill down into the submessage.
7491           intermediate_fields.push_back(field);
7492           descriptor = field->message_type();
7493         }
7494       }
7495     }
7496   }
7497 
7498   // We've found the leaf field. Now we use UnknownFieldSets to set its value
7499   // on the options message. We do so because the message may not yet know
7500   // about its extension fields, so we may not be able to set the fields
7501   // directly. But the UnknownFieldSets will serialize to the same wire-format
7502   // message, so reading that message back in once the extension fields are
7503   // known will populate them correctly.
7504 
7505   // First see if the option is already set.
7506   if (!field->is_repeated() &&
7507       !ExamineIfOptionIsSet(
7508           intermediate_fields.begin(), intermediate_fields.end(), field,
7509           debug_msg_name,
7510           options->GetReflection()->GetUnknownFields(*options))) {
7511     return false;  // ExamineIfOptionIsSet() already added the error.
7512   }
7513 
7514   // First set the value on the UnknownFieldSet corresponding to the
7515   // innermost message.
7516   std::unique_ptr<UnknownFieldSet> unknown_fields(new UnknownFieldSet());
7517   if (!SetOptionValue(field, unknown_fields.get())) {
7518     return false;  // SetOptionValue() already added the error.
7519   }
7520 
7521   // Now wrap the UnknownFieldSet with UnknownFieldSets corresponding to all
7522   // the intermediate messages.
7523   for (std::vector<const FieldDescriptor*>::reverse_iterator iter =
7524            intermediate_fields.rbegin();
7525        iter != intermediate_fields.rend(); ++iter) {
7526     std::unique_ptr<UnknownFieldSet> parent_unknown_fields(
7527         new UnknownFieldSet());
7528     switch ((*iter)->type()) {
7529       case FieldDescriptor::TYPE_MESSAGE: {
7530         std::string* outstr =
7531             parent_unknown_fields->AddLengthDelimited((*iter)->number());
7532         GOOGLE_CHECK(unknown_fields->SerializeToString(outstr))
7533             << "Unexpected failure while serializing option submessage "
7534             << debug_msg_name << "\".";
7535         break;
7536       }
7537 
7538       case FieldDescriptor::TYPE_GROUP: {
7539         parent_unknown_fields->AddGroup((*iter)->number())
7540             ->MergeFrom(*unknown_fields);
7541         break;
7542       }
7543 
7544       default:
7545         GOOGLE_LOG(FATAL) << "Invalid wire type for CPPTYPE_MESSAGE: "
7546                    << (*iter)->type();
7547         return false;
7548     }
7549     unknown_fields.reset(parent_unknown_fields.release());
7550   }
7551 
7552   // Now merge the UnknownFieldSet corresponding to the top-level message into
7553   // the options message.
7554   options->GetReflection()->MutableUnknownFields(options)->MergeFrom(
7555       *unknown_fields);
7556 
7557   // record the element path of the interpreted option
7558   if (field->is_repeated()) {
7559     int index = repeated_option_counts_[dest_path]++;
7560     dest_path.push_back(index);
7561   }
7562   interpreted_paths_[src_path] = dest_path;
7563 
7564   return true;
7565 }
7566 
UpdateSourceCodeInfo(SourceCodeInfo * info)7567 void DescriptorBuilder::OptionInterpreter::UpdateSourceCodeInfo(
7568     SourceCodeInfo* info) {
7569   if (interpreted_paths_.empty()) {
7570     // nothing to do!
7571     return;
7572   }
7573 
7574   // We find locations that match keys in interpreted_paths_ and
7575   // 1) replace the path with the corresponding value in interpreted_paths_
7576   // 2) remove any subsequent sub-locations (sub-location is one whose path
7577   //    has the parent path as a prefix)
7578   //
7579   // To avoid quadratic behavior of removing interior rows as we go,
7580   // we keep a copy. But we don't actually copy anything until we've
7581   // found the first match (so if the source code info has no locations
7582   // that need to be changed, there is zero copy overhead).
7583 
7584   RepeatedPtrField<SourceCodeInfo_Location>* locs = info->mutable_location();
7585   RepeatedPtrField<SourceCodeInfo_Location> new_locs;
7586   bool copying = false;
7587 
7588   std::vector<int> pathv;
7589   bool matched = false;
7590 
7591   for (RepeatedPtrField<SourceCodeInfo_Location>::iterator loc = locs->begin();
7592        loc != locs->end(); loc++) {
7593     if (matched) {
7594       // see if this location is in the range to remove
7595       bool loc_matches = true;
7596       if (loc->path_size() < static_cast<int64_t>(pathv.size())) {
7597         loc_matches = false;
7598       } else {
7599         for (size_t j = 0; j < pathv.size(); j++) {
7600           if (loc->path(j) != pathv[j]) {
7601             loc_matches = false;
7602             break;
7603           }
7604         }
7605       }
7606 
7607       if (loc_matches) {
7608         // don't copy this row since it is a sub-location that we're removing
7609         continue;
7610       }
7611 
7612       matched = false;
7613     }
7614 
7615     pathv.clear();
7616     for (int j = 0; j < loc->path_size(); j++) {
7617       pathv.push_back(loc->path(j));
7618     }
7619 
7620     std::map<std::vector<int>, std::vector<int>>::iterator entry =
7621         interpreted_paths_.find(pathv);
7622 
7623     if (entry == interpreted_paths_.end()) {
7624       // not a match
7625       if (copying) {
7626         *new_locs.Add() = *loc;
7627       }
7628       continue;
7629     }
7630 
7631     matched = true;
7632 
7633     if (!copying) {
7634       // initialize the copy we are building
7635       copying = true;
7636       new_locs.Reserve(locs->size());
7637       for (RepeatedPtrField<SourceCodeInfo_Location>::iterator it =
7638                locs->begin();
7639            it != loc; it++) {
7640         *new_locs.Add() = *it;
7641       }
7642     }
7643 
7644     // add replacement and update its path
7645     SourceCodeInfo_Location* replacement = new_locs.Add();
7646     *replacement = *loc;
7647     replacement->clear_path();
7648     for (std::vector<int>::iterator rit = entry->second.begin();
7649          rit != entry->second.end(); rit++) {
7650       replacement->add_path(*rit);
7651     }
7652   }
7653 
7654   // if we made a changed copy, put it in place
7655   if (copying) {
7656     *locs = new_locs;
7657   }
7658 }
7659 
AddWithoutInterpreting(const UninterpretedOption & uninterpreted_option,Message * options)7660 void DescriptorBuilder::OptionInterpreter::AddWithoutInterpreting(
7661     const UninterpretedOption& uninterpreted_option, Message* options) {
7662   const FieldDescriptor* field =
7663       options->GetDescriptor()->FindFieldByName("uninterpreted_option");
7664   GOOGLE_CHECK(field != nullptr);
7665 
7666   options->GetReflection()
7667       ->AddMessage(options, field)
7668       ->CopyFrom(uninterpreted_option);
7669 }
7670 
ExamineIfOptionIsSet(std::vector<const FieldDescriptor * >::const_iterator intermediate_fields_iter,std::vector<const FieldDescriptor * >::const_iterator intermediate_fields_end,const FieldDescriptor * innermost_field,const std::string & debug_msg_name,const UnknownFieldSet & unknown_fields)7671 bool DescriptorBuilder::OptionInterpreter::ExamineIfOptionIsSet(
7672     std::vector<const FieldDescriptor*>::const_iterator
7673         intermediate_fields_iter,
7674     std::vector<const FieldDescriptor*>::const_iterator intermediate_fields_end,
7675     const FieldDescriptor* innermost_field, const std::string& debug_msg_name,
7676     const UnknownFieldSet& unknown_fields) {
7677   // We do linear searches of the UnknownFieldSet and its sub-groups.  This
7678   // should be fine since it's unlikely that any one options structure will
7679   // contain more than a handful of options.
7680 
7681   if (intermediate_fields_iter == intermediate_fields_end) {
7682     // We're at the innermost submessage.
7683     for (int i = 0; i < unknown_fields.field_count(); i++) {
7684       if (unknown_fields.field(i).number() == innermost_field->number()) {
7685         return AddNameError("Option \"" + debug_msg_name +
7686                             "\" was already set.");
7687       }
7688     }
7689     return true;
7690   }
7691 
7692   for (int i = 0; i < unknown_fields.field_count(); i++) {
7693     if (unknown_fields.field(i).number() ==
7694         (*intermediate_fields_iter)->number()) {
7695       const UnknownField* unknown_field = &unknown_fields.field(i);
7696       FieldDescriptor::Type type = (*intermediate_fields_iter)->type();
7697       // Recurse into the next submessage.
7698       switch (type) {
7699         case FieldDescriptor::TYPE_MESSAGE:
7700           if (unknown_field->type() == UnknownField::TYPE_LENGTH_DELIMITED) {
7701             UnknownFieldSet intermediate_unknown_fields;
7702             if (intermediate_unknown_fields.ParseFromString(
7703                     unknown_field->length_delimited()) &&
7704                 !ExamineIfOptionIsSet(intermediate_fields_iter + 1,
7705                                       intermediate_fields_end, innermost_field,
7706                                       debug_msg_name,
7707                                       intermediate_unknown_fields)) {
7708               return false;  // Error already added.
7709             }
7710           }
7711           break;
7712 
7713         case FieldDescriptor::TYPE_GROUP:
7714           if (unknown_field->type() == UnknownField::TYPE_GROUP) {
7715             if (!ExamineIfOptionIsSet(intermediate_fields_iter + 1,
7716                                       intermediate_fields_end, innermost_field,
7717                                       debug_msg_name, unknown_field->group())) {
7718               return false;  // Error already added.
7719             }
7720           }
7721           break;
7722 
7723         default:
7724           GOOGLE_LOG(FATAL) << "Invalid wire type for CPPTYPE_MESSAGE: " << type;
7725           return false;
7726       }
7727     }
7728   }
7729   return true;
7730 }
7731 
SetOptionValue(const FieldDescriptor * option_field,UnknownFieldSet * unknown_fields)7732 bool DescriptorBuilder::OptionInterpreter::SetOptionValue(
7733     const FieldDescriptor* option_field, UnknownFieldSet* unknown_fields) {
7734   // We switch on the CppType to validate.
7735   switch (option_field->cpp_type()) {
7736     case FieldDescriptor::CPPTYPE_INT32:
7737       if (uninterpreted_option_->has_positive_int_value()) {
7738         if (uninterpreted_option_->positive_int_value() >
7739             static_cast<uint64_t>(std::numeric_limits<int32_t>::max())) {
7740           return AddValueError("Value out of range for int32 option \"" +
7741                                option_field->full_name() + "\".");
7742         } else {
7743           SetInt32(option_field->number(),
7744                    uninterpreted_option_->positive_int_value(),
7745                    option_field->type(), unknown_fields);
7746         }
7747       } else if (uninterpreted_option_->has_negative_int_value()) {
7748         if (uninterpreted_option_->negative_int_value() <
7749             static_cast<int64_t>(std::numeric_limits<int32_t>::min())) {
7750           return AddValueError("Value out of range for int32 option \"" +
7751                                option_field->full_name() + "\".");
7752         } else {
7753           SetInt32(option_field->number(),
7754                    uninterpreted_option_->negative_int_value(),
7755                    option_field->type(), unknown_fields);
7756         }
7757       } else {
7758         return AddValueError("Value must be integer for int32 option \"" +
7759                              option_field->full_name() + "\".");
7760       }
7761       break;
7762 
7763     case FieldDescriptor::CPPTYPE_INT64:
7764       if (uninterpreted_option_->has_positive_int_value()) {
7765         if (uninterpreted_option_->positive_int_value() >
7766             static_cast<uint64_t>(std::numeric_limits<int64_t>::max())) {
7767           return AddValueError("Value out of range for int64 option \"" +
7768                                option_field->full_name() + "\".");
7769         } else {
7770           SetInt64(option_field->number(),
7771                    uninterpreted_option_->positive_int_value(),
7772                    option_field->type(), unknown_fields);
7773         }
7774       } else if (uninterpreted_option_->has_negative_int_value()) {
7775         SetInt64(option_field->number(),
7776                  uninterpreted_option_->negative_int_value(),
7777                  option_field->type(), unknown_fields);
7778       } else {
7779         return AddValueError("Value must be integer for int64 option \"" +
7780                              option_field->full_name() + "\".");
7781       }
7782       break;
7783 
7784     case FieldDescriptor::CPPTYPE_UINT32:
7785       if (uninterpreted_option_->has_positive_int_value()) {
7786         if (uninterpreted_option_->positive_int_value() >
7787             std::numeric_limits<uint32_t>::max()) {
7788           return AddValueError("Value out of range for uint32 option \"" +
7789                                option_field->name() + "\".");
7790         } else {
7791           SetUInt32(option_field->number(),
7792                     uninterpreted_option_->positive_int_value(),
7793                     option_field->type(), unknown_fields);
7794         }
7795       } else {
7796         return AddValueError(
7797             "Value must be non-negative integer for uint32 "
7798             "option \"" +
7799             option_field->full_name() + "\".");
7800       }
7801       break;
7802 
7803     case FieldDescriptor::CPPTYPE_UINT64:
7804       if (uninterpreted_option_->has_positive_int_value()) {
7805         SetUInt64(option_field->number(),
7806                   uninterpreted_option_->positive_int_value(),
7807                   option_field->type(), unknown_fields);
7808       } else {
7809         return AddValueError(
7810             "Value must be non-negative integer for uint64 "
7811             "option \"" +
7812             option_field->full_name() + "\".");
7813       }
7814       break;
7815 
7816     case FieldDescriptor::CPPTYPE_FLOAT: {
7817       float value;
7818       if (uninterpreted_option_->has_double_value()) {
7819         value = uninterpreted_option_->double_value();
7820       } else if (uninterpreted_option_->has_positive_int_value()) {
7821         value = uninterpreted_option_->positive_int_value();
7822       } else if (uninterpreted_option_->has_negative_int_value()) {
7823         value = uninterpreted_option_->negative_int_value();
7824       } else {
7825         return AddValueError("Value must be number for float option \"" +
7826                              option_field->full_name() + "\".");
7827       }
7828       unknown_fields->AddFixed32(option_field->number(),
7829                                  internal::WireFormatLite::EncodeFloat(value));
7830       break;
7831     }
7832 
7833     case FieldDescriptor::CPPTYPE_DOUBLE: {
7834       double value;
7835       if (uninterpreted_option_->has_double_value()) {
7836         value = uninterpreted_option_->double_value();
7837       } else if (uninterpreted_option_->has_positive_int_value()) {
7838         value = uninterpreted_option_->positive_int_value();
7839       } else if (uninterpreted_option_->has_negative_int_value()) {
7840         value = uninterpreted_option_->negative_int_value();
7841       } else {
7842         return AddValueError("Value must be number for double option \"" +
7843                              option_field->full_name() + "\".");
7844       }
7845       unknown_fields->AddFixed64(option_field->number(),
7846                                  internal::WireFormatLite::EncodeDouble(value));
7847       break;
7848     }
7849 
7850     case FieldDescriptor::CPPTYPE_BOOL:
7851       uint64_t value;
7852       if (!uninterpreted_option_->has_identifier_value()) {
7853         return AddValueError(
7854             "Value must be identifier for boolean option "
7855             "\"" +
7856             option_field->full_name() + "\".");
7857       }
7858       if (uninterpreted_option_->identifier_value() == "true") {
7859         value = 1;
7860       } else if (uninterpreted_option_->identifier_value() == "false") {
7861         value = 0;
7862       } else {
7863         return AddValueError(
7864             "Value must be \"true\" or \"false\" for boolean "
7865             "option \"" +
7866             option_field->full_name() + "\".");
7867       }
7868       unknown_fields->AddVarint(option_field->number(), value);
7869       break;
7870 
7871     case FieldDescriptor::CPPTYPE_ENUM: {
7872       if (!uninterpreted_option_->has_identifier_value()) {
7873         return AddValueError(
7874             "Value must be identifier for enum-valued option "
7875             "\"" +
7876             option_field->full_name() + "\".");
7877       }
7878       const EnumDescriptor* enum_type = option_field->enum_type();
7879       const std::string& value_name = uninterpreted_option_->identifier_value();
7880       const EnumValueDescriptor* enum_value = nullptr;
7881 
7882       if (enum_type->file()->pool() != DescriptorPool::generated_pool()) {
7883         // Note that the enum value's fully-qualified name is a sibling of the
7884         // enum's name, not a child of it.
7885         std::string fully_qualified_name = enum_type->full_name();
7886         fully_qualified_name.resize(fully_qualified_name.size() -
7887                                     enum_type->name().size());
7888         fully_qualified_name += value_name;
7889 
7890         // Search for the enum value's descriptor in the builder's pool. Note
7891         // that we use DescriptorBuilder::FindSymbolNotEnforcingDeps(), not
7892         // DescriptorPool::FindEnumValueByName() because we're already holding
7893         // the pool's mutex, and the latter method locks it again.
7894         Symbol symbol =
7895             builder_->FindSymbolNotEnforcingDeps(fully_qualified_name);
7896         if (auto* candicate_descriptor = symbol.enum_value_descriptor()) {
7897           if (candicate_descriptor->type() != enum_type) {
7898             return AddValueError(
7899                 "Enum type \"" + enum_type->full_name() +
7900                 "\" has no value named \"" + value_name + "\" for option \"" +
7901                 option_field->full_name() +
7902                 "\". This appears to be a value from a sibling type.");
7903           } else {
7904             enum_value = candicate_descriptor;
7905           }
7906         }
7907       } else {
7908         // The enum type is in the generated pool, so we can search for the
7909         // value there.
7910         enum_value = enum_type->FindValueByName(value_name);
7911       }
7912 
7913       if (enum_value == nullptr) {
7914         return AddValueError("Enum type \"" +
7915                              option_field->enum_type()->full_name() +
7916                              "\" has no value named \"" + value_name +
7917                              "\" for "
7918                              "option \"" +
7919                              option_field->full_name() + "\".");
7920       } else {
7921         // Sign-extension is not a problem, since we cast directly from int32_t
7922         // to uint64_t, without first going through uint32_t.
7923         unknown_fields->AddVarint(
7924             option_field->number(),
7925             static_cast<uint64_t>(static_cast<int64_t>(enum_value->number())));
7926       }
7927       break;
7928     }
7929 
7930     case FieldDescriptor::CPPTYPE_STRING:
7931       if (!uninterpreted_option_->has_string_value()) {
7932         return AddValueError(
7933             "Value must be quoted string for string option "
7934             "\"" +
7935             option_field->full_name() + "\".");
7936       }
7937       // The string has already been unquoted and unescaped by the parser.
7938       unknown_fields->AddLengthDelimited(option_field->number(),
7939                                          uninterpreted_option_->string_value());
7940       break;
7941 
7942     case FieldDescriptor::CPPTYPE_MESSAGE:
7943       if (!SetAggregateOption(option_field, unknown_fields)) {
7944         return false;
7945       }
7946       break;
7947   }
7948 
7949   return true;
7950 }
7951 
7952 class DescriptorBuilder::OptionInterpreter::AggregateOptionFinder
7953     : public TextFormat::Finder {
7954  public:
7955   DescriptorBuilder* builder_;
7956 
FindAnyType(const Message &,const std::string & prefix,const std::string & name) const7957   const Descriptor* FindAnyType(const Message& /*message*/,
7958                                 const std::string& prefix,
7959                                 const std::string& name) const override {
7960     if (prefix != internal::kTypeGoogleApisComPrefix &&
7961         prefix != internal::kTypeGoogleProdComPrefix) {
7962       return nullptr;
7963     }
7964     assert_mutex_held(builder_->pool_);
7965     return builder_->FindSymbol(name).descriptor();
7966   }
7967 
FindExtension(Message * message,const std::string & name) const7968   const FieldDescriptor* FindExtension(Message* message,
7969                                        const std::string& name) const override {
7970     assert_mutex_held(builder_->pool_);
7971     const Descriptor* descriptor = message->GetDescriptor();
7972     Symbol result =
7973         builder_->LookupSymbolNoPlaceholder(name, descriptor->full_name());
7974     if (auto* field = result.field_descriptor()) {
7975       return field;
7976     } else if (result.type() == Symbol::MESSAGE &&
7977                descriptor->options().message_set_wire_format()) {
7978       const Descriptor* foreign_type = result.descriptor();
7979       // The text format allows MessageSet items to be specified using
7980       // the type name, rather than the extension identifier. If the symbol
7981       // lookup returned a Message, and the enclosing Message has
7982       // message_set_wire_format = true, then return the message set
7983       // extension, if one exists.
7984       for (int i = 0; i < foreign_type->extension_count(); i++) {
7985         const FieldDescriptor* extension = foreign_type->extension(i);
7986         if (extension->containing_type() == descriptor &&
7987             extension->type() == FieldDescriptor::TYPE_MESSAGE &&
7988             extension->is_optional() &&
7989             extension->message_type() == foreign_type) {
7990           // Found it.
7991           return extension;
7992         }
7993       }
7994     }
7995     return nullptr;
7996   }
7997 };
7998 
7999 // A custom error collector to record any text-format parsing errors
8000 namespace {
8001 class AggregateErrorCollector : public io::ErrorCollector {
8002  public:
8003   std::string error_;
8004 
AddError(int,int,const std::string & message)8005   void AddError(int /* line */, int /* column */,
8006                 const std::string& message) override {
8007     if (!error_.empty()) {
8008       error_ += "; ";
8009     }
8010     error_ += message;
8011   }
8012 
AddWarning(int,int,const std::string &)8013   void AddWarning(int /* line */, int /* column */,
8014                   const std::string& /* message */) override {
8015     // Ignore warnings
8016   }
8017 };
8018 }  // namespace
8019 
8020 // We construct a dynamic message of the type corresponding to
8021 // option_field, parse the supplied text-format string into this
8022 // message, and serialize the resulting message to produce the value.
SetAggregateOption(const FieldDescriptor * option_field,UnknownFieldSet * unknown_fields)8023 bool DescriptorBuilder::OptionInterpreter::SetAggregateOption(
8024     const FieldDescriptor* option_field, UnknownFieldSet* unknown_fields) {
8025   if (!uninterpreted_option_->has_aggregate_value()) {
8026     return AddValueError("Option \"" + option_field->full_name() +
8027                          "\" is a message. To set the entire message, use "
8028                          "syntax like \"" +
8029                          option_field->name() +
8030                          " = { <proto text format> }\". "
8031                          "To set fields within it, use "
8032                          "syntax like \"" +
8033                          option_field->name() + ".foo = value\".");
8034   }
8035 
8036   const Descriptor* type = option_field->message_type();
8037   std::unique_ptr<Message> dynamic(dynamic_factory_.GetPrototype(type)->New());
8038   GOOGLE_CHECK(dynamic.get() != nullptr)
8039       << "Could not create an instance of " << option_field->DebugString();
8040 
8041   AggregateErrorCollector collector;
8042   AggregateOptionFinder finder;
8043   finder.builder_ = builder_;
8044   TextFormat::Parser parser;
8045   parser.RecordErrorsTo(&collector);
8046   parser.SetFinder(&finder);
8047   if (!parser.ParseFromString(uninterpreted_option_->aggregate_value(),
8048                               dynamic.get())) {
8049     AddValueError("Error while parsing option value for \"" +
8050                   option_field->name() + "\": " + collector.error_);
8051     return false;
8052   } else {
8053     std::string serial;
8054     dynamic->SerializeToString(&serial);  // Never fails
8055     if (option_field->type() == FieldDescriptor::TYPE_MESSAGE) {
8056       unknown_fields->AddLengthDelimited(option_field->number(), serial);
8057     } else {
8058       GOOGLE_CHECK_EQ(option_field->type(), FieldDescriptor::TYPE_GROUP);
8059       UnknownFieldSet* group = unknown_fields->AddGroup(option_field->number());
8060       group->ParseFromString(serial);
8061     }
8062     return true;
8063   }
8064 }
8065 
SetInt32(int number,int32_t value,FieldDescriptor::Type type,UnknownFieldSet * unknown_fields)8066 void DescriptorBuilder::OptionInterpreter::SetInt32(
8067     int number, int32_t value, FieldDescriptor::Type type,
8068     UnknownFieldSet* unknown_fields) {
8069   switch (type) {
8070     case FieldDescriptor::TYPE_INT32:
8071       unknown_fields->AddVarint(
8072           number, static_cast<uint64_t>(static_cast<int64_t>(value)));
8073       break;
8074 
8075     case FieldDescriptor::TYPE_SFIXED32:
8076       unknown_fields->AddFixed32(number, static_cast<uint32_t>(value));
8077       break;
8078 
8079     case FieldDescriptor::TYPE_SINT32:
8080       unknown_fields->AddVarint(
8081           number, internal::WireFormatLite::ZigZagEncode32(value));
8082       break;
8083 
8084     default:
8085       GOOGLE_LOG(FATAL) << "Invalid wire type for CPPTYPE_INT32: " << type;
8086       break;
8087   }
8088 }
8089 
SetInt64(int number,int64_t value,FieldDescriptor::Type type,UnknownFieldSet * unknown_fields)8090 void DescriptorBuilder::OptionInterpreter::SetInt64(
8091     int number, int64_t value, FieldDescriptor::Type type,
8092     UnknownFieldSet* unknown_fields) {
8093   switch (type) {
8094     case FieldDescriptor::TYPE_INT64:
8095       unknown_fields->AddVarint(number, static_cast<uint64_t>(value));
8096       break;
8097 
8098     case FieldDescriptor::TYPE_SFIXED64:
8099       unknown_fields->AddFixed64(number, static_cast<uint64_t>(value));
8100       break;
8101 
8102     case FieldDescriptor::TYPE_SINT64:
8103       unknown_fields->AddVarint(
8104           number, internal::WireFormatLite::ZigZagEncode64(value));
8105       break;
8106 
8107     default:
8108       GOOGLE_LOG(FATAL) << "Invalid wire type for CPPTYPE_INT64: " << type;
8109       break;
8110   }
8111 }
8112 
SetUInt32(int number,uint32_t value,FieldDescriptor::Type type,UnknownFieldSet * unknown_fields)8113 void DescriptorBuilder::OptionInterpreter::SetUInt32(
8114     int number, uint32_t value, FieldDescriptor::Type type,
8115     UnknownFieldSet* unknown_fields) {
8116   switch (type) {
8117     case FieldDescriptor::TYPE_UINT32:
8118       unknown_fields->AddVarint(number, static_cast<uint64_t>(value));
8119       break;
8120 
8121     case FieldDescriptor::TYPE_FIXED32:
8122       unknown_fields->AddFixed32(number, static_cast<uint32_t>(value));
8123       break;
8124 
8125     default:
8126       GOOGLE_LOG(FATAL) << "Invalid wire type for CPPTYPE_UINT32: " << type;
8127       break;
8128   }
8129 }
8130 
SetUInt64(int number,uint64_t value,FieldDescriptor::Type type,UnknownFieldSet * unknown_fields)8131 void DescriptorBuilder::OptionInterpreter::SetUInt64(
8132     int number, uint64_t value, FieldDescriptor::Type type,
8133     UnknownFieldSet* unknown_fields) {
8134   switch (type) {
8135     case FieldDescriptor::TYPE_UINT64:
8136       unknown_fields->AddVarint(number, value);
8137       break;
8138 
8139     case FieldDescriptor::TYPE_FIXED64:
8140       unknown_fields->AddFixed64(number, value);
8141       break;
8142 
8143     default:
8144       GOOGLE_LOG(FATAL) << "Invalid wire type for CPPTYPE_UINT64: " << type;
8145       break;
8146   }
8147 }
8148 
LogUnusedDependency(const FileDescriptorProto & proto,const FileDescriptor * result)8149 void DescriptorBuilder::LogUnusedDependency(const FileDescriptorProto& proto,
8150                                             const FileDescriptor* result) {
8151   (void)result;  // Parameter is used by Google-internal code.
8152 
8153   if (!unused_dependency_.empty()) {
8154     auto itr = pool_->unused_import_track_files_.find(proto.name());
8155     bool is_error =
8156         itr != pool_->unused_import_track_files_.end() && itr->second;
8157     for (std::set<const FileDescriptor*>::const_iterator it =
8158              unused_dependency_.begin();
8159          it != unused_dependency_.end(); ++it) {
8160       std::string error_message = "Import " + (*it)->name() + " is unused.";
8161       if (is_error) {
8162         AddError((*it)->name(), proto, DescriptorPool::ErrorCollector::IMPORT,
8163                  error_message);
8164       } else {
8165         AddWarning((*it)->name(), proto, DescriptorPool::ErrorCollector::IMPORT,
8166                    error_message);
8167       }
8168     }
8169   }
8170 }
8171 
CrossLinkOnDemandHelper(StringPiece name,bool expecting_enum) const8172 Symbol DescriptorPool::CrossLinkOnDemandHelper(StringPiece name,
8173                                                bool expecting_enum) const {
8174   (void)expecting_enum;  // Parameter is used by Google-internal code.
8175   auto lookup_name = std::string(name);
8176   if (!lookup_name.empty() && lookup_name[0] == '.') {
8177     lookup_name = lookup_name.substr(1);
8178   }
8179   Symbol result = tables_->FindByNameHelper(this, lookup_name);
8180   return result;
8181 }
8182 
8183 // Handle the lazy import building for a message field whose type wasn't built
8184 // at cross link time. If that was the case, we saved the name of the type to
8185 // be looked up when the accessor for the type was called. Set type_,
8186 // enum_type_, message_type_, and default_value_enum_ appropriately.
InternalTypeOnceInit() const8187 void FieldDescriptor::InternalTypeOnceInit() const {
8188   GOOGLE_CHECK(file()->finished_building_ == true);
8189   const EnumDescriptor* enum_type = nullptr;
8190   const char* lazy_type_name = reinterpret_cast<const char*>(type_once_ + 1);
8191   const char* lazy_default_value_enum_name =
8192       lazy_type_name + strlen(lazy_type_name) + 1;
8193   Symbol result = file()->pool()->CrossLinkOnDemandHelper(
8194       lazy_type_name, type_ == FieldDescriptor::TYPE_ENUM);
8195   if (result.type() == Symbol::MESSAGE) {
8196     type_ = FieldDescriptor::TYPE_MESSAGE;
8197     type_descriptor_.message_type = result.descriptor();
8198   } else if (result.type() == Symbol::ENUM) {
8199     type_ = FieldDescriptor::TYPE_ENUM;
8200     enum_type = type_descriptor_.enum_type = result.enum_descriptor();
8201   }
8202 
8203   if (enum_type) {
8204     if (lazy_default_value_enum_name[0] != '\0') {
8205       // Have to build the full name now instead of at CrossLink time,
8206       // because enum_type may not be known at the time.
8207       std::string name = enum_type->full_name();
8208       // Enum values reside in the same scope as the enum type.
8209       std::string::size_type last_dot = name.find_last_of('.');
8210       if (last_dot != std::string::npos) {
8211         name = name.substr(0, last_dot) + "." + lazy_default_value_enum_name;
8212       } else {
8213         name = lazy_default_value_enum_name;
8214       }
8215       Symbol result = file()->pool()->CrossLinkOnDemandHelper(name, true);
8216       default_value_enum_ = result.enum_value_descriptor();
8217     } else {
8218       default_value_enum_ = nullptr;
8219     }
8220     if (!default_value_enum_) {
8221       // We use the first defined value as the default
8222       // if a default is not explicitly defined.
8223       GOOGLE_CHECK(enum_type->value_count());
8224       default_value_enum_ = enum_type->value(0);
8225     }
8226   }
8227 }
8228 
TypeOnceInit(const FieldDescriptor * to_init)8229 void FieldDescriptor::TypeOnceInit(const FieldDescriptor* to_init) {
8230   to_init->InternalTypeOnceInit();
8231 }
8232 
8233 // message_type(), enum_type(), default_value_enum(), and type()
8234 // all share the same internal::call_once init path to do lazy
8235 // import building and cross linking of a field of a message.
message_type() const8236 const Descriptor* FieldDescriptor::message_type() const {
8237   if (type_once_) {
8238     internal::call_once(*type_once_, FieldDescriptor::TypeOnceInit, this);
8239   }
8240   return type_ == TYPE_MESSAGE || type_ == TYPE_GROUP
8241              ? type_descriptor_.message_type
8242              : nullptr;
8243 }
8244 
enum_type() const8245 const EnumDescriptor* FieldDescriptor::enum_type() const {
8246   if (type_once_) {
8247     internal::call_once(*type_once_, FieldDescriptor::TypeOnceInit, this);
8248   }
8249   return type_ == TYPE_ENUM ? type_descriptor_.enum_type : nullptr;
8250 }
8251 
default_value_enum() const8252 const EnumValueDescriptor* FieldDescriptor::default_value_enum() const {
8253   if (type_once_) {
8254     internal::call_once(*type_once_, FieldDescriptor::TypeOnceInit, this);
8255   }
8256   return default_value_enum_;
8257 }
8258 
PrintableNameForExtension() const8259 const std::string& FieldDescriptor::PrintableNameForExtension() const {
8260   const bool is_message_set_extension =
8261       is_extension() &&
8262       containing_type()->options().message_set_wire_format() &&
8263       type() == FieldDescriptor::TYPE_MESSAGE && is_optional() &&
8264       extension_scope() == message_type();
8265   return is_message_set_extension ? message_type()->full_name() : full_name();
8266 }
8267 
InternalDependenciesOnceInit() const8268 void FileDescriptor::InternalDependenciesOnceInit() const {
8269   GOOGLE_CHECK(finished_building_ == true);
8270   const char* names_ptr = reinterpret_cast<const char*>(dependencies_once_ + 1);
8271   for (int i = 0; i < dependency_count(); i++) {
8272     const char* name = names_ptr;
8273     names_ptr += strlen(name) + 1;
8274     if (name[0] != '\0') {
8275       dependencies_[i] = pool_->FindFileByName(name);
8276     }
8277   }
8278 }
8279 
DependenciesOnceInit(const FileDescriptor * to_init)8280 void FileDescriptor::DependenciesOnceInit(const FileDescriptor* to_init) {
8281   to_init->InternalDependenciesOnceInit();
8282 }
8283 
dependency(int index) const8284 const FileDescriptor* FileDescriptor::dependency(int index) const {
8285   if (dependencies_once_) {
8286     // Do once init for all indices, as it's unlikely only a single index would
8287     // be called, and saves on internal::call_once allocations.
8288     internal::call_once(*dependencies_once_,
8289                         FileDescriptor::DependenciesOnceInit, this);
8290   }
8291   return dependencies_[index];
8292 }
8293 
input_type() const8294 const Descriptor* MethodDescriptor::input_type() const {
8295   return input_type_.Get(service());
8296 }
8297 
output_type() const8298 const Descriptor* MethodDescriptor::output_type() const {
8299   return output_type_.Get(service());
8300 }
8301 
8302 namespace internal {
Set(const Descriptor * descriptor)8303 void LazyDescriptor::Set(const Descriptor* descriptor) {
8304   GOOGLE_CHECK(!once_);
8305   descriptor_ = descriptor;
8306 }
8307 
SetLazy(StringPiece name,const FileDescriptor * file)8308 void LazyDescriptor::SetLazy(StringPiece name,
8309                              const FileDescriptor* file) {
8310   // verify Init() has been called and Set hasn't been called yet.
8311   GOOGLE_CHECK(!descriptor_);
8312   GOOGLE_CHECK(!once_);
8313   GOOGLE_CHECK(file && file->pool_);
8314   GOOGLE_CHECK(file->pool_->lazily_build_dependencies_);
8315   GOOGLE_CHECK(!file->finished_building_);
8316   once_ = ::new (file->pool_->tables_->AllocateBytes(static_cast<int>(
8317       sizeof(internal::once_flag) + name.size() + 1))) internal::once_flag{};
8318   char* lazy_name = reinterpret_cast<char*>(once_ + 1);
8319   memcpy(lazy_name, name.data(), name.size());
8320   lazy_name[name.size()] = 0;
8321 }
8322 
Once(const ServiceDescriptor * service)8323 void LazyDescriptor::Once(const ServiceDescriptor* service) {
8324   if (once_) {
8325     internal::call_once(*once_, [&] {
8326       auto* file = service->file();
8327       GOOGLE_CHECK(file->finished_building_);
8328       const char* lazy_name = reinterpret_cast<const char*>(once_ + 1);
8329       descriptor_ =
8330           file->pool_->CrossLinkOnDemandHelper(lazy_name, false).descriptor();
8331     });
8332   }
8333 }
8334 
8335 }  // namespace internal
8336 
8337 }  // namespace protobuf
8338 }  // namespace google
8339 
8340 #include <google/protobuf/port_undef.inc>
8341