• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc.  All rights reserved.
3 // https://developers.google.com/protocol-buffers/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 //     * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 //     * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 //     * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 
31 // Author: kenton@google.com (Kenton Varda)
32 //  Based on original Protocol Buffers design by
33 //  Sanjay Ghemawat, Jeff Dean, and others.
34 
35 #include <google/protobuf/compiler/cpp/cpp_message.h>
36 
37 #include <algorithm>
38 #include <map>
39 #include <memory>
40 #include <unordered_map>
41 #include <utility>
42 #include <vector>
43 
44 #include <google/protobuf/compiler/cpp/cpp_enum.h>
45 #include <google/protobuf/compiler/cpp/cpp_extension.h>
46 #include <google/protobuf/compiler/cpp/cpp_field.h>
47 #include <google/protobuf/compiler/cpp/cpp_helpers.h>
48 #include <google/protobuf/compiler/cpp/cpp_padding_optimizer.h>
49 #include <google/protobuf/descriptor.pb.h>
50 #include <google/protobuf/io/coded_stream.h>
51 #include <google/protobuf/io/printer.h>
52 #include <google/protobuf/generated_message_table_driven.h>
53 #include <google/protobuf/generated_message_util.h>
54 #include <google/protobuf/map_entry_lite.h>
55 #include <google/protobuf/wire_format.h>
56 #include <google/protobuf/stubs/strutil.h>
57 #include <google/protobuf/stubs/substitute.h>
58 
59 
60 #include <google/protobuf/stubs/hash.h>
61 
62 
63 namespace google {
64 namespace protobuf {
65 namespace compiler {
66 namespace cpp {
67 
68 using internal::WireFormat;
69 using internal::WireFormatLite;
70 
71 namespace {
72 
73 template <class T>
PrintFieldComment(const Formatter & format,const T * field)74 void PrintFieldComment(const Formatter& format, const T* field) {
75   // Print the field's (or oneof's) proto-syntax definition as a comment.
76   // We don't want to print group bodies so we cut off after the first
77   // line.
78   DebugStringOptions options;
79   options.elide_group_body = true;
80   options.elide_oneof_body = true;
81   std::string def = field->DebugStringWithOptions(options);
82   format("// $1$\n", def.substr(0, def.find_first_of('\n')));
83 }
84 
PrintPresenceCheck(const Formatter & format,const FieldDescriptor * field,const std::vector<int> & has_bit_indices,io::Printer * printer,int * cached_has_bit_index)85 void PrintPresenceCheck(const Formatter& format, const FieldDescriptor* field,
86                         const std::vector<int>& has_bit_indices,
87                         io::Printer* printer, int* cached_has_bit_index) {
88   if (!field->options().weak()) {
89     int has_bit_index = has_bit_indices[field->index()];
90     if (*cached_has_bit_index != (has_bit_index / 32)) {
91       *cached_has_bit_index = (has_bit_index / 32);
92       format("cached_has_bits = _has_bits_[$1$];\n", *cached_has_bit_index);
93     }
94     const std::string mask =
95         StrCat(strings::Hex(1u << (has_bit_index % 32), strings::ZERO_PAD_8));
96     format("if (cached_has_bits & 0x$1$u) {\n", mask);
97   } else {
98     format("if (has_$1$()) {\n", FieldName(field));
99   }
100   format.Indent();
101 }
102 
103 struct FieldOrderingByNumber {
operator ()google::protobuf::compiler::cpp::__anon3cc1c4960111::FieldOrderingByNumber104   inline bool operator()(const FieldDescriptor* a,
105                          const FieldDescriptor* b) const {
106     return a->number() < b->number();
107   }
108 };
109 
110 // Sort the fields of the given Descriptor by number into a new[]'d array
111 // and return it.
SortFieldsByNumber(const Descriptor * descriptor)112 std::vector<const FieldDescriptor*> SortFieldsByNumber(
113     const Descriptor* descriptor) {
114   std::vector<const FieldDescriptor*> fields(descriptor->field_count());
115   for (int i = 0; i < descriptor->field_count(); i++) {
116     fields[i] = descriptor->field(i);
117   }
118   std::sort(fields.begin(), fields.end(), FieldOrderingByNumber());
119   return fields;
120 }
121 
122 // Functor for sorting extension ranges by their "start" field number.
123 struct ExtensionRangeSorter {
operator ()google::protobuf::compiler::cpp::__anon3cc1c4960111::ExtensionRangeSorter124   bool operator()(const Descriptor::ExtensionRange* left,
125                   const Descriptor::ExtensionRange* right) const {
126     return left->start < right->start;
127   }
128 };
129 
IsPOD(const FieldDescriptor * field)130 bool IsPOD(const FieldDescriptor* field) {
131   if (field->is_repeated() || field->is_extension()) return false;
132   switch (field->cpp_type()) {
133     case FieldDescriptor::CPPTYPE_ENUM:
134     case FieldDescriptor::CPPTYPE_INT32:
135     case FieldDescriptor::CPPTYPE_INT64:
136     case FieldDescriptor::CPPTYPE_UINT32:
137     case FieldDescriptor::CPPTYPE_UINT64:
138     case FieldDescriptor::CPPTYPE_FLOAT:
139     case FieldDescriptor::CPPTYPE_DOUBLE:
140     case FieldDescriptor::CPPTYPE_BOOL:
141       return true;
142     case FieldDescriptor::CPPTYPE_STRING:
143       return false;
144     default:
145       return false;
146   }
147 }
148 
149 // Helper for the code that emits the SharedCtor() method.
CanConstructByZeroing(const FieldDescriptor * field,const Options & options)150 bool CanConstructByZeroing(const FieldDescriptor* field,
151                            const Options& options) {
152   bool ret = CanInitializeByZeroing(field);
153 
154   // Non-repeated, non-lazy message fields are simply raw pointers, so we can
155   // use memset to initialize these in SharedCtor.  We cannot use this in
156   // Clear, as we need to potentially delete the existing value.
157   ret = ret || (!field->is_repeated() && !IsLazy(field, options) &&
158                 field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE);
159   return ret;
160 }
161 
162 // Emits an if-statement with a condition that evaluates to true if |field| is
163 // considered non-default (will be sent over the wire), for message types
164 // without true field presence. Should only be called if
165 // !HasFieldPresence(message_descriptor).
EmitFieldNonDefaultCondition(io::Printer * printer,const std::string & prefix,const FieldDescriptor * field)166 bool EmitFieldNonDefaultCondition(io::Printer* printer,
167                                   const std::string& prefix,
168                                   const FieldDescriptor* field) {
169   Formatter format(printer);
170   format.Set("prefix", prefix);
171   format.Set("name", FieldName(field));
172   // Merge and serialize semantics: primitive fields are merged/serialized only
173   // if non-zero (numeric) or non-empty (string).
174   if (!field->is_repeated() && !field->containing_oneof()) {
175     if (field->cpp_type() == FieldDescriptor::CPPTYPE_STRING) {
176       format("if ($prefix$$name$().size() > 0) {\n");
177     } else if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
178       // Message fields still have has_$name$() methods.
179       format("if ($prefix$has_$name$()) {\n");
180     } else if (field->cpp_type() == FieldDescriptor::CPPTYPE_DOUBLE ||
181                field->cpp_type() == FieldDescriptor::CPPTYPE_FLOAT) {
182       // Handle float comparison to prevent -Wfloat-equal warnings
183       format("if (!($prefix$$name$() <= 0 && $prefix$$name$() >= 0)) {\n");
184     } else {
185       format("if ($prefix$$name$() != 0) {\n");
186     }
187     format.Indent();
188     return true;
189   } else if (field->containing_oneof()) {
190     format("if (has_$name$()) {\n");
191     format.Indent();
192     return true;
193   }
194   return false;
195 }
196 
197 // Does the given field have a has_$name$() method?
HasHasMethod(const FieldDescriptor * field)198 bool HasHasMethod(const FieldDescriptor* field) {
199   if (HasFieldPresence(field->file())) {
200     // In proto1/proto2, every field has a has_$name$() method.
201     return true;
202   }
203   // For message types without true field presence, only fields with a message
204   // type have a has_$name$() method.
205   return field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE;
206 }
207 
208 // Collects map entry message type information.
CollectMapInfo(const Options & options,const Descriptor * descriptor,std::map<std::string,std::string> * variables)209 void CollectMapInfo(const Options& options, const Descriptor* descriptor,
210                     std::map<std::string, std::string>* variables) {
211   GOOGLE_CHECK(IsMapEntryMessage(descriptor));
212   std::map<std::string, std::string>& vars = *variables;
213   const FieldDescriptor* key = descriptor->FindFieldByName("key");
214   const FieldDescriptor* val = descriptor->FindFieldByName("value");
215   vars["key_cpp"] = PrimitiveTypeName(options, key->cpp_type());
216   switch (val->cpp_type()) {
217     case FieldDescriptor::CPPTYPE_MESSAGE:
218       vars["val_cpp"] = FieldMessageTypeName(val, options);
219       break;
220     case FieldDescriptor::CPPTYPE_ENUM:
221       vars["val_cpp"] = ClassName(val->enum_type(), true);
222       break;
223     default:
224       vars["val_cpp"] = PrimitiveTypeName(options, val->cpp_type());
225   }
226   vars["key_wire_type"] =
227       "TYPE_" + ToUpper(DeclaredTypeMethodName(key->type()));
228   vars["val_wire_type"] =
229       "TYPE_" + ToUpper(DeclaredTypeMethodName(val->type()));
230   if (descriptor->file()->syntax() != FileDescriptor::SYNTAX_PROTO3 &&
231       val->type() == FieldDescriptor::TYPE_ENUM) {
232     const EnumValueDescriptor* default_value = val->default_value_enum();
233     vars["default_enum_value"] = Int32ToString(default_value->number());
234   } else {
235     vars["default_enum_value"] = "0";
236   }
237 }
238 
239 // Does the given field have a private (internal helper only) has_$name$()
240 // method?
HasPrivateHasMethod(const FieldDescriptor * field)241 bool HasPrivateHasMethod(const FieldDescriptor* field) {
242   // Only for oneofs in message types with no field presence. has_$name$(),
243   // based on the oneof case, is still useful internally for generated code.
244   return (!HasFieldPresence(field->file()) &&
245           field->containing_oneof() != NULL);
246 }
247 
// TODO(ckennelly):  Cull these exclusions if/when these protos do not have
// their methods overridden by subclasses.
250 
// Currently always returns false; both parameters are ignored.
bool ShouldMarkClassAsFinal(const Descriptor* descriptor,
                            const Options& options) {
  static_cast<void>(descriptor);
  static_cast<void>(options);
  return false;
}
255 
ShouldMarkClearAsFinal(const Descriptor * descriptor,const Options & options)256 bool ShouldMarkClearAsFinal(const Descriptor* descriptor,
257                             const Options& options) {
258   static std::set<std::string> exclusions{
259   };
260 
261   const std::string name = ClassName(descriptor, true);
262   return exclusions.find(name) == exclusions.end() ||
263          options.opensource_runtime;
264 }
265 
ShouldMarkIsInitializedAsFinal(const Descriptor * descriptor,const Options & options)266 bool ShouldMarkIsInitializedAsFinal(const Descriptor* descriptor,
267                                     const Options& options) {
268   static std::set<std::string> exclusions{
269   };
270 
271   const std::string name = ClassName(descriptor, true);
272   return exclusions.find(name) == exclusions.end() ||
273          options.opensource_runtime;
274 }
275 
ShouldMarkNewAsFinal(const Descriptor * descriptor,const Options & options)276 bool ShouldMarkNewAsFinal(const Descriptor* descriptor,
277                           const Options& options) {
278   static std::set<std::string> exclusions{
279   };
280 
281   const std::string name = ClassName(descriptor, true);
282   return exclusions.find(name) == exclusions.end() ||
283          options.opensource_runtime;
284 }
285 
TableDrivenParsingEnabled(const Descriptor * descriptor,const Options & options)286 bool TableDrivenParsingEnabled(const Descriptor* descriptor,
287                                const Options& options) {
288   if (!options.table_driven_parsing) {
289     return false;
290   }
291 
292   // Consider table-driven parsing.  We only do this if:
293   // - We have has_bits for fields.  This avoids a check on every field we set
294   //   when are present (the common case).
295   if (!HasFieldPresence(descriptor->file())) {
296     return false;
297   }
298 
299   const double table_sparseness = 0.5;
300   int max_field_number = 0;
301   for (auto field : FieldRange(descriptor)) {
302     if (max_field_number < field->number()) {
303       max_field_number = field->number();
304     }
305 
306     // - There are no weak fields.
307     if (IsWeak(field, options)) {
308       return false;
309     }
310 
311     // - There are no lazy fields (they require the non-lite library).
312     if (IsLazy(field, options)) {
313       return false;
314     }
315   }
316 
317   // - There range of field numbers is "small"
318   if (max_field_number >= (2 << 14)) {
319     return false;
320   }
321 
322   // - Field numbers are relatively dense within the actual number of fields.
323   //   We check for strictly greater than in the case where there are no fields
324   //   (only extensions) so max_field_number == descriptor->field_count() == 0.
325   if (max_field_number * table_sparseness > descriptor->field_count()) {
326     return false;
327   }
328 
329   // - This is not a MapEntryMessage.
330   if (IsMapEntryMessage(descriptor)) {
331     return false;
332   }
333 
334   return true;
335 }
336 
SetUnknkownFieldsVariable(const Descriptor * descriptor,const Options & options,std::map<std::string,std::string> * variables)337 void SetUnknkownFieldsVariable(const Descriptor* descriptor,
338                                const Options& options,
339                                std::map<std::string, std::string>* variables) {
340   std::string proto_ns = ProtobufNamespace(options);
341   if (UseUnknownFieldSet(descriptor->file(), options)) {
342     (*variables)["unknown_fields_type"] = "::" + proto_ns + "::UnknownFieldSet";
343   } else {
344     (*variables)["unknown_fields_type"] =
345         PrimitiveTypeName(options, FieldDescriptor::CPPTYPE_STRING);
346   }
347   (*variables)["have_unknown_fields"] =
348       "_internal_metadata_.have_unknown_fields()";
349   (*variables)["unknown_fields"] = "_internal_metadata_.unknown_fields()";
350   (*variables)["mutable_unknown_fields"] =
351       "_internal_metadata_.mutable_unknown_fields()";
352 }
353 
IsCrossFileMapField(const FieldDescriptor * field)354 bool IsCrossFileMapField(const FieldDescriptor* field) {
355   if (!field->is_map()) {
356     return false;
357   }
358 
359   const Descriptor* d = field->message_type();
360   const FieldDescriptor* value = d->FindFieldByNumber(2);
361 
362   return IsCrossFileMessage(value);
363 }
364 
IsCrossFileMaybeMap(const FieldDescriptor * field)365 bool IsCrossFileMaybeMap(const FieldDescriptor* field) {
366   if (IsCrossFileMapField(field)) {
367     return true;
368   }
369 
370   return IsCrossFileMessage(field);
371 }
372 
IsRequired(const std::vector<const FieldDescriptor * > & v)373 bool IsRequired(const std::vector<const FieldDescriptor*>& v) {
374   return v.front()->is_required();
375 }
376 
377 // Allows chunking repeated fields together and non-repeated fields if the
378 // fields share the same has_byte index.
379 // TODO(seongkim): use lambda with capture instead of functor.
380 class MatchRepeatedAndHasByte {
381  public:
MatchRepeatedAndHasByte(const std::vector<int> * has_bit_indices,bool has_field_presence)382   MatchRepeatedAndHasByte(const std::vector<int>* has_bit_indices,
383                           bool has_field_presence)
384       : has_bit_indices_(*has_bit_indices),
385         has_field_presence_(has_field_presence) {}
386 
387   // Returns true if the following conditions are met:
388   // --both fields are repeated fields
389   // --both fields are non-repeated fields with either has_field_presence is
390   //   false or have the same has_byte index.
operator ()(const FieldDescriptor * a,const FieldDescriptor * b) const391   bool operator()(const FieldDescriptor* a, const FieldDescriptor* b) const {
392     return a->is_repeated() == b->is_repeated() &&
393            (!has_field_presence_ || a->is_repeated() ||
394             has_bit_indices_[a->index()] / 8 ==
395                 has_bit_indices_[b->index()] / 8);
396   }
397 
398  private:
399   const std::vector<int>& has_bit_indices_;
400   const bool has_field_presence_;
401 };
402 
403 // Allows chunking required fields separately after chunking with
404 // MatchRepeatedAndHasByte.
405 class MatchRepeatedAndHasByteAndRequired : public MatchRepeatedAndHasByte {
406  public:
MatchRepeatedAndHasByteAndRequired(const std::vector<int> * has_bit_indices,bool has_field_presence)407   MatchRepeatedAndHasByteAndRequired(const std::vector<int>* has_bit_indices,
408                                      bool has_field_presence)
409       : MatchRepeatedAndHasByte(has_bit_indices, has_field_presence) {}
410 
operator ()(const FieldDescriptor * a,const FieldDescriptor * b) const411   bool operator()(const FieldDescriptor* a, const FieldDescriptor* b) const {
412     return MatchRepeatedAndHasByte::operator()(a, b) &&
413            a->is_required() == b->is_required();
414   }
415 };
416 
417 // Allows chunking zero-initializable fields separately after chunking with
418 // MatchRepeatedAndHasByte.
419 class MatchRepeatedAndHasByteAndZeroInits : public MatchRepeatedAndHasByte {
420  public:
MatchRepeatedAndHasByteAndZeroInits(const std::vector<int> * has_bit_indices,bool has_field_presence)421   MatchRepeatedAndHasByteAndZeroInits(const std::vector<int>* has_bit_indices,
422                                       bool has_field_presence)
423       : MatchRepeatedAndHasByte(has_bit_indices, has_field_presence) {}
424 
operator ()(const FieldDescriptor * a,const FieldDescriptor * b) const425   bool operator()(const FieldDescriptor* a, const FieldDescriptor* b) const {
426     return MatchRepeatedAndHasByte::operator()(a, b) &&
427            CanInitializeByZeroing(a) == CanInitializeByZeroing(b);
428   }
429 };
430 
431 // Collects neighboring fields based on a given criteria (equivalent predicate).
432 template <typename Predicate>
CollectFields(const std::vector<const FieldDescriptor * > & fields,const Predicate & equivalent)433 std::vector<std::vector<const FieldDescriptor*>> CollectFields(
434     const std::vector<const FieldDescriptor*>& fields,
435     const Predicate& equivalent) {
436   std::vector<std::vector<const FieldDescriptor*>> chunks;
437   if (fields.empty()) {
438     return chunks;
439   }
440 
441   const FieldDescriptor* last_field = fields.front();
442   std::vector<const FieldDescriptor*> chunk;
443   for (auto field : fields) {
444     if (!equivalent(last_field, field) && !chunk.empty()) {
445       chunks.push_back(chunk);
446       chunk.clear();
447     }
448     chunk.push_back(field);
449     last_field = field;
450   }
451   if (!chunk.empty()) {
452     chunks.push_back(chunk);
453   }
454   return chunks;
455 }
456 
457 // Returns a bit mask based on has_bit index of "fields" that are typically on
458 // the same chunk. It is used in a group presence check where _has_bits_ is
459 // masked to tell if any thing in "fields" is present.
GenChunkMask(const std::vector<const FieldDescriptor * > & fields,const std::vector<int> & has_bit_indices)460 uint32 GenChunkMask(const std::vector<const FieldDescriptor*>& fields,
461                     const std::vector<int>& has_bit_indices) {
462   GOOGLE_CHECK(!fields.empty());
463   int first_index_offset = has_bit_indices[fields.front()->index()] / 32;
464   uint32 chunk_mask = 0;
465   for (auto field : fields) {
466     // "index" defines where in the _has_bits_ the field appears.
467     int index = has_bit_indices[field->index()];
468     GOOGLE_CHECK_EQ(first_index_offset, index / 32);
469     chunk_mask |= static_cast<uint32>(1) << (index % 32);
470   }
471   GOOGLE_CHECK_NE(0, chunk_mask);
472   return chunk_mask;
473 }
474 
// Returns how many bits are set in n.
static int popcnt(uint32 n) {
  int set_bits = 0;
  for (; n != 0; n >>= 1) {
    if ((n & 1u) != 0) {
      ++set_bits;
    }
  }
  return set_bits;
}
484 
485 // For a run of cold chunks, opens and closes an external if statement that
486 // checks multiple has_bits words to skip bulk of cold fields.
487 class ColdChunkSkipper {
488  public:
ColdChunkSkipper(const Options & options,const std::vector<std::vector<const FieldDescriptor * >> & chunks,const std::vector<int> & has_bit_indices,const double cold_threshold,bool has_field_presence)489   ColdChunkSkipper(
490       const Options& options,
491       const std::vector<std::vector<const FieldDescriptor*>>& chunks,
492       const std::vector<int>& has_bit_indices, const double cold_threshold,
493       bool has_field_presence)
494       : chunks_(chunks),
495         has_bit_indices_(has_bit_indices),
496         access_info_map_(options.access_info_map),
497         cold_threshold_(cold_threshold),
498         has_field_presence_(has_field_presence) {
499     SetCommonVars(options, &variables_);
500   }
501 
502   // May open an external if check for a batch of cold fields. "from" is the
503   // prefix to _has_bits_ to allow MergeFrom to use "from._has_bits_".
504   // Otherwise, it should be "".
505   void OnStartChunk(int chunk, int cached_has_bit_index,
506                     const std::string& from, io::Printer* printer);
507   bool OnEndChunk(int chunk, io::Printer* printer);
508 
509  private:
510   bool IsColdChunk(int chunk);
511 
HasbitWord(int chunk,int offset)512   int HasbitWord(int chunk, int offset) {
513     return has_bit_indices_[chunks_[chunk][offset]->index()] / 32;
514   }
515 
516   const std::vector<std::vector<const FieldDescriptor*>>& chunks_;
517   const std::vector<int>& has_bit_indices_;
518   const AccessInfoMap* access_info_map_;
519   const double cold_threshold_;
520   std::map<std::string, std::string> variables_;
521   int limit_chunk_ = -1;
522   bool has_field_presence_;
523 };
524 
525 // Tuning parameters for ColdChunkSkipper.
526 const double kColdRatio = 0.005;
527 
IsColdChunk(int chunk)528 bool ColdChunkSkipper::IsColdChunk(int chunk) {
529   // Mark this variable as used until it is actually used
530   (void)cold_threshold_;
531   return false;
532 }
533 
534 
OnStartChunk(int chunk,int cached_has_bit_index,const std::string & from,io::Printer * printer)535 void ColdChunkSkipper::OnStartChunk(int chunk, int cached_has_bit_index,
536                                     const std::string& from,
537                                     io::Printer* printer) {
538   Formatter format(printer, variables_);
539   if (!access_info_map_ || !has_field_presence_) {
540     return;
541   } else if (chunk < limit_chunk_) {
542     // We are already inside a run of cold chunks.
543     return;
544   } else if (!IsColdChunk(chunk)) {
545     // We can't start a run of cold chunks.
546     return;
547   }
548 
549   // Find the end of consecutive cold chunks.
550   limit_chunk_ = chunk;
551   while (limit_chunk_ < chunks_.size() && IsColdChunk(limit_chunk_)) {
552     limit_chunk_++;
553   }
554 
555   if (limit_chunk_ <= chunk + 1) {
556     // Require at least two chunks to emit external has_bit checks.
557     limit_chunk_ = -1;
558     return;
559   }
560 
561   // Emit has_bit check for each has_bit_dword index.
562   format("if (PROTOBUF_PREDICT_FALSE(");
563   int first_word = HasbitWord(chunk, 0);
564   while (chunk < limit_chunk_) {
565     uint32 mask = 0;
566     int this_word = HasbitWord(chunk, 0);
567     // Generate mask for chunks on the same word.
568     for (; chunk < limit_chunk_ && HasbitWord(chunk, 0) == this_word; chunk++) {
569       for (auto field : chunks_[chunk]) {
570         int hasbit_index = has_bit_indices_[field->index()];
571         // Fields on a chunk must be in the same word.
572         GOOGLE_CHECK_EQ(this_word, hasbit_index / 32);
573         mask |= 1 << (hasbit_index % 32);
574       }
575     }
576 
577     if (this_word != first_word) {
578       format(" ||\n    ");
579     }
580     format.Set("mask", strings::Hex(mask, strings::ZERO_PAD_8));
581     if (this_word == cached_has_bit_index) {
582       format("(cached_has_bits & 0x$mask$u) != 0");
583     } else {
584       format("($1$_has_bits_[$2$] & 0x$mask$u) != 0", from, this_word);
585     }
586   }
587   format(")) {\n");
588   format.Indent();
589 }
590 
OnEndChunk(int chunk,io::Printer * printer)591 bool ColdChunkSkipper::OnEndChunk(int chunk, io::Printer* printer) {
592   Formatter format(printer, variables_);
593   if (chunk != limit_chunk_ - 1) {
594     return false;
595   }
596   format.Outdent();
597   format("}\n");
598   return true;
599 }
600 
601 }  // anonymous namespace
602 
603 // ===================================================================
604 
MessageGenerator(const Descriptor * descriptor,const std::map<std::string,std::string> & vars,int index_in_file_messages,const Options & options,MessageSCCAnalyzer * scc_analyzer)605 MessageGenerator::MessageGenerator(
606     const Descriptor* descriptor,
607     const std::map<std::string, std::string>& vars, int index_in_file_messages,
608     const Options& options, MessageSCCAnalyzer* scc_analyzer)
609     : descriptor_(descriptor),
610       index_in_file_messages_(index_in_file_messages),
611       classname_(ClassName(descriptor, false)),
612       options_(options),
613       field_generators_(descriptor, options, scc_analyzer),
614       max_has_bit_index_(0),
615       num_weak_fields_(0),
616       scc_analyzer_(scc_analyzer),
617       variables_(vars) {
618   if (!message_layout_helper_) {
619     message_layout_helper_.reset(new PaddingOptimizer());
620   }
621 
622   // Variables that apply to this class
623   variables_["classname"] = classname_;
624   variables_["classtype"] = QualifiedClassName(descriptor_, options);
625   variables_["scc_info"] =
626       SccInfoSymbol(scc_analyzer_->GetSCC(descriptor_), options_);
627   variables_["full_name"] = descriptor_->full_name();
628   variables_["superclass"] = SuperClassName(descriptor_, options_);
629 
630   // Compute optimized field order to be used for layout and initialization
631   // purposes.
632   for (auto field : FieldRange(descriptor_)) {
633     if (IsWeak(field, options_)) {
634       num_weak_fields_++;
635     } else if (!field->containing_oneof()) {
636       optimized_order_.push_back(field);
637     }
638   }
639 
640   message_layout_helper_->OptimizeLayout(&optimized_order_, options_);
641 
642   if (HasFieldPresence(descriptor_->file())) {
643     // We use -1 as a sentinel.
644     has_bit_indices_.resize(descriptor_->field_count(), -1);
645     for (auto field : optimized_order_) {
646       // Skip fields that do not have has bits.
647       if (field->is_repeated()) {
648         continue;
649       }
650 
651       has_bit_indices_[field->index()] = max_has_bit_index_++;
652     }
653     field_generators_.SetHasBitIndices(has_bit_indices_);
654   }
655 
656   num_required_fields_ = 0;
657   for (int i = 0; i < descriptor->field_count(); i++) {
658     if (descriptor->field(i)->is_required()) {
659       ++num_required_fields_;
660     }
661   }
662 
663   table_driven_ = TableDrivenParsingEnabled(descriptor_, options_);
664 }
665 
666 MessageGenerator::~MessageGenerator() = default;
667 
HasBitsSize() const668 size_t MessageGenerator::HasBitsSize() const {
669   size_t sizeof_has_bits = (max_has_bit_index_ + 31) / 32 * 4;
670   if (sizeof_has_bits == 0) {
671     // Zero-size arrays aren't technically allowed, and MSVC in particular
672     // doesn't like them.  We still need to declare these arrays to make
673     // other code compile.  Since this is an uncommon case, we'll just declare
674     // them with size 1 and waste some space.  Oh well.
675     sizeof_has_bits = 4;
676   }
677 
678   return sizeof_has_bits;
679 }
680 
AddGenerators(std::vector<std::unique_ptr<EnumGenerator>> * enum_generators,std::vector<std::unique_ptr<ExtensionGenerator>> * extension_generators)681 void MessageGenerator::AddGenerators(
682     std::vector<std::unique_ptr<EnumGenerator>>* enum_generators,
683     std::vector<std::unique_ptr<ExtensionGenerator>>* extension_generators) {
684   for (int i = 0; i < descriptor_->enum_type_count(); i++) {
685     enum_generators->emplace_back(
686         new EnumGenerator(descriptor_->enum_type(i), variables_, options_));
687     enum_generators_.push_back(enum_generators->back().get());
688   }
689   for (int i = 0; i < descriptor_->extension_count(); i++) {
690     extension_generators->emplace_back(
691         new ExtensionGenerator(descriptor_->extension(i), options_));
692     extension_generators_.push_back(extension_generators->back().get());
693   }
694 }
695 
GenerateFieldAccessorDeclarations(io::Printer * printer)696 void MessageGenerator::GenerateFieldAccessorDeclarations(io::Printer* printer) {
697   Formatter format(printer, variables_);
698   // optimized_fields_ does not contain fields where
699   //    field->containing_oneof() != NULL
700   // so we need to iterate over those as well.
701   //
702   // We place the non-oneof fields in optimized_order_, as that controls the
703   // order of the _has_bits_ entries and we want GDB's pretty printers to be
704   // able to infer these indices from the k[FIELDNAME]FieldNumber order.
705   std::vector<const FieldDescriptor*> ordered_fields;
706   ordered_fields.reserve(descriptor_->field_count());
707 
708   ordered_fields.insert(ordered_fields.begin(), optimized_order_.begin(),
709                         optimized_order_.end());
710   for (auto field : FieldRange(descriptor_)) {
711     if (field->containing_oneof() == NULL && !field->options().weak()) {
712       continue;
713     }
714     ordered_fields.push_back(field);
715   }
716 
717   if (!ordered_fields.empty()) {
718     format("enum : int {\n");
719     for (auto field : ordered_fields) {
720       Formatter::SaveState save(&format);
721 
722       std::map<std::string, std::string> vars;
723       SetCommonFieldVariables(field, &vars, options_);
724       format.AddMap(vars);
725       format("  ${1$$2$$}$ = $number$,\n", field, FieldConstantName(field));
726     }
727     format("};\n");
728   }
729   for (auto field : ordered_fields) {
730     PrintFieldComment(format, field);
731 
732     Formatter::SaveState save(&format);
733 
734     std::map<std::string, std::string> vars;
735     SetCommonFieldVariables(field, &vars, options_);
736     format.AddMap(vars);
737 
738     if (field->is_repeated()) {
739       format("$deprecated_attr$int ${1$$name$_size$}$() const;\n", field);
740     } else if (HasHasMethod(field)) {
741       format("$deprecated_attr$bool ${1$has_$name$$}$() const;\n", field);
742     } else if (HasPrivateHasMethod(field)) {
743       format(
744           "private:\n"
745           "bool ${1$has_$name$$}$() const;\n"
746           "public:\n",
747           field);
748     }
749 
750     format("$deprecated_attr$void ${1$clear_$name$$}$();\n", field);
751 
752     // Generate type-specific accessor declarations.
753     field_generators_.get(field).GenerateAccessorDeclarations(printer);
754 
755     format("\n");
756   }
757 
758   if (descriptor_->extension_range_count() > 0) {
759     // Generate accessors for extensions.  We just call a macro located in
760     // extension_set.h since the accessors about 80 lines of static code.
761     format("$GOOGLE_PROTOBUF$_EXTENSION_ACCESSORS($classname$)\n");
762     // Generate MessageSet specific APIs for proto2 MessageSet.
763     // For testing purposes we don't check for bridge.MessageSet, so
764     // we don't use IsProto2MessageSet
765     if (descriptor_->options().message_set_wire_format() &&
766         !options_.opensource_runtime && !options_.lite_implicit_weak_fields) {
767       // Special-case MessageSet
768       format("GOOGLE_PROTOBUF_EXTENSION_MESSAGE_SET_ACCESSORS($classname$)\n");
769     }
770   }
771 
772   for (auto oneof : OneOfRange(descriptor_)) {
773     Formatter::SaveState saver(&format);
774     format.Set("oneof_name", oneof->name());
775     format.Set("camel_oneof_name", UnderscoresToCamelCase(oneof->name(), true));
776     format(
777         "void ${1$clear_$oneof_name$$}$();\n"
778         "$camel_oneof_name$Case $oneof_name$_case() const;\n",
779         oneof);
780   }
781 }
782 
GenerateSingularFieldHasBits(const FieldDescriptor * field,Formatter format)783 void MessageGenerator::GenerateSingularFieldHasBits(
784     const FieldDescriptor* field, Formatter format) {
785   if (field->options().weak()) {
786     format(
787         "inline bool $classname$::has_$name$() const {\n"
788         "  return _weak_field_map_.Has($number$);\n"
789         "}\n");
790     return;
791   }
792   if (HasFieldPresence(descriptor_->file())) {
793     // N.B.: without field presence, we do not use has-bits or generate
794     // has_$name$() methods.
795     int has_bit_index = has_bit_indices_[field->index()];
796     GOOGLE_CHECK_GE(has_bit_index, 0);
797 
798     format.Set("has_array_index", has_bit_index / 32);
799     format.Set("has_mask",
800                strings::Hex(1u << (has_bit_index % 32), strings::ZERO_PAD_8));
801     format(
802         "inline bool $classname$::has_$name$() const {\n"
803         "  return (_has_bits_[$has_array_index$] & 0x$has_mask$u) != 0;\n"
804         "}\n");
805   } else {
806     // Message fields have a has_$name$() method.
807     if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
808       if (IsLazy(field, options_)) {
809         format(
810             "inline bool $classname$::has_$name$() const {\n"
811             "  return !$name$_.IsCleared();\n"
812             "}\n");
813       } else {
814         format(
815             "inline bool $classname$::has_$name$() const {\n"
816             "  return this != internal_default_instance() "
817             "&& $name$_ != nullptr;\n"
818             "}\n");
819       }
820     }
821   }
822 }
823 
GenerateOneofHasBits(io::Printer * printer)824 void MessageGenerator::GenerateOneofHasBits(io::Printer* printer) {
825   Formatter format(printer, variables_);
826   for (auto oneof : OneOfRange(descriptor_)) {
827     format.Set("oneof_name", oneof->name());
828     format.Set("oneof_index", oneof->index());
829     format.Set("cap_oneof_name", ToUpper(oneof->name()));
830     format(
831         "inline bool $classname$::has_$oneof_name$() const {\n"
832         "  return $oneof_name$_case() != $cap_oneof_name$_NOT_SET;\n"
833         "}\n"
834         "inline void $classname$::clear_has_$oneof_name$() {\n"
835         "  _oneof_case_[$oneof_index$] = $cap_oneof_name$_NOT_SET;\n"
836         "}\n");
837   }
838 }
839 
GenerateOneofMemberHasBits(const FieldDescriptor * field,const Formatter & format)840 void MessageGenerator::GenerateOneofMemberHasBits(const FieldDescriptor* field,
841                                                   const Formatter& format) {
842   // Singular field in a oneof
843   // N.B.: Without field presence, we do not use has-bits or generate
844   // has_$name$() methods, but oneofs still have set_has_$name$().
845   // Oneofs also have has_$name$() but only as a private helper
846   // method, so that generated code is slightly cleaner (vs.  comparing
847   // _oneof_case_[index] against a constant everywhere).
848   format(
849       "inline bool $classname$::has_$name$() const {\n"
850       "  return $oneof_name$_case() == k$field_name$;\n"
851       "}\n"
852       "inline void $classname$::set_has_$name$() {\n"
853       "  _oneof_case_[$oneof_index$] = k$field_name$;\n"
854       "}\n");
855 }
856 
GenerateFieldClear(const FieldDescriptor * field,bool is_inline,Formatter format)857 void MessageGenerator::GenerateFieldClear(const FieldDescriptor* field,
858                                           bool is_inline, Formatter format) {
859   // Generate clear_$name$().
860   if (is_inline) {
861     format("inline ");
862   }
863   format("void $classname$::clear_$name$() {\n");
864 
865   format.Indent();
866 
867   if (field->containing_oneof()) {
868     // Clear this field only if it is the active field in this oneof,
869     // otherwise ignore
870     format("if (has_$name$()) {\n");
871     format.Indent();
872     field_generators_.get(field).GenerateClearingCode(format.printer());
873     format("clear_has_$oneof_name$();\n");
874     format.Outdent();
875     format("}\n");
876   } else {
877     field_generators_.get(field).GenerateClearingCode(format.printer());
878     if (HasFieldPresence(descriptor_->file())) {
879       if (!field->is_repeated() && !field->options().weak()) {
880         int has_bit_index = has_bit_indices_[field->index()];
881         GOOGLE_CHECK_GE(has_bit_index, 0);
882 
883         format.Set("has_array_index", has_bit_index / 32);
884         format.Set("has_mask",
885                    strings::Hex(1u << (has_bit_index % 32), strings::ZERO_PAD_8));
886         format("_has_bits_[$has_array_index$] &= ~0x$has_mask$u;\n");
887       }
888     }
889   }
890 
891   format.Outdent();
892   format("}\n");
893 }
894 
GenerateFieldAccessorDefinitions(io::Printer * printer)895 void MessageGenerator::GenerateFieldAccessorDefinitions(io::Printer* printer) {
896   Formatter format(printer, variables_);
897   format("// $classname$\n\n");
898 
899   for (auto field : FieldRange(descriptor_)) {
900     PrintFieldComment(format, field);
901 
902     std::map<std::string, std::string> vars;
903     SetCommonFieldVariables(field, &vars, options_);
904 
905     Formatter::SaveState saver(&format);
906     format.AddMap(vars);
907 
908     // Generate has_$name$() or $name$_size().
909     if (field->is_repeated()) {
910       format(
911           "inline int $classname$::$name$_size() const {\n"
912           "  return $name$_.size();\n"
913           "}\n");
914     } else if (field->containing_oneof()) {
915       format.Set("field_name", UnderscoresToCamelCase(field->name(), true));
916       format.Set("oneof_name", field->containing_oneof()->name());
917       format.Set("oneof_index",
918                  StrCat(field->containing_oneof()->index()));
919       GenerateOneofMemberHasBits(field, format);
920     } else {
921       // Singular field.
922       GenerateSingularFieldHasBits(field, format);
923     }
924 
925     if (!IsCrossFileMaybeMap(field)) {
926       GenerateFieldClear(field, true, format);
927     }
928 
929     // Generate type-specific accessors.
930     field_generators_.get(field).GenerateInlineAccessorDefinitions(printer);
931 
932     format("\n");
933   }
934 
935   // Generate has_$name$() and clear_has_$name$() functions for oneofs.
936   GenerateOneofHasBits(printer);
937 }
938 
GenerateClassDefinition(io::Printer * printer)939 void MessageGenerator::GenerateClassDefinition(io::Printer* printer) {
940   Formatter format(printer, variables_);
941   format.Set("class_final",
942              ShouldMarkClassAsFinal(descriptor_, options_) ? "final" : "");
943 
944   if (IsMapEntryMessage(descriptor_)) {
945     std::map<std::string, std::string> vars;
946     CollectMapInfo(options_, descriptor_, &vars);
947     vars["lite"] =
948         HasDescriptorMethods(descriptor_->file(), options_) ? "" : "Lite";
949     format.AddMap(vars);
950     format(
951         "class $classname$ : public "
952         "::$proto_ns$::internal::MapEntry$lite$<$classname$, \n"
953         "    $key_cpp$, $val_cpp$,\n"
954         "    ::$proto_ns$::internal::WireFormatLite::$key_wire_type$,\n"
955         "    ::$proto_ns$::internal::WireFormatLite::$val_wire_type$,\n"
956         "    $default_enum_value$ > {\n"
957         "public:\n"
958         "  typedef ::$proto_ns$::internal::MapEntry$lite$<$classname$, \n"
959         "    $key_cpp$, $val_cpp$,\n"
960         "    ::$proto_ns$::internal::WireFormatLite::$key_wire_type$,\n"
961         "    ::$proto_ns$::internal::WireFormatLite::$val_wire_type$,\n"
962         "    $default_enum_value$ > SuperType;\n"
963         "  $classname$();\n"
964         "  $classname$(::$proto_ns$::Arena* arena);\n"
965         "  void MergeFrom(const $classname$& other);\n"
966         "  static const $classname$* internal_default_instance() { return "
967         "reinterpret_cast<const "
968         "$classname$*>(&_$classname$_default_instance_); }\n");
969     std::string suffix = GetUtf8Suffix(descriptor_->field(0), options_);
970     if (descriptor_->field(0)->type() == FieldDescriptor::TYPE_STRING &&
971         !suffix.empty()) {
972       if (suffix == "UTF8") {
973         format(
974             "  static bool ValidateKey(std::string* s) {\n"
975             "    return ::$proto_ns$::internal::WireFormatLite::"
976             "VerifyUtf8String(s->data(), s->size(), "
977             "::$proto_ns$::internal::WireFormatLite::PARSE, \"$1$\");\n"
978             " }\n",
979             descriptor_->field(0)->full_name());
980       } else {
981         GOOGLE_CHECK(suffix == "UTF8Verify");
982         format(
983             "  static bool ValidateKey(std::string* s) {\n"
984             "#ifndef NDEBUG\n"
985             "    ::$proto_ns$::internal::WireFormatLite::VerifyUtf8String(\n"
986             "       s->data(), s->size(), ::$proto_ns$::internal::"
987             "WireFormatLite::PARSE, \"$1$\");\n"
988             "#endif\n"
989             "    return true;\n"
990             " }\n",
991             descriptor_->field(0)->full_name());
992       }
993     } else {
994       format("  static bool ValidateKey(void*) { return true; }\n");
995     }
996     if (descriptor_->field(1)->type() == FieldDescriptor::TYPE_STRING &&
997         !suffix.empty()) {
998       if (suffix == "UTF8") {
999         format(
1000             "  static bool ValidateValue(std::string* s) {\n"
1001             "    return ::$proto_ns$::internal::WireFormatLite::"
1002             "VerifyUtf8String(s->data(), s->size(), "
1003             "::$proto_ns$::internal::WireFormatLite::PARSE, \"$1$\");\n"
1004             " }\n",
1005             descriptor_->field(1)->full_name());
1006       } else {
1007         GOOGLE_CHECK(suffix == "UTF8Verify");
1008         format(
1009             "  static bool ValidateValue(std::string* s) {\n"
1010             "#ifndef NDEBUG\n"
1011             "    ::$proto_ns$::internal::WireFormatLite::VerifyUtf8String(\n"
1012             "       s->data(), s->size(), ::$proto_ns$::internal::"
1013             "WireFormatLite::PARSE, \"$1$\");\n"
1014             "#endif\n"
1015             "    return true;\n"
1016             " }\n",
1017             descriptor_->field(1)->full_name());
1018       }
1019     } else {
1020       format("  static bool ValidateValue(void*) { return true; }\n");
1021     }
1022     if (HasDescriptorMethods(descriptor_->file(), options_)) {
1023       format(
1024           "  void MergeFrom(const ::$proto_ns$::Message& other) final;\n"
1025           "  ::$proto_ns$::Metadata GetMetadata() const final;\n"
1026           "  private:\n"
1027           "  static ::$proto_ns$::Metadata GetMetadataStatic() {\n"
1028           "    ::$proto_ns$::internal::AssignDescriptors(&::$desc_table$);\n"
1029           "    return ::$desc_table$.file_level_metadata[$1$];\n"
1030           "  }\n"
1031           "\n"
1032           "  public:\n"
1033           "};\n",
1034           index_in_file_messages_);
1035     } else {
1036       format("};\n");
1037     }
1038     return;
1039   }
1040 
1041   format(
1042       "class $dllexport_decl $${1$$classname$$}$$ class_final$ :\n"
1043       "    public $superclass$ /* @@protoc_insertion_point("
1044       "class_definition:$full_name$) */ {\n",
1045       descriptor_);
1046   format(" public:\n");
1047   format.Indent();
1048 
1049   format(
1050       "$classname$();\n"
1051       "virtual ~$classname$();\n"
1052       "\n"
1053       "$classname$(const $classname$& from);\n"
1054       "$classname$($classname$&& from) noexcept\n"
1055       "  : $classname$() {\n"
1056       "  *this = ::std::move(from);\n"
1057       "}\n"
1058       "\n"
1059       "inline $classname$& operator=(const $classname$& from) {\n"
1060       "  CopyFrom(from);\n"
1061       "  return *this;\n"
1062       "}\n"
1063       "inline $classname$& operator=($classname$&& from) noexcept {\n"
1064       "  if (GetArenaNoVirtual() == from.GetArenaNoVirtual()) {\n"
1065       "    if (this != &from) InternalSwap(&from);\n"
1066       "  } else {\n"
1067       "    CopyFrom(from);\n"
1068       "  }\n"
1069       "  return *this;\n"
1070       "}\n"
1071       "\n");
1072 
1073   if (options_.table_driven_serialization) {
1074     format(
1075         "private:\n"
1076         "const void* InternalGetTable() const;\n"
1077         "public:\n"
1078         "\n");
1079   }
1080 
1081   std::map<std::string, std::string> vars;
1082   SetUnknkownFieldsVariable(descriptor_, options_, &vars);
1083   format.AddMap(vars);
1084   if (PublicUnknownFieldsAccessors(descriptor_)) {
1085     format(
1086         "inline const $unknown_fields_type$& unknown_fields() const {\n"
1087         "  return $unknown_fields$;\n"
1088         "}\n"
1089         "inline $unknown_fields_type$* mutable_unknown_fields() {\n"
1090         "  return $mutable_unknown_fields$;\n"
1091         "}\n"
1092         "\n");
1093   }
1094 
1095   // N.B.: We exclude GetArena() when arena support is disabled, falling back on
1096   // MessageLite's implementation which returns NULL rather than generating our
1097   // own method which returns NULL, in order to reduce code size.
1098   if (SupportsArenas(descriptor_)) {
1099     // virtual method version of GetArenaNoVirtual(), required for generic
1100     // dispatch given a MessageLite* (e.g., in RepeatedField::AddAllocated()).
1101     format(
1102         "inline ::$proto_ns$::Arena* GetArena() const final {\n"
1103         "  return GetArenaNoVirtual();\n"
1104         "}\n"
1105         "inline void* GetMaybeArenaPointer() const final {\n"
1106         "  return MaybeArenaPtr();\n"
1107         "}\n");
1108   }
1109 
1110   // Only generate this member if it's not disabled.
1111   if (HasDescriptorMethods(descriptor_->file(), options_) &&
1112       !descriptor_->options().no_standard_descriptor_accessor()) {
1113     format(
1114         "static const ::$proto_ns$::Descriptor* descriptor() {\n"
1115         "  return GetDescriptor();\n"
1116         "}\n");
1117   }
1118 
1119   if (HasDescriptorMethods(descriptor_->file(), options_)) {
1120     // These shadow non-static methods of the same names in Message.  We
1121     // redefine them here because calls directly on the generated class can be
1122     // statically analyzed -- we know what descriptor types are being requested.
1123     // It also avoids a vtable dispatch.
1124     //
1125     // We would eventually like to eliminate the methods in Message, and having
1126     // this separate also lets us track calls to the base class methods
1127     // separately.
1128     format(
1129         "static const ::$proto_ns$::Descriptor* GetDescriptor() {\n"
1130         "  return GetMetadataStatic().descriptor;\n"
1131         "}\n"
1132         "static const ::$proto_ns$::Reflection* GetReflection() {\n"
1133         "  return GetMetadataStatic().reflection;\n"
1134         "}\n");
1135   }
1136 
1137   format(
1138       "static const $classname$& default_instance();\n"
1139       "\n");
1140 
1141   // Generate enum values for every field in oneofs. One list is generated for
1142   // each oneof with an additional *_NOT_SET value.
1143   for (auto oneof : OneOfRange(descriptor_)) {
1144     format("enum $1$Case {\n", UnderscoresToCamelCase(oneof->name(), true));
1145     format.Indent();
1146     for (auto field : FieldRange(oneof)) {
1147       std::string oneof_enum_case_field_name =
1148           UnderscoresToCamelCase(field->name(), true);
1149       format("k$1$ = $2$,\n", oneof_enum_case_field_name,  // 1
1150              field->number());                             // 2
1151     }
1152     format("$1$_NOT_SET = 0,\n", ToUpper(oneof->name()));
1153     format.Outdent();
1154     format(
1155         "};\n"
1156         "\n");
1157   }
1158 
1159   // TODO(gerbens) make this private, while still granting other protos access.
1160   format(
1161       "static void InitAsDefaultInstance();  // FOR INTERNAL USE ONLY\n"
1162       "static inline const $classname$* internal_default_instance() {\n"
1163       "  return reinterpret_cast<const $classname$*>(\n"
1164       "             &_$classname$_default_instance_);\n"
1165       "}\n"
1166       "static constexpr int kIndexInFileMessages =\n"
1167       "  $1$;\n"
1168       "\n",
1169       index_in_file_messages_);
1170 
1171   if (IsAnyMessage(descriptor_, options_)) {
1172     format(
1173         "// implements Any -----------------------------------------------\n"
1174         "\n");
1175     if (HasDescriptorMethods(descriptor_->file(), options_)) {
1176       format(
1177           "void PackFrom(const ::$proto_ns$::Message& message);\n"
1178           "void PackFrom(const ::$proto_ns$::Message& message,\n"
1179           "              const std::string& type_url_prefix);\n"
1180           "bool UnpackTo(::$proto_ns$::Message* message) const;\n"
1181           "static bool GetAnyFieldDescriptors(\n"
1182           "    const ::$proto_ns$::Message& message,\n"
1183           "    const ::$proto_ns$::FieldDescriptor** type_url_field,\n"
1184           "    const ::$proto_ns$::FieldDescriptor** value_field);\n");
1185     } else {
1186       format(
1187           "template <typename T>\n"
1188           "void PackFrom(const T& message) {\n"
1189           "  _any_metadata_.PackFrom(message);\n"
1190           "}\n"
1191           "template <typename T>\n"
1192           "void PackFrom(const T& message,\n"
1193           "              const std::string& type_url_prefix) {\n"
1194           "  _any_metadata_.PackFrom(message, type_url_prefix);"
1195           "}\n"
1196           "template <typename T>\n"
1197           "bool UnpackTo(T* message) const {\n"
1198           "  return _any_metadata_.UnpackTo(message);\n"
1199           "}\n");
1200     }
1201     format(
1202         "template<typename T> bool Is() const {\n"
1203         "  return _any_metadata_.Is<T>();\n"
1204         "}\n"
1205         "static bool ParseAnyTypeUrl(const string& type_url,\n"
1206         "                            std::string* full_type_name);\n");
1207   }
1208 
1209   format.Set("new_final",
1210              ShouldMarkNewAsFinal(descriptor_, options_) ? "final" : "");
1211 
1212   format(
1213       "friend void swap($classname$& a, $classname$& b) {\n"
1214       "  a.Swap(&b);\n"
1215       "}\n");
1216 
1217   if (SupportsArenas(descriptor_)) {
1218     format(
1219         "inline void Swap($classname$* other) {\n"
1220         "  if (other == this) return;\n"
1221         "  if (GetArenaNoVirtual() == other->GetArenaNoVirtual()) {\n"
1222         "    InternalSwap(other);\n"
1223         "  } else {\n"
1224         "    ::PROTOBUF_NAMESPACE_ID::internal::GenericSwap(this, other);\n"
1225         "  }\n"
1226         "}\n"
1227         "void UnsafeArenaSwap($classname$* other) {\n"
1228         "  if (other == this) return;\n"
1229         "  $DCHK$(GetArenaNoVirtual() == other->GetArenaNoVirtual());\n"
1230         "  InternalSwap(other);\n"
1231         "}\n");
1232   } else {
1233     format(
1234         "inline void Swap($classname$* other) {\n"
1235         "  if (other == this) return;\n"
1236         "  InternalSwap(other);\n"
1237         "}\n");
1238   }
1239 
1240   format(
1241       "\n"
1242       "// implements Message ----------------------------------------------\n"
1243       "\n"
1244       "inline $classname$* New() const$ new_final$ {\n"
1245       "  return CreateMaybeMessage<$classname$>(nullptr);\n"
1246       "}\n"
1247       "\n"
1248       "$classname$* New(::$proto_ns$::Arena* arena) const$ new_final$ {\n"
1249       "  return CreateMaybeMessage<$classname$>(arena);\n"
1250       "}\n");
1251 
1252   // For instances that derive from Message (rather than MessageLite), some
1253   // methods are virtual and should be marked as final.
1254   format.Set("full_final", HasDescriptorMethods(descriptor_->file(), options_)
1255                                ? "final"
1256                                : "");
1257 
1258   if (HasGeneratedMethods(descriptor_->file(), options_)) {
1259     if (HasDescriptorMethods(descriptor_->file(), options_)) {
1260       format(
1261           "void CopyFrom(const ::$proto_ns$::Message& from) final;\n"
1262           "void MergeFrom(const ::$proto_ns$::Message& from) final;\n");
1263     } else {
1264       format(
1265           "void CheckTypeAndMergeFrom(const ::$proto_ns$::MessageLite& from)\n"
1266           "  final;\n");
1267     }
1268 
1269     format.Set("clear_final",
1270                ShouldMarkClearAsFinal(descriptor_, options_) ? "final" : "");
1271     format.Set(
1272         "is_initialized_final",
1273         ShouldMarkIsInitializedAsFinal(descriptor_, options_) ? "final" : "");
1274 
1275     format(
1276         "void CopyFrom(const $classname$& from);\n"
1277         "void MergeFrom(const $classname$& from);\n"
1278         "PROTOBUF_ATTRIBUTE_REINITIALIZES void Clear()$ clear_final$;\n"
1279         "bool IsInitialized() const$ is_initialized_final$;\n"
1280         "\n"
1281         "size_t ByteSizeLong() const final;\n"
1282         "#if $GOOGLE_PROTOBUF$_ENABLE_EXPERIMENTAL_PARSER\n"
1283         "const char* _InternalParse(const char* ptr, "
1284         "::$proto_ns$::internal::ParseContext* ctx) final;\n"
1285         "#else\n"
1286         "bool MergePartialFromCodedStream(\n"
1287         "    ::$proto_ns$::io::CodedInputStream* input) final;\n"
1288         "#endif  // $GOOGLE_PROTOBUF$_ENABLE_EXPERIMENTAL_PARSER\n");
1289 
1290     if (!options_.table_driven_serialization ||
1291         descriptor_->options().message_set_wire_format()) {
1292       format(
1293           "void SerializeWithCachedSizes(\n"
1294           "    ::$proto_ns$::io::CodedOutputStream* output) const final;\n");
1295     }
1296     // DiscardUnknownFields() is implemented in message.cc using reflections. We
1297     // need to implement this function in generated code for messages.
1298     if (!UseUnknownFieldSet(descriptor_->file(), options_)) {
1299       format("void DiscardUnknownFields()$ full_final$;\n");
1300     }
1301     if (HasFastArraySerialization(descriptor_->file(), options_)) {
1302       format(
1303           "$uint8$* InternalSerializeWithCachedSizesToArray(\n"
1304           "    $uint8$* target) const final;\n");
1305     }
1306   }
1307 
1308   format(
1309       "int GetCachedSize() const final { return _cached_size_.Get(); }"
1310       "\n\nprivate:\n"
1311       "inline void SharedCtor();\n"
1312       "inline void SharedDtor();\n"
1313       "void SetCachedSize(int size) const$ full_final$;\n"
1314       "void InternalSwap($classname$* other);\n");
1315 
1316   format(
1317       // Friend AnyMetadata so that it can call this FullMessageName() method.
1318       "friend class ::$proto_ns$::internal::AnyMetadata;\n"
1319       "static $1$ FullMessageName() {\n"
1320       "  return \"$full_name$\";\n"
1321       "}\n",
1322       options_.opensource_runtime ? "::PROTOBUF_NAMESPACE_ID::StringPiece"
1323                                   : "::StringPiece");
1324 
1325   if (SupportsArenas(descriptor_)) {
1326     format(
1327         // TODO(gerbens) Make this private! Currently people are deriving from
1328         // protos to give access to this constructor, breaking the invariants
1329         // we rely on.
1330         "protected:\n"
1331         "explicit $classname$(::$proto_ns$::Arena* arena);\n"
1332         "private:\n"
1333         "static void ArenaDtor(void* object);\n"
1334         "inline void RegisterArenaDtor(::$proto_ns$::Arena* arena);\n");
1335   }
1336 
1337   if (SupportsArenas(descriptor_)) {
1338     format(
1339         "private:\n"
1340         "inline ::$proto_ns$::Arena* GetArenaNoVirtual() const {\n"
1341         "  return _internal_metadata_.arena();\n"
1342         "}\n"
1343         "inline void* MaybeArenaPtr() const {\n"
1344         "  return _internal_metadata_.raw_arena_ptr();\n"
1345         "}\n");
1346   } else {
1347     format(
1348         "private:\n"
1349         "inline ::$proto_ns$::Arena* GetArenaNoVirtual() const {\n"
1350         "  return nullptr;\n"
1351         "}\n"
1352         "inline void* MaybeArenaPtr() const {\n"
1353         "  return nullptr;\n"
1354         "}\n");
1355   }
1356 
1357   format(
1358       "public:\n"
1359       "\n");
1360 
1361   if (HasDescriptorMethods(descriptor_->file(), options_)) {
1362     format(
1363         "::$proto_ns$::Metadata GetMetadata() const final;\n"
1364         "private:\n"
1365         "static ::$proto_ns$::Metadata GetMetadataStatic() {\n"
1366         "  ::$proto_ns$::internal::AssignDescriptors(&::$desc_table$);\n"
1367         "  return ::$desc_table$.file_level_metadata[kIndexInFileMessages];\n"
1368         "}\n"
1369         "\n"
1370         "public:\n"
1371         "\n");
1372   } else {
1373     format(
1374         "std::string GetTypeName() const final;\n"
1375         "\n");
1376   }
1377 
1378   format(
1379       "// nested types ----------------------------------------------------\n"
1380       "\n");
1381 
1382   // Import all nested message classes into this class's scope with typedefs.
1383   for (int i = 0; i < descriptor_->nested_type_count(); i++) {
1384     const Descriptor* nested_type = descriptor_->nested_type(i);
1385     if (!IsMapEntryMessage(nested_type)) {
1386       format.Set("nested_full_name", ClassName(nested_type, false));
1387       format.Set("nested_name", ResolveKeyword(nested_type->name()));
1388       format("typedef ${1$$nested_full_name$$}$ ${1$$nested_name$$}$;\n",
1389              nested_type);
1390     }
1391   }
1392 
1393   if (descriptor_->nested_type_count() > 0) {
1394     format("\n");
1395   }
1396 
1397   // Import all nested enums and their values into this class's scope with
1398   // typedefs and constants.
1399   for (int i = 0; i < descriptor_->enum_type_count(); i++) {
1400     enum_generators_[i]->GenerateSymbolImports(printer);
1401     format("\n");
1402   }
1403 
1404   format(
1405       "// accessors -------------------------------------------------------\n"
1406       "\n");
1407 
1408   // Generate accessor methods for all fields.
1409   GenerateFieldAccessorDeclarations(printer);
1410 
1411   // Declare extension identifiers.
1412   for (int i = 0; i < descriptor_->extension_count(); i++) {
1413     extension_generators_[i]->GenerateDeclaration(printer);
1414   }
1415 
1416 
1417   format("// @@protoc_insertion_point(class_scope:$full_name$)\n");
1418 
1419   // Generate private members.
1420   format.Outdent();
1421   format(" private:\n");
1422   format.Indent();
1423   // TODO(seongkim): Remove hack to track field access and remove this class.
1424   format("class _Internal;\n");
1425 
1426 
1427   for (auto field : FieldRange(descriptor_)) {
1428     // set_has_***() generated in all oneofs.
1429     if (!field->is_repeated() && !field->options().weak() &&
1430         field->containing_oneof()) {
1431       format("void set_has_$1$();\n", FieldName(field));
1432     }
1433   }
1434   format("\n");
1435 
1436   // Generate oneof function declarations
1437   for (auto oneof : OneOfRange(descriptor_)) {
1438     format(
1439         "inline bool has_$1$() const;\n"
1440         "inline void clear_has_$1$();\n\n",
1441         oneof->name());
1442   }
1443 
1444   if (HasGeneratedMethods(descriptor_->file(), options_) &&
1445       !descriptor_->options().message_set_wire_format() &&
1446       num_required_fields_ > 1) {
1447     format(
1448         "// helper for ByteSizeLong()\n"
1449         "size_t RequiredFieldsByteSizeFallback() const;\n\n");
1450   }
1451 
1452   // Prepare decls for _cached_size_ and _has_bits_.  Their position in the
1453   // output will be determined later.
1454 
1455   bool need_to_emit_cached_size = true;
1456   const std::string cached_size_decl =
1457       "mutable ::$proto_ns$::internal::CachedSize _cached_size_;\n";
1458 
1459   const size_t sizeof_has_bits = HasBitsSize();
1460   const std::string has_bits_decl =
1461       sizeof_has_bits == 0
1462           ? ""
1463           : StrCat("::$proto_ns$::internal::HasBits<",
1464                          sizeof_has_bits / 4, "> _has_bits_;\n");
1465 
1466   // To minimize padding, data members are divided into three sections:
1467   // (1) members assumed to align to 8 bytes
1468   // (2) members corresponding to message fields, re-ordered to optimize
1469   //     alignment.
1470   // (3) members assumed to align to 4 bytes.
1471 
1472   // Members assumed to align to 8 bytes:
1473 
1474   if (descriptor_->extension_range_count() > 0) {
1475     format(
1476         "::$proto_ns$::internal::ExtensionSet _extensions_;\n"
1477         "\n");
1478   }
1479 
1480   if (UseUnknownFieldSet(descriptor_->file(), options_)) {
1481     format(
1482         "::$proto_ns$::internal::InternalMetadataWithArena "
1483         "_internal_metadata_;\n");
1484   } else {
1485     format(
1486         "::$proto_ns$::internal::InternalMetadataWithArenaLite "
1487         "_internal_metadata_;\n");
1488   }
1489 
1490   if (SupportsArenas(descriptor_)) {
1491     format(
1492         "template <typename T> friend class "
1493         "::$proto_ns$::Arena::InternalHelper;\n"
1494         "typedef void InternalArenaConstructable_;\n"
1495         "typedef void DestructorSkippable_;\n");
1496   }
1497 
1498   if (HasFieldPresence(descriptor_->file())) {
1499     // _has_bits_ is frequently accessed, so to reduce code size and improve
1500     // speed, it should be close to the start of the object. Placing
1501     // _cached_size_ together with _has_bits_ improves cache locality despite
1502     // potential alignment padding.
1503     format(has_bits_decl.c_str());
1504     format(cached_size_decl.c_str());
1505     need_to_emit_cached_size = false;
1506   }
1507 
1508   // Field members:
1509 
1510   // Emit some private and static members
1511   for (auto field : optimized_order_) {
1512     const FieldGenerator& generator = field_generators_.get(field);
1513     generator.GenerateStaticMembers(printer);
1514     generator.GeneratePrivateMembers(printer);
1515   }
1516 
1517   // For each oneof generate a union
1518   for (auto oneof : OneOfRange(descriptor_)) {
1519     std::string camel_oneof_name = UnderscoresToCamelCase(oneof->name(), true);
1520     format(
1521         "union $1$Union {\n"
1522         // explicit empty constructor is needed when union contains
1523         // ArenaStringPtr members for string fields.
1524         "  $1$Union() {}\n",
1525         camel_oneof_name);
1526     format.Indent();
1527     for (auto field : FieldRange(oneof)) {
1528       field_generators_.get(field).GeneratePrivateMembers(printer);
1529     }
1530     format.Outdent();
1531     format("} $1$_;\n", oneof->name());
1532     for (auto field : FieldRange(oneof)) {
1533       field_generators_.get(field).GenerateStaticMembers(printer);
1534     }
1535   }
1536 
1537   // Members assumed to align to 4 bytes:
1538 
1539   if (need_to_emit_cached_size) {
1540     format(cached_size_decl.c_str());
1541     need_to_emit_cached_size = false;
1542   }
1543 
1544   // Generate _oneof_case_.
1545   if (descriptor_->oneof_decl_count() > 0) {
1546     format(
1547         "$uint32$ _oneof_case_[$1$];\n"
1548         "\n",
1549         descriptor_->oneof_decl_count());
1550   }
1551 
1552   if (num_weak_fields_) {
1553     format("::$proto_ns$::internal::WeakFieldMap _weak_field_map_;\n");
1554   }
1555   // Generate _any_metadata_ for the Any type.
1556   if (IsAnyMessage(descriptor_, options_)) {
1557     format("::$proto_ns$::internal::AnyMetadata _any_metadata_;\n");
1558   }
1559 
1560   // The TableStruct struct needs access to the private parts, in order to
1561   // construct the offsets of all members.
1562   format("friend struct ::$tablename$;\n");
1563 
1564   format.Outdent();
1565   format("};");
1566   GOOGLE_DCHECK(!need_to_emit_cached_size);
1567 }  // NOLINT(readability/fn_size)
1568 
GenerateInlineMethods(io::Printer * printer)1569 void MessageGenerator::GenerateInlineMethods(io::Printer* printer) {
1570   if (IsMapEntryMessage(descriptor_)) return;
1571   GenerateFieldAccessorDefinitions(printer);
1572 
1573   // Generate oneof_case() functions.
1574   for (auto oneof : OneOfRange(descriptor_)) {
1575     Formatter format(printer, variables_);
1576     format.Set("camel_oneof_name", UnderscoresToCamelCase(oneof->name(), true));
1577     format.Set("oneof_name", oneof->name());
1578     format.Set("oneof_index", oneof->index());
1579     format(
1580         "inline $classname$::$camel_oneof_name$Case $classname$::"
1581         "${1$$oneof_name$_case$}$() const {\n"
1582         "  return $classname$::$camel_oneof_name$Case("
1583         "_oneof_case_[$oneof_index$]);\n"
1584         "}\n",
1585         oneof);
1586   }
1587 }
1588 
GenerateExtraDefaultFields(io::Printer * printer)1589 void MessageGenerator::GenerateExtraDefaultFields(io::Printer* printer) {
1590   // Generate oneof default instance and weak field instances for reflection
1591   // usage.
1592   Formatter format(printer, variables_);
1593   if (descriptor_->oneof_decl_count() > 0 || num_weak_fields_ > 0) {
1594     for (auto oneof : OneOfRange(descriptor_)) {
1595       for (auto field : FieldRange(oneof)) {
1596         if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE ||
1597             (field->cpp_type() == FieldDescriptor::CPPTYPE_STRING &&
1598              EffectiveStringCType(field, options_) != FieldOptions::STRING)) {
1599           format("const ");
1600         }
1601         field_generators_.get(field).GeneratePrivateMembers(printer);
1602       }
1603     }
1604     for (auto field : FieldRange(descriptor_)) {
1605       if (field->options().weak()) {
1606         format("  const ::$proto_ns$::Message* $1$_;\n", FieldName(field));
1607       }
1608     }
1609   }
1610 }
1611 
GenerateParseTable(io::Printer * printer,size_t offset,size_t aux_offset)1612 bool MessageGenerator::GenerateParseTable(io::Printer* printer, size_t offset,
1613                                           size_t aux_offset) {
1614   Formatter format(printer, variables_);
1615 
1616   if (!table_driven_) {
1617     format("{ nullptr, nullptr, 0, -1, -1, -1, -1, nullptr, false },\n");
1618     return false;
1619   }
1620 
1621   int max_field_number = 0;
1622   for (auto field : FieldRange(descriptor_)) {
1623     if (max_field_number < field->number()) {
1624       max_field_number = field->number();
1625     }
1626   }
1627 
1628   format("{\n");
1629   format.Indent();
1630 
1631   format(
1632       "$tablename$::entries + $1$,\n"
1633       "$tablename$::aux + $2$,\n"
1634       "$3$,\n",
1635       offset, aux_offset, max_field_number);
1636 
1637   if (!HasFieldPresence(descriptor_->file())) {
1638     // If we don't have field presence, then _has_bits_ does not exist.
1639     format("-1,\n");
1640   } else {
1641     format("PROTOBUF_FIELD_OFFSET($classtype$, _has_bits_),\n");
1642   }
1643 
1644   if (descriptor_->oneof_decl_count() > 0) {
1645     format("PROTOBUF_FIELD_OFFSET($classtype$, _oneof_case_),\n");
1646   } else {
1647     format("-1,  // no _oneof_case_\n");
1648   }
1649 
1650   if (descriptor_->extension_range_count() > 0) {
1651     format("PROTOBUF_FIELD_OFFSET($classtype$, _extensions_),\n");
1652   } else {
1653     format("-1,  // no _extensions_\n");
1654   }
1655 
1656   // TODO(ckennelly): Consolidate this with the calculation for
1657   // AuxillaryParseTableField.
1658   format(
1659       "PROTOBUF_FIELD_OFFSET($classtype$, _internal_metadata_),\n"
1660       "&$package_ns$::_$classname$_default_instance_,\n");
1661 
1662   if (UseUnknownFieldSet(descriptor_->file(), options_)) {
1663     format("true,\n");
1664   } else {
1665     format("false,\n");
1666   }
1667 
1668   format.Outdent();
1669   format("},\n");
1670   return true;
1671 }
1672 
GenerateSchema(io::Printer * printer,int offset,int has_offset)1673 void MessageGenerator::GenerateSchema(io::Printer* printer, int offset,
1674                                       int has_offset) {
1675   Formatter format(printer, variables_);
1676   has_offset =
1677       HasFieldPresence(descriptor_->file()) || IsMapEntryMessage(descriptor_)
1678           ? offset + has_offset
1679           : -1;
1680 
1681   format("{ $1$, $2$, sizeof($classtype$)},\n", offset, has_offset);
1682 }
1683 
1684 namespace {
1685 
1686 // We need to calculate for each field what function the table driven code
1687 // should use to serialize it. This returns the index in a lookup table.
CalcFieldNum(const FieldGenerator & generator,const FieldDescriptor * field,const Options & options)1688 uint32 CalcFieldNum(const FieldGenerator& generator,
1689                     const FieldDescriptor* field, const Options& options) {
1690   bool is_a_map = IsMapEntryMessage(field->containing_type());
1691   int type = field->type();
1692   if (type == FieldDescriptor::TYPE_STRING ||
1693       type == FieldDescriptor::TYPE_BYTES) {
1694     if (generator.IsInlined()) {
1695       type = internal::FieldMetadata::kInlinedType;
1696     }
1697     // string field
1698     if (IsCord(field, options)) {
1699       type = internal::FieldMetadata::kCordType;
1700     } else if (IsStringPiece(field, options)) {
1701       type = internal::FieldMetadata::kStringPieceType;
1702     }
1703   }
1704   if (field->containing_oneof()) {
1705     return internal::FieldMetadata::CalculateType(
1706         type, internal::FieldMetadata::kOneOf);
1707   }
1708   if (field->is_packed()) {
1709     return internal::FieldMetadata::CalculateType(
1710         type, internal::FieldMetadata::kPacked);
1711   } else if (field->is_repeated()) {
1712     return internal::FieldMetadata::CalculateType(
1713         type, internal::FieldMetadata::kRepeated);
1714   } else if (!HasFieldPresence(field->file()) &&
1715              field->containing_oneof() == NULL && !is_a_map) {
1716     return internal::FieldMetadata::CalculateType(
1717         type, internal::FieldMetadata::kNoPresence);
1718   } else {
1719     return internal::FieldMetadata::CalculateType(
1720         type, internal::FieldMetadata::kPresence);
1721   }
1722 }
1723 
FindMessageIndexInFile(const Descriptor * descriptor)1724 int FindMessageIndexInFile(const Descriptor* descriptor) {
1725   std::vector<const Descriptor*> flatten =
1726       FlattenMessagesInFile(descriptor->file());
1727   return std::find(flatten.begin(), flatten.end(), descriptor) -
1728          flatten.begin();
1729 }
1730 
1731 }  // namespace
1732 
GenerateFieldMetadata(io::Printer * printer)1733 int MessageGenerator::GenerateFieldMetadata(io::Printer* printer) {
1734   Formatter format(printer, variables_);
1735   if (!options_.table_driven_serialization) {
1736     return 0;
1737   }
1738 
1739   std::vector<const FieldDescriptor*> sorted = SortFieldsByNumber(descriptor_);
1740   if (IsMapEntryMessage(descriptor_)) {
1741     for (int i = 0; i < 2; i++) {
1742       const FieldDescriptor* field = sorted[i];
1743       const FieldGenerator& generator = field_generators_.get(field);
1744 
1745       uint32 tag = internal::WireFormatLite::MakeTag(
1746           field->number(), WireFormat::WireTypeForFieldType(field->type()));
1747 
1748       std::map<std::string, std::string> vars;
1749       vars["classtype"] = QualifiedClassName(descriptor_, options_);
1750       vars["field_name"] = FieldName(field);
1751       vars["tag"] = StrCat(tag);
1752       vars["hasbit"] = StrCat(i);
1753       vars["type"] = StrCat(CalcFieldNum(generator, field, options_));
1754       vars["ptr"] = "nullptr";
1755       if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
1756         GOOGLE_CHECK(!IsMapEntryMessage(field->message_type()));
1757         vars["ptr"] =
1758             "::" + UniqueName("TableStruct", field->message_type(), options_) +
1759             "::serialization_table + " +
1760             StrCat(FindMessageIndexInFile(field->message_type()));
1761       }
1762       Formatter::SaveState saver(&format);
1763       format.AddMap(vars);
1764       format(
1765           "{PROTOBUF_FIELD_OFFSET("
1766           "::$proto_ns$::internal::MapEntryHelper<$classtype$::"
1767           "SuperType>, $field_name$_), $tag$,"
1768           "PROTOBUF_FIELD_OFFSET("
1769           "::$proto_ns$::internal::MapEntryHelper<$classtype$::"
1770           "SuperType>, _has_bits_) * 8 + $hasbit$, $type$, "
1771           "$ptr$},\n");
1772     }
1773     return 2;
1774   }
1775   format(
1776       "{PROTOBUF_FIELD_OFFSET($classtype$, _cached_size_),"
1777       " 0, 0, 0, nullptr},\n");
1778   std::vector<const Descriptor::ExtensionRange*> sorted_extensions;
1779   sorted_extensions.reserve(descriptor_->extension_range_count());
1780   for (int i = 0; i < descriptor_->extension_range_count(); ++i) {
1781     sorted_extensions.push_back(descriptor_->extension_range(i));
1782   }
1783   std::sort(sorted_extensions.begin(), sorted_extensions.end(),
1784             ExtensionRangeSorter());
1785   for (int i = 0, extension_idx = 0; /* no range */; i++) {
1786     for (; extension_idx < sorted_extensions.size() &&
1787            (i == sorted.size() ||
1788             sorted_extensions[extension_idx]->start < sorted[i]->number());
1789          extension_idx++) {
1790       const Descriptor::ExtensionRange* range =
1791           sorted_extensions[extension_idx];
1792       format(
1793           "{PROTOBUF_FIELD_OFFSET($classtype$, _extensions_), "
1794           "$1$, $2$, ::$proto_ns$::internal::FieldMetadata::kSpecial, "
1795           "reinterpret_cast<const "
1796           "void*>(::$proto_ns$::internal::ExtensionSerializer)},\n",
1797           range->start, range->end);
1798     }
1799     if (i == sorted.size()) break;
1800     const FieldDescriptor* field = sorted[i];
1801 
1802     uint32 tag = internal::WireFormatLite::MakeTag(
1803         field->number(), WireFormat::WireTypeForFieldType(field->type()));
1804     if (field->is_packed()) {
1805       tag = internal::WireFormatLite::MakeTag(
1806           field->number(), WireFormatLite::WIRETYPE_LENGTH_DELIMITED);
1807     }
1808 
1809     std::string classfieldname = FieldName(field);
1810     if (field->containing_oneof()) {
1811       classfieldname = field->containing_oneof()->name();
1812     }
1813     format.Set("field_name", classfieldname);
1814     std::string ptr = "nullptr";
1815     if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
1816       if (IsMapEntryMessage(field->message_type())) {
1817         format(
1818             "{PROTOBUF_FIELD_OFFSET($classtype$, $field_name$_), $1$, $2$, "
1819             "::$proto_ns$::internal::FieldMetadata::kSpecial, "
1820             "reinterpret_cast<const void*>(static_cast< "
1821             "::$proto_ns$::internal::SpecialSerializer>("
1822             "::$proto_ns$::internal::MapFieldSerializer< "
1823             "::$proto_ns$::internal::MapEntryToMapField<"
1824             "$3$>::MapFieldType, "
1825             "$tablename$::serialization_table>))},\n",
1826             tag, FindMessageIndexInFile(field->message_type()),
1827             QualifiedClassName(field->message_type(), options_));
1828         continue;
1829       } else if (!field->message_type()->options().message_set_wire_format()) {
1830         // message_set doesn't have the usual table and we need to
1831         // dispatch to generated serializer, hence ptr stays zero.
1832         ptr =
1833             "::" + UniqueName("TableStruct", field->message_type(), options_) +
1834             "::serialization_table + " +
1835             StrCat(FindMessageIndexInFile(field->message_type()));
1836       }
1837     }
1838 
1839     const FieldGenerator& generator = field_generators_.get(field);
1840     int type = CalcFieldNum(generator, field, options_);
1841 
1842     if (IsLazy(field, options_)) {
1843       type = internal::FieldMetadata::kSpecial;
1844       ptr = "reinterpret_cast<const void*>(::" + variables_["proto_ns"] +
1845             "::internal::LazyFieldSerializer";
1846       if (field->containing_oneof()) {
1847         ptr += "OneOf";
1848       } else if (!HasFieldPresence(descriptor_->file()) ||
1849                  has_bit_indices_[field->index()] == -1) {
1850         ptr += "NoPresence";
1851       }
1852       ptr += ")";
1853     }
1854 
1855     if (field->options().weak()) {
1856       // TODO(gerbens) merge weak fields into ranges
1857       format(
1858           "{PROTOBUF_FIELD_OFFSET("
1859           "$classtype$, _weak_field_map_), $1$, $1$, "
1860           "::$proto_ns$::internal::FieldMetadata::kSpecial, "
1861           "reinterpret_cast<const "
1862           "void*>(::$proto_ns$::internal::WeakFieldSerializer)},\n",
1863           tag);
1864     } else if (field->containing_oneof()) {
1865       format.Set("oneofoffset",
1866                  sizeof(uint32) * field->containing_oneof()->index());
1867       format(
1868           "{PROTOBUF_FIELD_OFFSET($classtype$, $field_name$_), $1$,"
1869           " PROTOBUF_FIELD_OFFSET($classtype$, _oneof_case_) + "
1870           "$oneofoffset$, $2$, $3$},\n",
1871           tag, type, ptr);
1872     } else if (HasFieldPresence(descriptor_->file()) &&
1873                has_bit_indices_[field->index()] != -1) {
1874       format.Set("hasbitsoffset", has_bit_indices_[field->index()]);
1875       format(
1876           "{PROTOBUF_FIELD_OFFSET($classtype$, $field_name$_), "
1877           "$1$, PROTOBUF_FIELD_OFFSET($classtype$, _has_bits_) * 8 + "
1878           "$hasbitsoffset$, $2$, $3$},\n",
1879           tag, type, ptr);
1880     } else {
1881       format(
1882           "{PROTOBUF_FIELD_OFFSET($classtype$, $field_name$_), "
1883           "$1$, ~0u, $2$, $3$},\n",
1884           tag, type, ptr);
1885     }
1886   }
1887   int num_field_metadata = 1 + sorted.size() + sorted_extensions.size();
1888   num_field_metadata++;
1889   std::string serializer = UseUnknownFieldSet(descriptor_->file(), options_)
1890                                ? "UnknownFieldSetSerializer"
1891                                : "UnknownFieldSerializerLite";
1892   format(
1893       "{PROTOBUF_FIELD_OFFSET($classtype$, _internal_metadata_), 0, ~0u, "
1894       "::$proto_ns$::internal::FieldMetadata::kSpecial, reinterpret_cast<const "
1895       "void*>(::$proto_ns$::internal::$1$)},\n",
1896       serializer);
1897   return num_field_metadata;
1898 }
1899 
GenerateFieldDefaultInstances(io::Printer * printer)1900 void MessageGenerator::GenerateFieldDefaultInstances(io::Printer* printer) {
1901   // Construct the default instances for all fields that need one.
1902   for (auto field : FieldRange(descriptor_)) {
1903     field_generators_.get(field).GenerateDefaultInstanceAllocator(printer);
1904   }
1905 }
1906 
GenerateDefaultInstanceInitializer(io::Printer * printer)1907 void MessageGenerator::GenerateDefaultInstanceInitializer(
1908     io::Printer* printer) {
1909   Formatter format(printer, variables_);
1910 
1911   // The default instance needs all of its embedded message pointers
1912   // cross-linked to other default instances.  We can't do this initialization
1913   // in the constructor because some other default instances may not have been
1914   // constructed yet at that time.
1915   // TODO(kenton):  Maybe all message fields (even for non-default messages)
1916   //   should be initialized to point at default instances rather than NULL?
1917   for (auto field : FieldRange(descriptor_)) {
1918     Formatter::SaveState saver(&format);
1919 
1920     if (!field->is_repeated() && !IsLazy(field, options_) &&
1921         field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE &&
1922         (field->containing_oneof() == NULL ||
1923          HasDescriptorMethods(descriptor_->file(), options_))) {
1924       std::string name;
1925       if (field->containing_oneof() || field->options().weak()) {
1926         name = "_" + classname_ + "_default_instance_.";
1927       } else {
1928         name =
1929             "_" + classname_ + "_default_instance_._instance.get_mutable()->";
1930       }
1931       name += FieldName(field);
1932       format.Set("name", name);
1933       if (IsWeak(field, options_)) {
1934         format(
1935             "$package_ns$::$name$_ = reinterpret_cast<const "
1936             "::$proto_ns$::Message*>(&$1$);\n"
1937             "if ($package_ns$::$name$_ == nullptr) {\n"
1938             "  $package_ns$::$name$_ = "
1939             "::$proto_ns$::Empty::internal_default_instance();\n"
1940             "}\n",
1941             QualifiedDefaultInstanceName(field->message_type(),
1942                                          options_));  // 1
1943         continue;
1944       }
1945       format(
1946           "$package_ns$::$name$_ = const_cast< $1$*>(\n"
1947           "    $1$::internal_default_instance());\n",
1948           FieldMessageTypeName(field, options_));
1949     } else if (field->containing_oneof() &&
1950                HasDescriptorMethods(descriptor_->file(), options_)) {
1951       field_generators_.get(field).GenerateConstructorCode(printer);
1952     }
1953   }
1954 }
1955 
GenerateClassMethods(io::Printer * printer)1956 void MessageGenerator::GenerateClassMethods(io::Printer* printer) {
1957   Formatter format(printer, variables_);
1958   if (IsMapEntryMessage(descriptor_)) {
1959     format(
1960         "$classname$::$classname$() {}\n"
1961         "$classname$::$classname$(::$proto_ns$::Arena* arena)\n"
1962         "    : SuperType(arena) {}\n"
1963         "void $classname$::MergeFrom(const $classname$& other) {\n"
1964         "  MergeFromInternal(other);\n"
1965         "}\n");
1966     if (HasDescriptorMethods(descriptor_->file(), options_)) {
1967       format(
1968           "::$proto_ns$::Metadata $classname$::GetMetadata() const {\n"
1969           "  return GetMetadataStatic();\n"
1970           "}\n");
1971       format(
1972           "void $classname$::MergeFrom(\n"
1973           "    const ::$proto_ns$::Message& other) {\n"
1974           "  ::$proto_ns$::Message::MergeFrom(other);\n"
1975           "}\n"
1976           "\n");
1977     }
1978     return;
1979   }
1980 
1981   // TODO(gerbens) Remove this function. With a little bit of cleanup and
1982   // refactoring this is superfluous.
1983   format("void $classname$::InitAsDefaultInstance() {\n");
1984   format.Indent();
1985   GenerateDefaultInstanceInitializer(printer);
1986   format.Outdent();
1987   format("}\n");
1988 
1989   if (IsAnyMessage(descriptor_, options_)) {
1990     if (HasDescriptorMethods(descriptor_->file(), options_)) {
1991       format(
1992           "void $classname$::PackFrom(const ::$proto_ns$::Message& message) {\n"
1993           "  _any_metadata_.PackFrom(message);\n"
1994           "}\n"
1995           "\n"
1996           "void $classname$::PackFrom(const ::$proto_ns$::Message& message,\n"
1997           "                           const std::string& type_url_prefix) {\n"
1998           "  _any_metadata_.PackFrom(message, type_url_prefix);\n"
1999           "}\n"
2000           "\n"
2001           "bool $classname$::UnpackTo(::$proto_ns$::Message* message) const {\n"
2002           "  return _any_metadata_.UnpackTo(message);\n"
2003           "}\n"
2004           "bool $classname$::GetAnyFieldDescriptors(\n"
2005           "    const ::$proto_ns$::Message& message,\n"
2006           "    const ::$proto_ns$::FieldDescriptor** type_url_field,\n"
2007           "    const ::$proto_ns$::FieldDescriptor** value_field) {\n"
2008           "  return ::$proto_ns$::internal::GetAnyFieldDescriptors(\n"
2009           "      message, type_url_field, value_field);\n"
2010           "}\n");
2011     }
2012     format(
2013         "bool $classname$::ParseAnyTypeUrl(const string& type_url,\n"
2014         "                                  std::string* full_type_name) {\n"
2015         "  return ::$proto_ns$::internal::ParseAnyTypeUrl(type_url,\n"
2016         "                                             full_type_name);\n"
2017         "}\n"
2018         "\n");
2019   }
2020 
2021   format(
2022       "class $classname$::_Internal {\n"
2023       " public:\n");
2024   format.Indent();
2025   if (HasFieldPresence(descriptor_->file()) && HasBitsSize() != 0) {
2026     format(
2027         "using HasBits = decltype(std::declval<$classname$>()._has_bits_);\n");
2028   }
2029   for (auto field : FieldRange(descriptor_)) {
2030     field_generators_.get(field).GenerateInternalAccessorDeclarations(printer);
2031     if (HasFieldPresence(descriptor_->file()) && !field->is_repeated() &&
2032         !field->options().weak() && !field->containing_oneof()) {
2033       int has_bit_index = has_bit_indices_[field->index()];
2034       GOOGLE_CHECK_GE(has_bit_index, 0);
2035       format(
2036           "static void set_has_$1$(HasBits* has_bits) {\n"
2037           "  (*has_bits)[$2$] |= $3$u;\n"
2038           "}\n",
2039           FieldName(field), has_bit_index / 32, (1u << (has_bit_index % 32)));
2040     }
2041   }
2042   format.Outdent();
2043   format("};\n\n");
2044   for (auto field : FieldRange(descriptor_)) {
2045     field_generators_.get(field).GenerateInternalAccessorDefinitions(printer);
2046   }
2047 
2048   // Generate non-inline field definitions.
2049   for (auto field : FieldRange(descriptor_)) {
2050     field_generators_.get(field).GenerateNonInlineAccessorDefinitions(printer);
2051     if (IsCrossFileMaybeMap(field)) {
2052       Formatter::SaveState saver(&format);
2053       std::map<std::string, std::string> vars;
2054       SetCommonFieldVariables(field, &vars, options_);
2055       if (field->containing_oneof()) {
2056         SetCommonOneofFieldVariables(field, &vars);
2057       }
2058       format.AddMap(vars);
2059       GenerateFieldClear(field, false, format);
2060     }
2061   }
2062 
2063   GenerateStructors(printer);
2064   format("\n");
2065 
2066   if (descriptor_->oneof_decl_count() > 0) {
2067     GenerateOneofClear(printer);
2068     format("\n");
2069   }
2070 
2071   if (HasGeneratedMethods(descriptor_->file(), options_)) {
2072     GenerateClear(printer);
2073     format("\n");
2074 
2075     GenerateMergeFromCodedStream(printer);
2076     format("\n");
2077 
2078     GenerateSerializeWithCachedSizes(printer);
2079     format("\n");
2080 
2081     if (HasFastArraySerialization(descriptor_->file(), options_)) {
2082       GenerateSerializeWithCachedSizesToArray(printer);
2083       format("\n");
2084     }
2085 
2086     GenerateByteSize(printer);
2087     format("\n");
2088 
2089     GenerateMergeFrom(printer);
2090     format("\n");
2091 
2092     GenerateCopyFrom(printer);
2093     format("\n");
2094 
2095     GenerateIsInitialized(printer);
2096     format("\n");
2097   }
2098 
2099   GenerateSwap(printer);
2100   format("\n");
2101 
2102   if (options_.table_driven_serialization) {
2103     format(
2104         "const void* $classname$::InternalGetTable() const {\n"
2105         "  return ::$tablename$::serialization_table + $1$;\n"
2106         "}\n"
2107         "\n",
2108         index_in_file_messages_);
2109   }
2110   if (HasDescriptorMethods(descriptor_->file(), options_)) {
2111     format(
2112         "::$proto_ns$::Metadata $classname$::GetMetadata() const {\n"
2113         "  return GetMetadataStatic();\n"
2114         "}\n"
2115         "\n");
2116   } else {
2117     format(
2118         "std::string $classname$::GetTypeName() const {\n"
2119         "  return \"$full_name$\";\n"
2120         "}\n"
2121         "\n");
2122   }
2123 
2124 }
2125 
GenerateParseOffsets(io::Printer * printer)2126 size_t MessageGenerator::GenerateParseOffsets(io::Printer* printer) {
2127   Formatter format(printer, variables_);
2128 
2129   if (!table_driven_) {
2130     return 0;
2131   }
2132 
2133   // Field "0" is special:  We use it in our switch statement of processing
2134   // types to handle the successful end tag case.
2135   format("{0, 0, 0, ::$proto_ns$::internal::kInvalidMask, 0, 0},\n");
2136   int last_field_number = 1;
2137 
2138   std::vector<const FieldDescriptor*> ordered_fields =
2139       SortFieldsByNumber(descriptor_);
2140 
2141   for (auto field : ordered_fields) {
2142     Formatter::SaveState saver(&format);
2143     GOOGLE_CHECK_GE(field->number(), last_field_number);
2144 
2145     for (; last_field_number < field->number(); last_field_number++) {
2146       format(
2147           "{ 0, 0, ::$proto_ns$::internal::kInvalidMask,\n"
2148           "  ::$proto_ns$::internal::kInvalidMask, 0, 0 },\n");
2149     }
2150     last_field_number++;
2151 
2152     unsigned char normal_wiretype, packed_wiretype, processing_type;
2153     normal_wiretype = WireFormat::WireTypeForFieldType(field->type());
2154 
2155     if (field->is_packable()) {
2156       packed_wiretype = WireFormatLite::WIRETYPE_LENGTH_DELIMITED;
2157     } else {
2158       packed_wiretype = internal::kNotPackedMask;
2159     }
2160 
2161     processing_type = static_cast<unsigned>(field->type());
2162     const FieldGenerator& generator = field_generators_.get(field);
2163     if (field->type() == FieldDescriptor::TYPE_STRING) {
2164       switch (EffectiveStringCType(field, options_)) {
2165         case FieldOptions::STRING:
2166           if (generator.IsInlined()) {
2167             processing_type = internal::TYPE_STRING_INLINED;
2168             break;
2169           }
2170           break;
2171         case FieldOptions::CORD:
2172           processing_type = internal::TYPE_STRING_CORD;
2173           break;
2174         case FieldOptions::STRING_PIECE:
2175           processing_type = internal::TYPE_STRING_STRING_PIECE;
2176           break;
2177       }
2178     } else if (field->type() == FieldDescriptor::TYPE_BYTES) {
2179       switch (EffectiveStringCType(field, options_)) {
2180         case FieldOptions::STRING:
2181           if (generator.IsInlined()) {
2182             processing_type = internal::TYPE_BYTES_INLINED;
2183             break;
2184           }
2185           break;
2186         case FieldOptions::CORD:
2187           processing_type = internal::TYPE_BYTES_CORD;
2188           break;
2189         case FieldOptions::STRING_PIECE:
2190           processing_type = internal::TYPE_BYTES_STRING_PIECE;
2191           break;
2192       }
2193     }
2194 
2195     processing_type |= static_cast<unsigned>(
2196         field->is_repeated() ? internal::kRepeatedMask : 0);
2197     processing_type |= static_cast<unsigned>(
2198         field->containing_oneof() ? internal::kOneofMask : 0);
2199 
2200     if (field->is_map()) {
2201       processing_type = internal::TYPE_MAP;
2202     }
2203 
2204     const unsigned char tag_size =
2205         WireFormat::TagSize(field->number(), field->type());
2206 
2207     std::map<std::string, std::string> vars;
2208     if (field->containing_oneof() != NULL) {
2209       vars["name"] = field->containing_oneof()->name();
2210       vars["presence"] = StrCat(field->containing_oneof()->index());
2211     } else {
2212       vars["name"] = FieldName(field);
2213       vars["presence"] = StrCat(has_bit_indices_[field->index()]);
2214     }
2215     vars["nwtype"] = StrCat(normal_wiretype);
2216     vars["pwtype"] = StrCat(packed_wiretype);
2217     vars["ptype"] = StrCat(processing_type);
2218     vars["tag_size"] = StrCat(tag_size);
2219 
2220     format.AddMap(vars);
2221 
2222     format(
2223         "{\n"
2224         "  PROTOBUF_FIELD_OFFSET($classtype$, $name$_),\n"
2225         "  static_cast<$uint32$>($presence$),\n"
2226         "  $nwtype$, $pwtype$, $ptype$, $tag_size$\n"
2227         "},\n");
2228   }
2229 
2230   return last_field_number;
2231 }
2232 
GenerateParseAuxTable(io::Printer * printer)2233 size_t MessageGenerator::GenerateParseAuxTable(io::Printer* printer) {
2234   Formatter format(printer, variables_);
2235 
2236   if (!table_driven_) {
2237     return 0;
2238   }
2239 
2240   std::vector<const FieldDescriptor*> ordered_fields =
2241       SortFieldsByNumber(descriptor_);
2242 
2243   format("::$proto_ns$::internal::AuxillaryParseTableField(),\n");
2244   int last_field_number = 1;
2245   for (auto field : ordered_fields) {
2246     Formatter::SaveState saver(&format);
2247 
2248     GOOGLE_CHECK_GE(field->number(), last_field_number);
2249     for (; last_field_number < field->number(); last_field_number++) {
2250       format("::$proto_ns$::internal::AuxillaryParseTableField(),\n");
2251     }
2252 
2253     std::map<std::string, std::string> vars;
2254     SetCommonFieldVariables(field, &vars, options_);
2255     format.AddMap(vars);
2256 
2257     switch (field->cpp_type()) {
2258       case FieldDescriptor::CPPTYPE_ENUM:
2259         if (HasPreservingUnknownEnumSemantics(field)) {
2260           format(
2261               "{::$proto_ns$::internal::AuxillaryParseTableField::enum_aux{"
2262               "nullptr}},\n");
2263         } else {
2264           format(
2265               "{::$proto_ns$::internal::AuxillaryParseTableField::enum_aux{"
2266               "$1$_IsValid}},\n",
2267               ClassName(field->enum_type(), true));
2268         }
2269         last_field_number++;
2270         break;
2271       case FieldDescriptor::CPPTYPE_MESSAGE: {
2272         if (field->is_map()) {
2273           format(
2274               "{::$proto_ns$::internal::AuxillaryParseTableField::map_"
2275               "aux{&::$proto_ns$::internal::ParseMap<$1$>}},\n",
2276               QualifiedClassName(field->message_type(), options_));
2277           last_field_number++;
2278           break;
2279         }
2280         format.Set("field_classname", ClassName(field->message_type(), false));
2281         format.Set("default_instance", QualifiedDefaultInstanceName(
2282                                            field->message_type(), options_));
2283 
2284         format(
2285             "{::$proto_ns$::internal::AuxillaryParseTableField::message_aux{\n"
2286             "  &$default_instance$}},\n");
2287         last_field_number++;
2288         break;
2289       }
2290       case FieldDescriptor::CPPTYPE_STRING: {
2291         std::string default_val;
2292         switch (EffectiveStringCType(field, options_)) {
2293           case FieldOptions::STRING:
2294             default_val = field->default_value_string().empty()
2295                               ? "&::" + variables_["proto_ns"] +
2296                                     "::internal::fixed_address_empty_string"
2297                               : "&" +
2298                                     QualifiedClassName(descriptor_, options_) +
2299                                     "::" + MakeDefaultName(field);
2300             break;
2301           case FieldOptions::CORD:
2302           case FieldOptions::STRING_PIECE:
2303             default_val =
2304                 "\"" + CEscape(field->default_value_string()) + "\"";
2305             break;
2306         }
2307         format(
2308             "{::$proto_ns$::internal::AuxillaryParseTableField::string_aux{\n"
2309             "  $1$,\n"
2310             "  \"$2$\"\n"
2311             "}},\n",
2312             default_val, field->full_name());
2313         last_field_number++;
2314         break;
2315       }
2316       default:
2317         break;
2318     }
2319   }
2320 
2321   return last_field_number;
2322 }
2323 
GenerateOffsets(io::Printer * printer)2324 std::pair<size_t, size_t> MessageGenerator::GenerateOffsets(
2325     io::Printer* printer) {
2326   Formatter format(printer, variables_);
2327 
2328   if (HasFieldPresence(descriptor_->file()) || IsMapEntryMessage(descriptor_)) {
2329     format("PROTOBUF_FIELD_OFFSET($classtype$, _has_bits_),\n");
2330   } else {
2331     format("~0u,  // no _has_bits_\n");
2332   }
2333   format("PROTOBUF_FIELD_OFFSET($classtype$, _internal_metadata_),\n");
2334   if (descriptor_->extension_range_count() > 0) {
2335     format("PROTOBUF_FIELD_OFFSET($classtype$, _extensions_),\n");
2336   } else {
2337     format("~0u,  // no _extensions_\n");
2338   }
2339   if (descriptor_->oneof_decl_count() > 0) {
2340     format("PROTOBUF_FIELD_OFFSET($classtype$, _oneof_case_[0]),\n");
2341   } else {
2342     format("~0u,  // no _oneof_case_\n");
2343   }
2344   if (num_weak_fields_ > 0) {
2345     format("PROTOBUF_FIELD_OFFSET($classtype$, _weak_field_map_),\n");
2346   } else {
2347     format("~0u,  // no _weak_field_map_\n");
2348   }
2349   const int kNumGenericOffsets = 5;  // the number of fixed offsets above
2350   const size_t offsets = kNumGenericOffsets + descriptor_->field_count() +
2351                          descriptor_->oneof_decl_count();
2352   size_t entries = offsets;
2353   for (auto field : FieldRange(descriptor_)) {
2354     if (field->containing_oneof() || field->options().weak()) {
2355       format("offsetof($classtype$DefaultTypeInternal, $1$_)",
2356              FieldName(field));
2357     } else {
2358       format("PROTOBUF_FIELD_OFFSET($classtype$, $1$_)", FieldName(field));
2359     }
2360 
2361     uint32 tag = field_generators_.get(field).CalculateFieldTag();
2362     if (tag != 0) {
2363       format(" | $1$", tag);
2364     }
2365 
2366     format(",\n");
2367   }
2368 
2369   for (auto oneof : OneOfRange(descriptor_)) {
2370     format("PROTOBUF_FIELD_OFFSET($classtype$, $1$_),\n", oneof->name());
2371   }
2372 
2373   if (IsMapEntryMessage(descriptor_)) {
2374     entries += 2;
2375     format(
2376         "0,\n"
2377         "1,\n");
2378   } else if (HasFieldPresence(descriptor_->file())) {
2379     entries += has_bit_indices_.size();
2380     for (int i = 0; i < has_bit_indices_.size(); i++) {
2381       const std::string index =
2382           has_bit_indices_[i] >= 0 ? StrCat(has_bit_indices_[i]) : "~0u";
2383       format("$1$,\n", index);
2384     }
2385   }
2386 
2387   return std::make_pair(entries, offsets);
2388 }
2389 
GenerateSharedConstructorCode(io::Printer * printer)2390 void MessageGenerator::GenerateSharedConstructorCode(io::Printer* printer) {
2391   Formatter format(printer, variables_);
2392 
2393   format("void $classname$::SharedCtor() {\n");
2394   if (scc_analyzer_->GetSCCAnalysis(scc_analyzer_->GetSCC(descriptor_))
2395           .constructor_requires_initialization) {
2396     format("  ::$proto_ns$::internal::InitSCC(&$scc_info$.base);\n");
2397   }
2398 
2399   format.Indent();
2400 
2401   std::vector<bool> processed(optimized_order_.size(), false);
2402   GenerateConstructorBody(printer, processed, false);
2403 
2404   for (auto oneof : OneOfRange(descriptor_)) {
2405     format("clear_has_$1$();\n", oneof->name());
2406   }
2407 
2408   format.Outdent();
2409   format("}\n\n");
2410 }
2411 
GenerateSharedDestructorCode(io::Printer * printer)2412 void MessageGenerator::GenerateSharedDestructorCode(io::Printer* printer) {
2413   Formatter format(printer, variables_);
2414 
2415   format("void $classname$::SharedDtor() {\n");
2416   format.Indent();
2417   if (SupportsArenas(descriptor_)) {
2418     format("$DCHK$(GetArenaNoVirtual() == nullptr);\n");
2419   }
2420   // Write the destructors for each field except oneof members.
2421   // optimized_order_ does not contain oneof fields.
2422   for (auto field : optimized_order_) {
2423     field_generators_.get(field).GenerateDestructorCode(printer);
2424   }
2425 
2426   // Generate code to destruct oneofs. Clearing should do the work.
2427   for (auto oneof : OneOfRange(descriptor_)) {
2428     format(
2429         "if (has_$1$()) {\n"
2430         "  clear_$1$();\n"
2431         "}\n",
2432         oneof->name());
2433   }
2434 
2435   if (num_weak_fields_) {
2436     format("_weak_field_map_.ClearAll();\n");
2437   }
2438   format.Outdent();
2439   format(
2440       "}\n"
2441       "\n");
2442 }
2443 
GenerateArenaDestructorCode(io::Printer * printer)2444 void MessageGenerator::GenerateArenaDestructorCode(io::Printer* printer) {
2445   Formatter format(printer, variables_);
2446 
2447   // Generate the ArenaDtor() method. Track whether any fields actually produced
2448   // code that needs to be called.
2449   format("void $classname$::ArenaDtor(void* object) {\n");
2450   format.Indent();
2451 
2452   // This code is placed inside a static method, rather than an ordinary one,
2453   // since that simplifies Arena's destructor list (ordinary function pointers
2454   // rather than member function pointers). _this is the object being
2455   // destructed.
2456   format(
2457       "$classname$* _this = reinterpret_cast< $classname$* >(object);\n"
2458       // avoid an "unused variable" warning in case no fields have dtor code.
2459       "(void)_this;\n");
2460 
2461   bool need_registration = false;
2462   // Process non-oneof fields first.
2463   for (auto field : optimized_order_) {
2464     if (field_generators_.get(field).GenerateArenaDestructorCode(printer)) {
2465       need_registration = true;
2466     }
2467   }
2468 
2469   // Process oneof fields.
2470   //
2471   // Note:  As of 10/5/2016, GenerateArenaDestructorCode does not emit anything
2472   // and returns false for oneof fields.
2473   for (auto oneof : OneOfRange(descriptor_)) {
2474     for (auto field : FieldRange(oneof)) {
2475       if (field_generators_.get(field).GenerateArenaDestructorCode(printer)) {
2476         need_registration = true;
2477       }
2478     }
2479   }
2480   if (num_weak_fields_) {
2481     // _this is the object being destructed (we are inside a static method
2482     // here).
2483     format("_this->_weak_field_map_.ClearAll();\n");
2484     need_registration = true;
2485   }
2486 
2487   format.Outdent();
2488   format("}\n");
2489 
2490   if (need_registration) {
2491     format(
2492         "inline void $classname$::RegisterArenaDtor(::$proto_ns$::Arena* "
2493         "arena) {\n"
2494         "  if (arena != nullptr) {\n"
2495         "    arena->OwnCustomDestructor(this, &$classname$::ArenaDtor);\n"
2496         "  }\n"
2497         "}\n");
2498   } else {
2499     format(
2500         "void $classname$::RegisterArenaDtor(::$proto_ns$::Arena*) {\n"
2501         "}\n");
2502   }
2503 }
2504 
GenerateConstructorBody(io::Printer * printer,std::vector<bool> processed,bool copy_constructor) const2505 void MessageGenerator::GenerateConstructorBody(io::Printer* printer,
2506                                                std::vector<bool> processed,
2507                                                bool copy_constructor) const {
2508   Formatter format(printer, variables_);
2509   const FieldDescriptor* last_start = NULL;
2510   // RunMap maps from fields that start each run to the number of fields in that
2511   // run.  This is optimized for the common case that there are very few runs in
2512   // a message and that most of the eligible fields appear together.
2513   typedef std::unordered_map<const FieldDescriptor*, size_t> RunMap;
2514   RunMap runs;
2515 
2516   for (auto field : optimized_order_) {
2517     if ((copy_constructor && IsPOD(field)) ||
2518         (!copy_constructor && CanConstructByZeroing(field, options_))) {
2519       if (last_start == NULL) {
2520         last_start = field;
2521       }
2522 
2523       runs[last_start]++;
2524     } else {
2525       last_start = NULL;
2526     }
2527   }
2528 
2529   std::string pod_template;
2530   if (copy_constructor) {
2531     pod_template =
2532         "::memcpy(&$first$_, &from.$first$_,\n"
2533         "  static_cast<size_t>(reinterpret_cast<char*>(&$last$_) -\n"
2534         "  reinterpret_cast<char*>(&$first$_)) + sizeof($last$_));\n";
2535   } else {
2536     pod_template =
2537         "::memset(&$first$_, 0, static_cast<size_t>(\n"
2538         "    reinterpret_cast<char*>(&$last$_) -\n"
2539         "    reinterpret_cast<char*>(&$first$_)) + sizeof($last$_));\n";
2540   }
2541 
2542   for (int i = 0; i < optimized_order_.size(); ++i) {
2543     if (processed[i]) {
2544       continue;
2545     }
2546 
2547     const FieldDescriptor* field = optimized_order_[i];
2548     RunMap::const_iterator it = runs.find(field);
2549 
2550     // We only apply the memset technique to runs of more than one field, as
2551     // assignment is better than memset for generated code clarity.
2552     if (it != runs.end() && it->second > 1) {
2553       // Use a memset, then skip run_length fields.
2554       const size_t run_length = it->second;
2555       const std::string first_field_name = FieldName(field);
2556       const std::string last_field_name =
2557           FieldName(optimized_order_[i + run_length - 1]);
2558 
2559       format.Set("first", first_field_name);
2560       format.Set("last", last_field_name);
2561 
2562       format(pod_template.c_str());
2563 
2564       i += run_length - 1;
2565       // ++i at the top of the loop.
2566     } else {
2567       if (copy_constructor) {
2568         field_generators_.get(field).GenerateCopyConstructorCode(printer);
2569       } else {
2570         field_generators_.get(field).GenerateConstructorCode(printer);
2571       }
2572     }
2573   }
2574 }
2575 
GenerateStructors(io::Printer * printer)2576 void MessageGenerator::GenerateStructors(io::Printer* printer) {
2577   Formatter format(printer, variables_);
2578 
2579   std::string superclass;
2580   superclass = SuperClassName(descriptor_, options_);
2581   std::string initializer_with_arena = superclass + "()";
2582 
2583   if (descriptor_->extension_range_count() > 0) {
2584     initializer_with_arena += ",\n  _extensions_(arena)";
2585   }
2586 
2587   initializer_with_arena += ",\n  _internal_metadata_(arena)";
2588 
2589   // Initialize member variables with arena constructor.
2590   for (auto field : optimized_order_) {
2591     bool has_arena_constructor = field->is_repeated();
2592     if (field->containing_oneof() == NULL &&
2593         (IsLazy(field, options_) || IsStringPiece(field, options_))) {
2594       has_arena_constructor = true;
2595     }
2596     if (has_arena_constructor) {
2597       initializer_with_arena +=
2598           std::string(",\n  ") + FieldName(field) + std::string("_(arena)");
2599     }
2600   }
2601 
2602   if (IsAnyMessage(descriptor_, options_)) {
2603     initializer_with_arena += ",\n  _any_metadata_(&type_url_, &value_)";
2604   }
2605   if (num_weak_fields_ > 0) {
2606     initializer_with_arena += ", _weak_field_map_(arena)";
2607   }
2608 
2609   std::string initializer_null =
2610       superclass + "(), _internal_metadata_(nullptr)";
2611   if (IsAnyMessage(descriptor_, options_)) {
2612     initializer_null += ", _any_metadata_(&type_url_, &value_)";
2613   }
2614   if (num_weak_fields_ > 0) {
2615     initializer_null += ", _weak_field_map_(nullptr)";
2616   }
2617 
2618   format(
2619       "$classname$::$classname$()\n"
2620       "  : $1$ {\n"
2621       "  SharedCtor();\n"
2622       "  // @@protoc_insertion_point(constructor:$full_name$)\n"
2623       "}\n",
2624       initializer_null);
2625 
2626   if (SupportsArenas(descriptor_)) {
2627     format(
2628         "$classname$::$classname$(::$proto_ns$::Arena* arena)\n"
2629         "  : $1$ {\n"
2630         "  SharedCtor();\n"
2631         "  RegisterArenaDtor(arena);\n"
2632         "  // @@protoc_insertion_point(arena_constructor:$full_name$)\n"
2633         "}\n",
2634         initializer_with_arena);
2635   }
2636 
2637   // Generate the copy constructor.
2638   if (UsingImplicitWeakFields(descriptor_->file(), options_)) {
2639     // If we are in lite mode and using implicit weak fields, we generate a
2640     // one-liner copy constructor that delegates to MergeFrom. This saves some
2641     // code size and also cuts down on the complexity of implicit weak fields.
2642     // We might eventually want to do this for all lite protos.
2643     format(
2644         "$classname$::$classname$(const $classname$& from)\n"
2645         "  : $classname$() {\n"
2646         "  MergeFrom(from);\n"
2647         "}\n");
2648   } else {
2649     format(
2650         "$classname$::$classname$(const $classname$& from)\n"
2651         "  : $superclass$()");
2652     format.Indent();
2653     format.Indent();
2654     format.Indent();
2655     format(",\n_internal_metadata_(nullptr)");
2656 
2657     if (HasFieldPresence(descriptor_->file())) {
2658       if (!IsProto2MessageSet(descriptor_, options_)) {
2659         format(",\n_has_bits_(from._has_bits_)");
2660       }
2661     }
2662 
2663     std::vector<bool> processed(optimized_order_.size(), false);
2664     for (int i = 0; i < optimized_order_.size(); i++) {
2665       auto field = optimized_order_[i];
2666       if (!(field->is_repeated() && !(field->is_map())) &&
2667           !IsCord(field, options_)) {
2668         continue;
2669       }
2670 
2671       processed[i] = true;
2672       format(",\n$1$_(from.$1$_)", FieldName(field));
2673     }
2674 
2675     if (IsAnyMessage(descriptor_, options_)) {
2676       format(",\n_any_metadata_(&type_url_, &value_)");
2677     }
2678     if (num_weak_fields_ > 0) {
2679       format(",\n_weak_field_map_(from._weak_field_map_)");
2680     }
2681 
2682     format.Outdent();
2683     format.Outdent();
2684     format(" {\n");
2685 
2686     format("_internal_metadata_.MergeFrom(from._internal_metadata_);\n");
2687 
2688     if (descriptor_->extension_range_count() > 0) {
2689       format("_extensions_.MergeFrom(from._extensions_);\n");
2690     }
2691 
2692     GenerateConstructorBody(printer, processed, true);
2693 
2694     // Copy oneof fields. Oneof field requires oneof case check.
2695     for (auto oneof : OneOfRange(descriptor_)) {
2696       format(
2697           "clear_has_$1$();\n"
2698           "switch (from.$1$_case()) {\n",
2699           oneof->name());
2700       format.Indent();
2701       for (auto field : FieldRange(oneof)) {
2702         format("case k$1$: {\n", UnderscoresToCamelCase(field->name(), true));
2703         format.Indent();
2704         field_generators_.get(field).GenerateMergingCode(printer);
2705         format("break;\n");
2706         format.Outdent();
2707         format("}\n");
2708       }
2709       format(
2710           "case $1$_NOT_SET: {\n"
2711           "  break;\n"
2712           "}\n",
2713           ToUpper(oneof->name()));
2714       format.Outdent();
2715       format("}\n");
2716     }
2717 
2718     format.Outdent();
2719     format(
2720         "  // @@protoc_insertion_point(copy_constructor:$full_name$)\n"
2721         "}\n"
2722         "\n");
2723   }
2724 
2725   // Generate the shared constructor code.
2726   GenerateSharedConstructorCode(printer);
2727 
2728   // Generate the destructor.
2729   format(
2730       "$classname$::~$classname$() {\n"
2731       "  // @@protoc_insertion_point(destructor:$full_name$)\n"
2732       "  SharedDtor();\n"
2733       "}\n"
2734       "\n");
2735 
2736   // Generate the shared destructor code.
2737   GenerateSharedDestructorCode(printer);
2738 
2739   // Generate the arena-specific destructor code.
2740   if (SupportsArenas(descriptor_)) {
2741     GenerateArenaDestructorCode(printer);
2742   }
2743 
2744   // Generate SetCachedSize.
2745   format(
2746       "void $classname$::SetCachedSize(int size) const {\n"
2747       "  _cached_size_.Set(size);\n"
2748       "}\n");
2749 
2750   format(
2751       "const $classname$& $classname$::default_instance() {\n"
2752       "  "
2753       "::$proto_ns$::internal::InitSCC(&::$scc_info$.base)"
2754       ";\n"
2755       "  return *internal_default_instance();\n"
2756       "}\n\n");
2757 }
2758 
GenerateSourceInProto2Namespace(io::Printer * printer)2759 void MessageGenerator::GenerateSourceInProto2Namespace(io::Printer* printer) {
2760   Formatter format(printer, variables_);
2761   format(
2762       "template<> "
2763       "PROTOBUF_NOINLINE "
2764       "$classtype$* Arena::CreateMaybeMessage< $classtype$ >(Arena* arena) {\n"
2765       "  return Arena::$1$Internal< $classtype$ >(arena);\n"
2766       "}\n",
2767       MessageCreateFunction(descriptor_));
2768 }
2769 
GenerateClear(io::Printer * printer)2770 void MessageGenerator::GenerateClear(io::Printer* printer) {
2771   Formatter format(printer, variables_);
2772   // Performance tuning parameters
2773   const int kMaxUnconditionalPrimitiveBytesClear = 4;
2774 
2775   format(
2776       "void $classname$::Clear() {\n"
2777       "// @@protoc_insertion_point(message_clear_start:$full_name$)\n");
2778   format.Indent();
2779 
2780   format(
2781       // TODO(jwb): It would be better to avoid emitting this if it is not used,
2782       // rather than emitting a workaround for the resulting warning.
2783       "$uint32$ cached_has_bits = 0;\n"
2784       "// Prevent compiler warnings about cached_has_bits being unused\n"
2785       "(void) cached_has_bits;\n\n");
2786 
2787   int cached_has_bit_index = -1;
2788 
2789   // Step 1: Extensions
2790   if (descriptor_->extension_range_count() > 0) {
2791     format("_extensions_.Clear();\n");
2792   }
2793 
2794   int unconditional_budget = kMaxUnconditionalPrimitiveBytesClear;
2795   for (int i = 0; i < optimized_order_.size(); i++) {
2796     const FieldDescriptor* field = optimized_order_[i];
2797 
2798     if (!CanInitializeByZeroing(field)) {
2799       continue;
2800     }
2801 
2802     unconditional_budget -= EstimateAlignmentSize(field);
2803   }
2804 
2805   std::vector<std::vector<const FieldDescriptor*>> chunks_frag = CollectFields(
2806       optimized_order_,
2807       MatchRepeatedAndHasByteAndZeroInits(
2808           &has_bit_indices_, HasFieldPresence(descriptor_->file())));
2809 
2810   // Merge next non-zero initializable chunk if it has the same has_byte index
2811   // and not meeting unconditional clear condition.
2812   std::vector<std::vector<const FieldDescriptor*>> chunks;
2813   if (!HasFieldPresence(descriptor_->file())) {
2814     // Don't bother with merging without has_bit field.
2815     chunks = chunks_frag;
2816   } else {
2817     // Note that only the next chunk is considered for merging.
2818     for (int i = 0; i < chunks_frag.size(); i++) {
2819       chunks.push_back(chunks_frag[i]);
2820       const FieldDescriptor* field = chunks_frag[i].front();
2821       const FieldDescriptor* next_field =
2822           (i + 1) < chunks_frag.size() ? chunks_frag[i + 1].front() : nullptr;
2823       if (CanInitializeByZeroing(field) &&
2824           (chunks_frag[i].size() == 1 || unconditional_budget < 0) &&
2825           next_field != nullptr &&
2826           has_bit_indices_[field->index()] / 8 ==
2827               has_bit_indices_[next_field->index()] / 8) {
2828         GOOGLE_CHECK(!CanInitializeByZeroing(next_field));
2829         // Insert next chunk to the current one and skip next chunk.
2830         chunks.back().insert(chunks.back().end(), chunks_frag[i + 1].begin(),
2831                              chunks_frag[i + 1].end());
2832         i++;
2833       }
2834     }
2835   }
2836 
2837   ColdChunkSkipper cold_skipper(options_, chunks, has_bit_indices_, kColdRatio,
2838                                 HasFieldPresence(descriptor_->file()));
2839   for (int chunk_index = 0; chunk_index < chunks.size(); chunk_index++) {
2840     std::vector<const FieldDescriptor*>& chunk = chunks[chunk_index];
2841     GOOGLE_CHECK(!chunk.empty());
2842 
2843     // Step 2: Repeated fields don't use _has_bits_; emit code to clear them
2844     // here.
2845     if (chunk.front()->is_repeated()) {
2846       for (int i = 0; i < chunk.size(); i++) {
2847         const FieldDescriptor* field = chunk[i];
2848         const FieldGenerator& generator = field_generators_.get(field);
2849 
2850         generator.GenerateMessageClearingCode(printer);
2851       }
2852       continue;
2853     }
2854 
2855     cold_skipper.OnStartChunk(chunk_index, cached_has_bit_index, "", printer);
2856 
2857     // Step 3: Non-repeated fields that can be cleared by memset-to-0, then
2858     // non-repeated, non-zero initializable fields.
2859     int last_chunk = HasFieldPresence(descriptor_->file())
2860                          ? has_bit_indices_[chunk.front()->index()] / 8
2861                          : 0;
2862     int last_chunk_start = 0;
2863     int memset_run_start = -1;
2864     int memset_run_end = -1;
2865 
2866     for (int i = 0; i < chunk.size(); i++) {
2867       const FieldDescriptor* field = chunk[i];
2868       if (CanInitializeByZeroing(field)) {
2869         if (memset_run_start == -1) {
2870           memset_run_start = i;
2871         }
2872         memset_run_end = i;
2873       }
2874     }
2875 
2876     const bool have_outer_if =
2877         HasFieldPresence(descriptor_->file()) && chunk.size() > 1 &&
2878         (memset_run_end != chunk.size() - 1 || unconditional_budget < 0);
2879 
2880     if (have_outer_if) {
2881       uint32 last_chunk_mask = GenChunkMask(chunk, has_bit_indices_);
2882       const int count = popcnt(last_chunk_mask);
2883 
2884       // Check (up to) 8 has_bits at a time if we have more than one field in
2885       // this chunk.  Due to field layout ordering, we may check
2886       // _has_bits_[last_chunk * 8 / 32] multiple times.
2887       GOOGLE_DCHECK_LE(2, count);
2888       GOOGLE_DCHECK_GE(8, count);
2889 
2890       if (cached_has_bit_index != last_chunk / 4) {
2891         cached_has_bit_index = last_chunk / 4;
2892         format("cached_has_bits = _has_bits_[$1$];\n", cached_has_bit_index);
2893       }
2894       format("if (cached_has_bits & 0x$1$u) {\n",
2895              StrCat(strings::Hex(last_chunk_mask, strings::ZERO_PAD_8)));
2896       format.Indent();
2897     }
2898 
2899     if (memset_run_start != -1) {
2900       if (memset_run_start == memset_run_end) {
2901         // For clarity, do not memset a single field.
2902         const FieldGenerator& generator =
2903             field_generators_.get(chunk[memset_run_start]);
2904         generator.GenerateMessageClearingCode(printer);
2905       } else {
2906         const std::string first_field_name = FieldName(chunk[memset_run_start]);
2907         const std::string last_field_name = FieldName(chunk[memset_run_end]);
2908 
2909         format(
2910             "::memset(&$1$_, 0, static_cast<size_t>(\n"
2911             "    reinterpret_cast<char*>(&$2$_) -\n"
2912             "    reinterpret_cast<char*>(&$1$_)) + sizeof($2$_));\n",
2913             first_field_name, last_field_name);
2914       }
2915 
2916       // Advance last_chunk_start to skip over the fields we zeroed/memset.
2917       last_chunk_start = memset_run_end + 1;
2918     }
2919 
2920     // Go back and emit clears for each of the fields we processed.
2921     for (int j = last_chunk_start; j < chunk.size(); j++) {
2922       const FieldDescriptor* field = chunk[j];
2923       const FieldGenerator& generator = field_generators_.get(field);
2924 
2925       // It's faster to just overwrite primitive types, but we should only
2926       // clear strings and messages if they were set.
2927       //
2928       // TODO(kenton):  Let the CppFieldGenerator decide this somehow.
2929       bool should_check_bit =
2930           field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE ||
2931           field->cpp_type() == FieldDescriptor::CPPTYPE_STRING;
2932 
2933       bool have_enclosing_if = false;
2934       if (should_check_bit &&
2935           // If no field presence, then always clear strings/messages as well.
2936           HasFieldPresence(descriptor_->file())) {
2937         PrintPresenceCheck(format, field, has_bit_indices_, printer,
2938                            &cached_has_bit_index);
2939         have_enclosing_if = true;
2940       }
2941 
2942       generator.GenerateMessageClearingCode(printer);
2943 
2944       if (have_enclosing_if) {
2945         format.Outdent();
2946         format("}\n");
2947       }
2948     }
2949 
2950     if (have_outer_if) {
2951       format.Outdent();
2952       format("}\n");
2953     }
2954 
2955     if (cold_skipper.OnEndChunk(chunk_index, printer)) {
2956       // Reset here as it may have been updated in just closed if statement.
2957       cached_has_bit_index = -1;
2958     }
2959   }
2960 
2961   // Step 4: Unions.
2962   for (auto oneof : OneOfRange(descriptor_)) {
2963     format("clear_$1$();\n", oneof->name());
2964   }
2965 
2966   if (num_weak_fields_) {
2967     format("_weak_field_map_.ClearAll();\n");
2968   }
2969 
2970   if (HasFieldPresence(descriptor_->file())) {
2971     // Step 5: Everything else.
2972     format("_has_bits_.Clear();\n");
2973   }
2974 
2975   format("_internal_metadata_.Clear();\n");
2976 
2977   format.Outdent();
2978   format("}\n");
2979 }
2980 
GenerateOneofClear(io::Printer * printer)2981 void MessageGenerator::GenerateOneofClear(io::Printer* printer) {
2982   // Generated function clears the active field and union case (e.g. foo_case_).
2983   for (int i = 0; i < descriptor_->oneof_decl_count(); i++) {
2984     auto oneof = descriptor_->oneof_decl(i);
2985     Formatter format(printer, variables_);
2986     format.Set("oneofname", oneof->name());
2987 
2988     format(
2989         "void $classname$::clear_$oneofname$() {\n"
2990         "// @@protoc_insertion_point(one_of_clear_start:$full_name$)\n");
2991     format.Indent();
2992     format("switch ($oneofname$_case()) {\n");
2993     format.Indent();
2994     for (auto field : FieldRange(oneof)) {
2995       format("case k$1$: {\n", UnderscoresToCamelCase(field->name(), true));
2996       format.Indent();
2997       // We clear only allocated objects in oneofs
2998       if (!IsStringOrMessage(field)) {
2999         format("// No need to clear\n");
3000       } else {
3001         field_generators_.get(field).GenerateClearingCode(printer);
3002       }
3003       format("break;\n");
3004       format.Outdent();
3005       format("}\n");
3006     }
3007     format(
3008         "case $1$_NOT_SET: {\n"
3009         "  break;\n"
3010         "}\n",
3011         ToUpper(oneof->name()));
3012     format.Outdent();
3013     format(
3014         "}\n"
3015         "_oneof_case_[$1$] = $2$_NOT_SET;\n",
3016         i, ToUpper(oneof->name()));
3017     format.Outdent();
3018     format(
3019         "}\n"
3020         "\n");
3021   }
3022 }
3023 
GenerateSwap(io::Printer * printer)3024 void MessageGenerator::GenerateSwap(io::Printer* printer) {
3025   Formatter format(printer, variables_);
3026 
3027   format("void $classname$::InternalSwap($classname$* other) {\n");
3028   format.Indent();
3029   format("using std::swap;\n");
3030 
3031   if (HasGeneratedMethods(descriptor_->file(), options_)) {
3032     if (descriptor_->extension_range_count() > 0) {
3033       format("_extensions_.Swap(&other->_extensions_);\n");
3034     }
3035 
3036     format("_internal_metadata_.Swap(&other->_internal_metadata_);\n");
3037 
3038     if (HasFieldPresence(descriptor_->file())) {
3039       for (int i = 0; i < HasBitsSize() / 4; ++i) {
3040         format("swap(_has_bits_[$1$], other->_has_bits_[$1$]);\n", i);
3041       }
3042     }
3043 
3044     for (int i = 0; i < optimized_order_.size(); i++) {
3045       // optimized_order_ does not contain oneof fields, but the field
3046       // generators for these fields do not emit swapping code on their own.
3047       const FieldDescriptor* field = optimized_order_[i];
3048       field_generators_.get(field).GenerateSwappingCode(printer);
3049     }
3050 
3051     for (auto oneof : OneOfRange(descriptor_)) {
3052       format("swap($1$_, other->$1$_);\n", oneof->name());
3053     }
3054 
3055     for (int i = 0; i < descriptor_->oneof_decl_count(); i++) {
3056       format("swap(_oneof_case_[$1$], other->_oneof_case_[$1$]);\n", i);
3057     }
3058 
3059     if (num_weak_fields_) {
3060       format("_weak_field_map_.UnsafeArenaSwap(&other->_weak_field_map_);\n");
3061     }
3062   } else {
3063     format("GetReflection()->Swap(this, other);");
3064   }
3065 
3066   format.Outdent();
3067   format("}\n");
3068 }
3069 
GenerateMergeFrom(io::Printer * printer)3070 void MessageGenerator::GenerateMergeFrom(io::Printer* printer) {
3071   Formatter format(printer, variables_);
3072   if (HasDescriptorMethods(descriptor_->file(), options_)) {
3073     // Generate the generalized MergeFrom (aka that which takes in the Message
3074     // base class as a parameter).
3075     format(
3076         "void $classname$::MergeFrom(const ::$proto_ns$::Message& from) {\n"
3077         "// @@protoc_insertion_point(generalized_merge_from_start:"
3078         "$full_name$)\n"
3079         "  $DCHK$_NE(&from, this);\n");
3080     format.Indent();
3081 
3082     // Cast the message to the proper type. If we find that the message is
3083     // *not* of the proper type, we can still call Merge via the reflection
3084     // system, as the GOOGLE_CHECK above ensured that we have the same descriptor
3085     // for each message.
3086     format(
3087         "const $classname$* source =\n"
3088         "    ::$proto_ns$::DynamicCastToGenerated<$classname$>(\n"
3089         "        &from);\n"
3090         "if (source == nullptr) {\n"
3091         "// @@protoc_insertion_point(generalized_merge_from_cast_fail:"
3092         "$full_name$)\n"
3093         "  ::$proto_ns$::internal::ReflectionOps::Merge(from, this);\n"
3094         "} else {\n"
3095         "// @@protoc_insertion_point(generalized_merge_from_cast_success:"
3096         "$full_name$)\n"
3097         "  MergeFrom(*source);\n"
3098         "}\n");
3099 
3100     format.Outdent();
3101     format("}\n\n");
3102   } else {
3103     // Generate CheckTypeAndMergeFrom().
3104     format(
3105         "void $classname$::CheckTypeAndMergeFrom(\n"
3106         "    const ::$proto_ns$::MessageLite& from) {\n"
3107         "  MergeFrom(*::$proto_ns$::internal::DownCast<const $classname$*>(\n"
3108         "      &from));\n"
3109         "}\n"
3110         "\n");
3111   }
3112 
3113   // Generate the class-specific MergeFrom, which avoids the GOOGLE_CHECK and cast.
3114   format(
3115       "void $classname$::MergeFrom(const $classname$& from) {\n"
3116       "// @@protoc_insertion_point(class_specific_merge_from_start:"
3117       "$full_name$)\n"
3118       "  $DCHK$_NE(&from, this);\n");
3119   format.Indent();
3120 
3121   if (descriptor_->extension_range_count() > 0) {
3122     format("_extensions_.MergeFrom(from._extensions_);\n");
3123   }
3124 
3125   format(
3126       "_internal_metadata_.MergeFrom(from._internal_metadata_);\n"
3127       "$uint32$ cached_has_bits = 0;\n"
3128       "(void) cached_has_bits;\n\n");
3129 
3130   if (HasFieldPresence(descriptor_->file())) {
3131     std::vector<std::vector<const FieldDescriptor*>> chunks = CollectFields(
3132         optimized_order_, MatchRepeatedAndHasByte(&has_bit_indices_, true));
3133 
3134     ColdChunkSkipper cold_skipper(options_, chunks, has_bit_indices_,
3135                                   kColdRatio, true);
3136 
3137     // cached_has_bit_index maintains that:
3138     //   cached_has_bits = from._has_bits_[cached_has_bit_index]
3139     // for cached_has_bit_index >= 0
3140     int cached_has_bit_index = -1;
3141 
3142     for (int chunk_index = 0; chunk_index < chunks.size(); chunk_index++) {
3143       const std::vector<const FieldDescriptor*>& chunk = chunks[chunk_index];
3144       GOOGLE_CHECK(!chunk.empty());
3145 
3146       // Merge Repeated fields. These fields do not require a
3147       // check as we can simply iterate over them.
3148       if (chunk.front()->is_repeated()) {
3149         for (int i = 0; i < chunk.size(); i++) {
3150           const FieldDescriptor* field = chunk[i];
3151 
3152           const FieldGenerator& generator = field_generators_.get(field);
3153           generator.GenerateMergingCode(printer);
3154         }
3155         continue;
3156       }
3157 
3158       // Merge Optional and Required fields (after a _has_bit_ check).
3159       cold_skipper.OnStartChunk(chunk_index, cached_has_bit_index, "from.",
3160                                 printer);
3161 
3162       int last_chunk = has_bit_indices_[chunk.front()->index()] / 8;
3163       GOOGLE_DCHECK_NE(-1, last_chunk);
3164 
3165       const bool have_outer_if = chunk.size() > 1;
3166       if (have_outer_if) {
3167         uint32 last_chunk_mask = GenChunkMask(chunk, has_bit_indices_);
3168         const int count = popcnt(last_chunk_mask);
3169 
3170         // Check (up to) 8 has_bits at a time if we have more than one field in
3171         // this chunk.  Due to field layout ordering, we may check
3172         // _has_bits_[last_chunk * 8 / 32] multiple times.
3173         GOOGLE_DCHECK_LE(2, count);
3174         GOOGLE_DCHECK_GE(8, count);
3175 
3176         if (cached_has_bit_index != last_chunk / 4) {
3177           cached_has_bit_index = last_chunk / 4;
3178           format("cached_has_bits = from._has_bits_[$1$];\n",
3179                  cached_has_bit_index);
3180         }
3181         format("if (cached_has_bits & 0x$1$u) {\n",
3182                StrCat(strings::Hex(last_chunk_mask, strings::ZERO_PAD_8)));
3183         format.Indent();
3184       }
3185 
3186       // Go back and emit merging code for each of the fields we processed.
3187       bool deferred_has_bit_changes = false;
3188       for (const auto field : chunk) {
3189         const FieldGenerator& generator = field_generators_.get(field);
3190 
3191         // Attempt to use the state of cached_has_bits, if possible.
3192         int has_bit_index = has_bit_indices_[field->index()];
3193         if (!field->options().weak() &&
3194             cached_has_bit_index == has_bit_index / 32) {
3195           const std::string mask = StrCat(
3196               strings::Hex(1u << (has_bit_index % 32), strings::ZERO_PAD_8));
3197 
3198           format("if (cached_has_bits & 0x$1$u) {\n", mask);
3199         } else {
3200           format("if (from.has_$1$()) {\n", FieldName(field));
3201         }
3202         format.Indent();
3203 
3204         if (have_outer_if && IsPOD(field)) {
3205           // GenerateCopyConstructorCode for enum and primitive scalar fields
3206           // does not do _has_bits_ modifications.  We defer _has_bits_
3207           // manipulation until the end of the outer if.
3208           //
3209           // This can reduce the number of loads/stores by up to 7 per 8 fields.
3210           deferred_has_bit_changes = true;
3211           generator.GenerateCopyConstructorCode(printer);
3212         } else {
3213           generator.GenerateMergingCode(printer);
3214         }
3215 
3216         format.Outdent();
3217         format("}\n");
3218       }
3219 
3220       if (have_outer_if) {
3221         if (deferred_has_bit_changes) {
3222           // Flush the has bits for the primitives we deferred.
3223           GOOGLE_CHECK_LE(0, cached_has_bit_index);
3224           format("_has_bits_[$1$] |= cached_has_bits;\n", cached_has_bit_index);
3225         }
3226 
3227         format.Outdent();
3228         format("}\n");
3229       }
3230 
3231       if (cold_skipper.OnEndChunk(chunk_index, printer)) {
3232         // Reset here as it may have been updated in just closed if statement.
3233         cached_has_bit_index = -1;
3234       }
3235     }
3236   } else {
3237     // proto3
3238     for (const auto field : optimized_order_) {
3239       const FieldGenerator& generator = field_generators_.get(field);
3240       // Merge semantics without true field presence: primitive fields are
3241       // merged only if non-zero (numeric) or non-empty (string).
3242       bool have_enclosing_if =
3243           EmitFieldNonDefaultCondition(printer, "from.", field);
3244 
3245       generator.GenerateMergingCode(printer);
3246 
3247       if (have_enclosing_if) {
3248         format.Outdent();
3249         format("}\n");
3250       }
3251     }
3252   }
3253 
3254   // Merge oneof fields. Oneof field requires oneof case check.
3255   for (auto oneof : OneOfRange(descriptor_)) {
3256     format("switch (from.$1$_case()) {\n", oneof->name());
3257     format.Indent();
3258     for (auto field : FieldRange(oneof)) {
3259       format("case k$1$: {\n", UnderscoresToCamelCase(field->name(), true));
3260       format.Indent();
3261       field_generators_.get(field).GenerateMergingCode(printer);
3262       format("break;\n");
3263       format.Outdent();
3264       format("}\n");
3265     }
3266     format(
3267         "case $1$_NOT_SET: {\n"
3268         "  break;\n"
3269         "}\n",
3270         ToUpper(oneof->name()));
3271     format.Outdent();
3272     format("}\n");
3273   }
3274   if (num_weak_fields_) {
3275     format("_weak_field_map_.MergeFrom(from._weak_field_map_);\n");
3276   }
3277 
3278   format.Outdent();
3279   format("}\n");
3280 }
3281 
GenerateCopyFrom(io::Printer * printer)3282 void MessageGenerator::GenerateCopyFrom(io::Printer* printer) {
3283   Formatter format(printer, variables_);
3284   if (HasDescriptorMethods(descriptor_->file(), options_)) {
3285     // Generate the generalized CopyFrom (aka that which takes in the Message
3286     // base class as a parameter).
3287     format(
3288         "void $classname$::CopyFrom(const ::$proto_ns$::Message& from) {\n"
3289         "// @@protoc_insertion_point(generalized_copy_from_start:"
3290         "$full_name$)\n");
3291     format.Indent();
3292 
3293     format("if (&from == this) return;\n");
3294 
3295     if (!options_.opensource_runtime) {
3296       // This check is disabled in the opensource release because we're
3297       // concerned that many users do not define NDEBUG in their release
3298       // builds.
3299       format(
3300           "#ifndef NDEBUG\n"
3301           "size_t from_size = from.ByteSizeLong();\n"
3302           "#endif\n"
3303           "Clear();\n"
3304           "#ifndef NDEBUG\n"
3305           "$CHK$_EQ(from_size, from.ByteSizeLong())\n"
3306           "  << \"Source of CopyFrom changed when clearing target.  Either \"\n"
3307           "  << \"source is a nested message in target (not allowed), or \"\n"
3308           "  << \"another thread is modifying the source.\";\n"
3309           "#endif\n");
3310     } else {
3311       format("Clear();\n");
3312     }
3313     format("MergeFrom(from);\n");
3314 
3315     format.Outdent();
3316     format("}\n\n");
3317   }
3318 
3319   // Generate the class-specific CopyFrom.
3320   format(
3321       "void $classname$::CopyFrom(const $classname$& from) {\n"
3322       "// @@protoc_insertion_point(class_specific_copy_from_start:"
3323       "$full_name$)\n");
3324   format.Indent();
3325 
3326   format("if (&from == this) return;\n");
3327 
3328   if (!options_.opensource_runtime) {
3329     // This check is disabled in the opensource release because we're
3330     // concerned that many users do not define NDEBUG in their release builds.
3331     format(
3332         "#ifndef NDEBUG\n"
3333         "size_t from_size = from.ByteSizeLong();\n"
3334         "#endif\n"
3335         "Clear();\n"
3336         "#ifndef NDEBUG\n"
3337         "$CHK$_EQ(from_size, from.ByteSizeLong())\n"
3338         "  << \"Source of CopyFrom changed when clearing target.  Either \"\n"
3339         "  << \"source is a nested message in target (not allowed), or \"\n"
3340         "  << \"another thread is modifying the source.\";\n"
3341         "#endif\n");
3342   } else {
3343     format("Clear();\n");
3344   }
3345   format("MergeFrom(from);\n");
3346 
3347   format.Outdent();
3348   format("}\n");
3349 }
3350 
GenerateMergeFromCodedStream(io::Printer * printer)3351 void MessageGenerator::GenerateMergeFromCodedStream(io::Printer* printer) {
3352   std::map<std::string, std::string> vars = variables_;
3353   SetUnknkownFieldsVariable(descriptor_, options_, &vars);
3354   Formatter format(printer, vars);
3355   if (descriptor_->options().message_set_wire_format()) {
3356     // Special-case MessageSet.
3357     format(
3358         "#if $GOOGLE_PROTOBUF$_ENABLE_EXPERIMENTAL_PARSER\n"
3359         "const char* $classname$::_InternalParse(const char* ptr,\n"
3360         "                  ::$proto_ns$::internal::ParseContext* ctx) {\n"
3361         "  return _extensions_.ParseMessageSet(ptr, \n"
3362         "      internal_default_instance(), &_internal_metadata_, ctx);\n"
3363         "}\n"
3364         "#else\n"
3365         "bool $classname$::MergePartialFromCodedStream(\n"
3366         "    ::$proto_ns$::io::CodedInputStream* input) {\n"
3367         "  return _extensions_.ParseMessageSet(input,\n"
3368         "      internal_default_instance(), $mutable_unknown_fields$);\n"
3369         "}\n"
3370         "#endif  // $GOOGLE_PROTOBUF$_ENABLE_EXPERIMENTAL_PARSER\n");
3371     return;
3372   }
3373   format("#if $GOOGLE_PROTOBUF$_ENABLE_EXPERIMENTAL_PARSER\n");
3374   GenerateParserLoop(descriptor_, max_has_bit_index_, options_, scc_analyzer_,
3375                      printer);
3376   format("#else  // $GOOGLE_PROTOBUF$_ENABLE_EXPERIMENTAL_PARSER\n");
3377   std::vector<const FieldDescriptor*> ordered_fields =
3378       SortFieldsByNumber(descriptor_);
3379 
3380   format(
3381       "bool $classname$::MergePartialFromCodedStream(\n"
3382       "    ::$proto_ns$::io::CodedInputStream* input) {\n");
3383 
3384   if (table_driven_) {
3385     format.Indent();
3386 
3387     const std::string lite =
3388         UseUnknownFieldSet(descriptor_->file(), options_) ? "" : "Lite";
3389 
3390     format(
3391         "return ::$proto_ns$::internal::MergePartialFromCodedStream$1$(\n"
3392         "    this, ::$tablename$::schema[\n"
3393         "      $classname$::kIndexInFileMessages], input);\n",
3394         lite);
3395 
3396     format.Outdent();
3397 
3398     format("}\n");
3399     format("#endif  // $GOOGLE_PROTOBUF$_ENABLE_EXPERIMENTAL_PARSER\n");
3400     return;
3401   }
3402 
3403   if (SupportsArenas(descriptor_)) {
3404     for (auto field : ordered_fields) {
3405       const FieldGenerator& field_generator = field_generators_.get(field);
3406       if (field_generator.MergeFromCodedStreamNeedsArena()) {
3407         format("  ::$proto_ns$::Arena* arena = GetArenaNoVirtual();\n");
3408         break;
3409       }
3410     }
3411   }
3412 
3413   format(
3414       "#define DO_(EXPRESSION) if "
3415       "(!PROTOBUF_PREDICT_TRUE(EXPRESSION)) goto failure\n"
3416       "  $uint32$ tag;\n");
3417 
3418   if (!UseUnknownFieldSet(descriptor_->file(), options_)) {
3419     format(
3420         "  ::$proto_ns$::internal::LiteUnknownFieldSetter "
3421         "unknown_fields_setter(\n"
3422         "      &_internal_metadata_);\n"
3423         "  ::$proto_ns$::io::StringOutputStream unknown_fields_output(\n"
3424         "      unknown_fields_setter.buffer());\n"
3425         "  ::$proto_ns$::io::CodedOutputStream unknown_fields_stream(\n"
3426         "      &unknown_fields_output, false);\n");
3427   }
3428 
3429   format("  // @@protoc_insertion_point(parse_start:$full_name$)\n");
3430 
3431   format.Indent();
3432   format("for (;;) {\n");
3433   format.Indent();
3434 
3435   // To calculate the maximum tag to expect, we look at the highest-numbered
3436   // field. We need to be prepared to handle more than one wire type if that
3437   // field is a packable repeated field, so to simplify things we assume the
3438   // highest possible wire type of 5.
3439   uint32 maxtag =
3440       ordered_fields.empty() ? 0 : ordered_fields.back()->number() * 8 + 5;
3441   const int kCutoff0 = 127;               // fits in 1-byte varint
3442   const int kCutoff1 = (127 << 7) + 127;  // fits in 2-byte varint
3443 
3444   // We need to capture the last tag when parsing if this is a Group type, as
3445   // our caller will verify (via CodedInputStream::LastTagWas) that the correct
3446   // closing tag was received.
3447   bool capture_last_tag = false;
3448   const Descriptor* parent = descriptor_->containing_type();
3449   if (parent) {
3450     for (auto field : FieldRange(parent)) {
3451       if (field->type() == FieldDescriptor::TYPE_GROUP &&
3452           field->message_type() == descriptor_) {
3453         capture_last_tag = true;
3454         break;
3455       }
3456     }
3457 
3458     for (int i = 0; i < parent->extension_count(); i++) {
3459       const FieldDescriptor* field = parent->extension(i);
3460       if (field->type() == FieldDescriptor::TYPE_GROUP &&
3461           field->message_type() == descriptor_) {
3462         capture_last_tag = true;
3463         break;
3464       }
3465     }
3466   }
3467 
3468   for (int i = 0; i < descriptor_->file()->extension_count(); i++) {
3469     const FieldDescriptor* field = descriptor_->file()->extension(i);
3470     if (field->type() == FieldDescriptor::TYPE_GROUP &&
3471         field->message_type() == descriptor_) {
3472       capture_last_tag = true;
3473       break;
3474     }
3475   }
3476 
3477   format(
3478       "::std::pair<$uint32$, bool> p = "
3479       "input->ReadTagWithCutoffNoLastTag($1$u);\n"
3480       "tag = p.first;\n"
3481       "if (!p.second) goto handle_unusual;\n",
3482       maxtag <= kCutoff0 ? kCutoff0 : (maxtag <= kCutoff1 ? kCutoff1 : maxtag));
3483 
3484   if (descriptor_->field_count() > 0) {
3485     // We don't even want to print the switch() if we have no fields because
3486     // MSVC dislikes switch() statements that contain only a default value.
3487 
3488     // Note:  If we just switched on the tag rather than the field number, we
3489     // could avoid the need for the if() to check the wire type at the beginning
3490     // of each case.  However, this is actually a bit slower in practice as it
3491     // creates a jump table that is 8x larger and sparser, and meanwhile the
3492     // if()s are highly predictable.
3493     //
3494     // Historically, we inserted checks to peek at the next tag on the wire and
3495     // jump directly to the next case statement.  While this avoids the jump
3496     // table that the switch uses, it greatly increases code size (20-60%) and
3497     // inserts branches that may fail (especially for real world protos that
3498     // interleave--in field number order--hot and cold fields).  Loadtests
3499     // confirmed that removing this optimization is performance neutral.
3500     if (num_weak_fields_ > 0) {
3501       format("uint32 weak_offset;\n");
3502     }
3503     format(
3504         "switch (::$proto_ns$::internal::WireFormatLite::"
3505         "GetTagFieldNumber(tag)) {\n");
3506 
3507     format.Indent();
3508 
3509     for (auto field : ordered_fields) {
3510       PrintFieldComment(format, field);
3511       if (IsWeak(field, options_)) {
3512         format(
3513             "case $1$:\n"
3514             "  weak_offset = offsetof($classname$DefaultTypeInternal, $2$_);\n"
3515             "  goto handle_weak_field_map;\n",
3516             field->number(), FieldName(field));
3517         continue;
3518       }
3519 
3520       format("case $1$: {\n", field->number());
3521       format.Indent();
3522       const FieldGenerator& field_generator = field_generators_.get(field);
3523 
3524       // Emit code to parse the common, expected case.
3525       // MSVC is warning about truncating constant in the static_cast so
3526       // we truncate the tag explicitly.
3527       format("if (static_cast< $uint8$>(tag) == ($1$ & 0xFF)) {\n",
3528              WireFormat::MakeTag(field));
3529 
3530       format.Indent();
3531       if (field->is_packed()) {
3532         field_generator.GenerateMergeFromCodedStreamWithPacking(printer);
3533       } else {
3534         field_generator.GenerateMergeFromCodedStream(printer);
3535       }
3536       format.Outdent();
3537 
3538       // Emit code to parse unexpectedly packed or unpacked values.
3539       if (field->is_packed()) {
3540         internal::WireFormatLite::WireType wiretype =
3541             WireFormat::WireTypeForFieldType(field->type());
3542         const uint32 tag =
3543             internal::WireFormatLite::MakeTag(field->number(), wiretype);
3544         format("} else if (static_cast< $uint8$>(tag) == ($1$ & 0xFF)) {\n",
3545                tag);
3546 
3547         format.Indent();
3548         field_generator.GenerateMergeFromCodedStream(printer);
3549         format.Outdent();
3550       } else if (field->is_packable() && !field->is_packed()) {
3551         internal::WireFormatLite::WireType wiretype =
3552             internal::WireFormatLite::WIRETYPE_LENGTH_DELIMITED;
3553         const uint32 tag =
3554             internal::WireFormatLite::MakeTag(field->number(), wiretype);
3555         format("} else if (static_cast< $uint8$>(tag) == ($1$ & 0xFF)) {\n",
3556                tag);
3557         format.Indent();
3558         field_generator.GenerateMergeFromCodedStreamWithPacking(printer);
3559         format.Outdent();
3560       }
3561 
3562       format(
3563           "} else {\n"
3564           "  goto handle_unusual;\n"
3565           "}\n");
3566 
3567       format("break;\n");
3568 
3569       format.Outdent();
3570       format("}\n\n");
3571     }
3572     if (num_weak_fields_ > 0) {
3573       format("handle_weak_field_map: {\n");
3574       format.Indent();
3575 
3576       format(
3577           "if ((tag & 0x7) != 2) goto handle_unusual;\n"
3578           "DO_(_weak_field_map_.ReadMessage(input, tag >> 3,\n"
3579           "    &_$classname$_default_instance_, weak_offset));\n");
3580       format("break;\n");
3581       format.Outdent();
3582       format("}\n\n");
3583     }
3584     format("default: {\n");
3585     format.Indent();
3586   }
3587 
3588   format.Outdent();
3589   format("handle_unusual:\n");
3590   format.Indent();
3591   // If tag is 0 or an end-group tag then this must be the end of the message.
3592   if (capture_last_tag) {
3593     format(
3594         "if (tag == 0 ||\n"
3595         "    ::$proto_ns$::internal::WireFormatLite::GetTagWireType(tag) ==\n"
3596         "    ::$proto_ns$::internal::WireFormatLite::WIRETYPE_END_GROUP) {\n"
3597         "  input->SetLastTag(tag);\n"
3598         "  goto success;\n"
3599         "}\n");
3600   } else {
3601     format(
3602         "if (tag == 0) {\n"
3603         "  goto success;\n"
3604         "}\n");
3605   }
3606 
3607   // Handle extension ranges.
3608   if (descriptor_->extension_range_count() > 0) {
3609     format("if (");
3610     for (int i = 0; i < descriptor_->extension_range_count(); i++) {
3611       const Descriptor::ExtensionRange* range = descriptor_->extension_range(i);
3612       if (i > 0) format(" ||\n    ");
3613 
3614       uint32 start_tag = WireFormatLite::MakeTag(
3615           range->start, static_cast<WireFormatLite::WireType>(0));
3616       uint32 end_tag = WireFormatLite::MakeTag(
3617           range->end, static_cast<WireFormatLite::WireType>(0));
3618 
3619       if (range->end > FieldDescriptor::kMaxNumber) {
3620         format("($1$u <= tag)", start_tag);
3621       } else {
3622         format("($1$u <= tag && tag < $2$u)", start_tag, end_tag);
3623       }
3624     }
3625     format(") {\n");
3626     if (UseUnknownFieldSet(descriptor_->file(), options_)) {
3627       format(
3628           "  DO_(_extensions_.ParseField(tag, input,\n"
3629           "      internal_default_instance(),\n"
3630           "      $mutable_unknown_fields$));\n");
3631     } else {
3632       format(
3633           "  DO_(_extensions_.ParseField(tag, input,\n"
3634           "      internal_default_instance(),\n"
3635           "      &unknown_fields_stream));\n");
3636     }
3637     format(
3638         "  continue;\n"
3639         "}\n");
3640   }
3641 
3642   // We really don't recognize this tag.  Skip it.
3643   if (UseUnknownFieldSet(descriptor_->file(), options_)) {
3644     format(
3645         "DO_(::$proto_ns$::internal::WireFormat::SkipField(\n"
3646         "      input, tag, $mutable_unknown_fields$));\n");
3647   } else {
3648     format(
3649         "DO_(::$proto_ns$::internal::WireFormatLite::SkipField(\n"
3650         "    input, tag, &unknown_fields_stream));\n");
3651   }
3652 
3653   if (descriptor_->field_count() > 0) {
3654     format("break;\n");
3655     format.Outdent();
3656     format("}\n");  // default:
3657     format.Outdent();
3658     format("}\n");  // switch
3659   }
3660 
3661   format.Outdent();
3662   format.Outdent();
3663   format(
3664       "  }\n"  // for (;;)
3665       "success:\n"
3666       "  // @@protoc_insertion_point(parse_success:$full_name$)\n"
3667       "  return true;\n"
3668       "failure:\n"
3669       "  // @@protoc_insertion_point(parse_failure:$full_name$)\n"
3670       "  return false;\n"
3671       "#undef DO_\n"
3672       "}\n");
3673   format("#endif  // $GOOGLE_PROTOBUF$_ENABLE_EXPERIMENTAL_PARSER\n");
3674 }
3675 
GenerateSerializeOneofFields(io::Printer * printer,const std::vector<const FieldDescriptor * > & fields,bool to_array)3676 void MessageGenerator::GenerateSerializeOneofFields(
3677     io::Printer* printer, const std::vector<const FieldDescriptor*>& fields,
3678     bool to_array) {
3679   Formatter format(printer, variables_);
3680   GOOGLE_CHECK(!fields.empty());
3681   if (fields.size() == 1) {
3682     GenerateSerializeOneField(printer, fields[0], to_array, -1);
3683     return;
3684   }
3685   // We have multiple mutually exclusive choices.  Emit a switch statement.
3686   const OneofDescriptor* oneof = fields[0]->containing_oneof();
3687   format("switch ($1$_case()) {\n", oneof->name());
3688   format.Indent();
3689   for (auto field : fields) {
3690     format("case k$1$:\n", UnderscoresToCamelCase(field->name(), true));
3691     format.Indent();
3692     if (to_array) {
3693       field_generators_.get(field).GenerateSerializeWithCachedSizesToArray(
3694           printer);
3695     } else {
3696       field_generators_.get(field).GenerateSerializeWithCachedSizes(printer);
3697     }
3698     format("break;\n");
3699     format.Outdent();
3700   }
3701   format.Outdent();
3702   // Doing nothing is an option.
3703   format(
3704       "  default: ;\n"
3705       "}\n");
3706 }
3707 
GenerateSerializeOneField(io::Printer * printer,const FieldDescriptor * field,bool to_array,int cached_has_bits_index)3708 void MessageGenerator::GenerateSerializeOneField(io::Printer* printer,
3709                                                  const FieldDescriptor* field,
3710                                                  bool to_array,
3711                                                  int cached_has_bits_index) {
3712   Formatter format(printer, variables_);
3713   if (!field->options().weak()) {
3714     // For weakfields, PrintFieldComment is called during iteration.
3715     PrintFieldComment(format, field);
3716   }
3717 
3718   bool have_enclosing_if = false;
3719   if (field->options().weak()) {
3720   } else if (!field->is_repeated() && HasFieldPresence(descriptor_->file())) {
3721     // Attempt to use the state of cached_has_bits, if possible.
3722     int has_bit_index = has_bit_indices_[field->index()];
3723     if (cached_has_bits_index == has_bit_index / 32) {
3724       const std::string mask =
3725           StrCat(strings::Hex(1u << (has_bit_index % 32), strings::ZERO_PAD_8));
3726 
3727       format("if (cached_has_bits & 0x$1$u) {\n", mask);
3728     } else {
3729       format("if (has_$1$()) {\n", FieldName(field));
3730     }
3731 
3732     format.Indent();
3733     have_enclosing_if = true;
3734   } else if (!HasFieldPresence(descriptor_->file())) {
3735     have_enclosing_if = EmitFieldNonDefaultCondition(printer, "this->", field);
3736   }
3737 
3738   if (to_array) {
3739     field_generators_.get(field).GenerateSerializeWithCachedSizesToArray(
3740         printer);
3741   } else {
3742     field_generators_.get(field).GenerateSerializeWithCachedSizes(printer);
3743   }
3744 
3745   if (have_enclosing_if) {
3746     format.Outdent();
3747     format("}\n");
3748   }
3749   format("\n");
3750 }
3751 
GenerateSerializeOneExtensionRange(io::Printer * printer,const Descriptor::ExtensionRange * range,bool to_array)3752 void MessageGenerator::GenerateSerializeOneExtensionRange(
3753     io::Printer* printer, const Descriptor::ExtensionRange* range,
3754     bool to_array) {
3755   std::map<std::string, std::string> vars;
3756   vars["start"] = StrCat(range->start);
3757   vars["end"] = StrCat(range->end);
3758   Formatter format(printer, vars);
3759   format("// Extension range [$start$, $end$)\n");
3760   if (to_array) {
3761     format(
3762         "target = _extensions_.InternalSerializeWithCachedSizesToArray(\n"
3763         "    $start$, $end$, target);\n\n");
3764   } else {
3765     format(
3766         "_extensions_.SerializeWithCachedSizes($start$, $end$, output);\n"
3767         "\n");
3768   }
3769 }
3770 
GenerateSerializeWithCachedSizes(io::Printer * printer)3771 void MessageGenerator::GenerateSerializeWithCachedSizes(io::Printer* printer) {
3772   Formatter format(printer, variables_);
3773   if (descriptor_->options().message_set_wire_format()) {
3774     // Special-case MessageSet.
3775     format(
3776         "void $classname$::SerializeWithCachedSizes(\n"
3777         "    ::$proto_ns$::io::CodedOutputStream* output) const {\n"
3778         "  _extensions_.SerializeMessageSetWithCachedSizes(output);\n");
3779     std::map<std::string, std::string> vars;
3780     SetUnknkownFieldsVariable(descriptor_, options_, &vars);
3781     format.AddMap(vars);
3782     format(
3783         "  "
3784         "::$proto_ns$::internal::SerializeUnknownMessageSetItems(\n"
3785         "      $unknown_fields$, output);\n");
3786     format("}\n");
3787     return;
3788   }
3789   if (options_.table_driven_serialization) return;
3790 
3791   format(
3792       "void $classname$::SerializeWithCachedSizes(\n"
3793       "    ::$proto_ns$::io::CodedOutputStream* output) const {\n");
3794   format.Indent();
3795 
3796   format("// @@protoc_insertion_point(serialize_start:$full_name$)\n");
3797 
3798   GenerateSerializeWithCachedSizesBody(printer, false);
3799 
3800   format("// @@protoc_insertion_point(serialize_end:$full_name$)\n");
3801 
3802   format.Outdent();
3803   format("}\n");
3804 }
3805 
GenerateSerializeWithCachedSizesToArray(io::Printer * printer)3806 void MessageGenerator::GenerateSerializeWithCachedSizesToArray(
3807     io::Printer* printer) {
3808   Formatter format(printer, variables_);
3809   if (descriptor_->options().message_set_wire_format()) {
3810     // Special-case MessageSet.
3811     format(
3812         "$uint8$* $classname$::InternalSerializeWithCachedSizesToArray(\n"
3813         "    $uint8$* target) const {\n"
3814         "  target = _extensions_."
3815         "InternalSerializeMessageSetWithCachedSizesToArray(target);\n");
3816     GOOGLE_CHECK(UseUnknownFieldSet(descriptor_->file(), options_));
3817     std::map<std::string, std::string> vars;
3818     SetUnknkownFieldsVariable(descriptor_, options_, &vars);
3819     format.AddMap(vars);
3820     format(
3821         "  target = ::$proto_ns$::internal::WireFormat::\n"
3822         "             SerializeUnknownMessageSetItemsToArray(\n"
3823         "               $unknown_fields$, target);\n");
3824     format(
3825         "  return target;\n"
3826         "}\n");
3827     return;
3828   }
3829 
3830   format(
3831       "$uint8$* $classname$::InternalSerializeWithCachedSizesToArray(\n"
3832       "    $uint8$* target) const {\n");
3833   format.Indent();
3834 
3835   format("// @@protoc_insertion_point(serialize_to_array_start:$full_name$)\n");
3836 
3837   GenerateSerializeWithCachedSizesBody(printer, true);
3838 
3839   format("// @@protoc_insertion_point(serialize_to_array_end:$full_name$)\n");
3840 
3841   format.Outdent();
3842   format(
3843       "  return target;\n"
3844       "}\n");
3845 }
3846 
GenerateSerializeWithCachedSizesBody(io::Printer * printer,bool to_array)3847 void MessageGenerator::GenerateSerializeWithCachedSizesBody(
3848     io::Printer* printer, bool to_array) {
3849   Formatter format(printer, variables_);
3850   // If there are multiple fields in a row from the same oneof then we
3851   // coalesce them and emit a switch statement.  This is more efficient
3852   // because it lets the C++ compiler know this is a "at most one can happen"
3853   // situation. If we emitted "if (has_x()) ...; if (has_y()) ..." the C++
3854   // compiler's emitted code might check has_y() even when has_x() is true.
3855   class LazySerializerEmitter {
3856    public:
3857     LazySerializerEmitter(MessageGenerator* mg, io::Printer* printer,
3858                           bool to_array)
3859         : mg_(mg),
3860           format_(printer),
3861           to_array_(to_array),
3862           eager_(!HasFieldPresence(mg->descriptor_->file())),
3863           cached_has_bit_index_(-1) {}
3864 
3865     ~LazySerializerEmitter() { Flush(); }
3866 
3867     // If conditions allow, try to accumulate a run of fields from the same
3868     // oneof, and handle them at the next Flush().
3869     void Emit(const FieldDescriptor* field) {
3870       if (eager_ || MustFlush(field)) {
3871         Flush();
3872       }
3873       if (field->containing_oneof() == NULL) {
3874         // TODO(ckennelly): Defer non-oneof fields similarly to oneof fields.
3875 
3876         if (!field->options().weak() && !field->is_repeated() && !eager_) {
3877           // We speculatively load the entire _has_bits_[index] contents, even
3878           // if it is for only one field.  Deferring non-oneof emitting would
3879           // allow us to determine whether this is going to be useful.
3880           int has_bit_index = mg_->has_bit_indices_[field->index()];
3881           if (cached_has_bit_index_ != has_bit_index / 32) {
3882             // Reload.
3883             int new_index = has_bit_index / 32;
3884 
3885             format_("cached_has_bits = _has_bits_[$1$];\n", new_index);
3886 
3887             cached_has_bit_index_ = new_index;
3888           }
3889         }
3890 
3891         mg_->GenerateSerializeOneField(format_.printer(), field, to_array_,
3892                                        cached_has_bit_index_);
3893       } else {
3894         v_.push_back(field);
3895       }
3896     }
3897 
3898     void Flush() {
3899       if (!v_.empty()) {
3900         mg_->GenerateSerializeOneofFields(format_.printer(), v_, to_array_);
3901         v_.clear();
3902       }
3903     }
3904 
3905    private:
3906     // If we have multiple fields in v_ then they all must be from the same
3907     // oneof.  Would adding field to v_ break that invariant?
3908     bool MustFlush(const FieldDescriptor* field) {
3909       return !v_.empty() &&
3910              v_[0]->containing_oneof() != field->containing_oneof();
3911     }
3912 
3913     MessageGenerator* mg_;
3914     Formatter format_;
3915     const bool to_array_;
3916     const bool eager_;
3917     std::vector<const FieldDescriptor*> v_;
3918 
3919     // cached_has_bit_index_ maintains that:
3920     //   cached_has_bits = from._has_bits_[cached_has_bit_index_]
3921     // for cached_has_bit_index_ >= 0
3922     int cached_has_bit_index_;
3923   };
3924 
3925   std::vector<const FieldDescriptor*> ordered_fields =
3926       SortFieldsByNumber(descriptor_);
3927 
3928   std::vector<const Descriptor::ExtensionRange*> sorted_extensions;
3929   sorted_extensions.reserve(descriptor_->extension_range_count());
3930   for (int i = 0; i < descriptor_->extension_range_count(); ++i) {
3931     sorted_extensions.push_back(descriptor_->extension_range(i));
3932   }
3933   std::sort(sorted_extensions.begin(), sorted_extensions.end(),
3934             ExtensionRangeSorter());
3935   if (num_weak_fields_) {
3936     format(
3937         "::$proto_ns$::internal::WeakFieldMap::FieldWriter field_writer("
3938         "_weak_field_map_);\n");
3939   }
3940 
3941   format(
3942       "$uint32$ cached_has_bits = 0;\n"
3943       "(void) cached_has_bits;\n\n");
3944 
3945   // Merge the fields and the extension ranges, both sorted by field number.
3946   {
3947     LazySerializerEmitter e(this, printer, to_array);
3948     const FieldDescriptor* last_weak_field = nullptr;
3949     int i, j;
3950     for (i = 0, j = 0;
3951          i < ordered_fields.size() || j < sorted_extensions.size();) {
3952       if ((j == sorted_extensions.size()) ||
3953           (i < descriptor_->field_count() &&
3954            ordered_fields[i]->number() < sorted_extensions[j]->start)) {
3955         const FieldDescriptor* field = ordered_fields[i++];
3956         if (field->options().weak()) {
3957           last_weak_field = field;
3958           PrintFieldComment(format, field);
3959         } else {
3960           if (last_weak_field != nullptr) {
3961             e.Emit(last_weak_field);
3962             last_weak_field = nullptr;
3963           }
3964           e.Emit(field);
3965         }
3966       } else {
3967         if (last_weak_field != nullptr) {
3968           e.Emit(last_weak_field);
3969           last_weak_field = nullptr;
3970         }
3971         e.Flush();
3972         GenerateSerializeOneExtensionRange(printer, sorted_extensions[j++],
3973                                            to_array);
3974       }
3975     }
3976     if (last_weak_field != nullptr) {
3977       e.Emit(last_weak_field);
3978     }
3979   }
3980 
3981   std::map<std::string, std::string> vars;
3982   SetUnknkownFieldsVariable(descriptor_, options_, &vars);
3983   format.AddMap(vars);
3984   if (UseUnknownFieldSet(descriptor_->file(), options_)) {
3985     format("if ($have_unknown_fields$) {\n");
3986     format.Indent();
3987     if (to_array) {
3988       format(
3989           "target = "
3990           "::$proto_ns$::internal::WireFormat::SerializeUnknownFieldsToArray(\n"
3991           "    $unknown_fields$, target);\n");
3992     } else {
3993       format(
3994           "::$proto_ns$::internal::WireFormat::SerializeUnknownFields(\n"
3995           "    $unknown_fields$, output);\n");
3996     }
3997     format.Outdent();
3998 
3999     format("}\n");
4000   } else {
4001     format(
4002         "output->WriteRaw($unknown_fields$.data(),\n"
4003         "                 static_cast<int>($unknown_fields$.size()));\n");
4004   }
4005 }
4006 
RequiredFieldsBitMask() const4007 std::vector<uint32> MessageGenerator::RequiredFieldsBitMask() const {
4008   const int array_size = HasBitsSize();
4009   std::vector<uint32> masks(array_size, 0);
4010 
4011   for (auto field : FieldRange(descriptor_)) {
4012     if (!field->is_required()) {
4013       continue;
4014     }
4015 
4016     const int has_bit_index = has_bit_indices_[field->index()];
4017     masks[has_bit_index / 32] |= static_cast<uint32>(1) << (has_bit_index % 32);
4018   }
4019   return masks;
4020 }
4021 
4022 // Create an expression that evaluates to
4023 //  "for all i, (_has_bits_[i] & masks[i]) == masks[i]"
4024 // masks is allowed to be shorter than _has_bits_, but at least one element of
4025 // masks must be non-zero.
ConditionalToCheckBitmasks(const std::vector<uint32> & masks)4026 static std::string ConditionalToCheckBitmasks(
4027     const std::vector<uint32>& masks) {
4028   std::vector<std::string> parts;
4029   for (int i = 0; i < masks.size(); i++) {
4030     if (masks[i] == 0) continue;
4031     std::string m = StrCat("0x", strings::Hex(masks[i], strings::ZERO_PAD_8));
4032     // Each xor evaluates to 0 if the expected bits are present.
4033     parts.push_back(
4034         StrCat("((_has_bits_[", i, "] & ", m, ") ^ ", m, ")"));
4035   }
4036   GOOGLE_CHECK(!parts.empty());
4037   // If we have multiple parts, each expected to be 0, then bitwise-or them.
4038   std::string result =
4039       parts.size() == 1
4040           ? parts[0]
4041           : StrCat("(", Join(parts, "\n       | "), ")");
4042   return result + " == 0";
4043 }
4044 
GenerateByteSize(io::Printer * printer)4045 void MessageGenerator::GenerateByteSize(io::Printer* printer) {
4046   Formatter format(printer, variables_);
4047 
4048   if (descriptor_->options().message_set_wire_format()) {
4049     // Special-case MessageSet.
4050     std::map<std::string, std::string> vars;
4051     SetUnknkownFieldsVariable(descriptor_, options_, &vars);
4052     format.AddMap(vars);
4053     format(
4054         "size_t $classname$::ByteSizeLong() const {\n"
4055         "// @@protoc_insertion_point(message_set_byte_size_start:$full_name$)\n"
4056         "  size_t total_size = _extensions_.MessageSetByteSize();\n"
4057         "  if ($have_unknown_fields$) {\n"
4058         "    total_size += ::$proto_ns$::internal::\n"
4059         "        ComputeUnknownMessageSetItemsSize($unknown_fields$);\n"
4060         "  }\n"
4061         "  int cached_size = "
4062         "::$proto_ns$::internal::ToCachedSize(total_size);\n"
4063         "  SetCachedSize(cached_size);\n"
4064         "  return total_size;\n"
4065         "}\n");
4066     return;
4067   }
4068 
4069   if (num_required_fields_ > 1 && HasFieldPresence(descriptor_->file())) {
4070     // Emit a function (rarely used, we hope) that handles the required fields
4071     // by checking for each one individually.
4072     format(
4073         "size_t $classname$::RequiredFieldsByteSizeFallback() const {\n"
4074         "// @@protoc_insertion_point(required_fields_byte_size_fallback_start:"
4075         "$full_name$)\n");
4076     format.Indent();
4077     format("size_t total_size = 0;\n");
4078     for (auto field : optimized_order_) {
4079       if (field->is_required()) {
4080         format(
4081             "\n"
4082             "if (has_$1$()) {\n",
4083             FieldName(field));
4084         format.Indent();
4085         PrintFieldComment(format, field);
4086         field_generators_.get(field).GenerateByteSize(printer);
4087         format.Outdent();
4088         format("}\n");
4089       }
4090     }
4091     format(
4092         "\n"
4093         "return total_size;\n");
4094     format.Outdent();
4095     format("}\n");
4096   }
4097 
4098   format(
4099       "size_t $classname$::ByteSizeLong() const {\n"
4100       "// @@protoc_insertion_point(message_byte_size_start:$full_name$)\n");
4101   format.Indent();
4102   format(
4103       "size_t total_size = 0;\n"
4104       "\n");
4105 
4106   if (descriptor_->extension_range_count() > 0) {
4107     format(
4108         "total_size += _extensions_.ByteSize();\n"
4109         "\n");
4110   }
4111 
4112   std::map<std::string, std::string> vars;
4113   SetUnknkownFieldsVariable(descriptor_, options_, &vars);
4114   format.AddMap(vars);
4115   if (UseUnknownFieldSet(descriptor_->file(), options_)) {
4116     format(
4117         "if ($have_unknown_fields$) {\n"
4118         "  total_size +=\n"
4119         "    ::$proto_ns$::internal::WireFormat::ComputeUnknownFieldsSize(\n"
4120         "      $unknown_fields$);\n"
4121         "}\n");
4122   } else {
4123     format(
4124         "total_size += $unknown_fields$.size();\n"
4125         "\n");
4126   }
4127 
4128   // Handle required fields (if any).  We expect all of them to be
4129   // present, so emit one conditional that checks for that.  If they are all
4130   // present then the fast path executes; otherwise the slow path executes.
4131   if (num_required_fields_ > 1 && HasFieldPresence(descriptor_->file())) {
4132     // The fast path works if all required fields are present.
4133     const std::vector<uint32> masks_for_has_bits = RequiredFieldsBitMask();
4134     format("if ($1$) {  // All required fields are present.\n",
4135            ConditionalToCheckBitmasks(masks_for_has_bits));
4136     format.Indent();
4137     // Oneof fields cannot be required, so optimized_order_ contains all of the
4138     // fields that we need to potentially emit.
4139     for (auto field : optimized_order_) {
4140       if (!field->is_required()) continue;
4141       PrintFieldComment(format, field);
4142       field_generators_.get(field).GenerateByteSize(printer);
4143       format("\n");
4144     }
4145     format.Outdent();
4146     format(
4147         "} else {\n"  // the slow path
4148         "  total_size += RequiredFieldsByteSizeFallback();\n"
4149         "}\n");
4150   } else {
4151     // num_required_fields_ <= 1: no need to be tricky
4152     for (auto field : optimized_order_) {
4153       if (!field->is_required()) continue;
4154       PrintFieldComment(format, field);
4155       format("if (has_$1$()) {\n", FieldName(field));
4156       format.Indent();
4157       field_generators_.get(field).GenerateByteSize(printer);
4158       format.Outdent();
4159       format("}\n");
4160     }
4161   }
4162 
4163   std::vector<std::vector<const FieldDescriptor*>> chunks = CollectFields(
4164       optimized_order_,
4165       MatchRepeatedAndHasByteAndRequired(
4166           &has_bit_indices_, HasFieldPresence(descriptor_->file())));
4167 
4168   // Remove chunks with required fields.
4169   chunks.erase(std::remove_if(chunks.begin(), chunks.end(), IsRequired),
4170                chunks.end());
4171 
4172   ColdChunkSkipper cold_skipper(options_, chunks, has_bit_indices_, kColdRatio,
4173                                 HasFieldPresence(descriptor_->file()));
4174 
4175   format(
4176       "$uint32$ cached_has_bits = 0;\n"
4177       "// Prevent compiler warnings about cached_has_bits being unused\n"
4178       "(void) cached_has_bits;\n\n");
4179 
4180   int cached_has_bit_index = -1;
4181 
4182   for (int chunk_index = 0; chunk_index < chunks.size(); chunk_index++) {
4183     const std::vector<const FieldDescriptor*>& chunk = chunks[chunk_index];
4184     GOOGLE_CHECK(!chunk.empty());
4185 
4186     // Handle repeated fields.
4187     if (chunk.front()->is_repeated()) {
4188       for (int i = 0; i < chunk.size(); i++) {
4189         const FieldDescriptor* field = chunk[i];
4190 
4191         PrintFieldComment(format, field);
4192         const FieldGenerator& generator = field_generators_.get(field);
4193         generator.GenerateByteSize(printer);
4194         format("\n");
4195       }
4196       continue;
4197     }
4198 
4199     cold_skipper.OnStartChunk(chunk_index, cached_has_bit_index, "", printer);
4200 
4201     // Handle optional (non-repeated/oneof) fields.
4202     //
4203     // These are handled in chunks of 8.  The first chunk is
4204     // the non-requireds-non-repeateds-non-unions-non-extensions in
4205     //  descriptor_->field(0), descriptor_->field(1), ... descriptor_->field(7),
4206     // and the second chunk is the same for
4207     //  descriptor_->field(8), descriptor_->field(9), ...
4208     //  descriptor_->field(15),
4209     // etc.
4210     int last_chunk = HasFieldPresence(descriptor_->file())
4211                          ? has_bit_indices_[chunk.front()->index()] / 8
4212                          : 0;
4213     GOOGLE_DCHECK_NE(-1, last_chunk);
4214 
4215     const bool have_outer_if =
4216         HasFieldPresence(descriptor_->file()) && chunk.size() > 1;
4217 
4218     if (have_outer_if) {
4219       uint32 last_chunk_mask = GenChunkMask(chunk, has_bit_indices_);
4220       const int count = popcnt(last_chunk_mask);
4221 
4222       // Check (up to) 8 has_bits at a time if we have more than one field in
4223       // this chunk.  Due to field layout ordering, we may check
4224       // _has_bits_[last_chunk * 8 / 32] multiple times.
4225       GOOGLE_DCHECK_LE(2, count);
4226       GOOGLE_DCHECK_GE(8, count);
4227 
4228       if (cached_has_bit_index != last_chunk / 4) {
4229         cached_has_bit_index = last_chunk / 4;
4230         format("cached_has_bits = _has_bits_[$1$];\n", cached_has_bit_index);
4231       }
4232       format("if (cached_has_bits & 0x$1$u) {\n",
4233              StrCat(strings::Hex(last_chunk_mask, strings::ZERO_PAD_8)));
4234       format.Indent();
4235     }
4236 
4237     // Go back and emit checks for each of the fields we processed.
4238     for (int j = 0; j < chunk.size(); j++) {
4239       const FieldDescriptor* field = chunk[j];
4240       const FieldGenerator& generator = field_generators_.get(field);
4241 
4242       PrintFieldComment(format, field);
4243 
4244       bool have_enclosing_if = false;
4245       if (HasFieldPresence(descriptor_->file())) {
4246         PrintPresenceCheck(format, field, has_bit_indices_, printer,
4247                            &cached_has_bit_index);
4248         have_enclosing_if = true;
4249       } else {
4250         // Without field presence: field is serialized only if it has a
4251         // non-default value.
4252         have_enclosing_if =
4253             EmitFieldNonDefaultCondition(printer, "this->", field);
4254       }
4255 
4256       generator.GenerateByteSize(printer);
4257 
4258       if (have_enclosing_if) {
4259         format.Outdent();
4260         format(
4261             "}\n"
4262             "\n");
4263       }
4264     }
4265 
4266     if (have_outer_if) {
4267       format.Outdent();
4268       format("}\n");
4269     }
4270 
4271     if (cold_skipper.OnEndChunk(chunk_index, printer)) {
4272       // Reset here as it may have been updated in just closed if statement.
4273       cached_has_bit_index = -1;
4274     }
4275   }
4276 
4277   // Fields inside a oneof don't use _has_bits_ so we count them in a separate
4278   // pass.
4279   for (auto oneof : OneOfRange(descriptor_)) {
4280     format("switch ($1$_case()) {\n", oneof->name());
4281     format.Indent();
4282     for (auto field : FieldRange(oneof)) {
4283       PrintFieldComment(format, field);
4284       format("case k$1$: {\n", UnderscoresToCamelCase(field->name(), true));
4285       format.Indent();
4286       field_generators_.get(field).GenerateByteSize(printer);
4287       format("break;\n");
4288       format.Outdent();
4289       format("}\n");
4290     }
4291     format(
4292         "case $1$_NOT_SET: {\n"
4293         "  break;\n"
4294         "}\n",
4295         ToUpper(oneof->name()));
4296     format.Outdent();
4297     format("}\n");
4298   }
4299 
4300   if (num_weak_fields_) {
4301     // TagSize + MessageSize
4302     format("total_size += _weak_field_map_.ByteSizeLong();\n");
4303   }
4304 
4305   // We update _cached_size_ even though this is a const method.  Because
4306   // const methods might be called concurrently this needs to be atomic
4307   // operations or the program is undefined.  In practice, since any concurrent
4308   // writes will be writing the exact same value, normal writes will work on
4309   // all common processors. We use a dedicated wrapper class to abstract away
4310   // the underlying atomic. This makes it easier on platforms where even relaxed
4311   // memory order might have perf impact to replace it with ordinary loads and
4312   // stores.
4313   format(
4314       "int cached_size = ::$proto_ns$::internal::ToCachedSize(total_size);\n"
4315       "SetCachedSize(cached_size);\n"
4316       "return total_size;\n");
4317 
4318   format.Outdent();
4319   format("}\n");
4320 }
4321 
GenerateIsInitialized(io::Printer * printer)4322 void MessageGenerator::GenerateIsInitialized(io::Printer* printer) {
4323   Formatter format(printer, variables_);
4324   format("bool $classname$::IsInitialized() const {\n");
4325   format.Indent();
4326 
4327   if (descriptor_->extension_range_count() > 0) {
4328     format(
4329         "if (!_extensions_.IsInitialized()) {\n"
4330         "  return false;\n"
4331         "}\n\n");
4332   }
4333 
4334   if (HasFieldPresence(descriptor_->file())) {
4335     // Check that all required fields in this message are set.  We can do this
4336     // most efficiently by checking 32 "has bits" at a time.
4337     const std::vector<uint32> masks = RequiredFieldsBitMask();
4338 
4339     for (int i = 0; i < masks.size(); i++) {
4340       uint32 mask = masks[i];
4341       if (mask == 0) {
4342         continue;
4343       }
4344 
4345       // TODO(ckennelly): Consider doing something similar to ByteSizeLong(),
4346       // where we check all of the required fields in a single branch (assuming
4347       // that we aren't going to benefit from early termination).
4348       format("if ((_has_bits_[$1$] & 0x$2$) != 0x$2$) return false;\n",
4349              i,                                                 // 1
4350              StrCat(strings::Hex(mask, strings::ZERO_PAD_8)));  // 2
4351     }
4352   }
4353 
4354   // Now check that all non-oneof embedded messages are initialized.
4355   for (auto field : optimized_order_) {
4356     // TODO(ckennelly): Push this down into a generator?
4357     if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE &&
4358         !ShouldIgnoreRequiredFieldCheck(field, options_) &&
4359         scc_analyzer_->HasRequiredFields(field->message_type())) {
4360       if (field->is_repeated()) {
4361         if (IsImplicitWeakField(field, options_, scc_analyzer_)) {
4362           format(
4363               "if (!::$proto_ns$::internal::AllAreInitializedWeak(this->$1$_))"
4364               " return false;\n",
4365               FieldName(field));
4366         } else {
4367           format(
4368               "if (!::$proto_ns$::internal::AllAreInitialized(this->$1$()))"
4369               " return false;\n",
4370               FieldName(field));
4371         }
4372       } else if (field->options().weak()) {
4373         continue;
4374       } else {
4375         GOOGLE_CHECK(!field->containing_oneof());
4376         format(
4377             "if (has_$1$()) {\n"
4378             "  if (!this->$1$_->IsInitialized()) return false;\n"
4379             "}\n",
4380             FieldName(field));
4381       }
4382     }
4383   }
4384   if (num_weak_fields_) {
4385     // For Weak fields.
4386     format("if (!_weak_field_map_.IsInitialized()) return false;\n");
4387   }
4388   // Go through the oneof fields, emitting a switch if any might have required
4389   // fields.
4390   for (auto oneof : OneOfRange(descriptor_)) {
4391     bool has_required_fields = false;
4392     for (auto field : FieldRange(oneof)) {
4393       if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE &&
4394           !ShouldIgnoreRequiredFieldCheck(field, options_) &&
4395           scc_analyzer_->HasRequiredFields(field->message_type())) {
4396         has_required_fields = true;
4397         break;
4398       }
4399     }
4400 
4401     if (!has_required_fields) {
4402       continue;
4403     }
4404 
4405     format("switch ($1$_case()) {\n", oneof->name());
4406     format.Indent();
4407     for (auto field : FieldRange(oneof)) {
4408       format("case k$1$: {\n", UnderscoresToCamelCase(field->name(), true));
4409       format.Indent();
4410 
4411       if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE &&
4412           !ShouldIgnoreRequiredFieldCheck(field, options_) &&
4413           scc_analyzer_->HasRequiredFields(field->message_type())) {
4414         GOOGLE_CHECK(!(field->options().weak() || !field->containing_oneof()));
4415         if (field->options().weak()) {
4416           // Just skip.
4417         } else {
4418           format(
4419               "if (has_$1$()) {\n"
4420               "  if (!this->$1$().IsInitialized()) return false;\n"
4421               "}\n",
4422               FieldName(field));
4423         }
4424       }
4425 
4426       format("break;\n");
4427       format.Outdent();
4428       format("}\n");
4429     }
4430     format(
4431         "case $1$_NOT_SET: {\n"
4432         "  break;\n"
4433         "}\n",
4434         ToUpper(oneof->name()));
4435     format.Outdent();
4436     format("}\n");
4437   }
4438 
4439   format.Outdent();
4440   format(
4441       "  return true;\n"
4442       "}\n");
4443 }
4444 
4445 }  // namespace cpp
4446 }  // namespace compiler
4447 }  // namespace protobuf
4448 }  // namespace google
4449