• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc.  All rights reserved.
3 // https://developers.google.com/protocol-buffers/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 //     * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 //     * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 //     * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 
31 // Author: kenton@google.com (Kenton Varda)
32 //  Based on original Protocol Buffers design by
33 //  Sanjay Ghemawat, Jeff Dean, and others.
34 
35 #include <google/protobuf/compiler/cpp/cpp_message.h>
36 
37 #include <algorithm>
38 #include <functional>
39 #include <map>
40 #include <memory>
41 #include <unordered_map>
42 #include <utility>
43 #include <vector>
44 
45 #include <google/protobuf/compiler/cpp/cpp_enum.h>
46 #include <google/protobuf/compiler/cpp/cpp_extension.h>
47 #include <google/protobuf/compiler/cpp/cpp_field.h>
48 #include <google/protobuf/compiler/cpp/cpp_helpers.h>
49 #include <google/protobuf/compiler/cpp/cpp_padding_optimizer.h>
50 #include <google/protobuf/descriptor.pb.h>
51 #include <google/protobuf/io/coded_stream.h>
52 #include <google/protobuf/io/printer.h>
53 #include <google/protobuf/generated_message_table_driven.h>
54 #include <google/protobuf/generated_message_util.h>
55 #include <google/protobuf/map_entry_lite.h>
56 #include <google/protobuf/wire_format.h>
57 #include <google/protobuf/stubs/strutil.h>
58 #include <google/protobuf/stubs/substitute.h>
59 #include <google/protobuf/stubs/hash.h>
60 
61 
62 namespace google {
63 namespace protobuf {
64 namespace compiler {
65 namespace cpp {
66 
67 using internal::WireFormat;
68 using internal::WireFormatLite;
69 
70 namespace {
71 
// Sentinel has-bit index meaning "this field has no has-bit assigned".
constexpr int kNoHasbit = -1;
73 
74 // Create an expression that evaluates to
75 //  "for all i, (_has_bits_[i] & masks[i]) == masks[i]"
76 // masks is allowed to be shorter than _has_bits_, but at least one element of
77 // masks must be non-zero.
ConditionalToCheckBitmasks(const std::vector<uint32> & masks,bool return_success=true,StringPiece has_bits_var="_has_bits_")78 std::string ConditionalToCheckBitmasks(
79     const std::vector<uint32>& masks, bool return_success = true,
80     StringPiece has_bits_var = "_has_bits_") {
81   std::vector<std::string> parts;
82   for (int i = 0; i < masks.size(); i++) {
83     if (masks[i] == 0) continue;
84     std::string m = StrCat("0x", strings::Hex(masks[i], strings::ZERO_PAD_8));
85     // Each xor evaluates to 0 if the expected bits are present.
86     parts.push_back(
87         StrCat("((", has_bits_var, "[", i, "] & ", m, ") ^ ", m, ")"));
88   }
89   GOOGLE_CHECK(!parts.empty());
90   // If we have multiple parts, each expected to be 0, then bitwise-or them.
91   std::string result =
92       parts.size() == 1
93           ? parts[0]
94           : StrCat("(", Join(parts, "\n       | "), ")");
95   return result + (return_success ? " == 0" : " != 0");
96 }
97 
PrintPresenceCheck(const Formatter & format,const FieldDescriptor * field,const std::vector<int> & has_bit_indices,io::Printer * printer,int * cached_has_word_index)98 void PrintPresenceCheck(const Formatter& format, const FieldDescriptor* field,
99                         const std::vector<int>& has_bit_indices,
100                         io::Printer* printer, int* cached_has_word_index) {
101   if (!field->options().weak()) {
102     int has_bit_index = has_bit_indices[field->index()];
103     if (*cached_has_word_index != (has_bit_index / 32)) {
104       *cached_has_word_index = (has_bit_index / 32);
105       format("cached_has_bits = _has_bits_[$1$];\n", *cached_has_word_index);
106     }
107     const std::string mask =
108         StrCat(strings::Hex(1u << (has_bit_index % 32), strings::ZERO_PAD_8));
109     format("if (cached_has_bits & 0x$1$u) {\n", mask);
110   } else {
111     format("if (has_$1$()) {\n", FieldName(field));
112   }
113   format.Indent();
114 }
115 
116 struct FieldOrderingByNumber {
operator ()google::protobuf::compiler::cpp::__anonfb7f711d0111::FieldOrderingByNumber117   inline bool operator()(const FieldDescriptor* a,
118                          const FieldDescriptor* b) const {
119     return a->number() < b->number();
120   }
121 };
122 
123 // Sort the fields of the given Descriptor by number into a new[]'d array
124 // and return it.
SortFieldsByNumber(const Descriptor * descriptor)125 std::vector<const FieldDescriptor*> SortFieldsByNumber(
126     const Descriptor* descriptor) {
127   std::vector<const FieldDescriptor*> fields(descriptor->field_count());
128   for (int i = 0; i < descriptor->field_count(); i++) {
129     fields[i] = descriptor->field(i);
130   }
131   std::sort(fields.begin(), fields.end(), FieldOrderingByNumber());
132   return fields;
133 }
134 
135 // Functor for sorting extension ranges by their "start" field number.
136 struct ExtensionRangeSorter {
operator ()google::protobuf::compiler::cpp::__anonfb7f711d0111::ExtensionRangeSorter137   bool operator()(const Descriptor::ExtensionRange* left,
138                   const Descriptor::ExtensionRange* right) const {
139     return left->start < right->start;
140   }
141 };
142 
IsPOD(const FieldDescriptor * field)143 bool IsPOD(const FieldDescriptor* field) {
144   if (field->is_repeated() || field->is_extension()) return false;
145   switch (field->cpp_type()) {
146     case FieldDescriptor::CPPTYPE_ENUM:
147     case FieldDescriptor::CPPTYPE_INT32:
148     case FieldDescriptor::CPPTYPE_INT64:
149     case FieldDescriptor::CPPTYPE_UINT32:
150     case FieldDescriptor::CPPTYPE_UINT64:
151     case FieldDescriptor::CPPTYPE_FLOAT:
152     case FieldDescriptor::CPPTYPE_DOUBLE:
153     case FieldDescriptor::CPPTYPE_BOOL:
154       return true;
155     case FieldDescriptor::CPPTYPE_STRING:
156       return false;
157     default:
158       return false;
159   }
160 }
161 
162 // Helper for the code that emits the SharedCtor() and InternalSwap() methods.
163 // Anything that is a POD or a "normal" message (represented by a pointer) can
164 // be manipulated as raw bytes.
CanBeManipulatedAsRawBytes(const FieldDescriptor * field,const Options & options)165 bool CanBeManipulatedAsRawBytes(const FieldDescriptor* field,
166                                 const Options& options) {
167   bool ret = CanInitializeByZeroing(field);
168 
169   // Non-repeated, non-lazy message fields are simply raw pointers, so we can
170   // swap them or use memset to initialize these in SharedCtor. We cannot use
171   // this in Clear, as we need to potentially delete the existing value.
172   ret = ret || (!field->is_repeated() && !IsLazy(field, options) &&
173                 field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE);
174   return ret;
175 }
176 
177 // Finds runs of fields for which `predicate` is true.
178 // RunMap maps from fields that start each run to the number of fields in that
179 // run.  This is optimized for the common case that there are very few runs in
180 // a message and that most of the eligible fields appear together.
181 using RunMap = std::unordered_map<const FieldDescriptor*, size_t>;
FindRuns(const std::vector<const FieldDescriptor * > & fields,const std::function<bool (const FieldDescriptor *)> & predicate)182 RunMap FindRuns(const std::vector<const FieldDescriptor*>& fields,
183                 const std::function<bool(const FieldDescriptor*)>& predicate) {
184   RunMap runs;
185   const FieldDescriptor* last_start = nullptr;
186 
187   for (auto field : fields) {
188     if (predicate(field)) {
189       if (last_start == nullptr) {
190         last_start = field;
191       }
192 
193       runs[last_start]++;
194     } else {
195       last_start = nullptr;
196     }
197   }
198   return runs;
199 }
200 
201 // Emits an if-statement with a condition that evaluates to true if |field| is
202 // considered non-default (will be sent over the wire), for message types
203 // without true field presence. Should only be called if
204 // !HasHasbit(field).
EmitFieldNonDefaultCondition(io::Printer * printer,const std::string & prefix,const FieldDescriptor * field)205 bool EmitFieldNonDefaultCondition(io::Printer* printer,
206                                   const std::string& prefix,
207                                   const FieldDescriptor* field) {
208   GOOGLE_CHECK(!HasHasbit(field));
209   Formatter format(printer);
210   format.Set("prefix", prefix);
211   format.Set("name", FieldName(field));
212   // Merge and serialize semantics: primitive fields are merged/serialized only
213   // if non-zero (numeric) or non-empty (string).
214   if (!field->is_repeated() && !field->containing_oneof()) {
215     if (field->cpp_type() == FieldDescriptor::CPPTYPE_STRING) {
216       format("if ($prefix$$name$().size() > 0) {\n");
217     } else if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
218       // Message fields still have has_$name$() methods.
219       format("if ($prefix$has_$name$()) {\n");
220     } else if (field->cpp_type() == FieldDescriptor::CPPTYPE_DOUBLE ||
221                field->cpp_type() == FieldDescriptor::CPPTYPE_FLOAT) {
222       // Handle float comparison to prevent -Wfloat-equal warnings
223       format("if (!($prefix$$name$() <= 0 && $prefix$$name$() >= 0)) {\n");
224     } else {
225       format("if ($prefix$$name$() != 0) {\n");
226     }
227     format.Indent();
228     return true;
229   } else if (field->real_containing_oneof()) {
230     format("if (_internal_has_$name$()) {\n");
231     format.Indent();
232     return true;
233   }
234   return false;
235 }
236 
237 // Does the given field have a has_$name$() method?
HasHasMethod(const FieldDescriptor * field)238 bool HasHasMethod(const FieldDescriptor* field) {
239   if (HasFieldPresence(field->file())) {
240     // In proto1/proto2, every field has a has_$name$() method.
241     return true;
242   }
243   // For message types without true field presence, only fields with a message
244   // type have a has_$name$() method.
245   return field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE ||
246          field->has_optional_keyword();
247 }
248 
249 // Collects map entry message type information.
CollectMapInfo(const Options & options,const Descriptor * descriptor,std::map<std::string,std::string> * variables)250 void CollectMapInfo(const Options& options, const Descriptor* descriptor,
251                     std::map<std::string, std::string>* variables) {
252   GOOGLE_CHECK(IsMapEntryMessage(descriptor));
253   std::map<std::string, std::string>& vars = *variables;
254   const FieldDescriptor* key = descriptor->FindFieldByName("key");
255   const FieldDescriptor* val = descriptor->FindFieldByName("value");
256   vars["key_cpp"] = PrimitiveTypeName(options, key->cpp_type());
257   switch (val->cpp_type()) {
258     case FieldDescriptor::CPPTYPE_MESSAGE:
259       vars["val_cpp"] = FieldMessageTypeName(val, options);
260       break;
261     case FieldDescriptor::CPPTYPE_ENUM:
262       vars["val_cpp"] = ClassName(val->enum_type(), true);
263       break;
264     default:
265       vars["val_cpp"] = PrimitiveTypeName(options, val->cpp_type());
266   }
267   vars["key_wire_type"] =
268       "TYPE_" + ToUpper(DeclaredTypeMethodName(key->type()));
269   vars["val_wire_type"] =
270       "TYPE_" + ToUpper(DeclaredTypeMethodName(val->type()));
271   if (descriptor->file()->syntax() != FileDescriptor::SYNTAX_PROTO3 &&
272       val->type() == FieldDescriptor::TYPE_ENUM) {
273     const EnumValueDescriptor* default_value = val->default_value_enum();
274     vars["default_enum_value"] = Int32ToString(default_value->number());
275   } else {
276     vars["default_enum_value"] = "0";
277   }
278 }
279 
280 // Does the given field have a private (internal helper only) has_$name$()
281 // method?
HasPrivateHasMethod(const FieldDescriptor * field)282 bool HasPrivateHasMethod(const FieldDescriptor* field) {
283   // Only for oneofs in message types with no field presence. has_$name$(),
284   // based on the oneof case, is still useful internally for generated code.
285   return (!HasFieldPresence(field->file()) && field->real_containing_oneof());
286 }
287 
288 // TODO(ckennelly):  Cull these exclusions if/when these protos do not have
289 // their methods overridden by subclasses.
290 
ShouldMarkClassAsFinal(const Descriptor * descriptor,const Options & options)291 bool ShouldMarkClassAsFinal(const Descriptor* descriptor,
292                             const Options& options) {
293   return true;
294 }
295 
ShouldMarkClearAsFinal(const Descriptor * descriptor,const Options & options)296 bool ShouldMarkClearAsFinal(const Descriptor* descriptor,
297                             const Options& options) {
298   static std::set<std::string> exclusions{
299   };
300 
301   const std::string name = ClassName(descriptor, true);
302   return exclusions.find(name) == exclusions.end() ||
303          options.opensource_runtime;
304 }
305 
ShouldMarkIsInitializedAsFinal(const Descriptor * descriptor,const Options & options)306 bool ShouldMarkIsInitializedAsFinal(const Descriptor* descriptor,
307                                     const Options& options) {
308   static std::set<std::string> exclusions{
309   };
310 
311   const std::string name = ClassName(descriptor, true);
312   return exclusions.find(name) == exclusions.end() ||
313          options.opensource_runtime;
314 }
315 
ShouldMarkNewAsFinal(const Descriptor * descriptor,const Options & options)316 bool ShouldMarkNewAsFinal(const Descriptor* descriptor,
317                           const Options& options) {
318   static std::set<std::string> exclusions{
319   };
320 
321   const std::string name = ClassName(descriptor, true);
322   return exclusions.find(name) == exclusions.end() ||
323          options.opensource_runtime;
324 }
325 
TableDrivenParsingEnabled(const Descriptor * descriptor,const Options & options)326 bool TableDrivenParsingEnabled(const Descriptor* descriptor,
327                                const Options& options) {
328   if (!options.table_driven_parsing) {
329     return false;
330   }
331 
332   // Consider table-driven parsing.  We only do this if:
333   // - We have has_bits for fields.  This avoids a check on every field we set
334   //   when are present (the common case).
335   bool has_hasbit = false;
336   for (int i = 0; i < descriptor->field_count(); i++) {
337     if (HasHasbit(descriptor->field(i))) {
338       has_hasbit = true;
339       break;
340     }
341   }
342 
343   if (!has_hasbit) return false;
344 
345   const double table_sparseness = 0.5;
346   int max_field_number = 0;
347   for (auto field : FieldRange(descriptor)) {
348     if (max_field_number < field->number()) {
349       max_field_number = field->number();
350     }
351 
352     // - There are no weak fields.
353     if (IsWeak(field, options)) {
354       return false;
355     }
356 
357     // - There are no lazy fields (they require the non-lite library).
358     if (IsLazy(field, options)) {
359       return false;
360     }
361   }
362 
363   // - There range of field numbers is "small"
364   if (max_field_number >= (2 << 14)) {
365     return false;
366   }
367 
368   // - Field numbers are relatively dense within the actual number of fields.
369   //   We check for strictly greater than in the case where there are no fields
370   //   (only extensions) so max_field_number == descriptor->field_count() == 0.
371   if (max_field_number * table_sparseness > descriptor->field_count()) {
372     return false;
373   }
374 
375   // - This is not a MapEntryMessage.
376   if (IsMapEntryMessage(descriptor)) {
377     return false;
378   }
379 
380   return true;
381 }
382 
IsCrossFileMapField(const FieldDescriptor * field)383 bool IsCrossFileMapField(const FieldDescriptor* field) {
384   if (!field->is_map()) {
385     return false;
386   }
387 
388   const Descriptor* d = field->message_type();
389   const FieldDescriptor* value = d->FindFieldByNumber(2);
390 
391   return IsCrossFileMessage(value);
392 }
393 
IsCrossFileMaybeMap(const FieldDescriptor * field)394 bool IsCrossFileMaybeMap(const FieldDescriptor* field) {
395   if (IsCrossFileMapField(field)) {
396     return true;
397   }
398 
399   return IsCrossFileMessage(field);
400 }
401 
IsRequired(const std::vector<const FieldDescriptor * > & v)402 bool IsRequired(const std::vector<const FieldDescriptor*>& v) {
403   return v.front()->is_required();
404 }
405 
406 // Collects neighboring fields based on a given criteria (equivalent predicate).
407 template <typename Predicate>
CollectFields(const std::vector<const FieldDescriptor * > & fields,const Predicate & equivalent)408 std::vector<std::vector<const FieldDescriptor*>> CollectFields(
409     const std::vector<const FieldDescriptor*>& fields,
410     const Predicate& equivalent) {
411   std::vector<std::vector<const FieldDescriptor*>> chunks;
412   for (auto field : fields) {
413     if (chunks.empty() || !equivalent(chunks.back().back(), field)) {
414       chunks.emplace_back();
415     }
416     chunks.back().push_back(field);
417   }
418   return chunks;
419 }
420 
421 // Returns a bit mask based on has_bit index of "fields" that are typically on
422 // the same chunk. It is used in a group presence check where _has_bits_ is
423 // masked to tell if any thing in "fields" is present.
GenChunkMask(const std::vector<const FieldDescriptor * > & fields,const std::vector<int> & has_bit_indices)424 uint32 GenChunkMask(const std::vector<const FieldDescriptor*>& fields,
425                     const std::vector<int>& has_bit_indices) {
426   GOOGLE_CHECK(!fields.empty());
427   int first_index_offset = has_bit_indices[fields.front()->index()] / 32;
428   uint32 chunk_mask = 0;
429   for (auto field : fields) {
430     // "index" defines where in the _has_bits_ the field appears.
431     int index = has_bit_indices[field->index()];
432     GOOGLE_CHECK_EQ(first_index_offset, index / 32);
433     chunk_mask |= static_cast<uint32>(1) << (index % 32);
434   }
435   GOOGLE_CHECK_NE(0, chunk_mask);
436   return chunk_mask;
437 }
438 
439 // Return the number of bits set in n, a non-negative integer.
popcnt(uint32 n)440 static int popcnt(uint32 n) {
441   int result = 0;
442   while (n != 0) {
443     result += (n & 1);
444     n = n / 2;
445   }
446   return result;
447 }
448 
449 // For a run of cold chunks, opens and closes an external if statement that
450 // checks multiple has_bits words to skip bulk of cold fields.
451 class ColdChunkSkipper {
452  public:
ColdChunkSkipper(const Options & options,const std::vector<std::vector<const FieldDescriptor * >> & chunks,const std::vector<int> & has_bit_indices,const double cold_threshold)453   ColdChunkSkipper(
454       const Options& options,
455       const std::vector<std::vector<const FieldDescriptor*>>& chunks,
456       const std::vector<int>& has_bit_indices, const double cold_threshold)
457       : chunks_(chunks),
458         has_bit_indices_(has_bit_indices),
459         access_info_map_(options.access_info_map),
460         cold_threshold_(cold_threshold) {
461     SetCommonVars(options, &variables_);
462   }
463 
464   // May open an external if check for a batch of cold fields. "from" is the
465   // prefix to _has_bits_ to allow MergeFrom to use "from._has_bits_".
466   // Otherwise, it should be "".
467   void OnStartChunk(int chunk, int cached_has_word_index,
468                     const std::string& from, io::Printer* printer);
469   bool OnEndChunk(int chunk, io::Printer* printer);
470 
471  private:
472   bool IsColdChunk(int chunk);
473 
HasbitWord(int chunk,int offset)474   int HasbitWord(int chunk, int offset) {
475     return has_bit_indices_[chunks_[chunk][offset]->index()] / 32;
476   }
477 
478   const std::vector<std::vector<const FieldDescriptor*>>& chunks_;
479   const std::vector<int>& has_bit_indices_;
480   const AccessInfoMap* access_info_map_;
481   const double cold_threshold_;
482   std::map<std::string, std::string> variables_;
483   int limit_chunk_ = -1;
484 };
485 
// Tuning parameter for ColdChunkSkipper.
constexpr double kColdRatio = 0.005;
488 
IsColdChunk(int chunk)489 bool ColdChunkSkipper::IsColdChunk(int chunk) {
490   // Mark this variable as used until it is actually used
491   (void)cold_threshold_;
492   return false;
493 }
494 
495 
OnStartChunk(int chunk,int cached_has_word_index,const std::string & from,io::Printer * printer)496 void ColdChunkSkipper::OnStartChunk(int chunk, int cached_has_word_index,
497                                     const std::string& from,
498                                     io::Printer* printer) {
499   Formatter format(printer, variables_);
500   if (!access_info_map_) {
501     return;
502   } else if (chunk < limit_chunk_) {
503     // We are already inside a run of cold chunks.
504     return;
505   } else if (!IsColdChunk(chunk)) {
506     // We can't start a run of cold chunks.
507     return;
508   }
509 
510   // Find the end of consecutive cold chunks.
511   limit_chunk_ = chunk;
512   while (limit_chunk_ < chunks_.size() && IsColdChunk(limit_chunk_)) {
513     limit_chunk_++;
514   }
515 
516   if (limit_chunk_ <= chunk + 1) {
517     // Require at least two chunks to emit external has_bit checks.
518     limit_chunk_ = -1;
519     return;
520   }
521 
522   // Emit has_bit check for each has_bit_dword index.
523   format("if (PROTOBUF_PREDICT_FALSE(");
524   int first_word = HasbitWord(chunk, 0);
525   while (chunk < limit_chunk_) {
526     uint32 mask = 0;
527     int this_word = HasbitWord(chunk, 0);
528     // Generate mask for chunks on the same word.
529     for (; chunk < limit_chunk_ && HasbitWord(chunk, 0) == this_word; chunk++) {
530       for (auto field : chunks_[chunk]) {
531         int hasbit_index = has_bit_indices_[field->index()];
532         // Fields on a chunk must be in the same word.
533         GOOGLE_CHECK_EQ(this_word, hasbit_index / 32);
534         mask |= 1 << (hasbit_index % 32);
535       }
536     }
537 
538     if (this_word != first_word) {
539       format(" ||\n    ");
540     }
541     format.Set("mask", strings::Hex(mask, strings::ZERO_PAD_8));
542     if (this_word == cached_has_word_index) {
543       format("(cached_has_bits & 0x$mask$u) != 0");
544     } else {
545       format("($1$_has_bits_[$2$] & 0x$mask$u) != 0", from, this_word);
546     }
547   }
548   format(")) {\n");
549   format.Indent();
550 }
551 
OnEndChunk(int chunk,io::Printer * printer)552 bool ColdChunkSkipper::OnEndChunk(int chunk, io::Printer* printer) {
553   Formatter format(printer, variables_);
554   if (chunk != limit_chunk_ - 1) {
555     return false;
556   }
557   format.Outdent();
558   format("}\n");
559   return true;
560 }
561 
562 }  // anonymous namespace
563 
564 // ===================================================================
565 
MessageGenerator(const Descriptor * descriptor,const std::map<std::string,std::string> & vars,int index_in_file_messages,const Options & options,MessageSCCAnalyzer * scc_analyzer)566 MessageGenerator::MessageGenerator(
567     const Descriptor* descriptor,
568     const std::map<std::string, std::string>& vars, int index_in_file_messages,
569     const Options& options, MessageSCCAnalyzer* scc_analyzer)
570     : descriptor_(descriptor),
571       index_in_file_messages_(index_in_file_messages),
572       classname_(ClassName(descriptor, false)),
573       options_(options),
574       field_generators_(descriptor, options, scc_analyzer),
575       max_has_bit_index_(0),
576       num_weak_fields_(0),
577       scc_analyzer_(scc_analyzer),
578       variables_(vars) {
579   if (!message_layout_helper_) {
580     message_layout_helper_.reset(new PaddingOptimizer());
581   }
582 
583   // Variables that apply to this class
584   variables_["classname"] = classname_;
585   variables_["classtype"] = QualifiedClassName(descriptor_, options);
586   variables_["scc_info"] =
587       SccInfoSymbol(scc_analyzer_->GetSCC(descriptor_), options_);
588   variables_["full_name"] = descriptor_->full_name();
589   variables_["superclass"] = SuperClassName(descriptor_, options_);
590 
591   // Compute optimized field order to be used for layout and initialization
592   // purposes.
593   for (auto field : FieldRange(descriptor_)) {
594     if (!IsFieldUsed(field, options_)) {
595       continue;
596     }
597 
598     if (IsWeak(field, options_)) {
599       num_weak_fields_++;
600     } else if (!field->real_containing_oneof()) {
601       optimized_order_.push_back(field);
602     }
603   }
604 
605   message_layout_helper_->OptimizeLayout(&optimized_order_, options_);
606 
607   // This message has hasbits iff one or more fields need one.
608   for (auto field : optimized_order_) {
609     if (HasHasbit(field)) {
610       if (has_bit_indices_.empty()) {
611         has_bit_indices_.resize(descriptor_->field_count(), kNoHasbit);
612       }
613       has_bit_indices_[field->index()] = max_has_bit_index_++;
614     }
615   }
616 
617   if (!has_bit_indices_.empty()) {
618     field_generators_.SetHasBitIndices(has_bit_indices_);
619   }
620 
621   num_required_fields_ = 0;
622   for (int i = 0; i < descriptor->field_count(); i++) {
623     if (descriptor->field(i)->is_required()) {
624       ++num_required_fields_;
625     }
626   }
627 
628   table_driven_ = TableDrivenParsingEnabled(descriptor_, options_);
629 }
630 
631 MessageGenerator::~MessageGenerator() = default;
632 
HasBitsSize() const633 size_t MessageGenerator::HasBitsSize() const {
634   size_t sizeof_has_bits = (max_has_bit_index_ + 31) / 32 * 4;
635   if (sizeof_has_bits == 0) {
636     // Zero-size arrays aren't technically allowed, and MSVC in particular
637     // doesn't like them.  We still need to declare these arrays to make
638     // other code compile.  Since this is an uncommon case, we'll just declare
639     // them with size 1 and waste some space.  Oh well.
640     sizeof_has_bits = 4;
641   }
642 
643   return sizeof_has_bits;
644 }
645 
HasBitIndex(const FieldDescriptor * field) const646 int MessageGenerator::HasBitIndex(const FieldDescriptor* field) const {
647   return has_bit_indices_.empty() ? kNoHasbit
648                                   : has_bit_indices_[field->index()];
649 }
650 
HasByteIndex(const FieldDescriptor * field) const651 int MessageGenerator::HasByteIndex(const FieldDescriptor* field) const {
652   int hasbit = HasBitIndex(field);
653   return hasbit == kNoHasbit ? kNoHasbit : hasbit / 8;
654 }
655 
HasWordIndex(const FieldDescriptor * field) const656 int MessageGenerator::HasWordIndex(const FieldDescriptor* field) const {
657   int hasbit = HasBitIndex(field);
658   return hasbit == kNoHasbit ? kNoHasbit : hasbit / 32;
659 }
660 
AddGenerators(std::vector<std::unique_ptr<EnumGenerator>> * enum_generators,std::vector<std::unique_ptr<ExtensionGenerator>> * extension_generators)661 void MessageGenerator::AddGenerators(
662     std::vector<std::unique_ptr<EnumGenerator>>* enum_generators,
663     std::vector<std::unique_ptr<ExtensionGenerator>>* extension_generators) {
664   for (int i = 0; i < descriptor_->enum_type_count(); i++) {
665     enum_generators->emplace_back(
666         new EnumGenerator(descriptor_->enum_type(i), variables_, options_));
667     enum_generators_.push_back(enum_generators->back().get());
668   }
669   for (int i = 0; i < descriptor_->extension_count(); i++) {
670     extension_generators->emplace_back(
671         new ExtensionGenerator(descriptor_->extension(i), options_));
672     extension_generators_.push_back(extension_generators->back().get());
673   }
674 }
675 
GenerateFieldAccessorDeclarations(io::Printer * printer)676 void MessageGenerator::GenerateFieldAccessorDeclarations(io::Printer* printer) {
677   Formatter format(printer, variables_);
678   // optimized_fields_ does not contain fields where
679   //    field->real_containing_oneof()
680   // so we need to iterate over those as well.
681   //
682   // We place the non-oneof fields in optimized_order_, as that controls the
683   // order of the _has_bits_ entries and we want GDB's pretty printers to be
684   // able to infer these indices from the k[FIELDNAME]FieldNumber order.
685   std::vector<const FieldDescriptor*> ordered_fields;
686   ordered_fields.reserve(descriptor_->field_count());
687 
688   ordered_fields.insert(ordered_fields.begin(), optimized_order_.begin(),
689                         optimized_order_.end());
690   for (auto field : FieldRange(descriptor_)) {
691     if (!field->real_containing_oneof() && !field->options().weak() &&
692         IsFieldUsed(field, options_)) {
693       continue;
694     }
695     ordered_fields.push_back(field);
696   }
697 
698   if (!ordered_fields.empty()) {
699     format("enum : int {\n");
700     for (auto field : ordered_fields) {
701       Formatter::SaveState save(&format);
702 
703       std::map<std::string, std::string> vars;
704       SetCommonFieldVariables(field, &vars, options_);
705       format.AddMap(vars);
706       format("  ${1$$2$$}$ = $number$,\n", field, FieldConstantName(field));
707     }
708     format("};\n");
709   }
710   for (auto field : ordered_fields) {
711     PrintFieldComment(format, field);
712 
713     Formatter::SaveState save(&format);
714 
715     std::map<std::string, std::string> vars;
716     SetCommonFieldVariables(field, &vars, options_);
717     format.AddMap(vars);
718 
719     if (field->is_repeated()) {
720       format("$deprecated_attr$int ${1$$name$_size$}$() const$2$\n", field,
721              IsFieldUsed(field, options_) ? ";" : " {__builtin_trap();}");
722       if (IsFieldUsed(field, options_)) {
723         format(
724             "private:\n"
725             "int ${1$_internal_$name$_size$}$() const;\n"
726             "public:\n",
727             field);
728       }
729     } else if (HasHasMethod(field)) {
730       format("$deprecated_attr$bool ${1$has_$name$$}$() const$2$\n", field,
731              IsFieldUsed(field, options_) ? ";" : " {__builtin_trap();}");
732       if (IsFieldUsed(field, options_)) {
733         format(
734             "private:\n"
735             "bool _internal_has_$name$() const;\n"
736             "public:\n");
737       }
738     } else if (HasPrivateHasMethod(field)) {
739       if (IsFieldUsed(field, options_)) {
740         format(
741             "private:\n"
742             "bool ${1$_internal_has_$name$$}$() const;\n"
743             "public:\n",
744             field);
745       }
746     }
747     format("$deprecated_attr$void ${1$clear_$name$$}$()$2$\n", field,
748            IsFieldUsed(field, options_) ? ";" : "{__builtin_trap();}");
749 
750     // Generate type-specific accessor declarations.
751     field_generators_.get(field).GenerateAccessorDeclarations(printer);
752 
753     format("\n");
754   }
755 
756   if (descriptor_->extension_range_count() > 0) {
757     // Generate accessors for extensions.  We just call a macro located in
758     // extension_set.h since the accessors about 80 lines of static code.
759     format("$GOOGLE_PROTOBUF$_EXTENSION_ACCESSORS($classname$)\n");
760     // Generate MessageSet specific APIs for proto2 MessageSet.
761     // For testing purposes we don't check for bridge.MessageSet, so
762     // we don't use IsProto2MessageSet
763     if (descriptor_->options().message_set_wire_format() &&
764         !options_.opensource_runtime && !options_.lite_implicit_weak_fields) {
765       // Special-case MessageSet
766       format("GOOGLE_PROTOBUF_EXTENSION_MESSAGE_SET_ACCESSORS($classname$)\n");
767     }
768   }
769 
770   for (auto oneof : OneOfRange(descriptor_)) {
771     Formatter::SaveState saver(&format);
772     format.Set("oneof_name", oneof->name());
773     format.Set("camel_oneof_name", UnderscoresToCamelCase(oneof->name(), true));
774     format(
775         "void ${1$clear_$oneof_name$$}$();\n"
776         "$camel_oneof_name$Case $oneof_name$_case() const;\n",
777         oneof);
778   }
779 }
780 
GenerateSingularFieldHasBits(const FieldDescriptor * field,Formatter format)781 void MessageGenerator::GenerateSingularFieldHasBits(
782     const FieldDescriptor* field, Formatter format) {
783   if (!IsFieldUsed(field, options_)) {
784     format(
785         "inline bool $classname$::has_$name$() const { "
786         "__builtin_trap(); }\n");
787     return;
788   }
789   if (field->options().weak()) {
790     format(
791         "inline bool $classname$::has_$name$() const {\n"
792         "$annotate_accessor$"
793         "  return _weak_field_map_.Has($number$);\n"
794         "}\n");
795     return;
796   }
797   if (HasHasbit(field)) {
798     int has_bit_index = HasBitIndex(field);
799     GOOGLE_CHECK_NE(has_bit_index, kNoHasbit);
800 
801     format.Set("has_array_index", has_bit_index / 32);
802     format.Set("has_mask",
803                strings::Hex(1u << (has_bit_index % 32), strings::ZERO_PAD_8));
804     format(
805         "inline bool $classname$::_internal_has_$name$() const {\n"
806         "  bool value = "
807         "(_has_bits_[$has_array_index$] & 0x$has_mask$u) != 0;\n");
808 
809     if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE &&
810         !IsLazy(field, options_)) {
811       // We maintain the invariant that for a submessage x, has_x() returning
812       // true implies that x_ is not null. By giving this information to the
813       // compiler, we allow it to eliminate unnecessary null checks later on.
814       format("  PROTOBUF_ASSUME(!value || $name$_ != nullptr);\n");
815     }
816 
817     format(
818         "  return value;\n"
819         "}\n"
820         "inline bool $classname$::has_$name$() const {\n"
821         "$annotate_accessor$"
822         "  return _internal_has_$name$();\n"
823         "}\n");
824   } else if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
825     // Message fields have a has_$name$() method.
826     if (IsLazy(field, options_)) {
827       format(
828           "inline bool $classname$::_internal_has_$name$() const {\n"
829           "  return !$name$_.IsCleared();\n"
830           "}\n");
831     } else {
832       format(
833           "inline bool $classname$::_internal_has_$name$() const {\n"
834           "  return this != internal_default_instance() "
835           "&& $name$_ != nullptr;\n"
836           "}\n");
837     }
838     format(
839         "inline bool $classname$::has_$name$() const {\n"
840         "$annotate_accessor$"
841         "  return _internal_has_$name$();\n"
842         "}\n");
843   }
844 }
845 
GenerateOneofHasBits(io::Printer * printer)846 void MessageGenerator::GenerateOneofHasBits(io::Printer* printer) {
847   Formatter format(printer, variables_);
848   for (auto oneof : OneOfRange(descriptor_)) {
849     format.Set("oneof_name", oneof->name());
850     format.Set("oneof_index", oneof->index());
851     format.Set("cap_oneof_name", ToUpper(oneof->name()));
852     format(
853         "inline bool $classname$::has_$oneof_name$() const {\n"
854         "  return $oneof_name$_case() != $cap_oneof_name$_NOT_SET;\n"
855         "}\n"
856         "inline void $classname$::clear_has_$oneof_name$() {\n"
857         "  _oneof_case_[$oneof_index$] = $cap_oneof_name$_NOT_SET;\n"
858         "}\n");
859   }
860 }
861 
GenerateOneofMemberHasBits(const FieldDescriptor * field,const Formatter & format)862 void MessageGenerator::GenerateOneofMemberHasBits(const FieldDescriptor* field,
863                                                   const Formatter& format) {
864   if (!IsFieldUsed(field, options_)) {
865     if (HasHasMethod(field)) {
866       format(
867           "inline bool $classname$::has_$name$() const { "
868           "__builtin_trap(); }\n");
869     }
870     format(
871         "inline void $classname$::set_has_$name$() { __builtin_trap(); "
872         "}\n");
873     return;
874   }
875   // Singular field in a oneof
876   // N.B.: Without field presence, we do not use has-bits or generate
877   // has_$name$() methods, but oneofs still have set_has_$name$().
878   // Oneofs also have has_$name$() but only as a private helper
879   // method, so that generated code is slightly cleaner (vs.  comparing
880   // _oneof_case_[index] against a constant everywhere).
881   //
882   // If has_$name$() is private, there is no need to add an internal accessor.
883   // Only annotate public accessors.
884   if (HasHasMethod(field)) {
885     format(
886         "inline bool $classname$::_internal_has_$name$() const {\n"
887         "  return $oneof_name$_case() == k$field_name$;\n"
888         "}\n"
889         "inline bool $classname$::has_$name$() const {\n"
890         "$annotate_accessor$"
891         "  return _internal_has_$name$();\n"
892         "}\n");
893   } else if (HasPrivateHasMethod(field)) {
894     format(
895         "inline bool $classname$::_internal_has_$name$() const {\n"
896         "  return $oneof_name$_case() == k$field_name$;\n"
897         "}\n");
898   }
899   // set_has_$name$() for oneof fields is always private; hence should not be
900   // annotated.
901   format(
902       "inline void $classname$::set_has_$name$() {\n"
903       "  _oneof_case_[$oneof_index$] = k$field_name$;\n"
904       "}\n");
905 }
906 
GenerateFieldClear(const FieldDescriptor * field,bool is_inline,Formatter format)907 void MessageGenerator::GenerateFieldClear(const FieldDescriptor* field,
908                                           bool is_inline, Formatter format) {
909   if (!IsFieldUsed(field, options_)) {
910     format("void $classname$::clear_$name$() { __builtin_trap(); }\n");
911     return;
912   }
913 
914   // Generate clear_$name$().
915   if (is_inline) {
916     format("inline ");
917   }
918   format(
919       "void $classname$::clear_$name$() {\n"
920       "$annotate_accessor$");
921 
922   format.Indent();
923 
924   if (field->real_containing_oneof()) {
925     // Clear this field only if it is the active field in this oneof,
926     // otherwise ignore
927     format("if (_internal_has_$name$()) {\n");
928     format.Indent();
929     field_generators_.get(field).GenerateClearingCode(format.printer());
930     format("clear_has_$oneof_name$();\n");
931     format.Outdent();
932     format("}\n");
933   } else {
934     field_generators_.get(field).GenerateClearingCode(format.printer());
935     if (HasHasbit(field)) {
936       int has_bit_index = HasBitIndex(field);
937       format.Set("has_array_index", has_bit_index / 32);
938       format.Set("has_mask",
939                  strings::Hex(1u << (has_bit_index % 32), strings::ZERO_PAD_8));
940       format("_has_bits_[$has_array_index$] &= ~0x$has_mask$u;\n");
941     }
942   }
943 
944   format.Outdent();
945   format("}\n");
946 }
947 
GenerateFieldAccessorDefinitions(io::Printer * printer)948 void MessageGenerator::GenerateFieldAccessorDefinitions(io::Printer* printer) {
949   Formatter format(printer, variables_);
950   format("// $classname$\n\n");
951 
952   for (auto field : FieldRange(descriptor_)) {
953     PrintFieldComment(format, field);
954 
955     if (!IsFieldUsed(field, options_)) {
956       continue;
957     }
958 
959     std::map<std::string, std::string> vars;
960     SetCommonFieldVariables(field, &vars, options_);
961 
962     Formatter::SaveState saver(&format);
963     format.AddMap(vars);
964 
965     // Generate has_$name$() or $name$_size().
966     if (field->is_repeated()) {
967       if (!IsFieldUsed(field, options_)) {
968         format(
969             "inline int $classname$::$name$_size() const { "
970             "__builtin_trap(); }\n");
971       } else {
972         format(
973             "inline int $classname$::_internal_$name$_size() const {\n"
974             "  return $name$_$1$.size();\n"
975             "}\n"
976             "inline int $classname$::$name$_size() const {\n"
977             "$annotate_accessor$"
978             "  return _internal_$name$_size();\n"
979             "}\n",
980             IsImplicitWeakField(field, options_, scc_analyzer_) &&
981                     field->message_type()
982                 ? ".weak"
983                 : "");
984       }
985     } else if (field->real_containing_oneof()) {
986       format.Set("field_name", UnderscoresToCamelCase(field->name(), true));
987       format.Set("oneof_name", field->containing_oneof()->name());
988       format.Set("oneof_index",
989                  StrCat(field->containing_oneof()->index()));
990       GenerateOneofMemberHasBits(field, format);
991     } else {
992       // Singular field.
993       GenerateSingularFieldHasBits(field, format);
994     }
995 
996     if (!IsCrossFileMaybeMap(field)) {
997       GenerateFieldClear(field, true, format);
998     }
999 
1000     // Generate type-specific accessors.
1001     if (IsFieldUsed(field, options_)) {
1002       field_generators_.get(field).GenerateInlineAccessorDefinitions(printer);
1003     }
1004 
1005     format("\n");
1006   }
1007 
1008   // Generate has_$name$() and clear_has_$name$() functions for oneofs.
1009   GenerateOneofHasBits(printer);
1010 }
1011 
GenerateClassDefinition(io::Printer * printer)1012 void MessageGenerator::GenerateClassDefinition(io::Printer* printer) {
1013   Formatter format(printer, variables_);
1014   format.Set("class_final", ShouldMarkClassAsFinal(descriptor_, options_)
1015                                 ? "PROTOBUF_FINAL"
1016                                 : "");
1017 
1018   if (IsMapEntryMessage(descriptor_)) {
1019     std::map<std::string, std::string> vars;
1020     CollectMapInfo(options_, descriptor_, &vars);
1021     vars["lite"] =
1022         HasDescriptorMethods(descriptor_->file(), options_) ? "" : "Lite";
1023     format.AddMap(vars);
1024     format(
1025         "class $classname$ : public "
1026         "::$proto_ns$::internal::MapEntry$lite$<$classname$, \n"
1027         "    $key_cpp$, $val_cpp$,\n"
1028         "    ::$proto_ns$::internal::WireFormatLite::$key_wire_type$,\n"
1029         "    ::$proto_ns$::internal::WireFormatLite::$val_wire_type$,\n"
1030         "    $default_enum_value$ > {\n"
1031         "public:\n"
1032         "  typedef ::$proto_ns$::internal::MapEntry$lite$<$classname$, \n"
1033         "    $key_cpp$, $val_cpp$,\n"
1034         "    ::$proto_ns$::internal::WireFormatLite::$key_wire_type$,\n"
1035         "    ::$proto_ns$::internal::WireFormatLite::$val_wire_type$,\n"
1036         "    $default_enum_value$ > SuperType;\n"
1037         "  $classname$();\n"
1038         "  explicit $classname$(::$proto_ns$::Arena* arena);\n"
1039         "  void MergeFrom(const $classname$& other);\n"
1040         "  static const $classname$* internal_default_instance() { return "
1041         "reinterpret_cast<const "
1042         "$classname$*>(&_$classname$_default_instance_); }\n");
1043     auto utf8_check = GetUtf8CheckMode(descriptor_->field(0), options_);
1044     if (descriptor_->field(0)->type() == FieldDescriptor::TYPE_STRING &&
1045         utf8_check != NONE) {
1046       if (utf8_check == STRICT) {
1047         format(
1048             "  static bool ValidateKey(std::string* s) {\n"
1049             "    return ::$proto_ns$::internal::WireFormatLite::"
1050             "VerifyUtf8String(s->data(), static_cast<int>(s->size()), "
1051             "::$proto_ns$::internal::WireFormatLite::PARSE, \"$1$\");\n"
1052             " }\n",
1053             descriptor_->field(0)->full_name());
1054       } else {
1055         GOOGLE_CHECK(utf8_check == VERIFY);
1056         format(
1057             "  static bool ValidateKey(std::string* s) {\n"
1058             "#ifndef NDEBUG\n"
1059             "    ::$proto_ns$::internal::WireFormatLite::VerifyUtf8String(\n"
1060             "       s->data(), static_cast<int>(s->size()), "
1061             "::$proto_ns$::internal::"
1062             "WireFormatLite::PARSE, \"$1$\");\n"
1063             "#endif\n"
1064             "    return true;\n"
1065             " }\n",
1066             descriptor_->field(0)->full_name());
1067       }
1068     } else {
1069       format("  static bool ValidateKey(void*) { return true; }\n");
1070     }
1071     if (descriptor_->field(1)->type() == FieldDescriptor::TYPE_STRING &&
1072         utf8_check != NONE) {
1073       if (utf8_check == STRICT) {
1074         format(
1075             "  static bool ValidateValue(std::string* s) {\n"
1076             "    return ::$proto_ns$::internal::WireFormatLite::"
1077             "VerifyUtf8String(s->data(), static_cast<int>(s->size()), "
1078             "::$proto_ns$::internal::WireFormatLite::PARSE, \"$1$\");\n"
1079             " }\n",
1080             descriptor_->field(1)->full_name());
1081       } else {
1082         GOOGLE_CHECK(utf8_check = VERIFY);
1083         format(
1084             "  static bool ValidateValue(std::string* s) {\n"
1085             "#ifndef NDEBUG\n"
1086             "    ::$proto_ns$::internal::WireFormatLite::VerifyUtf8String(\n"
1087             "       s->data(), static_cast<int>(s->size()), "
1088             "::$proto_ns$::internal::"
1089             "WireFormatLite::PARSE, \"$1$\");\n"
1090             "#endif\n"
1091             "    return true;\n"
1092             " }\n",
1093             descriptor_->field(1)->full_name());
1094       }
1095     } else {
1096       format("  static bool ValidateValue(void*) { return true; }\n");
1097     }
1098     if (HasDescriptorMethods(descriptor_->file(), options_)) {
1099       format(
1100           "  void MergeFrom(const ::$proto_ns$::Message& other) final;\n"
1101           "  ::$proto_ns$::Metadata GetMetadata() const final;\n"
1102           "  private:\n"
1103           "  static ::$proto_ns$::Metadata GetMetadataStatic() {\n"
1104           "    ::$proto_ns$::internal::AssignDescriptors(&::$desc_table$);\n"
1105           "    return ::$desc_table$.file_level_metadata[$1$];\n"
1106           "  }\n"
1107           "\n"
1108           "  public:\n"
1109           "};\n",
1110           index_in_file_messages_);
1111     } else {
1112       format("};\n");
1113     }
1114     return;
1115   }
1116 
1117   format(
1118       "class $dllexport_decl $${1$$classname$$}$$ class_final$ :\n"
1119       "    public $superclass$ /* @@protoc_insertion_point("
1120       "class_definition:$full_name$) */ {\n",
1121       descriptor_);
1122   format(" public:\n");
1123   format.Indent();
1124 
1125   if (SupportsArenas(descriptor_)) {
1126     format("inline $classname$() : $classname$(nullptr) {}\n");
1127   } else {
1128     format("$classname$();\n");
1129   }
1130 
1131   format(
1132       "virtual ~$classname$();\n"
1133       "\n"
1134       "$classname$(const $classname$& from);\n"
1135       "$classname$($classname$&& from) noexcept\n"
1136       "  : $classname$() {\n"
1137       "  *this = ::std::move(from);\n"
1138       "}\n"
1139       "\n"
1140       "inline $classname$& operator=(const $classname$& from) {\n"
1141       "  CopyFrom(from);\n"
1142       "  return *this;\n"
1143       "}\n"
1144       "inline $classname$& operator=($classname$&& from) noexcept {\n"
1145       "  if (GetArena() == from.GetArena()) {\n"
1146       "    if (this != &from) InternalSwap(&from);\n"
1147       "  } else {\n"
1148       "    CopyFrom(from);\n"
1149       "  }\n"
1150       "  return *this;\n"
1151       "}\n"
1152       "\n");
1153 
1154   if (options_.table_driven_serialization) {
1155     format(
1156         "private:\n"
1157         "const void* InternalGetTable() const;\n"
1158         "public:\n"
1159         "\n");
1160   }
1161 
1162   std::map<std::string, std::string> vars;
1163   SetUnknkownFieldsVariable(descriptor_, options_, &vars);
1164   format.AddMap(vars);
1165   if (PublicUnknownFieldsAccessors(descriptor_)) {
1166     format(
1167         "inline const $unknown_fields_type$& unknown_fields() const {\n"
1168         "  return $unknown_fields$;\n"
1169         "}\n"
1170         "inline $unknown_fields_type$* mutable_unknown_fields() {\n"
1171         "  return $mutable_unknown_fields$;\n"
1172         "}\n"
1173         "\n");
1174   }
1175 
1176   // Only generate this member if it's not disabled.
1177   if (HasDescriptorMethods(descriptor_->file(), options_) &&
1178       !descriptor_->options().no_standard_descriptor_accessor()) {
1179     format(
1180         "static const ::$proto_ns$::Descriptor* descriptor() {\n"
1181         "  return GetDescriptor();\n"
1182         "}\n");
1183   }
1184 
1185   if (HasDescriptorMethods(descriptor_->file(), options_)) {
1186     // These shadow non-static methods of the same names in Message.  We
1187     // redefine them here because calls directly on the generated class can be
1188     // statically analyzed -- we know what descriptor types are being requested.
1189     // It also avoids a vtable dispatch.
1190     //
1191     // We would eventually like to eliminate the methods in Message, and having
1192     // this separate also lets us track calls to the base class methods
1193     // separately.
1194     format(
1195         "static const ::$proto_ns$::Descriptor* GetDescriptor() {\n"
1196         "  return GetMetadataStatic().descriptor;\n"
1197         "}\n"
1198         "static const ::$proto_ns$::Reflection* GetReflection() {\n"
1199         "  return GetMetadataStatic().reflection;\n"
1200         "}\n");
1201   }
1202 
1203   format(
1204       "static const $classname$& default_instance();\n"
1205       "\n");
1206 
1207   // Generate enum values for every field in oneofs. One list is generated for
1208   // each oneof with an additional *_NOT_SET value.
1209   for (auto oneof : OneOfRange(descriptor_)) {
1210     format("enum $1$Case {\n", UnderscoresToCamelCase(oneof->name(), true));
1211     format.Indent();
1212     for (auto field : FieldRange(oneof)) {
1213       std::string oneof_enum_case_field_name =
1214           UnderscoresToCamelCase(field->name(), true);
1215       format("k$1$ = $2$,\n", oneof_enum_case_field_name,  // 1
1216              field->number());                             // 2
1217     }
1218     format("$1$_NOT_SET = 0,\n", ToUpper(oneof->name()));
1219     format.Outdent();
1220     format(
1221         "};\n"
1222         "\n");
1223   }
1224 
1225   // TODO(gerbens) make this private, while still granting other protos access.
1226   format(
1227       "static void InitAsDefaultInstance();  // FOR INTERNAL USE ONLY\n"
1228       "static inline const $classname$* internal_default_instance() {\n"
1229       "  return reinterpret_cast<const $classname$*>(\n"
1230       "             &_$classname$_default_instance_);\n"
1231       "}\n"
1232       "static constexpr int kIndexInFileMessages =\n"
1233       "  $1$;\n"
1234       "\n",
1235       index_in_file_messages_);
1236 
1237   if (IsAnyMessage(descriptor_, options_)) {
1238     format(
1239         "// implements Any -----------------------------------------------\n"
1240         "\n");
1241     if (HasDescriptorMethods(descriptor_->file(), options_)) {
1242       format(
1243           "void PackFrom(const ::$proto_ns$::Message& message) {\n"
1244           "  _any_metadata_.PackFrom(message);\n"
1245           "}\n"
1246           "void PackFrom(const ::$proto_ns$::Message& message,\n"
1247           "              const std::string& type_url_prefix) {\n"
1248           "  _any_metadata_.PackFrom(message, type_url_prefix);\n"
1249           "}\n"
1250           "bool UnpackTo(::$proto_ns$::Message* message) const {\n"
1251           "  return _any_metadata_.UnpackTo(message);\n"
1252           "}\n"
1253           "static bool GetAnyFieldDescriptors(\n"
1254           "    const ::$proto_ns$::Message& message,\n"
1255           "    const ::$proto_ns$::FieldDescriptor** type_url_field,\n"
1256           "    const ::$proto_ns$::FieldDescriptor** value_field);\n"
1257           "template <typename T, class = typename std::enable_if<"
1258           "!std::is_convertible<T, const ::$proto_ns$::Message&>"
1259           "::value>::type>\n"
1260           "void PackFrom(const T& message) {\n"
1261           "  _any_metadata_.PackFrom<T>(message);\n"
1262           "}\n"
1263           "template <typename T, class = typename std::enable_if<"
1264           "!std::is_convertible<T, const ::$proto_ns$::Message&>"
1265           "::value>::type>\n"
1266           "void PackFrom(const T& message,\n"
1267           "              const std::string& type_url_prefix) {\n"
1268           "  _any_metadata_.PackFrom<T>(message, type_url_prefix);"
1269           "}\n"
1270           "template <typename T, class = typename std::enable_if<"
1271           "!std::is_convertible<T, const ::$proto_ns$::Message&>"
1272           "::value>::type>\n"
1273           "bool UnpackTo(T* message) const {\n"
1274           "  return _any_metadata_.UnpackTo<T>(message);\n"
1275           "}\n");
1276     } else {
1277       format(
1278           "template <typename T>\n"
1279           "void PackFrom(const T& message) {\n"
1280           "  _any_metadata_.PackFrom(message);\n"
1281           "}\n"
1282           "template <typename T>\n"
1283           "void PackFrom(const T& message,\n"
1284           "              const std::string& type_url_prefix) {\n"
1285           "  _any_metadata_.PackFrom(message, type_url_prefix);\n"
1286           "}\n"
1287           "template <typename T>\n"
1288           "bool UnpackTo(T* message) const {\n"
1289           "  return _any_metadata_.UnpackTo(message);\n"
1290           "}\n");
1291     }
1292     format(
1293         "template<typename T> bool Is() const {\n"
1294         "  return _any_metadata_.Is<T>();\n"
1295         "}\n"
1296         "static bool ParseAnyTypeUrl(const string& type_url,\n"
1297         "                            std::string* full_type_name);\n");
1298   }
1299 
1300   format.Set("new_final",
1301              ShouldMarkNewAsFinal(descriptor_, options_) ? "final" : "");
1302 
1303   format(
1304       "friend void swap($classname$& a, $classname$& b) {\n"
1305       "  a.Swap(&b);\n"
1306       "}\n");
1307 
1308   if (SupportsArenas(descriptor_)) {
1309     format(
1310         "inline void Swap($classname$* other) {\n"
1311         "  if (other == this) return;\n"
1312         "  if (GetArena() == other->GetArena()) {\n"
1313         "    InternalSwap(other);\n"
1314         "  } else {\n"
1315         "    ::PROTOBUF_NAMESPACE_ID::internal::GenericSwap(this, other);\n"
1316         "  }\n"
1317         "}\n"
1318         "void UnsafeArenaSwap($classname$* other) {\n"
1319         "  if (other == this) return;\n"
1320         "  $DCHK$(GetArena() == other->GetArena());\n"
1321         "  InternalSwap(other);\n"
1322         "}\n");
1323   } else {
1324     format(
1325         "inline void Swap($classname$* other) {\n"
1326         "  if (other == this) return;\n"
1327         "  InternalSwap(other);\n"
1328         "}\n");
1329   }
1330 
1331   format(
1332       "\n"
1333       "// implements Message ----------------------------------------------\n"
1334       "\n"
1335       "inline $classname$* New() const$ new_final$ {\n"
1336       "  return CreateMaybeMessage<$classname$>(nullptr);\n"
1337       "}\n"
1338       "\n"
1339       "$classname$* New(::$proto_ns$::Arena* arena) const$ new_final$ {\n"
1340       "  return CreateMaybeMessage<$classname$>(arena);\n"
1341       "}\n");
1342 
1343   // For instances that derive from Message (rather than MessageLite), some
1344   // methods are virtual and should be marked as final.
1345   format.Set("full_final", HasDescriptorMethods(descriptor_->file(), options_)
1346                                ? "final"
1347                                : "");
1348 
1349   if (HasGeneratedMethods(descriptor_->file(), options_)) {
1350     if (HasDescriptorMethods(descriptor_->file(), options_)) {
1351       format(
1352           "void CopyFrom(const ::$proto_ns$::Message& from) final;\n"
1353           "void MergeFrom(const ::$proto_ns$::Message& from) final;\n");
1354     } else {
1355       format(
1356           "void CheckTypeAndMergeFrom(const ::$proto_ns$::MessageLite& from)\n"
1357           "  final;\n");
1358     }
1359 
1360     format.Set("clear_final",
1361                ShouldMarkClearAsFinal(descriptor_, options_) ? "final" : "");
1362     format.Set(
1363         "is_initialized_final",
1364         ShouldMarkIsInitializedAsFinal(descriptor_, options_) ? "final" : "");
1365 
1366     format(
1367         "void CopyFrom(const $classname$& from);\n"
1368         "void MergeFrom(const $classname$& from);\n"
1369         "PROTOBUF_ATTRIBUTE_REINITIALIZES void Clear()$ clear_final$;\n"
1370         "bool IsInitialized() const$ is_initialized_final$;\n"
1371         "\n"
1372         "size_t ByteSizeLong() const final;\n"
1373         "const char* _InternalParse(const char* ptr, "
1374         "::$proto_ns$::internal::ParseContext* ctx) final;\n"
1375         "$uint8$* _InternalSerialize(\n"
1376         "    $uint8$* target, ::$proto_ns$::io::EpsCopyOutputStream* stream) "
1377         "const final;\n");
1378 
1379     // DiscardUnknownFields() is implemented in message.cc using reflections. We
1380     // need to implement this function in generated code for messages.
1381     if (!UseUnknownFieldSet(descriptor_->file(), options_)) {
1382       format("void DiscardUnknownFields()$ full_final$;\n");
1383     }
1384   }
1385 
1386   format(
1387       "int GetCachedSize() const final { return _cached_size_.Get(); }"
1388       "\n\nprivate:\n"
1389       "inline void SharedCtor();\n"
1390       "inline void SharedDtor();\n"
1391       "void SetCachedSize(int size) const$ full_final$;\n"
1392       "void InternalSwap($classname$* other);\n");
1393 
1394   format(
1395       // Friend AnyMetadata so that it can call this FullMessageName() method.
1396       "friend class ::$proto_ns$::internal::AnyMetadata;\n"
1397       "static $1$ FullMessageName() {\n"
1398       "  return \"$full_name$\";\n"
1399       "}\n",
1400       options_.opensource_runtime ? "::PROTOBUF_NAMESPACE_ID::StringPiece"
1401                                   : "::StringPiece");
1402 
1403   if (SupportsArenas(descriptor_)) {
1404     format(
1405         // TODO(gerbens) Make this private! Currently people are deriving from
1406         // protos to give access to this constructor, breaking the invariants
1407         // we rely on.
1408         "protected:\n"
1409         "explicit $classname$(::$proto_ns$::Arena* arena);\n"
1410         "private:\n"
1411         "static void ArenaDtor(void* object);\n"
1412         "inline void RegisterArenaDtor(::$proto_ns$::Arena* arena);\n");
1413   }
1414 
1415   format(
1416       "public:\n"
1417       "\n");
1418 
1419   if (HasDescriptorMethods(descriptor_->file(), options_)) {
1420     format(
1421         "::$proto_ns$::Metadata GetMetadata() const final;\n"
1422         "private:\n"
1423         "static ::$proto_ns$::Metadata GetMetadataStatic() {\n"
1424         "  ::$proto_ns$::internal::AssignDescriptors(&::$desc_table$);\n"
1425         "  return ::$desc_table$.file_level_metadata[kIndexInFileMessages];\n"
1426         "}\n"
1427         "\n"
1428         "public:\n"
1429         "\n");
1430   } else {
1431     format(
1432         "std::string GetTypeName() const final;\n"
1433         "\n");
1434   }
1435 
1436   format(
1437       "// nested types ----------------------------------------------------\n"
1438       "\n");
1439 
1440   // Import all nested message classes into this class's scope with typedefs.
1441   for (int i = 0; i < descriptor_->nested_type_count(); i++) {
1442     const Descriptor* nested_type = descriptor_->nested_type(i);
1443     if (!IsMapEntryMessage(nested_type)) {
1444       format.Set("nested_full_name", ClassName(nested_type, false));
1445       format.Set("nested_name", ResolveKeyword(nested_type->name()));
1446       format("typedef ${1$$nested_full_name$$}$ ${1$$nested_name$$}$;\n",
1447              nested_type);
1448     }
1449   }
1450 
1451   if (descriptor_->nested_type_count() > 0) {
1452     format("\n");
1453   }
1454 
1455   // Import all nested enums and their values into this class's scope with
1456   // typedefs and constants.
1457   for (int i = 0; i < descriptor_->enum_type_count(); i++) {
1458     enum_generators_[i]->GenerateSymbolImports(printer);
1459     format("\n");
1460   }
1461 
1462   format(
1463       "// accessors -------------------------------------------------------\n"
1464       "\n");
1465 
1466   // Generate accessor methods for all fields.
1467   GenerateFieldAccessorDeclarations(printer);
1468 
1469   // Declare extension identifiers.
1470   for (int i = 0; i < descriptor_->extension_count(); i++) {
1471     extension_generators_[i]->GenerateDeclaration(printer);
1472   }
1473 
1474 
1475   format("// @@protoc_insertion_point(class_scope:$full_name$)\n");
1476 
1477   // Generate private members.
1478   format.Outdent();
1479   format(" private:\n");
1480   format.Indent();
1481   // TODO(seongkim): Remove hack to track field access and remove this class.
1482   format("class _Internal;\n");
1483 
1484   for (auto field : FieldRange(descriptor_)) {
1485     // set_has_***() generated in all oneofs.
1486     if (!field->is_repeated() && !field->options().weak() &&
1487         field->real_containing_oneof()) {
1488       format("void set_has_$1$();\n", FieldName(field));
1489     }
1490   }
1491   format("\n");
1492 
1493   // Generate oneof function declarations
1494   for (auto oneof : OneOfRange(descriptor_)) {
1495     format(
1496         "inline bool has_$1$() const;\n"
1497         "inline void clear_has_$1$();\n\n",
1498         oneof->name());
1499   }
1500 
1501   if (HasGeneratedMethods(descriptor_->file(), options_) &&
1502       !descriptor_->options().message_set_wire_format() &&
1503       num_required_fields_ > 1) {
1504     format(
1505         "// helper for ByteSizeLong()\n"
1506         "size_t RequiredFieldsByteSizeFallback() const;\n\n");
1507   }
1508 
1509   // Prepare decls for _cached_size_ and _has_bits_.  Their position in the
1510   // output will be determined later.
1511 
1512   bool need_to_emit_cached_size = true;
1513   const std::string cached_size_decl =
1514       "mutable ::$proto_ns$::internal::CachedSize _cached_size_;\n";
1515 
1516   const size_t sizeof_has_bits = HasBitsSize();
1517   const std::string has_bits_decl =
1518       sizeof_has_bits == 0
1519           ? ""
1520           : StrCat("::$proto_ns$::internal::HasBits<",
1521                          sizeof_has_bits / 4, "> _has_bits_;\n");
1522 
1523   // To minimize padding, data members are divided into three sections:
1524   // (1) members assumed to align to 8 bytes
1525   // (2) members corresponding to message fields, re-ordered to optimize
1526   //     alignment.
1527   // (3) members assumed to align to 4 bytes.
1528 
1529   // Members assumed to align to 8 bytes:
1530 
1531   if (descriptor_->extension_range_count() > 0) {
1532     format(
1533         "::$proto_ns$::internal::ExtensionSet _extensions_;\n"
1534         "\n");
1535   }
1536 
1537   if (SupportsArenas(descriptor_)) {
1538     format(
1539         "template <typename T> friend class "
1540         "::$proto_ns$::Arena::InternalHelper;\n"
1541         "typedef void InternalArenaConstructable_;\n"
1542         "typedef void DestructorSkippable_;\n");
1543   }
1544 
1545   if (!has_bit_indices_.empty()) {
1546     // _has_bits_ is frequently accessed, so to reduce code size and improve
1547     // speed, it should be close to the start of the object. Placing
1548     // _cached_size_ together with _has_bits_ improves cache locality despite
1549     // potential alignment padding.
1550     format(has_bits_decl.c_str());
1551     format(cached_size_decl.c_str());
1552     need_to_emit_cached_size = false;
1553   }
1554 
1555   // Field members:
1556 
1557   // Emit some private and static members
1558   for (auto field : optimized_order_) {
1559     const FieldGenerator& generator = field_generators_.get(field);
1560     generator.GenerateStaticMembers(printer);
1561     generator.GeneratePrivateMembers(printer);
1562   }
1563 
1564   // For each oneof generate a union
1565   for (auto oneof : OneOfRange(descriptor_)) {
1566     std::string camel_oneof_name = UnderscoresToCamelCase(oneof->name(), true);
1567     format(
1568         "union $1$Union {\n"
1569         // explicit empty constructor is needed when union contains
1570         // ArenaStringPtr members for string fields.
1571         "  $1$Union() {}\n",
1572         camel_oneof_name);
1573     format.Indent();
1574     for (auto field : FieldRange(oneof)) {
1575       if (IsFieldUsed(field, options_)) {
1576         field_generators_.get(field).GeneratePrivateMembers(printer);
1577       }
1578     }
1579     format.Outdent();
1580     format("} $1$_;\n", oneof->name());
1581     for (auto field : FieldRange(oneof)) {
1582       if (IsFieldUsed(field, options_)) {
1583         field_generators_.get(field).GenerateStaticMembers(printer);
1584       }
1585     }
1586   }
1587 
1588   // Members assumed to align to 4 bytes:
1589 
1590   if (need_to_emit_cached_size) {
1591     format(cached_size_decl.c_str());
1592     need_to_emit_cached_size = false;
1593   }
1594 
1595   // Generate _oneof_case_.
1596   if (descriptor_->real_oneof_decl_count() > 0) {
1597     format(
1598         "$uint32$ _oneof_case_[$1$];\n"
1599         "\n",
1600         descriptor_->real_oneof_decl_count());
1601   }
1602 
1603   if (num_weak_fields_) {
1604     format("::$proto_ns$::internal::WeakFieldMap _weak_field_map_;\n");
1605   }
1606   // Generate _any_metadata_ for the Any type.
1607   if (IsAnyMessage(descriptor_, options_)) {
1608     format("::$proto_ns$::internal::AnyMetadata _any_metadata_;\n");
1609   }
1610 
1611   // The TableStruct struct needs access to the private parts, in order to
1612   // construct the offsets of all members.
1613   format("friend struct ::$tablename$;\n");
1614 
1615   format.Outdent();
1616   format("};");
1617   GOOGLE_DCHECK(!need_to_emit_cached_size);
1618 }  // NOLINT(readability/fn_size)
1619 
GenerateInlineMethods(io::Printer * printer)1620 void MessageGenerator::GenerateInlineMethods(io::Printer* printer) {
1621   if (IsMapEntryMessage(descriptor_)) return;
1622   GenerateFieldAccessorDefinitions(printer);
1623 
1624   // Generate oneof_case() functions.
1625   for (auto oneof : OneOfRange(descriptor_)) {
1626     Formatter format(printer, variables_);
1627     format.Set("camel_oneof_name", UnderscoresToCamelCase(oneof->name(), true));
1628     format.Set("oneof_name", oneof->name());
1629     format.Set("oneof_index", oneof->index());
1630     format(
1631         "inline $classname$::$camel_oneof_name$Case $classname$::"
1632         "${1$$oneof_name$_case$}$() const {\n"
1633         "  return $classname$::$camel_oneof_name$Case("
1634         "_oneof_case_[$oneof_index$]);\n"
1635         "}\n",
1636         oneof);
1637   }
1638 }
1639 
GenerateExtraDefaultFields(io::Printer * printer)1640 void MessageGenerator::GenerateExtraDefaultFields(io::Printer* printer) {
1641   // Generate oneof default instance and weak field instances for reflection
1642   // usage.
1643   Formatter format(printer, variables_);
1644   for (auto oneof : OneOfRange(descriptor_)) {
1645     for (auto field : FieldRange(oneof)) {
1646       if (!IsFieldUsed(field, options_)) {
1647         continue;
1648       }
1649       if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE ||
1650           (field->cpp_type() == FieldDescriptor::CPPTYPE_STRING &&
1651            EffectiveStringCType(field, options_) != FieldOptions::STRING)) {
1652         format("const ");
1653       }
1654       field_generators_.get(field).GeneratePrivateMembers(printer);
1655     }
1656   }
1657   for (auto field : FieldRange(descriptor_)) {
1658     if (field->options().weak() && IsFieldUsed(field, options_)) {
1659       format("  const ::$proto_ns$::Message* $1$_;\n", FieldName(field));
1660     }
1661   }
1662 }
1663 
GenerateParseTable(io::Printer * printer,size_t offset,size_t aux_offset)1664 bool MessageGenerator::GenerateParseTable(io::Printer* printer, size_t offset,
1665                                           size_t aux_offset) {
1666   Formatter format(printer, variables_);
1667 
1668   if (!table_driven_) {
1669     format("{ nullptr, nullptr, 0, -1, -1, -1, -1, nullptr, false },\n");
1670     return false;
1671   }
1672 
1673   int max_field_number = 0;
1674   for (auto field : FieldRange(descriptor_)) {
1675     if (max_field_number < field->number()) {
1676       max_field_number = field->number();
1677     }
1678   }
1679 
1680   format("{\n");
1681   format.Indent();
1682 
1683   format(
1684       "$tablename$::entries + $1$,\n"
1685       "$tablename$::aux + $2$,\n"
1686       "$3$,\n",
1687       offset, aux_offset, max_field_number);
1688 
1689   if (has_bit_indices_.empty()) {
1690     // If no fields have hasbits, then _has_bits_ does not exist.
1691     format("-1,\n");
1692   } else {
1693     format("PROTOBUF_FIELD_OFFSET($classtype$, _has_bits_),\n");
1694   }
1695 
1696   if (descriptor_->real_oneof_decl_count() > 0) {
1697     format("PROTOBUF_FIELD_OFFSET($classtype$, _oneof_case_),\n");
1698   } else {
1699     format("-1,  // no _oneof_case_\n");
1700   }
1701 
1702   if (descriptor_->extension_range_count() > 0) {
1703     format("PROTOBUF_FIELD_OFFSET($classtype$, _extensions_),\n");
1704   } else {
1705     format("-1,  // no _extensions_\n");
1706   }
1707 
1708   // TODO(ckennelly): Consolidate this with the calculation for
1709   // AuxiliaryParseTableField.
1710   format(
1711       "PROTOBUF_FIELD_OFFSET($classtype$, _internal_metadata_),\n"
1712       "&$package_ns$::_$classname$_default_instance_,\n");
1713 
1714   if (UseUnknownFieldSet(descriptor_->file(), options_)) {
1715     format("true,\n");
1716   } else {
1717     format("false,\n");
1718   }
1719 
1720   format.Outdent();
1721   format("},\n");
1722   return true;
1723 }
1724 
GenerateSchema(io::Printer * printer,int offset,int has_offset)1725 void MessageGenerator::GenerateSchema(io::Printer* printer, int offset,
1726                                       int has_offset) {
1727   Formatter format(printer, variables_);
1728   has_offset = !has_bit_indices_.empty() || IsMapEntryMessage(descriptor_)
1729                    ? offset + has_offset
1730                    : -1;
1731 
1732   format("{ $1$, $2$, sizeof($classtype$)},\n", offset, has_offset);
1733 }
1734 
1735 namespace {
1736 
1737 // We need to calculate for each field what function the table driven code
1738 // should use to serialize it. This returns the index in a lookup table.
CalcFieldNum(const FieldGenerator & generator,const FieldDescriptor * field,const Options & options)1739 uint32 CalcFieldNum(const FieldGenerator& generator,
1740                     const FieldDescriptor* field, const Options& options) {
1741   bool is_a_map = IsMapEntryMessage(field->containing_type());
1742   int type = field->type();
1743   if (type == FieldDescriptor::TYPE_STRING ||
1744       type == FieldDescriptor::TYPE_BYTES) {
1745     if (generator.IsInlined()) {
1746       type = internal::FieldMetadata::kInlinedType;
1747     }
1748     // string field
1749     if (IsCord(field, options)) {
1750       type = internal::FieldMetadata::kCordType;
1751     } else if (IsStringPiece(field, options)) {
1752       type = internal::FieldMetadata::kStringPieceType;
1753     }
1754   }
1755 
1756   if (field->real_containing_oneof()) {
1757     return internal::FieldMetadata::CalculateType(
1758         type, internal::FieldMetadata::kOneOf);
1759   } else if (field->is_packed()) {
1760     return internal::FieldMetadata::CalculateType(
1761         type, internal::FieldMetadata::kPacked);
1762   } else if (field->is_repeated()) {
1763     return internal::FieldMetadata::CalculateType(
1764         type, internal::FieldMetadata::kRepeated);
1765   } else if (HasHasbit(field) || field->real_containing_oneof() || is_a_map) {
1766     return internal::FieldMetadata::CalculateType(
1767         type, internal::FieldMetadata::kPresence);
1768   } else {
1769     return internal::FieldMetadata::CalculateType(
1770         type, internal::FieldMetadata::kNoPresence);
1771   }
1772 }
1773 
FindMessageIndexInFile(const Descriptor * descriptor)1774 int FindMessageIndexInFile(const Descriptor* descriptor) {
1775   std::vector<const Descriptor*> flatten =
1776       FlattenMessagesInFile(descriptor->file());
1777   return std::find(flatten.begin(), flatten.end(), descriptor) -
1778          flatten.begin();
1779 }
1780 
1781 }  // namespace
1782 
GenerateFieldMetadata(io::Printer * printer)1783 int MessageGenerator::GenerateFieldMetadata(io::Printer* printer) {
1784   Formatter format(printer, variables_);
1785   if (!options_.table_driven_serialization) {
1786     return 0;
1787   }
1788 
1789   std::vector<const FieldDescriptor*> sorted = SortFieldsByNumber(descriptor_);
1790   if (IsMapEntryMessage(descriptor_)) {
1791     for (int i = 0; i < 2; i++) {
1792       const FieldDescriptor* field = sorted[i];
1793       const FieldGenerator& generator = field_generators_.get(field);
1794 
1795       uint32 tag = internal::WireFormatLite::MakeTag(
1796           field->number(), WireFormat::WireTypeForFieldType(field->type()));
1797 
1798       std::map<std::string, std::string> vars;
1799       vars["classtype"] = QualifiedClassName(descriptor_, options_);
1800       vars["field_name"] = FieldName(field);
1801       vars["tag"] = StrCat(tag);
1802       vars["hasbit"] = StrCat(i);
1803       vars["type"] = StrCat(CalcFieldNum(generator, field, options_));
1804       vars["ptr"] = "nullptr";
1805       if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
1806         GOOGLE_CHECK(!IsMapEntryMessage(field->message_type()));
1807         vars["ptr"] =
1808             "::" + UniqueName("TableStruct", field->message_type(), options_) +
1809             "::serialization_table + " +
1810             StrCat(FindMessageIndexInFile(field->message_type()));
1811       }
1812       Formatter::SaveState saver(&format);
1813       format.AddMap(vars);
1814       format(
1815           "{PROTOBUF_FIELD_OFFSET("
1816           "::$proto_ns$::internal::MapEntryHelper<$classtype$::"
1817           "SuperType>, $field_name$_), $tag$,"
1818           "PROTOBUF_FIELD_OFFSET("
1819           "::$proto_ns$::internal::MapEntryHelper<$classtype$::"
1820           "SuperType>, _has_bits_) * 8 + $hasbit$, $type$, "
1821           "$ptr$},\n");
1822     }
1823     return 2;
1824   }
1825   format(
1826       "{PROTOBUF_FIELD_OFFSET($classtype$, _cached_size_),"
1827       " 0, 0, 0, nullptr},\n");
1828   std::vector<const Descriptor::ExtensionRange*> sorted_extensions;
1829   sorted_extensions.reserve(descriptor_->extension_range_count());
1830   for (int i = 0; i < descriptor_->extension_range_count(); ++i) {
1831     sorted_extensions.push_back(descriptor_->extension_range(i));
1832   }
1833   std::sort(sorted_extensions.begin(), sorted_extensions.end(),
1834             ExtensionRangeSorter());
1835   for (int i = 0, extension_idx = 0; /* no range */; i++) {
1836     for (; extension_idx < sorted_extensions.size() &&
1837            (i == sorted.size() ||
1838             sorted_extensions[extension_idx]->start < sorted[i]->number());
1839          extension_idx++) {
1840       const Descriptor::ExtensionRange* range =
1841           sorted_extensions[extension_idx];
1842       format(
1843           "{PROTOBUF_FIELD_OFFSET($classtype$, _extensions_), "
1844           "$1$, $2$, ::$proto_ns$::internal::FieldMetadata::kSpecial, "
1845           "reinterpret_cast<const "
1846           "void*>(::$proto_ns$::internal::ExtensionSerializer)},\n",
1847           range->start, range->end);
1848     }
1849     if (i == sorted.size()) break;
1850     const FieldDescriptor* field = sorted[i];
1851 
1852     uint32 tag = internal::WireFormatLite::MakeTag(
1853         field->number(), WireFormat::WireTypeForFieldType(field->type()));
1854     if (field->is_packed()) {
1855       tag = internal::WireFormatLite::MakeTag(
1856           field->number(), WireFormatLite::WIRETYPE_LENGTH_DELIMITED);
1857     }
1858 
1859     std::string classfieldname = FieldName(field);
1860     if (field->real_containing_oneof()) {
1861       classfieldname = field->containing_oneof()->name();
1862     }
1863     format.Set("field_name", classfieldname);
1864     std::string ptr = "nullptr";
1865     if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
1866       if (IsMapEntryMessage(field->message_type())) {
1867         format(
1868             "{PROTOBUF_FIELD_OFFSET($classtype$, $field_name$_), $1$, $2$, "
1869             "::$proto_ns$::internal::FieldMetadata::kSpecial, "
1870             "reinterpret_cast<const void*>(static_cast< "
1871             "::$proto_ns$::internal::SpecialSerializer>("
1872             "::$proto_ns$::internal::MapFieldSerializer< "
1873             "::$proto_ns$::internal::MapEntryToMapField<"
1874             "$3$>::MapFieldType, "
1875             "$tablename$::serialization_table>))},\n",
1876             tag, FindMessageIndexInFile(field->message_type()),
1877             QualifiedClassName(field->message_type(), options_));
1878         continue;
1879       } else if (!field->message_type()->options().message_set_wire_format()) {
1880         // message_set doesn't have the usual table and we need to
1881         // dispatch to generated serializer, hence ptr stays zero.
1882         ptr =
1883             "::" + UniqueName("TableStruct", field->message_type(), options_) +
1884             "::serialization_table + " +
1885             StrCat(FindMessageIndexInFile(field->message_type()));
1886       }
1887     }
1888 
1889     const FieldGenerator& generator = field_generators_.get(field);
1890     int type = CalcFieldNum(generator, field, options_);
1891 
1892     if (IsLazy(field, options_)) {
1893       type = internal::FieldMetadata::kSpecial;
1894       ptr = "reinterpret_cast<const void*>(::" + variables_["proto_ns"] +
1895             "::internal::LazyFieldSerializer";
1896       if (field->real_containing_oneof()) {
1897         ptr += "OneOf";
1898       } else if (!HasHasbit(field)) {
1899         ptr += "NoPresence";
1900       }
1901       ptr += ")";
1902     }
1903 
1904     if (field->options().weak()) {
1905       // TODO(gerbens) merge weak fields into ranges
1906       format(
1907           "{PROTOBUF_FIELD_OFFSET("
1908           "$classtype$, _weak_field_map_), $1$, $1$, "
1909           "::$proto_ns$::internal::FieldMetadata::kSpecial, "
1910           "reinterpret_cast<const "
1911           "void*>(::$proto_ns$::internal::WeakFieldSerializer)},\n",
1912           tag);
1913     } else if (field->real_containing_oneof()) {
1914       format.Set("oneofoffset",
1915                  sizeof(uint32) * field->containing_oneof()->index());
1916       format(
1917           "{PROTOBUF_FIELD_OFFSET($classtype$, $field_name$_), $1$,"
1918           " PROTOBUF_FIELD_OFFSET($classtype$, _oneof_case_) + "
1919           "$oneofoffset$, $2$, $3$},\n",
1920           tag, type, ptr);
1921     } else if (HasHasbit(field)) {
1922       format.Set("hasbitsoffset", has_bit_indices_[field->index()]);
1923       format(
1924           "{PROTOBUF_FIELD_OFFSET($classtype$, $field_name$_), "
1925           "$1$, PROTOBUF_FIELD_OFFSET($classtype$, _has_bits_) * 8 + "
1926           "$hasbitsoffset$, $2$, $3$},\n",
1927           tag, type, ptr);
1928     } else {
1929       format(
1930           "{PROTOBUF_FIELD_OFFSET($classtype$, $field_name$_), "
1931           "$1$, ~0u, $2$, $3$},\n",
1932           tag, type, ptr);
1933     }
1934   }
1935   int num_field_metadata = 1 + sorted.size() + sorted_extensions.size();
1936   num_field_metadata++;
1937   std::string serializer = UseUnknownFieldSet(descriptor_->file(), options_)
1938                                ? "UnknownFieldSetSerializer"
1939                                : "UnknownFieldSerializerLite";
1940   format(
1941       "{PROTOBUF_FIELD_OFFSET($classtype$, _internal_metadata_), 0, ~0u, "
1942       "::$proto_ns$::internal::FieldMetadata::kSpecial, reinterpret_cast<const "
1943       "void*>(::$proto_ns$::internal::$1$)},\n",
1944       serializer);
1945   return num_field_metadata;
1946 }
1947 
GenerateFieldDefaultInstances(io::Printer * printer)1948 void MessageGenerator::GenerateFieldDefaultInstances(io::Printer* printer) {
1949   // Construct the default instances for all fields that need one.
1950   for (auto field : FieldRange(descriptor_)) {
1951     field_generators_.get(field).GenerateDefaultInstanceAllocator(printer);
1952   }
1953 }
1954 
GenerateDefaultInstanceInitializer(io::Printer * printer)1955 void MessageGenerator::GenerateDefaultInstanceInitializer(
1956     io::Printer* printer) {
1957   Formatter format(printer, variables_);
1958 
1959   // The default instance needs all of its embedded message pointers
1960   // cross-linked to other default instances.  We can't do this initialization
1961   // in the constructor because some other default instances may not have been
1962   // constructed yet at that time.
1963   // TODO(kenton):  Maybe all message fields (even for non-default messages)
1964   //   should be initialized to point at default instances rather than NULL?
1965   for (auto field : FieldRange(descriptor_)) {
1966     if (!IsFieldUsed(field, options_)) {
1967       continue;
1968     }
1969     Formatter::SaveState saver(&format);
1970 
1971     if (!field->is_repeated() && !IsLazy(field, options_) &&
1972         field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE &&
1973         (!field->real_containing_oneof() ||
1974          HasDescriptorMethods(descriptor_->file(), options_))) {
1975       std::string name;
1976       if (field->real_containing_oneof() || field->options().weak()) {
1977         name = "_" + classname_ + "_default_instance_.";
1978       } else {
1979         name =
1980             "_" + classname_ + "_default_instance_._instance.get_mutable()->";
1981       }
1982       name += FieldName(field);
1983       format.Set("name", name);
1984       if (IsWeak(field, options_)) {
1985         format(
1986             "$package_ns$::$name$_ = reinterpret_cast<const "
1987             "::$proto_ns$::Message*>(&$1$);\n"
1988             "if ($package_ns$::$name$_ == nullptr) {\n"
1989             "  $package_ns$::$name$_ = "
1990             "::$proto_ns$::Empty::internal_default_instance();\n"
1991             "}\n",
1992             QualifiedDefaultInstanceName(field->message_type(),
1993                                          options_));  // 1
1994         continue;
1995       }
1996       if (IsImplicitWeakField(field, options_, scc_analyzer_)) {
1997         format(
1998             "$package_ns$::$name$_ = reinterpret_cast<$1$*>(\n"
1999             "    $2$);\n",
2000             FieldMessageTypeName(field, options_),
2001             QualifiedDefaultInstancePtr(field->message_type(), options_));
2002       } else {
2003         format(
2004             "$package_ns$::$name$_ = const_cast< $1$*>(\n"
2005             "    $1$::internal_default_instance());\n",
2006             FieldMessageTypeName(field, options_));
2007       }
2008     } else if (field->real_containing_oneof() &&
2009                HasDescriptorMethods(descriptor_->file(), options_)) {
2010       field_generators_.get(field).GenerateConstructorCode(printer);
2011     }
2012   }
2013 }
2014 
GenerateClassMethods(io::Printer * printer)2015 void MessageGenerator::GenerateClassMethods(io::Printer* printer) {
2016   Formatter format(printer, variables_);
2017   if (IsMapEntryMessage(descriptor_)) {
2018     format(
2019         "$classname$::$classname$() {}\n"
2020         "$classname$::$classname$(::$proto_ns$::Arena* arena)\n"
2021         "    : SuperType(arena) {}\n"
2022         "void $classname$::MergeFrom(const $classname$& other) {\n"
2023         "  MergeFromInternal(other);\n"
2024         "}\n");
2025     if (HasDescriptorMethods(descriptor_->file(), options_)) {
2026       format(
2027           "::$proto_ns$::Metadata $classname$::GetMetadata() const {\n"
2028           "  return GetMetadataStatic();\n"
2029           "}\n");
2030       format(
2031           "void $classname$::MergeFrom(\n"
2032           "    const ::$proto_ns$::Message& other) {\n"
2033           "  ::$proto_ns$::Message::MergeFrom(other);\n"
2034           "}\n"
2035           "\n");
2036     }
2037     return;
2038   }
2039 
2040   // TODO(gerbens) Remove this function. With a little bit of cleanup and
2041   // refactoring this is superfluous.
2042   format("void $classname$::InitAsDefaultInstance() {\n");
2043   format.Indent();
2044   GenerateDefaultInstanceInitializer(printer);
2045   format.Outdent();
2046   format("}\n");
2047 
2048   if (IsAnyMessage(descriptor_, options_)) {
2049     if (HasDescriptorMethods(descriptor_->file(), options_)) {
2050       format(
2051           "bool $classname$::GetAnyFieldDescriptors(\n"
2052           "    const ::$proto_ns$::Message& message,\n"
2053           "    const ::$proto_ns$::FieldDescriptor** type_url_field,\n"
2054           "    const ::$proto_ns$::FieldDescriptor** value_field) {\n"
2055           "  return ::$proto_ns$::internal::GetAnyFieldDescriptors(\n"
2056           "      message, type_url_field, value_field);\n"
2057           "}\n");
2058     }
2059     format(
2060         "bool $classname$::ParseAnyTypeUrl(const string& type_url,\n"
2061         "                                  std::string* full_type_name) {\n"
2062         "  return ::$proto_ns$::internal::ParseAnyTypeUrl(type_url,\n"
2063         "                                             full_type_name);\n"
2064         "}\n"
2065         "\n");
2066   }
2067 
2068   format(
2069       "class $classname$::_Internal {\n"
2070       " public:\n");
2071   format.Indent();
2072   if (!has_bit_indices_.empty()) {
2073     format(
2074         "using HasBits = decltype(std::declval<$classname$>()._has_bits_);\n");
2075   }
2076   for (auto field : FieldRange(descriptor_)) {
2077     field_generators_.get(field).GenerateInternalAccessorDeclarations(printer);
2078     if (!IsFieldUsed(field, options_)) {
2079       continue;
2080     }
2081     if (HasHasbit(field)) {
2082       int has_bit_index = HasBitIndex(field);
2083       GOOGLE_CHECK_NE(has_bit_index, kNoHasbit) << field->full_name();
2084       format(
2085           "static void set_has_$1$(HasBits* has_bits) {\n"
2086           "  (*has_bits)[$2$] |= $3$u;\n"
2087           "}\n",
2088           FieldName(field), has_bit_index / 32, (1u << (has_bit_index % 32)));
2089     }
2090   }
2091   if (num_required_fields_ > 0) {
2092     const std::vector<uint32> masks_for_has_bits = RequiredFieldsBitMask();
2093     format(
2094         "static bool MissingRequiredFields(const HasBits& has_bits) "
2095         "{\n"
2096         "  return $1$;\n"
2097         "}\n",
2098         ConditionalToCheckBitmasks(masks_for_has_bits, false, "has_bits"));
2099   }
2100 
2101   format.Outdent();
2102   format("};\n\n");
2103   for (auto field : FieldRange(descriptor_)) {
2104     if (IsFieldUsed(field, options_)) {
2105       field_generators_.get(field).GenerateInternalAccessorDefinitions(printer);
2106     }
2107   }
2108 
2109   // Generate non-inline field definitions.
2110   for (auto field : FieldRange(descriptor_)) {
2111     if (!IsFieldUsed(field, options_)) {
2112       continue;
2113     }
2114     field_generators_.get(field).GenerateNonInlineAccessorDefinitions(printer);
2115     if (IsCrossFileMaybeMap(field)) {
2116       Formatter::SaveState saver(&format);
2117       std::map<std::string, std::string> vars;
2118       SetCommonFieldVariables(field, &vars, options_);
2119       if (field->real_containing_oneof()) {
2120         SetCommonOneofFieldVariables(field, &vars);
2121       }
2122       format.AddMap(vars);
2123       GenerateFieldClear(field, false, format);
2124     }
2125   }
2126 
2127   GenerateStructors(printer);
2128   format("\n");
2129 
2130   if (descriptor_->real_oneof_decl_count() > 0) {
2131     GenerateOneofClear(printer);
2132     format("\n");
2133   }
2134 
2135   if (HasGeneratedMethods(descriptor_->file(), options_)) {
2136     GenerateClear(printer);
2137     format("\n");
2138 
2139     GenerateMergeFromCodedStream(printer);
2140     format("\n");
2141 
2142     GenerateSerializeWithCachedSizesToArray(printer);
2143     format("\n");
2144 
2145     GenerateByteSize(printer);
2146     format("\n");
2147 
2148     GenerateMergeFrom(printer);
2149     format("\n");
2150 
2151     GenerateClassSpecificMergeFrom(printer);
2152     format("\n");
2153 
2154     GenerateCopyFrom(printer);
2155     format("\n");
2156 
2157     GenerateIsInitialized(printer);
2158     format("\n");
2159   }
2160 
2161   GenerateSwap(printer);
2162   format("\n");
2163 
2164   if (options_.table_driven_serialization) {
2165     format(
2166         "const void* $classname$::InternalGetTable() const {\n"
2167         "  return ::$tablename$::serialization_table + $1$;\n"
2168         "}\n"
2169         "\n",
2170         index_in_file_messages_);
2171   }
2172   if (HasDescriptorMethods(descriptor_->file(), options_)) {
2173     format(
2174         "::$proto_ns$::Metadata $classname$::GetMetadata() const {\n"
2175         "  return GetMetadataStatic();\n"
2176         "}\n"
2177         "\n");
2178   } else {
2179     format(
2180         "std::string $classname$::GetTypeName() const {\n"
2181         "  return \"$full_name$\";\n"
2182         "}\n"
2183         "\n");
2184   }
2185 
2186 }
2187 
GenerateParseOffsets(io::Printer * printer)2188 size_t MessageGenerator::GenerateParseOffsets(io::Printer* printer) {
2189   Formatter format(printer, variables_);
2190 
2191   if (!table_driven_) {
2192     return 0;
2193   }
2194 
2195   // Field "0" is special:  We use it in our switch statement of processing
2196   // types to handle the successful end tag case.
2197   format("{0, 0, 0, ::$proto_ns$::internal::kInvalidMask, 0, 0},\n");
2198   int last_field_number = 1;
2199 
2200   std::vector<const FieldDescriptor*> ordered_fields =
2201       SortFieldsByNumber(descriptor_);
2202 
2203   for (auto field : ordered_fields) {
2204     Formatter::SaveState saver(&format);
2205     GOOGLE_CHECK_GE(field->number(), last_field_number);
2206 
2207     for (; last_field_number < field->number(); last_field_number++) {
2208       format(
2209           "{ 0, 0, ::$proto_ns$::internal::kInvalidMask,\n"
2210           "  ::$proto_ns$::internal::kInvalidMask, 0, 0 },\n");
2211     }
2212     last_field_number++;
2213 
2214     unsigned char normal_wiretype, packed_wiretype, processing_type;
2215     normal_wiretype = WireFormat::WireTypeForFieldType(field->type());
2216 
2217     if (field->is_packable()) {
2218       packed_wiretype = WireFormatLite::WIRETYPE_LENGTH_DELIMITED;
2219     } else {
2220       packed_wiretype = internal::kNotPackedMask;
2221     }
2222 
2223     processing_type = static_cast<unsigned>(field->type());
2224     const FieldGenerator& generator = field_generators_.get(field);
2225     if (field->type() == FieldDescriptor::TYPE_STRING) {
2226       switch (EffectiveStringCType(field, options_)) {
2227         case FieldOptions::STRING:
2228           if (generator.IsInlined()) {
2229             processing_type = internal::TYPE_STRING_INLINED;
2230             break;
2231           }
2232           break;
2233         case FieldOptions::CORD:
2234           processing_type = internal::TYPE_STRING_CORD;
2235           break;
2236         case FieldOptions::STRING_PIECE:
2237           processing_type = internal::TYPE_STRING_STRING_PIECE;
2238           break;
2239       }
2240     } else if (field->type() == FieldDescriptor::TYPE_BYTES) {
2241       switch (EffectiveStringCType(field, options_)) {
2242         case FieldOptions::STRING:
2243           if (generator.IsInlined()) {
2244             processing_type = internal::TYPE_BYTES_INLINED;
2245             break;
2246           }
2247           break;
2248         case FieldOptions::CORD:
2249           processing_type = internal::TYPE_BYTES_CORD;
2250           break;
2251         case FieldOptions::STRING_PIECE:
2252           processing_type = internal::TYPE_BYTES_STRING_PIECE;
2253           break;
2254       }
2255     }
2256 
2257     processing_type |= static_cast<unsigned>(
2258         field->is_repeated() ? internal::kRepeatedMask : 0);
2259     processing_type |= static_cast<unsigned>(
2260         field->real_containing_oneof() ? internal::kOneofMask : 0);
2261 
2262     if (field->is_map()) {
2263       processing_type = internal::TYPE_MAP;
2264     }
2265 
2266     const unsigned char tag_size =
2267         WireFormat::TagSize(field->number(), field->type());
2268 
2269     std::map<std::string, std::string> vars;
2270     if (field->real_containing_oneof()) {
2271       vars["name"] = field->containing_oneof()->name();
2272       vars["presence"] = StrCat(field->containing_oneof()->index());
2273     } else {
2274       vars["name"] = FieldName(field);
2275       vars["presence"] = StrCat(has_bit_indices_[field->index()]);
2276     }
2277     vars["nwtype"] = StrCat(normal_wiretype);
2278     vars["pwtype"] = StrCat(packed_wiretype);
2279     vars["ptype"] = StrCat(processing_type);
2280     vars["tag_size"] = StrCat(tag_size);
2281 
2282     format.AddMap(vars);
2283 
2284     format(
2285         "{\n"
2286         "  PROTOBUF_FIELD_OFFSET($classtype$, $name$_),\n"
2287         "  static_cast<$uint32$>($presence$),\n"
2288         "  $nwtype$, $pwtype$, $ptype$, $tag_size$\n"
2289         "},\n");
2290   }
2291 
2292   return last_field_number;
2293 }
2294 
GenerateParseAuxTable(io::Printer * printer)2295 size_t MessageGenerator::GenerateParseAuxTable(io::Printer* printer) {
2296   Formatter format(printer, variables_);
2297 
2298   if (!table_driven_) {
2299     return 0;
2300   }
2301 
2302   std::vector<const FieldDescriptor*> ordered_fields =
2303       SortFieldsByNumber(descriptor_);
2304 
2305   format("::$proto_ns$::internal::AuxiliaryParseTableField(),\n");
2306   int last_field_number = 1;
2307   for (auto field : ordered_fields) {
2308     Formatter::SaveState saver(&format);
2309 
2310     GOOGLE_CHECK_GE(field->number(), last_field_number);
2311     for (; last_field_number < field->number(); last_field_number++) {
2312       format("::$proto_ns$::internal::AuxiliaryParseTableField(),\n");
2313     }
2314 
2315     std::map<std::string, std::string> vars;
2316     SetCommonFieldVariables(field, &vars, options_);
2317     format.AddMap(vars);
2318 
2319     switch (field->cpp_type()) {
2320       case FieldDescriptor::CPPTYPE_ENUM:
2321         if (HasPreservingUnknownEnumSemantics(field)) {
2322           format(
2323               "{::$proto_ns$::internal::AuxiliaryParseTableField::enum_aux{"
2324               "nullptr}},\n");
2325         } else {
2326           format(
2327               "{::$proto_ns$::internal::AuxiliaryParseTableField::enum_aux{"
2328               "$1$_IsValid}},\n",
2329               ClassName(field->enum_type(), true));
2330         }
2331         last_field_number++;
2332         break;
2333       case FieldDescriptor::CPPTYPE_MESSAGE: {
2334         if (field->is_map()) {
2335           format(
2336               "{::$proto_ns$::internal::AuxiliaryParseTableField::map_"
2337               "aux{&::$proto_ns$::internal::ParseMap<$1$>}},\n",
2338               QualifiedClassName(field->message_type(), options_));
2339           last_field_number++;
2340           break;
2341         }
2342         format.Set("field_classname", ClassName(field->message_type(), false));
2343         format.Set("default_instance", QualifiedDefaultInstanceName(
2344                                            field->message_type(), options_));
2345 
2346         format(
2347             "{::$proto_ns$::internal::AuxiliaryParseTableField::message_aux{\n"
2348             "  &$default_instance$}},\n");
2349         last_field_number++;
2350         break;
2351       }
2352       case FieldDescriptor::CPPTYPE_STRING: {
2353         std::string default_val;
2354         switch (EffectiveStringCType(field, options_)) {
2355           case FieldOptions::STRING:
2356             default_val = field->default_value_string().empty()
2357                               ? "&::" + variables_["proto_ns"] +
2358                                     "::internal::fixed_address_empty_string"
2359                               : "&" +
2360                                     QualifiedClassName(descriptor_, options_) +
2361                                     "::" + MakeDefaultName(field);
2362             break;
2363           case FieldOptions::CORD:
2364           case FieldOptions::STRING_PIECE:
2365             default_val =
2366                 "\"" + CEscape(field->default_value_string()) + "\"";
2367             break;
2368         }
2369         format(
2370             "{::$proto_ns$::internal::AuxiliaryParseTableField::string_aux{\n"
2371             "  $1$,\n"
2372             "  \"$2$\"\n"
2373             "}},\n",
2374             default_val, field->full_name());
2375         last_field_number++;
2376         break;
2377       }
2378       default:
2379         break;
2380     }
2381   }
2382 
2383   return last_field_number;
2384 }
2385 
GenerateOffsets(io::Printer * printer)2386 std::pair<size_t, size_t> MessageGenerator::GenerateOffsets(
2387     io::Printer* printer) {
2388   Formatter format(printer, variables_);
2389 
2390   if (!has_bit_indices_.empty() || IsMapEntryMessage(descriptor_)) {
2391     format("PROTOBUF_FIELD_OFFSET($classtype$, _has_bits_),\n");
2392   } else {
2393     format("~0u,  // no _has_bits_\n");
2394   }
2395   format("PROTOBUF_FIELD_OFFSET($classtype$, _internal_metadata_),\n");
2396   if (descriptor_->extension_range_count() > 0) {
2397     format("PROTOBUF_FIELD_OFFSET($classtype$, _extensions_),\n");
2398   } else {
2399     format("~0u,  // no _extensions_\n");
2400   }
2401   if (descriptor_->real_oneof_decl_count() > 0) {
2402     format("PROTOBUF_FIELD_OFFSET($classtype$, _oneof_case_[0]),\n");
2403   } else {
2404     format("~0u,  // no _oneof_case_\n");
2405   }
2406   if (num_weak_fields_ > 0) {
2407     format("PROTOBUF_FIELD_OFFSET($classtype$, _weak_field_map_),\n");
2408   } else {
2409     format("~0u,  // no _weak_field_map_\n");
2410   }
2411   const int kNumGenericOffsets = 5;  // the number of fixed offsets above
2412   const size_t offsets = kNumGenericOffsets + descriptor_->field_count() +
2413                          descriptor_->real_oneof_decl_count();
2414   size_t entries = offsets;
2415   for (auto field : FieldRange(descriptor_)) {
2416     if (!IsFieldUsed(field, options_)) {
2417       format("~0u,  // stripped\n");
2418       continue;
2419     }
2420     if (field->real_containing_oneof() || field->options().weak()) {
2421       format("offsetof($classtype$DefaultTypeInternal, $1$_)",
2422              FieldName(field));
2423     } else {
2424       format("PROTOBUF_FIELD_OFFSET($classtype$, $1$_)", FieldName(field));
2425     }
2426 
2427     uint32 tag = field_generators_.get(field).CalculateFieldTag();
2428     if (tag != 0) {
2429       format(" | $1$", tag);
2430     }
2431 
2432     format(",\n");
2433   }
2434 
2435   int count = 0;
2436   for (auto oneof : OneOfRange(descriptor_)) {
2437     format("PROTOBUF_FIELD_OFFSET($classtype$, $1$_),\n", oneof->name());
2438     count++;
2439   }
2440   GOOGLE_CHECK_EQ(count, descriptor_->real_oneof_decl_count());
2441 
2442   if (IsMapEntryMessage(descriptor_)) {
2443     entries += 2;
2444     format(
2445         "0,\n"
2446         "1,\n");
2447   } else if (!has_bit_indices_.empty()) {
2448     entries += has_bit_indices_.size();
2449     for (int i = 0; i < has_bit_indices_.size(); i++) {
2450       const std::string index =
2451           has_bit_indices_[i] >= 0 ? StrCat(has_bit_indices_[i]) : "~0u";
2452       format("$1$,\n", index);
2453     }
2454   }
2455 
2456   return std::make_pair(entries, offsets);
2457 }
2458 
GenerateSharedConstructorCode(io::Printer * printer)2459 void MessageGenerator::GenerateSharedConstructorCode(io::Printer* printer) {
2460   Formatter format(printer, variables_);
2461 
2462   format("void $classname$::SharedCtor() {\n");
2463   if (scc_analyzer_->GetSCCAnalysis(scc_analyzer_->GetSCC(descriptor_))
2464           .constructor_requires_initialization) {
2465     format("  ::$proto_ns$::internal::InitSCC(&$scc_info$.base);\n");
2466   }
2467 
2468   format.Indent();
2469 
2470   std::vector<bool> processed(optimized_order_.size(), false);
2471   GenerateConstructorBody(printer, processed, false);
2472 
2473   for (auto oneof : OneOfRange(descriptor_)) {
2474     format("clear_has_$1$();\n", oneof->name());
2475   }
2476 
2477   format.Outdent();
2478   format("}\n\n");
2479 }
2480 
GenerateSharedDestructorCode(io::Printer * printer)2481 void MessageGenerator::GenerateSharedDestructorCode(io::Printer* printer) {
2482   Formatter format(printer, variables_);
2483 
2484   format("void $classname$::SharedDtor() {\n");
2485   format.Indent();
2486   if (SupportsArenas(descriptor_)) {
2487     format("$DCHK$(GetArena() == nullptr);\n");
2488   }
2489   // Write the destructors for each field except oneof members.
2490   // optimized_order_ does not contain oneof fields.
2491   for (auto field : optimized_order_) {
2492     field_generators_.get(field).GenerateDestructorCode(printer);
2493   }
2494 
2495   // Generate code to destruct oneofs. Clearing should do the work.
2496   for (auto oneof : OneOfRange(descriptor_)) {
2497     format(
2498         "if (has_$1$()) {\n"
2499         "  clear_$1$();\n"
2500         "}\n",
2501         oneof->name());
2502   }
2503 
2504   if (num_weak_fields_) {
2505     format("_weak_field_map_.ClearAll();\n");
2506   }
2507   format.Outdent();
2508   format(
2509       "}\n"
2510       "\n");
2511 }
2512 
GenerateArenaDestructorCode(io::Printer * printer)2513 void MessageGenerator::GenerateArenaDestructorCode(io::Printer* printer) {
2514   Formatter format(printer, variables_);
2515 
2516   // Generate the ArenaDtor() method. Track whether any fields actually produced
2517   // code that needs to be called.
2518   format("void $classname$::ArenaDtor(void* object) {\n");
2519   format.Indent();
2520 
2521   // This code is placed inside a static method, rather than an ordinary one,
2522   // since that simplifies Arena's destructor list (ordinary function pointers
2523   // rather than member function pointers). _this is the object being
2524   // destructed.
2525   format(
2526       "$classname$* _this = reinterpret_cast< $classname$* >(object);\n"
2527       // avoid an "unused variable" warning in case no fields have dtor code.
2528       "(void)_this;\n");
2529 
2530   bool need_registration = false;
2531   // Process non-oneof fields first.
2532   for (auto field : optimized_order_) {
2533     if (field_generators_.get(field).GenerateArenaDestructorCode(printer)) {
2534       need_registration = true;
2535     }
2536   }
2537 
2538   // Process oneof fields.
2539   //
2540   // Note:  As of 10/5/2016, GenerateArenaDestructorCode does not emit anything
2541   // and returns false for oneof fields.
2542   for (auto oneof : OneOfRange(descriptor_)) {
2543     for (auto field : FieldRange(oneof)) {
2544       if (IsFieldUsed(field, options_) &&
2545           field_generators_.get(field).GenerateArenaDestructorCode(printer)) {
2546         need_registration = true;
2547       }
2548     }
2549   }
2550   if (num_weak_fields_) {
2551     // _this is the object being destructed (we are inside a static method
2552     // here).
2553     format("_this->_weak_field_map_.ClearAll();\n");
2554     need_registration = true;
2555   }
2556 
2557   format.Outdent();
2558   format("}\n");
2559 
2560   if (need_registration) {
2561     format(
2562         "inline void $classname$::RegisterArenaDtor(::$proto_ns$::Arena* "
2563         "arena) {\n"
2564         "  if (arena != nullptr) {\n"
2565         "    arena->OwnCustomDestructor(this, &$classname$::ArenaDtor);\n"
2566         "  }\n"
2567         "}\n");
2568   } else {
2569     format(
2570         "void $classname$::RegisterArenaDtor(::$proto_ns$::Arena*) {\n"
2571         "}\n");
2572   }
2573 }
2574 
GenerateConstructorBody(io::Printer * printer,std::vector<bool> processed,bool copy_constructor) const2575 void MessageGenerator::GenerateConstructorBody(io::Printer* printer,
2576                                                std::vector<bool> processed,
2577                                                bool copy_constructor) const {
2578   Formatter format(printer, variables_);
2579 
2580   const RunMap runs = FindRuns(
2581       optimized_order_, [copy_constructor, this](const FieldDescriptor* field) {
2582         return (copy_constructor && IsPOD(field)) ||
2583                (!copy_constructor &&
2584                 CanBeManipulatedAsRawBytes(field, options_));
2585       });
2586 
2587   std::string pod_template;
2588   if (copy_constructor) {
2589     pod_template =
2590         "::memcpy(&$first$_, &from.$first$_,\n"
2591         "  static_cast<size_t>(reinterpret_cast<char*>(&$last$_) -\n"
2592         "  reinterpret_cast<char*>(&$first$_)) + sizeof($last$_));\n";
2593   } else {
2594     pod_template =
2595         "::memset(&$first$_, 0, static_cast<size_t>(\n"
2596         "    reinterpret_cast<char*>(&$last$_) -\n"
2597         "    reinterpret_cast<char*>(&$first$_)) + sizeof($last$_));\n";
2598   }
2599 
2600   for (int i = 0; i < optimized_order_.size(); ++i) {
2601     if (processed[i]) {
2602       continue;
2603     }
2604 
2605     const FieldDescriptor* field = optimized_order_[i];
2606     const auto it = runs.find(field);
2607 
2608     // We only apply the memset technique to runs of more than one field, as
2609     // assignment is better than memset for generated code clarity.
2610     if (it != runs.end() && it->second > 1) {
2611       // Use a memset, then skip run_length fields.
2612       const size_t run_length = it->second;
2613       const std::string first_field_name = FieldName(field);
2614       const std::string last_field_name =
2615           FieldName(optimized_order_[i + run_length - 1]);
2616 
2617       format.Set("first", first_field_name);
2618       format.Set("last", last_field_name);
2619 
2620       format(pod_template.c_str());
2621 
2622       i += run_length - 1;
2623       // ++i at the top of the loop.
2624     } else {
2625       if (copy_constructor) {
2626         field_generators_.get(field).GenerateCopyConstructorCode(printer);
2627       } else {
2628         field_generators_.get(field).GenerateConstructorCode(printer);
2629       }
2630     }
2631   }
2632 }
2633 
GenerateStructors(io::Printer * printer)2634 void MessageGenerator::GenerateStructors(io::Printer* printer) {
2635   Formatter format(printer, variables_);
2636 
2637   std::string superclass;
2638   superclass = SuperClassName(descriptor_, options_);
2639   std::string initializer_with_arena = superclass + "(arena)";
2640 
2641   if (descriptor_->extension_range_count() > 0) {
2642     initializer_with_arena += ",\n  _extensions_(arena)";
2643   }
2644 
2645   // Initialize member variables with arena constructor.
2646   for (auto field : optimized_order_) {
2647     GOOGLE_DCHECK(IsFieldUsed(field, options_));
2648     bool has_arena_constructor = field->is_repeated();
2649     if (!field->real_containing_oneof() &&
2650         (IsLazy(field, options_) || IsStringPiece(field, options_))) {
2651       has_arena_constructor = true;
2652     }
2653     if (has_arena_constructor) {
2654       initializer_with_arena +=
2655           std::string(",\n  ") + FieldName(field) + std::string("_(arena)");
2656     }
2657   }
2658 
2659   if (IsAnyMessage(descriptor_, options_)) {
2660     initializer_with_arena += ",\n  _any_metadata_(&type_url_, &value_)";
2661   }
2662   if (num_weak_fields_ > 0) {
2663     initializer_with_arena += ", _weak_field_map_(arena)";
2664   }
2665 
2666   std::string initializer_null = superclass + "()";
2667   if (IsAnyMessage(descriptor_, options_)) {
2668     initializer_null += ", _any_metadata_(&type_url_, &value_)";
2669   }
2670   if (num_weak_fields_ > 0) {
2671     initializer_null += ", _weak_field_map_(nullptr)";
2672   }
2673 
2674   if (SupportsArenas(descriptor_)) {
2675     format(
2676         "$classname$::$classname$(::$proto_ns$::Arena* arena)\n"
2677         "  : $1$ {\n"
2678         "  SharedCtor();\n"
2679         "  RegisterArenaDtor(arena);\n"
2680         "  // @@protoc_insertion_point(arena_constructor:$full_name$)\n"
2681         "}\n",
2682         initializer_with_arena);
2683   } else {
2684     format(
2685         "$classname$::$classname$()\n"
2686         "  : $1$ {\n"
2687         "  SharedCtor();\n"
2688         "  // @@protoc_insertion_point(constructor:$full_name$)\n"
2689         "}\n",
2690         initializer_null);
2691   }
2692 
2693   std::map<std::string, std::string> vars;
2694   SetUnknkownFieldsVariable(descriptor_, options_, &vars);
2695   format.AddMap(vars);
2696 
2697   // Generate the copy constructor.
2698   if (UsingImplicitWeakFields(descriptor_->file(), options_)) {
2699     // If we are in lite mode and using implicit weak fields, we generate a
2700     // one-liner copy constructor that delegates to MergeFrom. This saves some
2701     // code size and also cuts down on the complexity of implicit weak fields.
2702     // We might eventually want to do this for all lite protos.
2703     format(
2704         "$classname$::$classname$(const $classname$& from)\n"
2705         "  : $classname$() {\n"
2706         "  MergeFrom(from);\n"
2707         "}\n");
2708   } else {
2709     format(
2710         "$classname$::$classname$(const $classname$& from)\n"
2711         "  : $superclass$()");
2712     format.Indent();
2713     format.Indent();
2714     format.Indent();
2715 
2716     if (!has_bit_indices_.empty()) {
2717       format(",\n_has_bits_(from._has_bits_)");
2718     }
2719 
2720     std::vector<bool> processed(optimized_order_.size(), false);
2721     for (int i = 0; i < optimized_order_.size(); i++) {
2722       auto field = optimized_order_[i];
2723       if (!(field->is_repeated() && !(field->is_map())) &&
2724           !IsCord(field, options_)) {
2725         continue;
2726       }
2727 
2728       processed[i] = true;
2729       format(",\n$1$_(from.$1$_)", FieldName(field));
2730     }
2731 
2732     if (IsAnyMessage(descriptor_, options_)) {
2733       format(",\n_any_metadata_(&type_url_, &value_)");
2734     }
2735     if (num_weak_fields_ > 0) {
2736       format(",\n_weak_field_map_(from._weak_field_map_)");
2737     }
2738 
2739     format.Outdent();
2740     format.Outdent();
2741     format(" {\n");
2742 
2743     format(
2744         "_internal_metadata_.MergeFrom<$unknown_fields_type$>(from._internal_"
2745         "metadata_);\n");
2746 
2747     if (descriptor_->extension_range_count() > 0) {
2748       format("_extensions_.MergeFrom(from._extensions_);\n");
2749     }
2750 
2751     GenerateConstructorBody(printer, processed, true);
2752 
2753     // Copy oneof fields. Oneof field requires oneof case check.
2754     for (auto oneof : OneOfRange(descriptor_)) {
2755       format(
2756           "clear_has_$1$();\n"
2757           "switch (from.$1$_case()) {\n",
2758           oneof->name());
2759       format.Indent();
2760       for (auto field : FieldRange(oneof)) {
2761         format("case k$1$: {\n", UnderscoresToCamelCase(field->name(), true));
2762         format.Indent();
2763         if (IsFieldUsed(field, options_)) {
2764           field_generators_.get(field).GenerateMergingCode(printer);
2765         }
2766         format("break;\n");
2767         format.Outdent();
2768         format("}\n");
2769       }
2770       format(
2771           "case $1$_NOT_SET: {\n"
2772           "  break;\n"
2773           "}\n",
2774           ToUpper(oneof->name()));
2775       format.Outdent();
2776       format("}\n");
2777     }
2778 
2779     format.Outdent();
2780     format(
2781         "  // @@protoc_insertion_point(copy_constructor:$full_name$)\n"
2782         "}\n"
2783         "\n");
2784   }
2785 
2786   // Generate the shared constructor code.
2787   GenerateSharedConstructorCode(printer);
2788 
2789   // Generate the destructor.
2790   format(
2791       "$classname$::~$classname$() {\n"
2792       "  // @@protoc_insertion_point(destructor:$full_name$)\n"
2793       "  SharedDtor();\n"
2794       "  _internal_metadata_.Delete<$unknown_fields_type$>();\n"
2795       "}\n"
2796       "\n");
2797 
2798   // Generate the shared destructor code.
2799   GenerateSharedDestructorCode(printer);
2800 
2801   // Generate the arena-specific destructor code.
2802   if (SupportsArenas(descriptor_)) {
2803     GenerateArenaDestructorCode(printer);
2804   }
2805 
2806   // Generate SetCachedSize.
2807   format(
2808       "void $classname$::SetCachedSize(int size) const {\n"
2809       "  _cached_size_.Set(size);\n"
2810       "}\n");
2811 
2812   format(
2813       "const $classname$& $classname$::default_instance() {\n"
2814       "  "
2815       "::$proto_ns$::internal::InitSCC(&::$scc_info$.base)"
2816       ";\n"
2817       "  return *internal_default_instance();\n"
2818       "}\n\n");
2819 }
2820 
GenerateSourceInProto2Namespace(io::Printer * printer)2821 void MessageGenerator::GenerateSourceInProto2Namespace(io::Printer* printer) {
2822   Formatter format(printer, variables_);
2823   format(
2824       "template<> "
2825       "PROTOBUF_NOINLINE "
2826       "$classtype$* Arena::CreateMaybeMessage< $classtype$ >(Arena* arena) {\n"
2827       "  return Arena::$1$Internal< $classtype$ >(arena);\n"
2828       "}\n",
2829       MessageCreateFunction(descriptor_));
2830 }
2831 
GenerateClear(io::Printer * printer)2832 void MessageGenerator::GenerateClear(io::Printer* printer) {
2833   Formatter format(printer, variables_);
2834 
2835   // The maximum number of bytes we will memset to zero without checking their
2836   // hasbit to see if a zero-init is necessary.
2837   const int kMaxUnconditionalPrimitiveBytesClear = 4;
2838 
2839   format(
2840       "void $classname$::Clear() {\n"
2841       "// @@protoc_insertion_point(message_clear_start:$full_name$)\n");
2842   format.Indent();
2843 
2844   format(
2845       // TODO(jwb): It would be better to avoid emitting this if it is not used,
2846       // rather than emitting a workaround for the resulting warning.
2847       "$uint32$ cached_has_bits = 0;\n"
2848       "// Prevent compiler warnings about cached_has_bits being unused\n"
2849       "(void) cached_has_bits;\n\n");
2850 
2851   if (descriptor_->extension_range_count() > 0) {
2852     format("_extensions_.Clear();\n");
2853   }
2854 
2855   // Collect fields into chunks. Each chunk may have an if() condition that
2856   // checks all hasbits in the chunk and skips it if none are set.
2857   int zero_init_bytes = 0;
2858   for (const auto& field : optimized_order_) {
2859     if (CanInitializeByZeroing(field)) {
2860       zero_init_bytes += EstimateAlignmentSize(field);
2861     }
2862   }
2863   bool merge_zero_init = zero_init_bytes > kMaxUnconditionalPrimitiveBytesClear;
2864   int chunk_count = 0;
2865 
2866   std::vector<std::vector<const FieldDescriptor*>> chunks = CollectFields(
2867       optimized_order_,
2868       [&](const FieldDescriptor* a, const FieldDescriptor* b) -> bool {
2869         chunk_count++;
2870         // This predicate guarantees that there is only a single zero-init
2871         // (memset) per chunk, and if present it will be at the beginning.
2872         bool same = HasByteIndex(a) == HasByteIndex(b) &&
2873                     a->is_repeated() == b->is_repeated() &&
2874                     (CanInitializeByZeroing(a) == CanInitializeByZeroing(b) ||
2875                      (CanInitializeByZeroing(a) &&
2876                       (chunk_count == 1 || merge_zero_init)));
2877         if (!same) chunk_count = 0;
2878         return same;
2879       });
2880 
2881   ColdChunkSkipper cold_skipper(options_, chunks, has_bit_indices_, kColdRatio);
2882   int cached_has_word_index = -1;
2883 
2884   for (int chunk_index = 0; chunk_index < chunks.size(); chunk_index++) {
2885     std::vector<const FieldDescriptor*>& chunk = chunks[chunk_index];
2886     cold_skipper.OnStartChunk(chunk_index, cached_has_word_index, "", printer);
2887 
2888     const FieldDescriptor* memset_start = nullptr;
2889     const FieldDescriptor* memset_end = nullptr;
2890     bool saw_non_zero_init = false;
2891 
2892     for (const auto& field : chunk) {
2893       if (CanInitializeByZeroing(field)) {
2894         GOOGLE_CHECK(!saw_non_zero_init);
2895         if (!memset_start) memset_start = field;
2896         memset_end = field;
2897       } else {
2898         saw_non_zero_init = true;
2899       }
2900     }
2901 
2902     // Whether we wrap this chunk in:
2903     //   if (cached_has_bits & <chunk hasbits) { /* chunk. */ }
2904     // We can omit the if() for chunk size 1, or if our fields do not have
2905     // hasbits. I don't understand the rationale for the last part of the
2906     // condition, but it matches the old logic.
2907     const bool have_outer_if = HasBitIndex(chunk.front()) != kNoHasbit &&
2908                                chunk.size() > 1 &&
2909                                (memset_end != chunk.back() || merge_zero_init);
2910 
2911     if (have_outer_if) {
2912       // Emit an if() that will let us skip the whole chunk if none are set.
2913       uint32 chunk_mask = GenChunkMask(chunk, has_bit_indices_);
2914       std::string chunk_mask_str =
2915           StrCat(strings::Hex(chunk_mask, strings::ZERO_PAD_8));
2916 
2917       // Check (up to) 8 has_bits at a time if we have more than one field in
2918       // this chunk.  Due to field layout ordering, we may check
2919       // _has_bits_[last_chunk * 8 / 32] multiple times.
2920       GOOGLE_DCHECK_LE(2, popcnt(chunk_mask));
2921       GOOGLE_DCHECK_GE(8, popcnt(chunk_mask));
2922 
2923       if (cached_has_word_index != HasWordIndex(chunk.front())) {
2924         cached_has_word_index = HasWordIndex(chunk.front());
2925         format("cached_has_bits = _has_bits_[$1$];\n", cached_has_word_index);
2926       }
2927       format("if (cached_has_bits & 0x$1$u) {\n", chunk_mask_str);
2928       format.Indent();
2929     }
2930 
2931     if (memset_start) {
2932       if (memset_start == memset_end) {
2933         // For clarity, do not memset a single field.
2934         field_generators_.get(memset_start)
2935             .GenerateMessageClearingCode(printer);
2936       } else {
2937         format(
2938             "::memset(&$1$_, 0, static_cast<size_t>(\n"
2939             "    reinterpret_cast<char*>(&$2$_) -\n"
2940             "    reinterpret_cast<char*>(&$1$_)) + sizeof($2$_));\n",
2941             FieldName(memset_start), FieldName(memset_end));
2942       }
2943     }
2944 
2945     // Clear all non-zero-initializable fields in the chunk.
2946     for (const auto& field : chunk) {
2947       if (CanInitializeByZeroing(field)) continue;
2948       // It's faster to just overwrite primitive types, but we should only
2949       // clear strings and messages if they were set.
2950       //
2951       // TODO(kenton):  Let the CppFieldGenerator decide this somehow.
2952       bool have_enclosing_if =
2953           HasBitIndex(field) != kNoHasbit &&
2954           (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE ||
2955            field->cpp_type() == FieldDescriptor::CPPTYPE_STRING);
2956 
2957       if (have_enclosing_if) {
2958         PrintPresenceCheck(format, field, has_bit_indices_, printer,
2959                            &cached_has_word_index);
2960       }
2961 
2962       field_generators_.get(field).GenerateMessageClearingCode(printer);
2963 
2964       if (have_enclosing_if) {
2965         format.Outdent();
2966         format("}\n");
2967       }
2968     }
2969 
2970     if (have_outer_if) {
2971       format.Outdent();
2972       format("}\n");
2973     }
2974 
2975     if (cold_skipper.OnEndChunk(chunk_index, printer)) {
2976       // Reset here as it may have been updated in just closed if statement.
2977       cached_has_word_index = -1;
2978     }
2979   }
2980 
2981   // Step 4: Unions.
2982   for (auto oneof : OneOfRange(descriptor_)) {
2983     format("clear_$1$();\n", oneof->name());
2984   }
2985 
2986   if (num_weak_fields_) {
2987     format("_weak_field_map_.ClearAll();\n");
2988   }
2989 
2990   if (!has_bit_indices_.empty()) {
2991     // Step 5: Everything else.
2992     format("_has_bits_.Clear();\n");
2993   }
2994 
2995   std::map<std::string, std::string> vars;
2996   SetUnknkownFieldsVariable(descriptor_, options_, &vars);
2997   format.AddMap(vars);
2998   format("_internal_metadata_.Clear<$unknown_fields_type$>();\n");
2999 
3000   format.Outdent();
3001   format("}\n");
3002 }
3003 
GenerateOneofClear(io::Printer * printer)3004 void MessageGenerator::GenerateOneofClear(io::Printer* printer) {
3005   // Generated function clears the active field and union case (e.g. foo_case_).
3006   int i = 0;
3007   for (auto oneof : OneOfRange(descriptor_)) {
3008     Formatter format(printer, variables_);
3009     format.Set("oneofname", oneof->name());
3010 
3011     format(
3012         "void $classname$::clear_$oneofname$() {\n"
3013         "// @@protoc_insertion_point(one_of_clear_start:$full_name$)\n");
3014     format.Indent();
3015     format("switch ($oneofname$_case()) {\n");
3016     format.Indent();
3017     for (auto field : FieldRange(oneof)) {
3018       format("case k$1$: {\n", UnderscoresToCamelCase(field->name(), true));
3019       format.Indent();
3020       // We clear only allocated objects in oneofs
3021       if (!IsStringOrMessage(field) || !IsFieldUsed(field, options_)) {
3022         format("// No need to clear\n");
3023       } else {
3024         field_generators_.get(field).GenerateClearingCode(printer);
3025       }
3026       format("break;\n");
3027       format.Outdent();
3028       format("}\n");
3029     }
3030     format(
3031         "case $1$_NOT_SET: {\n"
3032         "  break;\n"
3033         "}\n",
3034         ToUpper(oneof->name()));
3035     format.Outdent();
3036     format(
3037         "}\n"
3038         "_oneof_case_[$1$] = $2$_NOT_SET;\n",
3039         i, ToUpper(oneof->name()));
3040     format.Outdent();
3041     format(
3042         "}\n"
3043         "\n");
3044     i++;
3045   }
3046 }
3047 
GenerateSwap(io::Printer * printer)3048 void MessageGenerator::GenerateSwap(io::Printer* printer) {
3049   Formatter format(printer, variables_);
3050 
3051   format("void $classname$::InternalSwap($classname$* other) {\n");
3052   format.Indent();
3053   format("using std::swap;\n");
3054 
3055   if (HasGeneratedMethods(descriptor_->file(), options_)) {
3056     if (descriptor_->extension_range_count() > 0) {
3057       format("_extensions_.Swap(&other->_extensions_);\n");
3058     }
3059 
3060     std::map<std::string, std::string> vars;
3061     SetUnknkownFieldsVariable(descriptor_, options_, &vars);
3062     format.AddMap(vars);
3063     format(
3064         "_internal_metadata_.Swap<$unknown_fields_type$>(&other->_internal_"
3065         "metadata_);\n");
3066 
3067     if (!has_bit_indices_.empty()) {
3068       for (int i = 0; i < HasBitsSize() / 4; ++i) {
3069         format("swap(_has_bits_[$1$], other->_has_bits_[$1$]);\n", i);
3070       }
3071     }
3072 
3073     // If possible, we swap several fields at once, including padding.
3074     const RunMap runs =
3075         FindRuns(optimized_order_, [this](const FieldDescriptor* field) {
3076           return CanBeManipulatedAsRawBytes(field, options_);
3077         });
3078 
3079     for (int i = 0; i < optimized_order_.size(); ++i) {
3080       const FieldDescriptor* field = optimized_order_[i];
3081       const auto it = runs.find(field);
3082 
3083       // We only apply the memswap technique to runs of more than one field, as
3084       // `swap(field_, other.field_)` is better than
3085       // `memswap<...>(&field_, &other.field_)` for generated code readability.
3086       if (it != runs.end() && it->second > 1) {
3087         // Use a memswap, then skip run_length fields.
3088         const size_t run_length = it->second;
3089         const std::string first_field_name = FieldName(field);
3090         const std::string last_field_name =
3091             FieldName(optimized_order_[i + run_length - 1]);
3092 
3093         format.Set("first", first_field_name);
3094         format.Set("last", last_field_name);
3095 
3096         format(
3097             "::PROTOBUF_NAMESPACE_ID::internal::memswap<\n"
3098             "    PROTOBUF_FIELD_OFFSET($classname$, $last$_)\n"
3099             "    + sizeof($classname$::$last$_)\n"
3100             "    - PROTOBUF_FIELD_OFFSET($classname$, $first$_)>(\n"
3101             "        reinterpret_cast<char*>(&$first$_),\n"
3102             "        reinterpret_cast<char*>(&other->$first$_));\n");
3103 
3104         i += run_length - 1;
3105         // ++i at the top of the loop.
3106       } else {
3107         field_generators_.get(field).GenerateSwappingCode(printer);
3108       }
3109     }
3110 
3111     for (auto oneof : OneOfRange(descriptor_)) {
3112       format("swap($1$_, other->$1$_);\n", oneof->name());
3113     }
3114 
3115     for (int i = 0; i < descriptor_->real_oneof_decl_count(); i++) {
3116       format("swap(_oneof_case_[$1$], other->_oneof_case_[$1$]);\n", i);
3117     }
3118 
3119     if (num_weak_fields_) {
3120       format("_weak_field_map_.UnsafeArenaSwap(&other->_weak_field_map_);\n");
3121     }
3122   } else {
3123     format("GetReflection()->Swap(this, other);");
3124   }
3125 
3126   format.Outdent();
3127   format("}\n");
3128 }
3129 
GenerateMergeFrom(io::Printer * printer)3130 void MessageGenerator::GenerateMergeFrom(io::Printer* printer) {
3131   Formatter format(printer, variables_);
3132   if (HasDescriptorMethods(descriptor_->file(), options_)) {
3133     // Generate the generalized MergeFrom (aka that which takes in the Message
3134     // base class as a parameter).
3135     format(
3136         "void $classname$::MergeFrom(const ::$proto_ns$::Message& from) {\n"
3137         "// @@protoc_insertion_point(generalized_merge_from_start:"
3138         "$full_name$)\n"
3139         "  $DCHK$_NE(&from, this);\n");
3140     format.Indent();
3141 
3142     // Cast the message to the proper type. If we find that the message is
3143     // *not* of the proper type, we can still call Merge via the reflection
3144     // system, as the GOOGLE_CHECK above ensured that we have the same descriptor
3145     // for each message.
3146     format(
3147         "const $classname$* source =\n"
3148         "    ::$proto_ns$::DynamicCastToGenerated<$classname$>(\n"
3149         "        &from);\n"
3150         "if (source == nullptr) {\n"
3151         "// @@protoc_insertion_point(generalized_merge_from_cast_fail:"
3152         "$full_name$)\n"
3153         "  ::$proto_ns$::internal::ReflectionOps::Merge(from, this);\n"
3154         "} else {\n"
3155         "// @@protoc_insertion_point(generalized_merge_from_cast_success:"
3156         "$full_name$)\n"
3157         "  MergeFrom(*source);\n"
3158         "}\n");
3159 
3160     format.Outdent();
3161     format("}\n");
3162   } else {
3163     // Generate CheckTypeAndMergeFrom().
3164     format(
3165         "void $classname$::CheckTypeAndMergeFrom(\n"
3166         "    const ::$proto_ns$::MessageLite& from) {\n"
3167         "  MergeFrom(*::$proto_ns$::internal::DownCast<const $classname$*>(\n"
3168         "      &from));\n"
3169         "}\n");
3170   }
3171 }
3172 
GenerateClassSpecificMergeFrom(io::Printer * printer)3173 void MessageGenerator::GenerateClassSpecificMergeFrom(io::Printer* printer) {
3174   // Generate the class-specific MergeFrom, which avoids the GOOGLE_CHECK and cast.
3175   Formatter format(printer, variables_);
3176   format(
3177       "void $classname$::MergeFrom(const $classname$& from) {\n"
3178       "// @@protoc_insertion_point(class_specific_merge_from_start:"
3179       "$full_name$)\n"
3180       "  $DCHK$_NE(&from, this);\n");
3181   format.Indent();
3182 
3183   if (descriptor_->extension_range_count() > 0) {
3184     format("_extensions_.MergeFrom(from._extensions_);\n");
3185   }
3186   std::map<std::string, std::string> vars;
3187   SetUnknkownFieldsVariable(descriptor_, options_, &vars);
3188   format.AddMap(vars);
3189   format(
3190       "_internal_metadata_.MergeFrom<$unknown_fields_type$>(from._internal_"
3191       "metadata_);\n"
3192       "$uint32$ cached_has_bits = 0;\n"
3193       "(void) cached_has_bits;\n\n");
3194 
3195   std::vector<std::vector<const FieldDescriptor*>> chunks = CollectFields(
3196       optimized_order_,
3197       [&](const FieldDescriptor* a, const FieldDescriptor* b) -> bool {
3198         return HasByteIndex(a) == HasByteIndex(b);
3199       });
3200 
3201   ColdChunkSkipper cold_skipper(options_, chunks, has_bit_indices_, kColdRatio);
3202 
3203   // cached_has_word_index maintains that:
3204   //   cached_has_bits = from._has_bits_[cached_has_word_index]
3205   // for cached_has_word_index >= 0
3206   int cached_has_word_index = -1;
3207 
3208   for (int chunk_index = 0; chunk_index < chunks.size(); chunk_index++) {
3209     const std::vector<const FieldDescriptor*>& chunk = chunks[chunk_index];
3210     bool have_outer_if =
3211         chunk.size() > 1 && HasByteIndex(chunk.front()) != kNoHasbit;
3212     cold_skipper.OnStartChunk(chunk_index, cached_has_word_index, "from.",
3213                               printer);
3214 
3215     if (have_outer_if) {
3216       // Emit an if() that will let us skip the whole chunk if none are set.
3217       uint32 chunk_mask = GenChunkMask(chunk, has_bit_indices_);
3218       std::string chunk_mask_str =
3219           StrCat(strings::Hex(chunk_mask, strings::ZERO_PAD_8));
3220 
3221       // Check (up to) 8 has_bits at a time if we have more than one field in
3222       // this chunk.  Due to field layout ordering, we may check
3223       // _has_bits_[last_chunk * 8 / 32] multiple times.
3224       GOOGLE_DCHECK_LE(2, popcnt(chunk_mask));
3225       GOOGLE_DCHECK_GE(8, popcnt(chunk_mask));
3226 
3227       if (cached_has_word_index != HasWordIndex(chunk.front())) {
3228         cached_has_word_index = HasWordIndex(chunk.front());
3229         format("cached_has_bits = from._has_bits_[$1$];\n",
3230                cached_has_word_index);
3231       }
3232 
3233       format("if (cached_has_bits & 0x$1$u) {\n", chunk_mask_str);
3234       format.Indent();
3235     }
3236 
3237     // Go back and emit merging code for each of the fields we processed.
3238     bool deferred_has_bit_changes = false;
3239     for (const auto field : chunk) {
3240       const FieldGenerator& generator = field_generators_.get(field);
3241 
3242       if (field->is_repeated()) {
3243         generator.GenerateMergingCode(printer);
3244       } else if (field->is_optional() && !HasHasbit(field)) {
3245         // Merge semantics without true field presence: primitive fields are
3246         // merged only if non-zero (numeric) or non-empty (string).
3247         bool have_enclosing_if =
3248             EmitFieldNonDefaultCondition(printer, "from.", field);
3249         generator.GenerateMergingCode(printer);
3250         if (have_enclosing_if) {
3251           format.Outdent();
3252           format("}\n");
3253         }
3254       } else if (field->options().weak() ||
3255                  cached_has_word_index != HasWordIndex(field)) {
3256         // Check hasbit, not using cached bits.
3257         GOOGLE_CHECK(HasHasbit(field));
3258         format("if (from._internal_has_$1$()) {\n", FieldName(field));
3259         format.Indent();
3260         generator.GenerateMergingCode(printer);
3261         format.Outdent();
3262         format("}\n");
3263       } else {
3264         // Check hasbit, using cached bits.
3265         GOOGLE_CHECK(HasHasbit(field));
3266         int has_bit_index = has_bit_indices_[field->index()];
3267         const std::string mask = StrCat(
3268             strings::Hex(1u << (has_bit_index % 32), strings::ZERO_PAD_8));
3269         format("if (cached_has_bits & 0x$1$u) {\n", mask);
3270         format.Indent();
3271 
3272         if (have_outer_if && IsPOD(field)) {
3273           // Defer hasbit modification until the end of chunk.
3274           // This can reduce the number of loads/stores by up to 7 per 8 fields.
3275           deferred_has_bit_changes = true;
3276           generator.GenerateCopyConstructorCode(printer);
3277         } else {
3278           generator.GenerateMergingCode(printer);
3279         }
3280 
3281         format.Outdent();
3282         format("}\n");
3283       }
3284     }
3285 
3286     if (have_outer_if) {
3287       if (deferred_has_bit_changes) {
3288         // Flush the has bits for the primitives we deferred.
3289         GOOGLE_CHECK_LE(0, cached_has_word_index);
3290         format("_has_bits_[$1$] |= cached_has_bits;\n", cached_has_word_index);
3291       }
3292 
3293       format.Outdent();
3294       format("}\n");
3295     }
3296 
3297     if (cold_skipper.OnEndChunk(chunk_index, printer)) {
3298       // Reset here as it may have been updated in just closed if statement.
3299       cached_has_word_index = -1;
3300     }
3301   }
3302 
3303   // Merge oneof fields. Oneof field requires oneof case check.
3304   for (auto oneof : OneOfRange(descriptor_)) {
3305     format("switch (from.$1$_case()) {\n", oneof->name());
3306     format.Indent();
3307     for (auto field : FieldRange(oneof)) {
3308       format("case k$1$: {\n", UnderscoresToCamelCase(field->name(), true));
3309       format.Indent();
3310       if (IsFieldUsed(field, options_)) {
3311         field_generators_.get(field).GenerateMergingCode(printer);
3312       }
3313       format("break;\n");
3314       format.Outdent();
3315       format("}\n");
3316     }
3317     format(
3318         "case $1$_NOT_SET: {\n"
3319         "  break;\n"
3320         "}\n",
3321         ToUpper(oneof->name()));
3322     format.Outdent();
3323     format("}\n");
3324   }
3325   if (num_weak_fields_) {
3326     format("_weak_field_map_.MergeFrom(from._weak_field_map_);\n");
3327   }
3328 
3329   format.Outdent();
3330   format("}\n");
3331 }
3332 
GenerateCopyFrom(io::Printer * printer)3333 void MessageGenerator::GenerateCopyFrom(io::Printer* printer) {
3334   Formatter format(printer, variables_);
3335   if (HasDescriptorMethods(descriptor_->file(), options_)) {
3336     // Generate the generalized CopyFrom (aka that which takes in the Message
3337     // base class as a parameter).
3338     format(
3339         "void $classname$::CopyFrom(const ::$proto_ns$::Message& from) {\n"
3340         "// @@protoc_insertion_point(generalized_copy_from_start:"
3341         "$full_name$)\n");
3342     format.Indent();
3343 
3344     format("if (&from == this) return;\n");
3345 
3346     if (!options_.opensource_runtime) {
3347       // This check is disabled in the opensource release because we're
3348       // concerned that many users do not define NDEBUG in their release
3349       // builds.
3350       format(
3351           "#ifndef NDEBUG\n"
3352           "size_t from_size = from.ByteSizeLong();\n"
3353           "#endif\n"
3354           "Clear();\n"
3355           "#ifndef NDEBUG\n"
3356           "$CHK$_EQ(from_size, from.ByteSizeLong())\n"
3357           "  << \"Source of CopyFrom changed when clearing target.  Either \"\n"
3358           "  << \"source is a nested message in target (not allowed), or \"\n"
3359           "  << \"another thread is modifying the source.\";\n"
3360           "#endif\n");
3361     } else {
3362       format("Clear();\n");
3363     }
3364     format("MergeFrom(from);\n");
3365 
3366     format.Outdent();
3367     format("}\n\n");
3368   }
3369 
3370   // Generate the class-specific CopyFrom.
3371   format(
3372       "void $classname$::CopyFrom(const $classname$& from) {\n"
3373       "// @@protoc_insertion_point(class_specific_copy_from_start:"
3374       "$full_name$)\n");
3375   format.Indent();
3376 
3377   format("if (&from == this) return;\n");
3378 
3379   if (!options_.opensource_runtime) {
3380     // This check is disabled in the opensource release because we're
3381     // concerned that many users do not define NDEBUG in their release builds.
3382     format(
3383         "#ifndef NDEBUG\n"
3384         "size_t from_size = from.ByteSizeLong();\n"
3385         "#endif\n"
3386         "Clear();\n"
3387         "#ifndef NDEBUG\n"
3388         "$CHK$_EQ(from_size, from.ByteSizeLong())\n"
3389         "  << \"Source of CopyFrom changed when clearing target.  Either \"\n"
3390         "  << \"source is a nested message in target (not allowed), or \"\n"
3391         "  << \"another thread is modifying the source.\";\n"
3392         "#endif\n");
3393   } else {
3394     format("Clear();\n");
3395   }
3396   format("MergeFrom(from);\n");
3397 
3398   format.Outdent();
3399   format("}\n");
3400 }
3401 
GenerateMergeFromCodedStream(io::Printer * printer)3402 void MessageGenerator::GenerateMergeFromCodedStream(io::Printer* printer) {
3403   std::map<std::string, std::string> vars = variables_;
3404   SetUnknkownFieldsVariable(descriptor_, options_, &vars);
3405   Formatter format(printer, vars);
3406   if (descriptor_->options().message_set_wire_format()) {
3407     // Special-case MessageSet.
3408     format(
3409         "const char* $classname$::_InternalParse(const char* ptr,\n"
3410         "                  ::$proto_ns$::internal::ParseContext* ctx) {\n"
3411         "  return _extensions_.ParseMessageSet(ptr, \n"
3412         "      internal_default_instance(), &_internal_metadata_, ctx);\n"
3413         "}\n");
3414     return;
3415   }
3416   GenerateParserLoop(descriptor_, max_has_bit_index_, options_, scc_analyzer_,
3417                      printer);
3418 }
3419 
GenerateSerializeOneofFields(io::Printer * printer,const std::vector<const FieldDescriptor * > & fields)3420 void MessageGenerator::GenerateSerializeOneofFields(
3421     io::Printer* printer, const std::vector<const FieldDescriptor*>& fields) {
3422   Formatter format(printer, variables_);
3423   GOOGLE_CHECK(!fields.empty());
3424   if (fields.size() == 1) {
3425     GenerateSerializeOneField(printer, fields[0], -1);
3426     return;
3427   }
3428   // We have multiple mutually exclusive choices.  Emit a switch statement.
3429   const OneofDescriptor* oneof = fields[0]->containing_oneof();
3430   format("switch ($1$_case()) {\n", oneof->name());
3431   format.Indent();
3432   for (auto field : fields) {
3433     format("case k$1$: {\n", UnderscoresToCamelCase(field->name(), true));
3434     format.Indent();
3435     field_generators_.get(field).GenerateSerializeWithCachedSizesToArray(
3436         printer);
3437     format("break;\n");
3438     format.Outdent();
3439     format("}\n");
3440   }
3441   format.Outdent();
3442   // Doing nothing is an option.
3443   format(
3444       "  default: ;\n"
3445       "}\n");
3446 }
3447 
GenerateSerializeOneField(io::Printer * printer,const FieldDescriptor * field,int cached_has_bits_index)3448 void MessageGenerator::GenerateSerializeOneField(io::Printer* printer,
3449                                                  const FieldDescriptor* field,
3450                                                  int cached_has_bits_index) {
3451   Formatter format(printer, variables_);
3452   if (!field->options().weak()) {
3453     // For weakfields, PrintFieldComment is called during iteration.
3454     PrintFieldComment(format, field);
3455   }
3456 
3457   bool have_enclosing_if = false;
3458   if (field->options().weak()) {
3459   } else if (HasHasbit(field)) {
3460     // Attempt to use the state of cached_has_bits, if possible.
3461     int has_bit_index = HasBitIndex(field);
3462     if (cached_has_bits_index == has_bit_index / 32) {
3463       const std::string mask =
3464           StrCat(strings::Hex(1u << (has_bit_index % 32), strings::ZERO_PAD_8));
3465 
3466       format("if (cached_has_bits & 0x$1$u) {\n", mask);
3467     } else {
3468       format("if (_internal_has_$1$()) {\n", FieldName(field));
3469     }
3470 
3471     format.Indent();
3472     have_enclosing_if = true;
3473   } else if (field->is_optional() && !HasHasbit(field)) {
3474     have_enclosing_if = EmitFieldNonDefaultCondition(printer, "this->", field);
3475   }
3476 
3477   field_generators_.get(field).GenerateSerializeWithCachedSizesToArray(printer);
3478 
3479   if (have_enclosing_if) {
3480     format.Outdent();
3481     format("}\n");
3482   }
3483   format("\n");
3484 }
3485 
GenerateSerializeOneExtensionRange(io::Printer * printer,const Descriptor::ExtensionRange * range)3486 void MessageGenerator::GenerateSerializeOneExtensionRange(
3487     io::Printer* printer, const Descriptor::ExtensionRange* range) {
3488   std::map<std::string, std::string> vars = variables_;
3489   vars["start"] = StrCat(range->start);
3490   vars["end"] = StrCat(range->end);
3491   Formatter format(printer, vars);
3492   format("// Extension range [$start$, $end$)\n");
3493   format(
3494       "target = _extensions_._InternalSerialize(\n"
3495       "    $start$, $end$, target, stream);\n\n");
3496 }
3497 
GenerateSerializeWithCachedSizesToArray(io::Printer * printer)3498 void MessageGenerator::GenerateSerializeWithCachedSizesToArray(
3499     io::Printer* printer) {
3500   Formatter format(printer, variables_);
3501   if (descriptor_->options().message_set_wire_format()) {
3502     // Special-case MessageSet.
3503     format(
3504         "$uint8$* $classname$::_InternalSerialize(\n"
3505         "    $uint8$* target, ::$proto_ns$::io::EpsCopyOutputStream* stream) "
3506         "const {\n"
3507         "  target = _extensions_."
3508         "InternalSerializeMessageSetWithCachedSizesToArray(target, stream);\n");
3509     std::map<std::string, std::string> vars;
3510     SetUnknkownFieldsVariable(descriptor_, options_, &vars);
3511     format.AddMap(vars);
3512     format(
3513         "  target = ::$proto_ns$::internal::"
3514         "InternalSerializeUnknownMessageSetItemsToArray(\n"
3515         "               $unknown_fields$, target, stream);\n");
3516     format(
3517         "  return target;\n"
3518         "}\n");
3519     return;
3520   }
3521 
3522   format(
3523       "$uint8$* $classname$::_InternalSerialize(\n"
3524       "    $uint8$* target, ::$proto_ns$::io::EpsCopyOutputStream* stream) "
3525       "const {\n");
3526   format.Indent();
3527 
3528   format("// @@protoc_insertion_point(serialize_to_array_start:$full_name$)\n");
3529 
3530   GenerateSerializeWithCachedSizesBody(printer);
3531 
3532   format("// @@protoc_insertion_point(serialize_to_array_end:$full_name$)\n");
3533 
3534   format.Outdent();
3535   format(
3536       "  return target;\n"
3537       "}\n");
3538 }
3539 
GenerateSerializeWithCachedSizesBody(io::Printer * printer)3540 void MessageGenerator::GenerateSerializeWithCachedSizesBody(
3541     io::Printer* printer) {
3542   Formatter format(printer, variables_);
3543   // If there are multiple fields in a row from the same oneof then we
3544   // coalesce them and emit a switch statement.  This is more efficient
3545   // because it lets the C++ compiler know this is a "at most one can happen"
3546   // situation. If we emitted "if (has_x()) ...; if (has_y()) ..." the C++
3547   // compiler's emitted code might check has_y() even when has_x() is true.
3548   class LazySerializerEmitter {
3549    public:
3550     LazySerializerEmitter(MessageGenerator* mg, io::Printer* printer)
3551         : mg_(mg),
3552           format_(printer),
3553           eager_(!HasFieldPresence(mg->descriptor_->file())),
3554           cached_has_bit_index_(kNoHasbit) {}
3555 
3556     ~LazySerializerEmitter() { Flush(); }
3557 
3558     // If conditions allow, try to accumulate a run of fields from the same
3559     // oneof, and handle them at the next Flush().
3560     void Emit(const FieldDescriptor* field) {
3561       if (eager_ || MustFlush(field)) {
3562         Flush();
3563       }
3564       if (!field->real_containing_oneof()) {
3565         // TODO(ckennelly): Defer non-oneof fields similarly to oneof fields.
3566 
3567         if (!field->options().weak() && !field->is_repeated() && !eager_) {
3568           // We speculatively load the entire _has_bits_[index] contents, even
3569           // if it is for only one field.  Deferring non-oneof emitting would
3570           // allow us to determine whether this is going to be useful.
3571           int has_bit_index = mg_->has_bit_indices_[field->index()];
3572           if (cached_has_bit_index_ != has_bit_index / 32) {
3573             // Reload.
3574             int new_index = has_bit_index / 32;
3575 
3576             format_("cached_has_bits = _has_bits_[$1$];\n", new_index);
3577 
3578             cached_has_bit_index_ = new_index;
3579           }
3580         }
3581 
3582         mg_->GenerateSerializeOneField(format_.printer(), field,
3583                                        cached_has_bit_index_);
3584       } else {
3585         v_.push_back(field);
3586       }
3587     }
3588 
3589     void Flush() {
3590       if (!v_.empty()) {
3591         mg_->GenerateSerializeOneofFields(format_.printer(), v_);
3592         v_.clear();
3593       }
3594     }
3595 
3596    private:
3597     // If we have multiple fields in v_ then they all must be from the same
3598     // oneof.  Would adding field to v_ break that invariant?
3599     bool MustFlush(const FieldDescriptor* field) {
3600       return !v_.empty() &&
3601              v_[0]->containing_oneof() != field->containing_oneof();
3602     }
3603 
3604     MessageGenerator* mg_;
3605     Formatter format_;
3606     const bool eager_;
3607     std::vector<const FieldDescriptor*> v_;
3608 
3609     // cached_has_bit_index_ maintains that:
3610     //   cached_has_bits = from._has_bits_[cached_has_bit_index_]
3611     // for cached_has_bit_index_ >= 0
3612     int cached_has_bit_index_;
3613   };
3614 
3615   std::vector<const FieldDescriptor*> ordered_fields =
3616       SortFieldsByNumber(descriptor_);
3617 
3618   std::vector<const Descriptor::ExtensionRange*> sorted_extensions;
3619   sorted_extensions.reserve(descriptor_->extension_range_count());
3620   for (int i = 0; i < descriptor_->extension_range_count(); ++i) {
3621     sorted_extensions.push_back(descriptor_->extension_range(i));
3622   }
3623   std::sort(sorted_extensions.begin(), sorted_extensions.end(),
3624             ExtensionRangeSorter());
3625   if (num_weak_fields_) {
3626     format(
3627         "::$proto_ns$::internal::WeakFieldMap::FieldWriter field_writer("
3628         "_weak_field_map_);\n");
3629   }
3630 
3631   format(
3632       "$uint32$ cached_has_bits = 0;\n"
3633       "(void) cached_has_bits;\n\n");
3634 
3635   // Merge the fields and the extension ranges, both sorted by field number.
3636   {
3637     LazySerializerEmitter e(this, printer);
3638     const FieldDescriptor* last_weak_field = nullptr;
3639     int i, j;
3640     for (i = 0, j = 0;
3641          i < ordered_fields.size() || j < sorted_extensions.size();) {
3642       if ((j == sorted_extensions.size()) ||
3643           (i < descriptor_->field_count() &&
3644            ordered_fields[i]->number() < sorted_extensions[j]->start)) {
3645         const FieldDescriptor* field = ordered_fields[i++];
3646         if (!IsFieldUsed(field, options_)) {
3647           continue;
3648         }
3649         if (field->options().weak()) {
3650           last_weak_field = field;
3651           PrintFieldComment(format, field);
3652         } else {
3653           if (last_weak_field != nullptr) {
3654             e.Emit(last_weak_field);
3655             last_weak_field = nullptr;
3656           }
3657           e.Emit(field);
3658         }
3659       } else {
3660         if (last_weak_field != nullptr) {
3661           e.Emit(last_weak_field);
3662           last_weak_field = nullptr;
3663         }
3664         e.Flush();
3665         GenerateSerializeOneExtensionRange(printer, sorted_extensions[j++]);
3666       }
3667     }
3668     if (last_weak_field != nullptr) {
3669       e.Emit(last_weak_field);
3670     }
3671   }
3672 
3673   std::map<std::string, std::string> vars;
3674   SetUnknkownFieldsVariable(descriptor_, options_, &vars);
3675   format.AddMap(vars);
3676   format("if (PROTOBUF_PREDICT_FALSE($have_unknown_fields$)) {\n");
3677   format.Indent();
3678   if (UseUnknownFieldSet(descriptor_->file(), options_)) {
3679     format(
3680         "target = "
3681         "::$proto_ns$::internal::WireFormat::"
3682         "InternalSerializeUnknownFieldsToArray(\n"
3683         "    $unknown_fields$, target, stream);\n");
3684   } else {
3685     format(
3686         "target = stream->WriteRaw($unknown_fields$.data(),\n"
3687         "    static_cast<int>($unknown_fields$.size()), target);\n");
3688   }
3689   format.Outdent();
3690   format("}\n");
3691 }
3692 
RequiredFieldsBitMask() const3693 std::vector<uint32> MessageGenerator::RequiredFieldsBitMask() const {
3694   const int array_size = HasBitsSize();
3695   std::vector<uint32> masks(array_size, 0);
3696 
3697   for (auto field : FieldRange(descriptor_)) {
3698     if (!field->is_required()) {
3699       continue;
3700     }
3701 
3702     const int has_bit_index = has_bit_indices_[field->index()];
3703     masks[has_bit_index / 32] |= static_cast<uint32>(1) << (has_bit_index % 32);
3704   }
3705   return masks;
3706 }
3707 
GenerateByteSize(io::Printer * printer)3708 void MessageGenerator::GenerateByteSize(io::Printer* printer) {
3709   Formatter format(printer, variables_);
3710 
3711   if (descriptor_->options().message_set_wire_format()) {
3712     // Special-case MessageSet.
3713     std::map<std::string, std::string> vars;
3714     SetUnknkownFieldsVariable(descriptor_, options_, &vars);
3715     format.AddMap(vars);
3716     format(
3717         "size_t $classname$::ByteSizeLong() const {\n"
3718         "// @@protoc_insertion_point(message_set_byte_size_start:$full_name$)\n"
3719         "  size_t total_size = _extensions_.MessageSetByteSize();\n"
3720         "  if ($have_unknown_fields$) {\n"
3721         "    total_size += ::$proto_ns$::internal::\n"
3722         "        ComputeUnknownMessageSetItemsSize($unknown_fields$);\n"
3723         "  }\n"
3724         "  int cached_size = "
3725         "::$proto_ns$::internal::ToCachedSize(total_size);\n"
3726         "  SetCachedSize(cached_size);\n"
3727         "  return total_size;\n"
3728         "}\n");
3729     return;
3730   }
3731 
3732   if (num_required_fields_ > 1) {
3733     // Emit a function (rarely used, we hope) that handles the required fields
3734     // by checking for each one individually.
3735     format(
3736         "size_t $classname$::RequiredFieldsByteSizeFallback() const {\n"
3737         "// @@protoc_insertion_point(required_fields_byte_size_fallback_start:"
3738         "$full_name$)\n");
3739     format.Indent();
3740     format("size_t total_size = 0;\n");
3741     for (auto field : optimized_order_) {
3742       if (field->is_required()) {
3743         format(
3744             "\n"
3745             "if (_internal_has_$1$()) {\n",
3746             FieldName(field));
3747         format.Indent();
3748         PrintFieldComment(format, field);
3749         field_generators_.get(field).GenerateByteSize(printer);
3750         format.Outdent();
3751         format("}\n");
3752       }
3753     }
3754     format(
3755         "\n"
3756         "return total_size;\n");
3757     format.Outdent();
3758     format("}\n");
3759   }
3760 
3761   format(
3762       "size_t $classname$::ByteSizeLong() const {\n"
3763       "// @@protoc_insertion_point(message_byte_size_start:$full_name$)\n");
3764   format.Indent();
3765   format(
3766       "size_t total_size = 0;\n"
3767       "\n");
3768 
3769   if (descriptor_->extension_range_count() > 0) {
3770     format(
3771         "total_size += _extensions_.ByteSize();\n"
3772         "\n");
3773   }
3774 
3775   std::map<std::string, std::string> vars;
3776   SetUnknkownFieldsVariable(descriptor_, options_, &vars);
3777   format.AddMap(vars);
3778 
3779   // Handle required fields (if any).  We expect all of them to be
3780   // present, so emit one conditional that checks for that.  If they are all
3781   // present then the fast path executes; otherwise the slow path executes.
3782   if (num_required_fields_ > 1) {
3783     // The fast path works if all required fields are present.
3784     const std::vector<uint32> masks_for_has_bits = RequiredFieldsBitMask();
3785     format("if ($1$) {  // All required fields are present.\n",
3786            ConditionalToCheckBitmasks(masks_for_has_bits));
3787     format.Indent();
3788     // Oneof fields cannot be required, so optimized_order_ contains all of the
3789     // fields that we need to potentially emit.
3790     for (auto field : optimized_order_) {
3791       if (!field->is_required()) continue;
3792       PrintFieldComment(format, field);
3793       field_generators_.get(field).GenerateByteSize(printer);
3794       format("\n");
3795     }
3796     format.Outdent();
3797     format(
3798         "} else {\n"  // the slow path
3799         "  total_size += RequiredFieldsByteSizeFallback();\n"
3800         "}\n");
3801   } else {
3802     // num_required_fields_ <= 1: no need to be tricky
3803     for (auto field : optimized_order_) {
3804       if (!field->is_required()) continue;
3805       PrintFieldComment(format, field);
3806       format("if (_internal_has_$1$()) {\n", FieldName(field));
3807       format.Indent();
3808       field_generators_.get(field).GenerateByteSize(printer);
3809       format.Outdent();
3810       format("}\n");
3811     }
3812   }
3813 
3814   std::vector<std::vector<const FieldDescriptor*>> chunks = CollectFields(
3815       optimized_order_,
3816       [&](const FieldDescriptor* a, const FieldDescriptor* b) -> bool {
3817         return a->label() == b->label() && HasByteIndex(a) == HasByteIndex(b);
3818       });
3819 
3820   // Remove chunks with required fields.
3821   chunks.erase(std::remove_if(chunks.begin(), chunks.end(), IsRequired),
3822                chunks.end());
3823 
3824   ColdChunkSkipper cold_skipper(options_, chunks, has_bit_indices_, kColdRatio);
3825   int cached_has_word_index = -1;
3826 
3827   format(
3828       "$uint32$ cached_has_bits = 0;\n"
3829       "// Prevent compiler warnings about cached_has_bits being unused\n"
3830       "(void) cached_has_bits;\n\n");
3831 
3832   for (int chunk_index = 0; chunk_index < chunks.size(); chunk_index++) {
3833     const std::vector<const FieldDescriptor*>& chunk = chunks[chunk_index];
3834     const bool have_outer_if =
3835         chunk.size() > 1 && HasWordIndex(chunk[0]) != kNoHasbit;
3836     cold_skipper.OnStartChunk(chunk_index, cached_has_word_index, "", printer);
3837 
3838     if (have_outer_if) {
3839       // Emit an if() that will let us skip the whole chunk if none are set.
3840       uint32 chunk_mask = GenChunkMask(chunk, has_bit_indices_);
3841       std::string chunk_mask_str =
3842           StrCat(strings::Hex(chunk_mask, strings::ZERO_PAD_8));
3843 
3844       // Check (up to) 8 has_bits at a time if we have more than one field in
3845       // this chunk.  Due to field layout ordering, we may check
3846       // _has_bits_[last_chunk * 8 / 32] multiple times.
3847       GOOGLE_DCHECK_LE(2, popcnt(chunk_mask));
3848       GOOGLE_DCHECK_GE(8, popcnt(chunk_mask));
3849 
3850       if (cached_has_word_index != HasWordIndex(chunk.front())) {
3851         cached_has_word_index = HasWordIndex(chunk.front());
3852         format("cached_has_bits = _has_bits_[$1$];\n", cached_has_word_index);
3853       }
3854       format("if (cached_has_bits & 0x$1$u) {\n", chunk_mask_str);
3855       format.Indent();
3856     }
3857 
3858     // Go back and emit checks for each of the fields we processed.
3859     for (int j = 0; j < chunk.size(); j++) {
3860       const FieldDescriptor* field = chunk[j];
3861       const FieldGenerator& generator = field_generators_.get(field);
3862       bool have_enclosing_if = false;
3863       bool need_extra_newline = false;
3864 
3865       PrintFieldComment(format, field);
3866 
3867       if (field->is_repeated()) {
3868         // No presence check is required.
3869         need_extra_newline = true;
3870       } else if (HasHasbit(field)) {
3871         PrintPresenceCheck(format, field, has_bit_indices_, printer,
3872                            &cached_has_word_index);
3873         have_enclosing_if = true;
3874       } else {
3875         // Without field presence: field is serialized only if it has a
3876         // non-default value.
3877         have_enclosing_if =
3878             EmitFieldNonDefaultCondition(printer, "this->", field);
3879       }
3880 
3881       generator.GenerateByteSize(printer);
3882 
3883       if (have_enclosing_if) {
3884         format.Outdent();
3885         format(
3886             "}\n"
3887             "\n");
3888       }
3889       if (need_extra_newline) {
3890         format("\n");
3891       }
3892     }
3893 
3894     if (have_outer_if) {
3895       format.Outdent();
3896       format("}\n");
3897     }
3898 
3899     if (cold_skipper.OnEndChunk(chunk_index, printer)) {
3900       // Reset here as it may have been updated in just closed if statement.
3901       cached_has_word_index = -1;
3902     }
3903   }
3904 
3905   // Fields inside a oneof don't use _has_bits_ so we count them in a separate
3906   // pass.
3907   for (auto oneof : OneOfRange(descriptor_)) {
3908     format("switch ($1$_case()) {\n", oneof->name());
3909     format.Indent();
3910     for (auto field : FieldRange(oneof)) {
3911       PrintFieldComment(format, field);
3912       format("case k$1$: {\n", UnderscoresToCamelCase(field->name(), true));
3913       format.Indent();
3914       if (IsFieldUsed(field, options_)) {
3915         field_generators_.get(field).GenerateByteSize(printer);
3916       }
3917       format("break;\n");
3918       format.Outdent();
3919       format("}\n");
3920     }
3921     format(
3922         "case $1$_NOT_SET: {\n"
3923         "  break;\n"
3924         "}\n",
3925         ToUpper(oneof->name()));
3926     format.Outdent();
3927     format("}\n");
3928   }
3929 
3930   if (num_weak_fields_) {
3931     // TagSize + MessageSize
3932     format("total_size += _weak_field_map_.ByteSizeLong();\n");
3933   }
3934 
3935   format("if (PROTOBUF_PREDICT_FALSE($have_unknown_fields$)) {\n");
3936   if (UseUnknownFieldSet(descriptor_->file(), options_)) {
3937     // We go out of our way to put the computation of the uncommon path of
3938     // unknown fields in tail position. This allows for better code generation
3939     // of this function for simple protos.
3940     format(
3941         "  return ::$proto_ns$::internal::ComputeUnknownFieldsSize(\n"
3942         "      _internal_metadata_, total_size, &_cached_size_);\n");
3943   } else {
3944     format("  total_size += $unknown_fields$.size();\n");
3945   }
3946   format("}\n");
3947 
3948   // We update _cached_size_ even though this is a const method.  Because
3949   // const methods might be called concurrently this needs to be atomic
3950   // operations or the program is undefined.  In practice, since any concurrent
3951   // writes will be writing the exact same value, normal writes will work on
3952   // all common processors. We use a dedicated wrapper class to abstract away
3953   // the underlying atomic. This makes it easier on platforms where even relaxed
3954   // memory order might have perf impact to replace it with ordinary loads and
3955   // stores.
3956   format(
3957       "int cached_size = ::$proto_ns$::internal::ToCachedSize(total_size);\n"
3958       "SetCachedSize(cached_size);\n"
3959       "return total_size;\n");
3960 
3961   format.Outdent();
3962   format("}\n");
3963 }
3964 
GenerateIsInitialized(io::Printer * printer)3965 void MessageGenerator::GenerateIsInitialized(io::Printer* printer) {
3966   Formatter format(printer, variables_);
3967   format("bool $classname$::IsInitialized() const {\n");
3968   format.Indent();
3969 
3970   if (descriptor_->extension_range_count() > 0) {
3971     format(
3972         "if (!_extensions_.IsInitialized()) {\n"
3973         "  return false;\n"
3974         "}\n\n");
3975   }
3976 
3977   if (num_required_fields_ > 0) {
3978     format(
3979         "if (_Internal::MissingRequiredFields(_has_bits_))"
3980         " return false;\n");
3981   }
3982 
3983   // Now check that all non-oneof embedded messages are initialized.
3984   for (auto field : optimized_order_) {
3985     // TODO(ckennelly): Push this down into a generator?
3986     if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE &&
3987         !ShouldIgnoreRequiredFieldCheck(field, options_) &&
3988         scc_analyzer_->HasRequiredFields(field->message_type())) {
3989       if (field->is_repeated()) {
3990         if (IsImplicitWeakField(field, options_, scc_analyzer_)) {
3991           format(
3992               "if "
3993               "(!::$proto_ns$::internal::AllAreInitializedWeak($1$_.weak)"
3994               ")"
3995               " return false;\n",
3996               FieldName(field));
3997         } else {
3998           format(
3999               "if (!::$proto_ns$::internal::AllAreInitialized($1$_))"
4000               " return false;\n",
4001               FieldName(field));
4002         }
4003       } else if (field->options().weak()) {
4004         continue;
4005       } else {
4006         GOOGLE_CHECK(!field->real_containing_oneof());
4007         format(
4008             "if (_internal_has_$1$()) {\n"
4009             "  if (!$1$_->IsInitialized()) return false;\n"
4010             "}\n",
4011             FieldName(field));
4012       }
4013     }
4014   }
4015   if (num_weak_fields_) {
4016     // For Weak fields.
4017     format("if (!_weak_field_map_.IsInitialized()) return false;\n");
4018   }
4019   // Go through the oneof fields, emitting a switch if any might have required
4020   // fields.
4021   for (auto oneof : OneOfRange(descriptor_)) {
4022     bool has_required_fields = false;
4023     for (auto field : FieldRange(oneof)) {
4024       if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE &&
4025           !ShouldIgnoreRequiredFieldCheck(field, options_) &&
4026           scc_analyzer_->HasRequiredFields(field->message_type())) {
4027         has_required_fields = true;
4028         break;
4029       }
4030     }
4031 
4032     if (!has_required_fields) {
4033       continue;
4034     }
4035 
4036     format("switch ($1$_case()) {\n", oneof->name());
4037     format.Indent();
4038     for (auto field : FieldRange(oneof)) {
4039       format("case k$1$: {\n", UnderscoresToCamelCase(field->name(), true));
4040       format.Indent();
4041 
4042       if (IsFieldUsed(field, options_) &&
4043           field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE &&
4044           !ShouldIgnoreRequiredFieldCheck(field, options_) &&
4045           scc_analyzer_->HasRequiredFields(field->message_type())) {
4046         GOOGLE_CHECK(!(field->options().weak() || !field->real_containing_oneof()));
4047         if (field->options().weak()) {
4048           // Just skip.
4049         } else {
4050           format(
4051               "if (has_$1$()) {\n"
4052               "  if (!this->$1$().IsInitialized()) return false;\n"
4053               "}\n",
4054               FieldName(field));
4055         }
4056       }
4057 
4058       format("break;\n");
4059       format.Outdent();
4060       format("}\n");
4061     }
4062     format(
4063         "case $1$_NOT_SET: {\n"
4064         "  break;\n"
4065         "}\n",
4066         ToUpper(oneof->name()));
4067     format.Outdent();
4068     format("}\n");
4069   }
4070 
4071   format.Outdent();
4072   format(
4073       "  return true;\n"
4074       "}\n");
4075 }
4076 
4077 }  // namespace cpp
4078 }  // namespace compiler
4079 }  // namespace protobuf
4080 }  // namespace google
4081