• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc.  All rights reserved.
3 //
4 // Use of this source code is governed by a BSD-style
5 // license that can be found in the LICENSE file or at
6 // https://developers.google.com/open-source/licenses/bsd
7 
8 // Author: kenton@google.com (Kenton Varda)
9 //  Based on original Protocol Buffers design by
10 //  Sanjay Ghemawat, Jeff Dean, and others.
11 
12 #include "google/protobuf/compiler/cpp/message.h"
13 
14 #include <algorithm>
15 #include <cmath>
16 #include <cstddef>
17 #include <cstdint>
18 #include <functional>
19 #include <iterator>
20 #include <limits>
21 #include <memory>
22 #include <new>
23 #include <string>
24 #include <utility>
25 #include <vector>
26 
27 #include "absl/container/flat_hash_map.h"
28 #include "absl/container/flat_hash_set.h"
29 #include "absl/functional/any_invocable.h"
30 #include "absl/log/absl_check.h"
31 #include "absl/log/absl_log.h"
32 #include "absl/strings/ascii.h"
33 #include "absl/strings/escaping.h"
34 #include "absl/strings/match.h"
35 #include "absl/strings/str_cat.h"
36 #include "absl/strings/str_format.h"
37 #include "absl/strings/str_join.h"
38 #include "absl/strings/string_view.h"
39 #include "google/protobuf/compiler/cpp/enum.h"
40 #include "google/protobuf/compiler/cpp/extension.h"
41 #include "google/protobuf/compiler/cpp/field.h"
42 #include "google/protobuf/compiler/cpp/helpers.h"
43 #include "google/protobuf/compiler/cpp/names.h"
44 #include "google/protobuf/compiler/cpp/options.h"
45 #include "google/protobuf/compiler/cpp/padding_optimizer.h"
46 #include "google/protobuf/compiler/cpp/parse_function_generator.h"
47 #include "google/protobuf/compiler/cpp/tracker.h"
48 #include "google/protobuf/descriptor.h"
49 #include "google/protobuf/descriptor.pb.h"
50 #include "google/protobuf/io/printer.h"
51 #include "google/protobuf/wire_format.h"
52 #include "google/protobuf/wire_format_lite.h"
53 
54 
55 // Must be included last.
56 #include "google/protobuf/port_def.inc"
57 
58 namespace google {
59 namespace protobuf {
60 namespace compiler {
61 namespace cpp {
62 namespace {
63 using ::google::protobuf::internal::WireFormat;
64 using ::google::protobuf::internal::WireFormatLite;
65 using ::google::protobuf::internal::cpp::HasHasbit;
66 using Semantic = ::google::protobuf::io::AnnotationCollector::Semantic;
67 using Sub = ::google::protobuf::io::Printer::Sub;
68 
69 static constexpr int kNoHasbit = -1;
70 
71 // Create an expression that evaluates to
72 //  "for all i, (_has_bits_[i] & masks[i]) == masks[i]"
73 // masks is allowed to be shorter than _has_bits_, but at least one element of
74 // masks must be non-zero.
ConditionalToCheckBitmasks(const std::vector<uint32_t> & masks,bool return_success=true,absl::string_view has_bits_var="_impl_._has_bits_")75 std::string ConditionalToCheckBitmasks(
76     const std::vector<uint32_t>& masks, bool return_success = true,
77     absl::string_view has_bits_var = "_impl_._has_bits_") {
78   std::vector<std::string> parts;
79   for (size_t i = 0; i < masks.size(); ++i) {
80     if (masks[i] == 0) continue;
81     std::string m = absl::StrCat("0x", absl::Hex(masks[i], absl::kZeroPad8));
82     // Each xor evaluates to 0 if the expected bits are present.
83     parts.push_back(
84         absl::StrCat("((", has_bits_var, "[", i, "] & ", m, ") ^ ", m, ")"));
85   }
86   ABSL_CHECK(!parts.empty());
87   // If we have multiple parts, each expected to be 0, then bitwise-or them.
88   std::string result =
89       parts.size() == 1
90           ? parts[0]
91           : absl::StrCat("(", absl::StrJoin(parts, "\n       | "), ")");
92   return result + (return_success ? " == 0" : " != 0");
93 }
94 
PrintPresenceCheck(const FieldDescriptor * field,const std::vector<int> & has_bit_indices,io::Printer * p,int * cached_has_word_index)95 void PrintPresenceCheck(const FieldDescriptor* field,
96                         const std::vector<int>& has_bit_indices, io::Printer* p,
97                         int* cached_has_word_index) {
98   if (!field->options().weak()) {
99     int has_bit_index = has_bit_indices[field->index()];
100     if (*cached_has_word_index != (has_bit_index / 32)) {
101       *cached_has_word_index = (has_bit_index / 32);
102       p->Emit({{"index", *cached_has_word_index}},
103               R"cc(
104                 cached_has_bits = $has_bits$[$index$];
105               )cc");
106     }
107     p->Emit({{"mask", absl::StrFormat("0x%08xu", 1u << (has_bit_index % 32))}},
108             R"cc(
109               if (cached_has_bits & $mask$) {
110             )cc");
111   } else {
112     p->Emit(R"cc(
113       if (has_$name$()) {
114     )cc");
115   }
116 }
117 
118 struct FieldOrderingByNumber {
operator ()google::protobuf::compiler::cpp::__anonb44a127c0111::FieldOrderingByNumber119   inline bool operator()(const FieldDescriptor* a,
120                          const FieldDescriptor* b) const {
121     return a->number() < b->number();
122   }
123 };
124 
125 // Sort the fields of the given Descriptor by number into a new[]'d array
126 // and return it.
SortFieldsByNumber(const Descriptor * descriptor)127 std::vector<const FieldDescriptor*> SortFieldsByNumber(
128     const Descriptor* descriptor) {
129   std::vector<const FieldDescriptor*> fields(descriptor->field_count());
130   for (int i = 0; i < descriptor->field_count(); ++i) {
131     fields[i] = descriptor->field(i);
132   }
133   std::sort(fields.begin(), fields.end(), FieldOrderingByNumber());
134   return fields;
135 }
136 
137 // Functor for sorting extension ranges by their "start" field number.
138 struct ExtensionRangeSorter {
operator ()google::protobuf::compiler::cpp::__anonb44a127c0111::ExtensionRangeSorter139   bool operator()(const Descriptor::ExtensionRange* left,
140                   const Descriptor::ExtensionRange* right) const {
141     return left->start_number() < right->start_number();
142   }
143 };
144 
IsPOD(const FieldDescriptor * field)145 bool IsPOD(const FieldDescriptor* field) {
146   if (field->is_repeated() || field->is_extension()) return false;
147   switch (field->cpp_type()) {
148     case FieldDescriptor::CPPTYPE_ENUM:
149     case FieldDescriptor::CPPTYPE_INT32:
150     case FieldDescriptor::CPPTYPE_INT64:
151     case FieldDescriptor::CPPTYPE_UINT32:
152     case FieldDescriptor::CPPTYPE_UINT64:
153     case FieldDescriptor::CPPTYPE_FLOAT:
154     case FieldDescriptor::CPPTYPE_DOUBLE:
155     case FieldDescriptor::CPPTYPE_BOOL:
156       return true;
157     case FieldDescriptor::CPPTYPE_STRING:
158       return false;
159     default:
160       return false;
161   }
162 }
163 
164 // Finds runs of fields for which `predicate` is true.
165 // RunMap maps from fields that start each run to the number of fields in that
166 // run.  This is optimized for the common case that there are very few runs in
167 // a message and that most of the eligible fields appear together.
168 using RunMap = absl::flat_hash_map<const FieldDescriptor*, size_t>;
FindRuns(const std::vector<const FieldDescriptor * > & fields,const std::function<bool (const FieldDescriptor *)> & predicate)169 RunMap FindRuns(const std::vector<const FieldDescriptor*>& fields,
170                 const std::function<bool(const FieldDescriptor*)>& predicate) {
171   RunMap runs;
172   const FieldDescriptor* last_start = nullptr;
173 
174   for (auto field : fields) {
175     if (predicate(field)) {
176       if (last_start == nullptr) {
177         last_start = field;
178       }
179 
180       runs[last_start]++;
181     } else {
182       last_start = nullptr;
183     }
184   }
185   return runs;
186 }
187 
EmitNonDefaultCheck(io::Printer * p,const std::string & prefix,const FieldDescriptor * field)188 void EmitNonDefaultCheck(io::Printer* p, const std::string& prefix,
189                          const FieldDescriptor* field) {
190   ABSL_CHECK(!HasHasbit(field));
191   ABSL_CHECK(!field->is_repeated());
192   ABSL_CHECK(!field->containing_oneof() || field->real_containing_oneof());
193 
194   auto v = p->WithVars({{
195       {"prefix", prefix},
196       {"name", FieldName(field)},
197   }});
198   // Merge and serialize semantics: primitive fields are merged/serialized only
199   // if non-zero (numeric) or non-empty (string).
200   if (!field->containing_oneof()) {
201     if (field->cpp_type() == FieldDescriptor::CPPTYPE_STRING) {
202       p->Emit("!$prefix$_internal_$name$().empty()");
203     } else if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
204       // Message fields still have has_$name$() methods.
205       p->Emit("$prefix$_internal_has_$name$()");
206     } else if (field->cpp_type() == FieldDescriptor::CPPTYPE_FLOAT) {
207       p->Emit("::absl::bit_cast<::uint32_t>($prefix$_internal_$name$()) != 0");
208     } else if (field->cpp_type() == FieldDescriptor::CPPTYPE_DOUBLE) {
209       p->Emit("::absl::bit_cast<::uint64_t>($prefix$_internal_$name$()) != 0");
210     } else {
211       p->Emit("$prefix$_internal_$name$() != 0");
212     }
213   } else if (field->real_containing_oneof()) {
214     p->Emit("$prefix$$has_field$");
215   }
216 }
217 
ShouldEmitNonDefaultCheck(const FieldDescriptor * field)218 bool ShouldEmitNonDefaultCheck(const FieldDescriptor* field) {
219   return (!field->is_repeated() && !field->containing_oneof()) ||
220          field->real_containing_oneof();
221 }
222 
223 // Emits an if-statement with a condition that evaluates to true if |field| is
224 // considered non-default (will be sent over the wire), for message types
225 // without true field presence. Should only be called if
226 // !HasHasbit(field).
MayEmitIfNonDefaultCheck(io::Printer * p,const std::string & prefix,const FieldDescriptor * field,absl::AnyInvocable<void ()> emit_body)227 void MayEmitIfNonDefaultCheck(io::Printer* p, const std::string& prefix,
228                               const FieldDescriptor* field,
229                               absl::AnyInvocable<void()> emit_body) {
230   ABSL_CHECK(!HasHasbit(field));
231 
232   if (ShouldEmitNonDefaultCheck(field)) {
233     p->Emit(
234         {
235             {"condition", [&] { EmitNonDefaultCheck(p, prefix, field); }},
236             {"emit_body", [&] { emit_body(); }},
237         },
238         R"cc(
239           if ($condition$) {
240             $emit_body$;
241           }
242         )cc");
243   } else {
244     // In repeated fields, the same variable name may be emitted multiple
245     // times, hence the need for emitting braces even when the if condition is
246     // not necessary, so that the code looks like:
247     // {
248     //   int tmpvar = ...;
249     //   total += tmpvar;
250     // }
251     // {
252     //   int tmpvar = ...;
253     //   total += tmpvar;
254     // }
255     p->Emit({{"emit_body", [&] { emit_body(); }}},
256             R"cc(
257               {
258                 //~ Force newline.
259                 $emit_body$;
260               }
261             )cc");
262   }
263 }
264 
HasInternalHasMethod(const FieldDescriptor * field)265 bool HasInternalHasMethod(const FieldDescriptor* field) {
266   return !HasHasbit(field) &&
267          field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE;
268 }
269 
270 // Collects map entry message type information.
CollectMapInfo(const Options & options,const Descriptor * descriptor,absl::flat_hash_map<absl::string_view,std::string> * variables)271 void CollectMapInfo(
272     const Options& options, const Descriptor* descriptor,
273     absl::flat_hash_map<absl::string_view, std::string>* variables) {
274   ABSL_CHECK(IsMapEntryMessage(descriptor));
275   absl::flat_hash_map<absl::string_view, std::string>& vars = *variables;
276   const FieldDescriptor* key = descriptor->map_key();
277   const FieldDescriptor* val = descriptor->map_value();
278   vars["key_cpp"] = PrimitiveTypeName(options, key->cpp_type());
279   switch (val->cpp_type()) {
280     case FieldDescriptor::CPPTYPE_MESSAGE:
281       vars["val_cpp"] =
282           absl::StrCat("::", ProtobufNamespace(options), "::Message");
283       break;
284     case FieldDescriptor::CPPTYPE_ENUM:
285       vars["val_cpp"] = ClassName(val->enum_type(), true);
286       break;
287     default:
288       vars["val_cpp"] = PrimitiveTypeName(options, val->cpp_type());
289   }
290   vars["key_wire_type"] = absl::StrCat(
291       "TYPE_", absl::AsciiStrToUpper(DeclaredTypeMethodName(key->type())));
292   vars["val_wire_type"] = absl::StrCat(
293       "TYPE_", absl::AsciiStrToUpper(DeclaredTypeMethodName(val->type())));
294 }
295 
296 
297 // Returns true to make the message serialize in order, decided by the following
298 // factors in the order of precedence.
299 // --options().message_set_wire_format() == true
300 // --the message is in the allowlist (true)
301 // --GOOGLE_PROTOBUF_SHUFFLE_SERIALIZE is defined (false)
302 // --a ranage of message names that are allowed to stay in order (true)
ShouldSerializeInOrder(const Descriptor * descriptor,const Options & options)303 bool ShouldSerializeInOrder(const Descriptor* descriptor,
304                             const Options& options) {
305   return true;
306 }
307 
IsCrossFileMapField(const FieldDescriptor * field)308 bool IsCrossFileMapField(const FieldDescriptor* field) {
309   if (!field->is_map()) {
310     return false;
311   }
312 
313   const Descriptor* d = field->message_type();
314   const FieldDescriptor* value = d->FindFieldByNumber(2);
315 
316   return IsCrossFileMessage(value);
317 }
318 
IsCrossFileMaybeMap(const FieldDescriptor * field)319 bool IsCrossFileMaybeMap(const FieldDescriptor* field) {
320   if (IsCrossFileMapField(field)) {
321     return true;
322   }
323 
324   return IsCrossFileMessage(field);
325 }
326 
HasNonSplitOptionalString(const Descriptor * desc,const Options & options)327 bool HasNonSplitOptionalString(const Descriptor* desc, const Options& options) {
328   for (const auto* field : FieldRange(desc)) {
329     if (IsString(field) && !field->is_repeated() &&
330         !field->real_containing_oneof() && !ShouldSplit(field, options)) {
331       return true;
332     }
333   }
334   return false;
335 }
336 
337 struct FieldChunk {
FieldChunkgoogle::protobuf::compiler::cpp::__anonb44a127c0111::FieldChunk338   FieldChunk(bool has_hasbit, bool is_rarely_present, bool should_split)
339       : has_hasbit(has_hasbit),
340         is_rarely_present(is_rarely_present),
341         should_split(should_split) {}
342 
343   bool has_hasbit;
344   bool is_rarely_present;
345   bool should_split;
346 
347   std::vector<const FieldDescriptor*> fields;
348 };
349 
350 using ChunkIterator = std::vector<FieldChunk>::const_iterator;
351 
352 // Breaks down a single chunk of fields into a few chunks that share attributes
353 // controlled by "equivalent" predicate. Returns an array of chunks.
354 template <typename Predicate>
CollectFields(const std::vector<const FieldDescriptor * > & fields,const Options & options,const Predicate & equivalent)355 std::vector<FieldChunk> CollectFields(
356     const std::vector<const FieldDescriptor*>& fields, const Options& options,
357     const Predicate& equivalent) {
358   std::vector<FieldChunk> chunks;
359   for (auto field : fields) {
360     if (chunks.empty() || !equivalent(chunks.back().fields.back(), field)) {
361       chunks.emplace_back(HasHasbit(field), IsRarelyPresent(field, options),
362                           ShouldSplit(field, options));
363     }
364     chunks.back().fields.push_back(field);
365   }
366   return chunks;
367 }
368 
369 template <typename Predicate>
FindNextUnequalChunk(ChunkIterator start,ChunkIterator end,const Predicate & equal)370 ChunkIterator FindNextUnequalChunk(ChunkIterator start, ChunkIterator end,
371                                    const Predicate& equal) {
372   auto it = start;
373   while (++it != end) {
374     if (!equal(*start, *it)) {
375       return it;
376     }
377   }
378   return end;
379 }
380 
381 // Returns true if two chunks may be grouped for hasword check to skip multiple
382 // cold fields at once. They have to share the following traits:
383 // - whether they have hasbits
384 // - whether they are rarely present
385 // - whether they are split
MayGroupChunksForHaswordsCheck(const FieldChunk & a,const FieldChunk & b)386 bool MayGroupChunksForHaswordsCheck(const FieldChunk& a, const FieldChunk& b) {
387   return a.has_hasbit == b.has_hasbit &&
388          a.is_rarely_present == b.is_rarely_present &&
389          a.should_split == b.should_split;
390 }
391 
392 // Returns a bit mask based on has_bit index of "fields" that are typically on
393 // the same chunk. It is used in a group presence check where _has_bits_ is
394 // masked to tell if any thing in "fields" is present.
GenChunkMask(const std::vector<const FieldDescriptor * > & fields,const std::vector<int> & has_bit_indices)395 uint32_t GenChunkMask(const std::vector<const FieldDescriptor*>& fields,
396                       const std::vector<int>& has_bit_indices) {
397   ABSL_CHECK(!fields.empty());
398   int first_index_offset = has_bit_indices[fields.front()->index()] / 32;
399   uint32_t chunk_mask = 0;
400   for (auto field : fields) {
401     // "index" defines where in the _has_bits_ the field appears.
402     int index = has_bit_indices[field->index()];
403     ABSL_CHECK_EQ(first_index_offset, index / 32);
404     chunk_mask |= static_cast<uint32_t>(1) << (index % 32);
405   }
406   ABSL_CHECK_NE(0u, chunk_mask);
407   return chunk_mask;
408 }
409 
410 // Returns a bit mask based on has_bit index of "fields" in chunks in [it, end).
411 // Assumes that all chunks share the same hasbit word.
GenChunkMask(ChunkIterator it,ChunkIterator end,const std::vector<int> & has_bit_indices)412 uint32_t GenChunkMask(ChunkIterator it, ChunkIterator end,
413                       const std::vector<int>& has_bit_indices) {
414   ABSL_CHECK(it != end);
415 
416   int first_index_offset = has_bit_indices[it->fields.front()->index()] / 32;
417   uint32_t chunk_mask = 0;
418   do {
419     ABSL_CHECK_EQ(first_index_offset,
420                   has_bit_indices[it->fields.front()->index()] / 32);
421     chunk_mask |= GenChunkMask(it->fields, has_bit_indices);
422   } while (++it != end);
423   return chunk_mask;
424 }
425 
// Returns the number of bits set in n.
static int popcnt(uint32_t n) {
  int bits = 0;
  // Clearing the lowest set bit each round iterates once per set bit.
  for (; n != 0; n &= n - 1) {
    ++bits;
  }
  return bits;
}
435 
436 // Returns true if it emits conditional check against hasbit words. This is
437 // useful to skip multiple fields that are unlikely present based on profile
438 // (go/pdproto).
MaybeEmitHaswordsCheck(ChunkIterator it,ChunkIterator end,const Options & options,const std::vector<int> & has_bit_indices,int cached_has_word_index,const std::string & from,io::Printer * p)439 bool MaybeEmitHaswordsCheck(ChunkIterator it, ChunkIterator end,
440                             const Options& options,
441                             const std::vector<int>& has_bit_indices,
442                             int cached_has_word_index, const std::string& from,
443                             io::Printer* p) {
444   if (!it->has_hasbit || !IsProfileDriven(options) ||
445       std::distance(it, end) < 2 || !it->is_rarely_present) {
446     return false;
447   }
448 
449   auto hasbit_word = [&has_bit_indices](const FieldDescriptor* field) {
450     return has_bit_indices[field->index()] / 32;
451   };
452   auto is_same_hasword = [&](const FieldChunk& a, const FieldChunk& b) {
453     return hasbit_word(a.fields.front()) == hasbit_word(b.fields.front());
454   };
455 
456   struct HasWordMask {
457     int word;
458     uint32_t mask;
459   };
460 
461   std::vector<HasWordMask> hasword_masks;
462   while (it != end) {
463     auto next = FindNextUnequalChunk(it, end, is_same_hasword);
464     hasword_masks.push_back({hasbit_word(it->fields.front()),
465                              GenChunkMask(it, next, has_bit_indices)});
466     it = next;
467   }
468 
469   // Emit has_bit check for each has_bit_dword index.
470   p->Emit(
471       {{"cond",
472         [&] {
473           int first_word = hasword_masks.front().word;
474           for (const auto& m : hasword_masks) {
475             uint32_t mask = m.mask;
476             int this_word = m.word;
477             if (this_word != first_word) {
478               p->Emit(R"cc(
479                 ||
480               )cc");
481             }
482             auto v = p->WithVars({{"mask", absl::StrFormat("0x%08xu", mask)}});
483             if (this_word == cached_has_word_index) {
484               p->Emit("(cached_has_bits & $mask$) != 0");
485             } else {
486               p->Emit({{"from", from}, {"word", this_word}},
487                       "($from$_impl_._has_bits_[$word$] & $mask$) != 0");
488             }
489           }
490         }}},
491       R"cc(
492         if (PROTOBUF_PREDICT_FALSE($cond$)) {
493       )cc");
494   p->Indent();
495   return true;
496 }
497 
498 using Sub = ::google::protobuf::io::Printer::Sub;
ClassVars(const Descriptor * desc,Options opts)499 std::vector<Sub> ClassVars(const Descriptor* desc, Options opts) {
500   std::vector<Sub> vars = {
501       {"pkg", Namespace(desc, opts)},
502       {"Msg", ClassName(desc, false)},
503       {"pkg::Msg", QualifiedClassName(desc, opts)},
504       {"pkg.Msg", desc->full_name()},
505 
506       // Old-style names, to be removed once all usages are gone in this and
507       // other files.
508       {"classname", ClassName(desc, false)},
509       {"classtype", QualifiedClassName(desc, opts)},
510       {"full_name", desc->full_name()},
511       {"superclass", SuperClassName(desc, opts)},
512 
513       Sub("WeakDescriptorSelfPin",
514           UsingImplicitWeakDescriptor(desc->file(), opts)
515               ? absl::StrCat(StrongReferenceToType(desc, opts), ";")
516               : "")
517           .WithSuffix(";"),
518   };
519 
520   for (auto& pair : MessageVars(desc)) {
521     vars.push_back({std::string(pair.first), pair.second});
522   }
523 
524   for (auto& pair : UnknownFieldsVars(desc, opts)) {
525     vars.push_back({std::string(pair.first), pair.second});
526   }
527 
528   return vars;
529 }
530 
531 }  // anonymous namespace
532 
533 // ===================================================================
534 
MessageGenerator(const Descriptor * descriptor,const absl::flat_hash_map<absl::string_view,std::string> &,int index_in_file_messages,const Options & options,MessageSCCAnalyzer * scc_analyzer)535 MessageGenerator::MessageGenerator(
536     const Descriptor* descriptor,
537     const absl::flat_hash_map<absl::string_view, std::string>&,
538     int index_in_file_messages, const Options& options,
539     MessageSCCAnalyzer* scc_analyzer)
540     : descriptor_(descriptor),
541       index_in_file_messages_(index_in_file_messages),
542       options_(options),
543       field_generators_(descriptor),
544       scc_analyzer_(scc_analyzer) {
545 
546   if (!message_layout_helper_) {
547     message_layout_helper_ = std::make_unique<PaddingOptimizer>();
548   }
549 
550   // Compute optimized field order to be used for layout and initialization
551   // purposes.
552   for (auto field : FieldRange(descriptor_)) {
553     if (IsWeak(field, options_)) {
554       ++num_weak_fields_;
555       continue;
556     }
557 
558     if (!field->real_containing_oneof()) {
559       optimized_order_.push_back(field);
560     }
561   }
562 
563   const size_t initial_size = optimized_order_.size();
564   message_layout_helper_->OptimizeLayout(&optimized_order_, options_,
565                                          scc_analyzer_);
566   ABSL_CHECK_EQ(initial_size, optimized_order_.size());
567 
568   // This message has hasbits iff one or more fields need one.
569   for (auto field : optimized_order_) {
570     if (HasHasbit(field)) {
571       if (has_bit_indices_.empty()) {
572         has_bit_indices_.resize(descriptor_->field_count(), kNoHasbit);
573       }
574       has_bit_indices_[field->index()] = max_has_bit_index_++;
575     }
576     if (IsStringInlined(field, options_)) {
577       if (inlined_string_indices_.empty()) {
578         inlined_string_indices_.resize(descriptor_->field_count(), kNoHasbit);
579         // The bitset[0] is for arena dtor tracking. Donating states start from
580         // bitset[1];
581         ++max_inlined_string_index_;
582       }
583 
584       inlined_string_indices_[field->index()] = max_inlined_string_index_++;
585     }
586   }
587   field_generators_.Build(options_, scc_analyzer_, has_bit_indices_,
588                           inlined_string_indices_);
589 
590   for (int i = 0; i < descriptor->field_count(); ++i) {
591     if (descriptor->field(i)->is_required()) {
592       ++num_required_fields_;
593     }
594   }
595 
596   parse_function_generator_ = std::make_unique<ParseFunctionGenerator>(
597       descriptor_, max_has_bit_index_, has_bit_indices_,
598       inlined_string_indices_, options_, scc_analyzer_, variables_,
599       index_in_file_messages_);
600 }
601 
HasBitsSize() const602 size_t MessageGenerator::HasBitsSize() const {
603   return (max_has_bit_index_ + 31) / 32;
604 }
605 
InlinedStringDonatedSize() const606 size_t MessageGenerator::InlinedStringDonatedSize() const {
607   return (max_inlined_string_index_ + 31) / 32;
608 }
609 
610 absl::flat_hash_map<absl::string_view, std::string>
HasBitVars(const FieldDescriptor * field) const611 MessageGenerator::HasBitVars(const FieldDescriptor* field) const {
612   int has_bit_index = HasBitIndex(field);
613   ABSL_CHECK_NE(has_bit_index, kNoHasbit);
614   return {
615       {"has_array_index", absl::StrCat(has_bit_index / 32)},
616       {"has_mask", absl::StrFormat("0x%08xu", 1u << (has_bit_index % 32))},
617   };
618 }
619 
HasBitIndex(const FieldDescriptor * field) const620 int MessageGenerator::HasBitIndex(const FieldDescriptor* field) const {
621   return has_bit_indices_.empty() ? kNoHasbit
622                                   : has_bit_indices_[field->index()];
623 }
624 
HasByteIndex(const FieldDescriptor * field) const625 int MessageGenerator::HasByteIndex(const FieldDescriptor* field) const {
626   int hasbit = HasBitIndex(field);
627   return hasbit == kNoHasbit ? kNoHasbit : hasbit / 8;
628 }
629 
HasWordIndex(const FieldDescriptor * field) const630 int MessageGenerator::HasWordIndex(const FieldDescriptor* field) const {
631   int hasbit = HasBitIndex(field);
632   return hasbit == kNoHasbit ? kNoHasbit : hasbit / 32;
633 }
634 
AddGenerators(std::vector<std::unique_ptr<EnumGenerator>> * enum_generators,std::vector<std::unique_ptr<ExtensionGenerator>> * extension_generators)635 void MessageGenerator::AddGenerators(
636     std::vector<std::unique_ptr<EnumGenerator>>* enum_generators,
637     std::vector<std::unique_ptr<ExtensionGenerator>>* extension_generators) {
638   for (int i = 0; i < descriptor_->enum_type_count(); ++i) {
639     enum_generators->emplace_back(
640         std::make_unique<EnumGenerator>(descriptor_->enum_type(i), options_));
641     enum_generators_.push_back(enum_generators->back().get());
642   }
643   for (int i = 0; i < descriptor_->extension_count(); ++i) {
644     extension_generators->emplace_back(std::make_unique<ExtensionGenerator>(
645         descriptor_->extension(i), options_, scc_analyzer_));
646     extension_generators_.push_back(extension_generators->back().get());
647   }
648 }
649 
GenerateFieldAccessorDeclarations(io::Printer * p)650 void MessageGenerator::GenerateFieldAccessorDeclarations(io::Printer* p) {
651   auto v = p->WithVars(MessageVars(descriptor_));
652 
  // optimized_order_ does not contain fields where
  //    field->real_containing_oneof()
  // so we need to iterate over those as well.
  //
  // We place the non-oneof fields in optimized_order_, as that controls the
  // order of the _has_bits_ entries and we want GDB's pretty printers to be
  // able to infer these indices from the k[FIELDNAME]FieldNumber order.
660   std::vector<const FieldDescriptor*> ordered_fields;
661   ordered_fields.reserve(descriptor_->field_count());
662   ordered_fields.insert(ordered_fields.begin(), optimized_order_.begin(),
663                         optimized_order_.end());
664 
665   for (auto field : FieldRange(descriptor_)) {
666     if (!field->real_containing_oneof() && !field->options().weak()) {
667       continue;
668     }
669     ordered_fields.push_back(field);
670   }
671 
672   if (!ordered_fields.empty()) {
673     p->Emit({{
674                 "kFields",
675                 [&] {
676                   for (auto field : ordered_fields) {
677                     auto v = p->WithVars(FieldVars(field, options_));
678                     p->Emit({Sub("kField", FieldConstantName(field))
679                                  .AnnotatedAs(field)},
680                             R"cc(
681                               $kField$ = $number$,
682                             )cc");
683                   }
684                 },
685             }},
686             R"cc(
687               enum : int {
688                 $kFields$,
689               };
690             )cc");
691   }
692   for (auto field : ordered_fields) {
693     auto name = FieldName(field);
694 
695     auto v = p->WithVars(FieldVars(field, options_));
696     auto t = p->WithVars(MakeTrackerCalls(field, options_));
697     p->Emit({{"field_comment", FieldComment(field, options_)},
698              Sub("const_impl", "const;").WithSuffix(";"),
699              Sub("impl", ";").WithSuffix(";"),
700              {"sizer",
701               [&] {
702                 if (!field->is_repeated()) return;
703                 p->Emit({Sub("name_size", absl::StrCat(name, "_size"))
704                              .AnnotatedAs(field)},
705                         R"cc(
706                           $deprecated_attr $int $name_size$() $const_impl$;
707                         )cc");
708 
709                 p->Emit({Sub("_internal_name_size",
710                              absl::StrCat("_internal_", name, "_size"))
711                              .AnnotatedAs(field)},
712                         R"cc(
713                           private:
714                           int $_internal_name_size$() const;
715 
716                           public:
717                         )cc");
718               }},
719              {"hazzer",
720               [&] {
721                 if (!field->has_presence()) return;
722                 p->Emit({Sub("has_name", absl::StrCat("has_", name))
723                              .AnnotatedAs(field)},
724                         R"cc(
725                           $deprecated_attr $bool $has_name$() $const_impl$;
726                         )cc");
727               }},
728              {"internal_hazzer",
729               [&] {
730                 if (field->is_repeated() || !HasInternalHasMethod(field)) {
731                   return;
732                 }
733                 p->Emit({Sub("_internal_has_name",
734                              absl::StrCat("_internal_has_", name))
735                              .AnnotatedAs(field)},
736                         R"cc(
737                           private:
738                           bool $_internal_has_name$() const;
739 
740                           public:
741                         )cc");
742               }},
743              {"clearer",
744               [&] {
745                 p->Emit({Sub("clear_name", absl::StrCat("clear_", name))
746                              .AnnotatedAs({
747                                  field,
748                                  Semantic::kSet,
749                              })},
750                         R"cc(
751                           $deprecated_attr $void $clear_name$() $impl$;
752                         )cc");
753               }},
754              {"accessors",
755               [&] {
756                 field_generators_.get(field).GenerateAccessorDeclarations(p);
757               }}},
758             R"cc(
759               // $field_comment$
760               $sizer$;
761               $hazzer$;
762               $internal_hazzer$;
763               $clearer$;
764               $accessors$;
765             )cc");
766   }
767 
768   if (descriptor_->extension_range_count() > 0) {
769     // Generate accessors for extensions.
770     // We use "_proto_TypeTraits" as a type name below because "TypeTraits"
771     // causes problems if the class has a nested message or enum type with that
772     // name and "_TypeTraits" is technically reserved for the C++ library since
773     // it starts with an underscore followed by a capital letter.
774     //
775     // For similar reason, we use "_field_type" and "_is_packed" as parameter
776     // names below, so that "field_type" and "is_packed" can be used as field
777     // names.
778     p->Emit(R"cc(
779       template <typename _proto_TypeTraits, $pbi$::FieldType _field_type,
780                 bool _is_packed,
781                 typename = typename _proto_TypeTraits::Singular>
782       inline bool HasExtension(
783           const $pbi$::ExtensionIdentifier<$Msg$, _proto_TypeTraits,
784                                            _field_type, _is_packed>& id) const {
785         $WeakDescriptorSelfPin$;
786         $annotate_extension_has$;
787         return $extensions$.Has(id.number());
788       }
789 
790       template <typename _proto_TypeTraits, $pbi$::FieldType _field_type,
791                 bool _is_packed>
792       inline void ClearExtension(
793           const $pbi$::ExtensionIdentifier<$Msg$, _proto_TypeTraits,
794                                            _field_type, _is_packed>& id) {
795         $WeakDescriptorSelfPin$;
796         $extensions$.ClearExtension(id.number());
797         $annotate_extension_clear$;
798       }
799 
800       template <typename _proto_TypeTraits, $pbi$::FieldType _field_type,
801                 bool _is_packed,
802                 typename = typename _proto_TypeTraits::Repeated>
803       inline int ExtensionSize(
804           const $pbi$::ExtensionIdentifier<$Msg$, _proto_TypeTraits,
805                                            _field_type, _is_packed>& id) const {
806         $WeakDescriptorSelfPin$;
807         $annotate_extension_repeated_size$;
808         return $extensions$.ExtensionSize(id.number());
809       }
810 
811       template <typename _proto_TypeTraits, $pbi$::FieldType _field_type,
812                 bool _is_packed,
813                 std::enable_if_t<!_proto_TypeTraits::kLifetimeBound, int> = 0>
814       inline typename _proto_TypeTraits::Singular::ConstType GetExtension(
815           const $pbi$::ExtensionIdentifier<$Msg$, _proto_TypeTraits,
816                                            _field_type, _is_packed>& id) const {
817         $WeakDescriptorSelfPin$;
818         $annotate_extension_get$;
819         return _proto_TypeTraits::Get(id.number(), $extensions$, id.default_value());
820       }
821 
822       template <typename _proto_TypeTraits, $pbi$::FieldType _field_type,
823                 bool _is_packed,
824                 std::enable_if_t<_proto_TypeTraits::kLifetimeBound, int> = 0>
825       inline typename _proto_TypeTraits::Singular::ConstType GetExtension(
826           const $pbi$::ExtensionIdentifier<$Msg$, _proto_TypeTraits,
827                                            _field_type, _is_packed>& id) const
828           ABSL_ATTRIBUTE_LIFETIME_BOUND {
829         $WeakDescriptorSelfPin$;
830         $annotate_extension_get$;
831         return _proto_TypeTraits::Get(id.number(), $extensions$, id.default_value());
832       }
833 
834       template <typename _proto_TypeTraits, $pbi$::FieldType _field_type,
835                 bool _is_packed>
836       inline typename _proto_TypeTraits::Singular::MutableType MutableExtension(
837           const $pbi$::ExtensionIdentifier<$Msg$, _proto_TypeTraits,
838                                            _field_type, _is_packed>& id)
839           ABSL_ATTRIBUTE_LIFETIME_BOUND {
840         $WeakDescriptorSelfPin$;
841         $annotate_extension_mutable$;
842         return _proto_TypeTraits::Mutable(id.number(), _field_type, &$extensions$);
843       }
844 
845       template <typename _proto_TypeTraits, $pbi$::FieldType _field_type,
846                 bool _is_packed>
847       inline void SetExtension(
848           const $pbi$::ExtensionIdentifier<$Msg$, _proto_TypeTraits,
849                                            _field_type, _is_packed>& id,
850           typename _proto_TypeTraits::Singular::ConstType value) {
851         $WeakDescriptorSelfPin$;
852         _proto_TypeTraits::Set(id.number(), _field_type, value, &$extensions$);
853         $annotate_extension_set$;
854       }
855 
856       template <typename _proto_TypeTraits, $pbi$::FieldType _field_type,
857                 bool _is_packed>
858       inline void SetAllocatedExtension(
859           const $pbi$::ExtensionIdentifier<$Msg$, _proto_TypeTraits,
860                                            _field_type, _is_packed>& id,
861           typename _proto_TypeTraits::Singular::MutableType value) {
862         $WeakDescriptorSelfPin$;
863         _proto_TypeTraits::SetAllocated(id.number(), _field_type, value,
864                                         &$extensions$);
865         $annotate_extension_set$;
866       }
867       template <typename _proto_TypeTraits, $pbi$::FieldType _field_type,
868                 bool _is_packed>
869       inline void UnsafeArenaSetAllocatedExtension(
870           const $pbi$::ExtensionIdentifier<$Msg$, _proto_TypeTraits,
871                                            _field_type, _is_packed>& id,
872           typename _proto_TypeTraits::Singular::MutableType value) {
873         $WeakDescriptorSelfPin$;
874         _proto_TypeTraits::UnsafeArenaSetAllocated(id.number(), _field_type,
875                                                    value, &$extensions$);
876         $annotate_extension_set$;
877       }
878       template <typename _proto_TypeTraits, $pbi$::FieldType _field_type,
879                 bool _is_packed>
880       PROTOBUF_NODISCARD inline
881           typename _proto_TypeTraits::Singular::MutableType
882           ReleaseExtension(
883               const $pbi$::ExtensionIdentifier<$Msg$, _proto_TypeTraits,
884                                                _field_type, _is_packed>& id) {
885         $WeakDescriptorSelfPin$;
886         $annotate_extension_release$;
887         return _proto_TypeTraits::Release(id.number(), _field_type, &$extensions$);
888       }
889       template <typename _proto_TypeTraits, $pbi$::FieldType _field_type,
890                 bool _is_packed>
891       inline typename _proto_TypeTraits::Singular::MutableType
892       UnsafeArenaReleaseExtension(
893           const $pbi$::ExtensionIdentifier<$Msg$, _proto_TypeTraits,
894                                            _field_type, _is_packed>& id) {
895         $WeakDescriptorSelfPin$;
896         $annotate_extension_release$;
897         return _proto_TypeTraits::UnsafeArenaRelease(id.number(), _field_type,
898                                                      &$extensions$);
899       }
900 
901       template <typename _proto_TypeTraits, $pbi$::FieldType _field_type,
902                 bool _is_packed,
903                 std::enable_if_t<!_proto_TypeTraits::kLifetimeBound, int> = 0>
904       inline typename _proto_TypeTraits::Repeated::ConstType GetExtension(
905           const $pbi$::ExtensionIdentifier<$Msg$, _proto_TypeTraits,
906                                            _field_type, _is_packed>& id,
907           int index) const {
908         $WeakDescriptorSelfPin$;
909         $annotate_repeated_extension_get$;
910         return _proto_TypeTraits::Get(id.number(), $extensions$, index);
911       }
912 
913       template <typename _proto_TypeTraits, $pbi$::FieldType _field_type,
914                 bool _is_packed,
915                 std::enable_if_t<_proto_TypeTraits::kLifetimeBound, int> = 0>
916       inline typename _proto_TypeTraits::Repeated::ConstType GetExtension(
917           const $pbi$::ExtensionIdentifier<$Msg$, _proto_TypeTraits,
918                                            _field_type, _is_packed>& id,
919           int index) const ABSL_ATTRIBUTE_LIFETIME_BOUND {
920         $WeakDescriptorSelfPin$;
921         $annotate_repeated_extension_get$;
922         return _proto_TypeTraits::Get(id.number(), $extensions$, index);
923       }
924 
925       template <typename _proto_TypeTraits, $pbi$::FieldType _field_type,
926                 bool _is_packed>
927       inline typename _proto_TypeTraits::Repeated::MutableType MutableExtension(
928           const $pbi$::ExtensionIdentifier<$Msg$, _proto_TypeTraits,
929                                            _field_type, _is_packed>& id,
930           int index) ABSL_ATTRIBUTE_LIFETIME_BOUND {
931         $WeakDescriptorSelfPin$;
932         $annotate_repeated_extension_mutable$;
933         return _proto_TypeTraits::Mutable(id.number(), index, &$extensions$);
934       }
935 
936       template <typename _proto_TypeTraits, $pbi$::FieldType _field_type,
937                 bool _is_packed>
938       inline void SetExtension(
939           const $pbi$::ExtensionIdentifier<$Msg$, _proto_TypeTraits,
940                                            _field_type, _is_packed>& id,
941           int index, typename _proto_TypeTraits::Repeated::ConstType value) {
942         $WeakDescriptorSelfPin$;
943         _proto_TypeTraits::Set(id.number(), index, value, &$extensions$);
944         $annotate_repeated_extension_set$;
945       }
946 
947       template <typename _proto_TypeTraits, $pbi$::FieldType _field_type,
948                 bool _is_packed>
949       inline typename _proto_TypeTraits::Repeated::MutableType AddExtension(
950           const $pbi$::ExtensionIdentifier<$Msg$, _proto_TypeTraits,
951                                            _field_type, _is_packed>& id)
952           ABSL_ATTRIBUTE_LIFETIME_BOUND {
953         $WeakDescriptorSelfPin$;
954         typename _proto_TypeTraits::Repeated::MutableType to_add =
955             _proto_TypeTraits::Add(id.number(), _field_type, &$extensions$);
956         $annotate_repeated_extension_add_mutable$;
957         return to_add;
958       }
959 
960       template <typename _proto_TypeTraits, $pbi$::FieldType _field_type,
961                 bool _is_packed>
962       inline void AddExtension(
963           const $pbi$::ExtensionIdentifier<$Msg$, _proto_TypeTraits,
964                                            _field_type, _is_packed>& id,
965           typename _proto_TypeTraits::Repeated::ConstType value) {
966         $WeakDescriptorSelfPin$;
967         _proto_TypeTraits::Add(id.number(), _field_type, _is_packed, value,
968                                &$extensions$);
969         $annotate_repeated_extension_add$;
970       }
971 
972       template <typename _proto_TypeTraits, $pbi$::FieldType _field_type,
973                 bool _is_packed>
974       inline const typename _proto_TypeTraits::Repeated::RepeatedFieldType&
975       GetRepeatedExtension(
976           const $pbi$::ExtensionIdentifier<$Msg$, _proto_TypeTraits,
977                                            _field_type, _is_packed>& id) const
978           ABSL_ATTRIBUTE_LIFETIME_BOUND {
979         $WeakDescriptorSelfPin$;
980         $annotate_repeated_extension_list$;
981         return _proto_TypeTraits::GetRepeated(id.number(), $extensions$);
982       }
983 
984       template <typename _proto_TypeTraits, $pbi$::FieldType _field_type,
985                 bool _is_packed>
986       inline typename _proto_TypeTraits::Repeated::RepeatedFieldType*
987       MutableRepeatedExtension(
988           const $pbi$::ExtensionIdentifier<$Msg$, _proto_TypeTraits,
989                                            _field_type, _is_packed>& id)
990           ABSL_ATTRIBUTE_LIFETIME_BOUND {
991         $WeakDescriptorSelfPin$;
992         $annotate_repeated_extension_list_mutable$;
993         return _proto_TypeTraits::MutableRepeated(id.number(), _field_type,
994                                                   _is_packed, &$extensions$);
995       }
996     )cc");
997 
998     // Generate MessageSet specific APIs for proto2 MessageSet.
999     // For testing purposes we don't check for bridge.MessageSet, so
1000     // we don't use IsProto2MessageSet
1001     if (descriptor_->options().message_set_wire_format() &&
1002         !options_.opensource_runtime && !options_.lite_implicit_weak_fields) {
1003       // Special-case MessageSet.
1004       p->Emit(R"cc(
1005         GOOGLE_PROTOBUF_EXTENSION_MESSAGE_SET_ACCESSORS($Msg$);
1006       )cc");
1007     }
1008   }
1009 
1010   for (auto oneof : OneOfRange(descriptor_)) {
1011     p->Emit({{"oneof_name", oneof->name()},
1012              Sub{"clear_oneof_name", absl::StrCat("clear_", oneof->name())}
1013                  .AnnotatedAs({oneof, Semantic::kSet}),
1014              {"OneOfName", UnderscoresToCamelCase(oneof->name(), true)}},
1015             R"cc(
1016               void $clear_oneof_name$();
1017               $OneOfName$Case $oneof_name$_case() const;
1018             )cc");
1019   }
1020 }
1021 
GenerateSingularFieldHasBits(const FieldDescriptor * field,io::Printer * p)1022 void MessageGenerator::GenerateSingularFieldHasBits(
1023     const FieldDescriptor* field, io::Printer* p) {
1024   auto t = p->WithVars(MakeTrackerCalls(field, options_));
1025   if (field->options().weak()) {
1026     p->Emit(
1027         R"cc(
1028           inline bool $classname$::has_$name$() const {
1029             $WeakDescriptorSelfPin$;
1030             $annotate_has$;
1031             return $weak_field_map$.Has($number$);
1032           }
1033         )cc");
1034     return;
1035   }
1036   if (HasHasbit(field)) {
1037     auto v = p->WithVars(HasBitVars(field));
1038     p->Emit(
1039         {Sub{"ASSUME",
1040              [&] {
1041                if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE &&
1042                    !IsLazy(field, options_, scc_analyzer_)) {
1043                  // We maintain the invariant that for a submessage x, has_x()
1044                  // returning true implies that x_ is not null. By giving this
1045                  // information to the compiler, we allow it to eliminate
1046                  // unnecessary null checks later on.
1047                  p->Emit(
1048                      R"cc(PROTOBUF_ASSUME(!value || $field$ != nullptr);)cc");
1049                }
1050              }}
1051              .WithSuffix(";")},
1052         R"cc(
1053           inline bool $classname$::has_$name$() const {
1054             $WeakDescriptorSelfPin$;
1055             $annotate_has$;
1056             bool value = ($has_bits$[$has_array_index$] & $has_mask$) != 0;
1057             $ASSUME$;
1058             return value;
1059           }
1060         )cc");
1061   } else if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
1062     // Message fields have a has_$name$() method.
1063     if (IsLazy(field, options_, scc_analyzer_)) {
1064       p->Emit(R"cc(
1065         inline bool $classname$::_internal_has_$name$() const {
1066           return !$field$.IsCleared();
1067         }
1068       )cc");
1069     } else {
1070       p->Emit(R"cc(
1071         inline bool $classname$::_internal_has_$name$() const {
1072           return this != internal_default_instance() && $field$ != nullptr;
1073         }
1074       )cc");
1075     }
1076     p->Emit(R"cc(
1077       inline bool $classname$::has_$name$() const {
1078         $annotate_has$;
1079         return _internal_has_$name$();
1080       }
1081     )cc");
1082   }
1083 }
1084 
GenerateOneofHasBits(io::Printer * p)1085 void MessageGenerator::GenerateOneofHasBits(io::Printer* p) {
1086   for (const auto* oneof : OneOfRange(descriptor_)) {
1087     p->Emit(
1088         {
1089             {"oneof_index", oneof->index()},
1090             {"oneof_name", oneof->name()},
1091             {"cap_oneof_name", absl::AsciiStrToUpper(oneof->name())},
1092         },
1093         R"cc(
1094           inline bool $classname$::has_$oneof_name$() const {
1095             return $oneof_name$_case() != $cap_oneof_name$_NOT_SET;
1096           }
1097           inline void $classname$::clear_has_$oneof_name$() {
1098             $oneof_case$[$oneof_index$] = $cap_oneof_name$_NOT_SET;
1099           }
1100         )cc");
1101   }
1102 }
1103 
GenerateOneofMemberHasBits(const FieldDescriptor * field,io::Printer * p)1104 void MessageGenerator::GenerateOneofMemberHasBits(const FieldDescriptor* field,
1105                                                   io::Printer* p) {
1106   // Singular field in a oneof
1107   // N.B.: Without field presence, we do not use has-bits or generate
1108   // has_$name$() methods, but oneofs still have set_has_$name$().
1109   // Oneofs also have private _internal_has_$name$() a helper method.
1110   if (field->has_presence()) {
1111     auto t = p->WithVars(MakeTrackerCalls(field, options_));
1112     p->Emit(R"cc(
1113       inline bool $classname$::has_$name$() const {
1114         $WeakDescriptorSelfPin$;
1115         $annotate_has$;
1116         return $has_field$;
1117       }
1118     )cc");
1119   }
1120   if (HasInternalHasMethod(field)) {
1121     p->Emit(R"cc(
1122       inline bool $classname$::_internal_has_$name_internal$() const {
1123         return $has_field$;
1124       }
1125     )cc");
1126   }
1127   // set_has_$name$() for oneof fields is always private; hence should not be
1128   // annotated.
1129   p->Emit(R"cc(
1130     inline void $classname$::set_has_$name_internal$() {
1131       $oneof_case$[$oneof_index$] = k$field_name$;
1132     }
1133   )cc");
1134 }
1135 
GenerateFieldClear(const FieldDescriptor * field,bool is_inline,io::Printer * p)1136 void MessageGenerator::GenerateFieldClear(const FieldDescriptor* field,
1137                                           bool is_inline, io::Printer* p) {
1138   auto t = p->WithVars(MakeTrackerCalls(field, options_));
1139   p->Emit({{"inline", is_inline ? "inline" : ""},
1140            {"body",
1141             [&] {
1142               if (field->real_containing_oneof()) {
1143                 // Clear this field only if it is the active field in this
1144                 // oneof, otherwise ignore
1145                 p->Emit(
1146                     {{"clearing_code",
1147                       [&] {
1148                         field_generators_.get(field).GenerateClearingCode(p);
1149                       }}},
1150                     R"cc(
1151                       if ($has_field$) {
1152                         $clearing_code$;
1153                         clear_has_$oneof_name$();
1154                       }
1155                     )cc");
1156               } else {
1157                 // TODO: figure out if early return breaks tracking
1158                 if (ShouldSplit(field, options_)) {
1159                   p->Emit(R"cc(
1160                     if (PROTOBUF_PREDICT_TRUE(IsSplitMessageDefault()))
1161                       return;
1162                   )cc");
1163                 }
1164                 field_generators_.get(field).GenerateClearingCode(p);
1165                 if (HasHasbit(field)) {
1166                   auto v = p->WithVars(HasBitVars(field));
1167                   p->Emit(R"cc(
1168                     $has_bits$[$has_array_index$] &= ~$has_mask$;
1169                   )cc");
1170                 }
1171               }
1172             }}},
1173           R"cc(
1174             $inline $void $classname$::clear_$name$() {
1175               $pbi$::TSanWrite(&_impl_);
1176               $WeakDescriptorSelfPin$;
1177               $body$;
1178               $annotate_clear$;
1179             }
1180           )cc");
1181 }
1182 
1183 namespace {
1184 
1185 class AccessorVerifier {
1186  public:
1187   using SourceLocation = io::Printer::SourceLocation;
1188 
AccessorVerifier(const FieldDescriptor * field)1189   explicit AccessorVerifier(const FieldDescriptor* field) : field_(field) {}
~AccessorVerifier()1190   ~AccessorVerifier() {
1191     ABSL_CHECK(!needs_annotate_) << Error(SourceLocation::current());
1192     ABSL_CHECK(!needs_weak_descriptor_pin_) << Error(SourceLocation::current());
1193   }
1194 
operator ()(absl::string_view label,io::Printer::SourceLocation loc)1195   void operator()(absl::string_view label, io::Printer::SourceLocation loc) {
1196     if (label == "name" || label == "release_name") {
1197       // All accessors use $name$ or $release_name$ when constructing the
1198       // function name. We hook into those to determine that an accessor is
1199       // starting.
1200       SetTracker(needs_annotate_, true, loc);
1201       SetTracker(needs_weak_descriptor_pin_, true, loc);
1202       loc_ = loc;
1203     } else if (absl::StartsWith(label, "annotate")) {
1204       // All annotation labels start with `annotate`. Eg `annotate_get`.
1205       SetTracker(needs_annotate_, false, loc);
1206       loc_ = loc;
1207     } else if (label == "WeakDescriptorSelfPin") {
1208       // The self pin for weak descriptor types must be on every accessor.
1209       SetTracker(needs_weak_descriptor_pin_, false, loc);
1210       loc_ = loc;
1211     }
1212   }
1213 
1214  private:
Error(SourceLocation loc) const1215   std::string Error(SourceLocation loc) const {
1216     return absl::StrFormat("Field %s printed from %s:%d (prev %s:%d)\n",
1217                            field_->full_name(), loc.file_name(), loc.line(),
1218                            loc_.file_name(), loc_.line());
1219   }
1220 
SetTracker(bool & v,bool new_value,SourceLocation loc)1221   void SetTracker(bool& v, bool new_value, SourceLocation loc) {
1222     ABSL_CHECK_NE(v, new_value) << Error(loc);
1223     v = new_value;
1224   }
1225 
1226   bool needs_annotate_ = false;
1227   bool needs_weak_descriptor_pin_ = false;
1228   // We keep these fields for error reporting.
1229   const FieldDescriptor* field_;
1230   // On error, we report two locations: the current one and the last one. This
1231   // can help determine where the bug is. For example, if we see "name" twice in
1232   // a row, the bug is likely in the "last" one and not the current one because
1233   // it means the previous accessor didn't add the required code.
1234   SourceLocation loc_;
1235 };
1236 
1237 }  // namespace
1238 
EmitCheckAndUpdateByteSizeForField(const FieldDescriptor * field,io::Printer * p) const1239 void MessageGenerator::EmitCheckAndUpdateByteSizeForField(
1240     const FieldDescriptor* field, io::Printer* p) const {
1241   absl::AnyInvocable<void()> emit_body = [&] {
1242     field_generators_.get(field).GenerateByteSize(p);
1243   };
1244 
1245   if (!HasHasbit(field)) {
1246     MayEmitIfNonDefaultCheck(p, "this_.", field, std::move(emit_body));
1247     return;
1248   }
1249   if (field->options().weak()) {
1250     p->Emit({{"emit_body", [&] { emit_body(); }}},
1251             R"cc(
1252               if (has_$name$()) {
1253                 $emit_body$;
1254               }
1255             )cc");
1256     return;
1257   }
1258 
1259   int has_bit_index = has_bit_indices_[field->index()];
1260   p->Emit({{"mask",
1261             absl::StrFormat("0x%08xu", uint32_t{1} << (has_bit_index % 32))},
1262            {"emit_body", [&] { emit_body(); }}},
1263           R"cc(
1264             if (cached_has_bits & $mask$) {
1265               $emit_body$;
1266             }
1267           )cc");
1268 }
1269 
EmitUpdateByteSizeForField(const FieldDescriptor * field,io::Printer * p,int & cached_has_word_index) const1270 void MessageGenerator::EmitUpdateByteSizeForField(
1271     const FieldDescriptor* field, io::Printer* p,
1272     int& cached_has_word_index) const {
1273   p->Emit(
1274       {{"comment", [&] { PrintFieldComment(Formatter{p}, field, options_); }},
1275        {"update_cached_has_bits",
1276         [&] {
1277           if (!HasHasbit(field) || field->options().weak()) return;
1278 
1279           int has_bit_index = has_bit_indices_[field->index()];
1280 
1281           if (cached_has_word_index == (has_bit_index / 32)) return;
1282 
1283           cached_has_word_index = (has_bit_index / 32);
1284           p->Emit({{"index", cached_has_word_index}},
1285                   R"cc(
1286                     cached_has_bits = this_.$has_bits$[$index$];
1287                   )cc");
1288         }},
1289        {"check_and_update_byte_size_for_field",
1290         [&]() { EmitCheckAndUpdateByteSizeForField(field, p); }}},
1291       R"cc(
1292         $comment$;
1293         $update_cached_has_bits$;
1294         $check_and_update_byte_size_for_field$;
1295       )cc");
1296 }
1297 
GenerateFieldAccessorDefinitions(io::Printer * p)1298 void MessageGenerator::GenerateFieldAccessorDefinitions(io::Printer* p) {
1299   p->Emit("// $classname$\n\n");
1300 
1301   for (auto field : FieldRange(descriptor_)) {
1302     // We use a print listener to verify that the field generators properly add
1303     // the right annotations. This is only a verification step aimed to prevent
1304     // bugs where we have lack of test coverage. Note that this will verify the
1305     // annotations even when the particular feature is not on because we look at
1306     // the substitution variables, not the substitution result.
1307     // The check is a state machine that verifies that every substitution for
1308     // `name` is followed by one and only one for needed annotations. False
1309     // positives are accessors that are using $name$ for an internal name. For
1310     // those you can use $name_internal$ which is the same substitution but not
1311     // tracked by the verifier.
1312     const auto accessor_verifier =
1313         p->WithSubstitutionListener(AccessorVerifier(field));
1314 
1315     PrintFieldComment(Formatter{p}, field, options_);
1316 
1317     auto v = p->WithVars(FieldVars(field, options_));
1318     auto t = p->WithVars(MakeTrackerCalls(field, options_));
1319     if (field->is_repeated()) {
1320       p->Emit(R"cc(
1321         inline int $classname$::_internal_$name_internal$_size() const {
1322           return _internal_$name_internal$().size();
1323         }
1324         inline int $classname$::$name$_size() const {
1325           $WeakDescriptorSelfPin$;
1326           $annotate_size$;
1327           return _internal_$name_internal$_size();
1328         }
1329       )cc");
1330     } else if (field->real_containing_oneof()) {
1331       GenerateOneofMemberHasBits(field, p);
1332     } else {
1333       GenerateSingularFieldHasBits(field, p);
1334     }
1335 
1336     if (!IsCrossFileMaybeMap(field)) {
1337       GenerateFieldClear(field, true, p);
1338     }
1339     // Generate type-specific accessors.
1340     field_generators_.get(field).GenerateInlineAccessorDefinitions(p);
1341 
1342     p->Emit("\n");
1343   }
1344 
1345   GenerateOneofHasBits(p);
1346 }
1347 
GenerateVerifyDecl(io::Printer * p)1348 void MessageGenerator::GenerateVerifyDecl(io::Printer* p) {
1349 }
1350 
GenerateAnnotationDecl(io::Printer * p)1351 void MessageGenerator::GenerateAnnotationDecl(io::Printer* p) {
1352 }
1353 
GenerateMapEntryClassDefinition(io::Printer * p)1354 void MessageGenerator::GenerateMapEntryClassDefinition(io::Printer* p) {
1355   Formatter format(p);
1356   absl::flat_hash_map<absl::string_view, std::string> vars;
1357   CollectMapInfo(options_, descriptor_, &vars);
1358   ABSL_CHECK(HasDescriptorMethods(descriptor_->file(), options_));
1359   auto v = p->WithVars(std::move(vars));
1360   // Templatize constexpr constructor as a workaround for a bug in gcc 12
1361   // (warning in gcc 13).
1362   p->Emit(
1363       {{"decl_verify_func",
1364         [&] {
1365         }},
1366        {"decl_annotate", [&] { GenerateAnnotationDecl(p); }},
1367        {"parse_decls",
1368         [&] {
1369           parse_function_generator_->GenerateDataDecls(p);
1370         }}},
1371       R"cc(
1372         class $classname$ final
1373             : public ::$proto_ns$::internal::MapEntry<
1374                   $key_cpp$, $val_cpp$,
1375                   ::$proto_ns$::internal::WireFormatLite::$key_wire_type$,
1376                   ::$proto_ns$::internal::WireFormatLite::$val_wire_type$> {
1377          public:
1378           using SuperType = ::$proto_ns$::internal::MapEntry<
1379               $key_cpp$, $val_cpp$,
1380               ::$proto_ns$::internal::WireFormatLite::$key_wire_type$,
1381               ::$proto_ns$::internal::WireFormatLite::$val_wire_type$>;
1382           $classname$();
1383           template <typename = void>
1384           explicit PROTOBUF_CONSTEXPR $classname$(
1385               ::$proto_ns$::internal::ConstantInitialized);
1386           explicit $classname$(::$proto_ns$::Arena* arena);
1387           static const $classname$* internal_default_instance() {
1388             return reinterpret_cast<const $classname$*>(
1389                 &_$classname$_default_instance_);
1390           }
1391 
1392           $decl_verify_func$;
1393 
1394          private:
1395           friend class ::$proto_ns$::MessageLite;
1396           friend struct ::$tablename$;
1397 
1398           $parse_decls$;
1399           $decl_annotate$;
1400 
1401           const $pbi$::ClassData* GetClassData() const PROTOBUF_FINAL;
1402           static void* PlacementNew_(const void*, void* mem,
1403                                      ::$proto_ns$::Arena* arena);
1404           static constexpr auto InternalNewImpl_();
1405           static const $pbi$::ClassDataFull _class_data_;
1406         };
1407       )cc");
1408 }
1409 
// Emits the declaration of the nested `struct Impl_` that holds the data
// members of this message and, when HasImplData() reports that the message has
// data, the anonymous union wrapping the `_impl_` member. All output is written
// through the printer `p`.
//
// Member order is fixed by the template at the end of the Emit() call:
//   1. members assumed to be 8-byte aligned: extension set, static tracker
//      members, inlined-string donation bits, has-bits (with `_cached_size_`
//      placed directly after the has-bits when it is needed);
//   2. field members: regular fields, the optional `Split` struct and its
//      pointer, and one union per oneof;
//   3. members assumed to be 4-byte aligned: `_cached_size_` (only if it was
//      not already emitted in group 1), `_oneof_case_`, `_weak_field_map_`.
// Each callback returns without emitting anything when the corresponding
// feature is absent from the message.
GenerateImplDefinition(io::Printer * p)1410 void MessageGenerator::GenerateImplDefinition(io::Printer* p) {
1411   // Prepare decls for _cached_size_ and _has_bits_.  Their position in the
1412   // output will be determined later.
       // need_to_emit_cached_size is mutable state shared by the callbacks below:
       // whichever callback emits `_cached_size_` clears it, so the member is
       // declared at most once. It starts out false for simple-base-class messages.
1413   bool need_to_emit_cached_size = !HasSimpleBaseClass(descriptor_, options_);
1414   const size_t sizeof_has_bits = HasBitsSize();
1415 
1416   // To minimize padding, data members are divided into three sections:
1417   // (1) members assumed to align to 8 bytes
1418   // (2) members corresponding to message fields, re-ordered to optimize
1419   //     alignment.
1420   // (3) members assumed to align to 4 bytes.
1421   p->Emit(
1422       {{"extension_set",
1423         [&] {
1424           if (descriptor_->extension_range_count() == 0) return;
1425 
1426           p->Emit(R"cc(
1427             ::$proto_ns$::internal::ExtensionSet _extensions_;
1428           )cc");
1429         }},
1430        {"tracker",
1431         [&] {
1432           if (!HasTracker(descriptor_, options_)) return;
1433 
1434           p->Emit(R"cc(
1435             static ::$proto_ns$::AccessListener<$Msg$> _tracker_;
1436             static void TrackerOnGetMetadata() { $annotate_reflection$; }
1437           )cc");
1438         }},
1439        {"inlined_string_donated",
1440         [&] {
1441           // Generate _inlined_string_donated_ for inlined string type.
1442           // TODO: To avoid affecting the locality of
1443           // `_has_bits_`, should this be below or above `_has_bits_`?
1444           if (inlined_string_indices_.empty()) return;
1445 
1446           p->Emit({{"donated_size", InlinedStringDonatedSize()}},
1447                   R"cc(
1448                     ::$proto_ns$::internal::HasBits<$donated_size$>
1449                         _inlined_string_donated_;
1450                   )cc");
1451         }},
1452        {"has_bits",
1453         [&] {
1454           if (has_bit_indices_.empty()) return;
1455 
1456           // _has_bits_ is frequently accessed, so to reduce code size and
1457           // improve speed, it should be close to the start of the object.
1458           // Placing _cached_size_ together with _has_bits_ improves cache
1459           // locality despite potential alignment padding.
1460           p->Emit({{"sizeof_has_bits", sizeof_has_bits}}, R"cc(
1461             ::$proto_ns$::internal::HasBits<$sizeof_has_bits$> _has_bits_;
1462           )cc");
1463           if (need_to_emit_cached_size) {
1464             p->Emit(R"cc(
1465               ::$proto_ns$::internal::CachedSize _cached_size_;
1466             )cc");
1467             need_to_emit_cached_size = false;
1468           }
1469         }},
1470        {"field_members",
1471         [&] {
1472           // Emit some private and static members
               // Static members are generated for every field in optimized_order_;
               // private members only for fields that are not split (split fields
               // get their private members inside the Split struct below).
1473           for (auto field : optimized_order_) {
1474             field_generators_.get(field).GenerateStaticMembers(p);
1475             if (!ShouldSplit(field, options_)) {
1476               field_generators_.get(field).GeneratePrivateMembers(p);
1477             }
1478           }
1479         }},
1480        {"decl_split",
1481         [&] {
1482           if (!ShouldSplit(descriptor_, options_)) return;
1483           p->Emit({{"split_field",
1484                     [&] {
1485                       for (auto field : optimized_order_) {
1486                         if (!ShouldSplit(field, options_)) continue;
1487                         field_generators_.get(field).GeneratePrivateMembers(p);
1488                       }
1489                     }}},
1490                   R"cc(
1491                     struct Split {
1492                       $split_field$;
1493                       using InternalArenaConstructable_ = void;
1494                       using DestructorSkippable_ = void;
1495                     };
1496                     static_assert(std::is_trivially_copy_constructible<Split>::value);
1497                     static_assert(std::is_trivially_destructible<Split>::value);
1498                     Split* _split_;
1499                   )cc");
1500         }},
1501        {"oneof_members",
1502         [&] {
1503           // For each oneof generate a union
1504           for (auto oneof : OneOfRange(descriptor_)) {
1505             // explicit empty constructor is needed when union contains
1506             // ArenaStringPtr members for string fields.
1507             p->Emit(
1508                 {{"camel_oneof_name",
1509                   UnderscoresToCamelCase(oneof->name(), true)},
1510                  {"oneof_name", oneof->name()},
1511                  {"oneof_fields",
1512                   [&] {
1513                     for (auto field : FieldRange(oneof)) {
1514                       field_generators_.get(field).GeneratePrivateMembers(p);
1515                     }
1516                   }}},
1517                 R"cc(
1518                   union $camel_oneof_name$Union {
1519                     constexpr $camel_oneof_name$Union() : _constinit_{} {}
1520                     ::$proto_ns$::internal::ConstantInitialized _constinit_;
1521                     $oneof_fields$;
1522                   } $oneof_name$_;
1523                 )cc");
                 // Static members of the oneof fields are emitted after the union,
                 // not inside it.
1524             for (auto field : FieldRange(oneof)) {
1525               field_generators_.get(field).GenerateStaticMembers(p);
1526             }
1527           }
1528         }},
1529        {"cached_size_if_no_hasbits",
1530         [&] {
               // Fallback position for `_cached_size_`: only used when the has_bits
               // callback did not already emit it.
1531           if (!need_to_emit_cached_size) return;
1532 
1533           need_to_emit_cached_size = false;
1534           p->Emit(R"cc(
1535             ::$proto_ns$::internal::CachedSize _cached_size_;
1536           )cc");
1537         }},
1538        {"oneof_case",
1539         [&] {
1540           // Generate _oneof_case_.
1541           if (descriptor_->real_oneof_decl_count() == 0) return;
1542 
1543           p->Emit({{"count", descriptor_->real_oneof_decl_count()}},
1544                   R"cc(
1545                     $uint32$ _oneof_case_[$count$];
1546                   )cc");
1547         }},
1548        {"weak_field_map",
1549         [&] {
1550           if (num_weak_fields_ == 0) return;
1551 
1552           p->Emit(R"cc(
1553             ::$proto_ns$::internal::WeakFieldMap _weak_field_map_;
1554           )cc");
1555         }},
1556        {"union_impl",
1557         [&] {
1558           // Only create the _impl_ field if it contains data.
1559           if (!HasImplData(descriptor_, options_)) return;
1560 
1561           // clang-format off
1562             p->Emit(R"cc(union { Impl_ _impl_; };)cc");
1563           // clang-format on
1564         }}},
1565       R"cc(
1566         struct Impl_ {
1567           //~ TODO: check if/when there is a need for an
1568           //~ outline dtor.
1569           inline explicit constexpr Impl_(
1570               ::$proto_ns$::internal::ConstantInitialized) noexcept;
1571           inline explicit Impl_($pbi$::InternalVisibility visibility,
1572                                 ::$proto_ns$::Arena* arena);
1573           inline explicit Impl_($pbi$::InternalVisibility visibility,
1574                                 ::$proto_ns$::Arena* arena, const Impl_& from,
1575                                 const $classname$& from_msg);
1576           //~ Members assumed to align to 8 bytes:
1577           $extension_set$;
1578           $tracker$;
1579           $inlined_string_donated$;
1580           $has_bits$;
1581           //~ Field members:
1582           $field_members$;
1583           $decl_split$;
1584           $oneof_members$;
1585           //~ Members assumed to align to 4 bytes:
1586           $cached_size_if_no_hasbits$;
1587           $oneof_case$;
1588           $weak_field_map$;
1589           //~ For detecting when concurrent accessor calls cause races.
1590           PROTOBUF_TSAN_DECLARE_MEMBER
1591         };
1592         $union_impl$;
1593       )cc");
1594 
       // Sanity check: if `_cached_size_` was needed, one of the two callbacks
       // above (has_bits or cached_size_if_no_hasbits) must have emitted it and
       // cleared the flag while the template was processed.
1595   ABSL_DCHECK(!need_to_emit_cached_size);
1596 }
1597 
// Emits, through the printer `p`, the Any-specific members of the generated
// class: PackFrom / UnpackTo overloads, the templated Is<T>() helper and the
// declaration of ParseAnyTypeUrl().
//
// The caller must only invoke this for the Any message; this is asserted with
// ABSL_DCHECK(IsAnyMessage(descriptor_)).
//
// Two variants of PackFrom / UnpackTo are produced, selected by
// HasDescriptorMethods() for the file of this message:
//   - with descriptor methods: non-template overloads taking a Message, a
//     declaration of GetAnyFieldDescriptors(), and templated overloads that are
//     enabled only for types not convertible to `const Message&`;
//   - without descriptor methods: unconstrained templated overloads only.
GenerateAnyMethodDefinition(io::Printer * p)1598 void MessageGenerator::GenerateAnyMethodDefinition(io::Printer* p) {
1599   ABSL_DCHECK(IsAnyMessage(descriptor_));
1600 
1601   p->Emit(
1602       {{"any_methods",
1603         [&] {
1604           if (HasDescriptorMethods(descriptor_->file(), options_)) {
1605             p->Emit(
1606                 R"cc(
1607                   bool PackFrom(const ::$proto_ns$::Message& message) {
1608                     $DCHK$_NE(&message, this);
1609                     return ::$proto_ns$::internal::InternalPackFrom(
1610                         message, mutable_type_url(), _internal_mutable_value());
1611                   }
1612                   bool PackFrom(const ::$proto_ns$::Message& message,
1613                                 ::absl::string_view type_url_prefix) {
1614                     $DCHK$_NE(&message, this);
1615                     return ::$proto_ns$::internal::InternalPackFrom(
1616                         message, type_url_prefix, mutable_type_url(),
1617                         _internal_mutable_value());
1618                   }
1619                   bool UnpackTo(::$proto_ns$::Message* message) const {
1620                     return ::$proto_ns$::internal::InternalUnpackTo(
1621                         _internal_type_url(), _internal_value(), message);
1622                   }
1623                   static bool GetAnyFieldDescriptors(
1624                       const ::$proto_ns$::Message& message,
1625                       const ::$proto_ns$::FieldDescriptor** type_url_field,
1626                       const ::$proto_ns$::FieldDescriptor** value_field);
1627                   template <
1628                       typename T,
1629                       class = typename std::enable_if<!std::is_convertible<
1630                           T, const ::$proto_ns$::Message&>::value>::type>
1631                   bool PackFrom(const T& message) {
1632                     return ::$proto_ns$::internal::InternalPackFrom<T>(
1633                         message, mutable_type_url(), _internal_mutable_value());
1634                   }
1635                   template <
1636                       typename T,
1637                       class = typename std::enable_if<!std::is_convertible<
1638                           T, const ::$proto_ns$::Message&>::value>::type>
1639                   bool PackFrom(const T& message,
1640                                 ::absl::string_view type_url_prefix) {
1641                     return ::$proto_ns$::internal::InternalPackFrom<T>(
1642                         message, type_url_prefix, mutable_type_url(),
1643                         _internal_mutable_value());
1644                   }
1645                   template <
1646                       typename T,
1647                       class = typename std::enable_if<!std::is_convertible<
1648                           T, const ::$proto_ns$::Message&>::value>::type>
1649                   bool UnpackTo(T* message) const {
1650                     return ::$proto_ns$::internal::InternalUnpackTo<T>(
1651                         _internal_type_url(), _internal_value(), message);
1652                   }
1653                 )cc");
               // No descriptor methods for this file: emit only unconstrained
               // templates; the Message-based overloads and the
               // GetAnyFieldDescriptors() declaration are omitted.
1654           } else {
1655             p->Emit(
1656                 R"cc(
1657                   template <typename T>
1658                   bool PackFrom(const T& message) {
1659                     return ::$proto_ns$::internal::InternalPackFrom(
1660                         message, mutable_type_url(), _internal_mutable_value());
1661                   }
1662                   template <typename T>
1663                   bool PackFrom(const T& message,
1664                                 ::absl::string_view type_url_prefix) {
1665                     return ::$proto_ns$::internal::InternalPackFrom(
1666                         message, type_url_prefix, mutable_type_url(),
1667                         _internal_mutable_value());
1668                   }
1669                   template <typename T>
1670                   bool UnpackTo(T* message) const {
1671                     return ::$proto_ns$::internal::InternalUnpackTo(
1672                         _internal_type_url(), _internal_value(), message);
1673                   }
1674                 )cc");
1675           }
1676         }}},
       // Outer template: the variant-specific methods above are followed by
       // Is<T>() and ParseAnyTypeUrl(), which are emitted in both variants.
1677       R"cc(
1678         // implements Any
1679         // -----------------------------------------------
1680 
1681         $any_methods$;
1682 
1683         template <typename T>
1684         bool Is() const {
1685           return ::$proto_ns$::internal::InternalIs<T>(_internal_type_url());
1686         }
1687         static bool ParseAnyTypeUrl(::absl::string_view type_url,
1688                                     std::string* full_type_name);
1689       )cc");
1690 }
1691 
// Emits, through the printer `p`, the complete C++ class definition for this
// message.
//
// Early exits:
//   - nothing is emitted when ShouldGenerateClass() is false;
//   - map entry messages are delegated to GenerateMapEntryClassDefinition().
//
// Otherwise a single Emit() call is made. Its first argument is a table of
// substitutions, one per `$name$` placeholder in the class template that forms
// the second argument. Most substitutions are callbacks that either emit a
// snippet or return early (emitting nothing) when the corresponding feature
// does not apply to this message. Several snippets switch between
// `private:` and `public:` inside the emitted class, so the position of each
// placeholder in the template matters.
GenerateClassDefinition(io::Printer * p)1692 void MessageGenerator::GenerateClassDefinition(io::Printer* p) {
1693   if (!ShouldGenerateClass(descriptor_, options_)) return;
1694 
       // `v` and `t` keep the class variables and tracker-call variables
       // registered on the printer for the rest of this function.
1695   auto v = p->WithVars(ClassVars(descriptor_, options_));
1696   auto t = p->WithVars(MakeTrackerCalls(descriptor_, options_));
       // NOTE(review): `format` is not referenced anywhere else in this function;
       // it looks removable, but confirm that constructing a Formatter has no
       // side effects on the printer before deleting it.
1697   Formatter format(p);
1698 
1699   if (IsMapEntryMessage(descriptor_)) {
1700     GenerateMapEntryClassDefinition(p);
1701     return;
1702   }
1703 
1704   auto annotation = p->WithAnnotations({{"classname", descriptor_}});
1705   p->Emit(
1706       {{"decl_dtor",
1707         [&] {
1708           if (HasSimpleBaseClass(descriptor_, options_)) return;
1709 
1710           p->Emit(R"cc(
1711             ~$classname$() PROTOBUF_FINAL;
1712           )cc");
1713         }},
1714        {"decl_annotate", [&] { GenerateAnnotationDecl(p); }},
1715        {"decl_verify_func", [&] { GenerateVerifyDecl(p); }},
1716        {"descriptor_accessor",
1717         [&] {
1718           // Only generate this member if it's not disabled.
1719           if (!HasDescriptorMethods(descriptor_->file(), options_) ||
1720               descriptor_->options().no_standard_descriptor_accessor()) {
1721             return;
1722           }
1723 
1724           p->Emit(R"cc(
1725             static const ::$proto_ns$::Descriptor* descriptor() {
1726               return GetDescriptor();
1727             }
1728           )cc");
1729         }},
1730        {"get_descriptor",
1731         [&] {
1732           // These shadow non-static methods of the same names in Message.
1733           // We redefine them here because calls directly on the generated
1734           // class can be statically analyzed -- we know what descriptor
1735           // types are being requested. It also avoids a vtable dispatch.
1736           //
1737           // We would eventually like to eliminate the methods in Message,
1738           // and having this separate also lets us track calls to the base
1739           // class methods separately.
1740           if (!HasDescriptorMethods(descriptor_->file(), options_)) return;
1741 
1742           p->Emit(R"cc(
1743             static const ::$proto_ns$::Descriptor* GetDescriptor() {
1744               return default_instance().GetMetadata().descriptor;
1745             }
1746             static const ::$proto_ns$::Reflection* GetReflection() {
1747               return default_instance().GetMetadata().reflection;
1748             }
1749           )cc");
1750         }},
1751        {"decl_oneof",
1752         [&] {
1753           // Generate enum values for every field in oneofs. One list is
1754           // generated for each oneof with an additional *_NOT_SET value.
               // Each enumerator is set to the field number of its field; the
               // *_NOT_SET enumerator is 0.
1755           for (auto oneof : OneOfRange(descriptor_)) {
1756             p->Emit(
1757                 {{"oneof_camel_name",
1758                   UnderscoresToCamelCase(oneof->name(), true)},
1759                  {"oneof_field",
1760                   [&] {
1761                     for (auto field : FieldRange(oneof)) {
1762                       p->Emit(
1763                           {
1764                               {"oneof_constant", OneofCaseConstantName(field)},
1765                               {"field_number", field->number()},
1766                           },
1767                           R"cc(
1768                             $oneof_constant$ = $field_number$,
1769                           )cc");
1770                     }
1771                   }},
1772                  {"upper_oneof_name", absl::AsciiStrToUpper(oneof->name())}},
1773                 R"cc(
1774                   enum $oneof_camel_name$Case {
1775                     $oneof_field$,
1776                     $upper_oneof_name$_NOT_SET = 0,
1777                   };
1778                 )cc");
1779           }
1780         }},
1781        {"index_in_file_messages", index_in_file_messages_},
1782        {"decl_any_methods",
1783         [&] {
1784           if (!IsAnyMessage(descriptor_)) return;
1785 
1786           GenerateAnyMethodDefinition(p);
1787         }},
1788        {"generated_methods",
1789         [&] {
1790           if (!HasGeneratedMethods(descriptor_->file(), options_)) return;
1791 
               // Three variants of CopyFrom / MergeFrom follow: descriptor methods
               // with a non-simple base, descriptor methods with a simple base
               // (forwarding to the superclass), and no descriptor methods.
1792           if (HasDescriptorMethods(descriptor_->file(), options_)) {
1793             if (!HasSimpleBaseClass(descriptor_, options_)) {
1794               // Use Message's built-in MergeFrom and CopyFrom when the
1795               // passed-in argument is a generic Message instance, and
1796               // only define the custom MergeFrom and CopyFrom
1797               // instances when the source of the merge/copy is known
1798               // to be the same class as the destination.
1799               p->Emit(R"cc(
1800                 using $superclass$::CopyFrom;
1801                 void CopyFrom(const $classname$& from);
1802                 using $superclass$::MergeFrom;
1803                 void MergeFrom(const $classname$& from) { $classname$::MergeImpl(*this, from); }
1804 
1805                 private:
1806                 static void MergeImpl(
1807                     ::$proto_ns$::MessageLite& to_msg,
1808                     const ::$proto_ns$::MessageLite& from_msg);
1809 
1810                 public:
1811               )cc");
1812             } else {
1813               p->Emit(R"cc(
1814                 using $superclass$::CopyFrom;
1815                 inline void CopyFrom(const $classname$& from) {
1816                   $superclass$::CopyImpl(*this, from);
1817                 }
1818                 using $superclass$::MergeFrom;
1819                 void MergeFrom(const $classname$& from) {
1820                   $superclass$::MergeImpl(*this, from);
1821                 }
1822 
1823                 public:
1824               )cc");
1825             }
1826           } else {
1827             p->Emit(R"cc(
1828               void CopyFrom(const $classname$& from);
1829               void MergeFrom(const $classname$& from) { $classname$::MergeImpl(*this, from); }
1830 
1831               private:
1832               static void MergeImpl(::$proto_ns$::MessageLite& to_msg,
1833                                     const ::$proto_ns$::MessageLite& from_msg);
1834 
1835               public:
1836             )cc");
1837           }
1838 
               // IsInitialized() forwards to IsInitializedImpl() only when
               // NeedsIsInitialized() is true; otherwise it is emitted as a
               // function that always returns true.
1839           if (NeedsIsInitialized()) {
1840             p->Emit(R"cc(
1841               bool IsInitialized() const {
1842                 $WeakDescriptorSelfPin$;
1843                 return IsInitializedImpl(*this);
1844               }
1845 
1846               private:
1847               static bool IsInitializedImpl(const MessageLite& msg);
1848 
1849               public:
1850             )cc");
1851           } else {
1852             p->Emit(R"cc(
1853               bool IsInitialized() const {
1854                 $WeakDescriptorSelfPin$;
1855                 return true;
1856               }
1857             )cc");
1858           }
1859 
1860           if (!HasSimpleBaseClass(descriptor_, options_)) {
1861             // In custom vtable mode, the functions are implemented as static
1862             // functions, which are the ones we put in the custom vtable. The
1863             // non-static functions are small trampolines. In normal mode, the
1864             // functions implemented are the non-static members which are a
1865             // virtual overrides. This reduces the number of functions in the
1866             // binary in both modes.
1867             p->Emit(R"cc(
1868               ABSL_ATTRIBUTE_REINITIALIZES void Clear() PROTOBUF_FINAL;
1869 #if defined(PROTOBUF_CUSTOM_VTABLE)
1870               private:
1871               static ::size_t ByteSizeLong(const ::$proto_ns$::MessageLite& msg);
1872               static $uint8$* _InternalSerialize(
1873                   const MessageLite& msg, $uint8$* target,
1874                   ::$proto_ns$::io::EpsCopyOutputStream* stream);
1875 
1876               public:
1877               ::size_t ByteSizeLong() const { return ByteSizeLong(*this); }
1878               $uint8$* _InternalSerialize(
1879                   $uint8$* target,
1880                   ::$proto_ns$::io::EpsCopyOutputStream* stream) const {
1881                 return _InternalSerialize(*this, target, stream);
1882               }
1883 #else   // PROTOBUF_CUSTOM_VTABLE
1884               ::size_t ByteSizeLong() const final;
1885               $uint8$* _InternalSerialize(
1886                   $uint8$* target,
1887                   ::$proto_ns$::io::EpsCopyOutputStream* stream) const final;
1888 #endif  // PROTOBUF_CUSTOM_VTABLE
1889             )cc");
1890           }
1891         }},
1892        {"internal_field_number",
1893         [&] {
               // Only emitted when field listener events are injected; the constant
               // is set to the number of fields declared in the message.
1894           if (!options_.field_listener_options.inject_field_listener_events)
1895             return;
1896 
1897           p->Emit({{"field_count", descriptor_->field_count()}}, R"cc(
1898             static constexpr int _kInternalFieldNumber = $field_count$;
1899           )cc");
1900         }},
1901        {"decl_non_simple_base",
1902         [&] {
1903           if (HasSimpleBaseClass(descriptor_, options_)) return;
1904           p->Emit(
1905               R"cc(
1906                 int GetCachedSize() const { return $cached_size$.Get(); }
1907 
1908                 private:
1909                 void SharedCtor(::$proto_ns$::Arena* arena);
1910                 static void SharedDtor(MessageLite& self);
1911                 void InternalSwap($classname$* other);
1912               )cc");
1913         }},
1914        {"arena_dtor",
1915         [&] {
1916           switch (NeedsArenaDestructor()) {
                 // kOnDemand: besides ArenaDtor, emit OnDemandRegisterArenaDtor,
                 // which returns early when bit 0 of the first inlined-string
                 // donated word is clear and otherwise clears that bit and
                 // registers ArenaDtor with the arena.
1917             case ArenaDtorNeeds::kOnDemand:
1918               p->Emit(R"cc(
1919                 private:
1920                 static void ArenaDtor(void* object);
1921                 static void OnDemandRegisterArenaDtor(
1922                     MessageLite& msg, ::$proto_ns$::Arena& arena) {
1923                   auto& this_ = static_cast<$classname$&>(msg);
1924                   if ((this_.$inlined_string_donated_array$[0] & 0x1u) == 0) {
1925                     return;
1926                   }
1927                   this_.$inlined_string_donated_array$[0] &= 0xFFFFFFFEu;
1928                   arena.OwnCustomDestructor(&this_, &$classname$::ArenaDtor);
1929                 }
1930               )cc");
1931               break;
1932             case ArenaDtorNeeds::kRequired:
1933               p->Emit(R"cc(
1934                 private:
1935                 static void ArenaDtor(void* object);
1936               )cc");
1937               break;
1938             case ArenaDtorNeeds::kNone:
1939               break;
1940           }
1941         }},
1942        {"get_metadata",
1943         [&] {
1944           if (!HasDescriptorMethods(descriptor_->file(), options_)) return;
1945 
1946           p->Emit(R"cc(
1947             ::$proto_ns$::Metadata GetMetadata() const;
1948           )cc");
1949         }},
1950        {"decl_split_methods",
1951         [&] {
1952           if (!ShouldSplit(descriptor_, options_)) return;
1953           p->Emit({{"default_name", DefaultInstanceName(descriptor_, options_,
1954                                                         /*split=*/true)}},
1955                   R"cc(
1956                     private:
1957                     inline bool IsSplitMessageDefault() const {
1958                       return $split$ == reinterpret_cast<const Impl_::Split*>(&$default_name$);
1959                     }
1960                     PROTOBUF_NOINLINE void PrepareSplitMessageForWrite();
1961 
1962                     public:
1963                   )cc");
1964         }},
1965        {"nested_types",
1966         [&] {
1967           // Import all nested message classes into this class's scope with
1968           // typedefs.
               // Map entry messages are skipped.
1969           for (int i = 0; i < descriptor_->nested_type_count(); ++i) {
1970             const Descriptor* nested_type = descriptor_->nested_type(i);
1971             if (!IsMapEntryMessage(nested_type)) {
1972               p->Emit(
1973                   {
1974                       Sub{"nested_full_name", ClassName(nested_type, false)}
1975                           .AnnotatedAs(nested_type),
1976                       Sub{"nested_name", ResolveKeyword(nested_type->name())}
1977                           .AnnotatedAs(nested_type),
1978                   },
1979                   R"cc(
1980                     using $nested_name$ = $nested_full_name$;
1981                   )cc");
1982             }
1983           }
1984         }},
1985        {"nested_enums",
1986         [&] {
1987           // Import all nested enums and their values into this class's
1988           // scope with typedefs and constants.
1989           for (int i = 0; i < descriptor_->enum_type_count(); ++i) {
1990             enum_generators_[i]->GenerateSymbolImports(p);
1991           }
1992         }},
1993        {"decl_field_accessors",
1994         [&] {
1995           // Generate accessor methods for all fields.
1996           GenerateFieldAccessorDeclarations(p);
1997         }},
1998        {"decl_extension_ids",
1999         [&] {
2000           // Declare extension identifiers.
2001           for (int i = 0; i < descriptor_->extension_count(); ++i) {
2002             extension_generators_[i]->GenerateDeclaration(p);
2003           }
2004         }},
2005        {"proto2_message_sets",
2006         [&] {
               // Empty callback: nothing is emitted for this placeholder.
2007         }},
2008        {"decl_set_has",
2009         [&] {
2010           for (auto field : FieldRange(descriptor_)) {
2011             // set_has_***() generated in all oneofs.
2012             if (!field->is_repeated() && !field->options().weak() &&
2013                 field->real_containing_oneof()) {
2014               p->Emit({{"field_name", FieldName(field)}}, R"cc(
2015                 void set_has_$field_name$();
2016               )cc");
2017             }
2018           }
2019         }},
2020        {"decl_oneof_has",
2021         [&] {
2022           // Generate oneof function declarations
2023           for (auto oneof : OneOfRange(descriptor_)) {
2024             p->Emit({{"oneof_name", oneof->name()}}, R"cc(
2025               inline bool has_$oneof_name$() const;
2026               inline void clear_has_$oneof_name$();
2027             )cc");
2028           }
2029         }},
2030        {"decl_data",
2031         [&] {
2032           parse_function_generator_->GenerateDataDecls(p);
2033         }},
2034        {"post_loop_handler",
2035         [&] {
2036           if (!NeedsPostLoopHandler(descriptor_, options_)) return;
2037           p->Emit(R"cc(
2038             static const char* PostLoopHandler(MessageLite* msg,
2039                                                const char* ptr,
2040                                                $pbi$::ParseContext* ctx);
2041           )cc");
2042         }},
2043        {"decl_impl", [&] { GenerateImplDefinition(p); }},
            // Type of the `_class_data_` member: ClassDataFull when the file has
            // descriptor methods, otherwise ClassDataLite<N> with N equal to the
            // length of the full message name plus one (presumably room for a
            // terminating NUL -- TODO confirm).
2044        {"classdata_type",
2045         HasDescriptorMethods(descriptor_->file(), options_)
2046             ? "ClassDataFull"
2047             : absl::StrFormat("ClassDataLite<%d>",
2048                               descriptor_->full_name().size() + 1)},
2049        {"split_friend",
2050         [&] {
2051           if (!ShouldSplit(descriptor_, options_)) return;
2052 
2053           p->Emit({{"split_default", DefaultInstanceType(descriptor_, options_,
2054                                                          /*split=*/true)}},
2055                   R"cc(
2056                     friend struct $split_default$;
2057                   )cc");
2058         }}},
2059       R"cc(
2060         class $dllexport_decl $$classname$ final : public $superclass$
2061         /* @@protoc_insertion_point(class_definition:$full_name$) */ {
2062          public:
2063           inline $classname$() : $classname$(nullptr) {}
2064           $decl_dtor$;
2065 
2066 #if defined(PROTOBUF_CUSTOM_VTABLE)
2067           //~ Define a derived `operator delete` to avoid dynamic dispatch when
2068           //~ the type is statically known
2069           void operator delete($classname$* msg, std::destroying_delete_t) {
2070             SharedDtor(*msg);
2071             $pbi$::SizedDelete(msg, sizeof($classname$));
2072           }
2073 #endif
2074 
2075           //~ Templatize constexpr constructor as a workaround for a bug in
2076           //~ gcc 12 (warning in gcc 13).
2077           template <typename = void>
2078           explicit PROTOBUF_CONSTEXPR $classname$(
2079               ::$proto_ns$::internal::ConstantInitialized);
2080 
2081           inline $classname$(const $classname$& from) : $classname$(nullptr, from) {}
2082           inline $classname$($classname$&& from) noexcept
2083               : $classname$(nullptr, std::move(from)) {}
2084           inline $classname$& operator=(const $classname$& from) {
2085             CopyFrom(from);
2086             return *this;
2087           }
2088           inline $classname$& operator=($classname$&& from) noexcept {
2089             if (this == &from) return *this;
2090             if ($pbi$::CanMoveWithInternalSwap(GetArena(), from.GetArena())) {
2091               InternalSwap(&from);
2092             } else {
2093               CopyFrom(from);
2094             }
2095             return *this;
2096           }
2097           $decl_verify_func$;
2098 
2099           inline const $unknown_fields_type$& unknown_fields() const
2100               ABSL_ATTRIBUTE_LIFETIME_BOUND {
2101             $annotate_unknown_fields$;
2102             return $unknown_fields$;
2103           }
2104           inline $unknown_fields_type$* mutable_unknown_fields()
2105               ABSL_ATTRIBUTE_LIFETIME_BOUND {
2106             $annotate_mutable_unknown_fields$;
2107             return $mutable_unknown_fields$;
2108           }
2109 
2110           $descriptor_accessor$;
2111           $get_descriptor$;
2112           static const $classname$& default_instance() {
2113             return *internal_default_instance();
2114           }
2115           $decl_oneof$;
2116           //~ TODO make this private, while still granting other
2117           //~ protos access.
2118           static inline const $classname$* internal_default_instance() {
2119             return reinterpret_cast<const $classname$*>(
2120                 &_$classname$_default_instance_);
2121           }
2122           static constexpr int kIndexInFileMessages = $index_in_file_messages$;
2123           $decl_any_methods$;
2124           friend void swap($classname$& a, $classname$& b) { a.Swap(&b); }
2125           inline void Swap($classname$* other) {
2126             if (other == this) return;
2127             if ($pbi$::CanUseInternalSwap(GetArena(), other->GetArena())) {
2128               InternalSwap(other);
2129             } else {
2130               $pbi$::GenericSwap(this, other);
2131             }
2132           }
2133           void UnsafeArenaSwap($classname$* other) {
2134             if (other == this) return;
2135             $DCHK$(GetArena() == other->GetArena());
2136             InternalSwap(other);
2137           }
2138 
2139           // implements Message ----------------------------------------------
2140 
2141           $classname$* New(::$proto_ns$::Arena* arena = nullptr) const {
2142             return $superclass$::DefaultConstruct<$classname$>(arena);
2143           }
2144           $generated_methods$;
2145           $internal_field_number$;
2146           $decl_non_simple_base$;
2147           //~ Friend the template function GetAnyMessageName<T>() so that it can
2148           //~ call this FullMessageName() method.
2149           //~ NOTE: parentheses around the symbol GetAnyMessageName is required
2150           //~       for compiler to resolve the symbol correctly and interpret
2151           //~       it as a function (instead of trying to find the symbol under
2152           //~       the absl::string_view namespace).
2153          private:
2154           template <typename T>
2155           friend ::absl::string_view(
2156               ::$proto_ns$::internal::GetAnyMessageName)();
2157           static ::absl::string_view FullMessageName() { return "$full_name$"; }
2158           $decl_annotate$;
2159 
2160           //~ TODO Make this private! Currently people are
2161           //~ deriving from protos to give access to this constructor,
2162           //~ breaking the invariants we rely on.
2163          protected:
2164           explicit $classname$(::$proto_ns$::Arena* arena);
2165           $classname$(::$proto_ns$::Arena* arena, const $classname$& from);
2166           $classname$(::$proto_ns$::Arena* arena, $classname$&& from) noexcept
2167               : $classname$(arena) {
2168             *this = ::std::move(from);
2169           }
2170           $arena_dtor$;
2171           const $pbi$::ClassData* GetClassData() const PROTOBUF_FINAL;
2172           static void* PlacementNew_(const void*, void* mem,
2173                                      ::$proto_ns$::Arena* arena);
2174           static constexpr auto InternalNewImpl_();
2175           static const $pbi$::$classdata_type$ _class_data_;
2176 
2177          public:
2178           $get_metadata$;
2179           $decl_split_methods$;
2180           // nested types ----------------------------------------------------
2181           $nested_types$;
2182           $nested_enums$;
2183 
2184           // accessors -------------------------------------------------------
2185           $decl_field_accessors$;
2186           $decl_extension_ids$;
2187           $proto2_message_sets$;
2188           // @@protoc_insertion_point(class_scope:$full_name$)
2189           //~ Generate private members.
2190          private:
2191           //~ TODO: Remove hack to track field access and remove
2192           //~ this class.
2193           class _Internal;
2194           $decl_set_has$;
2195           $decl_oneof_has$;
2196           $decl_data$;
2197           $post_loop_handler$;
2198 
2199           friend class ::$proto_ns$::MessageLite;
2200           friend class ::$proto_ns$::Arena;
2201           template <typename T>
2202           friend class ::$proto_ns$::Arena::InternalHelper;
2203           using InternalArenaConstructable_ = void;
2204           using DestructorSkippable_ = void;
2205           $decl_impl$;
2206           $split_friend$;
2207           //~ The TableStruct struct needs access to the private parts, in
2208           //~ order to construct the offsets of all members.
2209           friend struct ::$tablename$;
2210         };
2211       )cc");
2212 }  // NOLINT(readability/fn_size)
2213 
GenerateInlineMethods(io::Printer * p)2214 void MessageGenerator::GenerateInlineMethods(io::Printer* p) {
2215   auto v = p->WithVars(ClassVars(descriptor_, options_));
2216   auto t = p->WithVars(MakeTrackerCalls(descriptor_, options_));
2217   if (IsMapEntryMessage(descriptor_)) return;
2218   GenerateFieldAccessorDefinitions(p);
2219 
2220   // Generate oneof_case() functions.
2221   for (auto oneof : OneOfRange(descriptor_)) {
2222     p->Emit(
2223         {
2224             Sub{"oneof_name", absl::StrCat(oneof->name(), "_case")}.AnnotatedAs(
2225                 oneof),
2226             {"OneofName",
2227              absl::StrCat(UnderscoresToCamelCase(oneof->name(), true), "Case")},
2228             {"oneof_index", oneof->index()},
2229         },
2230         R"cc(
2231           inline $classname$::$OneofName$ $classname$::$oneof_name$() const {
2232             return $classname$::$OneofName$($oneof_case$[$oneof_index$]);
2233           }
2234         )cc");
2235   }
2236 }
2237 
GenerateSchema(io::Printer * p,int offset,int has_offset)2238 void MessageGenerator::GenerateSchema(io::Printer* p, int offset,
2239                                       int has_offset) {
2240   has_offset = !has_bit_indices_.empty() || IsMapEntryMessage(descriptor_)
2241                    ? offset + has_offset
2242                    : -1;
2243   int inlined_string_indices_offset;
2244   if (inlined_string_indices_.empty()) {
2245     inlined_string_indices_offset = -1;
2246   } else {
2247     ABSL_DCHECK_NE(has_offset, -1);
2248     ABSL_DCHECK(!IsMapEntryMessage(descriptor_));
2249     inlined_string_indices_offset = has_offset + has_bit_indices_.size();
2250   }
2251 
2252   auto v = p->WithVars(ClassVars(descriptor_, options_));
2253   p->Emit(
2254       {
2255           {"offset", offset},
2256           {"has_offset", has_offset},
2257           {"string_offsets", inlined_string_indices_offset},
2258       },
2259       R"cc(
2260         {$offset$, $has_offset$, $string_offsets$, sizeof($classtype$)},
2261       )cc");
2262 }
2263 
// Emits the out-of-line definitions belonging to this message class.
//
// Nothing is emitted when ShouldGenerateClass() is false. Map entry messages
// take a short path (constructors, optional accessor-annotation flags, the
// verify function, class data and the parse function generator's data
// definitions) and return early. All other messages get, in this order: the
// Any helpers (Any messages only), the `_Internal` helper class, non-inline
// field accessors, structors, oneof clearing, class data, parse data, the
// generated methods (when enabled for the file), the split-message helper,
// verify, swap, and a trailing group of optional definitions.
void MessageGenerator::GenerateClassMethods(io::Printer* p) {
  if (!ShouldGenerateClass(descriptor_, options_)) return;

  auto v = p->WithVars(ClassVars(descriptor_, options_));
  auto t = p->WithVars(MakeTrackerCalls(descriptor_, options_));

  // Map entry path: emit the reduced set of definitions and stop.
  if (IsMapEntryMessage(descriptor_)) {
    p->Emit({{"annotate_accessors",
              [&] {
                // One `<field>_AccessedNoStrip` definition per field, only
                // when accessor annotation is enabled.
                if (!options_.annotate_accessor) return;
                for (auto f : FieldRange(descriptor_)) {
                  p->Emit({{"field", FieldName(f)}},
                          R"cc(
                            volatile bool $classname$::$field$_AccessedNoStrip;
                          )cc");
                }
              }},
             {"verify",
              [&] {
                // Delegates generating verify function as only a subset of map
                // entry messages need it; i.e. UTF8 string key/value or message
                // type value.
                GenerateVerify(p);
              }},
             {"class_data", [&] { GenerateClassData(p); }}},
            R"cc(
#if defined(PROTOBUF_CUSTOM_VTABLE)
              $classname$::$classname$() : SuperType(_class_data_.base()) {}
              $classname$::$classname$(::$proto_ns$::Arena* arena)
                  : SuperType(arena, _class_data_.base()) {}
#else   // PROTOBUF_CUSTOM_VTABLE
              $classname$::$classname$() : SuperType() {}
              $classname$::$classname$(::$proto_ns$::Arena* arena) : SuperType(arena) {}
#endif  // PROTOBUF_CUSTOM_VTABLE
              $annotate_accessors$;
              $verify$;
              $class_data$;
            )cc");
    parse_function_generator_->GenerateDataDefinitions(p);
    return;
  }
  // Any messages additionally get ParseAnyTypeUrl() and, when the file has
  // descriptor methods, GetAnyFieldDescriptors(). Both forward to ::_pbi.
  if (IsAnyMessage(descriptor_)) {
    p->Emit({{"any_field_descriptor",
              [&] {
                if (!HasDescriptorMethods(descriptor_->file(), options_)) {
                  return;
                }
                p->Emit(
                    R"cc(
                      bool $classname$::GetAnyFieldDescriptors(
                          const ::$proto_ns$::Message& message,
                          const ::$proto_ns$::FieldDescriptor** type_url_field,
                          const ::$proto_ns$::FieldDescriptor** value_field) {
                        return ::_pbi::GetAnyFieldDescriptors(message, type_url_field, value_field);
                      }
                    )cc");
              }}},
            R"cc(
              $any_field_descriptor$;
              bool $classname$::ParseAnyTypeUrl(::absl::string_view type_url,
                                                std::string* full_type_name) {
                return ::_pbi::ParseAnyTypeUrl(type_url, full_type_name);
              }
            )cc");
  }
  // The nested `_Internal` class. Each of its three members is optional:
  // has-bit constants, the oneof-case offset, and the required-field check.
  p->Emit(
      {{"has_bit",
        [&] {
          if (has_bit_indices_.empty()) return;
          p->Emit(
              R"cc(
                using HasBits =
                    decltype(std::declval<$classname$>().$has_bits$);
                static constexpr ::int32_t kHasBitsOffset =
                    8 * PROTOBUF_FIELD_OFFSET($classname$, _impl_._has_bits_);
              )cc");
        }},
       {"oneof",
        [&] {
          if (descriptor_->real_oneof_decl_count() == 0) return;
          p->Emit(
              R"cc(
                static constexpr ::int32_t kOneofCaseOffset =
                    PROTOBUF_FIELD_OFFSET($classtype$, $oneof_case$);
              )cc");
        }},
       {"required",
        [&] {
          if (num_required_fields_ == 0) return;
          // The body of MissingRequiredFields() is the expression built by
          // ConditionalToCheckBitmasks() from the required-field bit masks.
          const std::vector<uint32_t> masks_for_has_bits =
              RequiredFieldsBitMask();
          p->Emit(
              {{"check_bit_mask", ConditionalToCheckBitmasks(
                                      masks_for_has_bits, false, "has_bits")}},
              R"cc(
                static bool MissingRequiredFields(const HasBits& has_bits) {
                  return $check_bit_mask$;
                }
              )cc");
        }}},
      R"cc(
        class $classname$::_Internal {
         public:
          $has_bit$;
          $oneof$;
          $required$;
        };
      )cc");
  p->Emit("\n");

  // Generate non-inline field definitions.
  for (auto field : FieldRange(descriptor_)) {
    auto v = p->WithVars(FieldVars(field, options_));
    auto t = p->WithVars(MakeTrackerCalls(field, options_));
    field_generators_.get(field).GenerateNonInlineAccessorDefinitions(p);
    // Fields for which IsCrossFileMaybeMap() holds also get their clear
    // function emitted here.
    if (IsCrossFileMaybeMap(field)) {
      GenerateFieldClear(field, false, p);
    }
  }

  GenerateStructors(p);
  p->Emit("\n");

  if (descriptor_->real_oneof_decl_count() > 0) {
    GenerateOneofClear(p);
    p->Emit("\n");
  }

  GenerateClassData(p);
  parse_function_generator_->GenerateDataDefinitions(p);

  // Clear / serialize / byte-size / merge / copy / is-initialized are only
  // emitted when the file has generated methods.
  if (HasGeneratedMethods(descriptor_->file(), options_)) {
    GenerateClear(p);
    p->Emit("\n");

    GenerateSerializeWithCachedSizesToArray(p);
    p->Emit("\n");

    GenerateByteSize(p);
    p->Emit("\n");

    GenerateClassSpecificMergeImpl(p);
    p->Emit("\n");

    GenerateCopyFrom(p);
    p->Emit("\n");

    GenerateIsInitialized(p);
    p->Emit("\n");
  }

  // Split messages get PrepareSplitMessageForWrite(), which replaces the
  // split pointer with a chunk from CreateSplitMessageGeneric() while the
  // message still reports IsSplitMessageDefault().
  if (ShouldSplit(descriptor_, options_)) {
    p->Emit({{"split_default",
              DefaultInstanceName(descriptor_, options_, /*split=*/true)},
             {"default",
              DefaultInstanceName(descriptor_, options_, /*split=*/false)}},
            R"cc(
              void $classname$::PrepareSplitMessageForWrite() {
                if (PROTOBUF_PREDICT_TRUE(IsSplitMessageDefault())) {
                  void* chunk = $pbi$::CreateSplitMessageGeneric(
                      GetArena(), &$split_default$, sizeof(Impl_::Split), this,
                      &$default$);
                  $split$ = reinterpret_cast<Impl_::Split*>(chunk);
                }
              }
            )cc");
  }

  GenerateVerify(p);

  GenerateSwap(p);
  p->Emit("\n");

  // Trailing optional definitions; each callback emits nothing when its
  // condition does not hold.
  p->Emit(
      {{"annotate_accessor_definition",
        [&] {
          if (!options_.annotate_accessor) return;
          for (auto f : FieldRange(descriptor_)) {
            p->Emit({{"field", FieldName(f)}},
                    R"cc(
                      volatile bool $classname$::$field$_AccessedNoStrip;
                    )cc");
          }
        }},
       {"get_metadata",
        [&] {
          if (!HasDescriptorMethods(descriptor_->file(), options_)) return;
          // Same as the base class, but it avoids virtual dispatch.
          p->Emit(R"cc(
            ::$proto_ns$::Metadata $classname$::GetMetadata() const {
              return $superclass$::GetMetadataImpl(GetClassData()->full());
            }
          )cc");
        }},
       {"post_loop_handler",
        [&] {
          if (!NeedsPostLoopHandler(descriptor_, options_)) return;
          p->Emit({{"required",
                    [&] {
                      // Empty here: nothing is substituted for $required$.
                    }}},
                  R"cc(
                    const char* $classname$::PostLoopHandler(
                        MessageLite* msg, const char* ptr,
                        ::_pbi::ParseContext* ctx) {
                      $classname$* _this = static_cast<$classname$*>(msg);
                      $annotate_deserialize$;
                      $required$;
                      return ptr;
                    }
                  )cc");
        }},
       {"message_set_definition",
        [&] {
          // Empty here: nothing is substituted for $message_set_definition$.
        }},
       {"tracker_decl",
        [&] {
          if (!HasTracker(descriptor_, options_)) return;
          p->Emit(R"cc(
            ::$proto_ns$::AccessListener<$classtype$> $classname$::$tracker$(
                &FullMessageName);
          )cc");
        }}},
      R"cc(
        $annotate_accessor_definition$;
        $get_metadata$;
        $post_loop_handler$;
        $message_set_definition$;
        $tracker_decl$;
      )cc");
}
2494 
GenerateOffsets(io::Printer * p)2495 std::pair<size_t, size_t> MessageGenerator::GenerateOffsets(io::Printer* p) {
2496   auto v = p->WithVars(ClassVars(descriptor_, options_));
2497   auto t = p->WithVars(MakeTrackerCalls(descriptor_, options_));
2498   Formatter format(p);
2499 
2500   if (!has_bit_indices_.empty() || IsMapEntryMessage(descriptor_)) {
2501     format("PROTOBUF_FIELD_OFFSET($classtype$, $has_bits$),\n");
2502   } else {
2503     format("~0u,  // no _has_bits_\n");
2504   }
2505   format("PROTOBUF_FIELD_OFFSET($classtype$, _internal_metadata_),\n");
2506   if (descriptor_->extension_range_count() > 0) {
2507     format("PROTOBUF_FIELD_OFFSET($classtype$, $extensions$),\n");
2508   } else {
2509     format("~0u,  // no _extensions_\n");
2510   }
2511   if (descriptor_->real_oneof_decl_count() > 0) {
2512     format("PROTOBUF_FIELD_OFFSET($classtype$, $oneof_case$[0]),\n");
2513   } else {
2514     format("~0u,  // no _oneof_case_\n");
2515   }
2516   if (num_weak_fields_ > 0) {
2517     format("PROTOBUF_FIELD_OFFSET($classtype$, $weak_field_map$),\n");
2518   } else {
2519     format("~0u,  // no _weak_field_map_\n");
2520   }
2521   if (!inlined_string_indices_.empty()) {
2522     format(
2523         "PROTOBUF_FIELD_OFFSET($classtype$, "
2524         "$inlined_string_donated_array$),\n");
2525   } else {
2526     format("~0u,  // no _inlined_string_donated_\n");
2527   }
2528   if (ShouldSplit(descriptor_, options_)) {
2529     format(
2530         "PROTOBUF_FIELD_OFFSET($classtype$, $split$),\n"
2531         "sizeof($classtype$::Impl_::Split),\n");
2532   } else {
2533     format(
2534         "~0u,  // no _split_\n"
2535         "~0u,  // no sizeof(Split)\n");
2536   }
2537   const int kNumGenericOffsets = 8;  // the number of fixed offsets above
2538   const size_t offsets = kNumGenericOffsets + descriptor_->field_count() +
2539                          descriptor_->real_oneof_decl_count();
2540   size_t entries = offsets;
2541   for (auto field : FieldRange(descriptor_)) {
2542     // TODO: We should not have an entry in the offset table for fields
2543     // that do not use them.
2544     if (field->options().weak() || field->real_containing_oneof()) {
2545       // Mark the field to prevent unintentional access through reflection.
2546       // Don't use the top bit because that is for unused fields.
2547       format("::_pbi::kInvalidFieldOffsetTag");
2548     } else {
2549       format("PROTOBUF_FIELD_OFFSET($classtype$$1$, $2$)",
2550              ShouldSplit(field, options_) ? "::Impl_::Split" : "",
2551              ShouldSplit(field, options_)
2552                  ? absl::StrCat(FieldName(field), "_")
2553                  : FieldMemberName(field, /*split=*/false));
2554     }
2555 
2556     // Some information about a field is in the pdproto profile. The profile is
2557     // only available at compile time. So we embed such information in the
2558     // offset of the field, so that the information is available when
2559     // reflectively accessing the field at run time.
2560     //
2561     // We embed whether the field is cold to the MSB of the offset, and whether
2562     // the field is eagerly verified lazy or inlined string to the LSB of the
2563     // offset.
2564 
2565     if (ShouldSplit(field, options_)) {
2566       format(" | ::_pbi::kSplitFieldOffsetMask /*split*/");
2567     }
2568     if (IsEagerlyVerifiedLazy(field, options_, scc_analyzer_)) {
2569       format(" | 0x1u /*eagerly verified lazy*/");
2570     } else if (IsStringInlined(field, options_)) {
2571       format(" | 0x1u /*inlined*/");
2572     }
2573     format(",\n");
2574   }
2575 
2576   int count = 0;
2577   for (auto oneof : OneOfRange(descriptor_)) {
2578     format("PROTOBUF_FIELD_OFFSET($classtype$, _impl_.$1$_),\n", oneof->name());
2579     count++;
2580   }
2581   ABSL_CHECK_EQ(count, descriptor_->real_oneof_decl_count());
2582 
2583   if (IsMapEntryMessage(descriptor_)) {
2584     entries += 2;
2585     format(
2586         "0,\n"
2587         "1,\n");
2588   } else if (!has_bit_indices_.empty()) {
2589     entries += has_bit_indices_.size();
2590     for (size_t i = 0; i < has_bit_indices_.size(); ++i) {
2591       const std::string index =
2592           has_bit_indices_[i] >= 0 ? absl::StrCat(has_bit_indices_[i]) : "~0u";
2593       format("$1$,\n", index);
2594     }
2595   }
2596   if (!inlined_string_indices_.empty()) {
2597     entries += inlined_string_indices_.size();
2598     for (int inlined_string_index : inlined_string_indices_) {
2599       const std::string index =
2600           inlined_string_index >= 0
2601               ? absl::StrCat(inlined_string_index, ",  // inlined_string_index")
2602               : "~0u,";
2603       format("$1$\n", index);
2604     }
2605   }
2606 
2607   return std::make_pair(entries, offsets);
2608 }
2609 
GenerateZeroInitFields(io::Printer * p) const2610 void MessageGenerator::GenerateZeroInitFields(io::Printer* p) const {
2611   using Iterator = decltype(optimized_order_.begin());
2612   const FieldDescriptor* first = nullptr;
2613   auto emit_pending_zero_fields = [&](Iterator end) {
2614     if (first != nullptr) {
2615       const FieldDescriptor* last = end[-1];
2616       if (first != last ||
2617           !field_generators_.get(first).has_brace_default_assign()) {
2618         p->Emit({{"first", FieldName(first)},
2619                  {"last", FieldName(last)},
2620                  {"Impl", "Impl_"},
2621                  {"impl", "_impl_"}},
2622                 R"cc(
2623                   ::memset(reinterpret_cast<char *>(&$impl$) +
2624                                offsetof($Impl$, $first$_),
2625                            0,
2626                            offsetof($Impl$, $last$_) -
2627                                offsetof($Impl$, $first$_) +
2628                                sizeof($Impl$::$last$_));
2629                 )cc");
2630       } else {
2631         p->Emit({{"field", FieldMemberName(first, false)}},
2632                 R"cc(
2633                   $field$ = {};
2634                 )cc");
2635       }
2636       first = nullptr;
2637     }
2638   };
2639 
2640   auto it = optimized_order_.begin();
2641   auto end = optimized_order_.end();
2642   for (; it != end && !ShouldSplit(*it, options_); ++it) {
2643     auto const& generator = field_generators_.get(*it);
2644     if (generator.has_trivial_zero_default()) {
2645       if (first == nullptr) first = *it;
2646     } else {
2647       emit_pending_zero_fields(it);
2648     }
2649   }
2650   emit_pending_zero_fields(it);
2651 }
2652 
2653 namespace {
2654 
2655 class MemberInitSeparator {
2656  public:
MemberInitSeparator(io::Printer * printer)2657   explicit MemberInitSeparator(io::Printer* printer) : printer_(printer) {}
2658   MemberInitSeparator(const MemberInitSeparator&) = delete;
2659 
~MemberInitSeparator()2660   ~MemberInitSeparator() {
2661     if (separators_) printer_->Outdent();
2662   }
2663 
operator ()()2664   void operator()() {
2665     if (separators_) {
2666       printer_->Emit(",\n");
2667     } else {
2668       printer_->Emit(": ");
2669       printer_->Indent();
2670       separators_ = true;
2671     }
2672   }
2673 
2674  private:
2675   bool separators_ = false;
2676   io::Printer* const printer_;
2677 };
2678 
2679 }  // namespace
2680 
GenerateImplMemberInit(io::Printer * p,InitType init_type)2681 void MessageGenerator::GenerateImplMemberInit(io::Printer* p,
2682                                               InitType init_type) {
2683   ABSL_DCHECK(!HasSimpleBaseClass(descriptor_, options_));
2684 
2685   auto indent = p->WithIndent();
2686   MemberInitSeparator separator(p);
2687 
2688   auto init_extensions = [&] {
2689     if (descriptor_->extension_range_count() > 0 &&
2690         init_type != InitType::kConstexpr) {
2691       separator();
2692       p->Emit("_extensions_{visibility, arena}");
2693     }
2694   };
2695 
2696   auto init_inlined_string_indices = [&] {
2697     if (!inlined_string_indices_.empty()) {
2698       bool dtor_on_demand = NeedsArenaDestructor() == ArenaDtorNeeds::kOnDemand;
2699       auto values = [&] {
2700         for (size_t i = 0; i < InlinedStringDonatedSize(); ++i) {
2701           if (i > 0) {
2702             p->Emit(", ");
2703           }
2704           p->Emit(dtor_on_demand
2705                       ? "::_pbi::InitDonatingStates()"
2706                       : "::_pbi::InitDonatingStates() & 0xFFFFFFFEu");
2707         }
2708       };
2709       separator();
2710       p->Emit({{"values", values}}, "_inlined_string_donated_{$values$}");
2711     }
2712   };
2713 
2714   auto init_has_bits = [&] {
2715     if (!has_bit_indices_.empty()) {
2716       if (init_type == InitType::kArenaCopy) {
2717         separator();
2718         p->Emit("_has_bits_{from._has_bits_}");
2719       }
2720       separator();
2721       p->Emit("_cached_size_{0}");
2722     }
2723   };
2724 
2725   auto init_fields = [&] {
2726     for (auto* field : optimized_order_) {
2727       if (ShouldSplit(field, options_)) continue;
2728 
2729       auto const& generator = field_generators_.get(field);
2730       switch (init_type) {
2731         case InitType::kConstexpr:
2732           separator();
2733           generator.GenerateMemberConstexprConstructor(p);
2734           break;
2735         case InitType::kArena:
2736           if (!generator.has_trivial_zero_default()) {
2737             separator();
2738             generator.GenerateMemberConstructor(p);
2739           }
2740           break;
2741         case InitType::kArenaCopy:
2742           if (!generator.has_trivial_value()) {
2743             separator();
2744             generator.GenerateMemberCopyConstructor(p);
2745           }
2746           break;
2747       }
2748     }
2749   };
2750 
2751   auto init_split = [&] {
2752     if (ShouldSplit(descriptor_, options_)) {
2753       separator();
2754       p->Emit({{"name", DefaultInstanceName(descriptor_, options_, true)}},
2755               "_split_{const_cast<Split*>(&$name$._instance)}");
2756     }
2757   };
2758 
2759   auto init_oneofs = [&] {
2760     for (auto oneof : OneOfRange(descriptor_)) {
2761       separator();
2762       p->Emit({{"name", oneof->name()}}, "$name$_{}");
2763     }
2764   };
2765 
2766   auto init_cached_size_if_no_hasbits = [&] {
2767     if (has_bit_indices_.empty()) {
2768       separator();
2769       p->Emit("_cached_size_{0}");
2770     }
2771   };
2772 
2773   auto init_oneof_cases = [&] {
2774     if (int count = descriptor_->real_oneof_decl_count()) {
2775       separator();
2776       if (init_type == InitType::kArenaCopy) {
2777         auto cases = [&] {
2778           for (int i = 0; i < count; ++i) {
2779             p->Emit({{"index", i}, {"comma", i ? ", " : ""}},
2780                     "$comma$from._oneof_case_[$index$]");
2781           }
2782         };
2783         p->Emit({{"cases", cases}}, "_oneof_case_{$cases$}");
2784       } else {
2785         p->Emit("_oneof_case_{}");
2786       }
2787     }
2788   };
2789 
2790   auto init_weak_field_map = [&] {
2791     if (num_weak_fields_ && init_type != InitType::kConstexpr) {
2792       separator();
2793       if (init_type == InitType::kArenaCopy) {
2794         p->Emit("_weak_field_map_{visibility, arena, from._weak_field_map_}");
2795       } else {
2796         p->Emit("_weak_field_map_{visibility, arena}");
2797       }
2798     }
2799   };
2800 
2801   // Initialization order of the various fields inside `_impl_(...)`
2802   init_extensions();
2803   init_inlined_string_indices();
2804   init_has_bits();
2805   init_fields();
2806   init_split();
2807   init_oneofs();
2808   init_cached_size_if_no_hasbits();
2809   init_oneof_cases();
2810   init_weak_field_map();
2811 }
2812 
GenerateSharedConstructorCode(io::Printer * p)2813 void MessageGenerator::GenerateSharedConstructorCode(io::Printer* p) {
2814   if (HasSimpleBaseClass(descriptor_, options_)) return;
2815 
2816   // Generate Impl_::Imp_(visibility, Arena*);
2817   p->Emit({{"init_impl", [&] { GenerateImplMemberInit(p, InitType::kArena); }},
2818            {"zero_init", [&] { GenerateZeroInitFields(p); }}},
2819           R"cc(
2820             inline PROTOBUF_NDEBUG_INLINE $classname$::Impl_::Impl_(
2821                 $pbi$::InternalVisibility visibility,
2822                 ::$proto_ns$::Arena* arena)
2823                 //~
2824                 $init_impl$ {}
2825 
2826             inline void $classname$::SharedCtor(::_pb::Arena* arena) {
2827               new (&_impl_) Impl_(internal_visibility(), arena);
2828               $zero_init$;
2829             }
2830           )cc");
2831 }
2832 
GenerateInitDefaultSplitInstance(io::Printer * p)2833 void MessageGenerator::GenerateInitDefaultSplitInstance(io::Printer* p) {
2834   if (!ShouldSplit(descriptor_, options_)) return;
2835 
2836   auto v = p->WithVars(ClassVars(descriptor_, options_));
2837   auto t = p->WithVars(MakeTrackerCalls(descriptor_, options_));
2838   p->Emit("\n");
2839   for (const auto* field : optimized_order_) {
2840     if (ShouldSplit(field, options_)) {
2841       field_generators_.get(field).GenerateConstexprAggregateInitializer(p);
2842     }
2843   }
2844 }
2845 
GenerateSharedDestructorCode(io::Printer * p)2846 void MessageGenerator::GenerateSharedDestructorCode(io::Printer* p) {
2847   if (HasSimpleBaseClass(descriptor_, options_)) return;
2848   auto emit_field_dtors = [&](bool split_fields) {
2849     // Write the destructors for each field except oneof members.
2850     // optimized_order_ does not contain oneof fields.
2851     for (const auto* field : optimized_order_) {
2852       if (ShouldSplit(field, options_) != split_fields) continue;
2853       field_generators_.get(field).GenerateDestructorCode(p);
2854     }
2855   };
2856   p->Emit(
2857       {
2858           {"field_dtors", [&] { emit_field_dtors(/* split_fields= */ false); }},
2859           {"split_field_dtors",
2860            [&] {
2861              if (!ShouldSplit(descriptor_, options_)) return;
2862              p->Emit(
2863                  {
2864                      {"split_field_dtors_impl",
2865                       [&] { emit_field_dtors(/* split_fields= */ true); }},
2866                  },
2867                  R"cc(
2868                    if (PROTOBUF_PREDICT_FALSE(!this_.IsSplitMessageDefault())) {
2869                      auto* $cached_split_ptr$ = this_.$split$;
2870                      $split_field_dtors_impl$;
2871                      delete $cached_split_ptr$;
2872                    }
2873                  )cc");
2874            }},
2875           {"oneof_field_dtors",
2876            [&] {
2877              for (const auto* oneof : OneOfRange(descriptor_)) {
2878                p->Emit({{"name", oneof->name()}},
2879                        R"cc(
2880                          if (this_.has_$name$()) {
2881                            this_.clear_$name$();
2882                          }
2883                        )cc");
2884              }
2885            }},
2886           {"weak_fields_dtor",
2887            [&] {
2888              if (num_weak_fields_ == 0) return;
2889              // Generate code to destruct oneofs. Clearing should do the work.
2890              p->Emit(R"cc(
2891                this_.$weak_field_map$.ClearAll();
2892              )cc");
2893            }},
2894           {"impl_dtor", [&] { p->Emit("this_._impl_.~Impl_();\n"); }},
2895       },
2896       R"cc(
2897         inline void $classname$::SharedDtor(MessageLite& self) {
2898           $classname$& this_ = static_cast<$classname$&>(self);
2899           this_._internal_metadata_.Delete<$unknown_fields_type$>();
2900           $DCHK$(this_.GetArena() == nullptr);
2901           $WeakDescriptorSelfPin$;
2902           $field_dtors$;
2903           $split_field_dtors$;
2904           $oneof_field_dtors$;
2905           $weak_fields_dtor$;
2906           $impl_dtor$;
2907         }
2908       )cc");
2909 }
2910 
NeedsArenaDestructor() const2911 ArenaDtorNeeds MessageGenerator::NeedsArenaDestructor() const {
2912   if (HasSimpleBaseClass(descriptor_, options_)) return ArenaDtorNeeds::kNone;
2913   ArenaDtorNeeds needs = ArenaDtorNeeds::kNone;
2914   for (const auto* field : FieldRange(descriptor_)) {
2915     needs =
2916         std::max(needs, field_generators_.get(field).NeedsArenaDestructor());
2917   }
2918   return needs;
2919 }
2920 
GenerateArenaDestructorCode(io::Printer * p)2921 void MessageGenerator::GenerateArenaDestructorCode(io::Printer* p) {
2922   ABSL_CHECK(NeedsArenaDestructor() > ArenaDtorNeeds::kNone);
2923   auto emit_field_dtors = [&](bool split_fields) {
2924     // Write the destructors for each field except oneof members.
2925     // optimized_order_ does not contain oneof fields.
2926     for (const auto* field : optimized_order_) {
2927       if (ShouldSplit(field, options_) != split_fields) continue;
2928       field_generators_.get(field).GenerateArenaDestructorCode(p);
2929     }
2930   };
2931   bool needs_arena_dtor_split = false;
2932   for (const auto* field : optimized_order_) {
2933     if (!ShouldSplit(field, options_)) continue;
2934     if (field_generators_.get(field).NeedsArenaDestructor() >
2935         ArenaDtorNeeds::kNone) {
2936       needs_arena_dtor_split = true;
2937       break;
2938     }
2939   }
2940 
2941   // This code is placed inside a static method, rather than an ordinary one,
2942   // since that simplifies Arena's destructor list (ordinary function pointers
2943   // rather than member function pointers). _this is the object being
2944   // destructed.
2945   p->Emit(
2946       {
2947           {"field_dtors", [&] { emit_field_dtors(/* split_fields= */ false); }},
2948           {"split_field_dtors",
2949            [&] {
2950              if (!ShouldSplit(descriptor_, options_)) return;
2951              if (!needs_arena_dtor_split) {
2952                return;
2953              }
2954              p->Emit(
2955                  {
2956                      {"split_field_dtors_impl",
2957                       [&] { emit_field_dtors(/* split_fields= */ true); }},
2958                  },
2959                  R"cc(
2960                    if (PROTOBUF_PREDICT_FALSE(
2961                            !_this->IsSplitMessageDefault())) {
2962                      $split_field_dtors_impl$;
2963                    }
2964                  )cc");
2965            }},
2966           {"oneof_field_dtors",
2967            [&] {
2968              for (const auto* oneof : OneOfRange(descriptor_)) {
2969                for (const auto* field : FieldRange(oneof)) {
2970                  field_generators_.get(field).GenerateArenaDestructorCode(p);
2971                }
2972              }
2973            }},
2974       },
2975       R"cc(
2976         void $classname$::ArenaDtor(void* object) {
2977           $classname$* _this = reinterpret_cast<$classname$*>(object);
2978           $field_dtors$;
2979           $split_field_dtors$;
2980           $oneof_field_dtors$;
2981         }
2982       )cc");
2983 }
2984 
GenerateConstexprConstructor(io::Printer * p)2985 void MessageGenerator::GenerateConstexprConstructor(io::Printer* p) {
2986   if (!ShouldGenerateClass(descriptor_, options_)) return;
2987 
2988   auto v = p->WithVars(ClassVars(descriptor_, options_));
2989   auto t = p->WithVars(MakeTrackerCalls(descriptor_, options_));
2990   auto c = p->WithVars({{"constexpr", "PROTOBUF_CONSTEXPR"}});
2991   Formatter format(p);
2992 
2993   if (IsMapEntryMessage(descriptor_) || !HasImplData(descriptor_, options_)) {
2994     p->Emit({{"base",
2995               [&] {
2996                 if (IsMapEntryMessage(descriptor_)) {
2997                   p->Emit("$classname$::MapEntry");
2998                 } else {
2999                   p->Emit("$superclass$");
3000                 }
3001               }}},
3002             R"cc(
3003               //~ Templatize constexpr constructor as a workaround for a bug in
3004               //~ gcc 12 (warning in gcc 13).
3005               template <typename>
3006               $constexpr$ $classname$::$classname$(::_pbi::ConstantInitialized)
3007 #if defined(PROTOBUF_CUSTOM_VTABLE)
3008                   : $base$(_class_data_.base()){}
3009 #else   // PROTOBUF_CUSTOM_VTABLE
3010                   : $base$() {
3011               }
3012 #endif  // PROTOBUF_CUSTOM_VTABLE
3013             )cc");
3014     return;
3015   }
3016 
3017   // Generate Impl_::Imp_(::_pbi::ConstantInitialized);
3018   // We use separate p->Emit() calls for LF and #ifdefs as they result in
3019   // awkward layout and more awkward indenting of the function statement.
3020   p->Emit("\n");
3021   p->Emit({{"init", [&] { GenerateImplMemberInit(p, InitType::kConstexpr); }}},
3022           R"cc(
3023             inline constexpr $classname$::Impl_::Impl_(
3024                 ::_pbi::ConstantInitialized) noexcept
3025                 //~
3026                 $init$ {}
3027           )cc");
3028   p->Emit("\n");
3029 
3030   p->Emit(
3031       R"cc(
3032         template <typename>
3033         $constexpr$ $classname$::$classname$(::_pbi::ConstantInitialized)
3034 #if defined(PROTOBUF_CUSTOM_VTABLE)
3035             : $superclass$(_class_data_.base()),
3036 #else   // PROTOBUF_CUSTOM_VTABLE
3037             : $superclass$(),
3038 #endif  // PROTOBUF_CUSTOM_VTABLE
3039               _impl_(::_pbi::ConstantInitialized()) {
3040         }
3041       )cc");
3042 }
3043 
ImplHasCopyCtor() const3044 bool MessageGenerator::ImplHasCopyCtor() const {
3045   if (ShouldSplit(descriptor_, options_)) return false;
3046   if (HasSimpleBaseClass(descriptor_, options_)) return false;
3047   if (descriptor_->extension_range_count() > 0) return false;
3048   if (descriptor_->real_oneof_decl_count() > 0) return false;
3049   if (num_weak_fields_ > 0) return false;
3050 
3051   // If the message contains only scalar fields (ints and enums),
3052   // then we can copy the entire impl_ section with a single statement.
3053   for (const auto* field : optimized_order_) {
3054     if (field->is_repeated()) return false;
3055     if (field->is_extension()) return false;
3056     switch (field->cpp_type()) {
3057       case FieldDescriptor::CPPTYPE_ENUM:
3058       case FieldDescriptor::CPPTYPE_INT32:
3059       case FieldDescriptor::CPPTYPE_INT64:
3060       case FieldDescriptor::CPPTYPE_UINT32:
3061       case FieldDescriptor::CPPTYPE_UINT64:
3062       case FieldDescriptor::CPPTYPE_FLOAT:
3063       case FieldDescriptor::CPPTYPE_DOUBLE:
3064       case FieldDescriptor::CPPTYPE_BOOL:
3065         break;
3066       default:
3067         return false;
3068     }
3069   }
3070   return true;
3071 }
3072 
GenerateCopyInitFields(io::Printer * p) const3073 void MessageGenerator::GenerateCopyInitFields(io::Printer* p) const {
3074   auto begin = optimized_order_.begin();
3075   auto end = optimized_order_.end();
3076   const FieldDescriptor* first = nullptr;
3077 
3078   auto emit_pending_copy_fields = [&](decltype(end) itend, bool split) {
3079     if (first != nullptr) {
3080       const FieldDescriptor* last = itend[-1];
3081       if (first != last) {
3082         p->Emit({{"first", FieldName(first)},
3083                  {"last", FieldName(last)},
3084                  {"Impl", split ? "Impl_::Split" : "Impl_"},
3085                  {"pdst", split ? "_impl_._split_" : "&_impl_"},
3086                  {"psrc", split ? "from._impl_._split_" : "&from._impl_"}},
3087                 R"cc(
3088                   ::memcpy(reinterpret_cast<char *>($pdst$) +
3089                                offsetof($Impl$, $first$_),
3090                            reinterpret_cast<const char *>($psrc$) +
3091                                offsetof($Impl$, $first$_),
3092                            offsetof($Impl$, $last$_) -
3093                                offsetof($Impl$, $first$_) +
3094                                sizeof($Impl$::$last$_));
3095                 )cc");
3096       } else {
3097         p->Emit({{"field", FieldMemberName(first, split)}},
3098                 R"cc(
3099                   $field$ = from.$field$;
3100                 )cc");
3101       }
3102       first = nullptr;
3103     }
3104   };
3105 
3106   int has_bit_word_index = -1;
3107   auto load_has_bits = [&](const FieldDescriptor* field) {
3108     if (has_bit_indices_.empty()) return;
3109     int has_bit_index = has_bit_indices_[field->index()];
3110     if (has_bit_word_index != has_bit_index / 32) {
3111       p->Emit({{"declare", has_bit_word_index < 0 ? "::uint32_t " : ""},
3112                {"index", has_bit_index / 32}},
3113               "$declare$cached_has_bits = _impl_._has_bits_[$index$];\n");
3114       has_bit_word_index = has_bit_index / 32;
3115     }
3116   };
3117 
3118   auto has_message = [&](const FieldDescriptor* field) {
3119     if (has_bit_indices_.empty()) {
3120       p->Emit("from.$field$ != nullptr");
3121     } else {
3122       int index = has_bit_indices_[field->index()];
3123       std::string mask = absl::StrFormat("0x%08xu", 1u << (index % 32));
3124       p->Emit({{"mask", mask}}, "cached_has_bits & $mask$");
3125     }
3126   };
3127 
3128   auto emit_copy_message = [&](const FieldDescriptor* field) {
3129     load_has_bits(field);
3130     p->Emit({{"has_msg", [&] { has_message(field); }},
3131              {"submsg", FieldMessageTypeName(field, options_)}},
3132             R"cc(
3133               $field$ = ($has_msg$) ? $superclass$::CopyConstruct<$submsg$>(
3134                                           arena, *from.$field$)
3135                                     : nullptr;
3136             )cc");
3137   };
3138 
3139   auto generate_copy_fields = [&] {
3140     for (auto it = begin; it != end; ++it) {
3141       const auto& gen = field_generators_.get(*it);
3142       auto v = p->WithVars(FieldVars(*it, options_));
3143 
3144       // Non trivial field values are copy constructed
3145       if (!gen.has_trivial_value() || gen.should_split()) {
3146         emit_pending_copy_fields(it, false);
3147         continue;
3148       }
3149 
3150       if (gen.is_message()) {
3151         emit_pending_copy_fields(it, false);
3152         emit_copy_message(*it);
3153       } else if (first == nullptr) {
3154         first = *it;
3155       }
3156     }
3157     emit_pending_copy_fields(end, false);
3158   };
3159 
3160   auto generate_copy_split_fields = [&] {
3161     for (auto it = begin; it != end; ++it) {
3162       const auto& gen = field_generators_.get(*it);
3163       auto v = p->WithVars(FieldVars(*it, options_));
3164 
3165       if (!gen.should_split()) {
3166         emit_pending_copy_fields(it, true);
3167         continue;
3168       }
3169 
3170       if (gen.is_trivial()) {
3171         if (first == nullptr) first = *it;
3172       } else {
3173         emit_pending_copy_fields(it, true);
3174         gen.GenerateCopyConstructorCode(p);
3175       }
3176     }
3177     emit_pending_copy_fields(end, true);
3178   };
3179 
3180   auto generate_copy_oneof_fields = [&]() {
3181     for (const auto* oneof : OneOfRange(descriptor_)) {
3182       p->Emit(
3183           {{"name", oneof->name()},
3184            {"NAME", absl::AsciiStrToUpper(oneof->name())},
3185            {"cases",
3186             [&] {
3187               for (const auto* field : FieldRange(oneof)) {
3188                 p->Emit(
3189                     {{"Name", UnderscoresToCamelCase(field->name(), true)},
3190                      {"field", FieldMemberName(field, /*split=*/false)},
3191                      {"body",
3192                       [&] {
3193                         field_generators_.get(field).GenerateOneofCopyConstruct(
3194                             p);
3195                       }}},
3196                     R"cc(
3197                       case k$Name$:
3198                         $body$;
3199                         break;
3200                     )cc");
3201               }
3202             }}},
3203           R"cc(
3204             switch ($name$_case()) {
3205               case $NAME$_NOT_SET:
3206                 break;
3207                 $cases$;
3208             }
3209           )cc");
3210     }
3211   };
3212 
3213   if (descriptor_->extension_range_count() > 0) {
3214     p->Emit(R"cc(
3215       _impl_._extensions_.MergeFrom(this, from._impl_._extensions_);
3216     )cc");
3217   }
3218 
3219   p->Emit({{"copy_fields", generate_copy_fields},
3220            {"copy_oneof_fields", generate_copy_oneof_fields}},
3221           R"cc(
3222             $copy_fields$;
3223             $copy_oneof_fields$;
3224           )cc");
3225 
3226   if (ShouldSplit(descriptor_, options_)) {
3227     p->Emit({{"copy_split_fields", generate_copy_split_fields}},
3228             R"cc(
3229               if (PROTOBUF_PREDICT_FALSE(!from.IsSplitMessageDefault())) {
3230                 PrepareSplitMessageForWrite();
3231                 $copy_split_fields$;
3232               }
3233             )cc");
3234   }
3235 }
3236 
GenerateArenaEnabledCopyConstructor(io::Printer * p)3237 void MessageGenerator::GenerateArenaEnabledCopyConstructor(io::Printer* p) {
3238   if (!HasSimpleBaseClass(descriptor_, options_)) {
3239     // Generate Impl_::Imp_(visibility, Arena*, const& from);
3240     p->Emit(
3241         {{"init", [&] { GenerateImplMemberInit(p, InitType::kArenaCopy); }}},
3242         R"cc(
3243           inline PROTOBUF_NDEBUG_INLINE $classname$::Impl_::Impl_(
3244               $pbi$::InternalVisibility visibility, ::$proto_ns$::Arena* arena,
3245               const Impl_& from, const $classtype$& from_msg)
3246               //~
3247               $init$ {}
3248         )cc");
3249     p->Emit("\n");
3250   }
3251 
3252   auto copy_construct_impl = [&] {
3253     if (!HasSimpleBaseClass(descriptor_, options_)) {
3254       p->Emit(R"cc(
3255         new (&_impl_) Impl_(internal_visibility(), arena, from._impl_, from);
3256       )cc");
3257     }
3258   };
3259 
3260   auto force_allocation = [&] {
3261     if (ShouldForceAllocationOnConstruction(descriptor_, options_)) {
3262       p->Emit(R"cc(
3263         if ($pbi$::DebugHardenForceAllocationOnConstruction()) {
3264           $mutable_unknown_fields$;
3265         }
3266       )cc");
3267     }
3268   };
3269 
3270   auto maybe_register_arena_dtor = [&] {
3271     switch (NeedsArenaDestructor()) {
3272       case ArenaDtorNeeds::kRequired: {
3273         p->Emit(R"cc(
3274           if (arena != nullptr) {
3275             arena->OwnCustomDestructor(this, &$classname$::ArenaDtor);
3276           }
3277         )cc");
3278         break;
3279       }
3280       case ArenaDtorNeeds::kOnDemand: {
3281         p->Emit(R"cc(
3282           ::_pbi::InternalRegisterArenaDtor(arena, this,
3283                                             &$classname$::ArenaDtor);
3284         )cc");
3285         break;
3286       }
3287       case ArenaDtorNeeds::kNone:
3288         break;
3289     }
3290   };
3291 
3292   p->Emit({{"copy_construct_impl", copy_construct_impl},
3293            {"copy_init_fields", [&] { GenerateCopyInitFields(p); }},
3294            {"force_allocation", force_allocation},
3295            {"maybe_register_arena_dtor", maybe_register_arena_dtor}},
3296           R"cc(
3297             $classname$::$classname$(
3298                 //~ force alignment
3299                 ::$proto_ns$::Arena* arena,
3300                 //~ force alignment
3301                 const $classname$& from)
3302 #if defined(PROTOBUF_CUSTOM_VTABLE)
3303                 : $superclass$(arena, _class_data_.base()) {
3304 #else   // PROTOBUF_CUSTOM_VTABLE
3305                 : $superclass$(arena) {
3306 #endif  // PROTOBUF_CUSTOM_VTABLE
3307               $classname$* const _this = this;
3308               (void)_this;
3309               _internal_metadata_.MergeFrom<$unknown_fields_type$>(
3310                   from._internal_metadata_);
3311               $copy_construct_impl$;
3312               $copy_init_fields$;
3313               $force_allocation$;
3314               $maybe_register_arena_dtor$;
3315 
3316               // @@protoc_insertion_point(copy_constructor:$full_name$)
3317             }
3318           )cc");
3319 }
3320 
GenerateStructors(io::Printer * p)3321 void MessageGenerator::GenerateStructors(io::Printer* p) {
3322   p->Emit(
3323       {
3324           {"superclass", SuperClassName(descriptor_, options_)},
3325           {"ctor_body",
3326            [&] {
3327              if (HasSimpleBaseClass(descriptor_, options_)) return;
3328              p->Emit(R"cc(SharedCtor(arena);)cc");
3329              switch (NeedsArenaDestructor()) {
3330                case ArenaDtorNeeds::kRequired: {
3331                  p->Emit(R"cc(
3332                    if (arena != nullptr) {
3333                      arena->OwnCustomDestructor(this, &$classname$::ArenaDtor);
3334                    }
3335                  )cc");
3336                  break;
3337                }
3338                case ArenaDtorNeeds::kOnDemand: {
3339                  p->Emit(R"cc(
3340                    ::_pbi::InternalRegisterArenaDtor(arena, this,
3341                                                      &$classname$::ArenaDtor);
3342                  )cc");
3343                  break;
3344                }
3345                case ArenaDtorNeeds::kNone:
3346                  break;
3347              }
3348            }},
3349       },
3350       R"cc(
3351         $classname$::$classname$(::$proto_ns$::Arena* arena)
3352 #if defined(PROTOBUF_CUSTOM_VTABLE)
3353             : $superclass$(arena, _class_data_.base()) {
3354 #else   // PROTOBUF_CUSTOM_VTABLE
3355             : $superclass$(arena) {
3356 #endif  // PROTOBUF_CUSTOM_VTABLE
3357           $ctor_body$;
3358           // @@protoc_insertion_point(arena_constructor:$full_name$)
3359         }
3360       )cc");
3361 
3362   // Generate the copy constructor.
3363   if (UsingImplicitWeakFields(descriptor_->file(), options_)) {
3364     // If we are in lite mode and using implicit weak fields, we generate a
3365     // one-liner copy constructor that delegates to MergeFrom. This saves some
3366     // code size and also cuts down on the complexity of implicit weak fields.
3367     // We might eventually want to do this for all lite protos.
3368     p->Emit(R"cc(
3369       $classname$::$classname$(
3370           //~ Force alignment
3371           ::$proto_ns$::Arena* arena, const $classname$& from)
3372           : $classname$(arena) {
3373         MergeFrom(from);
3374       }
3375     )cc");
3376   } else if (ImplHasCopyCtor()) {
3377     p->Emit(R"cc(
3378       $classname$::$classname$(
3379           //~ Force alignment
3380           ::$proto_ns$::Arena* arena, const $classname$& from)
3381           : $classname$(arena) {
3382         MergeFrom(from);
3383       }
3384     )cc");
3385   } else {
3386     GenerateArenaEnabledCopyConstructor(p);
3387   }
3388 
3389   // Generate the shared constructor code.
3390   GenerateSharedConstructorCode(p);
3391 
3392   // Generate the destructor.
3393   if (HasSimpleBaseClass(descriptor_, options_)) {
3394     // For messages using simple base classes, having no destructor
3395     // allows our vtable to share the same destructor as every other
3396     // message with a simple base class.  This works only as long as
3397     // we have no fields needing destruction, of course.  (No strings
3398     // or extensions)
3399   } else {
3400     p->Emit(
3401         R"cc(
3402           $classname$::~$classname$() {
3403             // @@protoc_insertion_point(destructor:$full_name$)
3404             SharedDtor(*this);
3405           }
3406         )cc");
3407   }
3408 
3409   // Generate the shared destructor code.
3410   GenerateSharedDestructorCode(p);
3411 
3412   // Generate the arena-specific destructor code.
3413   if (NeedsArenaDestructor() > ArenaDtorNeeds::kNone) {
3414     GenerateArenaDestructorCode(p);
3415   }
3416 }
3417 
GenerateSourceInProto2Namespace(io::Printer * p)3418 void MessageGenerator::GenerateSourceInProto2Namespace(io::Printer* p) {
3419   auto v = p->WithVars(ClassVars(descriptor_, options_));
3420   auto t = p->WithVars(MakeTrackerCalls(descriptor_, options_));
3421   Formatter format(p);
3422   if (ShouldGenerateExternSpecializations(options_) &&
3423       ShouldGenerateClass(descriptor_, options_)) {
3424     p->Emit(R"cc(
3425       template void* Arena::DefaultConstruct<$classtype$>(Arena*);
3426     )cc");
3427     if (!IsMapEntryMessage(descriptor_)) {
3428       p->Emit(R"cc(
3429         template void* Arena::CopyConstruct<$classtype$>(Arena*, const void*);
3430       )cc");
3431     }
3432   }
3433 }
3434 
GenerateClear(io::Printer * p)3435 void MessageGenerator::GenerateClear(io::Printer* p) {
3436   if (HasSimpleBaseClass(descriptor_, options_)) return;
3437   Formatter format(p);
3438 
3439   // The maximum number of bytes we will memset to zero without checking their
3440   // hasbit to see if a zero-init is necessary.
3441   const int kMaxUnconditionalPrimitiveBytesClear = 4;
3442 
3443   format(
3444       "PROTOBUF_NOINLINE void $classname$::Clear() {\n"
3445       "// @@protoc_insertion_point(message_clear_start:$full_name$)\n");
3446   format.Indent();
3447 
3448   format("$pbi$::TSanWrite(&_impl_);\n");
3449 
3450   format(
3451       // TODO: It would be better to avoid emitting this if it is not used,
3452       // rather than emitting a workaround for the resulting warning.
3453       "$uint32$ cached_has_bits = 0;\n"
3454       "// Prevent compiler warnings about cached_has_bits being unused\n"
3455       "(void) cached_has_bits;\n\n");
3456 
3457   if (descriptor_->extension_range_count() > 0) {
3458     format("$extensions$.Clear();\n");
3459   }
3460 
3461   // Collect fields into chunks. Each chunk may have an if() condition that
3462   // checks all hasbits in the chunk and skips it if none are set.
3463   int zero_init_bytes = 0;
3464   for (const auto& field : optimized_order_) {
3465     if (CanClearByZeroing(field)) {
3466       zero_init_bytes += EstimateAlignmentSize(field);
3467     }
3468   }
3469   bool merge_zero_init = zero_init_bytes > kMaxUnconditionalPrimitiveBytesClear;
3470   int chunk_count = 0;
3471 
3472   std::vector<FieldChunk> chunks = CollectFields(
3473       optimized_order_, options_,
3474       [&](const FieldDescriptor* a, const FieldDescriptor* b) -> bool {
3475         chunk_count++;
3476         // This predicate guarantees that there is only a single zero-init
3477         // (memset) per chunk, and if present it will be at the beginning.
3478         bool same =
3479             HasByteIndex(a) == HasByteIndex(b) &&
3480             a->is_repeated() == b->is_repeated() &&
3481             IsLikelyPresent(a, options_) == IsLikelyPresent(b, options_) &&
3482             ShouldSplit(a, options_) == ShouldSplit(b, options_) &&
3483             (CanClearByZeroing(a) == CanClearByZeroing(b) ||
3484              (CanClearByZeroing(a) && (chunk_count == 1 || merge_zero_init)));
3485         if (!same) chunk_count = 0;
3486         return same;
3487       });
3488 
3489   auto it = chunks.begin();
3490   auto end = chunks.end();
3491   int cached_has_word_index = -1;
3492   while (it != end) {
3493     auto next = FindNextUnequalChunk(it, end, MayGroupChunksForHaswordsCheck);
3494     bool has_haswords_check = MaybeEmitHaswordsCheck(
3495         it, next, options_, has_bit_indices_, cached_has_word_index, "", p);
3496     bool has_default_split_check = !it->fields.empty() && it->should_split;
3497     if (has_default_split_check) {
3498       // Some fields are cleared without checking has_bit. So we add the
3499       // condition here to avoid writing to the default split instance.
3500       format("if (!IsSplitMessageDefault()) {\n");
3501       format.Indent();
3502     }
3503     while (it != next) {
3504       const std::vector<const FieldDescriptor*>& fields = it->fields;
3505       bool chunk_is_split = it->should_split;
3506       ABSL_CHECK_EQ(has_default_split_check, chunk_is_split);
3507 
3508       const FieldDescriptor* memset_start = nullptr;
3509       const FieldDescriptor* memset_end = nullptr;
3510       bool saw_non_zero_init = false;
3511 
3512       for (const auto& field : fields) {
3513         if (CanClearByZeroing(field)) {
3514           ABSL_CHECK(!saw_non_zero_init);
3515           if (!memset_start) memset_start = field;
3516           memset_end = field;
3517         } else {
3518           saw_non_zero_init = true;
3519         }
3520       }
3521 
3522       // Whether we wrap this chunk in:
3523       //   if (cached_has_bits & <chunk hasbits) { /* chunk. */ }
3524       // We can omit the if() for chunk size 1, or if our fields do not have
3525       // hasbits. I don't understand the rationale for the last part of the
3526       // condition, but it matches the old logic.
3527       const bool check_has_byte =
3528           HasBitIndex(fields.front()) != kNoHasbit && fields.size() > 1 &&
3529           !IsLikelyPresent(fields.back(), options_) &&
3530           (memset_end != fields.back() || merge_zero_init);
3531 
3532       if (check_has_byte) {
3533         // Emit an if() that will let us skip the whole chunk if none are set.
3534         uint32_t chunk_mask = GenChunkMask(fields, has_bit_indices_);
3535         std::string chunk_mask_str =
3536             absl::StrCat(absl::Hex(chunk_mask, absl::kZeroPad8));
3537 
3538         // Check (up to) 8 has_bits at a time if we have more than one field in
3539         // this chunk.  Due to field layout ordering, we may check
3540         // _has_bits_[last_chunk * 8 / 32] multiple times.
3541         ABSL_DCHECK_LE(2, popcnt(chunk_mask));
3542         ABSL_DCHECK_GE(8, popcnt(chunk_mask));
3543 
3544         if (cached_has_word_index != HasWordIndex(fields.front())) {
3545           cached_has_word_index = HasWordIndex(fields.front());
3546           format("cached_has_bits = $has_bits$[$1$];\n", cached_has_word_index);
3547         }
3548         format("if (cached_has_bits & 0x$1$u) {\n", chunk_mask_str);
3549         format.Indent();
3550       }
3551 
3552       if (memset_start) {
3553         if (memset_start == memset_end) {
3554           // For clarity, do not memset a single field.
3555           field_generators_.get(memset_start).GenerateMessageClearingCode(p);
3556         } else {
3557           ABSL_CHECK_EQ(chunk_is_split, ShouldSplit(memset_start, options_));
3558           ABSL_CHECK_EQ(chunk_is_split, ShouldSplit(memset_end, options_));
3559           format(
3560               "::memset(&$1$, 0, static_cast<::size_t>(\n"
3561               "    reinterpret_cast<char*>(&$2$) -\n"
3562               "    reinterpret_cast<char*>(&$1$)) + sizeof($2$));\n",
3563               FieldMemberName(memset_start, chunk_is_split),
3564               FieldMemberName(memset_end, chunk_is_split));
3565         }
3566       }
3567 
3568       // Clear all non-zero-initializable fields in the chunk.
3569       for (const auto& field : fields) {
3570         if (CanClearByZeroing(field)) continue;
3571         // It's faster to just overwrite primitive types, but we should only
3572         // clear strings and messages if they were set.
3573         //
3574         // TODO:  Let the CppFieldGenerator decide this somehow.
3575         bool have_enclosing_if =
3576             HasBitIndex(field) != kNoHasbit &&
3577             (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE ||
3578              field->cpp_type() == FieldDescriptor::CPPTYPE_STRING);
3579 
3580         if (have_enclosing_if) {
3581           PrintPresenceCheck(field, has_bit_indices_, p,
3582                              &cached_has_word_index);
3583           format.Indent();
3584         }
3585 
3586         field_generators_.get(field).GenerateMessageClearingCode(p);
3587 
3588         if (have_enclosing_if) {
3589           format.Outdent();
3590           format("}\n");
3591         }
3592       }
3593 
3594       if (check_has_byte) {
3595         format.Outdent();
3596         format("}\n");
3597       }
3598 
3599       // To next chunk.
3600       ++it;
3601     }
3602 
3603     if (has_default_split_check) {
3604       format.Outdent();
3605       format("}\n");
3606     }
3607     if (has_haswords_check) {
3608       p->Outdent();
3609       p->Emit(R"cc(
3610         }
3611       )cc");
3612 
3613       // Reset here as it may have been updated in just closed if statement.
3614       cached_has_word_index = -1;
3615     }
3616   }
3617   // Step 4: Unions.
3618   for (auto oneof : OneOfRange(descriptor_)) {
3619     format("clear_$1$();\n", oneof->name());
3620   }
3621 
3622   if (num_weak_fields_) {
3623     format("$weak_field_map$.ClearAll();\n");
3624   }
3625 
3626   // We don't clear donated status.
3627 
3628   if (!has_bit_indices_.empty()) {
3629     // Step 5: Everything else.
3630     format("$has_bits$.Clear();\n");
3631   }
3632 
3633   format("_internal_metadata_.Clear<$unknown_fields_type$>();\n");
3634 
3635   format.Outdent();
3636   format("}\n");
3637 }
3638 
GenerateOneofClear(io::Printer * p)3639 void MessageGenerator::GenerateOneofClear(io::Printer* p) {
3640   // Generated function clears the active field and union case (e.g. foo_case_).
3641   int i = 0;
3642   for (auto oneof : OneOfRange(descriptor_)) {
3643     Formatter format(p);
3644     auto v = p->WithVars({{"oneofname", oneof->name()}});
3645 
3646     format(
3647         "void $classname$::clear_$oneofname$() {\n"
3648         "// @@protoc_insertion_point(one_of_clear_start:$full_name$)\n");
3649     format.Indent();
3650     format("$pbi$::TSanWrite(&_impl_);\n");
3651     format("switch ($oneofname$_case()) {\n");
3652     format.Indent();
3653     for (auto field : FieldRange(oneof)) {
3654       format("case k$1$: {\n", UnderscoresToCamelCase(field->name(), true));
3655       format.Indent();
3656       // We clear only allocated objects in oneofs
3657       if (!IsStringOrMessage(field)) {
3658         format("// No need to clear\n");
3659       } else {
3660         field_generators_.get(field).GenerateClearingCode(p);
3661       }
3662       format("break;\n");
3663       format.Outdent();
3664       format("}\n");
3665     }
3666     format(
3667         "case $1$_NOT_SET: {\n"
3668         "  break;\n"
3669         "}\n",
3670         absl::AsciiStrToUpper(oneof->name()));
3671     format.Outdent();
3672     format(
3673         "}\n"
3674         "$oneof_case$[$1$] = $2$_NOT_SET;\n",
3675         i, absl::AsciiStrToUpper(oneof->name()));
3676     format.Outdent();
3677     format(
3678         "}\n"
3679         "\n");
3680     ++i;
3681   }
3682 }
3683 
GenerateSwap(io::Printer * p)3684 void MessageGenerator::GenerateSwap(io::Printer* p) {
3685   if (HasSimpleBaseClass(descriptor_, options_)) return;
3686   Formatter format(p);
3687 
3688   format(
3689       "void $classname$::InternalSwap($classname$* PROTOBUF_RESTRICT other) "
3690       "{\n");
3691   format.Indent();
3692   format("using std::swap;\n");
3693   format("$WeakDescriptorSelfPin$");
3694 
3695   if (HasGeneratedMethods(descriptor_->file(), options_)) {
3696     if (descriptor_->extension_range_count() > 0) {
3697       format(
3698           "$extensions$.InternalSwap(&other->$extensions$);"
3699           "\n");
3700     }
3701 
3702     if (HasNonSplitOptionalString(descriptor_, options_)) {
3703       p->Emit(R"cc(
3704         auto* arena = GetArena();
3705         ABSL_DCHECK_EQ(arena, other->GetArena());
3706       )cc");
3707     }
3708     format("_internal_metadata_.InternalSwap(&other->_internal_metadata_);\n");
3709 
3710     if (!has_bit_indices_.empty()) {
3711       for (size_t i = 0; i < HasBitsSize(); ++i) {
3712         format("swap($has_bits$[$1$], other->$has_bits$[$1$]);\n", i);
3713       }
3714     }
3715 
3716     // If possible, we swap several fields at once, including padding.
3717     const RunMap runs =
3718         FindRuns(optimized_order_, [this](const FieldDescriptor* field) {
3719           return !ShouldSplit(field, options_) &&
3720                  HasTrivialSwap(field, options_, scc_analyzer_);
3721         });
3722 
3723     for (size_t i = 0; i < optimized_order_.size(); ++i) {
3724       const FieldDescriptor* field = optimized_order_[i];
3725       if (ShouldSplit(field, options_)) {
3726         continue;
3727       }
3728       const auto it = runs.find(field);
3729 
3730       // We only apply the memswap technique to runs of more than one field, as
3731       // `swap(field_, other.field_)` is better than
3732       // `memswap<...>(&field_, &other.field_)` for generated code readability.
3733       if (it != runs.end() && it->second > 1) {
3734         // Use a memswap, then skip run_length fields.
3735         const size_t run_length = it->second;
3736         const std::string first_field_name =
3737             FieldMemberName(field, /*split=*/false);
3738         const std::string last_field_name = FieldMemberName(
3739             optimized_order_[i + run_length - 1], /*split=*/false);
3740 
3741         auto v = p->WithVars({
3742             {"first", first_field_name},
3743             {"last", last_field_name},
3744         });
3745 
3746         format(
3747             "$pbi$::memswap<\n"
3748             "    PROTOBUF_FIELD_OFFSET($classname$, $last$)\n"
3749             "    + sizeof($classname$::$last$)\n"
3750             "    - PROTOBUF_FIELD_OFFSET($classname$, $first$)>(\n"
3751             "        reinterpret_cast<char*>(&$first$),\n"
3752             "        reinterpret_cast<char*>(&other->$first$));\n");
3753 
3754         i += run_length - 1;
3755         // ++i at the top of the loop.
3756       } else {
3757         field_generators_.get(field).GenerateSwappingCode(p);
3758       }
3759     }
3760     if (ShouldSplit(descriptor_, options_)) {
3761       format("swap($split$, other->$split$);\n");
3762     }
3763 
3764     for (auto oneof : OneOfRange(descriptor_)) {
3765       format("swap(_impl_.$1$_, other->_impl_.$1$_);\n", oneof->name());
3766     }
3767 
3768     for (int i = 0; i < descriptor_->real_oneof_decl_count(); ++i) {
3769       format("swap($oneof_case$[$1$], other->$oneof_case$[$1$]);\n", i);
3770     }
3771 
3772     if (num_weak_fields_) {
3773       format(
3774           "$weak_field_map$.UnsafeArenaSwap(&other->$weak_field_map$)"
3775           ";\n");
3776     }
3777 
3778     if (!inlined_string_indices_.empty()) {
3779       for (size_t i = 0; i < InlinedStringDonatedSize(); ++i) {
3780         format(
3781             "swap($inlined_string_donated_array$[$1$], "
3782             "other->$inlined_string_donated_array$[$1$]);\n",
3783             i);
3784       }
3785     }
3786   } else {
3787     format("GetReflection()->Swap(this, other);");
3788   }
3789 
3790   format.Outdent();
3791   format("}\n");
3792 }
3793 
GetNewOp(io::Printer * arena_emitter) const3794 MessageGenerator::NewOpRequirements MessageGenerator::GetNewOp(
3795     io::Printer* arena_emitter) const {
3796   size_t arena_seeding_count = 0;
3797   NewOpRequirements op;
3798   if (IsBootstrapProto(options_, descriptor_->file())) {
3799     // To simplify bootstrapping we always use a function for these types.
3800     // It makes it easier to change the ABI of the `MessageCreator` class.
3801     op.needs_to_run_constructor = true;
3802     return op;
3803   }
3804 
3805   if (NeedsArenaDestructor() == ArenaDtorNeeds::kRequired) {
3806     // We can't skip the ArenaDtor for these messages.
3807     op.needs_to_run_constructor = true;
3808   }
3809 
3810   if (descriptor_->extension_range_count() > 0) {
3811     op.needs_arena_seeding = true;
3812     ++arena_seeding_count;
3813     if (arena_emitter) {
3814       arena_emitter->Emit(R"cc(
3815         PROTOBUF_FIELD_OFFSET($classname$, $extensions$) +
3816             decltype($classname$::$extensions$)::InternalGetArenaOffset(
3817                 $superclass$::internal_visibility()),
3818       )cc");
3819     }
3820   }
3821 
3822   if (num_weak_fields_ != 0) {
3823     op.needs_to_run_constructor = true;
3824   }
3825 
3826   for (const FieldDescriptor* field : FieldRange(descriptor_)) {
3827     const auto print_arena_offset = [&](absl::string_view suffix = "") {
3828       ++arena_seeding_count;
3829       if (arena_emitter) {
3830         arena_emitter->Emit(
3831             {{"field", FieldMemberName(field, false)}, {"suffix", suffix}},
3832             R"cc(
3833               PROTOBUF_FIELD_OFFSET($classname$, $field$) +
3834                   decltype($classname$::$field$)::
3835                       InternalGetArenaOffset$suffix$(
3836                           $superclass$::internal_visibility()),
3837             )cc");
3838       }
3839     };
3840     if (ShouldSplit(field, options_)) {
3841       op.needs_memcpy = true;
3842     } else if (field->real_containing_oneof() != nullptr) {
3843       /* nothing to do */
3844     } else if (field->is_map()) {
3845       op.needs_arena_seeding = true;
3846       // MapField contains an internal vtable pointer we need to copy.
3847       op.needs_memcpy = true;
3848       print_arena_offset();
3849       // Non-lite maps currently have more than one arena pointer in them. Print
3850       // both.
3851       if (HasDescriptorMethods(descriptor_->file(), options_)) {
3852         print_arena_offset("Alt");
3853       }
3854     } else if (field->is_repeated()) {
3855       op.needs_arena_seeding = true;
3856       print_arena_offset();
3857     } else {
3858       const auto& generator = field_generators_.get(field);
3859       if (generator.has_trivial_zero_default()) {
3860         /* nothing to do */
3861       } else {
3862         switch (field->cpp_type()) {
3863           case FieldDescriptor::CPPTYPE_INT32:
3864           case FieldDescriptor::CPPTYPE_INT64:
3865           case FieldDescriptor::CPPTYPE_UINT32:
3866           case FieldDescriptor::CPPTYPE_UINT64:
3867           case FieldDescriptor::CPPTYPE_DOUBLE:
3868           case FieldDescriptor::CPPTYPE_FLOAT:
3869           case FieldDescriptor::CPPTYPE_BOOL:
3870           case FieldDescriptor::CPPTYPE_ENUM:
3871             op.needs_memcpy = true;
3872             break;
3873 
3874           case FieldDescriptor::CPPTYPE_STRING:
3875             switch (internal::cpp::EffectiveStringCType(field)) {
3876               case FieldOptions::STRING_PIECE:
3877                 op.needs_arena_seeding = true;
3878                 print_arena_offset();
3879                 break;
3880               case FieldOptions::CORD:
3881                 // Cord fields are currently rejected above because of ArenaDtor
3882                 // requirements.
3883                 ABSL_CHECK(op.needs_to_run_constructor);
3884                 break;
3885               case FieldOptions::STRING:
3886                 op.needs_memcpy = true;
3887                 break;
3888               default:
3889                 ABSL_LOG(FATAL);
3890             }
3891             break;
3892           case FieldDescriptor::CPPTYPE_MESSAGE:
3893             ABSL_LOG(FATAL) << "Message should be zero initializable.";
3894             break;
3895         }
3896       }
3897     }
3898   }
3899 
3900   // If we are going to generate too many arena seeding offsets, we can skip the
3901   // attempt because we know it will fail at compile time and fallback to
3902   // placement new. The arena seeding code can handle up to an offset of
3903   // `63 * sizeof(Arena*)`.
3904   // This prevents generating huge lists that have to be run during constant
3905   // evaluation to just fail anyway. The actual upper bound is smaller than
3906   // this, but any reasonable value is enough to prevent long compile times for
3907   // big messages.
3908   if (arena_seeding_count >= 64) {
3909     op.needs_to_run_constructor = true;
3910   }
3911 
3912   return op;
3913 }
3914 
// Emits the out-of-line class-data definitions for this message, in order:
//   1. `PlacementNew_`,
//   2. `InternalNewImpl_` (its shape depends on the result of GetNewOp()),
//   3. the `_class_data_` constant and `GetClassData()`, in a "full" flavor
//      when the file has descriptor methods and a "lite" flavor otherwise.
GenerateClassData(io::Printer * p)3915 void MessageGenerator::GenerateClassData(io::Printer* p) {
  // With a null emitter GetNewOp() only computes the requirements; nothing is
  // printed by this call.
3916   const auto new_op = GetNewOp(nullptr);
3917   // Always generate PlacementNew_ because we might need it for different
3918   // reasons. EnableCustomNewFor<T> might be false in this compiler, or the
3919   // object might be too large for arena seeding.
3920   // We mark `inline` to avoid library bloat if the function is unused.
3921   p->Emit(R"cc(
3922     inline void* $classname$::PlacementNew_(const void*, void* mem,
3923                                             ::$proto_ns$::Arena* arena) {
3924       return ::new (mem) $classname$(arena);
3925     }
3926   )cc");
3927   if (new_op.needs_to_run_constructor) {
    // The constructor has to run: the creator always goes through
    // PlacementNew_.
3928     p->Emit(R"cc(
3929       constexpr auto $classname$::InternalNewImpl_() {
3930         return $pbi$::MessageCreator(&$classname$::PlacementNew_,
3931                                      sizeof($classname$), alignof($classname$));
3932       }
3933     )cc");
3934   } else if (new_op.needs_arena_seeding) {
    // Arena pointers must be seeded. `arena_offsets` re-runs GetNewOp() with
    // the real printer so that it prints the list of offsets. The emitted code
    // falls back to PlacementNew_ when EncodePlacementArenaOffsets() yields no
    // value.
3935     p->Emit({{"copy_type", new_op.needs_memcpy ? "CopyInit" : "ZeroInit"},
3936              {"arena_offsets", [&] { GetNewOp(p); }}},
3937             R"cc(
3938               constexpr auto $classname$::InternalNewImpl_() {
3939                 constexpr auto arena_bits = $pbi$::EncodePlacementArenaOffsets({
3940                     $arena_offsets$,
3941                 });
3942                 if (arena_bits.has_value()) {
3943                   return $pbi$::MessageCreator::$copy_type$(
3944                       sizeof($classname$), alignof($classname$), *arena_bits);
3945                 } else {
3946                   return $pbi$::MessageCreator(&$classname$::PlacementNew_,
3947                                                sizeof($classname$),
3948                                                alignof($classname$));
3949                 }
3950               }
3951             )cc");
3952   } else {
    // Neither the constructor nor arena seeding is needed: plain CopyInit or
    // ZeroInit creator.
    // NOTE(review): `arena_offsets` is supplied here but the template below
    // never references `$arena_offsets$`, so the callback looks unused in this
    // branch -- confirm and consider dropping it.
3953     p->Emit({{"copy_type", new_op.needs_memcpy ? "CopyInit" : "ZeroInit"},
3954              {"arena_offsets", [&] { GetNewOp(p); }}},
3955             R"cc(
3956               constexpr auto $classname$::InternalNewImpl_() {
3957                 return $pbi$::MessageCreator::$copy_type$(sizeof($classname$),
3958                                                           alignof($classname$));
3959               }
3960             )cc");
3961   }
3962 
  // `$default_instance$` expands to the address of the `_instance` member of
  // this message's default instance object.
3963   auto vars = p->WithVars(
3964       {{"default_instance",
3965         absl::StrCat("&", DefaultInstanceName(descriptor_, options_),
3966                      "._instance")}});
  // Class-data slot: the on-demand arena destructor registration function, or
  // nullptr when the message does not need one on demand.
3967   const auto on_demand_register_arena_dtor = [&] {
3968     if (NeedsArenaDestructor() == ArenaDtorNeeds::kOnDemand) {
3969       p->Emit(R"cc(
3970         $classname$::OnDemandRegisterArenaDtor,
3971       )cc");
3972     } else {
3973       p->Emit(R"cc(
3974         nullptr,  // OnDemandRegisterArenaDtor
3975       )cc");
3976     }
3977   };
  // Class-data slot: `IsInitializedImpl`, or nullptr when NeedsIsInitialized()
  // is false.
3978   const auto is_initialized = [&] {
3979     if (NeedsIsInitialized()) {
3980       p->Emit(R"cc(
3981         $classname$::IsInitializedImpl,
3982       )cc");
3983     } else {
3984       p->Emit(R"cc(
3985         nullptr,  // IsInitialized
3986       )cc");
3987     }
3988   };
  // Class-data slots that only exist under PROTOBUF_CUSTOM_VTABLE: clear,
  // byte-size and serialize entries. Messages with generated methods (other
  // than map entries) point at their own functions; all others use the
  // superclass implementations.
3989   const auto custom_vtable_methods = [&] {
3990     if (HasGeneratedMethods(descriptor_->file(), options_) &&
3991         !IsMapEntryMessage(descriptor_)) {
3992       p->Emit(R"cc(
3993         $superclass$::GetClearImpl<$classname$>(), &$classname$::ByteSizeLong,
3994             &$classname$::_InternalSerialize,
3995       )cc");
3996     } else {
3997       p->Emit(R"cc(
3998         static_cast<void (::$proto_ns$::MessageLite::*)()>(
3999             &$classname$::ClearImpl),
4000             $superclass$::ByteSizeLongImpl, $superclass$::_InternalSerializeImpl
4001             ,
4002       )cc");
4003     }
4004   };
4005 
4006   if (HasDescriptorMethods(descriptor_->file(), options_)) {
    // Full flavor: `ClassDataFull` plus a `GetClassData()` that may pin weak
    // descriptors before returning.
    // Emits strong references for this type (and, for types without generated
    // methods, for every message-typed field) when implicit weak descriptors
    // are in use; emits nothing otherwise.
4007     const auto pin_weak_descriptor = [&] {
4008       if (!UsingImplicitWeakDescriptor(descriptor_->file(), options_)) return;
4009 
4010       p->Emit({{"pin", StrongReferenceToType(descriptor_, options_)}},
4011               R"cc(
4012                 $pin$;
4013               )cc");
4014 
4015       // For CODE_SIZE types, we need to pin the submessages too.
4016       // SPEED types will pin them via the TcParse table automatically.
4017       if (HasGeneratedMethods(descriptor_->file(), options_)) return;
4018       for (int i = 0; i < descriptor_->field_count(); ++i) {
4019         auto* field = descriptor_->field(i);
4020         if (field->type() != field->TYPE_MESSAGE) continue;
4021         p->Emit(
4022             {{"pin", StrongReferenceToType(field->message_type(), options_)}},
4023             R"cc(
4024               $pin$;
4025             )cc");
4026       }
4027     };
4028     p->Emit(
4029         {
4030             {"on_demand_register_arena_dtor", on_demand_register_arena_dtor},
4031             {"is_initialized", is_initialized},
4032             {"pin_weak_descriptor", pin_weak_descriptor},
4033             {"custom_vtable_methods", custom_vtable_methods},
            // This callback prints nothing.
            // NOTE(review): presumably a placeholder slot -- confirm.
4034             {"v2_msg_table",
4035              [&] {
4036              }},
4037             {"tracker_on_get_metadata",
4038              [&] {
4039                if (HasTracker(descriptor_, options_)) {
4040                  p->Emit(R"cc(
4041                    &Impl_::TrackerOnGetMetadata,
4042                  )cc");
4043                } else {
4044                  p->Emit(R"cc(
4045                    nullptr,  // tracker
4046                  )cc");
4047                }
4048              }},
4049         },
4050         R"cc(
4051           PROTOBUF_CONSTINIT
4052           PROTOBUF_ATTRIBUTE_INIT_PRIORITY1
4053           const $pbi$::ClassDataFull $classname$::_class_data_ = {
4054               $pbi$::ClassData{
4055                   $default_instance$,
4056                   &_table_.header,
4057                   $on_demand_register_arena_dtor$,
4058                   $is_initialized$,
4059                   &$classname$::MergeImpl,
4060                   $superclass$::GetNewImpl<$classname$>(),
4061 #if defined(PROTOBUF_CUSTOM_VTABLE)
4062                   &$classname$::SharedDtor,
4063                   $custom_vtable_methods$,
4064 #endif  // PROTOBUF_CUSTOM_VTABLE
4065                   PROTOBUF_FIELD_OFFSET($classname$, $cached_size$),
4066                   false,
4067                   $v2_msg_table$,
4068               },
4069               &$classname$::kDescriptorMethods,
4070               &$desc_table$,
4071               $tracker_on_get_metadata$,
4072           };
4073           const $pbi$::ClassData* $classname$::GetClassData() const {
4074             $pin_weak_descriptor$;
4075             $pbi$::PrefetchToLocalCache(&_class_data_);
4076             $pbi$::PrefetchToLocalCache(_class_data_.tc_table);
4077             return _class_data_.base();
4078           }
4079         )cc");
4080   } else {
    // Lite flavor: `ClassDataLite<N>` carrying the type's full name inline.
4081     p->Emit(
4082         {
            // Length of the full name plus one.
            // NOTE(review): the extra byte is presumably for the terminating
            // NUL of the "$full_name$" literal -- confirm.
4083             {"type_size", descriptor_->full_name().size() + 1},
4084             {"on_demand_register_arena_dtor", on_demand_register_arena_dtor},
4085             {"is_initialized", is_initialized},
4086             {"custom_vtable_methods", custom_vtable_methods},
            // This callback prints nothing, as in the full flavor.
4087             {"v2_msg_table",
4088              [&] {
4089              }},
4090         },
4091         R"cc(
4092           PROTOBUF_CONSTINIT
4093           PROTOBUF_ATTRIBUTE_INIT_PRIORITY1
4094           const $pbi$::ClassDataLite<$type_size$> $classname$::_class_data_ = {
4095               {
4096                   $default_instance$,
4097                   &_table_.header,
4098                   $on_demand_register_arena_dtor$,
4099                   $is_initialized$,
4100                   &$classname$::MergeImpl,
4101                   $superclass$::GetNewImpl<$classname$>(),
4102 #if defined(PROTOBUF_CUSTOM_VTABLE)
4103                   &$classname$::SharedDtor,
4104                   $custom_vtable_methods$,
4105 #endif  // PROTOBUF_CUSTOM_VTABLE
4106                   PROTOBUF_FIELD_OFFSET($classname$, $cached_size$),
4107                   true,
4108                   $v2_msg_table$,
4109               },
4110               "$full_name$",
4111           };
4112           const $pbi$::ClassData* $classname$::GetClassData() const {
4113             return _class_data_.base();
4114           }
4115         )cc");
4116   }
4117 }
4118 
RequiresArena(GeneratorFunction function) const4119 bool MessageGenerator::RequiresArena(GeneratorFunction function) const {
4120   for (const FieldDescriptor* field : FieldRange(descriptor_)) {
4121     if (field_generators_.get(field).RequiresArena(function)) {
4122       return true;
4123     }
4124   }
4125   return false;
4126 }
4127 
// Emits `$classname$::MergeImpl(MessageLite& to_msg, const MessageLite&
// from_msg)`. Nothing is emitted for messages with a simple base class.
//
// The emitted body merges, in this order: the regular fields (processed in
// chunks), the has-bits, the oneofs, the weak field map, the extensions and
// finally the unknown fields.
GenerateClassSpecificMergeImpl(io::Printer * p)4128 void MessageGenerator::GenerateClassSpecificMergeImpl(io::Printer* p) {
4129   if (HasSimpleBaseClass(descriptor_, options_)) return;
4130   // Generate the class-specific MergeFrom, which avoids the ABSL_CHECK and
4131   // cast.
4132   Formatter format(p);
4133   format(
4134       "void $classname$::MergeImpl(::$proto_ns$::MessageLite& to_msg, const "
4135       "::$proto_ns$::MessageLite& from_msg) {\n"
4136       "$WeakDescriptorSelfPin$"
4137       "  auto* const _this = static_cast<$classname$*>(&to_msg);\n"
4138       "  auto& from = static_cast<const $classname$&>(from_msg);\n");
4139   format.Indent();
  // Only declare the local `arena` when some field's merge code asks for it.
4140   if (RequiresArena(GeneratorFunction::kMergeFrom)) {
4141     p->Emit(R"cc(
4142       ::$proto_ns$::Arena* arena = _this->GetArena();
4143     )cc");
4144   }
4145   format(
4146       "$annotate_mergefrom$"
4147       "// @@protoc_insertion_point(class_specific_merge_from_start:"
4148       "$full_name$)\n");
4149   format("$DCHK$_NE(&from, _this);\n");
4150 
4151   format(
4152       "$uint32$ cached_has_bits = 0;\n"
4153       "(void) cached_has_bits;\n\n");
4154 
  // For split messages, prepare the destination's split part for writing
  // unless the source still reports the default split state.
4155   if (ShouldSplit(descriptor_, options_)) {
4156     format(
4157         "if (PROTOBUF_PREDICT_FALSE(!from.IsSplitMessageDefault())) {\n"
4158         "  _this->PrepareSplitMessageForWrite();\n"
4159         "}\n");
4160   }
4161 
  // Group the fields of `optimized_order_` into chunks: two fields may share a
  // chunk only when they agree on has-byte index, on IsLikelyPresent() and on
  // ShouldSplit().
4162   std::vector<FieldChunk> chunks = CollectFields(
4163       optimized_order_, options_,
4164       [&](const FieldDescriptor* a, const FieldDescriptor* b) -> bool {
4165         return HasByteIndex(a) == HasByteIndex(b) &&
4166                IsLikelyPresent(a, options_) == IsLikelyPresent(b, options_) &&
4167                ShouldSplit(a, options_) == ShouldSplit(b, options_);
4168       });
4169 
4170   auto it = chunks.begin();
4171   auto end = chunks.end();
4172   // cached_has_word_index maintains that:
4173   //   cached_has_bits = from._has_bits_[cached_has_word_index]
4174   // for cached_has_word_index >= 0
4175   int cached_has_word_index = -1;
4176   while (it != end) {
    // [it, next) is a run of chunks that may be handled under one haswords
    // check; MaybeEmitHaswordsCheck() reports whether it opened such a check,
    // in which case it is closed after the run below.
4177     auto next = FindNextUnequalChunk(it, end, MayGroupChunksForHaswordsCheck);
4178     bool has_haswords_check =
4179         MaybeEmitHaswordsCheck(it, next, options_, has_bit_indices_,
4180                                cached_has_word_index, "from.", p);
4181 
4182     while (it != next) {
4183       const std::vector<const FieldDescriptor*>& fields = it->fields;
      // A chunk uses the cached has-bits word when its first field has a
      // has-byte index. The whole chunk is additionally guarded by a single
      // mask test when it holds several fields and its last field is not
      // likely present.
4184       const bool cache_has_bits = HasByteIndex(fields.front()) != kNoHasbit;
4185       const bool check_has_byte = cache_has_bits && fields.size() > 1 &&
4186                                   !IsLikelyPresent(fields.back(), options_);
4187 
      // Reload `cached_has_bits` only when this chunk lives in a different
      // has-bits word than the one currently cached.
4188       if (cache_has_bits &&
4189           cached_has_word_index != HasWordIndex(fields.front())) {
4190         cached_has_word_index = HasWordIndex(fields.front());
4191         format("cached_has_bits = from.$has_bits$[$1$];\n",
4192                cached_has_word_index);
4193       }
4194 
4195       if (check_has_byte) {
4196         // Emit an if() that will let us skip the whole chunk if none are set.
4197         uint32_t chunk_mask = GenChunkMask(fields, has_bit_indices_);
4198         std::string chunk_mask_str =
4199             absl::StrCat(absl::Hex(chunk_mask, absl::kZeroPad8));
4200 
4201         // Check (up to) 8 has_bits at a time if we have more than one field in
4202         // this chunk.  Due to field layout ordering, we may check
4203         // _has_bits_[last_chunk * 8 / 32] multiple times.
4204         ABSL_DCHECK_LE(2, popcnt(chunk_mask));
4205         ABSL_DCHECK_GE(8, popcnt(chunk_mask));
4206 
4207         format("if (cached_has_bits & 0x$1$u) {\n", chunk_mask_str);
4208         format.Indent();
4209       }
4210 
4211       // Go back and emit merging code for each of the fields we processed.
4212       for (const auto* field : fields) {
4213         const auto& generator = field_generators_.get(field);
4214 
4215         if (field->is_repeated()) {
          // Repeated fields are merged unconditionally.
4216           generator.GenerateMergingCode(p);
4217         } else if (field->is_optional() && !HasHasbit(field)) {
4218           // Merge semantics without true field presence: primitive fields are
4219           // merged only if non-zero (numeric) or non-empty (string).
4220           MayEmitIfNonDefaultCheck(p, "from.", field, /*emit_body=*/[&]() {
4221             generator.GenerateMergingCode(p);
4222           });
4223         } else if (field->options().weak() ||
4224                    cached_has_word_index != HasWordIndex(field)) {
4225           // Check hasbit, not using cached bits.
4226           auto v = p->WithVars(HasBitVars(field));
4227           format(
4228               "if ((from.$has_bits$[$has_array_index$] & $has_mask$) != 0) "
4229               "{\n");
4230           format.Indent();
4231           generator.GenerateMergingCode(p);
4232           format.Outdent();
4233           format("}\n");
4234         } else {
4235           // Check hasbit, using cached bits.
4236           ABSL_CHECK(HasHasbit(field));
4237           int has_bit_index = has_bit_indices_[field->index()];
4238           const std::string mask = absl::StrCat(
4239               absl::Hex(1u << (has_bit_index % 32), absl::kZeroPad8));
4240           format("if (cached_has_bits & 0x$1$u) {\n", mask);
4241           format.Indent();
4242 
          // NOTE(review): inside a chunk-level has-byte guard, POD fields use
          // the copy-constructor code instead of the merging code --
          // presumably a plain assignment; confirm against the field
          // generators.
4243           if (check_has_byte && IsPOD(field)) {
4244             generator.GenerateCopyConstructorCode(p);
4245           } else {
4246             generator.GenerateMergingCode(p);
4247           }
4248 
4249           format.Outdent();
4250           format("}\n");
4251         }
4252       }
4253 
      // Close the chunk-level mask test opened above.
4254       if (check_has_byte) {
4255         format.Outdent();
4256         format("}\n");
4257       }
4258 
4259       // To next chunk.
4260       ++it;
4261     }
4262 
4263     if (has_haswords_check) {
4264       p->Outdent();
4265       p->Emit(R"cc(
4266         }
4267       )cc");
4268 
4269       // Reset here as it may have been updated in just closed if statement.
4270       cached_has_word_index = -1;
4271     }
4272   }
4273 
4274   if (HasBitsSize() == 1) {
4275     // Optimization to avoid a load. Assuming that most messages have fewer than
4276     // 32 fields, this seems useful.
4277     p->Emit(R"cc(
4278       _this->$has_bits$[0] |= cached_has_bits;
4279     )cc");
4280   } else if (HasBitsSize() > 1) {
    // Several has-bits words: OR the source's has-bits into the destination's.
4281     p->Emit(R"cc(
4282       _this->$has_bits$.Or(from.$has_bits$);
4283     )cc");
4284   }
4285 
4286   // Merge oneof fields. Oneof field requires oneof case check.
  // Emitted logic per oneof: when the source has a case set and it differs
  // from the destination's, clear the destination's current member (if any)
  // and adopt the source case; then switch on the source case to merge the
  // active member.
4287   for (auto oneof : OneOfRange(descriptor_)) {
4288     p->Emit({{"name", oneof->name()},
4289              {"NAME", absl::AsciiStrToUpper(oneof->name())},
4290              {"index", oneof->index()},
4291              {"cases",
4292               [&] {
4293                 for (const auto* field : FieldRange(oneof)) {
4294                   p->Emit(
4295                       {{"Label", UnderscoresToCamelCase(field->name(), true)},
4296                        {"body",
4297                         [&] {
4298                           field_generators_.get(field).GenerateMergingCode(p);
4299                         }}},
4300                       R"cc(
4301                         case k$Label$: {
4302                           $body$;
4303                           break;
4304                         }
4305                       )cc");
4306                 }
4307               }}},
4308             R"cc(
4309               if (const uint32_t oneof_from_case = from.$oneof_case$[$index$]) {
4310                 const uint32_t oneof_to_case = _this->$oneof_case$[$index$];
4311                 const bool oneof_needs_init = oneof_to_case != oneof_from_case;
4312                 if (oneof_needs_init) {
4313                   if (oneof_to_case != 0) {
4314                     _this->clear_$name$();
4315                   }
4316                   _this->$oneof_case$[$index$] = oneof_from_case;
4317                 }
4318 
4319                 switch (oneof_from_case) {
4320                   $cases$;
4321                   case $NAME$_NOT_SET:
4322                     break;
4323                 }
4324               }
4325             )cc");
4326   }
  // Weak fields are merged through the weak field map as a whole.
4327   if (num_weak_fields_) {
4328     format(
4329         "_this->$weak_field_map$.MergeFrom(from.$weak_field_map$);"
4330         "\n");
4331   }
4332 
4333   // Merging of extensions and unknown fields is done last, to maximize
4334   // the opportunity for tail calls.
4335   if (descriptor_->extension_range_count() > 0) {
4336     format(
4337         "_this->$extensions$.MergeFrom(internal_default_instance(), "
4338         "from.$extensions$);\n");
4339   }
4340 
4341   format(
4342       "_this->_internal_metadata_.MergeFrom<$unknown_fields_type$>(from._"
4343       "internal_"
4344       "metadata_);\n");
4345 
4346   format.Outdent();
4347   format("}\n");
4348 }
4349 
GenerateCopyFrom(io::Printer * p)4350 void MessageGenerator::GenerateCopyFrom(io::Printer* p) {
4351   if (HasSimpleBaseClass(descriptor_, options_)) return;
4352   Formatter format(p);
4353   if (HasDescriptorMethods(descriptor_->file(), options_)) {
4354     // We don't override the generalized CopyFrom (aka that which
4355     // takes in the Message base class as a parameter); instead we just
4356     // let the base Message::CopyFrom take care of it.  The base MergeFrom
4357     // knows how to quickly confirm the types exactly match, and if so, will
4358     // use GetClassData() to get the address of Message::CopyWithSourceCheck,
4359     // which calls Clear() and then MergeFrom(), as well as making sure that
4360     // clearing the destination message doesn't alter the source, when in debug
4361     // builds. Most callers avoid this by passing a "from" message that is the
4362     // same type as the message being merged into, rather than a generic
4363     // Message.
4364   }
4365 
4366   // Generate the class-specific CopyFrom.
4367   format(
4368       "void $classname$::CopyFrom(const $classname$& from) {\n"
4369       "// @@protoc_insertion_point(class_specific_copy_from_start:"
4370       "$full_name$)\n");
4371   format.Indent();
4372 
4373   format("if (&from == this) return;\n");
4374 
4375   if (!options_.opensource_runtime && HasMessageFieldOrExtension(descriptor_)) {
4376     // This check is disabled in the opensource release because we're
4377     // concerned that many users do not define NDEBUG in their release builds.
4378     // It is also disabled if a message has neither message fields nor
4379     // extensions, as it's impossible to copy from its descendant.
4380     //
4381     // Note that IsDescendant is implemented by reflection and not available for
4382     // lite runtime. In that case, check if the size of the source has changed
4383     // after Clear.
4384     if (HasDescriptorMethods(descriptor_->file(), options_)) {
4385       format(
4386           "$DCHK$(!::_pbi::IsDescendant(*this, from))\n"
4387           "    << \"Source of CopyFrom cannot be a descendant of the "
4388           "target.\";\n"
4389           "Clear();\n");
4390     } else {
4391       format(
4392           "#ifndef NDEBUG\n"
4393           "::size_t from_size = from.ByteSizeLong();\n"
4394           "#endif\n"
4395           "Clear();\n"
4396           "#ifndef NDEBUG\n"
4397           "$CHK$_EQ(from_size, from.ByteSizeLong())\n"
4398           "  << \"Source of CopyFrom changed when clearing target.  Either \"\n"
4399           "     \"source is a nested message in target (not allowed), or \"\n"
4400           "     \"another thread is modifying the source.\";\n"
4401           "#endif\n");
4402     }
4403   } else {
4404     format("Clear();\n");
4405   }
4406   format("MergeFrom(from);\n");
4407 
4408   format.Outdent();
4409   format("}\n");
4410 }
4411 
// Emits nothing: the body is empty and `p` is unused.
// NOTE(review): presumably a hook whose implementation is not part of this
// file -- confirm before relying on it.
GenerateVerify(io::Printer * p)4412 void MessageGenerator::GenerateVerify(io::Printer* p) {
4413 }
4414 
GenerateSerializeOneofFields(io::Printer * p,const std::vector<const FieldDescriptor * > & fields)4415 void MessageGenerator::GenerateSerializeOneofFields(
4416     io::Printer* p, const std::vector<const FieldDescriptor*>& fields) {
4417   ABSL_CHECK(!fields.empty());
4418   if (fields.size() == 1) {
4419     GenerateSerializeOneField(p, fields[0], -1);
4420     return;
4421   }
4422   // We have multiple mutually exclusive choices.  Emit a switch statement.
4423   const OneofDescriptor* oneof = fields[0]->containing_oneof();
4424   p->Emit({{"name", oneof->name()},
4425            {"cases",
4426             [&] {
4427               for (const auto* field : fields) {
4428                 p->Emit({{"Name", UnderscoresToCamelCase(field->name(), true)},
4429                          {"body",
4430                           [&] {
4431                             field_generators_.get(field)
4432                                 .GenerateSerializeWithCachedSizesToArray(p);
4433                           }}},
4434                         R"cc(
4435                           case k$Name$: {
4436                             $body$;
4437                             break;
4438                           }
4439                         )cc");
4440               }
4441             }}},
4442           R"cc(
4443             switch (this_.$name$_case()) {
4444               $cases$;
4445               default:
4446                 break;
4447             }
4448           )cc");
4449 }
4450 
GenerateSerializeOneField(io::Printer * p,const FieldDescriptor * field,int cached_has_bits_index)4451 void MessageGenerator::GenerateSerializeOneField(io::Printer* p,
4452                                                  const FieldDescriptor* field,
4453                                                  int cached_has_bits_index) {
4454   auto v = p->WithVars(FieldVars(field, options_));
4455   auto emit_body = [&] {
4456     field_generators_.get(field).GenerateSerializeWithCachedSizesToArray(p);
4457   };
4458 
4459   if (field->options().weak()) {
4460     emit_body();
4461     p->Emit("\n");
4462     return;
4463   }
4464 
4465   PrintFieldComment(Formatter{p}, field, options_);
4466   if (HasHasbit(field)) {
4467     p->Emit(
4468         {
4469             {"body", emit_body},
4470             {"cond",
4471              [&] {
4472                int has_bit_index = HasBitIndex(field);
4473                auto v = p->WithVars(HasBitVars(field));
4474                // Attempt to use the state of cached_has_bits, if possible.
4475                if (cached_has_bits_index == has_bit_index / 32) {
4476                  p->Emit("cached_has_bits & $has_mask$");
4477                } else {
4478                  p->Emit(
4479                      "(this_.$has_bits$[$has_array_index$] & $has_mask$) != 0");
4480                }
4481              }},
4482         },
4483         R"cc(
4484           if ($cond$) {
4485             $body$;
4486           }
4487         )cc");
4488   } else if (field->is_optional()) {
4489     MayEmitIfNonDefaultCheck(p, "this_.", field, std::move(emit_body));
4490   } else {
4491     emit_body();
4492   }
4493   p->Emit("\n");
4494 }
4495 
GenerateSerializeOneExtensionRange(io::Printer * p,int start,int end)4496 void MessageGenerator::GenerateSerializeOneExtensionRange(io::Printer* p,
4497                                                           int start, int end) {
4498   auto v = p->WithVars(variables_);
4499   p->Emit({{"start", start}, {"end", end}},
4500           R"cc(
4501             // Extension range [$start$, $end$)
4502             target = this_.$extensions$._InternalSerialize(
4503                 internal_default_instance(), $start$, $end$, target, stream);
4504           )cc");
4505 }
4506 
GenerateSerializeWithCachedSizesToArray(io::Printer * p)4507 void MessageGenerator::GenerateSerializeWithCachedSizesToArray(io::Printer* p) {
4508   if (HasSimpleBaseClass(descriptor_, options_)) return;
4509   if (descriptor_->options().message_set_wire_format()) {
4510     // Special-case MessageSet.
4511     p->Emit(R"cc(
4512 #if defined(PROTOBUF_CUSTOM_VTABLE)
4513       $uint8$* $classname$::_InternalSerialize(
4514           const MessageLite& base, $uint8$* target,
4515           ::$proto_ns$::io::EpsCopyOutputStream* stream) {
4516         const $classname$& this_ = static_cast<const $classname$&>(base);
4517 #else   // PROTOBUF_CUSTOM_VTABLE
4518       $uint8$* $classname$::_InternalSerialize(
4519           $uint8$* target,
4520           ::$proto_ns$::io::EpsCopyOutputStream* stream) const {
4521         const $classname$& this_ = *this;
4522 #endif  // PROTOBUF_CUSTOM_VTABLE
4523         $annotate_serialize$ target =
4524             this_.$extensions$
4525                 .InternalSerializeMessageSetWithCachedSizesToArray(
4526                     internal_default_instance(), target, stream);
4527         target = ::_pbi::InternalSerializeUnknownMessageSetItemsToArray(
4528             this_.$unknown_fields$, target, stream);
4529         return target;
4530       }
4531     )cc");
4532     return;
4533   }
4534 
4535   p->Emit(
4536       {
4537           {"debug_cond", ShouldSerializeInOrder(descriptor_, options_)
4538                              ? "1"
4539                              : "defined(NDEBUG)"},
4540           {"ndebug", [&] { GenerateSerializeWithCachedSizesBody(p); }},
4541           {"debug", [&] { GenerateSerializeWithCachedSizesBodyShuffled(p); }},
4542           {"ifdef",
4543            [&] {
4544              if (ShouldSerializeInOrder(descriptor_, options_)) {
4545                p->Emit("$ndebug$");
4546              } else {
4547                p->Emit(R"cc(
4548                  //~ force indenting level
4549 #ifdef NDEBUG
4550                  $ndebug$;
4551 #else   // NDEBUG
4552                  $debug$;
4553 #endif  // !NDEBUG
4554                )cc");
4555              }
4556            }},
4557       },
4558       R"cc(
4559 #if defined(PROTOBUF_CUSTOM_VTABLE)
4560         $uint8$* $classname$::_InternalSerialize(
4561             const MessageLite& base, $uint8$* target,
4562             ::$proto_ns$::io::EpsCopyOutputStream* stream) {
4563           const $classname$& this_ = static_cast<const $classname$&>(base);
4564 #else   // PROTOBUF_CUSTOM_VTABLE
4565         $uint8$* $classname$::_InternalSerialize(
4566             $uint8$* target,
4567             ::$proto_ns$::io::EpsCopyOutputStream* stream) const {
4568           const $classname$& this_ = *this;
4569 #endif  // PROTOBUF_CUSTOM_VTABLE
4570           $annotate_serialize$;
4571           // @@protoc_insertion_point(serialize_to_array_start:$full_name$)
4572           $ifdef$;
4573           // @@protoc_insertion_point(serialize_to_array_end:$full_name$)
4574           return target;
4575         }
4576       )cc");
4577 }
4578 
// Emits the statements that make up the in-order body of the generated
// _InternalSerialize(): an optional weak-field writer, the `cached_has_bits`
// local, every field and extension range merged by field number, and finally
// a guarded write of the unknown fields.  The emitted code refers to `this_`,
// `target` and `stream`; it does not declare them.
//
// Emits nothing when HasSimpleBaseClass(descriptor_, options_) is true.
void MessageGenerator::GenerateSerializeWithCachedSizesBody(io::Printer* p) {
  if (HasSimpleBaseClass(descriptor_, options_)) return;
  // If there are multiple fields in a row from the same oneof then we
  // coalesce them and emit a switch statement.  This is more efficient
  // because it lets the C++ compiler know this is a "at most one can happen"
  // situation. If we emitted "if (has_x()) ...; if (has_y()) ..." the C++
  // compiler's emitted code might check has_y() even when has_x() is true.
  class LazySerializerEmitter {
   public:
    LazySerializerEmitter(MessageGenerator* mg, io::Printer* p)
        : mg_(mg), p_(p), cached_has_bit_index_(kNoHasbit) {}

    // Any run of oneof fields still pending when the emitter goes out of
    // scope is emitted here.
    ~LazySerializerEmitter() { Flush(); }

    // If conditions allow, try to accumulate a run of fields from the same
    // oneof, and handle them at the next Flush().
    void Emit(const FieldDescriptor* field) {
      // A field without presence, or one that does not belong to the oneof of
      // the pending run, ends that run before anything else is emitted.
      if (!field->has_presence() || MustFlush(field)) {
        Flush();
      }
      if (field->real_containing_oneof()) {
        // Deferred: emitted together with its oneof siblings on Flush().
        v_.push_back(field);
      } else {
        // TODO: Defer non-oneof fields similarly to oneof fields.
        if (HasHasbit(field) && field->has_presence()) {
          // We speculatively load the entire _has_bits_[index] contents, even
          // if it is for only one field.  Deferring non-oneof emitting would
          // allow us to determine whether this is going to be useful.
          int has_bit_index = mg_->has_bit_indices_[field->index()];
          // Has-bits are grouped 32 per word; reload only when this field's
          // word differs from the one currently held in cached_has_bits.
          if (cached_has_bit_index_ != has_bit_index / 32) {
            // Reload.
            int new_index = has_bit_index / 32;
            p_->Emit({{"index", new_index}},
                     R"cc(
                       cached_has_bits = this_._impl_._has_bits_[$index$];
                     )cc");
            cached_has_bit_index_ = new_index;
          }
        }

        mg_->GenerateSerializeOneField(p_, field, cached_has_bit_index_);
      }
    }

    // Convenience wrapper: forwards to Emit() unless `field` is null.
    void EmitIfNotNull(const FieldDescriptor* field) {
      if (field != nullptr) {
        Emit(field);
      }
    }

    // Emits the pending run of oneof fields, if any, and starts a new run.
    void Flush() {
      if (!v_.empty()) {
        mg_->GenerateSerializeOneofFields(p_, v_);
        v_.clear();
      }
    }

   private:
    // If we have multiple fields in v_ then they all must be from the same
    // oneof.  Would adding field to v_ break that invariant?
    bool MustFlush(const FieldDescriptor* field) {
      return !v_.empty() &&
             v_[0]->containing_oneof() != field->containing_oneof();
    }

    MessageGenerator* mg_;
    io::Printer* p_;
    // Pending run of fields that all belong to the same oneof.
    std::vector<const FieldDescriptor*> v_;

    // cached_has_bit_index_ maintains that:
    //   cached_has_bits = this_._impl_._has_bits_[cached_has_bit_index_]
    // for cached_has_bit_index_ >= 0
    int cached_has_bit_index_;
  };

  // Accumulates consecutive extension ranges into a single span
  // [smallest start_number(), largest end_number()] and emits one
  // serialization call for the whole span on Flush().
  class LazyExtensionRangeEmitter {
   public:
    LazyExtensionRangeEmitter(MessageGenerator* mg, io::Printer* p)
        : mg_(mg), p_(p) {}

    // Starts a new span with `range`, or widens the current span so that it
    // also covers `range`.
    void AddToRange(const Descriptor::ExtensionRange* range) {
      if (!has_current_range_) {
        min_start_ = range->start_number();
        max_end_ = range->end_number();
        has_current_range_ = true;
      } else {
        min_start_ = std::min(min_start_, range->start_number());
        max_end_ = std::max(max_end_, range->end_number());
      }
    }

    // Emits the accumulated span, if there is one, and resets the state.
    void Flush() {
      if (has_current_range_) {
        mg_->GenerateSerializeOneExtensionRange(p_, min_start_, max_end_);
      }
      has_current_range_ = false;
    }

   private:
    MessageGenerator* mg_;
    io::Printer* p_;
    bool has_current_range_ = false;
    int min_start_ = 0;
    int max_end_ = 0;
  };

  // We need to track the largest weak field, because weak fields are serialized
  // differently than normal fields.  The WeakFieldMap::FieldWriter will
  // serialize all weak fields that are ordinally between the last serialized
  // weak field and the current field.  In order to guarantee that all weak
  // fields are serialized, we need to make sure to emit the code to serialize
  // the largest weak field present at some point.
  class LargestWeakFieldHolder {
   public:
    // Hands back the held field (possibly null) and forgets it.
    const FieldDescriptor* Release() {
      const FieldDescriptor* result = field_;
      field_ = nullptr;
      return result;
    }
    // Keeps `field` if nothing is held yet or if its number is larger than the
    // held field's number.
    void ReplaceIfLarger(const FieldDescriptor* field) {
      if (field_ == nullptr || field_->number() < field->number()) {
        field_ = field;
      }
    }

   private:
    const FieldDescriptor* field_ = nullptr;
  };

  std::vector<const FieldDescriptor*> ordered_fields =
      SortFieldsByNumber(descriptor_);

  // Copy the extension ranges so they can be sorted independently of their
  // declaration order.
  std::vector<const Descriptor::ExtensionRange*> sorted_extensions;
  sorted_extensions.reserve(descriptor_->extension_range_count());
  for (int i = 0; i < descriptor_->extension_range_count(); ++i) {
    sorted_extensions.push_back(descriptor_->extension_range(i));
  }
  std::sort(sorted_extensions.begin(), sorted_extensions.end(),
            ExtensionRangeSorter());
  p->Emit(
      {
          {"handle_weak_fields",
           [&] {
             // The field writer is only declared for messages that actually
             // have weak fields.
             if (num_weak_fields_ == 0) return;
             p->Emit(R"cc(
               ::_pbi::WeakFieldMap::FieldWriter field_writer(
                   this_.$weak_field_map$);
             )cc");
           }},
          {"handle_lazy_fields",
           [&] {
             // Merge fields and extension ranges, sorted by field number.
             LazySerializerEmitter e(this, p);
             LazyExtensionRangeEmitter re(this, p);
             LargestWeakFieldHolder largest_weak_field;
             // i walks ordered_fields, j walks sorted_extensions.
             size_t i, j;
             for (i = 0, j = 0;
                  i < ordered_fields.size() || j < sorted_extensions.size();) {
               // Take the next field when the extension ranges are exhausted,
               // or when its number is below the start of the next range;
               // otherwise take the next extension range.
               if ((j == sorted_extensions.size()) ||
                   (i < static_cast<size_t>(descriptor_->field_count()) &&
                    ordered_fields[i]->number() <
                        sorted_extensions[j]->start_number())) {
                 const FieldDescriptor* field = ordered_fields[i++];
                 // A field ends the current run of extension ranges.
                 re.Flush();
                 if (field->options().weak()) {
                   // Weak fields are only remembered here (plus a comment in
                   // the output); the largest one seen is emitted later.
                   largest_weak_field.ReplaceIfLarger(field);
                   PrintFieldComment(Formatter{p}, field, options_);
                 } else {
                   // Emit the pending weak field, if any, ahead of this
                   // non-weak field.
                   e.EmitIfNotNull(largest_weak_field.Release());
                   e.Emit(field);
                 }
               } else {
                 // Emit the pending weak field and the pending oneof run
                 // before the extension range is accumulated.
                 e.EmitIfNotNull(largest_weak_field.Release());
                 e.Flush();
                 re.AddToRange(sorted_extensions[j++]);
               }
             }
             // Emit whatever is still pending.  A pending oneof run is emitted
             // by e's destructor when this callback returns.
             re.Flush();
             e.EmitIfNotNull(largest_weak_field.Release());
           }},
          {"handle_unknown_fields",
           [&] {
             // Two emission forms, selected by UseUnknownFieldSet(): a
             // WireFormat helper call, or a raw copy of the unknown-field
             // bytes.
             if (UseUnknownFieldSet(descriptor_->file(), options_)) {
               p->Emit(R"cc(
                 target =
                     ::_pbi::WireFormat::InternalSerializeUnknownFieldsToArray(
                         this_.$unknown_fields$, target, stream);
               )cc");
             } else {
               p->Emit(R"cc(
                 target = stream->WriteRaw(
                     this_.$unknown_fields$.data(),
                     static_cast<int>(this_.$unknown_fields$.size()), target);
               )cc");
             }
           }},
      },
      R"cc(
        $handle_weak_fields$;
        $uint32$ cached_has_bits = 0;
        (void)cached_has_bits;

        $handle_lazy_fields$;
        if (PROTOBUF_PREDICT_FALSE(this_.$have_unknown_fields$)) {
          $handle_unknown_fields$;
        }
      )cc");
}
4787 
GenerateSerializeWithCachedSizesBodyShuffled(io::Printer * p)4788 void MessageGenerator::GenerateSerializeWithCachedSizesBodyShuffled(
4789     io::Printer* p) {
4790   std::vector<const FieldDescriptor*> ordered_fields =
4791       SortFieldsByNumber(descriptor_);
4792 
4793   std::vector<const Descriptor::ExtensionRange*> sorted_extensions;
4794   sorted_extensions.reserve(descriptor_->extension_range_count());
4795   for (int i = 0; i < descriptor_->extension_range_count(); ++i) {
4796     sorted_extensions.push_back(descriptor_->extension_range(i));
4797   }
4798   std::sort(sorted_extensions.begin(), sorted_extensions.end(),
4799             ExtensionRangeSorter());
4800 
4801   int num_fields = ordered_fields.size() + sorted_extensions.size();
4802   constexpr int kLargePrime = 1000003;
4803   ABSL_CHECK_LT(num_fields, kLargePrime)
4804       << "Prime offset must be greater than the number of fields to ensure "
4805          "those are coprime.";
4806   p->Emit(
4807       {
4808           {"last_field", num_fields - 1},
4809           {"field_writer",
4810            [&] {
4811              if (num_weak_fields_ == 0) return;
4812              p->Emit(R"cc(
4813                ::_pbi::WeakFieldMap::FieldWriter field_writer(
4814                    this_.$weak_field_map$);
4815              )cc");
4816            }},
4817           {"ordered_cases",
4818            [&] {
4819              size_t index = 0;
4820              for (const auto* f : ordered_fields) {
4821                p->Emit({{"index", index++},
4822                         {"body", [&] { GenerateSerializeOneField(p, f, -1); }}},
4823                        R"cc(
4824                          case $index$: {
4825                            $body$;
4826                            break;
4827                          }
4828                        )cc");
4829              }
4830            }},
4831           {"extension_cases",
4832            [&] {
4833              size_t index = ordered_fields.size();
4834              for (const auto* r : sorted_extensions) {
4835                p->Emit({{"index", index++},
4836                         {"body",
4837                          [&] {
4838                            GenerateSerializeOneExtensionRange(
4839                                p, r->start_number(), r->end_number());
4840                          }}},
4841                        R"cc(
4842                          case $index$: {
4843                            $body$;
4844                            break;
4845                          }
4846                        )cc");
4847              }
4848            }},
4849           {"handle_unknown_fields",
4850            [&] {
4851              if (UseUnknownFieldSet(descriptor_->file(), options_)) {
4852                p->Emit(R"cc(
4853                  target =
4854                      ::_pbi::WireFormat::InternalSerializeUnknownFieldsToArray(
4855                          this_.$unknown_fields$, target, stream);
4856                )cc");
4857              } else {
4858                p->Emit(R"cc(
4859                  target = stream->WriteRaw(
4860                      this_.$unknown_fields$.data(),
4861                      static_cast<int>(this_.$unknown_fields$.size()), target);
4862                )cc");
4863              }
4864            }},
4865       },
4866       R"cc(
4867         $field_writer$;
4868         for (int i = $last_field$; i >= 0; i--) {
4869           switch (i) {
4870             $ordered_cases$;
4871             $extension_cases$;
4872             default: {
4873               $DCHK$(false) << "Unexpected index: " << i;
4874             }
4875           }
4876         }
4877         if (PROTOBUF_PREDICT_FALSE(this_.$have_unknown_fields$)) {
4878           $handle_unknown_fields$;
4879         }
4880       )cc");
4881 }
4882 
RequiredFieldsBitMask() const4883 std::vector<uint32_t> MessageGenerator::RequiredFieldsBitMask() const {
4884   const int array_size = HasBitsSize();
4885   std::vector<uint32_t> masks(array_size, 0);
4886 
4887   for (auto field : FieldRange(descriptor_)) {
4888     if (!field->is_required()) {
4889       continue;
4890     }
4891 
4892     const int has_bit_index = has_bit_indices_[field->index()];
4893     masks[has_bit_index / 32] |= static_cast<uint32_t>(1)
4894                                  << (has_bit_index % 32);
4895   }
4896   return masks;
4897 }
4898 
// Emits the definition of $classname$::ByteSizeLong().  Both the
// PROTOBUF_CUSTOM_VTABLE form (static, taking `const MessageLite& base`) and
// the ordinary const-member form are emitted behind an #if; either way the
// body works on a local reference named `this_`.
//
// Emits nothing when HasSimpleBaseClass(descriptor_, options_) is true.
// Messages using message_set_wire_format get a dedicated, shorter body.
void MessageGenerator::GenerateByteSize(io::Printer* p) {
  if (HasSimpleBaseClass(descriptor_, options_)) return;

  if (descriptor_->options().message_set_wire_format()) {
    // Special-case MessageSet.
    // Size = extension set's MessageSetByteSize() plus, when present, the
    // unknown MessageSet items; the total is also stored in the cached size.
    p->Emit(
        R"cc(
#if defined(PROTOBUF_CUSTOM_VTABLE)
          ::size_t $classname$::ByteSizeLong(const MessageLite& base) {
            const $classname$& this_ = static_cast<const $classname$&>(base);
#else   // PROTOBUF_CUSTOM_VTABLE
          ::size_t $classname$::ByteSizeLong() const {
            const $classname$& this_ = *this;
#endif  // PROTOBUF_CUSTOM_VTABLE
            $WeakDescriptorSelfPin$;
            $annotate_bytesize$;
            // @@protoc_insertion_point(message_set_byte_size_start:$full_name$)
            ::size_t total_size = this_.$extensions$.MessageSetByteSize();
            if (this_.$have_unknown_fields$) {
              total_size += ::_pbi::ComputeUnknownMessageSetItemsSize(
                  this_.$unknown_fields$);
            }
            this_.$cached_size$.Set(::_pbi::ToCachedSize(total_size));
            return total_size;
          }
        )cc");
    return;
  }

  // Group the fields of optimized_order_ into chunks.  The predicate treats
  // two fields as compatible when they agree on label, has-byte index,
  // likely-presence and split-ness.
  // NOTE(review): how CollectFields applies the predicate (presumably to
  // neighbouring fields) is not visible here -- confirm.
  std::vector<FieldChunk> chunks = CollectFields(
      optimized_order_, options_,
      [&](const FieldDescriptor* a, const FieldDescriptor* b) -> bool {
        return a->label() == b->label() && HasByteIndex(a) == HasByteIndex(b) &&
               IsLikelyPresent(a, options_) == IsLikelyPresent(b, options_) &&
               ShouldSplit(a, options_) == ShouldSplit(b, options_);
      });

  p->Emit(
      {{"handle_extension_set",
        [&] {
          // Only messages that declare extension ranges add the extension
          // set's size.
          if (descriptor_->extension_range_count() == 0) return;
          p->Emit(R"cc(
            total_size += this_.$extensions$.ByteSize();
          )cc");
        }},
       {"prefetch",
        [&] {
          // See comment in third_party/protobuf/port.h for details,
          // on how much we are prefetching. Only insert prefetch once per
          // function, since advancing is actually slower. We sometimes
          // prefetch more than sizeof(message), because it helps with
          // next message on arena.
          bool generate_prefetch = false;
          // Skip trivial messages with 0 or 1 fields, unless they are
          // repeated, to reduce codesize.
          switch (optimized_order_.size()) {
            case 1:
              generate_prefetch = optimized_order_[0]->is_repeated();
              break;
            case 0:
              break;
            default:
              generate_prefetch = true;
          }
          // The prefetch is additionally gated on IsPresentMessage().
          if (!generate_prefetch || !IsPresentMessage(descriptor_, options_)) {
            return;
          }
          p->Emit(R"cc(
            ::_pbi::Prefetch5LinesFrom7Lines(&this_);
          )cc");
        }},
       {"handle_fields",
        [&] {
          auto it = chunks.begin();
          auto end = chunks.end();
          // Index of the has-bits word that the generated `cached_has_bits`
          // currently holds; -1 means none.
          int cached_has_word_index = -1;

          // Outer loop: one iteration per group of chunks [it, next) as
          // delimited by FindNextUnequalChunk.
          while (it != end) {
            auto next =
                FindNextUnequalChunk(it, end, MayGroupChunksForHaswordsCheck);
            // When this returns true an `if (...) {` has been opened in the
            // output; the matching `}` is emitted after the inner loop.
            bool has_haswords_check =
                MaybeEmitHaswordsCheck(it, next, options_, has_bit_indices_,
                                       cached_has_word_index, "this_.", p);

            // Inner loop: one emitted block per chunk of the group.
            while (it != next) {
              const auto& fields = it->fields;
              // A chunk-level has-bits test is emitted only for chunks with
              // more than one field whose first field has a has-word and
              // whose last field is not likely present.
              const bool check_has_byte =
                  fields.size() > 1 && HasWordIndex(fields[0]) != kNoHasbit &&
                  !IsLikelyPresent(fields.back(), options_);
              // NOTE: the callbacks below share cached_has_word_index by
              // reference, so the order in which the template names them
              // matters.
              p->Emit(
                  {{"update_byte_size_for_chunk",
                    [&] {
                      // Go back and emit checks for each of the fields we
                      // processed.
                      for (const auto* field : fields) {
                        EmitUpdateByteSizeForField(field, p,
                                                   cached_has_word_index);
                      }
                    }},
                   {"may_update_cached_has_word_index",
                    [&] {
                      // Reload cached_has_bits only when a chunk-level test
                      // will be emitted and the chunk's word is not the one
                      // already cached.
                      if (!check_has_byte) return;
                      if (cached_has_word_index == HasWordIndex(fields.front()))
                        return;

                      cached_has_word_index = HasWordIndex(fields.front());
                      p->Emit({{"index", cached_has_word_index}},
                              R"cc(
                                cached_has_bits = this_.$has_bits$[$index$];
                              )cc");
                    }},
                   {"check_if_chunk_present",
                    [&] {
                      // Without a chunk-level test nothing is emitted here,
                      // which leaves the template's braces as a plain block.
                      if (!check_has_byte) {
                        return;
                      }

                      // Emit an if() that will let us skip the whole chunk
                      // if none are set.
                      uint32_t chunk_mask =
                          GenChunkMask(fields, has_bit_indices_);

                      // Check (up to) 8 has_bits at a time if we have more
                      // than one field in this chunk.  Due to field layout
                      // ordering, we may check _has_bits_[last_chunk * 8 /
                      // 32] multiple times.
                      ABSL_DCHECK_LE(2, popcnt(chunk_mask));
                      ABSL_DCHECK_GE(8, popcnt(chunk_mask));

                      p->Emit(
                          {{"mask", absl::StrFormat("0x%08xu", chunk_mask)}},
                          "if (cached_has_bits & $mask$)");
                    }}},
                  R"cc(
                    $may_update_cached_has_word_index$;
                    $check_if_chunk_present$ {
                      //~ Force newline.
                      $update_byte_size_for_chunk$;
                    }
                  )cc");

              // To next chunk.
              ++it;
            }

            if (has_haswords_check) {
              // Close the `if` opened by MaybeEmitHaswordsCheck.
              p->Emit(R"cc(
                }
              )cc");

              // Reset here as it may have been updated in just closed if
              // statement.
              cached_has_word_index = -1;
            }
          }
        }},
       {"handle_oneof_fields",
        [&] {
          // Fields inside a oneof don't use _has_bits_ so we count them in a
          // separate pass.
          // One generated switch per oneof, with one case per member field
          // and an empty case for <ONEOF>_NOT_SET.
          for (auto oneof : OneOfRange(descriptor_)) {
            p->Emit(
                {{"oneof_name", oneof->name()},
                 {"oneof_case_name", absl::AsciiStrToUpper(oneof->name())},
                 {"case_per_field",
                  [&] {
                    for (auto field : FieldRange(oneof)) {
                      PrintFieldComment(Formatter{p}, field, options_);
                      p->Emit(
                          {{"field_name",
                            UnderscoresToCamelCase(field->name(), true)},
                           {"field_byte_size",
                            [&] {
                              field_generators_.get(field).GenerateByteSize(p);
                            }}},
                          R"cc(
                            case k$field_name$: {
                              $field_byte_size$;
                              break;
                            }
                          )cc");
                    }
                  }}},
                R"cc(
                  switch (this_.$oneof_name$_case()) {
                    $case_per_field$;
                    case $oneof_case_name$_NOT_SET: {
                      break;
                    }
                  }
                )cc");
          }
        }},
       {"handle_weak_fields",
        [&] {
          if (num_weak_fields_ == 0) return;
          // TagSize + MessageSize
          p->Emit(R"cc(
            total_size += this_.$weak_field_map$.ByteSizeLong();
          )cc");
        }},
       {"handle_unknown_fields",
        [&] {
          // Both branches emit the function's return statement, which is why
          // the template below has no `return` of its own.
          if (UseUnknownFieldSet(descriptor_->file(), options_)) {
            // We go out of our way to put the computation of the uncommon
            // path of unknown fields in tail position. This allows for
            // better code generation of this function for simple protos.
            p->Emit(R"cc(
              return this_.MaybeComputeUnknownFieldsSize(total_size,
                                                         &this_.$cached_size$);
            )cc");
          } else {
            // We update _cached_size_ even though this is a const method.
            // Because const methods might be called concurrently this needs
            // to be atomic operations or the program is undefined.  In
            // practice, since any concurrent writes will be writing the
            // exact same value, normal writes will work on all common
            // processors. We use a dedicated wrapper class to abstract away
            // the underlying atomic. This makes it easier on platforms where
            // even relaxed memory order might have perf impact to replace it
            // with ordinary loads and stores.
            p->Emit(R"cc(
              if (PROTOBUF_PREDICT_FALSE(this_.$have_unknown_fields$)) {
                total_size += this_.$unknown_fields$.size();
              }
              this_.$cached_size$.Set(::_pbi::ToCachedSize(total_size));
              return total_size;
            )cc");
          }
        }}},
      R"cc(
#if defined(PROTOBUF_CUSTOM_VTABLE)
        ::size_t $classname$::ByteSizeLong(const MessageLite& base) {
          const $classname$& this_ = static_cast<const $classname$&>(base);
#else   // PROTOBUF_CUSTOM_VTABLE
        ::size_t $classname$::ByteSizeLong() const {
          const $classname$& this_ = *this;
#endif  // PROTOBUF_CUSTOM_VTABLE
          $WeakDescriptorSelfPin$;
          $annotate_bytesize$;
          // @@protoc_insertion_point(message_byte_size_start:$full_name$)
          ::size_t total_size = 0;
          $handle_extension_set$;

          $uint32$ cached_has_bits = 0;
          // Prevent compiler warnings about cached_has_bits being unused
          (void)cached_has_bits;

          $prefetch$;
          $handle_fields$;
          $handle_oneof_fields$;
          $handle_weak_fields$;
          $handle_unknown_fields$;
        }
      )cc");
}
5155 
NeedsIsInitialized()5156 bool MessageGenerator::NeedsIsInitialized() {
5157   if (HasSimpleBaseClass(descriptor_, options_)) return false;
5158   if (descriptor_->extension_range_count() != 0) return true;
5159   if (num_required_fields_ != 0) return true;
5160 
5161   for (const auto* field : optimized_order_) {
5162     if (field_generators_.get(field).NeedsIsInitialized()) return true;
5163   }
5164   if (num_weak_fields_ != 0) return true;
5165 
5166   for (const auto* oneof : OneOfRange(descriptor_)) {
5167     for (const auto* field : FieldRange(oneof)) {
5168       if (field_generators_.get(field).NeedsIsInitialized()) return true;
5169     }
5170   }
5171 
5172   return false;
5173 }
5174 
GenerateIsInitialized(io::Printer * p)5175 void MessageGenerator::GenerateIsInitialized(io::Printer* p) {
5176   if (!NeedsIsInitialized()) return;
5177 
5178   auto has_required_field = [&](const auto* oneof) {
5179     for (const auto* field : FieldRange(oneof)) {
5180       if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE &&
5181           !ShouldIgnoreRequiredFieldCheck(field, options_) &&
5182           scc_analyzer_->HasRequiredFields(field->message_type())) {
5183         return true;
5184       }
5185     }
5186     return false;
5187   };
5188 
5189   p->Emit(
5190       {
5191           {"test_extensions",
5192            [&] {
5193              if (descriptor_->extension_range_count() == 0) return;
5194              p->Emit(R"cc(
5195                if (!this_.$extensions$.IsInitialized(
5196                        internal_default_instance())) {
5197                  return false;
5198                }
5199              )cc");
5200            }},
5201           {"test_required_fields",
5202            [&] {
5203              if (num_required_fields_ == 0) return;
5204              p->Emit(R"cc(
5205                if (_Internal::MissingRequiredFields(this_.$has_bits$)) {
5206                  return false;
5207                }
5208              )cc");
5209            }},
5210           {"test_ordinary_fields",
5211            [&] {
5212              for (const auto* field : optimized_order_) {
5213                auto& f = field_generators_.get(field);
5214                // XXX REMOVE? XXX
5215                const auto needs_verifier =
5216                    !f.NeedsIsInitialized()
5217                        ? absl::make_optional(p->WithSubstitutionListener(
5218                              [&](auto label, auto loc) {
5219                                ABSL_LOG(FATAL)
5220                                    << "Field generated output but is marked as "
5221                                       "!NeedsIsInitialized"
5222                                    << field->full_name();
5223                              }))
5224                        : absl::nullopt;
5225                f.GenerateIsInitialized(p);
5226              }
5227            }},
5228           {"test_weak_fields",
5229            [&] {
5230              if (num_weak_fields_ == 0) return;
5231              p->Emit(R"cc(
5232                if (!this_.$weak_field_map$.IsInitialized())
5233                  return false;
5234              )cc");
5235            }},
5236           {"test_oneof_fields",
5237            [&] {
5238              for (const auto* oneof : OneOfRange(descriptor_)) {
5239                if (!has_required_field(oneof)) continue;
5240                p->Emit({{"name", oneof->name()},
5241                         {"NAME", absl::AsciiStrToUpper(oneof->name())},
5242                         {"cases",
5243                          [&] {
5244                            for (const auto* field : FieldRange(oneof)) {
5245                              p->Emit({{"Name", UnderscoresToCamelCase(
5246                                                    field->name(), true)},
5247                                       {"body",
5248                                        [&] {
5249                                          field_generators_.get(field)
5250                                              .GenerateIsInitialized(p);
5251                                        }}},
5252                                      R"cc(
5253                                        case k$Name$: {
5254                                          $body$;
5255                                          break;
5256                                        }
5257                                      )cc");
5258                            }
5259                          }}},
5260                        R"cc(
5261                          switch (this_.$name$_case()) {
5262                            $cases$;
5263                            case $NAME$_NOT_SET: {
5264                              break;
5265                            }
5266                          }
5267                        )cc");
5268              }
5269            }},
5270       },
5271       R"cc(
5272         PROTOBUF_NOINLINE bool $classname$::IsInitializedImpl(
5273             const MessageLite& msg) {
5274           auto& this_ = static_cast<const $classname$&>(msg);
5275           $test_extensions$;
5276           $test_required_fields$;
5277           $test_ordinary_fields$;
5278           $test_weak_fields$;
5279           $test_oneof_fields$;
5280           return true;
5281         }
5282       )cc");
5283 }
5284 
5285 }  // namespace cpp
5286 }  // namespace compiler
5287 }  // namespace protobuf
5288 }  // namespace google
5289 
5290 #include "google/protobuf/port_undef.inc"
5291