• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc.  All rights reserved.
3 //
4 // Use of this source code is governed by a BSD-style
5 // license that can be found in the LICENSE file or at
6 // https://developers.google.com/open-source/licenses/bsd
7 
8 #include <algorithm>
9 #include <cstddef>
10 #include <cstdint>
11 #include <cstring>
12 #include <limits>
13 #include <new>  // IWYU pragma: keep for operator new
14 #include <numeric>
15 #include <string>
16 #include <type_traits>
17 
18 #include "absl/base/optimization.h"
19 #include "absl/log/absl_check.h"
20 #include "absl/log/absl_log.h"
21 #include "absl/numeric/bits.h"
22 #include "absl/strings/str_cat.h"
23 #include "absl/strings/string_view.h"
24 #include "google/protobuf/arenastring.h"
25 #include "google/protobuf/generated_enum_util.h"
26 #include "google/protobuf/generated_message_tctable_decl.h"
27 #include "google/protobuf/generated_message_tctable_impl.h"
28 #include "google/protobuf/inlined_string_field.h"
29 #include "google/protobuf/io/zero_copy_stream_impl_lite.h"
30 #include "google/protobuf/map.h"
31 #include "google/protobuf/message_lite.h"
32 #include "google/protobuf/parse_context.h"
33 #include "google/protobuf/port.h"
34 #include "google/protobuf/repeated_field.h"
35 #include "google/protobuf/repeated_ptr_field.h"
36 #include "google/protobuf/serial_arena.h"
37 #include "google/protobuf/varint_shuffle.h"
38 #include "google/protobuf/wire_format_lite.h"
39 #include "utf8_validity.h"
40 
41 
42 // clang-format off
43 #include "google/protobuf/port_def.inc"
44 // clang-format on
45 
46 namespace google {
47 namespace protobuf {
48 namespace internal {
49 
50 using FieldEntry = TcParseTableBase::FieldEntry;
51 
52 //////////////////////////////////////////////////////////////////////////////
53 // Template instantiations:
54 //////////////////////////////////////////////////////////////////////////////
55 
56 #ifndef NDEBUG
AlignFail(std::integral_constant<size_t,4>,std::uintptr_t address)57 void AlignFail(std::integral_constant<size_t, 4>, std::uintptr_t address) {
58   ABSL_LOG(FATAL) << "Unaligned (4) access at " << address;
59 
60   // Explicit abort to let compilers know this function does not return
61   abort();
62 }
AlignFail(std::integral_constant<size_t,8>,std::uintptr_t address)63 void AlignFail(std::integral_constant<size_t, 8>, std::uintptr_t address) {
64   ABSL_LOG(FATAL) << "Unaligned (8) access at " << address;
65 
66   // Explicit abort to let compilers know this function does not return
67   abort();
68 }
69 #endif
70 
GenericFallbackLite(PROTOBUF_TC_PARAM_DECL)71 const char* TcParser::GenericFallbackLite(PROTOBUF_TC_PARAM_DECL) {
72   PROTOBUF_MUSTTAIL return GenericFallbackImpl<MessageLite, std::string>(
73       PROTOBUF_TC_PARAM_PASS);
74 }
75 
76 //////////////////////////////////////////////////////////////////////////////
77 // Core fast parsing implementation:
78 //////////////////////////////////////////////////////////////////////////////
79 
ParseLoopPreserveNone(MessageLite * msg,const char * ptr,ParseContext * ctx,const TcParseTableBase * table)80 PROTOBUF_NOINLINE const char* TcParser::ParseLoopPreserveNone(
81     MessageLite* msg, const char* ptr, ParseContext* ctx,
82     const TcParseTableBase* table) {
83   return ParseLoop(msg, ptr, ctx, table);
84 }
85 
// A matching 1-byte tag on the fast path is already its own decoded value, so
// it is only widened to 32 bits.
static uint32_t FastDecodeTag(uint8_t coded_tag) {
  return uint32_t{coded_tag};
}
90 
// A matching 2-byte tag on the fast path always has to be decoded.
//
// Adding the sign-extended low byte to the coded value and then halving the
// sum removes the low byte's top (continuation) bit and closes the gap between
// the two 7-bit payload groups.
static uint32_t FastDecodeTag(uint16_t coded_tag) {
  const uint32_t sign_extended_low =
      static_cast<uint32_t>(static_cast<int8_t>(coded_tag));
  const uint32_t doubled = uint32_t{coded_tag} + sign_extended_low;
  return doubled >> 1;
}
97 
98 //////////////////////////////////////////////////////////////////////////////
99 // Core mini parsing implementation:
100 //////////////////////////////////////////////////////////////////////////////
101 
102 // Field lookup table layout:
103 //
104 // Because it consists of a series of variable-length segments, the lookup
105 // table is organized within an array of uint16_t, and each element is either
106 // a uint16_t or a uint32_t stored little-endian as a pair of uint16_t.
107 //
108 // Its fundamental building block maps 16 contiguously ascending field numbers
109 // to their locations within the field entry table:
110 
// Lookup-table building block covering 16 consecutive field numbers.
struct SkipEntry16 {
  // Bit i is set when the i-th field number of the group has NO field entry.
  uint16_t skipmap;
  // Offset into the field entry table that FindFieldEntry adds to the position
  // computed within this group.
  uint16_t field_entry_offset;
};
115 
116 // The skipmap is a bitfield of which of those field numbers do NOT have a
117 // field entry.  The lowest bit of the skipmap corresponds to the lowest of
118 // the 16 field numbers, so if a proto had only fields 1, 2, 3, and 7, the
119 // skipmap would contain 0b11111111'10111000.
120 //
121 // The field lookup table begins with a single 32-bit skipmap that maps the
122 // field numbers 1 through 32.  This is because the majority of proto
123 // messages only contain fields numbered 1 to 32.
124 //
125 // The rest of the lookup table is a repeated series of
126 // { 32-bit field #,  #SkipEntry16s,  {SkipEntry16...} }
127 // That is, the next thing is a pair of uint16_t that form the next
128 // lowest field number that the lookup table handles.  If this number is -1,
129 // that is the end of the table.  Then there is a uint16_t that is
130 // the number of contiguous SkipEntry16 entries that follow, and then of
131 // course the SkipEntry16s themselves.
132 
133 // Originally developed and tested at https://godbolt.org/z/vbc7enYcf
134 
135 // Returns the address of the field for `tag` in the table's field entries.
136 // Returns nullptr if the field was not found.
FindFieldEntry(const TcParseTableBase * table,uint32_t field_num)137 const TcParseTableBase::FieldEntry* TcParser::FindFieldEntry(
138     const TcParseTableBase* table, uint32_t field_num) {
139   const FieldEntry* const field_entries = table->field_entries_begin();
140 
141   uint32_t fstart = 1;
142   uint32_t adj_fnum = field_num - fstart;
143 
144   if (PROTOBUF_PREDICT_TRUE(adj_fnum < 32)) {
145     uint32_t skipmap = table->skipmap32;
146     uint32_t skipbit = 1 << adj_fnum;
147     if (PROTOBUF_PREDICT_FALSE(skipmap & skipbit)) return nullptr;
148     skipmap &= skipbit - 1;
149     adj_fnum -= absl::popcount(skipmap);
150     auto* entry = field_entries + adj_fnum;
151     PROTOBUF_ASSUME(entry != nullptr);
152     return entry;
153   }
154   const uint16_t* lookup_table = table->field_lookup_begin();
155   for (;;) {
156 #ifdef ABSL_IS_LITTLE_ENDIAN
157     memcpy(&fstart, lookup_table, sizeof(fstart));
158 #else
159     fstart = lookup_table[0] | (lookup_table[1] << 16);
160 #endif
161     lookup_table += sizeof(fstart) / sizeof(*lookup_table);
162     uint32_t num_skip_entries = *lookup_table++;
163     if (field_num < fstart) return nullptr;
164     adj_fnum = field_num - fstart;
165     uint32_t skip_num = adj_fnum / 16;
166     if (PROTOBUF_PREDICT_TRUE(skip_num < num_skip_entries)) {
167       // for each group of 16 fields we have:
168       // a bitmap of 16 bits
169       // a 16-bit field-entry offset for the first of them.
170       auto* skip_data = lookup_table + (adj_fnum / 16) * (sizeof(SkipEntry16) /
171                                                           sizeof(uint16_t));
172       SkipEntry16 se = {skip_data[0], skip_data[1]};
173       adj_fnum &= 15;
174       uint32_t skipmap = se.skipmap;
175       uint16_t skipbit = 1 << adj_fnum;
176       if (PROTOBUF_PREDICT_FALSE(skipmap & skipbit)) return nullptr;
177       skipmap &= skipbit - 1;
178       adj_fnum += se.field_entry_offset;
179       adj_fnum -= absl::popcount(skipmap);
180       auto* entry = field_entries + adj_fnum;
181       PROTOBUF_ASSUME(entry != nullptr);
182       return entry;
183     }
184     lookup_table +=
185         num_skip_entries * (sizeof(SkipEntry16) / sizeof(*lookup_table));
186   }
187 }
188 
189 // Field names are stored in a format of:
190 //
191 // 1) A table of name sizes, one byte each, from 1 to 255 per name.
192 //    `entries` is the size of this first table.
193 // 1a) padding bytes, so the table of name sizes is a multiple of
194 //     eight bytes in length. They are zero.
195 //
196 // 2) All the names, concatenated, with neither separation nor termination.
197 //
198 // This is designed to be compact but not particularly fast to retrieve.
199 // In particular, it takes O(n) to retrieve the name of the n'th field,
200 // which is usually fine because most protos have fewer than 10 fields.
FindName(const char * name_data,size_t entries,size_t index)201 static absl::string_view FindName(const char* name_data, size_t entries,
202                                   size_t index) {
203   // The compiler unrolls these... if this isn't fast enough,
204   // there's an AVX version at https://godbolt.org/z/eojrjqzfr
205   // ARM-compatible version at https://godbolt.org/z/n5YT5Ee85
206 
207   // The field name sizes are padded up to a multiple of 8, so we
208   // must pad them here.
209   size_t num_sizes = (entries + 7) & -8;
210   auto* uint8s = reinterpret_cast<const uint8_t*>(name_data);
211   size_t pos = std::accumulate(uint8s, uint8s + index, num_sizes);
212   size_t size = name_data[index];
213   auto* start = &name_data[pos];
214   return {start, size};
215 }
216 
MessageName(const TcParseTableBase * table)217 absl::string_view TcParser::MessageName(const TcParseTableBase* table) {
218   return FindName(table->name_data(), table->num_field_entries + 1, 0);
219 }
220 
FieldName(const TcParseTableBase * table,const FieldEntry * field_entry)221 absl::string_view TcParser::FieldName(const TcParseTableBase* table,
222                                       const FieldEntry* field_entry) {
223   const FieldEntry* const field_entries = table->field_entries_begin();
224   auto field_index = static_cast<size_t>(field_entry - field_entries);
225   return FindName(table->name_data(), table->num_field_entries + 1,
226                   field_index + 1);
227 }
228 
Error(PROTOBUF_TC_PARAM_NO_DATA_DECL)229 PROTOBUF_NOINLINE const char* TcParser::Error(PROTOBUF_TC_PARAM_NO_DATA_DECL) {
230   (void)ctx;
231   (void)ptr;
232   SyncHasbits(msg, hasbits, table);
233   return nullptr;
234 }
235 
236 template <bool export_called_function>
MiniParse(PROTOBUF_TC_PARAM_DECL)237 inline PROTOBUF_ALWAYS_INLINE const char* TcParser::MiniParse(
238     PROTOBUF_TC_PARAM_DECL) {
239   TestMiniParseResult* test_out;
240   if (export_called_function) {
241     test_out = reinterpret_cast<TestMiniParseResult*>(
242         static_cast<uintptr_t>(data.data));
243   }
244 
245   uint32_t tag;
246   ptr = ReadTagInlined(ptr, &tag);
247   if (PROTOBUF_PREDICT_FALSE(ptr == nullptr)) {
248     if (export_called_function) *test_out = {Error};
249     PROTOBUF_MUSTTAIL return Error(PROTOBUF_TC_PARAM_NO_DATA_PASS);
250   }
251 
252   auto* entry = FindFieldEntry(table, tag >> 3);
253   if (entry == nullptr) {
254     if (export_called_function) *test_out = {table->fallback, tag};
255     data.data = tag;
256     PROTOBUF_MUSTTAIL return table->fallback(PROTOBUF_TC_PARAM_PASS);
257   }
258 
259   // The handler may need the tag and the entry to resolve fallback logic. Both
260   // of these are 32 bits, so pack them into (the 64-bit) `data`. Since we can't
261   // pack the entry pointer itself, just pack its offset from `table`.
262   uint64_t entry_offset = reinterpret_cast<const char*>(entry) -
263                           reinterpret_cast<const char*>(table);
264   data.data = entry_offset << 32 | tag;
265 
266   using field_layout::FieldKind;
267   auto field_type =
268       entry->type_card & (+field_layout::kSplitMask | FieldKind::kFkMask);
269 
270   static constexpr TailCallParseFunc kMiniParseTable[] = {
271       &MpFallback,             // FieldKind::kFkNone
272       &MpVarint<false>,        // FieldKind::kFkVarint
273       &MpPackedVarint<false>,  // FieldKind::kFkPackedVarint
274       &MpFixed<false>,         // FieldKind::kFkFixed
275       &MpPackedFixed<false>,   // FieldKind::kFkPackedFixed
276       &MpString<false>,        // FieldKind::kFkString
277       &MpMessage<false>,       // FieldKind::kFkMessage
278       &MpMap<false>,           // FieldKind::kFkMap
279       &Error,                  // kSplitMask | FieldKind::kFkNone
280       &MpVarint<true>,         // kSplitMask | FieldKind::kFkVarint
281       &MpPackedVarint<true>,   // kSplitMask | FieldKind::kFkPackedVarint
282       &MpFixed<true>,          // kSplitMask | FieldKind::kFkFixed
283       &MpPackedFixed<true>,    // kSplitMask | FieldKind::kFkPackedFixed
284       &MpString<true>,         // kSplitMask | FieldKind::kFkString
285       &MpMessage<true>,        // kSplitMask | FieldKind::kFkMessage
286       &MpMap<true>,            // kSplitMask | FieldKind::kFkMap
287   };
288   // Just to be sure we got the order right, above.
289   static_assert(0 == FieldKind::kFkNone, "Invalid table order");
290   static_assert(1 == FieldKind::kFkVarint, "Invalid table order");
291   static_assert(2 == FieldKind::kFkPackedVarint, "Invalid table order");
292   static_assert(3 == FieldKind::kFkFixed, "Invalid table order");
293   static_assert(4 == FieldKind::kFkPackedFixed, "Invalid table order");
294   static_assert(5 == FieldKind::kFkString, "Invalid table order");
295   static_assert(6 == FieldKind::kFkMessage, "Invalid table order");
296   static_assert(7 == FieldKind::kFkMap, "Invalid table order");
297 
298   static_assert(8 == (+field_layout::kSplitMask | FieldKind::kFkNone),
299     "Invalid table order");
300   static_assert(9 == (+field_layout::kSplitMask | FieldKind::kFkVarint),
301     "Invalid table order");
302   static_assert(10 == (+field_layout::kSplitMask | FieldKind::kFkPackedVarint),
303     "Invalid table order");
304   static_assert(11 == (+field_layout::kSplitMask | FieldKind::kFkFixed),
305     "Invalid table order");
306   static_assert(12 == (+field_layout::kSplitMask | FieldKind::kFkPackedFixed),
307     "Invalid table order");
308   static_assert(13 == (+field_layout::kSplitMask | FieldKind::kFkString),
309     "Invalid table order");
310   static_assert(14 == (+field_layout::kSplitMask | FieldKind::kFkMessage),
311     "Invalid table order");
312   static_assert(15 == (+field_layout::kSplitMask | FieldKind::kFkMap),
313     "Invalid table order");
314 
315   TailCallParseFunc parse_fn = kMiniParseTable[field_type];
316   if (export_called_function) *test_out = {parse_fn, tag, entry};
317 
318   PROTOBUF_MUSTTAIL return parse_fn(PROTOBUF_TC_PARAM_PASS);
319 }
320 
MiniParse(PROTOBUF_TC_PARAM_NO_DATA_DECL)321 PROTOBUF_NOINLINE const char* TcParser::MiniParse(
322     PROTOBUF_TC_PARAM_NO_DATA_DECL) {
323   PROTOBUF_MUSTTAIL return MiniParse<false>(PROTOBUF_TC_PARAM_NO_DATA_PASS);
324 }
TestMiniParse(PROTOBUF_TC_PARAM_DECL)325 PROTOBUF_NOINLINE TcParser::TestMiniParseResult TcParser::TestMiniParse(
326     PROTOBUF_TC_PARAM_DECL) {
327   TestMiniParseResult result = {};
328   data.data = reinterpret_cast<uintptr_t>(&result);
329   result.ptr = MiniParse<true>(PROTOBUF_TC_PARAM_PASS);
330   return result;
331 }
332 
MpFallback(PROTOBUF_TC_PARAM_DECL)333 PROTOBUF_NOINLINE const char* TcParser::MpFallback(PROTOBUF_TC_PARAM_DECL) {
334   PROTOBUF_MUSTTAIL return table->fallback(PROTOBUF_TC_PARAM_PASS);
335 }
336 
337 template <typename TagType>
FastEndGroupImpl(PROTOBUF_TC_PARAM_DECL)338 const char* TcParser::FastEndGroupImpl(PROTOBUF_TC_PARAM_DECL) {
339   if (PROTOBUF_PREDICT_FALSE(data.coded_tag<TagType>() != 0)) {
340     PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_NO_DATA_PASS);
341   }
342   ctx->SetLastTag(data.decoded_tag());
343   ptr += sizeof(TagType);
344   PROTOBUF_MUSTTAIL return ToParseLoop(PROTOBUF_TC_PARAM_NO_DATA_PASS);
345 }
346 
FastEndG1(PROTOBUF_TC_PARAM_DECL)347 PROTOBUF_NOINLINE const char* TcParser::FastEndG1(PROTOBUF_TC_PARAM_DECL) {
348   PROTOBUF_MUSTTAIL return FastEndGroupImpl<uint8_t>(PROTOBUF_TC_PARAM_PASS);
349 }
FastEndG2(PROTOBUF_TC_PARAM_DECL)350 PROTOBUF_NOINLINE const char* TcParser::FastEndG2(PROTOBUF_TC_PARAM_DECL) {
351   PROTOBUF_MUSTTAIL return FastEndGroupImpl<uint16_t>(PROTOBUF_TC_PARAM_PASS);
352 }
353 
354 //////////////////////////////////////////////////////////////////////////////
355 // Message fields
356 //////////////////////////////////////////////////////////////////////////////
357 
NewMessage(const TcParseTableBase * table,Arena * arena)358 inline PROTOBUF_ALWAYS_INLINE MessageLite* TcParser::NewMessage(
359     const TcParseTableBase* table, Arena* arena) {
360   return table->class_data->New(arena);
361 }
362 
AddMessage(const TcParseTableBase * table,RepeatedPtrFieldBase & field)363 MessageLite* TcParser::AddMessage(const TcParseTableBase* table,
364                                   RepeatedPtrFieldBase& field) {
365   return static_cast<MessageLite*>(field.AddInternal(
366       [table](Arena* arena) { return NewMessage(table, arena); }));
367 }
368 
369 template <typename TagType, bool group_coding, bool aux_is_table>
SingularParseMessageAuxImpl(PROTOBUF_TC_PARAM_DECL)370 inline PROTOBUF_ALWAYS_INLINE const char* TcParser::SingularParseMessageAuxImpl(
371     PROTOBUF_TC_PARAM_DECL) {
372   PROTOBUF_PREFETCH_WITH_OFFSET(ptr, 192);
373   PROTOBUF_PREFETCH_WITH_OFFSET(ptr, 256);
374   if (PROTOBUF_PREDICT_FALSE(data.coded_tag<TagType>() != 0)) {
375     PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_NO_DATA_PASS);
376   }
377   auto saved_tag = UnalignedLoad<TagType>(ptr);
378   ptr += sizeof(TagType);
379   hasbits |= (uint64_t{1} << data.hasbit_idx());
380   SyncHasbits(msg, hasbits, table);
381   auto& field = RefAt<MessageLite*>(msg, data.offset());
382   const auto aux = *table->field_aux(data.aux_idx());
383   const auto* inner_table =
384       aux_is_table ? aux.table : aux.message_default()->GetTcParseTable();
385 
386   if (field == nullptr) {
387     field = NewMessage(inner_table, msg->GetArena());
388   }
389   const auto inner_loop = [&](const char* ptr) {
390     return ParseLoop(field, ptr, ctx, inner_table);
391   };
392   return group_coding
393              ? ctx->ParseGroupInlined(ptr, FastDecodeTag(saved_tag), inner_loop)
394              : ctx->ParseLengthDelimitedInlined(ptr, inner_loop);
395 }
396 
FastMdS1(PROTOBUF_TC_PARAM_DECL)397 PROTOBUF_NOINLINE const char* TcParser::FastMdS1(PROTOBUF_TC_PARAM_DECL) {
398   PROTOBUF_MUSTTAIL return SingularParseMessageAuxImpl<uint8_t, false, false>(
399       PROTOBUF_TC_PARAM_PASS);
400 }
401 
FastMdS2(PROTOBUF_TC_PARAM_DECL)402 PROTOBUF_NOINLINE const char* TcParser::FastMdS2(PROTOBUF_TC_PARAM_DECL) {
403   PROTOBUF_MUSTTAIL return SingularParseMessageAuxImpl<uint16_t, false, false>(
404       PROTOBUF_TC_PARAM_PASS);
405 }
406 
FastGdS1(PROTOBUF_TC_PARAM_DECL)407 PROTOBUF_NOINLINE const char* TcParser::FastGdS1(PROTOBUF_TC_PARAM_DECL) {
408   PROTOBUF_MUSTTAIL return SingularParseMessageAuxImpl<uint8_t, true, false>(
409       PROTOBUF_TC_PARAM_PASS);
410 }
411 
FastGdS2(PROTOBUF_TC_PARAM_DECL)412 PROTOBUF_NOINLINE const char* TcParser::FastGdS2(PROTOBUF_TC_PARAM_DECL) {
413   PROTOBUF_MUSTTAIL return SingularParseMessageAuxImpl<uint16_t, true, false>(
414       PROTOBUF_TC_PARAM_PASS);
415 }
416 
FastMtS1(PROTOBUF_TC_PARAM_DECL)417 PROTOBUF_NOINLINE const char* TcParser::FastMtS1(PROTOBUF_TC_PARAM_DECL) {
418   PROTOBUF_MUSTTAIL return SingularParseMessageAuxImpl<uint8_t, false, true>(
419       PROTOBUF_TC_PARAM_PASS);
420 }
421 
FastMtS2(PROTOBUF_TC_PARAM_DECL)422 PROTOBUF_NOINLINE const char* TcParser::FastMtS2(PROTOBUF_TC_PARAM_DECL) {
423   PROTOBUF_MUSTTAIL return SingularParseMessageAuxImpl<uint16_t, false, true>(
424       PROTOBUF_TC_PARAM_PASS);
425 }
426 
FastGtS1(PROTOBUF_TC_PARAM_DECL)427 PROTOBUF_NOINLINE const char* TcParser::FastGtS1(PROTOBUF_TC_PARAM_DECL) {
428   PROTOBUF_MUSTTAIL return SingularParseMessageAuxImpl<uint8_t, true, true>(
429       PROTOBUF_TC_PARAM_PASS);
430 }
431 
FastGtS2(PROTOBUF_TC_PARAM_DECL)432 PROTOBUF_NOINLINE const char* TcParser::FastGtS2(PROTOBUF_TC_PARAM_DECL) {
433   PROTOBUF_MUSTTAIL return SingularParseMessageAuxImpl<uint16_t, true, true>(
434       PROTOBUF_TC_PARAM_PASS);
435 }
436 
437 template <typename TagType>
LazyMessage(PROTOBUF_TC_PARAM_DECL)438 const char* TcParser::LazyMessage(PROTOBUF_TC_PARAM_DECL) {
439   ABSL_LOG(FATAL) << "Unimplemented";
440   return nullptr;
441 }
442 
FastMlS1(PROTOBUF_TC_PARAM_DECL)443 PROTOBUF_NOINLINE const char* TcParser::FastMlS1(PROTOBUF_TC_PARAM_DECL) {
444   PROTOBUF_MUSTTAIL return LazyMessage<uint8_t>(PROTOBUF_TC_PARAM_PASS);
445 }
446 
FastMlS2(PROTOBUF_TC_PARAM_DECL)447 PROTOBUF_NOINLINE const char* TcParser::FastMlS2(PROTOBUF_TC_PARAM_DECL) {
448   PROTOBUF_MUSTTAIL return LazyMessage<uint16_t>(PROTOBUF_TC_PARAM_PASS);
449 }
450 
451 template <typename TagType, bool group_coding, bool aux_is_table>
RepeatedParseMessageAuxImpl(PROTOBUF_TC_PARAM_DECL)452 inline PROTOBUF_ALWAYS_INLINE const char* TcParser::RepeatedParseMessageAuxImpl(
453     PROTOBUF_TC_PARAM_DECL) {
454   if (PROTOBUF_PREDICT_FALSE(data.coded_tag<TagType>() != 0)) {
455     PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_NO_DATA_PASS);
456   }
457   PROTOBUF_PREFETCH_WITH_OFFSET(ptr, 256);
458   const auto expected_tag = UnalignedLoad<TagType>(ptr);
459   const auto aux = *table->field_aux(data.aux_idx());
460   auto& field = RefAt<RepeatedPtrFieldBase>(msg, data.offset());
461   const TcParseTableBase* inner_table =
462       aux_is_table ? aux.table : aux.message_default()->GetTcParseTable();
463   do {
464     ptr += sizeof(TagType);
465     MessageLite* submsg = AddMessage(inner_table, field);
466     const auto inner_loop = [&](const char* ptr) {
467       return ParseLoop(submsg, ptr, ctx, inner_table);
468     };
469     ptr = group_coding ? ctx->ParseGroupInlined(
470                              ptr, FastDecodeTag(expected_tag), inner_loop)
471                        : ctx->ParseLengthDelimitedInlined(ptr, inner_loop);
472     if (PROTOBUF_PREDICT_FALSE(ptr == nullptr)) {
473       PROTOBUF_MUSTTAIL return Error(PROTOBUF_TC_PARAM_NO_DATA_PASS);
474     }
475     if (PROTOBUF_PREDICT_FALSE(!ctx->DataAvailable(ptr))) {
476       PROTOBUF_MUSTTAIL return ToParseLoop(PROTOBUF_TC_PARAM_NO_DATA_PASS);
477     }
478   } while (UnalignedLoad<TagType>(ptr) == expected_tag);
479 
480   PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_NO_DATA_PASS);
481 }
482 
FastMdR1(PROTOBUF_TC_PARAM_DECL)483 PROTOBUF_NOINLINE const char* TcParser::FastMdR1(PROTOBUF_TC_PARAM_DECL) {
484   PROTOBUF_MUSTTAIL return RepeatedParseMessageAuxImpl<uint8_t, false, false>(
485       PROTOBUF_TC_PARAM_PASS);
486 }
487 
FastMdR2(PROTOBUF_TC_PARAM_DECL)488 PROTOBUF_NOINLINE const char* TcParser::FastMdR2(PROTOBUF_TC_PARAM_DECL) {
489   PROTOBUF_MUSTTAIL return RepeatedParseMessageAuxImpl<uint16_t, false, false>(
490       PROTOBUF_TC_PARAM_PASS);
491 }
492 
FastGdR1(PROTOBUF_TC_PARAM_DECL)493 PROTOBUF_NOINLINE const char* TcParser::FastGdR1(PROTOBUF_TC_PARAM_DECL) {
494   PROTOBUF_MUSTTAIL return RepeatedParseMessageAuxImpl<uint8_t, true, false>(
495       PROTOBUF_TC_PARAM_PASS);
496 }
497 
FastGdR2(PROTOBUF_TC_PARAM_DECL)498 PROTOBUF_NOINLINE const char* TcParser::FastGdR2(PROTOBUF_TC_PARAM_DECL) {
499   PROTOBUF_MUSTTAIL return RepeatedParseMessageAuxImpl<uint16_t, true, false>(
500       PROTOBUF_TC_PARAM_PASS);
501 }
502 
FastMtR1(PROTOBUF_TC_PARAM_DECL)503 PROTOBUF_NOINLINE const char* TcParser::FastMtR1(PROTOBUF_TC_PARAM_DECL) {
504   PROTOBUF_MUSTTAIL return RepeatedParseMessageAuxImpl<uint8_t, false, true>(
505       PROTOBUF_TC_PARAM_PASS);
506 }
507 
FastMtR2(PROTOBUF_TC_PARAM_DECL)508 PROTOBUF_NOINLINE const char* TcParser::FastMtR2(PROTOBUF_TC_PARAM_DECL) {
509   PROTOBUF_MUSTTAIL return RepeatedParseMessageAuxImpl<uint16_t, false, true>(
510       PROTOBUF_TC_PARAM_PASS);
511 }
512 
FastGtR1(PROTOBUF_TC_PARAM_DECL)513 PROTOBUF_NOINLINE const char* TcParser::FastGtR1(PROTOBUF_TC_PARAM_DECL) {
514   PROTOBUF_MUSTTAIL return RepeatedParseMessageAuxImpl<uint8_t, true, true>(
515       PROTOBUF_TC_PARAM_PASS);
516 }
517 
FastGtR2(PROTOBUF_TC_PARAM_DECL)518 PROTOBUF_NOINLINE const char* TcParser::FastGtR2(PROTOBUF_TC_PARAM_DECL) {
519   PROTOBUF_MUSTTAIL return RepeatedParseMessageAuxImpl<uint16_t, true, true>(
520       PROTOBUF_TC_PARAM_PASS);
521 }
522 
523 //////////////////////////////////////////////////////////////////////////////
524 // Fixed fields
525 //////////////////////////////////////////////////////////////////////////////
526 
527 template <typename LayoutType, typename TagType>
SingularFixed(PROTOBUF_TC_PARAM_DECL)528 inline PROTOBUF_ALWAYS_INLINE const char* TcParser::SingularFixed(
529     PROTOBUF_TC_PARAM_DECL) {
530   if (PROTOBUF_PREDICT_FALSE(data.coded_tag<TagType>() != 0)) {
531     PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_NO_DATA_PASS);
532   }
533   ptr += sizeof(TagType);  // Consume tag
534   hasbits |= (uint64_t{1} << data.hasbit_idx());
535   RefAt<LayoutType>(msg, data.offset()) = UnalignedLoad<LayoutType>(ptr);
536   ptr += sizeof(LayoutType);
537   PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_NO_DATA_PASS);
538 }
539 
FastF32S1(PROTOBUF_TC_PARAM_DECL)540 PROTOBUF_NOINLINE const char* TcParser::FastF32S1(PROTOBUF_TC_PARAM_DECL) {
541   PROTOBUF_MUSTTAIL return SingularFixed<uint32_t, uint8_t>(
542       PROTOBUF_TC_PARAM_PASS);
543 }
FastF32S2(PROTOBUF_TC_PARAM_DECL)544 PROTOBUF_NOINLINE const char* TcParser::FastF32S2(PROTOBUF_TC_PARAM_DECL) {
545   PROTOBUF_MUSTTAIL return SingularFixed<uint32_t, uint16_t>(
546       PROTOBUF_TC_PARAM_PASS);
547 }
FastF64S1(PROTOBUF_TC_PARAM_DECL)548 PROTOBUF_NOINLINE const char* TcParser::FastF64S1(PROTOBUF_TC_PARAM_DECL) {
549   PROTOBUF_MUSTTAIL return SingularFixed<uint64_t, uint8_t>(
550       PROTOBUF_TC_PARAM_PASS);
551 }
FastF64S2(PROTOBUF_TC_PARAM_DECL)552 PROTOBUF_NOINLINE const char* TcParser::FastF64S2(PROTOBUF_TC_PARAM_DECL) {
553   PROTOBUF_MUSTTAIL return SingularFixed<uint64_t, uint16_t>(
554       PROTOBUF_TC_PARAM_PASS);
555 }
556 
557 template <typename LayoutType, typename TagType>
RepeatedFixed(PROTOBUF_TC_PARAM_DECL)558 inline PROTOBUF_ALWAYS_INLINE const char* TcParser::RepeatedFixed(
559     PROTOBUF_TC_PARAM_DECL) {
560   if (PROTOBUF_PREDICT_FALSE(data.coded_tag<TagType>() != 0)) {
561     PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_NO_DATA_PASS);
562   }
563   auto& field = RefAt<RepeatedField<LayoutType>>(msg, data.offset());
564   const auto tag = UnalignedLoad<TagType>(ptr);
565   do {
566     field.Add(UnalignedLoad<LayoutType>(ptr + sizeof(TagType)));
567     ptr += sizeof(TagType) + sizeof(LayoutType);
568     if (PROTOBUF_PREDICT_FALSE(!ctx->DataAvailable(ptr))) {
569       PROTOBUF_MUSTTAIL return ToParseLoop(PROTOBUF_TC_PARAM_NO_DATA_PASS);
570     }
571   } while (UnalignedLoad<TagType>(ptr) == tag);
572   PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_NO_DATA_PASS);
573 }
574 
FastF32R1(PROTOBUF_TC_PARAM_DECL)575 PROTOBUF_NOINLINE const char* TcParser::FastF32R1(PROTOBUF_TC_PARAM_DECL) {
576   PROTOBUF_MUSTTAIL return RepeatedFixed<uint32_t, uint8_t>(
577       PROTOBUF_TC_PARAM_PASS);
578 }
FastF32R2(PROTOBUF_TC_PARAM_DECL)579 PROTOBUF_NOINLINE const char* TcParser::FastF32R2(PROTOBUF_TC_PARAM_DECL) {
580   PROTOBUF_MUSTTAIL return RepeatedFixed<uint32_t, uint16_t>(
581       PROTOBUF_TC_PARAM_PASS);
582 }
FastF64R1(PROTOBUF_TC_PARAM_DECL)583 PROTOBUF_NOINLINE const char* TcParser::FastF64R1(PROTOBUF_TC_PARAM_DECL) {
584   PROTOBUF_MUSTTAIL return RepeatedFixed<uint64_t, uint8_t>(
585       PROTOBUF_TC_PARAM_PASS);
586 }
FastF64R2(PROTOBUF_TC_PARAM_DECL)587 PROTOBUF_NOINLINE const char* TcParser::FastF64R2(PROTOBUF_TC_PARAM_DECL) {
588   PROTOBUF_MUSTTAIL return RepeatedFixed<uint64_t, uint16_t>(
589       PROTOBUF_TC_PARAM_PASS);
590 }
591 
592 template <typename LayoutType, typename TagType>
PackedFixed(PROTOBUF_TC_PARAM_DECL)593 inline PROTOBUF_ALWAYS_INLINE const char* TcParser::PackedFixed(
594     PROTOBUF_TC_PARAM_DECL) {
595   if (PROTOBUF_PREDICT_FALSE(data.coded_tag<TagType>() != 0)) {
596     PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_NO_DATA_PASS);
597   }
598   ptr += sizeof(TagType);
599   // Since ctx->ReadPackedFixed does not use TailCall<> or Return<>, sync any
600   // pending hasbits now:
601   SyncHasbits(msg, hasbits, table);
602   auto& field = RefAt<RepeatedField<LayoutType>>(msg, data.offset());
603   int size = ReadSize(&ptr);
604   // TODO: add a tailcalling variant of ReadPackedFixed.
605   return ctx->ReadPackedFixed(ptr, size,
606                               static_cast<RepeatedField<LayoutType>*>(&field));
607 }
608 
FastF32P1(PROTOBUF_TC_PARAM_DECL)609 PROTOBUF_NOINLINE const char* TcParser::FastF32P1(PROTOBUF_TC_PARAM_DECL) {
610   PROTOBUF_MUSTTAIL return PackedFixed<uint32_t, uint8_t>(
611       PROTOBUF_TC_PARAM_PASS);
612 }
FastF32P2(PROTOBUF_TC_PARAM_DECL)613 PROTOBUF_NOINLINE const char* TcParser::FastF32P2(PROTOBUF_TC_PARAM_DECL) {
614   PROTOBUF_MUSTTAIL return PackedFixed<uint32_t, uint16_t>(
615       PROTOBUF_TC_PARAM_PASS);
616 }
FastF64P1(PROTOBUF_TC_PARAM_DECL)617 PROTOBUF_NOINLINE const char* TcParser::FastF64P1(PROTOBUF_TC_PARAM_DECL) {
618   PROTOBUF_MUSTTAIL return PackedFixed<uint64_t, uint8_t>(
619       PROTOBUF_TC_PARAM_PASS);
620 }
FastF64P2(PROTOBUF_TC_PARAM_DECL)621 PROTOBUF_NOINLINE const char* TcParser::FastF64P2(PROTOBUF_TC_PARAM_DECL) {
622   PROTOBUF_MUSTTAIL return PackedFixed<uint64_t, uint16_t>(
623       PROTOBUF_TC_PARAM_PASS);
624 }
625 
626 //////////////////////////////////////////////////////////////////////////////
627 // Varint fields
628 //////////////////////////////////////////////////////////////////////////////
629 
630 namespace {
631 
632 template <typename Type>
ParseVarint(const char * p,Type * value)633 inline PROTOBUF_ALWAYS_INLINE const char* ParseVarint(const char* p,
634                                                       Type* value) {
635   static_assert(sizeof(Type) == 4 || sizeof(Type) == 8,
636                 "Only [u]int32_t and [u]int64_t please");
637 #ifdef __aarch64__
638   // The VarintParse parser has a faster implementation on ARM.
639   absl::conditional_t<sizeof(Type) == 4, uint32_t, uint64_t> tmp;
640   p = VarintParse(p, &tmp);
641   if (p != nullptr) {
642     *value = tmp;
643   }
644   return p;
645 #endif
646   int64_t res;
647   p = ShiftMixParseVarint<Type>(p, res);
648   *value = res;
649   return p;
650 }
651 
// Varint reader specialized for `bool`. Booleans are almost always encoded as
// a single 0x00 or 0x01 byte, so that case gets a dedicated fast path; any
// other encoding goes through a compact slow path.
//
// Returns the position just past the varint, or nullptr when the varint is
// still unterminated after ten bytes. On success `*value` is true iff any of
// the payload bits that were examined is set.
inline PROTOBUF_ALWAYS_INLINE const char* ParseVarint(const char* p,
                                                      bool* value) {
  unsigned char byte = static_cast<unsigned char>(*p++);
  if (PROTOBUF_PREDICT_TRUE(byte == 0 || byte == 1)) {
    // Overwhelmingly common case: keep it as cheap as possible.
    if (sizeof(byte) == sizeof(*value)) {
      memcpy(value, &byte, 1);
    } else {
      // The C++ standard does not promise that `bool` is one byte wide.
      *value = byte;
    }
    return p;
  }

  // Rare case, where only code size matters. It must still be supported
  // because a field may be changed from int32/int64 to bool, so arbitrary
  // (possibly multi-byte) varints can show up here.
  //
  // Each round drops the continuation bit from the accumulator and ORs in the
  // next byte, so the low seven bits collect "was any payload bit set" while
  // the top bit tracks whether the varint continues.
  constexpr int kMaxExtraBytes = 9;  // A varint is at most 10 bytes long.
  for (int extra = 1; PROTOBUF_PREDICT_FALSE(byte & 0x80); ++extra) {
    if (extra > kMaxExtraBytes) {
      // The tenth byte still had its continuation bit set.
      return nullptr;
    }
    unsigned char next = static_cast<unsigned char>(*p++);
    if (extra == kMaxExtraBytes) {
      // Of the tenth byte only the continuation bit and the lowest payload
      // bit are of interest.
      next &= 0x81;
    }
    byte = (byte - 0x80) | next;
  }
  *value = byte;
  return p;
}
706 
// Generic case: the value is handed back unchanged. Zigzag decoding itself is
// provided by the explicit `<int32_t, true>` and `<int64_t, true>`
// specializations.
template <typename FieldType, bool zigzag = false>
inline FieldType ZigZagDecodeHelper(FieldType value) {
  return value;
}
711 
712 template <>
ZigZagDecodeHelper(int32_t value)713 inline int32_t ZigZagDecodeHelper<int32_t, true>(int32_t value) {
714   return WireFormatLite::ZigZagDecode32(value);
715 }
716 
717 template <>
ZigZagDecodeHelper(int64_t value)718 inline int64_t ZigZagDecodeHelper<int64_t, true>(int64_t value) {
719   return WireFormatLite::ZigZagDecode64(value);
720 }
721 
722 // Prefetch the enum data, if necessary.
723 // We can issue the prefetch before we start parsing the ints.
PrefetchEnumData(uint16_t xform_val,TcParseTableBase::FieldAux aux)724 inline PROTOBUF_ALWAYS_INLINE void PrefetchEnumData(
725     uint16_t xform_val, TcParseTableBase::FieldAux aux) {
726 }
727 
728 // When `xform_val` is a constant, we want to inline `ValidateEnum` because it
729 // is either dropped when not a kTvEnum, or useful when it is.
730 //
731 // When it is not a constant, we do not inline `ValidateEnum` because it bloats
732 // the code around it and pessimizes the non-enum and kTvRange cases which are
733 // way more common than the kTvEnum cases. It is also called from places that
734 // already have out-of-line functions (like MpVarint) so an extra out-of-line
735 // call to `ValidateEnum` does not affect much.
EnumIsValidAux(int32_t val,uint16_t xform_val,TcParseTableBase::FieldAux aux)736 inline PROTOBUF_ALWAYS_INLINE bool EnumIsValidAux(
737     int32_t val, uint16_t xform_val, TcParseTableBase::FieldAux aux) {
738   if (xform_val == field_layout::kTvRange) {
739     auto lo = aux.enum_range.start;
740     return lo <= val && val < (lo + aux.enum_range.length);
741   }
742   if (PROTOBUF_BUILTIN_CONSTANT_P(xform_val)) {
743     return internal::ValidateEnumInlined(val, aux.enum_data);
744   } else {
745     return internal::ValidateEnum(val, aux.enum_data);
746   }
747 }
748 
749 }  // namespace
750 
751 template <typename FieldType, typename TagType, bool zigzag>
SingularVarint(PROTOBUF_TC_PARAM_DECL)752 inline PROTOBUF_ALWAYS_INLINE const char* TcParser::SingularVarint(
753     PROTOBUF_TC_PARAM_DECL) {
754   if (PROTOBUF_PREDICT_FALSE(data.coded_tag<TagType>() != 0)) {
755     PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_NO_DATA_PASS);
756   }
757   ptr += sizeof(TagType);  // Consume tag
758   hasbits |= (uint64_t{1} << data.hasbit_idx());
759 
760   // clang isn't smart enough to be able to only conditionally save
761   // registers to the stack, so we turn the integer-greater-than-128
762   // case into a separate routine.
763   if (PROTOBUF_PREDICT_FALSE(static_cast<int8_t>(*ptr) < 0)) {
764     PROTOBUF_MUSTTAIL return SingularVarBigint<FieldType, TagType, zigzag>(
765         PROTOBUF_TC_PARAM_PASS);
766   }
767 
768   RefAt<FieldType>(msg, data.offset()) =
769       ZigZagDecodeHelper<FieldType, zigzag>(static_cast<uint8_t>(*ptr++));
770   PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_NO_DATA_PASS);
771 }
772 
773 template <typename FieldType, typename TagType, bool zigzag>
SingularVarBigint(PROTOBUF_TC_PARAM_DECL)774 PROTOBUF_NOINLINE const char* TcParser::SingularVarBigint(
775     PROTOBUF_TC_PARAM_DECL) {
776   // For some reason clang wants to save 5 registers to the stack here,
777   // but we only need four for this code, so save the data we don't need
778   // to the stack.  Happily, saving them this way uses regular store
779   // instructions rather than PUSH/POP, which saves time at the cost of greater
780   // code size, but for this heavily-used piece of code, that's fine.
781   struct Spill {
782     uint64_t field_data;
783     ::google::protobuf::MessageLite* msg;
784     const ::google::protobuf::internal::TcParseTableBase* table;
785     uint64_t hasbits;
786   };
787   Spill spill = {data.data, msg, table, hasbits};
788 #if defined(__GNUC__)
789   // This empty asm block convinces the compiler that the contents of spill may
790   // have changed, and thus can't be cached in registers.  It's similar to, but
791   // more optimal than, the effect of declaring it "volatile".
792   asm("" : "+m"(spill));
793 #endif
794 
795   uint64_t tmp;
796   PROTOBUF_ASSUME(static_cast<int8_t>(*ptr) < 0);
797   ptr = ParseVarint(ptr, &tmp);
798 
799   data.data = spill.field_data;
800   msg = spill.msg;
801   table = spill.table;
802   hasbits = spill.hasbits;
803 
804   if (PROTOBUF_PREDICT_FALSE(ptr == nullptr)) {
805     PROTOBUF_MUSTTAIL return Error(PROTOBUF_TC_PARAM_NO_DATA_PASS);
806   }
807   RefAt<FieldType>(msg, data.offset()) =
808       ZigZagDecodeHelper<FieldType, zigzag>(tmp);
809   PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_NO_DATA_PASS);
810 }
811 
812 template <typename FieldType>
FastVarintS1(PROTOBUF_TC_PARAM_DECL)813 inline PROTOBUF_ALWAYS_INLINE const char* TcParser::FastVarintS1(
814     PROTOBUF_TC_PARAM_DECL) {
815   using TagType = uint8_t;
816   if (PROTOBUF_PREDICT_FALSE(data.coded_tag<TagType>() != 0)) {
817     PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_NO_DATA_PASS);
818   }
819   int64_t res;
820   ptr = ShiftMixParseVarint<FieldType>(ptr + sizeof(TagType), res);
821   if (PROTOBUF_PREDICT_FALSE(ptr == nullptr)) {
822     PROTOBUF_MUSTTAIL return Error(PROTOBUF_TC_PARAM_NO_DATA_PASS);
823   }
824   hasbits |= (uint64_t{1} << data.hasbit_idx());
825   RefAt<FieldType>(msg, data.offset()) = res;
826   PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_NO_DATA_PASS);
827 }
828 
FastV8S1(PROTOBUF_TC_PARAM_DECL)829 PROTOBUF_NOINLINE const char* TcParser::FastV8S1(PROTOBUF_TC_PARAM_DECL) {
830   using TagType = uint8_t;
831 
832   // Special case for a varint bool field with a tag of 1 byte:
833   // The coded_tag() field will actually contain the value too and we can check
834   // both at the same time.
835   auto coded_tag = data.coded_tag<uint16_t>();
836   if (PROTOBUF_PREDICT_TRUE(coded_tag == 0x0000 || coded_tag == 0x0100)) {
837     auto& field = RefAt<bool>(msg, data.offset());
838     // Note: we use `data.data` because Clang generates suboptimal code when
839     // using coded_tag.
840     // In x86_64 this uses the CH register to read the second byte out of
841     // `data`.
842     uint8_t value = data.data >> 8;
843     // The assume allows using a mov instead of test+setne.
844     PROTOBUF_ASSUME(value <= 1);
845     field = static_cast<bool>(value);
846 
847     ptr += sizeof(TagType) + 1;  // Consume the tag and the value.
848     hasbits |= (uint64_t{1} << data.hasbit_idx());
849 
850     PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_NO_DATA_PASS);
851   }
852 
853   // If it didn't match above either the tag is wrong, or the value is encoded
854   // non-canonically.
855   // Jump to MiniParse as wrong tag is the most probable reason.
856   PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_NO_DATA_PASS);
857 }
858 
FastV8S2(PROTOBUF_TC_PARAM_DECL)859 PROTOBUF_NOINLINE const char* TcParser::FastV8S2(PROTOBUF_TC_PARAM_DECL) {
860   PROTOBUF_MUSTTAIL return SingularVarint<bool, uint16_t>(
861       PROTOBUF_TC_PARAM_PASS);
862 }
FastV32S1(PROTOBUF_TC_PARAM_DECL)863 PROTOBUF_NOINLINE const char* TcParser::FastV32S1(PROTOBUF_TC_PARAM_DECL) {
864   PROTOBUF_MUSTTAIL return FastVarintS1<uint32_t>(PROTOBUF_TC_PARAM_PASS);
865 }
FastV32S2(PROTOBUF_TC_PARAM_DECL)866 PROTOBUF_NOINLINE const char* TcParser::FastV32S2(PROTOBUF_TC_PARAM_DECL) {
867   PROTOBUF_MUSTTAIL return SingularVarint<uint32_t, uint16_t>(
868       PROTOBUF_TC_PARAM_PASS);
869 }
FastV64S1(PROTOBUF_TC_PARAM_DECL)870 PROTOBUF_NOINLINE const char* TcParser::FastV64S1(PROTOBUF_TC_PARAM_DECL) {
871   PROTOBUF_MUSTTAIL return FastVarintS1<uint64_t>(PROTOBUF_TC_PARAM_PASS);
872 }
FastV64S2(PROTOBUF_TC_PARAM_DECL)873 PROTOBUF_NOINLINE const char* TcParser::FastV64S2(PROTOBUF_TC_PARAM_DECL) {
874   PROTOBUF_MUSTTAIL return SingularVarint<uint64_t, uint16_t>(
875       PROTOBUF_TC_PARAM_PASS);
876 }
877 
FastZ32S1(PROTOBUF_TC_PARAM_DECL)878 PROTOBUF_NOINLINE const char* TcParser::FastZ32S1(PROTOBUF_TC_PARAM_DECL) {
879   PROTOBUF_MUSTTAIL return SingularVarint<int32_t, uint8_t, true>(
880       PROTOBUF_TC_PARAM_PASS);
881 }
FastZ32S2(PROTOBUF_TC_PARAM_DECL)882 PROTOBUF_NOINLINE const char* TcParser::FastZ32S2(PROTOBUF_TC_PARAM_DECL) {
883   PROTOBUF_MUSTTAIL return SingularVarint<int32_t, uint16_t, true>(
884       PROTOBUF_TC_PARAM_PASS);
885 }
FastZ64S1(PROTOBUF_TC_PARAM_DECL)886 PROTOBUF_NOINLINE const char* TcParser::FastZ64S1(PROTOBUF_TC_PARAM_DECL) {
887   PROTOBUF_MUSTTAIL return SingularVarint<int64_t, uint8_t, true>(
888       PROTOBUF_TC_PARAM_PASS);
889 }
FastZ64S2(PROTOBUF_TC_PARAM_DECL)890 PROTOBUF_NOINLINE const char* TcParser::FastZ64S2(PROTOBUF_TC_PARAM_DECL) {
891   PROTOBUF_MUSTTAIL return SingularVarint<int64_t, uint16_t, true>(
892       PROTOBUF_TC_PARAM_PASS);
893 }
894 
895 template <typename FieldType, typename TagType, bool zigzag>
RepeatedVarint(PROTOBUF_TC_PARAM_DECL)896 inline PROTOBUF_ALWAYS_INLINE const char* TcParser::RepeatedVarint(
897     PROTOBUF_TC_PARAM_DECL) {
898   if (PROTOBUF_PREDICT_FALSE(data.coded_tag<TagType>() != 0)) {
899     PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_NO_DATA_PASS);
900   }
901   auto& field = RefAt<RepeatedField<FieldType>>(msg, data.offset());
902   const auto expected_tag = UnalignedLoad<TagType>(ptr);
903   do {
904     ptr += sizeof(TagType);
905     FieldType tmp;
906     ptr = ParseVarint(ptr, &tmp);
907     if (PROTOBUF_PREDICT_FALSE(ptr == nullptr)) {
908       PROTOBUF_MUSTTAIL return Error(PROTOBUF_TC_PARAM_NO_DATA_PASS);
909     }
910     field.Add(ZigZagDecodeHelper<FieldType, zigzag>(tmp));
911     if (PROTOBUF_PREDICT_FALSE(!ctx->DataAvailable(ptr))) {
912       PROTOBUF_MUSTTAIL return ToParseLoop(PROTOBUF_TC_PARAM_NO_DATA_PASS);
913     }
914   } while (UnalignedLoad<TagType>(ptr) == expected_tag);
915   PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_NO_DATA_PASS);
916 }
917 
FastV8R1(PROTOBUF_TC_PARAM_DECL)918 PROTOBUF_NOINLINE const char* TcParser::FastV8R1(PROTOBUF_TC_PARAM_DECL) {
919   PROTOBUF_MUSTTAIL return RepeatedVarint<bool, uint8_t>(
920       PROTOBUF_TC_PARAM_PASS);
921 }
FastV8R2(PROTOBUF_TC_PARAM_DECL)922 PROTOBUF_NOINLINE const char* TcParser::FastV8R2(PROTOBUF_TC_PARAM_DECL) {
923   PROTOBUF_MUSTTAIL return RepeatedVarint<bool, uint16_t>(
924       PROTOBUF_TC_PARAM_PASS);
925 }
FastV32R1(PROTOBUF_TC_PARAM_DECL)926 PROTOBUF_NOINLINE const char* TcParser::FastV32R1(PROTOBUF_TC_PARAM_DECL) {
927   PROTOBUF_MUSTTAIL return RepeatedVarint<uint32_t, uint8_t>(
928       PROTOBUF_TC_PARAM_PASS);
929 }
FastV32R2(PROTOBUF_TC_PARAM_DECL)930 PROTOBUF_NOINLINE const char* TcParser::FastV32R2(PROTOBUF_TC_PARAM_DECL) {
931   PROTOBUF_MUSTTAIL return RepeatedVarint<uint32_t, uint16_t>(
932       PROTOBUF_TC_PARAM_PASS);
933 }
FastV64R1(PROTOBUF_TC_PARAM_DECL)934 PROTOBUF_NOINLINE const char* TcParser::FastV64R1(PROTOBUF_TC_PARAM_DECL) {
935   PROTOBUF_MUSTTAIL return RepeatedVarint<uint64_t, uint8_t>(
936       PROTOBUF_TC_PARAM_PASS);
937 }
FastV64R2(PROTOBUF_TC_PARAM_DECL)938 PROTOBUF_NOINLINE const char* TcParser::FastV64R2(PROTOBUF_TC_PARAM_DECL) {
939   PROTOBUF_MUSTTAIL return RepeatedVarint<uint64_t, uint16_t>(
940       PROTOBUF_TC_PARAM_PASS);
941 }
942 
FastZ32R1(PROTOBUF_TC_PARAM_DECL)943 PROTOBUF_NOINLINE const char* TcParser::FastZ32R1(PROTOBUF_TC_PARAM_DECL) {
944   PROTOBUF_MUSTTAIL return RepeatedVarint<int32_t, uint8_t, true>(
945       PROTOBUF_TC_PARAM_PASS);
946 }
FastZ32R2(PROTOBUF_TC_PARAM_DECL)947 PROTOBUF_NOINLINE const char* TcParser::FastZ32R2(PROTOBUF_TC_PARAM_DECL) {
948   PROTOBUF_MUSTTAIL return RepeatedVarint<int32_t, uint16_t, true>(
949       PROTOBUF_TC_PARAM_PASS);
950 }
FastZ64R1(PROTOBUF_TC_PARAM_DECL)951 PROTOBUF_NOINLINE const char* TcParser::FastZ64R1(PROTOBUF_TC_PARAM_DECL) {
952   PROTOBUF_MUSTTAIL return RepeatedVarint<int64_t, uint8_t, true>(
953       PROTOBUF_TC_PARAM_PASS);
954 }
FastZ64R2(PROTOBUF_TC_PARAM_DECL)955 PROTOBUF_NOINLINE const char* TcParser::FastZ64R2(PROTOBUF_TC_PARAM_DECL) {
956   PROTOBUF_MUSTTAIL return RepeatedVarint<int64_t, uint16_t, true>(
957       PROTOBUF_TC_PARAM_PASS);
958 }
959 
960 template <typename FieldType, typename TagType, bool zigzag>
PackedVarint(PROTOBUF_TC_PARAM_DECL)961 inline PROTOBUF_ALWAYS_INLINE const char* TcParser::PackedVarint(
962     PROTOBUF_TC_PARAM_DECL) {
963   if (PROTOBUF_PREDICT_FALSE(data.coded_tag<TagType>() != 0)) {
964     PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_NO_DATA_PASS);
965   }
966   ptr += sizeof(TagType);
967   // Since ctx->ReadPackedVarint does not use TailCall or Return, sync any
968   // pending hasbits now:
969   SyncHasbits(msg, hasbits, table);
970   auto* field = &RefAt<RepeatedField<FieldType>>(msg, data.offset());
971   return ctx->ReadPackedVarint(ptr, [field](uint64_t varint) {
972     FieldType val;
973     if (zigzag) {
974       if (sizeof(FieldType) == 8) {
975         val = WireFormatLite::ZigZagDecode64(varint);
976       } else {
977         val = WireFormatLite::ZigZagDecode32(varint);
978       }
979     } else {
980       val = varint;
981     }
982     field->Add(val);
983   });
984 }
985 
FastV8P1(PROTOBUF_TC_PARAM_DECL)986 PROTOBUF_NOINLINE const char* TcParser::FastV8P1(PROTOBUF_TC_PARAM_DECL) {
987   PROTOBUF_MUSTTAIL return PackedVarint<bool, uint8_t>(PROTOBUF_TC_PARAM_PASS);
988 }
FastV8P2(PROTOBUF_TC_PARAM_DECL)989 PROTOBUF_NOINLINE const char* TcParser::FastV8P2(PROTOBUF_TC_PARAM_DECL) {
990   PROTOBUF_MUSTTAIL return PackedVarint<bool, uint16_t>(PROTOBUF_TC_PARAM_PASS);
991 }
FastV32P1(PROTOBUF_TC_PARAM_DECL)992 PROTOBUF_NOINLINE const char* TcParser::FastV32P1(PROTOBUF_TC_PARAM_DECL) {
993   PROTOBUF_MUSTTAIL return PackedVarint<uint32_t, uint8_t>(
994       PROTOBUF_TC_PARAM_PASS);
995 }
FastV32P2(PROTOBUF_TC_PARAM_DECL)996 PROTOBUF_NOINLINE const char* TcParser::FastV32P2(PROTOBUF_TC_PARAM_DECL) {
997   PROTOBUF_MUSTTAIL return PackedVarint<uint32_t, uint16_t>(
998       PROTOBUF_TC_PARAM_PASS);
999 }
FastV64P1(PROTOBUF_TC_PARAM_DECL)1000 PROTOBUF_NOINLINE const char* TcParser::FastV64P1(PROTOBUF_TC_PARAM_DECL) {
1001   PROTOBUF_MUSTTAIL return PackedVarint<uint64_t, uint8_t>(
1002       PROTOBUF_TC_PARAM_PASS);
1003 }
FastV64P2(PROTOBUF_TC_PARAM_DECL)1004 PROTOBUF_NOINLINE const char* TcParser::FastV64P2(PROTOBUF_TC_PARAM_DECL) {
1005   PROTOBUF_MUSTTAIL return PackedVarint<uint64_t, uint16_t>(
1006       PROTOBUF_TC_PARAM_PASS);
1007 }
1008 
FastZ32P1(PROTOBUF_TC_PARAM_DECL)1009 PROTOBUF_NOINLINE const char* TcParser::FastZ32P1(PROTOBUF_TC_PARAM_DECL) {
1010   PROTOBUF_MUSTTAIL return PackedVarint<int32_t, uint8_t, true>(
1011       PROTOBUF_TC_PARAM_PASS);
1012 }
FastZ32P2(PROTOBUF_TC_PARAM_DECL)1013 PROTOBUF_NOINLINE const char* TcParser::FastZ32P2(PROTOBUF_TC_PARAM_DECL) {
1014   PROTOBUF_MUSTTAIL return PackedVarint<int32_t, uint16_t, true>(
1015       PROTOBUF_TC_PARAM_PASS);
1016 }
FastZ64P1(PROTOBUF_TC_PARAM_DECL)1017 PROTOBUF_NOINLINE const char* TcParser::FastZ64P1(PROTOBUF_TC_PARAM_DECL) {
1018   PROTOBUF_MUSTTAIL return PackedVarint<int64_t, uint8_t, true>(
1019       PROTOBUF_TC_PARAM_PASS);
1020 }
FastZ64P2(PROTOBUF_TC_PARAM_DECL)1021 PROTOBUF_NOINLINE const char* TcParser::FastZ64P2(PROTOBUF_TC_PARAM_DECL) {
1022   PROTOBUF_MUSTTAIL return PackedVarint<int64_t, uint16_t, true>(
1023       PROTOBUF_TC_PARAM_PASS);
1024 }
1025 
1026 //////////////////////////////////////////////////////////////////////////////
1027 // Enum fields
1028 //////////////////////////////////////////////////////////////////////////////
1029 
FastUnknownEnumFallback(PROTOBUF_TC_PARAM_DECL)1030 PROTOBUF_NOINLINE const char* TcParser::FastUnknownEnumFallback(
1031     PROTOBUF_TC_PARAM_DECL) {
1032   // Skip MiniParse/fallback and insert the element directly into the unknown
1033   // field set. We also normalize the value into an int32 as we do for known
1034   // enum values.
1035   uint32_t tag;
1036   ptr = ReadTag(ptr, &tag);
1037   if (PROTOBUF_PREDICT_FALSE(ptr == nullptr)) {
1038     PROTOBUF_MUSTTAIL return Error(PROTOBUF_TC_PARAM_NO_DATA_PASS);
1039   }
1040   uint64_t tmp;
1041   ptr = ParseVarint(ptr, &tmp);
1042   if (PROTOBUF_PREDICT_FALSE(ptr == nullptr)) {
1043     PROTOBUF_MUSTTAIL return Error(PROTOBUF_TC_PARAM_NO_DATA_PASS);
1044   }
1045   AddUnknownEnum(msg, table, tag, static_cast<int32_t>(tmp));
1046   PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_NO_DATA_PASS);
1047 }
1048 
MpUnknownEnumFallback(PROTOBUF_TC_PARAM_DECL)1049 PROTOBUF_NOINLINE const char* TcParser::MpUnknownEnumFallback(
1050     PROTOBUF_TC_PARAM_DECL) {
1051   // Like FastUnknownEnumFallback, but with the Mp ABI.
1052   uint32_t tag = data.tag();
1053   uint64_t tmp;
1054   ptr = ParseVarint(ptr, &tmp);
1055   if (PROTOBUF_PREDICT_FALSE(ptr == nullptr)) {
1056     PROTOBUF_MUSTTAIL return Error(PROTOBUF_TC_PARAM_NO_DATA_PASS);
1057   }
1058   AddUnknownEnum(msg, table, tag, static_cast<int32_t>(tmp));
1059   PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_NO_DATA_PASS);
1060 }
1061 
1062 template <typename TagType, uint16_t xform_val>
SingularEnum(PROTOBUF_TC_PARAM_DECL)1063 inline PROTOBUF_ALWAYS_INLINE const char* TcParser::SingularEnum(
1064     PROTOBUF_TC_PARAM_DECL) {
1065   if (PROTOBUF_PREDICT_FALSE(data.coded_tag<TagType>() != 0)) {
1066     PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_NO_DATA_PASS);
1067   }
1068   const TcParseTableBase::FieldAux aux = *table->field_aux(data.aux_idx());
1069   PrefetchEnumData(xform_val, aux);
1070   const char* ptr2 = ptr;  // Save for unknown enum case
1071   ptr += sizeof(TagType);  // Consume tag
1072   uint64_t tmp;
1073   ptr = ParseVarint(ptr, &tmp);
1074   if (PROTOBUF_PREDICT_FALSE(ptr == nullptr)) {
1075     PROTOBUF_MUSTTAIL return Error(PROTOBUF_TC_PARAM_NO_DATA_PASS);
1076   }
1077   if (PROTOBUF_PREDICT_FALSE(
1078           !EnumIsValidAux(static_cast<int32_t>(tmp), xform_val, aux))) {
1079     ptr = ptr2;
1080     PROTOBUF_MUSTTAIL return FastUnknownEnumFallback(PROTOBUF_TC_PARAM_PASS);
1081   }
1082   hasbits |= (uint64_t{1} << data.hasbit_idx());
1083   RefAt<int32_t>(msg, data.offset()) = tmp;
1084   PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_NO_DATA_PASS);
1085 }
1086 
FastErS1(PROTOBUF_TC_PARAM_DECL)1087 PROTOBUF_NOINLINE const char* TcParser::FastErS1(PROTOBUF_TC_PARAM_DECL) {
1088   PROTOBUF_MUSTTAIL return SingularEnum<uint8_t, field_layout::kTvRange>(
1089       PROTOBUF_TC_PARAM_PASS);
1090 }
FastErS2(PROTOBUF_TC_PARAM_DECL)1091 PROTOBUF_NOINLINE const char* TcParser::FastErS2(PROTOBUF_TC_PARAM_DECL) {
1092   PROTOBUF_MUSTTAIL return SingularEnum<uint16_t, field_layout::kTvRange>(
1093       PROTOBUF_TC_PARAM_PASS);
1094 }
FastEvS1(PROTOBUF_TC_PARAM_DECL)1095 PROTOBUF_NOINLINE const char* TcParser::FastEvS1(PROTOBUF_TC_PARAM_DECL) {
1096   PROTOBUF_MUSTTAIL return SingularEnum<uint8_t, field_layout::kTvEnum>(
1097       PROTOBUF_TC_PARAM_PASS);
1098 }
FastEvS2(PROTOBUF_TC_PARAM_DECL)1099 PROTOBUF_NOINLINE const char* TcParser::FastEvS2(PROTOBUF_TC_PARAM_DECL) {
1100   PROTOBUF_MUSTTAIL return SingularEnum<uint16_t, field_layout::kTvEnum>(
1101       PROTOBUF_TC_PARAM_PASS);
1102 }
1103 
1104 template <typename TagType, uint16_t xform_val>
RepeatedEnum(PROTOBUF_TC_PARAM_DECL)1105 inline PROTOBUF_ALWAYS_INLINE const char* TcParser::RepeatedEnum(
1106     PROTOBUF_TC_PARAM_DECL) {
1107   if (PROTOBUF_PREDICT_FALSE(data.coded_tag<TagType>() != 0)) {
1108     PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_NO_DATA_PASS);
1109   }
1110   auto& field = RefAt<RepeatedField<int32_t>>(msg, data.offset());
1111   const auto expected_tag = UnalignedLoad<TagType>(ptr);
1112   const TcParseTableBase::FieldAux aux = *table->field_aux(data.aux_idx());
1113   PrefetchEnumData(xform_val, aux);
1114   do {
1115     const char* ptr2 = ptr;  // save for unknown enum case
1116     ptr += sizeof(TagType);
1117     uint64_t tmp;
1118     ptr = ParseVarint(ptr, &tmp);
1119     if (PROTOBUF_PREDICT_FALSE(ptr == nullptr)) {
1120       PROTOBUF_MUSTTAIL return Error(PROTOBUF_TC_PARAM_NO_DATA_PASS);
1121     }
1122     if (PROTOBUF_PREDICT_FALSE(
1123             !EnumIsValidAux(static_cast<int32_t>(tmp), xform_val, aux))) {
1124       // We can avoid duplicate work in MiniParse by directly calling
1125       // table->fallback.
1126       ptr = ptr2;
1127       PROTOBUF_MUSTTAIL return FastUnknownEnumFallback(PROTOBUF_TC_PARAM_PASS);
1128     }
1129     field.Add(static_cast<int32_t>(tmp));
1130     if (PROTOBUF_PREDICT_FALSE(!ctx->DataAvailable(ptr))) {
1131       PROTOBUF_MUSTTAIL return ToParseLoop(PROTOBUF_TC_PARAM_NO_DATA_PASS);
1132     }
1133   } while (UnalignedLoad<TagType>(ptr) == expected_tag);
1134 
1135   PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_NO_DATA_PASS);
1136 }
1137 
GetUnknownFieldOps(const TcParseTableBase * table)1138 const TcParser::UnknownFieldOps& TcParser::GetUnknownFieldOps(
1139     const TcParseTableBase* table) {
1140   // Call the fallback function in a special mode to only act as a
1141   // way to return the ops.
1142   // Hiding the unknown fields vtable behind the fallback function avoids adding
1143   // more pointers in TcParseTableBase, and the extra runtime jumps are not
1144   // relevant because unknown fields are rare.
1145   const char* ptr = table->fallback(nullptr, nullptr, nullptr, {}, nullptr, 0);
1146   return *reinterpret_cast<const UnknownFieldOps*>(ptr);
1147 }
1148 
AddUnknownEnum(MessageLite * msg,const TcParseTableBase * table,uint32_t tag,int32_t enum_value)1149 PROTOBUF_NOINLINE void TcParser::AddUnknownEnum(MessageLite* msg,
1150                                                 const TcParseTableBase* table,
1151                                                 uint32_t tag,
1152                                                 int32_t enum_value) {
1153   GetUnknownFieldOps(table).write_varint(msg, tag >> 3, enum_value);
1154 }
1155 
1156 template <typename TagType, uint16_t xform_val>
PackedEnum(PROTOBUF_TC_PARAM_DECL)1157 inline PROTOBUF_ALWAYS_INLINE const char* TcParser::PackedEnum(
1158     PROTOBUF_TC_PARAM_DECL) {
1159   if (PROTOBUF_PREDICT_FALSE(data.coded_tag<TagType>() != 0)) {
1160     PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_NO_DATA_PASS);
1161   }
1162   const auto saved_tag = UnalignedLoad<TagType>(ptr);
1163   ptr += sizeof(TagType);
1164   // Since ctx->ReadPackedVarint does not use TailCall or Return, sync any
1165   // pending hasbits now:
1166   SyncHasbits(msg, hasbits, table);
1167   auto* field = &RefAt<RepeatedField<int32_t>>(msg, data.offset());
1168   const TcParseTableBase::FieldAux aux = *table->field_aux(data.aux_idx());
1169   PrefetchEnumData(xform_val, aux);
1170   return ctx->ReadPackedVarint(ptr, [=](int32_t value) {
1171     if (!EnumIsValidAux(value, xform_val, aux)) {
1172       AddUnknownEnum(msg, table, FastDecodeTag(saved_tag), value);
1173     } else {
1174       field->Add(value);
1175     }
1176   });
1177 }
1178 
FastErR1(PROTOBUF_TC_PARAM_DECL)1179 PROTOBUF_NOINLINE const char* TcParser::FastErR1(PROTOBUF_TC_PARAM_DECL) {
1180   PROTOBUF_MUSTTAIL return RepeatedEnum<uint8_t, field_layout::kTvRange>(
1181       PROTOBUF_TC_PARAM_PASS);
1182 }
FastErR2(PROTOBUF_TC_PARAM_DECL)1183 PROTOBUF_NOINLINE const char* TcParser::FastErR2(PROTOBUF_TC_PARAM_DECL) {
1184   PROTOBUF_MUSTTAIL return RepeatedEnum<uint16_t, field_layout::kTvRange>(
1185       PROTOBUF_TC_PARAM_PASS);
1186 }
FastEvR1(PROTOBUF_TC_PARAM_DECL)1187 PROTOBUF_NOINLINE const char* TcParser::FastEvR1(PROTOBUF_TC_PARAM_DECL) {
1188   PROTOBUF_MUSTTAIL return RepeatedEnum<uint8_t, field_layout::kTvEnum>(
1189       PROTOBUF_TC_PARAM_PASS);
1190 }
FastEvR2(PROTOBUF_TC_PARAM_DECL)1191 PROTOBUF_NOINLINE const char* TcParser::FastEvR2(PROTOBUF_TC_PARAM_DECL) {
1192   PROTOBUF_MUSTTAIL return RepeatedEnum<uint16_t, field_layout::kTvEnum>(
1193       PROTOBUF_TC_PARAM_PASS);
1194 }
1195 
FastErP1(PROTOBUF_TC_PARAM_DECL)1196 PROTOBUF_NOINLINE const char* TcParser::FastErP1(PROTOBUF_TC_PARAM_DECL) {
1197   PROTOBUF_MUSTTAIL return PackedEnum<uint8_t, field_layout::kTvRange>(
1198       PROTOBUF_TC_PARAM_PASS);
1199 }
FastErP2(PROTOBUF_TC_PARAM_DECL)1200 PROTOBUF_NOINLINE const char* TcParser::FastErP2(PROTOBUF_TC_PARAM_DECL) {
1201   PROTOBUF_MUSTTAIL return PackedEnum<uint16_t, field_layout::kTvRange>(
1202       PROTOBUF_TC_PARAM_PASS);
1203 }
FastEvP1(PROTOBUF_TC_PARAM_DECL)1204 PROTOBUF_NOINLINE const char* TcParser::FastEvP1(PROTOBUF_TC_PARAM_DECL) {
1205   PROTOBUF_MUSTTAIL return PackedEnum<uint8_t, field_layout::kTvEnum>(
1206       PROTOBUF_TC_PARAM_PASS);
1207 }
FastEvP2(PROTOBUF_TC_PARAM_DECL)1208 PROTOBUF_NOINLINE const char* TcParser::FastEvP2(PROTOBUF_TC_PARAM_DECL) {
1209   PROTOBUF_MUSTTAIL return PackedEnum<uint16_t, field_layout::kTvEnum>(
1210       PROTOBUF_TC_PARAM_PASS);
1211 }
1212 
1213 template <typename TagType, uint8_t min>
SingularEnumSmallRange(PROTOBUF_TC_PARAM_DECL)1214 inline PROTOBUF_ALWAYS_INLINE const char* TcParser::SingularEnumSmallRange(
1215     PROTOBUF_TC_PARAM_DECL) {
1216   if (PROTOBUF_PREDICT_FALSE(data.coded_tag<TagType>() != 0)) {
1217     PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_NO_DATA_PASS);
1218   }
1219 
1220   uint8_t v = ptr[sizeof(TagType)];
1221   if (PROTOBUF_PREDICT_FALSE(min > v || v > data.aux_idx())) {
1222     PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_NO_DATA_PASS);
1223   }
1224 
1225   RefAt<int32_t>(msg, data.offset()) = v;
1226   ptr += sizeof(TagType) + 1;
1227   hasbits |= (uint64_t{1} << data.hasbit_idx());
1228   PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_NO_DATA_PASS);
1229 }
1230 
FastEr0S1(PROTOBUF_TC_PARAM_DECL)1231 PROTOBUF_NOINLINE const char* TcParser::FastEr0S1(PROTOBUF_TC_PARAM_DECL) {
1232   PROTOBUF_MUSTTAIL return SingularEnumSmallRange<uint8_t, 0>(
1233       PROTOBUF_TC_PARAM_PASS);
1234 }
1235 
FastEr0S2(PROTOBUF_TC_PARAM_DECL)1236 PROTOBUF_NOINLINE const char* TcParser::FastEr0S2(PROTOBUF_TC_PARAM_DECL) {
1237   PROTOBUF_MUSTTAIL return SingularEnumSmallRange<uint16_t, 0>(
1238       PROTOBUF_TC_PARAM_PASS);
1239 }
1240 
FastEr1S1(PROTOBUF_TC_PARAM_DECL)1241 PROTOBUF_NOINLINE const char* TcParser::FastEr1S1(PROTOBUF_TC_PARAM_DECL) {
1242   PROTOBUF_MUSTTAIL return SingularEnumSmallRange<uint8_t, 1>(
1243       PROTOBUF_TC_PARAM_PASS);
1244 }
1245 
FastEr1S2(PROTOBUF_TC_PARAM_DECL)1246 PROTOBUF_NOINLINE const char* TcParser::FastEr1S2(PROTOBUF_TC_PARAM_DECL) {
1247   PROTOBUF_MUSTTAIL return SingularEnumSmallRange<uint16_t, 1>(
1248       PROTOBUF_TC_PARAM_PASS);
1249 }
1250 
1251 template <typename TagType, uint8_t min>
RepeatedEnumSmallRange(PROTOBUF_TC_PARAM_DECL)1252 inline PROTOBUF_ALWAYS_INLINE const char* TcParser::RepeatedEnumSmallRange(
1253     PROTOBUF_TC_PARAM_DECL) {
1254   if (PROTOBUF_PREDICT_FALSE(data.coded_tag<TagType>() != 0)) {
1255     PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_NO_DATA_PASS);
1256   }
1257   auto& field = RefAt<RepeatedField<int32_t>>(msg, data.offset());
1258   auto expected_tag = UnalignedLoad<TagType>(ptr);
1259   const uint8_t max = data.aux_idx();
1260   do {
1261     uint8_t v = ptr[sizeof(TagType)];
1262     if (PROTOBUF_PREDICT_FALSE(min > v || v > max)) {
1263       PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_NO_DATA_PASS);
1264     }
1265     field.Add(static_cast<int32_t>(v));
1266     ptr += sizeof(TagType) + 1;
1267     if (PROTOBUF_PREDICT_FALSE(!ctx->DataAvailable(ptr))) {
1268       PROTOBUF_MUSTTAIL return ToParseLoop(PROTOBUF_TC_PARAM_NO_DATA_PASS);
1269     }
1270   } while (UnalignedLoad<TagType>(ptr) == expected_tag);
1271 
1272   PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_NO_DATA_PASS);
1273 }
1274 
FastEr0R1(PROTOBUF_TC_PARAM_DECL)1275 PROTOBUF_NOINLINE const char* TcParser::FastEr0R1(PROTOBUF_TC_PARAM_DECL) {
1276   PROTOBUF_MUSTTAIL return RepeatedEnumSmallRange<uint8_t, 0>(
1277       PROTOBUF_TC_PARAM_PASS);
1278 }
FastEr0R2(PROTOBUF_TC_PARAM_DECL)1279 PROTOBUF_NOINLINE const char* TcParser::FastEr0R2(PROTOBUF_TC_PARAM_DECL) {
1280   PROTOBUF_MUSTTAIL return RepeatedEnumSmallRange<uint16_t, 0>(
1281       PROTOBUF_TC_PARAM_PASS);
1282 }
1283 
FastEr1R1(PROTOBUF_TC_PARAM_DECL)1284 PROTOBUF_NOINLINE const char* TcParser::FastEr1R1(PROTOBUF_TC_PARAM_DECL) {
1285   PROTOBUF_MUSTTAIL return RepeatedEnumSmallRange<uint8_t, 1>(
1286       PROTOBUF_TC_PARAM_PASS);
1287 }
FastEr1R2(PROTOBUF_TC_PARAM_DECL)1288 PROTOBUF_NOINLINE const char* TcParser::FastEr1R2(PROTOBUF_TC_PARAM_DECL) {
1289   PROTOBUF_MUSTTAIL return RepeatedEnumSmallRange<uint16_t, 1>(
1290       PROTOBUF_TC_PARAM_PASS);
1291 }
1292 
1293 template <typename TagType, uint8_t min>
PackedEnumSmallRange(PROTOBUF_TC_PARAM_DECL)1294 inline PROTOBUF_ALWAYS_INLINE const char* TcParser::PackedEnumSmallRange(
1295     PROTOBUF_TC_PARAM_DECL) {
1296   if (PROTOBUF_PREDICT_FALSE(data.coded_tag<TagType>() != 0)) {
1297     PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_NO_DATA_PASS);
1298   }
1299 
1300   // Since ctx->ReadPackedVarint does not use TailCall or Return, sync any
1301   // pending hasbits now:
1302   SyncHasbits(msg, hasbits, table);
1303 
1304   const auto saved_tag = UnalignedLoad<TagType>(ptr);
1305   ptr += sizeof(TagType);
1306   auto* field = &RefAt<RepeatedField<int32_t>>(msg, data.offset());
1307   const uint8_t max = data.aux_idx();
1308 
1309   return ctx->ReadPackedVarint(
1310       ptr,
1311       [=](int32_t v) {
1312         if (PROTOBUF_PREDICT_FALSE(min > v || v > max)) {
1313           AddUnknownEnum(msg, table, FastDecodeTag(saved_tag), v);
1314         } else {
1315           field->Add(v);
1316         }
1317       },
1318       /*size_callback=*/
1319       [=](int32_t size_bytes) {
1320         // For enums that fit in one varint byte, optimistically assume that all
1321         // the values are one byte long (i.e. no large unknown values).  If so,
1322         // we know exactly how many values we're going to get.
1323         //
1324         // But! size_bytes might be much larger than the total size of the
1325         // serialized proto (e.g. input corruption, or parsing msg1 as msg2).
1326         // We don't want a small serialized proto to lead to giant memory
1327         // allocations.
1328         //
1329         // Ideally we'd restrict size_bytes to the total size of the input, but
1330         // we don't know that value.  The best we can do is to restrict it to
1331         // the remaining bytes in the chunk, plus a "benefit of the doubt"
1332         // factor if we're very close to the end of the chunk.
1333         //
1334         // Do these calculations in int64 because it's possible we overflow
1335         // int32 (imgaine that field->size() and size_bytes are both large).
1336         int64_t new_size =
1337             int64_t{field->size()} +
1338             std::min(size_bytes, std::max(1024, ctx->MaximumReadSize(ptr)));
1339         field->Reserve(static_cast<int32_t>(
1340             std::min(new_size, int64_t{std::numeric_limits<int32_t>::max()})));
1341       });
1342 }
1343 
// Fast-table entry point: tail-calls PackedEnumSmallRange instantiated with a
// one-byte tag (uint8_t) and 0 as its second template argument (presumably the
// smallest valid enum value -- confirm against the template declaration).
PROTOBUF_NOINLINE const char* TcParser::FastEr0P1(PROTOBUF_TC_PARAM_DECL) {
  PROTOBUF_MUSTTAIL return PackedEnumSmallRange<uint8_t, 0>(
      PROTOBUF_TC_PARAM_PASS);
}
// Fast-table entry point: tail-calls PackedEnumSmallRange instantiated with a
// two-byte tag (uint16_t) and 0 as its second template argument (presumably
// the smallest valid enum value -- confirm against the template declaration).
PROTOBUF_NOINLINE const char* TcParser::FastEr0P2(PROTOBUF_TC_PARAM_DECL) {
  PROTOBUF_MUSTTAIL return PackedEnumSmallRange<uint16_t, 0>(
      PROTOBUF_TC_PARAM_PASS);
}
1352 
// Fast-table entry point: tail-calls PackedEnumSmallRange instantiated with a
// one-byte tag (uint8_t) and 1 as its second template argument (presumably the
// smallest valid enum value -- confirm against the template declaration).
PROTOBUF_NOINLINE const char* TcParser::FastEr1P1(PROTOBUF_TC_PARAM_DECL) {
  PROTOBUF_MUSTTAIL return PackedEnumSmallRange<uint8_t, 1>(
      PROTOBUF_TC_PARAM_PASS);
}
// Fast-table entry point: tail-calls PackedEnumSmallRange instantiated with a
// two-byte tag (uint16_t) and 1 as its second template argument (presumably
// the smallest valid enum value -- confirm against the template declaration).
PROTOBUF_NOINLINE const char* TcParser::FastEr1P2(PROTOBUF_TC_PARAM_DECL) {
  PROTOBUF_MUSTTAIL return PackedEnumSmallRange<uint16_t, 1>(
      PROTOBUF_TC_PARAM_PASS);
}
1361 
1362 //////////////////////////////////////////////////////////////////////////////
1363 // String/bytes fields
1364 //////////////////////////////////////////////////////////////////////////////
1365 
// Defined in wire_format_lite.cc
//
// Forward declaration only. In this file it is called by
// TcParser::ReportFastUtf8Error with the message name, the field name, the
// operation string "parsing" and emit_stacktrace == false.
void PrintUTF8ErrorLog(absl::string_view message_name,
                       absl::string_view field_name, const char* operation_str,
                       bool emit_stacktrace);
1370 
ReportFastUtf8Error(uint32_t decoded_tag,const TcParseTableBase * table)1371 void TcParser::ReportFastUtf8Error(uint32_t decoded_tag,
1372                                    const TcParseTableBase* table) {
1373   uint32_t field_num = decoded_tag >> 3;
1374   const auto* entry = FindFieldEntry(table, field_num);
1375   PrintUTF8ErrorLog(MessageName(table), FieldName(table, entry), "parsing",
1376                     false);
1377 }
1378 
1379 namespace {
1380 
1381 // Here are overloads of ReadStringIntoArena, ReadStringNoArena and IsValidUTF8
1382 // for every string class for which we provide fast-table parser support.
1383 
ReadStringIntoArena(MessageLite *,const char * ptr,ParseContext * ctx,uint32_t,const TcParseTableBase *,ArenaStringPtr & field,Arena * arena)1384 PROTOBUF_ALWAYS_INLINE inline const char* ReadStringIntoArena(
1385     MessageLite* /*msg*/, const char* ptr, ParseContext* ctx,
1386     uint32_t /*aux_idx*/, const TcParseTableBase* /*table*/,
1387     ArenaStringPtr& field, Arena* arena) {
1388   return ctx->ReadArenaString(ptr, &field, arena);
1389 }
1390 
1391 PROTOBUF_NOINLINE
ReadStringNoArena(MessageLite *,const char * ptr,ParseContext * ctx,uint32_t,const TcParseTableBase *,ArenaStringPtr & field)1392 const char* ReadStringNoArena(MessageLite* /*msg*/, const char* ptr,
1393                               ParseContext* ctx, uint32_t /*aux_idx*/,
1394                               const TcParseTableBase* /*table*/,
1395                               ArenaStringPtr& field) {
1396   int size = ReadSize(&ptr);
1397   if (!ptr) return nullptr;
1398   return ctx->ReadString(ptr, size, field.MutableNoCopy(nullptr));
1399 }
1400 
IsValidUTF8(ArenaStringPtr & field)1401 PROTOBUF_ALWAYS_INLINE inline bool IsValidUTF8(ArenaStringPtr& field) {
1402   return utf8_range::IsStructurallyValid(field.Get());
1403 }
1404 
1405 
1406 }  // namespace
1407 
1408 template <typename TagType, typename FieldType, TcParser::Utf8Type utf8>
SingularString(PROTOBUF_TC_PARAM_DECL)1409 inline PROTOBUF_ALWAYS_INLINE const char* TcParser::SingularString(
1410     PROTOBUF_TC_PARAM_DECL) {
1411   if (PROTOBUF_PREDICT_FALSE(data.coded_tag<TagType>() != 0)) {
1412     PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_NO_DATA_PASS);
1413   }
1414   auto saved_tag = UnalignedLoad<TagType>(ptr);
1415   ptr += sizeof(TagType);
1416   hasbits |= (uint64_t{1} << data.hasbit_idx());
1417   auto& field = RefAt<FieldType>(msg, data.offset());
1418   auto arena = msg->GetArena();
1419   if (arena) {
1420     ptr =
1421         ReadStringIntoArena(msg, ptr, ctx, data.aux_idx(), table, field, arena);
1422   } else {
1423     ptr = ReadStringNoArena(msg, ptr, ctx, data.aux_idx(), table, field);
1424   }
1425   if (PROTOBUF_PREDICT_FALSE(ptr == nullptr)) {
1426     PROTOBUF_MUSTTAIL return Error(PROTOBUF_TC_PARAM_NO_DATA_PASS);
1427   }
1428   switch (utf8) {
1429     case kNoUtf8:
1430 #ifdef NDEBUG
1431     case kUtf8ValidateOnly:
1432 #endif
1433       break;
1434     default:
1435       if (PROTOBUF_PREDICT_TRUE(IsValidUTF8(field))) {
1436         break;
1437       }
1438       ReportFastUtf8Error(FastDecodeTag(saved_tag), table);
1439       if (utf8 == kUtf8) {
1440         PROTOBUF_MUSTTAIL return Error(PROTOBUF_TC_PARAM_NO_DATA_PASS);
1441       }
1442       break;
1443   }
1444 
1445   PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_NO_DATA_PASS);
1446 }
1447 
// Fast-table entry point: singular ArenaStringPtr field with a one-byte tag
// and no UTF-8 validation (kNoUtf8).
PROTOBUF_NOINLINE const char* TcParser::FastBS1(PROTOBUF_TC_PARAM_DECL) {
  PROTOBUF_MUSTTAIL return SingularString<uint8_t, ArenaStringPtr, kNoUtf8>(
      PROTOBUF_TC_PARAM_PASS);
}
// Fast-table entry point: singular ArenaStringPtr field with a two-byte tag
// and no UTF-8 validation (kNoUtf8).
PROTOBUF_NOINLINE const char* TcParser::FastBS2(PROTOBUF_TC_PARAM_DECL) {
  PROTOBUF_MUSTTAIL return SingularString<uint16_t, ArenaStringPtr, kNoUtf8>(
      PROTOBUF_TC_PARAM_PASS);
}
// Fast-table entry point: singular ArenaStringPtr field with a one-byte tag
// and the kUtf8ValidateOnly policy. In SingularString that policy validates
// only when NDEBUG is not defined, and a failure is reported but not fatal.
PROTOBUF_NOINLINE const char* TcParser::FastSS1(PROTOBUF_TC_PARAM_DECL) {
  PROTOBUF_MUSTTAIL return SingularString<uint8_t, ArenaStringPtr,
                                          kUtf8ValidateOnly>(
      PROTOBUF_TC_PARAM_PASS);
}
// Fast-table entry point: singular ArenaStringPtr field with a two-byte tag
// and the kUtf8ValidateOnly policy. In SingularString that policy validates
// only when NDEBUG is not defined, and a failure is reported but not fatal.
PROTOBUF_NOINLINE const char* TcParser::FastSS2(PROTOBUF_TC_PARAM_DECL) {
  PROTOBUF_MUSTTAIL return SingularString<uint16_t, ArenaStringPtr,
                                          kUtf8ValidateOnly>(
      PROTOBUF_TC_PARAM_PASS);
}
// Fast-table entry point: singular ArenaStringPtr field with a one-byte tag
// and the kUtf8 policy. In SingularString an invalid payload under this policy
// is reported and then fails the parse.
PROTOBUF_NOINLINE const char* TcParser::FastUS1(PROTOBUF_TC_PARAM_DECL) {
  PROTOBUF_MUSTTAIL return SingularString<uint8_t, ArenaStringPtr, kUtf8>(
      PROTOBUF_TC_PARAM_PASS);
}
// Fast-table entry point: singular ArenaStringPtr field with a two-byte tag
// and the kUtf8 policy. In SingularString an invalid payload under this policy
// is reported and then fails the parse.
PROTOBUF_NOINLINE const char* TcParser::FastUS2(PROTOBUF_TC_PARAM_DECL) {
  PROTOBUF_MUSTTAIL return SingularString<uint16_t, ArenaStringPtr, kUtf8>(
      PROTOBUF_TC_PARAM_PASS);
}
1474 
1475 // Inlined string variants:
1476 
// No dedicated fast path is implemented for this inlined-string variant; the
// call is tail-forwarded to MiniParse.
const char* TcParser::FastBiS1(PROTOBUF_TC_PARAM_DECL) {
  PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_NO_DATA_PASS);
}
// No dedicated fast path is implemented for this inlined-string variant; the
// call is tail-forwarded to MiniParse.
const char* TcParser::FastBiS2(PROTOBUF_TC_PARAM_DECL) {
  PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_NO_DATA_PASS);
}
// No dedicated fast path is implemented for this inlined-string variant; the
// call is tail-forwarded to MiniParse.
const char* TcParser::FastSiS1(PROTOBUF_TC_PARAM_DECL) {
  PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_NO_DATA_PASS);
}
// No dedicated fast path is implemented for this inlined-string variant; the
// call is tail-forwarded to MiniParse.
const char* TcParser::FastSiS2(PROTOBUF_TC_PARAM_DECL) {
  PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_NO_DATA_PASS);
}
// No dedicated fast path is implemented for this inlined-string variant; the
// call is tail-forwarded to MiniParse.
const char* TcParser::FastUiS1(PROTOBUF_TC_PARAM_DECL) {
  PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_NO_DATA_PASS);
}
// No dedicated fast path is implemented for this inlined-string variant; the
// call is tail-forwarded to MiniParse.
const char* TcParser::FastUiS2(PROTOBUF_TC_PARAM_DECL) {
  PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_NO_DATA_PASS);
}
1495 
1496 // Corded string variants:
// No dedicated fast path is implemented for this Cord-backed string variant;
// the call is tail-forwarded to MiniParse.
const char* TcParser::FastBcS1(PROTOBUF_TC_PARAM_DECL) {
  PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_NO_DATA_PASS);
}
// No dedicated fast path is implemented for this Cord-backed string variant;
// the call is tail-forwarded to MiniParse.
const char* TcParser::FastBcS2(PROTOBUF_TC_PARAM_DECL) {
  PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_NO_DATA_PASS);
}
// No dedicated fast path is implemented for this Cord-backed string variant;
// the call is tail-forwarded to MiniParse.
const char* TcParser::FastScS1(PROTOBUF_TC_PARAM_DECL) {
  PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_NO_DATA_PASS);
}
// No dedicated fast path is implemented for this Cord-backed string variant;
// the call is tail-forwarded to MiniParse.
const char* TcParser::FastScS2(PROTOBUF_TC_PARAM_DECL) {
  PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_NO_DATA_PASS);
}
// No dedicated fast path is implemented for this Cord-backed string variant;
// the call is tail-forwarded to MiniParse.
const char* TcParser::FastUcS1(PROTOBUF_TC_PARAM_DECL) {
  PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_NO_DATA_PASS);
}
// No dedicated fast path is implemented for this Cord-backed string variant;
// the call is tail-forwarded to MiniParse.
const char* TcParser::FastUcS2(PROTOBUF_TC_PARAM_DECL) {
  PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_NO_DATA_PASS);
}
1515 
// Fast-path parser for a repeated string/bytes field.
//
// Template parameters:
//   TagType   - unsigned integer type as wide as the encoded tag.
//   FieldType - repeated container type stored in the message.
//   utf8      - UTF-8 validation policy applied to every parsed element.
//
// Parses one element, then keeps parsing while the next bytes in the buffer
// equal the same tag. Two element-allocation strategies exist: a serial-arena
// path and a generic path that uses field.Add(). Any read failure, or a
// validation failure under kUtf8, ends the parse with Error.
template <typename TagType, typename FieldType, TcParser::Utf8Type utf8>
inline PROTOBUF_ALWAYS_INLINE const char* TcParser::RepeatedString(
    PROTOBUF_TC_PARAM_DECL) {
  // Anything other than a zero coded tag is handed to the generic path.
  if (PROTOBUF_PREDICT_FALSE(data.coded_tag<TagType>() != 0)) {
    PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_NO_DATA_PASS);
  }
  // Raw tag bytes; compared against the upcoming bytes to detect another
  // element of the same field, and decoded for error reporting.
  const auto expected_tag = UnalignedLoad<TagType>(ptr);
  auto& field = RefAt<FieldType>(msg, data.offset());

  // Validates the most recently appended element. Returns false only when the
  // element is invalid and the policy is kUtf8; other validating policies
  // report the problem and still return true. kUtf8ValidateOnly is checked
  // only when NDEBUG is not defined.
  const auto validate_last_string = [expected_tag, table, &field] {
    switch (utf8) {
      case kNoUtf8:
#ifdef NDEBUG
      case kUtf8ValidateOnly:
#endif
        return true;
      default:
        if (PROTOBUF_PREDICT_TRUE(
                utf8_range::IsStructurallyValid(field[field.size() - 1]))) {
          return true;
        }
        ReportFastUtf8Error(FastDecodeTag(expected_tag), table);
        if (utf8 == kUtf8) return false;
        return true;
    }
  };

  auto* arena = field.GetArena();
  SerialArena* serial_arena;
  // Arena path: taken only when the field is on an arena, the serial arena can
  // be obtained via GetSerialArenaFast, and field.PrepareForParse() succeeds.
  if (PROTOBUF_PREDICT_TRUE(arena != nullptr &&
                            arena->impl_.GetSerialArenaFast(&serial_arena) &&
                            field.PrepareForParse())) {
    do {
      ptr += sizeof(TagType);
      ptr = ParseRepeatedStringOnce(ptr, serial_arena, ctx, field);

      // The null check short-circuits, so validation only runs after a
      // successful read.
      if (PROTOBUF_PREDICT_FALSE(ptr == nullptr || !validate_last_string())) {
        PROTOBUF_MUSTTAIL return Error(PROTOBUF_TC_PARAM_NO_DATA_PASS);
      }
      // Do not peek at the next tag unless the context reports data available.
      if (PROTOBUF_PREDICT_FALSE(!ctx->DataAvailable(ptr))) goto parse_loop;
    } while (UnalignedLoad<TagType>(ptr) == expected_tag);
  } else {
    // Generic path: append a std::string through the container and parse
    // directly into it.
    do {
      ptr += sizeof(TagType);
      std::string* str = field.Add();
      ptr = InlineGreedyStringParser(str, ptr, ctx);
      if (PROTOBUF_PREDICT_FALSE(ptr == nullptr || !validate_last_string())) {
        PROTOBUF_MUSTTAIL return Error(PROTOBUF_TC_PARAM_NO_DATA_PASS);
      }
      if (PROTOBUF_PREDICT_FALSE(!ctx->DataAvailable(ptr))) goto parse_loop;
    } while (UnalignedLoad<TagType>(ptr) == expected_tag);
  }
  // A different tag follows: continue with normal tag dispatch.
  PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_NO_DATA_PASS);
parse_loop:
  PROTOBUF_MUSTTAIL return ToParseLoop(PROTOBUF_TC_PARAM_NO_DATA_PASS);
}
1572 
// Fast-table entry point: RepeatedPtrField<std::string> with a one-byte tag
// and no UTF-8 validation (kNoUtf8).
PROTOBUF_NOINLINE const char* TcParser::FastBR1(PROTOBUF_TC_PARAM_DECL) {
  PROTOBUF_MUSTTAIL return RepeatedString<
      uint8_t, RepeatedPtrField<std::string>, kNoUtf8>(PROTOBUF_TC_PARAM_PASS);
}
// Fast-table entry point: RepeatedPtrField<std::string> with a two-byte tag
// and no UTF-8 validation (kNoUtf8).
PROTOBUF_NOINLINE const char* TcParser::FastBR2(PROTOBUF_TC_PARAM_DECL) {
  PROTOBUF_MUSTTAIL return RepeatedString<
      uint16_t, RepeatedPtrField<std::string>, kNoUtf8>(PROTOBUF_TC_PARAM_PASS);
}
// Fast-table entry point: RepeatedPtrField<std::string> with a one-byte tag
// and the kUtf8ValidateOnly policy. In RepeatedString that policy validates
// only when NDEBUG is not defined, and a failure is reported but not fatal.
PROTOBUF_NOINLINE const char* TcParser::FastSR1(PROTOBUF_TC_PARAM_DECL) {
  PROTOBUF_MUSTTAIL return RepeatedString<
      uint8_t, RepeatedPtrField<std::string>, kUtf8ValidateOnly>(
      PROTOBUF_TC_PARAM_PASS);
}
// Fast-table entry point: RepeatedPtrField<std::string> with a two-byte tag
// and the kUtf8ValidateOnly policy. In RepeatedString that policy validates
// only when NDEBUG is not defined, and a failure is reported but not fatal.
PROTOBUF_NOINLINE const char* TcParser::FastSR2(PROTOBUF_TC_PARAM_DECL) {
  PROTOBUF_MUSTTAIL return RepeatedString<
      uint16_t, RepeatedPtrField<std::string>, kUtf8ValidateOnly>(
      PROTOBUF_TC_PARAM_PASS);
}
// Fast-table entry point: RepeatedPtrField<std::string> with a one-byte tag
// and the kUtf8 policy. In RepeatedString an invalid element under this policy
// is reported and then fails the parse.
PROTOBUF_NOINLINE const char* TcParser::FastUR1(PROTOBUF_TC_PARAM_DECL) {
  PROTOBUF_MUSTTAIL return RepeatedString<uint8_t,
                                          RepeatedPtrField<std::string>, kUtf8>(
      PROTOBUF_TC_PARAM_PASS);
}
// Fast-table entry point: RepeatedPtrField<std::string> with a two-byte tag
// and the kUtf8 policy. In RepeatedString an invalid element under this policy
// is reported and then fails the parse.
PROTOBUF_NOINLINE const char* TcParser::FastUR2(PROTOBUF_TC_PARAM_DECL) {
  PROTOBUF_MUSTTAIL return RepeatedString<uint16_t,
                                          RepeatedPtrField<std::string>, kUtf8>(
      PROTOBUF_TC_PARAM_PASS);
}
1601 
1602 //////////////////////////////////////////////////////////////////////////////
1603 // Mini parsing
1604 //////////////////////////////////////////////////////////////////////////////
1605 
namespace {
// Sets the has-bit of `entry` inside `msg`.
//
// entry.has_idx is treated as a bit index counted from the start of the
// message object: the portable branch ORs bit (has_idx % 32) into the 32-bit
// word located at byte offset (has_idx / 32 * 4).
inline void SetHas(const FieldEntry& entry, MessageLite* msg) {
  auto has_idx = static_cast<uint32_t>(entry.has_idx);
#if defined(__x86_64__) && defined(__GNUC__)
  // x86-64 with GNU-style inline asm: a single `bts` with a memory base
  // operand (the first byte of msg) and the bit index in a register.
  asm("bts %1, %0\n" : "+m"(*reinterpret_cast<char*>(msg)) : "r"(has_idx));
#else
  auto& hasblock = TcParser::RefAt<uint32_t>(msg, has_idx / 32 * 4);
  hasblock |= uint32_t{1} << (has_idx % 32);
#endif
}
}  // namespace
1617 
1618 // Destroys any existing oneof union member (if necessary). Returns true if the
1619 // caller is responsible for initializing the object, or false if the field
1620 // already has the desired case.
ChangeOneof(const TcParseTableBase * table,const TcParseTableBase::FieldEntry & entry,uint32_t field_num,ParseContext * ctx,MessageLite * msg)1621 bool TcParser::ChangeOneof(const TcParseTableBase* table,
1622                            const TcParseTableBase::FieldEntry& entry,
1623                            uint32_t field_num, ParseContext* ctx,
1624                            MessageLite* msg) {
1625   // The _oneof_case_ value offset is stored in the has-bit index.
1626   uint32_t* oneof_case = &TcParser::RefAt<uint32_t>(msg, entry.has_idx);
1627   uint32_t current_case = *oneof_case;
1628   *oneof_case = field_num;
1629 
1630   if (current_case == 0) {
1631     // If the member is empty, we don't have anything to clear. Caller is
1632     // responsible for creating a new member object.
1633     return true;
1634   }
1635   if (current_case == field_num) {
1636     // If the member is already active, then it should be merged. We're done.
1637     return false;
1638   }
1639   // Look up the value that is already stored, and dispose of it if necessary.
1640   const FieldEntry* current_entry = FindFieldEntry(table, current_case);
1641   uint16_t current_kind = current_entry->type_card & field_layout::kFkMask;
1642   uint16_t current_rep = current_entry->type_card & field_layout::kRepMask;
1643   if (current_kind == field_layout::kFkString) {
1644     switch (current_rep) {
1645       case field_layout::kRepAString: {
1646         auto& field = RefAt<ArenaStringPtr>(msg, current_entry->offset);
1647         field.Destroy();
1648         break;
1649       }
1650       case field_layout::kRepCord: {
1651         if (msg->GetArena() == nullptr) {
1652           delete RefAt<absl::Cord*>(msg, current_entry->offset);
1653         }
1654         break;
1655       }
1656       case field_layout::kRepSString:
1657       case field_layout::kRepIString:
1658       default:
1659         ABSL_DLOG(FATAL) << "string rep not handled: "
1660                          << (current_rep >> field_layout::kRepShift);
1661         return true;
1662     }
1663   } else if (current_kind == field_layout::kFkMessage) {
1664     switch (current_rep) {
1665       case field_layout::kRepMessage:
1666       case field_layout::kRepGroup: {
1667         auto& field = RefAt<MessageLite*>(msg, current_entry->offset);
1668         if (!msg->GetArena()) {
1669           delete field;
1670         }
1671         break;
1672       }
1673       default:
1674         ABSL_DLOG(FATAL) << "message rep not handled: "
1675                          << (current_rep >> field_layout::kRepShift);
1676         break;
1677     }
1678   }
1679   return true;
1680 }
1681 
1682 namespace {
GetSplitOffset(const TcParseTableBase * table)1683 uint32_t GetSplitOffset(const TcParseTableBase* table) {
1684   return table->field_aux(kSplitOffsetAuxIdx)->offset;
1685 }
1686 
GetSizeofSplit(const TcParseTableBase * table)1687 uint32_t GetSizeofSplit(const TcParseTableBase* table) {
1688   return table->field_aux(kSplitSizeAuxIdx)->offset;
1689 }
1690 }  // namespace
1691 
MaybeGetSplitBase(MessageLite * msg,const bool is_split,const TcParseTableBase * table)1692 void* TcParser::MaybeGetSplitBase(MessageLite* msg, const bool is_split,
1693                                   const TcParseTableBase* table) {
1694   void* out = msg;
1695   if (is_split) {
1696     const uint32_t split_offset = GetSplitOffset(table);
1697     void* default_split =
1698         TcParser::RefAt<void*>(table->default_instance(), split_offset);
1699     void*& split = TcParser::RefAt<void*>(msg, split_offset);
1700     if (split == default_split) {
1701       // Allocate split instance when needed.
1702       uint32_t size = GetSizeofSplit(table);
1703       Arena* arena = msg->GetArena();
1704       split = (arena == nullptr) ? ::operator new(size)
1705                                  : arena->AllocateAligned(size);
1706       memcpy(split, default_split, size);
1707     }
1708     out = split;
1709   }
1710   return out;
1711 }
1712 
1713 template <bool is_split>
MpFixed(PROTOBUF_TC_PARAM_DECL)1714 PROTOBUF_NOINLINE const char* TcParser::MpFixed(PROTOBUF_TC_PARAM_DECL) {
1715   const auto& entry = RefAt<FieldEntry>(table, data.entry_offset());
1716   const uint16_t type_card = entry.type_card;
1717   const uint16_t card = type_card & field_layout::kFcMask;
1718 
1719   // Check for repeated parsing (wiretype fallback is handled there):
1720   if (card == field_layout::kFcRepeated) {
1721     PROTOBUF_MUSTTAIL return MpRepeatedFixed<is_split>(PROTOBUF_TC_PARAM_PASS);
1722   }
1723   // Check for mismatched wiretype:
1724   const uint16_t rep = type_card & field_layout::kRepMask;
1725   const uint32_t decoded_wiretype = data.tag() & 7;
1726   if (rep == field_layout::kRep64Bits) {
1727     if (decoded_wiretype != WireFormatLite::WIRETYPE_FIXED64) {
1728       PROTOBUF_MUSTTAIL return table->fallback(PROTOBUF_TC_PARAM_PASS);
1729     }
1730   } else {
1731     ABSL_DCHECK_EQ(rep, static_cast<uint16_t>(field_layout::kRep32Bits));
1732     if (decoded_wiretype != WireFormatLite::WIRETYPE_FIXED32) {
1733       PROTOBUF_MUSTTAIL return table->fallback(PROTOBUF_TC_PARAM_PASS);
1734     }
1735   }
1736   // Set the field present:
1737   if (card == field_layout::kFcOptional) {
1738     SetHas(entry, msg);
1739   } else if (card == field_layout::kFcOneof) {
1740     ChangeOneof(table, entry, data.tag() >> 3, ctx, msg);
1741   }
1742   void* const base = MaybeGetSplitBase(msg, is_split, table);
1743   // Copy the value:
1744   if (rep == field_layout::kRep64Bits) {
1745     RefAt<uint64_t>(base, entry.offset) = UnalignedLoad<uint64_t>(ptr);
1746     ptr += sizeof(uint64_t);
1747   } else {
1748     RefAt<uint32_t>(base, entry.offset) = UnalignedLoad<uint32_t>(ptr);
1749     ptr += sizeof(uint32_t);
1750   }
1751   PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_NO_DATA_PASS);
1752 }
1753 
// Mini-parse handler for an unpacked repeated fixed-width (32- or 64-bit)
// field.
//
// A length-delimited wire type is forwarded to MpPackedFixed; any other wire
// type that does not match the representation goes to the table's fallback.
// Otherwise values are appended for as long as the following tag equals the
// tag that led here.
template <bool is_split>
PROTOBUF_NOINLINE const char* TcParser::MpRepeatedFixed(
    PROTOBUF_TC_PARAM_DECL) {
  const auto& entry = RefAt<FieldEntry>(table, data.entry_offset());
  const uint32_t decoded_tag = data.tag();
  const uint32_t decoded_wiretype = decoded_tag & 7;

  // Check for packed repeated fallback:
  if (decoded_wiretype == WireFormatLite::WIRETYPE_LENGTH_DELIMITED) {
    PROTOBUF_MUSTTAIL return MpPackedFixed<is_split>(PROTOBUF_TC_PARAM_PASS);
  }

  void* const base = MaybeGetSplitBase(msg, is_split, table);
  const uint16_t type_card = entry.type_card;
  const uint16_t rep = type_card & field_layout::kRepMask;
  if (rep == field_layout::kRep64Bits) {
    if (decoded_wiretype != WireFormatLite::WIRETYPE_FIXED64) {
      PROTOBUF_MUSTTAIL return table->fallback(PROTOBUF_TC_PARAM_PASS);
    }
    auto& field = MaybeCreateRepeatedFieldRefAt<uint64_t, is_split>(
        base, entry.offset, msg);
    constexpr auto size = sizeof(uint64_t);
    // ptr2 is the look-ahead position just past the next tag. It is copied
    // into ptr only at the top of an iteration, i.e. once that tag is known to
    // match. When the loop exits, ptr still points at the unconsumed tag.
    const char* ptr2 = ptr;
    uint32_t next_tag;
    do {
      ptr = ptr2;
      *field.Add() = UnalignedLoad<uint64_t>(ptr);
      ptr += size;
      if (PROTOBUF_PREDICT_FALSE(!ctx->DataAvailable(ptr))) goto parse_loop;
      ptr2 = ReadTag(ptr, &next_tag);
      if (PROTOBUF_PREDICT_FALSE(ptr2 == nullptr)) goto error;
    } while (next_tag == decoded_tag);
  } else {
    ABSL_DCHECK_EQ(rep, static_cast<uint16_t>(field_layout::kRep32Bits));
    if (decoded_wiretype != WireFormatLite::WIRETYPE_FIXED32) {
      PROTOBUF_MUSTTAIL return table->fallback(PROTOBUF_TC_PARAM_PASS);
    }
    auto& field = MaybeCreateRepeatedFieldRefAt<uint32_t, is_split>(
        base, entry.offset, msg);
    constexpr auto size = sizeof(uint32_t);
    // Same look-ahead scheme as the 64-bit branch above.
    const char* ptr2 = ptr;
    uint32_t next_tag;
    do {
      ptr = ptr2;
      *field.Add() = UnalignedLoad<uint32_t>(ptr);
      ptr += size;
      if (PROTOBUF_PREDICT_FALSE(!ctx->DataAvailable(ptr))) goto parse_loop;
      ptr2 = ReadTag(ptr, &next_tag);
      if (PROTOBUF_PREDICT_FALSE(ptr2 == nullptr)) goto error;
    } while (next_tag == decoded_tag);
  }

  // A different tag follows: continue with normal tag dispatch.
  PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_NO_DATA_PASS);
parse_loop:
  PROTOBUF_MUSTTAIL return ToParseLoop(PROTOBUF_TC_PARAM_NO_DATA_PASS);
error:
  PROTOBUF_MUSTTAIL return Error(PROTOBUF_TC_PARAM_NO_DATA_PASS);
}
1812 
1813 template <bool is_split>
MpPackedFixed(PROTOBUF_TC_PARAM_DECL)1814 PROTOBUF_NOINLINE const char* TcParser::MpPackedFixed(PROTOBUF_TC_PARAM_DECL) {
1815   const auto& entry = RefAt<FieldEntry>(table, data.entry_offset());
1816   const uint16_t type_card = entry.type_card;
1817   const uint32_t decoded_wiretype = data.tag() & 7;
1818 
1819   // Check for non-packed repeated fallback:
1820   if (decoded_wiretype != WireFormatLite::WIRETYPE_LENGTH_DELIMITED) {
1821     PROTOBUF_MUSTTAIL return MpRepeatedFixed<is_split>(PROTOBUF_TC_PARAM_PASS);
1822   }
1823 
1824   void* const base = MaybeGetSplitBase(msg, is_split, table);
1825   int size = ReadSize(&ptr);
1826   uint16_t rep = type_card & field_layout::kRepMask;
1827   if (rep == field_layout::kRep64Bits) {
1828     auto& field = MaybeCreateRepeatedFieldRefAt<uint64_t, is_split>(
1829         base, entry.offset, msg);
1830     ptr = ctx->ReadPackedFixed(ptr, size, &field);
1831   } else {
1832     ABSL_DCHECK_EQ(rep, static_cast<uint16_t>(field_layout::kRep32Bits));
1833     auto& field = MaybeCreateRepeatedFieldRefAt<uint32_t, is_split>(
1834         base, entry.offset, msg);
1835     ptr = ctx->ReadPackedFixed(ptr, size, &field);
1836   }
1837 
1838   if (PROTOBUF_PREDICT_FALSE(ptr == nullptr)) {
1839     PROTOBUF_MUSTTAIL return Error(PROTOBUF_TC_PARAM_NO_DATA_PASS);
1840   }
1841   PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_NO_DATA_PASS);
1842 }
1843 
1844 template <bool is_split>
MpVarint(PROTOBUF_TC_PARAM_DECL)1845 PROTOBUF_NOINLINE const char* TcParser::MpVarint(PROTOBUF_TC_PARAM_DECL) {
1846   const auto& entry = RefAt<FieldEntry>(table, data.entry_offset());
1847   const uint16_t type_card = entry.type_card;
1848   const uint16_t card = type_card & field_layout::kFcMask;
1849 
1850   // Check for repeated parsing:
1851   if (card == field_layout::kFcRepeated) {
1852     PROTOBUF_MUSTTAIL return MpRepeatedVarint<is_split>(PROTOBUF_TC_PARAM_PASS);
1853   }
1854   // Check for wire type mismatch:
1855   if ((data.tag() & 7) != WireFormatLite::WIRETYPE_VARINT) {
1856     PROTOBUF_MUSTTAIL return table->fallback(PROTOBUF_TC_PARAM_PASS);
1857   }
1858   const uint16_t xform_val = type_card & field_layout::kTvMask;
1859   const bool is_zigzag = xform_val == field_layout::kTvZigZag;
1860   const bool is_validated_enum = xform_val & field_layout::kTvEnum;
1861 
1862   // Parse the value:
1863   const char* ptr2 = ptr;  // save for unknown enum case
1864   uint64_t tmp;
1865   ptr = ParseVarint(ptr, &tmp);
1866   if (ptr == nullptr) {
1867     PROTOBUF_MUSTTAIL return Error(PROTOBUF_TC_PARAM_NO_DATA_PASS);
1868   }
1869 
1870   // Transform and/or validate the value
1871   uint16_t rep = type_card & field_layout::kRepMask;
1872   if (rep == field_layout::kRep64Bits) {
1873     if (is_zigzag) {
1874       tmp = WireFormatLite::ZigZagDecode64(tmp);
1875     }
1876   } else if (rep == field_layout::kRep32Bits) {
1877     if (is_validated_enum) {
1878       if (!EnumIsValidAux(tmp, xform_val, *table->field_aux(&entry))) {
1879         ptr = ptr2;
1880         PROTOBUF_MUSTTAIL return MpUnknownEnumFallback(PROTOBUF_TC_PARAM_PASS);
1881       }
1882     } else if (is_zigzag) {
1883       tmp = WireFormatLite::ZigZagDecode32(static_cast<uint32_t>(tmp));
1884     }
1885   }
1886 
1887   // Mark the field as present:
1888   const bool is_oneof = card == field_layout::kFcOneof;
1889   if (card == field_layout::kFcOptional) {
1890     SetHas(entry, msg);
1891   } else if (is_oneof) {
1892     ChangeOneof(table, entry, data.tag() >> 3, ctx, msg);
1893   }
1894 
1895   void* const base = MaybeGetSplitBase(msg, is_split, table);
1896   if (rep == field_layout::kRep64Bits) {
1897     RefAt<uint64_t>(base, entry.offset) = tmp;
1898   } else if (rep == field_layout::kRep32Bits) {
1899     RefAt<uint32_t>(base, entry.offset) = static_cast<uint32_t>(tmp);
1900   } else {
1901     ABSL_DCHECK_EQ(rep, static_cast<uint16_t>(field_layout::kRep8Bits));
1902     RefAt<bool>(base, entry.offset) = static_cast<bool>(tmp);
1903   }
1904 
1905   PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_NO_DATA_PASS);
1906 }
1907 
// Typed worker for unpacked repeated varint fields.
//
// Template parameters:
//   is_split     - whether the field lives in the message's split struct.
//   FieldType    - element type of the RepeatedField.
//   xform_val_in - transform/validation selector; ignored when is_split is
//                  true (the value is then re-read from the field entry).
//
// Appends values for as long as the following tag equals the tag that led
// here. A validated-enum value that fails the check rewinds `ptr` to the start
// of that value and hands over to MpUnknownEnumFallback.
template <bool is_split, typename FieldType, uint16_t xform_val_in>
const char* TcParser::MpRepeatedVarintT(PROTOBUF_TC_PARAM_DECL) {
  const auto& entry = RefAt<FieldEntry>(table, data.entry_offset());
  const uint32_t decoded_tag = data.tag();
  // For is_split we ignore the incoming xform_val and read it from entry to
  // reduce duplication for the uncommon paths.
  const uint16_t xform_val =
      is_split ? (entry.type_card & field_layout::kTvMask) : xform_val_in;
  const bool is_zigzag = xform_val == field_layout::kTvZigZag;
  const bool is_validated_enum = xform_val & field_layout::kTvEnum;

  // ptr2 marks where the next value starts: initially the current position,
  // afterwards the position just past a look-ahead tag.
  const char* ptr2 = ptr;
  uint32_t next_tag;
  void* const base = MaybeGetSplitBase(msg, is_split, table);
  auto& field = MaybeCreateRepeatedFieldRefAt<FieldType, is_split>(
      base, entry.offset, msg);

  // Copy the aux data once up front; it is only needed for validated enums.
  TcParseTableBase::FieldAux aux;
  if (is_validated_enum) {
    aux = *table->field_aux(&entry);
    PrefetchEnumData(xform_val, aux);
  }

  do {
    uint64_t tmp;
    ptr = ParseVarint(ptr2, &tmp);
    if (PROTOBUF_PREDICT_FALSE(ptr == nullptr)) goto error;
    if (is_validated_enum) {
      if (!EnumIsValidAux(static_cast<int32_t>(tmp), xform_val, aux)) {
        // Rewind to the start of this value before delegating.
        ptr = ptr2;
        PROTOBUF_MUSTTAIL return MpUnknownEnumFallback(PROTOBUF_TC_PARAM_PASS);
      }
    } else if (is_zigzag) {
      tmp = sizeof(FieldType) == 8 ? WireFormatLite::ZigZagDecode64(tmp)
                                   : WireFormatLite::ZigZagDecode32(tmp);
    }
    field.Add(static_cast<FieldType>(tmp));
    if (PROTOBUF_PREDICT_FALSE(!ctx->DataAvailable(ptr))) goto parse_loop;
    ptr2 = ReadTag(ptr, &next_tag);
    if (PROTOBUF_PREDICT_FALSE(ptr2 == nullptr)) goto error;
  } while (next_tag == decoded_tag);

  // When the loop ends because a different tag follows, control falls through
  // to parse_loop. `ptr` still points at that unconsumed tag (only ptr2 was
  // advanced past it).
parse_loop:
  PROTOBUF_MUSTTAIL return ToParseLoop(PROTOBUF_TC_PARAM_NO_DATA_PASS);
error:
  PROTOBUF_MUSTTAIL return Error(PROTOBUF_TC_PARAM_NO_DATA_PASS);
}
1955 
// Mini-parse handler for an unpacked repeated varint field.
//
// A length-delimited wire type is forwarded to MpPackedVarint; any other
// non-varint wire type goes to the table's fallback. Otherwise the call is
// dispatched to the MpRepeatedVarintT instantiation that matches the field's
// representation (64-bit, 32-bit or 8-bit/bool) and transform (none, zigzag,
// enum, range).
template <bool is_split>
PROTOBUF_NOINLINE const char* TcParser::MpRepeatedVarint(
    PROTOBUF_TC_PARAM_DECL) {
  const auto& entry = RefAt<FieldEntry>(table, data.entry_offset());
  const auto type_card = entry.type_card;
  const uint32_t decoded_tag = data.tag();
  const auto decoded_wiretype = decoded_tag & 7;

  // Check for packed repeated fallback:
  if (decoded_wiretype == WireFormatLite::WIRETYPE_LENGTH_DELIMITED) {
    PROTOBUF_MUSTTAIL return MpPackedVarint<is_split>(PROTOBUF_TC_PARAM_PASS);
  }
  // Check for wire type mismatch:
  if (decoded_wiretype != WireFormatLite::WIRETYPE_VARINT) {
    PROTOBUF_MUSTTAIL return table->fallback(PROTOBUF_TC_PARAM_PASS);
  }
  // For split we avoid the duplicate code and have the impl reload the value.
  // Less code bloat for uncommon paths.
  // Hence every non-zero transform below is passed as 0 when is_split is true.
  const uint16_t xform_val = (type_card & field_layout::kTvMask);
  const uint16_t rep = type_card & field_layout::kRepMask;
  switch (rep >> field_layout::kRepShift) {
    case field_layout::kRep64Bits >> field_layout::kRepShift:
      // 64-bit values: either no transform or zigzag.
      if (xform_val == 0) {
        PROTOBUF_MUSTTAIL return MpRepeatedVarintT<is_split, uint64_t, 0>(
            PROTOBUF_TC_PARAM_PASS);
      } else {
        ABSL_DCHECK_EQ(xform_val, +field_layout::kTvZigZag);
        PROTOBUF_MUSTTAIL return MpRepeatedVarintT<
            is_split, uint64_t, (is_split ? 0 : field_layout::kTvZigZag)>(
            PROTOBUF_TC_PARAM_PASS);
      }
    case field_layout::kRep32Bits >> field_layout::kRepShift:
      // 32-bit values: no transform, zigzag, validated enum, or enum range.
      switch (xform_val >> field_layout::kTvShift) {
        case 0:
          PROTOBUF_MUSTTAIL return MpRepeatedVarintT<is_split, uint32_t, 0>(
              PROTOBUF_TC_PARAM_PASS);
        case field_layout::kTvZigZag >> field_layout::kTvShift:
          PROTOBUF_MUSTTAIL return MpRepeatedVarintT<
              is_split, uint32_t, (is_split ? 0 : field_layout::kTvZigZag)>(
              PROTOBUF_TC_PARAM_PASS);
        case field_layout::kTvEnum >> field_layout::kTvShift:
          PROTOBUF_MUSTTAIL return MpRepeatedVarintT<
              is_split, uint32_t, (is_split ? 0 : field_layout::kTvEnum)>(
              PROTOBUF_TC_PARAM_PASS);
        case field_layout::kTvRange >> field_layout::kTvShift:
          PROTOBUF_MUSTTAIL return MpRepeatedVarintT<
              is_split, uint32_t, (is_split ? 0 : field_layout::kTvRange)>(
              PROTOBUF_TC_PARAM_PASS);
        default:
          // NOTE(review): presumably Unreachable() does not return, so control
          // never falls into the 8-bit case below -- confirm.
          Unreachable();
      }
    case field_layout::kRep8Bits >> field_layout::kRepShift:
      // 8-bit representation is stored as bool; no transform applies.
      PROTOBUF_MUSTTAIL return MpRepeatedVarintT<is_split, bool, 0>(
          PROTOBUF_TC_PARAM_PASS);

    default:
      Unreachable();
      return nullptr;  // To silence -Werror=return-type in some toolchains
  }
}
2016 
2017 template <bool is_split, typename FieldType, uint16_t xform_val_in>
MpPackedVarintT(PROTOBUF_TC_PARAM_DECL)2018 const char* TcParser::MpPackedVarintT(PROTOBUF_TC_PARAM_DECL) {
2019   const auto& entry = RefAt<FieldEntry>(table, data.entry_offset());
2020   // For is_split we ignore the incoming xform_val and read it from entry to
2021   // reduce duplication for the uncommon paths.
2022   const uint16_t xform_val =
2023       is_split ? (entry.type_card & field_layout::kTvMask) : xform_val_in;
2024   const bool is_zigzag = xform_val == field_layout::kTvZigZag;
2025   const bool is_validated_enum = xform_val & field_layout::kTvEnum;
2026 
2027   void* const base = MaybeGetSplitBase(msg, is_split, table);
2028   auto* field = &MaybeCreateRepeatedFieldRefAt<FieldType, is_split>(
2029       base, entry.offset, msg);
2030 
2031   if (is_validated_enum) {
2032     const TcParseTableBase::FieldAux aux = *table->field_aux(entry.aux_idx);
2033     PrefetchEnumData(xform_val, aux);
2034     return ctx->ReadPackedVarint(ptr, [=](int32_t value) {
2035       if (!EnumIsValidAux(value, xform_val, aux)) {
2036         AddUnknownEnum(msg, table, data.tag(), value);
2037       } else {
2038         field->Add(value);
2039       }
2040     });
2041   } else {
2042     return ctx->ReadPackedVarint(ptr, [=](uint64_t value) {
2043       field->Add(is_zigzag ? (sizeof(FieldType) == 8
2044                                   ? WireFormatLite::ZigZagDecode64(value)
2045                                   : WireFormatLite::ZigZagDecode32(
2046                                         static_cast<uint32_t>(value)))
2047                            : value);
2048     });
2049   }
2050 }
2051 
2052 template <bool is_split>
MpPackedVarint(PROTOBUF_TC_PARAM_DECL)2053 PROTOBUF_NOINLINE const char* TcParser::MpPackedVarint(PROTOBUF_TC_PARAM_DECL) {
2054   const auto& entry = RefAt<FieldEntry>(table, data.entry_offset());
2055   const auto type_card = entry.type_card;
2056   const auto decoded_wiretype = data.tag() & 7;
2057 
2058   // Check for non-packed repeated fallback:
2059   if (decoded_wiretype != WireFormatLite::WIRETYPE_LENGTH_DELIMITED) {
2060     PROTOBUF_MUSTTAIL return MpRepeatedVarint<is_split>(PROTOBUF_TC_PARAM_PASS);
2061   }
2062 
2063   // For split we avoid the duplicate code and have the impl reload the value.
2064   // Less code bloat for uncommon paths.
2065   const uint16_t xform_val = (type_card & field_layout::kTvMask);
2066 
2067   // Since ctx->ReadPackedFixed does not use TailCall<> or Return<>, sync any
2068   // pending hasbits now:
2069   SyncHasbits(msg, hasbits, table);
2070 
2071   const uint16_t rep = type_card & field_layout::kRepMask;
2072 
2073   switch (rep >> field_layout::kRepShift) {
2074     case field_layout::kRep64Bits >> field_layout::kRepShift:
2075       if (xform_val == 0) {
2076         PROTOBUF_MUSTTAIL return MpPackedVarintT<is_split, uint64_t, 0>(
2077             PROTOBUF_TC_PARAM_PASS);
2078       } else {
2079         ABSL_DCHECK_EQ(xform_val, +field_layout::kTvZigZag);
2080         PROTOBUF_MUSTTAIL return MpPackedVarintT<
2081             is_split, uint64_t, (is_split ? 0 : field_layout::kTvZigZag)>(
2082             PROTOBUF_TC_PARAM_PASS);
2083       }
2084     case field_layout::kRep32Bits >> field_layout::kRepShift:
2085       switch (xform_val >> field_layout::kTvShift) {
2086         case 0:
2087           PROTOBUF_MUSTTAIL return MpPackedVarintT<is_split, uint32_t, 0>(
2088               PROTOBUF_TC_PARAM_PASS);
2089         case field_layout::kTvZigZag >> field_layout::kTvShift:
2090           PROTOBUF_MUSTTAIL return MpPackedVarintT<
2091               is_split, uint32_t, (is_split ? 0 : field_layout::kTvZigZag)>(
2092               PROTOBUF_TC_PARAM_PASS);
2093         case field_layout::kTvEnum >> field_layout::kTvShift:
2094           PROTOBUF_MUSTTAIL return MpPackedVarintT<
2095               is_split, uint32_t, (is_split ? 0 : field_layout::kTvEnum)>(
2096               PROTOBUF_TC_PARAM_PASS);
2097         case field_layout::kTvRange >> field_layout::kTvShift:
2098           PROTOBUF_MUSTTAIL return MpPackedVarintT<
2099               is_split, uint32_t, (is_split ? 0 : field_layout::kTvRange)>(
2100               PROTOBUF_TC_PARAM_PASS);
2101         default:
2102           Unreachable();
2103       }
2104     case field_layout::kRep8Bits >> field_layout::kRepShift:
2105       PROTOBUF_MUSTTAIL return MpPackedVarintT<is_split, bool, 0>(
2106           PROTOBUF_TC_PARAM_PASS);
2107 
2108     default:
2109       Unreachable();
2110       return nullptr;  // To silence -Werror=return-type in some toolchains
2111   }
2112 }
2113 
MpVerifyUtf8(absl::string_view wire_bytes,const TcParseTableBase * table,const FieldEntry & entry,uint16_t xform_val)2114 bool TcParser::MpVerifyUtf8(absl::string_view wire_bytes,
2115                             const TcParseTableBase* table,
2116                             const FieldEntry& entry, uint16_t xform_val) {
2117   if (xform_val == field_layout::kTvUtf8) {
2118     if (!utf8_range::IsStructurallyValid(wire_bytes)) {
2119       PrintUTF8ErrorLog(MessageName(table), FieldName(table, &entry), "parsing",
2120                         false);
2121       return false;
2122     }
2123     return true;
2124   }
2125 #ifndef NDEBUG
2126   if (xform_val == field_layout::kTvUtf8Debug) {
2127     if (!utf8_range::IsStructurallyValid(wire_bytes)) {
2128       PrintUTF8ErrorLog(MessageName(table), FieldName(table, &entry), "parsing",
2129                         false);
2130     }
2131   }
2132 #endif  // NDEBUG
2133   return true;
2134 }
MpVerifyUtf8(const absl::Cord & wire_bytes,const TcParseTableBase * table,const FieldEntry & entry,uint16_t xform_val)2135 bool TcParser::MpVerifyUtf8(const absl::Cord& wire_bytes,
2136                             const TcParseTableBase* table,
2137                             const FieldEntry& entry, uint16_t xform_val) {
2138   switch (xform_val) {
2139     default:
2140       ABSL_DCHECK_EQ(xform_val, 0);
2141       return true;
2142   }
2143 }
2144 
2145 template <bool is_split>
MpString(PROTOBUF_TC_PARAM_DECL)2146 PROTOBUF_NOINLINE const char* TcParser::MpString(PROTOBUF_TC_PARAM_DECL) {
2147   const auto& entry = RefAt<FieldEntry>(table, data.entry_offset());
2148   const uint16_t type_card = entry.type_card;
2149   const uint16_t card = type_card & field_layout::kFcMask;
2150   const uint32_t decoded_wiretype = data.tag() & 7;
2151 
2152   if (decoded_wiretype != WireFormatLite::WIRETYPE_LENGTH_DELIMITED) {
2153     PROTOBUF_MUSTTAIL return table->fallback(PROTOBUF_TC_PARAM_PASS);
2154   }
2155   if (card == field_layout::kFcRepeated) {
2156     PROTOBUF_MUSTTAIL return MpRepeatedString<is_split>(PROTOBUF_TC_PARAM_PASS);
2157   }
2158   const uint16_t xform_val = type_card & field_layout::kTvMask;
2159   const uint16_t rep = type_card & field_layout::kRepMask;
2160 
2161   // Mark the field as present:
2162   const bool is_oneof = card == field_layout::kFcOneof;
2163   bool need_init = false;
2164   if (card == field_layout::kFcOptional) {
2165     SetHas(entry, msg);
2166   } else if (is_oneof) {
2167     need_init = ChangeOneof(table, entry, data.tag() >> 3, ctx, msg);
2168   }
2169 
2170   bool is_valid = false;
2171   void* const base = MaybeGetSplitBase(msg, is_split, table);
2172   switch (rep) {
2173     case field_layout::kRepAString: {
2174       auto& field = RefAt<ArenaStringPtr>(base, entry.offset);
2175       if (need_init) field.InitDefault();
2176       Arena* arena = msg->GetArena();
2177       if (arena) {
2178         ptr = ctx->ReadArenaString(ptr, &field, arena);
2179       } else {
2180         std::string* str = field.MutableNoCopy(nullptr);
2181         ptr = InlineGreedyStringParser(str, ptr, ctx);
2182       }
2183       if (!ptr) break;
2184       is_valid = MpVerifyUtf8(field.Get(), table, entry, xform_val);
2185       break;
2186     }
2187 
2188 
2189     case field_layout::kRepCord: {
2190       absl::Cord* field;
2191       if (is_oneof) {
2192         if (need_init) {
2193           field = Arena::Create<absl::Cord>(msg->GetArena());
2194           RefAt<absl::Cord*>(msg, entry.offset) = field;
2195         } else {
2196           field = RefAt<absl::Cord*>(msg, entry.offset);
2197         }
2198       } else {
2199         field = &RefAt<absl::Cord>(base, entry.offset);
2200       }
2201       ptr = InlineCordParser(field, ptr, ctx);
2202       if (!ptr) break;
2203       is_valid = MpVerifyUtf8(*field, table, entry, xform_val);
2204       break;
2205     }
2206 
2207     default:
2208       Unreachable();
2209   }
2210 
2211   if (PROTOBUF_PREDICT_FALSE(ptr == nullptr || !is_valid)) {
2212     PROTOBUF_MUSTTAIL return Error(PROTOBUF_TC_PARAM_NO_DATA_PASS);
2213   }
2214   PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_NO_DATA_PASS);
2215 }
2216 
ParseRepeatedStringOnce(const char * ptr,SerialArena * serial_arena,ParseContext * ctx,RepeatedPtrField<std::string> & field)2217 inline PROTOBUF_ALWAYS_INLINE const char* TcParser::ParseRepeatedStringOnce(
2218     const char* ptr, SerialArena* serial_arena, ParseContext* ctx,
2219     RepeatedPtrField<std::string>& field) {
2220   int size = ReadSize(&ptr);
2221   if (PROTOBUF_PREDICT_FALSE(!ptr)) return {};
2222   auto* str = new (serial_arena->AllocateFromStringBlock()) std::string();
2223   field.AddAllocatedForParse(str);
2224   ptr = ctx->ReadString(ptr, size, str);
2225   if (PROTOBUF_PREDICT_FALSE(!ptr)) return {};
2226   PROTOBUF_ASSUME(ptr != nullptr);
2227   return ptr;
2228 }
2229 
2230 template <bool is_split>
MpRepeatedString(PROTOBUF_TC_PARAM_DECL)2231 PROTOBUF_NOINLINE const char* TcParser::MpRepeatedString(
2232     PROTOBUF_TC_PARAM_DECL) {
2233   const auto& entry = RefAt<FieldEntry>(table, data.entry_offset());
2234   const uint16_t type_card = entry.type_card;
2235   const uint32_t decoded_tag = data.tag();
2236   const uint32_t decoded_wiretype = decoded_tag & 7;
2237 
2238   if (decoded_wiretype != WireFormatLite::WIRETYPE_LENGTH_DELIMITED) {
2239     PROTOBUF_MUSTTAIL return table->fallback(PROTOBUF_TC_PARAM_PASS);
2240   }
2241 
2242   const uint16_t rep = type_card & field_layout::kRepMask;
2243   const uint16_t xform_val = type_card & field_layout::kTvMask;
2244   void* const base = MaybeGetSplitBase(msg, is_split, table);
2245   switch (rep) {
2246     case field_layout::kRepSString: {
2247       auto& field = MaybeCreateRepeatedPtrFieldRefAt<std::string, is_split>(
2248           base, entry.offset, msg);
2249       const char* ptr2 = ptr;
2250       uint32_t next_tag;
2251 
2252       auto* arena = field.GetArena();
2253       SerialArena* serial_arena;
2254       if (PROTOBUF_PREDICT_TRUE(
2255               arena != nullptr &&
2256               arena->impl_.GetSerialArenaFast(&serial_arena) &&
2257               field.PrepareForParse())) {
2258         do {
2259           ptr = ptr2;
2260           ptr = ParseRepeatedStringOnce(ptr, serial_arena, ctx, field);
2261           if (PROTOBUF_PREDICT_FALSE(ptr == nullptr ||
2262                                      !MpVerifyUtf8(field[field.size() - 1],
2263                                                    table, entry, xform_val))) {
2264             PROTOBUF_MUSTTAIL return Error(PROTOBUF_TC_PARAM_NO_DATA_PASS);
2265           }
2266           if (PROTOBUF_PREDICT_FALSE(!ctx->DataAvailable(ptr))) goto parse_loop;
2267           ptr2 = ReadTag(ptr, &next_tag);
2268         } while (next_tag == decoded_tag);
2269       } else {
2270         do {
2271           ptr = ptr2;
2272           std::string* str = field.Add();
2273           ptr = InlineGreedyStringParser(str, ptr, ctx);
2274           if (PROTOBUF_PREDICT_FALSE(
2275                   ptr == nullptr ||
2276                   !MpVerifyUtf8(*str, table, entry, xform_val))) {
2277             PROTOBUF_MUSTTAIL return Error(PROTOBUF_TC_PARAM_NO_DATA_PASS);
2278           }
2279           if (PROTOBUF_PREDICT_FALSE(!ctx->DataAvailable(ptr))) goto parse_loop;
2280           ptr2 = ReadTag(ptr, &next_tag);
2281         } while (next_tag == decoded_tag);
2282       }
2283 
2284       break;
2285     }
2286 
2287 #ifndef NDEBUG
2288     default:
2289       ABSL_LOG(FATAL) << "Unsupported repeated string rep: " << rep;
2290       break;
2291 #endif
2292   }
2293 
2294   PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_NO_DATA_PASS);
2295 parse_loop:
2296   PROTOBUF_MUSTTAIL return ToParseLoop(PROTOBUF_TC_PARAM_NO_DATA_PASS);
2297 }
2298 
2299 
GetTableFromAux(uint16_t type_card,TcParseTableBase::FieldAux aux)2300 inline const TcParseTableBase* TcParser::GetTableFromAux(
2301     uint16_t type_card, TcParseTableBase::FieldAux aux) {
2302   uint16_t tv = type_card & field_layout::kTvMask;
2303   if (ABSL_PREDICT_TRUE(tv == field_layout::kTvTable)) {
2304     return aux.table;
2305   }
2306   ABSL_DCHECK(tv == field_layout::kTvDefault || tv == field_layout::kTvWeakPtr);
2307   const MessageLite* prototype = tv == field_layout::kTvDefault
2308                                      ? aux.message_default()
2309                                      : aux.message_default_weak();
2310   return prototype->GetTcParseTable();
2311 }
2312 
2313 template <bool is_split>
MpMessage(PROTOBUF_TC_PARAM_DECL)2314 PROTOBUF_NOINLINE const char* TcParser::MpMessage(PROTOBUF_TC_PARAM_DECL) {
2315   const auto& entry = RefAt<FieldEntry>(table, data.entry_offset());
2316   const uint16_t type_card = entry.type_card;
2317   const uint16_t card = type_card & field_layout::kFcMask;
2318 
2319   // Check for repeated parsing:
2320   if (card == field_layout::kFcRepeated) {
2321     const uint16_t rep = type_card & field_layout::kRepMask;
2322     switch (rep) {
2323       case field_layout::kRepMessage:
2324         PROTOBUF_MUSTTAIL return MpRepeatedMessageOrGroup<is_split, false>(
2325             PROTOBUF_TC_PARAM_PASS);
2326       case field_layout::kRepGroup:
2327         PROTOBUF_MUSTTAIL return MpRepeatedMessageOrGroup<is_split, true>(
2328             PROTOBUF_TC_PARAM_PASS);
2329       default:
2330         PROTOBUF_MUSTTAIL return table->fallback(PROTOBUF_TC_PARAM_PASS);
2331     }
2332   }
2333 
2334   const uint32_t decoded_tag = data.tag();
2335   const uint32_t decoded_wiretype = decoded_tag & 7;
2336   const uint16_t rep = type_card & field_layout::kRepMask;
2337   const bool is_group = rep == field_layout::kRepGroup;
2338 
2339   // Validate wiretype:
2340   switch (rep) {
2341     case field_layout::kRepMessage:
2342       if (decoded_wiretype != WireFormatLite::WIRETYPE_LENGTH_DELIMITED) {
2343         goto fallback;
2344       }
2345       break;
2346     case field_layout::kRepGroup:
2347       if (decoded_wiretype != WireFormatLite::WIRETYPE_START_GROUP) {
2348         goto fallback;
2349       }
2350       break;
2351     default: {
2352     fallback:
2353       PROTOBUF_MUSTTAIL return table->fallback(PROTOBUF_TC_PARAM_PASS);
2354     }
2355   }
2356 
2357   const bool is_oneof = card == field_layout::kFcOneof;
2358   bool need_init = false;
2359   if (card == field_layout::kFcOptional) {
2360     SetHas(entry, msg);
2361   } else if (is_oneof) {
2362     need_init = ChangeOneof(table, entry, data.tag() >> 3, ctx, msg);
2363   }
2364 
2365   void* const base = MaybeGetSplitBase(msg, is_split, table);
2366   SyncHasbits(msg, hasbits, table);
2367   MessageLite*& field = RefAt<MessageLite*>(base, entry.offset);
2368 
2369   const TcParseTableBase* inner_table =
2370       GetTableFromAux(type_card, *table->field_aux(&entry));
2371   if (need_init || field == nullptr) {
2372     field = NewMessage(inner_table, msg->GetArena());
2373   }
2374   const auto inner_loop = [&](const char* ptr) {
2375     return ParseLoopPreserveNone(field, ptr, ctx, inner_table);
2376   };
2377   return is_group ? ctx->ParseGroupInlined(ptr, decoded_tag, inner_loop)
2378                   : ctx->ParseLengthDelimitedInlined(ptr, inner_loop);
2379 }
2380 
2381 template <bool is_split, bool is_group>
MpRepeatedMessageOrGroup(PROTOBUF_TC_PARAM_DECL)2382 const char* TcParser::MpRepeatedMessageOrGroup(PROTOBUF_TC_PARAM_DECL) {
2383   const auto& entry = RefAt<FieldEntry>(table, data.entry_offset());
2384   const uint16_t type_card = entry.type_card;
2385   ABSL_DCHECK_EQ(type_card & field_layout::kFcMask,
2386                  static_cast<uint16_t>(field_layout::kFcRepeated));
2387   const uint32_t decoded_tag = data.tag();
2388   const uint32_t decoded_wiretype = decoded_tag & 7;
2389 
2390   // Validate wiretype:
2391   if (!is_group) {
2392     ABSL_DCHECK_EQ(type_card & field_layout::kRepMask,
2393                    static_cast<uint16_t>(field_layout::kRepMessage));
2394     if (decoded_wiretype != WireFormatLite::WIRETYPE_LENGTH_DELIMITED) {
2395       PROTOBUF_MUSTTAIL return table->fallback(PROTOBUF_TC_PARAM_PASS);
2396     }
2397   } else {
2398     ABSL_DCHECK_EQ(type_card & field_layout::kRepMask,
2399                    static_cast<uint16_t>(field_layout::kRepGroup));
2400     if (decoded_wiretype != WireFormatLite::WIRETYPE_START_GROUP) {
2401       PROTOBUF_MUSTTAIL return table->fallback(PROTOBUF_TC_PARAM_PASS);
2402     }
2403   }
2404 
2405   void* const base = MaybeGetSplitBase(msg, is_split, table);
2406   RepeatedPtrFieldBase& field =
2407       MaybeCreateRepeatedRefAt<RepeatedPtrFieldBase, is_split>(
2408           base, entry.offset, msg);
2409   const TcParseTableBase* inner_table =
2410       GetTableFromAux(type_card, *table->field_aux(&entry));
2411 
2412   const char* ptr2 = ptr;
2413   uint32_t next_tag;
2414   do {
2415     MessageLite* value = AddMessage(inner_table, field);
2416     const auto inner_loop = [&](const char* ptr) {
2417       return ParseLoopPreserveNone(value, ptr, ctx, inner_table);
2418     };
2419     ptr = is_group ? ctx->ParseGroupInlined(ptr2, decoded_tag, inner_loop)
2420                    : ctx->ParseLengthDelimitedInlined(ptr2, inner_loop);
2421     if (PROTOBUF_PREDICT_FALSE(ptr == nullptr)) goto error;
2422     if (PROTOBUF_PREDICT_FALSE(!ctx->DataAvailable(ptr))) goto parse_loop;
2423     ptr2 = ReadTag(ptr, &next_tag);
2424     if (PROTOBUF_PREDICT_FALSE(ptr2 == nullptr)) goto error;
2425   } while (next_tag == decoded_tag);
2426   PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_NO_DATA_PASS);
2427 parse_loop:
2428   PROTOBUF_MUSTTAIL return ToParseLoop(PROTOBUF_TC_PARAM_NO_DATA_PASS);
2429 error:
2430   PROTOBUF_MUSTTAIL return Error(PROTOBUF_TC_PARAM_NO_DATA_PASS);
2431 }
2432 
SerializeMapKey(const NodeBase * node,MapTypeCard type_card,io::CodedOutputStream & coded_output)2433 static void SerializeMapKey(const NodeBase* node, MapTypeCard type_card,
2434                             io::CodedOutputStream& coded_output) {
2435   switch (type_card.wiretype()) {
2436     case WireFormatLite::WIRETYPE_VARINT:
2437       switch (type_card.cpp_type()) {
2438         case MapTypeCard::kBool:
2439           WireFormatLite::WriteBool(
2440               1, static_cast<const KeyNode<bool>*>(node)->key(), &coded_output);
2441           break;
2442         case MapTypeCard::k32:
2443           if (type_card.is_zigzag()) {
2444             WireFormatLite::WriteSInt32(
2445                 1, static_cast<const KeyNode<uint32_t>*>(node)->key(),
2446                 &coded_output);
2447           } else if (type_card.is_signed()) {
2448             WireFormatLite::WriteInt32(
2449                 1, static_cast<const KeyNode<uint32_t>*>(node)->key(),
2450                 &coded_output);
2451           } else {
2452             WireFormatLite::WriteUInt32(
2453                 1, static_cast<const KeyNode<uint32_t>*>(node)->key(),
2454                 &coded_output);
2455           }
2456           break;
2457         case MapTypeCard::k64:
2458           if (type_card.is_zigzag()) {
2459             WireFormatLite::WriteSInt64(
2460                 1, static_cast<const KeyNode<uint64_t>*>(node)->key(),
2461                 &coded_output);
2462           } else if (type_card.is_signed()) {
2463             WireFormatLite::WriteInt64(
2464                 1, static_cast<const KeyNode<uint64_t>*>(node)->key(),
2465                 &coded_output);
2466           } else {
2467             WireFormatLite::WriteUInt64(
2468                 1, static_cast<const KeyNode<uint64_t>*>(node)->key(),
2469                 &coded_output);
2470           }
2471           break;
2472         default:
2473           Unreachable();
2474       }
2475       break;
2476     case WireFormatLite::WIRETYPE_FIXED32:
2477       WireFormatLite::WriteFixed32(
2478           1, static_cast<const KeyNode<uint32_t>*>(node)->key(), &coded_output);
2479       break;
2480     case WireFormatLite::WIRETYPE_FIXED64:
2481       WireFormatLite::WriteFixed64(
2482           1, static_cast<const KeyNode<uint64_t>*>(node)->key(), &coded_output);
2483       break;
2484     case WireFormatLite::WIRETYPE_LENGTH_DELIMITED:
2485       // We should never have a message here. They can only be values maps.
2486       ABSL_DCHECK_EQ(+type_card.cpp_type(), +MapTypeCard::kString);
2487       WireFormatLite::WriteString(
2488           1, static_cast<const KeyNode<std::string>*>(node)->key(),
2489           &coded_output);
2490       break;
2491     default:
2492       Unreachable();
2493   }
2494 }
2495 
WriteMapEntryAsUnknown(MessageLite * msg,const TcParseTableBase * table,uint32_t tag,NodeBase * node,MapAuxInfo map_info)2496 void TcParser::WriteMapEntryAsUnknown(MessageLite* msg,
2497                                       const TcParseTableBase* table,
2498                                       uint32_t tag, NodeBase* node,
2499                                       MapAuxInfo map_info) {
2500   std::string serialized;
2501   {
2502     io::StringOutputStream string_output(&serialized);
2503     io::CodedOutputStream coded_output(&string_output);
2504     SerializeMapKey(node, map_info.key_type_card, coded_output);
2505     // The mapped_type is always an enum here.
2506     ABSL_DCHECK(map_info.value_is_validated_enum);
2507     WireFormatLite::WriteInt32(2,
2508                                *reinterpret_cast<int32_t*>(
2509                                    node->GetVoidValue(map_info.node_size_info)),
2510                                &coded_output);
2511   }
2512   GetUnknownFieldOps(table).write_length_delimited(msg, tag >> 3, serialized);
2513 }
2514 
InitializeMapNodeEntry(void * obj,MapTypeCard type_card,UntypedMapBase & map,const TcParseTableBase::FieldAux * aux,bool is_key)2515 PROTOBUF_ALWAYS_INLINE inline void TcParser::InitializeMapNodeEntry(
2516     void* obj, MapTypeCard type_card, UntypedMapBase& map,
2517     const TcParseTableBase::FieldAux* aux, bool is_key) {
2518   (void)is_key;
2519   switch (type_card.cpp_type()) {
2520     case MapTypeCard::kBool:
2521       memset(obj, 0, sizeof(bool));
2522       break;
2523     case MapTypeCard::k32:
2524       memset(obj, 0, sizeof(uint32_t));
2525       break;
2526     case MapTypeCard::k64:
2527       memset(obj, 0, sizeof(uint64_t));
2528       break;
2529     case MapTypeCard::kString:
2530       Arena::CreateInArenaStorage(reinterpret_cast<std::string*>(obj),
2531                                   map.arena());
2532       break;
2533     case MapTypeCard::kMessage:
2534       aux[1].table->class_data->PlacementNew(obj, map.arena());
2535       break;
2536     default:
2537       Unreachable();
2538   }
2539 }
2540 
DestroyMapNode(NodeBase * node,MapAuxInfo map_info,UntypedMapBase & map)2541 PROTOBUF_NOINLINE void TcParser::DestroyMapNode(NodeBase* node,
2542                                                 MapAuxInfo map_info,
2543                                                 UntypedMapBase& map) {
2544   if (map_info.key_type_card.cpp_type() == MapTypeCard::kString) {
2545     static_cast<std::string*>(node->GetVoidKey())->~basic_string();
2546   }
2547   if (map_info.value_type_card.cpp_type() == MapTypeCard::kString) {
2548     static_cast<std::string*>(node->GetVoidValue(map_info.node_size_info))
2549         ->~basic_string();
2550   } else if (map_info.value_type_card.cpp_type() == MapTypeCard::kMessage) {
2551     static_cast<MessageLite*>(node->GetVoidValue(map_info.node_size_info))
2552         ->DestroyInstance();
2553   }
2554   map.DeallocNode(node, map_info.node_size_info);
2555 }
2556 
2557 template <typename T>
ReadFixed(void * obj,const char * ptr)2558 const char* ReadFixed(void* obj, const char* ptr) {
2559   auto v = UnalignedLoad<T>(ptr);
2560   ptr += sizeof(v);
2561   memcpy(obj, &v, sizeof(v));
2562   return ptr;
2563 }
2564 
ParseOneMapEntry(NodeBase * node,const char * ptr,ParseContext * ctx,const TcParseTableBase::FieldAux * aux,const TcParseTableBase * table,const TcParseTableBase::FieldEntry & entry,Arena * arena)2565 const char* TcParser::ParseOneMapEntry(
2566     NodeBase* node, const char* ptr, ParseContext* ctx,
2567     const TcParseTableBase::FieldAux* aux, const TcParseTableBase* table,
2568     const TcParseTableBase::FieldEntry& entry, Arena* arena) {
2569   using WFL = WireFormatLite;
2570 
2571   const auto map_info = aux[0].map_info;
2572   const uint8_t key_tag = WFL::MakeTag(1, map_info.key_type_card.wiretype());
2573   const uint8_t value_tag =
2574       WFL::MakeTag(2, map_info.value_type_card.wiretype());
2575 
2576   while (!ctx->Done(&ptr)) {
2577     uint32_t inner_tag = ptr[0];
2578 
2579     if (PROTOBUF_PREDICT_FALSE(inner_tag != key_tag &&
2580                                inner_tag != value_tag)) {
2581       // Do a full parse and check again in case the tag has non-canonical
2582       // encoding.
2583       ptr = ReadTag(ptr, &inner_tag);
2584       if (PROTOBUF_PREDICT_FALSE(inner_tag != key_tag &&
2585                                  inner_tag != value_tag)) {
2586         if (PROTOBUF_PREDICT_FALSE(ptr == nullptr)) return nullptr;
2587 
2588         if (inner_tag == 0 || (inner_tag & 7) == WFL::WIRETYPE_END_GROUP) {
2589           ctx->SetLastTag(inner_tag);
2590           break;
2591         }
2592 
2593         ptr = UnknownFieldParse(inner_tag, nullptr, ptr, ctx);
2594         if (PROTOBUF_PREDICT_FALSE(ptr == nullptr)) return nullptr;
2595         continue;
2596       }
2597     } else {
2598       ++ptr;
2599     }
2600 
2601     MapTypeCard type_card;
2602     void* obj;
2603     if (inner_tag == key_tag) {
2604       type_card = map_info.key_type_card;
2605       obj = node->GetVoidKey();
2606     } else {
2607       type_card = map_info.value_type_card;
2608       obj = node->GetVoidValue(map_info.node_size_info);
2609     }
2610 
2611     switch (type_card.wiretype()) {
2612       case WFL::WIRETYPE_VARINT:
2613         uint64_t tmp;
2614         ptr = ParseVarint(ptr, &tmp);
2615         if (PROTOBUF_PREDICT_FALSE(ptr == nullptr)) return nullptr;
2616         switch (type_card.cpp_type()) {
2617           case MapTypeCard::kBool:
2618             *reinterpret_cast<bool*>(obj) = static_cast<bool>(tmp);
2619             continue;
2620           case MapTypeCard::k32: {
2621             uint32_t v = static_cast<uint32_t>(tmp);
2622             if (type_card.is_zigzag()) v = WFL::ZigZagDecode32(v);
2623             memcpy(obj, &v, sizeof(v));
2624             continue;
2625           }
2626           case MapTypeCard::k64:
2627             if (type_card.is_zigzag()) tmp = WFL::ZigZagDecode64(tmp);
2628             memcpy(obj, &tmp, sizeof(tmp));
2629             continue;
2630           default:
2631             Unreachable();
2632         }
2633       case WFL::WIRETYPE_FIXED32:
2634         ptr = ReadFixed<uint32_t>(obj, ptr);
2635         continue;
2636       case WFL::WIRETYPE_FIXED64:
2637         ptr = ReadFixed<uint64_t>(obj, ptr);
2638         continue;
2639       case WFL::WIRETYPE_LENGTH_DELIMITED:
2640         if (type_card.cpp_type() == MapTypeCard::kString) {
2641           const int size = ReadSize(&ptr);
2642           if (PROTOBUF_PREDICT_FALSE(ptr == nullptr)) return nullptr;
2643           std::string* str = reinterpret_cast<std::string*>(obj);
2644           ptr = ctx->ReadString(ptr, size, str);
2645           if (PROTOBUF_PREDICT_FALSE(ptr == nullptr)) return nullptr;
2646           bool do_utf8_check = map_info.fail_on_utf8_failure;
2647 #ifndef NDEBUG
2648           do_utf8_check |= map_info.log_debug_utf8_failure;
2649 #endif
2650           if (type_card.is_utf8() && do_utf8_check &&
2651               !utf8_range::IsStructurallyValid(*str)) {
2652             PrintUTF8ErrorLog(MessageName(table), FieldName(table, &entry),
2653                               "parsing", false);
2654             if (map_info.fail_on_utf8_failure) {
2655               return nullptr;
2656             }
2657           }
2658           continue;
2659         } else {
2660           ABSL_DCHECK_EQ(+type_card.cpp_type(), +MapTypeCard::kMessage);
2661           ABSL_DCHECK_EQ(inner_tag, value_tag);
2662           ptr = ctx->ParseMessage(reinterpret_cast<MessageLite*>(obj), ptr);
2663           if (PROTOBUF_PREDICT_FALSE(ptr == nullptr)) return nullptr;
2664           continue;
2665         }
2666       default:
2667         Unreachable();
2668     }
2669   }
2670   return ptr;
2671 }
2672 
2673 template <bool is_split>
MpMap(PROTOBUF_TC_PARAM_DECL)2674 PROTOBUF_NOINLINE const char* TcParser::MpMap(PROTOBUF_TC_PARAM_DECL) {
2675   const auto& entry = RefAt<FieldEntry>(table, data.entry_offset());
2676   // `aux[0]` points into a MapAuxInfo.
2677   // If we have a message mapped_type aux[1] points into a `create_in_arena`.
2678   // If we have a validated enum mapped_type aux[1] point into a
2679   // `enum_data`.
2680   const auto* aux = table->field_aux(&entry);
2681   const auto map_info = aux[0].map_info;
2682 
2683   if (PROTOBUF_PREDICT_FALSE(!map_info.is_supported ||
2684                              (data.tag() & 7) !=
2685                                  WireFormatLite::WIRETYPE_LENGTH_DELIMITED)) {
2686     PROTOBUF_MUSTTAIL return MpFallback(PROTOBUF_TC_PARAM_PASS);
2687   }
2688 
2689   // When using LITE, the offset points directly into the Map<> object.
2690   // Otherwise, it points into a MapField and we must synchronize with
2691   // reflection. It is done by calling the MutableMap() virtual function on the
2692   // field's base class.
2693   void* const base = MaybeGetSplitBase(msg, is_split, table);
2694   UntypedMapBase& map =
2695       map_info.use_lite
2696           ? RefAt<UntypedMapBase>(base, entry.offset)
2697           : *RefAt<MapFieldBaseForParse>(base, entry.offset).MutableMap();
2698 
2699   const uint32_t saved_tag = data.tag();
2700 
2701   while (true) {
2702     NodeBase* node = map.AllocNode(map_info.node_size_info);
2703 
2704     InitializeMapNodeEntry(node->GetVoidKey(), map_info.key_type_card, map, aux,
2705                            true);
2706     InitializeMapNodeEntry(node->GetVoidValue(map_info.node_size_info),
2707                            map_info.value_type_card, map, aux, false);
2708 
2709     ptr = ctx->ParseLengthDelimitedInlined(ptr, [&](const char* ptr) {
2710       return ParseOneMapEntry(node, ptr, ctx, aux, table, entry, map.arena());
2711     });
2712 
2713     if (PROTOBUF_PREDICT_TRUE(ptr != nullptr)) {
2714       if (PROTOBUF_PREDICT_FALSE(map_info.value_is_validated_enum &&
2715                                  !internal::ValidateEnumInlined(
2716                                      *static_cast<int32_t*>(node->GetVoidValue(
2717                                          map_info.node_size_info)),
2718                                      aux[1].enum_data))) {
2719         WriteMapEntryAsUnknown(msg, table, saved_tag, node, map_info);
2720       } else {
2721         // Done parsing the node, try to insert it.
2722         // If it overwrites something we get old node back to destroy it.
2723         switch (map_info.key_type_card.cpp_type()) {
2724           case MapTypeCard::kBool:
2725             node = static_cast<KeyMapBase<bool>&>(map).InsertOrReplaceNode(
2726                 static_cast<KeyMapBase<bool>::KeyNode*>(node));
2727             break;
2728           case MapTypeCard::k32:
2729             node = static_cast<KeyMapBase<uint32_t>&>(map).InsertOrReplaceNode(
2730                 static_cast<KeyMapBase<uint32_t>::KeyNode*>(node));
2731             break;
2732           case MapTypeCard::k64:
2733             node = static_cast<KeyMapBase<uint64_t>&>(map).InsertOrReplaceNode(
2734                 static_cast<KeyMapBase<uint64_t>::KeyNode*>(node));
2735             break;
2736           case MapTypeCard::kString:
2737             node =
2738                 static_cast<KeyMapBase<std::string>&>(map).InsertOrReplaceNode(
2739                     static_cast<KeyMapBase<std::string>::KeyNode*>(node));
2740             break;
2741           default:
2742             Unreachable();
2743         }
2744       }
2745     }
2746 
2747     // Destroy the node if we have it.
2748     // It could be because we failed to parse, or because insertion returned
2749     // an overwritten node.
2750     if (PROTOBUF_PREDICT_FALSE(node != nullptr && map.arena() == nullptr)) {
2751       DestroyMapNode(node, map_info, map);
2752     }
2753 
2754     if (PROTOBUF_PREDICT_FALSE(ptr == nullptr)) {
2755       PROTOBUF_MUSTTAIL return Error(PROTOBUF_TC_PARAM_NO_DATA_PASS);
2756     }
2757 
2758     if (PROTOBUF_PREDICT_FALSE(!ctx->DataAvailable(ptr))) {
2759       PROTOBUF_MUSTTAIL return ToParseLoop(PROTOBUF_TC_PARAM_NO_DATA_PASS);
2760     }
2761 
2762     uint32_t next_tag;
2763     const char* ptr2 = ReadTagInlined(ptr, &next_tag);
2764     if (next_tag != saved_tag) break;
2765     ptr = ptr2;
2766   }
2767 
2768   PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_NO_DATA_PASS);
2769 }
2770 
MessageSetWireFormatParseLoopLite(PROTOBUF_TC_PARAM_NO_DATA_DECL)2771 const char* TcParser::MessageSetWireFormatParseLoopLite(
2772     PROTOBUF_TC_PARAM_NO_DATA_DECL) {
2773   PROTOBUF_MUSTTAIL return MessageSetWireFormatParseLoopImpl<MessageLite>(
2774       PROTOBUF_TC_PARAM_NO_DATA_PASS);
2775 }
2776 
TypeCardToString(uint16_t type_card)2777 std::string TypeCardToString(uint16_t type_card) {
2778   // In here we convert the runtime value of entry.type_card back into a
2779   // sequence of literal enum labels. We use the mnenonic labels for nicer
2780   // codegen.
2781   namespace fl = internal::field_layout;
2782   const int rep_index = (type_card & fl::kRepMask) >> fl::kRepShift;
2783   const int tv_index = (type_card & fl::kTvMask) >> fl::kTvShift;
2784 
2785   static constexpr const char* kFieldCardNames[] = {"Singular", "Optional",
2786                                                     "Repeated", "Oneof"};
2787   static_assert((fl::kFcSingular >> fl::kFcShift) == 0, "");
2788   static_assert((fl::kFcOptional >> fl::kFcShift) == 1, "");
2789   static_assert((fl::kFcRepeated >> fl::kFcShift) == 2, "");
2790   static_assert((fl::kFcOneof >> fl::kFcShift) == 3, "");
2791 
2792   std::string out;
2793 
2794   absl::StrAppend(&out, "::_fl::kFc",
2795                   kFieldCardNames[(type_card & fl::kFcMask) >> fl::kFcShift]);
2796 
2797 #define PROTOBUF_INTERNAL_TYPE_CARD_CASE(x)  \
2798   case fl::k##x:                             \
2799     absl::StrAppend(&out, " | ::_fl::k" #x); \
2800     break
2801 
2802   switch (type_card & fl::kFkMask) {
2803     case fl::kFkString: {
2804       switch (type_card & ~fl::kFcMask & ~fl::kRepMask & ~fl::kSplitMask) {
2805         PROTOBUF_INTERNAL_TYPE_CARD_CASE(Bytes);
2806         PROTOBUF_INTERNAL_TYPE_CARD_CASE(RawString);
2807         PROTOBUF_INTERNAL_TYPE_CARD_CASE(Utf8String);
2808         default:
2809           ABSL_LOG(FATAL) << "Unknown type_card: 0x" << type_card;
2810       }
2811 
2812       static constexpr const char* kRepNames[] = {"AString", "IString", "Cord",
2813                                                   "SPiece", "SString"};
2814       static_assert((fl::kRepAString >> fl::kRepShift) == 0, "");
2815       static_assert((fl::kRepIString >> fl::kRepShift) == 1, "");
2816       static_assert((fl::kRepCord >> fl::kRepShift) == 2, "");
2817       static_assert((fl::kRepSPiece >> fl::kRepShift) == 3, "");
2818       static_assert((fl::kRepSString >> fl::kRepShift) == 4, "");
2819 
2820       absl::StrAppend(&out, " | ::_fl::kRep", kRepNames[rep_index]);
2821       break;
2822     }
2823 
2824     case fl::kFkMessage: {
2825       absl::StrAppend(&out, " | ::_fl::kMessage");
2826 
2827       static constexpr const char* kRepNames[] = {nullptr, "Group", "Lazy"};
2828       static_assert((fl::kRepGroup >> fl::kRepShift) == 1, "");
2829       static_assert((fl::kRepLazy >> fl::kRepShift) == 2, "");
2830 
2831       if (auto* rep = kRepNames[rep_index]) {
2832         absl::StrAppend(&out, " | ::_fl::kRep", rep);
2833       }
2834 
2835       static constexpr const char* kXFormNames[2][4] = {
2836           {nullptr, "Default", "Table", "WeakPtr"}, {nullptr, "Eager", "Lazy"}};
2837 
2838       static_assert((fl::kTvDefault >> fl::kTvShift) == 1, "");
2839       static_assert((fl::kTvTable >> fl::kTvShift) == 2, "");
2840       static_assert((fl::kTvWeakPtr >> fl::kTvShift) == 3, "");
2841       static_assert((fl::kTvEager >> fl::kTvShift) == 1, "");
2842       static_assert((fl::kTvLazy >> fl::kTvShift) == 2, "");
2843 
2844       if (auto* xform = kXFormNames[rep_index == 2][tv_index]) {
2845         absl::StrAppend(&out, " | ::_fl::kTv", xform);
2846       }
2847       break;
2848     }
2849 
2850     case fl::kFkMap:
2851       absl::StrAppend(&out, " | ::_fl::kMap");
2852       break;
2853 
2854     case fl::kFkNone:
2855       break;
2856 
2857     case fl::kFkVarint:
2858     case fl::kFkPackedVarint:
2859     case fl::kFkFixed:
2860     case fl::kFkPackedFixed: {
2861       switch (type_card & ~fl::kFcMask & ~fl::kSplitMask) {
2862         PROTOBUF_INTERNAL_TYPE_CARD_CASE(Bool);
2863         PROTOBUF_INTERNAL_TYPE_CARD_CASE(Fixed32);
2864         PROTOBUF_INTERNAL_TYPE_CARD_CASE(UInt32);
2865         PROTOBUF_INTERNAL_TYPE_CARD_CASE(SFixed32);
2866         PROTOBUF_INTERNAL_TYPE_CARD_CASE(Int32);
2867         PROTOBUF_INTERNAL_TYPE_CARD_CASE(SInt32);
2868         PROTOBUF_INTERNAL_TYPE_CARD_CASE(Float);
2869         PROTOBUF_INTERNAL_TYPE_CARD_CASE(Enum);
2870         PROTOBUF_INTERNAL_TYPE_CARD_CASE(EnumRange);
2871         PROTOBUF_INTERNAL_TYPE_CARD_CASE(OpenEnum);
2872         PROTOBUF_INTERNAL_TYPE_CARD_CASE(Fixed64);
2873         PROTOBUF_INTERNAL_TYPE_CARD_CASE(UInt64);
2874         PROTOBUF_INTERNAL_TYPE_CARD_CASE(SFixed64);
2875         PROTOBUF_INTERNAL_TYPE_CARD_CASE(Int64);
2876         PROTOBUF_INTERNAL_TYPE_CARD_CASE(SInt64);
2877         PROTOBUF_INTERNAL_TYPE_CARD_CASE(Double);
2878         PROTOBUF_INTERNAL_TYPE_CARD_CASE(PackedBool);
2879         PROTOBUF_INTERNAL_TYPE_CARD_CASE(PackedFixed32);
2880         PROTOBUF_INTERNAL_TYPE_CARD_CASE(PackedUInt32);
2881         PROTOBUF_INTERNAL_TYPE_CARD_CASE(PackedSFixed32);
2882         PROTOBUF_INTERNAL_TYPE_CARD_CASE(PackedInt32);
2883         PROTOBUF_INTERNAL_TYPE_CARD_CASE(PackedSInt32);
2884         PROTOBUF_INTERNAL_TYPE_CARD_CASE(PackedFloat);
2885         PROTOBUF_INTERNAL_TYPE_CARD_CASE(PackedEnum);
2886         PROTOBUF_INTERNAL_TYPE_CARD_CASE(PackedEnumRange);
2887         PROTOBUF_INTERNAL_TYPE_CARD_CASE(PackedOpenEnum);
2888         PROTOBUF_INTERNAL_TYPE_CARD_CASE(PackedFixed64);
2889         PROTOBUF_INTERNAL_TYPE_CARD_CASE(PackedUInt64);
2890         PROTOBUF_INTERNAL_TYPE_CARD_CASE(PackedSFixed64);
2891         PROTOBUF_INTERNAL_TYPE_CARD_CASE(PackedInt64);
2892         PROTOBUF_INTERNAL_TYPE_CARD_CASE(PackedSInt64);
2893         PROTOBUF_INTERNAL_TYPE_CARD_CASE(PackedDouble);
2894         default:
2895           ABSL_LOG(FATAL) << "Unknown type_card: 0x" << type_card;
2896       }
2897     }
2898   }
2899 
2900   if (type_card & fl::kSplitMask) {
2901     absl::StrAppend(&out, " | ::_fl::kSplitTrue");
2902   }
2903 
2904 #undef PROTOBUF_INTERNAL_TYPE_CARD_CASE
2905 
2906   return out;
2907 }
2908 
DiscardEverythingFallback(PROTOBUF_TC_PARAM_DECL)2909 const char* TcParser::DiscardEverythingFallback(PROTOBUF_TC_PARAM_DECL) {
2910   SyncHasbits(msg, hasbits, table);
2911   uint32_t tag = data.tag();
2912   if ((tag & 7) == WireFormatLite::WIRETYPE_END_GROUP || tag == 0) {
2913     ctx->SetLastTag(tag);
2914     return ptr;
2915   }
2916   return UnknownFieldParse(tag, nullptr, ptr, ctx);
2917 }
2918 
2919 }  // namespace internal
2920 }  // namespace protobuf
2921 }  // namespace google
2922 
2923 #include "google/protobuf/port_undef.inc"
2924