1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc. All rights reserved.
3 //
4 // Use of this source code is governed by a BSD-style
5 // license that can be found in the LICENSE file or at
6 // https://developers.google.com/open-source/licenses/bsd
7
8 #include <algorithm>
9 #include <cstddef>
10 #include <cstdint>
11 #include <cstring>
12 #include <limits>
13 #include <new> // IWYU pragma: keep for operator new
14 #include <numeric>
15 #include <string>
16 #include <type_traits>
17
18 #include "absl/base/optimization.h"
19 #include "absl/log/absl_check.h"
20 #include "absl/log/absl_log.h"
21 #include "absl/numeric/bits.h"
22 #include "absl/strings/str_cat.h"
23 #include "absl/strings/string_view.h"
24 #include "google/protobuf/arenastring.h"
25 #include "google/protobuf/generated_enum_util.h"
26 #include "google/protobuf/generated_message_tctable_decl.h"
27 #include "google/protobuf/generated_message_tctable_impl.h"
28 #include "google/protobuf/inlined_string_field.h"
29 #include "google/protobuf/io/zero_copy_stream_impl_lite.h"
30 #include "google/protobuf/map.h"
31 #include "google/protobuf/message_lite.h"
32 #include "google/protobuf/parse_context.h"
33 #include "google/protobuf/port.h"
34 #include "google/protobuf/repeated_field.h"
35 #include "google/protobuf/repeated_ptr_field.h"
36 #include "google/protobuf/serial_arena.h"
37 #include "google/protobuf/varint_shuffle.h"
38 #include "google/protobuf/wire_format_lite.h"
39 #include "utf8_validity.h"
40
41
42 // clang-format off
43 #include "google/protobuf/port_def.inc"
44 // clang-format on
45
46 namespace google {
47 namespace protobuf {
48 namespace internal {
49
50 using FieldEntry = TcParseTableBase::FieldEntry;
51
52 //////////////////////////////////////////////////////////////////////////////
53 // Template instantiations:
54 //////////////////////////////////////////////////////////////////////////////
55
56 #ifndef NDEBUG
AlignFail(std::integral_constant<size_t,4>,std::uintptr_t address)57 void AlignFail(std::integral_constant<size_t, 4>, std::uintptr_t address) {
58 ABSL_LOG(FATAL) << "Unaligned (4) access at " << address;
59
60 // Explicit abort to let compilers know this function does not return
61 abort();
62 }
AlignFail(std::integral_constant<size_t,8>,std::uintptr_t address)63 void AlignFail(std::integral_constant<size_t, 8>, std::uintptr_t address) {
64 ABSL_LOG(FATAL) << "Unaligned (8) access at " << address;
65
66 // Explicit abort to let compilers know this function does not return
67 abort();
68 }
69 #endif
70
GenericFallbackLite(PROTOBUF_TC_PARAM_DECL)71 const char* TcParser::GenericFallbackLite(PROTOBUF_TC_PARAM_DECL) {
72 PROTOBUF_MUSTTAIL return GenericFallbackImpl<MessageLite, std::string>(
73 PROTOBUF_TC_PARAM_PASS);
74 }
75
76 //////////////////////////////////////////////////////////////////////////////
77 // Core fast parsing implementation:
78 //////////////////////////////////////////////////////////////////////////////
79
ParseLoopPreserveNone(MessageLite * msg,const char * ptr,ParseContext * ctx,const TcParseTableBase * table)80 PROTOBUF_NOINLINE const char* TcParser::ParseLoopPreserveNone(
81 MessageLite* msg, const char* ptr, ParseContext* ctx,
82 const TcParseTableBase* table) {
83 return ParseLoop(msg, ptr, ctx, table);
84 }
85
// Decodes a 1-byte coded tag taken on the fast path. A matching 1-byte tag is
// already the decoded value, so it is only widened to 32 bits.
static uint32_t FastDecodeTag(uint8_t coded_tag) {
  return uint32_t{coded_tag};
}
90
// Decodes a 2-byte coded tag taken on the fast path; unlike the 1-byte form,
// a 2-byte tag always needs decoding.
//
// The low byte is reinterpreted as a signed 8-bit value and added to the
// 16-bit coded tag, and the sum is shifted right by one.
static uint32_t FastDecodeTag(uint16_t coded_tag) {
  const auto low_byte = static_cast<int8_t>(coded_tag);
  const uint32_t folded = uint32_t{coded_tag} + low_byte;
  return folded >> 1;
}
97
98 //////////////////////////////////////////////////////////////////////////////
99 // Core mini parsing implementation:
100 //////////////////////////////////////////////////////////////////////////////
101
102 // Field lookup table layout:
103 //
// Because it consists of a series of variable-length segments, the lookup
105 // table is organized within an array of uint16_t, and each element is either
106 // a uint16_t or a uint32_t stored little-endian as a pair of uint16_t.
107 //
108 // Its fundamental building block maps 16 contiguously ascending field numbers
109 // to their locations within the field entry table:
110
struct SkipEntry16 {
  // Bit i is set when the i-th of the 16 covered field numbers has NO field
  // entry.
  uint16_t skipmap;
  // Offset into the field entry table for the first of the 16 field numbers.
  uint16_t field_entry_offset;
};
115
116 // The skipmap is a bitfield of which of those field numbers do NOT have a
117 // field entry. The lowest bit of the skipmap corresponds to the lowest of
118 // the 16 field numbers, so if a proto had only fields 1, 2, 3, and 7, the
119 // skipmap would contain 0b11111111'10111000.
120 //
121 // The field lookup table begins with a single 32-bit skipmap that maps the
122 // field numbers 1 through 32. This is because the majority of proto
123 // messages only contain fields numbered 1 to 32.
124 //
125 // The rest of the lookup table is a repeated series of
126 // { 32-bit field #, #SkipEntry16s, {SkipEntry16...} }
127 // That is, the next thing is a pair of uint16_t that form the next
128 // lowest field number that the lookup table handles. If this number is -1,
129 // that is the end of the table. Then there is a uint16_t that is
130 // the number of contiguous SkipEntry16 entries that follow, and then of
131 // course the SkipEntry16s themselves.
132
133 // Originally developed and tested at https://godbolt.org/z/vbc7enYcf
134
135 // Returns the address of the field for `tag` in the table's field entries.
136 // Returns nullptr if the field was not found.
FindFieldEntry(const TcParseTableBase * table,uint32_t field_num)137 const TcParseTableBase::FieldEntry* TcParser::FindFieldEntry(
138 const TcParseTableBase* table, uint32_t field_num) {
139 const FieldEntry* const field_entries = table->field_entries_begin();
140
141 uint32_t fstart = 1;
142 uint32_t adj_fnum = field_num - fstart;
143
144 if (PROTOBUF_PREDICT_TRUE(adj_fnum < 32)) {
145 uint32_t skipmap = table->skipmap32;
146 uint32_t skipbit = 1 << adj_fnum;
147 if (PROTOBUF_PREDICT_FALSE(skipmap & skipbit)) return nullptr;
148 skipmap &= skipbit - 1;
149 adj_fnum -= absl::popcount(skipmap);
150 auto* entry = field_entries + adj_fnum;
151 PROTOBUF_ASSUME(entry != nullptr);
152 return entry;
153 }
154 const uint16_t* lookup_table = table->field_lookup_begin();
155 for (;;) {
156 #ifdef ABSL_IS_LITTLE_ENDIAN
157 memcpy(&fstart, lookup_table, sizeof(fstart));
158 #else
159 fstart = lookup_table[0] | (lookup_table[1] << 16);
160 #endif
161 lookup_table += sizeof(fstart) / sizeof(*lookup_table);
162 uint32_t num_skip_entries = *lookup_table++;
163 if (field_num < fstart) return nullptr;
164 adj_fnum = field_num - fstart;
165 uint32_t skip_num = adj_fnum / 16;
166 if (PROTOBUF_PREDICT_TRUE(skip_num < num_skip_entries)) {
167 // for each group of 16 fields we have:
168 // a bitmap of 16 bits
169 // a 16-bit field-entry offset for the first of them.
170 auto* skip_data = lookup_table + (adj_fnum / 16) * (sizeof(SkipEntry16) /
171 sizeof(uint16_t));
172 SkipEntry16 se = {skip_data[0], skip_data[1]};
173 adj_fnum &= 15;
174 uint32_t skipmap = se.skipmap;
175 uint16_t skipbit = 1 << adj_fnum;
176 if (PROTOBUF_PREDICT_FALSE(skipmap & skipbit)) return nullptr;
177 skipmap &= skipbit - 1;
178 adj_fnum += se.field_entry_offset;
179 adj_fnum -= absl::popcount(skipmap);
180 auto* entry = field_entries + adj_fnum;
181 PROTOBUF_ASSUME(entry != nullptr);
182 return entry;
183 }
184 lookup_table +=
185 num_skip_entries * (sizeof(SkipEntry16) / sizeof(*lookup_table));
186 }
187 }
188
189 // Field names are stored in a format of:
190 //
191 // 1) A table of name sizes, one byte each, from 1 to 255 per name.
192 // `entries` is the size of this first table.
193 // 1a) padding bytes, so the table of name sizes is a multiple of
194 // eight bytes in length. They are zero.
195 //
196 // 2) All the names, concatenated, with neither separation nor termination.
197 //
198 // This is designed to be compact but not particularly fast to retrieve.
199 // In particular, it takes O(n) to retrieve the name of the n'th field,
200 // which is usually fine because most protos have fewer than 10 fields.
FindName(const char * name_data,size_t entries,size_t index)201 static absl::string_view FindName(const char* name_data, size_t entries,
202 size_t index) {
203 // The compiler unrolls these... if this isn't fast enough,
204 // there's an AVX version at https://godbolt.org/z/eojrjqzfr
205 // ARM-compatible version at https://godbolt.org/z/n5YT5Ee85
206
207 // The field name sizes are padded up to a multiple of 8, so we
208 // must pad them here.
209 size_t num_sizes = (entries + 7) & -8;
210 auto* uint8s = reinterpret_cast<const uint8_t*>(name_data);
211 size_t pos = std::accumulate(uint8s, uint8s + index, num_sizes);
212 size_t size = name_data[index];
213 auto* start = &name_data[pos];
214 return {start, size};
215 }
216
MessageName(const TcParseTableBase * table)217 absl::string_view TcParser::MessageName(const TcParseTableBase* table) {
218 return FindName(table->name_data(), table->num_field_entries + 1, 0);
219 }
220
FieldName(const TcParseTableBase * table,const FieldEntry * field_entry)221 absl::string_view TcParser::FieldName(const TcParseTableBase* table,
222 const FieldEntry* field_entry) {
223 const FieldEntry* const field_entries = table->field_entries_begin();
224 auto field_index = static_cast<size_t>(field_entry - field_entries);
225 return FindName(table->name_data(), table->num_field_entries + 1,
226 field_index + 1);
227 }
228
Error(PROTOBUF_TC_PARAM_NO_DATA_DECL)229 PROTOBUF_NOINLINE const char* TcParser::Error(PROTOBUF_TC_PARAM_NO_DATA_DECL) {
230 (void)ctx;
231 (void)ptr;
232 SyncHasbits(msg, hasbits, table);
233 return nullptr;
234 }
235
236 template <bool export_called_function>
MiniParse(PROTOBUF_TC_PARAM_DECL)237 inline PROTOBUF_ALWAYS_INLINE const char* TcParser::MiniParse(
238 PROTOBUF_TC_PARAM_DECL) {
239 TestMiniParseResult* test_out;
240 if (export_called_function) {
241 test_out = reinterpret_cast<TestMiniParseResult*>(
242 static_cast<uintptr_t>(data.data));
243 }
244
245 uint32_t tag;
246 ptr = ReadTagInlined(ptr, &tag);
247 if (PROTOBUF_PREDICT_FALSE(ptr == nullptr)) {
248 if (export_called_function) *test_out = {Error};
249 PROTOBUF_MUSTTAIL return Error(PROTOBUF_TC_PARAM_NO_DATA_PASS);
250 }
251
252 auto* entry = FindFieldEntry(table, tag >> 3);
253 if (entry == nullptr) {
254 if (export_called_function) *test_out = {table->fallback, tag};
255 data.data = tag;
256 PROTOBUF_MUSTTAIL return table->fallback(PROTOBUF_TC_PARAM_PASS);
257 }
258
259 // The handler may need the tag and the entry to resolve fallback logic. Both
260 // of these are 32 bits, so pack them into (the 64-bit) `data`. Since we can't
261 // pack the entry pointer itself, just pack its offset from `table`.
262 uint64_t entry_offset = reinterpret_cast<const char*>(entry) -
263 reinterpret_cast<const char*>(table);
264 data.data = entry_offset << 32 | tag;
265
266 using field_layout::FieldKind;
267 auto field_type =
268 entry->type_card & (+field_layout::kSplitMask | FieldKind::kFkMask);
269
270 static constexpr TailCallParseFunc kMiniParseTable[] = {
271 &MpFallback, // FieldKind::kFkNone
272 &MpVarint<false>, // FieldKind::kFkVarint
273 &MpPackedVarint<false>, // FieldKind::kFkPackedVarint
274 &MpFixed<false>, // FieldKind::kFkFixed
275 &MpPackedFixed<false>, // FieldKind::kFkPackedFixed
276 &MpString<false>, // FieldKind::kFkString
277 &MpMessage<false>, // FieldKind::kFkMessage
278 &MpMap<false>, // FieldKind::kFkMap
279 &Error, // kSplitMask | FieldKind::kFkNone
280 &MpVarint<true>, // kSplitMask | FieldKind::kFkVarint
281 &MpPackedVarint<true>, // kSplitMask | FieldKind::kFkPackedVarint
282 &MpFixed<true>, // kSplitMask | FieldKind::kFkFixed
283 &MpPackedFixed<true>, // kSplitMask | FieldKind::kFkPackedFixed
284 &MpString<true>, // kSplitMask | FieldKind::kFkString
285 &MpMessage<true>, // kSplitMask | FieldKind::kFkMessage
286 &MpMap<true>, // kSplitMask | FieldKind::kFkMap
287 };
288 // Just to be sure we got the order right, above.
289 static_assert(0 == FieldKind::kFkNone, "Invalid table order");
290 static_assert(1 == FieldKind::kFkVarint, "Invalid table order");
291 static_assert(2 == FieldKind::kFkPackedVarint, "Invalid table order");
292 static_assert(3 == FieldKind::kFkFixed, "Invalid table order");
293 static_assert(4 == FieldKind::kFkPackedFixed, "Invalid table order");
294 static_assert(5 == FieldKind::kFkString, "Invalid table order");
295 static_assert(6 == FieldKind::kFkMessage, "Invalid table order");
296 static_assert(7 == FieldKind::kFkMap, "Invalid table order");
297
298 static_assert(8 == (+field_layout::kSplitMask | FieldKind::kFkNone),
299 "Invalid table order");
300 static_assert(9 == (+field_layout::kSplitMask | FieldKind::kFkVarint),
301 "Invalid table order");
302 static_assert(10 == (+field_layout::kSplitMask | FieldKind::kFkPackedVarint),
303 "Invalid table order");
304 static_assert(11 == (+field_layout::kSplitMask | FieldKind::kFkFixed),
305 "Invalid table order");
306 static_assert(12 == (+field_layout::kSplitMask | FieldKind::kFkPackedFixed),
307 "Invalid table order");
308 static_assert(13 == (+field_layout::kSplitMask | FieldKind::kFkString),
309 "Invalid table order");
310 static_assert(14 == (+field_layout::kSplitMask | FieldKind::kFkMessage),
311 "Invalid table order");
312 static_assert(15 == (+field_layout::kSplitMask | FieldKind::kFkMap),
313 "Invalid table order");
314
315 TailCallParseFunc parse_fn = kMiniParseTable[field_type];
316 if (export_called_function) *test_out = {parse_fn, tag, entry};
317
318 PROTOBUF_MUSTTAIL return parse_fn(PROTOBUF_TC_PARAM_PASS);
319 }
320
MiniParse(PROTOBUF_TC_PARAM_NO_DATA_DECL)321 PROTOBUF_NOINLINE const char* TcParser::MiniParse(
322 PROTOBUF_TC_PARAM_NO_DATA_DECL) {
323 PROTOBUF_MUSTTAIL return MiniParse<false>(PROTOBUF_TC_PARAM_NO_DATA_PASS);
324 }
TestMiniParse(PROTOBUF_TC_PARAM_DECL)325 PROTOBUF_NOINLINE TcParser::TestMiniParseResult TcParser::TestMiniParse(
326 PROTOBUF_TC_PARAM_DECL) {
327 TestMiniParseResult result = {};
328 data.data = reinterpret_cast<uintptr_t>(&result);
329 result.ptr = MiniParse<true>(PROTOBUF_TC_PARAM_PASS);
330 return result;
331 }
332
MpFallback(PROTOBUF_TC_PARAM_DECL)333 PROTOBUF_NOINLINE const char* TcParser::MpFallback(PROTOBUF_TC_PARAM_DECL) {
334 PROTOBUF_MUSTTAIL return table->fallback(PROTOBUF_TC_PARAM_PASS);
335 }
336
337 template <typename TagType>
FastEndGroupImpl(PROTOBUF_TC_PARAM_DECL)338 const char* TcParser::FastEndGroupImpl(PROTOBUF_TC_PARAM_DECL) {
339 if (PROTOBUF_PREDICT_FALSE(data.coded_tag<TagType>() != 0)) {
340 PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_NO_DATA_PASS);
341 }
342 ctx->SetLastTag(data.decoded_tag());
343 ptr += sizeof(TagType);
344 PROTOBUF_MUSTTAIL return ToParseLoop(PROTOBUF_TC_PARAM_NO_DATA_PASS);
345 }
346
FastEndG1(PROTOBUF_TC_PARAM_DECL)347 PROTOBUF_NOINLINE const char* TcParser::FastEndG1(PROTOBUF_TC_PARAM_DECL) {
348 PROTOBUF_MUSTTAIL return FastEndGroupImpl<uint8_t>(PROTOBUF_TC_PARAM_PASS);
349 }
FastEndG2(PROTOBUF_TC_PARAM_DECL)350 PROTOBUF_NOINLINE const char* TcParser::FastEndG2(PROTOBUF_TC_PARAM_DECL) {
351 PROTOBUF_MUSTTAIL return FastEndGroupImpl<uint16_t>(PROTOBUF_TC_PARAM_PASS);
352 }
353
354 //////////////////////////////////////////////////////////////////////////////
355 // Message fields
356 //////////////////////////////////////////////////////////////////////////////
357
NewMessage(const TcParseTableBase * table,Arena * arena)358 inline PROTOBUF_ALWAYS_INLINE MessageLite* TcParser::NewMessage(
359 const TcParseTableBase* table, Arena* arena) {
360 return table->class_data->New(arena);
361 }
362
AddMessage(const TcParseTableBase * table,RepeatedPtrFieldBase & field)363 MessageLite* TcParser::AddMessage(const TcParseTableBase* table,
364 RepeatedPtrFieldBase& field) {
365 return static_cast<MessageLite*>(field.AddInternal(
366 [table](Arena* arena) { return NewMessage(table, arena); }));
367 }
368
369 template <typename TagType, bool group_coding, bool aux_is_table>
SingularParseMessageAuxImpl(PROTOBUF_TC_PARAM_DECL)370 inline PROTOBUF_ALWAYS_INLINE const char* TcParser::SingularParseMessageAuxImpl(
371 PROTOBUF_TC_PARAM_DECL) {
372 PROTOBUF_PREFETCH_WITH_OFFSET(ptr, 192);
373 PROTOBUF_PREFETCH_WITH_OFFSET(ptr, 256);
374 if (PROTOBUF_PREDICT_FALSE(data.coded_tag<TagType>() != 0)) {
375 PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_NO_DATA_PASS);
376 }
377 auto saved_tag = UnalignedLoad<TagType>(ptr);
378 ptr += sizeof(TagType);
379 hasbits |= (uint64_t{1} << data.hasbit_idx());
380 SyncHasbits(msg, hasbits, table);
381 auto& field = RefAt<MessageLite*>(msg, data.offset());
382 const auto aux = *table->field_aux(data.aux_idx());
383 const auto* inner_table =
384 aux_is_table ? aux.table : aux.message_default()->GetTcParseTable();
385
386 if (field == nullptr) {
387 field = NewMessage(inner_table, msg->GetArena());
388 }
389 const auto inner_loop = [&](const char* ptr) {
390 return ParseLoop(field, ptr, ctx, inner_table);
391 };
392 return group_coding
393 ? ctx->ParseGroupInlined(ptr, FastDecodeTag(saved_tag), inner_loop)
394 : ctx->ParseLengthDelimitedInlined(ptr, inner_loop);
395 }
396
FastMdS1(PROTOBUF_TC_PARAM_DECL)397 PROTOBUF_NOINLINE const char* TcParser::FastMdS1(PROTOBUF_TC_PARAM_DECL) {
398 PROTOBUF_MUSTTAIL return SingularParseMessageAuxImpl<uint8_t, false, false>(
399 PROTOBUF_TC_PARAM_PASS);
400 }
401
FastMdS2(PROTOBUF_TC_PARAM_DECL)402 PROTOBUF_NOINLINE const char* TcParser::FastMdS2(PROTOBUF_TC_PARAM_DECL) {
403 PROTOBUF_MUSTTAIL return SingularParseMessageAuxImpl<uint16_t, false, false>(
404 PROTOBUF_TC_PARAM_PASS);
405 }
406
FastGdS1(PROTOBUF_TC_PARAM_DECL)407 PROTOBUF_NOINLINE const char* TcParser::FastGdS1(PROTOBUF_TC_PARAM_DECL) {
408 PROTOBUF_MUSTTAIL return SingularParseMessageAuxImpl<uint8_t, true, false>(
409 PROTOBUF_TC_PARAM_PASS);
410 }
411
FastGdS2(PROTOBUF_TC_PARAM_DECL)412 PROTOBUF_NOINLINE const char* TcParser::FastGdS2(PROTOBUF_TC_PARAM_DECL) {
413 PROTOBUF_MUSTTAIL return SingularParseMessageAuxImpl<uint16_t, true, false>(
414 PROTOBUF_TC_PARAM_PASS);
415 }
416
FastMtS1(PROTOBUF_TC_PARAM_DECL)417 PROTOBUF_NOINLINE const char* TcParser::FastMtS1(PROTOBUF_TC_PARAM_DECL) {
418 PROTOBUF_MUSTTAIL return SingularParseMessageAuxImpl<uint8_t, false, true>(
419 PROTOBUF_TC_PARAM_PASS);
420 }
421
FastMtS2(PROTOBUF_TC_PARAM_DECL)422 PROTOBUF_NOINLINE const char* TcParser::FastMtS2(PROTOBUF_TC_PARAM_DECL) {
423 PROTOBUF_MUSTTAIL return SingularParseMessageAuxImpl<uint16_t, false, true>(
424 PROTOBUF_TC_PARAM_PASS);
425 }
426
FastGtS1(PROTOBUF_TC_PARAM_DECL)427 PROTOBUF_NOINLINE const char* TcParser::FastGtS1(PROTOBUF_TC_PARAM_DECL) {
428 PROTOBUF_MUSTTAIL return SingularParseMessageAuxImpl<uint8_t, true, true>(
429 PROTOBUF_TC_PARAM_PASS);
430 }
431
FastGtS2(PROTOBUF_TC_PARAM_DECL)432 PROTOBUF_NOINLINE const char* TcParser::FastGtS2(PROTOBUF_TC_PARAM_DECL) {
433 PROTOBUF_MUSTTAIL return SingularParseMessageAuxImpl<uint16_t, true, true>(
434 PROTOBUF_TC_PARAM_PASS);
435 }
436
437 template <typename TagType>
LazyMessage(PROTOBUF_TC_PARAM_DECL)438 const char* TcParser::LazyMessage(PROTOBUF_TC_PARAM_DECL) {
439 ABSL_LOG(FATAL) << "Unimplemented";
440 return nullptr;
441 }
442
FastMlS1(PROTOBUF_TC_PARAM_DECL)443 PROTOBUF_NOINLINE const char* TcParser::FastMlS1(PROTOBUF_TC_PARAM_DECL) {
444 PROTOBUF_MUSTTAIL return LazyMessage<uint8_t>(PROTOBUF_TC_PARAM_PASS);
445 }
446
FastMlS2(PROTOBUF_TC_PARAM_DECL)447 PROTOBUF_NOINLINE const char* TcParser::FastMlS2(PROTOBUF_TC_PARAM_DECL) {
448 PROTOBUF_MUSTTAIL return LazyMessage<uint16_t>(PROTOBUF_TC_PARAM_PASS);
449 }
450
451 template <typename TagType, bool group_coding, bool aux_is_table>
RepeatedParseMessageAuxImpl(PROTOBUF_TC_PARAM_DECL)452 inline PROTOBUF_ALWAYS_INLINE const char* TcParser::RepeatedParseMessageAuxImpl(
453 PROTOBUF_TC_PARAM_DECL) {
454 if (PROTOBUF_PREDICT_FALSE(data.coded_tag<TagType>() != 0)) {
455 PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_NO_DATA_PASS);
456 }
457 PROTOBUF_PREFETCH_WITH_OFFSET(ptr, 256);
458 const auto expected_tag = UnalignedLoad<TagType>(ptr);
459 const auto aux = *table->field_aux(data.aux_idx());
460 auto& field = RefAt<RepeatedPtrFieldBase>(msg, data.offset());
461 const TcParseTableBase* inner_table =
462 aux_is_table ? aux.table : aux.message_default()->GetTcParseTable();
463 do {
464 ptr += sizeof(TagType);
465 MessageLite* submsg = AddMessage(inner_table, field);
466 const auto inner_loop = [&](const char* ptr) {
467 return ParseLoop(submsg, ptr, ctx, inner_table);
468 };
469 ptr = group_coding ? ctx->ParseGroupInlined(
470 ptr, FastDecodeTag(expected_tag), inner_loop)
471 : ctx->ParseLengthDelimitedInlined(ptr, inner_loop);
472 if (PROTOBUF_PREDICT_FALSE(ptr == nullptr)) {
473 PROTOBUF_MUSTTAIL return Error(PROTOBUF_TC_PARAM_NO_DATA_PASS);
474 }
475 if (PROTOBUF_PREDICT_FALSE(!ctx->DataAvailable(ptr))) {
476 PROTOBUF_MUSTTAIL return ToParseLoop(PROTOBUF_TC_PARAM_NO_DATA_PASS);
477 }
478 } while (UnalignedLoad<TagType>(ptr) == expected_tag);
479
480 PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_NO_DATA_PASS);
481 }
482
FastMdR1(PROTOBUF_TC_PARAM_DECL)483 PROTOBUF_NOINLINE const char* TcParser::FastMdR1(PROTOBUF_TC_PARAM_DECL) {
484 PROTOBUF_MUSTTAIL return RepeatedParseMessageAuxImpl<uint8_t, false, false>(
485 PROTOBUF_TC_PARAM_PASS);
486 }
487
FastMdR2(PROTOBUF_TC_PARAM_DECL)488 PROTOBUF_NOINLINE const char* TcParser::FastMdR2(PROTOBUF_TC_PARAM_DECL) {
489 PROTOBUF_MUSTTAIL return RepeatedParseMessageAuxImpl<uint16_t, false, false>(
490 PROTOBUF_TC_PARAM_PASS);
491 }
492
FastGdR1(PROTOBUF_TC_PARAM_DECL)493 PROTOBUF_NOINLINE const char* TcParser::FastGdR1(PROTOBUF_TC_PARAM_DECL) {
494 PROTOBUF_MUSTTAIL return RepeatedParseMessageAuxImpl<uint8_t, true, false>(
495 PROTOBUF_TC_PARAM_PASS);
496 }
497
FastGdR2(PROTOBUF_TC_PARAM_DECL)498 PROTOBUF_NOINLINE const char* TcParser::FastGdR2(PROTOBUF_TC_PARAM_DECL) {
499 PROTOBUF_MUSTTAIL return RepeatedParseMessageAuxImpl<uint16_t, true, false>(
500 PROTOBUF_TC_PARAM_PASS);
501 }
502
FastMtR1(PROTOBUF_TC_PARAM_DECL)503 PROTOBUF_NOINLINE const char* TcParser::FastMtR1(PROTOBUF_TC_PARAM_DECL) {
504 PROTOBUF_MUSTTAIL return RepeatedParseMessageAuxImpl<uint8_t, false, true>(
505 PROTOBUF_TC_PARAM_PASS);
506 }
507
FastMtR2(PROTOBUF_TC_PARAM_DECL)508 PROTOBUF_NOINLINE const char* TcParser::FastMtR2(PROTOBUF_TC_PARAM_DECL) {
509 PROTOBUF_MUSTTAIL return RepeatedParseMessageAuxImpl<uint16_t, false, true>(
510 PROTOBUF_TC_PARAM_PASS);
511 }
512
FastGtR1(PROTOBUF_TC_PARAM_DECL)513 PROTOBUF_NOINLINE const char* TcParser::FastGtR1(PROTOBUF_TC_PARAM_DECL) {
514 PROTOBUF_MUSTTAIL return RepeatedParseMessageAuxImpl<uint8_t, true, true>(
515 PROTOBUF_TC_PARAM_PASS);
516 }
517
FastGtR2(PROTOBUF_TC_PARAM_DECL)518 PROTOBUF_NOINLINE const char* TcParser::FastGtR2(PROTOBUF_TC_PARAM_DECL) {
519 PROTOBUF_MUSTTAIL return RepeatedParseMessageAuxImpl<uint16_t, true, true>(
520 PROTOBUF_TC_PARAM_PASS);
521 }
522
523 //////////////////////////////////////////////////////////////////////////////
524 // Fixed fields
525 //////////////////////////////////////////////////////////////////////////////
526
527 template <typename LayoutType, typename TagType>
SingularFixed(PROTOBUF_TC_PARAM_DECL)528 inline PROTOBUF_ALWAYS_INLINE const char* TcParser::SingularFixed(
529 PROTOBUF_TC_PARAM_DECL) {
530 if (PROTOBUF_PREDICT_FALSE(data.coded_tag<TagType>() != 0)) {
531 PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_NO_DATA_PASS);
532 }
533 ptr += sizeof(TagType); // Consume tag
534 hasbits |= (uint64_t{1} << data.hasbit_idx());
535 RefAt<LayoutType>(msg, data.offset()) = UnalignedLoad<LayoutType>(ptr);
536 ptr += sizeof(LayoutType);
537 PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_NO_DATA_PASS);
538 }
539
FastF32S1(PROTOBUF_TC_PARAM_DECL)540 PROTOBUF_NOINLINE const char* TcParser::FastF32S1(PROTOBUF_TC_PARAM_DECL) {
541 PROTOBUF_MUSTTAIL return SingularFixed<uint32_t, uint8_t>(
542 PROTOBUF_TC_PARAM_PASS);
543 }
FastF32S2(PROTOBUF_TC_PARAM_DECL)544 PROTOBUF_NOINLINE const char* TcParser::FastF32S2(PROTOBUF_TC_PARAM_DECL) {
545 PROTOBUF_MUSTTAIL return SingularFixed<uint32_t, uint16_t>(
546 PROTOBUF_TC_PARAM_PASS);
547 }
FastF64S1(PROTOBUF_TC_PARAM_DECL)548 PROTOBUF_NOINLINE const char* TcParser::FastF64S1(PROTOBUF_TC_PARAM_DECL) {
549 PROTOBUF_MUSTTAIL return SingularFixed<uint64_t, uint8_t>(
550 PROTOBUF_TC_PARAM_PASS);
551 }
FastF64S2(PROTOBUF_TC_PARAM_DECL)552 PROTOBUF_NOINLINE const char* TcParser::FastF64S2(PROTOBUF_TC_PARAM_DECL) {
553 PROTOBUF_MUSTTAIL return SingularFixed<uint64_t, uint16_t>(
554 PROTOBUF_TC_PARAM_PASS);
555 }
556
557 template <typename LayoutType, typename TagType>
RepeatedFixed(PROTOBUF_TC_PARAM_DECL)558 inline PROTOBUF_ALWAYS_INLINE const char* TcParser::RepeatedFixed(
559 PROTOBUF_TC_PARAM_DECL) {
560 if (PROTOBUF_PREDICT_FALSE(data.coded_tag<TagType>() != 0)) {
561 PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_NO_DATA_PASS);
562 }
563 auto& field = RefAt<RepeatedField<LayoutType>>(msg, data.offset());
564 const auto tag = UnalignedLoad<TagType>(ptr);
565 do {
566 field.Add(UnalignedLoad<LayoutType>(ptr + sizeof(TagType)));
567 ptr += sizeof(TagType) + sizeof(LayoutType);
568 if (PROTOBUF_PREDICT_FALSE(!ctx->DataAvailable(ptr))) {
569 PROTOBUF_MUSTTAIL return ToParseLoop(PROTOBUF_TC_PARAM_NO_DATA_PASS);
570 }
571 } while (UnalignedLoad<TagType>(ptr) == tag);
572 PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_NO_DATA_PASS);
573 }
574
FastF32R1(PROTOBUF_TC_PARAM_DECL)575 PROTOBUF_NOINLINE const char* TcParser::FastF32R1(PROTOBUF_TC_PARAM_DECL) {
576 PROTOBUF_MUSTTAIL return RepeatedFixed<uint32_t, uint8_t>(
577 PROTOBUF_TC_PARAM_PASS);
578 }
FastF32R2(PROTOBUF_TC_PARAM_DECL)579 PROTOBUF_NOINLINE const char* TcParser::FastF32R2(PROTOBUF_TC_PARAM_DECL) {
580 PROTOBUF_MUSTTAIL return RepeatedFixed<uint32_t, uint16_t>(
581 PROTOBUF_TC_PARAM_PASS);
582 }
FastF64R1(PROTOBUF_TC_PARAM_DECL)583 PROTOBUF_NOINLINE const char* TcParser::FastF64R1(PROTOBUF_TC_PARAM_DECL) {
584 PROTOBUF_MUSTTAIL return RepeatedFixed<uint64_t, uint8_t>(
585 PROTOBUF_TC_PARAM_PASS);
586 }
FastF64R2(PROTOBUF_TC_PARAM_DECL)587 PROTOBUF_NOINLINE const char* TcParser::FastF64R2(PROTOBUF_TC_PARAM_DECL) {
588 PROTOBUF_MUSTTAIL return RepeatedFixed<uint64_t, uint16_t>(
589 PROTOBUF_TC_PARAM_PASS);
590 }
591
592 template <typename LayoutType, typename TagType>
PackedFixed(PROTOBUF_TC_PARAM_DECL)593 inline PROTOBUF_ALWAYS_INLINE const char* TcParser::PackedFixed(
594 PROTOBUF_TC_PARAM_DECL) {
595 if (PROTOBUF_PREDICT_FALSE(data.coded_tag<TagType>() != 0)) {
596 PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_NO_DATA_PASS);
597 }
598 ptr += sizeof(TagType);
599 // Since ctx->ReadPackedFixed does not use TailCall<> or Return<>, sync any
600 // pending hasbits now:
601 SyncHasbits(msg, hasbits, table);
602 auto& field = RefAt<RepeatedField<LayoutType>>(msg, data.offset());
603 int size = ReadSize(&ptr);
604 // TODO: add a tailcalling variant of ReadPackedFixed.
605 return ctx->ReadPackedFixed(ptr, size,
606 static_cast<RepeatedField<LayoutType>*>(&field));
607 }
608
FastF32P1(PROTOBUF_TC_PARAM_DECL)609 PROTOBUF_NOINLINE const char* TcParser::FastF32P1(PROTOBUF_TC_PARAM_DECL) {
610 PROTOBUF_MUSTTAIL return PackedFixed<uint32_t, uint8_t>(
611 PROTOBUF_TC_PARAM_PASS);
612 }
FastF32P2(PROTOBUF_TC_PARAM_DECL)613 PROTOBUF_NOINLINE const char* TcParser::FastF32P2(PROTOBUF_TC_PARAM_DECL) {
614 PROTOBUF_MUSTTAIL return PackedFixed<uint32_t, uint16_t>(
615 PROTOBUF_TC_PARAM_PASS);
616 }
FastF64P1(PROTOBUF_TC_PARAM_DECL)617 PROTOBUF_NOINLINE const char* TcParser::FastF64P1(PROTOBUF_TC_PARAM_DECL) {
618 PROTOBUF_MUSTTAIL return PackedFixed<uint64_t, uint8_t>(
619 PROTOBUF_TC_PARAM_PASS);
620 }
FastF64P2(PROTOBUF_TC_PARAM_DECL)621 PROTOBUF_NOINLINE const char* TcParser::FastF64P2(PROTOBUF_TC_PARAM_DECL) {
622 PROTOBUF_MUSTTAIL return PackedFixed<uint64_t, uint16_t>(
623 PROTOBUF_TC_PARAM_PASS);
624 }
625
626 //////////////////////////////////////////////////////////////////////////////
627 // Varint fields
628 //////////////////////////////////////////////////////////////////////////////
629
630 namespace {
631
632 template <typename Type>
ParseVarint(const char * p,Type * value)633 inline PROTOBUF_ALWAYS_INLINE const char* ParseVarint(const char* p,
634 Type* value) {
635 static_assert(sizeof(Type) == 4 || sizeof(Type) == 8,
636 "Only [u]int32_t and [u]int64_t please");
637 #ifdef __aarch64__
638 // The VarintParse parser has a faster implementation on ARM.
639 absl::conditional_t<sizeof(Type) == 4, uint32_t, uint64_t> tmp;
640 p = VarintParse(p, &tmp);
641 if (p != nullptr) {
642 *value = tmp;
643 }
644 return p;
645 #endif
646 int64_t res;
647 p = ShiftMixParseVarint<Type>(p, res);
648 *value = res;
649 return p;
650 }
651
652 // This overload is specifically for handling bool, because bools have very
653 // different requirements and performance opportunities than ints.
ParseVarint(const char * p,bool * value)654 inline PROTOBUF_ALWAYS_INLINE const char* ParseVarint(const char* p,
655 bool* value) {
656 unsigned char byte = static_cast<unsigned char>(*p++);
657 if (PROTOBUF_PREDICT_TRUE(byte == 0 || byte == 1)) {
658 // This is the code path almost always taken,
659 // so we take care to make it very efficient.
660 if (sizeof(byte) == sizeof(*value)) {
661 memcpy(value, &byte, 1);
662 } else {
663 // The C++ standard does not specify that a `bool` takes only one byte
664 *value = byte;
665 }
666 return p;
667 }
668 // This part, we just care about code size.
669 // Although it's almost never used, we have to support it because we guarantee
670 // compatibility for users who change a field from an int32 or int64 to a bool
671 if (PROTOBUF_PREDICT_FALSE(byte & 0x80)) {
672 byte = (byte - 0x80) | *p++;
673 if (PROTOBUF_PREDICT_FALSE(byte & 0x80)) {
674 byte = (byte - 0x80) | *p++;
675 if (PROTOBUF_PREDICT_FALSE(byte & 0x80)) {
676 byte = (byte - 0x80) | *p++;
677 if (PROTOBUF_PREDICT_FALSE(byte & 0x80)) {
678 byte = (byte - 0x80) | *p++;
679 if (PROTOBUF_PREDICT_FALSE(byte & 0x80)) {
680 byte = (byte - 0x80) | *p++;
681 if (PROTOBUF_PREDICT_FALSE(byte & 0x80)) {
682 byte = (byte - 0x80) | *p++;
683 if (PROTOBUF_PREDICT_FALSE(byte & 0x80)) {
684 byte = (byte - 0x80) | *p++;
685 if (PROTOBUF_PREDICT_FALSE(byte & 0x80)) {
686 byte = (byte - 0x80) | *p++;
687 if (PROTOBUF_PREDICT_FALSE(byte & 0x80)) {
688 // We only care about the continuation bit and the first bit
689 // of the 10th byte.
690 byte = (byte - 0x80) | (*p++ & 0x81);
691 if (PROTOBUF_PREDICT_FALSE(byte & 0x80)) {
692 return nullptr;
693 }
694 }
695 }
696 }
697 }
698 }
699 }
700 }
701 }
702 }
703 *value = byte;
704 return p;
705 }
706
// Primary template: returns `value` unchanged. The <int32_t, true> and
// <int64_t, true> specializations perform the actual zigzag decoding.
template <typename FieldType, bool zigzag = false>
inline FieldType ZigZagDecodeHelper(FieldType value) {
  return value;
}
711
712 template <>
ZigZagDecodeHelper(int32_t value)713 inline int32_t ZigZagDecodeHelper<int32_t, true>(int32_t value) {
714 return WireFormatLite::ZigZagDecode32(value);
715 }
716
717 template <>
ZigZagDecodeHelper(int64_t value)718 inline int64_t ZigZagDecodeHelper<int64_t, true>(int64_t value) {
719 return WireFormatLite::ZigZagDecode64(value);
720 }
721
722 // Prefetch the enum data, if necessary.
723 // We can issue the prefetch before we start parsing the ints.
PrefetchEnumData(uint16_t xform_val,TcParseTableBase::FieldAux aux)724 inline PROTOBUF_ALWAYS_INLINE void PrefetchEnumData(
725 uint16_t xform_val, TcParseTableBase::FieldAux aux) {
726 }
727
728 // When `xform_val` is a constant, we want to inline `ValidateEnum` because it
729 // is either dropped when not a kTvEnum, or useful when it is.
730 //
731 // When it is not a constant, we do not inline `ValidateEnum` because it bloats
732 // the code around it and pessimizes the non-enum and kTvRange cases which are
733 // way more common than the kTvEnum cases. It is also called from places that
734 // already have out-of-line functions (like MpVarint) so an extra out-of-line
735 // call to `ValidateEnum` does not affect much.
EnumIsValidAux(int32_t val,uint16_t xform_val,TcParseTableBase::FieldAux aux)736 inline PROTOBUF_ALWAYS_INLINE bool EnumIsValidAux(
737 int32_t val, uint16_t xform_val, TcParseTableBase::FieldAux aux) {
738 if (xform_val == field_layout::kTvRange) {
739 auto lo = aux.enum_range.start;
740 return lo <= val && val < (lo + aux.enum_range.length);
741 }
742 if (PROTOBUF_BUILTIN_CONSTANT_P(xform_val)) {
743 return internal::ValidateEnumInlined(val, aux.enum_data);
744 } else {
745 return internal::ValidateEnum(val, aux.enum_data);
746 }
747 }
748
749 } // namespace
750
751 template <typename FieldType, typename TagType, bool zigzag>
SingularVarint(PROTOBUF_TC_PARAM_DECL)752 inline PROTOBUF_ALWAYS_INLINE const char* TcParser::SingularVarint(
753 PROTOBUF_TC_PARAM_DECL) {
754 if (PROTOBUF_PREDICT_FALSE(data.coded_tag<TagType>() != 0)) {
755 PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_NO_DATA_PASS);
756 }
757 ptr += sizeof(TagType); // Consume tag
758 hasbits |= (uint64_t{1} << data.hasbit_idx());
759
760 // clang isn't smart enough to be able to only conditionally save
761 // registers to the stack, so we turn the integer-greater-than-128
762 // case into a separate routine.
763 if (PROTOBUF_PREDICT_FALSE(static_cast<int8_t>(*ptr) < 0)) {
764 PROTOBUF_MUSTTAIL return SingularVarBigint<FieldType, TagType, zigzag>(
765 PROTOBUF_TC_PARAM_PASS);
766 }
767
768 RefAt<FieldType>(msg, data.offset()) =
769 ZigZagDecodeHelper<FieldType, zigzag>(static_cast<uint8_t>(*ptr++));
770 PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_NO_DATA_PASS);
771 }
772
773 template <typename FieldType, typename TagType, bool zigzag>
SingularVarBigint(PROTOBUF_TC_PARAM_DECL)774 PROTOBUF_NOINLINE const char* TcParser::SingularVarBigint(
775 PROTOBUF_TC_PARAM_DECL) {
776 // For some reason clang wants to save 5 registers to the stack here,
777 // but we only need four for this code, so save the data we don't need
778 // to the stack. Happily, saving them this way uses regular store
779 // instructions rather than PUSH/POP, which saves time at the cost of greater
780 // code size, but for this heavily-used piece of code, that's fine.
781 struct Spill {
782 uint64_t field_data;
783 ::google::protobuf::MessageLite* msg;
784 const ::google::protobuf::internal::TcParseTableBase* table;
785 uint64_t hasbits;
786 };
787 Spill spill = {data.data, msg, table, hasbits};
788 #if defined(__GNUC__)
789 // This empty asm block convinces the compiler that the contents of spill may
790 // have changed, and thus can't be cached in registers. It's similar to, but
791 // more optimal than, the effect of declaring it "volatile".
792 asm("" : "+m"(spill));
793 #endif
794
795 uint64_t tmp;
796 PROTOBUF_ASSUME(static_cast<int8_t>(*ptr) < 0);
797 ptr = ParseVarint(ptr, &tmp);
798
799 data.data = spill.field_data;
800 msg = spill.msg;
801 table = spill.table;
802 hasbits = spill.hasbits;
803
804 if (PROTOBUF_PREDICT_FALSE(ptr == nullptr)) {
805 PROTOBUF_MUSTTAIL return Error(PROTOBUF_TC_PARAM_NO_DATA_PASS);
806 }
807 RefAt<FieldType>(msg, data.offset()) =
808 ZigZagDecodeHelper<FieldType, zigzag>(tmp);
809 PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_NO_DATA_PASS);
810 }
811
812 template <typename FieldType>
FastVarintS1(PROTOBUF_TC_PARAM_DECL)813 inline PROTOBUF_ALWAYS_INLINE const char* TcParser::FastVarintS1(
814 PROTOBUF_TC_PARAM_DECL) {
815 using TagType = uint8_t;
816 if (PROTOBUF_PREDICT_FALSE(data.coded_tag<TagType>() != 0)) {
817 PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_NO_DATA_PASS);
818 }
819 int64_t res;
820 ptr = ShiftMixParseVarint<FieldType>(ptr + sizeof(TagType), res);
821 if (PROTOBUF_PREDICT_FALSE(ptr == nullptr)) {
822 PROTOBUF_MUSTTAIL return Error(PROTOBUF_TC_PARAM_NO_DATA_PASS);
823 }
824 hasbits |= (uint64_t{1} << data.hasbit_idx());
825 RefAt<FieldType>(msg, data.offset()) = res;
826 PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_NO_DATA_PASS);
827 }
828
FastV8S1(PROTOBUF_TC_PARAM_DECL)829 PROTOBUF_NOINLINE const char* TcParser::FastV8S1(PROTOBUF_TC_PARAM_DECL) {
830 using TagType = uint8_t;
831
832 // Special case for a varint bool field with a tag of 1 byte:
833 // The coded_tag() field will actually contain the value too and we can check
834 // both at the same time.
835 auto coded_tag = data.coded_tag<uint16_t>();
836 if (PROTOBUF_PREDICT_TRUE(coded_tag == 0x0000 || coded_tag == 0x0100)) {
837 auto& field = RefAt<bool>(msg, data.offset());
838 // Note: we use `data.data` because Clang generates suboptimal code when
839 // using coded_tag.
840 // In x86_64 this uses the CH register to read the second byte out of
841 // `data`.
842 uint8_t value = data.data >> 8;
843 // The assume allows using a mov instead of test+setne.
844 PROTOBUF_ASSUME(value <= 1);
845 field = static_cast<bool>(value);
846
847 ptr += sizeof(TagType) + 1; // Consume the tag and the value.
848 hasbits |= (uint64_t{1} << data.hasbit_idx());
849
850 PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_NO_DATA_PASS);
851 }
852
853 // If it didn't match above either the tag is wrong, or the value is encoded
854 // non-canonically.
855 // Jump to MiniParse as wrong tag is the most probable reason.
856 PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_NO_DATA_PASS);
857 }
858
FastV8S2(PROTOBUF_TC_PARAM_DECL)859 PROTOBUF_NOINLINE const char* TcParser::FastV8S2(PROTOBUF_TC_PARAM_DECL) {
860 PROTOBUF_MUSTTAIL return SingularVarint<bool, uint16_t>(
861 PROTOBUF_TC_PARAM_PASS);
862 }
FastV32S1(PROTOBUF_TC_PARAM_DECL)863 PROTOBUF_NOINLINE const char* TcParser::FastV32S1(PROTOBUF_TC_PARAM_DECL) {
864 PROTOBUF_MUSTTAIL return FastVarintS1<uint32_t>(PROTOBUF_TC_PARAM_PASS);
865 }
FastV32S2(PROTOBUF_TC_PARAM_DECL)866 PROTOBUF_NOINLINE const char* TcParser::FastV32S2(PROTOBUF_TC_PARAM_DECL) {
867 PROTOBUF_MUSTTAIL return SingularVarint<uint32_t, uint16_t>(
868 PROTOBUF_TC_PARAM_PASS);
869 }
FastV64S1(PROTOBUF_TC_PARAM_DECL)870 PROTOBUF_NOINLINE const char* TcParser::FastV64S1(PROTOBUF_TC_PARAM_DECL) {
871 PROTOBUF_MUSTTAIL return FastVarintS1<uint64_t>(PROTOBUF_TC_PARAM_PASS);
872 }
FastV64S2(PROTOBUF_TC_PARAM_DECL)873 PROTOBUF_NOINLINE const char* TcParser::FastV64S2(PROTOBUF_TC_PARAM_DECL) {
874 PROTOBUF_MUSTTAIL return SingularVarint<uint64_t, uint16_t>(
875 PROTOBUF_TC_PARAM_PASS);
876 }
877
FastZ32S1(PROTOBUF_TC_PARAM_DECL)878 PROTOBUF_NOINLINE const char* TcParser::FastZ32S1(PROTOBUF_TC_PARAM_DECL) {
879 PROTOBUF_MUSTTAIL return SingularVarint<int32_t, uint8_t, true>(
880 PROTOBUF_TC_PARAM_PASS);
881 }
FastZ32S2(PROTOBUF_TC_PARAM_DECL)882 PROTOBUF_NOINLINE const char* TcParser::FastZ32S2(PROTOBUF_TC_PARAM_DECL) {
883 PROTOBUF_MUSTTAIL return SingularVarint<int32_t, uint16_t, true>(
884 PROTOBUF_TC_PARAM_PASS);
885 }
FastZ64S1(PROTOBUF_TC_PARAM_DECL)886 PROTOBUF_NOINLINE const char* TcParser::FastZ64S1(PROTOBUF_TC_PARAM_DECL) {
887 PROTOBUF_MUSTTAIL return SingularVarint<int64_t, uint8_t, true>(
888 PROTOBUF_TC_PARAM_PASS);
889 }
FastZ64S2(PROTOBUF_TC_PARAM_DECL)890 PROTOBUF_NOINLINE const char* TcParser::FastZ64S2(PROTOBUF_TC_PARAM_DECL) {
891 PROTOBUF_MUSTTAIL return SingularVarint<int64_t, uint16_t, true>(
892 PROTOBUF_TC_PARAM_PASS);
893 }
894
895 template <typename FieldType, typename TagType, bool zigzag>
RepeatedVarint(PROTOBUF_TC_PARAM_DECL)896 inline PROTOBUF_ALWAYS_INLINE const char* TcParser::RepeatedVarint(
897 PROTOBUF_TC_PARAM_DECL) {
898 if (PROTOBUF_PREDICT_FALSE(data.coded_tag<TagType>() != 0)) {
899 PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_NO_DATA_PASS);
900 }
901 auto& field = RefAt<RepeatedField<FieldType>>(msg, data.offset());
902 const auto expected_tag = UnalignedLoad<TagType>(ptr);
903 do {
904 ptr += sizeof(TagType);
905 FieldType tmp;
906 ptr = ParseVarint(ptr, &tmp);
907 if (PROTOBUF_PREDICT_FALSE(ptr == nullptr)) {
908 PROTOBUF_MUSTTAIL return Error(PROTOBUF_TC_PARAM_NO_DATA_PASS);
909 }
910 field.Add(ZigZagDecodeHelper<FieldType, zigzag>(tmp));
911 if (PROTOBUF_PREDICT_FALSE(!ctx->DataAvailable(ptr))) {
912 PROTOBUF_MUSTTAIL return ToParseLoop(PROTOBUF_TC_PARAM_NO_DATA_PASS);
913 }
914 } while (UnalignedLoad<TagType>(ptr) == expected_tag);
915 PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_NO_DATA_PASS);
916 }
917
FastV8R1(PROTOBUF_TC_PARAM_DECL)918 PROTOBUF_NOINLINE const char* TcParser::FastV8R1(PROTOBUF_TC_PARAM_DECL) {
919 PROTOBUF_MUSTTAIL return RepeatedVarint<bool, uint8_t>(
920 PROTOBUF_TC_PARAM_PASS);
921 }
FastV8R2(PROTOBUF_TC_PARAM_DECL)922 PROTOBUF_NOINLINE const char* TcParser::FastV8R2(PROTOBUF_TC_PARAM_DECL) {
923 PROTOBUF_MUSTTAIL return RepeatedVarint<bool, uint16_t>(
924 PROTOBUF_TC_PARAM_PASS);
925 }
FastV32R1(PROTOBUF_TC_PARAM_DECL)926 PROTOBUF_NOINLINE const char* TcParser::FastV32R1(PROTOBUF_TC_PARAM_DECL) {
927 PROTOBUF_MUSTTAIL return RepeatedVarint<uint32_t, uint8_t>(
928 PROTOBUF_TC_PARAM_PASS);
929 }
FastV32R2(PROTOBUF_TC_PARAM_DECL)930 PROTOBUF_NOINLINE const char* TcParser::FastV32R2(PROTOBUF_TC_PARAM_DECL) {
931 PROTOBUF_MUSTTAIL return RepeatedVarint<uint32_t, uint16_t>(
932 PROTOBUF_TC_PARAM_PASS);
933 }
FastV64R1(PROTOBUF_TC_PARAM_DECL)934 PROTOBUF_NOINLINE const char* TcParser::FastV64R1(PROTOBUF_TC_PARAM_DECL) {
935 PROTOBUF_MUSTTAIL return RepeatedVarint<uint64_t, uint8_t>(
936 PROTOBUF_TC_PARAM_PASS);
937 }
FastV64R2(PROTOBUF_TC_PARAM_DECL)938 PROTOBUF_NOINLINE const char* TcParser::FastV64R2(PROTOBUF_TC_PARAM_DECL) {
939 PROTOBUF_MUSTTAIL return RepeatedVarint<uint64_t, uint16_t>(
940 PROTOBUF_TC_PARAM_PASS);
941 }
942
FastZ32R1(PROTOBUF_TC_PARAM_DECL)943 PROTOBUF_NOINLINE const char* TcParser::FastZ32R1(PROTOBUF_TC_PARAM_DECL) {
944 PROTOBUF_MUSTTAIL return RepeatedVarint<int32_t, uint8_t, true>(
945 PROTOBUF_TC_PARAM_PASS);
946 }
FastZ32R2(PROTOBUF_TC_PARAM_DECL)947 PROTOBUF_NOINLINE const char* TcParser::FastZ32R2(PROTOBUF_TC_PARAM_DECL) {
948 PROTOBUF_MUSTTAIL return RepeatedVarint<int32_t, uint16_t, true>(
949 PROTOBUF_TC_PARAM_PASS);
950 }
FastZ64R1(PROTOBUF_TC_PARAM_DECL)951 PROTOBUF_NOINLINE const char* TcParser::FastZ64R1(PROTOBUF_TC_PARAM_DECL) {
952 PROTOBUF_MUSTTAIL return RepeatedVarint<int64_t, uint8_t, true>(
953 PROTOBUF_TC_PARAM_PASS);
954 }
FastZ64R2(PROTOBUF_TC_PARAM_DECL)955 PROTOBUF_NOINLINE const char* TcParser::FastZ64R2(PROTOBUF_TC_PARAM_DECL) {
956 PROTOBUF_MUSTTAIL return RepeatedVarint<int64_t, uint16_t, true>(
957 PROTOBUF_TC_PARAM_PASS);
958 }
959
960 template <typename FieldType, typename TagType, bool zigzag>
PackedVarint(PROTOBUF_TC_PARAM_DECL)961 inline PROTOBUF_ALWAYS_INLINE const char* TcParser::PackedVarint(
962 PROTOBUF_TC_PARAM_DECL) {
963 if (PROTOBUF_PREDICT_FALSE(data.coded_tag<TagType>() != 0)) {
964 PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_NO_DATA_PASS);
965 }
966 ptr += sizeof(TagType);
967 // Since ctx->ReadPackedVarint does not use TailCall or Return, sync any
968 // pending hasbits now:
969 SyncHasbits(msg, hasbits, table);
970 auto* field = &RefAt<RepeatedField<FieldType>>(msg, data.offset());
971 return ctx->ReadPackedVarint(ptr, [field](uint64_t varint) {
972 FieldType val;
973 if (zigzag) {
974 if (sizeof(FieldType) == 8) {
975 val = WireFormatLite::ZigZagDecode64(varint);
976 } else {
977 val = WireFormatLite::ZigZagDecode32(varint);
978 }
979 } else {
980 val = varint;
981 }
982 field->Add(val);
983 });
984 }
985
FastV8P1(PROTOBUF_TC_PARAM_DECL)986 PROTOBUF_NOINLINE const char* TcParser::FastV8P1(PROTOBUF_TC_PARAM_DECL) {
987 PROTOBUF_MUSTTAIL return PackedVarint<bool, uint8_t>(PROTOBUF_TC_PARAM_PASS);
988 }
FastV8P2(PROTOBUF_TC_PARAM_DECL)989 PROTOBUF_NOINLINE const char* TcParser::FastV8P2(PROTOBUF_TC_PARAM_DECL) {
990 PROTOBUF_MUSTTAIL return PackedVarint<bool, uint16_t>(PROTOBUF_TC_PARAM_PASS);
991 }
FastV32P1(PROTOBUF_TC_PARAM_DECL)992 PROTOBUF_NOINLINE const char* TcParser::FastV32P1(PROTOBUF_TC_PARAM_DECL) {
993 PROTOBUF_MUSTTAIL return PackedVarint<uint32_t, uint8_t>(
994 PROTOBUF_TC_PARAM_PASS);
995 }
FastV32P2(PROTOBUF_TC_PARAM_DECL)996 PROTOBUF_NOINLINE const char* TcParser::FastV32P2(PROTOBUF_TC_PARAM_DECL) {
997 PROTOBUF_MUSTTAIL return PackedVarint<uint32_t, uint16_t>(
998 PROTOBUF_TC_PARAM_PASS);
999 }
FastV64P1(PROTOBUF_TC_PARAM_DECL)1000 PROTOBUF_NOINLINE const char* TcParser::FastV64P1(PROTOBUF_TC_PARAM_DECL) {
1001 PROTOBUF_MUSTTAIL return PackedVarint<uint64_t, uint8_t>(
1002 PROTOBUF_TC_PARAM_PASS);
1003 }
FastV64P2(PROTOBUF_TC_PARAM_DECL)1004 PROTOBUF_NOINLINE const char* TcParser::FastV64P2(PROTOBUF_TC_PARAM_DECL) {
1005 PROTOBUF_MUSTTAIL return PackedVarint<uint64_t, uint16_t>(
1006 PROTOBUF_TC_PARAM_PASS);
1007 }
1008
FastZ32P1(PROTOBUF_TC_PARAM_DECL)1009 PROTOBUF_NOINLINE const char* TcParser::FastZ32P1(PROTOBUF_TC_PARAM_DECL) {
1010 PROTOBUF_MUSTTAIL return PackedVarint<int32_t, uint8_t, true>(
1011 PROTOBUF_TC_PARAM_PASS);
1012 }
FastZ32P2(PROTOBUF_TC_PARAM_DECL)1013 PROTOBUF_NOINLINE const char* TcParser::FastZ32P2(PROTOBUF_TC_PARAM_DECL) {
1014 PROTOBUF_MUSTTAIL return PackedVarint<int32_t, uint16_t, true>(
1015 PROTOBUF_TC_PARAM_PASS);
1016 }
FastZ64P1(PROTOBUF_TC_PARAM_DECL)1017 PROTOBUF_NOINLINE const char* TcParser::FastZ64P1(PROTOBUF_TC_PARAM_DECL) {
1018 PROTOBUF_MUSTTAIL return PackedVarint<int64_t, uint8_t, true>(
1019 PROTOBUF_TC_PARAM_PASS);
1020 }
FastZ64P2(PROTOBUF_TC_PARAM_DECL)1021 PROTOBUF_NOINLINE const char* TcParser::FastZ64P2(PROTOBUF_TC_PARAM_DECL) {
1022 PROTOBUF_MUSTTAIL return PackedVarint<int64_t, uint16_t, true>(
1023 PROTOBUF_TC_PARAM_PASS);
1024 }
1025
1026 //////////////////////////////////////////////////////////////////////////////
1027 // Enum fields
1028 //////////////////////////////////////////////////////////////////////////////
1029
FastUnknownEnumFallback(PROTOBUF_TC_PARAM_DECL)1030 PROTOBUF_NOINLINE const char* TcParser::FastUnknownEnumFallback(
1031 PROTOBUF_TC_PARAM_DECL) {
1032 // Skip MiniParse/fallback and insert the element directly into the unknown
1033 // field set. We also normalize the value into an int32 as we do for known
1034 // enum values.
1035 uint32_t tag;
1036 ptr = ReadTag(ptr, &tag);
1037 if (PROTOBUF_PREDICT_FALSE(ptr == nullptr)) {
1038 PROTOBUF_MUSTTAIL return Error(PROTOBUF_TC_PARAM_NO_DATA_PASS);
1039 }
1040 uint64_t tmp;
1041 ptr = ParseVarint(ptr, &tmp);
1042 if (PROTOBUF_PREDICT_FALSE(ptr == nullptr)) {
1043 PROTOBUF_MUSTTAIL return Error(PROTOBUF_TC_PARAM_NO_DATA_PASS);
1044 }
1045 AddUnknownEnum(msg, table, tag, static_cast<int32_t>(tmp));
1046 PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_NO_DATA_PASS);
1047 }
1048
MpUnknownEnumFallback(PROTOBUF_TC_PARAM_DECL)1049 PROTOBUF_NOINLINE const char* TcParser::MpUnknownEnumFallback(
1050 PROTOBUF_TC_PARAM_DECL) {
1051 // Like FastUnknownEnumFallback, but with the Mp ABI.
1052 uint32_t tag = data.tag();
1053 uint64_t tmp;
1054 ptr = ParseVarint(ptr, &tmp);
1055 if (PROTOBUF_PREDICT_FALSE(ptr == nullptr)) {
1056 PROTOBUF_MUSTTAIL return Error(PROTOBUF_TC_PARAM_NO_DATA_PASS);
1057 }
1058 AddUnknownEnum(msg, table, tag, static_cast<int32_t>(tmp));
1059 PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_NO_DATA_PASS);
1060 }
1061
1062 template <typename TagType, uint16_t xform_val>
SingularEnum(PROTOBUF_TC_PARAM_DECL)1063 inline PROTOBUF_ALWAYS_INLINE const char* TcParser::SingularEnum(
1064 PROTOBUF_TC_PARAM_DECL) {
1065 if (PROTOBUF_PREDICT_FALSE(data.coded_tag<TagType>() != 0)) {
1066 PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_NO_DATA_PASS);
1067 }
1068 const TcParseTableBase::FieldAux aux = *table->field_aux(data.aux_idx());
1069 PrefetchEnumData(xform_val, aux);
1070 const char* ptr2 = ptr; // Save for unknown enum case
1071 ptr += sizeof(TagType); // Consume tag
1072 uint64_t tmp;
1073 ptr = ParseVarint(ptr, &tmp);
1074 if (PROTOBUF_PREDICT_FALSE(ptr == nullptr)) {
1075 PROTOBUF_MUSTTAIL return Error(PROTOBUF_TC_PARAM_NO_DATA_PASS);
1076 }
1077 if (PROTOBUF_PREDICT_FALSE(
1078 !EnumIsValidAux(static_cast<int32_t>(tmp), xform_val, aux))) {
1079 ptr = ptr2;
1080 PROTOBUF_MUSTTAIL return FastUnknownEnumFallback(PROTOBUF_TC_PARAM_PASS);
1081 }
1082 hasbits |= (uint64_t{1} << data.hasbit_idx());
1083 RefAt<int32_t>(msg, data.offset()) = tmp;
1084 PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_NO_DATA_PASS);
1085 }
1086
FastErS1(PROTOBUF_TC_PARAM_DECL)1087 PROTOBUF_NOINLINE const char* TcParser::FastErS1(PROTOBUF_TC_PARAM_DECL) {
1088 PROTOBUF_MUSTTAIL return SingularEnum<uint8_t, field_layout::kTvRange>(
1089 PROTOBUF_TC_PARAM_PASS);
1090 }
FastErS2(PROTOBUF_TC_PARAM_DECL)1091 PROTOBUF_NOINLINE const char* TcParser::FastErS2(PROTOBUF_TC_PARAM_DECL) {
1092 PROTOBUF_MUSTTAIL return SingularEnum<uint16_t, field_layout::kTvRange>(
1093 PROTOBUF_TC_PARAM_PASS);
1094 }
FastEvS1(PROTOBUF_TC_PARAM_DECL)1095 PROTOBUF_NOINLINE const char* TcParser::FastEvS1(PROTOBUF_TC_PARAM_DECL) {
1096 PROTOBUF_MUSTTAIL return SingularEnum<uint8_t, field_layout::kTvEnum>(
1097 PROTOBUF_TC_PARAM_PASS);
1098 }
FastEvS2(PROTOBUF_TC_PARAM_DECL)1099 PROTOBUF_NOINLINE const char* TcParser::FastEvS2(PROTOBUF_TC_PARAM_DECL) {
1100 PROTOBUF_MUSTTAIL return SingularEnum<uint16_t, field_layout::kTvEnum>(
1101 PROTOBUF_TC_PARAM_PASS);
1102 }
1103
1104 template <typename TagType, uint16_t xform_val>
RepeatedEnum(PROTOBUF_TC_PARAM_DECL)1105 inline PROTOBUF_ALWAYS_INLINE const char* TcParser::RepeatedEnum(
1106 PROTOBUF_TC_PARAM_DECL) {
1107 if (PROTOBUF_PREDICT_FALSE(data.coded_tag<TagType>() != 0)) {
1108 PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_NO_DATA_PASS);
1109 }
1110 auto& field = RefAt<RepeatedField<int32_t>>(msg, data.offset());
1111 const auto expected_tag = UnalignedLoad<TagType>(ptr);
1112 const TcParseTableBase::FieldAux aux = *table->field_aux(data.aux_idx());
1113 PrefetchEnumData(xform_val, aux);
1114 do {
1115 const char* ptr2 = ptr; // save for unknown enum case
1116 ptr += sizeof(TagType);
1117 uint64_t tmp;
1118 ptr = ParseVarint(ptr, &tmp);
1119 if (PROTOBUF_PREDICT_FALSE(ptr == nullptr)) {
1120 PROTOBUF_MUSTTAIL return Error(PROTOBUF_TC_PARAM_NO_DATA_PASS);
1121 }
1122 if (PROTOBUF_PREDICT_FALSE(
1123 !EnumIsValidAux(static_cast<int32_t>(tmp), xform_val, aux))) {
1124 // We can avoid duplicate work in MiniParse by directly calling
1125 // table->fallback.
1126 ptr = ptr2;
1127 PROTOBUF_MUSTTAIL return FastUnknownEnumFallback(PROTOBUF_TC_PARAM_PASS);
1128 }
1129 field.Add(static_cast<int32_t>(tmp));
1130 if (PROTOBUF_PREDICT_FALSE(!ctx->DataAvailable(ptr))) {
1131 PROTOBUF_MUSTTAIL return ToParseLoop(PROTOBUF_TC_PARAM_NO_DATA_PASS);
1132 }
1133 } while (UnalignedLoad<TagType>(ptr) == expected_tag);
1134
1135 PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_NO_DATA_PASS);
1136 }
1137
GetUnknownFieldOps(const TcParseTableBase * table)1138 const TcParser::UnknownFieldOps& TcParser::GetUnknownFieldOps(
1139 const TcParseTableBase* table) {
1140 // Call the fallback function in a special mode to only act as a
1141 // way to return the ops.
1142 // Hiding the unknown fields vtable behind the fallback function avoids adding
1143 // more pointers in TcParseTableBase, and the extra runtime jumps are not
1144 // relevant because unknown fields are rare.
1145 const char* ptr = table->fallback(nullptr, nullptr, nullptr, {}, nullptr, 0);
1146 return *reinterpret_cast<const UnknownFieldOps*>(ptr);
1147 }
1148
AddUnknownEnum(MessageLite * msg,const TcParseTableBase * table,uint32_t tag,int32_t enum_value)1149 PROTOBUF_NOINLINE void TcParser::AddUnknownEnum(MessageLite* msg,
1150 const TcParseTableBase* table,
1151 uint32_t tag,
1152 int32_t enum_value) {
1153 GetUnknownFieldOps(table).write_varint(msg, tag >> 3, enum_value);
1154 }
1155
1156 template <typename TagType, uint16_t xform_val>
PackedEnum(PROTOBUF_TC_PARAM_DECL)1157 inline PROTOBUF_ALWAYS_INLINE const char* TcParser::PackedEnum(
1158 PROTOBUF_TC_PARAM_DECL) {
1159 if (PROTOBUF_PREDICT_FALSE(data.coded_tag<TagType>() != 0)) {
1160 PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_NO_DATA_PASS);
1161 }
1162 const auto saved_tag = UnalignedLoad<TagType>(ptr);
1163 ptr += sizeof(TagType);
1164 // Since ctx->ReadPackedVarint does not use TailCall or Return, sync any
1165 // pending hasbits now:
1166 SyncHasbits(msg, hasbits, table);
1167 auto* field = &RefAt<RepeatedField<int32_t>>(msg, data.offset());
1168 const TcParseTableBase::FieldAux aux = *table->field_aux(data.aux_idx());
1169 PrefetchEnumData(xform_val, aux);
1170 return ctx->ReadPackedVarint(ptr, [=](int32_t value) {
1171 if (!EnumIsValidAux(value, xform_val, aux)) {
1172 AddUnknownEnum(msg, table, FastDecodeTag(saved_tag), value);
1173 } else {
1174 field->Add(value);
1175 }
1176 });
1177 }
1178
FastErR1(PROTOBUF_TC_PARAM_DECL)1179 PROTOBUF_NOINLINE const char* TcParser::FastErR1(PROTOBUF_TC_PARAM_DECL) {
1180 PROTOBUF_MUSTTAIL return RepeatedEnum<uint8_t, field_layout::kTvRange>(
1181 PROTOBUF_TC_PARAM_PASS);
1182 }
FastErR2(PROTOBUF_TC_PARAM_DECL)1183 PROTOBUF_NOINLINE const char* TcParser::FastErR2(PROTOBUF_TC_PARAM_DECL) {
1184 PROTOBUF_MUSTTAIL return RepeatedEnum<uint16_t, field_layout::kTvRange>(
1185 PROTOBUF_TC_PARAM_PASS);
1186 }
FastEvR1(PROTOBUF_TC_PARAM_DECL)1187 PROTOBUF_NOINLINE const char* TcParser::FastEvR1(PROTOBUF_TC_PARAM_DECL) {
1188 PROTOBUF_MUSTTAIL return RepeatedEnum<uint8_t, field_layout::kTvEnum>(
1189 PROTOBUF_TC_PARAM_PASS);
1190 }
FastEvR2(PROTOBUF_TC_PARAM_DECL)1191 PROTOBUF_NOINLINE const char* TcParser::FastEvR2(PROTOBUF_TC_PARAM_DECL) {
1192 PROTOBUF_MUSTTAIL return RepeatedEnum<uint16_t, field_layout::kTvEnum>(
1193 PROTOBUF_TC_PARAM_PASS);
1194 }
1195
FastErP1(PROTOBUF_TC_PARAM_DECL)1196 PROTOBUF_NOINLINE const char* TcParser::FastErP1(PROTOBUF_TC_PARAM_DECL) {
1197 PROTOBUF_MUSTTAIL return PackedEnum<uint8_t, field_layout::kTvRange>(
1198 PROTOBUF_TC_PARAM_PASS);
1199 }
FastErP2(PROTOBUF_TC_PARAM_DECL)1200 PROTOBUF_NOINLINE const char* TcParser::FastErP2(PROTOBUF_TC_PARAM_DECL) {
1201 PROTOBUF_MUSTTAIL return PackedEnum<uint16_t, field_layout::kTvRange>(
1202 PROTOBUF_TC_PARAM_PASS);
1203 }
FastEvP1(PROTOBUF_TC_PARAM_DECL)1204 PROTOBUF_NOINLINE const char* TcParser::FastEvP1(PROTOBUF_TC_PARAM_DECL) {
1205 PROTOBUF_MUSTTAIL return PackedEnum<uint8_t, field_layout::kTvEnum>(
1206 PROTOBUF_TC_PARAM_PASS);
1207 }
FastEvP2(PROTOBUF_TC_PARAM_DECL)1208 PROTOBUF_NOINLINE const char* TcParser::FastEvP2(PROTOBUF_TC_PARAM_DECL) {
1209 PROTOBUF_MUSTTAIL return PackedEnum<uint16_t, field_layout::kTvEnum>(
1210 PROTOBUF_TC_PARAM_PASS);
1211 }
1212
1213 template <typename TagType, uint8_t min>
SingularEnumSmallRange(PROTOBUF_TC_PARAM_DECL)1214 inline PROTOBUF_ALWAYS_INLINE const char* TcParser::SingularEnumSmallRange(
1215 PROTOBUF_TC_PARAM_DECL) {
1216 if (PROTOBUF_PREDICT_FALSE(data.coded_tag<TagType>() != 0)) {
1217 PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_NO_DATA_PASS);
1218 }
1219
1220 uint8_t v = ptr[sizeof(TagType)];
1221 if (PROTOBUF_PREDICT_FALSE(min > v || v > data.aux_idx())) {
1222 PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_NO_DATA_PASS);
1223 }
1224
1225 RefAt<int32_t>(msg, data.offset()) = v;
1226 ptr += sizeof(TagType) + 1;
1227 hasbits |= (uint64_t{1} << data.hasbit_idx());
1228 PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_NO_DATA_PASS);
1229 }
1230
FastEr0S1(PROTOBUF_TC_PARAM_DECL)1231 PROTOBUF_NOINLINE const char* TcParser::FastEr0S1(PROTOBUF_TC_PARAM_DECL) {
1232 PROTOBUF_MUSTTAIL return SingularEnumSmallRange<uint8_t, 0>(
1233 PROTOBUF_TC_PARAM_PASS);
1234 }
1235
FastEr0S2(PROTOBUF_TC_PARAM_DECL)1236 PROTOBUF_NOINLINE const char* TcParser::FastEr0S2(PROTOBUF_TC_PARAM_DECL) {
1237 PROTOBUF_MUSTTAIL return SingularEnumSmallRange<uint16_t, 0>(
1238 PROTOBUF_TC_PARAM_PASS);
1239 }
1240
FastEr1S1(PROTOBUF_TC_PARAM_DECL)1241 PROTOBUF_NOINLINE const char* TcParser::FastEr1S1(PROTOBUF_TC_PARAM_DECL) {
1242 PROTOBUF_MUSTTAIL return SingularEnumSmallRange<uint8_t, 1>(
1243 PROTOBUF_TC_PARAM_PASS);
1244 }
1245
FastEr1S2(PROTOBUF_TC_PARAM_DECL)1246 PROTOBUF_NOINLINE const char* TcParser::FastEr1S2(PROTOBUF_TC_PARAM_DECL) {
1247 PROTOBUF_MUSTTAIL return SingularEnumSmallRange<uint16_t, 1>(
1248 PROTOBUF_TC_PARAM_PASS);
1249 }
1250
1251 template <typename TagType, uint8_t min>
RepeatedEnumSmallRange(PROTOBUF_TC_PARAM_DECL)1252 inline PROTOBUF_ALWAYS_INLINE const char* TcParser::RepeatedEnumSmallRange(
1253 PROTOBUF_TC_PARAM_DECL) {
1254 if (PROTOBUF_PREDICT_FALSE(data.coded_tag<TagType>() != 0)) {
1255 PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_NO_DATA_PASS);
1256 }
1257 auto& field = RefAt<RepeatedField<int32_t>>(msg, data.offset());
1258 auto expected_tag = UnalignedLoad<TagType>(ptr);
1259 const uint8_t max = data.aux_idx();
1260 do {
1261 uint8_t v = ptr[sizeof(TagType)];
1262 if (PROTOBUF_PREDICT_FALSE(min > v || v > max)) {
1263 PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_NO_DATA_PASS);
1264 }
1265 field.Add(static_cast<int32_t>(v));
1266 ptr += sizeof(TagType) + 1;
1267 if (PROTOBUF_PREDICT_FALSE(!ctx->DataAvailable(ptr))) {
1268 PROTOBUF_MUSTTAIL return ToParseLoop(PROTOBUF_TC_PARAM_NO_DATA_PASS);
1269 }
1270 } while (UnalignedLoad<TagType>(ptr) == expected_tag);
1271
1272 PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_NO_DATA_PASS);
1273 }
1274
FastEr0R1(PROTOBUF_TC_PARAM_DECL)1275 PROTOBUF_NOINLINE const char* TcParser::FastEr0R1(PROTOBUF_TC_PARAM_DECL) {
1276 PROTOBUF_MUSTTAIL return RepeatedEnumSmallRange<uint8_t, 0>(
1277 PROTOBUF_TC_PARAM_PASS);
1278 }
FastEr0R2(PROTOBUF_TC_PARAM_DECL)1279 PROTOBUF_NOINLINE const char* TcParser::FastEr0R2(PROTOBUF_TC_PARAM_DECL) {
1280 PROTOBUF_MUSTTAIL return RepeatedEnumSmallRange<uint16_t, 0>(
1281 PROTOBUF_TC_PARAM_PASS);
1282 }
1283
FastEr1R1(PROTOBUF_TC_PARAM_DECL)1284 PROTOBUF_NOINLINE const char* TcParser::FastEr1R1(PROTOBUF_TC_PARAM_DECL) {
1285 PROTOBUF_MUSTTAIL return RepeatedEnumSmallRange<uint8_t, 1>(
1286 PROTOBUF_TC_PARAM_PASS);
1287 }
FastEr1R2(PROTOBUF_TC_PARAM_DECL)1288 PROTOBUF_NOINLINE const char* TcParser::FastEr1R2(PROTOBUF_TC_PARAM_DECL) {
1289 PROTOBUF_MUSTTAIL return RepeatedEnumSmallRange<uint16_t, 1>(
1290 PROTOBUF_TC_PARAM_PASS);
1291 }
1292
1293 template <typename TagType, uint8_t min>
PackedEnumSmallRange(PROTOBUF_TC_PARAM_DECL)1294 inline PROTOBUF_ALWAYS_INLINE const char* TcParser::PackedEnumSmallRange(
1295 PROTOBUF_TC_PARAM_DECL) {
1296 if (PROTOBUF_PREDICT_FALSE(data.coded_tag<TagType>() != 0)) {
1297 PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_NO_DATA_PASS);
1298 }
1299
1300 // Since ctx->ReadPackedVarint does not use TailCall or Return, sync any
1301 // pending hasbits now:
1302 SyncHasbits(msg, hasbits, table);
1303
1304 const auto saved_tag = UnalignedLoad<TagType>(ptr);
1305 ptr += sizeof(TagType);
1306 auto* field = &RefAt<RepeatedField<int32_t>>(msg, data.offset());
1307 const uint8_t max = data.aux_idx();
1308
1309 return ctx->ReadPackedVarint(
1310 ptr,
1311 [=](int32_t v) {
1312 if (PROTOBUF_PREDICT_FALSE(min > v || v > max)) {
1313 AddUnknownEnum(msg, table, FastDecodeTag(saved_tag), v);
1314 } else {
1315 field->Add(v);
1316 }
1317 },
1318 /*size_callback=*/
1319 [=](int32_t size_bytes) {
1320 // For enums that fit in one varint byte, optimistically assume that all
1321 // the values are one byte long (i.e. no large unknown values). If so,
1322 // we know exactly how many values we're going to get.
1323 //
1324 // But! size_bytes might be much larger than the total size of the
1325 // serialized proto (e.g. input corruption, or parsing msg1 as msg2).
1326 // We don't want a small serialized proto to lead to giant memory
1327 // allocations.
1328 //
1329 // Ideally we'd restrict size_bytes to the total size of the input, but
1330 // we don't know that value. The best we can do is to restrict it to
1331 // the remaining bytes in the chunk, plus a "benefit of the doubt"
1332 // factor if we're very close to the end of the chunk.
1333 //
1334 // Do these calculations in int64 because it's possible we overflow
1335 // int32 (imgaine that field->size() and size_bytes are both large).
1336 int64_t new_size =
1337 int64_t{field->size()} +
1338 std::min(size_bytes, std::max(1024, ctx->MaximumReadSize(ptr)));
1339 field->Reserve(static_cast<int32_t>(
1340 std::min(new_size, int64_t{std::numeric_limits<int32_t>::max()})));
1341 });
1342 }
1343
FastEr0P1(PROTOBUF_TC_PARAM_DECL)1344 PROTOBUF_NOINLINE const char* TcParser::FastEr0P1(PROTOBUF_TC_PARAM_DECL) {
1345 PROTOBUF_MUSTTAIL return PackedEnumSmallRange<uint8_t, 0>(
1346 PROTOBUF_TC_PARAM_PASS);
1347 }
FastEr0P2(PROTOBUF_TC_PARAM_DECL)1348 PROTOBUF_NOINLINE const char* TcParser::FastEr0P2(PROTOBUF_TC_PARAM_DECL) {
1349 PROTOBUF_MUSTTAIL return PackedEnumSmallRange<uint16_t, 0>(
1350 PROTOBUF_TC_PARAM_PASS);
1351 }
1352
FastEr1P1(PROTOBUF_TC_PARAM_DECL)1353 PROTOBUF_NOINLINE const char* TcParser::FastEr1P1(PROTOBUF_TC_PARAM_DECL) {
1354 PROTOBUF_MUSTTAIL return PackedEnumSmallRange<uint8_t, 1>(
1355 PROTOBUF_TC_PARAM_PASS);
1356 }
FastEr1P2(PROTOBUF_TC_PARAM_DECL)1357 PROTOBUF_NOINLINE const char* TcParser::FastEr1P2(PROTOBUF_TC_PARAM_DECL) {
1358 PROTOBUF_MUSTTAIL return PackedEnumSmallRange<uint16_t, 1>(
1359 PROTOBUF_TC_PARAM_PASS);
1360 }
1361
1362 //////////////////////////////////////////////////////////////////////////////
1363 // String/bytes fields
1364 //////////////////////////////////////////////////////////////////////////////
1365
1366 // Defined in wire_format_lite.cc
1367 void PrintUTF8ErrorLog(absl::string_view message_name,
1368 absl::string_view field_name, const char* operation_str,
1369 bool emit_stacktrace);
1370
ReportFastUtf8Error(uint32_t decoded_tag,const TcParseTableBase * table)1371 void TcParser::ReportFastUtf8Error(uint32_t decoded_tag,
1372 const TcParseTableBase* table) {
1373 uint32_t field_num = decoded_tag >> 3;
1374 const auto* entry = FindFieldEntry(table, field_num);
1375 PrintUTF8ErrorLog(MessageName(table), FieldName(table, entry), "parsing",
1376 false);
1377 }
1378
1379 namespace {
1380
1381 // Here are overloads of ReadStringIntoArena, ReadStringNoArena and IsValidUTF8
1382 // for every string class for which we provide fast-table parser support.
1383
ReadStringIntoArena(MessageLite *,const char * ptr,ParseContext * ctx,uint32_t,const TcParseTableBase *,ArenaStringPtr & field,Arena * arena)1384 PROTOBUF_ALWAYS_INLINE inline const char* ReadStringIntoArena(
1385 MessageLite* /*msg*/, const char* ptr, ParseContext* ctx,
1386 uint32_t /*aux_idx*/, const TcParseTableBase* /*table*/,
1387 ArenaStringPtr& field, Arena* arena) {
1388 return ctx->ReadArenaString(ptr, &field, arena);
1389 }
1390
1391 PROTOBUF_NOINLINE
ReadStringNoArena(MessageLite *,const char * ptr,ParseContext * ctx,uint32_t,const TcParseTableBase *,ArenaStringPtr & field)1392 const char* ReadStringNoArena(MessageLite* /*msg*/, const char* ptr,
1393 ParseContext* ctx, uint32_t /*aux_idx*/,
1394 const TcParseTableBase* /*table*/,
1395 ArenaStringPtr& field) {
1396 int size = ReadSize(&ptr);
1397 if (!ptr) return nullptr;
1398 return ctx->ReadString(ptr, size, field.MutableNoCopy(nullptr));
1399 }
1400
IsValidUTF8(ArenaStringPtr & field)1401 PROTOBUF_ALWAYS_INLINE inline bool IsValidUTF8(ArenaStringPtr& field) {
1402 return utf8_range::IsStructurallyValid(field.Get());
1403 }
1404
1405
1406 } // namespace
1407
// Fast-path handler for a singular string field whose tag occupies
// sizeof(TagType) bytes at `ptr`.
//
// Template parameters:
//   TagType   - integer type used to load and skip the tag bytes.
//   FieldType - in-message storage type, located at data.offset() in `msg`.
//   utf8      - validation policy:
//                 kNoUtf8            never validates;
//                 kUtf8ValidateOnly  validates only when NDEBUG is not
//                                    defined; a failure is reported but the
//                                    parse continues;
//                 otherwise (kUtf8)  validates, reports, and fails the parse
//                                    on invalid data.
//
// Control flow: a non-zero data.coded_tag<TagType>() defers to MiniParse; a
// failed read or a kUtf8 validation failure tail-calls Error; every other path
// ends in ToTagDispatch.
template <typename TagType, typename FieldType, TcParser::Utf8Type utf8>
inline PROTOBUF_ALWAYS_INLINE const char* TcParser::SingularString(
    PROTOBUF_TC_PARAM_DECL) {
  // NOTE(review): presumably a non-zero coded tag means the tag bytes did not
  // match this fast-table entry -- confirm against TcFieldData.
  if (PROTOBUF_PREDICT_FALSE(data.coded_tag<TagType>() != 0)) {
    PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_NO_DATA_PASS);
  }
  // Keep the raw tag so a UTF-8 failure can be reported against this field.
  auto saved_tag = UnalignedLoad<TagType>(ptr);
  ptr += sizeof(TagType);
  // Presence is accumulated in the local `hasbits` word, not written to `msg`
  // here.
  hasbits |= (uint64_t{1} << data.hasbit_idx());
  auto& field = RefAt<FieldType>(msg, data.offset());
  auto arena = msg->GetArena();
  // Pick the reader based on whether the message has an arena.
  if (arena) {
    ptr =
        ReadStringIntoArena(msg, ptr, ctx, data.aux_idx(), table, field, arena);
  } else {
    ptr = ReadStringNoArena(msg, ptr, ctx, data.aux_idx(), table, field);
  }
  if (PROTOBUF_PREDICT_FALSE(ptr == nullptr)) {
    PROTOBUF_MUSTTAIL return Error(PROTOBUF_TC_PARAM_NO_DATA_PASS);
  }
  switch (utf8) {
    case kNoUtf8:
#ifdef NDEBUG
    // With NDEBUG defined, validate-only fields skip the check entirely.
    case kUtf8ValidateOnly:
#endif
      break;
    default:
      if (PROTOBUF_PREDICT_TRUE(IsValidUTF8(field))) {
        break;
      }
      ReportFastUtf8Error(FastDecodeTag(saved_tag), table);
      // Only strict kUtf8 turns the reported failure into a parse error.
      if (utf8 == kUtf8) {
        PROTOBUF_MUSTTAIL return Error(PROTOBUF_TC_PARAM_NO_DATA_PASS);
      }
      break;
  }

  PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_NO_DATA_PASS);
}
1447
// Fast-table entry: singular ArenaStringPtr field, uint8_t-sized tag, no
// UTF-8 validation (kNoUtf8).
PROTOBUF_NOINLINE const char* TcParser::FastBS1(PROTOBUF_TC_PARAM_DECL) {
  PROTOBUF_MUSTTAIL return SingularString<uint8_t, ArenaStringPtr, kNoUtf8>(
      PROTOBUF_TC_PARAM_PASS);
}
// Fast-table entry: singular ArenaStringPtr field, uint16_t-sized tag, no
// UTF-8 validation (kNoUtf8).
PROTOBUF_NOINLINE const char* TcParser::FastBS2(PROTOBUF_TC_PARAM_DECL) {
  PROTOBUF_MUSTTAIL return SingularString<uint16_t, ArenaStringPtr, kNoUtf8>(
      PROTOBUF_TC_PARAM_PASS);
}
// Fast-table entry: singular ArenaStringPtr field, uint8_t-sized tag,
// kUtf8ValidateOnly policy.
PROTOBUF_NOINLINE const char* TcParser::FastSS1(PROTOBUF_TC_PARAM_DECL) {
  PROTOBUF_MUSTTAIL return SingularString<uint8_t, ArenaStringPtr,
                                          kUtf8ValidateOnly>(
      PROTOBUF_TC_PARAM_PASS);
}
// Fast-table entry: singular ArenaStringPtr field, uint16_t-sized tag,
// kUtf8ValidateOnly policy.
PROTOBUF_NOINLINE const char* TcParser::FastSS2(PROTOBUF_TC_PARAM_DECL) {
  PROTOBUF_MUSTTAIL return SingularString<uint16_t, ArenaStringPtr,
                                          kUtf8ValidateOnly>(
      PROTOBUF_TC_PARAM_PASS);
}
// Fast-table entry: singular ArenaStringPtr field, uint8_t-sized tag, strict
// UTF-8 validation (kUtf8).
PROTOBUF_NOINLINE const char* TcParser::FastUS1(PROTOBUF_TC_PARAM_DECL) {
  PROTOBUF_MUSTTAIL return SingularString<uint8_t, ArenaStringPtr, kUtf8>(
      PROTOBUF_TC_PARAM_PASS);
}
// Fast-table entry: singular ArenaStringPtr field, uint16_t-sized tag, strict
// UTF-8 validation (kUtf8).
PROTOBUF_NOINLINE const char* TcParser::FastUS2(PROTOBUF_TC_PARAM_DECL) {
  PROTOBUF_MUSTTAIL return SingularString<uint16_t, ArenaStringPtr, kUtf8>(
      PROTOBUF_TC_PARAM_PASS);
}
1474
1475 // Inlined string variants:
1476
// Fast-table entry with no fast-path work of its own: every call is forwarded
// to MiniParse.
const char* TcParser::FastBiS1(PROTOBUF_TC_PARAM_DECL) {
  PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_NO_DATA_PASS);
}
// Fast-table entry with no fast-path work of its own: every call is forwarded
// to MiniParse.
const char* TcParser::FastBiS2(PROTOBUF_TC_PARAM_DECL) {
  PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_NO_DATA_PASS);
}
// Fast-table entry with no fast-path work of its own: every call is forwarded
// to MiniParse.
const char* TcParser::FastSiS1(PROTOBUF_TC_PARAM_DECL) {
  PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_NO_DATA_PASS);
}
// Fast-table entry with no fast-path work of its own: every call is forwarded
// to MiniParse.
const char* TcParser::FastSiS2(PROTOBUF_TC_PARAM_DECL) {
  PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_NO_DATA_PASS);
}
// Fast-table entry with no fast-path work of its own: every call is forwarded
// to MiniParse.
const char* TcParser::FastUiS1(PROTOBUF_TC_PARAM_DECL) {
  PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_NO_DATA_PASS);
}
// Fast-table entry with no fast-path work of its own: every call is forwarded
// to MiniParse.
const char* TcParser::FastUiS2(PROTOBUF_TC_PARAM_DECL) {
  PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_NO_DATA_PASS);
}
1495
1496 // Corded string variants:
// Fast-table entry with no fast-path work of its own: every call is forwarded
// to MiniParse.
const char* TcParser::FastBcS1(PROTOBUF_TC_PARAM_DECL) {
  PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_NO_DATA_PASS);
}
// Fast-table entry with no fast-path work of its own: every call is forwarded
// to MiniParse.
const char* TcParser::FastBcS2(PROTOBUF_TC_PARAM_DECL) {
  PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_NO_DATA_PASS);
}
// Fast-table entry with no fast-path work of its own: every call is forwarded
// to MiniParse.
const char* TcParser::FastScS1(PROTOBUF_TC_PARAM_DECL) {
  PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_NO_DATA_PASS);
}
// Fast-table entry with no fast-path work of its own: every call is forwarded
// to MiniParse.
const char* TcParser::FastScS2(PROTOBUF_TC_PARAM_DECL) {
  PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_NO_DATA_PASS);
}
// Fast-table entry with no fast-path work of its own: every call is forwarded
// to MiniParse.
const char* TcParser::FastUcS1(PROTOBUF_TC_PARAM_DECL) {
  PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_NO_DATA_PASS);
}
// Fast-table entry with no fast-path work of its own: every call is forwarded
// to MiniParse.
const char* TcParser::FastUcS2(PROTOBUF_TC_PARAM_DECL) {
  PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_NO_DATA_PASS);
}
1515
// Fast-path handler for a repeated string field whose tag occupies
// sizeof(TagType) bytes. Consecutive elements carrying the same tag are
// consumed in a loop.
//
// Template parameters:
//   TagType   - integer type used to load and compare the tag bytes.
//   FieldType - repeated container type located at data.offset() in `msg`.
//   utf8      - validation policy, applied to each element right after it is
//               parsed (same meaning as in SingularString: kNoUtf8 never
//               validates, kUtf8ValidateOnly validates only without NDEBUG and
//               never fails, kUtf8 fails the parse on invalid data).
//
// Control flow: a non-zero data.coded_tag<TagType>() defers to MiniParse; a
// failed read or strict validation failure tail-calls Error; running out of
// available data goes to ToParseLoop; otherwise ToTagDispatch.
template <typename TagType, typename FieldType, TcParser::Utf8Type utf8>
inline PROTOBUF_ALWAYS_INLINE const char* TcParser::RepeatedString(
    PROTOBUF_TC_PARAM_DECL) {
  if (PROTOBUF_PREDICT_FALSE(data.coded_tag<TagType>() != 0)) {
    PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_NO_DATA_PASS);
  }
  // The tag bytes of the first element; the loops below continue while the
  // next TagType-sized load equals this value.
  const auto expected_tag = UnalignedLoad<TagType>(ptr);
  auto& field = RefAt<FieldType>(msg, data.offset());

  // Validates the most recently appended element according to `utf8`.
  // Returns false only when validation fails under the strict kUtf8 policy.
  const auto validate_last_string = [expected_tag, table, &field] {
    switch (utf8) {
      case kNoUtf8:
#ifdef NDEBUG
      // With NDEBUG defined, validate-only fields skip the check entirely.
      case kUtf8ValidateOnly:
#endif
        return true;
      default:
        if (PROTOBUF_PREDICT_TRUE(
                utf8_range::IsStructurallyValid(field[field.size() - 1]))) {
          return true;
        }
        ReportFastUtf8Error(FastDecodeTag(expected_tag), table);
        if (utf8 == kUtf8) return false;
        return true;
    }
  };

  auto* arena = field.GetArena();
  SerialArena* serial_arena;
  // Arena path: taken only when the field has an arena, a serial arena can be
  // obtained via GetSerialArenaFast, and field.PrepareForParse() succeeds.
  // Elements are then created by ParseRepeatedStringOnce.
  if (PROTOBUF_PREDICT_TRUE(arena != nullptr &&
                            arena->impl_.GetSerialArenaFast(&serial_arena) &&
                            field.PrepareForParse())) {
    do {
      ptr += sizeof(TagType);  // skip the tag
      ptr = ParseRepeatedStringOnce(ptr, serial_arena, ctx, field);

      if (PROTOBUF_PREDICT_FALSE(ptr == nullptr || !validate_last_string())) {
        PROTOBUF_MUSTTAIL return Error(PROTOBUF_TC_PARAM_NO_DATA_PASS);
      }
      // Do not peek at the next tag unless the context says data is available.
      if (PROTOBUF_PREDICT_FALSE(!ctx->DataAvailable(ptr))) goto parse_loop;
    } while (UnalignedLoad<TagType>(ptr) == expected_tag);
  } else {
    // Generic path: append via field.Add() and parse into the new element.
    do {
      ptr += sizeof(TagType);  // skip the tag
      std::string* str = field.Add();
      ptr = InlineGreedyStringParser(str, ptr, ctx);
      if (PROTOBUF_PREDICT_FALSE(ptr == nullptr || !validate_last_string())) {
        PROTOBUF_MUSTTAIL return Error(PROTOBUF_TC_PARAM_NO_DATA_PASS);
      }
      if (PROTOBUF_PREDICT_FALSE(!ctx->DataAvailable(ptr))) goto parse_loop;
    } while (UnalignedLoad<TagType>(ptr) == expected_tag);
  }
  PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_NO_DATA_PASS);
parse_loop:
  PROTOBUF_MUSTTAIL return ToParseLoop(PROTOBUF_TC_PARAM_NO_DATA_PASS);
}
1572
// Fast-table entry: RepeatedPtrField<std::string> field, uint8_t-sized tag,
// no UTF-8 validation (kNoUtf8).
PROTOBUF_NOINLINE const char* TcParser::FastBR1(PROTOBUF_TC_PARAM_DECL) {
  PROTOBUF_MUSTTAIL return RepeatedString<
      uint8_t, RepeatedPtrField<std::string>, kNoUtf8>(PROTOBUF_TC_PARAM_PASS);
}
// Fast-table entry: RepeatedPtrField<std::string> field, uint16_t-sized tag,
// no UTF-8 validation (kNoUtf8).
PROTOBUF_NOINLINE const char* TcParser::FastBR2(PROTOBUF_TC_PARAM_DECL) {
  PROTOBUF_MUSTTAIL return RepeatedString<
      uint16_t, RepeatedPtrField<std::string>, kNoUtf8>(PROTOBUF_TC_PARAM_PASS);
}
// Fast-table entry: RepeatedPtrField<std::string> field, uint8_t-sized tag,
// kUtf8ValidateOnly policy.
PROTOBUF_NOINLINE const char* TcParser::FastSR1(PROTOBUF_TC_PARAM_DECL) {
  PROTOBUF_MUSTTAIL return RepeatedString<
      uint8_t, RepeatedPtrField<std::string>, kUtf8ValidateOnly>(
      PROTOBUF_TC_PARAM_PASS);
}
// Fast-table entry: RepeatedPtrField<std::string> field, uint16_t-sized tag,
// kUtf8ValidateOnly policy.
PROTOBUF_NOINLINE const char* TcParser::FastSR2(PROTOBUF_TC_PARAM_DECL) {
  PROTOBUF_MUSTTAIL return RepeatedString<
      uint16_t, RepeatedPtrField<std::string>, kUtf8ValidateOnly>(
      PROTOBUF_TC_PARAM_PASS);
}
// Fast-table entry: RepeatedPtrField<std::string> field, uint8_t-sized tag,
// strict UTF-8 validation (kUtf8).
PROTOBUF_NOINLINE const char* TcParser::FastUR1(PROTOBUF_TC_PARAM_DECL) {
  PROTOBUF_MUSTTAIL return RepeatedString<uint8_t,
                                          RepeatedPtrField<std::string>, kUtf8>(
      PROTOBUF_TC_PARAM_PASS);
}
// Fast-table entry: RepeatedPtrField<std::string> field, uint16_t-sized tag,
// strict UTF-8 validation (kUtf8).
PROTOBUF_NOINLINE const char* TcParser::FastUR2(PROTOBUF_TC_PARAM_DECL) {
  PROTOBUF_MUSTTAIL return RepeatedString<uint16_t,
                                          RepeatedPtrField<std::string>, kUtf8>(
      PROTOBUF_TC_PARAM_PASS);
}
1601
1602 //////////////////////////////////////////////////////////////////////////////
1603 // Mini parsing
1604 //////////////////////////////////////////////////////////////////////////////
1605
1606 namespace {
SetHas(const FieldEntry & entry,MessageLite * msg)1607 inline void SetHas(const FieldEntry& entry, MessageLite* msg) {
1608 auto has_idx = static_cast<uint32_t>(entry.has_idx);
1609 #if defined(__x86_64__) && defined(__GNUC__)
1610 asm("bts %1, %0\n" : "+m"(*reinterpret_cast<char*>(msg)) : "r"(has_idx));
1611 #else
1612 auto& hasblock = TcParser::RefAt<uint32_t>(msg, has_idx / 32 * 4);
1613 hasblock |= uint32_t{1} << (has_idx % 32);
1614 #endif
1615 }
1616 } // namespace
1617
1618 // Destroys any existing oneof union member (if necessary). Returns true if the
1619 // caller is responsible for initializing the object, or false if the field
1620 // already has the desired case.
ChangeOneof(const TcParseTableBase * table,const TcParseTableBase::FieldEntry & entry,uint32_t field_num,ParseContext * ctx,MessageLite * msg)1621 bool TcParser::ChangeOneof(const TcParseTableBase* table,
1622 const TcParseTableBase::FieldEntry& entry,
1623 uint32_t field_num, ParseContext* ctx,
1624 MessageLite* msg) {
1625 // The _oneof_case_ value offset is stored in the has-bit index.
1626 uint32_t* oneof_case = &TcParser::RefAt<uint32_t>(msg, entry.has_idx);
1627 uint32_t current_case = *oneof_case;
1628 *oneof_case = field_num;
1629
1630 if (current_case == 0) {
1631 // If the member is empty, we don't have anything to clear. Caller is
1632 // responsible for creating a new member object.
1633 return true;
1634 }
1635 if (current_case == field_num) {
1636 // If the member is already active, then it should be merged. We're done.
1637 return false;
1638 }
1639 // Look up the value that is already stored, and dispose of it if necessary.
1640 const FieldEntry* current_entry = FindFieldEntry(table, current_case);
1641 uint16_t current_kind = current_entry->type_card & field_layout::kFkMask;
1642 uint16_t current_rep = current_entry->type_card & field_layout::kRepMask;
1643 if (current_kind == field_layout::kFkString) {
1644 switch (current_rep) {
1645 case field_layout::kRepAString: {
1646 auto& field = RefAt<ArenaStringPtr>(msg, current_entry->offset);
1647 field.Destroy();
1648 break;
1649 }
1650 case field_layout::kRepCord: {
1651 if (msg->GetArena() == nullptr) {
1652 delete RefAt<absl::Cord*>(msg, current_entry->offset);
1653 }
1654 break;
1655 }
1656 case field_layout::kRepSString:
1657 case field_layout::kRepIString:
1658 default:
1659 ABSL_DLOG(FATAL) << "string rep not handled: "
1660 << (current_rep >> field_layout::kRepShift);
1661 return true;
1662 }
1663 } else if (current_kind == field_layout::kFkMessage) {
1664 switch (current_rep) {
1665 case field_layout::kRepMessage:
1666 case field_layout::kRepGroup: {
1667 auto& field = RefAt<MessageLite*>(msg, current_entry->offset);
1668 if (!msg->GetArena()) {
1669 delete field;
1670 }
1671 break;
1672 }
1673 default:
1674 ABSL_DLOG(FATAL) << "message rep not handled: "
1675 << (current_rep >> field_layout::kRepShift);
1676 break;
1677 }
1678 }
1679 return true;
1680 }
1681
1682 namespace {
GetSplitOffset(const TcParseTableBase * table)1683 uint32_t GetSplitOffset(const TcParseTableBase* table) {
1684 return table->field_aux(kSplitOffsetAuxIdx)->offset;
1685 }
1686
GetSizeofSplit(const TcParseTableBase * table)1687 uint32_t GetSizeofSplit(const TcParseTableBase* table) {
1688 return table->field_aux(kSplitSizeAuxIdx)->offset;
1689 }
1690 } // namespace
1691
MaybeGetSplitBase(MessageLite * msg,const bool is_split,const TcParseTableBase * table)1692 void* TcParser::MaybeGetSplitBase(MessageLite* msg, const bool is_split,
1693 const TcParseTableBase* table) {
1694 void* out = msg;
1695 if (is_split) {
1696 const uint32_t split_offset = GetSplitOffset(table);
1697 void* default_split =
1698 TcParser::RefAt<void*>(table->default_instance(), split_offset);
1699 void*& split = TcParser::RefAt<void*>(msg, split_offset);
1700 if (split == default_split) {
1701 // Allocate split instance when needed.
1702 uint32_t size = GetSizeofSplit(table);
1703 Arena* arena = msg->GetArena();
1704 split = (arena == nullptr) ? ::operator new(size)
1705 : arena->AllocateAligned(size);
1706 memcpy(split, default_split, size);
1707 }
1708 out = split;
1709 }
1710 return out;
1711 }
1712
1713 template <bool is_split>
MpFixed(PROTOBUF_TC_PARAM_DECL)1714 PROTOBUF_NOINLINE const char* TcParser::MpFixed(PROTOBUF_TC_PARAM_DECL) {
1715 const auto& entry = RefAt<FieldEntry>(table, data.entry_offset());
1716 const uint16_t type_card = entry.type_card;
1717 const uint16_t card = type_card & field_layout::kFcMask;
1718
1719 // Check for repeated parsing (wiretype fallback is handled there):
1720 if (card == field_layout::kFcRepeated) {
1721 PROTOBUF_MUSTTAIL return MpRepeatedFixed<is_split>(PROTOBUF_TC_PARAM_PASS);
1722 }
1723 // Check for mismatched wiretype:
1724 const uint16_t rep = type_card & field_layout::kRepMask;
1725 const uint32_t decoded_wiretype = data.tag() & 7;
1726 if (rep == field_layout::kRep64Bits) {
1727 if (decoded_wiretype != WireFormatLite::WIRETYPE_FIXED64) {
1728 PROTOBUF_MUSTTAIL return table->fallback(PROTOBUF_TC_PARAM_PASS);
1729 }
1730 } else {
1731 ABSL_DCHECK_EQ(rep, static_cast<uint16_t>(field_layout::kRep32Bits));
1732 if (decoded_wiretype != WireFormatLite::WIRETYPE_FIXED32) {
1733 PROTOBUF_MUSTTAIL return table->fallback(PROTOBUF_TC_PARAM_PASS);
1734 }
1735 }
1736 // Set the field present:
1737 if (card == field_layout::kFcOptional) {
1738 SetHas(entry, msg);
1739 } else if (card == field_layout::kFcOneof) {
1740 ChangeOneof(table, entry, data.tag() >> 3, ctx, msg);
1741 }
1742 void* const base = MaybeGetSplitBase(msg, is_split, table);
1743 // Copy the value:
1744 if (rep == field_layout::kRep64Bits) {
1745 RefAt<uint64_t>(base, entry.offset) = UnalignedLoad<uint64_t>(ptr);
1746 ptr += sizeof(uint64_t);
1747 } else {
1748 RefAt<uint32_t>(base, entry.offset) = UnalignedLoad<uint32_t>(ptr);
1749 ptr += sizeof(uint32_t);
1750 }
1751 PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_NO_DATA_PASS);
1752 }
1753
// Mini-parse handler for non-packed repeated fixed-width (32/64-bit) fields.
//
// A LENGTH_DELIMITED wire type is forwarded to MpPackedFixed. Otherwise the
// wire type must match the field width (mismatch -> table fallback), and
// values are appended for as long as the following tag equals the one that
// started this call.
//
// Exits: ToTagDispatch when a different tag follows, ToParseLoop when the
// context reports no more available data, Error when reading the next tag
// fails.
template <bool is_split>
PROTOBUF_NOINLINE const char* TcParser::MpRepeatedFixed(
    PROTOBUF_TC_PARAM_DECL) {
  const auto& entry = RefAt<FieldEntry>(table, data.entry_offset());
  const uint32_t decoded_tag = data.tag();
  const uint32_t decoded_wiretype = decoded_tag & 7;

  // Check for packed repeated fallback:
  if (decoded_wiretype == WireFormatLite::WIRETYPE_LENGTH_DELIMITED) {
    PROTOBUF_MUSTTAIL return MpPackedFixed<is_split>(PROTOBUF_TC_PARAM_PASS);
  }

  // Note: the split base is resolved before the wire-type check below.
  void* const base = MaybeGetSplitBase(msg, is_split, table);
  const uint16_t type_card = entry.type_card;
  const uint16_t rep = type_card & field_layout::kRepMask;
  if (rep == field_layout::kRep64Bits) {
    if (decoded_wiretype != WireFormatLite::WIRETYPE_FIXED64) {
      PROTOBUF_MUSTTAIL return table->fallback(PROTOBUF_TC_PARAM_PASS);
    }
    auto& field = MaybeCreateRepeatedFieldRefAt<uint64_t, is_split>(
        base, entry.offset, msg);
    constexpr auto size = sizeof(uint64_t);
    // `ptr2` is the position just past the most recently read tag; `ptr` is
    // only advanced to it once that tag is known to match.
    const char* ptr2 = ptr;
    uint32_t next_tag;
    do {
      ptr = ptr2;
      *field.Add() = UnalignedLoad<uint64_t>(ptr);
      ptr += size;
      if (PROTOBUF_PREDICT_FALSE(!ctx->DataAvailable(ptr))) goto parse_loop;
      ptr2 = ReadTag(ptr, &next_tag);
      if (PROTOBUF_PREDICT_FALSE(ptr2 == nullptr)) goto error;
    } while (next_tag == decoded_tag);
    // On exit `ptr` still points at the (unconsumed) differing tag.
  } else {
    ABSL_DCHECK_EQ(rep, static_cast<uint16_t>(field_layout::kRep32Bits));
    if (decoded_wiretype != WireFormatLite::WIRETYPE_FIXED32) {
      PROTOBUF_MUSTTAIL return table->fallback(PROTOBUF_TC_PARAM_PASS);
    }
    auto& field = MaybeCreateRepeatedFieldRefAt<uint32_t, is_split>(
        base, entry.offset, msg);
    constexpr auto size = sizeof(uint32_t);
    // Same loop as the 64-bit branch, with 32-bit elements.
    const char* ptr2 = ptr;
    uint32_t next_tag;
    do {
      ptr = ptr2;
      *field.Add() = UnalignedLoad<uint32_t>(ptr);
      ptr += size;
      if (PROTOBUF_PREDICT_FALSE(!ctx->DataAvailable(ptr))) goto parse_loop;
      ptr2 = ReadTag(ptr, &next_tag);
      if (PROTOBUF_PREDICT_FALSE(ptr2 == nullptr)) goto error;
    } while (next_tag == decoded_tag);
  }

  PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_NO_DATA_PASS);
parse_loop:
  PROTOBUF_MUSTTAIL return ToParseLoop(PROTOBUF_TC_PARAM_NO_DATA_PASS);
error:
  PROTOBUF_MUSTTAIL return Error(PROTOBUF_TC_PARAM_NO_DATA_PASS);
}
1812
1813 template <bool is_split>
MpPackedFixed(PROTOBUF_TC_PARAM_DECL)1814 PROTOBUF_NOINLINE const char* TcParser::MpPackedFixed(PROTOBUF_TC_PARAM_DECL) {
1815 const auto& entry = RefAt<FieldEntry>(table, data.entry_offset());
1816 const uint16_t type_card = entry.type_card;
1817 const uint32_t decoded_wiretype = data.tag() & 7;
1818
1819 // Check for non-packed repeated fallback:
1820 if (decoded_wiretype != WireFormatLite::WIRETYPE_LENGTH_DELIMITED) {
1821 PROTOBUF_MUSTTAIL return MpRepeatedFixed<is_split>(PROTOBUF_TC_PARAM_PASS);
1822 }
1823
1824 void* const base = MaybeGetSplitBase(msg, is_split, table);
1825 int size = ReadSize(&ptr);
1826 uint16_t rep = type_card & field_layout::kRepMask;
1827 if (rep == field_layout::kRep64Bits) {
1828 auto& field = MaybeCreateRepeatedFieldRefAt<uint64_t, is_split>(
1829 base, entry.offset, msg);
1830 ptr = ctx->ReadPackedFixed(ptr, size, &field);
1831 } else {
1832 ABSL_DCHECK_EQ(rep, static_cast<uint16_t>(field_layout::kRep32Bits));
1833 auto& field = MaybeCreateRepeatedFieldRefAt<uint32_t, is_split>(
1834 base, entry.offset, msg);
1835 ptr = ctx->ReadPackedFixed(ptr, size, &field);
1836 }
1837
1838 if (PROTOBUF_PREDICT_FALSE(ptr == nullptr)) {
1839 PROTOBUF_MUSTTAIL return Error(PROTOBUF_TC_PARAM_NO_DATA_PASS);
1840 }
1841 PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_NO_DATA_PASS);
1842 }
1843
// Mini-parse handler for varint-encoded fields (64-bit, 32-bit and bool
// representations, with optional zigzag decoding or enum validation).
//
// Repeated fields are forwarded to MpRepeatedVarint; a wire type other than
// VARINT goes to the table's fallback. A 32-bit validated-enum value that
// fails EnumIsValidAux rewinds `ptr` to the start of the varint and is handed
// to MpUnknownEnumFallback. Otherwise presence is recorded, the value is
// stored (in the split struct when `is_split`), and parsing continues via
// ToTagDispatch. A varint that cannot be parsed tail-calls Error.
template <bool is_split>
PROTOBUF_NOINLINE const char* TcParser::MpVarint(PROTOBUF_TC_PARAM_DECL) {
  const auto& entry = RefAt<FieldEntry>(table, data.entry_offset());
  const uint16_t type_card = entry.type_card;
  const uint16_t card = type_card & field_layout::kFcMask;

  // Check for repeated parsing:
  if (card == field_layout::kFcRepeated) {
    PROTOBUF_MUSTTAIL return MpRepeatedVarint<is_split>(PROTOBUF_TC_PARAM_PASS);
  }
  // Check for wire type mismatch:
  if ((data.tag() & 7) != WireFormatLite::WIRETYPE_VARINT) {
    PROTOBUF_MUSTTAIL return table->fallback(PROTOBUF_TC_PARAM_PASS);
  }
  const uint16_t xform_val = type_card & field_layout::kTvMask;
  const bool is_zigzag = xform_val == field_layout::kTvZigZag;
  // True when the kTvEnum bit is set in the transform value.
  const bool is_validated_enum = xform_val & field_layout::kTvEnum;

  // Parse the value:
  const char* ptr2 = ptr;  // save for unknown enum case
  uint64_t tmp;
  ptr = ParseVarint(ptr, &tmp);
  if (ptr == nullptr) {
    PROTOBUF_MUSTTAIL return Error(PROTOBUF_TC_PARAM_NO_DATA_PASS);
  }

  // Transform and/or validate the value
  uint16_t rep = type_card & field_layout::kRepMask;
  if (rep == field_layout::kRep64Bits) {
    if (is_zigzag) {
      tmp = WireFormatLite::ZigZagDecode64(tmp);
    }
  } else if (rep == field_layout::kRep32Bits) {
    if (is_validated_enum) {
      if (!EnumIsValidAux(tmp, xform_val, *table->field_aux(&entry))) {
        // Rewind so the fallback sees the varint from its first byte.
        ptr = ptr2;
        PROTOBUF_MUSTTAIL return MpUnknownEnumFallback(PROTOBUF_TC_PARAM_PASS);
      }
    } else if (is_zigzag) {
      tmp = WireFormatLite::ZigZagDecode32(static_cast<uint32_t>(tmp));
    }
  }

  // Mark the field as present:
  const bool is_oneof = card == field_layout::kFcOneof;
  if (card == field_layout::kFcOptional) {
    SetHas(entry, msg);
  } else if (is_oneof) {
    ChangeOneof(table, entry, data.tag() >> 3, ctx, msg);
  }

  // Store the value with the width given by the field representation.
  void* const base = MaybeGetSplitBase(msg, is_split, table);
  if (rep == field_layout::kRep64Bits) {
    RefAt<uint64_t>(base, entry.offset) = tmp;
  } else if (rep == field_layout::kRep32Bits) {
    RefAt<uint32_t>(base, entry.offset) = static_cast<uint32_t>(tmp);
  } else {
    ABSL_DCHECK_EQ(rep, static_cast<uint16_t>(field_layout::kRep8Bits));
    RefAt<bool>(base, entry.offset) = static_cast<bool>(tmp);
  }

  PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_NO_DATA_PASS);
}
1907
// Implementation of non-packed repeated varint parsing for one element type.
//
// Template parameters:
//   is_split     - whether the field lives in the split struct.
//   FieldType    - element type of the RepeatedField being appended to.
//   xform_val_in - transform value (zigzag / enum kind) chosen by the caller;
//                  ignored when `is_split`, in which case it is re-read from
//                  the field entry.
//
// Appends values for as long as the following tag equals data.tag(). An enum
// value rejected by EnumIsValidAux rewinds `ptr` to the start of that varint
// and tail-calls MpUnknownEnumFallback. Both running out of available data
// and meeting a different tag end in ToParseLoop; a malformed varint or tag
// ends in Error.
template <bool is_split, typename FieldType, uint16_t xform_val_in>
const char* TcParser::MpRepeatedVarintT(PROTOBUF_TC_PARAM_DECL) {
  const auto& entry = RefAt<FieldEntry>(table, data.entry_offset());
  const uint32_t decoded_tag = data.tag();
  // For is_split we ignore the incoming xform_val and read it from entry to
  // reduce duplication for the uncommon paths.
  const uint16_t xform_val =
      is_split ? (entry.type_card & field_layout::kTvMask) : xform_val_in;
  const bool is_zigzag = xform_val == field_layout::kTvZigZag;
  const bool is_validated_enum = xform_val & field_layout::kTvEnum;

  // `ptr2` marks where the next varint starts (just past the last read tag).
  const char* ptr2 = ptr;
  uint32_t next_tag;
  void* const base = MaybeGetSplitBase(msg, is_split, table);
  auto& field = MaybeCreateRepeatedFieldRefAt<FieldType, is_split>(
      base, entry.offset, msg);

  // Enum validation data is only loaded when validation is needed.
  TcParseTableBase::FieldAux aux;
  if (is_validated_enum) {
    aux = *table->field_aux(&entry);
    PrefetchEnumData(xform_val, aux);
  }

  do {
    uint64_t tmp;
    ptr = ParseVarint(ptr2, &tmp);
    if (PROTOBUF_PREDICT_FALSE(ptr == nullptr)) goto error;
    if (is_validated_enum) {
      if (!EnumIsValidAux(static_cast<int32_t>(tmp), xform_val, aux)) {
        // Rewind to the start of the rejected varint for the fallback.
        ptr = ptr2;
        PROTOBUF_MUSTTAIL return MpUnknownEnumFallback(PROTOBUF_TC_PARAM_PASS);
      }
    } else if (is_zigzag) {
      tmp = sizeof(FieldType) == 8 ? WireFormatLite::ZigZagDecode64(tmp)
                                   : WireFormatLite::ZigZagDecode32(tmp);
    }
    field.Add(static_cast<FieldType>(tmp));
    if (PROTOBUF_PREDICT_FALSE(!ctx->DataAvailable(ptr))) goto parse_loop;
    ptr2 = ReadTag(ptr, &next_tag);
    if (PROTOBUF_PREDICT_FALSE(ptr2 == nullptr)) goto error;
  } while (next_tag == decoded_tag);

  // Reached both via the goto above and by falling out of the loop; in the
  // latter case `ptr` still points at the differing, unconsumed tag.
parse_loop:
  PROTOBUF_MUSTTAIL return ToParseLoop(PROTOBUF_TC_PARAM_NO_DATA_PASS);
error:
  PROTOBUF_MUSTTAIL return Error(PROTOBUF_TC_PARAM_NO_DATA_PASS);
}
1955
// Mini-parse entry point for non-packed repeated varint fields.
//
// A LENGTH_DELIMITED wire type is forwarded to MpPackedVarint; any wire type
// other than VARINT goes to the table's fallback. Otherwise the call is
// dispatched, based on the field's representation (64-bit / 32-bit / 8-bit)
// and transform value, to the matching MpRepeatedVarintT instantiation.
template <bool is_split>
PROTOBUF_NOINLINE const char* TcParser::MpRepeatedVarint(
    PROTOBUF_TC_PARAM_DECL) {
  const auto& entry = RefAt<FieldEntry>(table, data.entry_offset());
  const auto type_card = entry.type_card;
  const uint32_t decoded_tag = data.tag();
  const auto decoded_wiretype = decoded_tag & 7;

  // Check for packed repeated fallback:
  if (decoded_wiretype == WireFormatLite::WIRETYPE_LENGTH_DELIMITED) {
    PROTOBUF_MUSTTAIL return MpPackedVarint<is_split>(PROTOBUF_TC_PARAM_PASS);
  }
  // Check for wire type mismatch:
  if (decoded_wiretype != WireFormatLite::WIRETYPE_VARINT) {
    PROTOBUF_MUSTTAIL return table->fallback(PROTOBUF_TC_PARAM_PASS);
  }
  // For split we avoid the duplicate code and have the impl reload the value.
  // Less code bloat for uncommon paths.
  // (That is why every non-zero transform below is passed as 0 when
  // `is_split`: all split cases of one width share a single instantiation.)
  const uint16_t xform_val = (type_card & field_layout::kTvMask);
  const uint16_t rep = type_card & field_layout::kRepMask;
  switch (rep >> field_layout::kRepShift) {
    case field_layout::kRep64Bits >> field_layout::kRepShift:
      if (xform_val == 0) {
        PROTOBUF_MUSTTAIL return MpRepeatedVarintT<is_split, uint64_t, 0>(
            PROTOBUF_TC_PARAM_PASS);
      } else {
        ABSL_DCHECK_EQ(xform_val, +field_layout::kTvZigZag);
        PROTOBUF_MUSTTAIL return MpRepeatedVarintT<
            is_split, uint64_t, (is_split ? 0 : field_layout::kTvZigZag)>(
            PROTOBUF_TC_PARAM_PASS);
      }
    case field_layout::kRep32Bits >> field_layout::kRepShift:
      switch (xform_val >> field_layout::kTvShift) {
        case 0:
          PROTOBUF_MUSTTAIL return MpRepeatedVarintT<is_split, uint32_t, 0>(
              PROTOBUF_TC_PARAM_PASS);
        case field_layout::kTvZigZag >> field_layout::kTvShift:
          PROTOBUF_MUSTTAIL return MpRepeatedVarintT<
              is_split, uint32_t, (is_split ? 0 : field_layout::kTvZigZag)>(
              PROTOBUF_TC_PARAM_PASS);
        case field_layout::kTvEnum >> field_layout::kTvShift:
          PROTOBUF_MUSTTAIL return MpRepeatedVarintT<
              is_split, uint32_t, (is_split ? 0 : field_layout::kTvEnum)>(
              PROTOBUF_TC_PARAM_PASS);
        case field_layout::kTvRange >> field_layout::kTvShift:
          PROTOBUF_MUSTTAIL return MpRepeatedVarintT<
              is_split, uint32_t, (is_split ? 0 : field_layout::kTvRange)>(
              PROTOBUF_TC_PARAM_PASS);
        default:
          // No `break` follows: control is not expected to continue past
          // Unreachable().
          Unreachable();
      }
    case field_layout::kRep8Bits >> field_layout::kRepShift:
      PROTOBUF_MUSTTAIL return MpRepeatedVarintT<is_split, bool, 0>(
          PROTOBUF_TC_PARAM_PASS);

    default:
      Unreachable();
      return nullptr;  // To silence -Werror=return-type in some toolchains
  }
}
2016
// Implementation of packed repeated varint parsing for one element type.
//
// Template parameters:
//   is_split     - whether the field lives in the split struct.
//   FieldType    - element type of the RepeatedField being appended to.
//   xform_val_in - transform value chosen by the caller; ignored when
//                  `is_split`, in which case it is re-read from the entry.
//
// Returns the result of ParseContext::ReadPackedVarint directly (no tail call
// back into the dispatch helpers). For validated enums, values rejected by
// EnumIsValidAux are passed to AddUnknownEnum instead of being appended.
template <bool is_split, typename FieldType, uint16_t xform_val_in>
const char* TcParser::MpPackedVarintT(PROTOBUF_TC_PARAM_DECL) {
  const auto& entry = RefAt<FieldEntry>(table, data.entry_offset());
  // For is_split we ignore the incoming xform_val and read it from entry to
  // reduce duplication for the uncommon paths.
  const uint16_t xform_val =
      is_split ? (entry.type_card & field_layout::kTvMask) : xform_val_in;
  const bool is_zigzag = xform_val == field_layout::kTvZigZag;
  const bool is_validated_enum = xform_val & field_layout::kTvEnum;

  void* const base = MaybeGetSplitBase(msg, is_split, table);
  // Held as a pointer so the by-copy lambdas below capture the address of the
  // field rather than a copy of it.
  auto* field = &MaybeCreateRepeatedFieldRefAt<FieldType, is_split>(
      base, entry.offset, msg);

  if (is_validated_enum) {
    const TcParseTableBase::FieldAux aux = *table->field_aux(entry.aux_idx);
    PrefetchEnumData(xform_val, aux);
    // Per-element callback: keep valid enum values, divert the rest.
    return ctx->ReadPackedVarint(ptr, [=](int32_t value) {
      if (!EnumIsValidAux(value, xform_val, aux)) {
        AddUnknownEnum(msg, table, data.tag(), value);
      } else {
        field->Add(value);
      }
    });
  } else {
    // Per-element callback: optional zigzag decoding at the element width.
    return ctx->ReadPackedVarint(ptr, [=](uint64_t value) {
      field->Add(is_zigzag ? (sizeof(FieldType) == 8
                                  ? WireFormatLite::ZigZagDecode64(value)
                                  : WireFormatLite::ZigZagDecode32(
                                        static_cast<uint32_t>(value)))
                           : value);
    });
  }
}
2051
// Mini-parse entry point for packed repeated varint fields.
//
// Any wire type other than LENGTH_DELIMITED is forwarded to MpRepeatedVarint.
// Otherwise pending hasbits are synced and the call is dispatched, based on
// the field's representation (64-bit / 32-bit / 8-bit) and transform value,
// to the matching MpPackedVarintT instantiation.
template <bool is_split>
PROTOBUF_NOINLINE const char* TcParser::MpPackedVarint(PROTOBUF_TC_PARAM_DECL) {
  const auto& entry = RefAt<FieldEntry>(table, data.entry_offset());
  const auto type_card = entry.type_card;
  const auto decoded_wiretype = data.tag() & 7;

  // Check for non-packed repeated fallback:
  if (decoded_wiretype != WireFormatLite::WIRETYPE_LENGTH_DELIMITED) {
    PROTOBUF_MUSTTAIL return MpRepeatedVarint<is_split>(PROTOBUF_TC_PARAM_PASS);
  }

  // For split we avoid the duplicate code and have the impl reload the value.
  // Less code bloat for uncommon paths.
  // (That is why every non-zero transform below is passed as 0 when
  // `is_split`: all split cases of one width share a single instantiation.)
  const uint16_t xform_val = (type_card & field_layout::kTvMask);

  // The MpPackedVarintT implementations return the result of
  // ctx->ReadPackedVarint directly instead of tail-calling back into the
  // dispatch helpers, so sync any pending hasbits now:
  SyncHasbits(msg, hasbits, table);

  const uint16_t rep = type_card & field_layout::kRepMask;

  switch (rep >> field_layout::kRepShift) {
    case field_layout::kRep64Bits >> field_layout::kRepShift:
      if (xform_val == 0) {
        PROTOBUF_MUSTTAIL return MpPackedVarintT<is_split, uint64_t, 0>(
            PROTOBUF_TC_PARAM_PASS);
      } else {
        ABSL_DCHECK_EQ(xform_val, +field_layout::kTvZigZag);
        PROTOBUF_MUSTTAIL return MpPackedVarintT<
            is_split, uint64_t, (is_split ? 0 : field_layout::kTvZigZag)>(
            PROTOBUF_TC_PARAM_PASS);
      }
    case field_layout::kRep32Bits >> field_layout::kRepShift:
      switch (xform_val >> field_layout::kTvShift) {
        case 0:
          PROTOBUF_MUSTTAIL return MpPackedVarintT<is_split, uint32_t, 0>(
              PROTOBUF_TC_PARAM_PASS);
        case field_layout::kTvZigZag >> field_layout::kTvShift:
          PROTOBUF_MUSTTAIL return MpPackedVarintT<
              is_split, uint32_t, (is_split ? 0 : field_layout::kTvZigZag)>(
              PROTOBUF_TC_PARAM_PASS);
        case field_layout::kTvEnum >> field_layout::kTvShift:
          PROTOBUF_MUSTTAIL return MpPackedVarintT<
              is_split, uint32_t, (is_split ? 0 : field_layout::kTvEnum)>(
              PROTOBUF_TC_PARAM_PASS);
        case field_layout::kTvRange >> field_layout::kTvShift:
          PROTOBUF_MUSTTAIL return MpPackedVarintT<
              is_split, uint32_t, (is_split ? 0 : field_layout::kTvRange)>(
              PROTOBUF_TC_PARAM_PASS);
        default:
          // No `break` follows: control is not expected to continue past
          // Unreachable().
          Unreachable();
      }
    case field_layout::kRep8Bits >> field_layout::kRepShift:
      PROTOBUF_MUSTTAIL return MpPackedVarintT<is_split, bool, 0>(
          PROTOBUF_TC_PARAM_PASS);

    default:
      Unreachable();
      return nullptr;  // To silence -Werror=return-type in some toolchains
  }
}
2113
MpVerifyUtf8(absl::string_view wire_bytes,const TcParseTableBase * table,const FieldEntry & entry,uint16_t xform_val)2114 bool TcParser::MpVerifyUtf8(absl::string_view wire_bytes,
2115 const TcParseTableBase* table,
2116 const FieldEntry& entry, uint16_t xform_val) {
2117 if (xform_val == field_layout::kTvUtf8) {
2118 if (!utf8_range::IsStructurallyValid(wire_bytes)) {
2119 PrintUTF8ErrorLog(MessageName(table), FieldName(table, &entry), "parsing",
2120 false);
2121 return false;
2122 }
2123 return true;
2124 }
2125 #ifndef NDEBUG
2126 if (xform_val == field_layout::kTvUtf8Debug) {
2127 if (!utf8_range::IsStructurallyValid(wire_bytes)) {
2128 PrintUTF8ErrorLog(MessageName(table), FieldName(table, &entry), "parsing",
2129 false);
2130 }
2131 }
2132 #endif // NDEBUG
2133 return true;
2134 }
MpVerifyUtf8(const absl::Cord & wire_bytes,const TcParseTableBase * table,const FieldEntry & entry,uint16_t xform_val)2135 bool TcParser::MpVerifyUtf8(const absl::Cord& wire_bytes,
2136 const TcParseTableBase* table,
2137 const FieldEntry& entry, uint16_t xform_val) {
2138 switch (xform_val) {
2139 default:
2140 ABSL_DCHECK_EQ(xform_val, 0);
2141 return true;
2142 }
2143 }
2144
// Mini-parse handler for string fields stored as ArenaStringPtr or
// absl::Cord.
//
// A wire type other than LENGTH_DELIMITED goes to the table's fallback;
// repeated fields are forwarded to MpRepeatedString. Otherwise presence is
// recorded (has-bit or oneof case), the payload is parsed into the field, and
// MpVerifyUtf8 is applied with the entry's transform value. A failed read or
// failed verification tail-calls Error; success continues via ToTagDispatch.
template <bool is_split>
PROTOBUF_NOINLINE const char* TcParser::MpString(PROTOBUF_TC_PARAM_DECL) {
  const auto& entry = RefAt<FieldEntry>(table, data.entry_offset());
  const uint16_t type_card = entry.type_card;
  const uint16_t card = type_card & field_layout::kFcMask;
  const uint32_t decoded_wiretype = data.tag() & 7;

  if (decoded_wiretype != WireFormatLite::WIRETYPE_LENGTH_DELIMITED) {
    PROTOBUF_MUSTTAIL return table->fallback(PROTOBUF_TC_PARAM_PASS);
  }
  if (card == field_layout::kFcRepeated) {
    PROTOBUF_MUSTTAIL return MpRepeatedString<is_split>(PROTOBUF_TC_PARAM_PASS);
  }
  const uint16_t xform_val = type_card & field_layout::kTvMask;
  const uint16_t rep = type_card & field_layout::kRepMask;

  // Mark the field as present:
  const bool is_oneof = card == field_layout::kFcOneof;
  // Set only for oneof members, when ChangeOneof reports that the member
  // object must be initialized by the caller.
  bool need_init = false;
  if (card == field_layout::kFcOptional) {
    SetHas(entry, msg);
  } else if (is_oneof) {
    need_init = ChangeOneof(table, entry, data.tag() >> 3, ctx, msg);
  }

  // Stays false if parsing fails, so the error check below covers both cases.
  bool is_valid = false;
  void* const base = MaybeGetSplitBase(msg, is_split, table);
  switch (rep) {
    case field_layout::kRepAString: {
      auto& field = RefAt<ArenaStringPtr>(base, entry.offset);
      if (need_init) field.InitDefault();
      Arena* arena = msg->GetArena();
      // Arena messages read via ReadArenaString; others parse into the
      // std::string returned by MutableNoCopy.
      if (arena) {
        ptr = ctx->ReadArenaString(ptr, &field, arena);
      } else {
        std::string* str = field.MutableNoCopy(nullptr);
        ptr = InlineGreedyStringParser(str, ptr, ctx);
      }
      if (!ptr) break;
      is_valid = MpVerifyUtf8(field.Get(), table, entry, xform_val);
      break;
    }


    case field_layout::kRepCord: {
      absl::Cord* field;
      if (is_oneof) {
        // Oneof Cord members are held through a pointer at entry.offset in
        // `msg`; create the Cord when the member was just activated.
        if (need_init) {
          field = Arena::Create<absl::Cord>(msg->GetArena());
          RefAt<absl::Cord*>(msg, entry.offset) = field;
        } else {
          field = RefAt<absl::Cord*>(msg, entry.offset);
        }
      } else {
        // Non-oneof Cord fields are stored directly at entry.offset.
        field = &RefAt<absl::Cord>(base, entry.offset);
      }
      ptr = InlineCordParser(field, ptr, ctx);
      if (!ptr) break;
      is_valid = MpVerifyUtf8(*field, table, entry, xform_val);
      break;
    }

    default:
      Unreachable();
  }

  if (PROTOBUF_PREDICT_FALSE(ptr == nullptr || !is_valid)) {
    PROTOBUF_MUSTTAIL return Error(PROTOBUF_TC_PARAM_NO_DATA_PASS);
  }
  PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_NO_DATA_PASS);
}
2216
ParseRepeatedStringOnce(const char * ptr,SerialArena * serial_arena,ParseContext * ctx,RepeatedPtrField<std::string> & field)2217 inline PROTOBUF_ALWAYS_INLINE const char* TcParser::ParseRepeatedStringOnce(
2218 const char* ptr, SerialArena* serial_arena, ParseContext* ctx,
2219 RepeatedPtrField<std::string>& field) {
2220 int size = ReadSize(&ptr);
2221 if (PROTOBUF_PREDICT_FALSE(!ptr)) return {};
2222 auto* str = new (serial_arena->AllocateFromStringBlock()) std::string();
2223 field.AddAllocatedForParse(str);
2224 ptr = ctx->ReadString(ptr, size, str);
2225 if (PROTOBUF_PREDICT_FALSE(!ptr)) return {};
2226 PROTOBUF_ASSUME(ptr != nullptr);
2227 return ptr;
2228 }
2229
2230 template <bool is_split>
MpRepeatedString(PROTOBUF_TC_PARAM_DECL)2231 PROTOBUF_NOINLINE const char* TcParser::MpRepeatedString(
2232 PROTOBUF_TC_PARAM_DECL) {
2233 const auto& entry = RefAt<FieldEntry>(table, data.entry_offset());
2234 const uint16_t type_card = entry.type_card;
2235 const uint32_t decoded_tag = data.tag();
2236 const uint32_t decoded_wiretype = decoded_tag & 7;
2237
2238 if (decoded_wiretype != WireFormatLite::WIRETYPE_LENGTH_DELIMITED) {
2239 PROTOBUF_MUSTTAIL return table->fallback(PROTOBUF_TC_PARAM_PASS);
2240 }
2241
2242 const uint16_t rep = type_card & field_layout::kRepMask;
2243 const uint16_t xform_val = type_card & field_layout::kTvMask;
2244 void* const base = MaybeGetSplitBase(msg, is_split, table);
2245 switch (rep) {
2246 case field_layout::kRepSString: {
2247 auto& field = MaybeCreateRepeatedPtrFieldRefAt<std::string, is_split>(
2248 base, entry.offset, msg);
2249 const char* ptr2 = ptr;
2250 uint32_t next_tag;
2251
2252 auto* arena = field.GetArena();
2253 SerialArena* serial_arena;
2254 if (PROTOBUF_PREDICT_TRUE(
2255 arena != nullptr &&
2256 arena->impl_.GetSerialArenaFast(&serial_arena) &&
2257 field.PrepareForParse())) {
2258 do {
2259 ptr = ptr2;
2260 ptr = ParseRepeatedStringOnce(ptr, serial_arena, ctx, field);
2261 if (PROTOBUF_PREDICT_FALSE(ptr == nullptr ||
2262 !MpVerifyUtf8(field[field.size() - 1],
2263 table, entry, xform_val))) {
2264 PROTOBUF_MUSTTAIL return Error(PROTOBUF_TC_PARAM_NO_DATA_PASS);
2265 }
2266 if (PROTOBUF_PREDICT_FALSE(!ctx->DataAvailable(ptr))) goto parse_loop;
2267 ptr2 = ReadTag(ptr, &next_tag);
2268 } while (next_tag == decoded_tag);
2269 } else {
2270 do {
2271 ptr = ptr2;
2272 std::string* str = field.Add();
2273 ptr = InlineGreedyStringParser(str, ptr, ctx);
2274 if (PROTOBUF_PREDICT_FALSE(
2275 ptr == nullptr ||
2276 !MpVerifyUtf8(*str, table, entry, xform_val))) {
2277 PROTOBUF_MUSTTAIL return Error(PROTOBUF_TC_PARAM_NO_DATA_PASS);
2278 }
2279 if (PROTOBUF_PREDICT_FALSE(!ctx->DataAvailable(ptr))) goto parse_loop;
2280 ptr2 = ReadTag(ptr, &next_tag);
2281 } while (next_tag == decoded_tag);
2282 }
2283
2284 break;
2285 }
2286
2287 #ifndef NDEBUG
2288 default:
2289 ABSL_LOG(FATAL) << "Unsupported repeated string rep: " << rep;
2290 break;
2291 #endif
2292 }
2293
2294 PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_NO_DATA_PASS);
2295 parse_loop:
2296 PROTOBUF_MUSTTAIL return ToParseLoop(PROTOBUF_TC_PARAM_NO_DATA_PASS);
2297 }
2298
2299
GetTableFromAux(uint16_t type_card,TcParseTableBase::FieldAux aux)2300 inline const TcParseTableBase* TcParser::GetTableFromAux(
2301 uint16_t type_card, TcParseTableBase::FieldAux aux) {
2302 uint16_t tv = type_card & field_layout::kTvMask;
2303 if (ABSL_PREDICT_TRUE(tv == field_layout::kTvTable)) {
2304 return aux.table;
2305 }
2306 ABSL_DCHECK(tv == field_layout::kTvDefault || tv == field_layout::kTvWeakPtr);
2307 const MessageLite* prototype = tv == field_layout::kTvDefault
2308 ? aux.message_default()
2309 : aux.message_default_weak();
2310 return prototype->GetTcParseTable();
2311 }
2312
2313 template <bool is_split>
MpMessage(PROTOBUF_TC_PARAM_DECL)2314 PROTOBUF_NOINLINE const char* TcParser::MpMessage(PROTOBUF_TC_PARAM_DECL) {
2315 const auto& entry = RefAt<FieldEntry>(table, data.entry_offset());
2316 const uint16_t type_card = entry.type_card;
2317 const uint16_t card = type_card & field_layout::kFcMask;
2318
2319 // Check for repeated parsing:
2320 if (card == field_layout::kFcRepeated) {
2321 const uint16_t rep = type_card & field_layout::kRepMask;
2322 switch (rep) {
2323 case field_layout::kRepMessage:
2324 PROTOBUF_MUSTTAIL return MpRepeatedMessageOrGroup<is_split, false>(
2325 PROTOBUF_TC_PARAM_PASS);
2326 case field_layout::kRepGroup:
2327 PROTOBUF_MUSTTAIL return MpRepeatedMessageOrGroup<is_split, true>(
2328 PROTOBUF_TC_PARAM_PASS);
2329 default:
2330 PROTOBUF_MUSTTAIL return table->fallback(PROTOBUF_TC_PARAM_PASS);
2331 }
2332 }
2333
2334 const uint32_t decoded_tag = data.tag();
2335 const uint32_t decoded_wiretype = decoded_tag & 7;
2336 const uint16_t rep = type_card & field_layout::kRepMask;
2337 const bool is_group = rep == field_layout::kRepGroup;
2338
2339 // Validate wiretype:
2340 switch (rep) {
2341 case field_layout::kRepMessage:
2342 if (decoded_wiretype != WireFormatLite::WIRETYPE_LENGTH_DELIMITED) {
2343 goto fallback;
2344 }
2345 break;
2346 case field_layout::kRepGroup:
2347 if (decoded_wiretype != WireFormatLite::WIRETYPE_START_GROUP) {
2348 goto fallback;
2349 }
2350 break;
2351 default: {
2352 fallback:
2353 PROTOBUF_MUSTTAIL return table->fallback(PROTOBUF_TC_PARAM_PASS);
2354 }
2355 }
2356
2357 const bool is_oneof = card == field_layout::kFcOneof;
2358 bool need_init = false;
2359 if (card == field_layout::kFcOptional) {
2360 SetHas(entry, msg);
2361 } else if (is_oneof) {
2362 need_init = ChangeOneof(table, entry, data.tag() >> 3, ctx, msg);
2363 }
2364
2365 void* const base = MaybeGetSplitBase(msg, is_split, table);
2366 SyncHasbits(msg, hasbits, table);
2367 MessageLite*& field = RefAt<MessageLite*>(base, entry.offset);
2368
2369 const TcParseTableBase* inner_table =
2370 GetTableFromAux(type_card, *table->field_aux(&entry));
2371 if (need_init || field == nullptr) {
2372 field = NewMessage(inner_table, msg->GetArena());
2373 }
2374 const auto inner_loop = [&](const char* ptr) {
2375 return ParseLoopPreserveNone(field, ptr, ctx, inner_table);
2376 };
2377 return is_group ? ctx->ParseGroupInlined(ptr, decoded_tag, inner_loop)
2378 : ctx->ParseLengthDelimitedInlined(ptr, inner_loop);
2379 }
2380
2381 template <bool is_split, bool is_group>
MpRepeatedMessageOrGroup(PROTOBUF_TC_PARAM_DECL)2382 const char* TcParser::MpRepeatedMessageOrGroup(PROTOBUF_TC_PARAM_DECL) {
2383 const auto& entry = RefAt<FieldEntry>(table, data.entry_offset());
2384 const uint16_t type_card = entry.type_card;
2385 ABSL_DCHECK_EQ(type_card & field_layout::kFcMask,
2386 static_cast<uint16_t>(field_layout::kFcRepeated));
2387 const uint32_t decoded_tag = data.tag();
2388 const uint32_t decoded_wiretype = decoded_tag & 7;
2389
2390 // Validate wiretype:
2391 if (!is_group) {
2392 ABSL_DCHECK_EQ(type_card & field_layout::kRepMask,
2393 static_cast<uint16_t>(field_layout::kRepMessage));
2394 if (decoded_wiretype != WireFormatLite::WIRETYPE_LENGTH_DELIMITED) {
2395 PROTOBUF_MUSTTAIL return table->fallback(PROTOBUF_TC_PARAM_PASS);
2396 }
2397 } else {
2398 ABSL_DCHECK_EQ(type_card & field_layout::kRepMask,
2399 static_cast<uint16_t>(field_layout::kRepGroup));
2400 if (decoded_wiretype != WireFormatLite::WIRETYPE_START_GROUP) {
2401 PROTOBUF_MUSTTAIL return table->fallback(PROTOBUF_TC_PARAM_PASS);
2402 }
2403 }
2404
2405 void* const base = MaybeGetSplitBase(msg, is_split, table);
2406 RepeatedPtrFieldBase& field =
2407 MaybeCreateRepeatedRefAt<RepeatedPtrFieldBase, is_split>(
2408 base, entry.offset, msg);
2409 const TcParseTableBase* inner_table =
2410 GetTableFromAux(type_card, *table->field_aux(&entry));
2411
2412 const char* ptr2 = ptr;
2413 uint32_t next_tag;
2414 do {
2415 MessageLite* value = AddMessage(inner_table, field);
2416 const auto inner_loop = [&](const char* ptr) {
2417 return ParseLoopPreserveNone(value, ptr, ctx, inner_table);
2418 };
2419 ptr = is_group ? ctx->ParseGroupInlined(ptr2, decoded_tag, inner_loop)
2420 : ctx->ParseLengthDelimitedInlined(ptr2, inner_loop);
2421 if (PROTOBUF_PREDICT_FALSE(ptr == nullptr)) goto error;
2422 if (PROTOBUF_PREDICT_FALSE(!ctx->DataAvailable(ptr))) goto parse_loop;
2423 ptr2 = ReadTag(ptr, &next_tag);
2424 if (PROTOBUF_PREDICT_FALSE(ptr2 == nullptr)) goto error;
2425 } while (next_tag == decoded_tag);
2426 PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_NO_DATA_PASS);
2427 parse_loop:
2428 PROTOBUF_MUSTTAIL return ToParseLoop(PROTOBUF_TC_PARAM_NO_DATA_PASS);
2429 error:
2430 PROTOBUF_MUSTTAIL return Error(PROTOBUF_TC_PARAM_NO_DATA_PASS);
2431 }
2432
SerializeMapKey(const NodeBase * node,MapTypeCard type_card,io::CodedOutputStream & coded_output)2433 static void SerializeMapKey(const NodeBase* node, MapTypeCard type_card,
2434 io::CodedOutputStream& coded_output) {
2435 switch (type_card.wiretype()) {
2436 case WireFormatLite::WIRETYPE_VARINT:
2437 switch (type_card.cpp_type()) {
2438 case MapTypeCard::kBool:
2439 WireFormatLite::WriteBool(
2440 1, static_cast<const KeyNode<bool>*>(node)->key(), &coded_output);
2441 break;
2442 case MapTypeCard::k32:
2443 if (type_card.is_zigzag()) {
2444 WireFormatLite::WriteSInt32(
2445 1, static_cast<const KeyNode<uint32_t>*>(node)->key(),
2446 &coded_output);
2447 } else if (type_card.is_signed()) {
2448 WireFormatLite::WriteInt32(
2449 1, static_cast<const KeyNode<uint32_t>*>(node)->key(),
2450 &coded_output);
2451 } else {
2452 WireFormatLite::WriteUInt32(
2453 1, static_cast<const KeyNode<uint32_t>*>(node)->key(),
2454 &coded_output);
2455 }
2456 break;
2457 case MapTypeCard::k64:
2458 if (type_card.is_zigzag()) {
2459 WireFormatLite::WriteSInt64(
2460 1, static_cast<const KeyNode<uint64_t>*>(node)->key(),
2461 &coded_output);
2462 } else if (type_card.is_signed()) {
2463 WireFormatLite::WriteInt64(
2464 1, static_cast<const KeyNode<uint64_t>*>(node)->key(),
2465 &coded_output);
2466 } else {
2467 WireFormatLite::WriteUInt64(
2468 1, static_cast<const KeyNode<uint64_t>*>(node)->key(),
2469 &coded_output);
2470 }
2471 break;
2472 default:
2473 Unreachable();
2474 }
2475 break;
2476 case WireFormatLite::WIRETYPE_FIXED32:
2477 WireFormatLite::WriteFixed32(
2478 1, static_cast<const KeyNode<uint32_t>*>(node)->key(), &coded_output);
2479 break;
2480 case WireFormatLite::WIRETYPE_FIXED64:
2481 WireFormatLite::WriteFixed64(
2482 1, static_cast<const KeyNode<uint64_t>*>(node)->key(), &coded_output);
2483 break;
2484 case WireFormatLite::WIRETYPE_LENGTH_DELIMITED:
2485 // We should never have a message here. They can only be values maps.
2486 ABSL_DCHECK_EQ(+type_card.cpp_type(), +MapTypeCard::kString);
2487 WireFormatLite::WriteString(
2488 1, static_cast<const KeyNode<std::string>*>(node)->key(),
2489 &coded_output);
2490 break;
2491 default:
2492 Unreachable();
2493 }
2494 }
2495
WriteMapEntryAsUnknown(MessageLite * msg,const TcParseTableBase * table,uint32_t tag,NodeBase * node,MapAuxInfo map_info)2496 void TcParser::WriteMapEntryAsUnknown(MessageLite* msg,
2497 const TcParseTableBase* table,
2498 uint32_t tag, NodeBase* node,
2499 MapAuxInfo map_info) {
2500 std::string serialized;
2501 {
2502 io::StringOutputStream string_output(&serialized);
2503 io::CodedOutputStream coded_output(&string_output);
2504 SerializeMapKey(node, map_info.key_type_card, coded_output);
2505 // The mapped_type is always an enum here.
2506 ABSL_DCHECK(map_info.value_is_validated_enum);
2507 WireFormatLite::WriteInt32(2,
2508 *reinterpret_cast<int32_t*>(
2509 node->GetVoidValue(map_info.node_size_info)),
2510 &coded_output);
2511 }
2512 GetUnknownFieldOps(table).write_length_delimited(msg, tag >> 3, serialized);
2513 }
2514
InitializeMapNodeEntry(void * obj,MapTypeCard type_card,UntypedMapBase & map,const TcParseTableBase::FieldAux * aux,bool is_key)2515 PROTOBUF_ALWAYS_INLINE inline void TcParser::InitializeMapNodeEntry(
2516 void* obj, MapTypeCard type_card, UntypedMapBase& map,
2517 const TcParseTableBase::FieldAux* aux, bool is_key) {
2518 (void)is_key;
2519 switch (type_card.cpp_type()) {
2520 case MapTypeCard::kBool:
2521 memset(obj, 0, sizeof(bool));
2522 break;
2523 case MapTypeCard::k32:
2524 memset(obj, 0, sizeof(uint32_t));
2525 break;
2526 case MapTypeCard::k64:
2527 memset(obj, 0, sizeof(uint64_t));
2528 break;
2529 case MapTypeCard::kString:
2530 Arena::CreateInArenaStorage(reinterpret_cast<std::string*>(obj),
2531 map.arena());
2532 break;
2533 case MapTypeCard::kMessage:
2534 aux[1].table->class_data->PlacementNew(obj, map.arena());
2535 break;
2536 default:
2537 Unreachable();
2538 }
2539 }
2540
DestroyMapNode(NodeBase * node,MapAuxInfo map_info,UntypedMapBase & map)2541 PROTOBUF_NOINLINE void TcParser::DestroyMapNode(NodeBase* node,
2542 MapAuxInfo map_info,
2543 UntypedMapBase& map) {
2544 if (map_info.key_type_card.cpp_type() == MapTypeCard::kString) {
2545 static_cast<std::string*>(node->GetVoidKey())->~basic_string();
2546 }
2547 if (map_info.value_type_card.cpp_type() == MapTypeCard::kString) {
2548 static_cast<std::string*>(node->GetVoidValue(map_info.node_size_info))
2549 ->~basic_string();
2550 } else if (map_info.value_type_card.cpp_type() == MapTypeCard::kMessage) {
2551 static_cast<MessageLite*>(node->GetVoidValue(map_info.node_size_info))
2552 ->DestroyInstance();
2553 }
2554 map.DeallocNode(node, map_info.node_size_info);
2555 }
2556
2557 template <typename T>
ReadFixed(void * obj,const char * ptr)2558 const char* ReadFixed(void* obj, const char* ptr) {
2559 auto v = UnalignedLoad<T>(ptr);
2560 ptr += sizeof(v);
2561 memcpy(obj, &v, sizeof(v));
2562 return ptr;
2563 }
2564
ParseOneMapEntry(NodeBase * node,const char * ptr,ParseContext * ctx,const TcParseTableBase::FieldAux * aux,const TcParseTableBase * table,const TcParseTableBase::FieldEntry & entry,Arena * arena)2565 const char* TcParser::ParseOneMapEntry(
2566 NodeBase* node, const char* ptr, ParseContext* ctx,
2567 const TcParseTableBase::FieldAux* aux, const TcParseTableBase* table,
2568 const TcParseTableBase::FieldEntry& entry, Arena* arena) {
2569 using WFL = WireFormatLite;
2570
2571 const auto map_info = aux[0].map_info;
2572 const uint8_t key_tag = WFL::MakeTag(1, map_info.key_type_card.wiretype());
2573 const uint8_t value_tag =
2574 WFL::MakeTag(2, map_info.value_type_card.wiretype());
2575
2576 while (!ctx->Done(&ptr)) {
2577 uint32_t inner_tag = ptr[0];
2578
2579 if (PROTOBUF_PREDICT_FALSE(inner_tag != key_tag &&
2580 inner_tag != value_tag)) {
2581 // Do a full parse and check again in case the tag has non-canonical
2582 // encoding.
2583 ptr = ReadTag(ptr, &inner_tag);
2584 if (PROTOBUF_PREDICT_FALSE(inner_tag != key_tag &&
2585 inner_tag != value_tag)) {
2586 if (PROTOBUF_PREDICT_FALSE(ptr == nullptr)) return nullptr;
2587
2588 if (inner_tag == 0 || (inner_tag & 7) == WFL::WIRETYPE_END_GROUP) {
2589 ctx->SetLastTag(inner_tag);
2590 break;
2591 }
2592
2593 ptr = UnknownFieldParse(inner_tag, nullptr, ptr, ctx);
2594 if (PROTOBUF_PREDICT_FALSE(ptr == nullptr)) return nullptr;
2595 continue;
2596 }
2597 } else {
2598 ++ptr;
2599 }
2600
2601 MapTypeCard type_card;
2602 void* obj;
2603 if (inner_tag == key_tag) {
2604 type_card = map_info.key_type_card;
2605 obj = node->GetVoidKey();
2606 } else {
2607 type_card = map_info.value_type_card;
2608 obj = node->GetVoidValue(map_info.node_size_info);
2609 }
2610
2611 switch (type_card.wiretype()) {
2612 case WFL::WIRETYPE_VARINT:
2613 uint64_t tmp;
2614 ptr = ParseVarint(ptr, &tmp);
2615 if (PROTOBUF_PREDICT_FALSE(ptr == nullptr)) return nullptr;
2616 switch (type_card.cpp_type()) {
2617 case MapTypeCard::kBool:
2618 *reinterpret_cast<bool*>(obj) = static_cast<bool>(tmp);
2619 continue;
2620 case MapTypeCard::k32: {
2621 uint32_t v = static_cast<uint32_t>(tmp);
2622 if (type_card.is_zigzag()) v = WFL::ZigZagDecode32(v);
2623 memcpy(obj, &v, sizeof(v));
2624 continue;
2625 }
2626 case MapTypeCard::k64:
2627 if (type_card.is_zigzag()) tmp = WFL::ZigZagDecode64(tmp);
2628 memcpy(obj, &tmp, sizeof(tmp));
2629 continue;
2630 default:
2631 Unreachable();
2632 }
2633 case WFL::WIRETYPE_FIXED32:
2634 ptr = ReadFixed<uint32_t>(obj, ptr);
2635 continue;
2636 case WFL::WIRETYPE_FIXED64:
2637 ptr = ReadFixed<uint64_t>(obj, ptr);
2638 continue;
2639 case WFL::WIRETYPE_LENGTH_DELIMITED:
2640 if (type_card.cpp_type() == MapTypeCard::kString) {
2641 const int size = ReadSize(&ptr);
2642 if (PROTOBUF_PREDICT_FALSE(ptr == nullptr)) return nullptr;
2643 std::string* str = reinterpret_cast<std::string*>(obj);
2644 ptr = ctx->ReadString(ptr, size, str);
2645 if (PROTOBUF_PREDICT_FALSE(ptr == nullptr)) return nullptr;
2646 bool do_utf8_check = map_info.fail_on_utf8_failure;
2647 #ifndef NDEBUG
2648 do_utf8_check |= map_info.log_debug_utf8_failure;
2649 #endif
2650 if (type_card.is_utf8() && do_utf8_check &&
2651 !utf8_range::IsStructurallyValid(*str)) {
2652 PrintUTF8ErrorLog(MessageName(table), FieldName(table, &entry),
2653 "parsing", false);
2654 if (map_info.fail_on_utf8_failure) {
2655 return nullptr;
2656 }
2657 }
2658 continue;
2659 } else {
2660 ABSL_DCHECK_EQ(+type_card.cpp_type(), +MapTypeCard::kMessage);
2661 ABSL_DCHECK_EQ(inner_tag, value_tag);
2662 ptr = ctx->ParseMessage(reinterpret_cast<MessageLite*>(obj), ptr);
2663 if (PROTOBUF_PREDICT_FALSE(ptr == nullptr)) return nullptr;
2664 continue;
2665 }
2666 default:
2667 Unreachable();
2668 }
2669 }
2670 return ptr;
2671 }
2672
2673 template <bool is_split>
MpMap(PROTOBUF_TC_PARAM_DECL)2674 PROTOBUF_NOINLINE const char* TcParser::MpMap(PROTOBUF_TC_PARAM_DECL) {
2675 const auto& entry = RefAt<FieldEntry>(table, data.entry_offset());
2676 // `aux[0]` points into a MapAuxInfo.
2677 // If we have a message mapped_type aux[1] points into a `create_in_arena`.
2678 // If we have a validated enum mapped_type aux[1] point into a
2679 // `enum_data`.
2680 const auto* aux = table->field_aux(&entry);
2681 const auto map_info = aux[0].map_info;
2682
2683 if (PROTOBUF_PREDICT_FALSE(!map_info.is_supported ||
2684 (data.tag() & 7) !=
2685 WireFormatLite::WIRETYPE_LENGTH_DELIMITED)) {
2686 PROTOBUF_MUSTTAIL return MpFallback(PROTOBUF_TC_PARAM_PASS);
2687 }
2688
2689 // When using LITE, the offset points directly into the Map<> object.
2690 // Otherwise, it points into a MapField and we must synchronize with
2691 // reflection. It is done by calling the MutableMap() virtual function on the
2692 // field's base class.
2693 void* const base = MaybeGetSplitBase(msg, is_split, table);
2694 UntypedMapBase& map =
2695 map_info.use_lite
2696 ? RefAt<UntypedMapBase>(base, entry.offset)
2697 : *RefAt<MapFieldBaseForParse>(base, entry.offset).MutableMap();
2698
2699 const uint32_t saved_tag = data.tag();
2700
2701 while (true) {
2702 NodeBase* node = map.AllocNode(map_info.node_size_info);
2703
2704 InitializeMapNodeEntry(node->GetVoidKey(), map_info.key_type_card, map, aux,
2705 true);
2706 InitializeMapNodeEntry(node->GetVoidValue(map_info.node_size_info),
2707 map_info.value_type_card, map, aux, false);
2708
2709 ptr = ctx->ParseLengthDelimitedInlined(ptr, [&](const char* ptr) {
2710 return ParseOneMapEntry(node, ptr, ctx, aux, table, entry, map.arena());
2711 });
2712
2713 if (PROTOBUF_PREDICT_TRUE(ptr != nullptr)) {
2714 if (PROTOBUF_PREDICT_FALSE(map_info.value_is_validated_enum &&
2715 !internal::ValidateEnumInlined(
2716 *static_cast<int32_t*>(node->GetVoidValue(
2717 map_info.node_size_info)),
2718 aux[1].enum_data))) {
2719 WriteMapEntryAsUnknown(msg, table, saved_tag, node, map_info);
2720 } else {
2721 // Done parsing the node, try to insert it.
2722 // If it overwrites something we get old node back to destroy it.
2723 switch (map_info.key_type_card.cpp_type()) {
2724 case MapTypeCard::kBool:
2725 node = static_cast<KeyMapBase<bool>&>(map).InsertOrReplaceNode(
2726 static_cast<KeyMapBase<bool>::KeyNode*>(node));
2727 break;
2728 case MapTypeCard::k32:
2729 node = static_cast<KeyMapBase<uint32_t>&>(map).InsertOrReplaceNode(
2730 static_cast<KeyMapBase<uint32_t>::KeyNode*>(node));
2731 break;
2732 case MapTypeCard::k64:
2733 node = static_cast<KeyMapBase<uint64_t>&>(map).InsertOrReplaceNode(
2734 static_cast<KeyMapBase<uint64_t>::KeyNode*>(node));
2735 break;
2736 case MapTypeCard::kString:
2737 node =
2738 static_cast<KeyMapBase<std::string>&>(map).InsertOrReplaceNode(
2739 static_cast<KeyMapBase<std::string>::KeyNode*>(node));
2740 break;
2741 default:
2742 Unreachable();
2743 }
2744 }
2745 }
2746
2747 // Destroy the node if we have it.
2748 // It could be because we failed to parse, or because insertion returned
2749 // an overwritten node.
2750 if (PROTOBUF_PREDICT_FALSE(node != nullptr && map.arena() == nullptr)) {
2751 DestroyMapNode(node, map_info, map);
2752 }
2753
2754 if (PROTOBUF_PREDICT_FALSE(ptr == nullptr)) {
2755 PROTOBUF_MUSTTAIL return Error(PROTOBUF_TC_PARAM_NO_DATA_PASS);
2756 }
2757
2758 if (PROTOBUF_PREDICT_FALSE(!ctx->DataAvailable(ptr))) {
2759 PROTOBUF_MUSTTAIL return ToParseLoop(PROTOBUF_TC_PARAM_NO_DATA_PASS);
2760 }
2761
2762 uint32_t next_tag;
2763 const char* ptr2 = ReadTagInlined(ptr, &next_tag);
2764 if (next_tag != saved_tag) break;
2765 ptr = ptr2;
2766 }
2767
2768 PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_NO_DATA_PASS);
2769 }
2770
MessageSetWireFormatParseLoopLite(PROTOBUF_TC_PARAM_NO_DATA_DECL)2771 const char* TcParser::MessageSetWireFormatParseLoopLite(
2772 PROTOBUF_TC_PARAM_NO_DATA_DECL) {
2773 PROTOBUF_MUSTTAIL return MessageSetWireFormatParseLoopImpl<MessageLite>(
2774 PROTOBUF_TC_PARAM_NO_DATA_PASS);
2775 }
2776
TypeCardToString(uint16_t type_card)2777 std::string TypeCardToString(uint16_t type_card) {
2778 // In here we convert the runtime value of entry.type_card back into a
2779 // sequence of literal enum labels. We use the mnenonic labels for nicer
2780 // codegen.
2781 namespace fl = internal::field_layout;
2782 const int rep_index = (type_card & fl::kRepMask) >> fl::kRepShift;
2783 const int tv_index = (type_card & fl::kTvMask) >> fl::kTvShift;
2784
2785 static constexpr const char* kFieldCardNames[] = {"Singular", "Optional",
2786 "Repeated", "Oneof"};
2787 static_assert((fl::kFcSingular >> fl::kFcShift) == 0, "");
2788 static_assert((fl::kFcOptional >> fl::kFcShift) == 1, "");
2789 static_assert((fl::kFcRepeated >> fl::kFcShift) == 2, "");
2790 static_assert((fl::kFcOneof >> fl::kFcShift) == 3, "");
2791
2792 std::string out;
2793
2794 absl::StrAppend(&out, "::_fl::kFc",
2795 kFieldCardNames[(type_card & fl::kFcMask) >> fl::kFcShift]);
2796
2797 #define PROTOBUF_INTERNAL_TYPE_CARD_CASE(x) \
2798 case fl::k##x: \
2799 absl::StrAppend(&out, " | ::_fl::k" #x); \
2800 break
2801
2802 switch (type_card & fl::kFkMask) {
2803 case fl::kFkString: {
2804 switch (type_card & ~fl::kFcMask & ~fl::kRepMask & ~fl::kSplitMask) {
2805 PROTOBUF_INTERNAL_TYPE_CARD_CASE(Bytes);
2806 PROTOBUF_INTERNAL_TYPE_CARD_CASE(RawString);
2807 PROTOBUF_INTERNAL_TYPE_CARD_CASE(Utf8String);
2808 default:
2809 ABSL_LOG(FATAL) << "Unknown type_card: 0x" << type_card;
2810 }
2811
2812 static constexpr const char* kRepNames[] = {"AString", "IString", "Cord",
2813 "SPiece", "SString"};
2814 static_assert((fl::kRepAString >> fl::kRepShift) == 0, "");
2815 static_assert((fl::kRepIString >> fl::kRepShift) == 1, "");
2816 static_assert((fl::kRepCord >> fl::kRepShift) == 2, "");
2817 static_assert((fl::kRepSPiece >> fl::kRepShift) == 3, "");
2818 static_assert((fl::kRepSString >> fl::kRepShift) == 4, "");
2819
2820 absl::StrAppend(&out, " | ::_fl::kRep", kRepNames[rep_index]);
2821 break;
2822 }
2823
2824 case fl::kFkMessage: {
2825 absl::StrAppend(&out, " | ::_fl::kMessage");
2826
2827 static constexpr const char* kRepNames[] = {nullptr, "Group", "Lazy"};
2828 static_assert((fl::kRepGroup >> fl::kRepShift) == 1, "");
2829 static_assert((fl::kRepLazy >> fl::kRepShift) == 2, "");
2830
2831 if (auto* rep = kRepNames[rep_index]) {
2832 absl::StrAppend(&out, " | ::_fl::kRep", rep);
2833 }
2834
2835 static constexpr const char* kXFormNames[2][4] = {
2836 {nullptr, "Default", "Table", "WeakPtr"}, {nullptr, "Eager", "Lazy"}};
2837
2838 static_assert((fl::kTvDefault >> fl::kTvShift) == 1, "");
2839 static_assert((fl::kTvTable >> fl::kTvShift) == 2, "");
2840 static_assert((fl::kTvWeakPtr >> fl::kTvShift) == 3, "");
2841 static_assert((fl::kTvEager >> fl::kTvShift) == 1, "");
2842 static_assert((fl::kTvLazy >> fl::kTvShift) == 2, "");
2843
2844 if (auto* xform = kXFormNames[rep_index == 2][tv_index]) {
2845 absl::StrAppend(&out, " | ::_fl::kTv", xform);
2846 }
2847 break;
2848 }
2849
2850 case fl::kFkMap:
2851 absl::StrAppend(&out, " | ::_fl::kMap");
2852 break;
2853
2854 case fl::kFkNone:
2855 break;
2856
2857 case fl::kFkVarint:
2858 case fl::kFkPackedVarint:
2859 case fl::kFkFixed:
2860 case fl::kFkPackedFixed: {
2861 switch (type_card & ~fl::kFcMask & ~fl::kSplitMask) {
2862 PROTOBUF_INTERNAL_TYPE_CARD_CASE(Bool);
2863 PROTOBUF_INTERNAL_TYPE_CARD_CASE(Fixed32);
2864 PROTOBUF_INTERNAL_TYPE_CARD_CASE(UInt32);
2865 PROTOBUF_INTERNAL_TYPE_CARD_CASE(SFixed32);
2866 PROTOBUF_INTERNAL_TYPE_CARD_CASE(Int32);
2867 PROTOBUF_INTERNAL_TYPE_CARD_CASE(SInt32);
2868 PROTOBUF_INTERNAL_TYPE_CARD_CASE(Float);
2869 PROTOBUF_INTERNAL_TYPE_CARD_CASE(Enum);
2870 PROTOBUF_INTERNAL_TYPE_CARD_CASE(EnumRange);
2871 PROTOBUF_INTERNAL_TYPE_CARD_CASE(OpenEnum);
2872 PROTOBUF_INTERNAL_TYPE_CARD_CASE(Fixed64);
2873 PROTOBUF_INTERNAL_TYPE_CARD_CASE(UInt64);
2874 PROTOBUF_INTERNAL_TYPE_CARD_CASE(SFixed64);
2875 PROTOBUF_INTERNAL_TYPE_CARD_CASE(Int64);
2876 PROTOBUF_INTERNAL_TYPE_CARD_CASE(SInt64);
2877 PROTOBUF_INTERNAL_TYPE_CARD_CASE(Double);
2878 PROTOBUF_INTERNAL_TYPE_CARD_CASE(PackedBool);
2879 PROTOBUF_INTERNAL_TYPE_CARD_CASE(PackedFixed32);
2880 PROTOBUF_INTERNAL_TYPE_CARD_CASE(PackedUInt32);
2881 PROTOBUF_INTERNAL_TYPE_CARD_CASE(PackedSFixed32);
2882 PROTOBUF_INTERNAL_TYPE_CARD_CASE(PackedInt32);
2883 PROTOBUF_INTERNAL_TYPE_CARD_CASE(PackedSInt32);
2884 PROTOBUF_INTERNAL_TYPE_CARD_CASE(PackedFloat);
2885 PROTOBUF_INTERNAL_TYPE_CARD_CASE(PackedEnum);
2886 PROTOBUF_INTERNAL_TYPE_CARD_CASE(PackedEnumRange);
2887 PROTOBUF_INTERNAL_TYPE_CARD_CASE(PackedOpenEnum);
2888 PROTOBUF_INTERNAL_TYPE_CARD_CASE(PackedFixed64);
2889 PROTOBUF_INTERNAL_TYPE_CARD_CASE(PackedUInt64);
2890 PROTOBUF_INTERNAL_TYPE_CARD_CASE(PackedSFixed64);
2891 PROTOBUF_INTERNAL_TYPE_CARD_CASE(PackedInt64);
2892 PROTOBUF_INTERNAL_TYPE_CARD_CASE(PackedSInt64);
2893 PROTOBUF_INTERNAL_TYPE_CARD_CASE(PackedDouble);
2894 default:
2895 ABSL_LOG(FATAL) << "Unknown type_card: 0x" << type_card;
2896 }
2897 }
2898 }
2899
2900 if (type_card & fl::kSplitMask) {
2901 absl::StrAppend(&out, " | ::_fl::kSplitTrue");
2902 }
2903
2904 #undef PROTOBUF_INTERNAL_TYPE_CARD_CASE
2905
2906 return out;
2907 }
2908
DiscardEverythingFallback(PROTOBUF_TC_PARAM_DECL)2909 const char* TcParser::DiscardEverythingFallback(PROTOBUF_TC_PARAM_DECL) {
2910 SyncHasbits(msg, hasbits, table);
2911 uint32_t tag = data.tag();
2912 if ((tag & 7) == WireFormatLite::WIRETYPE_END_GROUP || tag == 0) {
2913 ctx->SetLastTag(tag);
2914 return ptr;
2915 }
2916 return UnknownFieldParse(tag, nullptr, ptr, ctx);
2917 }
2918
2919 } // namespace internal
2920 } // namespace protobuf
2921 } // namespace google
2922
2923 #include "google/protobuf/port_undef.inc"
2924