1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc. All rights reserved.
3 // https://developers.google.com/protocol-buffers/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 // * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 // * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 // * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31 #include <google/protobuf/compiler/cpp/cpp_parse_function_generator.h>
32
33 #include <algorithm>
34 #include <limits>
35 #include <string>
36 #include <utility>
37
38 #include <google/protobuf/wire_format.h>
39 #include <google/protobuf/compiler/cpp/cpp_helpers.h>
40
41 namespace google {
42 namespace protobuf {
43 namespace compiler {
44 namespace cpp {
45
46 namespace {
47 using google::protobuf::internal::WireFormat;
48 using google::protobuf::internal::WireFormatLite;
49
GetOrderedFields(const Descriptor * descriptor,const Options & options)50 std::vector<const FieldDescriptor*> GetOrderedFields(
51 const Descriptor* descriptor, const Options& options) {
52 std::vector<const FieldDescriptor*> ordered_fields;
53 for (auto field : FieldRange(descriptor)) {
54 if (!IsFieldStripped(field, options)) {
55 ordered_fields.push_back(field);
56 }
57 }
58 std::sort(ordered_fields.begin(), ordered_fields.end(),
59 [](const FieldDescriptor* a, const FieldDescriptor* b) {
60 return a->number() < b->number();
61 });
62 return ordered_fields;
63 }
64
HasInternalAccessors(const FieldOptions::CType ctype)65 bool HasInternalAccessors(const FieldOptions::CType ctype) {
66 return ctype == FieldOptions::STRING || ctype == FieldOptions::CORD;
67 }
68
TagSize(uint32_t field_number)69 int TagSize(uint32_t field_number) {
70 if (field_number < 16) return 1;
71 GOOGLE_CHECK_LT(field_number, (1 << 14))
72 << "coded tag for " << field_number << " too big for uint16_t";
73 return 2;
74 }
75
76 std::string FieldParseFunctionName(
77 const TailCallTableInfo::FieldEntryInfo& entry, const Options& options);
78
IsFieldEligibleForFastParsing(const TailCallTableInfo::FieldEntryInfo & entry,const Options & options,MessageSCCAnalyzer * scc_analyzer)79 bool IsFieldEligibleForFastParsing(
80 const TailCallTableInfo::FieldEntryInfo& entry, const Options& options,
81 MessageSCCAnalyzer* scc_analyzer) {
82 const auto* field = entry.field;
83 // Map, oneof, weak, and lazy fields are not handled on the fast path.
84 if (field->is_map() || field->real_containing_oneof() ||
85 field->options().weak() ||
86 IsImplicitWeakField(field, options, scc_analyzer) ||
87 IsLazy(field, options, scc_analyzer)) {
88 return false;
89 }
90
91 // We will check for a valid auxiliary index range later. However, we might
92 // want to change the value we check for inlined string fields.
93 int aux_idx = entry.aux_idx;
94
95 switch (field->type()) {
96 case FieldDescriptor::TYPE_ENUM:
97 // If enum values are not validated at parse time, then this field can be
98 // handled on the fast path like an int32.
99 if (HasPreservingUnknownEnumSemantics(field)) {
100 break;
101 }
102 if (field->is_repeated() && field->is_packed()) {
103 return false;
104 }
105 break;
106
107 // Some bytes fields can be handled on fast path.
108 case FieldDescriptor::TYPE_STRING:
109 case FieldDescriptor::TYPE_BYTES:
110 if (field->options().ctype() != FieldOptions::STRING) {
111 return false;
112 }
113 if (IsStringInlined(field, options)) {
114 GOOGLE_CHECK(!field->is_repeated());
115 // For inlined strings, the donation state index is stored in the
116 // `aux_idx` field of the fast parsing info. We need to check the range
117 // of that value instead of the auxiliary index.
118 aux_idx = entry.inlined_string_idx;
119 }
120 break;
121
122 default:
123 break;
124 }
125
126 if (HasHasbit(field)) {
127 // The tailcall parser can only update the first 32 hasbits. Fields with
128 // has-bits beyond the first 32 are handled by mini parsing/fallback.
129 GOOGLE_CHECK_GE(entry.hasbit_idx, 0) << field->DebugString();
130 if (entry.hasbit_idx >= 32) return false;
131 }
132
133 // If the field needs auxiliary data, then the aux index is needed. This
134 // must fit in a uint8_t.
135 if (aux_idx > std::numeric_limits<uint8_t>::max()) {
136 return false;
137 }
138
139 // The largest tag that can be read by the tailcall parser is two bytes
140 // when varint-coded. This allows 14 bits for the numeric tag value:
141 // byte 0 byte 1
142 // 1nnnnttt 0nnnnnnn
143 // ^^^^^^^ ^^^^^^^
144 if (field->number() >= 1 << 11) return false;
145
146 return true;
147 }
148
SplitFastFieldsForSize(const std::vector<TailCallTableInfo::FieldEntryInfo> & field_entries,int table_size_log2,const Options & options,MessageSCCAnalyzer * scc_analyzer)149 std::vector<TailCallTableInfo::FastFieldInfo> SplitFastFieldsForSize(
150 const std::vector<TailCallTableInfo::FieldEntryInfo>& field_entries,
151 int table_size_log2, const Options& options,
152 MessageSCCAnalyzer* scc_analyzer) {
153 std::vector<TailCallTableInfo::FastFieldInfo> result(1 << table_size_log2);
154 const uint32_t idx_mask = result.size() - 1;
155
156 for (const auto& entry : field_entries) {
157 if (!IsFieldEligibleForFastParsing(entry, options, scc_analyzer)) {
158 continue;
159 }
160
161 const auto* field = entry.field;
162 uint32_t tag = WireFormat::MakeTag(field);
163
164 // Construct the varint-coded tag. If it is more than 7 bits, we need to
165 // shift the high bits and add a continue bit.
166 if (uint32_t hibits = tag & 0xFFFFFF80) {
167 tag = tag + hibits + 128; // tag = lobits + 2*hibits + 128
168 }
169
170 // The field index is determined by the low bits of the field number, where
171 // the table size determines the width of the mask. The largest table
172 // supported is 32 entries. The parse loop uses these bits directly, so that
173 // the dispatch does not require arithmetic:
174 // byte 0 byte 1
175 // tag: 1nnnnttt 0nnnnnnn
176 // ^^^^^
177 // idx (table_size_log2=5)
178 // This means that any field number that does not fit in the lower 4 bits
179 // will always have the top bit of its table index asserted.
180 const uint32_t fast_idx = (tag >> 3) & idx_mask;
181
182 TailCallTableInfo::FastFieldInfo& info = result[fast_idx];
183 if (info.field != nullptr) {
184 // This field entry is already filled.
185 continue;
186 }
187
188 // Fill in this field's entry:
189 GOOGLE_CHECK(info.func_name.empty()) << info.func_name;
190 info.func_name = FieldParseFunctionName(entry, options);
191 info.field = field;
192 info.coded_tag = tag;
193 // If this field does not have presence, then it can set an out-of-bounds
194 // bit (tailcall parsing uses a uint64_t for hasbits, but only stores 32).
195 info.hasbit_idx = HasHasbit(field) ? entry.hasbit_idx : 63;
196 if (IsStringInlined(field, options)) {
197 GOOGLE_CHECK(!field->is_repeated());
198 info.aux_idx = static_cast<uint8_t>(entry.inlined_string_idx);
199 } else {
200 info.aux_idx = static_cast<uint8_t>(entry.aux_idx);
201 }
202 }
203 return result;
204 }
205
206 // Filter out fields that will be handled by mini parsing.
FilterMiniParsedFields(const std::vector<const FieldDescriptor * > & fields,const Options & options,MessageSCCAnalyzer * scc_analyzer)207 std::vector<const FieldDescriptor*> FilterMiniParsedFields(
208 const std::vector<const FieldDescriptor*>& fields, const Options& options,
209 MessageSCCAnalyzer* scc_analyzer) {
210 std::vector<const FieldDescriptor*> generated_fallback_fields;
211
212 for (const auto* field : fields) {
213 bool handled = false;
214 switch (field->type()) {
215 case FieldDescriptor::TYPE_DOUBLE:
216 case FieldDescriptor::TYPE_FLOAT:
217 case FieldDescriptor::TYPE_FIXED32:
218 case FieldDescriptor::TYPE_SFIXED32:
219 case FieldDescriptor::TYPE_FIXED64:
220 case FieldDescriptor::TYPE_SFIXED64:
221 case FieldDescriptor::TYPE_BOOL:
222 case FieldDescriptor::TYPE_UINT32:
223 case FieldDescriptor::TYPE_SINT32:
224 case FieldDescriptor::TYPE_INT32:
225 case FieldDescriptor::TYPE_UINT64:
226 case FieldDescriptor::TYPE_SINT64:
227 case FieldDescriptor::TYPE_INT64:
228 // These are handled by MiniParse, so we don't need any generated
229 // fallback code.
230 handled = true;
231 break;
232
233 case FieldDescriptor::TYPE_ENUM:
234 if (field->is_repeated() &&
235 !HasPreservingUnknownEnumSemantics(field)) {
236 // TODO(b/206890171): handle packed repeated closed enums
237 // Non-packed repeated can be handled using tables, but we still
238 // need to generate fallback code for all repeated enums in order to
239 // handle packed encoding. This is because of the lite/full split
240 // when handling invalid enum values in a packed field.
241 handled = false;
242 } else {
243 handled = true;
244 }
245 break;
246
247 case FieldDescriptor::TYPE_BYTES:
248 case FieldDescriptor::TYPE_STRING:
249 if (IsStringInlined(field, options)) {
250 // TODO(b/198211897): support InilnedStringField.
251 handled = false;
252 } else {
253 handled = true;
254 }
255 break;
256
257 case FieldDescriptor::TYPE_MESSAGE:
258 case FieldDescriptor::TYPE_GROUP:
259 // TODO(b/210762816): support remaining field types.
260 if (field->is_map() || IsWeak(field, options) ||
261 IsImplicitWeakField(field, options, scc_analyzer) ||
262 IsLazy(field, options, scc_analyzer)) {
263 handled = false;
264 } else {
265 handled = true;
266 }
267 break;
268
269 default:
270 handled = false;
271 break;
272 }
273 if (!handled) generated_fallback_fields.push_back(field);
274 }
275
276 return generated_fallback_fields;
277 }
278
279 } // namespace
280
TailCallTableInfo(const Descriptor * descriptor,const Options & options,const std::vector<const FieldDescriptor * > & ordered_fields,const std::vector<int> & has_bit_indices,const std::vector<int> & inlined_string_indices,MessageSCCAnalyzer * scc_analyzer)281 TailCallTableInfo::TailCallTableInfo(
282 const Descriptor* descriptor, const Options& options,
283 const std::vector<const FieldDescriptor*>& ordered_fields,
284 const std::vector<int>& has_bit_indices,
285 const std::vector<int>& inlined_string_indices,
286 MessageSCCAnalyzer* scc_analyzer) {
287 int oneof_count = descriptor->real_oneof_decl_count();
288 // If this message has any oneof fields, store the case offset in the first
289 // auxiliary entry.
290 if (oneof_count > 0) {
291 GOOGLE_LOG_IF(DFATAL, ordered_fields.empty())
292 << "Invalid message: " << descriptor->full_name() << " has "
293 << oneof_count << " oneof declarations, but no fields";
294 aux_entries.push_back(StrCat(
295 "_fl::Offset{offsetof(", ClassName(descriptor), ", _oneof_case_)}"));
296 }
297
298 // If this message has any inlined string fields, store the donation state
299 // offset in the second auxiliary entry.
300 if (!inlined_string_indices.empty()) {
301 aux_entries.resize(2); // pad if necessary
302 aux_entries[1] =
303 StrCat("_fl::Offset{offsetof(", ClassName(descriptor),
304 ", _inlined_string_donated_)}");
305 }
306
307 // Fill in mini table entries.
308 for (const FieldDescriptor* field : ordered_fields) {
309 field_entries.push_back(
310 {field, (HasHasbit(field) ? has_bit_indices[field->index()] : -1)});
311 auto& entry = field_entries.back();
312
313 if (field->type() == FieldDescriptor::TYPE_MESSAGE ||
314 field->type() == FieldDescriptor::TYPE_GROUP) {
315 // Message-typed fields have a FieldAux with the default instance pointer.
316 if (field->is_map()) {
317 // TODO(b/205904770): generate aux entries for maps
318 } else if (IsWeak(field, options)) {
319 // Don't generate anything for weak fields. They are handled by the
320 // generated fallback.
321 } else if (IsImplicitWeakField(field, options, scc_analyzer)) {
322 // Implicit weak fields don't need to store a default instance pointer.
323 } else if (IsLazy(field, options, scc_analyzer)) {
324 // Lazy fields are handled by the generated fallback function.
325 } else {
326 field_entries.back().aux_idx = aux_entries.size();
327 const Descriptor* field_type = field->message_type();
328 aux_entries.push_back(StrCat(
329 "reinterpret_cast<const ", QualifiedClassName(field_type, options),
330 "*>(&", QualifiedDefaultInstanceName(field_type, options), ")"));
331 }
332 } else if (field->type() == FieldDescriptor::TYPE_ENUM &&
333 !HasPreservingUnknownEnumSemantics(field)) {
334 // Enum fields which preserve unknown values (proto3 behavior) are
335 // effectively int32 fields with respect to parsing -- i.e., the value
336 // does not need to be validated at parse time.
337 //
338 // Enum fields which do not preserve unknown values (proto2 behavior) use
339 // a FieldAux to store validation information. If the enum values are
340 // sequential (and within a range we can represent), then the FieldAux
341 // entry represents the range using the minimum value (which must fit in
342 // an int16_t) and count (a uint16_t). Otherwise, the entry holds a
343 // pointer to the generated Name_IsValid function.
344
345 entry.aux_idx = aux_entries.size();
346 const EnumDescriptor* enum_type = field->enum_type();
347 GOOGLE_CHECK_GT(enum_type->value_count(), 0) << enum_type->DebugString();
348
349 // Check if the enum values are a single, contiguous range.
350 std::vector<int> enum_values;
351 for (int i = 0, N = enum_type->value_count(); i < N; ++i) {
352 enum_values.push_back(enum_type->value(i)->number());
353 }
354 auto values_begin = enum_values.begin();
355 auto values_end = enum_values.end();
356 std::sort(values_begin, values_end);
357 enum_values.erase(std::unique(values_begin, values_end), values_end);
358
359 if (enum_values.back() - enum_values[0] == enum_values.size() - 1 &&
360 enum_values[0] >= std::numeric_limits<int16_t>::min() &&
361 enum_values[0] <= std::numeric_limits<int16_t>::max() &&
362 enum_values.size() <= std::numeric_limits<uint16_t>::max()) {
363 entry.is_enum_range = true;
364 aux_entries.push_back(
365 StrCat(enum_values[0], ", ", enum_values.size()));
366 } else {
367 entry.is_enum_range = false;
368 aux_entries.push_back(
369 StrCat(QualifiedClassName(enum_type, options), "_IsValid"));
370 }
371 } else if ((field->type() == FieldDescriptor::TYPE_STRING ||
372 field->type() == FieldDescriptor::TYPE_BYTES) &&
373 IsStringInlined(field, options)) {
374 GOOGLE_CHECK(!field->is_repeated());
375 // Inlined strings have an extra marker to represent their donation state.
376 int idx = inlined_string_indices[field->index()];
377 // For mini parsing, the donation state index is stored as an `offset`
378 // auxiliary entry.
379 entry.aux_idx = aux_entries.size();
380 aux_entries.push_back(StrCat("_fl::Offset{", idx, "}"));
381 // For fast table parsing, the donation state index is stored instead of
382 // the aux_idx (this will limit the range to 8 bits).
383 entry.inlined_string_idx = idx;
384 }
385 }
386
387 // Choose the smallest fast table that covers the maximum number of fields.
388 table_size_log2 = 0; // fallback value
389 int num_fast_fields = -1;
390 for (int try_size_log2 : {0, 1, 2, 3, 4, 5}) {
391 size_t try_size = 1 << try_size_log2;
392 auto split_fields = SplitFastFieldsForSize(field_entries, try_size_log2,
393 options, scc_analyzer);
394 GOOGLE_CHECK_EQ(split_fields.size(), try_size);
395 int try_num_fast_fields = 0;
396 for (const auto& info : split_fields) {
397 if (info.field != nullptr) ++try_num_fast_fields;
398 }
399 // Use this size if (and only if) it covers more fields.
400 if (try_num_fast_fields > num_fast_fields) {
401 fast_path_fields = std::move(split_fields);
402 table_size_log2 = try_size_log2;
403 num_fast_fields = try_num_fast_fields;
404 }
405 // The largest table we allow has the same number of entries as the message
406 // has fields, rounded up to the next power of 2 (e.g., a message with 5
407 // fields can have a fast table of size 8). A larger table *might* cover
408 // more fields in certain cases, but a larger table in that case would have
409 // mostly empty entries; so, we cap the size to avoid pathologically sparse
410 // tables.
411 if (try_size > ordered_fields.size()) {
412 break;
413 }
414 }
415
416 // Filter out fields that are handled by MiniParse. We don't need to generate
417 // a fallback for these, which saves code size.
418 fallback_fields = FilterMiniParsedFields(ordered_fields, options,
419 scc_analyzer);
420
421 // If there are no fallback fields, and at most one extension range, the
422 // parser can use a generic fallback function. Otherwise, a message-specific
423 // fallback routine is needed.
424 use_generated_fallback =
425 !fallback_fields.empty() || descriptor->extension_range_count() > 1;
426 }
427
ParseFunctionGenerator(const Descriptor * descriptor,int max_has_bit_index,const std::vector<int> & has_bit_indices,const std::vector<int> & inlined_string_indices,const Options & options,MessageSCCAnalyzer * scc_analyzer,const std::map<std::string,std::string> & vars)428 ParseFunctionGenerator::ParseFunctionGenerator(
429 const Descriptor* descriptor, int max_has_bit_index,
430 const std::vector<int>& has_bit_indices,
431 const std::vector<int>& inlined_string_indices, const Options& options,
432 MessageSCCAnalyzer* scc_analyzer,
433 const std::map<std::string, std::string>& vars)
434 : descriptor_(descriptor),
435 scc_analyzer_(scc_analyzer),
436 options_(options),
437 variables_(vars),
438 inlined_string_indices_(inlined_string_indices),
439 ordered_fields_(GetOrderedFields(descriptor_, options_)),
440 num_hasbits_(max_has_bit_index) {
441 if (should_generate_tctable()) {
442 tc_table_info_.reset(new TailCallTableInfo(
443 descriptor_, options_, ordered_fields_, has_bit_indices,
444 inlined_string_indices, scc_analyzer));
445 }
446 SetCommonVars(options_, &variables_);
447 SetCommonMessageDataVariables(&variables_);
448 SetUnknownFieldsVariable(descriptor_, options_, &variables_);
449 variables_["classname"] = ClassName(descriptor, false);
450 }
451
GenerateMethodDecls(io::Printer * printer)452 void ParseFunctionGenerator::GenerateMethodDecls(io::Printer* printer) {
453 Formatter format(printer, variables_);
454 if (should_generate_tctable()) {
455 format.Outdent();
456 if (should_generate_guarded_tctable()) {
457 format("#ifdef PROTOBUF_TAIL_CALL_TABLE_PARSER_ENABLED\n");
458 }
459 format(
460 " private:\n"
461 " static const char* Tct_ParseFallback(PROTOBUF_TC_PARAM_DECL);\n"
462 " public:\n");
463 if (should_generate_guarded_tctable()) {
464 format("#endif\n");
465 }
466 format.Indent();
467 }
468 format(
469 "const char* _InternalParse(const char* ptr, "
470 "::$proto_ns$::internal::ParseContext* ctx) final;\n");
471 }
472
GenerateMethodImpls(io::Printer * printer)473 void ParseFunctionGenerator::GenerateMethodImpls(io::Printer* printer) {
474 Formatter format(printer, variables_);
475 bool need_parse_function = true;
476 if (descriptor_->options().message_set_wire_format()) {
477 // Special-case MessageSet.
478 need_parse_function = false;
479 format(
480 "const char* $classname$::_InternalParse(const char* ptr,\n"
481 " ::_pbi::ParseContext* ctx) {\n"
482 "$annotate_deserialize$");
483 if (!options_.unverified_lazy_message_sets &&
484 ShouldVerify(descriptor_, options_, scc_analyzer_)) {
485 format(
486 " ctx->set_lazy_eager_verify_func(&$classname$::InternalVerify);\n");
487 }
488 format(
489 " return $extensions$.ParseMessageSet(ptr, \n"
490 " internal_default_instance(), &_internal_metadata_, ctx);\n"
491 "}\n");
492 }
493 if (!should_generate_tctable()) {
494 if (need_parse_function) {
495 GenerateLoopingParseFunction(format);
496 }
497 return;
498 }
499 if (should_generate_guarded_tctable()) {
500 format("#ifdef PROTOBUF_TAIL_CALL_TABLE_PARSER_ENABLED\n\n");
501 }
502 if (need_parse_function) {
503 GenerateTailcallParseFunction(format);
504 }
505 if (tc_table_info_->use_generated_fallback) {
506 GenerateTailcallFallbackFunction(format);
507 }
508 if (should_generate_guarded_tctable()) {
509 if (need_parse_function) {
510 format("\n#else // PROTOBUF_TAIL_CALL_TABLE_PARSER_ENABLED\n\n");
511 GenerateLoopingParseFunction(format);
512 }
513 format("\n#endif // PROTOBUF_TAIL_CALL_TABLE_PARSER_ENABLED\n");
514 }
515 }
516
should_generate_tctable() const517 bool ParseFunctionGenerator::should_generate_tctable() const {
518 if (options_.tctable_mode == Options::kTCTableNever) {
519 return false;
520 }
521 return true;
522 }
523
GenerateTailcallParseFunction(Formatter & format)524 void ParseFunctionGenerator::GenerateTailcallParseFunction(Formatter& format) {
525 GOOGLE_CHECK(should_generate_tctable());
526
527 // Generate an `_InternalParse` that starts the tail-calling loop.
528 format(
529 "const char* $classname$::_InternalParse(\n"
530 " const char* ptr, ::_pbi::ParseContext* ctx) {\n"
531 "$annotate_deserialize$"
532 " ptr = ::_pbi::TcParser::ParseLoop(this, ptr, ctx, "
533 "&_table_.header);\n");
534 format(
535 " return ptr;\n"
536 "}\n\n");
537 }
538
GenerateTailcallFallbackFunction(Formatter & format)539 void ParseFunctionGenerator::GenerateTailcallFallbackFunction(
540 Formatter& format) {
541 GOOGLE_CHECK(should_generate_tctable());
542 format(
543 "const char* $classname$::Tct_ParseFallback(PROTOBUF_TC_PARAM_DECL) {\n"
544 "#define CHK_(x) if (PROTOBUF_PREDICT_FALSE(!(x))) return nullptr\n");
545 format.Indent();
546 format("auto* typed_msg = static_cast<$classname$*>(msg);\n");
547
548 if (num_hasbits_ > 0) {
549 // Sync hasbits
550 format("typed_msg->_has_bits_[0] = hasbits;\n");
551 }
552 format("uint32_t tag = data.tag();\n");
553
554 format.Set("msg", "typed_msg->");
555 format.Set("this", "typed_msg");
556 format.Set("has_bits", "typed_msg->_has_bits_");
557 format.Set("next_tag", "goto next_tag");
558 GenerateParseIterationBody(format, descriptor_,
559 tc_table_info_->fallback_fields);
560
561 format.Outdent();
562 format(
563 "next_tag:\n"
564 "message_done:\n"
565 " return ptr;\n"
566 "#undef CHK_\n"
567 "}\n");
568 }
569
// Covers a window of 16 consecutive field numbers. `skipmap` starts with all
// bits set; a cleared bit marks a field number that is present.
// `field_entry_offset` is the field-entry index recorded when the entry was
// created.
struct SkipEntry16 {
  uint16_t skipmap;
  uint16_t field_entry_offset;
};

// A run of SkipEntry16 windows; the first window begins at `first_fnum`.
struct SkipEntryBlock {
  uint32_t first_fnum;
  std::vector<SkipEntry16> entries;
};

// Lookup structure from field number to field entry.
struct NumToEntryTable {
  uint32_t skipmap32;  // for fields #1 - #32
  std::vector<SkipEntryBlock> blocks;

  // Returns how many uint16_t values are required to represent this table:
  //   * 2 for the terminating field number,
  //   * per block: 2 for its first field number and 1 for its entry count,
  //   * 2 for every skip entry.
  int size16() const {
    size_t total_entries = 0;
    for (const SkipEntryBlock& block : blocks) {
      total_entries += block.entries.size();
    }
    return static_cast<int>(2 + 3 * blocks.size() + 2 * total_entries);
  }
};
591
592 static NumToEntryTable MakeNumToEntryTable(
593 const std::vector<const FieldDescriptor*>& field_descriptors);
594
GenerateDataDecls(io::Printer * printer)595 void ParseFunctionGenerator::GenerateDataDecls(io::Printer* printer) {
596 if (!should_generate_tctable()) {
597 return;
598 }
599 Formatter format(printer, variables_);
600 if (should_generate_guarded_tctable()) {
601 format.Outdent();
602 format("#ifdef PROTOBUF_TAIL_CALL_TABLE_PARSER_ENABLED\n");
603 format.Indent();
604 }
605 auto field_num_to_entry_table = MakeNumToEntryTable(ordered_fields_);
606 format(
607 "static const ::$proto_ns$::internal::"
608 "TcParseTable<$1$, $2$, $3$, $4$, $5$> _table_;\n",
609 tc_table_info_->table_size_log2, ordered_fields_.size(),
610 tc_table_info_->aux_entries.size(), CalculateFieldNamesSize(),
611 field_num_to_entry_table.size16());
612 if (should_generate_guarded_tctable()) {
613 format.Outdent();
614 format("#endif // PROTOBUF_TAIL_CALL_TABLE_PARSER_ENABLED\n");
615 format.Indent();
616 }
617 }
618
GenerateDataDefinitions(io::Printer * printer)619 void ParseFunctionGenerator::GenerateDataDefinitions(io::Printer* printer) {
620 if (!should_generate_tctable()) {
621 return;
622 }
623 Formatter format(printer, variables_);
624 if (should_generate_guarded_tctable()) {
625 format("#ifdef PROTOBUF_TAIL_CALL_TABLE_PARSER_ENABLED\n");
626 }
627 GenerateTailCallTable(format);
628 if (should_generate_guarded_tctable()) {
629 format("#endif // PROTOBUF_TAIL_CALL_TABLE_PARSER_ENABLED\n");
630 }
631 }
632
GenerateLoopingParseFunction(Formatter & format)633 void ParseFunctionGenerator::GenerateLoopingParseFunction(Formatter& format) {
634 format(
635 "const char* $classname$::_InternalParse(const char* ptr, "
636 "::_pbi::ParseContext* ctx) {\n"
637 "$annotate_deserialize$"
638 "#define CHK_(x) if (PROTOBUF_PREDICT_FALSE(!(x))) goto failure\n");
639 format.Indent();
640 format.Set("msg", "");
641 format.Set("this", "this");
642 int hasbits_size = 0;
643 if (num_hasbits_ > 0) {
644 hasbits_size = (num_hasbits_ + 31) / 32;
645 }
646 // For now only optimize small hasbits.
647 if (hasbits_size != 1) hasbits_size = 0;
648 if (hasbits_size) {
649 format("_Internal::HasBits has_bits{};\n");
650 format.Set("has_bits", "has_bits");
651 } else {
652 format.Set("has_bits", "_has_bits_");
653 }
654 format.Set("next_tag", "continue");
655 format("while (!ctx->Done(&ptr)) {\n");
656 format.Indent();
657
658 format(
659 "uint32_t tag;\n"
660 "ptr = ::_pbi::ReadTag(ptr, &tag);\n");
661 GenerateParseIterationBody(format, descriptor_, ordered_fields_);
662
663 format.Outdent();
664 format("} // while\n");
665
666 format.Outdent();
667 format("message_done:\n");
668 if (hasbits_size) format(" _has_bits_.Or(has_bits);\n");
669
670 format(
671 " return ptr;\n"
672 "failure:\n"
673 " ptr = nullptr;\n"
674 " goto message_done;\n"
675 "#undef CHK_\n"
676 "}\n");
677 }
678
MakeNumToEntryTable(const std::vector<const FieldDescriptor * > & field_descriptors)679 static NumToEntryTable MakeNumToEntryTable(
680 const std::vector<const FieldDescriptor*>& field_descriptors) {
681 NumToEntryTable num_to_entry_table;
682 num_to_entry_table.skipmap32 = static_cast<uint32_t>(-1);
683
684 // skip_entry_block is the current block of SkipEntries that we're
685 // appending to. cur_block_first_fnum is the number of the first
686 // field represented by the block.
687 uint16_t field_entry_index = 0;
688 uint16_t N = field_descriptors.size();
689 // First, handle field numbers 1-32, which affect only the initial
690 // skipmap32 and don't generate additional skip-entry blocks.
691 for (; field_entry_index != N; ++field_entry_index) {
692 auto* field_descriptor = field_descriptors[field_entry_index];
693 if (field_descriptor->number() > 32) break;
694 auto skipmap32_index = field_descriptor->number() - 1;
695 num_to_entry_table.skipmap32 -= 1 << skipmap32_index;
696 }
697 // If all the field numbers were less than or equal to 32, we will have
698 // no further entries to process, and we are already done.
699 if (field_entry_index == N) return num_to_entry_table;
700
701 SkipEntryBlock* block = nullptr;
702 bool start_new_block = true;
703 // To determine sparseness, track the field number corresponding to
704 // the start of the most recent skip entry.
705 uint32_t last_skip_entry_start = 0;
706 for (; field_entry_index != N; ++field_entry_index) {
707 auto* field_descriptor = field_descriptors[field_entry_index];
708 uint32_t fnum = field_descriptor->number();
709 GOOGLE_CHECK_GT(fnum, last_skip_entry_start);
710 if (start_new_block == false) {
711 // If the next field number is within 15 of the last_skip_entry_start, we
712 // continue writing just to that entry. If it's between 16 and 31 more,
713 // then we just extend the current block by one. If it's more than 31
714 // more, we have to add empty skip entries in order to continue using the
715 // existing block. Obviously it's just 32 more, it doesn't make sense to
716 // start a whole new block, since new blocks mean having to write out
717 // their starting field number, which is 32 bits, as well as the size of
718 // the additional block, which is 16... while an empty SkipEntry16 only
719 // costs 32 bits. So if it was 48 more, it's a slight space win; we save
720 // 16 bits, but probably at the cost of slower run time. We're choosing
721 // 96 for now.
722 if (fnum - last_skip_entry_start > 96) start_new_block = true;
723 }
724 if (start_new_block) {
725 num_to_entry_table.blocks.push_back(SkipEntryBlock{fnum});
726 block = &num_to_entry_table.blocks.back();
727 start_new_block = false;
728 }
729
730 auto skip_entry_num = (fnum - block->first_fnum) / 16;
731 auto skip_entry_index = (fnum - block->first_fnum) % 16;
732 while (skip_entry_num >= block->entries.size())
733 block->entries.push_back({0xFFFF, field_entry_index});
734 block->entries[skip_entry_num].skipmap -= 1 << (skip_entry_index);
735
736 last_skip_entry_start = fnum - skip_entry_index;
737 }
738 return num_to_entry_table;
739 }
740
GenerateTailCallTable(Formatter & format)741 void ParseFunctionGenerator::GenerateTailCallTable(Formatter& format) {
742 GOOGLE_CHECK(should_generate_tctable());
743 // All entries without a fast-path parsing function need a fallback.
744 std::string fallback;
745 if (tc_table_info_->use_generated_fallback) {
746 fallback = ClassName(descriptor_) + "::Tct_ParseFallback";
747 } else {
748 fallback = "::_pbi::TcParser::GenericFallback";
749 if (GetOptimizeFor(descriptor_->file(), options_) ==
750 FileOptions::LITE_RUNTIME) {
751 fallback += "Lite";
752 }
753 }
754
755 // For simplicity and speed, the table is not covering all proto
756 // configurations. This model uses a fallback to cover all situations that
757 // the table can't accommodate, together with unknown fields or extensions.
758 // These are number of fields over 32, fields with 3 or more tag bytes,
759 // maps, weak fields, lazy, more than 1 extension range. In the cases
760 // the table is sufficient we can use a generic routine, that just handles
761 // unknown fields and potentially an extension range.
762 auto field_num_to_entry_table = MakeNumToEntryTable(ordered_fields_);
763 format(
764 "PROTOBUF_ATTRIBUTE_INIT_PRIORITY1\n"
765 "const ::_pbi::TcParseTable<$1$, $2$, $3$, $4$, $5$> "
766 "$classname$::_table_ = "
767 "{\n",
768 tc_table_info_->table_size_log2, ordered_fields_.size(),
769 tc_table_info_->aux_entries.size(), CalculateFieldNamesSize(),
770 field_num_to_entry_table.size16());
771 {
772 auto table_scope = format.ScopedIndent();
773 format("{\n");
774 {
775 auto header_scope = format.ScopedIndent();
776 if (num_hasbits_ > 0 || IsMapEntryMessage(descriptor_)) {
777 format("PROTOBUF_FIELD_OFFSET($classname$, _has_bits_),\n");
778 } else {
779 format("0, // no _has_bits_\n");
780 }
781 if (descriptor_->extension_range_count() == 1) {
782 format(
783 "PROTOBUF_FIELD_OFFSET($classname$, $extensions$),\n"
784 "$1$, $2$, // extension_range_{low,high}\n",
785 descriptor_->extension_range(0)->start,
786 descriptor_->extension_range(0)->end);
787 } else {
788 format("0, 0, 0, // no _extensions_\n");
789 }
790 format("$1$, $2$, // max_field_number, fast_idx_mask\n",
791 (ordered_fields_.empty() ? 0 : ordered_fields_.back()->number()),
792 (((1 << tc_table_info_->table_size_log2) - 1) << 3));
793 format(
794 "offsetof(decltype(_table_), field_lookup_table),\n"
795 "$1$, // skipmap\n",
796 field_num_to_entry_table.skipmap32);
797 if (ordered_fields_.empty()) {
798 format(
799 "offsetof(decltype(_table_), field_names), // no field_entries\n");
800 } else {
801 format("offsetof(decltype(_table_), field_entries),\n");
802 }
803
804 format(
805 "$1$, // num_field_entries\n"
806 "$2$, // num_aux_entries\n",
807 ordered_fields_.size(), tc_table_info_->aux_entries.size());
808 if (tc_table_info_->aux_entries.empty()) {
809 format(
810 "offsetof(decltype(_table_), field_names), // no aux_entries\n");
811 } else {
812 format("offsetof(decltype(_table_), aux_entries),\n");
813 }
814 format(
815 "&$1$._instance,\n"
816 "$2$, // fallback\n"
817 "",
818 DefaultInstanceName(descriptor_, options_), fallback);
819 }
820 format("}, {{\n");
821 {
822 // fast_entries[]
823 auto fast_scope = format.ScopedIndent();
824 GenerateFastFieldEntries(format);
825 }
826 format("}}, {{\n");
827 {
828 // field_lookup_table[]
829 auto field_lookup_scope = format.ScopedIndent();
830 int line_entries = 0;
831 for (int i = 0, N = field_num_to_entry_table.blocks.size(); i < N; ++i) {
832 SkipEntryBlock& entry_block = field_num_to_entry_table.blocks[i];
833 format("$1$, $2$, $3$,\n", entry_block.first_fnum & 65535,
834 entry_block.first_fnum / 65536, entry_block.entries.size());
835 for (auto se16 : entry_block.entries) {
836 if (line_entries == 0) {
837 format("$1$, $2$,", se16.skipmap, se16.field_entry_offset);
838 ++line_entries;
839 } else if (line_entries < 5) {
840 format(" $1$, $2$,", se16.skipmap, se16.field_entry_offset);
841 ++line_entries;
842 } else {
843 format(" $1$, $2$,\n", se16.skipmap, se16.field_entry_offset);
844 line_entries = 0;
845 }
846 }
847 }
848 if (line_entries) format("\n");
849 format("65535, 65535\n");
850 }
851 if (ordered_fields_.empty()) {
852 GOOGLE_LOG_IF(DFATAL, !tc_table_info_->aux_entries.empty())
853 << "Invalid message: " << descriptor_->full_name() << " has "
854 << tc_table_info_->aux_entries.size()
855 << " auxiliary field entries, but no fields";
856 format(
857 "}},\n"
858 "// no field_entries, or aux_entries\n"
859 "{{\n");
860 } else {
861 format("}}, {{\n");
862 {
863 // field_entries[]
864 auto field_scope = format.ScopedIndent();
865 GenerateFieldEntries(format);
866 }
867 if (tc_table_info_->aux_entries.empty()) {
868 format(
869 "}},\n"
870 "// no aux_entries\n"
871 "{{\n");
872 } else {
873 format("}}, {{\n");
874 {
875 // aux_entries[]
876 auto aux_scope = format.ScopedIndent();
877 for (const std::string& aux_entry : tc_table_info_->aux_entries) {
878 format("{$1$},\n", aux_entry);
879 }
880 }
881 format("}}, {{\n");
882 }
883 } // ordered_fields_.empty()
884 {
885 // field_names[]
886 auto field_name_scope = format.ScopedIndent();
887 GenerateFieldNames(format);
888 }
889 format("}},\n");
890 }
891 format("};\n\n"); // _table_
892 }
893
GenerateFastFieldEntries(Formatter & format)894 void ParseFunctionGenerator::GenerateFastFieldEntries(Formatter& format) {
895 for (const auto& info : tc_table_info_->fast_path_fields) {
896 if (info.field != nullptr) {
897 PrintFieldComment(format, info.field);
898 }
899 if (info.func_name.empty()) {
900 format("{::_pbi::TcParser::MiniParse, {}},\n");
901 } else {
902 format(
903 "{$1$,\n"
904 " {$2$, $3$, $4$, PROTOBUF_FIELD_OFFSET($classname$, $5$)}},\n",
905 info.func_name, info.coded_tag, info.hasbit_idx, info.aux_idx,
906 FieldMemberName(info.field));
907 }
908 }
909 }
910
FormatFieldKind(Formatter & format,const TailCallTableInfo::FieldEntryInfo & entry,const Options & options,MessageSCCAnalyzer * scc_analyzer)911 static void FormatFieldKind(Formatter& format,
912 const TailCallTableInfo::FieldEntryInfo& entry,
913 const Options& options,
914 MessageSCCAnalyzer* scc_analyzer) {
915 const FieldDescriptor* field = entry.field;
916 // Spell the field kind in proto language declaration order, starting with
917 // cardinality:
918 format("(::_fl::kFc");
919 if (HasHasbit(field)) {
920 format("Optional");
921 } else if (field->is_repeated()) {
922 format("Repeated");
923 } else if (field->real_containing_oneof()) {
924 format("Oneof");
925 } else {
926 format("Singular");
927 }
928
929 // The rest of the type uses convenience aliases:
930 format(" | ::_fl::k");
931 if (field->is_repeated() && field->is_packed()) {
932 format("Packed");
933 }
934 switch (field->type()) {
935 case FieldDescriptor::TYPE_DOUBLE:
936 format("Double");
937 break;
938 case FieldDescriptor::TYPE_FLOAT:
939 format("Float");
940 break;
941 case FieldDescriptor::TYPE_FIXED32:
942 format("Fixed32");
943 break;
944 case FieldDescriptor::TYPE_SFIXED32:
945 format("SFixed32");
946 break;
947 case FieldDescriptor::TYPE_FIXED64:
948 format("Fixed64");
949 break;
950 case FieldDescriptor::TYPE_SFIXED64:
951 format("SFixed64");
952 break;
953 case FieldDescriptor::TYPE_BOOL:
954 format("Bool");
955 break;
956 case FieldDescriptor::TYPE_ENUM:
957 if (HasPreservingUnknownEnumSemantics(field)) {
958 // No validation is required.
959 format("OpenEnum");
960 } else if (entry.is_enum_range) {
961 // Validation is done by range check (start/length in FieldAux).
962 format("EnumRange");
963 } else {
964 // Validation uses the generated _IsValid function.
965 format("Enum");
966 }
967 break;
968 case FieldDescriptor::TYPE_UINT32:
969 format("UInt32");
970 break;
971 case FieldDescriptor::TYPE_SINT32:
972 format("SInt32");
973 break;
974 case FieldDescriptor::TYPE_INT32:
975 format("Int32");
976 break;
977 case FieldDescriptor::TYPE_UINT64:
978 format("UInt64");
979 break;
980 case FieldDescriptor::TYPE_SINT64:
981 format("SInt64");
982 break;
983 case FieldDescriptor::TYPE_INT64:
984 format("Int64");
985 break;
986
987 case FieldDescriptor::TYPE_BYTES:
988 format("Bytes");
989 break;
990 case FieldDescriptor::TYPE_STRING: {
991 auto mode = GetUtf8CheckMode(field, options);
992 switch (mode) {
993 case Utf8CheckMode::kStrict:
994 format("Utf8String");
995 break;
996 case Utf8CheckMode::kVerify:
997 format("RawString");
998 break;
999 case Utf8CheckMode::kNone:
1000 // Treat LITE_RUNTIME strings as bytes.
1001 format("Bytes");
1002 break;
1003 default:
1004 GOOGLE_LOG(FATAL) << "Invalid Utf8CheckMode (" << static_cast<int>(mode)
1005 << ") for " << field->DebugString();
1006 }
1007 break;
1008 }
1009
1010 case FieldDescriptor::TYPE_GROUP:
1011 format("Message | ::_fl::kRepGroup");
1012 break;
1013 case FieldDescriptor::TYPE_MESSAGE:
1014 if (field->is_map()) {
1015 format("Map");
1016 } else {
1017 format("Message");
1018 if (IsLazy(field, options, scc_analyzer)) {
1019 format(" | ::_fl::kRepLazy");
1020 } else if (IsImplicitWeakField(field, options, scc_analyzer)) {
1021 format(" | ::_fl::kRepIWeak");
1022 }
1023 }
1024 break;
1025 }
1026
1027 // Fill in extra information about string and bytes field representations.
1028 if (field->type() == FieldDescriptor::TYPE_BYTES ||
1029 field->type() == FieldDescriptor::TYPE_STRING) {
1030 if (field->is_repeated()) {
1031 format(" | ::_fl::kRepSString");
1032 } else {
1033 format(" | ::_fl::kRepAString");
1034 }
1035 }
1036
1037 format(")");
1038 }
1039
GenerateFieldEntries(Formatter & format)1040 void ParseFunctionGenerator::GenerateFieldEntries(Formatter& format) {
1041 for (const auto& entry : tc_table_info_->field_entries) {
1042 const FieldDescriptor* field = entry.field;
1043 PrintFieldComment(format, field);
1044 format("{");
1045 if (IsWeak(field, options_)) {
1046 // Weak fields are handled by the generated fallback function.
1047 // (These are handled by legacy Google-internal logic.)
1048 format("/* weak */ 0, 0, 0, 0");
1049 } else {
1050 const OneofDescriptor* oneof = field->real_containing_oneof();
1051 format("PROTOBUF_FIELD_OFFSET($classname$, $1$), $2$, $3$,\n ",
1052 FieldMemberName(field),
1053 (oneof ? oneof->index() : entry.hasbit_idx), entry.aux_idx);
1054 FormatFieldKind(format, entry, options_, scc_analyzer_);
1055 }
1056 format("},\n");
1057 }
1058 }
1059
1060 static constexpr int kMaxNameLength = 255;
1061
CalculateFieldNamesSize() const1062 int ParseFunctionGenerator::CalculateFieldNamesSize() const {
1063 // The full name of the message appears first.
1064 int size = std::min(static_cast<int>(descriptor_->full_name().size()),
1065 kMaxNameLength);
1066 int lengths_size = 1;
1067 for (const auto& entry : tc_table_info_->field_entries) {
1068 const FieldDescriptor* field = entry.field;
1069 GOOGLE_CHECK_LE(field->name().size(), kMaxNameLength);
1070 size += field->name().size();
1071 lengths_size += 1;
1072 }
1073 // align to an 8-byte boundary
1074 lengths_size = (lengths_size + 7) & -8;
1075 return size + lengths_size + 1;
1076 }
1077
FormatOctal(Formatter & format,int size)1078 static void FormatOctal(Formatter& format, int size) {
1079 int octal_size = ((size >> 6) & 3) * 100 + //
1080 ((size >> 3) & 7) * 10 + //
1081 ((size >> 0) & 7);
1082 format("\\$1$", octal_size);
1083 }
1084
GenerateFieldNames(Formatter & format)1085 void ParseFunctionGenerator::GenerateFieldNames(Formatter& format) {
1086 // First, we output the size of each string, as an unsigned byte. The first
1087 // string is the message name.
1088 int count = 1;
1089 format("\"");
1090 FormatOctal(format,
1091 std::min(static_cast<int>(descriptor_->full_name().size()), 255));
1092 for (const auto& entry : tc_table_info_->field_entries) {
1093 FormatOctal(format, entry.field->name().size());
1094 ++count;
1095 }
1096 while (count & 7) { // align to an 8-byte boundary
1097 format("\\0");
1098 ++count;
1099 }
1100 format("\"\n");
1101 // The message name is stored at the beginning of the string
1102 std::string message_name = descriptor_->full_name();
1103 if (message_name.size() > kMaxNameLength) {
1104 static constexpr int kNameHalfLength = (kMaxNameLength - 3) / 2;
1105 message_name = StrCat(
1106 message_name.substr(0, kNameHalfLength), "...",
1107 message_name.substr(message_name.size() - kNameHalfLength));
1108 }
1109 format("\"$1$\"\n", message_name);
1110 // Then we output the actual field names
1111 for (const auto& entry : tc_table_info_->field_entries) {
1112 const FieldDescriptor* field = entry.field;
1113 format("\"$1$\"\n", field->name());
1114 }
1115 }
1116
GenerateArenaString(Formatter & format,const FieldDescriptor * field)1117 void ParseFunctionGenerator::GenerateArenaString(Formatter& format,
1118 const FieldDescriptor* field) {
1119 if (HasHasbit(field)) {
1120 format("_Internal::set_has_$1$(&$has_bits$);\n", FieldName(field));
1121 }
1122 format(
1123 "if (arena != nullptr) {\n"
1124 " ptr = ctx->ReadArenaString(ptr, &$msg$$field$, arena");
1125 if (IsStringInlined(field, options_)) {
1126 GOOGLE_DCHECK(!inlined_string_indices_.empty());
1127 int inlined_string_index = inlined_string_indices_[field->index()];
1128 GOOGLE_DCHECK_GT(inlined_string_index, 0);
1129 format(", &$msg$$inlined_string_donated_array$[0], $1$, $this$",
1130 inlined_string_index);
1131 } else {
1132 GOOGLE_DCHECK(field->default_value_string().empty());
1133 }
1134 format(
1135 ");\n"
1136 "} else {\n"
1137 " ptr = ::_pbi::InlineGreedyStringParser("
1138 "$msg$$field$.MutableNoCopy(nullptr), ptr, ctx);\n"
1139 "}\n"
1140 "const std::string* str = &$msg$$field$.Get(); (void)str;\n");
1141 }
1142
GenerateStrings(Formatter & format,const FieldDescriptor * field,bool check_utf8)1143 void ParseFunctionGenerator::GenerateStrings(Formatter& format,
1144 const FieldDescriptor* field,
1145 bool check_utf8) {
1146 FieldOptions::CType ctype = FieldOptions::STRING;
1147 if (!options_.opensource_runtime) {
1148 // Open source doesn't support other ctypes;
1149 ctype = field->options().ctype();
1150 }
1151 if (!field->is_repeated() && !options_.opensource_runtime &&
1152 GetOptimizeFor(field->file(), options_) != FileOptions::LITE_RUNTIME &&
1153 // For now only use arena string for strings with empty defaults.
1154 field->default_value_string().empty() &&
1155 !field->real_containing_oneof() && ctype == FieldOptions::STRING) {
1156 GenerateArenaString(format, field);
1157 } else {
1158 std::string parser_name;
1159 switch (ctype) {
1160 case FieldOptions::STRING:
1161 parser_name = "GreedyStringParser";
1162 break;
1163 case FieldOptions::CORD:
1164 parser_name = "CordParser";
1165 break;
1166 case FieldOptions::STRING_PIECE:
1167 parser_name = "StringPieceParser";
1168 break;
1169 }
1170 format(
1171 "auto str = $msg$$1$$2$_$name$();\n"
1172 "ptr = ::_pbi::Inline$3$(str, ptr, ctx);\n",
1173 HasInternalAccessors(ctype) ? "_internal_" : "",
1174 field->is_repeated() && !field->is_packable() ? "add" : "mutable",
1175 parser_name);
1176 }
1177 // It is intentionally placed before VerifyUTF8 because it doesn't make sense
1178 // to verify UTF8 when we already know parsing failed.
1179 format("CHK_(ptr);\n");
1180 if (!check_utf8) return; // return if this is a bytes field
1181 auto level = GetUtf8CheckMode(field, options_);
1182 switch (level) {
1183 case Utf8CheckMode::kNone:
1184 return;
1185 case Utf8CheckMode::kVerify:
1186 format("#ifndef NDEBUG\n");
1187 break;
1188 case Utf8CheckMode::kStrict:
1189 format("CHK_(");
1190 break;
1191 }
1192 std::string field_name;
1193 field_name = "nullptr";
1194 if (HasDescriptorMethods(field->file(), options_)) {
1195 field_name = StrCat("\"", field->full_name(), "\"");
1196 }
1197 format("::_pbi::VerifyUTF8(str, $1$)", field_name);
1198 switch (level) {
1199 case Utf8CheckMode::kNone:
1200 return;
1201 case Utf8CheckMode::kVerify:
1202 format(
1203 ";\n"
1204 "#endif // !NDEBUG\n");
1205 break;
1206 case Utf8CheckMode::kStrict:
1207 format(");\n");
1208 break;
1209 }
1210 }
1211
GenerateLengthDelim(Formatter & format,const FieldDescriptor * field)1212 void ParseFunctionGenerator::GenerateLengthDelim(Formatter& format,
1213 const FieldDescriptor* field) {
1214 if (field->is_packable()) {
1215 if (field->type() == FieldDescriptor::TYPE_ENUM &&
1216 !HasPreservingUnknownEnumSemantics(field)) {
1217 std::string enum_type = QualifiedClassName(field->enum_type(), options_);
1218 format(
1219 "ptr = "
1220 "::$proto_ns$::internal::Packed$1$Parser<$unknown_fields_type$>("
1221 "$msg$_internal_mutable_$name$(), ptr, ctx, $2$_IsValid, "
1222 "&$msg$_internal_metadata_, $3$);\n",
1223 DeclaredTypeMethodName(field->type()), enum_type, field->number());
1224 } else {
1225 format(
1226 "ptr = ::$proto_ns$::internal::Packed$1$Parser("
1227 "$msg$_internal_mutable_$name$(), ptr, ctx);\n",
1228 DeclaredTypeMethodName(field->type()));
1229 }
1230 format("CHK_(ptr);\n");
1231 } else {
1232 auto field_type = field->type();
1233 switch (field_type) {
1234 case FieldDescriptor::TYPE_STRING:
1235 GenerateStrings(format, field, true /* utf8 */);
1236 break;
1237 case FieldDescriptor::TYPE_BYTES:
1238 GenerateStrings(format, field, false /* utf8 */);
1239 break;
1240 case FieldDescriptor::TYPE_MESSAGE: {
1241 if (field->is_map()) {
1242 const FieldDescriptor* val = field->message_type()->map_value();
1243 GOOGLE_CHECK(val);
1244 if (val->type() == FieldDescriptor::TYPE_ENUM &&
1245 !HasPreservingUnknownEnumSemantics(field)) {
1246 format(
1247 "auto object = "
1248 "::$proto_ns$::internal::InitEnumParseWrapper<"
1249 "$unknown_fields_type$>(&$msg$$field$, $1$_IsValid, "
1250 "$2$, &$msg$_internal_metadata_);\n"
1251 "ptr = ctx->ParseMessage(&object, ptr);\n",
1252 QualifiedClassName(val->enum_type(), options_),
1253 field->number());
1254 } else {
1255 format("ptr = ctx->ParseMessage(&$msg$$field$, ptr);\n");
1256 }
1257 } else if (IsLazy(field, options_, scc_analyzer_)) {
1258 bool eager_verify =
1259 IsEagerlyVerifiedLazy(field, options_, scc_analyzer_);
1260 if (ShouldVerify(descriptor_, options_, scc_analyzer_)) {
1261 format(
1262 "ctx->set_lazy_eager_verify_func($1$);\n",
1263 eager_verify
1264 ? StrCat("&", ClassName(field->message_type(), true),
1265 "::InternalVerify")
1266 : "nullptr");
1267 }
1268 if (field->real_containing_oneof()) {
1269 format(
1270 "if (!$msg$_internal_has_$name$()) {\n"
1271 " $msg$clear_$1$();\n"
1272 " $msg$$field$ = ::$proto_ns$::Arena::CreateMessage<\n"
1273 " ::$proto_ns$::internal::LazyField>("
1274 "$msg$GetArenaForAllocation());\n"
1275 " $msg$set_has_$name$();\n"
1276 "}\n"
1277 "auto* lazy_field = $msg$$field$;\n",
1278 field->containing_oneof()->name());
1279 } else if (HasHasbit(field)) {
1280 format(
1281 "_Internal::set_has_$name$(&$has_bits$);\n"
1282 "auto* lazy_field = &$msg$$field$;\n");
1283 } else {
1284 format("auto* lazy_field = &$msg$$field$;\n");
1285 }
1286 format(
1287 "::$proto_ns$::internal::LazyFieldParseHelper<\n"
1288 " ::$proto_ns$::internal::LazyField> parse_helper(\n"
1289 " $1$::default_instance(),\n"
1290 " $msg$GetArenaForAllocation(),\n"
1291 " ::google::protobuf::internal::LazyVerifyOption::$2$,\n"
1292 " lazy_field);\n"
1293 "ptr = ctx->ParseMessage(&parse_helper, ptr);\n",
1294 FieldMessageTypeName(field, options_),
1295 eager_verify ? "kEager" : "kLazy");
1296 if (ShouldVerify(descriptor_, options_, scc_analyzer_) &&
1297 eager_verify) {
1298 format("ctx->set_lazy_eager_verify_func(nullptr);\n");
1299 }
1300 } else if (IsImplicitWeakField(field, options_, scc_analyzer_)) {
1301 if (!field->is_repeated()) {
1302 format(
1303 "ptr = ctx->ParseMessage(_Internal::mutable_$name$($this$), "
1304 "ptr);\n");
1305 } else {
1306 format(
1307 "ptr = ctx->ParseMessage($msg$$field$.AddWeak("
1308 "reinterpret_cast<const ::$proto_ns$::MessageLite*>($1$ptr_)"
1309 "), ptr);\n",
1310 QualifiedDefaultInstanceName(field->message_type(), options_));
1311 }
1312 } else if (IsWeak(field, options_)) {
1313 format(
1314 "{\n"
1315 " auto* default_ = &reinterpret_cast<const Message&>($1$);\n"
1316 " ptr = ctx->ParseMessage($msg$$weak_field_map$.MutableMessage("
1317 "$2$, default_), ptr);\n"
1318 "}\n",
1319 QualifiedDefaultInstanceName(field->message_type(), options_),
1320 field->number());
1321 } else {
1322 format(
1323 "ptr = ctx->ParseMessage($msg$_internal_$mutable_field$(), "
1324 "ptr);\n");
1325 }
1326 format("CHK_(ptr);\n");
1327 break;
1328 }
1329 default:
1330 GOOGLE_LOG(FATAL) << "Illegal combination for length delimited wiretype "
1331 << " filed type is " << field->type();
1332 }
1333 }
1334 }
1335
ShouldRepeat(const FieldDescriptor * descriptor,WireFormatLite::WireType wiretype)1336 static bool ShouldRepeat(const FieldDescriptor* descriptor,
1337 WireFormatLite::WireType wiretype) {
1338 constexpr int kMaxTwoByteFieldNumber = 16 * 128;
1339 return descriptor->number() < kMaxTwoByteFieldNumber &&
1340 descriptor->is_repeated() &&
1341 (!descriptor->is_packable() ||
1342 wiretype != WireFormatLite::WIRETYPE_LENGTH_DELIMITED);
1343 }
1344
GenerateFieldBody(Formatter & format,WireFormatLite::WireType wiretype,const FieldDescriptor * field)1345 void ParseFunctionGenerator::GenerateFieldBody(
1346 Formatter& format, WireFormatLite::WireType wiretype,
1347 const FieldDescriptor* field) {
1348 Formatter::SaveState formatter_state(&format);
1349 format.AddMap(
1350 {{"name", FieldName(field)},
1351 {"primitive_type", PrimitiveTypeName(options_, field->cpp_type())}});
1352 if (field->is_repeated()) {
1353 format.AddMap({{"put_field", StrCat("add_", FieldName(field))},
1354 {"mutable_field", StrCat("add_", FieldName(field))}});
1355 } else {
1356 format.AddMap(
1357 {{"put_field", StrCat("set_", FieldName(field))},
1358 {"mutable_field", StrCat("mutable_", FieldName(field))}});
1359 }
1360 uint32_t tag = WireFormatLite::MakeTag(field->number(), wiretype);
1361 switch (wiretype) {
1362 case WireFormatLite::WIRETYPE_VARINT: {
1363 std::string type = PrimitiveTypeName(options_, field->cpp_type());
1364 if (field->type() == FieldDescriptor::TYPE_ENUM) {
1365 format.Set("enum_type",
1366 QualifiedClassName(field->enum_type(), options_));
1367 format(
1368 "$uint64$ val = ::$proto_ns$::internal::ReadVarint64(&ptr);\n"
1369 "CHK_(ptr);\n");
1370 if (!HasPreservingUnknownEnumSemantics(field)) {
1371 format("if (PROTOBUF_PREDICT_TRUE($enum_type$_IsValid(val))) {\n");
1372 format.Indent();
1373 }
1374 format("$msg$_internal_$put_field$(static_cast<$enum_type$>(val));\n");
1375 if (!HasPreservingUnknownEnumSemantics(field)) {
1376 format.Outdent();
1377 format(
1378 "} else {\n"
1379 " ::$proto_ns$::internal::WriteVarint("
1380 "$1$, val, $msg$mutable_unknown_fields());\n"
1381 "}\n",
1382 field->number());
1383 }
1384 } else {
1385 std::string size = (field->type() == FieldDescriptor::TYPE_INT32 ||
1386 field->type() == FieldDescriptor::TYPE_SINT32 ||
1387 field->type() == FieldDescriptor::TYPE_UINT32)
1388 ? "32"
1389 : "64";
1390 std::string zigzag;
1391 if ((field->type() == FieldDescriptor::TYPE_SINT32 ||
1392 field->type() == FieldDescriptor::TYPE_SINT64)) {
1393 zigzag = "ZigZag";
1394 }
1395 if (field->is_repeated() || field->real_containing_oneof()) {
1396 format(
1397 "$msg$_internal_$put_field$("
1398 "::$proto_ns$::internal::ReadVarint$1$$2$(&ptr));\n"
1399 "CHK_(ptr);\n",
1400 zigzag, size);
1401 } else {
1402 if (HasHasbit(field)) {
1403 format("_Internal::set_has_$name$(&$has_bits$);\n");
1404 }
1405 format(
1406 "$msg$$field$ = ::$proto_ns$::internal::ReadVarint$1$$2$(&ptr);\n"
1407 "CHK_(ptr);\n",
1408 zigzag, size);
1409 }
1410 }
1411 break;
1412 }
1413 case WireFormatLite::WIRETYPE_FIXED32:
1414 case WireFormatLite::WIRETYPE_FIXED64: {
1415 if (field->is_repeated() || field->real_containing_oneof()) {
1416 format(
1417 "$msg$_internal_$put_field$("
1418 "::$proto_ns$::internal::UnalignedLoad<$primitive_type$>(ptr));\n"
1419 "ptr += sizeof($primitive_type$);\n");
1420 } else {
1421 if (HasHasbit(field)) {
1422 format("_Internal::set_has_$name$(&$has_bits$);\n");
1423 }
1424 format(
1425 "$msg$$field$ = "
1426 "::$proto_ns$::internal::UnalignedLoad<$primitive_type$>(ptr);\n"
1427 "ptr += sizeof($primitive_type$);\n");
1428 }
1429 break;
1430 }
1431 case WireFormatLite::WIRETYPE_LENGTH_DELIMITED: {
1432 GenerateLengthDelim(format, field);
1433 break;
1434 }
1435 case WireFormatLite::WIRETYPE_START_GROUP: {
1436 format(
1437 "ptr = ctx->ParseGroup($msg$_internal_$mutable_field$(), ptr, $1$);\n"
1438 "CHK_(ptr);\n",
1439 tag);
1440 break;
1441 }
1442 case WireFormatLite::WIRETYPE_END_GROUP: {
1443 GOOGLE_LOG(FATAL) << "Can't have end group field\n";
1444 break;
1445 }
1446 } // switch (wire_type)
1447 }
1448
1449 // Returns the tag for this field and in case of repeated packable fields,
1450 // sets a fallback tag in fallback_tag_ptr.
ExpectedTag(const FieldDescriptor * field,uint32_t * fallback_tag_ptr)1451 static uint32_t ExpectedTag(const FieldDescriptor* field,
1452 uint32_t* fallback_tag_ptr) {
1453 uint32_t expected_tag;
1454 if (field->is_packable()) {
1455 auto expected_wiretype = WireFormat::WireTypeForFieldType(field->type());
1456 expected_tag = WireFormatLite::MakeTag(field->number(), expected_wiretype);
1457 GOOGLE_CHECK(expected_wiretype != WireFormatLite::WIRETYPE_LENGTH_DELIMITED);
1458 auto fallback_wiretype = WireFormatLite::WIRETYPE_LENGTH_DELIMITED;
1459 uint32_t fallback_tag =
1460 WireFormatLite::MakeTag(field->number(), fallback_wiretype);
1461
1462 if (field->is_packed()) std::swap(expected_tag, fallback_tag);
1463 *fallback_tag_ptr = fallback_tag;
1464 } else {
1465 auto expected_wiretype = WireFormat::WireTypeForField(field);
1466 expected_tag = WireFormatLite::MakeTag(field->number(), expected_wiretype);
1467 }
1468 return expected_tag;
1469 }
1470
1471 // These variables are used by the generated parse iteration, and must already
1472 // be defined in the generated code:
1473 // - `const char* ptr`: the input buffer.
1474 // - `ParseContext* ctx`: the associated context for `ptr`.
1475 // - implicit `this`: i.e., we must be in a non-static member function.
1476 //
1477 // The macro `CHK_(x)` must be defined. It should return an error condition if
1478 // the macro parameter is false.
1479 //
1480 // Whenever an END_GROUP tag was read, or tag 0 was read, the generated code
1481 // branches to the label `message_done`.
1482 //
1483 // These formatter variables are used:
1484 // - `next_tag`: a single statement to begin parsing the next tag.
1485 //
1486 // At the end of the generated code, the enclosing function should proceed to
1487 // parse the next tag in the stream.
GenerateParseIterationBody(Formatter & format,const Descriptor * descriptor,const std::vector<const FieldDescriptor * > & fields)1488 void ParseFunctionGenerator::GenerateParseIterationBody(
1489 Formatter& format, const Descriptor* descriptor,
1490 const std::vector<const FieldDescriptor*>& fields) {
1491 if (!fields.empty()) {
1492 GenerateFieldSwitch(format, fields);
1493 // Each field `case` only considers field number. Field numbers that are
1494 // not defined in the message, or tags with an incompatible wire type, are
1495 // considered "unusual" cases. They will be handled by the logic below.
1496 format.Outdent();
1497 format("handle_unusual:\n");
1498 format.Indent();
1499 }
1500
1501 // Unusual/extension/unknown case:
1502 format(
1503 "if ((tag == 0) || ((tag & 7) == 4)) {\n"
1504 " CHK_(ptr);\n"
1505 " ctx->SetLastTag(tag);\n"
1506 " goto message_done;\n"
1507 "}\n");
1508 if (IsMapEntryMessage(descriptor)) {
1509 format("$next_tag$;\n");
1510 } else {
1511 if (descriptor->extension_range_count() > 0) {
1512 format("if (");
1513 for (int i = 0; i < descriptor->extension_range_count(); i++) {
1514 const Descriptor::ExtensionRange* range =
1515 descriptor->extension_range(i);
1516 if (i > 0) format(" ||\n ");
1517
1518 uint32_t start_tag = WireFormatLite::MakeTag(
1519 range->start, static_cast<WireFormatLite::WireType>(0));
1520 uint32_t end_tag = WireFormatLite::MakeTag(
1521 range->end, static_cast<WireFormatLite::WireType>(0));
1522
1523 if (range->end > FieldDescriptor::kMaxNumber) {
1524 format("($1$u <= tag)", start_tag);
1525 } else {
1526 format("($1$u <= tag && tag < $2$u)", start_tag, end_tag);
1527 }
1528 }
1529 format(
1530 ") {\n"
1531 " ptr = $msg$$extensions$.ParseField(tag, ptr, "
1532 "internal_default_instance(), &$msg$_internal_metadata_, ctx);\n"
1533 " CHK_(ptr != nullptr);\n"
1534 " $next_tag$;\n"
1535 "}\n");
1536 }
1537 format(
1538 "ptr = UnknownFieldParse(\n"
1539 " tag,\n"
1540 " $msg$_internal_metadata_.mutable_unknown_fields<"
1541 "$unknown_fields_type$>(),\n"
1542 " ptr, ctx);\n"
1543 "CHK_(ptr != nullptr);\n");
1544 }
1545 }
1546
GenerateFieldSwitch(Formatter & format,const std::vector<const FieldDescriptor * > & fields)1547 void ParseFunctionGenerator::GenerateFieldSwitch(
1548 Formatter& format, const std::vector<const FieldDescriptor*>& fields) {
1549 format("switch (tag >> 3) {\n");
1550 format.Indent();
1551
1552 for (const auto* field : fields) {
1553 format.Set("field", FieldMemberName(field));
1554 PrintFieldComment(format, field);
1555 format("case $1$:\n", field->number());
1556 format.Indent();
1557 uint32_t fallback_tag = 0;
1558 uint32_t expected_tag = ExpectedTag(field, &fallback_tag);
1559 format("if (PROTOBUF_PREDICT_TRUE(static_cast<$uint8$>(tag) == $1$)) {\n",
1560 expected_tag & 0xFF);
1561 format.Indent();
1562 auto wiretype = WireFormatLite::GetTagWireType(expected_tag);
1563 uint32_t tag = WireFormatLite::MakeTag(field->number(), wiretype);
1564 int tag_size = io::CodedOutputStream::VarintSize32(tag);
1565 bool is_repeat = ShouldRepeat(field, wiretype);
1566 if (is_repeat) {
1567 format(
1568 "ptr -= $1$;\n"
1569 "do {\n"
1570 " ptr += $1$;\n",
1571 tag_size);
1572 format.Indent();
1573 }
1574 GenerateFieldBody(format, wiretype, field);
1575 if (is_repeat) {
1576 format.Outdent();
1577 format(
1578 " if (!ctx->DataAvailable(ptr)) break;\n"
1579 "} while (::$proto_ns$::internal::ExpectTag<$1$>(ptr));\n",
1580 tag);
1581 }
1582 format.Outdent();
1583 if (fallback_tag) {
1584 format("} else if (static_cast<$uint8$>(tag) == $1$) {\n",
1585 fallback_tag & 0xFF);
1586 format.Indent();
1587 GenerateFieldBody(format, WireFormatLite::GetTagWireType(fallback_tag),
1588 field);
1589 format.Outdent();
1590 }
1591 format(
1592 "} else\n"
1593 " goto handle_unusual;\n"
1594 "$next_tag$;\n");
1595 format.Outdent();
1596 } // for loop over ordered fields
1597
1598 format(
1599 "default:\n"
1600 " goto handle_unusual;\n");
1601 format.Outdent();
1602 format("} // switch\n");
1603 }
1604
1605 namespace {
1606
FieldParseFunctionName(const TailCallTableInfo::FieldEntryInfo & entry,const Options & options)1607 std::string FieldParseFunctionName(
1608 const TailCallTableInfo::FieldEntryInfo& entry, const Options& options) {
1609 const FieldDescriptor* field = entry.field;
1610 std::string name = "::_pbi::TcParser::Fast";
1611
1612 switch (field->type()) {
1613 case FieldDescriptor::TYPE_FIXED32:
1614 case FieldDescriptor::TYPE_SFIXED32:
1615 case FieldDescriptor::TYPE_FLOAT:
1616 name.append("F32");
1617 break;
1618
1619 case FieldDescriptor::TYPE_FIXED64:
1620 case FieldDescriptor::TYPE_SFIXED64:
1621 case FieldDescriptor::TYPE_DOUBLE:
1622 name.append("F64");
1623 break;
1624
1625 case FieldDescriptor::TYPE_BOOL:
1626 name.append("V8");
1627 break;
1628 case FieldDescriptor::TYPE_INT32:
1629 case FieldDescriptor::TYPE_UINT32:
1630 name.append("V32");
1631 break;
1632 case FieldDescriptor::TYPE_INT64:
1633 case FieldDescriptor::TYPE_UINT64:
1634 name.append("V64");
1635 break;
1636
1637 case FieldDescriptor::TYPE_ENUM:
1638 if (HasPreservingUnknownEnumSemantics(field)) {
1639 name.append("V32");
1640 break;
1641 }
1642 if (field->is_repeated() && field->is_packed()) {
1643 GOOGLE_LOG(DFATAL) << "Enum validation not handled: " << field->DebugString();
1644 return "";
1645 }
1646 name.append(entry.is_enum_range ? "Er" : "Ev");
1647 break;
1648
1649 case FieldDescriptor::TYPE_SINT32:
1650 name.append("Z32");
1651 break;
1652 case FieldDescriptor::TYPE_SINT64:
1653 name.append("Z64");
1654 break;
1655
1656 case FieldDescriptor::TYPE_BYTES:
1657 name.append("B");
1658 if (IsStringInlined(field, options)) {
1659 name.append("i");
1660 }
1661 break;
1662 case FieldDescriptor::TYPE_STRING:
1663 switch (GetUtf8CheckMode(field, options)) {
1664 case Utf8CheckMode::kNone:
1665 name.append("B");
1666 break;
1667 case Utf8CheckMode::kVerify:
1668 name.append("S");
1669 break;
1670 case Utf8CheckMode::kStrict:
1671 name.append("U");
1672 break;
1673 default:
1674 GOOGLE_LOG(DFATAL) << "Mode not handled: "
1675 << static_cast<int>(GetUtf8CheckMode(field, options));
1676 return "";
1677 }
1678 if (IsStringInlined(field, options)) {
1679 name.append("i");
1680 }
1681 break;
1682
1683 case FieldDescriptor::TYPE_MESSAGE:
1684 name.append("M");
1685 break;
1686 case FieldDescriptor::TYPE_GROUP:
1687 name.append("G");
1688 break;
1689
1690 default:
1691 GOOGLE_LOG(DFATAL) << "Type not handled: " << field->DebugString();
1692 return "";
1693 }
1694
1695 // The field implementation functions are prefixed by cardinality:
1696 // `S` for optional or implicit fields.
1697 // `R` for non-packed repeated.
1698 // `P` for packed repeated.
1699 name.append(field->is_packed() ? "P"
1700 : field->is_repeated() ? "R"
1701 : field->real_containing_oneof() ? "O"
1702 : "S");
1703
1704 // Append the tag length. Fast parsing only handles 1- or 2-byte tags.
1705 name.append(TagSize(field->number()) == 1 ? "1" : "2");
1706
1707 return name;
1708 }
1709
1710 } // namespace
1711
1712 } // namespace cpp
1713 } // namespace compiler
1714 } // namespace protobuf
1715 } // namespace google
1716