1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc. All rights reserved.
3 //
4 // Use of this source code is governed by a BSD-style
5 // license that can be found in the LICENSE file or at
6 // https://developers.google.com/open-source/licenses/bsd
7
8 // Author: kenton@google.com (Kenton Varda)
9 // Based on original Protocol Buffers design by
10 // Sanjay Ghemawat, Jeff Dean, and others.
11
#include "google/protobuf/wire_format_lite.h"

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <limits>
#include <new>
#include <string>
#include <type_traits>

#include "absl/log/absl_check.h"
#include "absl/log/absl_log.h"
#include "absl/log/log.h"
#include "absl/strings/cord.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/str_format.h"
#include "absl/strings/string_view.h"
#include "google/protobuf/io/coded_stream.h"
#include "google/protobuf/message_lite.h"
#include "google/protobuf/repeated_field.h"
#include "utf8_validity.h"
32
33
34 // Must be included last.
35 #include "google/protobuf/port_def.inc"
36
37 namespace google {
38 namespace protobuf {
39 namespace internal {
40
// Namespace-scope definitions of WireFormatLite's static constants. No
// initializers appear here; the values come from the in-class declarations,
// which are not visible in this file.
#if !defined(_MSC_VER) || (_MSC_VER >= 1900 && _MSC_VER < 1912)
// Old version of MSVC doesn't like definitions of inline constants, GCC
// requires them.
const int WireFormatLite::kMessageSetItemStartTag;
const int WireFormatLite::kMessageSetItemEndTag;
const int WireFormatLite::kMessageSetTypeIdTag;
const int WireFormatLite::kMessageSetMessageTag;

#endif

// The constexpr size constants are defined unconditionally, unlike the
// MessageSet tag constants guarded above.
constexpr size_t WireFormatLite::kFixed32Size;
constexpr size_t WireFormatLite::kFixed64Size;
constexpr size_t WireFormatLite::kSFixed32Size;
constexpr size_t WireFormatLite::kSFixed64Size;
constexpr size_t WireFormatLite::kFloatSize;
constexpr size_t WireFormatLite::kDoubleSize;
constexpr size_t WireFormatLite::kBoolSize;
58
// Combined encoded size of the four MessageSet item tags (item start, item
// end, type id, message), computed at compile time by summing
// StaticVarintSize32<tag>::value for each tag.
// IBM xlC requires prefixing constants with WireFormatLite::
const size_t WireFormatLite::kMessageSetItemTagsSize =
    io::CodedOutputStream::StaticVarintSize32<
        WireFormatLite::kMessageSetItemStartTag>::value +
    io::CodedOutputStream::StaticVarintSize32<
        WireFormatLite::kMessageSetItemEndTag>::value +
    io::CodedOutputStream::StaticVarintSize32<
        WireFormatLite::kMessageSetTypeIdTag>::value +
    io::CodedOutputStream::StaticVarintSize32<
        WireFormatLite::kMessageSetMessageTag>::value;
69
// Lookup table from a field type to the CppType used to represent it. The
// trailing comment on each row names the field type that row is for; slot 0
// does not correspond to any field type.
const WireFormatLite::CppType
    WireFormatLite::kFieldTypeToCppTypeMap[MAX_FIELD_TYPE + 1] = {
        static_cast<CppType>(0),  // 0 is reserved for errors

        CPPTYPE_DOUBLE,   // TYPE_DOUBLE
        CPPTYPE_FLOAT,    // TYPE_FLOAT
        CPPTYPE_INT64,    // TYPE_INT64
        CPPTYPE_UINT64,   // TYPE_UINT64
        CPPTYPE_INT32,    // TYPE_INT32
        CPPTYPE_UINT64,   // TYPE_FIXED64
        CPPTYPE_UINT32,   // TYPE_FIXED32
        CPPTYPE_BOOL,     // TYPE_BOOL
        CPPTYPE_STRING,   // TYPE_STRING
        CPPTYPE_MESSAGE,  // TYPE_GROUP
        CPPTYPE_MESSAGE,  // TYPE_MESSAGE
        CPPTYPE_STRING,   // TYPE_BYTES
        CPPTYPE_UINT32,   // TYPE_UINT32
        CPPTYPE_ENUM,     // TYPE_ENUM
        CPPTYPE_INT32,    // TYPE_SFIXED32
        CPPTYPE_INT64,    // TYPE_SFIXED64
        CPPTYPE_INT32,    // TYPE_SINT32
        CPPTYPE_INT64,    // TYPE_SINT64
};
93
// Lookup table from a field type to the wire type used for its non-packed
// encoding. The trailing comment on each row names the field type that row
// is for; slot 0 holds an invalid wire type (-1).
const WireFormatLite::WireType
    WireFormatLite::kWireTypeForFieldType[MAX_FIELD_TYPE + 1] = {
        static_cast<WireFormatLite::WireType>(-1),  // invalid
        WireFormatLite::WIRETYPE_FIXED64,           // TYPE_DOUBLE
        WireFormatLite::WIRETYPE_FIXED32,           // TYPE_FLOAT
        WireFormatLite::WIRETYPE_VARINT,            // TYPE_INT64
        WireFormatLite::WIRETYPE_VARINT,            // TYPE_UINT64
        WireFormatLite::WIRETYPE_VARINT,            // TYPE_INT32
        WireFormatLite::WIRETYPE_FIXED64,           // TYPE_FIXED64
        WireFormatLite::WIRETYPE_FIXED32,           // TYPE_FIXED32
        WireFormatLite::WIRETYPE_VARINT,            // TYPE_BOOL
        WireFormatLite::WIRETYPE_LENGTH_DELIMITED,  // TYPE_STRING
        WireFormatLite::WIRETYPE_START_GROUP,       // TYPE_GROUP
        WireFormatLite::WIRETYPE_LENGTH_DELIMITED,  // TYPE_MESSAGE
        WireFormatLite::WIRETYPE_LENGTH_DELIMITED,  // TYPE_BYTES
        WireFormatLite::WIRETYPE_VARINT,            // TYPE_UINT32
        WireFormatLite::WIRETYPE_VARINT,            // TYPE_ENUM
        WireFormatLite::WIRETYPE_FIXED32,           // TYPE_SFIXED32
        WireFormatLite::WIRETYPE_FIXED64,           // TYPE_SFIXED64
        WireFormatLite::WIRETYPE_VARINT,            // TYPE_SINT32
        WireFormatLite::WIRETYPE_VARINT,            // TYPE_SINT64
};
116
SkipField(io::CodedInputStream * input,uint32_t tag)117 bool WireFormatLite::SkipField(io::CodedInputStream* input, uint32_t tag) {
118 // Field number 0 is illegal.
119 if (WireFormatLite::GetTagFieldNumber(tag) == 0) return false;
120 switch (WireFormatLite::GetTagWireType(tag)) {
121 case WireFormatLite::WIRETYPE_VARINT: {
122 uint64_t value;
123 if (!input->ReadVarint64(&value)) return false;
124 return true;
125 }
126 case WireFormatLite::WIRETYPE_FIXED64: {
127 uint64_t value;
128 if (!input->ReadLittleEndian64(&value)) return false;
129 return true;
130 }
131 case WireFormatLite::WIRETYPE_LENGTH_DELIMITED: {
132 uint32_t length;
133 if (!input->ReadVarint32(&length)) return false;
134 if (!input->Skip(length)) return false;
135 return true;
136 }
137 case WireFormatLite::WIRETYPE_START_GROUP: {
138 if (!input->IncrementRecursionDepth()) return false;
139 if (!SkipMessage(input)) return false;
140 input->DecrementRecursionDepth();
141 // Check that the ending tag matched the starting tag.
142 if (!input->LastTagWas(
143 WireFormatLite::MakeTag(WireFormatLite::GetTagFieldNumber(tag),
144 WireFormatLite::WIRETYPE_END_GROUP))) {
145 return false;
146 }
147 return true;
148 }
149 case WireFormatLite::WIRETYPE_END_GROUP: {
150 return false;
151 }
152 case WireFormatLite::WIRETYPE_FIXED32: {
153 uint32_t value;
154 if (!input->ReadLittleEndian32(&value)) return false;
155 return true;
156 }
157 default: {
158 return false;
159 }
160 }
161 }
162
SkipField(io::CodedInputStream * input,uint32_t tag,io::CodedOutputStream * output)163 bool WireFormatLite::SkipField(io::CodedInputStream* input, uint32_t tag,
164 io::CodedOutputStream* output) {
165 // Field number 0 is illegal.
166 if (WireFormatLite::GetTagFieldNumber(tag) == 0) return false;
167 switch (WireFormatLite::GetTagWireType(tag)) {
168 case WireFormatLite::WIRETYPE_VARINT: {
169 uint64_t value;
170 if (!input->ReadVarint64(&value)) return false;
171 output->WriteVarint32(tag);
172 output->WriteVarint64(value);
173 return true;
174 }
175 case WireFormatLite::WIRETYPE_FIXED64: {
176 uint64_t value;
177 if (!input->ReadLittleEndian64(&value)) return false;
178 output->WriteVarint32(tag);
179 output->WriteLittleEndian64(value);
180 return true;
181 }
182 case WireFormatLite::WIRETYPE_LENGTH_DELIMITED: {
183 uint32_t length;
184 if (!input->ReadVarint32(&length)) return false;
185 output->WriteVarint32(tag);
186 output->WriteVarint32(length);
187 // TODO: Provide API to prevent extra string copying.
188 std::string temp;
189 if (!input->ReadString(&temp, length)) return false;
190 output->WriteString(temp);
191 return true;
192 }
193 case WireFormatLite::WIRETYPE_START_GROUP: {
194 output->WriteVarint32(tag);
195 if (!input->IncrementRecursionDepth()) return false;
196 if (!SkipMessage(input, output)) return false;
197 input->DecrementRecursionDepth();
198 // Check that the ending tag matched the starting tag.
199 if (!input->LastTagWas(
200 WireFormatLite::MakeTag(WireFormatLite::GetTagFieldNumber(tag),
201 WireFormatLite::WIRETYPE_END_GROUP))) {
202 return false;
203 }
204 return true;
205 }
206 case WireFormatLite::WIRETYPE_END_GROUP: {
207 return false;
208 }
209 case WireFormatLite::WIRETYPE_FIXED32: {
210 uint32_t value;
211 if (!input->ReadLittleEndian32(&value)) return false;
212 output->WriteVarint32(tag);
213 output->WriteLittleEndian32(value);
214 return true;
215 }
216 default: {
217 return false;
218 }
219 }
220 }
221
SkipMessage(io::CodedInputStream * input)222 bool WireFormatLite::SkipMessage(io::CodedInputStream* input) {
223 while (true) {
224 uint32_t tag = input->ReadTag();
225 if (tag == 0) {
226 // End of input. This is a valid place to end, so return true.
227 return true;
228 }
229
230 WireFormatLite::WireType wire_type = WireFormatLite::GetTagWireType(tag);
231
232 if (wire_type == WireFormatLite::WIRETYPE_END_GROUP) {
233 // Must be the end of the message.
234 return true;
235 }
236
237 if (!SkipField(input, tag)) return false;
238 }
239 }
240
SkipMessage(io::CodedInputStream * input,io::CodedOutputStream * output)241 bool WireFormatLite::SkipMessage(io::CodedInputStream* input,
242 io::CodedOutputStream* output) {
243 while (true) {
244 uint32_t tag = input->ReadTag();
245 if (tag == 0) {
246 // End of input. This is a valid place to end, so return true.
247 return true;
248 }
249
250 WireFormatLite::WireType wire_type = WireFormatLite::GetTagWireType(tag);
251
252 if (wire_type == WireFormatLite::WIRETYPE_END_GROUP) {
253 output->WriteVarint32(tag);
254 // Must be the end of the message.
255 return true;
256 }
257
258 if (!SkipField(input, tag, output)) return false;
259 }
260 }
261
SkipField(io::CodedInputStream * input,uint32_t tag)262 bool FieldSkipper::SkipField(io::CodedInputStream* input, uint32_t tag) {
263 return WireFormatLite::SkipField(input, tag);
264 }
265
SkipMessage(io::CodedInputStream * input)266 bool FieldSkipper::SkipMessage(io::CodedInputStream* input) {
267 return WireFormatLite::SkipMessage(input);
268 }
269
SkipUnknownEnum(int,int)270 void FieldSkipper::SkipUnknownEnum(int /* field_number */, int /* value */) {
271 // Nothing.
272 }
273
SkipField(io::CodedInputStream * input,uint32_t tag)274 bool CodedOutputStreamFieldSkipper::SkipField(io::CodedInputStream* input,
275 uint32_t tag) {
276 return WireFormatLite::SkipField(input, tag, unknown_fields_);
277 }
278
SkipMessage(io::CodedInputStream * input)279 bool CodedOutputStreamFieldSkipper::SkipMessage(io::CodedInputStream* input) {
280 return WireFormatLite::SkipMessage(input, unknown_fields_);
281 }
282
SkipUnknownEnum(int field_number,int value)283 void CodedOutputStreamFieldSkipper::SkipUnknownEnum(int field_number,
284 int value) {
285 unknown_fields_->WriteVarint32(field_number);
286 unknown_fields_->WriteVarint64(value);
287 }
288
ReadPackedEnumPreserveUnknowns(io::CodedInputStream * input,int field_number,bool (* is_valid)(int),io::CodedOutputStream * unknown_fields_stream,RepeatedField<int> * values)289 bool WireFormatLite::ReadPackedEnumPreserveUnknowns(
290 io::CodedInputStream* input, int field_number, bool (*is_valid)(int),
291 io::CodedOutputStream* unknown_fields_stream, RepeatedField<int>* values) {
292 uint32_t length;
293 if (!input->ReadVarint32(&length)) return false;
294 io::CodedInputStream::Limit limit = input->PushLimit(length);
295 while (input->BytesUntilLimit() > 0) {
296 int value;
297 if (!ReadPrimitive<int, WireFormatLite::TYPE_ENUM>(input, &value)) {
298 return false;
299 }
300 if (is_valid == nullptr || is_valid(value)) {
301 values->Add(value);
302 } else {
303 uint32_t tag = WireFormatLite::MakeTag(field_number,
304 WireFormatLite::WIRETYPE_VARINT);
305 unknown_fields_stream->WriteVarint32(tag);
306 unknown_fields_stream->WriteVarint32(value);
307 }
308 }
309 input->PopLimit(limit);
310 return true;
311 }
312
313 #if !defined(ABSL_IS_LITTLE_ENDIAN)
314
315 namespace {
EncodeFixedSizeValue(float v,uint8_t * dest)316 void EncodeFixedSizeValue(float v, uint8_t* dest) {
317 WireFormatLite::WriteFloatNoTagToArray(v, dest);
318 }
319
EncodeFixedSizeValue(double v,uint8_t * dest)320 void EncodeFixedSizeValue(double v, uint8_t* dest) {
321 WireFormatLite::WriteDoubleNoTagToArray(v, dest);
322 }
323
EncodeFixedSizeValue(uint32_t v,uint8_t * dest)324 void EncodeFixedSizeValue(uint32_t v, uint8_t* dest) {
325 WireFormatLite::WriteFixed32NoTagToArray(v, dest);
326 }
327
EncodeFixedSizeValue(uint64_t v,uint8_t * dest)328 void EncodeFixedSizeValue(uint64_t v, uint8_t* dest) {
329 WireFormatLite::WriteFixed64NoTagToArray(v, dest);
330 }
331
EncodeFixedSizeValue(int32_t v,uint8_t * dest)332 void EncodeFixedSizeValue(int32_t v, uint8_t* dest) {
333 WireFormatLite::WriteSFixed32NoTagToArray(v, dest);
334 }
335
EncodeFixedSizeValue(int64_t v,uint8_t * dest)336 void EncodeFixedSizeValue(int64_t v, uint8_t* dest) {
337 WireFormatLite::WriteSFixed64NoTagToArray(v, dest);
338 }
339
EncodeFixedSizeValue(bool v,uint8_t * dest)340 void EncodeFixedSizeValue(bool v, uint8_t* dest) {
341 WireFormatLite::WriteBoolNoTagToArray(v, dest);
342 }
343 } // anonymous namespace
344
345 #endif // !defined(ABSL_IS_LITTLE_ENDIAN)
346
347 template <typename CType>
WriteArray(const CType * a,int n,io::CodedOutputStream * output)348 static void WriteArray(const CType* a, int n, io::CodedOutputStream* output) {
349 #if defined(ABSL_IS_LITTLE_ENDIAN)
350 output->WriteRaw(reinterpret_cast<const char*>(a), n * sizeof(a[0]));
351 #else
352 const int kAtATime = 128;
353 uint8_t buf[sizeof(CType) * kAtATime];
354 for (int i = 0; i < n; i += kAtATime) {
355 int to_do = std::min(kAtATime, n - i);
356 uint8_t* ptr = buf;
357 for (int j = 0; j < to_do; j++) {
358 EncodeFixedSizeValue(a[i + j], ptr);
359 ptr += sizeof(a[0]);
360 }
361 output->WriteRaw(buf, to_do * sizeof(a[0]));
362 }
363 #endif
364 }
365
WriteFloatArray(const float * a,int n,io::CodedOutputStream * output)366 void WireFormatLite::WriteFloatArray(const float* a, int n,
367 io::CodedOutputStream* output) {
368 WriteArray<float>(a, n, output);
369 }
370
WriteDoubleArray(const double * a,int n,io::CodedOutputStream * output)371 void WireFormatLite::WriteDoubleArray(const double* a, int n,
372 io::CodedOutputStream* output) {
373 WriteArray<double>(a, n, output);
374 }
375
WriteFixed32Array(const uint32_t * a,int n,io::CodedOutputStream * output)376 void WireFormatLite::WriteFixed32Array(const uint32_t* a, int n,
377 io::CodedOutputStream* output) {
378 WriteArray<uint32_t>(a, n, output);
379 }
380
WriteFixed64Array(const uint64_t * a,int n,io::CodedOutputStream * output)381 void WireFormatLite::WriteFixed64Array(const uint64_t* a, int n,
382 io::CodedOutputStream* output) {
383 WriteArray<uint64_t>(a, n, output);
384 }
385
WriteSFixed32Array(const int32_t * a,int n,io::CodedOutputStream * output)386 void WireFormatLite::WriteSFixed32Array(const int32_t* a, int n,
387 io::CodedOutputStream* output) {
388 WriteArray<int32_t>(a, n, output);
389 }
390
WriteSFixed64Array(const int64_t * a,int n,io::CodedOutputStream * output)391 void WireFormatLite::WriteSFixed64Array(const int64_t* a, int n,
392 io::CodedOutputStream* output) {
393 WriteArray<int64_t>(a, n, output);
394 }
395
WriteBoolArray(const bool * a,int n,io::CodedOutputStream * output)396 void WireFormatLite::WriteBoolArray(const bool* a, int n,
397 io::CodedOutputStream* output) {
398 WriteArray<bool>(a, n, output);
399 }
400
WriteInt32(int field_number,int32_t value,io::CodedOutputStream * output)401 void WireFormatLite::WriteInt32(int field_number, int32_t value,
402 io::CodedOutputStream* output) {
403 WriteTag(field_number, WIRETYPE_VARINT, output);
404 WriteInt32NoTag(value, output);
405 }
WriteInt64(int field_number,int64_t value,io::CodedOutputStream * output)406 void WireFormatLite::WriteInt64(int field_number, int64_t value,
407 io::CodedOutputStream* output) {
408 WriteTag(field_number, WIRETYPE_VARINT, output);
409 WriteInt64NoTag(value, output);
410 }
WriteUInt32(int field_number,uint32_t value,io::CodedOutputStream * output)411 void WireFormatLite::WriteUInt32(int field_number, uint32_t value,
412 io::CodedOutputStream* output) {
413 WriteTag(field_number, WIRETYPE_VARINT, output);
414 WriteUInt32NoTag(value, output);
415 }
WriteUInt64(int field_number,uint64_t value,io::CodedOutputStream * output)416 void WireFormatLite::WriteUInt64(int field_number, uint64_t value,
417 io::CodedOutputStream* output) {
418 WriteTag(field_number, WIRETYPE_VARINT, output);
419 WriteUInt64NoTag(value, output);
420 }
WriteSInt32(int field_number,int32_t value,io::CodedOutputStream * output)421 void WireFormatLite::WriteSInt32(int field_number, int32_t value,
422 io::CodedOutputStream* output) {
423 WriteTag(field_number, WIRETYPE_VARINT, output);
424 WriteSInt32NoTag(value, output);
425 }
WriteSInt64(int field_number,int64_t value,io::CodedOutputStream * output)426 void WireFormatLite::WriteSInt64(int field_number, int64_t value,
427 io::CodedOutputStream* output) {
428 WriteTag(field_number, WIRETYPE_VARINT, output);
429 WriteSInt64NoTag(value, output);
430 }
WriteFixed32(int field_number,uint32_t value,io::CodedOutputStream * output)431 void WireFormatLite::WriteFixed32(int field_number, uint32_t value,
432 io::CodedOutputStream* output) {
433 WriteTag(field_number, WIRETYPE_FIXED32, output);
434 WriteFixed32NoTag(value, output);
435 }
WriteFixed64(int field_number,uint64_t value,io::CodedOutputStream * output)436 void WireFormatLite::WriteFixed64(int field_number, uint64_t value,
437 io::CodedOutputStream* output) {
438 WriteTag(field_number, WIRETYPE_FIXED64, output);
439 WriteFixed64NoTag(value, output);
440 }
WriteSFixed32(int field_number,int32_t value,io::CodedOutputStream * output)441 void WireFormatLite::WriteSFixed32(int field_number, int32_t value,
442 io::CodedOutputStream* output) {
443 WriteTag(field_number, WIRETYPE_FIXED32, output);
444 WriteSFixed32NoTag(value, output);
445 }
WriteSFixed64(int field_number,int64_t value,io::CodedOutputStream * output)446 void WireFormatLite::WriteSFixed64(int field_number, int64_t value,
447 io::CodedOutputStream* output) {
448 WriteTag(field_number, WIRETYPE_FIXED64, output);
449 WriteSFixed64NoTag(value, output);
450 }
WriteFloat(int field_number,float value,io::CodedOutputStream * output)451 void WireFormatLite::WriteFloat(int field_number, float value,
452 io::CodedOutputStream* output) {
453 WriteTag(field_number, WIRETYPE_FIXED32, output);
454 WriteFloatNoTag(value, output);
455 }
WriteDouble(int field_number,double value,io::CodedOutputStream * output)456 void WireFormatLite::WriteDouble(int field_number, double value,
457 io::CodedOutputStream* output) {
458 WriteTag(field_number, WIRETYPE_FIXED64, output);
459 WriteDoubleNoTag(value, output);
460 }
WriteBool(int field_number,bool value,io::CodedOutputStream * output)461 void WireFormatLite::WriteBool(int field_number, bool value,
462 io::CodedOutputStream* output) {
463 WriteTag(field_number, WIRETYPE_VARINT, output);
464 WriteBoolNoTag(value, output);
465 }
WriteEnum(int field_number,int value,io::CodedOutputStream * output)466 void WireFormatLite::WriteEnum(int field_number, int value,
467 io::CodedOutputStream* output) {
468 WriteTag(field_number, WIRETYPE_VARINT, output);
469 WriteEnumNoTag(value, output);
470 }
471
// INT32_MAX as a size_t; the upper bound the string/bytes writers below
// check payload sizes against.
constexpr size_t kInt32MaxSize =
    static_cast<size_t>(std::numeric_limits<int32_t>::max());
473
WriteString(int field_number,const std::string & value,io::CodedOutputStream * output)474 void WireFormatLite::WriteString(int field_number, const std::string& value,
475 io::CodedOutputStream* output) {
476 // String is for UTF-8 text only
477 WriteTag(field_number, WIRETYPE_LENGTH_DELIMITED, output);
478 ABSL_CHECK_LE(value.size(), kInt32MaxSize);
479 output->WriteVarint32(value.size());
480 output->WriteString(value);
481 }
WriteStringMaybeAliased(int field_number,const std::string & value,io::CodedOutputStream * output)482 void WireFormatLite::WriteStringMaybeAliased(int field_number,
483 const std::string& value,
484 io::CodedOutputStream* output) {
485 // String is for UTF-8 text only
486 WriteTag(field_number, WIRETYPE_LENGTH_DELIMITED, output);
487 ABSL_CHECK_LE(value.size(), kInt32MaxSize);
488 output->WriteVarint32(value.size());
489 output->WriteRawMaybeAliased(value.data(), value.size());
490 }
WriteBytes(int field_number,const std::string & value,io::CodedOutputStream * output)491 void WireFormatLite::WriteBytes(int field_number, const std::string& value,
492 io::CodedOutputStream* output) {
493 WriteTag(field_number, WIRETYPE_LENGTH_DELIMITED, output);
494 ABSL_CHECK_LE(value.size(), kInt32MaxSize);
495 output->WriteVarint32(value.size());
496 output->WriteString(value);
497 }
WriteBytesMaybeAliased(int field_number,const std::string & value,io::CodedOutputStream * output)498 void WireFormatLite::WriteBytesMaybeAliased(int field_number,
499 const std::string& value,
500 io::CodedOutputStream* output) {
501 WriteTag(field_number, WIRETYPE_LENGTH_DELIMITED, output);
502 ABSL_CHECK_LE(value.size(), kInt32MaxSize);
503 output->WriteVarint32(value.size());
504 output->WriteRawMaybeAliased(value.data(), value.size());
505 }
506
507
WriteGroup(int field_number,const MessageLite & value,io::CodedOutputStream * output)508 void WireFormatLite::WriteGroup(int field_number, const MessageLite& value,
509 io::CodedOutputStream* output) {
510 WriteTag(field_number, WIRETYPE_START_GROUP, output);
511 value.SerializeWithCachedSizes(output);
512 WriteTag(field_number, WIRETYPE_END_GROUP, output);
513 }
514
WriteMessage(int field_number,const MessageLite & value,io::CodedOutputStream * output)515 void WireFormatLite::WriteMessage(int field_number, const MessageLite& value,
516 io::CodedOutputStream* output) {
517 WriteTag(field_number, WIRETYPE_LENGTH_DELIMITED, output);
518 const int size = value.GetCachedSize();
519 output->WriteVarint32(size);
520 value.SerializeWithCachedSizes(output);
521 }
522
InternalWriteGroup(int field_number,const MessageLite & value,uint8_t * target,io::EpsCopyOutputStream * stream)523 uint8_t* WireFormatLite::InternalWriteGroup(int field_number,
524 const MessageLite& value,
525 uint8_t* target,
526 io::EpsCopyOutputStream* stream) {
527 target = stream->EnsureSpace(target);
528 target = WriteTagToArray(field_number, WIRETYPE_START_GROUP, target);
529 target = value._InternalSerialize(target, stream);
530 target = stream->EnsureSpace(target);
531 return WriteTagToArray(field_number, WIRETYPE_END_GROUP, target);
532 }
533
InternalWriteMessage(int field_number,const MessageLite & value,int cached_size,uint8_t * target,io::EpsCopyOutputStream * stream)534 uint8_t* WireFormatLite::InternalWriteMessage(int field_number,
535 const MessageLite& value,
536 int cached_size, uint8_t* target,
537 io::EpsCopyOutputStream* stream) {
538 target = stream->EnsureSpace(target);
539 target = WriteTagToArray(field_number, WIRETYPE_LENGTH_DELIMITED, target);
540 target = io::CodedOutputStream::WriteVarint32ToArray(
541 static_cast<uint32_t>(cached_size), target);
542 return value._InternalSerialize(target, stream);
543 }
544
WriteSubMessageMaybeToArray(int,const MessageLite & value,io::CodedOutputStream * output)545 void WireFormatLite::WriteSubMessageMaybeToArray(
546 int /*size*/, const MessageLite& value, io::CodedOutputStream* output) {
547 output->SetCur(value._InternalSerialize(output->Cur(), output->EpsCopy()));
548 }
549
WriteGroupMaybeToArray(int field_number,const MessageLite & value,io::CodedOutputStream * output)550 void WireFormatLite::WriteGroupMaybeToArray(int field_number,
551 const MessageLite& value,
552 io::CodedOutputStream* output) {
553 WriteTag(field_number, WIRETYPE_START_GROUP, output);
554 const int size = value.GetCachedSize();
555 WriteSubMessageMaybeToArray(size, value, output);
556 WriteTag(field_number, WIRETYPE_END_GROUP, output);
557 }
558
WriteMessageMaybeToArray(int field_number,const MessageLite & value,io::CodedOutputStream * output)559 void WireFormatLite::WriteMessageMaybeToArray(int field_number,
560 const MessageLite& value,
561 io::CodedOutputStream* output) {
562 WriteTag(field_number, WIRETYPE_LENGTH_DELIMITED, output);
563 const int size = value.GetCachedSize();
564 output->WriteVarint32(size);
565 WriteSubMessageMaybeToArray(size, value, output);
566 }
567
568 PROTOBUF_NDEBUG_INLINE static bool ReadBytesToString(
569 io::CodedInputStream* input, std::string* value);
ReadBytesToString(io::CodedInputStream * input,std::string * value)570 inline static bool ReadBytesToString(io::CodedInputStream* input,
571 std::string* value) {
572 uint32_t length;
573 return input->ReadVarint32(&length) && input->ReadString(value, length);
574 }
575
ReadBytes(io::CodedInputStream * input,std::string * value)576 bool WireFormatLite::ReadBytes(io::CodedInputStream* input,
577 std::string* value) {
578 return ReadBytesToString(input, value);
579 }
580
ReadBytes(io::CodedInputStream * input,std::string ** p)581 bool WireFormatLite::ReadBytes(io::CodedInputStream* input, std::string** p) {
582 if (*p == &GetEmptyStringAlreadyInited()) {
583 *p = new std::string();
584 }
585 return ReadBytesToString(input, *p);
586 }
587
PrintUTF8ErrorLog(absl::string_view message_name,absl::string_view field_name,const char * operation_str,bool emit_stacktrace)588 void PrintUTF8ErrorLog(absl::string_view message_name,
589 absl::string_view field_name, const char* operation_str,
590 bool emit_stacktrace) {
591 std::string stacktrace;
592 (void)emit_stacktrace; // Parameter is used by Google-internal code.
593 std::string quoted_field_name = "";
594 if (!field_name.empty()) {
595 if (!message_name.empty()) {
596 quoted_field_name =
597 absl::StrCat(" '", message_name, ".", field_name, "'");
598 } else {
599 quoted_field_name = absl::StrCat(" '", field_name, "'");
600 }
601 }
602 std::string error_message =
603 absl::StrCat("String field", quoted_field_name,
604 " contains invalid UTF-8 data "
605 "when ",
606 operation_str,
607 " a protocol buffer. Use the 'bytes' type if you intend to "
608 "send raw bytes. ",
609 stacktrace);
610 ABSL_LOG(ERROR) << error_message;
611 }
612
613 #if defined (__MINGW64__) || defined(__MINGW32__)
VerifyUtf8String(const char * data,int size,Operation op,const std::string_view field_name)614 bool WireFormatLite::VerifyUtf8String(const char* data, int size, Operation op,
615 const std::string_view field_name) {
616 #else
617 bool WireFormatLite::VerifyUtf8String(const char* data, int size, Operation op,
618 const absl::string_view field_name) {
619 #endif
620 if (!utf8_range::IsStructurallyValid({data, static_cast<size_t>(size)})) {
621 const char* operation_str = nullptr;
622 switch (op) {
623 case PARSE:
624 operation_str = "parsing";
625 break;
626 case SERIALIZE:
627 operation_str = "serializing";
628 break;
629 // no default case: have the compiler warn if a case is not covered.
630 }
631 #if defined (__MINGW64__) || defined(__MINGW32__)
632 PrintUTF8ErrorLog("", absl::string_view{field_name.data(), field_name.size()}, operation_str, false);
633 #else
634 PrintUTF8ErrorLog("", field_name, operation_str, false);
635 #endif
636 return false;
637 }
638 return true;
639 }
640
// this code is deliberately written such that clang makes it into really
// efficient SSE code.
//
// Returns the total number of bytes needed to varint-encode the `n` 32-bit
// values in `data`.
//   ZigZag:       values are ZigZag-encoded before being sized.
//   SignExtended: values are sized as sign-extended integers; in the
//                 vectorizable loop, each value with its top bit set gets 5
//                 extra bytes (10 in total), added once at the end.
// NOTE(review): the running total is a uint32_t although the return type is
// size_t; presumably inputs are small enough that it cannot wrap - confirm.
template <bool ZigZag, bool SignExtended, typename T>
static size_t VarintSize(const T* data, const int n) {
  static_assert(sizeof(T) == 4, "This routine only works for 32 bit integers");
  // is_unsigned<T> => !ZigZag
  static_assert(
      (std::is_unsigned<T>::value ^ ZigZag) || std::is_signed<T>::value,
      "Cannot ZigZag encode unsigned types");
  // is_unsigned<T> => !SignExtended
  static_assert(
      (std::is_unsigned<T>::value ^ SignExtended) || std::is_signed<T>::value,
      "Cannot SignExtended unsigned types");
  static_assert(!(SignExtended && ZigZag),
                "Cannot SignExtended and ZigZag on the same type");
  // This approach is only faster when vectorized, and the vectorized
  // implementation only works in units of the platform's vector width, and is
  // only faster once a certain number of iterations are used. Normally the
  // compiler generates two loops - one partially unrolled vectorized loop that
  // processes big chunks, and a second "epilogue" scalar loop to finish up the
  // remainder. This is done manually here so that the faster scalar
  // implementation is used for small inputs and for the epilogue.
  // `n` rounded down to a multiple of 32: the part handled by the first loop.
  int vectorN = n & -32;
  // Every element in the first loop takes at least one byte.
  uint32_t sum = vectorN;
  // Number of first-loop elements with the top bit set (SignExtended only).
  uint32_t msb_sum = 0;
  int i = 0;
  for (; i < vectorN; i++) {
    uint32_t x = data[i];
    if (ZigZag) {
      x = WireFormatLite::ZigZagEncode32(x);
    } else if (SignExtended) {
      msb_sum += x >> 31;
    }
    // clang is so smart that it produces optimal SIMD sequence unrolling
    // the loop 8 ints at a time. With a sequence of 4
    // cmpres = cmpgt x, sizeclass  ( -1 or 0)
    // sum = sum - cmpres
    // Each threshold crossed means one more 7-bit group, i.e. one more byte.
    if (x > 0x7F) sum++;
    if (x > 0x3FFF) sum++;
    if (x > 0x1FFFFF) sum++;
    if (x > 0xFFFFFFF) sum++;
  }
#ifdef __clang__
// Clang is not smart enough to see that this loop doesn't run many times
// NOLINTNEXTLINE(google3-runtime-pragma-loop-hint): b/315043579
#pragma clang loop vectorize(disable) unroll(disable) interleave(disable)
#endif
  // Scalar epilogue for the remaining (fewer than 32) elements.
  for (; i < n; i++) {
    uint32_t x = data[i];
    if (ZigZag) {
      sum += WireFormatLite::SInt32Size(x);
    } else if (SignExtended) {
      sum += WireFormatLite::Int32Size(x);
    } else {
      sum += WireFormatLite::UInt32Size(x);
    }
  }
  // First-loop elements with the top bit set were counted as 5 bytes above;
  // add the 5 further bytes each of them needs.
  if (SignExtended) sum += msb_sum * 5;
  return sum;
}
701
// Returns the total number of bytes needed to varint-encode the `n` 64-bit
// values in `data`. When ZigZag is true, values are ZigZag-encoded before
// being sized. Structured like VarintSize(): a loop over a multiple of 32
// elements written for vectorization, then a scalar loop for the remainder.
template <bool ZigZag, typename T>
static size_t VarintSize64(const T* data, const int n) {
  static_assert(sizeof(T) == 8, "This routine only works for 64 bit integers");
  // is_unsigned<T> => !ZigZag
  static_assert(!ZigZag || !std::is_unsigned<T>::value,
                "Cannot ZigZag encode unsigned types");
  // `n` rounded down to a multiple of 32: the part handled by the first loop.
  int vectorN = n & -32;
  // Every element in the first loop takes at least one byte.
  uint64_t sum = vectorN;
  int i = 0;
  for (; i < vectorN; i++) {
    uint64_t x = data[i];
    if (ZigZag) {
      x = WireFormatLite::ZigZagEncode64(x);
    }
    // First step is a binary search, we can't branch in sse so we use the
    // result of the compare to adjust sum and appropriately. This code is
    // written to make clang recognize the vectorization.
    // tmp is all ones when x needs more than 35 bits, otherwise zero; it then
    // selects between "add 5 bytes and drop the low 35 bits" and "do nothing".
    uint64_t tmp = x >= (static_cast<uint64_t>(1) << 35) ? -1 : 0;
    sum += 5 & tmp;
    x >>= 35 & tmp;
    // Each threshold crossed means one more 7-bit group, i.e. one more byte.
    if (x > 0x7F) sum++;
    if (x > 0x3FFF) sum++;
    if (x > 0x1FFFFF) sum++;
    if (x > 0xFFFFFFF) sum++;
  }
#ifdef __clang__
// Clang is not smart enough to see that this loop doesn't run many times
// NOLINTNEXTLINE(google3-runtime-pragma-loop-hint): b/315043579
#pragma clang loop vectorize(disable) unroll(disable) interleave(disable)
#endif
  // Scalar epilogue for the remaining (fewer than 32) elements.
  for (; i < n; i++) {
    uint64_t x = data[i];
    if (ZigZag) {
      sum += WireFormatLite::SInt64Size(x);
    } else {
      sum += WireFormatLite::UInt64Size(x);
    }
  }
  return sum;
}
742
743 // On machines without a vector count-leading-zeros instruction such as SVE CLZ
744 // on arm or VPLZCNT on x86, SSE or AVX2 instructions can allow vectorization of
745 // the size calculation loop. GCC does not detect this autovectorization
746 // opportunity, so only enable for clang.
747 // When last tested, AVX512-vectorized lzcnt was slower than the SSE/AVX2
748 // implementation, so __AVX512CD__ is not checked.
749 #if defined(__SSE__) && defined(__clang__)
750 size_t WireFormatLite::Int32Size(const RepeatedField<int32_t>& value) {
751 return VarintSize<false, true>(value.data(), value.size());
752 }
753
754 size_t WireFormatLite::UInt32Size(const RepeatedField<uint32_t>& value) {
755 return VarintSize<false, false>(value.data(), value.size());
756 }
757
758 size_t WireFormatLite::SInt32Size(const RepeatedField<int32_t>& value) {
759 return VarintSize<true, false>(value.data(), value.size());
760 }
761
762 size_t WireFormatLite::EnumSize(const RepeatedField<int>& value) {
763 // On ILP64, sizeof(int) == 8, which would require a different template.
764 return VarintSize<false, true>(value.data(), value.size());
765 }
766
767 #else // !(defined(__SSE__) && defined(__clang__))
768
769 size_t WireFormatLite::Int32Size(const RepeatedField<int32_t>& value) {
770 size_t out = 0;
771 const int n = value.size();
772 for (int i = 0; i < n; i++) {
773 out += Int32Size(value.Get(i));
774 }
775 return out;
776 }
777
778 size_t WireFormatLite::UInt32Size(const RepeatedField<uint32_t>& value) {
779 size_t out = 0;
780 const int n = value.size();
781 for (int i = 0; i < n; i++) {
782 out += UInt32Size(value.Get(i));
783 }
784 return out;
785 }
786
787 size_t WireFormatLite::SInt32Size(const RepeatedField<int32_t>& value) {
788 size_t out = 0;
789 const int n = value.size();
790 for (int i = 0; i < n; i++) {
791 out += SInt32Size(value.Get(i));
792 }
793 return out;
794 }
795
796 size_t WireFormatLite::EnumSize(const RepeatedField<int>& value) {
797 size_t out = 0;
798 const int n = value.size();
799 for (int i = 0; i < n; i++) {
800 out += EnumSize(value.Get(i));
801 }
802 return out;
803 }
804
805 #endif
806
807 // Micro benchmarks show that the vectorizable loop only starts beating
808 // the normal loop when 256-bit vector registers are available.
809 #if defined(__AVX2__) && defined(__clang__)
810 size_t WireFormatLite::Int64Size(const RepeatedField<int64_t>& value) {
811 return VarintSize64<false>(value.data(), value.size());
812 }
813
814 size_t WireFormatLite::UInt64Size(const RepeatedField<uint64_t>& value) {
815 return VarintSize64<false>(value.data(), value.size());
816 }
817
818 size_t WireFormatLite::SInt64Size(const RepeatedField<int64_t>& value) {
819 return VarintSize64<true>(value.data(), value.size());
820 }
821
822 #else
823
824 size_t WireFormatLite::Int64Size(const RepeatedField<int64_t>& value) {
825 size_t out = 0;
826 const int n = value.size();
827 for (int i = 0; i < n; i++) {
828 out += Int64Size(value.Get(i));
829 }
830 return out;
831 }
832
833 size_t WireFormatLite::UInt64Size(const RepeatedField<uint64_t>& value) {
834 size_t out = 0;
835 const int n = value.size();
836 for (int i = 0; i < n; i++) {
837 out += UInt64Size(value.Get(i));
838 }
839 return out;
840 }
841
842 size_t WireFormatLite::SInt64Size(const RepeatedField<int64_t>& value) {
843 size_t out = 0;
844 const int n = value.size();
845 for (int i = 0; i < n; i++) {
846 out += SInt64Size(value.Get(i));
847 }
848 return out;
849 }
850
851 #endif
852
853 size_t WireFormatLite::Int32SizeWithPackedTagSize(
854 const RepeatedField<int32_t>& value, size_t tag_size,
855 const internal::CachedSize& cached_size) {
856 if (value.empty()) {
857 cached_size.Set(0);
858 return 0;
859 }
860 size_t res;
861 PROTOBUF_ALWAYS_INLINE_CALL res = Int32Size(value);
862 cached_size.SetNonZero(ToCachedSize(res));
863 return tag_size + res + Int32Size(static_cast<int32_t>(res));
864 }
865 size_t WireFormatLite::Int64SizeWithPackedTagSize(
866 const RepeatedField<int64_t>& value, size_t tag_size,
867 const internal::CachedSize& cached_size) {
868 if (value.empty()) {
869 cached_size.Set(0);
870 return 0;
871 }
872 size_t res;
873 PROTOBUF_ALWAYS_INLINE_CALL res = Int64Size(value);
874 cached_size.SetNonZero(ToCachedSize(res));
875 return tag_size + res + Int32Size(static_cast<int32_t>(res));
876 }
877 size_t WireFormatLite::UInt32SizeWithPackedTagSize(
878 const RepeatedField<uint32_t>& value, size_t tag_size,
879 const internal::CachedSize& cached_size) {
880 if (value.empty()) {
881 cached_size.Set(0);
882 return 0;
883 }
884 size_t res;
885 PROTOBUF_ALWAYS_INLINE_CALL res = UInt32Size(value);
886 cached_size.SetNonZero(ToCachedSize(res));
887 return tag_size + res + Int32Size(static_cast<int32_t>(res));
888 }
889 size_t WireFormatLite::UInt64SizeWithPackedTagSize(
890 const RepeatedField<uint64_t>& value, size_t tag_size,
891 const internal::CachedSize& cached_size) {
892 if (value.empty()) {
893 cached_size.Set(0);
894 return 0;
895 }
896 size_t res;
897 PROTOBUF_ALWAYS_INLINE_CALL res = UInt64Size(value);
898 cached_size.SetNonZero(ToCachedSize(res));
899 return tag_size + res + Int32Size(static_cast<int32_t>(res));
900 }
901 size_t WireFormatLite::SInt32SizeWithPackedTagSize(
902 const RepeatedField<int32_t>& value, size_t tag_size,
903 const internal::CachedSize& cached_size) {
904 if (value.empty()) {
905 cached_size.Set(0);
906 return 0;
907 }
908 size_t res;
909 PROTOBUF_ALWAYS_INLINE_CALL res = SInt32Size(value);
910 cached_size.SetNonZero(ToCachedSize(res));
911 return tag_size + res + Int32Size(static_cast<int32_t>(res));
912 }
913 size_t WireFormatLite::SInt64SizeWithPackedTagSize(
914 const RepeatedField<int64_t>& value, size_t tag_size,
915 const internal::CachedSize& cached_size) {
916 if (value.empty()) {
917 cached_size.Set(0);
918 return 0;
919 }
920 size_t res;
921 PROTOBUF_ALWAYS_INLINE_CALL res = SInt64Size(value);
922 cached_size.SetNonZero(ToCachedSize(res));
923 return tag_size + res + Int32Size(static_cast<int32_t>(res));
924 }
925 size_t WireFormatLite::EnumSizeWithPackedTagSize(
926 const RepeatedField<int>& value, size_t tag_size,
927 const internal::CachedSize& cached_size) {
928 if (value.empty()) {
929 cached_size.Set(0);
930 return 0;
931 }
932 size_t res;
933 PROTOBUF_ALWAYS_INLINE_CALL res = EnumSize(value);
934 cached_size.SetNonZero(ToCachedSize(res));
935 return tag_size + res + Int32Size(static_cast<int32_t>(res));
936 }
937
938 } // namespace internal
939 } // namespace protobuf
940 } // namespace google
941
942 #include "google/protobuf/port_undef.inc"
943