// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc.  All rights reserved.
// https://developers.google.com/protocol-buffers/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#ifndef GOOGLE_PROTOBUF_PARSE_CONTEXT_H__
#define GOOGLE_PROTOBUF_PARSE_CONTEXT_H__

#include <cstdint>
#include <cstring>
#include <string>

#include <google/protobuf/io/coded_stream.h>
#include <google/protobuf/io/zero_copy_stream.h>
#include <google/protobuf/arena.h>
#include <google/protobuf/arenastring.h>
#include <google/protobuf/implicit_weak_message.h>
#include <google/protobuf/metadata_lite.h>
#include <google/protobuf/port.h>
#include <google/protobuf/repeated_field.h>
#include <google/protobuf/wire_format_lite.h>
#include <google/protobuf/stubs/strutil.h>

#include <google/protobuf/port_def.inc>


namespace google {
namespace protobuf {

class UnknownFieldSet;
class DescriptorPool;
class MessageFactory;

namespace internal {
// Template code below needs to know about the existence of these functions.
PROTOBUF_EXPORT void WriteVarint(uint32 num, uint64 val, std::string* s);
PROTOBUF_EXPORT void WriteLengthDelimited(uint32 num, StringPiece val,
                                          std::string* s);
// Inline because it is just forwarding to s->WriteVarint.
inline void WriteVarint(uint32 num, uint64 val, UnknownFieldSet* s);
inline void WriteLengthDelimited(uint32 num, StringPiece val,
                                 UnknownFieldSet* s);


// The basic abstraction the parser is designed for is a slight modification
// of the ZeroCopyInputStream (ZCIS) abstraction. A ZCIS presents a serialized
// stream as a series of buffers that concatenate to the full stream.
// Pictorially a ZCIS presents a stream in chunks like so
// [---------------------------------------------------------------]
// [---------------------] chunk 1
//                      [----------------------------] chunk 2
//                                          chunk 3 [--------------]
//
// Here the '-' represent the bytes, vertically lined up with the bytes of the
// stream. The proto parser requires its input to be presented similarly, with
// the extra property that each chunk has kSlopBytes past its end that overlap
// with the first kSlopBytes of the next chunk, or, if there is no next chunk,
// that it is at least still valid to read those bytes. Again, pictorially, we
// now have
//
// [---------------------------------------------------------------]
// [-------------------....] chunk 1
//                    [------------------------....] chunk 2
//                                    chunk 3 [------------------..**]
//                                                      chunk 4 [--****]
// Here '-' means bytes of the stream or chunk and '.' means bytes past the
// chunk that match up with the start of the next chunk. Above, each chunk has
// 4 '.' after it. In case these "overflow" bytes represent bytes past the end
// of the stream, indicated by '*' above, their values are unspecified. It is
// still legal to read them (i.e. reading them should not segfault). Reading
// past the end should be detected by the user and indicated as an error.
//
// The reason for this admittedly unconventional invariant is to ruthlessly
// optimize the protobuf parser. Having an overlap helps in two important ways.
// Firstly it alleviates having to perform bounds checks whenever a piece of
// code is guaranteed to not read more than kSlopBytes. Secondly, and more
// importantly, the protobuf wire format is such that reading a key/value pair
// always takes less than 16 bytes. This removes the need to switch to the next
// buffer in the middle of reading primitive values. Hence there is no need to
// store and load the current position.
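//
// As a concrete illustration (a hedged sketch; PatchChunk and kSlop are
// illustrative names, not part of this header), a stream owner could patch a
// private buffer so each chunk it hands to the parser carries a readable
// kSlopBytes tail mirroring the start of the next chunk:
//
//   constexpr int kSlop = 16;
//   // `patch` must have room for cur_size + kSlop bytes.
//   void PatchChunk(const char* cur, size_t cur_size,
//                   const char* next, size_t next_size, char* patch) {
//     std::memcpy(patch, cur, cur_size);
//     size_t n = next ? std::min<size_t>(next_size, kSlop) : 0;
//     if (n) std::memcpy(patch + cur_size, next, n);
//     // Bytes past the stream end ('*' above) only need to be readable;
//     // their values are unspecified, so zero-filling is fine.
//     std::memset(patch + cur_size + n, 0, kSlop - n);
//   }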

class PROTOBUF_EXPORT EpsCopyInputStream {
 public:
  enum { kSlopBytes = 16, kMaxCordBytesToCopy = 512 };

  explicit EpsCopyInputStream(bool enable_aliasing)
      : aliasing_(enable_aliasing ? kOnPatch : kNoAliasing) {}

  void BackUp(const char* ptr) {
    GOOGLE_DCHECK(ptr <= buffer_end_ + kSlopBytes);
    int count;
    if (next_chunk_ == buffer_) {
      count = static_cast<int>(buffer_end_ + kSlopBytes - ptr);
    } else {
      count = size_ + static_cast<int>(buffer_end_ - ptr);
    }
    if (count > 0) StreamBackUp(count);
  }

  // If the return value is negative it's an error.
  PROTOBUF_MUST_USE_RESULT int PushLimit(const char* ptr, int limit) {
    GOOGLE_DCHECK(limit >= 0 && limit <= INT_MAX - kSlopBytes);
    // This add is safe due to the invariant above, because
    // ptr - buffer_end_ <= kSlopBytes.
    limit += static_cast<int>(ptr - buffer_end_);
    limit_end_ = buffer_end_ + (std::min)(0, limit);
    auto old_limit = limit_;
    limit_ = limit;
    return old_limit - limit;
  }

  PROTOBUF_MUST_USE_RESULT bool PopLimit(int delta) {
    if (PROTOBUF_PREDICT_FALSE(!EndedAtLimit())) return false;
    limit_ = limit_ + delta;
    // TODO(gerbens) We could remove this line and hoist the code to
    // DoneFallback. Study the perf/bin-size effects.
    limit_end_ = buffer_end_ + (std::min)(0, limit_);
    return true;
  }

  PROTOBUF_MUST_USE_RESULT const char* Skip(const char* ptr, int size) {
    if (size <= buffer_end_ + kSlopBytes - ptr) {
      return ptr + size;
    }
    return SkipFallback(ptr, size);
  }
  PROTOBUF_MUST_USE_RESULT const char* ReadString(const char* ptr, int size,
                                                  std::string* s) {
    if (size <= buffer_end_ + kSlopBytes - ptr) {
      s->assign(ptr, size);
      return ptr + size;
    }
    return ReadStringFallback(ptr, size, s);
  }
  PROTOBUF_MUST_USE_RESULT const char* AppendString(const char* ptr, int size,
                                                    std::string* s) {
    if (size <= buffer_end_ + kSlopBytes - ptr) {
      s->append(ptr, size);
      return ptr + size;
    }
    return AppendStringFallback(ptr, size, s);
  }

  template <typename Tag, typename T>
  PROTOBUF_MUST_USE_RESULT const char* ReadRepeatedFixed(
      const char* ptr, Tag expected_tag, RepeatedField<T>* out);

  template <typename T>
  PROTOBUF_MUST_USE_RESULT const char* ReadPackedFixed(const char* ptr,
                                                       int size,
                                                       RepeatedField<T>* out);
  template <typename Add>
  PROTOBUF_MUST_USE_RESULT const char* ReadPackedVarint(const char* ptr,
                                                        Add add);

  uint32 LastTag() const { return last_tag_minus_1_ + 1; }
  bool ConsumeEndGroup(uint32 start_tag) {
    bool res = last_tag_minus_1_ == start_tag;
    last_tag_minus_1_ = 0;
    return res;
  }
  bool EndedAtLimit() const { return last_tag_minus_1_ == 0; }
  bool EndedAtEndOfStream() const { return last_tag_minus_1_ == 1; }
  void SetLastTag(uint32 tag) { last_tag_minus_1_ = tag - 1; }
  void SetEndOfStream() { last_tag_minus_1_ = 1; }
  bool IsExceedingLimit(const char* ptr) {
    return ptr > limit_end_ &&
           (next_chunk_ == nullptr || ptr - buffer_end_ > limit_);
  }
  int BytesUntilLimit(const char* ptr) const {
    return limit_ + static_cast<int>(buffer_end_ - ptr);
  }
  // Returns true if more data is available; if false is returned, one has to
  // call Done for further checks.
  bool DataAvailable(const char* ptr) { return ptr < limit_end_; }

 protected:
  // Returns true if a limit (either an explicit limit or the end of the
  // stream) is reached. It aligns *ptr across buffer seams.
  // If the limit is exceeded, it returns true and ptr is set to null.
  bool DoneWithCheck(const char** ptr, int d) {
    GOOGLE_DCHECK(*ptr);
    if (PROTOBUF_PREDICT_TRUE(*ptr < limit_end_)) return false;
    int overrun = *ptr - buffer_end_;
    GOOGLE_DCHECK_LE(overrun, kSlopBytes);  // Guaranteed by parse loop.
    if (overrun == limit_) {  // No need to flip buffers if we ended on a limit.
      // If we actually overran the buffer and next_chunk_ is null, it means
      // the stream ended and we read past the stream end.
      if (overrun > 0 && next_chunk_ == nullptr) *ptr = nullptr;
      return true;
    }
    auto res = DoneFallback(overrun, d);
    *ptr = res.first;
    return res.second;
  }

  const char* InitFrom(StringPiece flat) {
    overall_limit_ = 0;
    if (flat.size() > kSlopBytes) {
      limit_ = kSlopBytes;
      limit_end_ = buffer_end_ = flat.data() + flat.size() - kSlopBytes;
      next_chunk_ = buffer_;
      if (aliasing_ == kOnPatch) aliasing_ = kNoDelta;
      return flat.data();
    } else {
      std::memcpy(buffer_, flat.data(), flat.size());
      limit_ = 0;
      limit_end_ = buffer_end_ = buffer_ + flat.size();
      next_chunk_ = nullptr;
      if (aliasing_ == kOnPatch) {
        aliasing_ = reinterpret_cast<std::uintptr_t>(flat.data()) -
                    reinterpret_cast<std::uintptr_t>(buffer_);
      }
      return buffer_;
    }
  }

  const char* InitFrom(io::ZeroCopyInputStream* zcis);

  const char* InitFrom(io::ZeroCopyInputStream* zcis, int limit) {
    if (limit == -1) return InitFrom(zcis);
    overall_limit_ = limit;
    auto res = InitFrom(zcis);
    limit_ = limit - static_cast<int>(buffer_end_ - res);
    limit_end_ = buffer_end_ + (std::min)(0, limit_);
    return res;
  }

 private:
  const char* limit_end_;  // buffer_end_ + min(limit_, 0)
  const char* buffer_end_;
  const char* next_chunk_;
  int size_;
  int limit_;  // relative to buffer_end_
  io::ZeroCopyInputStream* zcis_ = nullptr;
  char buffer_[2 * kSlopBytes] = {};
  enum { kNoAliasing = 0, kOnPatch = 1, kNoDelta = 2 };
  std::uintptr_t aliasing_ = kNoAliasing;
  // This variable is used to communicate how the parse ended, in order to
  // completely verify the parsed data. A wire-format parse can end because of
  // one of the following conditions:
  // 1) A parse can end on a pushed limit.
  // 2) A parse can end on End Of Stream (EOS).
  // 3) A parse can end on a 0 tag (only valid for a toplevel message).
  // 4) A parse can end on an end-group tag.
  // This variable is normally 0, which indicates case 1. If the parse
  // terminated due to EOS (case 2), it's set to 1. In case the parse ended
  // due to a terminating tag (cases 3 and 4) it's set to (tag - 1).
  // This var doesn't really belong in EpsCopyInputStream and should be part of
  // the ParseContext, but case 2 is most easily and optimally implemented in
  // DoneFallback.
  uint32 last_tag_minus_1_ = 0;
  int overall_limit_ = INT_MAX;  // Overall limit independent of pushed limits.
  // Pretty arbitrary large number that seems like a safe allocation on most
  // systems. TODO(gerbens) do we need to set this as a build flag?
  enum { kSafeStringSize = 50000000 };

  // Advances to the next buffer chunk and returns a pointer to the same
  // logical place in the stream as indicated by overrun. Overrun is the
  // position in the slop region where the parse was left off
  // (0 <= overrun <= kSlopBytes). Returns true if at a limit, at which point
  // the returned pointer may be null if there was an error. The invariant of
  // this function is that it's guaranteed that kSlopBytes bytes can be
  // accessed from the returned ptr. This function might advance over more
  // than one buffer of the underlying ZeroCopyInputStream.
  std::pair<const char*, bool> DoneFallback(int overrun, int depth);
  // Advances to the next buffer; at most one call to Next() on the underlying
  // ZeroCopyInputStream is made. This function DOES NOT match the returned
  // pointer to where in the slop region the parse ends, hence no overrun
  // parameter. This is useful for string operations where you always copy
  // to the end of the buffer (including the slop region).
  const char* Next();
  // overrun is the location in the slop region where the stream currently is
  // (0 <= overrun <= kSlopBytes). It is used to avoid flipping to the next
  // buffer of the ZeroCopyInputStream in case the parse will end in the last
  // kSlopBytes of the current buffer. depth is the current depth of nested
  // groups (or negative if the use case does not need careful tracking).
  inline const char* NextBuffer(int overrun, int depth);
  const char* SkipFallback(const char* ptr, int size);
  const char* AppendStringFallback(const char* ptr, int size, std::string* str);
  const char* ReadStringFallback(const char* ptr, int size, std::string* str);
  bool StreamNext(const void** data) {
    bool res = zcis_->Next(data, &size_);
    if (res) overall_limit_ -= size_;
    return res;
  }
  void StreamBackUp(int count) {
    zcis_->BackUp(count);
    overall_limit_ += count;
  }

  template <typename A>
  const char* AppendSize(const char* ptr, int size, const A& append) {
    int chunk_size = buffer_end_ + kSlopBytes - ptr;
    do {
      GOOGLE_DCHECK(size > chunk_size);
      if (next_chunk_ == nullptr) return nullptr;
      append(ptr, chunk_size);
      ptr += chunk_size;
      size -= chunk_size;
      // TODO(gerbens) Next calls NextBuffer which generates buffers with
      // overlap and thus incurs the cost of copying the slop regions. This is
      // not necessary for reading strings. We should just call Next buffers.
      if (limit_ <= kSlopBytes) return nullptr;
      ptr = Next();
      if (ptr == nullptr) return nullptr;  // passed the limit
      ptr += kSlopBytes;
      chunk_size = buffer_end_ + kSlopBytes - ptr;
    } while (size > chunk_size);
    append(ptr, size);
    return ptr + size;
  }

  // AppendUntilEnd appends data until a limit (either a PushLimit or the end
  // of the stream). Normal payloads come from length-delimited fields, which
  // have an explicit size. Reading until a limit only comes up when the string
  // takes the place of a protobuf, i.e. RawMessage/StringRawMessage, lazy
  // fields and implicit weak messages. We keep these methods private and
  // friend them.
  template <typename A>
  const char* AppendUntilEnd(const char* ptr, const A& append) {
    if (ptr - buffer_end_ > limit_) return nullptr;
    while (limit_ > kSlopBytes) {
      int chunk_size = buffer_end_ + kSlopBytes - ptr;
      GOOGLE_DCHECK_GE(chunk_size, 0);
      append(ptr, chunk_size);
      ptr = Next();
      if (ptr == nullptr) return limit_end_;
      ptr += kSlopBytes;
    }
    auto end = buffer_end_ + limit_;
    GOOGLE_DCHECK(end >= ptr);
    append(ptr, end - ptr);
    return end;
  }

  PROTOBUF_MUST_USE_RESULT const char* AppendString(const char* ptr,
                                                    std::string* str) {
    return AppendUntilEnd(
        ptr, [str](const char* p, ptrdiff_t s) { str->append(p, s); });
  }
  friend class ImplicitWeakMessage;
};

// ParseContext holds all data that is global to the entire parse. Most
// importantly it contains the input stream, but it also keeps the recursion
// depth and stores the end-group tag, in case a parser ended on an end-group
// tag, to verify matching start/end group tags.
class PROTOBUF_EXPORT ParseContext : public EpsCopyInputStream {
 public:
  struct Data {
    const DescriptorPool* pool = nullptr;
    MessageFactory* factory = nullptr;
  };

  template <typename... T>
  ParseContext(int depth, bool aliasing, const char** start, T&&... args)
      : EpsCopyInputStream(aliasing), depth_(depth) {
    *start = InitFrom(std::forward<T>(args)...);
  }

  void TrackCorrectEnding() { group_depth_ = 0; }

  bool Done(const char** ptr) { return DoneWithCheck(ptr, group_depth_); }

  int depth() const { return depth_; }

  Data& data() { return data_; }
  const Data& data() const { return data_; }

  template <typename T>
  PROTOBUF_MUST_USE_RESULT const char* ParseMessage(T* msg, const char* ptr);
  // We outline when the type is generic and we go through a virtual call.
  const char* ParseMessage(MessageLite* msg, const char* ptr);
  const char* ParseMessage(Message* msg, const char* ptr);

  template <typename T>
  PROTOBUF_MUST_USE_RESULT PROTOBUF_ALWAYS_INLINE const char* ParseGroup(
      T* msg, const char* ptr, uint32 tag) {
    if (--depth_ < 0) return nullptr;
    group_depth_++;
    ptr = msg->_InternalParse(ptr, this);
    group_depth_--;
    depth_++;
    if (PROTOBUF_PREDICT_FALSE(!ConsumeEndGroup(tag))) return nullptr;
    return ptr;
  }

 private:
  // The context keeps an internal stack to keep track of the recursive
  // part of the parse state.
  // Current depth of the active parser; depth counts down.
  // This is used to limit recursion depth (to prevent overflow on malicious
  // data), but is also used to index into stack_ to store the current state.
  int depth_;
  // Unfortunately necessary for the fringe case of ending on a 0 or end-group
  // tag in the last kSlopBytes of a ZeroCopyInputStream chunk.
  int group_depth_ = INT_MIN;
  Data data_;
};
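
// A minimal sketch of driving a top-level parse (assuming `msg` is a
// generated message, which provides _InternalParse, and `input` is an
// io::ZeroCopyInputStream*; the recursion limit of 100 mirrors the usual
// default but is an assumption here, as is the exact success check):
//
//   const char* ptr;
//   ParseContext ctx(/*depth=*/100, /*aliasing=*/false, &ptr, input);
//   ptr = msg._InternalParse(ptr, &ctx);
//   bool ok = ptr != nullptr && ctx.EndedAtEndOfStream();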

template <uint32 tag>
bool ExpectTag(const char* ptr) {
  if (tag < 128) {
    return *ptr == tag;
  } else {
    static_assert(tag < 128 * 128, "We only expect tags for 1 or 2 bytes");
    char buf[2] = {static_cast<char>(tag | 0x80), static_cast<char>(tag >> 7)};
    return std::memcmp(ptr, buf, 2) == 0;
  }
}
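
// For example, field number 1 with wire type 2 (length-delimited) has tag
// (1 << 3) | 2 == 0x0A, so a parser can probe for it with:
//
//   if (ExpectTag<0x0A>(ptr)) {
//     // fast path for field 1
//   }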

template <int>
struct EndianHelper;

template <>
struct EndianHelper<1> {
  static uint8 Load(const void* p) { return *static_cast<const uint8*>(p); }
};

template <>
struct EndianHelper<2> {
  static uint16 Load(const void* p) {
    uint16 tmp;
    std::memcpy(&tmp, p, 2);
#ifndef PROTOBUF_LITTLE_ENDIAN
    tmp = bswap_16(tmp);
#endif
    return tmp;
  }
};

template <>
struct EndianHelper<4> {
  static uint32 Load(const void* p) {
    uint32 tmp;
    std::memcpy(&tmp, p, 4);
#ifndef PROTOBUF_LITTLE_ENDIAN
    tmp = bswap_32(tmp);
#endif
    return tmp;
  }
};

template <>
struct EndianHelper<8> {
  static uint64 Load(const void* p) {
    uint64 tmp;
    std::memcpy(&tmp, p, 8);
#ifndef PROTOBUF_LITTLE_ENDIAN
    tmp = bswap_64(tmp);
#endif
    return tmp;
  }
};

template <typename T>
T UnalignedLoad(const char* p) {
  auto tmp = EndianHelper<sizeof(T)>::Load(p);
  T res;
  memcpy(&res, &tmp, sizeof(T));
  return res;
}
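
// For example, a fixed32 payload can be read from any (possibly misaligned)
// position; the memcpy plus conditional byte swap yields the little-endian
// wire value on hosts of either endianness:
//
//   const char raw[4] = {'\x01', '\x00', '\x00', '\x00'};
//   uint32 v = UnalignedLoad<uint32>(raw);  // v == 1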

PROTOBUF_EXPORT
std::pair<const char*, uint32> VarintParseSlow32(const char* p, uint32 res);
PROTOBUF_EXPORT
std::pair<const char*, uint64> VarintParseSlow64(const char* p, uint32 res);

inline const char* VarintParseSlow(const char* p, uint32 res, uint32* out) {
  auto tmp = VarintParseSlow32(p, res);
  *out = tmp.second;
  return tmp.first;
}

inline const char* VarintParseSlow(const char* p, uint32 res, uint64* out) {
  auto tmp = VarintParseSlow64(p, res);
  *out = tmp.second;
  return tmp.first;
}

template <typename T>
PROTOBUF_MUST_USE_RESULT const char* VarintParse(const char* p, T* out) {
  auto ptr = reinterpret_cast<const uint8*>(p);
  uint32 res = ptr[0];
  if (!(res & 0x80)) {
    *out = res;
    return p + 1;
  }
  uint32 byte = ptr[1];
  res += (byte - 1) << 7;
  if (!(byte & 0x80)) {
    *out = res;
    return p + 2;
  }
  return VarintParseSlow(p, res, out);
}
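
// A small worked example (hedged; assumes the buffer obeys the kSlopBytes
// readability invariant described above):
//
//   char buf[16] = {'\x96', '\x01'};  // varint encoding of 150
//   uint32 value;
//   const char* end = VarintParse(buf, &value);
//   // value == 150 and end == buf + 2. Note how the (byte - 1) << 7 term
//   // also cancels the 0x80 continuation bit still present in res from the
//   // first byte.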

// Used for tags, could read up to 5 bytes which must be available.
// Caller must ensure it's safe to call.

PROTOBUF_EXPORT
std::pair<const char*, uint32> ReadTagFallback(const char* p, uint32 res);

// Same as VarintParse but accepts 5 bytes at most.
inline const char* ReadTag(const char* p, uint32* out, uint32 /*max_tag*/ = 0) {
  uint32 res = static_cast<uint8>(p[0]);
  if (res < 128) {
    *out = res;
    return p + 1;
  }
  uint32 second = static_cast<uint8>(p[1]);
  res += (second - 1) << 7;
  if (second < 128) {
    *out = res;
    return p + 2;
  }
  auto tmp = ReadTagFallback(p, res);
  *out = tmp.second;
  return tmp.first;
}

// Decodes 2 consecutive bytes of a varint and returns the value, shifted left
// by 1. It simultaneously updates *ptr to *ptr + 1 or *ptr + 2, depending on
// whether the first byte's continuation bit is set.
// If bit 15 of the return value is set (equivalent to the continuation bits
// of both bytes being set) the varint continues, otherwise the parse is done.
// On x86 this compiles down to
//   movsx eax, dil
//   add edi, eax
//   adc [rsi], 1
//   add eax, eax
//   and eax, edi
inline uint32 DecodeTwoBytes(const char** ptr) {
  uint32 value = UnalignedLoad<uint16>(*ptr);
  // Sign extend the low byte continuation bit.
  uint32_t x = static_cast<int8_t>(value);
  // This add is an amazing operation, it cancels the low byte continuation bit
  // from x, transferring it to the carry. Simultaneously it also shifts the 7
  // LSBs left by one, tightly against the high byte varint bits. Hence value
  // now contains the unpacked value shifted left by 1.
  value += x;
  // Use the carry to update the ptr appropriately.
  *ptr += value < x ? 2 : 1;
  return value & (x + x);  // Mask out the high byte iff no continuation.
}

// More efficient varint parsing for big varints.
inline const char* ParseBigVarint(const char* p, uint64* out) {
  auto pnew = p;
  auto tmp = DecodeTwoBytes(&pnew);
  uint64 res = tmp >> 1;
  if (PROTOBUF_PREDICT_TRUE(std::int16_t(tmp) >= 0)) {
    *out = res;
    return pnew;
  }
  for (std::uint32_t i = 1; i < 5; i++) {
    pnew = p + 2 * i;
    tmp = DecodeTwoBytes(&pnew);
    res += (static_cast<std::uint64_t>(tmp) - 2) << (14 * i - 1);
    if (PROTOBUF_PREDICT_TRUE(std::int16_t(tmp) >= 0)) {
      *out = res;
      return pnew;
    }
  }
  return nullptr;
}
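
// A worked example (hedged) of the two-byte fast path: 300 encodes as the
// varint bytes {0xAC, 0x02}.
//
//   char buf[16] = {'\xAC', '\x02'};  // slop-padded per the invariant above
//   uint64 value;
//   const char* end = ParseBigVarint(buf, &value);
//   // DecodeTwoBytes returns 600 (the decoded value shifted left by 1) and
//   // advances past both bytes; bit 15 is clear, so the loop is skipped,
//   // value == 300 and end == buf + 2.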

PROTOBUF_EXPORT
std::pair<const char*, int32> ReadSizeFallback(const char* p, uint32 first);
// Used for sizes, could read up to 5 bytes which must be available.
// Additionally it makes sure the unsigned value fits in an int32, otherwise
// returns nullptr. Caller must ensure it's safe to call.
inline uint32 ReadSize(const char** pp) {
  auto p = *pp;
  uint32 res = static_cast<uint8>(p[0]);
  if (res < 128) {
    *pp = p + 1;
    return res;
  }
  auto x = ReadSizeFallback(p, res);
  *pp = x.first;
  return x.second;
}

// Some convenience functions to simplify the generated parse loop code.
// Returning the value and updating the buffer pointer allows for nicer
// function composition. We rely on the compiler to inline these.
// Also, in debug compiles, having locally scoped variables tends to generate
// stack frames that scale as O(num fields).
inline uint64 ReadVarint64(const char** p) {
  uint64 tmp;
  *p = VarintParse(*p, &tmp);
  return tmp;
}

inline uint32 ReadVarint32(const char** p) {
  uint32 tmp;
  *p = VarintParse(*p, &tmp);
  return tmp;
}

inline int64 ReadVarintZigZag64(const char** p) {
  uint64 tmp;
  *p = VarintParse(*p, &tmp);
  return WireFormatLite::ZigZagDecode64(tmp);
}

inline int32 ReadVarintZigZag32(const char** p) {
  uint64 tmp;
  *p = VarintParse(*p, &tmp);
  return WireFormatLite::ZigZagDecode32(static_cast<uint32>(tmp));
}

template <typename T>
PROTOBUF_MUST_USE_RESULT const char* ParseContext::ParseMessage(
    T* msg, const char* ptr) {
  int size = ReadSize(&ptr);
  if (!ptr) return nullptr;
  auto old = PushLimit(ptr, size);
  if (--depth_ < 0) return nullptr;
  ptr = msg->_InternalParse(ptr, this);
  if (PROTOBUF_PREDICT_FALSE(ptr == nullptr)) return nullptr;
  depth_++;
  if (!PopLimit(old)) return nullptr;
  return ptr;
}

template <typename Add>
const char* ReadPackedVarintArray(const char* ptr, const char* end, Add add) {
  while (ptr < end) {
    uint64 varint;
    ptr = VarintParse(ptr, &varint);
    if (ptr == nullptr) return nullptr;
    add(varint);
  }
  return ptr;
}

template <typename Add>
const char* EpsCopyInputStream::ReadPackedVarint(const char* ptr, Add add) {
  int size = ReadSize(&ptr);
  if (ptr == nullptr) return nullptr;
  int chunk_size = buffer_end_ - ptr;
  while (size > chunk_size) {
    ptr = ReadPackedVarintArray(ptr, buffer_end_, add);
    if (ptr == nullptr) return nullptr;
    int overrun = ptr - buffer_end_;
    GOOGLE_DCHECK(overrun >= 0 && overrun <= kSlopBytes);
    if (size - chunk_size <= kSlopBytes) {
      // The current buffer contains all the information needed, we don't need
      // to flip buffers. However we must parse from a buffer with enough space
      // so we are not prone to a buffer overflow.
      char buf[kSlopBytes + 10] = {};
      std::memcpy(buf, buffer_end_, kSlopBytes);
      GOOGLE_CHECK_LE(size - chunk_size, kSlopBytes);
      auto end = buf + (size - chunk_size);
      auto res = ReadPackedVarintArray(buf + overrun, end, add);
      if (res == nullptr || res != end) return nullptr;
      return buffer_end_ + (res - buf);
    }
    size -= overrun + chunk_size;
    GOOGLE_DCHECK_GT(size, 0);
    // We must flip buffers.
    if (limit_ <= kSlopBytes) return nullptr;
    ptr = Next();
    if (ptr == nullptr) return nullptr;
    ptr += overrun;
    chunk_size = buffer_end_ - ptr;
  }
  auto end = ptr + size;
  ptr = ReadPackedVarintArray(ptr, end, add);
  return end == ptr ? ptr : nullptr;
}
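
// A minimal usage sketch (hedged), with ptr positioned just past the field's
// tag: this is essentially what the packed varint parsers below do to fill a
// RepeatedField<int32>:
//
//   ptr = ctx->ReadPackedVarint(ptr, [field](uint64 v) {
//     field->Add(static_cast<int32>(v));
//   });
//   // ptr is nullptr on a malformed or truncated payload.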

// Helper for verification of UTF-8.
PROTOBUF_EXPORT
bool VerifyUTF8(StringPiece s, const char* field_name);

inline bool VerifyUTF8(const std::string* s, const char* field_name) {
  return VerifyUTF8(*s, field_name);
}

// All the string parsers with or without UTF-8 checking and for all CTypes.
PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* InlineGreedyStringParser(
    std::string* s, const char* ptr, ParseContext* ctx);


// Enable any of the commented lines below to debug which parse function is
// failing.

#define GOOGLE_PROTOBUF_ASSERT_RETURN(predicate, ret) \
  if (!(predicate)) {                                 \
    /* ::raise(SIGINT); */                            \
    /* GOOGLE_LOG(ERROR) << "Parse failure"; */       \
    return ret;                                       \
  }

#define GOOGLE_PROTOBUF_PARSER_ASSERT(predicate) \
  GOOGLE_PROTOBUF_ASSERT_RETURN(predicate, nullptr)

template <typename T>
PROTOBUF_MUST_USE_RESULT const char* FieldParser(uint64 tag, T& field_parser,
                                                 const char* ptr,
                                                 ParseContext* ctx) {
  uint32 number = tag >> 3;
  GOOGLE_PROTOBUF_PARSER_ASSERT(number != 0);
  using WireType = internal::WireFormatLite::WireType;
  switch (tag & 7) {
    case WireType::WIRETYPE_VARINT: {
      uint64 value;
      ptr = VarintParse(ptr, &value);
      GOOGLE_PROTOBUF_PARSER_ASSERT(ptr);
      field_parser.AddVarint(number, value);
      break;
    }
    case WireType::WIRETYPE_FIXED64: {
      uint64 value = UnalignedLoad<uint64>(ptr);
      ptr += 8;
      field_parser.AddFixed64(number, value);
      break;
    }
    case WireType::WIRETYPE_LENGTH_DELIMITED: {
      ptr = field_parser.ParseLengthDelimited(number, ptr, ctx);
      GOOGLE_PROTOBUF_PARSER_ASSERT(ptr);
      break;
    }
    case WireType::WIRETYPE_START_GROUP: {
      ptr = field_parser.ParseGroup(number, ptr, ctx);
      GOOGLE_PROTOBUF_PARSER_ASSERT(ptr);
      break;
    }
    case WireType::WIRETYPE_END_GROUP: {
      GOOGLE_LOG(FATAL) << "Can't happen";
      break;
    }
    case WireType::WIRETYPE_FIXED32: {
      uint32 value = UnalignedLoad<uint32>(ptr);
      ptr += 4;
      field_parser.AddFixed32(number, value);
      break;
    }
    default:
      return nullptr;
  }
  return ptr;
}

template <typename T>
PROTOBUF_MUST_USE_RESULT const char* WireFormatParser(T& field_parser,
                                                      const char* ptr,
                                                      ParseContext* ctx) {
  while (!ctx->Done(&ptr)) {
    uint32 tag;
    ptr = ReadTag(ptr, &tag);
    GOOGLE_PROTOBUF_PARSER_ASSERT(ptr != nullptr);
    if (tag == 0 || (tag & 7) == 4) {
      ctx->SetLastTag(tag);
      return ptr;
    }
    ptr = FieldParser(tag, field_parser, ptr, ctx);
    GOOGLE_PROTOBUF_PARSER_ASSERT(ptr != nullptr);
  }
  return ptr;
}
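
// A minimal sketch of a field_parser satisfying the interface FieldParser and
// WireFormatParser expect. This hypothetical CountingParser just tallies
// fields; a real handler (e.g. the unknown-field writers) records the values
// instead:
//
//   struct CountingParser {
//     int fields = 0;
//     void AddVarint(uint32 num, uint64 val) { ++fields; }
//     void AddFixed64(uint32 num, uint64 val) { ++fields; }
//     void AddFixed32(uint32 num, uint32 val) { ++fields; }
//     const char* ParseLengthDelimited(uint32 num, const char* ptr,
//                                      ParseContext* ctx) {
//       int size = ReadSize(&ptr);
//       if (ptr == nullptr) return nullptr;
//       ++fields;
//       return ctx->Skip(ptr, size);
//     }
//     const char* ParseGroup(uint32 num, const char* ptr, ParseContext* ctx) {
//       ++fields;
//       // Recurse into the group; ParseContext checks the matching end tag.
//       return ctx->ParseGroup(this, ptr, num * 8 + 3);
//     }
//     // Needed by ParseContext::ParseGroup to drive the nested parse.
//     const char* _InternalParse(const char* ptr, ParseContext* ctx) {
//       return WireFormatParser(*this, ptr, ctx);
//     }
//   };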

// The packed parsers parse repeated numeric primitives directly into the
// corresponding field.

// These are the packed varints:
PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedInt32Parser(
    void* object, const char* ptr, ParseContext* ctx);
PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedUInt32Parser(
    void* object, const char* ptr, ParseContext* ctx);
PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedInt64Parser(
    void* object, const char* ptr, ParseContext* ctx);
PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedUInt64Parser(
    void* object, const char* ptr, ParseContext* ctx);
PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedSInt32Parser(
    void* object, const char* ptr, ParseContext* ctx);
PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedSInt64Parser(
    void* object, const char* ptr, ParseContext* ctx);
PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedEnumParser(
    void* object, const char* ptr, ParseContext* ctx);

template <typename T>
PROTOBUF_MUST_USE_RESULT const char* PackedEnumParser(
    void* object, const char* ptr, ParseContext* ctx, bool (*is_valid)(int),
    InternalMetadata* metadata, int field_num) {
  return ctx->ReadPackedVarint(
      ptr, [object, is_valid, metadata, field_num](uint64 val) {
        if (is_valid(val)) {
          static_cast<RepeatedField<int>*>(object)->Add(val);
        } else {
          WriteVarint(field_num, val, metadata->mutable_unknown_fields<T>());
        }
      });
}

template <typename T>
PROTOBUF_MUST_USE_RESULT const char* PackedEnumParserArg(
    void* object, const char* ptr, ParseContext* ctx,
    bool (*is_valid)(const void*, int), const void* data,
    InternalMetadata* metadata, int field_num) {
  return ctx->ReadPackedVarint(
      ptr, [object, is_valid, data, metadata, field_num](uint64 val) {
        if (is_valid(data, val)) {
          static_cast<RepeatedField<int>*>(object)->Add(val);
        } else {
          WriteVarint(field_num, val, metadata->mutable_unknown_fields<T>());
        }
      });
}

PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedBoolParser(
    void* object, const char* ptr, ParseContext* ctx);
PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedFixed32Parser(
    void* object, const char* ptr, ParseContext* ctx);
PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedSFixed32Parser(
    void* object, const char* ptr, ParseContext* ctx);
PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedFixed64Parser(
    void* object, const char* ptr, ParseContext* ctx);
PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedSFixed64Parser(
    void* object, const char* ptr, ParseContext* ctx);
PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedFloatParser(
    void* object, const char* ptr, ParseContext* ctx);
PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedDoubleParser(
    void* object, const char* ptr, ParseContext* ctx);

// This is the only recursive parser.
PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* UnknownGroupLiteParse(
    std::string* unknown, const char* ptr, ParseContext* ctx);
// This is a helper for UnknownGroupLiteParse, but it is actually also useful
// in the generated code. It uses overloading on std::string* vs
// UnknownFieldSet* to make the generated code isomorphic between full and
// lite.
PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* UnknownFieldParse(
    uint32 tag, std::string* unknown, const char* ptr, ParseContext* ctx);

}  // namespace internal
}  // namespace protobuf
}  // namespace google

#include <google/protobuf/port_undef.inc>

#endif  // GOOGLE_PROTOBUF_PARSE_CONTEXT_H__