• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc.  All rights reserved.
3 // https://developers.google.com/protocol-buffers/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 //     * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 //     * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 //     * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 
31 #ifndef GOOGLE_PROTOBUF_PARSE_CONTEXT_H__
32 #define GOOGLE_PROTOBUF_PARSE_CONTEXT_H__
33 
34 #include <cstdint>
35 #include <cstring>
36 #include <string>
37 
38 #include <google/protobuf/io/coded_stream.h>
39 #include <google/protobuf/io/zero_copy_stream.h>
40 #include <google/protobuf/arena.h>
41 #include <google/protobuf/arenastring.h>
42 #include <google/protobuf/implicit_weak_message.h>
43 #include <google/protobuf/metadata_lite.h>
44 #include <google/protobuf/port.h>
45 #include <google/protobuf/repeated_field.h>
46 #include <google/protobuf/wire_format_lite.h>
47 #include <google/protobuf/stubs/strutil.h>
48 
49 #include <google/protobuf/port_def.inc>
50 
51 
52 namespace google {
53 namespace protobuf {
54 
55 class UnknownFieldSet;
56 class DescriptorPool;
57 class MessageFactory;
58 
59 namespace internal {
60 
61 // Template code below needs to know about the existence of these functions.
// Template code below needs to know about the existence of these functions.
// `num` is a field number (PackedEnumParser below passes its field_num) and
// `val` is the payload to record in `s`. The definitions live outside this
// header.
PROTOBUF_EXPORT void WriteVarint(uint32 num, uint64 val, std::string* s);
PROTOBUF_EXPORT void WriteLengthDelimited(uint32 num, StringPiece val,
                                          std::string* s);
// Inline because it is just forwarding to s->WriteVarint
// NOTE(review): the second overload presumably forwards to the matching
// UnknownFieldSet method as well -- confirm against its definition.
inline void WriteVarint(uint32 num, uint64 val, UnknownFieldSet* s);
inline void WriteLengthDelimited(uint32 num, StringPiece val,
                                 UnknownFieldSet* s);
69 
70 
71 // The basic abstraction the parser is designed for is a slight modification
72 // of the ZeroCopyInputStream (ZCIS) abstraction. A ZCIS presents a serialized
73 // stream as a series of buffers that concatenate to the full stream.
74 // Pictorially a ZCIS presents a stream in chunks like so
75 // [---------------------------------------------------------------]
76 // [---------------------] chunk 1
77 //                      [----------------------------] chunk 2
78 //                                          chunk 3 [--------------]
79 //
// Where the '-' represent the bytes, which are vertically lined up with the
// bytes of the stream. The proto parser requires its input to be presented
// similarly, with the extra property that each chunk has kSlopBytes past its
// end that overlap with the first kSlopBytes of the next chunk, or, if there
// is no next chunk, that it is at least still valid to read those bytes.
// Again, pictorially, we now have
86 //
87 // [---------------------------------------------------------------]
88 // [-------------------....] chunk 1
89 //                    [------------------------....] chunk 2
90 //                                    chunk 3 [------------------..**]
91 //                                                      chunk 4 [--****]
// Here '-' means the bytes of the stream or chunk and '.' means bytes past the
// chunk that match up with the start of the next chunk. Above, each chunk has
// 4 '.' after the chunk. In case these 'overflow' bytes represent bytes
// past the stream, indicated by '*' above, their values are unspecified. It is
// still legal to read them (i.e. it should not segfault). Reading past the
// end should be detected by the user and indicated as an error.
98 //
// The reason for this, admittedly unconventional, invariant is to ruthlessly
// optimize the protobuf parser. Having an overlap helps in two important ways.
// Firstly, it alleviates having to perform bounds checks if a piece of code
// is guaranteed to not read more than kSlopBytes. Secondly, and more
// importantly, the protobuf wireformat is such that reading a key/value pair is
// always less than 16 bytes. This removes the need to change to the next buffer
// in the middle of reading primitive values. Hence there is no need to store
// and load the current position.
107 
108 class PROTOBUF_EXPORT EpsCopyInputStream {
109  public:
110   enum { kSlopBytes = 16, kMaxCordBytesToCopy = 512 };
111 
EpsCopyInputStream(bool enable_aliasing)112   explicit EpsCopyInputStream(bool enable_aliasing)
113       : aliasing_(enable_aliasing ? kOnPatch : kNoAliasing) {}
114 
BackUp(const char * ptr)115   void BackUp(const char* ptr) {
116     GOOGLE_DCHECK(ptr <= buffer_end_ + kSlopBytes);
117     int count;
118     if (next_chunk_ == buffer_) {
119       count = static_cast<int>(buffer_end_ + kSlopBytes - ptr);
120     } else {
121       count = size_ + static_cast<int>(buffer_end_ - ptr);
122     }
123     if (count > 0) StreamBackUp(count);
124   }
125 
126   // If return value is negative it's an error
PushLimit(const char * ptr,int limit)127   PROTOBUF_MUST_USE_RESULT int PushLimit(const char* ptr, int limit) {
128     GOOGLE_DCHECK(limit >= 0 && limit <= INT_MAX - kSlopBytes);
129     // This add is safe due to the invariant above, because
130     // ptr - buffer_end_ <= kSlopBytes.
131     limit += static_cast<int>(ptr - buffer_end_);
132     limit_end_ = buffer_end_ + (std::min)(0, limit);
133     auto old_limit = limit_;
134     limit_ = limit;
135     return old_limit - limit;
136   }
137 
PopLimit(int delta)138   PROTOBUF_MUST_USE_RESULT bool PopLimit(int delta) {
139     if (PROTOBUF_PREDICT_FALSE(!EndedAtLimit())) return false;
140     limit_ = limit_ + delta;
141     // TODO(gerbens) We could remove this line and hoist the code to
142     // DoneFallback. Study the perf/bin-size effects.
143     limit_end_ = buffer_end_ + (std::min)(0, limit_);
144     return true;
145   }
146 
Skip(const char * ptr,int size)147   PROTOBUF_MUST_USE_RESULT const char* Skip(const char* ptr, int size) {
148     if (size <= buffer_end_ + kSlopBytes - ptr) {
149       return ptr + size;
150     }
151     return SkipFallback(ptr, size);
152   }
ReadString(const char * ptr,int size,std::string * s)153   PROTOBUF_MUST_USE_RESULT const char* ReadString(const char* ptr, int size,
154                                                   std::string* s) {
155     if (size <= buffer_end_ + kSlopBytes - ptr) {
156       s->assign(ptr, size);
157       return ptr + size;
158     }
159     return ReadStringFallback(ptr, size, s);
160   }
AppendString(const char * ptr,int size,std::string * s)161   PROTOBUF_MUST_USE_RESULT const char* AppendString(const char* ptr, int size,
162                                                     std::string* s) {
163     if (size <= buffer_end_ + kSlopBytes - ptr) {
164       s->append(ptr, size);
165       return ptr + size;
166     }
167     return AppendStringFallback(ptr, size, s);
168   }
169 
170   template <typename Tag, typename T>
171   PROTOBUF_MUST_USE_RESULT const char* ReadRepeatedFixed(const char* ptr,
172                                                          Tag expected_tag,
173                                                          RepeatedField<T>* out);
174 
175   template <typename T>
176   PROTOBUF_MUST_USE_RESULT const char* ReadPackedFixed(const char* ptr,
177                                                        int size,
178                                                        RepeatedField<T>* out);
179   template <typename Add>
180   PROTOBUF_MUST_USE_RESULT const char* ReadPackedVarint(const char* ptr,
181                                                         Add add);
182 
LastTag()183   uint32 LastTag() const { return last_tag_minus_1_ + 1; }
ConsumeEndGroup(uint32 start_tag)184   bool ConsumeEndGroup(uint32 start_tag) {
185     bool res = last_tag_minus_1_ == start_tag;
186     last_tag_minus_1_ = 0;
187     return res;
188   }
EndedAtLimit()189   bool EndedAtLimit() const { return last_tag_minus_1_ == 0; }
EndedAtEndOfStream()190   bool EndedAtEndOfStream() const { return last_tag_minus_1_ == 1; }
SetLastTag(uint32 tag)191   void SetLastTag(uint32 tag) { last_tag_minus_1_ = tag - 1; }
SetEndOfStream()192   void SetEndOfStream() { last_tag_minus_1_ = 1; }
IsExceedingLimit(const char * ptr)193   bool IsExceedingLimit(const char* ptr) {
194     return ptr > limit_end_ &&
195            (next_chunk_ == nullptr || ptr - buffer_end_ > limit_);
196   }
BytesUntilLimit(const char * ptr)197   int BytesUntilLimit(const char* ptr) const {
198     return limit_ + static_cast<int>(buffer_end_ - ptr);
199   }
200   // Returns true if more data is available, if false is returned one has to
201   // call Done for further checks.
DataAvailable(const char * ptr)202   bool DataAvailable(const char* ptr) { return ptr < limit_end_; }
203 
204  protected:
205   // Returns true is limit (either an explicit limit or end of stream) is
206   // reached. It aligns *ptr across buffer seams.
207   // If limit is exceeded it returns true and ptr is set to null.
DoneWithCheck(const char ** ptr,int d)208   bool DoneWithCheck(const char** ptr, int d) {
209     GOOGLE_DCHECK(*ptr);
210     if (PROTOBUF_PREDICT_TRUE(*ptr < limit_end_)) return false;
211     // No need to fetch buffer if we ended on a limit in the slop region
212     if ((*ptr - buffer_end_) == limit_) return true;
213     auto res = DoneFallback(*ptr, d);
214     *ptr = res.first;
215     return res.second;
216   }
217 
InitFrom(StringPiece flat)218   const char* InitFrom(StringPiece flat) {
219     overall_limit_ = 0;
220     if (flat.size() > kSlopBytes) {
221       limit_ = kSlopBytes;
222       limit_end_ = buffer_end_ = flat.data() + flat.size() - kSlopBytes;
223       next_chunk_ = buffer_;
224       if (aliasing_ == kOnPatch) aliasing_ = kNoDelta;
225       return flat.data();
226     } else {
227       std::memcpy(buffer_, flat.data(), flat.size());
228       limit_ = 0;
229       limit_end_ = buffer_end_ = buffer_ + flat.size();
230       next_chunk_ = nullptr;
231       if (aliasing_ == kOnPatch) {
232         aliasing_ = reinterpret_cast<std::uintptr_t>(flat.data()) -
233                     reinterpret_cast<std::uintptr_t>(buffer_);
234       }
235       return buffer_;
236     }
237   }
238 
239   const char* InitFrom(io::ZeroCopyInputStream* zcis);
240 
InitFrom(io::ZeroCopyInputStream * zcis,int limit)241   const char* InitFrom(io::ZeroCopyInputStream* zcis, int limit) {
242     if (limit == -1) return InitFrom(zcis);
243     overall_limit_ = limit;
244     auto res = InitFrom(zcis);
245     limit_ = limit - static_cast<int>(buffer_end_ - res);
246     limit_end_ = buffer_end_ + (std::min)(0, limit_);
247     return res;
248   }
249 
250  private:
251   const char* limit_end_;  // buffer_end_ + min(limit_, 0)
252   const char* buffer_end_;
253   const char* next_chunk_;
254   int size_;
255   int limit_;  // relative to buffer_end_;
256   io::ZeroCopyInputStream* zcis_ = nullptr;
257   char buffer_[2 * kSlopBytes] = {};
258   enum { kNoAliasing = 0, kOnPatch = 1, kNoDelta = 2 };
259   std::uintptr_t aliasing_ = kNoAliasing;
260   // This variable is used to communicate how the parse ended, in order to
261   // completely verify the parsed data. A wire-format parse can end because of
262   // one of the following conditions:
263   // 1) A parse can end on a pushed limit.
264   // 2) A parse can end on End Of Stream (EOS).
265   // 3) A parse can end on 0 tag (only valid for toplevel message).
266   // 4) A parse can end on an end-group tag.
267   // This variable should always be set to 0, which indicates case 1. If the
268   // parse terminated due to EOS (case 2), it's set to 1. In case the parse
269   // ended due to a terminating tag (case 3 and 4) it's set to (tag - 1).
270   // This var doesn't really belong in EpsCopyInputStream and should be part of
271   // the ParseContext, but case 2 is most easily and optimally implemented in
272   // DoneFallback.
273   uint32 last_tag_minus_1_ = 0;
274   int overall_limit_ = INT_MAX;  // Overall limit independent of pushed limits.
275   // Pretty random large number that seems like a safe allocation on most
276   // systems. TODO(gerbens) do we need to set this as build flag?
277   enum { kSafeStringSize = 50000000 };
278 
279   std::pair<const char*, bool> DoneFallback(const char* ptr, int d);
280   const char* Next(int overrun, int d);
281   const char* SkipFallback(const char* ptr, int size);
282   const char* AppendStringFallback(const char* ptr, int size, std::string* str);
283   const char* ReadStringFallback(const char* ptr, int size, std::string* str);
StreamNext(const void ** data)284   bool StreamNext(const void** data) {
285     bool res = zcis_->Next(data, &size_);
286     if (res) overall_limit_ -= size_;
287     return res;
288   }
StreamBackUp(int count)289   void StreamBackUp(int count) {
290     zcis_->BackUp(count);
291     overall_limit_ += count;
292   }
293 
294   template <typename A>
AppendSize(const char * ptr,int size,const A & append)295   const char* AppendSize(const char* ptr, int size, const A& append) {
296     int chunk_size = buffer_end_ + kSlopBytes - ptr;
297     do {
298       GOOGLE_DCHECK(size > chunk_size);
299       append(ptr, chunk_size);
300       ptr += chunk_size;
301       size -= chunk_size;
302       // DoneFallBack asserts it isn't called when exactly on the limit. If this
303       // happens we fail the parse, as we are at the limit and still more bytes
304       // to read.
305       if (limit_ == kSlopBytes) return nullptr;
306       auto res = DoneFallback(ptr, -1);
307       if (res.second) return nullptr;  // If done we passed the limit
308       ptr = res.first;
309       chunk_size = buffer_end_ + kSlopBytes - ptr;
310     } while (size > chunk_size);
311     append(ptr, size);
312     return ptr + size;
313   }
314 
315   // AppendUntilEnd appends data until a limit (either a PushLimit or end of
316   // stream. Normal payloads are from length delimited fields which have an
317   // explicit size. Reading until limit only comes when the string takes
318   // the place of a protobuf, ie RawMessage/StringRawMessage, lazy fields and
319   // implicit weak messages. We keep these methods private and friend them.
320   template <typename A>
AppendUntilEnd(const char * ptr,const A & append)321   const char* AppendUntilEnd(const char* ptr, const A& append) {
322     while (!DoneWithCheck(&ptr, -1)) {
323       append(ptr, limit_end_ - ptr);
324       ptr = limit_end_;
325     }
326     return ptr;
327   }
328 
AppendString(const char * ptr,std::string * str)329   PROTOBUF_MUST_USE_RESULT const char* AppendString(const char* ptr,
330                                                     std::string* str) {
331     return AppendUntilEnd(
332         ptr, [str](const char* p, ptrdiff_t s) { str->append(p, s); });
333   }
334   friend class ImplicitWeakMessage;
335 };
336 
337 // ParseContext holds all data that is global to the entire parse. Most
338 // importantly it contains the input stream, but also recursion depth and also
339 // stores the end group tag, in case a parser ended on a endgroup, to verify
340 // matching start/end group tags.
341 class PROTOBUF_EXPORT ParseContext : public EpsCopyInputStream {
342  public:
343   struct Data {
344     const DescriptorPool* pool = nullptr;
345     MessageFactory* factory = nullptr;
346   };
347 
348   template <typename... T>
ParseContext(int depth,bool aliasing,const char ** start,T &&...args)349   ParseContext(int depth, bool aliasing, const char** start, T&&... args)
350       : EpsCopyInputStream(aliasing), depth_(depth) {
351     *start = InitFrom(std::forward<T>(args)...);
352   }
353 
TrackCorrectEnding()354   void TrackCorrectEnding() { group_depth_ = 0; }
355 
Done(const char ** ptr)356   bool Done(const char** ptr) { return DoneWithCheck(ptr, group_depth_); }
DoneNoSlopCheck(const char ** ptr)357   bool DoneNoSlopCheck(const char** ptr) { return DoneWithCheck(ptr, -1); }
358 
depth()359   int depth() const { return depth_; }
360 
data()361   Data& data() { return data_; }
data()362   const Data& data() const { return data_; }
363 
364   template <typename T>
365   PROTOBUF_MUST_USE_RESULT const char* ParseMessage(T* msg, const char* ptr);
366   // We outline when the type is generic and we go through a virtual
367   const char* ParseMessage(MessageLite* msg, const char* ptr);
368   const char* ParseMessage(Message* msg, const char* ptr);
369 
370   template <typename T>
ParseGroup(T * msg,const char * ptr,uint32 tag)371   PROTOBUF_MUST_USE_RESULT PROTOBUF_ALWAYS_INLINE const char* ParseGroup(
372       T* msg, const char* ptr, uint32 tag) {
373     if (--depth_ < 0) return nullptr;
374     group_depth_++;
375     ptr = msg->_InternalParse(ptr, this);
376     group_depth_--;
377     depth_++;
378     if (PROTOBUF_PREDICT_FALSE(!ConsumeEndGroup(tag))) return nullptr;
379     return ptr;
380   }
381 
382  private:
383   // The context keeps an internal stack to keep track of the recursive
384   // part of the parse state.
385   // Current depth of the active parser, depth counts down.
386   // This is used to limit recursion depth (to prevent overflow on malicious
387   // data), but is also used to index in stack_ to store the current state.
388   int depth_;
389   // Unfortunately necessary for the fringe case of ending on 0 or end-group tag
390   // in the last kSlopBytes of a ZeroCopyInputStream chunk.
391   int group_depth_ = INT_MIN;
392   Data data_;
393 };
394 
// Returns whether the bytes at `ptr` are exactly the varint encoding of the
// compile-time constant `tag`. Only tags whose encoding takes one or two
// bytes are supported; for a two-byte tag both bytes at `ptr` are read.
template <uint32 tag>
bool ExpectTag(const char* ptr) {
  static_assert(tag < 128 * 128, "We only expect tags for 1 or 2 bytes");
  if (tag >= 128) {
    // Low seven bits with the continuation bit set, followed by the rest.
    const char expected[2] = {static_cast<char>(tag | 0x80),
                              static_cast<char>(tag >> 7)};
    return std::memcmp(ptr, expected, 2) == 0;
  }
  return *ptr == tag;
}
405 
406 template <int>
407 struct EndianHelper;
408 
409 template <>
410 struct EndianHelper<1> {
411   static uint8 Load(const void* p) { return *static_cast<const uint8*>(p); }
412 };
413 
414 template <>
415 struct EndianHelper<2> {
416   static uint16 Load(const void* p) {
417     uint16 tmp;
418     std::memcpy(&tmp, p, 2);
419 #ifndef PROTOBUF_LITTLE_ENDIAN
420     tmp = bswap_16(tmp);
421 #endif
422     return tmp;
423   }
424 };
425 
426 template <>
427 struct EndianHelper<4> {
428   static uint32 Load(const void* p) {
429     uint32 tmp;
430     std::memcpy(&tmp, p, 4);
431 #ifndef PROTOBUF_LITTLE_ENDIAN
432     tmp = bswap_32(tmp);
433 #endif
434     return tmp;
435   }
436 };
437 
438 template <>
439 struct EndianHelper<8> {
440   static uint64 Load(const void* p) {
441     uint64 tmp;
442     std::memcpy(&tmp, p, 8);
443 #ifndef PROTOBUF_LITTLE_ENDIAN
444     tmp = bswap_64(tmp);
445 #endif
446     return tmp;
447   }
448 };
449 
450 template <typename T>
451 T UnalignedLoad(const char* p) {
452   auto tmp = EndianHelper<sizeof(T)>::Load(p);
453   T res;
454   memcpy(&res, &tmp, sizeof(T));
455   return res;
456 }
457 
// Out-of-line continuations of VarintParse, reached when the first two bytes
// of the varint both have their continuation bit set. `p` points at the start
// of the varint and `res` is the partial value VarintParse accumulated from
// those two bytes.
// NOTE(review): judging by the wrappers below, `.first` is the pointer to
// continue parsing at and `.second` the decoded value -- confirm the failure
// convention against the definitions.
PROTOBUF_EXPORT
std::pair<const char*, uint32> VarintParseSlow32(const char* p, uint32 res);
PROTOBUF_EXPORT
std::pair<const char*, uint64> VarintParseSlow64(const char* p, uint32 res);
462 
463 inline const char* VarintParseSlow(const char* p, uint32 res, uint32* out) {
464   auto tmp = VarintParseSlow32(p, res);
465   *out = tmp.second;
466   return tmp.first;
467 }
468 
469 inline const char* VarintParseSlow(const char* p, uint32 res, uint64* out) {
470   auto tmp = VarintParseSlow64(p, res);
471   *out = tmp.second;
472   return tmp.first;
473 }
474 
475 template <typename T>
476 PROTOBUF_MUST_USE_RESULT const char* VarintParse(const char* p, T* out) {
477   auto ptr = reinterpret_cast<const uint8*>(p);
478   uint32 res = ptr[0];
479   if (!(res & 0x80)) {
480     *out = res;
481     return p + 1;
482   }
483   uint32 byte = ptr[1];
484   res += (byte - 1) << 7;
485   if (!(byte & 0x80)) {
486     *out = res;
487     return p + 2;
488   }
489   return VarintParseSlow(p, res, out);
490 }
491 
492 // Used for tags, could read up to 5 bytes which must be available.
493 // Caller must ensure its safe to call.
494 
495 PROTOBUF_EXPORT
496 std::pair<const char*, uint32> ReadTagFallback(const char* p, uint32 res);
497 
498 // Same as ParseVarint but only accept 5 bytes at most.
499 inline const char* ReadTag(const char* p, uint32* out, uint32 /*max_tag*/ = 0) {
500   uint32 res = static_cast<uint8>(p[0]);
501   if (res < 128) {
502     *out = res;
503     return p + 1;
504   }
505   uint32 second = static_cast<uint8>(p[1]);
506   res += (second - 1) << 7;
507   if (second < 128) {
508     *out = res;
509     return p + 2;
510   }
511   auto tmp = ReadTagFallback(p, res);
512   *out = tmp.second;
513   return tmp.first;
514 }
515 
// Decodes 2 consecutive bytes of a varint and returns the value, shifted left
// by 1. It simultaneously updates *ptr to *ptr + 1 or *ptr + 2, depending on
// whether the first byte's continuation bit is set.
// If bit 15 of return value is set (equivalent to the continuation bits of both
// bytes being set) the varint continues, otherwise the parse is done. On x86
// movsx eax, dil
// add edi, eax
// adc [rsi], 1
// add eax, eax
// and eax, edi
inline uint32 DecodeTwoBytes(const char** ptr) {
  // Two bytes are always loaded, even when only the first belongs to the
  // varint.
  uint32 value = UnalignedLoad<uint16>(*ptr);
  // Sign extend the low byte continuation bit
  uint32_t x = static_cast<int8_t>(value);
  // This add is an amazing operation, it cancels the low byte continuation bit
  // from value, transferring it to the carry. Simultaneously it also shifts
  // the 7 LSB left by one, tightly against the high byte varint bits. Hence
  // value now contains the unpacked value shifted left by 1.
  value += x;
  // Use the carry to update the ptr appropriately: the 32-bit sum wrapped
  // around (value < x) exactly when the continuation bit was set.
  *ptr += value < x ? 2 : 1;
  return value & (x + x);  // Mask out the high byte iff no continuation
}
539 
// More efficient varint parsing for big varints
// Decodes the varint at `p` two bytes at a time with DecodeTwoBytes, stores
// the result in `*out` and returns the pointer just past the varint. Returns
// nullptr, leaving `*out` untouched, when the varint has still not ended
// after five byte pairs (ten bytes).
inline const char* ParseBigVarint(const char* p, uint64* out) {
  auto pnew = p;
  auto tmp = DecodeTwoBytes(&pnew);
  // DecodeTwoBytes returns the value shifted left by one; undo that.
  uint64 res = tmp >> 1;
  // Bit 15 clear means the varint ended within this pair.
  if (PROTOBUF_PREDICT_TRUE(std::int16_t(tmp) >= 0)) {
    *out = res;
    return pnew;
  }
  for (std::uint32_t i = 1; i < 5; i++) {
    pnew = p + 2 * i;
    tmp = DecodeTwoBytes(&pnew);
    // Pair i contributes 14 payload bits at bit offset 14 * i; the shift is
    // one less because tmp is already shifted left by one. The "- 2" cancels
    // the continuation bit that the previous pair left behind in res.
    res += (static_cast<std::uint64_t>(tmp) - 2) << (14 * i - 1);
    if (PROTOBUF_PREDICT_TRUE(std::int16_t(tmp) >= 0)) {
      *out = res;
      return pnew;
    }
  }
  return nullptr;
}
560 
561 PROTOBUF_EXPORT
562 std::pair<const char*, int32> ReadSizeFallback(const char* p, uint32 first);
563 // Used for tags, could read up to 5 bytes which must be available. Additionally
564 // it makes sure the unsigned value fits a int32, otherwise returns nullptr.
565 // Caller must ensure its safe to call.
566 inline uint32 ReadSize(const char** pp) {
567   auto p = *pp;
568   uint32 res = static_cast<uint8>(p[0]);
569   if (res < 128) {
570     *pp = p + 1;
571     return res;
572   }
573   auto x = ReadSizeFallback(p, res);
574   *pp = x.first;
575   return x.second;
576 }
577 
578 // Some convenience functions to simplify the generated parse loop code.
579 // Returning the value and updating the buffer pointer allows for nicer
580 // function composition. We rely on the compiler to inline this.
581 // Also in debug compiles having local scoped variables tend to generated
582 // stack frames that scale as O(num fields).
583 inline uint64 ReadVarint64(const char** p) {
584   uint64 tmp;
585   *p = VarintParse(*p, &tmp);
586   return tmp;
587 }
588 
589 inline uint32 ReadVarint32(const char** p) {
590   uint32 tmp;
591   *p = VarintParse(*p, &tmp);
592   return tmp;
593 }
594 
595 inline int64 ReadVarintZigZag64(const char** p) {
596   uint64 tmp;
597   *p = VarintParse(*p, &tmp);
598   return WireFormatLite::ZigZagDecode64(tmp);
599 }
600 
601 inline int32 ReadVarintZigZag32(const char** p) {
602   uint64 tmp;
603   *p = VarintParse(*p, &tmp);
604   return WireFormatLite::ZigZagDecode32(static_cast<uint32>(tmp));
605 }
606 
607 template <typename T>
608 PROTOBUF_MUST_USE_RESULT const char* ParseContext::ParseMessage(
609     T* msg, const char* ptr) {
610   int size = ReadSize(&ptr);
611   if (!ptr) return nullptr;
612   auto old = PushLimit(ptr, size);
613   if (--depth_ < 0) return nullptr;
614   ptr = msg->_InternalParse(ptr, this);
615   if (PROTOBUF_PREDICT_FALSE(ptr == nullptr)) return nullptr;
616   depth_++;
617   if (!PopLimit(old)) return nullptr;
618   return ptr;
619 }
620 
621 template <typename Add>
622 const char* EpsCopyInputStream::ReadPackedVarint(const char* ptr, Add add) {
623   int size = ReadSize(&ptr);
624   if (ptr == nullptr) return nullptr;
625   auto old = PushLimit(ptr, size);
626   if (old < 0) return nullptr;
627   while (!DoneWithCheck(&ptr, -1)) {
628     uint64 varint;
629     ptr = VarintParse(ptr, &varint);
630     if (!ptr) return nullptr;
631     add(varint);
632   }
633   if (!PopLimit(old)) return nullptr;
634   return ptr;
635 }
636 
637 // Helper for verification of utf8
638 PROTOBUF_EXPORT
639 bool VerifyUTF8(StringPiece s, const char* field_name);
640 
641 inline bool VerifyUTF8(const std::string* s, const char* field_name) {
642   return VerifyUTF8(*s, field_name);
643 }
644 
// All the string parsers with or without UTF checking and for all CTypes.
// NOTE(review): only this one parser is declared here. Going by the
// signature it fills `*s` from the input at `ptr` and returns the pointer to
// continue at -- confirm the details against the definition.
PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* InlineGreedyStringParser(
    std::string* s, const char* ptr, ParseContext* ctx);
648 
649 
// Parse-failure helper macros. To debug which parse function is failing,
// uncomment either of the commented-out statements inside
// GOOGLE_PROTOBUF_ASSERT_RETURN.
//
// GOOGLE_PROTOBUF_ASSERT_RETURN(predicate, ret) returns `ret` from the
// enclosing function when `predicate` is false.
// NOTE: it expands to a bare `if` statement, so it must not be used as the
// unbraced body of an if/else.

#define GOOGLE_PROTOBUF_ASSERT_RETURN(predicate, ret) \
  if (!(predicate)) {                                  \
    /*  ::raise(SIGINT);  */                           \
    /*  GOOGLE_LOG(ERROR) << "Parse failure";  */             \
    return ret;                                        \
  }

// Variant for functions that signal failure by returning nullptr.
#define GOOGLE_PROTOBUF_PARSER_ASSERT(predicate) \
  GOOGLE_PROTOBUF_ASSERT_RETURN(predicate, nullptr)
661 
662 template <typename T>
663 PROTOBUF_MUST_USE_RESULT const char* FieldParser(uint64 tag, T& field_parser,
664                                                  const char* ptr,
665                                                  ParseContext* ctx) {
666   uint32 number = tag >> 3;
667   GOOGLE_PROTOBUF_PARSER_ASSERT(number != 0);
668   using WireType = internal::WireFormatLite::WireType;
669   switch (tag & 7) {
670     case WireType::WIRETYPE_VARINT: {
671       uint64 value;
672       ptr = VarintParse(ptr, &value);
673       GOOGLE_PROTOBUF_PARSER_ASSERT(ptr);
674       field_parser.AddVarint(number, value);
675       break;
676     }
677     case WireType::WIRETYPE_FIXED64: {
678       uint64 value = UnalignedLoad<uint64>(ptr);
679       ptr += 8;
680       field_parser.AddFixed64(number, value);
681       break;
682     }
683     case WireType::WIRETYPE_LENGTH_DELIMITED: {
684       ptr = field_parser.ParseLengthDelimited(number, ptr, ctx);
685       GOOGLE_PROTOBUF_PARSER_ASSERT(ptr);
686       break;
687     }
688     case WireType::WIRETYPE_START_GROUP: {
689       ptr = field_parser.ParseGroup(number, ptr, ctx);
690       GOOGLE_PROTOBUF_PARSER_ASSERT(ptr);
691       break;
692     }
693     case WireType::WIRETYPE_END_GROUP: {
694       GOOGLE_LOG(FATAL) << "Can't happen";
695       break;
696     }
697     case WireType::WIRETYPE_FIXED32: {
698       uint32 value = UnalignedLoad<uint32>(ptr);
699       ptr += 4;
700       field_parser.AddFixed32(number, value);
701       break;
702     }
703     default:
704       return nullptr;
705   }
706   return ptr;
707 }
708 
709 template <typename T>
710 PROTOBUF_MUST_USE_RESULT const char* WireFormatParser(T& field_parser,
711                                                       const char* ptr,
712                                                       ParseContext* ctx) {
713   while (!ctx->Done(&ptr)) {
714     uint32 tag;
715     ptr = ReadTag(ptr, &tag);
716     GOOGLE_PROTOBUF_PARSER_ASSERT(ptr != nullptr);
717     if (tag == 0 || (tag & 7) == 4) {
718       ctx->SetLastTag(tag);
719       return ptr;
720     }
721     ptr = FieldParser(tag, field_parser, ptr, ctx);
722     GOOGLE_PROTOBUF_PARSER_ASSERT(ptr != nullptr);
723   }
724   return ptr;
725 }
726 
// The packed parsers parse repeated numeric primitives directly into the
// corresponding field.
// NOTE(review): `object` is type-erased; presumably it points at the
// RepeatedField of the matching element type (the PackedEnumParser template
// below casts it to RepeatedField<int>*) -- confirm against the definitions.

// These are packed varints
PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedInt32Parser(
    void* object, const char* ptr, ParseContext* ctx);
PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedUInt32Parser(
    void* object, const char* ptr, ParseContext* ctx);
PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedInt64Parser(
    void* object, const char* ptr, ParseContext* ctx);
PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedUInt64Parser(
    void* object, const char* ptr, ParseContext* ctx);
PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedSInt32Parser(
    void* object, const char* ptr, ParseContext* ctx);
PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedSInt64Parser(
    void* object, const char* ptr, ParseContext* ctx);
PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedEnumParser(
    void* object, const char* ptr, ParseContext* ctx);
745 
746 template <typename T>
747 PROTOBUF_MUST_USE_RESULT const char* PackedEnumParser(
748     void* object, const char* ptr, ParseContext* ctx, bool (*is_valid)(int),
749     InternalMetadata* metadata, int field_num) {
750   return ctx->ReadPackedVarint(
751       ptr, [object, is_valid, metadata, field_num](uint64 val) {
752         if (is_valid(val)) {
753           static_cast<RepeatedField<int>*>(object)->Add(val);
754         } else {
755           WriteVarint(field_num, val, metadata->mutable_unknown_fields<T>());
756         }
757       });
758 }
759 
760 template <typename T>
761 PROTOBUF_MUST_USE_RESULT const char* PackedEnumParserArg(
762     void* object, const char* ptr, ParseContext* ctx,
763     bool (*is_valid)(const void*, int), const void* data,
764     InternalMetadata* metadata, int field_num) {
765   return ctx->ReadPackedVarint(
766       ptr, [object, is_valid, data, metadata, field_num](uint64 val) {
767         if (is_valid(data, val)) {
768           static_cast<RepeatedField<int>*>(object)->Add(val);
769         } else {
770           WriteVarint(field_num, val, metadata->mutable_unknown_fields<T>());
771         }
772       });
773 }
774 
// Packed parsers for bool and for the fixed-width numeric types. They share
// the signature of the packed varint parsers declared earlier in this header.
PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedBoolParser(
    void* object, const char* ptr, ParseContext* ctx);
PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedFixed32Parser(
    void* object, const char* ptr, ParseContext* ctx);
PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedSFixed32Parser(
    void* object, const char* ptr, ParseContext* ctx);
PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedFixed64Parser(
    void* object, const char* ptr, ParseContext* ctx);
PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedSFixed64Parser(
    void* object, const char* ptr, ParseContext* ctx);
PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedFloatParser(
    void* object, const char* ptr, ParseContext* ctx);
PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedDoubleParser(
    void* object, const char* ptr, ParseContext* ctx);

// This is the only recursive parser.
PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* UnknownGroupLiteParse(
    std::string* unknown, const char* ptr, ParseContext* ctx);
// This is a helper for UnknownGroupLiteParse but is actually also
// useful in the generated code. It uses overload on std::string* vs
// UnknownFieldSet* to make the generated code isomorphic between full and lite.
PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* UnknownFieldParse(
    uint32 tag, std::string* unknown, const char* ptr, ParseContext* ctx);
798 
799 }  // namespace internal
800 }  // namespace protobuf
801 }  // namespace google
802 
803 #include <google/protobuf/port_undef.inc>
804 
805 #endif  // GOOGLE_PROTOBUF_PARSE_CONTEXT_H__
806