• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc.  All rights reserved.
3 // https://developers.google.com/protocol-buffers/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 //     * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 //     * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 //     * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 
31 // Author: kenton@google.com (Kenton Varda)
32 //  Based on original Protocol Buffers design by
33 //  Sanjay Ghemawat, Jeff Dean, and others.
34 //
35 // This file contains the CodedInputStream and CodedOutputStream classes,
36 // which wrap a ZeroCopyInputStream or ZeroCopyOutputStream, respectively,
37 // and allow you to read or write individual pieces of data in various
38 // formats.  In particular, these implement the varint encoding for
39 // integers, a simple variable-length encoding in which smaller numbers
40 // take fewer bytes.
41 //
42 // Typically these classes will only be used internally by the protocol
43 // buffer library in order to encode and decode protocol buffers.  Clients
// of the library only need to know about these classes if they wish to write
45 // custom message parsing or serialization procedures.
46 //
47 // CodedOutputStream example:
48 //   // Write some data to "myfile".  First we write a 4-byte "magic number"
49 //   // to identify the file type, then write a length-delimited string.  The
50 //   // string is composed of a varint giving the length followed by the raw
51 //   // bytes.
//   int fd = open("myfile", O_CREAT | O_WRONLY, 0644);
53 //   ZeroCopyOutputStream* raw_output = new FileOutputStream(fd);
54 //   CodedOutputStream* coded_output = new CodedOutputStream(raw_output);
55 //
56 //   int magic_number = 1234;
57 //   char text[] = "Hello world!";
58 //   coded_output->WriteLittleEndian32(magic_number);
59 //   coded_output->WriteVarint32(strlen(text));
60 //   coded_output->WriteRaw(text, strlen(text));
61 //
62 //   delete coded_output;
63 //   delete raw_output;
64 //   close(fd);
65 //
66 // CodedInputStream example:
67 //   // Read a file created by the above code.
68 //   int fd = open("myfile", O_RDONLY);
69 //   ZeroCopyInputStream* raw_input = new FileInputStream(fd);
70 //   CodedInputStream* coded_input = new CodedInputStream(raw_input);
71 //
//   uint32 magic_number;
//   coded_input->ReadLittleEndian32(&magic_number);
73 //   if (magic_number != 1234) {
74 //     cerr << "File not in expected format." << endl;
75 //     return;
76 //   }
77 //
78 //   uint32 size;
79 //   coded_input->ReadVarint32(&size);
80 //
81 //   char* text = new char[size + 1];
//   coded_input->ReadRaw(text, size);
83 //   text[size] = '\0';
84 //
85 //   delete coded_input;
86 //   delete raw_input;
87 //   close(fd);
88 //
89 //   cout << "Text is: " << text << endl;
90 //   delete [] text;
91 //
92 // For those who are interested, varint encoding is defined as follows:
93 //
94 // The encoding operates on unsigned integers of up to 64 bits in length.
95 // Each byte of the encoded value has the format:
96 // * bits 0-6: Seven bits of the number being encoded.
97 // * bit 7: Zero if this is the last byte in the encoding (in which
98 //   case all remaining bits of the number are zero) or 1 if
99 //   more bytes follow.
100 // The first byte contains the least-significant 7 bits of the number, the
101 // second byte (if present) contains the next-least-significant 7 bits,
102 // and so on.  So, the binary number 1011000101011 would be encoded in two
103 // bytes as "10101011 00101100".
104 //
105 // In theory, varint could be used to encode integers of any length.
106 // However, for practicality we set a limit at 64 bits.  The maximum encoded
107 // length of a number is thus 10 bytes.
108 
109 #ifndef GOOGLE_PROTOBUF_IO_CODED_STREAM_H__
110 #define GOOGLE_PROTOBUF_IO_CODED_STREAM_H__
111 
112 #include <assert.h>
113 #include <atomic>
114 #include <climits>
115 #include <string>
116 #include <utility>
117 #ifdef _MSC_VER
118 // Assuming windows is always little-endian.
119 #if !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST)
120 #define PROTOBUF_LITTLE_ENDIAN 1
121 #endif
122 #if _MSC_VER >= 1300 && !defined(__INTEL_COMPILER)
123 // If MSVC has "/RTCc" set, it will complain about truncating casts at
124 // runtime.  This file contains some intentional truncating casts.
125 #pragma runtime_checks("c", off)
126 #endif
127 #else
128 #include <sys/param.h>  // __BYTE_ORDER
129 #if ((defined(__LITTLE_ENDIAN__) && !defined(__BIG_ENDIAN__)) ||    \
130      (defined(__BYTE_ORDER) && __BYTE_ORDER == __LITTLE_ENDIAN)) && \
131     !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST)
132 #define PROTOBUF_LITTLE_ENDIAN 1
133 #endif
134 #endif
135 #include <google/protobuf/stubs/common.h>
136 #include <google/protobuf/port.h>
137 #include <google/protobuf/stubs/port.h>
138 
139 
140 #include <google/protobuf/port_def.inc>
141 
142 namespace google {
143 namespace protobuf {
144 
145 class DescriptorPool;
146 class MessageFactory;
147 class ZeroCopyCodedInputStream;
148 
149 namespace internal {
150 void MapTestForceDeterministic();
151 class EpsCopyByteStream;
152 }  // namespace internal
153 
154 namespace io {
155 
156 // Defined in this file.
157 class CodedInputStream;
158 class CodedOutputStream;
159 
160 // Defined in other files.
161 class ZeroCopyInputStream;   // zero_copy_stream.h
162 class ZeroCopyOutputStream;  // zero_copy_stream.h
163 
164 // Class which reads and decodes binary data which is composed of varint-
165 // encoded integers and fixed-width pieces.  Wraps a ZeroCopyInputStream.
166 // Most users will not need to deal with CodedInputStream.
167 //
168 // Most methods of CodedInputStream that return a bool return false if an
169 // underlying I/O error occurs or if the data is malformed.  Once such a
170 // failure occurs, the CodedInputStream is broken and is no longer useful.
171 // After a failure, callers also should assume writes to "out" args may have
172 // occurred, though nothing useful can be determined from those writes.
173 class PROTOBUF_EXPORT CodedInputStream {
174  public:
175   // Create a CodedInputStream that reads from the given ZeroCopyInputStream.
176   explicit CodedInputStream(ZeroCopyInputStream* input);
177 
178   // Create a CodedInputStream that reads from the given flat array.  This is
179   // faster than using an ArrayInputStream.  PushLimit(size) is implied by
180   // this constructor.
181   explicit CodedInputStream(const uint8* buffer, int size);
182 
183   // Destroy the CodedInputStream and position the underlying
184   // ZeroCopyInputStream at the first unread byte.  If an error occurred while
185   // reading (causing a method to return false), then the exact position of
186   // the input stream may be anywhere between the last value that was read
187   // successfully and the stream's byte limit.
188   ~CodedInputStream();
189 
190   // Return true if this CodedInputStream reads from a flat array instead of
191   // a ZeroCopyInputStream.
192   inline bool IsFlat() const;
193 
194   // Skips a number of bytes.  Returns false if an underlying read error
195   // occurs.
196   inline bool Skip(int count);
197 
198   // Sets *data to point directly at the unread part of the CodedInputStream's
199   // underlying buffer, and *size to the size of that buffer, but does not
200   // advance the stream's current position.  This will always either produce
201   // a non-empty buffer or return false.  If the caller consumes any of
202   // this data, it should then call Skip() to skip over the consumed bytes.
203   // This may be useful for implementing external fast parsing routines for
204   // types of data not covered by the CodedInputStream interface.
205   bool GetDirectBufferPointer(const void** data, int* size);
206 
207   // Like GetDirectBufferPointer, but this method is inlined, and does not
208   // attempt to Refresh() if the buffer is currently empty.
209   PROTOBUF_ALWAYS_INLINE
210   void GetDirectBufferPointerInline(const void** data, int* size);
211 
212   // Read raw bytes, copying them into the given buffer.
213   bool ReadRaw(void* buffer, int size);
214 
215   // Like the above, with inlined optimizations. This should only be used
216   // by the protobuf implementation.
217   PROTOBUF_ALWAYS_INLINE
218   bool InternalReadRawInline(void* buffer, int size);
219 
220   // Like ReadRaw, but reads into a string.
221   bool ReadString(std::string* buffer, int size);
222   // Like the above, with inlined optimizations. This should only be used
223   // by the protobuf implementation.
224   PROTOBUF_ALWAYS_INLINE
225   bool InternalReadStringInline(std::string* buffer, int size);
226 
227 
228   // Read a 32-bit little-endian integer.
229   bool ReadLittleEndian32(uint32* value);
230   // Read a 64-bit little-endian integer.
231   bool ReadLittleEndian64(uint64* value);
232 
233   // These methods read from an externally provided buffer. The caller is
234   // responsible for ensuring that the buffer has sufficient space.
235   // Read a 32-bit little-endian integer.
236   static const uint8* ReadLittleEndian32FromArray(const uint8* buffer,
237                                                   uint32* value);
238   // Read a 64-bit little-endian integer.
239   static const uint8* ReadLittleEndian64FromArray(const uint8* buffer,
240                                                   uint64* value);
241 
242   // Read an unsigned integer with Varint encoding, truncating to 32 bits.
243   // Reading a 32-bit value is equivalent to reading a 64-bit one and casting
244   // it to uint32, but may be more efficient.
245   bool ReadVarint32(uint32* value);
246   // Read an unsigned integer with Varint encoding.
247   bool ReadVarint64(uint64* value);
248 
249   // Reads a varint off the wire into an "int". This should be used for reading
250   // sizes off the wire (sizes of strings, submessages, bytes fields, etc).
251   //
252   // The value from the wire is interpreted as unsigned.  If its value exceeds
253   // the representable value of an integer on this platform, instead of
254   // truncating we return false. Truncating (as performed by ReadVarint32()
255   // above) is an acceptable approach for fields representing an integer, but
256   // when we are parsing a size from the wire, truncating the value would result
257   // in us misparsing the payload.
258   bool ReadVarintSizeAsInt(int* value);
259 
260   // Read a tag.  This calls ReadVarint32() and returns the result, or returns
261   // zero (which is not a valid tag) if ReadVarint32() fails.  Also, ReadTag
262   // (but not ReadTagNoLastTag) updates the last tag value, which can be checked
263   // with LastTagWas().
264   //
265   // Always inline because this is only called in one place per parse loop
266   // but it is called for every iteration of said loop, so it should be fast.
267   // GCC doesn't want to inline this by default.
ReadTag()268   PROTOBUF_ALWAYS_INLINE uint32 ReadTag() {
269     return last_tag_ = ReadTagNoLastTag();
270   }
271 
272   PROTOBUF_ALWAYS_INLINE uint32 ReadTagNoLastTag();
273 
  // This is usually a faster alternative to ReadTag() when cutoff is a manifest
275   // constant.  It does particularly well for cutoff >= 127.  The first part
276   // of the return value is the tag that was read, though it can also be 0 in
277   // the cases where ReadTag() would return 0.  If the second part is true
278   // then the tag is known to be in [0, cutoff].  If not, the tag either is
279   // above cutoff or is 0.  (There's intentional wiggle room when tag is 0,
280   // because that can arise in several ways, and for best performance we want
281   // to avoid an extra "is tag == 0?" check here.)
282   PROTOBUF_ALWAYS_INLINE
ReadTagWithCutoff(uint32 cutoff)283   std::pair<uint32, bool> ReadTagWithCutoff(uint32 cutoff) {
284     std::pair<uint32, bool> result = ReadTagWithCutoffNoLastTag(cutoff);
285     last_tag_ = result.first;
286     return result;
287   }
288 
289   PROTOBUF_ALWAYS_INLINE
290   std::pair<uint32, bool> ReadTagWithCutoffNoLastTag(uint32 cutoff);
291 
292   // Usually returns true if calling ReadVarint32() now would produce the given
293   // value.  Will always return false if ReadVarint32() would not return the
294   // given value.  If ExpectTag() returns true, it also advances past
295   // the varint.  For best performance, use a compile-time constant as the
296   // parameter.
297   // Always inline because this collapses to a small number of instructions
298   // when given a constant parameter, but GCC doesn't want to inline by default.
299   PROTOBUF_ALWAYS_INLINE bool ExpectTag(uint32 expected);
300 
301   // Like above, except this reads from the specified buffer. The caller is
302   // responsible for ensuring that the buffer is large enough to read a varint
303   // of the expected size. For best performance, use a compile-time constant as
304   // the expected tag parameter.
305   //
306   // Returns a pointer beyond the expected tag if it was found, or NULL if it
307   // was not.
308   PROTOBUF_ALWAYS_INLINE
309   static const uint8* ExpectTagFromArray(const uint8* buffer, uint32 expected);
310 
311   // Usually returns true if no more bytes can be read.  Always returns false
312   // if more bytes can be read.  If ExpectAtEnd() returns true, a subsequent
313   // call to LastTagWas() will act as if ReadTag() had been called and returned
314   // zero, and ConsumedEntireMessage() will return true.
315   bool ExpectAtEnd();
316 
317   // If the last call to ReadTag() or ReadTagWithCutoff() returned the given
318   // value, returns true.  Otherwise, returns false.
319   // ReadTagNoLastTag/ReadTagWithCutoffNoLastTag do not preserve the last
320   // returned value.
321   //
322   // This is needed because parsers for some types of embedded messages
323   // (with field type TYPE_GROUP) don't actually know that they've reached the
324   // end of a message until they see an ENDGROUP tag, which was actually part
325   // of the enclosing message.  The enclosing message would like to check that
326   // tag to make sure it had the right number, so it calls LastTagWas() on
327   // return from the embedded parser to check.
328   bool LastTagWas(uint32 expected);
SetLastTag(uint32 tag)329   void SetLastTag(uint32 tag) { last_tag_ = tag; }
330 
  // When parsing a message (but NOT a group), this method must be called
332   // immediately after MergeFromCodedStream() returns (if it returns true)
333   // to further verify that the message ended in a legitimate way.  For
334   // example, this verifies that parsing did not end on an end-group tag.
335   // It also checks for some cases where, due to optimizations,
336   // MergeFromCodedStream() can incorrectly return true.
337   bool ConsumedEntireMessage();
SetConsumed()338   void SetConsumed() { legitimate_message_end_ = true; }
339 
340   // Limits ----------------------------------------------------------
341   // Limits are used when parsing length-delimited embedded messages.
342   // After the message's length is read, PushLimit() is used to prevent
343   // the CodedInputStream from reading beyond that length.  Once the
344   // embedded message has been parsed, PopLimit() is called to undo the
345   // limit.
346 
347   // Opaque type used with PushLimit() and PopLimit().  Do not modify
348   // values of this type yourself.  The only reason that this isn't a
349   // struct with private internals is for efficiency.
350   typedef int Limit;
351 
352   // Places a limit on the number of bytes that the stream may read,
353   // starting from the current position.  Once the stream hits this limit,
354   // it will act like the end of the input has been reached until PopLimit()
355   // is called.
356   //
357   // As the names imply, the stream conceptually has a stack of limits.  The
358   // shortest limit on the stack is always enforced, even if it is not the
359   // top limit.
360   //
361   // The value returned by PushLimit() is opaque to the caller, and must
362   // be passed unchanged to the corresponding call to PopLimit().
363   Limit PushLimit(int byte_limit);
364 
365   // Pops the last limit pushed by PushLimit().  The input must be the value
366   // returned by that call to PushLimit().
367   void PopLimit(Limit limit);
368 
369   // Returns the number of bytes left until the nearest limit on the
370   // stack is hit, or -1 if no limits are in place.
371   int BytesUntilLimit() const;
372 
373   // Returns current position relative to the beginning of the input stream.
374   int CurrentPosition() const;
375 
376   // Total Bytes Limit -----------------------------------------------
377   // To prevent malicious users from sending excessively large messages
378   // and causing memory exhaustion, CodedInputStream imposes a hard limit on
379   // the total number of bytes it will read.
380 
381   // Sets the maximum number of bytes that this CodedInputStream will read
382   // before refusing to continue.  To prevent servers from allocating enormous
383   // amounts of memory to hold parsed messages, the maximum message length
384   // should be limited to the shortest length that will not harm usability.
385   // The default limit is INT_MAX (~2GB) and apps should set shorter limits
386   // if possible. An error will always be printed to stderr if the limit is
387   // reached.
388   //
389   // Note: setting a limit less than the current read position is interpreted
390   // as a limit on the current position.
391   //
392   // This is unrelated to PushLimit()/PopLimit().
393   void SetTotalBytesLimit(int total_bytes_limit);
394 
395   PROTOBUF_DEPRECATED_MSG(
396       "Please use the single parameter version of SetTotalBytesLimit(). The "
397       "second parameter is ignored.")
SetTotalBytesLimit(int total_bytes_limit,int)398   void SetTotalBytesLimit(int total_bytes_limit, int) {
399     SetTotalBytesLimit(total_bytes_limit);
400   }
401 
402   // The Total Bytes Limit minus the Current Position, or -1 if the total bytes
403   // limit is INT_MAX.
404   int BytesUntilTotalBytesLimit() const;
405 
406   // Recursion Limit -------------------------------------------------
407   // To prevent corrupt or malicious messages from causing stack overflows,
408   // we must keep track of the depth of recursion when parsing embedded
409   // messages and groups.  CodedInputStream keeps track of this because it
410   // is the only object that is passed down the stack during parsing.
411 
412   // Sets the maximum recursion depth.  The default is 100.
413   void SetRecursionLimit(int limit);
RecursionBudget()414   int RecursionBudget() { return recursion_budget_; }
415 
GetDefaultRecursionLimit()416   static int GetDefaultRecursionLimit() { return default_recursion_limit_; }
417 
418   // Increments the current recursion depth.  Returns true if the depth is
419   // under the limit, false if it has gone over.
420   bool IncrementRecursionDepth();
421 
422   // Decrements the recursion depth if possible.
423   void DecrementRecursionDepth();
424 
425   // Decrements the recursion depth blindly.  This is faster than
426   // DecrementRecursionDepth().  It should be used only if all previous
427   // increments to recursion depth were successful.
428   void UnsafeDecrementRecursionDepth();
429 
430   // Shorthand for make_pair(PushLimit(byte_limit), --recursion_budget_).
431   // Using this can reduce code size and complexity in some cases.  The caller
432   // is expected to check that the second part of the result is non-negative (to
433   // bail out if the depth of recursion is too high) and, if all is well, to
434   // later pass the first part of the result to PopLimit() or similar.
435   std::pair<CodedInputStream::Limit, int> IncrementRecursionDepthAndPushLimit(
436       int byte_limit);
437 
438   // Shorthand for PushLimit(ReadVarint32(&length) ? length : 0).
439   Limit ReadLengthAndPushLimit();
440 
441   // Helper that is equivalent to: {
442   //  bool result = ConsumedEntireMessage();
443   //  PopLimit(limit);
444   //  UnsafeDecrementRecursionDepth();
445   //  return result; }
446   // Using this can reduce code size and complexity in some cases.
447   // Do not use unless the current recursion depth is greater than zero.
448   bool DecrementRecursionDepthAndPopLimit(Limit limit);
449 
450   // Helper that is equivalent to: {
451   //  bool result = ConsumedEntireMessage();
452   //  PopLimit(limit);
453   //  return result; }
454   // Using this can reduce code size and complexity in some cases.
455   bool CheckEntireMessageConsumedAndPopLimit(Limit limit);
456 
457   // Extension Registry ----------------------------------------------
458   // ADVANCED USAGE:  99.9% of people can ignore this section.
459   //
460   // By default, when parsing extensions, the parser looks for extension
461   // definitions in the pool which owns the outer message's Descriptor.
462   // However, you may call SetExtensionRegistry() to provide an alternative
463   // pool instead.  This makes it possible, for example, to parse a message
464   // using a generated class, but represent some extensions using
465   // DynamicMessage.
466 
467   // Set the pool used to look up extensions.  Most users do not need to call
468   // this as the correct pool will be chosen automatically.
469   //
470   // WARNING:  It is very easy to misuse this.  Carefully read the requirements
471   //   below.  Do not use this unless you are sure you need it.  Almost no one
472   //   does.
473   //
474   // Let's say you are parsing a message into message object m, and you want
475   // to take advantage of SetExtensionRegistry().  You must follow these
476   // requirements:
477   //
478   // The given DescriptorPool must contain m->GetDescriptor().  It is not
479   // sufficient for it to simply contain a descriptor that has the same name
480   // and content -- it must be the *exact object*.  In other words:
481   //   assert(pool->FindMessageTypeByName(m->GetDescriptor()->full_name()) ==
482   //          m->GetDescriptor());
483   // There are two ways to satisfy this requirement:
484   // 1) Use m->GetDescriptor()->pool() as the pool.  This is generally useless
485   //    because this is the pool that would be used anyway if you didn't call
486   //    SetExtensionRegistry() at all.
487   // 2) Use a DescriptorPool which has m->GetDescriptor()->pool() as an
488   //    "underlay".  Read the documentation for DescriptorPool for more
489   //    information about underlays.
490   //
491   // You must also provide a MessageFactory.  This factory will be used to
492   // construct Message objects representing extensions.  The factory's
493   // GetPrototype() MUST return non-NULL for any Descriptor which can be found
494   // through the provided pool.
495   //
496   // If the provided factory might return instances of protocol-compiler-
497   // generated (i.e. compiled-in) types, or if the outer message object m is
498   // a generated type, then the given factory MUST have this property:  If
499   // GetPrototype() is given a Descriptor which resides in
500   // DescriptorPool::generated_pool(), the factory MUST return the same
501   // prototype which MessageFactory::generated_factory() would return.  That
502   // is, given a descriptor for a generated type, the factory must return an
503   // instance of the generated class (NOT DynamicMessage).  However, when
504   // given a descriptor for a type that is NOT in generated_pool, the factory
505   // is free to return any implementation.
506   //
507   // The reason for this requirement is that generated sub-objects may be
508   // accessed via the standard (non-reflection) extension accessor methods,
509   // and these methods will down-cast the object to the generated class type.
510   // If the object is not actually of that type, the results would be undefined.
511   // On the other hand, if an extension is not compiled in, then there is no
512   // way the code could end up accessing it via the standard accessors -- the
513   // only way to access the extension is via reflection.  When using reflection,
514   // DynamicMessage and generated messages are indistinguishable, so it's fine
515   // if these objects are represented using DynamicMessage.
516   //
517   // Using DynamicMessageFactory on which you have called
518   // SetDelegateToGeneratedFactory(true) should be sufficient to satisfy the
519   // above requirement.
520   //
521   // If either pool or factory is NULL, both must be NULL.
522   //
523   // Note that this feature is ignored when parsing "lite" messages as they do
524   // not have descriptors.
525   void SetExtensionRegistry(const DescriptorPool* pool,
526                             MessageFactory* factory);
527 
528   // Get the DescriptorPool set via SetExtensionRegistry(), or NULL if no pool
529   // has been provided.
530   const DescriptorPool* GetExtensionPool();
531 
532   // Get the MessageFactory set via SetExtensionRegistry(), or NULL if no
533   // factory has been provided.
534   MessageFactory* GetExtensionFactory();
535 
536  private:
537   GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(CodedInputStream);
538 
539   const uint8* buffer_;
540   const uint8* buffer_end_;  // pointer to the end of the buffer.
541   ZeroCopyInputStream* input_;
542   int total_bytes_read_;  // total bytes read from input_, including
543                           // the current buffer
544 
545   // If total_bytes_read_ surpasses INT_MAX, we record the extra bytes here
546   // so that we can BackUp() on destruction.
547   int overflow_bytes_;
548 
549   // LastTagWas() stuff.
550   uint32 last_tag_;  // result of last ReadTag() or ReadTagWithCutoff().
551 
552   // This is set true by ReadTag{Fallback/Slow}() if it is called when exactly
553   // at EOF, or by ExpectAtEnd() when it returns true.  This happens when we
554   // reach the end of a message and attempt to read another tag.
555   bool legitimate_message_end_;
556 
557   // See EnableAliasing().
558   bool aliasing_enabled_;
559 
560   // Limits
561   Limit current_limit_;  // if position = -1, no limit is applied
562 
563   // For simplicity, if the current buffer crosses a limit (either a normal
564   // limit created by PushLimit() or the total bytes limit), buffer_size_
565   // only tracks the number of bytes before that limit.  This field
566   // contains the number of bytes after it.  Note that this implies that if
567   // buffer_size_ == 0 and buffer_size_after_limit_ > 0, we know we've
568   // hit a limit.  However, if both are zero, it doesn't necessarily mean
569   // we aren't at a limit -- the buffer may have ended exactly at the limit.
570   int buffer_size_after_limit_;
571 
572   // Maximum number of bytes to read, period.  This is unrelated to
573   // current_limit_.  Set using SetTotalBytesLimit().
574   int total_bytes_limit_;
575 
576   // Current recursion budget, controlled by IncrementRecursionDepth() and
577   // similar.  Starts at recursion_limit_ and goes down: if this reaches
578   // -1 we are over budget.
579   int recursion_budget_;
580   // Recursion depth limit, set by SetRecursionLimit().
581   int recursion_limit_;
582 
583   // See SetExtensionRegistry().
584   const DescriptorPool* extension_pool_;
585   MessageFactory* extension_factory_;
586 
587   // Private member functions.
588 
589   // Fallback when Skip() goes past the end of the current buffer.
590   bool SkipFallback(int count, int original_buffer_size);
591 
592   // Advance the buffer by a given number of bytes.
593   void Advance(int amount);
594 
595   // Back up input_ to the current buffer position.
596   void BackUpInputToCurrentPosition();
597 
598   // Recomputes the value of buffer_size_after_limit_.  Must be called after
599   // current_limit_ or total_bytes_limit_ changes.
600   void RecomputeBufferLimits();
601 
602   // Writes an error message saying that we hit total_bytes_limit_.
603   void PrintTotalBytesLimitError();
604 
605   // Called when the buffer runs out to request more data.  Implies an
606   // Advance(BufferSize()).
607   bool Refresh();
608 
609   // When parsing varints, we optimize for the common case of small values, and
610   // then optimize for the case when the varint fits within the current buffer
611   // piece. The Fallback method is used when we can't use the one-byte
612   // optimization. The Slow method is yet another fallback when the buffer is
613   // not large enough. Making the slow path out-of-line speeds up the common
614   // case by 10-15%. The slow path is fairly uncommon: it only triggers when a
615   // message crosses multiple buffers.  Note: ReadVarint32Fallback() and
616   // ReadVarint64Fallback() are called frequently and generally not inlined, so
617   // they have been optimized to avoid "out" parameters.  The former returns -1
618   // if it fails and the uint32 it read otherwise.  The latter has a bool
619   // indicating success or failure as part of its return type.
620   int64 ReadVarint32Fallback(uint32 first_byte_or_zero);
621   int ReadVarintSizeAsIntFallback();
622   std::pair<uint64, bool> ReadVarint64Fallback();
623   bool ReadVarint32Slow(uint32* value);
624   bool ReadVarint64Slow(uint64* value);
625   int ReadVarintSizeAsIntSlow();
626   bool ReadLittleEndian32Fallback(uint32* value);
627   bool ReadLittleEndian64Fallback(uint64* value);
628 
629   // Fallback/slow methods for reading tags. These do not update last_tag_,
630   // but will set legitimate_message_end_ if we are at the end of the input
631   // stream.
632   uint32 ReadTagFallback(uint32 first_byte_or_zero);
633   uint32 ReadTagSlow();
634   bool ReadStringFallback(std::string* buffer, int size);
635 
636   // Return the size of the buffer.
637   int BufferSize() const;
638 
639   static const int kDefaultTotalBytesLimit = INT_MAX;
640 
641   static int default_recursion_limit_;  // 100 by default.
642 
643   friend class google::protobuf::ZeroCopyCodedInputStream;
644   friend class google::protobuf::internal::EpsCopyByteStream;
645 };
646 
647 // Class which encodes and writes binary data which is composed of varint-
648 // encoded integers and fixed-width pieces.  Wraps a ZeroCopyOutputStream.
649 // Most users will not need to deal with CodedOutputStream.
650 //
651 // Most methods of CodedOutputStream which return a bool return false if an
652 // underlying I/O error occurs.  Once such a failure occurs, the
653 // CodedOutputStream is broken and is no longer useful. The Write* methods do
654 // not return the stream status, but will invalidate the stream if an error
655 // occurs. The client can probe HadError() to determine the status.
656 //
657 // Note that every method of CodedOutputStream which writes some data has
658 // a corresponding static "ToArray" version. These versions write directly
659 // to the provided buffer, returning a pointer past the last written byte.
660 // They require that the buffer has sufficient capacity for the encoded data.
661 // This allows an optimization where we check if an output stream has enough
662 // space for an entire message before we start writing and, if there is, we
663 // call only the ToArray methods to avoid doing bound checks for each
664 // individual value.
665 // i.e., in the example above:
666 //
667 //   CodedOutputStream* coded_output = new CodedOutputStream(raw_output);
668 //   int magic_number = 1234;
669 //   char text[] = "Hello world!";
670 //
671 //   int coded_size = sizeof(magic_number) +
672 //                    CodedOutputStream::VarintSize32(strlen(text)) +
673 //                    strlen(text);
674 //
675 //   uint8* buffer =
676 //       coded_output->GetDirectBufferForNBytesAndAdvance(coded_size);
677 //   if (buffer != NULL) {
678 //     // The output stream has enough space in the buffer: write directly to
679 //     // the array.
680 //     buffer = CodedOutputStream::WriteLittleEndian32ToArray(magic_number,
681 //                                                            buffer);
682 //     buffer = CodedOutputStream::WriteVarint32ToArray(strlen(text), buffer);
683 //     buffer = CodedOutputStream::WriteRawToArray(text, strlen(text), buffer);
684 //   } else {
685 //     // Make bound-checked writes, which will ask the underlying stream for
686 //     // more space as needed.
687 //     coded_output->WriteLittleEndian32(magic_number);
688 //     coded_output->WriteVarint32(strlen(text));
689 //     coded_output->WriteRaw(text, strlen(text));
690 //   }
691 //
692 //   delete coded_output;
693 class PROTOBUF_EXPORT CodedOutputStream {
694  public:
695   // Create a CodedOutputStream that writes to the given ZeroCopyOutputStream.
696   explicit CodedOutputStream(ZeroCopyOutputStream* output);
      // NOTE(review): do_eager_refresh presumably controls whether the first
      // buffer is requested from `output` during construction -- confirm
      // against the out-of-line definition.
697   CodedOutputStream(ZeroCopyOutputStream* output, bool do_eager_refresh);
698 
699   // Destroy the CodedOutputStream and position the underlying
700   // ZeroCopyOutputStream immediately after the last byte written.
701   ~CodedOutputStream();
702 
703   // Trims any unused space in the underlying buffer so that its size matches
704   // the number of bytes written by this stream. The underlying buffer will
705   // automatically be trimmed when this stream is destroyed; this call is only
706   // necessary if the underlying buffer is accessed *before* the stream is
707   // destroyed.
708   void Trim();
709 
710   // Skips a number of bytes, leaving the bytes unmodified in the underlying
711   // buffer.  Returns false if an underlying write error occurs.  This is
712   // mainly useful with GetDirectBufferPointer().
713   // Note of caution, the skipped bytes may contain uninitialized data. The
714   // caller must make sure that the skipped bytes are properly initialized,
715   // otherwise you might leak bytes from your heap.
716   bool Skip(int count);
717 
718   // Sets *data to point directly at the unwritten part of the
719   // CodedOutputStream's underlying buffer, and *size to the size of that
720   // buffer, but does not advance the stream's current position.  This will
721   // always either produce a non-empty buffer or return false.  If the caller
722   // writes any data to this buffer, it should then call Skip() to skip over
723   // the consumed bytes.  This may be useful for implementing external fast
724   // serialization routines for types of data not covered by the
725   // CodedOutputStream interface.
726   bool GetDirectBufferPointer(void** data, int* size);
727 
728   // If there are at least "size" bytes available in the current buffer,
729   // returns a pointer directly into the buffer and advances over these bytes.
730   // The caller may then write directly into this buffer (e.g. using the
731   // *ToArray static methods) rather than go through CodedOutputStream.  If
732   // there are not enough bytes available, returns NULL.  The return pointer is
733   // invalidated as soon as any other non-const method of CodedOutputStream
734   // is called.
735   inline uint8* GetDirectBufferForNBytesAndAdvance(int size);
736 
737   // Write raw bytes, copying them from the given buffer.
738   void WriteRaw(const void* buffer, int size);
739   // Like WriteRaw()  but will try to write aliased data if aliasing is
740   // turned on.
741   void WriteRawMaybeAliased(const void* data, int size);
742   // Like WriteRaw()  but writing directly to the target array.
743   // This is _not_ inlined, as the compiler often optimizes memcpy into inline
744   // copy loops. Since this gets called by every field with string or bytes
745   // type, inlining may lead to a significant amount of code bloat, with only a
746   // minor performance gain.
747   static uint8* WriteRawToArray(const void* buffer, int size, uint8* target);
748 
749   // Equivalent to WriteRaw(str.data(), str.size()).
750   void WriteString(const std::string& str);
751   // Like WriteString()  but writing directly to the target array.
752   static uint8* WriteStringToArray(const std::string& str, uint8* target);
753   // Write the varint-encoded size of str followed by str.
754   static uint8* WriteStringWithSizeToArray(const std::string& str,
755                                            uint8* target);
756 
757 
758   // Instructs the CodedOutputStream to allow the underlying
759   // ZeroCopyOutputStream to hold pointers to the original structure instead of
760   // copying, if it supports it (i.e. output->AllowsAliasing() is true).  If the
761   // underlying stream does not support aliasing, then enabling it has no
762   // effect.  For now, this only affects the behavior of
763   // WriteRawMaybeAliased().
764   //
765   // NOTE: It is caller's responsibility to ensure that the chunk of memory
766   // remains live until all of the data has been consumed from the stream.
767   void EnableAliasing(bool enabled);
768 
769   // Write a 32-bit little-endian integer.
770   void WriteLittleEndian32(uint32 value);
771   // Like WriteLittleEndian32()  but writing directly to the target array.
772   static uint8* WriteLittleEndian32ToArray(uint32 value, uint8* target);
773   // Write a 64-bit little-endian integer.
774   void WriteLittleEndian64(uint64 value);
775   // Like WriteLittleEndian64()  but writing directly to the target array.
776   static uint8* WriteLittleEndian64ToArray(uint64 value, uint8* target);
777 
778   // Write an unsigned integer with Varint encoding.  Writing a 32-bit value
779   // is equivalent to casting it to uint64 and writing it as a 64-bit value,
780   // but may be more efficient.
781   void WriteVarint32(uint32 value);
782   // Like WriteVarint32()  but writing directly to the target array.
783   static uint8* WriteVarint32ToArray(uint32 value, uint8* target);
784   // Write an unsigned integer with Varint encoding.
785   void WriteVarint64(uint64 value);
786   // Like WriteVarint64()  but writing directly to the target array.
787   static uint8* WriteVarint64ToArray(uint64 value, uint8* target);
788 
789   // Equivalent to WriteVarint32() except when the value is negative,
790   // in which case it must be sign-extended to a full 10 bytes.
791   void WriteVarint32SignExtended(int32 value);
792   // Like WriteVarint32SignExtended()  but writing directly to the target array.
793   static uint8* WriteVarint32SignExtendedToArray(int32 value, uint8* target);
794 
795   // This is identical to WriteVarint32(), but optimized for writing tags.
796   // In particular, if the input is a compile-time constant, this method
797   // compiles down to a couple instructions.
798   // Always inline because otherwise the aforementioned optimization can't work,
799   // but GCC by default doesn't want to inline this.
      // NOTE(review): in this declaration list only WriteTagToArray() below
      // carries PROTOBUF_ALWAYS_INLINE; WriteTag() itself does not.
800   void WriteTag(uint32 value);
801   // Like WriteTag()  but writing directly to the target array.
802   PROTOBUF_ALWAYS_INLINE
803   static uint8* WriteTagToArray(uint32 value, uint8* target);
804 
805   // Returns the number of bytes needed to encode the given value as a varint.
806   static size_t VarintSize32(uint32 value);
807   // Returns the number of bytes needed to encode the given value as a varint.
808   static size_t VarintSize64(uint64 value);
809 
810   // If negative, 10 bytes.  Otherwise, same as VarintSize32().
811   static size_t VarintSize32SignExtended(int32 value);
812 
813   // Compile-time equivalent of VarintSize32().
      // Each additional varint byte carries 7 more payload bits, hence the
      // thresholds at 2^7, 2^14, 2^21 and 2^28.
814   template <uint32 Value>
815   struct StaticVarintSize32 {
816     static const size_t value =
817         (Value < (1 << 7))
818             ? 1
819             : (Value < (1 << 14))
820                   ? 2
821                   : (Value < (1 << 21)) ? 3 : (Value < (1 << 28)) ? 4 : 5;
822   };
823 
824   // Returns the total number of bytes written since this object was created.
825   inline int ByteCount() const;
826 
827   // Returns true if there was an underlying I/O error since this object was
828   // created.
HadError()829   bool HadError() const { return had_error_; }
830 
831   // Deterministic serialization, if requested, guarantees that for a given
832   // binary, equal messages will always be serialized to the same bytes. This
833   // implies:
834   //   . repeated serialization of a message will return the same bytes
835   //   . different processes of the same binary (which may be executing on
836   //     different machines) will serialize equal messages to the same bytes.
837   //
838   // Note the deterministic serialization is NOT canonical across languages; it
839   // is also unstable across different builds with schema changes due to unknown
840   // fields. Users who need canonical serialization, e.g., persistent storage in
841   // a canonical form, fingerprinting, etc., should define their own
842   // canonicalization specification and implement the serializer using
843   // reflection APIs rather than relying on this API.
844   //
845   // If deterministic serialization is requested, the serializer will
846   // sort map entries by keys in lexicographical order or numerical order.
847   // (This is an implementation detail and may be subject to change.)
848   //
849   // There are two ways to determine whether serialization should be
850   // deterministic for this CodedOutputStream.  If SetSerializationDeterministic
851   // has not yet been called, then the default comes from the global default,
852   // which is false, until SetDefaultSerializationDeterministic has been called.
853   // Otherwise, SetSerializationDeterministic has been called, and the last
854   // value passed to it is all that matters.
SetSerializationDeterministic(bool value)855   void SetSerializationDeterministic(bool value) {
856     is_serialization_deterministic_ = value;
857   }
858   // See above.  Also, note that users of this CodedOutputStream may need to
859   // call IsSerializationDeterministic() to serialize in the intended way.  This
860   // CodedOutputStream cannot enforce a desire for deterministic serialization
861   // by itself.
IsSerializationDeterministic()862   bool IsSerializationDeterministic() const {
863     return is_serialization_deterministic_;
864   }
865 
      // Returns the current process-wide default.  The flag is read with relaxed
      // memory ordering, so the load itself imposes no ordering on other memory
      // operations.
IsDefaultSerializationDeterministic()866   static bool IsDefaultSerializationDeterministic() {
867     return default_serialization_deterministic_.load(
868                std::memory_order_relaxed) != 0;
869   }
870 
871  private:
872   GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(CodedOutputStream);
873 
874   ZeroCopyOutputStream* output_;
875   uint8* buffer_;    // Current write position; moved forward by Advance().
876   int buffer_size_;  // Bytes left after buffer_; decremented by Advance().
877   int total_bytes_;        // Sum of sizes of all buffers seen so far.
878   bool had_error_;         // Whether an error occurred during output.
879   bool aliasing_enabled_;  // See EnableAliasing().
880   bool is_serialization_deterministic_;
      // Process-wide default; see IsDefaultSerializationDeterministic() and
      // SetDefaultSerializationDeterministic().
881   static std::atomic<bool> default_serialization_deterministic_;
882 
883   // Advance the buffer by a given number of bytes.
884   void Advance(int amount);
885 
886   // Called when the buffer runs out to request more data.  Implies an
887   // Advance(buffer_size_).
888   bool Refresh();
889 
890   // Like WriteRaw() but may avoid copying if the underlying
891   // ZeroCopyOutputStream supports it.
892   void WriteAliasedRaw(const void* buffer, int size);
893 
894   // If this write might cross the end of the buffer, we compose the bytes first
895   // then use WriteRaw().
896   void WriteVarint32SlowPath(uint32 value);
897   void WriteVarint64SlowPath(uint64 value);
898 
899   // See above.  Other projects may use "friend" to allow them to call this.
900   // After SetDefaultSerializationDeterministic() completes, all protocol
901   // buffer serializations will be deterministic by default.  Thread safe.
902   // However, the meaning of "after" is subtle here: to be safe, each thread
903   // that wants deterministic serialization by default needs to call
904   // SetDefaultSerializationDeterministic() or ensure on its own that another
905   // thread has done so.
906   friend void internal::MapTestForceDeterministic();
SetDefaultSerializationDeterministic()907   static void SetDefaultSerializationDeterministic() {
908     default_serialization_deterministic_.store(true, std::memory_order_relaxed);
909   }
910 };
911 
912 // inline methods ====================================================
913 // The vast majority of varints are only one byte.  These inline
914 // methods optimize for that case.
915 
ReadVarint32(uint32 * value)916 inline bool CodedInputStream::ReadVarint32(uint32* value) {
917   uint32 v = 0;
918   if (PROTOBUF_PREDICT_TRUE(buffer_ < buffer_end_)) {
919     v = *buffer_;
920     if (v < 0x80) {
921       *value = v;
922       Advance(1);
923       return true;
924     }
925   }
926   int64 result = ReadVarint32Fallback(v);
927   *value = static_cast<uint32>(result);
928   return result >= 0;
929 }
930 
ReadVarint64(uint64 * value)931 inline bool CodedInputStream::ReadVarint64(uint64* value) {
932   if (PROTOBUF_PREDICT_TRUE(buffer_ < buffer_end_) && *buffer_ < 0x80) {
933     *value = *buffer_;
934     Advance(1);
935     return true;
936   }
937   std::pair<uint64, bool> p = ReadVarint64Fallback();
938   *value = p.first;
939   return p.second;
940 }
941 
ReadVarintSizeAsInt(int * value)942 inline bool CodedInputStream::ReadVarintSizeAsInt(int* value) {
943   if (PROTOBUF_PREDICT_TRUE(buffer_ < buffer_end_)) {
944     int v = *buffer_;
945     if (v < 0x80) {
946       *value = v;
947       Advance(1);
948       return true;
949     }
950   }
951   *value = ReadVarintSizeAsIntFallback();
952   return *value >= 0;
953 }
954 
955 // static
// Decodes a 32-bit little-endian integer from the first four bytes of
// `buffer` into *value and returns a pointer just past those bytes.
inline const uint8* CodedInputStream::ReadLittleEndian32FromArray(
    const uint8* buffer, uint32* value) {
#if defined(PROTOBUF_LITTLE_ENDIAN)
  // Host byte order already matches the wire format: copy the bytes verbatim.
  memcpy(value, buffer, sizeof(*value));
#else
  // Assemble the value byte by byte, least significant byte first.
  const uint32 byte0 = static_cast<uint32>(buffer[0]);
  const uint32 byte1 = static_cast<uint32>(buffer[1]);
  const uint32 byte2 = static_cast<uint32>(buffer[2]);
  const uint32 byte3 = static_cast<uint32>(buffer[3]);
  *value = byte0 | (byte1 << 8) | (byte2 << 16) | (byte3 << 24);
#endif
  return buffer + sizeof(*value);
}
969 // static
// Decodes a 64-bit little-endian integer from the first eight bytes of
// `buffer` into *value and returns a pointer just past those bytes.
inline const uint8* CodedInputStream::ReadLittleEndian64FromArray(
    const uint8* buffer, uint64* value) {
#if defined(PROTOBUF_LITTLE_ENDIAN)
  // Host byte order already matches the wire format: copy the bytes verbatim.
  memcpy(value, buffer, sizeof(*value));
#else
  // Build the two 32-bit halves separately, then join them.
  const uint32 low_word = (static_cast<uint32>(buffer[0])) |
                          (static_cast<uint32>(buffer[1]) << 8) |
                          (static_cast<uint32>(buffer[2]) << 16) |
                          (static_cast<uint32>(buffer[3]) << 24);
  const uint32 high_word = (static_cast<uint32>(buffer[4])) |
                           (static_cast<uint32>(buffer[5]) << 8) |
                           (static_cast<uint32>(buffer[6]) << 16) |
                           (static_cast<uint32>(buffer[7]) << 24);
  const uint64 high_bits = static_cast<uint64>(high_word) << 32;
  *value = static_cast<uint64>(low_word) | high_bits;
#endif
  return buffer + sizeof(*value);
}
988 
ReadLittleEndian32(uint32 * value)989 inline bool CodedInputStream::ReadLittleEndian32(uint32* value) {
990 #if defined(PROTOBUF_LITTLE_ENDIAN)
991   if (PROTOBUF_PREDICT_TRUE(BufferSize() >= static_cast<int>(sizeof(*value)))) {
992     buffer_ = ReadLittleEndian32FromArray(buffer_, value);
993     return true;
994   } else {
995     return ReadLittleEndian32Fallback(value);
996   }
997 #else
998   return ReadLittleEndian32Fallback(value);
999 #endif
1000 }
1001 
ReadLittleEndian64(uint64 * value)1002 inline bool CodedInputStream::ReadLittleEndian64(uint64* value) {
1003 #if defined(PROTOBUF_LITTLE_ENDIAN)
1004   if (PROTOBUF_PREDICT_TRUE(BufferSize() >= static_cast<int>(sizeof(*value)))) {
1005     buffer_ = ReadLittleEndian64FromArray(buffer_, value);
1006     return true;
1007   } else {
1008     return ReadLittleEndian64Fallback(value);
1009   }
1010 #else
1011   return ReadLittleEndian64Fallback(value);
1012 #endif
1013 }
1014 
ReadTagNoLastTag()1015 inline uint32 CodedInputStream::ReadTagNoLastTag() {
1016   uint32 v = 0;
1017   if (PROTOBUF_PREDICT_TRUE(buffer_ < buffer_end_)) {
1018     v = *buffer_;
1019     if (v < 0x80) {
1020       Advance(1);
1021       return v;
1022     }
1023   }
1024   v = ReadTagFallback(v);
1025   return v;
1026 }
1027 
ReadTagWithCutoffNoLastTag(uint32 cutoff)1028 inline std::pair<uint32, bool> CodedInputStream::ReadTagWithCutoffNoLastTag(
1029     uint32 cutoff) {
1030   // In performance-sensitive code we can expect cutoff to be a compile-time
1031   // constant, and things like "cutoff >= kMax1ByteVarint" to be evaluated at
1032   // compile time.
1033   uint32 first_byte_or_zero = 0;
1034   if (PROTOBUF_PREDICT_TRUE(buffer_ < buffer_end_)) {
1035     // Hot case: buffer_ non_empty, buffer_[0] in [1, 128).
1036     // TODO(gpike): Is it worth rearranging this? E.g., if the number of fields
1037     // is large enough then is it better to check for the two-byte case first?
1038     first_byte_or_zero = buffer_[0];
1039     if (static_cast<int8>(buffer_[0]) > 0) {
1040       const uint32 kMax1ByteVarint = 0x7f;
1041       uint32 tag = buffer_[0];
1042       Advance(1);
1043       return std::make_pair(tag, cutoff >= kMax1ByteVarint || tag <= cutoff);
1044     }
1045     // Other hot case: cutoff >= 0x80, buffer_ has at least two bytes available,
1046     // and tag is two bytes.  The latter is tested by bitwise-and-not of the
1047     // first byte and the second byte.
1048     if (cutoff >= 0x80 && PROTOBUF_PREDICT_TRUE(buffer_ + 1 < buffer_end_) &&
1049         PROTOBUF_PREDICT_TRUE((buffer_[0] & ~buffer_[1]) >= 0x80)) {
1050       const uint32 kMax2ByteVarint = (0x7f << 7) + 0x7f;
1051       uint32 tag = (1u << 7) * buffer_[1] + (buffer_[0] - 0x80);
1052       Advance(2);
1053       // It might make sense to test for tag == 0 now, but it is so rare that
1054       // that we don't bother.  A varint-encoded 0 should be one byte unless
1055       // the encoder lost its mind.  The second part of the return value of
1056       // this function is allowed to be either true or false if the tag is 0,
1057       // so we don't have to check for tag == 0.  We may need to check whether
1058       // it exceeds cutoff.
1059       bool at_or_below_cutoff = cutoff >= kMax2ByteVarint || tag <= cutoff;
1060       return std::make_pair(tag, at_or_below_cutoff);
1061     }
1062   }
1063   // Slow path
1064   const uint32 tag = ReadTagFallback(first_byte_or_zero);
1065   // If tag == 0 we want to return { 0, false } so the following overflow is intended.
1066   // We use __builtin_add_overflow to appease the sub-overflow UB sanitizer.
1067   uint32_t tag_minus_one;
1068   __builtin_add_overflow(tag, -1, &tag_minus_one);
1069   return std::make_pair(tag, tag_minus_one < cutoff);
1070 }
1071 
LastTagWas(uint32 expected)1072 inline bool CodedInputStream::LastTagWas(uint32 expected) {
1073   return last_tag_ == expected;
1074 }
1075 
ConsumedEntireMessage()1076 inline bool CodedInputStream::ConsumedEntireMessage() {
1077   return legitimate_message_end_;
1078 }
1079 
ExpectTag(uint32 expected)1080 inline bool CodedInputStream::ExpectTag(uint32 expected) {
1081   if (expected < (1 << 7)) {
1082     if (PROTOBUF_PREDICT_TRUE(buffer_ < buffer_end_) &&
1083         buffer_[0] == expected) {
1084       Advance(1);
1085       return true;
1086     } else {
1087       return false;
1088     }
1089   } else if (expected < (1 << 14)) {
1090     if (PROTOBUF_PREDICT_TRUE(BufferSize() >= 2) &&
1091         buffer_[0] == static_cast<uint8>(expected | 0x80) &&
1092         buffer_[1] == static_cast<uint8>(expected >> 7)) {
1093       Advance(2);
1094       return true;
1095     } else {
1096       return false;
1097     }
1098   } else {
1099     // Don't bother optimizing for larger values.
1100     return false;
1101   }
1102 }
1103 
// Array flavour of ExpectTag(): when `buffer` starts with the one- or two-byte
// varint encoding of `expected`, returns a pointer just past it; otherwise
// returns NULL.  No bounds checking is performed on `buffer`.
inline const uint8* CodedInputStream::ExpectTagFromArray(const uint8* buffer,
                                                         uint32 expected) {
  const uint8* past_tag = NULL;
  if (expected < (1 << 7)) {
    if (buffer[0] == expected) {
      past_tag = buffer + 1;
    }
  } else if (expected < (1 << 14)) {
    const uint8 first_encoded = static_cast<uint8>(expected | 0x80);
    const uint8 second_encoded = static_cast<uint8>(expected >> 7);
    if (buffer[0] == first_encoded && buffer[1] == second_encoded) {
      past_tag = buffer + 2;
    }
  }
  return past_tag;
}
1118 
GetDirectBufferPointerInline(const void ** data,int * size)1119 inline void CodedInputStream::GetDirectBufferPointerInline(const void** data,
1120                                                            int* size) {
1121   *data = buffer_;
1122   *size = static_cast<int>(buffer_end_ - buffer_);
1123 }
1124 
ExpectAtEnd()1125 inline bool CodedInputStream::ExpectAtEnd() {
1126   // If we are at a limit we know no more bytes can be read.  Otherwise, it's
1127   // hard to say without calling Refresh(), and we'd rather not do that.
1128 
1129   if (buffer_ == buffer_end_ && ((buffer_size_after_limit_ != 0) ||
1130                                  (total_bytes_read_ == current_limit_))) {
1131     last_tag_ = 0;                   // Pretend we called ReadTag()...
1132     legitimate_message_end_ = true;  // ... and it hit EOF.
1133     return true;
1134   } else {
1135     return false;
1136   }
1137 }
1138 
CurrentPosition()1139 inline int CodedInputStream::CurrentPosition() const {
1140   return total_bytes_read_ - (BufferSize() + buffer_size_after_limit_);
1141 }
1142 
GetDirectBufferForNBytesAndAdvance(int size)1143 inline uint8* CodedOutputStream::GetDirectBufferForNBytesAndAdvance(int size) {
1144   if (buffer_size_ < size) {
1145     return NULL;
1146   } else {
1147     uint8* result = buffer_;
1148     Advance(size);
1149     return result;
1150   }
1151 }
1152 
WriteVarint32ToArray(uint32 value,uint8 * target)1153 inline uint8* CodedOutputStream::WriteVarint32ToArray(uint32 value,
1154                                                       uint8* target) {
1155   while (value >= 0x80) {
1156     *target = static_cast<uint8>(value | 0x80);
1157     value >>= 7;
1158     ++target;
1159   }
1160   *target = static_cast<uint8>(value);
1161   return target + 1;
1162 }
1163 
WriteVarint64ToArray(uint64 value,uint8 * target)1164 inline uint8* CodedOutputStream::WriteVarint64ToArray(uint64 value,
1165                                                       uint8* target) {
1166   while (value >= 0x80) {
1167     *target = static_cast<uint8>(value | 0x80);
1168     value >>= 7;
1169     ++target;
1170   }
1171   *target = static_cast<uint8>(value);
1172   return target + 1;
1173 }
1174 
WriteVarint32SignExtended(int32 value)1175 inline void CodedOutputStream::WriteVarint32SignExtended(int32 value) {
1176   WriteVarint64(static_cast<uint64>(value));
1177 }
1178 
WriteVarint32SignExtendedToArray(int32 value,uint8 * target)1179 inline uint8* CodedOutputStream::WriteVarint32SignExtendedToArray(
1180     int32 value, uint8* target) {
1181   return WriteVarint64ToArray(static_cast<uint64>(value), target);
1182 }
1183 
WriteLittleEndian32ToArray(uint32 value,uint8 * target)1184 inline uint8* CodedOutputStream::WriteLittleEndian32ToArray(uint32 value,
1185                                                             uint8* target) {
1186 #if defined(PROTOBUF_LITTLE_ENDIAN)
1187   memcpy(target, &value, sizeof(value));
1188 #else
1189   target[0] = static_cast<uint8>(value);
1190   target[1] = static_cast<uint8>(value >> 8);
1191   target[2] = static_cast<uint8>(value >> 16);
1192   target[3] = static_cast<uint8>(value >> 24);
1193 #endif
1194   return target + sizeof(value);
1195 }
1196 
WriteLittleEndian64ToArray(uint64 value,uint8 * target)1197 inline uint8* CodedOutputStream::WriteLittleEndian64ToArray(uint64 value,
1198                                                             uint8* target) {
1199 #if defined(PROTOBUF_LITTLE_ENDIAN)
1200   memcpy(target, &value, sizeof(value));
1201 #else
1202   uint32 part0 = static_cast<uint32>(value);
1203   uint32 part1 = static_cast<uint32>(value >> 32);
1204 
1205   target[0] = static_cast<uint8>(part0);
1206   target[1] = static_cast<uint8>(part0 >> 8);
1207   target[2] = static_cast<uint8>(part0 >> 16);
1208   target[3] = static_cast<uint8>(part0 >> 24);
1209   target[4] = static_cast<uint8>(part1);
1210   target[5] = static_cast<uint8>(part1 >> 8);
1211   target[6] = static_cast<uint8>(part1 >> 16);
1212   target[7] = static_cast<uint8>(part1 >> 24);
1213 #endif
1214   return target + sizeof(value);
1215 }
1216 
WriteVarint32(uint32 value)1217 inline void CodedOutputStream::WriteVarint32(uint32 value) {
1218   if (buffer_size_ >= 5) {
1219     // Fast path:  We have enough bytes left in the buffer to guarantee that
1220     // this write won't cross the end, so we can skip the checks.
1221     uint8* target = buffer_;
1222     uint8* end = WriteVarint32ToArray(value, target);
1223     int size = static_cast<int>(end - target);
1224     Advance(size);
1225   } else {
1226     WriteVarint32SlowPath(value);
1227   }
1228 }
1229 
WriteVarint64(uint64 value)1230 inline void CodedOutputStream::WriteVarint64(uint64 value) {
1231   if (buffer_size_ >= 10) {
1232     // Fast path:  We have enough bytes left in the buffer to guarantee that
1233     // this write won't cross the end, so we can skip the checks.
1234     uint8* target = buffer_;
1235     uint8* end = WriteVarint64ToArray(value, target);
1236     int size = static_cast<int>(end - target);
1237     Advance(size);
1238   } else {
1239     WriteVarint64SlowPath(value);
1240   }
1241 }
1242 
WriteTag(uint32 value)1243 inline void CodedOutputStream::WriteTag(uint32 value) { WriteVarint32(value); }
1244 
WriteTagToArray(uint32 value,uint8 * target)1245 inline uint8* CodedOutputStream::WriteTagToArray(uint32 value, uint8* target) {
1246   return WriteVarint32ToArray(value, target);
1247 }
1248 
VarintSize32(uint32 value)1249 inline size_t CodedOutputStream::VarintSize32(uint32 value) {
1250   // This computes value == 0 ? 1 : floor(log2(value)) / 7 + 1
1251   // Use an explicit multiplication to implement the divide of
1252   // a number in the 1..31 range.
1253   // Explicit OR 0x1 to avoid calling Bits::Log2FloorNonZero(0), which is
1254   // undefined.
1255   uint32 log2value = Bits::Log2FloorNonZero(value | 0x1);
1256   return static_cast<size_t>((log2value * 9 + 73) / 64);
1257 }
1258 
VarintSize64(uint64 value)1259 inline size_t CodedOutputStream::VarintSize64(uint64 value) {
1260   // This computes value == 0 ? 1 : floor(log2(value)) / 7 + 1
1261   // Use an explicit multiplication to implement the divide of
1262   // a number in the 1..63 range.
1263   // Explicit OR 0x1 to avoid calling Bits::Log2FloorNonZero(0), which is
1264   // undefined.
1265   uint32 log2value = Bits::Log2FloorNonZero64(value | 0x1);
1266   return static_cast<size_t>((log2value * 9 + 73) / 64);
1267 }
1268 
VarintSize32SignExtended(int32 value)1269 inline size_t CodedOutputStream::VarintSize32SignExtended(int32 value) {
1270   if (value < 0) {
1271     return 10;  // TODO(kenton):  Make this a symbolic constant.
1272   } else {
1273     return VarintSize32(static_cast<uint32>(value));
1274   }
1275 }
1276 
WriteString(const std::string & str)1277 inline void CodedOutputStream::WriteString(const std::string& str) {
1278   WriteRaw(str.data(), static_cast<int>(str.size()));
1279 }
1280 
WriteRawMaybeAliased(const void * data,int size)1281 inline void CodedOutputStream::WriteRawMaybeAliased(const void* data,
1282                                                     int size) {
1283   if (aliasing_enabled_) {
1284     WriteAliasedRaw(data, size);
1285   } else {
1286     WriteRaw(data, size);
1287   }
1288 }
1289 
WriteStringToArray(const std::string & str,uint8 * target)1290 inline uint8* CodedOutputStream::WriteStringToArray(const std::string& str,
1291                                                     uint8* target) {
1292   return WriteRawToArray(str.data(), static_cast<int>(str.size()), target);
1293 }
1294 
ByteCount()1295 inline int CodedOutputStream::ByteCount() const {
1296   return total_bytes_ - buffer_size_;
1297 }
1298 
Advance(int amount)1299 inline void CodedInputStream::Advance(int amount) { buffer_ += amount; }
1300 
Advance(int amount)1301 inline void CodedOutputStream::Advance(int amount) {
1302   buffer_ += amount;
1303   buffer_size_ -= amount;
1304 }
1305 
SetRecursionLimit(int limit)1306 inline void CodedInputStream::SetRecursionLimit(int limit) {
1307   recursion_budget_ += limit - recursion_limit_;
1308   recursion_limit_ = limit;
1309 }
1310 
IncrementRecursionDepth()1311 inline bool CodedInputStream::IncrementRecursionDepth() {
1312   --recursion_budget_;
1313   return recursion_budget_ >= 0;
1314 }
1315 
DecrementRecursionDepth()1316 inline void CodedInputStream::DecrementRecursionDepth() {
1317   if (recursion_budget_ < recursion_limit_) ++recursion_budget_;
1318 }
1319 
UnsafeDecrementRecursionDepth()1320 inline void CodedInputStream::UnsafeDecrementRecursionDepth() {
1321   assert(recursion_budget_ < recursion_limit_);
1322   ++recursion_budget_;
1323 }
1324 
SetExtensionRegistry(const DescriptorPool * pool,MessageFactory * factory)1325 inline void CodedInputStream::SetExtensionRegistry(const DescriptorPool* pool,
1326                                                    MessageFactory* factory) {
1327   extension_pool_ = pool;
1328   extension_factory_ = factory;
1329 }
1330 
GetExtensionPool()1331 inline const DescriptorPool* CodedInputStream::GetExtensionPool() {
1332   return extension_pool_;
1333 }
1334 
GetExtensionFactory()1335 inline MessageFactory* CodedInputStream::GetExtensionFactory() {
1336   return extension_factory_;
1337 }
1338 
BufferSize()1339 inline int CodedInputStream::BufferSize() const {
1340   return static_cast<int>(buffer_end_ - buffer_);
1341 }
1342 
CodedInputStream(ZeroCopyInputStream * input)1343 inline CodedInputStream::CodedInputStream(ZeroCopyInputStream* input)
1344     : buffer_(NULL),
1345       buffer_end_(NULL),
1346       input_(input),
1347       total_bytes_read_(0),
1348       overflow_bytes_(0),
1349       last_tag_(0),
1350       legitimate_message_end_(false),
1351       aliasing_enabled_(false),
1352       current_limit_(kint32max),
1353       buffer_size_after_limit_(0),
1354       total_bytes_limit_(kDefaultTotalBytesLimit),
1355       recursion_budget_(default_recursion_limit_),
1356       recursion_limit_(default_recursion_limit_),
1357       extension_pool_(NULL),
1358       extension_factory_(NULL) {
1359   // Eagerly Refresh() so buffer space is immediately available.
1360   Refresh();
1361 }
1362 
CodedInputStream(const uint8 * buffer,int size)1363 inline CodedInputStream::CodedInputStream(const uint8* buffer, int size)
1364     : buffer_(buffer),
1365       buffer_end_(buffer + size),
1366       input_(NULL),
1367       total_bytes_read_(size),
1368       overflow_bytes_(0),
1369       last_tag_(0),
1370       legitimate_message_end_(false),
1371       aliasing_enabled_(false),
1372       current_limit_(size),
1373       buffer_size_after_limit_(0),
1374       total_bytes_limit_(kDefaultTotalBytesLimit),
1375       recursion_budget_(default_recursion_limit_),
1376       recursion_limit_(default_recursion_limit_),
1377       extension_pool_(NULL),
1378       extension_factory_(NULL) {
1379   // Note that setting current_limit_ == size is important to prevent some
1380   // code paths from trying to access input_ and segfaulting.
1381 }
1382 
IsFlat()1383 inline bool CodedInputStream::IsFlat() const { return input_ == NULL; }
1384 
Skip(int count)1385 inline bool CodedInputStream::Skip(int count) {
1386   if (count < 0) return false;  // security: count is often user-supplied
1387 
1388   const int original_buffer_size = BufferSize();
1389 
1390   if (count <= original_buffer_size) {
1391     // Just skipping within the current buffer.  Easy.
1392     Advance(count);
1393     return true;
1394   }
1395 
1396   return SkipFallback(count, original_buffer_size);
1397 }
1398 
1399 }  // namespace io
1400 }  // namespace protobuf
1401 }  // namespace google
1402 
1403 #if defined(_MSC_VER) && _MSC_VER >= 1300 && !defined(__INTEL_COMPILER)
1404 #pragma runtime_checks("c", restore)
1405 #endif  // _MSC_VER && !defined(__INTEL_COMPILER)
1406 
1407 #include <google/protobuf/port_undef.inc>
1408 
1409 #endif  // GOOGLE_PROTOBUF_IO_CODED_STREAM_H__
1410