• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc.  All rights reserved.
3 // http://code.google.com/p/protobuf/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 //     * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 //     * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 //     * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 
31 // Author: kenton@google.com (Kenton Varda)
32 //  Based on original Protocol Buffers design by
33 //  Sanjay Ghemawat, Jeff Dean, and others.
34 //
35 // This file contains the CodedInputStream and CodedOutputStream classes,
36 // which wrap a ZeroCopyInputStream or ZeroCopyOutputStream, respectively,
37 // and allow you to read or write individual pieces of data in various
38 // formats.  In particular, these implement the varint encoding for
39 // integers, a simple variable-length encoding in which smaller numbers
40 // take fewer bytes.
41 //
42 // Typically these classes will only be used internally by the protocol
43 // buffer library in order to encode and decode protocol buffers.  Clients
44 // of the library only need to know about these classes if they wish to write
45 // custom message parsing or serialization procedures.
46 //
47 // CodedOutputStream example:
48 //   // Write some data to "myfile".  First we write a 4-byte "magic number"
49 //   // to identify the file type, then write a length-delimited string.  The
50 //   // string is composed of a varint giving the length followed by the raw
51 //   // bytes.
52 //   int fd = open("myfile", O_WRONLY);
53 //   ZeroCopyOutputStream* raw_output = new FileOutputStream(fd);
54 //   CodedOutputStream* coded_output = new CodedOutputStream(raw_output);
55 //
56 //   int magic_number = 1234;
57 //   char text[] = "Hello world!";
58 //   coded_output->WriteLittleEndian32(magic_number);
59 //   coded_output->WriteVarint32(strlen(text));
60 //   coded_output->WriteRaw(text, strlen(text));
61 //
62 //   delete coded_output;
63 //   delete raw_output;
64 //   close(fd);
65 //
66 // CodedInputStream example:
67 //   // Read a file created by the above code.
68 //   int fd = open("myfile", O_RDONLY);
69 //   ZeroCopyInputStream* raw_input = new FileInputStream(fd);
70 //   CodedInputStream* coded_input = new CodedInputStream(raw_input);
71 //
72 //   coded_input->ReadLittleEndian32(&magic_number);
73 //   if (magic_number != 1234) {
74 //     cerr << "File not in expected format." << endl;
75 //     return;
76 //   }
77 //
78 //   uint32 size;
79 //   coded_input->ReadVarint32(&size);
80 //
81 //   char* text = new char[size + 1];
82 //   coded_input->ReadRaw(text, size);
83 //   text[size] = '\0';
84 //
85 //   delete coded_input;
86 //   delete raw_input;
87 //   close(fd);
88 //
89 //   cout << "Text is: " << text << endl;
90 //   delete [] text;
91 //
92 // For those who are interested, varint encoding is defined as follows:
93 //
94 // The encoding operates on unsigned integers of up to 64 bits in length.
95 // Each byte of the encoded value has the format:
96 // * bits 0-6: Seven bits of the number being encoded.
97 // * bit 7: Zero if this is the last byte in the encoding (in which
98 //   case all remaining bits of the number are zero) or 1 if
99 //   more bytes follow.
100 // The first byte contains the least-significant 7 bits of the number, the
101 // second byte (if present) contains the next-least-significant 7 bits,
102 // and so on.  So, the binary number 1011000101011 would be encoded in two
103 // bytes as "10101011 00101100".
104 //
105 // In theory, varint could be used to encode integers of any length.
106 // However, for practicality we set a limit at 64 bits.  The maximum encoded
107 // length of a number is thus 10 bytes.
108 
109 #ifndef GOOGLE_PROTOBUF_IO_CODED_STREAM_H__
110 #define GOOGLE_PROTOBUF_IO_CODED_STREAM_H__
111 
112 #include <string>
113 #ifndef _MSC_VER
114 #include <sys/param.h>
115 #endif  // !_MSC_VER
116 #include <google/protobuf/stubs/common.h>
117 #include <google/protobuf/stubs/common.h>          // for GOOGLE_PREDICT_TRUE macro
118 
119 namespace google {
120 
121 namespace protobuf {
122 
123 class DescriptorPool;
124 class MessageFactory;
125 
126 namespace io {
127 
128 // Defined in this file.
129 class CodedInputStream;
130 class CodedOutputStream;
131 
132 // Defined in other files.
133 class ZeroCopyInputStream;           // zero_copy_stream.h
134 class ZeroCopyOutputStream;          // zero_copy_stream.h
135 
136 // Class which reads and decodes binary data which is composed of varint-
137 // encoded integers and fixed-width pieces.  Wraps a ZeroCopyInputStream.
138 // Most users will not need to deal with CodedInputStream.
139 //
140 // Most methods of CodedInputStream that return a bool return false if an
141 // underlying I/O error occurs or if the data is malformed.  Once such a
142 // failure occurs, the CodedInputStream is broken and is no longer useful.
143 class LIBPROTOBUF_EXPORT CodedInputStream {
144  public:
145   // Create a CodedInputStream that reads from the given ZeroCopyInputStream.
146   explicit CodedInputStream(ZeroCopyInputStream* input);
147 
148   // Create a CodedInputStream that reads from the given flat array.  This is
149   // faster than using an ArrayInputStream.  PushLimit(size) is implied by
150   // this constructor.
151   explicit CodedInputStream(const uint8* buffer, int size);
152 
153   // Destroy the CodedInputStream and position the underlying
154   // ZeroCopyInputStream at the first unread byte.  If an error occurred while
155   // reading (causing a method to return false), then the exact position of
156   // the input stream may be anywhere between the last value that was read
157   // successfully and the stream's byte limit.
158   ~CodedInputStream();
159 
160 
161   // Skips a number of bytes.  Returns false if an underlying read error
162   // occurs.
163   bool Skip(int count);
164 
165   // Sets *data to point directly at the unread part of the CodedInputStream's
166   // underlying buffer, and *size to the size of that buffer, but does not
167   // advance the stream's current position.  This will always either produce
168   // a non-empty buffer or return false.  If the caller consumes any of
169   // this data, it should then call Skip() to skip over the consumed bytes.
170   // This may be useful for implementing external fast parsing routines for
171   // types of data not covered by the CodedInputStream interface.
172   bool GetDirectBufferPointer(const void** data, int* size);
173 
174   // Like GetDirectBufferPointer, but this method is inlined, and does not
175   // attempt to Refresh() if the buffer is currently empty.
176   inline void GetDirectBufferPointerInline(const void** data,
177                                            int* size) GOOGLE_ATTRIBUTE_ALWAYS_INLINE;
178 
179   // Read raw bytes, copying them into the given buffer.
180   bool ReadRaw(void* buffer, int size);
181 
182   // Like ReadRaw, but reads into a string.
183   //
184   // Implementation Note:  ReadString() grows the string gradually as it
185   // reads in the data, rather than allocating the entire requested size
186   // upfront.  This prevents denial-of-service attacks in which a client
187   // could claim that a string is going to be MAX_INT bytes long in order to
188   // crash the server because it can't allocate this much space at once.
189   bool ReadString(string* buffer, int size);
190   // Like the above, with inlined optimizations. This should only be used
191   // by the protobuf implementation.
192   inline bool InternalReadStringInline(string* buffer,
193                                        int size) GOOGLE_ATTRIBUTE_ALWAYS_INLINE;
194 
195 
196   // Read a 32-bit little-endian integer.
197   bool ReadLittleEndian32(uint32* value);
198   // Read a 64-bit little-endian integer.
199   bool ReadLittleEndian64(uint64* value);
200 
201   // These methods read from an externally provided buffer. The caller is
202   // responsible for ensuring that the buffer has sufficient space.
203   // Read a 32-bit little-endian integer.
204   static const uint8* ReadLittleEndian32FromArray(const uint8* buffer,
205                                                    uint32* value);
206   // Read a 64-bit little-endian integer.
207   static const uint8* ReadLittleEndian64FromArray(const uint8* buffer,
208                                                    uint64* value);
209 
210   // Read an unsigned integer with Varint encoding, truncating to 32 bits.
211   // Reading a 32-bit value is equivalent to reading a 64-bit one and casting
212   // it to uint32, but may be more efficient.
213   bool ReadVarint32(uint32* value);
214   // Read an unsigned integer with Varint encoding.
215   bool ReadVarint64(uint64* value);
216 
217   // Read a tag.  This calls ReadVarint32() and returns the result, or returns
218   // zero (which is not a valid tag) if ReadVarint32() fails.  Also, it updates
219   // the last tag value, which can be checked with LastTagWas().
220   // Always inline because this is only called in one place per parse loop
221   // but it is called for every iteration of said loop, so it should be fast.
222   // GCC doesn't want to inline this by default.
223   uint32 ReadTag() GOOGLE_ATTRIBUTE_ALWAYS_INLINE;
224 
225   // Usually returns true if calling ReadVarint32() now would produce the given
226   // value.  Will always return false if ReadVarint32() would not return the
227   // given value.  If ExpectTag() returns true, it also advances past
228   // the varint.  For best performance, use a compile-time constant as the
229   // parameter.
230   // Always inline because this collapses to a small number of instructions
231   // when given a constant parameter, but GCC doesn't want to inline by default.
232   bool ExpectTag(uint32 expected) GOOGLE_ATTRIBUTE_ALWAYS_INLINE;
233 
234   // Like above, except this reads from the specified buffer. The caller is
235   // responsible for ensuring that the buffer is large enough to read a varint
236   // of the expected size. For best performance, use a compile-time constant as
237   // the expected tag parameter.
238   //
239   // Returns a pointer beyond the expected tag if it was found, or NULL if it
240   // was not.
241   static const uint8* ExpectTagFromArray(
242       const uint8* buffer,
243       uint32 expected) GOOGLE_ATTRIBUTE_ALWAYS_INLINE;
244 
245   // Usually returns true if no more bytes can be read.  Always returns false
246   // if more bytes can be read.  If ExpectAtEnd() returns true, a subsequent
247   // call to LastTagWas() will act as if ReadTag() had been called and returned
248   // zero, and ConsumedEntireMessage() will return true.
249   bool ExpectAtEnd();
250 
251   // If the last call to ReadTag() returned the given value, returns true.
252   // Otherwise, returns false.
253   //
254   // This is needed because parsers for some types of embedded messages
255   // (with field type TYPE_GROUP) don't actually know that they've reached the
256   // end of a message until they see an ENDGROUP tag, which was actually part
257   // of the enclosing message.  The enclosing message would like to check that
258   // tag to make sure it had the right number, so it calls LastTagWas() on
259   // return from the embedded parser to check.
260   bool LastTagWas(uint32 expected);
261 
262   // When parsing a message (but NOT a group), this method must be called
263   // immediately after MergeFromCodedStream() returns (if it returns true)
264   // to further verify that the message ended in a legitimate way.  For
265   // example, this verifies that parsing did not end on an end-group tag.
266   // It also checks for some cases where, due to optimizations,
267   // MergeFromCodedStream() can incorrectly return true.
268   bool ConsumedEntireMessage();
269 
270   // Limits ----------------------------------------------------------
271   // Limits are used when parsing length-delimited embedded messages.
272   // After the message's length is read, PushLimit() is used to prevent
273   // the CodedInputStream from reading beyond that length.  Once the
274   // embedded message has been parsed, PopLimit() is called to undo the
275   // limit.
276 
277   // Opaque type used with PushLimit() and PopLimit().  Do not modify
278   // values of this type yourself.  The only reason that this isn't a
279   // struct with private internals is for efficiency.
280   typedef int Limit;
281 
282   // Places a limit on the number of bytes that the stream may read,
283   // starting from the current position.  Once the stream hits this limit,
284   // it will act like the end of the input has been reached until PopLimit()
285   // is called.
286   //
287   // As the names imply, the stream conceptually has a stack of limits.  The
288   // shortest limit on the stack is always enforced, even if it is not the
289   // top limit.
290   //
291   // The value returned by PushLimit() is opaque to the caller, and must
292   // be passed unchanged to the corresponding call to PopLimit().
293   Limit PushLimit(int byte_limit);
294 
295   // Pops the last limit pushed by PushLimit().  The input must be the value
296   // returned by that call to PushLimit().
297   void PopLimit(Limit limit);
298 
299   // Returns the number of bytes left until the nearest limit on the
300   // stack is hit, or -1 if no limits are in place.
301   int BytesUntilLimit();
302 
303   // Total Bytes Limit -----------------------------------------------
304   // To prevent malicious users from sending excessively large messages
305   // and causing integer overflows or memory exhaustion, CodedInputStream
306   // imposes a hard limit on the total number of bytes it will read.
307 
308   // Sets the maximum number of bytes that this CodedInputStream will read
309   // before refusing to continue.  To prevent integer overflows in the
310   // protocol buffers implementation, as well as to prevent servers from
311   // allocating enormous amounts of memory to hold parsed messages, the
312   // maximum message length should be limited to the shortest length that
313   // will not harm usability.  The theoretical shortest message that could
314   // cause integer overflows is 512MB.  The default limit is 64MB.  Apps
315   // should set shorter limits if possible.  If warning_threshold is not -1,
316   // a warning will be printed to stderr after warning_threshold bytes are
317   // read.  An error will always be printed to stderr if the limit is
318   // reached.
319   //
320   // This is unrelated to PushLimit()/PopLimit().
321   //
322   // Hint:  If you are reading this because your program is printing a
323   //   warning about dangerously large protocol messages, you may be
324   //   confused about what to do next.  The best option is to change your
325   //   design such that excessively large messages are not necessary.
326   //   For example, try to design file formats to consist of many small
327   //   messages rather than a single large one.  If this is infeasible,
328   //   you will need to increase the limit.  Chances are, though, that
329   //   your code never constructs a CodedInputStream on which the limit
330   //   can be set.  You probably parse messages by calling things like
331   //   Message::ParseFromString().  In this case, you will need to change
332   //   your code to instead construct some sort of ZeroCopyInputStream
333   //   (e.g. an ArrayInputStream), construct a CodedInputStream around
334   //   that, then call Message::ParseFromCodedStream() instead.  Then
335   //   you can adjust the limit.  Yes, it's more work, but you're doing
336   //   something unusual.
337   void SetTotalBytesLimit(int total_bytes_limit, int warning_threshold);
338 
339   // Recursion Limit -------------------------------------------------
340   // To prevent corrupt or malicious messages from causing stack overflows,
341   // we must keep track of the depth of recursion when parsing embedded
342   // messages and groups.  CodedInputStream keeps track of this because it
343   // is the only object that is passed down the stack during parsing.
344 
345   // Sets the maximum recursion depth.  The default is 64.
346   void SetRecursionLimit(int limit);
347 
348   // Increments the current recursion depth.  Returns true if the depth is
349   // under the limit, false if it has gone over.
350   bool IncrementRecursionDepth();
351 
352   // Decrements the recursion depth.
353   void DecrementRecursionDepth();
354 
355   // Extension Registry ----------------------------------------------
356   // ADVANCED USAGE:  99.9% of people can ignore this section.
357   //
358   // By default, when parsing extensions, the parser looks for extension
359   // definitions in the pool which owns the outer message's Descriptor.
360   // However, you may call SetExtensionRegistry() to provide an alternative
361   // pool instead.  This makes it possible, for example, to parse a message
362   // using a generated class, but represent some extensions using
363   // DynamicMessage.
364 
365   // Set the pool used to look up extensions.  Most users do not need to call
366   // this as the correct pool will be chosen automatically.
367   //
368   // WARNING:  It is very easy to misuse this.  Carefully read the requirements
369   //   below.  Do not use this unless you are sure you need it.  Almost no one
370   //   does.
371   //
372   // Let's say you are parsing a message into message object m, and you want
373   // to take advantage of SetExtensionRegistry().  You must follow these
374   // requirements:
375   //
376   // The given DescriptorPool must contain m->GetDescriptor().  It is not
377   // sufficient for it to simply contain a descriptor that has the same name
378   // and content -- it must be the *exact object*.  In other words:
379   //   assert(pool->FindMessageTypeByName(m->GetDescriptor()->full_name()) ==
380   //          m->GetDescriptor());
381   // There are two ways to satisfy this requirement:
382   // 1) Use m->GetDescriptor()->pool() as the pool.  This is generally useless
383   //    because this is the pool that would be used anyway if you didn't call
384   //    SetExtensionRegistry() at all.
385   // 2) Use a DescriptorPool which has m->GetDescriptor()->pool() as an
386   //    "underlay".  Read the documentation for DescriptorPool for more
387   //    information about underlays.
388   //
389   // You must also provide a MessageFactory.  This factory will be used to
390   // construct Message objects representing extensions.  The factory's
391   // GetPrototype() MUST return non-NULL for any Descriptor which can be found
392   // through the provided pool.
393   //
394   // If the provided factory might return instances of protocol-compiler-
395   // generated (i.e. compiled-in) types, or if the outer message object m is
396   // a generated type, then the given factory MUST have this property:  If
397   // GetPrototype() is given a Descriptor which resides in
398   // DescriptorPool::generated_pool(), the factory MUST return the same
399   // prototype which MessageFactory::generated_factory() would return.  That
400   // is, given a descriptor for a generated type, the factory must return an
401   // instance of the generated class (NOT DynamicMessage).  However, when
402   // given a descriptor for a type that is NOT in generated_pool, the factory
403   // is free to return any implementation.
404   //
405   // The reason for this requirement is that generated sub-objects may be
406   // accessed via the standard (non-reflection) extension accessor methods,
407   // and these methods will down-cast the object to the generated class type.
408   // If the object is not actually of that type, the results would be undefined.
409   // On the other hand, if an extension is not compiled in, then there is no
410   // way the code could end up accessing it via the standard accessors -- the
411   // only way to access the extension is via reflection.  When using reflection,
412   // DynamicMessage and generated messages are indistinguishable, so it's fine
413   // if these objects are represented using DynamicMessage.
414   //
415   // Using DynamicMessageFactory on which you have called
416   // SetDelegateToGeneratedFactory(true) should be sufficient to satisfy the
417   // above requirement.
418   //
419   // If either pool or factory is NULL, both must be NULL.
420   //
421   // Note that this feature is ignored when parsing "lite" messages as they do
422   // not have descriptors.
423   void SetExtensionRegistry(DescriptorPool* pool, MessageFactory* factory);
424 
425   // Get the DescriptorPool set via SetExtensionRegistry(), or NULL if no pool
426   // has been provided.
427   const DescriptorPool* GetExtensionPool();
428 
429   // Get the MessageFactory set via SetExtensionRegistry(), or NULL if no
430   // factory has been provided.
431   MessageFactory* GetExtensionFactory();
432 
433  private:
434   GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(CodedInputStream);
435 
436   ZeroCopyInputStream* input_;
437   const uint8* buffer_;
438   const uint8* buffer_end_;     // pointer to the end of the buffer.
439   int total_bytes_read_;  // total bytes read from input_, including
440                           // the current buffer
441 
442   // If total_bytes_read_ surpasses INT_MAX, we record the extra bytes here
443   // so that we can BackUp() on destruction.
444   int overflow_bytes_;
445 
446   // LastTagWas() stuff.
447   uint32 last_tag_;         // result of last ReadTag().
448 
449   // This is set true by ReadTag{Fallback/Slow}() if it is called when exactly
450   // at EOF, or by ExpectAtEnd() when it returns true.  This happens when we
451   // reach the end of a message and attempt to read another tag.
452   bool legitimate_message_end_;
453 
454   // See EnableAliasing().  NOTE(review): not declared in this class -- confirm.
455   bool aliasing_enabled_;
456 
457   // Limits
458   Limit current_limit_;   // if position = -1, no limit is applied
459 
460   // For simplicity, if the current buffer crosses a limit (either a normal
461   // limit created by PushLimit() or the total bytes limit), buffer_size_
462   // only tracks the number of bytes before that limit, and this field holds
463   // the number of bytes after it.  So if buffer_size_ == 0 and
464   // buffer_size_after_limit_ > 0, we know we've hit a limit; if both are
465   // zero we may still be at one (the buffer may have ended exactly there).
466   // NOTE(review): buffer_size_ is not a declared member; presumably BufferSize().
467   int buffer_size_after_limit_;
468 
469   // Maximum number of bytes to read, period.  This is unrelated to
470   // current_limit_.  Set using SetTotalBytesLimit().
471   int total_bytes_limit_;
472   int total_bytes_warning_threshold_;
473 
474   // Current recursion depth, controlled by IncrementRecursionDepth() and
475   // DecrementRecursionDepth().
476   int recursion_depth_;
477   // Recursion depth limit, set by SetRecursionLimit().
478   int recursion_limit_;
479 
480   // See SetExtensionRegistry().
481   const DescriptorPool* extension_pool_;
482   MessageFactory* extension_factory_;
483 
484   // Private member functions.
485 
486   // Advance the buffer by a given number of bytes.
487   void Advance(int amount);
488 
489   // Back up input_ to the current buffer position.
490   void BackUpInputToCurrentPosition();
491 
492   // Recomputes the value of buffer_size_after_limit_.  Must be called after
493   // current_limit_ or total_bytes_limit_ changes.
494   void RecomputeBufferLimits();
495 
496   // Writes an error message saying that we hit total_bytes_limit_.
497   void PrintTotalBytesLimitError();
498 
499   // Called when the buffer runs out to request more data.  Implies an
500   // Advance(BufferSize()).
501   bool Refresh();
502 
503   // When parsing varints, we optimize for the common case of small values, and
504   // then optimize for the case when the varint fits within the current buffer
505   // piece. The Fallback method is used when we can't use the one-byte
506   // optimization. The Slow method is yet another fallback when the buffer is
507   // not large enough. Making the slow path out-of-line speeds up the common
508   // case by 10-15%. The slow path is fairly uncommon: it only triggers when a
509   // message crosses multiple buffers.
510   bool ReadVarint32Fallback(uint32* value);
511   bool ReadVarint64Fallback(uint64* value);
512   bool ReadVarint32Slow(uint32* value);
513   bool ReadVarint64Slow(uint64* value);
514   bool ReadLittleEndian32Fallback(uint32* value);
515   bool ReadLittleEndian64Fallback(uint64* value);
516   // Fallback/slow methods for reading tags. These do not update last_tag_,
517   // but will set legitimate_message_end_ if we are at the end of the input
518   // stream.
519   uint32 ReadTagFallback();
520   uint32 ReadTagSlow();
521   bool ReadStringFallback(string* buffer, int size);
522 
523   // Return the size of the buffer.
524   int BufferSize() const;
525 
526   static const int kDefaultTotalBytesLimit = 64 << 20;  // 64MB
527 
528   static const int kDefaultTotalBytesWarningThreshold = 32 << 20;  // 32MB
529   static const int kDefaultRecursionLimit = 64;  // see SetRecursionLimit()
530 };
531 
532 // Class which encodes and writes binary data which is composed of varint-
533 // encoded integers and fixed-width pieces.  Wraps a ZeroCopyOutputStream.
534 // Most users will not need to deal with CodedOutputStream.
535 //
536 // Most methods of CodedOutputStream which return a bool return false if an
537 // underlying I/O error occurs.  Once such a failure occurs, the
538 // CodedOutputStream is broken and is no longer useful. The Write* methods do
539 // not return the stream status, but will invalidate the stream if an error
540 // occurs. The client can probe HadError() to determine the status.
541 //
542 // Note that every method of CodedOutputStream which writes some data has
543 // a corresponding static "ToArray" version. These versions write directly
544 // to the provided buffer, returning a pointer past the last written byte.
545 // They require that the buffer has sufficient capacity for the encoded data.
546 // This allows an optimization where we check if an output stream has enough
547 // space for an entire message before we start writing and, if there is, we
548 // call only the ToArray methods to avoid doing bound checks for each
549 // individual value.
550 // i.e., in the example above:
551 //
552 //   CodedOutputStream* coded_output = new CodedOutputStream(raw_output);
553 //   int magic_number = 1234;
554 //   char text[] = "Hello world!";
555 //
556 //   int coded_size = sizeof(magic_number) +
557 //                    CodedOutputStream::Varint32Size(strlen(text)) +
558 //                    strlen(text);
559 //
560 //   uint8* buffer =
561 //       coded_output->GetDirectBufferForNBytesAndAdvance(coded_size);
562 //   if (buffer != NULL) {
563 //     // The output stream has enough space in the buffer: write directly to
564 //     // the array.
565 //     buffer = CodedOutputStream::WriteLittleEndian32ToArray(magic_number,
566 //                                                            buffer);
567 //     buffer = CodedOutputStream::WriteVarint32ToArray(strlen(text), buffer);
568 //     buffer = CodedOutputStream::WriteRawToArray(text, strlen(text), buffer);
569 //   } else {
570 //     // Make bound-checked writes, which will ask the underlying stream for
571 //     // more space as needed.
572 //     coded_output->WriteLittleEndian32(magic_number);
573 //     coded_output->WriteVarint32(strlen(text));
574 //     coded_output->WriteRaw(text, strlen(text));
575 //   }
576 //
577 //   delete coded_output;
578 class LIBPROTOBUF_EXPORT CodedOutputStream {
579  public:
580   // Create a CodedOutputStream that writes to the given ZeroCopyOutputStream.
581   explicit CodedOutputStream(ZeroCopyOutputStream* output);
582 
583   // Destroy the CodedOutputStream and position the underlying
584   // ZeroCopyOutputStream immediately after the last byte written.
585   ~CodedOutputStream();
586 
587   // Skips a number of bytes, leaving the bytes unmodified in the underlying
588   // buffer.  Returns false if an underlying write error occurs.  This is
589   // mainly useful with GetDirectBufferPointer().
590   bool Skip(int count);
591 
592   // Sets *data to point directly at the unwritten part of the
593   // CodedOutputStream's underlying buffer, and *size to the size of that
594   // buffer, but does not advance the stream's current position.  This will
595   // always either produce a non-empty buffer or return false.  If the caller
596   // writes any data to this buffer, it should then call Skip() to skip over
597   // the consumed bytes.  This may be useful for implementing external fast
598   // serialization routines for types of data not covered by the
599   // CodedOutputStream interface.
600   bool GetDirectBufferPointer(void** data, int* size);
601 
602   // If there are at least "size" bytes available in the current buffer,
603   // returns a pointer directly into the buffer and advances over these bytes.
604   // The caller may then write directly into this buffer (e.g. using the
605   // *ToArray static methods) rather than go through CodedOutputStream.  If
606   // there are not enough bytes available, returns NULL.  The return pointer is
607   // invalidated as soon as any other non-const method of CodedOutputStream
608   // is called.
609   inline uint8* GetDirectBufferForNBytesAndAdvance(int size);
610 
611   // Write raw bytes, copying them from the given buffer.
612   void WriteRaw(const void* buffer, int size);
613   // Like WriteRaw()  but writing directly to the target array.
614   // This is _not_ inlined, as the compiler often optimizes memcpy into inline
615   // copy loops. Since this gets called by every field with string or bytes
616   // type, inlining may lead to a significant amount of code bloat, with only a
617   // minor performance gain.
618   static uint8* WriteRawToArray(const void* buffer, int size, uint8* target);
619 
620   // Equivalent to WriteRaw(str.data(), str.size()).
621   void WriteString(const string& str);
622   // Like WriteString()  but writing directly to the target array.
623   static uint8* WriteStringToArray(const string& str, uint8* target);
624 
625 
626   // Write a 32-bit little-endian integer.
627   void WriteLittleEndian32(uint32 value);
628   // Like WriteLittleEndian32()  but writing directly to the target array.
629   static uint8* WriteLittleEndian32ToArray(uint32 value, uint8* target);
630   // Write a 64-bit little-endian integer.
631   void WriteLittleEndian64(uint64 value);
632   // Like WriteLittleEndian64()  but writing directly to the target array.
633   static uint8* WriteLittleEndian64ToArray(uint64 value, uint8* target);
634 
635   // Write an unsigned integer with Varint encoding.  Writing a 32-bit value
636   // is equivalent to casting it to uint64 and writing it as a 64-bit value,
637   // but may be more efficient.
638   void WriteVarint32(uint32 value);
639   // Like WriteVarint32()  but writing directly to the target array.
640   static uint8* WriteVarint32ToArray(uint32 value, uint8* target);
641   // Write an unsigned integer with Varint encoding.
642   void WriteVarint64(uint64 value);
643   // Like WriteVarint64()  but writing directly to the target array.
644   static uint8* WriteVarint64ToArray(uint64 value, uint8* target);
645 
646   // Equivalent to WriteVarint32() except when the value is negative,
647   // in which case it must be sign-extended to a full 10 bytes.
648   void WriteVarint32SignExtended(int32 value);
649   // Like WriteVarint32SignExtended()  but writing directly to the target array.
650   static uint8* WriteVarint32SignExtendedToArray(int32 value, uint8* target);
651 
652   // This is identical to WriteVarint32(), but optimized for writing tags.
653   // In particular, if the input is a compile-time constant, this method
654   // compiles down to a couple instructions.
655   // Always inline because otherwise the aforementioned optimization can't work,
656   // but GCC by default doesn't want to inline this.
657   void WriteTag(uint32 value);
658   // Like WriteTag()  but writing directly to the target array.
659   static uint8* WriteTagToArray(
660       uint32 value, uint8* target) GOOGLE_ATTRIBUTE_ALWAYS_INLINE;
661 
662   // Returns the number of bytes needed to encode the given value as a varint.
663   static int VarintSize32(uint32 value);
664   // Returns the number of bytes needed to encode the given value as a varint.
665   static int VarintSize64(uint64 value);
666 
667   // If negative, 10 bytes.  Otherwise, same as VarintSize32().
668   static int VarintSize32SignExtended(int32 value);
669 
670   // Returns the total number of bytes written since this object was created.
671   inline int ByteCount() const;
672 
673   // Returns true if there was an underlying I/O error since this object was
674   // created.
HadError()675   bool HadError() const { return had_error_; }
676 
677  private:
678   GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(CodedOutputStream);
679 
680   ZeroCopyOutputStream* output_;
681   uint8* buffer_;
682   int buffer_size_;
683   int total_bytes_;  // Sum of sizes of all buffers seen so far.
684   bool had_error_;   // Whether an error occurred during output.
685 
686   // Advance the buffer by a given number of bytes.
687   void Advance(int amount);
688 
689   // Called when the buffer runs out to request more data.  Implies an
690   // Advance(buffer_size_).
691   bool Refresh();
692 
693   static uint8* WriteVarint32FallbackToArray(uint32 value, uint8* target);
694 
695   // Always-inlined versions of WriteVarint* functions so that code can be
696   // reused, while still controlling size. For instance, WriteVarint32ToArray()
697   // should not directly call this: since it is inlined itself, doing so
698   // would greatly increase the size of generated code. Instead, it should call
699   // WriteVarint32FallbackToArray.  Meanwhile, WriteVarint32() is already
700   // out-of-line, so it should just invoke this directly to avoid any extra
701   // function call overhead.
702   static uint8* WriteVarint32FallbackToArrayInline(
703       uint32 value, uint8* target) GOOGLE_ATTRIBUTE_ALWAYS_INLINE;
704   static uint8* WriteVarint64ToArrayInline(
705       uint64 value, uint8* target) GOOGLE_ATTRIBUTE_ALWAYS_INLINE;
706 
707   static int VarintSize32Fallback(uint32 value);
708 };
709 
710 // inline methods ====================================================
711 // The vast majority of varints are only one byte.  These inline
712 // methods optimize for that case.
713 
ReadVarint32(uint32 * value)714 inline bool CodedInputStream::ReadVarint32(uint32* value) {
715   if (GOOGLE_PREDICT_TRUE(buffer_ < buffer_end_) && *buffer_ < 0x80) {
716     *value = *buffer_;
717     Advance(1);
718     return true;
719   } else {
720     return ReadVarint32Fallback(value);
721   }
722 }
723 
ReadVarint64(uint64 * value)724 inline bool CodedInputStream::ReadVarint64(uint64* value) {
725   if (GOOGLE_PREDICT_TRUE(buffer_ < buffer_end_) && *buffer_ < 0x80) {
726     *value = *buffer_;
727     Advance(1);
728     return true;
729   } else {
730     return ReadVarint64Fallback(value);
731   }
732 }
733 
734 // static
// Decodes four little-endian bytes starting at `buffer` into *value and
// returns the address just past them.
inline const uint8* CodedInputStream::ReadLittleEndian32FromArray(
    const uint8* buffer,
    uint32* value) {
#if !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST) && \
    defined(__BYTE_ORDER) && __BYTE_ORDER == __LITTLE_ENDIAN
  // Host byte order already matches the wire format: copy the bytes as-is.
  memcpy(value, buffer, sizeof(*value));
#else
  // Portable path: assemble the value one byte at a time, lowest byte first.
  uint32 decoded = static_cast<uint32>(buffer[0]);
  decoded |= static_cast<uint32>(buffer[1]) <<  8;
  decoded |= static_cast<uint32>(buffer[2]) << 16;
  decoded |= static_cast<uint32>(buffer[3]) << 24;
  *value = decoded;
#endif
  return buffer + sizeof(*value);
}
750 // static
// Decodes eight little-endian bytes starting at `buffer` into *value and
// returns the address just past them.
inline const uint8* CodedInputStream::ReadLittleEndian64FromArray(
    const uint8* buffer,
    uint64* value) {
#if !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST) && \
    defined(__BYTE_ORDER) && __BYTE_ORDER == __LITTLE_ENDIAN
  // Host byte order already matches the wire format: copy the bytes as-is.
  memcpy(value, buffer, sizeof(*value));
#else
  // Portable path: build the two 32-bit halves separately, then join them.
  uint32 low_half = static_cast<uint32>(buffer[0]);
  low_half |= static_cast<uint32>(buffer[1]) <<  8;
  low_half |= static_cast<uint32>(buffer[2]) << 16;
  low_half |= static_cast<uint32>(buffer[3]) << 24;

  uint32 high_half = static_cast<uint32>(buffer[4]);
  high_half |= static_cast<uint32>(buffer[5]) <<  8;
  high_half |= static_cast<uint32>(buffer[6]) << 16;
  high_half |= static_cast<uint32>(buffer[7]) << 24;

  *value = (static_cast<uint64>(high_half) << 32) |
           static_cast<uint64>(low_half);
#endif
  return buffer + sizeof(*value);
}
772 
ReadLittleEndian32(uint32 * value)773 inline bool CodedInputStream::ReadLittleEndian32(uint32* value) {
774 #if !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST) && \
775     defined(__BYTE_ORDER) && __BYTE_ORDER == __LITTLE_ENDIAN
776   if (GOOGLE_PREDICT_TRUE(BufferSize() >= sizeof(*value))) {
777     memcpy(value, buffer_, sizeof(*value));
778     Advance(sizeof(*value));
779     return true;
780   } else {
781     return ReadLittleEndian32Fallback(value);
782   }
783 #else
784   return ReadLittleEndian32Fallback(value);
785 #endif
786 }
787 
ReadLittleEndian64(uint64 * value)788 inline bool CodedInputStream::ReadLittleEndian64(uint64* value) {
789 #if !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST) && \
790     defined(__BYTE_ORDER) && __BYTE_ORDER == __LITTLE_ENDIAN
791   if (GOOGLE_PREDICT_TRUE(BufferSize() >= sizeof(*value))) {
792     memcpy(value, buffer_, sizeof(*value));
793     Advance(sizeof(*value));
794     return true;
795   } else {
796     return ReadLittleEndian64Fallback(value);
797   }
798 #else
799   return ReadLittleEndian64Fallback(value);
800 #endif
801 }
802 
ReadTag()803 inline uint32 CodedInputStream::ReadTag() {
804   if (GOOGLE_PREDICT_TRUE(buffer_ < buffer_end_) && buffer_[0] < 0x80) {
805     last_tag_ = buffer_[0];
806     Advance(1);
807     return last_tag_;
808   } else {
809     last_tag_ = ReadTagFallback();
810     return last_tag_;
811   }
812 }
813 
LastTagWas(uint32 expected)814 inline bool CodedInputStream::LastTagWas(uint32 expected) {
815   return last_tag_ == expected;
816 }
817 
ConsumedEntireMessage()818 inline bool CodedInputStream::ConsumedEntireMessage() {
819   return legitimate_message_end_;
820 }
821 
ExpectTag(uint32 expected)822 inline bool CodedInputStream::ExpectTag(uint32 expected) {
823   if (expected < (1 << 7)) {
824     if (GOOGLE_PREDICT_TRUE(buffer_ < buffer_end_) && buffer_[0] == expected) {
825       Advance(1);
826       return true;
827     } else {
828       return false;
829     }
830   } else if (expected < (1 << 14)) {
831     if (GOOGLE_PREDICT_TRUE(BufferSize() >= 2) &&
832         buffer_[0] == static_cast<uint8>(expected | 0x80) &&
833         buffer_[1] == static_cast<uint8>(expected >> 7)) {
834       Advance(2);
835       return true;
836     } else {
837       return false;
838     }
839   } else {
840     // Don't bother optimizing for larger values.
841     return false;
842   }
843 }
844 
// Array flavour of ExpectTag(): returns the address just past the tag when
// `buffer` starts with the one- or two-byte encoding of `expected`, and NULL
// otherwise (including for tags too large to encode in two bytes).  No
// bounds checking is performed on `buffer`.
inline const uint8* CodedInputStream::ExpectTagFromArray(
    const uint8* buffer, uint32 expected) {
  if (expected >= (1 << 14)) {
    return NULL;
  }

  if (expected < (1 << 7)) {
    return (buffer[0] == expected) ? buffer + 1 : NULL;
  }

  const bool both_bytes_match =
      buffer[0] == static_cast<uint8>(expected | 0x80) &&
      buffer[1] == static_cast<uint8>(expected >> 7);
  return both_bytes_match ? buffer + 2 : NULL;
}
859 
GetDirectBufferPointerInline(const void ** data,int * size)860 inline void CodedInputStream::GetDirectBufferPointerInline(const void** data,
861                                                            int* size) {
862   *data = buffer_;
863   *size = buffer_end_ - buffer_;
864 }
865 
ExpectAtEnd()866 inline bool CodedInputStream::ExpectAtEnd() {
867   // If we are at a limit we know no more bytes can be read.  Otherwise, it's
868   // hard to say without calling Refresh(), and we'd rather not do that.
869 
870   if (buffer_ == buffer_end_ && buffer_size_after_limit_ != 0) {
871     last_tag_ = 0;                   // Pretend we called ReadTag()...
872     legitimate_message_end_ = true;  // ... and it hit EOF.
873     return true;
874   } else {
875     return false;
876   }
877 }
878 
GetDirectBufferForNBytesAndAdvance(int size)879 inline uint8* CodedOutputStream::GetDirectBufferForNBytesAndAdvance(int size) {
880   if (buffer_size_ < size) {
881     return NULL;
882   } else {
883     uint8* result = buffer_;
884     Advance(size);
885     return result;
886   }
887 }
888 
WriteVarint32ToArray(uint32 value,uint8 * target)889 inline uint8* CodedOutputStream::WriteVarint32ToArray(uint32 value,
890                                                         uint8* target) {
891   if (value < 0x80) {
892     *target = value;
893     return target + 1;
894   } else {
895     return WriteVarint32FallbackToArray(value, target);
896   }
897 }
898 
WriteVarint32SignExtended(int32 value)899 inline void CodedOutputStream::WriteVarint32SignExtended(int32 value) {
900   if (value < 0) {
901     WriteVarint64(static_cast<uint64>(value));
902   } else {
903     WriteVarint32(static_cast<uint32>(value));
904   }
905 }
906 
WriteVarint32SignExtendedToArray(int32 value,uint8 * target)907 inline uint8* CodedOutputStream::WriteVarint32SignExtendedToArray(
908     int32 value, uint8* target) {
909   if (value < 0) {
910     return WriteVarint64ToArray(static_cast<uint64>(value), target);
911   } else {
912     return WriteVarint32ToArray(static_cast<uint32>(value), target);
913   }
914 }
915 
WriteLittleEndian32ToArray(uint32 value,uint8 * target)916 inline uint8* CodedOutputStream::WriteLittleEndian32ToArray(uint32 value,
917                                                             uint8* target) {
918 #if !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST) && \
919     defined(__BYTE_ORDER) && __BYTE_ORDER == __LITTLE_ENDIAN
920   memcpy(target, &value, sizeof(value));
921 #else
922   target[0] = static_cast<uint8>(value);
923   target[1] = static_cast<uint8>(value >>  8);
924   target[2] = static_cast<uint8>(value >> 16);
925   target[3] = static_cast<uint8>(value >> 24);
926 #endif
927   return target + sizeof(value);
928 }
929 
WriteLittleEndian64ToArray(uint64 value,uint8 * target)930 inline uint8* CodedOutputStream::WriteLittleEndian64ToArray(uint64 value,
931                                                             uint8* target) {
932 #if !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST) && \
933     defined(__BYTE_ORDER) && __BYTE_ORDER == __LITTLE_ENDIAN
934   memcpy(target, &value, sizeof(value));
935 #else
936   uint32 part0 = static_cast<uint32>(value);
937   uint32 part1 = static_cast<uint32>(value >> 32);
938 
939   target[0] = static_cast<uint8>(part0);
940   target[1] = static_cast<uint8>(part0 >>  8);
941   target[2] = static_cast<uint8>(part0 >> 16);
942   target[3] = static_cast<uint8>(part0 >> 24);
943   target[4] = static_cast<uint8>(part1);
944   target[5] = static_cast<uint8>(part1 >>  8);
945   target[6] = static_cast<uint8>(part1 >> 16);
946   target[7] = static_cast<uint8>(part1 >> 24);
947 #endif
948   return target + sizeof(value);
949 }
950 
WriteTag(uint32 value)951 inline void CodedOutputStream::WriteTag(uint32 value) {
952   WriteVarint32(value);
953 }
954 
WriteTagToArray(uint32 value,uint8 * target)955 inline uint8* CodedOutputStream::WriteTagToArray(
956     uint32 value, uint8* target) {
957   if (value < (1 << 7)) {
958     target[0] = value;
959     return target + 1;
960   } else if (value < (1 << 14)) {
961     target[0] = static_cast<uint8>(value | 0x80);
962     target[1] = static_cast<uint8>(value >> 7);
963     return target + 2;
964   } else {
965     return WriteVarint32FallbackToArray(value, target);
966   }
967 }
968 
VarintSize32(uint32 value)969 inline int CodedOutputStream::VarintSize32(uint32 value) {
970   if (value < (1 << 7)) {
971     return 1;
972   } else  {
973     return VarintSize32Fallback(value);
974   }
975 }
976 
VarintSize32SignExtended(int32 value)977 inline int CodedOutputStream::VarintSize32SignExtended(int32 value) {
978   if (value < 0) {
979     return 10;     // TODO(kenton):  Make this a symbolic constant.
980   } else {
981     return VarintSize32(static_cast<uint32>(value));
982   }
983 }
984 
WriteString(const string & str)985 inline void CodedOutputStream::WriteString(const string& str) {
986   WriteRaw(str.data(), str.size());
987 }
988 
WriteStringToArray(const string & str,uint8 * target)989 inline uint8* CodedOutputStream::WriteStringToArray(
990     const string& str, uint8* target) {
991   return WriteRawToArray(str.data(), str.size(), target);
992 }
993 
ByteCount()994 inline int CodedOutputStream::ByteCount() const {
995   return total_bytes_ - buffer_size_;
996 }
997 
Advance(int amount)998 inline void CodedInputStream::Advance(int amount) {
999   buffer_ += amount;
1000 }
1001 
Advance(int amount)1002 inline void CodedOutputStream::Advance(int amount) {
1003   buffer_ += amount;
1004   buffer_size_ -= amount;
1005 }
1006 
SetRecursionLimit(int limit)1007 inline void CodedInputStream::SetRecursionLimit(int limit) {
1008   recursion_limit_ = limit;
1009 }
1010 
IncrementRecursionDepth()1011 inline bool CodedInputStream::IncrementRecursionDepth() {
1012   ++recursion_depth_;
1013   return recursion_depth_ <= recursion_limit_;
1014 }
1015 
DecrementRecursionDepth()1016 inline void CodedInputStream::DecrementRecursionDepth() {
1017   if (recursion_depth_ > 0) --recursion_depth_;
1018 }
1019 
SetExtensionRegistry(DescriptorPool * pool,MessageFactory * factory)1020 inline void CodedInputStream::SetExtensionRegistry(DescriptorPool* pool,
1021                                                    MessageFactory* factory) {
1022   extension_pool_ = pool;
1023   extension_factory_ = factory;
1024 }
1025 
GetExtensionPool()1026 inline const DescriptorPool* CodedInputStream::GetExtensionPool() {
1027   return extension_pool_;
1028 }
1029 
GetExtensionFactory()1030 inline MessageFactory* CodedInputStream::GetExtensionFactory() {
1031   return extension_factory_;
1032 }
1033 
BufferSize()1034 inline int CodedInputStream::BufferSize() const {
1035   return buffer_end_ - buffer_;
1036 }
1037 
CodedInputStream(ZeroCopyInputStream * input)1038 inline CodedInputStream::CodedInputStream(ZeroCopyInputStream* input)
1039   : input_(input),
1040     buffer_(NULL),
1041     buffer_end_(NULL),
1042     total_bytes_read_(0),
1043     overflow_bytes_(0),
1044     last_tag_(0),
1045     legitimate_message_end_(false),
1046     aliasing_enabled_(false),
1047     current_limit_(INT_MAX),
1048     buffer_size_after_limit_(0),
1049     total_bytes_limit_(kDefaultTotalBytesLimit),
1050     total_bytes_warning_threshold_(kDefaultTotalBytesWarningThreshold),
1051     recursion_depth_(0),
1052     recursion_limit_(kDefaultRecursionLimit),
1053     extension_pool_(NULL),
1054     extension_factory_(NULL) {
1055   // Eagerly Refresh() so buffer space is immediately available.
1056   Refresh();
1057 }
1058 
CodedInputStream(const uint8 * buffer,int size)1059 inline CodedInputStream::CodedInputStream(const uint8* buffer, int size)
1060   : input_(NULL),
1061     buffer_(buffer),
1062     buffer_end_(buffer + size),
1063     total_bytes_read_(size),
1064     overflow_bytes_(0),
1065     last_tag_(0),
1066     legitimate_message_end_(false),
1067     aliasing_enabled_(false),
1068     current_limit_(size),
1069     buffer_size_after_limit_(0),
1070     total_bytes_limit_(kDefaultTotalBytesLimit),
1071     total_bytes_warning_threshold_(kDefaultTotalBytesWarningThreshold),
1072     recursion_depth_(0),
1073     recursion_limit_(kDefaultRecursionLimit),
1074     extension_pool_(NULL),
1075     extension_factory_(NULL) {
1076   // Note that setting current_limit_ == size is important to prevent some
1077   // code paths from trying to access input_ and segfaulting.
1078 }
1079 
~CodedInputStream()1080 inline CodedInputStream::~CodedInputStream() {
1081   if (input_ != NULL) {
1082     BackUpInputToCurrentPosition();
1083   }
1084 }
1085 
1086 }  // namespace io
1087 }  // namespace protobuf
1088 
1089 }  // namespace google
1090 #endif  // GOOGLE_PROTOBUF_IO_CODED_STREAM_H__
1091