1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc. All rights reserved.
3 // http://code.google.com/p/protobuf/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 // * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 // * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 // * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31 // Author: kenton@google.com (Kenton Varda)
32 // Based on original Protocol Buffers design by
33 // Sanjay Ghemawat, Jeff Dean, and others.
34 //
35 // This file contains the CodedInputStream and CodedOutputStream classes,
36 // which wrap a ZeroCopyInputStream or ZeroCopyOutputStream, respectively,
37 // and allow you to read or write individual pieces of data in various
38 // formats. In particular, these implement the varint encoding for
39 // integers, a simple variable-length encoding in which smaller numbers
40 // take fewer bytes.
41 //
42 // Typically these classes will only be used internally by the protocol
43 // buffer library in order to encode and decode protocol buffers. Clients
44 // of the library only need to know about this class if they wish to write
45 // custom message parsing or serialization procedures.
46 //
47 // CodedOutputStream example:
48 // // Write some data to "myfile". First we write a 4-byte "magic number"
49 // // to identify the file type, then write a length-delimited string. The
50 // // string is composed of a varint giving the length followed by the raw
51 // // bytes.
52 // int fd = open("myfile", O_WRONLY);
53 // ZeroCopyOutputStream* raw_output = new FileOutputStream(fd);
54 // CodedOutputStream* coded_output = new CodedOutputStream(raw_output);
55 //
56 // int magic_number = 1234;
57 // char text[] = "Hello world!";
58 // coded_output->WriteLittleEndian32(magic_number);
59 // coded_output->WriteVarint32(strlen(text));
60 // coded_output->WriteRaw(text, strlen(text));
61 //
62 // delete coded_output;
63 // delete raw_output;
64 // close(fd);
65 //
66 // CodedInputStream example:
67 // // Read a file created by the above code.
68 // int fd = open("myfile", O_RDONLY);
69 // ZeroCopyInputStream* raw_input = new FileInputStream(fd);
70 // CodedInputStream* coded_input = new CodedInputStream(raw_input);
71 //
72 // coded_input->ReadLittleEndian32(&magic_number);
73 // if (magic_number != 1234) {
74 // cerr << "File not in expected format." << endl;
75 // return;
76 // }
77 //
78 // uint32 size;
79 // coded_input->ReadVarint32(&size);
80 //
81 // char* text = new char[size + 1];
82 // coded_input->ReadRaw(text, size);
83 // text[size] = '\0';
84 //
85 // delete coded_input;
86 // delete raw_input;
87 // close(fd);
88 //
89 // cout << "Text is: " << text << endl;
90 // delete [] text;
91 //
92 // For those who are interested, varint encoding is defined as follows:
93 //
94 // The encoding operates on unsigned integers of up to 64 bits in length.
95 // Each byte of the encoded value has the format:
96 // * bits 0-6: Seven bits of the number being encoded.
97 // * bit 7: Zero if this is the last byte in the encoding (in which
98 // case all remaining bits of the number are zero) or 1 if
99 // more bytes follow.
100 // The first byte contains the least-significant 7 bits of the number, the
101 // second byte (if present) contains the next-least-significant 7 bits,
102 // and so on. So, the binary number 1011000101011 would be encoded in two
103 // bytes as "10101011 00101100".
104 //
105 // In theory, varint could be used to encode integers of any length.
106 // However, for practicality we set a limit at 64 bits. The maximum encoded
107 // length of a number is thus 10 bytes.
108
109 #ifndef GOOGLE_PROTOBUF_IO_CODED_STREAM_H__
110 #define GOOGLE_PROTOBUF_IO_CODED_STREAM_H__
111
112 #include <string>
113 #ifndef _MSC_VER
114 #include <sys/param.h>
115 #endif // !_MSC_VER
116 #include <google/protobuf/stubs/common.h>
117 #include <google/protobuf/stubs/common.h> // for GOOGLE_PREDICT_TRUE macro
118
119 namespace google {
120
121 namespace protobuf {
122
123 class DescriptorPool;
124 class MessageFactory;
125
126 namespace io {
127
128 // Defined in this file.
129 class CodedInputStream;
130 class CodedOutputStream;
131
132 // Defined in other files.
133 class ZeroCopyInputStream; // zero_copy_stream.h
134 class ZeroCopyOutputStream; // zero_copy_stream.h
135
136 // Class which reads and decodes binary data which is composed of varint-
137 // encoded integers and fixed-width pieces. Wraps a ZeroCopyInputStream.
138 // Most users will not need to deal with CodedInputStream.
139 //
140 // Most methods of CodedInputStream that return a bool return false if an
141 // underlying I/O error occurs or if the data is malformed. Once such a
142 // failure occurs, the CodedInputStream is broken and is no longer useful.
143 class LIBPROTOBUF_EXPORT CodedInputStream {
144 public:
145 // Create a CodedInputStream that reads from the given ZeroCopyInputStream.
146 explicit CodedInputStream(ZeroCopyInputStream* input);
147
148 // Create a CodedInputStream that reads from the given flat array. This is
149 // faster than using an ArrayInputStream. PushLimit(size) is implied by
150 // this constructor.
151 explicit CodedInputStream(const uint8* buffer, int size);
152
153 // Destroy the CodedInputStream and position the underlying
154 // ZeroCopyInputStream at the first unread byte. If an error occurred while
155 // reading (causing a method to return false), then the exact position of
156 // the input stream may be anywhere between the last value that was read
157 // successfully and the stream's byte limit.
158 ~CodedInputStream();
159
160
161 // Skips a number of bytes. Returns false if an underlying read error
162 // occurs.
163 bool Skip(int count);
164
165 // Sets *data to point directly at the unread part of the CodedInputStream's
166 // underlying buffer, and *size to the size of that buffer, but does not
167 // advance the stream's current position. This will always either produce
168 // a non-empty buffer or return false. If the caller consumes any of
169 // this data, it should then call Skip() to skip over the consumed bytes.
170 // This may be useful for implementing external fast parsing routines for
171 // types of data not covered by the CodedInputStream interface.
172 bool GetDirectBufferPointer(const void** data, int* size);
173
174 // Like GetDirectBufferPointer, but this method is inlined, and does not
175 // attempt to Refresh() if the buffer is currently empty.
176 inline void GetDirectBufferPointerInline(const void** data,
177 int* size) GOOGLE_ATTRIBUTE_ALWAYS_INLINE;
178
179 // Read raw bytes, copying them into the given buffer.
180 bool ReadRaw(void* buffer, int size);
181
182 // Like ReadRaw, but reads into a string.
183 //
184 // Implementation Note: ReadString() grows the string gradually as it
185 // reads in the data, rather than allocating the entire requested size
186 // upfront. This prevents denial-of-service attacks in which a client
187 // could claim that a string is going to be MAX_INT bytes long in order to
188 // crash the server because it can't allocate this much space at once.
189 bool ReadString(string* buffer, int size);
190 // Like the above, with inlined optimizations. This should only be used
191 // by the protobuf implementation.
192 inline bool InternalReadStringInline(string* buffer,
193 int size) GOOGLE_ATTRIBUTE_ALWAYS_INLINE;
194
195
196 // Read a 32-bit little-endian integer.
197 bool ReadLittleEndian32(uint32* value);
198 // Read a 64-bit little-endian integer.
199 bool ReadLittleEndian64(uint64* value);
200
201 // These methods read from an externally provided buffer. The caller is
202 // responsible for ensuring that the buffer has sufficient space.
203 // Read a 32-bit little-endian integer.
204 static const uint8* ReadLittleEndian32FromArray(const uint8* buffer,
205 uint32* value);
206 // Read a 64-bit little-endian integer.
207 static const uint8* ReadLittleEndian64FromArray(const uint8* buffer,
208 uint64* value);
209
210 // Read an unsigned integer with Varint encoding, truncating to 32 bits.
211 // Reading a 32-bit value is equivalent to reading a 64-bit one and casting
212 // it to uint32, but may be more efficient.
213 bool ReadVarint32(uint32* value);
214 // Read an unsigned integer with Varint encoding.
215 bool ReadVarint64(uint64* value);
216
217 // Read a tag. This calls ReadVarint32() and returns the result, or returns
218 // zero (which is not a valid tag) if ReadVarint32() fails. Also, it updates
219 // the last tag value, which can be checked with LastTagWas().
220 // Always inline because this is only called in one place per parse loop
221 // but it is called for every iteration of said loop, so it should be fast.
222 // GCC doesn't want to inline this by default.
223 uint32 ReadTag() GOOGLE_ATTRIBUTE_ALWAYS_INLINE;
224
225 // Usually returns true if calling ReadVarint32() now would produce the given
226 // value. Will always return false if ReadVarint32() would not return the
227 // given value. If ExpectTag() returns true, it also advances past
228 // the varint. For best performance, use a compile-time constant as the
229 // parameter.
230 // Always inline because this collapses to a small number of instructions
231 // when given a constant parameter, but GCC doesn't want to inline by default.
232 bool ExpectTag(uint32 expected) GOOGLE_ATTRIBUTE_ALWAYS_INLINE;
233
234 // Like above, except this reads from the specified buffer. The caller is
235 // responsible for ensuring that the buffer is large enough to read a varint
236 // of the expected size. For best performance, use a compile-time constant as
237 // the expected tag parameter.
238 //
239 // Returns a pointer beyond the expected tag if it was found, or NULL if it
240 // was not.
241 static const uint8* ExpectTagFromArray(
242 const uint8* buffer,
243 uint32 expected) GOOGLE_ATTRIBUTE_ALWAYS_INLINE;
244
245 // Usually returns true if no more bytes can be read. Always returns false
246 // if more bytes can be read. If ExpectAtEnd() returns true, a subsequent
247 // call to LastTagWas() will act as if ReadTag() had been called and returned
248 // zero, and ConsumedEntireMessage() will return true.
249 bool ExpectAtEnd();
250
251 // If the last call to ReadTag() returned the given value, returns true.
252 // Otherwise, returns false.
253 //
254 // This is needed because parsers for some types of embedded messages
255 // (with field type TYPE_GROUP) don't actually know that they've reached the
256 // end of a message until they see an ENDGROUP tag, which was actually part
257 // of the enclosing message. The enclosing message would like to check that
258 // tag to make sure it had the right number, so it calls LastTagWas() on
259 // return from the embedded parser to check.
260 bool LastTagWas(uint32 expected);
261
262 // When parsing a message (but NOT a group), this method must be called
263 // immediately after MergeFromCodedStream() returns (if it returns true)
264 // to further verify that the message ended in a legitimate way. For
265 // example, this verifies that parsing did not end on an end-group tag.
266 // It also checks for some cases where, due to optimizations,
267 // MergeFromCodedStream() can incorrectly return true.
268 bool ConsumedEntireMessage();
269
270 // Limits ----------------------------------------------------------
271 // Limits are used when parsing length-delimited embedded messages.
272 // After the message's length is read, PushLimit() is used to prevent
273 // the CodedInputStream from reading beyond that length. Once the
274 // embedded message has been parsed, PopLimit() is called to undo the
275 // limit.
276
277 // Opaque type used with PushLimit() and PopLimit(). Do not modify
278 // values of this type yourself. The only reason that this isn't a
279 // struct with private internals is for efficiency.
280 typedef int Limit;
281
282 // Places a limit on the number of bytes that the stream may read,
283 // starting from the current position. Once the stream hits this limit,
284 // it will act like the end of the input has been reached until PopLimit()
285 // is called.
286 //
287 // As the names imply, the stream conceptually has a stack of limits. The
288 // shortest limit on the stack is always enforced, even if it is not the
289 // top limit.
290 //
291 // The value returned by PushLimit() is opaque to the caller, and must
292 // be passed unchanged to the corresponding call to PopLimit().
293 Limit PushLimit(int byte_limit);
294
295 // Pops the last limit pushed by PushLimit(). The input must be the value
296 // returned by that call to PushLimit().
297 void PopLimit(Limit limit);
298
299 // Returns the number of bytes left until the nearest limit on the
300 // stack is hit, or -1 if no limits are in place.
301 int BytesUntilLimit();
302
303 // Total Bytes Limit -----------------------------------------------
304 // To prevent malicious users from sending excessively large messages
305 // and causing integer overflows or memory exhaustion, CodedInputStream
306 // imposes a hard limit on the total number of bytes it will read.
307
308 // Sets the maximum number of bytes that this CodedInputStream will read
309 // before refusing to continue. To prevent integer overflows in the
310 // protocol buffers implementation, as well as to prevent servers from
311 // allocating enormous amounts of memory to hold parsed messages, the
312 // maximum message length should be limited to the shortest length that
313 // will not harm usability. The theoretical shortest message that could
314 // cause integer overflows is 512MB. The default limit is 64MB. Apps
315 // should set shorter limits if possible. If warning_threshold is not -1,
316 // a warning will be printed to stderr after warning_threshold bytes are
317 // read. An error will always be printed to stderr if the limit is
318 // reached.
319 //
320 // This is unrelated to PushLimit()/PopLimit().
321 //
322 // Hint: If you are reading this because your program is printing a
323 // warning about dangerously large protocol messages, you may be
324 // confused about what to do next. The best option is to change your
325 // design such that excessively large messages are not necessary.
326 // For example, try to design file formats to consist of many small
327 // messages rather than a single large one. If this is infeasible,
328 // you will need to increase the limit. Chances are, though, that
329 // your code never constructs a CodedInputStream on which the limit
330 // can be set. You probably parse messages by calling things like
331 // Message::ParseFromString(). In this case, you will need to change
332 // your code to instead construct some sort of ZeroCopyInputStream
333 // (e.g. an ArrayInputStream), construct a CodedInputStream around
334 // that, then call Message::ParseFromCodedStream() instead. Then
335 // you can adjust the limit. Yes, it's more work, but you're doing
336 // something unusual.
337 void SetTotalBytesLimit(int total_bytes_limit, int warning_threshold);
338
339 // Recursion Limit -------------------------------------------------
340 // To prevent corrupt or malicious messages from causing stack overflows,
341 // we must keep track of the depth of recursion when parsing embedded
342 // messages and groups. CodedInputStream keeps track of this because it
343 // is the only object that is passed down the stack during parsing.
344
345 // Sets the maximum recursion depth. The default is 64.
346 void SetRecursionLimit(int limit);
347
348 // Increments the current recursion depth. Returns true if the depth is
349 // under the limit, false if it has gone over.
350 bool IncrementRecursionDepth();
351
352 // Decrements the recursion depth.
353 void DecrementRecursionDepth();
354
355 // Extension Registry ----------------------------------------------
356 // ADVANCED USAGE: 99.9% of people can ignore this section.
357 //
358 // By default, when parsing extensions, the parser looks for extension
359 // definitions in the pool which owns the outer message's Descriptor.
360 // However, you may call SetExtensionRegistry() to provide an alternative
361 // pool instead. This makes it possible, for example, to parse a message
362 // using a generated class, but represent some extensions using
363 // DynamicMessage.
364
365 // Set the pool used to look up extensions. Most users do not need to call
366 // this as the correct pool will be chosen automatically.
367 //
368 // WARNING: It is very easy to misuse this. Carefully read the requirements
369 // below. Do not use this unless you are sure you need it. Almost no one
370 // does.
371 //
372 // Let's say you are parsing a message into message object m, and you want
373 // to take advantage of SetExtensionRegistry(). You must follow these
374 // requirements:
375 //
376 // The given DescriptorPool must contain m->GetDescriptor(). It is not
377 // sufficient for it to simply contain a descriptor that has the same name
378 // and content -- it must be the *exact object*. In other words:
379 // assert(pool->FindMessageTypeByName(m->GetDescriptor()->full_name()) ==
380 // m->GetDescriptor());
381 // There are two ways to satisfy this requirement:
382 // 1) Use m->GetDescriptor()->pool() as the pool. This is generally useless
383 // because this is the pool that would be used anyway if you didn't call
384 // SetExtensionRegistry() at all.
385 // 2) Use a DescriptorPool which has m->GetDescriptor()->pool() as an
386 // "underlay". Read the documentation for DescriptorPool for more
387 // information about underlays.
388 //
389 // You must also provide a MessageFactory. This factory will be used to
390 // construct Message objects representing extensions. The factory's
391 // GetPrototype() MUST return non-NULL for any Descriptor which can be found
392 // through the provided pool.
393 //
394 // If the provided factory might return instances of protocol-compiler-
395 // generated (i.e. compiled-in) types, or if the outer message object m is
396 // a generated type, then the given factory MUST have this property: If
397 // GetPrototype() is given a Descriptor which resides in
398 // DescriptorPool::generated_pool(), the factory MUST return the same
399 // prototype which MessageFactory::generated_factory() would return. That
400 // is, given a descriptor for a generated type, the factory must return an
401 // instance of the generated class (NOT DynamicMessage). However, when
402 // given a descriptor for a type that is NOT in generated_pool, the factory
403 // is free to return any implementation.
404 //
405 // The reason for this requirement is that generated sub-objects may be
406 // accessed via the standard (non-reflection) extension accessor methods,
407 // and these methods will down-cast the object to the generated class type.
408 // If the object is not actually of that type, the results would be undefined.
409 // On the other hand, if an extension is not compiled in, then there is no
410 // way the code could end up accessing it via the standard accessors -- the
411 // only way to access the extension is via reflection. When using reflection,
412 // DynamicMessage and generated messages are indistinguishable, so it's fine
413 // if these objects are represented using DynamicMessage.
414 //
415 // Using DynamicMessageFactory on which you have called
416 // SetDelegateToGeneratedFactory(true) should be sufficient to satisfy the
417 // above requirement.
418 //
419 // If either pool or factory is NULL, both must be NULL.
420 //
421 // Note that this feature is ignored when parsing "lite" messages as they do
422 // not have descriptors.
423 void SetExtensionRegistry(DescriptorPool* pool, MessageFactory* factory);
424
425 // Get the DescriptorPool set via SetExtensionRegistry(), or NULL if no pool
426 // has been provided.
427 const DescriptorPool* GetExtensionPool();
428
429 // Get the MessageFactory set via SetExtensionRegistry(), or NULL if no
430 // factory has been provided.
431 MessageFactory* GetExtensionFactory();
432
433 private:
434 GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(CodedInputStream);
435
436 ZeroCopyInputStream* input_;
437 const uint8* buffer_;
438 const uint8* buffer_end_; // pointer to the end of the buffer.
439 int total_bytes_read_; // total bytes read from input_, including
440 // the current buffer
441
442 // If total_bytes_read_ surpasses INT_MAX, we record the extra bytes here
443 // so that we can BackUp() on destruction.
444 int overflow_bytes_;
445
446 // LastTagWas() stuff.
447 uint32 last_tag_; // result of last ReadTag().
448
449 // This is set true by ReadTag{Fallback/Slow}() if it is called when exactly
450 // at EOF, or by ExpectAtEnd() when it returns true. This happens when we
451 // reach the end of a message and attempt to read another tag.
452 bool legitimate_message_end_;
453
454 // See EnableAliasing().
// NOTE(review): no EnableAliasing() is declared in this class as shown here --
// confirm where (or whether) this flag is ever set.
455 bool aliasing_enabled_;
456
457 // Limits
458 Limit current_limit_; // if position = -1, no limit is applied
459
460 // For simplicity, if the current buffer crosses a limit (either a normal
461 // limit created by PushLimit() or the total bytes limit), buffer_size_
462 // only tracks the number of bytes before that limit. This field
463 // contains the number of bytes after it. Note that this implies that if
464 // buffer_size_ == 0 and buffer_size_after_limit_ > 0, we know we've
465 // hit a limit. However, if both are zero, it doesn't necessarily mean
466 // we aren't at a limit -- the buffer may have ended exactly at the limit.
// NOTE(review): this class declares no buffer_size_ member; "buffer_size_"
// above presumably means the value returned by BufferSize() -- confirm.
467 int buffer_size_after_limit_;
468
469 // Maximum number of bytes to read, period. This is unrelated to
470 // current_limit_. Set using SetTotalBytesLimit().
471 int total_bytes_limit_;
472 int total_bytes_warning_threshold_;
473
474 // Current recursion depth, controlled by IncrementRecursionDepth() and
475 // DecrementRecursionDepth().
476 int recursion_depth_;
477 // Recursion depth limit, set by SetRecursionLimit().
478 int recursion_limit_;
479
480 // See SetExtensionRegistry().
481 const DescriptorPool* extension_pool_;
482 MessageFactory* extension_factory_;
483
484 // Private member functions.
485
486 // Advance the buffer by a given number of bytes.
487 void Advance(int amount);
488
489 // Back up input_ to the current buffer position.
490 void BackUpInputToCurrentPosition();
491
492 // Recomputes the value of buffer_size_after_limit_. Must be called after
493 // current_limit_ or total_bytes_limit_ changes.
494 void RecomputeBufferLimits();
495
496 // Writes an error message saying that we hit total_bytes_limit_.
497 void PrintTotalBytesLimitError();
498
499 // Called when the buffer runs out to request more data. Implies an
500 // Advance(BufferSize()).
501 bool Refresh();
502
503 // When parsing varints, we optimize for the common case of small values, and
504 // then optimize for the case when the varint fits within the current buffer
505 // piece. The Fallback method is used when we can't use the one-byte
506 // optimization. The Slow method is yet another fallback when the buffer is
507 // not large enough. Making the slow path out-of-line speeds up the common
508 // case by 10-15%. The slow path is fairly uncommon: it only triggers when a
509 // message crosses multiple buffers.
510 bool ReadVarint32Fallback(uint32* value);
511 bool ReadVarint64Fallback(uint64* value);
512 bool ReadVarint32Slow(uint32* value);
513 bool ReadVarint64Slow(uint64* value);
514 bool ReadLittleEndian32Fallback(uint32* value);
515 bool ReadLittleEndian64Fallback(uint64* value);
516 // Fallback/slow methods for reading tags. These do not update last_tag_,
517 // but will set legitimate_message_end_ if we are at the end of the input
518 // stream.
519 uint32 ReadTagFallback();
520 uint32 ReadTagSlow();
// NOTE(review): presumably the out-of-line path behind the string-reading
// methods above -- confirm against the implementation file.
521 bool ReadStringFallback(string* buffer, int size);
522
523 // Return the size of the buffer.
524 int BufferSize() const;
525
// Default values. The 64MB total-bytes limit and the recursion limit of 64
// match the defaults documented at SetTotalBytesLimit() and
// SetRecursionLimit(); the default warning threshold is 32MB.
526 static const int kDefaultTotalBytesLimit = 64 << 20; // 64MB
527
528 static const int kDefaultTotalBytesWarningThreshold = 32 << 20; // 32MB
529 static const int kDefaultRecursionLimit = 64;
530 };
531
532 // Class which encodes and writes binary data which is composed of varint-
533 // encoded integers and fixed-width pieces. Wraps a ZeroCopyOutputStream.
534 // Most users will not need to deal with CodedOutputStream.
535 //
536 // Most methods of CodedOutputStream which return a bool return false if an
537 // underlying I/O error occurs. Once such a failure occurs, the
538 // CodedOutputStream is broken and is no longer useful. The Write* methods do
539 // not return the stream status, but will invalidate the stream if an error
540 // occurs. The client can probe HadError() to determine the status.
541 //
542 // Note that every method of CodedOutputStream which writes some data has
543 // a corresponding static "ToArray" version. These versions write directly
544 // to the provided buffer, returning a pointer past the last written byte.
545 // They require that the buffer has sufficient capacity for the encoded data.
546 // This allows an optimization where we check if an output stream has enough
547 // space for an entire message before we start writing and, if there is, we
548 // call only the ToArray methods to avoid doing bound checks for each
549 // individual value.
550 // i.e., in the example above:
551 //
552 // CodedOutputStream* coded_output = new CodedOutputStream(raw_output);
553 // int magic_number = 1234;
554 // char text[] = "Hello world!";
555 //
556 // int coded_size = sizeof(magic_number) +
557 // CodedOutputStream::VarintSize32(strlen(text)) +
558 // strlen(text);
559 //
560 // uint8* buffer =
561 // coded_output->GetDirectBufferForNBytesAndAdvance(coded_size);
562 // if (buffer != NULL) {
563 // // The output stream has enough space in the buffer: write directly to
564 // // the array.
565 // buffer = CodedOutputStream::WriteLittleEndian32ToArray(magic_number,
566 // buffer);
567 // buffer = CodedOutputStream::WriteVarint32ToArray(strlen(text), buffer);
568 // buffer = CodedOutputStream::WriteRawToArray(text, strlen(text), buffer);
569 // } else {
570 // // Make bound-checked writes, which will ask the underlying stream for
571 // // more space as needed.
572 // coded_output->WriteLittleEndian32(magic_number);
573 // coded_output->WriteVarint32(strlen(text));
574 // coded_output->WriteRaw(text, strlen(text));
575 // }
576 //
577 // delete coded_output;
578 class LIBPROTOBUF_EXPORT CodedOutputStream {
579 public:
580 // Create a CodedOutputStream that writes to the given ZeroCopyOutputStream.
581 explicit CodedOutputStream(ZeroCopyOutputStream* output);
582
583 // Destroy the CodedOutputStream and position the underlying
584 // ZeroCopyOutputStream immediately after the last byte written.
585 ~CodedOutputStream();
586
587 // Skips a number of bytes, leaving the bytes unmodified in the underlying
588 // buffer. Returns false if an underlying write error occurs. This is
589 // mainly useful with GetDirectBufferPointer().
590 bool Skip(int count);
591
592 // Sets *data to point directly at the unwritten part of the
593 // CodedOutputStream's underlying buffer, and *size to the size of that
594 // buffer, but does not advance the stream's current position. This will
595 // always either produce a non-empty buffer or return false. If the caller
596 // writes any data to this buffer, it should then call Skip() to skip over
597 // the consumed bytes. This may be useful for implementing external fast
598 // serialization routines for types of data not covered by the
599 // CodedOutputStream interface.
600 bool GetDirectBufferPointer(void** data, int* size);
601
602 // If there are at least "size" bytes available in the current buffer,
603 // returns a pointer directly into the buffer and advances over these bytes.
604 // The caller may then write directly into this buffer (e.g. using the
605 // *ToArray static methods) rather than go through CodedOutputStream. If
606 // there are not enough bytes available, returns NULL. The return pointer is
607 // invalidated as soon as any other non-const method of CodedOutputStream
608 // is called.
609 inline uint8* GetDirectBufferForNBytesAndAdvance(int size);
610
611 // Write raw bytes, copying them from the given buffer.
612 void WriteRaw(const void* buffer, int size);
613 // Like WriteRaw() but writing directly to the target array.
614 // This is _not_ inlined, as the compiler often optimizes memcpy into inline
615 // copy loops. Since this gets called by every field with string or bytes
616 // type, inlining may lead to a significant amount of code bloat, with only a
617 // minor performance gain.
618 static uint8* WriteRawToArray(const void* buffer, int size, uint8* target);
619
620 // Equivalent to WriteRaw(str.data(), str.size()).
621 void WriteString(const string& str);
622 // Like WriteString() but writing directly to the target array.
623 static uint8* WriteStringToArray(const string& str, uint8* target);
624
625
626 // Write a 32-bit little-endian integer.
627 void WriteLittleEndian32(uint32 value);
628 // Like WriteLittleEndian32() but writing directly to the target array.
629 static uint8* WriteLittleEndian32ToArray(uint32 value, uint8* target);
630 // Write a 64-bit little-endian integer.
631 void WriteLittleEndian64(uint64 value);
632 // Like WriteLittleEndian64() but writing directly to the target array.
633 static uint8* WriteLittleEndian64ToArray(uint64 value, uint8* target);
634
635 // Write an unsigned integer with Varint encoding. Writing a 32-bit value
636 // is equivalent to casting it to uint64 and writing it as a 64-bit value,
637 // but may be more efficient.
638 void WriteVarint32(uint32 value);
639 // Like WriteVarint32() but writing directly to the target array.
640 static uint8* WriteVarint32ToArray(uint32 value, uint8* target);
641 // Write an unsigned integer with Varint encoding.
642 void WriteVarint64(uint64 value);
643 // Like WriteVarint64() but writing directly to the target array.
644 static uint8* WriteVarint64ToArray(uint64 value, uint8* target);
645
646 // Equivalent to WriteVarint32() except when the value is negative,
647 // in which case it must be sign-extended to a full 10 bytes.
648 void WriteVarint32SignExtended(int32 value);
649 // Like WriteVarint32SignExtended() but writing directly to the target array.
650 static uint8* WriteVarint32SignExtendedToArray(int32 value, uint8* target);
651
652 // This is identical to WriteVarint32(), but optimized for writing tags.
653 // In particular, if the input is a compile-time constant, this method
654 // compiles down to a couple instructions.
655 // Always inline because otherwise the aforementioned optimization can't work,
656 // but GCC by default doesn't want to inline this.
657 void WriteTag(uint32 value);
658 // Like WriteTag() but writing directly to the target array.
659 static uint8* WriteTagToArray(
660 uint32 value, uint8* target) GOOGLE_ATTRIBUTE_ALWAYS_INLINE;
661
662 // Returns the number of bytes needed to encode the given value as a varint.
663 static int VarintSize32(uint32 value);
664 // Returns the number of bytes needed to encode the given value as a varint.
665 static int VarintSize64(uint64 value);
666
// If negative, 10 bytes.  Otherwise, same as VarintSize32().
668 static int VarintSize32SignExtended(int32 value);
669
670 // Returns the total number of bytes written since this object was created.
671 inline int ByteCount() const;
672
673 // Returns true if there was an underlying I/O error since this object was
674 // created.
HadError()675 bool HadError() const { return had_error_; }
676
677 private:
678 GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(CodedOutputStream);
679
680 ZeroCopyOutputStream* output_;
681 uint8* buffer_;
682 int buffer_size_;
683 int total_bytes_; // Sum of sizes of all buffers seen so far.
684 bool had_error_; // Whether an error occurred during output.
685
686 // Advance the buffer by a given number of bytes.
687 void Advance(int amount);
688
689 // Called when the buffer runs out to request more data. Implies an
690 // Advance(buffer_size_).
691 bool Refresh();
692
693 static uint8* WriteVarint32FallbackToArray(uint32 value, uint8* target);
694
695 // Always-inlined versions of WriteVarint* functions so that code can be
696 // reused, while still controlling size. For instance, WriteVarint32ToArray()
697 // should not directly call this: since it is inlined itself, doing so
698 // would greatly increase the size of generated code. Instead, it should call
699 // WriteVarint32FallbackToArray. Meanwhile, WriteVarint32() is already
700 // out-of-line, so it should just invoke this directly to avoid any extra
701 // function call overhead.
702 static uint8* WriteVarint32FallbackToArrayInline(
703 uint32 value, uint8* target) GOOGLE_ATTRIBUTE_ALWAYS_INLINE;
704 static uint8* WriteVarint64ToArrayInline(
705 uint64 value, uint8* target) GOOGLE_ATTRIBUTE_ALWAYS_INLINE;
706
707 static int VarintSize32Fallback(uint32 value);
708 };
709
710 // inline methods ====================================================
711 // The vast majority of varints are only one byte. These inline
712 // methods optimize for that case.
713
ReadVarint32(uint32 * value)714 inline bool CodedInputStream::ReadVarint32(uint32* value) {
715 if (GOOGLE_PREDICT_TRUE(buffer_ < buffer_end_) && *buffer_ < 0x80) {
716 *value = *buffer_;
717 Advance(1);
718 return true;
719 } else {
720 return ReadVarint32Fallback(value);
721 }
722 }
723
ReadVarint64(uint64 * value)724 inline bool CodedInputStream::ReadVarint64(uint64* value) {
725 if (GOOGLE_PREDICT_TRUE(buffer_ < buffer_end_) && *buffer_ < 0x80) {
726 *value = *buffer_;
727 Advance(1);
728 return true;
729 } else {
730 return ReadVarint64Fallback(value);
731 }
732 }
733
734 // static
ReadLittleEndian32FromArray(const uint8 * buffer,uint32 * value)735 inline const uint8* CodedInputStream::ReadLittleEndian32FromArray(
736 const uint8* buffer,
737 uint32* value) {
738 #if !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST) && \
739 defined(__BYTE_ORDER) && __BYTE_ORDER == __LITTLE_ENDIAN
740 memcpy(value, buffer, sizeof(*value));
741 return buffer + sizeof(*value);
742 #else
743 *value = (static_cast<uint32>(buffer[0]) ) |
744 (static_cast<uint32>(buffer[1]) << 8) |
745 (static_cast<uint32>(buffer[2]) << 16) |
746 (static_cast<uint32>(buffer[3]) << 24);
747 return buffer + sizeof(*value);
748 #endif
749 }
750 // static
ReadLittleEndian64FromArray(const uint8 * buffer,uint64 * value)751 inline const uint8* CodedInputStream::ReadLittleEndian64FromArray(
752 const uint8* buffer,
753 uint64* value) {
754 #if !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST) && \
755 defined(__BYTE_ORDER) && __BYTE_ORDER == __LITTLE_ENDIAN
756 memcpy(value, buffer, sizeof(*value));
757 return buffer + sizeof(*value);
758 #else
759 uint32 part0 = (static_cast<uint32>(buffer[0]) ) |
760 (static_cast<uint32>(buffer[1]) << 8) |
761 (static_cast<uint32>(buffer[2]) << 16) |
762 (static_cast<uint32>(buffer[3]) << 24);
763 uint32 part1 = (static_cast<uint32>(buffer[4]) ) |
764 (static_cast<uint32>(buffer[5]) << 8) |
765 (static_cast<uint32>(buffer[6]) << 16) |
766 (static_cast<uint32>(buffer[7]) << 24);
767 *value = static_cast<uint64>(part0) |
768 (static_cast<uint64>(part1) << 32);
769 return buffer + sizeof(*value);
770 #endif
771 }
772
ReadLittleEndian32(uint32 * value)773 inline bool CodedInputStream::ReadLittleEndian32(uint32* value) {
774 #if !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST) && \
775 defined(__BYTE_ORDER) && __BYTE_ORDER == __LITTLE_ENDIAN
776 if (GOOGLE_PREDICT_TRUE(BufferSize() >= sizeof(*value))) {
777 memcpy(value, buffer_, sizeof(*value));
778 Advance(sizeof(*value));
779 return true;
780 } else {
781 return ReadLittleEndian32Fallback(value);
782 }
783 #else
784 return ReadLittleEndian32Fallback(value);
785 #endif
786 }
787
ReadLittleEndian64(uint64 * value)788 inline bool CodedInputStream::ReadLittleEndian64(uint64* value) {
789 #if !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST) && \
790 defined(__BYTE_ORDER) && __BYTE_ORDER == __LITTLE_ENDIAN
791 if (GOOGLE_PREDICT_TRUE(BufferSize() >= sizeof(*value))) {
792 memcpy(value, buffer_, sizeof(*value));
793 Advance(sizeof(*value));
794 return true;
795 } else {
796 return ReadLittleEndian64Fallback(value);
797 }
798 #else
799 return ReadLittleEndian64Fallback(value);
800 #endif
801 }
802
ReadTag()803 inline uint32 CodedInputStream::ReadTag() {
804 if (GOOGLE_PREDICT_TRUE(buffer_ < buffer_end_) && buffer_[0] < 0x80) {
805 last_tag_ = buffer_[0];
806 Advance(1);
807 return last_tag_;
808 } else {
809 last_tag_ = ReadTagFallback();
810 return last_tag_;
811 }
812 }
813
LastTagWas(uint32 expected)814 inline bool CodedInputStream::LastTagWas(uint32 expected) {
815 return last_tag_ == expected;
816 }
817
ConsumedEntireMessage()818 inline bool CodedInputStream::ConsumedEntireMessage() {
819 return legitimate_message_end_;
820 }
821
ExpectTag(uint32 expected)822 inline bool CodedInputStream::ExpectTag(uint32 expected) {
823 if (expected < (1 << 7)) {
824 if (GOOGLE_PREDICT_TRUE(buffer_ < buffer_end_) && buffer_[0] == expected) {
825 Advance(1);
826 return true;
827 } else {
828 return false;
829 }
830 } else if (expected < (1 << 14)) {
831 if (GOOGLE_PREDICT_TRUE(BufferSize() >= 2) &&
832 buffer_[0] == static_cast<uint8>(expected | 0x80) &&
833 buffer_[1] == static_cast<uint8>(expected >> 7)) {
834 Advance(2);
835 return true;
836 } else {
837 return false;
838 }
839 } else {
840 // Don't bother optimizing for larger values.
841 return false;
842 }
843 }
844
// Array flavour of ExpectTag(): returns a pointer just past the tag when the
// bytes at `buffer` are the one- or two-byte varint encoding of `expected`,
// and NULL otherwise (including for any tag >= 1 << 14).  No bounds checking
// is performed on `buffer` here.
inline const uint8* CodedInputStream::ExpectTagFromArray(
    const uint8* buffer, uint32 expected) {
  if (expected < (1 << 7)) {
    return (buffer[0] == expected) ? buffer + 1 : NULL;
  }

  if (expected < (1 << 14)) {
    const uint8 first_byte = static_cast<uint8>(expected | 0x80);
    const uint8 second_byte = static_cast<uint8>(expected >> 7);
    // The second byte is only inspected when the first one matched.
    if (buffer[0] == first_byte && buffer[1] == second_byte) {
      return buffer + 2;
    }
  }

  return NULL;
}
859
GetDirectBufferPointerInline(const void ** data,int * size)860 inline void CodedInputStream::GetDirectBufferPointerInline(const void** data,
861 int* size) {
862 *data = buffer_;
863 *size = buffer_end_ - buffer_;
864 }
865
ExpectAtEnd()866 inline bool CodedInputStream::ExpectAtEnd() {
867 // If we are at a limit we know no more bytes can be read. Otherwise, it's
868 // hard to say without calling Refresh(), and we'd rather not do that.
869
870 if (buffer_ == buffer_end_ && buffer_size_after_limit_ != 0) {
871 last_tag_ = 0; // Pretend we called ReadTag()...
872 legitimate_message_end_ = true; // ... and it hit EOF.
873 return true;
874 } else {
875 return false;
876 }
877 }
878
GetDirectBufferForNBytesAndAdvance(int size)879 inline uint8* CodedOutputStream::GetDirectBufferForNBytesAndAdvance(int size) {
880 if (buffer_size_ < size) {
881 return NULL;
882 } else {
883 uint8* result = buffer_;
884 Advance(size);
885 return result;
886 }
887 }
888
WriteVarint32ToArray(uint32 value,uint8 * target)889 inline uint8* CodedOutputStream::WriteVarint32ToArray(uint32 value,
890 uint8* target) {
891 if (value < 0x80) {
892 *target = value;
893 return target + 1;
894 } else {
895 return WriteVarint32FallbackToArray(value, target);
896 }
897 }
898
WriteVarint32SignExtended(int32 value)899 inline void CodedOutputStream::WriteVarint32SignExtended(int32 value) {
900 if (value < 0) {
901 WriteVarint64(static_cast<uint64>(value));
902 } else {
903 WriteVarint32(static_cast<uint32>(value));
904 }
905 }
906
WriteVarint32SignExtendedToArray(int32 value,uint8 * target)907 inline uint8* CodedOutputStream::WriteVarint32SignExtendedToArray(
908 int32 value, uint8* target) {
909 if (value < 0) {
910 return WriteVarint64ToArray(static_cast<uint64>(value), target);
911 } else {
912 return WriteVarint32ToArray(static_cast<uint32>(value), target);
913 }
914 }
915
WriteLittleEndian32ToArray(uint32 value,uint8 * target)916 inline uint8* CodedOutputStream::WriteLittleEndian32ToArray(uint32 value,
917 uint8* target) {
918 #if !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST) && \
919 defined(__BYTE_ORDER) && __BYTE_ORDER == __LITTLE_ENDIAN
920 memcpy(target, &value, sizeof(value));
921 #else
922 target[0] = static_cast<uint8>(value);
923 target[1] = static_cast<uint8>(value >> 8);
924 target[2] = static_cast<uint8>(value >> 16);
925 target[3] = static_cast<uint8>(value >> 24);
926 #endif
927 return target + sizeof(value);
928 }
929
WriteLittleEndian64ToArray(uint64 value,uint8 * target)930 inline uint8* CodedOutputStream::WriteLittleEndian64ToArray(uint64 value,
931 uint8* target) {
932 #if !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST) && \
933 defined(__BYTE_ORDER) && __BYTE_ORDER == __LITTLE_ENDIAN
934 memcpy(target, &value, sizeof(value));
935 #else
936 uint32 part0 = static_cast<uint32>(value);
937 uint32 part1 = static_cast<uint32>(value >> 32);
938
939 target[0] = static_cast<uint8>(part0);
940 target[1] = static_cast<uint8>(part0 >> 8);
941 target[2] = static_cast<uint8>(part0 >> 16);
942 target[3] = static_cast<uint8>(part0 >> 24);
943 target[4] = static_cast<uint8>(part1);
944 target[5] = static_cast<uint8>(part1 >> 8);
945 target[6] = static_cast<uint8>(part1 >> 16);
946 target[7] = static_cast<uint8>(part1 >> 24);
947 #endif
948 return target + sizeof(value);
949 }
950
WriteTag(uint32 value)951 inline void CodedOutputStream::WriteTag(uint32 value) {
952 WriteVarint32(value);
953 }
954
WriteTagToArray(uint32 value,uint8 * target)955 inline uint8* CodedOutputStream::WriteTagToArray(
956 uint32 value, uint8* target) {
957 if (value < (1 << 7)) {
958 target[0] = value;
959 return target + 1;
960 } else if (value < (1 << 14)) {
961 target[0] = static_cast<uint8>(value | 0x80);
962 target[1] = static_cast<uint8>(value >> 7);
963 return target + 2;
964 } else {
965 return WriteVarint32FallbackToArray(value, target);
966 }
967 }
968
VarintSize32(uint32 value)969 inline int CodedOutputStream::VarintSize32(uint32 value) {
970 if (value < (1 << 7)) {
971 return 1;
972 } else {
973 return VarintSize32Fallback(value);
974 }
975 }
976
VarintSize32SignExtended(int32 value)977 inline int CodedOutputStream::VarintSize32SignExtended(int32 value) {
978 if (value < 0) {
979 return 10; // TODO(kenton): Make this a symbolic constant.
980 } else {
981 return VarintSize32(static_cast<uint32>(value));
982 }
983 }
984
WriteString(const string & str)985 inline void CodedOutputStream::WriteString(const string& str) {
986 WriteRaw(str.data(), str.size());
987 }
988
WriteStringToArray(const string & str,uint8 * target)989 inline uint8* CodedOutputStream::WriteStringToArray(
990 const string& str, uint8* target) {
991 return WriteRawToArray(str.data(), str.size(), target);
992 }
993
ByteCount()994 inline int CodedOutputStream::ByteCount() const {
995 return total_bytes_ - buffer_size_;
996 }
997
Advance(int amount)998 inline void CodedInputStream::Advance(int amount) {
999 buffer_ += amount;
1000 }
1001
Advance(int amount)1002 inline void CodedOutputStream::Advance(int amount) {
1003 buffer_ += amount;
1004 buffer_size_ -= amount;
1005 }
1006
SetRecursionLimit(int limit)1007 inline void CodedInputStream::SetRecursionLimit(int limit) {
1008 recursion_limit_ = limit;
1009 }
1010
IncrementRecursionDepth()1011 inline bool CodedInputStream::IncrementRecursionDepth() {
1012 ++recursion_depth_;
1013 return recursion_depth_ <= recursion_limit_;
1014 }
1015
DecrementRecursionDepth()1016 inline void CodedInputStream::DecrementRecursionDepth() {
1017 if (recursion_depth_ > 0) --recursion_depth_;
1018 }
1019
SetExtensionRegistry(DescriptorPool * pool,MessageFactory * factory)1020 inline void CodedInputStream::SetExtensionRegistry(DescriptorPool* pool,
1021 MessageFactory* factory) {
1022 extension_pool_ = pool;
1023 extension_factory_ = factory;
1024 }
1025
GetExtensionPool()1026 inline const DescriptorPool* CodedInputStream::GetExtensionPool() {
1027 return extension_pool_;
1028 }
1029
GetExtensionFactory()1030 inline MessageFactory* CodedInputStream::GetExtensionFactory() {
1031 return extension_factory_;
1032 }
1033
BufferSize()1034 inline int CodedInputStream::BufferSize() const {
1035 return buffer_end_ - buffer_;
1036 }
1037
CodedInputStream(ZeroCopyInputStream * input)1038 inline CodedInputStream::CodedInputStream(ZeroCopyInputStream* input)
1039 : input_(input),
1040 buffer_(NULL),
1041 buffer_end_(NULL),
1042 total_bytes_read_(0),
1043 overflow_bytes_(0),
1044 last_tag_(0),
1045 legitimate_message_end_(false),
1046 aliasing_enabled_(false),
1047 current_limit_(INT_MAX),
1048 buffer_size_after_limit_(0),
1049 total_bytes_limit_(kDefaultTotalBytesLimit),
1050 total_bytes_warning_threshold_(kDefaultTotalBytesWarningThreshold),
1051 recursion_depth_(0),
1052 recursion_limit_(kDefaultRecursionLimit),
1053 extension_pool_(NULL),
1054 extension_factory_(NULL) {
1055 // Eagerly Refresh() so buffer space is immediately available.
1056 Refresh();
1057 }
1058
CodedInputStream(const uint8 * buffer,int size)1059 inline CodedInputStream::CodedInputStream(const uint8* buffer, int size)
1060 : input_(NULL),
1061 buffer_(buffer),
1062 buffer_end_(buffer + size),
1063 total_bytes_read_(size),
1064 overflow_bytes_(0),
1065 last_tag_(0),
1066 legitimate_message_end_(false),
1067 aliasing_enabled_(false),
1068 current_limit_(size),
1069 buffer_size_after_limit_(0),
1070 total_bytes_limit_(kDefaultTotalBytesLimit),
1071 total_bytes_warning_threshold_(kDefaultTotalBytesWarningThreshold),
1072 recursion_depth_(0),
1073 recursion_limit_(kDefaultRecursionLimit),
1074 extension_pool_(NULL),
1075 extension_factory_(NULL) {
1076 // Note that setting current_limit_ == size is important to prevent some
1077 // code paths from trying to access input_ and segfaulting.
1078 }
1079
~CodedInputStream()1080 inline CodedInputStream::~CodedInputStream() {
1081 if (input_ != NULL) {
1082 BackUpInputToCurrentPosition();
1083 }
1084 }
1085
1086 } // namespace io
1087 } // namespace protobuf
1088
1089 } // namespace google
1090 #endif // GOOGLE_PROTOBUF_IO_CODED_STREAM_H__
1091