1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc. All rights reserved.
3 // https://developers.google.com/protocol-buffers/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 // * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 // * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 // * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31 // Author: kenton@google.com (Kenton Varda)
32 // Based on original Protocol Buffers design by
33 // Sanjay Ghemawat, Jeff Dean, and others.
34 //
35 // This file contains the CodedInputStream and CodedOutputStream classes,
36 // which wrap a ZeroCopyInputStream or ZeroCopyOutputStream, respectively,
37 // and allow you to read or write individual pieces of data in various
38 // formats. In particular, these implement the varint encoding for
39 // integers, a simple variable-length encoding in which smaller numbers
40 // take fewer bytes.
41 //
42 // Typically these classes will only be used internally by the protocol
43 // buffer library in order to encode and decode protocol buffers. Clients
44 // of the library only need to know about these classes if they wish to write
45 // custom message parsing or serialization procedures.
46 //
47 // CodedOutputStream example:
48 // // Write some data to "myfile". First we write a 4-byte "magic number"
49 // // to identify the file type, then write a length-delimited string. The
50 // // string is composed of a varint giving the length followed by the raw
51 // // bytes.
52 // int fd = open("myfile", O_CREAT | O_WRONLY);
53 // ZeroCopyOutputStream* raw_output = new FileOutputStream(fd);
54 // CodedOutputStream* coded_output = new CodedOutputStream(raw_output);
55 //
56 // int magic_number = 1234;
57 // char text[] = "Hello world!";
58 // coded_output->WriteLittleEndian32(magic_number);
59 // coded_output->WriteVarint32(strlen(text));
60 // coded_output->WriteRaw(text, strlen(text));
61 //
62 // delete coded_output;
63 // delete raw_output;
64 // close(fd);
65 //
66 // CodedInputStream example:
67 // // Read a file created by the above code.
68 // int fd = open("myfile", O_RDONLY);
69 // ZeroCopyInputStream* raw_input = new FileInputStream(fd);
70 // CodedInputStream* coded_input = new CodedInputStream(raw_input);
71 //
72 // coded_input->ReadLittleEndian32(&magic_number);
73 // if (magic_number != 1234) {
74 // cerr << "File not in expected format." << endl;
75 // return;
76 // }
77 //
78 // uint32 size;
79 // coded_input->ReadVarint32(&size);
80 //
81 // char* text = new char[size + 1];
82 // coded_input->ReadRaw(text, size);
83 // text[size] = '\0';
84 //
85 // delete coded_input;
86 // delete raw_input;
87 // close(fd);
88 //
89 // cout << "Text is: " << text << endl;
90 // delete [] text;
91 //
92 // For those who are interested, varint encoding is defined as follows:
93 //
94 // The encoding operates on unsigned integers of up to 64 bits in length.
95 // Each byte of the encoded value has the format:
96 // * bits 0-6: Seven bits of the number being encoded.
97 // * bit 7: Zero if this is the last byte in the encoding (in which
98 // case all remaining bits of the number are zero) or 1 if
99 // more bytes follow.
100 // The first byte contains the least-significant 7 bits of the number, the
101 // second byte (if present) contains the next-least-significant 7 bits,
102 // and so on. So, the binary number 1011000101011 would be encoded in two
103 // bytes as "10101011 00101100".
104 //
105 // In theory, varint could be used to encode integers of any length.
106 // However, for practicality we set a limit at 64 bits. The maximum encoded
107 // length of a number is thus 10 bytes.
108
109 #ifndef GOOGLE_PROTOBUF_IO_CODED_STREAM_H__
110 #define GOOGLE_PROTOBUF_IO_CODED_STREAM_H__
111
112 #include <assert.h>
113 #include <string>
114 #include <utility>
115 #ifdef _MSC_VER
116 // Assume Windows is always little-endian (tests can opt out, see below).
117 #if !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST)
118 #define PROTOBUF_LITTLE_ENDIAN 1
119 #endif // !PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST
120 #if _MSC_VER >= 1300 && !defined(__INTEL_COMPILER)
121 // If MSVC has "/RTCc" set, it will complain about truncating casts at
122 // runtime. This file contains some intentional truncating casts.
123 #pragma runtime_checks("c", off)
124 #endif // _MSC_VER >= 1300 && !__INTEL_COMPILER
125 #else // !_MSC_VER
126 #include <sys/param.h> // __BYTE_ORDER
127 #if ((defined(__LITTLE_ENDIAN__) && !defined(__BIG_ENDIAN__)) || \
128 (defined(__BYTE_ORDER) && __BYTE_ORDER == __LITTLE_ENDIAN)) && \
129 !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST)
130 #define PROTOBUF_LITTLE_ENDIAN 1
131 #endif // little-endian detection
132 #endif // _MSC_VER
133 #include <google/protobuf/stubs/common.h>
134
135 namespace google {
136
137 namespace protobuf {
138
139 class DescriptorPool; // used by CodedInputStream::SetExtensionRegistry()
140 class MessageFactory; // used by CodedInputStream::SetExtensionRegistry()
141
142 namespace io {
143
144 // Defined in this file.
145 class CodedInputStream;
146 class CodedOutputStream;
147
148 // Defined in other files.
149 class ZeroCopyInputStream; // zero_copy_stream.h
150 class ZeroCopyOutputStream; // zero_copy_stream.h
151
152 // Class which reads and decodes binary data which is composed of varint-
153 // encoded integers and fixed-width pieces. Wraps a ZeroCopyInputStream.
154 // Most users will not need to deal with CodedInputStream.
155 //
156 // Most methods of CodedInputStream that return a bool return false if an
157 // underlying I/O error occurs or if the data is malformed. Once such a
158 // failure occurs, the CodedInputStream is broken and is no longer useful.
159 class LIBPROTOBUF_EXPORT CodedInputStream {
160 public:
161 // Create a CodedInputStream that reads from the given ZeroCopyInputStream.
162 explicit CodedInputStream(ZeroCopyInputStream* input);
163
164 // Create a CodedInputStream that reads from the given flat array. This is
165 // faster than using an ArrayInputStream. PushLimit(size) is implied by
166 // this constructor.
167 explicit CodedInputStream(const uint8* buffer, int size);
168
169 // Destroy the CodedInputStream and position the underlying
170 // ZeroCopyInputStream at the first unread byte. If an error occurred while
171 // reading (causing a method to return false), then the exact position of
172 // the input stream may be anywhere between the last value that was read
173 // successfully and the stream's byte limit.
174 ~CodedInputStream();
175
176 // Return true if this CodedInputStream reads from a flat array instead of
177 // a ZeroCopyInputStream.
178 inline bool IsFlat() const;
179
180 // Skips a number of bytes. Returns false if an underlying read error
181 // occurs.
182 bool Skip(int count);
183
184 // Sets *data to point directly at the unread part of the CodedInputStream's
185 // underlying buffer, and *size to the size of that buffer, but does not
186 // advance the stream's current position. This will always either produce
187 // a non-empty buffer or return false. If the caller consumes any of
188 // this data, it should then call Skip() to skip over the consumed bytes.
189 // This may be useful for implementing external fast parsing routines for
190 // types of data not covered by the CodedInputStream interface.
191 bool GetDirectBufferPointer(const void** data, int* size);
192
193 // Like GetDirectBufferPointer, but this method is inlined, and does not
194 // attempt to Refresh() if the buffer is currently empty.
195 GOOGLE_ATTRIBUTE_ALWAYS_INLINE void GetDirectBufferPointerInline(const void** data,
196 int* size);
197
198 // Read raw bytes, copying them into the given buffer.
199 bool ReadRaw(void* buffer, int size);
200
201 // Like the above, with inlined optimizations. This should only be used
202 // by the protobuf implementation.
203 GOOGLE_ATTRIBUTE_ALWAYS_INLINE bool InternalReadRawInline(void* buffer, int size);
204
205 // Like ReadRaw, but reads into a string.
206 //
207 // Implementation Note: ReadString() grows the string gradually as it
208 // reads in the data, rather than allocating the entire requested size
209 // upfront. This prevents denial-of-service attacks in which a client
210 // could claim that a string is going to be INT_MAX bytes long in order to
211 // crash the server because it can't allocate this much space at once.
212 bool ReadString(string* buffer, int size);
213 // Like the above, with inlined optimizations. This should only be used
214 // by the protobuf implementation.
215 GOOGLE_ATTRIBUTE_ALWAYS_INLINE bool InternalReadStringInline(string* buffer,
216 int size);
217
218
219 // Read a 32-bit little-endian integer.
220 bool ReadLittleEndian32(uint32* value);
221 // Read a 64-bit little-endian integer.
222 bool ReadLittleEndian64(uint64* value);
223
224 // These methods read from an externally provided buffer. The caller is
225 // responsible for ensuring that the buffer has sufficient space.
226 // Read a 32-bit little-endian integer.
227 static const uint8* ReadLittleEndian32FromArray(const uint8* buffer,
228 uint32* value);
229 // Read a 64-bit little-endian integer.
230 static const uint8* ReadLittleEndian64FromArray(const uint8* buffer,
231 uint64* value);
232
233 // Read an unsigned integer with Varint encoding, truncating to 32 bits.
234 // Reading a 32-bit value is equivalent to reading a 64-bit one and casting
235 // it to uint32, but may be more efficient.
236 bool ReadVarint32(uint32* value);
237 // Read an unsigned integer with Varint encoding.
238 bool ReadVarint64(uint64* value);
239
240 // Read a tag. This calls ReadVarint32() and returns the result, or returns
241 // zero (which is not a valid tag) if ReadVarint32() fails. Also, it updates
242 // the last tag value, which can be checked with LastTagWas().
243 // Always inline because this is only called in one place per parse loop
244 // but it is called for every iteration of said loop, so it should be fast.
245 // GCC doesn't want to inline this by default.
246 GOOGLE_ATTRIBUTE_ALWAYS_INLINE uint32 ReadTag();
247
248 // This is usually a faster alternative to ReadTag() when cutoff is a manifest
249 // constant. It does particularly well for cutoff >= 127. The first part
250 // of the return value is the tag that was read, though it can also be 0 in
251 // the cases where ReadTag() would return 0. If the second part is true
252 // then the tag is known to be in [0, cutoff]. If not, the tag either is
253 // above cutoff or is 0. (There's intentional wiggle room when tag is 0,
254 // because that can arise in several ways, and for best performance we want
255 // to avoid an extra "is tag == 0?" check here.)
256 GOOGLE_ATTRIBUTE_ALWAYS_INLINE std::pair<uint32, bool> ReadTagWithCutoff(
257 uint32 cutoff);
258
259 // Usually returns true if calling ReadVarint32() now would produce the given
260 // value. Will always return false if ReadVarint32() would not return the
261 // given value. If ExpectTag() returns true, it also advances past
262 // the varint. For best performance, use a compile-time constant as the
263 // parameter.
264 // Always inline because this collapses to a small number of instructions
265 // when given a constant parameter, but GCC doesn't want to inline by default.
266 GOOGLE_ATTRIBUTE_ALWAYS_INLINE bool ExpectTag(uint32 expected);
267
268 // Like above, except this reads from the specified buffer. The caller is
269 // responsible for ensuring that the buffer is large enough to read a varint
270 // of the expected size. For best performance, use a compile-time constant as
271 // the expected tag parameter.
272 //
273 // Returns a pointer beyond the expected tag if it was found, or NULL if it
274 // was not.
275 GOOGLE_ATTRIBUTE_ALWAYS_INLINE static const uint8* ExpectTagFromArray(
276 const uint8* buffer,
277 uint32 expected);
278
279 // Usually returns true if no more bytes can be read. Always returns false
280 // if more bytes can be read. If ExpectAtEnd() returns true, a subsequent
281 // call to LastTagWas() will act as if ReadTag() had been called and returned
282 // zero, and ConsumedEntireMessage() will return true.
283 bool ExpectAtEnd();
284
285 // If the last call to ReadTag() or ReadTagWithCutoff() returned the
286 // given value, returns true. Otherwise, returns false.
287 //
288 // This is needed because parsers for some types of embedded messages
289 // (with field type TYPE_GROUP) don't actually know that they've reached the
290 // end of a message until they see an ENDGROUP tag, which was actually part
291 // of the enclosing message. The enclosing message would like to check that
292 // tag to make sure it had the right number, so it calls LastTagWas() on
293 // return from the embedded parser to check.
294 bool LastTagWas(uint32 expected);
295
296 // When parsing a message (but NOT a group), this method must be called
297 // immediately after MergeFromCodedStream() returns (if it returns true)
298 // to further verify that the message ended in a legitimate way. For
299 // example, this verifies that parsing did not end on an end-group tag.
300 // It also checks for some cases where, due to optimizations,
301 // MergeFromCodedStream() can incorrectly return true.
302 bool ConsumedEntireMessage();
303
304 // Limits ----------------------------------------------------------
305 // Limits are used when parsing length-delimited embedded messages.
306 // After the message's length is read, PushLimit() is used to prevent
307 // the CodedInputStream from reading beyond that length. Once the
308 // embedded message has been parsed, PopLimit() is called to undo the
309 // limit.
310
311 // Opaque type used with PushLimit() and PopLimit(). Do not modify
312 // values of this type yourself. The only reason that this isn't a
313 // struct with private internals is for efficiency.
314 typedef int Limit;
315
316 // Places a limit on the number of bytes that the stream may read,
317 // starting from the current position. Once the stream hits this limit,
318 // it will act like the end of the input has been reached until PopLimit()
319 // is called.
320 //
321 // As the names imply, the stream conceptually has a stack of limits. The
322 // shortest limit on the stack is always enforced, even if it is not the
323 // top limit.
324 //
325 // The value returned by PushLimit() is opaque to the caller, and must
326 // be passed unchanged to the corresponding call to PopLimit().
327 Limit PushLimit(int byte_limit);
328
329 // Pops the last limit pushed by PushLimit(). The input must be the value
330 // returned by that call to PushLimit().
331 void PopLimit(Limit limit);
332
333 // Returns the number of bytes left until the nearest limit on the
334 // stack is hit, or -1 if no limits are in place.
335 int BytesUntilLimit() const;
336
337 // Returns current position relative to the beginning of the input stream.
338 int CurrentPosition() const;
339
340 // Total Bytes Limit -----------------------------------------------
341 // To prevent malicious users from sending excessively large messages
342 // and causing integer overflows or memory exhaustion, CodedInputStream
343 // imposes a hard limit on the total number of bytes it will read.
344
345 // Sets the maximum number of bytes that this CodedInputStream will read
346 // before refusing to continue. To prevent integer overflows in the
347 // protocol buffers implementation, as well as to prevent servers from
348 // allocating enormous amounts of memory to hold parsed messages, the
349 // maximum message length should be limited to the shortest length that
350 // will not harm usability. The theoretical shortest message that could
351 // cause integer overflows is 512MB. The default limit is 64MB. Apps
352 // should set shorter limits if possible. If warning_threshold is not -1,
353 // a warning will be printed to stderr after warning_threshold bytes are
354 // read. For backwards compatibility all negative values get squashed to -1,
355 // as other negative values might have special internal meanings.
356 // An error will always be printed to stderr if the limit is reached.
357 //
358 // This is unrelated to PushLimit()/PopLimit().
359 //
360 // Hint: If you are reading this because your program is printing a
361 // warning about dangerously large protocol messages, you may be
362 // confused about what to do next. The best option is to change your
363 // design such that excessively large messages are not necessary.
364 // For example, try to design file formats to consist of many small
365 // messages rather than a single large one. If this is infeasible,
366 // you will need to increase the limit. Chances are, though, that
367 // your code never constructs a CodedInputStream on which the limit
368 // can be set. You probably parse messages by calling things like
369 // Message::ParseFromString(). In this case, you will need to change
370 // your code to instead construct some sort of ZeroCopyInputStream
371 // (e.g. an ArrayInputStream), construct a CodedInputStream around
372 // that, then call Message::ParseFromCodedStream() instead. Then
373 // you can adjust the limit. Yes, it's more work, but you're doing
374 // something unusual.
375 void SetTotalBytesLimit(int total_bytes_limit, int warning_threshold);
376
377 // The Total Bytes Limit minus the Current Position, or -1 if there
378 // is no Total Bytes Limit.
379 int BytesUntilTotalBytesLimit() const;
380
381 // Recursion Limit -------------------------------------------------
382 // To prevent corrupt or malicious messages from causing stack overflows,
383 // we must keep track of the depth of recursion when parsing embedded
384 // messages and groups. CodedInputStream keeps track of this because it
385 // is the only object that is passed down the stack during parsing.
386
387 // Sets the maximum recursion depth. The default is 100.
388 void SetRecursionLimit(int limit);
389
390
391 // Increments the current recursion depth. Returns true if the depth is
392 // under the limit, false if it has gone over.
393 bool IncrementRecursionDepth();
394
395 // Decrements the recursion depth if possible.
396 void DecrementRecursionDepth();
397
398 // Decrements the recursion depth blindly. This is faster than
399 // DecrementRecursionDepth(). It should be used only if all previous
400 // increments to recursion depth were successful.
401 void UnsafeDecrementRecursionDepth();
402
403 // Shorthand for make_pair(PushLimit(byte_limit), --recursion_budget_).
404 // Using this can reduce code size and complexity in some cases. The caller
405 // is expected to check that the second part of the result is non-negative (to
406 // bail out if the depth of recursion is too high) and, if all is well, to
407 // later pass the first part of the result to PopLimit() or similar.
408 std::pair<CodedInputStream::Limit, int> IncrementRecursionDepthAndPushLimit(
409 int byte_limit);
410
411 // Shorthand for PushLimit(ReadVarint32(&length) ? length : 0).
412 Limit ReadLengthAndPushLimit();
413
414 // Helper that is equivalent to: {
415 // bool result = ConsumedEntireMessage();
416 // PopLimit(limit);
417 // UnsafeDecrementRecursionDepth();
418 // return result; }
419 // Using this can reduce code size and complexity in some cases.
420 // Do not use unless the current recursion depth is greater than zero.
421 bool DecrementRecursionDepthAndPopLimit(Limit limit);
422
423 // Helper that is equivalent to: {
424 // bool result = ConsumedEntireMessage();
425 // PopLimit(limit);
426 // return result; }
427 // Using this can reduce code size and complexity in some cases.
428 bool CheckEntireMessageConsumedAndPopLimit(Limit limit);
429
430 // Extension Registry ----------------------------------------------
431 // ADVANCED USAGE: 99.9% of people can ignore this section.
432 //
433 // By default, when parsing extensions, the parser looks for extension
434 // definitions in the pool which owns the outer message's Descriptor.
435 // However, you may call SetExtensionRegistry() to provide an alternative
436 // pool instead. This makes it possible, for example, to parse a message
437 // using a generated class, but represent some extensions using
438 // DynamicMessage.
439
440 // Set the pool used to look up extensions. Most users do not need to call
441 // this as the correct pool will be chosen automatically.
442 //
443 // WARNING: It is very easy to misuse this. Carefully read the requirements
444 // below. Do not use this unless you are sure you need it. Almost no one
445 // does.
446 //
447 // Let's say you are parsing a message into message object m, and you want
448 // to take advantage of SetExtensionRegistry(). You must follow these
449 // requirements:
450 //
451 // The given DescriptorPool must contain m->GetDescriptor(). It is not
452 // sufficient for it to simply contain a descriptor that has the same name
453 // and content -- it must be the *exact object*. In other words:
454 // assert(pool->FindMessageTypeByName(m->GetDescriptor()->full_name()) ==
455 // m->GetDescriptor());
456 // There are two ways to satisfy this requirement:
457 // 1) Use m->GetDescriptor()->pool() as the pool. This is generally useless
458 // because this is the pool that would be used anyway if you didn't call
459 // SetExtensionRegistry() at all.
460 // 2) Use a DescriptorPool which has m->GetDescriptor()->pool() as an
461 // "underlay". Read the documentation for DescriptorPool for more
462 // information about underlays.
463 //
464 // You must also provide a MessageFactory. This factory will be used to
465 // construct Message objects representing extensions. The factory's
466 // GetPrototype() MUST return non-NULL for any Descriptor which can be found
467 // through the provided pool.
468 //
469 // If the provided factory might return instances of protocol-compiler-
470 // generated (i.e. compiled-in) types, or if the outer message object m is
471 // a generated type, then the given factory MUST have this property: If
472 // GetPrototype() is given a Descriptor which resides in
473 // DescriptorPool::generated_pool(), the factory MUST return the same
474 // prototype which MessageFactory::generated_factory() would return. That
475 // is, given a descriptor for a generated type, the factory must return an
476 // instance of the generated class (NOT DynamicMessage). However, when
477 // given a descriptor for a type that is NOT in generated_pool, the factory
478 // is free to return any implementation.
479 //
480 // The reason for this requirement is that generated sub-objects may be
481 // accessed via the standard (non-reflection) extension accessor methods,
482 // and these methods will down-cast the object to the generated class type.
483 // If the object is not actually of that type, the results would be undefined.
484 // On the other hand, if an extension is not compiled in, then there is no
485 // way the code could end up accessing it via the standard accessors -- the
486 // only way to access the extension is via reflection. When using reflection,
487 // DynamicMessage and generated messages are indistinguishable, so it's fine
488 // if these objects are represented using DynamicMessage.
489 //
490 // Using DynamicMessageFactory on which you have called
491 // SetDelegateToGeneratedFactory(true) should be sufficient to satisfy the
492 // above requirement.
493 //
494 // If either pool or factory is NULL, both must be NULL.
495 //
496 // Note that this feature is ignored when parsing "lite" messages as they do
497 // not have descriptors.
498 void SetExtensionRegistry(const DescriptorPool* pool,
499 MessageFactory* factory);
500
501 // Get the DescriptorPool set via SetExtensionRegistry(), or NULL if no pool
502 // has been provided.
503 const DescriptorPool* GetExtensionPool();
504
505 // Get the MessageFactory set via SetExtensionRegistry(), or NULL if no
506 // factory has been provided.
507 MessageFactory* GetExtensionFactory();
508
509 private:
510 GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(CodedInputStream);
511
512 const uint8* buffer_; // presumably the next unread byte -- TODO confirm
513 const uint8* buffer_end_; // pointer to the end of the buffer.
514 ZeroCopyInputStream* input_;
515 int total_bytes_read_; // total bytes read from input_, including
516 // the current buffer
517
518 // If total_bytes_read_ surpasses INT_MAX, we record the extra bytes here
519 // so that we can BackUp() on destruction.
520 int overflow_bytes_;
521
522 // LastTagWas() stuff.
523 uint32 last_tag_; // result of last ReadTag() or ReadTagWithCutoff().
524
525 // This is set true by ReadTag{Fallback/Slow}() if it is called when exactly
526 // at EOF, or by ExpectAtEnd() when it returns true. This happens when we
527 // reach the end of a message and attempt to read another tag.
528 bool legitimate_message_end_;
529
530 // See EnableAliasing(). NOTE(review): not declared in this class; confirm.
531 bool aliasing_enabled_;
532
533 // Limits
534 Limit current_limit_; // if position = -1, no limit is applied
535
536 // For simplicity, if the current buffer crosses a limit (either a normal
537 // limit created by PushLimit() or the total bytes limit), "buffer_size_"
538 // only tracks the number of bytes before that limit, and this field holds
539 // the number of bytes after it. So if buffer_size_ == 0 and
540 // buffer_size_after_limit_ > 0, we know we've hit a limit; if both are zero
541 // we may still be at a limit (the buffer may have ended exactly there).
542 // NOTE(review): no buffer_size_ member exists here; presumably BufferSize().
543 int buffer_size_after_limit_;
544
545 // Maximum number of bytes to read, period. This is unrelated to
546 // current_limit_. Set using SetTotalBytesLimit().
547 int total_bytes_limit_;
548
549 // If positive/0: Limit for bytes read after which a warning due to size
550 // should be logged.
551 // If -1: Printing of warning disabled. Can be set by client.
552 // If -2: Internal: Limit has been reached, print full size when destructing.
553 int total_bytes_warning_threshold_;
554
555 // Current recursion budget, controlled by IncrementRecursionDepth() and
556 // similar. Starts at recursion_limit_ and goes down: if this reaches
557 // -1 we are over budget.
558 int recursion_budget_;
559 // Recursion depth limit, set by SetRecursionLimit().
560 int recursion_limit_;
561
562 // See SetExtensionRegistry().
563 const DescriptorPool* extension_pool_;
564 MessageFactory* extension_factory_;
565
566 // Private member functions.
567
568 // Advance the buffer by a given number of bytes.
569 void Advance(int amount);
570
571 // Back up input_ to the current buffer position.
572 void BackUpInputToCurrentPosition();
573
574 // Recomputes the value of buffer_size_after_limit_. Must be called after
575 // current_limit_ or total_bytes_limit_ changes.
576 void RecomputeBufferLimits();
577
578 // Writes an error message saying that we hit total_bytes_limit_.
579 void PrintTotalBytesLimitError();
580
581 // Called when the buffer runs out to request more data. Implies an
582 // Advance(BufferSize()).
583 bool Refresh();
584
585 // When parsing varints, we optimize for the common case of small values, and
586 // then optimize for the case when the varint fits within the current buffer
587 // piece. The Fallback method is used when we can't use the one-byte
588 // optimization. The Slow method is yet another fallback when the buffer is
589 // not large enough. Making the slow path out-of-line speeds up the common
590 // case by 10-15%. The slow path is fairly uncommon: it only triggers when a
591 // message crosses multiple buffers. Note: ReadVarint32Fallback() and
592 // ReadVarint64Fallback() are called frequently and generally not inlined, so
593 // they have been optimized to avoid "out" parameters. The former returns -1
594 // if it fails and the uint32 it read otherwise. The latter has a bool
595 // indicating success or failure as part of its return type.
596 int64 ReadVarint32Fallback(uint32 first_byte_or_zero);
597 std::pair<uint64, bool> ReadVarint64Fallback();
598 bool ReadVarint32Slow(uint32* value);
599 bool ReadVarint64Slow(uint64* value);
600 bool ReadLittleEndian32Fallback(uint32* value);
601 bool ReadLittleEndian64Fallback(uint64* value);
602 // Fallback/slow methods for reading tags. These do not update last_tag_,
603 // but will set legitimate_message_end_ if we are at the end of the input
604 // stream.
605 uint32 ReadTagFallback(uint32 first_byte_or_zero);
606 uint32 ReadTagSlow();
607 bool ReadStringFallback(string* buffer, int size);
608
609 // Return the size of the buffer.
610 int BufferSize() const;
611
612 static const int kDefaultTotalBytesLimit = 64 << 20; // 64MB
613
614 static const int kDefaultTotalBytesWarningThreshold = 32 << 20; // 32MB
615
616 static int default_recursion_limit_; // 100 by default.
617 };
618
619 // Class which encodes and writes binary data which is composed of varint-
620 // encoded integers and fixed-width pieces. Wraps a ZeroCopyOutputStream.
621 // Most users will not need to deal with CodedOutputStream.
622 //
623 // Most methods of CodedOutputStream which return a bool return false if an
624 // underlying I/O error occurs. Once such a failure occurs, the
625 // CodedOutputStream is broken and is no longer useful. The Write* methods do
626 // not return the stream status, but will invalidate the stream if an error
627 // occurs. The client can probe HadError() to determine the status.
628 //
629 // Note that every method of CodedOutputStream which writes some data has
630 // a corresponding static "ToArray" version. These versions write directly
631 // to the provided buffer, returning a pointer past the last written byte.
632 // They require that the buffer has sufficient capacity for the encoded data.
633 // This allows an optimization where we check if an output stream has enough
634 // space for an entire message before we start writing and, if there is, we
635 // call only the ToArray methods to avoid doing bound checks for each
636 // individual value.
637 // i.e., in the example above:
638 //
639 // CodedOutputStream* coded_output = new CodedOutputStream(raw_output);
640 // int magic_number = 1234;
641 // char text[] = "Hello world!";
642 //
643 // int coded_size = sizeof(magic_number) +
644 // CodedOutputStream::VarintSize32(strlen(text)) +
645 // strlen(text);
646 //
647 // uint8* buffer =
648 // coded_output->GetDirectBufferForNBytesAndAdvance(coded_size);
649 // if (buffer != NULL) {
650 // // The output stream has enough space in the buffer: write directly to
651 // // the array.
652 // buffer = CodedOutputStream::WriteLittleEndian32ToArray(magic_number,
653 // buffer);
654 // buffer = CodedOutputStream::WriteVarint32ToArray(strlen(text), buffer);
655 // buffer = CodedOutputStream::WriteRawToArray(text, strlen(text), buffer);
656 // } else {
657 // // Make bound-checked writes, which will ask the underlying stream for
658 // // more space as needed.
659 // coded_output->WriteLittleEndian32(magic_number);
660 // coded_output->WriteVarint32(strlen(text));
661 // coded_output->WriteRaw(text, strlen(text));
662 // }
663 //
664 // delete coded_output;
class LIBPROTOBUF_EXPORT CodedOutputStream {
 public:
  // Create a CodedOutputStream that writes to the given ZeroCopyOutputStream.
  explicit CodedOutputStream(ZeroCopyOutputStream* output);
  // Same as above, with an additional do_eager_refresh flag.
  // NOTE(review): presumably controls whether the first output buffer is
  // requested during construction -- confirm against the implementation.
  CodedOutputStream(ZeroCopyOutputStream* output, bool do_eager_refresh);

  // Destroy the CodedOutputStream and position the underlying
  // ZeroCopyOutputStream immediately after the last byte written.
  ~CodedOutputStream();

  // Trims any unused space in the underlying buffer so that its size matches
  // the number of bytes written by this stream. The underlying buffer will
  // automatically be trimmed when this stream is destroyed; this call is only
  // necessary if the underlying buffer is accessed *before* the stream is
  // destroyed.
  void Trim();

  // Skips a number of bytes, leaving the bytes unmodified in the underlying
  // buffer.  Returns false if an underlying write error occurs.  This is
  // mainly useful with GetDirectBufferPointer().
  bool Skip(int count);

  // Sets *data to point directly at the unwritten part of the
  // CodedOutputStream's underlying buffer, and *size to the size of that
  // buffer, but does not advance the stream's current position.  This will
  // always either produce a non-empty buffer or return false.  If the caller
  // writes any data to this buffer, it should then call Skip() to skip over
  // the consumed bytes.  This may be useful for implementing external fast
  // serialization routines for types of data not covered by the
  // CodedOutputStream interface.
  bool GetDirectBufferPointer(void** data, int* size);

  // If there are at least "size" bytes available in the current buffer,
  // returns a pointer directly into the buffer and advances over these bytes.
  // The caller may then write directly into this buffer (e.g. using the
  // *ToArray static methods) rather than go through CodedOutputStream.  If
  // there are not enough bytes available, returns NULL.  The returned pointer
  // is invalidated as soon as any other non-const method of CodedOutputStream
  // is called.
  inline uint8* GetDirectBufferForNBytesAndAdvance(int size);

  // Write raw bytes, copying them from the given buffer.
  void WriteRaw(const void* buffer, int size);
  // Like WriteRaw() but will try to write aliased data if aliasing is
  // turned on (see EnableAliasing()).
  void WriteRawMaybeAliased(const void* data, int size);
  // Like WriteRaw() but writing directly to the target array.
  // This is _not_ inlined, as the compiler often optimizes memcpy into inline
  // copy loops. Since this gets called by every field with string or bytes
  // type, inlining may lead to a significant amount of code bloat, with only a
  // minor performance gain.
  static uint8* WriteRawToArray(const void* buffer, int size, uint8* target);

  // Equivalent to WriteRaw(str.data(), str.size()).
  void WriteString(const string& str);
  // Like WriteString() but writing directly to the target array.
  static uint8* WriteStringToArray(const string& str, uint8* target);
  // Write the varint-encoded size of str followed by str.
  static uint8* WriteStringWithSizeToArray(const string& str, uint8* target);


  // Instructs the CodedOutputStream to allow the underlying
  // ZeroCopyOutputStream to hold pointers to the original structure instead of
  // copying, if it supports it (i.e. output->AllowsAliasing() is true).  If the
  // underlying stream does not support aliasing, then enabling it has no
  // effect.  For now, this only affects the behavior of
  // WriteRawMaybeAliased().
  //
  // NOTE: It is the caller's responsibility to ensure that the chunk of memory
  // remains live until all of the data has been consumed from the stream.
  void EnableAliasing(bool enabled);

  // Write a 32-bit little-endian integer.
  void WriteLittleEndian32(uint32 value);
  // Like WriteLittleEndian32() but writing directly to the target array.
  static uint8* WriteLittleEndian32ToArray(uint32 value, uint8* target);
  // Write a 64-bit little-endian integer.
  void WriteLittleEndian64(uint64 value);
  // Like WriteLittleEndian64() but writing directly to the target array.
  static uint8* WriteLittleEndian64ToArray(uint64 value, uint8* target);

  // Write an unsigned integer with Varint encoding.  Writing a 32-bit value
  // is equivalent to casting it to uint64 and writing it as a 64-bit value,
  // but may be more efficient.
  void WriteVarint32(uint32 value);
  // Like WriteVarint32() but writing directly to the target array.
  static uint8* WriteVarint32ToArray(uint32 value, uint8* target);
  // Write an unsigned integer with Varint encoding.
  void WriteVarint64(uint64 value);
  // Like WriteVarint64() but writing directly to the target array.
  static uint8* WriteVarint64ToArray(uint64 value, uint8* target);

  // Equivalent to WriteVarint32() except when the value is negative,
  // in which case it must be sign-extended to a full 10 bytes.
  void WriteVarint32SignExtended(int32 value);
  // Like WriteVarint32SignExtended() but writing directly to the target array.
  static uint8* WriteVarint32SignExtendedToArray(int32 value, uint8* target);

  // This is identical to WriteVarint32(), but optimized for writing tags.
  // In particular, if the input is a compile-time constant, this method
  // compiles down to a couple instructions.
  // Always inline because otherwise the aforementioned optimization can't
  // work, but GCC by default doesn't want to inline this.
  // NOTE(review): the always-inline attribute is attached to WriteTagToArray()
  // below, not to WriteTag() itself -- confirm which one this remark is about.
  void WriteTag(uint32 value);
  // Like WriteTag() but writing directly to the target array.
  GOOGLE_ATTRIBUTE_ALWAYS_INLINE static uint8* WriteTagToArray(uint32 value,
                                                        uint8* target);

  // Returns the number of bytes needed to encode the given value as a varint.
  static int VarintSize32(uint32 value);
  // Returns the number of bytes needed to encode the given value as a varint.
  static int VarintSize64(uint64 value);

  // If negative, 10 bytes.  Otherwise, same as VarintSize32().
  static int VarintSize32SignExtended(int32 value);

  // Compile-time equivalent of VarintSize32(): each additional 7 bits of
  // magnitude costs one more byte, up to 5 bytes for a full 32-bit value.
  template <uint32 Value>
  struct StaticVarintSize32 {
    static const int value =
        (Value < (1 << 7))
            ? 1
            : (Value < (1 << 14))
                ? 2
                : (Value < (1 << 21))
                    ? 3
                    : (Value < (1 << 28))
                        ? 4
                        : 5;
  };

  // Returns the total number of bytes written since this object was created.
  inline int ByteCount() const;

  // Returns true if there was an underlying I/O error since this object was
  // created.
  bool HadError() const { return had_error_; }

 private:
  GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(CodedOutputStream);

  ZeroCopyOutputStream* output_;
  uint8* buffer_;           // Next byte to write in the current buffer.
  int buffer_size_;         // Bytes still unused in the current buffer.
  int total_bytes_;  // Sum of sizes of all buffers seen so far.
  bool had_error_;   // Whether an error occurred during output.
  bool aliasing_enabled_;  // See EnableAliasing().

  // Advance the buffer by a given number of bytes.
  void Advance(int amount);

  // Called when the buffer runs out to request more data.  Implies an
  // Advance(buffer_size_).
  bool Refresh();

  // Like WriteRaw() but may avoid copying if the underlying
  // ZeroCopyOutputStream supports it.
  void WriteAliasedRaw(const void* buffer, int size);

  // If this write might cross the end of the buffer, we compose the bytes first
  // then use WriteRaw().
  void WriteVarint32SlowPath(uint32 value);

  // Always-inlined versions of WriteVarint* functions so that code can be
  // reused, while still controlling size. For instance, WriteVarint32ToArray()
  // should not directly call this: since it is inlined itself, doing so
  // would greatly increase the size of generated code. Instead, it should call
  // WriteVarint32FallbackToArray.  Meanwhile, WriteVarint32() is already
  // out-of-line, so it should just invoke this directly to avoid any extra
  // function call overhead.
  // NOTE(review): WriteVarint32FallbackToArray is not declared in this class
  // as shown here; this paragraph may be stale -- verify before relying on it.
  GOOGLE_ATTRIBUTE_ALWAYS_INLINE static uint8* WriteVarint64ToArrayInline(
      uint64 value, uint8* target);

  static int VarintSize32Fallback(uint32 value);
};
840
841 // inline methods ====================================================
842 // The vast majority of varints are only one byte. These inline
843 // methods optimize for that case.
844
ReadVarint32(uint32 * value)845 inline bool CodedInputStream::ReadVarint32(uint32* value) {
846 uint32 v = 0;
847 if (GOOGLE_PREDICT_TRUE(buffer_ < buffer_end_)) {
848 v = *buffer_;
849 if (v < 0x80) {
850 *value = v;
851 Advance(1);
852 return true;
853 }
854 }
855 int64 result = ReadVarint32Fallback(v);
856 *value = static_cast<uint32>(result);
857 return result >= 0;
858 }
859
ReadVarint64(uint64 * value)860 inline bool CodedInputStream::ReadVarint64(uint64* value) {
861 if (GOOGLE_PREDICT_TRUE(buffer_ < buffer_end_) && *buffer_ < 0x80) {
862 *value = *buffer_;
863 Advance(1);
864 return true;
865 }
866 std::pair<uint64, bool> p = ReadVarint64Fallback();
867 *value = p.first;
868 return p.second;
869 }
870
871 // static
ReadLittleEndian32FromArray(const uint8 * buffer,uint32 * value)872 inline const uint8* CodedInputStream::ReadLittleEndian32FromArray(
873 const uint8* buffer,
874 uint32* value) {
875 #if defined(PROTOBUF_LITTLE_ENDIAN)
876 memcpy(value, buffer, sizeof(*value));
877 return buffer + sizeof(*value);
878 #else
879 *value = (static_cast<uint32>(buffer[0]) ) |
880 (static_cast<uint32>(buffer[1]) << 8) |
881 (static_cast<uint32>(buffer[2]) << 16) |
882 (static_cast<uint32>(buffer[3]) << 24);
883 return buffer + sizeof(*value);
884 #endif
885 }
886 // static
ReadLittleEndian64FromArray(const uint8 * buffer,uint64 * value)887 inline const uint8* CodedInputStream::ReadLittleEndian64FromArray(
888 const uint8* buffer,
889 uint64* value) {
890 #if defined(PROTOBUF_LITTLE_ENDIAN)
891 memcpy(value, buffer, sizeof(*value));
892 return buffer + sizeof(*value);
893 #else
894 uint32 part0 = (static_cast<uint32>(buffer[0]) ) |
895 (static_cast<uint32>(buffer[1]) << 8) |
896 (static_cast<uint32>(buffer[2]) << 16) |
897 (static_cast<uint32>(buffer[3]) << 24);
898 uint32 part1 = (static_cast<uint32>(buffer[4]) ) |
899 (static_cast<uint32>(buffer[5]) << 8) |
900 (static_cast<uint32>(buffer[6]) << 16) |
901 (static_cast<uint32>(buffer[7]) << 24);
902 *value = static_cast<uint64>(part0) |
903 (static_cast<uint64>(part1) << 32);
904 return buffer + sizeof(*value);
905 #endif
906 }
907
ReadLittleEndian32(uint32 * value)908 inline bool CodedInputStream::ReadLittleEndian32(uint32* value) {
909 #if defined(PROTOBUF_LITTLE_ENDIAN)
910 if (GOOGLE_PREDICT_TRUE(BufferSize() >= static_cast<int>(sizeof(*value)))) {
911 memcpy(value, buffer_, sizeof(*value));
912 Advance(sizeof(*value));
913 return true;
914 } else {
915 return ReadLittleEndian32Fallback(value);
916 }
917 #else
918 return ReadLittleEndian32Fallback(value);
919 #endif
920 }
921
ReadLittleEndian64(uint64 * value)922 inline bool CodedInputStream::ReadLittleEndian64(uint64* value) {
923 #if defined(PROTOBUF_LITTLE_ENDIAN)
924 if (GOOGLE_PREDICT_TRUE(BufferSize() >= static_cast<int>(sizeof(*value)))) {
925 memcpy(value, buffer_, sizeof(*value));
926 Advance(sizeof(*value));
927 return true;
928 } else {
929 return ReadLittleEndian64Fallback(value);
930 }
931 #else
932 return ReadLittleEndian64Fallback(value);
933 #endif
934 }
935
ReadTag()936 inline uint32 CodedInputStream::ReadTag() {
937 uint32 v = 0;
938 if (GOOGLE_PREDICT_TRUE(buffer_ < buffer_end_)) {
939 v = *buffer_;
940 if (v < 0x80) {
941 last_tag_ = v;
942 Advance(1);
943 return v;
944 }
945 }
946 last_tag_ = ReadTagFallback(v);
947 return last_tag_;
948 }
949
ReadTagWithCutoff(uint32 cutoff)950 inline std::pair<uint32, bool> CodedInputStream::ReadTagWithCutoff(
951 uint32 cutoff) {
952 // In performance-sensitive code we can expect cutoff to be a compile-time
953 // constant, and things like "cutoff >= kMax1ByteVarint" to be evaluated at
954 // compile time.
955 uint32 first_byte_or_zero = 0;
956 if (GOOGLE_PREDICT_TRUE(buffer_ < buffer_end_)) {
957 // Hot case: buffer_ non_empty, buffer_[0] in [1, 128).
958 // TODO(gpike): Is it worth rearranging this? E.g., if the number of fields
959 // is large enough then is it better to check for the two-byte case first?
960 first_byte_or_zero = buffer_[0];
961 if (static_cast<int8>(buffer_[0]) > 0) {
962 const uint32 kMax1ByteVarint = 0x7f;
963 uint32 tag = last_tag_ = buffer_[0];
964 Advance(1);
965 return std::make_pair(tag, cutoff >= kMax1ByteVarint || tag <= cutoff);
966 }
967 // Other hot case: cutoff >= 0x80, buffer_ has at least two bytes available,
968 // and tag is two bytes. The latter is tested by bitwise-and-not of the
969 // first byte and the second byte.
970 if (cutoff >= 0x80 &&
971 GOOGLE_PREDICT_TRUE(buffer_ + 1 < buffer_end_) &&
972 GOOGLE_PREDICT_TRUE((buffer_[0] & ~buffer_[1]) >= 0x80)) {
973 const uint32 kMax2ByteVarint = (0x7f << 7) + 0x7f;
974 uint32 tag = last_tag_ = (1u << 7) * buffer_[1] + (buffer_[0] - 0x80);
975 Advance(2);
976 // It might make sense to test for tag == 0 now, but it is so rare that
977 // that we don't bother. A varint-encoded 0 should be one byte unless
978 // the encoder lost its mind. The second part of the return value of
979 // this function is allowed to be either true or false if the tag is 0,
980 // so we don't have to check for tag == 0. We may need to check whether
981 // it exceeds cutoff.
982 bool at_or_below_cutoff = cutoff >= kMax2ByteVarint || tag <= cutoff;
983 return std::make_pair(tag, at_or_below_cutoff);
984 }
985 }
986 // Slow path
987 last_tag_ = ReadTagFallback(first_byte_or_zero);
988 return std::make_pair(last_tag_, static_cast<uint32>(last_tag_ - 1) < cutoff);
989 }
990
LastTagWas(uint32 expected)991 inline bool CodedInputStream::LastTagWas(uint32 expected) {
992 return last_tag_ == expected;
993 }
994
ConsumedEntireMessage()995 inline bool CodedInputStream::ConsumedEntireMessage() {
996 return legitimate_message_end_;
997 }
998
ExpectTag(uint32 expected)999 inline bool CodedInputStream::ExpectTag(uint32 expected) {
1000 if (expected < (1 << 7)) {
1001 if (GOOGLE_PREDICT_TRUE(buffer_ < buffer_end_) && buffer_[0] == expected) {
1002 Advance(1);
1003 return true;
1004 } else {
1005 return false;
1006 }
1007 } else if (expected < (1 << 14)) {
1008 if (GOOGLE_PREDICT_TRUE(BufferSize() >= 2) &&
1009 buffer_[0] == static_cast<uint8>(expected | 0x80) &&
1010 buffer_[1] == static_cast<uint8>(expected >> 7)) {
1011 Advance(2);
1012 return true;
1013 } else {
1014 return false;
1015 }
1016 } else {
1017 // Don't bother optimizing for larger values.
1018 return false;
1019 }
1020 }
1021
// Array counterpart of ExpectTag(): when the bytes at |buffer| are the one- or
// two-byte varint encoding of |expected|, returns a pointer just past them;
// otherwise returns NULL. No bounds checking is performed here.
inline const uint8* CodedInputStream::ExpectTagFromArray(
    const uint8* buffer, uint32 expected) {
  if (expected < (1 << 7)) {
    if (buffer[0] != expected) return NULL;
    return buffer + 1;
  }
  if (expected < (1 << 14)) {
    const bool matched =
        buffer[0] == static_cast<uint8>(expected | 0x80) &&
        buffer[1] == static_cast<uint8>(expected >> 7);
    if (!matched) return NULL;
    return buffer + 2;
  }
  // Tags needing three or more bytes are never matched.
  return NULL;
}
1036
GetDirectBufferPointerInline(const void ** data,int * size)1037 inline void CodedInputStream::GetDirectBufferPointerInline(const void** data,
1038 int* size) {
1039 *data = buffer_;
1040 *size = static_cast<int>(buffer_end_ - buffer_);
1041 }
1042
ExpectAtEnd()1043 inline bool CodedInputStream::ExpectAtEnd() {
1044 // If we are at a limit we know no more bytes can be read. Otherwise, it's
1045 // hard to say without calling Refresh(), and we'd rather not do that.
1046
1047 if (buffer_ == buffer_end_ &&
1048 ((buffer_size_after_limit_ != 0) ||
1049 (total_bytes_read_ == current_limit_))) {
1050 last_tag_ = 0; // Pretend we called ReadTag()...
1051 legitimate_message_end_ = true; // ... and it hit EOF.
1052 return true;
1053 } else {
1054 return false;
1055 }
1056 }
1057
CurrentPosition()1058 inline int CodedInputStream::CurrentPosition() const {
1059 return total_bytes_read_ - (BufferSize() + buffer_size_after_limit_);
1060 }
1061
GetDirectBufferForNBytesAndAdvance(int size)1062 inline uint8* CodedOutputStream::GetDirectBufferForNBytesAndAdvance(int size) {
1063 if (buffer_size_ < size) {
1064 return NULL;
1065 } else {
1066 uint8* result = buffer_;
1067 Advance(size);
1068 return result;
1069 }
1070 }
1071
WriteVarint32ToArray(uint32 value,uint8 * target)1072 inline uint8* CodedOutputStream::WriteVarint32ToArray(uint32 value,
1073 uint8* target) {
1074 while (value >= 0x80) {
1075 *target = static_cast<uint8>(value | 0x80);
1076 value >>= 7;
1077 ++target;
1078 }
1079 *target = static_cast<uint8>(value);
1080 return target + 1;
1081 }
1082
WriteVarint32SignExtended(int32 value)1083 inline void CodedOutputStream::WriteVarint32SignExtended(int32 value) {
1084 if (value < 0) {
1085 WriteVarint64(static_cast<uint64>(value));
1086 } else {
1087 WriteVarint32(static_cast<uint32>(value));
1088 }
1089 }
1090
WriteVarint32SignExtendedToArray(int32 value,uint8 * target)1091 inline uint8* CodedOutputStream::WriteVarint32SignExtendedToArray(
1092 int32 value, uint8* target) {
1093 if (value < 0) {
1094 return WriteVarint64ToArray(static_cast<uint64>(value), target);
1095 } else {
1096 return WriteVarint32ToArray(static_cast<uint32>(value), target);
1097 }
1098 }
1099
WriteLittleEndian32ToArray(uint32 value,uint8 * target)1100 inline uint8* CodedOutputStream::WriteLittleEndian32ToArray(uint32 value,
1101 uint8* target) {
1102 #if defined(PROTOBUF_LITTLE_ENDIAN)
1103 memcpy(target, &value, sizeof(value));
1104 #else
1105 target[0] = static_cast<uint8>(value);
1106 target[1] = static_cast<uint8>(value >> 8);
1107 target[2] = static_cast<uint8>(value >> 16);
1108 target[3] = static_cast<uint8>(value >> 24);
1109 #endif
1110 return target + sizeof(value);
1111 }
1112
WriteLittleEndian64ToArray(uint64 value,uint8 * target)1113 inline uint8* CodedOutputStream::WriteLittleEndian64ToArray(uint64 value,
1114 uint8* target) {
1115 #if defined(PROTOBUF_LITTLE_ENDIAN)
1116 memcpy(target, &value, sizeof(value));
1117 #else
1118 uint32 part0 = static_cast<uint32>(value);
1119 uint32 part1 = static_cast<uint32>(value >> 32);
1120
1121 target[0] = static_cast<uint8>(part0);
1122 target[1] = static_cast<uint8>(part0 >> 8);
1123 target[2] = static_cast<uint8>(part0 >> 16);
1124 target[3] = static_cast<uint8>(part0 >> 24);
1125 target[4] = static_cast<uint8>(part1);
1126 target[5] = static_cast<uint8>(part1 >> 8);
1127 target[6] = static_cast<uint8>(part1 >> 16);
1128 target[7] = static_cast<uint8>(part1 >> 24);
1129 #endif
1130 return target + sizeof(value);
1131 }
1132
WriteVarint32(uint32 value)1133 inline void CodedOutputStream::WriteVarint32(uint32 value) {
1134 if (buffer_size_ >= 5) {
1135 // Fast path: We have enough bytes left in the buffer to guarantee that
1136 // this write won't cross the end, so we can skip the checks.
1137 uint8* target = buffer_;
1138 uint8* end = WriteVarint32ToArray(value, target);
1139 int size = static_cast<int>(end - target);
1140 Advance(size);
1141 } else {
1142 WriteVarint32SlowPath(value);
1143 }
1144 }
1145
WriteTag(uint32 value)1146 inline void CodedOutputStream::WriteTag(uint32 value) {
1147 WriteVarint32(value);
1148 }
1149
WriteTagToArray(uint32 value,uint8 * target)1150 inline uint8* CodedOutputStream::WriteTagToArray(
1151 uint32 value, uint8* target) {
1152 return WriteVarint32ToArray(value, target);
1153 }
1154
VarintSize32(uint32 value)1155 inline int CodedOutputStream::VarintSize32(uint32 value) {
1156 if (value < (1 << 7)) {
1157 return 1;
1158 } else {
1159 return VarintSize32Fallback(value);
1160 }
1161 }
1162
VarintSize32SignExtended(int32 value)1163 inline int CodedOutputStream::VarintSize32SignExtended(int32 value) {
1164 if (value < 0) {
1165 return 10; // TODO(kenton): Make this a symbolic constant.
1166 } else {
1167 return VarintSize32(static_cast<uint32>(value));
1168 }
1169 }
1170
WriteString(const string & str)1171 inline void CodedOutputStream::WriteString(const string& str) {
1172 WriteRaw(str.data(), static_cast<int>(str.size()));
1173 }
1174
WriteRawMaybeAliased(const void * data,int size)1175 inline void CodedOutputStream::WriteRawMaybeAliased(
1176 const void* data, int size) {
1177 if (aliasing_enabled_) {
1178 WriteAliasedRaw(data, size);
1179 } else {
1180 WriteRaw(data, size);
1181 }
1182 }
1183
WriteStringToArray(const string & str,uint8 * target)1184 inline uint8* CodedOutputStream::WriteStringToArray(
1185 const string& str, uint8* target) {
1186 return WriteRawToArray(str.data(), static_cast<int>(str.size()), target);
1187 }
1188
ByteCount()1189 inline int CodedOutputStream::ByteCount() const {
1190 return total_bytes_ - buffer_size_;
1191 }
1192
Advance(int amount)1193 inline void CodedInputStream::Advance(int amount) {
1194 buffer_ += amount;
1195 }
1196
Advance(int amount)1197 inline void CodedOutputStream::Advance(int amount) {
1198 buffer_ += amount;
1199 buffer_size_ -= amount;
1200 }
1201
SetRecursionLimit(int limit)1202 inline void CodedInputStream::SetRecursionLimit(int limit) {
1203 recursion_budget_ += limit - recursion_limit_;
1204 recursion_limit_ = limit;
1205 }
1206
IncrementRecursionDepth()1207 inline bool CodedInputStream::IncrementRecursionDepth() {
1208 --recursion_budget_;
1209 return recursion_budget_ >= 0;
1210 }
1211
DecrementRecursionDepth()1212 inline void CodedInputStream::DecrementRecursionDepth() {
1213 if (recursion_budget_ < recursion_limit_) ++recursion_budget_;
1214 }
1215
UnsafeDecrementRecursionDepth()1216 inline void CodedInputStream::UnsafeDecrementRecursionDepth() {
1217 assert(recursion_budget_ < recursion_limit_);
1218 ++recursion_budget_;
1219 }
1220
SetExtensionRegistry(const DescriptorPool * pool,MessageFactory * factory)1221 inline void CodedInputStream::SetExtensionRegistry(const DescriptorPool* pool,
1222 MessageFactory* factory) {
1223 extension_pool_ = pool;
1224 extension_factory_ = factory;
1225 }
1226
GetExtensionPool()1227 inline const DescriptorPool* CodedInputStream::GetExtensionPool() {
1228 return extension_pool_;
1229 }
1230
GetExtensionFactory()1231 inline MessageFactory* CodedInputStream::GetExtensionFactory() {
1232 return extension_factory_;
1233 }
1234
BufferSize()1235 inline int CodedInputStream::BufferSize() const {
1236 return static_cast<int>(buffer_end_ - buffer_);
1237 }
1238
// Constructs a CodedInputStream that reads from the given
// ZeroCopyInputStream. The stream starts with an empty buffer, no effective
// limit (current_limit_ is kint32max), the default total-bytes limits and the
// default recursion limit, and then immediately calls Refresh().
inline CodedInputStream::CodedInputStream(ZeroCopyInputStream* input)
  : buffer_(NULL),
    buffer_end_(NULL),
    input_(input),
    total_bytes_read_(0),
    overflow_bytes_(0),
    last_tag_(0),
    legitimate_message_end_(false),
    aliasing_enabled_(false),
    current_limit_(kint32max),
    buffer_size_after_limit_(0),
    total_bytes_limit_(kDefaultTotalBytesLimit),
    total_bytes_warning_threshold_(kDefaultTotalBytesWarningThreshold),
    recursion_budget_(default_recursion_limit_),
    recursion_limit_(default_recursion_limit_),
    extension_pool_(NULL),
    extension_factory_(NULL) {
  // Eagerly Refresh() so buffer space is immediately available.
  Refresh();
}
1259
// Constructs a CodedInputStream that reads directly from the flat array
// [buffer, buffer + size). There is no underlying stream (input_ is NULL, so
// IsFlat() returns true); the whole array counts as already buffered, which
// is why total_bytes_read_ and current_limit_ both start at size.
inline CodedInputStream::CodedInputStream(const uint8* buffer, int size)
  : buffer_(buffer),
    buffer_end_(buffer + size),
    input_(NULL),
    total_bytes_read_(size),
    overflow_bytes_(0),
    last_tag_(0),
    legitimate_message_end_(false),
    aliasing_enabled_(false),
    current_limit_(size),
    buffer_size_after_limit_(0),
    total_bytes_limit_(kDefaultTotalBytesLimit),
    total_bytes_warning_threshold_(kDefaultTotalBytesWarningThreshold),
    recursion_budget_(default_recursion_limit_),
    recursion_limit_(default_recursion_limit_),
    extension_pool_(NULL),
    extension_factory_(NULL) {
  // Note that setting current_limit_ == size is important to prevent some
  // code paths from trying to access input_ and segfaulting.
}
1280
IsFlat()1281 inline bool CodedInputStream::IsFlat() const {
1282 return input_ == NULL;
1283 }
1284
1285 } // namespace io
1286 } // namespace protobuf
1287
1288
1289 #if _MSC_VER >= 1300 && !defined(__INTEL_COMPILER)
1290 #pragma runtime_checks("c", restore)
1291 #endif // _MSC_VER && !defined(__INTEL_COMPILER)
1292
1293 } // namespace google
1294 #endif // GOOGLE_PROTOBUF_IO_CODED_STREAM_H__
1295