1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc. All rights reserved.
3 // https://developers.google.com/protocol-buffers/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 // * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 // * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 // * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31 // Author: kenton@google.com (Kenton Varda)
32 // Based on original Protocol Buffers design by
33 // Sanjay Ghemawat, Jeff Dean, and others.
34 //
35 // This file contains the CodedInputStream and CodedOutputStream classes,
36 // which wrap a ZeroCopyInputStream or ZeroCopyOutputStream, respectively,
37 // and allow you to read or write individual pieces of data in various
38 // formats. In particular, these implement the varint encoding for
39 // integers, a simple variable-length encoding in which smaller numbers
40 // take fewer bytes.
41 //
42 // Typically these classes will only be used internally by the protocol
43 // buffer library in order to encode and decode protocol buffers. Clients
44 // of the library only need to know about this class if they wish to write
45 // custom message parsing or serialization procedures.
46 //
47 // CodedOutputStream example:
48 // // Write some data to "myfile". First we write a 4-byte "magic number"
49 // // to identify the file type, then write a length-delimited string. The
50 // // string is composed of a varint giving the length followed by the raw
51 // // bytes.
52 // int fd = open("myfile", O_CREAT | O_WRONLY);
53 // ZeroCopyOutputStream* raw_output = new FileOutputStream(fd);
54 // CodedOutputStream* coded_output = new CodedOutputStream(raw_output);
55 //
56 // int magic_number = 1234;
57 // char text[] = "Hello world!";
58 // coded_output->WriteLittleEndian32(magic_number);
59 // coded_output->WriteVarint32(strlen(text));
60 // coded_output->WriteRaw(text, strlen(text));
61 //
62 // delete coded_output;
63 // delete raw_output;
64 // close(fd);
65 //
66 // CodedInputStream example:
67 // // Read a file created by the above code.
68 // int fd = open("myfile", O_RDONLY);
69 // ZeroCopyInputStream* raw_input = new FileInputStream(fd);
70 // CodedInputStream* coded_input = new CodedInputStream(raw_input);
71 //
72 // coded_input->ReadLittleEndian32(&magic_number);
73 // if (magic_number != 1234) {
74 // cerr << "File not in expected format." << endl;
75 // return;
76 // }
77 //
78 // uint32 size;
79 // coded_input->ReadVarint32(&size);
80 //
81 // char* text = new char[size + 1];
82 // coded_input->ReadRaw(text, size);
83 // text[size] = '\0';
84 //
85 // delete coded_input;
86 // delete raw_input;
87 // close(fd);
88 //
89 // cout << "Text is: " << text << endl;
90 // delete [] text;
91 //
92 // For those who are interested, varint encoding is defined as follows:
93 //
94 // The encoding operates on unsigned integers of up to 64 bits in length.
95 // Each byte of the encoded value has the format:
96 // * bits 0-6: Seven bits of the number being encoded.
97 // * bit 7: Zero if this is the last byte in the encoding (in which
98 // case all remaining bits of the number are zero) or 1 if
99 // more bytes follow.
100 // The first byte contains the least-significant 7 bits of the number, the
101 // second byte (if present) contains the next-least-significant 7 bits,
102 // and so on. So, the binary number 1011000101011 would be encoded in two
103 // bytes as "10101011 00101100".
104 //
105 // In theory, varint could be used to encode integers of any length.
106 // However, for practicality we set a limit at 64 bits. The maximum encoded
107 // length of a number is thus 10 bytes.
108
109 #ifndef GOOGLE_PROTOBUF_IO_CODED_STREAM_H__
110 #define GOOGLE_PROTOBUF_IO_CODED_STREAM_H__
111
112 #include <assert.h>
113 #include <atomic>
114 #include <climits>
115 #include <string>
116 #include <utility>
117 #ifdef _MSC_VER
118 // Assuming windows is always little-endian.
119 #if !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST)
120 #define PROTOBUF_LITTLE_ENDIAN 1
121 #endif
122 #if _MSC_VER >= 1300 && !defined(__INTEL_COMPILER)
123 // If MSVC has "/RTCc" set, it will complain about truncating casts at
124 // runtime. This file contains some intentional truncating casts.
125 #pragma runtime_checks("c", off)
126 #endif
127 #else
128 #include <sys/param.h> // __BYTE_ORDER
129 #if ((defined(__LITTLE_ENDIAN__) && !defined(__BIG_ENDIAN__)) || \
130 (defined(__BYTE_ORDER) && __BYTE_ORDER == __LITTLE_ENDIAN)) && \
131 !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST)
132 #define PROTOBUF_LITTLE_ENDIAN 1
133 #endif
134 #endif
135 #include <google/protobuf/stubs/common.h>
136 #include <google/protobuf/port.h>
137 #include <google/protobuf/stubs/port.h>
138
139
140 #include <google/protobuf/port_def.inc>
141
142 namespace google {
143 namespace protobuf {
144
145 class DescriptorPool;
146 class MessageFactory;
147 class ZeroCopyCodedInputStream;
148
149 namespace internal {
150 void MapTestForceDeterministic();
151 class EpsCopyByteStream;
152 } // namespace internal
153
154 namespace io {
155
156 // Defined in this file.
157 class CodedInputStream;
158 class CodedOutputStream;
159
160 // Defined in other files.
161 class ZeroCopyInputStream; // zero_copy_stream.h
162 class ZeroCopyOutputStream; // zero_copy_stream.h
163
164 // Class which reads and decodes binary data which is composed of varint-
165 // encoded integers and fixed-width pieces. Wraps a ZeroCopyInputStream.
166 // Most users will not need to deal with CodedInputStream.
167 //
168 // Most methods of CodedInputStream that return a bool return false if an
169 // underlying I/O error occurs or if the data is malformed. Once such a
170 // failure occurs, the CodedInputStream is broken and is no longer useful.
171 // After a failure, callers also should assume writes to "out" args may have
172 // occurred, though nothing useful can be determined from those writes.
173 class PROTOBUF_EXPORT CodedInputStream {
174 public:
175 // Create a CodedInputStream that reads from the given ZeroCopyInputStream.
176 explicit CodedInputStream(ZeroCopyInputStream* input);
177
178 // Create a CodedInputStream that reads from the given flat array. This is
179 // faster than using an ArrayInputStream. PushLimit(size) is implied by
180 // this constructor.
181 explicit CodedInputStream(const uint8* buffer, int size);
182
183 // Destroy the CodedInputStream and position the underlying
184 // ZeroCopyInputStream at the first unread byte. If an error occurred while
185 // reading (causing a method to return false), then the exact position of
186 // the input stream may be anywhere between the last value that was read
187 // successfully and the stream's byte limit.
188 ~CodedInputStream();
189
190 // Return true if this CodedInputStream reads from a flat array instead of
191 // a ZeroCopyInputStream.
192 inline bool IsFlat() const;
193
194 // Skips a number of bytes. Returns false if an underlying read error
195 // occurs.
196 inline bool Skip(int count);
197
198 // Sets *data to point directly at the unread part of the CodedInputStream's
199 // underlying buffer, and *size to the size of that buffer, but does not
200 // advance the stream's current position. This will always either produce
201 // a non-empty buffer or return false. If the caller consumes any of
202 // this data, it should then call Skip() to skip over the consumed bytes.
203 // This may be useful for implementing external fast parsing routines for
204 // types of data not covered by the CodedInputStream interface.
205 bool GetDirectBufferPointer(const void** data, int* size);
206
207 // Like GetDirectBufferPointer, but this method is inlined, and does not
208 // attempt to Refresh() if the buffer is currently empty.
209 PROTOBUF_ALWAYS_INLINE
210 void GetDirectBufferPointerInline(const void** data, int* size);
211
212 // Read raw bytes, copying them into the given buffer.
213 bool ReadRaw(void* buffer, int size);
214
215 // Like the above, with inlined optimizations. This should only be used
216 // by the protobuf implementation.
217 PROTOBUF_ALWAYS_INLINE
218 bool InternalReadRawInline(void* buffer, int size);
219
220 // Like ReadRaw, but reads into a string.
221 bool ReadString(std::string* buffer, int size);
222 // Like the above, with inlined optimizations. This should only be used
223 // by the protobuf implementation.
224 PROTOBUF_ALWAYS_INLINE
225 bool InternalReadStringInline(std::string* buffer, int size);
226
227
228 // Read a 32-bit little-endian integer.
229 bool ReadLittleEndian32(uint32* value);
230 // Read a 64-bit little-endian integer.
231 bool ReadLittleEndian64(uint64* value);
232
233 // These methods read from an externally provided buffer. The caller is
234 // responsible for ensuring that the buffer has sufficient space.
235 // Read a 32-bit little-endian integer.
236 static const uint8* ReadLittleEndian32FromArray(const uint8* buffer,
237 uint32* value);
238 // Read a 64-bit little-endian integer.
239 static const uint8* ReadLittleEndian64FromArray(const uint8* buffer,
240 uint64* value);
241
242 // Read an unsigned integer with Varint encoding, truncating to 32 bits.
243 // Reading a 32-bit value is equivalent to reading a 64-bit one and casting
244 // it to uint32, but may be more efficient.
245 bool ReadVarint32(uint32* value);
246 // Read an unsigned integer with Varint encoding.
247 bool ReadVarint64(uint64* value);
248
249 // Reads a varint off the wire into an "int". This should be used for reading
250 // sizes off the wire (sizes of strings, submessages, bytes fields, etc).
251 //
252 // The value from the wire is interpreted as unsigned. If its value exceeds
253 // the representable value of an integer on this platform, instead of
254 // truncating we return false. Truncating (as performed by ReadVarint32()
255 // above) is an acceptable approach for fields representing an integer, but
256 // when we are parsing a size from the wire, truncating the value would result
257 // in us misparsing the payload.
258 bool ReadVarintSizeAsInt(int* value);
259
260 // Read a tag. This calls ReadVarint32() and returns the result, or returns
261 // zero (which is not a valid tag) if ReadVarint32() fails. Also, ReadTag
262 // (but not ReadTagNoLastTag) updates the last tag value, which can be checked
263 // with LastTagWas().
264 //
265 // Always inline because this is only called in one place per parse loop
266 // but it is called for every iteration of said loop, so it should be fast.
267 // GCC doesn't want to inline this by default.
ReadTag()268 PROTOBUF_ALWAYS_INLINE uint32 ReadTag() {
269 return last_tag_ = ReadTagNoLastTag();
270 }
271
272 PROTOBUF_ALWAYS_INLINE uint32 ReadTagNoLastTag();
273
274 // This usually a faster alternative to ReadTag() when cutoff is a manifest
275 // constant. It does particularly well for cutoff >= 127. The first part
276 // of the return value is the tag that was read, though it can also be 0 in
277 // the cases where ReadTag() would return 0. If the second part is true
278 // then the tag is known to be in [0, cutoff]. If not, the tag either is
279 // above cutoff or is 0. (There's intentional wiggle room when tag is 0,
280 // because that can arise in several ways, and for best performance we want
281 // to avoid an extra "is tag == 0?" check here.)
282 PROTOBUF_ALWAYS_INLINE
ReadTagWithCutoff(uint32 cutoff)283 std::pair<uint32, bool> ReadTagWithCutoff(uint32 cutoff) {
284 std::pair<uint32, bool> result = ReadTagWithCutoffNoLastTag(cutoff);
285 last_tag_ = result.first;
286 return result;
287 }
288
289 PROTOBUF_ALWAYS_INLINE
290 std::pair<uint32, bool> ReadTagWithCutoffNoLastTag(uint32 cutoff);
291
292 // Usually returns true if calling ReadVarint32() now would produce the given
293 // value. Will always return false if ReadVarint32() would not return the
294 // given value. If ExpectTag() returns true, it also advances past
295 // the varint. For best performance, use a compile-time constant as the
296 // parameter.
297 // Always inline because this collapses to a small number of instructions
298 // when given a constant parameter, but GCC doesn't want to inline by default.
299 PROTOBUF_ALWAYS_INLINE bool ExpectTag(uint32 expected);
300
301 // Like above, except this reads from the specified buffer. The caller is
302 // responsible for ensuring that the buffer is large enough to read a varint
303 // of the expected size. For best performance, use a compile-time constant as
304 // the expected tag parameter.
305 //
306 // Returns a pointer beyond the expected tag if it was found, or NULL if it
307 // was not.
308 PROTOBUF_ALWAYS_INLINE
309 static const uint8* ExpectTagFromArray(const uint8* buffer, uint32 expected);
310
311 // Usually returns true if no more bytes can be read. Always returns false
312 // if more bytes can be read. If ExpectAtEnd() returns true, a subsequent
313 // call to LastTagWas() will act as if ReadTag() had been called and returned
314 // zero, and ConsumedEntireMessage() will return true.
315 bool ExpectAtEnd();
316
317 // If the last call to ReadTag() or ReadTagWithCutoff() returned the given
318 // value, returns true. Otherwise, returns false.
319 // ReadTagNoLastTag/ReadTagWithCutoffNoLastTag do not preserve the last
320 // returned value.
321 //
322 // This is needed because parsers for some types of embedded messages
323 // (with field type TYPE_GROUP) don't actually know that they've reached the
324 // end of a message until they see an ENDGROUP tag, which was actually part
325 // of the enclosing message. The enclosing message would like to check that
326 // tag to make sure it had the right number, so it calls LastTagWas() on
327 // return from the embedded parser to check.
328 bool LastTagWas(uint32 expected);
SetLastTag(uint32 tag)329 void SetLastTag(uint32 tag) { last_tag_ = tag; }
330
331 // When parsing a message (but NOT a group), this method must be called
332 // immediately after MergeFromCodedStream() returns (if it returns true)
333 // to further verify that the message ended in a legitimate way. For
334 // example, this verifies that parsing did not end on an end-group tag.
335 // It also checks for some cases where, due to optimizations,
336 // MergeFromCodedStream() can incorrectly return true.
337 bool ConsumedEntireMessage();
SetConsumed()338 void SetConsumed() { legitimate_message_end_ = true; }
339
340 // Limits ----------------------------------------------------------
341 // Limits are used when parsing length-delimited embedded messages.
342 // After the message's length is read, PushLimit() is used to prevent
343 // the CodedInputStream from reading beyond that length. Once the
344 // embedded message has been parsed, PopLimit() is called to undo the
345 // limit.
346
347 // Opaque type used with PushLimit() and PopLimit(). Do not modify
348 // values of this type yourself. The only reason that this isn't a
349 // struct with private internals is for efficiency.
350 typedef int Limit;
351
352 // Places a limit on the number of bytes that the stream may read,
353 // starting from the current position. Once the stream hits this limit,
354 // it will act like the end of the input has been reached until PopLimit()
355 // is called.
356 //
357 // As the names imply, the stream conceptually has a stack of limits. The
358 // shortest limit on the stack is always enforced, even if it is not the
359 // top limit.
360 //
361 // The value returned by PushLimit() is opaque to the caller, and must
362 // be passed unchanged to the corresponding call to PopLimit().
363 Limit PushLimit(int byte_limit);
364
365 // Pops the last limit pushed by PushLimit(). The input must be the value
366 // returned by that call to PushLimit().
367 void PopLimit(Limit limit);
368
369 // Returns the number of bytes left until the nearest limit on the
370 // stack is hit, or -1 if no limits are in place.
371 int BytesUntilLimit() const;
372
373 // Returns current position relative to the beginning of the input stream.
374 int CurrentPosition() const;
375
376 // Total Bytes Limit -----------------------------------------------
377 // To prevent malicious users from sending excessively large messages
378 // and causing memory exhaustion, CodedInputStream imposes a hard limit on
379 // the total number of bytes it will read.
380
381 // Sets the maximum number of bytes that this CodedInputStream will read
382 // before refusing to continue. To prevent servers from allocating enormous
383 // amounts of memory to hold parsed messages, the maximum message length
384 // should be limited to the shortest length that will not harm usability.
385 // The default limit is INT_MAX (~2GB) and apps should set shorter limits
386 // if possible. An error will always be printed to stderr if the limit is
387 // reached.
388 //
389 // Note: setting a limit less than the current read position is interpreted
390 // as a limit on the current position.
391 //
392 // This is unrelated to PushLimit()/PopLimit().
393 void SetTotalBytesLimit(int total_bytes_limit);
394
395 PROTOBUF_DEPRECATED_MSG(
396 "Please use the single parameter version of SetTotalBytesLimit(). The "
397 "second parameter is ignored.")
SetTotalBytesLimit(int total_bytes_limit,int)398 void SetTotalBytesLimit(int total_bytes_limit, int) {
399 SetTotalBytesLimit(total_bytes_limit);
400 }
401
402 // The Total Bytes Limit minus the Current Position, or -1 if the total bytes
403 // limit is INT_MAX.
404 int BytesUntilTotalBytesLimit() const;
405
406 // Recursion Limit -------------------------------------------------
407 // To prevent corrupt or malicious messages from causing stack overflows,
408 // we must keep track of the depth of recursion when parsing embedded
409 // messages and groups. CodedInputStream keeps track of this because it
410 // is the only object that is passed down the stack during parsing.
411
412 // Sets the maximum recursion depth. The default is 100.
413 void SetRecursionLimit(int limit);
RecursionBudget()414 int RecursionBudget() { return recursion_budget_; }
415
GetDefaultRecursionLimit()416 static int GetDefaultRecursionLimit() { return default_recursion_limit_; }
417
418 // Increments the current recursion depth. Returns true if the depth is
419 // under the limit, false if it has gone over.
420 bool IncrementRecursionDepth();
421
422 // Decrements the recursion depth if possible.
423 void DecrementRecursionDepth();
424
425 // Decrements the recursion depth blindly. This is faster than
426 // DecrementRecursionDepth(). It should be used only if all previous
427 // increments to recursion depth were successful.
428 void UnsafeDecrementRecursionDepth();
429
430 // Shorthand for make_pair(PushLimit(byte_limit), --recursion_budget_).
431 // Using this can reduce code size and complexity in some cases. The caller
432 // is expected to check that the second part of the result is non-negative (to
433 // bail out if the depth of recursion is too high) and, if all is well, to
434 // later pass the first part of the result to PopLimit() or similar.
435 std::pair<CodedInputStream::Limit, int> IncrementRecursionDepthAndPushLimit(
436 int byte_limit);
437
438 // Shorthand for PushLimit(ReadVarint32(&length) ? length : 0).
439 Limit ReadLengthAndPushLimit();
440
441 // Helper that is equivalent to: {
442 // bool result = ConsumedEntireMessage();
443 // PopLimit(limit);
444 // UnsafeDecrementRecursionDepth();
445 // return result; }
446 // Using this can reduce code size and complexity in some cases.
447 // Do not use unless the current recursion depth is greater than zero.
448 bool DecrementRecursionDepthAndPopLimit(Limit limit);
449
450 // Helper that is equivalent to: {
451 // bool result = ConsumedEntireMessage();
452 // PopLimit(limit);
453 // return result; }
454 // Using this can reduce code size and complexity in some cases.
455 bool CheckEntireMessageConsumedAndPopLimit(Limit limit);
456
457 // Extension Registry ----------------------------------------------
458 // ADVANCED USAGE: 99.9% of people can ignore this section.
459 //
460 // By default, when parsing extensions, the parser looks for extension
461 // definitions in the pool which owns the outer message's Descriptor.
462 // However, you may call SetExtensionRegistry() to provide an alternative
463 // pool instead. This makes it possible, for example, to parse a message
464 // using a generated class, but represent some extensions using
465 // DynamicMessage.
466
467 // Set the pool used to look up extensions. Most users do not need to call
468 // this as the correct pool will be chosen automatically.
469 //
470 // WARNING: It is very easy to misuse this. Carefully read the requirements
471 // below. Do not use this unless you are sure you need it. Almost no one
472 // does.
473 //
474 // Let's say you are parsing a message into message object m, and you want
475 // to take advantage of SetExtensionRegistry(). You must follow these
476 // requirements:
477 //
478 // The given DescriptorPool must contain m->GetDescriptor(). It is not
479 // sufficient for it to simply contain a descriptor that has the same name
480 // and content -- it must be the *exact object*. In other words:
481 // assert(pool->FindMessageTypeByName(m->GetDescriptor()->full_name()) ==
482 // m->GetDescriptor());
483 // There are two ways to satisfy this requirement:
484 // 1) Use m->GetDescriptor()->pool() as the pool. This is generally useless
485 // because this is the pool that would be used anyway if you didn't call
486 // SetExtensionRegistry() at all.
487 // 2) Use a DescriptorPool which has m->GetDescriptor()->pool() as an
488 // "underlay". Read the documentation for DescriptorPool for more
489 // information about underlays.
490 //
491 // You must also provide a MessageFactory. This factory will be used to
492 // construct Message objects representing extensions. The factory's
493 // GetPrototype() MUST return non-NULL for any Descriptor which can be found
494 // through the provided pool.
495 //
496 // If the provided factory might return instances of protocol-compiler-
497 // generated (i.e. compiled-in) types, or if the outer message object m is
498 // a generated type, then the given factory MUST have this property: If
499 // GetPrototype() is given a Descriptor which resides in
500 // DescriptorPool::generated_pool(), the factory MUST return the same
501 // prototype which MessageFactory::generated_factory() would return. That
502 // is, given a descriptor for a generated type, the factory must return an
503 // instance of the generated class (NOT DynamicMessage). However, when
504 // given a descriptor for a type that is NOT in generated_pool, the factory
505 // is free to return any implementation.
506 //
507 // The reason for this requirement is that generated sub-objects may be
508 // accessed via the standard (non-reflection) extension accessor methods,
509 // and these methods will down-cast the object to the generated class type.
510 // If the object is not actually of that type, the results would be undefined.
511 // On the other hand, if an extension is not compiled in, then there is no
512 // way the code could end up accessing it via the standard accessors -- the
513 // only way to access the extension is via reflection. When using reflection,
514 // DynamicMessage and generated messages are indistinguishable, so it's fine
515 // if these objects are represented using DynamicMessage.
516 //
517 // Using DynamicMessageFactory on which you have called
518 // SetDelegateToGeneratedFactory(true) should be sufficient to satisfy the
519 // above requirement.
520 //
521 // If either pool or factory is NULL, both must be NULL.
522 //
523 // Note that this feature is ignored when parsing "lite" messages as they do
524 // not have descriptors.
525 void SetExtensionRegistry(const DescriptorPool* pool,
526 MessageFactory* factory);
527
528 // Get the DescriptorPool set via SetExtensionRegistry(), or NULL if no pool
529 // has been provided.
530 const DescriptorPool* GetExtensionPool();
531
532 // Get the MessageFactory set via SetExtensionRegistry(), or NULL if no
533 // factory has been provided.
534 MessageFactory* GetExtensionFactory();
535
536 private:
537 GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(CodedInputStream);
538
539 const uint8* buffer_;
540 const uint8* buffer_end_; // pointer to the end of the buffer.
541 ZeroCopyInputStream* input_;
542 int total_bytes_read_; // total bytes read from input_, including
543 // the current buffer
544
545 // If total_bytes_read_ surpasses INT_MAX, we record the extra bytes here
546 // so that we can BackUp() on destruction.
547 int overflow_bytes_;
548
549 // LastTagWas() stuff.
550 uint32 last_tag_; // result of last ReadTag() or ReadTagWithCutoff().
551
552 // This is set true by ReadTag{Fallback/Slow}() if it is called when exactly
553 // at EOF, or by ExpectAtEnd() when it returns true. This happens when we
554 // reach the end of a message and attempt to read another tag.
555 bool legitimate_message_end_;
556
557 // See EnableAliasing().
558 bool aliasing_enabled_;
559
560 // Limits
561 Limit current_limit_; // if position = -1, no limit is applied
562
563 // For simplicity, if the current buffer crosses a limit (either a normal
564 // limit created by PushLimit() or the total bytes limit), buffer_size_
565 // only tracks the number of bytes before that limit. This field
566 // contains the number of bytes after it. Note that this implies that if
567 // buffer_size_ == 0 and buffer_size_after_limit_ > 0, we know we've
568 // hit a limit. However, if both are zero, it doesn't necessarily mean
569 // we aren't at a limit -- the buffer may have ended exactly at the limit.
570 int buffer_size_after_limit_;
571
572 // Maximum number of bytes to read, period. This is unrelated to
573 // current_limit_. Set using SetTotalBytesLimit().
574 int total_bytes_limit_;
575
576 // Current recursion budget, controlled by IncrementRecursionDepth() and
577 // similar. Starts at recursion_limit_ and goes down: if this reaches
578 // -1 we are over budget.
579 int recursion_budget_;
580 // Recursion depth limit, set by SetRecursionLimit().
581 int recursion_limit_;
582
583 // See SetExtensionRegistry().
584 const DescriptorPool* extension_pool_;
585 MessageFactory* extension_factory_;
586
587 // Private member functions.
588
589 // Fallback when Skip() goes past the end of the current buffer.
590 bool SkipFallback(int count, int original_buffer_size);
591
592 // Advance the buffer by a given number of bytes.
593 void Advance(int amount);
594
595 // Back up input_ to the current buffer position.
596 void BackUpInputToCurrentPosition();
597
598 // Recomputes the value of buffer_size_after_limit_. Must be called after
599 // current_limit_ or total_bytes_limit_ changes.
600 void RecomputeBufferLimits();
601
602 // Writes an error message saying that we hit total_bytes_limit_.
603 void PrintTotalBytesLimitError();
604
605 // Called when the buffer runs out to request more data. Implies an
606 // Advance(BufferSize()).
607 bool Refresh();
608
609 // When parsing varints, we optimize for the common case of small values, and
610 // then optimize for the case when the varint fits within the current buffer
611 // piece. The Fallback method is used when we can't use the one-byte
612 // optimization. The Slow method is yet another fallback when the buffer is
613 // not large enough. Making the slow path out-of-line speeds up the common
614 // case by 10-15%. The slow path is fairly uncommon: it only triggers when a
615 // message crosses multiple buffers. Note: ReadVarint32Fallback() and
616 // ReadVarint64Fallback() are called frequently and generally not inlined, so
617 // they have been optimized to avoid "out" parameters. The former returns -1
618 // if it fails and the uint32 it read otherwise. The latter has a bool
619 // indicating success or failure as part of its return type.
620 int64 ReadVarint32Fallback(uint32 first_byte_or_zero);
621 int ReadVarintSizeAsIntFallback();
622 std::pair<uint64, bool> ReadVarint64Fallback();
623 bool ReadVarint32Slow(uint32* value);
624 bool ReadVarint64Slow(uint64* value);
625 int ReadVarintSizeAsIntSlow();
626 bool ReadLittleEndian32Fallback(uint32* value);
627 bool ReadLittleEndian64Fallback(uint64* value);
628
629 // Fallback/slow methods for reading tags. These do not update last_tag_,
630 // but will set legitimate_message_end_ if we are at the end of the input
631 // stream.
632 uint32 ReadTagFallback(uint32 first_byte_or_zero);
633 uint32 ReadTagSlow();
634 bool ReadStringFallback(std::string* buffer, int size);
635
636 // Return the size of the buffer.
637 int BufferSize() const;
638
639 static const int kDefaultTotalBytesLimit = INT_MAX;
640
641 static int default_recursion_limit_; // 100 by default.
642
643 friend class google::protobuf::ZeroCopyCodedInputStream;
644 friend class google::protobuf::internal::EpsCopyByteStream;
645 };
646
647 // Class which encodes and writes binary data which is composed of varint-
648 // encoded integers and fixed-width pieces. Wraps a ZeroCopyOutputStream.
649 // Most users will not need to deal with CodedOutputStream.
650 //
651 // Most methods of CodedOutputStream which return a bool return false if an
652 // underlying I/O error occurs. Once such a failure occurs, the
653 // CodedOutputStream is broken and is no longer useful. The Write* methods do
654 // not return the stream status, but will invalidate the stream if an error
655 // occurs. The client can probe HadError() to determine the status.
656 //
657 // Note that every method of CodedOutputStream which writes some data has
658 // a corresponding static "ToArray" version. These versions write directly
659 // to the provided buffer, returning a pointer past the last written byte.
660 // They require that the buffer has sufficient capacity for the encoded data.
661 // This allows an optimization where we check if an output stream has enough
662 // space for an entire message before we start writing and, if there is, we
663 // call only the ToArray methods to avoid doing bound checks for each
664 // individual value.
665 // i.e., in the example above:
666 //
667 // CodedOutputStream* coded_output = new CodedOutputStream(raw_output);
668 // int magic_number = 1234;
669 // char text[] = "Hello world!";
670 //
671 // int coded_size = sizeof(magic_number) +
672 // CodedOutputStream::VarintSize32(strlen(text)) +
673 // strlen(text);
674 //
675 // uint8* buffer =
676 // coded_output->GetDirectBufferForNBytesAndAdvance(coded_size);
677 // if (buffer != NULL) {
678 // // The output stream has enough space in the buffer: write directly to
679 // // the array.
680 // buffer = CodedOutputStream::WriteLittleEndian32ToArray(magic_number,
681 // buffer);
682 // buffer = CodedOutputStream::WriteVarint32ToArray(strlen(text), buffer);
683 // buffer = CodedOutputStream::WriteRawToArray(text, strlen(text), buffer);
684 // } else {
685 // // Make bound-checked writes, which will ask the underlying stream for
686 // // more space as needed.
687 // coded_output->WriteLittleEndian32(magic_number);
688 // coded_output->WriteVarint32(strlen(text));
689 // coded_output->WriteRaw(text, strlen(text));
690 // }
691 //
692 // delete coded_output;
693 class PROTOBUF_EXPORT CodedOutputStream {
694 public:
695 // Create an CodedOutputStream that writes to the given ZeroCopyOutputStream.
696 explicit CodedOutputStream(ZeroCopyOutputStream* output);
697 CodedOutputStream(ZeroCopyOutputStream* output, bool do_eager_refresh);
698
699 // Destroy the CodedOutputStream and position the underlying
700 // ZeroCopyOutputStream immediately after the last byte written.
701 ~CodedOutputStream();
702
703 // Trims any unused space in the underlying buffer so that its size matches
704 // the number of bytes written by this stream. The underlying buffer will
705 // automatically be trimmed when this stream is destroyed; this call is only
706 // necessary if the underlying buffer is accessed *before* the stream is
707 // destroyed.
708 void Trim();
709
710 // Skips a number of bytes, leaving the bytes unmodified in the underlying
711 // buffer. Returns false if an underlying write error occurs. This is
712 // mainly useful with GetDirectBufferPointer().
713 // Note of caution, the skipped bytes may contain uninitialized data. The
714 // caller must make sure that the skipped bytes are properly initialized,
715 // otherwise you might leak bytes from your heap.
716 bool Skip(int count);
717
718 // Sets *data to point directly at the unwritten part of the
719 // CodedOutputStream's underlying buffer, and *size to the size of that
720 // buffer, but does not advance the stream's current position. This will
721 // always either produce a non-empty buffer or return false. If the caller
722 // writes any data to this buffer, it should then call Skip() to skip over
723 // the consumed bytes. This may be useful for implementing external fast
724 // serialization routines for types of data not covered by the
725 // CodedOutputStream interface.
726 bool GetDirectBufferPointer(void** data, int* size);
727
728 // If there are at least "size" bytes available in the current buffer,
729 // returns a pointer directly into the buffer and advances over these bytes.
730 // The caller may then write directly into this buffer (e.g. using the
731 // *ToArray static methods) rather than go through CodedOutputStream. If
732 // there are not enough bytes available, returns NULL. The return pointer is
733 // invalidated as soon as any other non-const method of CodedOutputStream
734 // is called.
735 inline uint8* GetDirectBufferForNBytesAndAdvance(int size);
736
737 // Write raw bytes, copying them from the given buffer.
738 void WriteRaw(const void* buffer, int size);
739 // Like WriteRaw() but will try to write aliased data if aliasing is
740 // turned on.
741 void WriteRawMaybeAliased(const void* data, int size);
742 // Like WriteRaw() but writing directly to the target array.
743 // This is _not_ inlined, as the compiler often optimizes memcpy into inline
744 // copy loops. Since this gets called by every field with string or bytes
745 // type, inlining may lead to a significant amount of code bloat, with only a
746 // minor performance gain.
747 static uint8* WriteRawToArray(const void* buffer, int size, uint8* target);
748
749 // Equivalent to WriteRaw(str.data(), str.size()).
750 void WriteString(const std::string& str);
751 // Like WriteString() but writing directly to the target array.
752 static uint8* WriteStringToArray(const std::string& str, uint8* target);
753 // Write the varint-encoded size of str followed by str.
754 static uint8* WriteStringWithSizeToArray(const std::string& str,
755 uint8* target);
756
757
758 // Instructs the CodedOutputStream to allow the underlying
759 // ZeroCopyOutputStream to hold pointers to the original structure instead of
760 // copying, if it supports it (i.e. output->AllowsAliasing() is true). If the
761 // underlying stream does not support aliasing, then enabling it has no
762 // affect. For now, this only affects the behavior of
763 // WriteRawMaybeAliased().
764 //
765 // NOTE: It is caller's responsibility to ensure that the chunk of memory
766 // remains live until all of the data has been consumed from the stream.
767 void EnableAliasing(bool enabled);
768
769 // Write a 32-bit little-endian integer.
770 void WriteLittleEndian32(uint32 value);
771 // Like WriteLittleEndian32() but writing directly to the target array.
772 static uint8* WriteLittleEndian32ToArray(uint32 value, uint8* target);
773 // Write a 64-bit little-endian integer.
774 void WriteLittleEndian64(uint64 value);
775 // Like WriteLittleEndian64() but writing directly to the target array.
776 static uint8* WriteLittleEndian64ToArray(uint64 value, uint8* target);
777
778 // Write an unsigned integer with Varint encoding. Writing a 32-bit value
779 // is equivalent to casting it to uint64 and writing it as a 64-bit value,
780 // but may be more efficient.
781 void WriteVarint32(uint32 value);
782 // Like WriteVarint32() but writing directly to the target array.
783 static uint8* WriteVarint32ToArray(uint32 value, uint8* target);
784 // Write an unsigned integer with Varint encoding.
785 void WriteVarint64(uint64 value);
786 // Like WriteVarint64() but writing directly to the target array.
787 static uint8* WriteVarint64ToArray(uint64 value, uint8* target);
788
789 // Equivalent to WriteVarint32() except when the value is negative,
790 // in which case it must be sign-extended to a full 10 bytes.
791 void WriteVarint32SignExtended(int32 value);
792 // Like WriteVarint32SignExtended() but writing directly to the target array.
793 static uint8* WriteVarint32SignExtendedToArray(int32 value, uint8* target);
794
795 // This is identical to WriteVarint32(), but optimized for writing tags.
796 // In particular, if the input is a compile-time constant, this method
797 // compiles down to a couple instructions.
798 // Always inline because otherwise the aformentioned optimization can't work,
799 // but GCC by default doesn't want to inline this.
800 void WriteTag(uint32 value);
801 // Like WriteTag() but writing directly to the target array.
802 PROTOBUF_ALWAYS_INLINE
803 static uint8* WriteTagToArray(uint32 value, uint8* target);
804
805 // Returns the number of bytes needed to encode the given value as a varint.
806 static size_t VarintSize32(uint32 value);
807 // Returns the number of bytes needed to encode the given value as a varint.
808 static size_t VarintSize64(uint64 value);
809
810 // If negative, 10 bytes. Otheriwse, same as VarintSize32().
811 static size_t VarintSize32SignExtended(int32 value);
812
813 // Compile-time equivalent of VarintSize32().
814 template <uint32 Value>
815 struct StaticVarintSize32 {
816 static const size_t value =
817 (Value < (1 << 7))
818 ? 1
819 : (Value < (1 << 14))
820 ? 2
821 : (Value < (1 << 21)) ? 3 : (Value < (1 << 28)) ? 4 : 5;
822 };
823
824 // Returns the total number of bytes written since this object was created.
825 inline int ByteCount() const;
826
827 // Returns true if there was an underlying I/O error since this object was
828 // created.
HadError()829 bool HadError() const { return had_error_; }
830
831 // Deterministic serialization, if requested, guarantees that for a given
832 // binary, equal messages will always be serialized to the same bytes. This
833 // implies:
834 // . repeated serialization of a message will return the same bytes
835 // . different processes of the same binary (which may be executing on
836 // different machines) will serialize equal messages to the same bytes.
837 //
838 // Note the deterministic serialization is NOT canonical across languages; it
839 // is also unstable across different builds with schema changes due to unknown
840 // fields. Users who need canonical serialization, e.g., persistent storage in
841 // a canonical form, fingerprinting, etc., should define their own
842 // canonicalization specification and implement the serializer using
843 // reflection APIs rather than relying on this API.
844 //
845 // If deterministic serialization is requested, the serializer will
846 // sort map entries by keys in lexicographical order or numerical order.
847 // (This is an implementation detail and may subject to change.)
848 //
849 // There are two ways to determine whether serialization should be
850 // deterministic for this CodedOutputStream. If SetSerializationDeterministic
851 // has not yet been called, then the default comes from the global default,
852 // which is false, until SetDefaultSerializationDeterministic has been called.
853 // Otherwise, SetSerializationDeterministic has been called, and the last
854 // value passed to it is all that matters.
SetSerializationDeterministic(bool value)855 void SetSerializationDeterministic(bool value) {
856 is_serialization_deterministic_ = value;
857 }
858 // See above. Also, note that users of this CodedOutputStream may need to
859 // call IsSerializationDeterministic() to serialize in the intended way. This
860 // CodedOutputStream cannot enforce a desire for deterministic serialization
861 // by itself.
IsSerializationDeterministic()862 bool IsSerializationDeterministic() const {
863 return is_serialization_deterministic_;
864 }
865
IsDefaultSerializationDeterministic()866 static bool IsDefaultSerializationDeterministic() {
867 return default_serialization_deterministic_.load(
868 std::memory_order_relaxed) != 0;
869 }
870
871 private:
872 GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(CodedOutputStream);
873
874 ZeroCopyOutputStream* output_;
875 uint8* buffer_;
876 int buffer_size_;
877 int total_bytes_; // Sum of sizes of all buffers seen so far.
878 bool had_error_; // Whether an error occurred during output.
879 bool aliasing_enabled_; // See EnableAliasing().
880 bool is_serialization_deterministic_;
881 static std::atomic<bool> default_serialization_deterministic_;
882
883 // Advance the buffer by a given number of bytes.
884 void Advance(int amount);
885
886 // Called when the buffer runs out to request more data. Implies an
887 // Advance(buffer_size_).
888 bool Refresh();
889
890 // Like WriteRaw() but may avoid copying if the underlying
891 // ZeroCopyOutputStream supports it.
892 void WriteAliasedRaw(const void* buffer, int size);
893
894 // If this write might cross the end of the buffer, we compose the bytes first
895 // then use WriteRaw().
896 void WriteVarint32SlowPath(uint32 value);
897 void WriteVarint64SlowPath(uint64 value);
898
899 // See above. Other projects may use "friend" to allow them to call this.
900 // After SetDefaultSerializationDeterministic() completes, all protocol
901 // buffer serializations will be deterministic by default. Thread safe.
902 // However, the meaning of "after" is subtle here: to be safe, each thread
903 // that wants deterministic serialization by default needs to call
904 // SetDefaultSerializationDeterministic() or ensure on its own that another
905 // thread has done so.
906 friend void internal::MapTestForceDeterministic();
SetDefaultSerializationDeterministic()907 static void SetDefaultSerializationDeterministic() {
908 default_serialization_deterministic_.store(true, std::memory_order_relaxed);
909 }
910 };
911
912 // inline methods ====================================================
913 // The vast majority of varints are only one byte. These inline
914 // methods optimize for that case.
915
ReadVarint32(uint32 * value)916 inline bool CodedInputStream::ReadVarint32(uint32* value) {
917 uint32 v = 0;
918 if (PROTOBUF_PREDICT_TRUE(buffer_ < buffer_end_)) {
919 v = *buffer_;
920 if (v < 0x80) {
921 *value = v;
922 Advance(1);
923 return true;
924 }
925 }
926 int64 result = ReadVarint32Fallback(v);
927 *value = static_cast<uint32>(result);
928 return result >= 0;
929 }
930
ReadVarint64(uint64 * value)931 inline bool CodedInputStream::ReadVarint64(uint64* value) {
932 if (PROTOBUF_PREDICT_TRUE(buffer_ < buffer_end_) && *buffer_ < 0x80) {
933 *value = *buffer_;
934 Advance(1);
935 return true;
936 }
937 std::pair<uint64, bool> p = ReadVarint64Fallback();
938 *value = p.first;
939 return p.second;
940 }
941
ReadVarintSizeAsInt(int * value)942 inline bool CodedInputStream::ReadVarintSizeAsInt(int* value) {
943 if (PROTOBUF_PREDICT_TRUE(buffer_ < buffer_end_)) {
944 int v = *buffer_;
945 if (v < 0x80) {
946 *value = v;
947 Advance(1);
948 return true;
949 }
950 }
951 *value = ReadVarintSizeAsIntFallback();
952 return *value >= 0;
953 }
954
955 // static
// Decodes a 32-bit little-endian integer from the four bytes at `buffer`
// into *value and returns a pointer just past the bytes consumed.
inline const uint8* CodedInputStream::ReadLittleEndian32FromArray(
    const uint8* buffer, uint32* value) {
#if defined(PROTOBUF_LITTLE_ENDIAN)
  // With PROTOBUF_LITTLE_ENDIAN defined the bytes are copied verbatim.
  memcpy(value, buffer, sizeof(*value));
#else
  // Otherwise assemble the value byte by byte, least significant first.
  const uint32 byte0 = buffer[0];
  const uint32 byte1 = buffer[1];
  const uint32 byte2 = buffer[2];
  const uint32 byte3 = buffer[3];
  *value = byte0 | (byte1 << 8) | (byte2 << 16) | (byte3 << 24);
#endif
  return buffer + sizeof(*value);
}
969 // static
// Decodes a 64-bit little-endian integer from the eight bytes at `buffer`
// into *value and returns a pointer just past the bytes consumed.
inline const uint8* CodedInputStream::ReadLittleEndian64FromArray(
    const uint8* buffer, uint64* value) {
#if defined(PROTOBUF_LITTLE_ENDIAN)
  // With PROTOBUF_LITTLE_ENDIAN defined the bytes are copied verbatim.
  memcpy(value, buffer, sizeof(*value));
#else
  // Decode the two 32-bit halves separately, then combine them.
  uint32 low_half = 0;
  uint32 high_half = 0;
  ReadLittleEndian32FromArray(buffer, &low_half);
  ReadLittleEndian32FromArray(buffer + 4, &high_half);
  *value = static_cast<uint64>(low_half) |
           (static_cast<uint64>(high_half) << 32);
#endif
  return buffer + sizeof(*value);
}
988
ReadLittleEndian32(uint32 * value)989 inline bool CodedInputStream::ReadLittleEndian32(uint32* value) {
990 #if defined(PROTOBUF_LITTLE_ENDIAN)
991 if (PROTOBUF_PREDICT_TRUE(BufferSize() >= static_cast<int>(sizeof(*value)))) {
992 buffer_ = ReadLittleEndian32FromArray(buffer_, value);
993 return true;
994 } else {
995 return ReadLittleEndian32Fallback(value);
996 }
997 #else
998 return ReadLittleEndian32Fallback(value);
999 #endif
1000 }
1001
ReadLittleEndian64(uint64 * value)1002 inline bool CodedInputStream::ReadLittleEndian64(uint64* value) {
1003 #if defined(PROTOBUF_LITTLE_ENDIAN)
1004 if (PROTOBUF_PREDICT_TRUE(BufferSize() >= static_cast<int>(sizeof(*value)))) {
1005 buffer_ = ReadLittleEndian64FromArray(buffer_, value);
1006 return true;
1007 } else {
1008 return ReadLittleEndian64Fallback(value);
1009 }
1010 #else
1011 return ReadLittleEndian64Fallback(value);
1012 #endif
1013 }
1014
ReadTagNoLastTag()1015 inline uint32 CodedInputStream::ReadTagNoLastTag() {
1016 uint32 v = 0;
1017 if (PROTOBUF_PREDICT_TRUE(buffer_ < buffer_end_)) {
1018 v = *buffer_;
1019 if (v < 0x80) {
1020 Advance(1);
1021 return v;
1022 }
1023 }
1024 v = ReadTagFallback(v);
1025 return v;
1026 }
1027
ReadTagWithCutoffNoLastTag(uint32 cutoff)1028 inline std::pair<uint32, bool> CodedInputStream::ReadTagWithCutoffNoLastTag(
1029 uint32 cutoff) {
1030 // In performance-sensitive code we can expect cutoff to be a compile-time
1031 // constant, and things like "cutoff >= kMax1ByteVarint" to be evaluated at
1032 // compile time.
1033 uint32 first_byte_or_zero = 0;
1034 if (PROTOBUF_PREDICT_TRUE(buffer_ < buffer_end_)) {
1035 // Hot case: buffer_ non_empty, buffer_[0] in [1, 128).
1036 // TODO(gpike): Is it worth rearranging this? E.g., if the number of fields
1037 // is large enough then is it better to check for the two-byte case first?
1038 first_byte_or_zero = buffer_[0];
1039 if (static_cast<int8>(buffer_[0]) > 0) {
1040 const uint32 kMax1ByteVarint = 0x7f;
1041 uint32 tag = buffer_[0];
1042 Advance(1);
1043 return std::make_pair(tag, cutoff >= kMax1ByteVarint || tag <= cutoff);
1044 }
1045 // Other hot case: cutoff >= 0x80, buffer_ has at least two bytes available,
1046 // and tag is two bytes. The latter is tested by bitwise-and-not of the
1047 // first byte and the second byte.
1048 if (cutoff >= 0x80 && PROTOBUF_PREDICT_TRUE(buffer_ + 1 < buffer_end_) &&
1049 PROTOBUF_PREDICT_TRUE((buffer_[0] & ~buffer_[1]) >= 0x80)) {
1050 const uint32 kMax2ByteVarint = (0x7f << 7) + 0x7f;
1051 uint32 tag = (1u << 7) * buffer_[1] + (buffer_[0] - 0x80);
1052 Advance(2);
1053 // It might make sense to test for tag == 0 now, but it is so rare that
1054 // that we don't bother. A varint-encoded 0 should be one byte unless
1055 // the encoder lost its mind. The second part of the return value of
1056 // this function is allowed to be either true or false if the tag is 0,
1057 // so we don't have to check for tag == 0. We may need to check whether
1058 // it exceeds cutoff.
1059 bool at_or_below_cutoff = cutoff >= kMax2ByteVarint || tag <= cutoff;
1060 return std::make_pair(tag, at_or_below_cutoff);
1061 }
1062 }
1063 // Slow path
1064 const uint32 tag = ReadTagFallback(first_byte_or_zero);
1065 // If tag == 0 we want to return { 0, false } so the following overflow is intended.
1066 // We use __builtin_add_overflow to appease the sub-overflow UB sanitizer.
1067 uint32_t tag_minus_one;
1068 __builtin_add_overflow(tag, -1, &tag_minus_one);
1069 return std::make_pair(tag, tag_minus_one < cutoff);
1070 }
1071
LastTagWas(uint32 expected)1072 inline bool CodedInputStream::LastTagWas(uint32 expected) {
1073 return last_tag_ == expected;
1074 }
1075
ConsumedEntireMessage()1076 inline bool CodedInputStream::ConsumedEntireMessage() {
1077 return legitimate_message_end_;
1078 }
1079
ExpectTag(uint32 expected)1080 inline bool CodedInputStream::ExpectTag(uint32 expected) {
1081 if (expected < (1 << 7)) {
1082 if (PROTOBUF_PREDICT_TRUE(buffer_ < buffer_end_) &&
1083 buffer_[0] == expected) {
1084 Advance(1);
1085 return true;
1086 } else {
1087 return false;
1088 }
1089 } else if (expected < (1 << 14)) {
1090 if (PROTOBUF_PREDICT_TRUE(BufferSize() >= 2) &&
1091 buffer_[0] == static_cast<uint8>(expected | 0x80) &&
1092 buffer_[1] == static_cast<uint8>(expected >> 7)) {
1093 Advance(2);
1094 return true;
1095 } else {
1096 return false;
1097 }
1098 } else {
1099 // Don't bother optimizing for larger values.
1100 return false;
1101 }
1102 }
1103
// Array counterpart of ExpectTag(): if `buffer` starts with the one- or
// two-byte varint encoding of `expected`, returns a pointer just past it;
// otherwise returns NULL.  No bounds are checked here.
inline const uint8* CodedInputStream::ExpectTagFromArray(const uint8* buffer,
                                                         uint32 expected) {
  const bool one_byte_tag = expected < (1 << 7);
  const bool two_byte_tag = !one_byte_tag && expected < (1 << 14);

  if (one_byte_tag && buffer[0] == expected) {
    return buffer + 1;
  }
  if (two_byte_tag && buffer[0] == static_cast<uint8>(expected | 0x80) &&
      buffer[1] == static_cast<uint8>(expected >> 7)) {
    return buffer + 2;
  }
  return NULL;
}
1118
GetDirectBufferPointerInline(const void ** data,int * size)1119 inline void CodedInputStream::GetDirectBufferPointerInline(const void** data,
1120 int* size) {
1121 *data = buffer_;
1122 *size = static_cast<int>(buffer_end_ - buffer_);
1123 }
1124
ExpectAtEnd()1125 inline bool CodedInputStream::ExpectAtEnd() {
1126 // If we are at a limit we know no more bytes can be read. Otherwise, it's
1127 // hard to say without calling Refresh(), and we'd rather not do that.
1128
1129 if (buffer_ == buffer_end_ && ((buffer_size_after_limit_ != 0) ||
1130 (total_bytes_read_ == current_limit_))) {
1131 last_tag_ = 0; // Pretend we called ReadTag()...
1132 legitimate_message_end_ = true; // ... and it hit EOF.
1133 return true;
1134 } else {
1135 return false;
1136 }
1137 }
1138
CurrentPosition()1139 inline int CodedInputStream::CurrentPosition() const {
1140 return total_bytes_read_ - (BufferSize() + buffer_size_after_limit_);
1141 }
1142
GetDirectBufferForNBytesAndAdvance(int size)1143 inline uint8* CodedOutputStream::GetDirectBufferForNBytesAndAdvance(int size) {
1144 if (buffer_size_ < size) {
1145 return NULL;
1146 } else {
1147 uint8* result = buffer_;
1148 Advance(size);
1149 return result;
1150 }
1151 }
1152
WriteVarint32ToArray(uint32 value,uint8 * target)1153 inline uint8* CodedOutputStream::WriteVarint32ToArray(uint32 value,
1154 uint8* target) {
1155 while (value >= 0x80) {
1156 *target = static_cast<uint8>(value | 0x80);
1157 value >>= 7;
1158 ++target;
1159 }
1160 *target = static_cast<uint8>(value);
1161 return target + 1;
1162 }
1163
WriteVarint64ToArray(uint64 value,uint8 * target)1164 inline uint8* CodedOutputStream::WriteVarint64ToArray(uint64 value,
1165 uint8* target) {
1166 while (value >= 0x80) {
1167 *target = static_cast<uint8>(value | 0x80);
1168 value >>= 7;
1169 ++target;
1170 }
1171 *target = static_cast<uint8>(value);
1172 return target + 1;
1173 }
1174
WriteVarint32SignExtended(int32 value)1175 inline void CodedOutputStream::WriteVarint32SignExtended(int32 value) {
1176 WriteVarint64(static_cast<uint64>(value));
1177 }
1178
WriteVarint32SignExtendedToArray(int32 value,uint8 * target)1179 inline uint8* CodedOutputStream::WriteVarint32SignExtendedToArray(
1180 int32 value, uint8* target) {
1181 return WriteVarint64ToArray(static_cast<uint64>(value), target);
1182 }
1183
WriteLittleEndian32ToArray(uint32 value,uint8 * target)1184 inline uint8* CodedOutputStream::WriteLittleEndian32ToArray(uint32 value,
1185 uint8* target) {
1186 #if defined(PROTOBUF_LITTLE_ENDIAN)
1187 memcpy(target, &value, sizeof(value));
1188 #else
1189 target[0] = static_cast<uint8>(value);
1190 target[1] = static_cast<uint8>(value >> 8);
1191 target[2] = static_cast<uint8>(value >> 16);
1192 target[3] = static_cast<uint8>(value >> 24);
1193 #endif
1194 return target + sizeof(value);
1195 }
1196
WriteLittleEndian64ToArray(uint64 value,uint8 * target)1197 inline uint8* CodedOutputStream::WriteLittleEndian64ToArray(uint64 value,
1198 uint8* target) {
1199 #if defined(PROTOBUF_LITTLE_ENDIAN)
1200 memcpy(target, &value, sizeof(value));
1201 #else
1202 uint32 part0 = static_cast<uint32>(value);
1203 uint32 part1 = static_cast<uint32>(value >> 32);
1204
1205 target[0] = static_cast<uint8>(part0);
1206 target[1] = static_cast<uint8>(part0 >> 8);
1207 target[2] = static_cast<uint8>(part0 >> 16);
1208 target[3] = static_cast<uint8>(part0 >> 24);
1209 target[4] = static_cast<uint8>(part1);
1210 target[5] = static_cast<uint8>(part1 >> 8);
1211 target[6] = static_cast<uint8>(part1 >> 16);
1212 target[7] = static_cast<uint8>(part1 >> 24);
1213 #endif
1214 return target + sizeof(value);
1215 }
1216
WriteVarint32(uint32 value)1217 inline void CodedOutputStream::WriteVarint32(uint32 value) {
1218 if (buffer_size_ >= 5) {
1219 // Fast path: We have enough bytes left in the buffer to guarantee that
1220 // this write won't cross the end, so we can skip the checks.
1221 uint8* target = buffer_;
1222 uint8* end = WriteVarint32ToArray(value, target);
1223 int size = static_cast<int>(end - target);
1224 Advance(size);
1225 } else {
1226 WriteVarint32SlowPath(value);
1227 }
1228 }
1229
WriteVarint64(uint64 value)1230 inline void CodedOutputStream::WriteVarint64(uint64 value) {
1231 if (buffer_size_ >= 10) {
1232 // Fast path: We have enough bytes left in the buffer to guarantee that
1233 // this write won't cross the end, so we can skip the checks.
1234 uint8* target = buffer_;
1235 uint8* end = WriteVarint64ToArray(value, target);
1236 int size = static_cast<int>(end - target);
1237 Advance(size);
1238 } else {
1239 WriteVarint64SlowPath(value);
1240 }
1241 }
1242
WriteTag(uint32 value)1243 inline void CodedOutputStream::WriteTag(uint32 value) { WriteVarint32(value); }
1244
WriteTagToArray(uint32 value,uint8 * target)1245 inline uint8* CodedOutputStream::WriteTagToArray(uint32 value, uint8* target) {
1246 return WriteVarint32ToArray(value, target);
1247 }
1248
VarintSize32(uint32 value)1249 inline size_t CodedOutputStream::VarintSize32(uint32 value) {
1250 // This computes value == 0 ? 1 : floor(log2(value)) / 7 + 1
1251 // Use an explicit multiplication to implement the divide of
1252 // a number in the 1..31 range.
1253 // Explicit OR 0x1 to avoid calling Bits::Log2FloorNonZero(0), which is
1254 // undefined.
1255 uint32 log2value = Bits::Log2FloorNonZero(value | 0x1);
1256 return static_cast<size_t>((log2value * 9 + 73) / 64);
1257 }
1258
VarintSize64(uint64 value)1259 inline size_t CodedOutputStream::VarintSize64(uint64 value) {
1260 // This computes value == 0 ? 1 : floor(log2(value)) / 7 + 1
1261 // Use an explicit multiplication to implement the divide of
1262 // a number in the 1..63 range.
1263 // Explicit OR 0x1 to avoid calling Bits::Log2FloorNonZero(0), which is
1264 // undefined.
1265 uint32 log2value = Bits::Log2FloorNonZero64(value | 0x1);
1266 return static_cast<size_t>((log2value * 9 + 73) / 64);
1267 }
1268
VarintSize32SignExtended(int32 value)1269 inline size_t CodedOutputStream::VarintSize32SignExtended(int32 value) {
1270 if (value < 0) {
1271 return 10; // TODO(kenton): Make this a symbolic constant.
1272 } else {
1273 return VarintSize32(static_cast<uint32>(value));
1274 }
1275 }
1276
WriteString(const std::string & str)1277 inline void CodedOutputStream::WriteString(const std::string& str) {
1278 WriteRaw(str.data(), static_cast<int>(str.size()));
1279 }
1280
WriteRawMaybeAliased(const void * data,int size)1281 inline void CodedOutputStream::WriteRawMaybeAliased(const void* data,
1282 int size) {
1283 if (aliasing_enabled_) {
1284 WriteAliasedRaw(data, size);
1285 } else {
1286 WriteRaw(data, size);
1287 }
1288 }
1289
WriteStringToArray(const std::string & str,uint8 * target)1290 inline uint8* CodedOutputStream::WriteStringToArray(const std::string& str,
1291 uint8* target) {
1292 return WriteRawToArray(str.data(), static_cast<int>(str.size()), target);
1293 }
1294
ByteCount()1295 inline int CodedOutputStream::ByteCount() const {
1296 return total_bytes_ - buffer_size_;
1297 }
1298
Advance(int amount)1299 inline void CodedInputStream::Advance(int amount) { buffer_ += amount; }
1300
Advance(int amount)1301 inline void CodedOutputStream::Advance(int amount) {
1302 buffer_ += amount;
1303 buffer_size_ -= amount;
1304 }
1305
SetRecursionLimit(int limit)1306 inline void CodedInputStream::SetRecursionLimit(int limit) {
1307 recursion_budget_ += limit - recursion_limit_;
1308 recursion_limit_ = limit;
1309 }
1310
IncrementRecursionDepth()1311 inline bool CodedInputStream::IncrementRecursionDepth() {
1312 --recursion_budget_;
1313 return recursion_budget_ >= 0;
1314 }
1315
DecrementRecursionDepth()1316 inline void CodedInputStream::DecrementRecursionDepth() {
1317 if (recursion_budget_ < recursion_limit_) ++recursion_budget_;
1318 }
1319
UnsafeDecrementRecursionDepth()1320 inline void CodedInputStream::UnsafeDecrementRecursionDepth() {
1321 assert(recursion_budget_ < recursion_limit_);
1322 ++recursion_budget_;
1323 }
1324
SetExtensionRegistry(const DescriptorPool * pool,MessageFactory * factory)1325 inline void CodedInputStream::SetExtensionRegistry(const DescriptorPool* pool,
1326 MessageFactory* factory) {
1327 extension_pool_ = pool;
1328 extension_factory_ = factory;
1329 }
1330
GetExtensionPool()1331 inline const DescriptorPool* CodedInputStream::GetExtensionPool() {
1332 return extension_pool_;
1333 }
1334
GetExtensionFactory()1335 inline MessageFactory* CodedInputStream::GetExtensionFactory() {
1336 return extension_factory_;
1337 }
1338
BufferSize()1339 inline int CodedInputStream::BufferSize() const {
1340 return static_cast<int>(buffer_end_ - buffer_);
1341 }
1342
CodedInputStream(ZeroCopyInputStream * input)1343 inline CodedInputStream::CodedInputStream(ZeroCopyInputStream* input)
1344 : buffer_(NULL),
1345 buffer_end_(NULL),
1346 input_(input),
1347 total_bytes_read_(0),
1348 overflow_bytes_(0),
1349 last_tag_(0),
1350 legitimate_message_end_(false),
1351 aliasing_enabled_(false),
1352 current_limit_(kint32max),
1353 buffer_size_after_limit_(0),
1354 total_bytes_limit_(kDefaultTotalBytesLimit),
1355 recursion_budget_(default_recursion_limit_),
1356 recursion_limit_(default_recursion_limit_),
1357 extension_pool_(NULL),
1358 extension_factory_(NULL) {
1359 // Eagerly Refresh() so buffer space is immediately available.
1360 Refresh();
1361 }
1362
CodedInputStream(const uint8 * buffer,int size)1363 inline CodedInputStream::CodedInputStream(const uint8* buffer, int size)
1364 : buffer_(buffer),
1365 buffer_end_(buffer + size),
1366 input_(NULL),
1367 total_bytes_read_(size),
1368 overflow_bytes_(0),
1369 last_tag_(0),
1370 legitimate_message_end_(false),
1371 aliasing_enabled_(false),
1372 current_limit_(size),
1373 buffer_size_after_limit_(0),
1374 total_bytes_limit_(kDefaultTotalBytesLimit),
1375 recursion_budget_(default_recursion_limit_),
1376 recursion_limit_(default_recursion_limit_),
1377 extension_pool_(NULL),
1378 extension_factory_(NULL) {
1379 // Note that setting current_limit_ == size is important to prevent some
1380 // code paths from trying to access input_ and segfaulting.
1381 }
1382
IsFlat()1383 inline bool CodedInputStream::IsFlat() const { return input_ == NULL; }
1384
Skip(int count)1385 inline bool CodedInputStream::Skip(int count) {
1386 if (count < 0) return false; // security: count is often user-supplied
1387
1388 const int original_buffer_size = BufferSize();
1389
1390 if (count <= original_buffer_size) {
1391 // Just skipping within the current buffer. Easy.
1392 Advance(count);
1393 return true;
1394 }
1395
1396 return SkipFallback(count, original_buffer_size);
1397 }
1398
1399 } // namespace io
1400 } // namespace protobuf
1401 } // namespace google
1402
1403 #if defined(_MSC_VER) && _MSC_VER >= 1300 && !defined(__INTEL_COMPILER)
1404 #pragma runtime_checks("c", restore)
1405 #endif // _MSC_VER && !defined(__INTEL_COMPILER)
1406
1407 #include <google/protobuf/port_undef.inc>
1408
1409 #endif // GOOGLE_PROTOBUF_IO_CODED_STREAM_H__
1410