1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc. All rights reserved.
3 //
4 // Use of this source code is governed by a BSD-style
5 // license that can be found in the LICENSE file or at
6 // https://developers.google.com/open-source/licenses/bsd
7
8 // Author: kenton@google.com (Kenton Varda)
9 // Based on original Protocol Buffers design by
10 // Sanjay Ghemawat, Jeff Dean, and others.
11 //
12 // This file contains the CodedInputStream and CodedOutputStream classes,
13 // which wrap a ZeroCopyInputStream or ZeroCopyOutputStream, respectively,
14 // and allow you to read or write individual pieces of data in various
15 // formats. In particular, these implement the varint encoding for
16 // integers, a simple variable-length encoding in which smaller numbers
17 // take fewer bytes.
18 //
19 // Typically these classes will only be used internally by the protocol
20 // buffer library in order to encode and decode protocol buffers. Clients
21 // of the library only need to know about this class if they wish to write
22 // custom message parsing or serialization procedures.
23 //
24 // CodedOutputStream example:
25 // // Write some data to "myfile". First we write a 4-byte "magic number"
26 // // to identify the file type, then write a length-prefixed string. The
27 // // string is composed of a varint giving the length followed by the raw
28 // // bytes.
29 // int fd = open("myfile", O_CREAT | O_WRONLY);
30 // ZeroCopyOutputStream* raw_output = new FileOutputStream(fd);
31 // CodedOutputStream* coded_output = new CodedOutputStream(raw_output);
32 //
33 // int magic_number = 1234;
34 // char text[] = "Hello world!";
35 // coded_output->WriteLittleEndian32(magic_number);
36 // coded_output->WriteVarint32(strlen(text));
37 // coded_output->WriteRaw(text, strlen(text));
38 //
39 // delete coded_output;
40 // delete raw_output;
41 // close(fd);
42 //
43 // CodedInputStream example:
44 // // Read a file created by the above code.
45 // int fd = open("myfile", O_RDONLY);
46 // ZeroCopyInputStream* raw_input = new FileInputStream(fd);
47 // CodedInputStream* coded_input = new CodedInputStream(raw_input);
48 //
//   uint32_t magic_number;
//   coded_input->ReadLittleEndian32(&magic_number);
50 // if (magic_number != 1234) {
51 // cerr << "File not in expected format." << endl;
52 // return;
53 // }
54 //
55 // uint32_t size;
56 // coded_input->ReadVarint32(&size);
57 //
58 // char* text = new char[size + 1];
//   coded_input->ReadRaw(text, size);
60 // text[size] = '\0';
61 //
62 // delete coded_input;
63 // delete raw_input;
64 // close(fd);
65 //
66 // cout << "Text is: " << text << endl;
67 // delete [] text;
68 //
69 // For those who are interested, varint encoding is defined as follows:
70 //
71 // The encoding operates on unsigned integers of up to 64 bits in length.
72 // Each byte of the encoded value has the format:
73 // * bits 0-6: Seven bits of the number being encoded.
74 // * bit 7: Zero if this is the last byte in the encoding (in which
75 // case all remaining bits of the number are zero) or 1 if
76 // more bytes follow.
77 // The first byte contains the least-significant 7 bits of the number, the
78 // second byte (if present) contains the next-least-significant 7 bits,
79 // and so on. So, the binary number 1011000101011 would be encoded in two
80 // bytes as "10101011 00101100".
81 //
82 // In theory, varint could be used to encode integers of any length.
83 // However, for practicality we set a limit at 64 bits. The maximum encoded
84 // length of a number is thus 10 bytes.
85
86 #ifndef GOOGLE_PROTOBUF_IO_CODED_STREAM_H__
87 #define GOOGLE_PROTOBUF_IO_CODED_STREAM_H__
88
89 #include <assert.h>
90
91 #include <atomic>
92 #include <climits>
93 #include <cstddef>
94 #include <cstdint>
95 #include <cstring>
96 #include <limits>
97 #include <string>
98 #include <type_traits>
99 #include <utility>
100
101 #if defined(_MSC_VER) && _MSC_VER >= 1300 && !defined(__INTEL_COMPILER)
102 // If MSVC has "/RTCc" set, it will complain about truncating casts at
103 // runtime. This file contains some intentional truncating casts.
104 #pragma runtime_checks("c", off)
105 #endif
106
107 #include "absl/log/absl_log.h" // Replace with vlog_is_on.h after Abseil LTS 20240722
108
109 #include "absl/log/absl_check.h"
110 #include "absl/numeric/bits.h"
111 #include "absl/strings/cord.h"
112 #include "absl/strings/string_view.h"
113 #include "google/protobuf/endian_pb.h"
114
115 // Must be included last.
116 #include "google/protobuf/port_def.inc"
117
118 namespace google {
119 namespace protobuf {
120
121 class DescriptorPool;
122 class MessageFactory;
123 class ZeroCopyCodedInputStream;
124
125 namespace internal {
126 void MapTestForceDeterministic();
127 class EpsCopyByteStream;
128 } // namespace internal
129
130 namespace io {
131
132 // Defined in this file.
133 class CodedInputStream;
134 class CodedOutputStream;
135
136 // Defined in other files.
137 class ZeroCopyInputStream; // zero_copy_stream.h
138 class ZeroCopyOutputStream; // zero_copy_stream.h
139
140 // Class which reads and decodes binary data which is composed of varint-
141 // encoded integers and fixed-width pieces. Wraps a ZeroCopyInputStream.
142 // Most users will not need to deal with CodedInputStream.
143 //
144 // Most methods of CodedInputStream that return a bool return false if an
145 // underlying I/O error occurs or if the data is malformed. Once such a
146 // failure occurs, the CodedInputStream is broken and is no longer useful.
147 // After a failure, callers also should assume writes to "out" args may have
148 // occurred, though nothing useful can be determined from those writes.
149 class PROTOBUF_EXPORT CodedInputStream {
150 public:
151 // Create a CodedInputStream that reads from the given ZeroCopyInputStream.
152 explicit CodedInputStream(ZeroCopyInputStream* input);
153
154 // Create a CodedInputStream that reads from the given flat array. This is
155 // faster than using an ArrayInputStream. PushLimit(size) is implied by
156 // this constructor.
157 explicit CodedInputStream(const uint8_t* buffer, int size);
158 CodedInputStream(const CodedInputStream&) = delete;
159 CodedInputStream& operator=(const CodedInputStream&) = delete;
160
161 // Destroy the CodedInputStream and position the underlying
162 // ZeroCopyInputStream at the first unread byte. If an error occurred while
163 // reading (causing a method to return false), then the exact position of
164 // the input stream may be anywhere between the last value that was read
165 // successfully and the stream's byte limit.
166 ~CodedInputStream();
167
168 // Return true if this CodedInputStream reads from a flat array instead of
169 // a ZeroCopyInputStream.
170 inline bool IsFlat() const;
171
172 // Skips a number of bytes. Returns false if an underlying read error
173 // occurs.
174 inline bool Skip(int count);
175
176 // Sets *data to point directly at the unread part of the CodedInputStream's
177 // underlying buffer, and *size to the size of that buffer, but does not
178 // advance the stream's current position. This will always either produce
179 // a non-empty buffer or return false. If the caller consumes any of
180 // this data, it should then call Skip() to skip over the consumed bytes.
181 // This may be useful for implementing external fast parsing routines for
182 // types of data not covered by the CodedInputStream interface.
183 bool GetDirectBufferPointer(const void** data, int* size);
184
185 // Like GetDirectBufferPointer, but this method is inlined, and does not
186 // attempt to Refresh() if the buffer is currently empty.
187 PROTOBUF_ALWAYS_INLINE
188 void GetDirectBufferPointerInline(const void** data, int* size);
189
190 // Read raw bytes, copying them into the given buffer.
191 bool ReadRaw(void* buffer, int size);
192
193 // Like ReadRaw, but reads into a string.
194 bool ReadString(std::string* buffer, int size);
195
196 // Like ReadString(), but reads to a Cord.
197 bool ReadCord(absl::Cord* output, int size);
198
199
200 // Read a 16-bit little-endian integer.
201 bool ReadLittleEndian16(uint16_t* value);
202 // Read a 32-bit little-endian integer.
203 bool ReadLittleEndian32(uint32_t* value);
204 // Read a 64-bit little-endian integer.
205 bool ReadLittleEndian64(uint64_t* value);
206
207 // These methods read from an externally provided buffer. The caller is
208 // responsible for ensuring that the buffer has sufficient space.
209 // Read a 16-bit little-endian integer.
210 static const uint8_t* ReadLittleEndian16FromArray(const uint8_t* buffer,
211 uint16_t* value);
212 // Read a 32-bit little-endian integer.
213 static const uint8_t* ReadLittleEndian32FromArray(const uint8_t* buffer,
214 uint32_t* value);
215 // Read a 64-bit little-endian integer.
216 static const uint8_t* ReadLittleEndian64FromArray(const uint8_t* buffer,
217 uint64_t* value);
218
219 // Read an unsigned integer with Varint encoding, truncating to 32 bits.
220 // Reading a 32-bit value is equivalent to reading a 64-bit one and casting
221 // it to uint32_t, but may be more efficient.
222 bool ReadVarint32(uint32_t* value);
223 // Read an unsigned integer with Varint encoding.
224 bool ReadVarint64(uint64_t* value);
225
226 // Reads a varint off the wire into an "int". This should be used for reading
227 // sizes off the wire (sizes of strings, submessages, bytes fields, etc).
228 //
229 // The value from the wire is interpreted as unsigned. If its value exceeds
230 // the representable value of an integer on this platform, instead of
231 // truncating we return false. Truncating (as performed by ReadVarint32()
232 // above) is an acceptable approach for fields representing an integer, but
233 // when we are parsing a size from the wire, truncating the value would result
234 // in us misparsing the payload.
235 bool ReadVarintSizeAsInt(int* value);
236
237 // Read a tag. This calls ReadVarint32() and returns the result, or returns
238 // zero (which is not a valid tag) if ReadVarint32() fails. Also, ReadTag
239 // (but not ReadTagNoLastTag) updates the last tag value, which can be checked
240 // with LastTagWas().
241 //
242 // Always inline because this is only called in one place per parse loop
243 // but it is called for every iteration of said loop, so it should be fast.
244 // GCC doesn't want to inline this by default.
ReadTag()245 PROTOBUF_ALWAYS_INLINE uint32_t ReadTag() {
246 return last_tag_ = ReadTagNoLastTag();
247 }
248
249 PROTOBUF_ALWAYS_INLINE uint32_t ReadTagNoLastTag();
250
251 // This usually a faster alternative to ReadTag() when cutoff is a manifest
252 // constant. It does particularly well for cutoff >= 127. The first part
253 // of the return value is the tag that was read, though it can also be 0 in
254 // the cases where ReadTag() would return 0. If the second part is true
255 // then the tag is known to be in [0, cutoff]. If not, the tag either is
256 // above cutoff or is 0. (There's intentional wiggle room when tag is 0,
257 // because that can arise in several ways, and for best performance we want
258 // to avoid an extra "is tag == 0?" check here.)
259 PROTOBUF_ALWAYS_INLINE
ReadTagWithCutoff(uint32_t cutoff)260 std::pair<uint32_t, bool> ReadTagWithCutoff(uint32_t cutoff) {
261 std::pair<uint32_t, bool> result = ReadTagWithCutoffNoLastTag(cutoff);
262 last_tag_ = result.first;
263 return result;
264 }
265
266 PROTOBUF_ALWAYS_INLINE
267 std::pair<uint32_t, bool> ReadTagWithCutoffNoLastTag(uint32_t cutoff);
268
269 // Usually returns true if calling ReadVarint32() now would produce the given
270 // value. Will always return false if ReadVarint32() would not return the
271 // given value. If ExpectTag() returns true, it also advances past
272 // the varint. For best performance, use a compile-time constant as the
273 // parameter.
274 // Always inline because this collapses to a small number of instructions
275 // when given a constant parameter, but GCC doesn't want to inline by default.
276 PROTOBUF_ALWAYS_INLINE bool ExpectTag(uint32_t expected);
277
278 // Like above, except this reads from the specified buffer. The caller is
279 // responsible for ensuring that the buffer is large enough to read a varint
280 // of the expected size. For best performance, use a compile-time constant as
281 // the expected tag parameter.
282 //
283 // Returns a pointer beyond the expected tag if it was found, or NULL if it
284 // was not.
285 PROTOBUF_ALWAYS_INLINE
286 static const uint8_t* ExpectTagFromArray(const uint8_t* buffer,
287 uint32_t expected);
288
289 // Usually returns true if no more bytes can be read. Always returns false
290 // if more bytes can be read. If ExpectAtEnd() returns true, a subsequent
291 // call to LastTagWas() will act as if ReadTag() had been called and returned
292 // zero, and ConsumedEntireMessage() will return true.
293 bool ExpectAtEnd();
294
295 // If the last call to ReadTag() or ReadTagWithCutoff() returned the given
296 // value, returns true. Otherwise, returns false.
297 // ReadTagNoLastTag/ReadTagWithCutoffNoLastTag do not preserve the last
298 // returned value.
299 //
300 // This is needed because parsers for some types of embedded messages
301 // (with field type TYPE_GROUP) don't actually know that they've reached the
302 // end of a message until they see an ENDGROUP tag, which was actually part
303 // of the enclosing message. The enclosing message would like to check that
304 // tag to make sure it had the right number, so it calls LastTagWas() on
305 // return from the embedded parser to check.
306 bool LastTagWas(uint32_t expected);
SetLastTag(uint32_t tag)307 void SetLastTag(uint32_t tag) { last_tag_ = tag; }
308
309 // When parsing message (but NOT a group), this method must be called
310 // immediately after MergeFromCodedStream() returns (if it returns true)
311 // to further verify that the message ended in a legitimate way. For
312 // example, this verifies that parsing did not end on an end-group tag.
313 // It also checks for some cases where, due to optimizations,
314 // MergeFromCodedStream() can incorrectly return true.
315 bool ConsumedEntireMessage();
SetConsumed()316 void SetConsumed() { legitimate_message_end_ = true; }
317
318 // Limits ----------------------------------------------------------
319 // Limits are used when parsing length-prefixed embedded messages.
320 // After the message's length is read, PushLimit() is used to prevent
321 // the CodedInputStream from reading beyond that length. Once the
322 // embedded message has been parsed, PopLimit() is called to undo the
323 // limit.
324
325 // Opaque type used with PushLimit() and PopLimit(). Do not modify
326 // values of this type yourself. The only reason that this isn't a
327 // struct with private internals is for efficiency.
328 typedef int Limit;
329
330 // Places a limit on the number of bytes that the stream may read,
331 // starting from the current position. Once the stream hits this limit,
332 // it will act like the end of the input has been reached until PopLimit()
333 // is called.
334 //
335 // As the names imply, the stream conceptually has a stack of limits. The
336 // shortest limit on the stack is always enforced, even if it is not the
337 // top limit.
338 //
339 // The value returned by PushLimit() is opaque to the caller, and must
340 // be passed unchanged to the corresponding call to PopLimit().
341 Limit PushLimit(int byte_limit);
342
343 // Pops the last limit pushed by PushLimit(). The input must be the value
344 // returned by that call to PushLimit().
345 void PopLimit(Limit limit);
346
347 // Returns the number of bytes left until the nearest limit on the
348 // stack is hit, or -1 if no limits are in place.
349 int BytesUntilLimit() const;
350
351 // Returns current position relative to the beginning of the input stream.
352 int CurrentPosition() const;
353
354 // Total Bytes Limit -----------------------------------------------
355 // To prevent malicious users from sending excessively large messages
356 // and causing memory exhaustion, CodedInputStream imposes a hard limit on
357 // the total number of bytes it will read.
358
359 // Sets the maximum number of bytes that this CodedInputStream will read
360 // before refusing to continue. To prevent servers from allocating enormous
361 // amounts of memory to hold parsed messages, the maximum message length
362 // should be limited to the shortest length that will not harm usability.
363 // The default limit is INT_MAX (~2GB) and apps should set shorter limits
364 // if possible. An error will always be printed to stderr if the limit is
365 // reached.
366 //
367 // Note: setting a limit less than the current read position is interpreted
368 // as a limit on the current position.
369 //
370 // This is unrelated to PushLimit()/PopLimit().
371 void SetTotalBytesLimit(int total_bytes_limit);
372
373 // The Total Bytes Limit minus the Current Position, or -1 if the total bytes
374 // limit is INT_MAX.
375 int BytesUntilTotalBytesLimit() const;
376
377 // Recursion Limit -------------------------------------------------
378 // To prevent corrupt or malicious messages from causing stack overflows,
379 // we must keep track of the depth of recursion when parsing embedded
380 // messages and groups. CodedInputStream keeps track of this because it
381 // is the only object that is passed down the stack during parsing.
382
383 // Sets the maximum recursion depth. The default is 100.
384 void SetRecursionLimit(int limit);
RecursionBudget()385 int RecursionBudget() { return recursion_budget_; }
386
GetDefaultRecursionLimit()387 static int GetDefaultRecursionLimit() { return default_recursion_limit_; }
388
389 // Increments the current recursion depth. Returns true if the depth is
390 // under the limit, false if it has gone over.
391 bool IncrementRecursionDepth();
392
393 // Decrements the recursion depth if possible.
394 void DecrementRecursionDepth();
395
396 // Decrements the recursion depth blindly. This is faster than
397 // DecrementRecursionDepth(). It should be used only if all previous
398 // increments to recursion depth were successful.
399 void UnsafeDecrementRecursionDepth();
400
401 // Shorthand for make_pair(PushLimit(byte_limit), --recursion_budget_).
402 // Using this can reduce code size and complexity in some cases. The caller
403 // is expected to check that the second part of the result is non-negative (to
404 // bail out if the depth of recursion is too high) and, if all is well, to
405 // later pass the first part of the result to PopLimit() or similar.
406 std::pair<CodedInputStream::Limit, int> IncrementRecursionDepthAndPushLimit(
407 int byte_limit);
408
409 // Shorthand for PushLimit(ReadVarint32(&length) ? length : 0).
410 Limit ReadLengthAndPushLimit();
411
412 // Helper that is equivalent to: {
413 // bool result = ConsumedEntireMessage();
414 // PopLimit(limit);
415 // UnsafeDecrementRecursionDepth();
416 // return result; }
417 // Using this can reduce code size and complexity in some cases.
418 // Do not use unless the current recursion depth is greater than zero.
419 bool DecrementRecursionDepthAndPopLimit(Limit limit);
420
421 // Helper that is equivalent to: {
422 // bool result = ConsumedEntireMessage();
423 // PopLimit(limit);
424 // return result; }
425 // Using this can reduce code size and complexity in some cases.
426 bool CheckEntireMessageConsumedAndPopLimit(Limit limit);
427
428 // Extension Registry ----------------------------------------------
429 // ADVANCED USAGE: 99.9% of people can ignore this section.
430 //
431 // By default, when parsing extensions, the parser looks for extension
432 // definitions in the pool which owns the outer message's Descriptor.
433 // However, you may call SetExtensionRegistry() to provide an alternative
434 // pool instead. This makes it possible, for example, to parse a message
435 // using a generated class, but represent some extensions using
436 // DynamicMessage.
437
438 // Set the pool used to look up extensions. Most users do not need to call
439 // this as the correct pool will be chosen automatically.
440 //
441 // WARNING: It is very easy to misuse this. Carefully read the requirements
442 // below. Do not use this unless you are sure you need it. Almost no one
443 // does.
444 //
445 // Let's say you are parsing a message into message object m, and you want
446 // to take advantage of SetExtensionRegistry(). You must follow these
447 // requirements:
448 //
449 // The given DescriptorPool must contain m->GetDescriptor(). It is not
450 // sufficient for it to simply contain a descriptor that has the same name
451 // and content -- it must be the *exact object*. In other words:
452 // assert(pool->FindMessageTypeByName(m->GetDescriptor()->full_name()) ==
453 // m->GetDescriptor());
454 // There are two ways to satisfy this requirement:
455 // 1) Use m->GetDescriptor()->pool() as the pool. This is generally useless
456 // because this is the pool that would be used anyway if you didn't call
457 // SetExtensionRegistry() at all.
458 // 2) Use a DescriptorPool which has m->GetDescriptor()->pool() as an
459 // "underlay". Read the documentation for DescriptorPool for more
460 // information about underlays.
461 //
462 // You must also provide a MessageFactory. This factory will be used to
463 // construct Message objects representing extensions. The factory's
464 // GetPrototype() MUST return non-NULL for any Descriptor which can be found
465 // through the provided pool.
466 //
467 // If the provided factory might return instances of protocol-compiler-
468 // generated (i.e. compiled-in) types, or if the outer message object m is
469 // a generated type, then the given factory MUST have this property: If
470 // GetPrototype() is given a Descriptor which resides in
471 // DescriptorPool::generated_pool(), the factory MUST return the same
472 // prototype which MessageFactory::generated_factory() would return. That
473 // is, given a descriptor for a generated type, the factory must return an
474 // instance of the generated class (NOT DynamicMessage). However, when
475 // given a descriptor for a type that is NOT in generated_pool, the factory
476 // is free to return any implementation.
477 //
478 // The reason for this requirement is that generated sub-objects may be
479 // accessed via the standard (non-reflection) extension accessor methods,
480 // and these methods will down-cast the object to the generated class type.
481 // If the object is not actually of that type, the results would be undefined.
482 // On the other hand, if an extension is not compiled in, then there is no
483 // way the code could end up accessing it via the standard accessors -- the
484 // only way to access the extension is via reflection. When using reflection,
485 // DynamicMessage and generated messages are indistinguishable, so it's fine
486 // if these objects are represented using DynamicMessage.
487 //
488 // Using DynamicMessageFactory on which you have called
489 // SetDelegateToGeneratedFactory(true) should be sufficient to satisfy the
490 // above requirement.
491 //
492 // If either pool or factory is NULL, both must be NULL.
493 //
494 // Note that this feature is ignored when parsing "lite" messages as they do
495 // not have descriptors.
496 void SetExtensionRegistry(const DescriptorPool* pool,
497 MessageFactory* factory);
498
499 // Get the DescriptorPool set via SetExtensionRegistry(), or NULL if no pool
500 // has been provided.
501 const DescriptorPool* GetExtensionPool();
502
503 // Get the MessageFactory set via SetExtensionRegistry(), or NULL if no
504 // factory has been provided.
505 MessageFactory* GetExtensionFactory();
506
507 private:
508 const uint8_t* buffer_;
509 const uint8_t* buffer_end_; // pointer to the end of the buffer.
510 ZeroCopyInputStream* input_;
511 int total_bytes_read_; // total bytes read from input_, including
512 // the current buffer
513
514 // If total_bytes_read_ surpasses INT_MAX, we record the extra bytes here
515 // so that we can BackUp() on destruction.
516 int overflow_bytes_;
517
518 // LastTagWas() stuff.
519 uint32_t last_tag_; // result of last ReadTag() or ReadTagWithCutoff().
520
521 // This is set true by ReadTag{Fallback/Slow}() if it is called when exactly
522 // at EOF, or by ExpectAtEnd() when it returns true. This happens when we
523 // reach the end of a message and attempt to read another tag.
524 bool legitimate_message_end_;
525
526 // See EnableAliasing().
527 bool aliasing_enabled_;
528
529 // If true, set eager parsing mode to override lazy fields.
530 bool force_eager_parsing_;
531
532 // Limits
533 Limit current_limit_; // if position = -1, no limit is applied
534
535 // For simplicity, if the current buffer crosses a limit (either a normal
536 // limit created by PushLimit() or the total bytes limit), buffer_size_
537 // only tracks the number of bytes before that limit. This field
538 // contains the number of bytes after it. Note that this implies that if
539 // buffer_size_ == 0 and buffer_size_after_limit_ > 0, we know we've
540 // hit a limit. However, if both are zero, it doesn't necessarily mean
541 // we aren't at a limit -- the buffer may have ended exactly at the limit.
542 int buffer_size_after_limit_;
543
544 // Maximum number of bytes to read, period. This is unrelated to
545 // current_limit_. Set using SetTotalBytesLimit().
546 int total_bytes_limit_;
547
548 // Current recursion budget, controlled by IncrementRecursionDepth() and
549 // similar. Starts at recursion_limit_ and goes down: if this reaches
550 // -1 we are over budget.
551 int recursion_budget_;
552 // Recursion depth limit, set by SetRecursionLimit().
553 int recursion_limit_;
554
555 // See SetExtensionRegistry().
556 const DescriptorPool* extension_pool_;
557 MessageFactory* extension_factory_;
558
559 // Private member functions.
560
561 // Fallback when Skip() goes past the end of the current buffer.
562 bool SkipFallback(int count, int original_buffer_size);
563
564 // Advance the buffer by a given number of bytes.
565 void Advance(int amount);
566
567 // Back up input_ to the current buffer position.
568 void BackUpInputToCurrentPosition();
569
570 // Recomputes the value of buffer_size_after_limit_. Must be called after
571 // current_limit_ or total_bytes_limit_ changes.
572 void RecomputeBufferLimits();
573
574 // Writes an error message saying that we hit total_bytes_limit_.
575 void PrintTotalBytesLimitError();
576
577 // Called when the buffer runs out to request more data. Implies an
578 // Advance(BufferSize()).
579 bool Refresh();
580
581 // When parsing varints, we optimize for the common case of small values, and
582 // then optimize for the case when the varint fits within the current buffer
583 // piece. The Fallback method is used when we can't use the one-byte
584 // optimization. The Slow method is yet another fallback when the buffer is
585 // not large enough. Making the slow path out-of-line speeds up the common
586 // case by 10-15%. The slow path is fairly uncommon: it only triggers when a
587 // message crosses multiple buffers. Note: ReadVarint32Fallback() and
588 // ReadVarint64Fallback() are called frequently and generally not inlined, so
589 // they have been optimized to avoid "out" parameters. The former returns -1
590 // if it fails and the uint32_t it read otherwise. The latter has a bool
591 // indicating success or failure as part of its return type.
592 int64_t ReadVarint32Fallback(uint32_t first_byte_or_zero);
593 int ReadVarintSizeAsIntFallback();
594 std::pair<uint64_t, bool> ReadVarint64Fallback();
595 bool ReadVarint32Slow(uint32_t* value);
596 bool ReadVarint64Slow(uint64_t* value);
597 int ReadVarintSizeAsIntSlow();
598 bool ReadLittleEndian16Fallback(uint16_t* value);
599 bool ReadLittleEndian32Fallback(uint32_t* value);
600 bool ReadLittleEndian64Fallback(uint64_t* value);
601
602 // Fallback/slow methods for reading tags. These do not update last_tag_,
603 // but will set legitimate_message_end_ if we are at the end of the input
604 // stream.
605 uint32_t ReadTagFallback(uint32_t first_byte_or_zero);
606 uint32_t ReadTagSlow();
607 bool ReadStringFallback(std::string* buffer, int size);
608
609 // Return the size of the buffer.
610 int BufferSize() const;
611
612 static const int kDefaultTotalBytesLimit = INT_MAX;
613
614 static int default_recursion_limit_; // 100 by default.
615
616 friend class google::protobuf::ZeroCopyCodedInputStream;
617 friend class google::protobuf::internal::EpsCopyByteStream;
618 };
619
620 // EpsCopyOutputStream wraps a ZeroCopyOutputStream and exposes a new stream,
621 // which has the property you can write kSlopBytes (16 bytes) from the current
622 // position without bounds checks. The cursor into the stream is managed by
623 // the user of the class and is an explicit parameter in the methods. Careful
// use of this class, i.e. keeping ptr a local variable, eliminates the need
// for the compiler to sync the ptr value between register and memory.
626 class PROTOBUF_EXPORT EpsCopyOutputStream {
627 public:
628 enum { kSlopBytes = 16 };
629
630 // Initialize from a stream.
EpsCopyOutputStream(ZeroCopyOutputStream* stream, bool deterministic,
                    uint8_t** pp)
    : end_(buffer_),
      stream_(stream),
      is_serialization_deterministic_(deterministic) {
  // The caller's cursor and end_ both start at buffer_, so `ptr >= end_`
  // already holds on the first EnsureSpace() call, which therefore goes
  // through EnsureSpaceFallback().
  *pp = buffer_;
}
638
// Only for array serialization. No overflow protection, end_ will point to
// the end of the array. When using this the total size is already known, so
// no need to maintain the slop region.
EpsCopyOutputStream(void * data,int size,bool deterministic)642 EpsCopyOutputStream(void* data, int size, bool deterministic)
643 : end_(static_cast<uint8_t*>(data) + size),
644 buffer_end_(nullptr),
645 stream_(nullptr),
646 is_serialization_deterministic_(deterministic) {}
647
648 // Initialize from stream but with the first buffer already given (eager).
EpsCopyOutputStream(void * data,int size,ZeroCopyOutputStream * stream,bool deterministic,uint8_t ** pp)649 EpsCopyOutputStream(void* data, int size, ZeroCopyOutputStream* stream,
650 bool deterministic, uint8_t** pp)
651 : stream_(stream), is_serialization_deterministic_(deterministic) {
652 *pp = SetInitialBuffer(data, size);
653 }
654
// Flushes everything that has been written into the underlying
// ZeroCopyOutputStream and trims the underlying stream to the location of ptr.
657 uint8_t* Trim(uint8_t* ptr);
658
659 // After this it's guaranteed you can safely write kSlopBytes to ptr. This
660 // will never fail! The underlying stream can produce an error. Use HadError
661 // to check for errors.
EnsureSpace(uint8_t * ptr)662 PROTOBUF_NODISCARD uint8_t* EnsureSpace(uint8_t* ptr) {
663 if (PROTOBUF_PREDICT_FALSE(ptr >= end_)) {
664 return EnsureSpaceFallback(ptr);
665 }
666 return ptr;
667 }
668
WriteRaw(const void * data,int size,uint8_t * ptr)669 uint8_t* WriteRaw(const void* data, int size, uint8_t* ptr) {
670 if (PROTOBUF_PREDICT_FALSE(end_ - ptr < size)) {
671 return WriteRawFallback(data, size, ptr);
672 }
673 std::memcpy(ptr, data, static_cast<unsigned int>(size));
674 return ptr + size;
675 }
676 // Writes the buffer specified by data, size to the stream. Possibly by
677 // aliasing the buffer (ie. not copying the data). The caller is responsible
678 // to make sure the buffer is alive for the duration of the
679 // ZeroCopyOutputStream.
680 #ifndef NDEBUG
681 PROTOBUF_NOINLINE
682 #endif
WriteRawMaybeAliased(const void * data,int size,uint8_t * ptr)683 uint8_t* WriteRawMaybeAliased(const void* data, int size, uint8_t* ptr) {
684 if (aliasing_enabled_) {
685 return WriteAliasedRaw(data, size, ptr);
686 } else {
687 return WriteRaw(data, size, ptr);
688 }
689 }
690
691 uint8_t* WriteCord(const absl::Cord& cord, uint8_t* ptr);
692
693 #ifndef NDEBUG
694 PROTOBUF_NOINLINE
695 #endif
WriteStringMaybeAliased(uint32_t num,const std::string & s,uint8_t * ptr)696 uint8_t* WriteStringMaybeAliased(uint32_t num, const std::string& s,
697 uint8_t* ptr) {
698 std::ptrdiff_t size = s.size();
699 if (PROTOBUF_PREDICT_FALSE(
700 size >= 128 || end_ - ptr + 16 - TagSize(num << 3) - 1 < size)) {
701 return WriteStringMaybeAliasedOutline(num, s, ptr);
702 }
703 ptr = UnsafeVarint((num << 3) | 2, ptr);
704 *ptr++ = static_cast<uint8_t>(size);
705 std::memcpy(ptr, s.data(), size);
706 return ptr + size;
707 }
WriteBytesMaybeAliased(uint32_t num,const std::string & s,uint8_t * ptr)708 uint8_t* WriteBytesMaybeAliased(uint32_t num, const std::string& s,
709 uint8_t* ptr) {
710 return WriteStringMaybeAliased(num, s, ptr);
711 }
712
713 template <typename T>
WriteString(uint32_t num,const T & s,uint8_t * ptr)714 PROTOBUF_ALWAYS_INLINE uint8_t* WriteString(uint32_t num, const T& s,
715 uint8_t* ptr) {
716 std::ptrdiff_t size = s.size();
717 if (PROTOBUF_PREDICT_FALSE(
718 size >= 128 || end_ - ptr + 16 - TagSize(num << 3) - 1 < size)) {
719 return WriteStringOutline(num, s, ptr);
720 }
721 ptr = UnsafeVarint((num << 3) | 2, ptr);
722 *ptr++ = static_cast<uint8_t>(size);
723 std::memcpy(ptr, s.data(), size);
724 return ptr + size;
725 }
726
WriteString(uint32_t num,const absl::Cord & s,uint8_t * ptr)727 uint8_t* WriteString(uint32_t num, const absl::Cord& s, uint8_t* ptr) {
728 ptr = EnsureSpace(ptr);
729 ptr = WriteTag(num, 2, ptr);
730 return WriteCordOutline(s, ptr);
731 }
732
733 template <typename T>
734 #ifndef NDEBUG
735 PROTOBUF_NOINLINE
736 #endif
WriteBytes(uint32_t num,const T & s,uint8_t * ptr)737 uint8_t* WriteBytes(uint32_t num, const T& s, uint8_t* ptr) {
738 return WriteString(num, s, ptr);
739 }
740
741 template <typename T>
WriteInt32Packed(int num,const T & r,int size,uint8_t * ptr)742 PROTOBUF_ALWAYS_INLINE uint8_t* WriteInt32Packed(int num, const T& r,
743 int size, uint8_t* ptr) {
744 return WriteVarintPacked(num, r, size, ptr, Encode64);
745 }
746 template <typename T>
WriteUInt32Packed(int num,const T & r,int size,uint8_t * ptr)747 PROTOBUF_ALWAYS_INLINE uint8_t* WriteUInt32Packed(int num, const T& r,
748 int size, uint8_t* ptr) {
749 return WriteVarintPacked(num, r, size, ptr, Encode32);
750 }
751 template <typename T>
WriteSInt32Packed(int num,const T & r,int size,uint8_t * ptr)752 PROTOBUF_ALWAYS_INLINE uint8_t* WriteSInt32Packed(int num, const T& r,
753 int size, uint8_t* ptr) {
754 return WriteVarintPacked(num, r, size, ptr, ZigZagEncode32);
755 }
756 template <typename T>
WriteInt64Packed(int num,const T & r,int size,uint8_t * ptr)757 PROTOBUF_ALWAYS_INLINE uint8_t* WriteInt64Packed(int num, const T& r,
758 int size, uint8_t* ptr) {
759 return WriteVarintPacked(num, r, size, ptr, Encode64);
760 }
761 template <typename T>
WriteUInt64Packed(int num,const T & r,int size,uint8_t * ptr)762 PROTOBUF_ALWAYS_INLINE uint8_t* WriteUInt64Packed(int num, const T& r,
763 int size, uint8_t* ptr) {
764 return WriteVarintPacked(num, r, size, ptr, Encode64);
765 }
766 template <typename T>
WriteSInt64Packed(int num,const T & r,int size,uint8_t * ptr)767 PROTOBUF_ALWAYS_INLINE uint8_t* WriteSInt64Packed(int num, const T& r,
768 int size, uint8_t* ptr) {
769 return WriteVarintPacked(num, r, size, ptr, ZigZagEncode64);
770 }
771 template <typename T>
WriteEnumPacked(int num,const T & r,int size,uint8_t * ptr)772 PROTOBUF_ALWAYS_INLINE uint8_t* WriteEnumPacked(int num, const T& r, int size,
773 uint8_t* ptr) {
774 return WriteVarintPacked(num, r, size, ptr, Encode64);
775 }
776
777 template <typename T>
WriteFixedPacked(int num,const T & r,uint8_t * ptr)778 PROTOBUF_ALWAYS_INLINE uint8_t* WriteFixedPacked(int num, const T& r,
779 uint8_t* ptr) {
780 ptr = EnsureSpace(ptr);
781 constexpr auto element_size = sizeof(typename T::value_type);
782 auto size = r.size() * element_size;
783 ptr = WriteLengthDelim(num, size, ptr);
784 return WriteRawLittleEndian<element_size>(r.data(), static_cast<int>(size),
785 ptr);
786 }
787
788 // Returns true if there was an underlying I/O error since this object was
789 // created.
HadError()790 bool HadError() const { return had_error_; }
791
792 // Instructs the EpsCopyOutputStream to allow the underlying
793 // ZeroCopyOutputStream to hold pointers to the original structure instead of
794 // copying, if it supports it (i.e. output->AllowsAliasing() is true). If the
// underlying stream does not support aliasing, then enabling it has no
// effect. For now, this only affects the behavior of
797 // WriteRawMaybeAliased().
798 //
799 // NOTE: It is caller's responsibility to ensure that the chunk of memory
800 // remains live until all of the data has been consumed from the stream.
801 void EnableAliasing(bool enabled);
802
803 // See documentation on CodedOutputStream::SetSerializationDeterministic.
SetSerializationDeterministic(bool value)804 void SetSerializationDeterministic(bool value) {
805 is_serialization_deterministic_ = value;
806 }
807
808 // See documentation on CodedOutputStream::IsSerializationDeterministic.
IsSerializationDeterministic()809 bool IsSerializationDeterministic() const {
810 return is_serialization_deterministic_;
811 }
812
813 // The number of bytes written to the stream at position ptr, relative to the
814 // stream's overall position.
815 int64_t ByteCount(uint8_t* ptr) const;
816
817
818 private:
819 uint8_t* end_;
820 uint8_t* buffer_end_ = buffer_;
821 uint8_t buffer_[2 * kSlopBytes];
822 ZeroCopyOutputStream* stream_;
823 bool had_error_ = false;
824 bool aliasing_enabled_ = false; // See EnableAliasing().
825 bool is_serialization_deterministic_;
826 bool skip_check_consistency = false;
827
828 uint8_t* EnsureSpaceFallback(uint8_t* ptr);
829 inline uint8_t* Next();
830 int Flush(uint8_t* ptr);
GetSize(uint8_t * ptr)831 std::ptrdiff_t GetSize(uint8_t* ptr) const {
832 ABSL_DCHECK(ptr <= end_ + kSlopBytes); // NOLINT
833 return end_ + kSlopBytes - ptr;
834 }
835
Error()836 uint8_t* Error() {
837 had_error_ = true;
838 // We use the patch buffer to always guarantee space to write to.
839 end_ = buffer_ + kSlopBytes;
840 return buffer_;
841 }
842
// Number of bytes `tag` occupies as a varint: one byte, plus one more for
// each 7-bit boundary (2^7, 2^14, 2^21, 2^28) that the value reaches.
static constexpr int TagSize(uint32_t tag) {
  return 1 + (tag >= (1u << 7) ? 1 : 0) + (tag >= (1u << 14) ? 1 : 0) +
         (tag >= (1u << 21) ? 1 : 0) + (tag >= (1u << 28) ? 1 : 0);
}
850
WriteTag(uint32_t num,uint32_t wt,uint8_t * ptr)851 PROTOBUF_ALWAYS_INLINE uint8_t* WriteTag(uint32_t num, uint32_t wt,
852 uint8_t* ptr) {
853 ABSL_DCHECK(ptr < end_); // NOLINT
854 return UnsafeVarint((num << 3) | wt, ptr);
855 }
856
WriteLengthDelim(int num,uint32_t size,uint8_t * ptr)857 PROTOBUF_ALWAYS_INLINE uint8_t* WriteLengthDelim(int num, uint32_t size,
858 uint8_t* ptr) {
859 ptr = WriteTag(num, 2, ptr);
860 return UnsafeWriteSize(size, ptr);
861 }
862
863 uint8_t* WriteRawFallback(const void* data, int size, uint8_t* ptr);
864
865 uint8_t* WriteAliasedRaw(const void* data, int size, uint8_t* ptr);
866
867 uint8_t* WriteStringMaybeAliasedOutline(uint32_t num, const std::string& s,
868 uint8_t* ptr);
869 uint8_t* WriteStringOutline(uint32_t num, const std::string& s, uint8_t* ptr);
870 uint8_t* WriteStringOutline(uint32_t num, absl::string_view s, uint8_t* ptr);
871 uint8_t* WriteCordOutline(const absl::Cord& c, uint8_t* ptr);
872
873 template <typename T, typename E>
WriteVarintPacked(int num,const T & r,int size,uint8_t * ptr,const E & encode)874 PROTOBUF_ALWAYS_INLINE uint8_t* WriteVarintPacked(int num, const T& r,
875 int size, uint8_t* ptr,
876 const E& encode) {
877 ptr = EnsureSpace(ptr);
878 ptr = WriteLengthDelim(num, size, ptr);
879 auto it = r.data();
880 auto end = it + r.size();
881 do {
882 ptr = EnsureSpace(ptr);
883 ptr = UnsafeVarint(encode(*it++), ptr);
884 } while (it < end);
885 return ptr;
886 }
887
// Identity encoder handed to WriteVarintPacked() for 32-bit unsigned values.
static uint32_t Encode32(uint32_t v) {
  return v;
}
// Identity encoder handed to WriteVarintPacked() for 64-bit values.
static uint64_t Encode64(uint64_t v) {
  return v;
}
// Zig-zag encoding: 0, -1, 1, -2, ... map to 0, 1, 2, 3, ...
static uint32_t ZigZagEncode32(int32_t v) {
  const uint32_t shifted = static_cast<uint32_t>(v) << 1;
  // The arithmetic shift yields all ones for negative v and zero otherwise.
  const uint32_t sign_mask = static_cast<uint32_t>(v >> 31);
  return shifted ^ sign_mask;
}
// 64-bit zig-zag encoding: 0, -1, 1, -2, ... map to 0, 1, 2, 3, ...
static uint64_t ZigZagEncode64(int64_t v) {
  const uint64_t shifted = static_cast<uint64_t>(v) << 1;
  // The arithmetic shift yields all ones for negative v and zero otherwise.
  const uint64_t sign_mask = static_cast<uint64_t>(v >> 63);
  return shifted ^ sign_mask;
}
896
897 template <typename T>
UnsafeVarint(T value,uint8_t * ptr)898 PROTOBUF_ALWAYS_INLINE static uint8_t* UnsafeVarint(T value, uint8_t* ptr) {
899 static_assert(std::is_unsigned<T>::value,
900 "Varint serialization must be unsigned");
901 while (PROTOBUF_PREDICT_FALSE(value >= 0x80)) {
902 *ptr = static_cast<uint8_t>(value | 0x80);
903 value >>= 7;
904 ++ptr;
905 }
906 *ptr++ = static_cast<uint8_t>(value);
907 return ptr;
908 }
909
UnsafeWriteSize(uint32_t value,uint8_t * ptr)910 PROTOBUF_ALWAYS_INLINE static uint8_t* UnsafeWriteSize(uint32_t value,
911 uint8_t* ptr) {
912 while (PROTOBUF_PREDICT_FALSE(value >= 0x80)) {
913 *ptr = static_cast<uint8_t>(value | 0x80);
914 value >>= 7;
915 ++ptr;
916 }
917 *ptr++ = static_cast<uint8_t>(value);
918 return ptr;
919 }
920
921 template <int S>
922 uint8_t* WriteRawLittleEndian(const void* data, int size, uint8_t* ptr);
923 #if !defined(ABSL_IS_LITTLE_ENDIAN) || \
924 defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST)
925 uint8_t* WriteRawLittleEndian32(const void* data, int size, uint8_t* ptr);
926 uint8_t* WriteRawLittleEndian64(const void* data, int size, uint8_t* ptr);
927 #endif
928
929 // These methods are for CodedOutputStream. Ideally they should be private
930 // but to match current behavior of CodedOutputStream as close as possible
931 // we allow it some functionality.
932 public:
SetInitialBuffer(void * data,int size)933 uint8_t* SetInitialBuffer(void* data, int size) {
934 auto ptr = static_cast<uint8_t*>(data);
935 if (size > kSlopBytes) {
936 end_ = ptr + size - kSlopBytes;
937 buffer_end_ = nullptr;
938 return ptr;
939 } else {
940 end_ = buffer_ + size;
941 buffer_end_ = ptr;
942 return buffer_;
943 }
944 }
945
946 private:
947 // Needed by CodedOutputStream HadError. HadError needs to flush the patch
948 // buffers to ensure there is no error as of yet.
949 uint8_t* FlushAndResetBuffer(uint8_t*);
950
951 // The following functions mimic the old CodedOutputStream behavior as close
952 // as possible. They flush the current state to the stream, behave as
953 // the old CodedOutputStream and then return to normal operation.
954 bool Skip(int count, uint8_t** pp);
955 bool GetDirectBufferPointer(void** data, int* size, uint8_t** pp);
956 uint8_t* GetDirectBufferForNBytesAndAdvance(int size, uint8_t** pp);
957
958 friend class CodedOutputStream;
959 };
960
961 template <>
962 inline uint8_t* EpsCopyOutputStream::WriteRawLittleEndian<1>(const void* data,
963 int size,
964 uint8_t* ptr) {
965 return WriteRaw(data, size, ptr);
966 }
967 template <>
968 inline uint8_t* EpsCopyOutputStream::WriteRawLittleEndian<4>(const void* data,
969 int size,
970 uint8_t* ptr) {
971 #if defined(ABSL_IS_LITTLE_ENDIAN) && \
972 !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST)
973 return WriteRaw(data, size, ptr);
974 #else
975 return WriteRawLittleEndian32(data, size, ptr);
976 #endif
977 }
978 template <>
979 inline uint8_t* EpsCopyOutputStream::WriteRawLittleEndian<8>(const void* data,
980 int size,
981 uint8_t* ptr) {
982 #if defined(ABSL_IS_LITTLE_ENDIAN) && \
983 !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST)
984 return WriteRaw(data, size, ptr);
985 #else
986 return WriteRawLittleEndian64(data, size, ptr);
987 #endif
988 }
989
990 // Class which encodes and writes binary data which is composed of varint-
991 // encoded integers and fixed-width pieces. Wraps a ZeroCopyOutputStream.
992 // Most users will not need to deal with CodedOutputStream.
993 //
994 // Most methods of CodedOutputStream which return a bool return false if an
995 // underlying I/O error occurs. Once such a failure occurs, the
996 // CodedOutputStream is broken and is no longer useful. The Write* methods do
997 // not return the stream status, but will invalidate the stream if an error
998 // occurs. The client can probe HadError() to determine the status.
999 //
1000 // Note that every method of CodedOutputStream which writes some data has
1001 // a corresponding static "ToArray" version. These versions write directly
1002 // to the provided buffer, returning a pointer past the last written byte.
1003 // They require that the buffer has sufficient capacity for the encoded data.
1004 // This allows an optimization where we check if an output stream has enough
1005 // space for an entire message before we start writing and, if there is, we
1006 // call only the ToArray methods to avoid doing bound checks for each
1007 // individual value.
1008 // i.e., in the example above:
1009 //
1010 // CodedOutputStream* coded_output = new CodedOutputStream(raw_output);
1011 // int magic_number = 1234;
1012 // char text[] = "Hello world!";
1013 //
1014 // int coded_size = sizeof(magic_number) +
1015 // CodedOutputStream::VarintSize32(strlen(text)) +
1016 // strlen(text);
1017 //
1018 // uint8_t* buffer =
1019 // coded_output->GetDirectBufferForNBytesAndAdvance(coded_size);
1020 // if (buffer != nullptr) {
1021 // // The output stream has enough space in the buffer: write directly to
1022 // // the array.
1023 // buffer = CodedOutputStream::WriteLittleEndian32ToArray(magic_number,
1024 // buffer);
1025 // buffer = CodedOutputStream::WriteVarint32ToArray(strlen(text), buffer);
1026 // buffer = CodedOutputStream::WriteRawToArray(text, strlen(text), buffer);
1027 // } else {
1028 // // Make bound-checked writes, which will ask the underlying stream for
1029 // // more space as needed.
1030 // coded_output->WriteLittleEndian32(magic_number);
1031 // coded_output->WriteVarint32(strlen(text));
1032 // coded_output->WriteRaw(text, strlen(text));
1033 // }
1034 //
1035 // delete coded_output;
1036 class PROTOBUF_EXPORT CodedOutputStream {
1037 public:
1038 // Creates a CodedOutputStream that writes to the given `stream`.
1039 // The provided stream must publicly derive from `ZeroCopyOutputStream`.
1040 template <class Stream, class = typename std::enable_if<std::is_base_of<
1041 ZeroCopyOutputStream, Stream>::value>::type>
1042 explicit CodedOutputStream(Stream* stream);
1043
1044 // Creates a CodedOutputStream that writes to the given `stream`, and does
1045 // an 'eager initialization' of the internal state if `eager_init` is true.
1046 // The provided stream must publicly derive from `ZeroCopyOutputStream`.
1047 template <class Stream, class = typename std::enable_if<std::is_base_of<
1048 ZeroCopyOutputStream, Stream>::value>::type>
1049 CodedOutputStream(Stream* stream, bool eager_init);
1050 CodedOutputStream(const CodedOutputStream&) = delete;
1051 CodedOutputStream& operator=(const CodedOutputStream&) = delete;
1052
1053 // Destroy the CodedOutputStream and position the underlying
1054 // ZeroCopyOutputStream immediately after the last byte written.
1055 ~CodedOutputStream();
1056
1057 // Returns true if there was an underlying I/O error since this object was
1058 // created. On should call Trim before this function in order to catch all
1059 // errors.
HadError()1060 bool HadError() {
1061 cur_ = impl_.FlushAndResetBuffer(cur_);
1062 ABSL_DCHECK(cur_);
1063 return impl_.HadError();
1064 }
1065
1066 // Trims any unused space in the underlying buffer so that its size matches
1067 // the number of bytes written by this stream. The underlying buffer will
1068 // automatically be trimmed when this stream is destroyed; this call is only
1069 // necessary if the underlying buffer is accessed *before* the stream is
1070 // destroyed.
Trim()1071 void Trim() { cur_ = impl_.Trim(cur_); }
1072
1073 // Skips a number of bytes, leaving the bytes unmodified in the underlying
1074 // buffer. Returns false if an underlying write error occurs. This is
1075 // mainly useful with GetDirectBufferPointer().
1076 // Note of caution, the skipped bytes may contain uninitialized data. The
1077 // caller must make sure that the skipped bytes are properly initialized,
1078 // otherwise you might leak bytes from your heap.
Skip(int count)1079 bool Skip(int count) { return impl_.Skip(count, &cur_); }
1080
1081 // Sets *data to point directly at the unwritten part of the
1082 // CodedOutputStream's underlying buffer, and *size to the size of that
1083 // buffer, but does not advance the stream's current position. This will
1084 // always either produce a non-empty buffer or return false. If the caller
1085 // writes any data to this buffer, it should then call Skip() to skip over
1086 // the consumed bytes. This may be useful for implementing external fast
1087 // serialization routines for types of data not covered by the
1088 // CodedOutputStream interface.
GetDirectBufferPointer(void ** data,int * size)1089 bool GetDirectBufferPointer(void** data, int* size) {
1090 return impl_.GetDirectBufferPointer(data, size, &cur_);
1091 }
1092
1093 // If there are at least "size" bytes available in the current buffer,
1094 // returns a pointer directly into the buffer and advances over these bytes.
1095 // The caller may then write directly into this buffer (e.g. using the
1096 // *ToArray static methods) rather than go through CodedOutputStream. If
1097 // there are not enough bytes available, returns NULL. The return pointer is
1098 // invalidated as soon as any other non-const method of CodedOutputStream
1099 // is called.
GetDirectBufferForNBytesAndAdvance(int size)1100 inline uint8_t* GetDirectBufferForNBytesAndAdvance(int size) {
1101 return impl_.GetDirectBufferForNBytesAndAdvance(size, &cur_);
1102 }
1103
1104 // Write raw bytes, copying them from the given buffer.
WriteRaw(const void * buffer,int size)1105 void WriteRaw(const void* buffer, int size) {
1106 cur_ = impl_.WriteRaw(buffer, size, cur_);
1107 }
1108 // Like WriteRaw() but will try to write aliased data if aliasing is
1109 // turned on.
1110 void WriteRawMaybeAliased(const void* data, int size);
1111 // Like WriteRaw() but writing directly to the target array.
1112 // This is _not_ inlined, as the compiler often optimizes memcpy into inline
1113 // copy loops. Since this gets called by every field with string or bytes
1114 // type, inlining may lead to a significant amount of code bloat, with only a
1115 // minor performance gain.
1116 static uint8_t* WriteRawToArray(const void* buffer, int size,
1117 uint8_t* target);
1118
1119 // Equivalent to WriteRaw(str.data(), str.size()).
1120 void WriteString(const std::string& str);
1121 // Like WriteString() but writing directly to the target array.
1122 static uint8_t* WriteStringToArray(const std::string& str, uint8_t* target);
1123 // Write the varint-encoded size of str followed by str.
1124 static uint8_t* WriteStringWithSizeToArray(const std::string& str,
1125 uint8_t* target);
1126
1127 // Like WriteString() but writes a Cord.
WriteCord(const absl::Cord & cord)1128 void WriteCord(const absl::Cord& cord) { cur_ = impl_.WriteCord(cord, cur_); }
1129
1130 // Like WriteCord() but writing directly to the target array.
1131 static uint8_t* WriteCordToArray(const absl::Cord& cord, uint8_t* target);
1132
1133
1134 // Write a 16-bit little-endian integer.
WriteLittleEndian16(uint16_t value)1135 void WriteLittleEndian16(uint16_t value) {
1136 cur_ = impl_.EnsureSpace(cur_);
1137 SetCur(WriteLittleEndian16ToArray(value, Cur()));
1138 }
1139 // Like WriteLittleEndian16() but writing directly to the target array.
1140 static uint8_t* WriteLittleEndian16ToArray(uint16_t value, uint8_t* target);
1141 // Write a 32-bit little-endian integer.
WriteLittleEndian32(uint32_t value)1142 void WriteLittleEndian32(uint32_t value) {
1143 cur_ = impl_.EnsureSpace(cur_);
1144 SetCur(WriteLittleEndian32ToArray(value, Cur()));
1145 }
1146 // Like WriteLittleEndian32() but writing directly to the target array.
1147 static uint8_t* WriteLittleEndian32ToArray(uint32_t value, uint8_t* target);
1148 // Write a 64-bit little-endian integer.
WriteLittleEndian64(uint64_t value)1149 void WriteLittleEndian64(uint64_t value) {
1150 cur_ = impl_.EnsureSpace(cur_);
1151 SetCur(WriteLittleEndian64ToArray(value, Cur()));
1152 }
1153 // Like WriteLittleEndian64() but writing directly to the target array.
1154 static uint8_t* WriteLittleEndian64ToArray(uint64_t value, uint8_t* target);
1155
1156 // Write an unsigned integer with Varint encoding. Writing a 32-bit value
1157 // is equivalent to casting it to uint64_t and writing it as a 64-bit value,
1158 // but may be more efficient.
1159 void WriteVarint32(uint32_t value);
1160 // Like WriteVarint32() but writing directly to the target array.
1161 static uint8_t* WriteVarint32ToArray(uint32_t value, uint8_t* target);
1162 // Like WriteVarint32ToArray()
1163 [[deprecated("Please use WriteVarint32ToArray() instead")]] static uint8_t*
WriteVarint32ToArrayOutOfLine(uint32_t value,uint8_t * target)1164 WriteVarint32ToArrayOutOfLine(uint32_t value, uint8_t* target) {
1165 return WriteVarint32ToArray(value, target);
1166 }
1167 // Write an unsigned integer with Varint encoding.
1168 void WriteVarint64(uint64_t value);
1169 // Like WriteVarint64() but writing directly to the target array.
1170 static uint8_t* WriteVarint64ToArray(uint64_t value, uint8_t* target);
1171
1172 // Equivalent to WriteVarint32() except when the value is negative,
1173 // in which case it must be sign-extended to a full 10 bytes.
1174 void WriteVarint32SignExtended(int32_t value);
1175 // Like WriteVarint32SignExtended() but writing directly to the target array.
1176 static uint8_t* WriteVarint32SignExtendedToArray(int32_t value,
1177 uint8_t* target);
1178
1179 // This is identical to WriteVarint32(), but optimized for writing tags.
1180 // In particular, if the input is a compile-time constant, this method
1181 // compiles down to a couple instructions.
1182 // Always inline because otherwise the aforementioned optimization can't work,
1183 // but GCC by default doesn't want to inline this.
1184 void WriteTag(uint32_t value);
1185 // Like WriteTag() but writing directly to the target array.
1186 PROTOBUF_ALWAYS_INLINE
1187 static uint8_t* WriteTagToArray(uint32_t value, uint8_t* target);
1188
1189 // Returns the number of bytes needed to encode the given value as a varint.
1190 static size_t VarintSize32(uint32_t value);
1191 // Returns the number of bytes needed to encode the given value as a varint.
1192 static size_t VarintSize64(uint64_t value);
1193
1194 // If negative, 10 bytes. Otherwise, same as VarintSize32().
1195 static size_t VarintSize32SignExtended(int32_t value);
1196
1197 // Same as above, plus one. The additional one comes at no compute cost.
1198 static size_t VarintSize32PlusOne(uint32_t value);
1199 static size_t VarintSize64PlusOne(uint64_t value);
1200 static size_t VarintSize32SignExtendedPlusOne(int32_t value);
1201
1202 // Compile-time equivalent of VarintSize32().
1203 template <uint32_t Value>
1204 struct StaticVarintSize32 {
1205 static const size_t value = (Value < (1 << 7)) ? 1
1206 : (Value < (1 << 14)) ? 2
1207 : (Value < (1 << 21)) ? 3
1208 : (Value < (1 << 28)) ? 4
1209 : 5;
1210 };
1211
1212 // Returns the total number of bytes written since this object was created.
ByteCount()1213 int ByteCount() const {
1214 return static_cast<int>(impl_.ByteCount(cur_) - start_count_);
1215 }
1216
1217 // Instructs the CodedOutputStream to allow the underlying
1218 // ZeroCopyOutputStream to hold pointers to the original structure instead of
1219 // copying, if it supports it (i.e. output->AllowsAliasing() is true). If the
1220 // underlying stream does not support aliasing, then enabling it has no
1221 // affect. For now, this only affects the behavior of
1222 // WriteRawMaybeAliased().
1223 //
1224 // NOTE: It is caller's responsibility to ensure that the chunk of memory
1225 // remains live until all of the data has been consumed from the stream.
EnableAliasing(bool enabled)1226 void EnableAliasing(bool enabled) { impl_.EnableAliasing(enabled); }
1227
1228 // Indicate to the serializer whether the user wants deterministic
1229 // serialization. The default when this is not called comes from the global
1230 // default, controlled by SetDefaultSerializationDeterministic.
1231 //
1232 // What deterministic serialization means is entirely up to the driver of the
1233 // serialization process (i.e. the caller of methods like WriteVarint32). In
1234 // the case of serializing a proto buffer message using one of the methods of
1235 // MessageLite, this means that for a given binary equal messages will always
1236 // be serialized to the same bytes. This implies:
1237 //
1238 // * Repeated serialization of a message will return the same bytes.
1239 //
1240 // * Different processes running the same binary (including on different
1241 // machines) will serialize equal messages to the same bytes.
1242 //
1243 // Note that this is *not* canonical across languages. It is also unstable
1244 // across different builds with intervening message definition changes, due to
1245 // unknown fields. Users who need canonical serialization (e.g. persistent
1246 // storage in a canonical form, fingerprinting) should define their own
1247 // canonicalization specification and implement the serializer using
1248 // reflection APIs rather than relying on this API.
SetSerializationDeterministic(bool value)1249 void SetSerializationDeterministic(bool value) {
1250 impl_.SetSerializationDeterministic(value);
1251 }
1252
1253 // Return whether the user wants deterministic serialization. See above.
IsSerializationDeterministic()1254 bool IsSerializationDeterministic() const {
1255 return impl_.IsSerializationDeterministic();
1256 }
1257
IsDefaultSerializationDeterministic()1258 static bool IsDefaultSerializationDeterministic() {
1259 return default_serialization_deterministic_.load(
1260 std::memory_order_relaxed) != 0;
1261 }
1262
1263 template <typename Func>
1264 void Serialize(const Func& func);
1265
Cur()1266 uint8_t* Cur() const { return cur_; }
SetCur(uint8_t * ptr)1267 void SetCur(uint8_t* ptr) { cur_ = ptr; }
EpsCopy()1268 EpsCopyOutputStream* EpsCopy() { return &impl_; }
1269
1270 private:
1271 template <class Stream>
1272 void InitEagerly(Stream* stream);
1273
1274 EpsCopyOutputStream impl_;
1275 uint8_t* cur_;
1276 int64_t start_count_;
1277 static std::atomic<bool> default_serialization_deterministic_;
1278
1279 // See above. Other projects may use "friend" to allow them to call this.
1280 // After SetDefaultSerializationDeterministic() completes, all protocol
1281 // buffer serializations will be deterministic by default. Thread safe.
1282 // However, the meaning of "after" is subtle here: to be safe, each thread
1283 // that wants deterministic serialization by default needs to call
1284 // SetDefaultSerializationDeterministic() or ensure on its own that another
1285 // thread has done so.
1286 friend void google::protobuf::internal::MapTestForceDeterministic();
SetDefaultSerializationDeterministic()1287 static void SetDefaultSerializationDeterministic() {
1288 default_serialization_deterministic_.store(true, std::memory_order_relaxed);
1289 }
1290 };
1291
1292 // inline methods ====================================================
1293 // The vast majority of varints are only one byte. These inline
1294 // methods optimize for that case.
1295
ReadVarint32(uint32_t * value)1296 inline bool CodedInputStream::ReadVarint32(uint32_t* value) {
1297 uint32_t v = 0;
1298 if (PROTOBUF_PREDICT_TRUE(buffer_ < buffer_end_)) {
1299 v = *buffer_;
1300 if (v < 0x80) {
1301 *value = v;
1302 Advance(1);
1303 return true;
1304 }
1305 }
1306 int64_t result = ReadVarint32Fallback(v);
1307 *value = static_cast<uint32_t>(result);
1308 return result >= 0;
1309 }
1310
ReadVarint64(uint64_t * value)1311 inline bool CodedInputStream::ReadVarint64(uint64_t* value) {
1312 if (PROTOBUF_PREDICT_TRUE(buffer_ < buffer_end_) && *buffer_ < 0x80) {
1313 *value = *buffer_;
1314 Advance(1);
1315 return true;
1316 }
1317 std::pair<uint64_t, bool> p = ReadVarint64Fallback();
1318 *value = p.first;
1319 return p.second;
1320 }
1321
ReadVarintSizeAsInt(int * value)1322 inline bool CodedInputStream::ReadVarintSizeAsInt(int* value) {
1323 if (PROTOBUF_PREDICT_TRUE(buffer_ < buffer_end_)) {
1324 int v = *buffer_;
1325 if (v < 0x80) {
1326 *value = v;
1327 Advance(1);
1328 return true;
1329 }
1330 }
1331 *value = ReadVarintSizeAsIntFallback();
1332 return *value >= 0;
1333 }
1334
1335 // static
ReadLittleEndian16FromArray(const uint8_t * buffer,uint16_t * value)1336 inline const uint8_t* CodedInputStream::ReadLittleEndian16FromArray(
1337 const uint8_t* buffer, uint16_t* value) {
1338 memcpy(value, buffer, sizeof(*value));
1339 *value = google::protobuf::internal::little_endian::ToHost(*value);
1340 return buffer + sizeof(*value);
1341 }
1342 // static
ReadLittleEndian32FromArray(const uint8_t * buffer,uint32_t * value)1343 inline const uint8_t* CodedInputStream::ReadLittleEndian32FromArray(
1344 const uint8_t* buffer, uint32_t* value) {
1345 memcpy(value, buffer, sizeof(*value));
1346 *value = google::protobuf::internal::little_endian::ToHost(*value);
1347 return buffer + sizeof(*value);
1348 }
1349 // static
ReadLittleEndian64FromArray(const uint8_t * buffer,uint64_t * value)1350 inline const uint8_t* CodedInputStream::ReadLittleEndian64FromArray(
1351 const uint8_t* buffer, uint64_t* value) {
1352 memcpy(value, buffer, sizeof(*value));
1353 *value = google::protobuf::internal::little_endian::ToHost(*value);
1354 return buffer + sizeof(*value);
1355 }
1356
ReadLittleEndian16(uint16_t * value)1357 inline bool CodedInputStream::ReadLittleEndian16(uint16_t* value) {
1358 if (PROTOBUF_PREDICT_TRUE(BufferSize() >= static_cast<int>(sizeof(*value)))) {
1359 buffer_ = ReadLittleEndian16FromArray(buffer_, value);
1360 return true;
1361 } else {
1362 return ReadLittleEndian16Fallback(value);
1363 }
1364 }
1365
ReadLittleEndian32(uint32_t * value)1366 inline bool CodedInputStream::ReadLittleEndian32(uint32_t* value) {
1367 #if defined(ABSL_IS_LITTLE_ENDIAN) && \
1368 !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST)
1369 if (PROTOBUF_PREDICT_TRUE(BufferSize() >= static_cast<int>(sizeof(*value)))) {
1370 buffer_ = ReadLittleEndian32FromArray(buffer_, value);
1371 return true;
1372 } else {
1373 return ReadLittleEndian32Fallback(value);
1374 }
1375 #else
1376 return ReadLittleEndian32Fallback(value);
1377 #endif
1378 }
1379
ReadLittleEndian64(uint64_t * value)1380 inline bool CodedInputStream::ReadLittleEndian64(uint64_t* value) {
1381 #if defined(ABSL_IS_LITTLE_ENDIAN) && \
1382 !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST)
1383 if (PROTOBUF_PREDICT_TRUE(BufferSize() >= static_cast<int>(sizeof(*value)))) {
1384 buffer_ = ReadLittleEndian64FromArray(buffer_, value);
1385 return true;
1386 } else {
1387 return ReadLittleEndian64Fallback(value);
1388 }
1389 #else
1390 return ReadLittleEndian64Fallback(value);
1391 #endif
1392 }
1393
ReadTagNoLastTag()1394 inline uint32_t CodedInputStream::ReadTagNoLastTag() {
1395 uint32_t v = 0;
1396 if (PROTOBUF_PREDICT_TRUE(buffer_ < buffer_end_)) {
1397 v = *buffer_;
1398 if (v < 0x80) {
1399 Advance(1);
1400 return v;
1401 }
1402 }
1403 v = ReadTagFallback(v);
1404 return v;
1405 }
1406
ReadTagWithCutoffNoLastTag(uint32_t cutoff)1407 inline std::pair<uint32_t, bool> CodedInputStream::ReadTagWithCutoffNoLastTag(
1408 uint32_t cutoff) {
1409 // In performance-sensitive code we can expect cutoff to be a compile-time
1410 // constant, and things like "cutoff >= kMax1ByteVarint" to be evaluated at
1411 // compile time.
1412 uint32_t first_byte_or_zero = 0;
1413 if (PROTOBUF_PREDICT_TRUE(buffer_ < buffer_end_)) {
1414 // Hot case: buffer_ non_empty, buffer_[0] in [1, 128).
1415 // TODO: Is it worth rearranging this? E.g., if the number of fields
1416 // is large enough then is it better to check for the two-byte case first?
1417 first_byte_or_zero = buffer_[0];
1418 if (static_cast<int8_t>(buffer_[0]) > 0) {
1419 const uint32_t kMax1ByteVarint = 0x7f;
1420 uint32_t tag = buffer_[0];
1421 Advance(1);
1422 return std::make_pair(tag, cutoff >= kMax1ByteVarint || tag <= cutoff);
1423 }
1424 // Other hot case: cutoff >= 0x80, buffer_ has at least two bytes available,
1425 // and tag is two bytes. The latter is tested by bitwise-and-not of the
1426 // first byte and the second byte.
1427 if (cutoff >= 0x80 && PROTOBUF_PREDICT_TRUE(buffer_ + 1 < buffer_end_) &&
1428 PROTOBUF_PREDICT_TRUE((buffer_[0] & ~buffer_[1]) >= 0x80)) {
1429 const uint32_t kMax2ByteVarint = (0x7f << 7) + 0x7f;
1430 uint32_t tag = (1u << 7) * buffer_[1] + (buffer_[0] - 0x80);
1431 Advance(2);
1432 // It might make sense to test for tag == 0 now, but it is so rare that
1433 // that we don't bother. A varint-encoded 0 should be one byte unless
1434 // the encoder lost its mind. The second part of the return value of
1435 // this function is allowed to be either true or false if the tag is 0,
1436 // so we don't have to check for tag == 0. We may need to check whether
1437 // it exceeds cutoff.
1438 bool at_or_below_cutoff = cutoff >= kMax2ByteVarint || tag <= cutoff;
1439 return std::make_pair(tag, at_or_below_cutoff);
1440 }
1441 }
1442 // Slow path
1443 const uint32_t tag = ReadTagFallback(first_byte_or_zero);
1444 return std::make_pair(tag, static_cast<uint32_t>(tag - 1) < cutoff);
1445 }
1446
LastTagWas(uint32_t expected)1447 inline bool CodedInputStream::LastTagWas(uint32_t expected) {
1448 return last_tag_ == expected;
1449 }
1450
ConsumedEntireMessage()1451 inline bool CodedInputStream::ConsumedEntireMessage() {
1452 return legitimate_message_end_;
1453 }
1454
ExpectTag(uint32_t expected)1455 inline bool CodedInputStream::ExpectTag(uint32_t expected) {
1456 if (expected < (1 << 7)) {
1457 if (PROTOBUF_PREDICT_TRUE(buffer_ < buffer_end_) &&
1458 buffer_[0] == expected) {
1459 Advance(1);
1460 return true;
1461 } else {
1462 return false;
1463 }
1464 } else if (expected < (1 << 14)) {
1465 if (PROTOBUF_PREDICT_TRUE(BufferSize() >= 2) &&
1466 buffer_[0] == static_cast<uint8_t>(expected | 0x80) &&
1467 buffer_[1] == static_cast<uint8_t>(expected >> 7)) {
1468 Advance(2);
1469 return true;
1470 } else {
1471 return false;
1472 }
1473 } else {
1474 // Don't bother optimizing for larger values.
1475 return false;
1476 }
1477 }
1478
ExpectTagFromArray(const uint8_t * buffer,uint32_t expected)1479 inline const uint8_t* CodedInputStream::ExpectTagFromArray(
1480 const uint8_t* buffer, uint32_t expected) {
1481 if (expected < (1 << 7)) {
1482 if (buffer[0] == expected) {
1483 return buffer + 1;
1484 }
1485 } else if (expected < (1 << 14)) {
1486 if (buffer[0] == static_cast<uint8_t>(expected | 0x80) &&
1487 buffer[1] == static_cast<uint8_t>(expected >> 7)) {
1488 return buffer + 2;
1489 }
1490 }
1491 return nullptr;
1492 }
1493
GetDirectBufferPointerInline(const void ** data,int * size)1494 inline void CodedInputStream::GetDirectBufferPointerInline(const void** data,
1495 int* size) {
1496 *data = buffer_;
1497 *size = static_cast<int>(buffer_end_ - buffer_);
1498 }
1499
ExpectAtEnd()1500 inline bool CodedInputStream::ExpectAtEnd() {
1501 // If we are at a limit we know no more bytes can be read. Otherwise, it's
1502 // hard to say without calling Refresh(), and we'd rather not do that.
1503
1504 if (buffer_ == buffer_end_ && ((buffer_size_after_limit_ != 0) ||
1505 (total_bytes_read_ == current_limit_))) {
1506 last_tag_ = 0; // Pretend we called ReadTag()...
1507 legitimate_message_end_ = true; // ... and it hit EOF.
1508 return true;
1509 } else {
1510 return false;
1511 }
1512 }
1513
CurrentPosition()1514 inline int CodedInputStream::CurrentPosition() const {
1515 return total_bytes_read_ - (BufferSize() + buffer_size_after_limit_);
1516 }
1517
Advance(int amount)1518 inline void CodedInputStream::Advance(int amount) { buffer_ += amount; }
1519
SetRecursionLimit(int limit)1520 inline void CodedInputStream::SetRecursionLimit(int limit) {
1521 recursion_budget_ += limit - recursion_limit_;
1522 recursion_limit_ = limit;
1523 }
1524
IncrementRecursionDepth()1525 inline bool CodedInputStream::IncrementRecursionDepth() {
1526 --recursion_budget_;
1527 return recursion_budget_ >= 0;
1528 }
1529
DecrementRecursionDepth()1530 inline void CodedInputStream::DecrementRecursionDepth() {
1531 if (recursion_budget_ < recursion_limit_) ++recursion_budget_;
1532 }
1533
UnsafeDecrementRecursionDepth()1534 inline void CodedInputStream::UnsafeDecrementRecursionDepth() {
1535 assert(recursion_budget_ < recursion_limit_);
1536 ++recursion_budget_;
1537 }
1538
SetExtensionRegistry(const DescriptorPool * pool,MessageFactory * factory)1539 inline void CodedInputStream::SetExtensionRegistry(const DescriptorPool* pool,
1540 MessageFactory* factory) {
1541 extension_pool_ = pool;
1542 extension_factory_ = factory;
1543 }
1544
GetExtensionPool()1545 inline const DescriptorPool* CodedInputStream::GetExtensionPool() {
1546 return extension_pool_;
1547 }
1548
GetExtensionFactory()1549 inline MessageFactory* CodedInputStream::GetExtensionFactory() {
1550 return extension_factory_;
1551 }
1552
BufferSize()1553 inline int CodedInputStream::BufferSize() const {
1554 return static_cast<int>(buffer_end_ - buffer_);
1555 }
1556
CodedInputStream(ZeroCopyInputStream * input)1557 inline CodedInputStream::CodedInputStream(ZeroCopyInputStream* input)
1558 : buffer_(nullptr),
1559 buffer_end_(nullptr),
1560 input_(input),
1561 total_bytes_read_(0),
1562 overflow_bytes_(0),
1563 last_tag_(0),
1564 legitimate_message_end_(false),
1565 aliasing_enabled_(false),
1566 force_eager_parsing_(false),
1567 current_limit_(std::numeric_limits<int32_t>::max()),
1568 buffer_size_after_limit_(0),
1569 total_bytes_limit_(kDefaultTotalBytesLimit),
1570 recursion_budget_(default_recursion_limit_),
1571 recursion_limit_(default_recursion_limit_),
1572 extension_pool_(nullptr),
1573 extension_factory_(nullptr) {
1574 // Eagerly Refresh() so buffer space is immediately available.
1575 Refresh();
1576 }
1577
CodedInputStream(const uint8_t * buffer,int size)1578 inline CodedInputStream::CodedInputStream(const uint8_t* buffer, int size)
1579 : buffer_(buffer),
1580 buffer_end_(buffer + size),
1581 input_(nullptr),
1582 total_bytes_read_(size),
1583 overflow_bytes_(0),
1584 last_tag_(0),
1585 legitimate_message_end_(false),
1586 aliasing_enabled_(false),
1587 force_eager_parsing_(false),
1588 current_limit_(size),
1589 buffer_size_after_limit_(0),
1590 total_bytes_limit_(kDefaultTotalBytesLimit),
1591 recursion_budget_(default_recursion_limit_),
1592 recursion_limit_(default_recursion_limit_),
1593 extension_pool_(nullptr),
1594 extension_factory_(nullptr) {
1595 // Note that setting current_limit_ == size is important to prevent some
1596 // code paths from trying to access input_ and segfaulting.
1597 }
1598
IsFlat()1599 inline bool CodedInputStream::IsFlat() const { return input_ == nullptr; }
1600
Skip(int count)1601 inline bool CodedInputStream::Skip(int count) {
1602 if (count < 0) return false; // security: count is often user-supplied
1603
1604 const int original_buffer_size = BufferSize();
1605
1606 if (count <= original_buffer_size) {
1607 // Just skipping within the current buffer. Easy.
1608 Advance(count);
1609 return true;
1610 }
1611
1612 return SkipFallback(count, original_buffer_size);
1613 }
1614
1615 template <class Stream, class>
CodedOutputStream(Stream * stream)1616 inline CodedOutputStream::CodedOutputStream(Stream* stream)
1617 : impl_(stream, IsDefaultSerializationDeterministic(), &cur_),
1618 start_count_(stream->ByteCount()) {
1619 InitEagerly(stream);
1620 }
1621
1622 template <class Stream, class>
CodedOutputStream(Stream * stream,bool eager_init)1623 inline CodedOutputStream::CodedOutputStream(Stream* stream, bool eager_init)
1624 : impl_(stream, IsDefaultSerializationDeterministic(), &cur_),
1625 start_count_(stream->ByteCount()) {
1626 if (eager_init) {
1627 InitEagerly(stream);
1628 }
1629 }
1630
1631 template <class Stream>
InitEagerly(Stream * stream)1632 inline void CodedOutputStream::InitEagerly(Stream* stream) {
1633 void* data;
1634 int size;
1635 if (PROTOBUF_PREDICT_TRUE(stream->Next(&data, &size) && size > 0)) {
1636 cur_ = impl_.SetInitialBuffer(data, size);
1637 }
1638 }
1639
WriteVarint32ToArray(uint32_t value,uint8_t * target)1640 inline uint8_t* CodedOutputStream::WriteVarint32ToArray(uint32_t value,
1641 uint8_t* target) {
1642 return EpsCopyOutputStream::UnsafeVarint(value, target);
1643 }
1644
WriteVarint64ToArray(uint64_t value,uint8_t * target)1645 inline uint8_t* CodedOutputStream::WriteVarint64ToArray(uint64_t value,
1646 uint8_t* target) {
1647 return EpsCopyOutputStream::UnsafeVarint(value, target);
1648 }
1649
WriteVarint32SignExtended(int32_t value)1650 inline void CodedOutputStream::WriteVarint32SignExtended(int32_t value) {
1651 WriteVarint64(static_cast<uint64_t>(value));
1652 }
1653
WriteVarint32SignExtendedToArray(int32_t value,uint8_t * target)1654 inline uint8_t* CodedOutputStream::WriteVarint32SignExtendedToArray(
1655 int32_t value, uint8_t* target) {
1656 return WriteVarint64ToArray(static_cast<uint64_t>(value), target);
1657 }
1658
WriteLittleEndian16ToArray(uint16_t value,uint8_t * target)1659 inline uint8_t* CodedOutputStream::WriteLittleEndian16ToArray(uint16_t value,
1660 uint8_t* target) {
1661 uint16_t little_endian_value = google::protobuf::internal::little_endian::ToHost(value);
1662 memcpy(target, &little_endian_value, sizeof(value));
1663 return target + sizeof(value);
1664 }
1665
WriteLittleEndian32ToArray(uint32_t value,uint8_t * target)1666 inline uint8_t* CodedOutputStream::WriteLittleEndian32ToArray(uint32_t value,
1667 uint8_t* target) {
1668 #if defined(ABSL_IS_LITTLE_ENDIAN) && \
1669 !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST)
1670 memcpy(target, &value, sizeof(value));
1671 #else
1672 target[0] = static_cast<uint8_t>(value);
1673 target[1] = static_cast<uint8_t>(value >> 8);
1674 target[2] = static_cast<uint8_t>(value >> 16);
1675 target[3] = static_cast<uint8_t>(value >> 24);
1676 #endif
1677 return target + sizeof(value);
1678 }
1679
WriteLittleEndian64ToArray(uint64_t value,uint8_t * target)1680 inline uint8_t* CodedOutputStream::WriteLittleEndian64ToArray(uint64_t value,
1681 uint8_t* target) {
1682 #if defined(ABSL_IS_LITTLE_ENDIAN) && \
1683 !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST)
1684 memcpy(target, &value, sizeof(value));
1685 #else
1686 uint32_t part0 = static_cast<uint32_t>(value);
1687 uint32_t part1 = static_cast<uint32_t>(value >> 32);
1688
1689 target[0] = static_cast<uint8_t>(part0);
1690 target[1] = static_cast<uint8_t>(part0 >> 8);
1691 target[2] = static_cast<uint8_t>(part0 >> 16);
1692 target[3] = static_cast<uint8_t>(part0 >> 24);
1693 target[4] = static_cast<uint8_t>(part1);
1694 target[5] = static_cast<uint8_t>(part1 >> 8);
1695 target[6] = static_cast<uint8_t>(part1 >> 16);
1696 target[7] = static_cast<uint8_t>(part1 >> 24);
1697 #endif
1698 return target + sizeof(value);
1699 }
1700
WriteVarint32(uint32_t value)1701 inline void CodedOutputStream::WriteVarint32(uint32_t value) {
1702 cur_ = impl_.EnsureSpace(cur_);
1703 SetCur(WriteVarint32ToArray(value, Cur()));
1704 }
1705
WriteVarint64(uint64_t value)1706 inline void CodedOutputStream::WriteVarint64(uint64_t value) {
1707 cur_ = impl_.EnsureSpace(cur_);
1708 SetCur(WriteVarint64ToArray(value, Cur()));
1709 }
1710
WriteTag(uint32_t value)1711 inline void CodedOutputStream::WriteTag(uint32_t value) {
1712 WriteVarint32(value);
1713 }
1714
WriteTagToArray(uint32_t value,uint8_t * target)1715 inline uint8_t* CodedOutputStream::WriteTagToArray(uint32_t value,
1716 uint8_t* target) {
1717 return WriteVarint32ToArray(value, target);
1718 }
1719
1720 #if (defined(__x86__) || defined(__x86_64__) || defined(_M_IX86) || \
1721 defined(_M_X64)) && \
1722 !(defined(__LZCNT__) || defined(__AVX2__))
1723 // X86 CPUs lacking the lzcnt instruction are faster with the bsr-based
1724 // implementation. MSVC does not define __LZCNT__, the nearest option that
1725 // it interprets as lzcnt availability is __AVX2__.
1726 #define PROTOBUF_CODED_STREAM_H_PREFER_BSR 1
1727 #else
1728 #define PROTOBUF_CODED_STREAM_H_PREFER_BSR 0
1729 #endif
VarintSize32(uint32_t value)1730 inline size_t CodedOutputStream::VarintSize32(uint32_t value) {
1731 #if PROTOBUF_CODED_STREAM_H_PREFER_BSR
1732 // Explicit OR 0x1 to avoid calling absl::countl_zero(0), which
1733 // requires a branch to check for on platforms without a clz instruction.
1734 uint32_t log2value = (std::numeric_limits<uint32_t>::digits - 1) -
1735 absl::countl_zero(value | 0x1);
1736 return static_cast<size_t>((log2value * 9 + (64 + 9)) / 64);
1737 #else
1738 uint32_t clz = absl::countl_zero(value);
1739 return static_cast<size_t>(
1740 ((std::numeric_limits<uint32_t>::digits * 9 + 64) - (clz * 9)) / 64);
1741 #endif
1742 }
1743
VarintSize32PlusOne(uint32_t value)1744 inline size_t CodedOutputStream::VarintSize32PlusOne(uint32_t value) {
1745 // Same as above, but one more.
1746 #if PROTOBUF_CODED_STREAM_H_PREFER_BSR
1747 uint32_t log2value = (std::numeric_limits<uint32_t>::digits - 1) -
1748 absl::countl_zero(value | 0x1);
1749 return static_cast<size_t>((log2value * 9 + (64 + 9) + 64) / 64);
1750 #else
1751 uint32_t clz = absl::countl_zero(value);
1752 return static_cast<size_t>(
1753 ((std::numeric_limits<uint32_t>::digits * 9 + 64 + 64) - (clz * 9)) / 64);
1754 #endif
1755 }
1756
VarintSize64(uint64_t value)1757 inline size_t CodedOutputStream::VarintSize64(uint64_t value) {
1758 #if PROTOBUF_CODED_STREAM_H_PREFER_BSR
1759 // Explicit OR 0x1 to avoid calling absl::countl_zero(0), which
1760 // requires a branch to check for on platforms without a clz instruction.
1761 uint32_t log2value = (std::numeric_limits<uint64_t>::digits - 1) -
1762 absl::countl_zero(value | 0x1);
1763 return static_cast<size_t>((log2value * 9 + (64 + 9)) / 64);
1764 #else
1765 uint32_t clz = absl::countl_zero(value);
1766 return static_cast<size_t>(
1767 ((std::numeric_limits<uint64_t>::digits * 9 + 64) - (clz * 9)) / 64);
1768 #endif
1769 }
1770
VarintSize64PlusOne(uint64_t value)1771 inline size_t CodedOutputStream::VarintSize64PlusOne(uint64_t value) {
1772 // Same as above, but one more.
1773 #if PROTOBUF_CODED_STREAM_H_PREFER_BSR
1774 uint32_t log2value = (std::numeric_limits<uint64_t>::digits - 1) -
1775 absl::countl_zero(value | 0x1);
1776 return static_cast<size_t>((log2value * 9 + (64 + 9) + 64) / 64);
1777 #else
1778 uint32_t clz = absl::countl_zero(value);
1779 return static_cast<size_t>(
1780 ((std::numeric_limits<uint64_t>::digits * 9 + 64 + 64) - (clz * 9)) / 64);
1781 #endif
1782 }
1783
VarintSize32SignExtended(int32_t value)1784 inline size_t CodedOutputStream::VarintSize32SignExtended(int32_t value) {
1785 return VarintSize64(static_cast<uint64_t>(int64_t{value}));
1786 }
1787
VarintSize32SignExtendedPlusOne(int32_t value)1788 inline size_t CodedOutputStream::VarintSize32SignExtendedPlusOne(
1789 int32_t value) {
1790 return VarintSize64PlusOne(static_cast<uint64_t>(int64_t{value}));
1791 }
1792 #undef PROTOBUF_CODED_STREAM_H_PREFER_BSR
1793
WriteString(const std::string & str)1794 inline void CodedOutputStream::WriteString(const std::string& str) {
1795 WriteRaw(str.data(), static_cast<int>(str.size()));
1796 }
1797
WriteRawMaybeAliased(const void * data,int size)1798 inline void CodedOutputStream::WriteRawMaybeAliased(const void* data,
1799 int size) {
1800 cur_ = impl_.WriteRawMaybeAliased(data, size, cur_);
1801 }
1802
WriteRawToArray(const void * data,int size,uint8_t * target)1803 inline uint8_t* CodedOutputStream::WriteRawToArray(const void* data, int size,
1804 uint8_t* target) {
1805 memcpy(target, data, static_cast<unsigned int>(size));
1806 return target + size;
1807 }
1808
WriteStringToArray(const std::string & str,uint8_t * target)1809 inline uint8_t* CodedOutputStream::WriteStringToArray(const std::string& str,
1810 uint8_t* target) {
1811 return WriteRawToArray(str.data(), static_cast<int>(str.size()), target);
1812 }
1813
1814 } // namespace io
1815 } // namespace protobuf
1816 } // namespace google
1817
1818 #if defined(_MSC_VER) && _MSC_VER >= 1300 && !defined(__INTEL_COMPILER)
1819 #pragma runtime_checks("c", restore)
1820 #endif // _MSC_VER && !defined(__INTEL_COMPILER)
1821
1822 #include "google/protobuf/port_undef.inc"
1823
1824 #endif // GOOGLE_PROTOBUF_IO_CODED_STREAM_H__
1825