#region Copyright notice and license
// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc.  All rights reserved.
//
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file or at
// https://developers.google.com/open-source/licenses/bsd
#endregion

using Google.Protobuf.Collections;
using System;
using System.IO;
using System.Security;

namespace Google.Protobuf
{
    /// <summary>
    /// Reads and decodes protocol message fields.
    /// </summary>
    /// <remarks>
    /// <para>
    /// This class is generally used by generated code to read appropriate
    /// primitives from the stream. It effectively encapsulates the lowest
    /// levels of protocol buffer format.
    /// </para>
    /// <para>
    /// Repeated fields and map fields are not handled by this class; use <see cref="RepeatedField{T}"/>
    /// and <see cref="MapField{TKey, TValue}"/> to serialize such fields.
    /// </para>
    /// </remarks>
    [SecuritySafeCritical]
    public sealed class CodedInputStream : IDisposable
    {
        /// <summary>
        /// Whether to leave the underlying stream open when disposing of this stream.
        /// This is always true when there's no stream.
        /// </summary>
        private readonly bool leaveOpen;

        /// <summary>
        /// Buffer of data read from the stream or provided at construction time.
        /// </summary>
        private readonly byte[] buffer;

        /// <summary>
        /// The stream to read further input from, or null if the byte array buffer was provided
        /// directly on construction, with no further data available.
        /// </summary>
        private readonly Stream input;

        /// <summary>
        /// The parser state is kept separately so that other parse implementations can reuse the same
        /// parsing primitives.
        /// </summary>
        private ParserInternalState state;

        internal const int DefaultRecursionLimit = 100;
        internal const int DefaultSizeLimit = Int32.MaxValue;
        internal const int BufferSize = 4096;

        #region Construction
        // Note that the checks are performed such that we don't end up checking obviously-valid things
        // like non-null references for arrays we've just created.

        /// <summary>
        /// Creates a new CodedInputStream reading data from the given byte array.
        /// </summary>
        /// <param name="buffer">The byte array to read from; must not be null.</param>
        public CodedInputStream(byte[] buffer) : this(null, ProtoPreconditions.CheckNotNull(buffer, nameof(buffer)), 0, buffer.Length, true)
        {
        }

        /// <summary>
        /// Creates a new <see cref="CodedInputStream"/> that reads from the given byte array slice.
        /// </summary>
        /// <param name="buffer">The byte array containing the slice; must not be null.</param>
        /// <param name="offset">The index of the first byte of the slice within <paramref name="buffer"/>.</param>
        /// <param name="length">The number of bytes in the slice.</param>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="offset"/> is outside the buffer, or <paramref name="length"/> is negative
        /// or extends beyond the end of the buffer.
        /// </exception>
        public CodedInputStream(byte[] buffer, int offset, int length)
            // The sum is computed unchecked so that an out-of-range length always reaches the
            // argument validation below, whatever overflow-checking mode the assembly is built with.
            : this(null, ProtoPreconditions.CheckNotNull(buffer, nameof(buffer)), offset, unchecked(offset + length), true)
        {
            if (offset < 0 || offset > buffer.Length)
            {
                throw new ArgumentOutOfRangeException(nameof(offset), "Offset must be within the buffer");
            }
            // Compare against the remaining space instead of computing offset + length: the sum can
            // wrap around to a negative value for a large length and would then slip past the check.
            if (length < 0 || length > buffer.Length - offset)
            {
                throw new ArgumentOutOfRangeException(nameof(length), "Length must be non-negative and within the buffer");
            }
        }

        /// <summary>
        /// Creates a new <see cref="CodedInputStream"/> reading data from the given stream, which will be disposed
        /// when the returned object is disposed.
        /// </summary>
        /// <param name="input">The stream to read from.</param>
        public CodedInputStream(Stream input) : this(input, false)
        {
        }

        /// <summary>
        /// Creates a new <see cref="CodedInputStream"/> reading data from the given stream.
        /// </summary>
        /// <param name="input">The stream to read from.</param>
        /// <param name="leaveOpen"><c>true</c> to leave <paramref name="input"/> open when the returned
        /// <c cref="CodedInputStream"/> is disposed; <c>false</c> to dispose of the given stream when the
        /// returned object is disposed.</param>
        public CodedInputStream(Stream input, bool leaveOpen)
            : this(ProtoPreconditions.CheckNotNull(input, nameof(input)), new byte[BufferSize], 0, 0, leaveOpen)
        {
        }

        /// <summary>
        /// Creates a new CodedInputStream reading data from the given
        /// stream and buffer, using the default limits.
        /// </summary>
        internal CodedInputStream(Stream input, byte[] buffer, int bufferPos, int bufferSize, bool leaveOpen)
        {
            this.input = input;
            this.buffer = buffer;
            this.state.bufferPos = bufferPos;
            this.state.bufferSize = bufferSize;
            this.state.sizeLimit = DefaultSizeLimit;
            this.state.recursionLimit = DefaultRecursionLimit;
            SegmentedBufferHelper.Initialize(this, out this.state.segmentedBufferHelper);
            this.leaveOpen = leaveOpen;

            this.state.currentLimit = int.MaxValue;
        }

        /// <summary>
        /// Creates a new CodedInputStream reading data from the given
        /// stream and buffer, using the specified limits.
        /// </summary>
        /// <remarks>
        /// This chains to the version with the default limits instead of vice versa to avoid
        /// having to check that the default values are valid every time.
        /// </remarks>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="sizeLimit"/> or <paramref name="recursionLimit"/> is zero or negative.
        /// </exception>
        internal CodedInputStream(Stream input, byte[] buffer, int bufferPos, int bufferSize, int sizeLimit, int recursionLimit, bool leaveOpen)
            : this(input, buffer, bufferPos, bufferSize, leaveOpen)
        {
            if (sizeLimit <= 0)
            {
                throw new ArgumentOutOfRangeException(nameof(sizeLimit), "Size limit must be positive");
            }
            if (recursionLimit <= 0)
            {
                // The parameter name must match the real parameter so that ParamName is usable by callers.
                throw new ArgumentOutOfRangeException(nameof(recursionLimit), "Recursion limit must be positive");
            }
            this.state.sizeLimit = sizeLimit;
            this.state.recursionLimit = recursionLimit;
        }
        #endregion

        /// <summary>
        /// Creates a <see cref="CodedInputStream"/> with the specified size and recursion limits, reading
        /// from an input stream.
        /// </summary>
        /// <remarks>
        /// This method exists separately from the constructor to reduce the number of constructor overloads.
        /// It is likely to be used considerably less frequently than the constructors, as the default limits
        /// are suitable for most use cases.
        /// </remarks>
        /// <param name="input">The input stream to read from</param>
        /// <param name="sizeLimit">The total limit of data to read from the stream.</param>
        /// <param name="recursionLimit">The maximum recursion depth to allow while reading.</param>
        /// <returns>A <c>CodedInputStream</c> reading from <paramref name="input"/> with the specified size
        /// and recursion limits.</returns>
        public static CodedInputStream CreateWithLimits(Stream input, int sizeLimit, int recursionLimit)
        {
            // Note: we may want an overload accepting leaveOpen
            return new CodedInputStream(input, new byte[BufferSize], 0, 0, sizeLimit, recursionLimit, false);
        }

        /// <summary>
        /// Returns the current position in the input stream, or the position in the input buffer
        /// </summary>
        public long Position
        {
            get
            {
                if (input != null)
                {
                    // The stream has already been read ahead into the buffer, so subtract the
                    // buffered bytes that have not been consumed yet.
                    return input.Position - ((state.bufferSize + state.bufferSizeAfterLimit) - state.bufferPos);
                }
                return state.bufferPos;
            }
        }

        /// <summary>
        /// Returns the last tag read, or 0 if no tags have been read or we've read beyond
        /// the end of the stream.
        /// </summary>
        internal uint LastTag { get { return state.lastTag; } }

        /// <summary>
        /// Returns the size limit for this stream.
        /// </summary>
        /// <remarks>
        /// This limit is applied when reading from the underlying stream, as a sanity check. It is
        /// not applied when reading from a byte array data source without an underlying stream.
        /// The default value is Int32.MaxValue.
        /// </remarks>
        /// <value>
        /// The size limit.
        /// </value>
        public int SizeLimit { get { return state.sizeLimit; } }

        /// <summary>
        /// Returns the recursion limit for this stream. This limit is applied whilst reading messages,
        /// to avoid maliciously-recursive data.
        /// </summary>
        /// <remarks>
        /// The default limit is 100.
        /// </remarks>
        /// <value>
        /// The recursion limit for this stream.
        /// </value>
        public int RecursionLimit { get { return state.recursionLimit; } }

        /// <summary>
        /// Internal-only property; when set to true, unknown fields will be discarded while parsing.
        /// </summary>
        internal bool DiscardUnknownFields
        {
            get { return state.DiscardUnknownFields; }
            set { state.DiscardUnknownFields = value; }
        }

        /// <summary>
        /// Internal-only property; provides extension identifiers to compatible messages while parsing.
        /// </summary>
        internal ExtensionRegistry ExtensionRegistry
        {
            get { return state.ExtensionRegistry; }
            set { state.ExtensionRegistry = value; }
        }

        internal byte[] InternalBuffer => buffer;

        internal Stream InternalInputStream => input;

        internal ref ParserInternalState InternalState => ref state;

        /// <summary>
        /// Disposes of this instance, potentially closing any underlying stream.
        /// </summary>
        /// <remarks>
        /// As there is no flushing to perform here, disposing of a <see cref="CodedInputStream"/> which
        /// was constructed with the <c>leaveOpen</c> option parameter set to <c>true</c> (or one which
        /// was constructed to read from a byte array) has no effect.
        /// </remarks>
        public void Dispose()
        {
            if (!leaveOpen)
            {
                // CreateWithLimits forwards its stream unchecked with leaveOpen = false, so an
                // instance without a stream can reach this point; Dispose must not throw for it.
                input?.Dispose();
            }
        }

        #region Validation
        /// <summary>
        /// Verifies that the last call to ReadTag() returned tag 0 - in other words,
        /// we've reached the end of the stream when we expected to.
        /// </summary>
        /// <exception cref="InvalidProtocolBufferException">The
        /// tag read was not the one specified</exception>
        internal void CheckReadEndOfStreamTag()
        {
            ParsingPrimitivesMessages.CheckReadEndOfStreamTag(ref state);
        }
        #endregion

        #region Reading of tags etc

        /// <summary>
        /// Peeks at the next field tag. This is like calling <see cref="ReadTag"/>, but the
        /// tag is not consumed. (So a subsequent call to <see cref="ReadTag"/> will return the
        /// same value.)
        /// </summary>
        public uint PeekTag()
        {
            var span = new ReadOnlySpan<byte>(buffer);
            return ParsingPrimitives.PeekTag(ref span, ref state);
        }

        /// <summary>
        /// Reads a field tag, returning the tag of 0 for "end of stream".
        /// </summary>
        /// <remarks>
        /// If this method returns 0, it doesn't necessarily mean the end of all
        /// the data in this CodedInputStream; it may be the end of the logical stream
        /// for an embedded message, for example.
        /// </remarks>
        /// <returns>The next field tag, or 0 for end of stream. (0 is never a valid tag.)</returns>
        public uint ReadTag()
        {
            var span = new ReadOnlySpan<byte>(buffer);
            return ParsingPrimitives.ParseTag(ref span, ref state);
        }

        /// <summary>
        /// Skips the data for the field with the tag we've just read.
        /// This should be called directly after <see cref="ReadTag"/>, when
        /// the caller wishes to skip an unknown field.
        /// </summary>
        /// <remarks>
        /// This method throws <see cref="InvalidProtocolBufferException"/> if the last-read tag was an end-group tag.
        /// If a caller wishes to skip a group, they should skip the whole group, by calling this method after reading the
        /// start-group tag. This behavior allows callers to call this method on any field they don't understand, correctly
        /// resulting in an error if an end-group tag has not been paired with an earlier start-group tag.
        /// </remarks>
        /// <exception cref="InvalidProtocolBufferException">The last tag was an end-group tag</exception>
        /// <exception cref="InvalidOperationException">The last read operation read to the end of the logical stream</exception>
        public void SkipLastField()
        {
            var span = new ReadOnlySpan<byte>(buffer);
            ParsingPrimitivesMessages.SkipLastField(ref span, ref state);
        }

        /// <summary>
        /// Skip a group.
        /// </summary>
        internal void SkipGroup(uint startGroupTag)
        {
            var span = new ReadOnlySpan<byte>(buffer);
            ParsingPrimitivesMessages.SkipGroup(ref span, ref state, startGroupTag);
        }

        /// <summary>
        /// Reads a double field from the stream.
        /// </summary>
        public double ReadDouble()
        {
            var span = new ReadOnlySpan<byte>(buffer);
            return ParsingPrimitives.ParseDouble(ref span, ref state);
        }

        /// <summary>
        /// Reads a float field from the stream.
        /// </summary>
        public float ReadFloat()
        {
            var span = new ReadOnlySpan<byte>(buffer);
            return ParsingPrimitives.ParseFloat(ref span, ref state);
        }

        /// <summary>
        /// Reads a uint64 field from the stream.
        /// </summary>
        public ulong ReadUInt64()
        {
            return ReadRawVarint64();
        }

        /// <summary>
        /// Reads an int64 field from the stream.
        /// </summary>
        public long ReadInt64()
        {
            return (long) ReadRawVarint64();
        }

        /// <summary>
        /// Reads an int32 field from the stream.
        /// </summary>
        public int ReadInt32()
        {
            return (int) ReadRawVarint32();
        }

        /// <summary>
        /// Reads a fixed64 field from the stream.
        /// </summary>
        public ulong ReadFixed64()
        {
            return ReadRawLittleEndian64();
        }

        /// <summary>
        /// Reads a fixed32 field from the stream.
        /// </summary>
        public uint ReadFixed32()
        {
            return ReadRawLittleEndian32();
        }

        /// <summary>
        /// Reads a bool field from the stream.
        /// </summary>
        public bool ReadBool()
        {
            // Any non-zero varint is treated as true.
            return ReadRawVarint64() != 0;
        }

        /// <summary>
        /// Reads a string field from the stream.
        /// </summary>
        public string ReadString()
        {
            var span = new ReadOnlySpan<byte>(buffer);
            return ParsingPrimitives.ReadString(ref span, ref state);
        }

        /// <summary>
        /// Reads an embedded message field value from the stream.
        /// </summary>
        public void ReadMessage(IMessage builder)
        {
            // TODO: if the message doesn't implement IBufferMessage (and thus does not provide the InternalMergeFrom method),
            // what we're doing here works fine, but could be more efficient.
            // What happens is that we first initialize a ParseContext from the current coded input stream only to parse the length of the message, at which point
            // we will need to switch back again to CodedInputStream-based parsing (which involves copying and storing the state) to be able to
            // invoke the legacy MergeFrom(CodedInputStream) method.
            // For now, this inefficiency is fine, considering this is only a backward-compatibility scenario (and regenerating the code fixes it).
            ParseContext.Initialize(buffer.AsSpan(), ref state, out ParseContext ctx);
            try
            {
                ParsingPrimitivesMessages.ReadMessage(ref ctx, builder);
            }
            finally
            {
                // Store the context's state back into this stream, even when parsing threw.
                ctx.CopyStateTo(this);
            }
        }

        /// <summary>
        /// Reads an embedded group field from the stream.
        /// </summary>
        public void ReadGroup(IMessage builder)
        {
            ParseContext.Initialize(this, out ParseContext ctx);
            try
            {
                ParsingPrimitivesMessages.ReadGroup(ref ctx, builder);
            }
            finally
            {
                // Store the context's state back into this stream, even when parsing threw.
                ctx.CopyStateTo(this);
            }
        }

        /// <summary>
        /// Reads a bytes field value from the stream.
        /// </summary>
        public ByteString ReadBytes()
        {
            var span = new ReadOnlySpan<byte>(buffer);
            return ParsingPrimitives.ReadBytes(ref span, ref state);
        }

        /// <summary>
        /// Reads a uint32 field value from the stream.
        /// </summary>
        public uint ReadUInt32()
        {
            return ReadRawVarint32();
        }

        /// <summary>
        /// Reads an enum field value from the stream.
        /// </summary>
        public int ReadEnum()
        {
            // Currently just a pass-through, but it's nice to separate it logically from ReadInt32.
            return (int) ReadRawVarint32();
        }

        /// <summary>
        /// Reads an sfixed32 field value from the stream.
        /// </summary>
        public int ReadSFixed32()
        {
            return (int) ReadRawLittleEndian32();
        }

        /// <summary>
        /// Reads an sfixed64 field value from the stream.
        /// </summary>
        public long ReadSFixed64()
        {
            return (long) ReadRawLittleEndian64();
        }

        /// <summary>
        /// Reads an sint32 field value from the stream.
        /// </summary>
        public int ReadSInt32()
        {
            return ParsingPrimitives.DecodeZigZag32(ReadRawVarint32());
        }

        /// <summary>
        /// Reads an sint64 field value from the stream.
        /// </summary>
        public long ReadSInt64()
        {
            return ParsingPrimitives.DecodeZigZag64(ReadRawVarint64());
        }

        /// <summary>
        /// Reads a length for length-delimited data.
        /// </summary>
        /// <remarks>
        /// This is internally just reading a varint, but this method exists
        /// to make the calling code clearer.
        /// </remarks>
        public int ReadLength()
        {
            var span = new ReadOnlySpan<byte>(buffer);
            return ParsingPrimitives.ParseLength(ref span, ref state);
        }

        /// <summary>
        /// Peeks at the next tag in the stream. If it matches <paramref name="tag"/>,
        /// the tag is consumed and the method returns <c>true</c>; otherwise, the
        /// stream is left in the original position and the method returns <c>false</c>.
        /// </summary>
        public bool MaybeConsumeTag(uint tag)
        {
            var span = new ReadOnlySpan<byte>(buffer);
            return ParsingPrimitives.MaybeConsumeTag(ref span, ref state, tag);
        }

        #endregion

        #region Underlying reading primitives

        /// <summary>
        /// Reads a raw Varint from the stream.  If larger than 32 bits, discard the upper bits.
        /// This method is optimised for the case where we've got lots of data in the buffer.
        /// That means we can check the size just once, then just read directly from the buffer
        /// without constant rechecking of the buffer length.
        /// </summary>
        internal uint ReadRawVarint32()
        {
            var span = new ReadOnlySpan<byte>(buffer);
            return ParsingPrimitives.ParseRawVarint32(ref span, ref state);
        }

        /// <summary>
        /// Reads a varint from the input one byte at a time, so that it does not
        /// read any bytes after the end of the varint. If you simply wrapped the
        /// stream in a CodedInputStream and used ReadRawVarint32(Stream)
        /// then you would probably end up reading past the end of the varint since
        /// CodedInputStream buffers its input.
        /// </summary>
        /// <param name="input">The stream to read the varint from.</param>
        /// <returns>The value read from <paramref name="input"/>.</returns>
        internal static uint ReadRawVarint32(Stream input)
        {
            return ParsingPrimitives.ReadRawVarint32(input);
        }

        /// <summary>
        /// Reads a raw varint from the stream.
        /// </summary>
        internal ulong ReadRawVarint64()
        {
            var span = new ReadOnlySpan<byte>(buffer);
            return ParsingPrimitives.ParseRawVarint64(ref span, ref state);
        }

        /// <summary>
        /// Reads a 32-bit little-endian integer from the stream.
        /// </summary>
        internal uint ReadRawLittleEndian32()
        {
            var span = new ReadOnlySpan<byte>(buffer);
            return ParsingPrimitives.ParseRawLittleEndian32(ref span, ref state);
        }

        /// <summary>
        /// Reads a 64-bit little-endian integer from the stream.
        /// </summary>
        internal ulong ReadRawLittleEndian64()
        {
            var span = new ReadOnlySpan<byte>(buffer);
            return ParsingPrimitives.ParseRawLittleEndian64(ref span, ref state);
        }
        #endregion

        #region Internal reading and buffer management

        /// <summary>
        /// Sets currentLimit to (current position) + byteLimit. This is called
        /// when descending into a length-delimited embedded message. The previous
        /// limit is returned.
        /// </summary>
        /// <returns>The old limit.</returns>
        internal int PushLimit(int byteLimit)
        {
            return SegmentedBufferHelper.PushLimit(ref state, byteLimit);
        }

        /// <summary>
        /// Discards the current limit, returning the previous limit.
        /// </summary>
        internal void PopLimit(int oldLimit)
        {
            SegmentedBufferHelper.PopLimit(ref state, oldLimit);
        }

        /// <summary>
        /// Returns whether or not all the data before the limit has been read.
        /// </summary>
        internal bool ReachedLimit
        {
            get
            {
                return SegmentedBufferHelper.IsReachedLimit(ref state);
            }
        }

        /// <summary>
        /// Returns true if the stream has reached the end of the input. This is the
        /// case if either the end of the underlying input source has been reached or
        /// the stream has reached a limit created using PushLimit.
        /// </summary>
        public bool IsAtEnd
        {
            get
            {
                var span = new ReadOnlySpan<byte>(buffer);
                return SegmentedBufferHelper.IsAtEnd(ref span, ref state);
            }
        }

        /// <summary>
        /// Reads a fixed size of bytes from the input.
        /// </summary>
        /// <exception cref="InvalidProtocolBufferException">
        /// the end of the stream or the current limit was reached
        /// </exception>
        internal byte[] ReadRawBytes(int size)
        {
            var span = new ReadOnlySpan<byte>(buffer);
            return ParsingPrimitives.ReadRawBytes(ref span, ref state, size);
        }

        /// <summary>
        /// Reads a top-level message or a nested message after the limits for this message have been pushed.
        /// (parser will proceed until the end of the current limit)
        /// NOTE: this method needs to be public because it's invoked by the generated code - e.g. msg.MergeFrom(CodedInputStream input) method
        /// </summary>
        public void ReadRawMessage(IMessage message)
        {
            ParseContext.Initialize(this, out ParseContext ctx);
            try
            {
                ParsingPrimitivesMessages.ReadRawMessage(ref ctx, message);
            }
            finally
            {
                // Store the context's state back into this stream, even when parsing threw.
                ctx.CopyStateTo(this);
            }
        }
        #endregion
    }
}