#region Copyright notice and license
// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc. All rights reserved.
//
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file or at
// https://developers.google.com/open-source/licenses/bsd
#endregion

using System;
using System.Collections.Generic;
using System.Globalization;
using System.IO;
using System.Text;

namespace Google.Protobuf
{
    /// <summary>
    /// Simple but strict JSON tokenizer, rigidly following RFC 7159.
    /// </summary>
    /// <remarks>
    /// <para>
    /// This tokenizer is stateful, and only returns "useful" tokens - names, values etc.
    /// It does not create tokens for the separator between names and values, or for the comma
    /// between values. It validates the token stream as it goes - so callers can assume that the
    /// tokens it produces are appropriate. For example, it would never produce "start object, end array."
    /// </para>
    /// <para>Implementation details: the base class handles single token push-back and object depth
    /// tracking; concrete subclasses only supply tokens via <see cref="NextImpl"/>.</para>
    /// <para>Not thread-safe.</para>
    /// </remarks>
    internal abstract class JsonTokenizer
    {
        // The single pushed-back token, or null when nothing is buffered.
        private JsonToken bufferedToken;

        /// <summary>
        /// Creates a tokenizer that reads from the given text reader.
        /// </summary>
        internal static JsonTokenizer FromTextReader(TextReader reader)
        {
            return new JsonTextTokenizer(reader);
        }

        /// <summary>
        /// Creates a tokenizer that first replays the given list of tokens, then continues reading
        /// from another tokenizer. Note that if the returned tokenizer is "pushed back", that does not push back
        /// on the continuation tokenizer, or vice versa. Care should be taken when using this method - it was
        /// created for the sake of Any parsing.
        /// </summary>
        internal static JsonTokenizer FromReplayedTokens(IList<JsonToken> tokens, JsonTokenizer continuation)
        {
            return new JsonReplayTokenizer(tokens, continuation);
        }

        /// <summary>
        /// Returns the depth of the stack, purely in objects (not collections).
        /// Informally, this is the number of remaining unclosed '{' characters we have.
        /// </summary>
        internal int ObjectDepth { get; private set; }

        // TODO: Why do we allow a different token to be pushed back? It might be better to always remember the previous
        // token returned, and allow a parameterless Rewind() method (which could only be called once, just like the current PushBack).
        /// <summary>
        /// Buffers a single token so that the next call to <see cref="Next"/> returns it again.
        /// The effect the token had on <see cref="ObjectDepth"/> is reversed, so the depth is
        /// re-applied when the token is handed out again.
        /// </summary>
        /// <param name="token">The token to buffer. Must not be null.</param>
        /// <exception cref="InvalidOperationException">A token is already buffered.</exception>
        /// <exception cref="ArgumentNullException"><paramref name="token"/> is null.</exception>
        internal void PushBack(JsonToken token)
        {
            if (bufferedToken != null)
            {
                throw new InvalidOperationException("Can't push back twice");
            }
            // Fail with a descriptive exception rather than a NullReferenceException on token.Type below.
            if (token == null)
            {
                throw new ArgumentNullException(nameof(token));
            }
            bufferedToken = token;
            // Undo the depth adjustment Next() made when it originally returned this kind of token.
            if (token.Type == JsonToken.TokenType.StartObject)
            {
                ObjectDepth--;
            }
            else if (token.Type == JsonToken.TokenType.EndObject)
            {
                ObjectDepth++;
            }
        }

        /// <summary>
        /// Returns the next JSON token in the stream. An EndDocument token is returned to indicate the end of the stream,
        /// after which point <c>Next()</c> should not be called again.
        /// </summary>
        /// <remarks>This implementation provides single-token buffering, and calls <see cref="NextImpl"/> if there is no buffered token.</remarks>
        /// <returns>The next token in the stream. This is never null.</returns>
        /// <exception cref="InvalidOperationException">This method is called after an EndDocument token has been returned</exception>
        /// <exception cref="InvalidJsonException">The input text does not comply with RFC 7159</exception>
        internal JsonToken Next()
        {
            JsonToken tokenToReturn;
            if (bufferedToken != null)
            {
                tokenToReturn = bufferedToken;
                bufferedToken = null;
            }
            else
            {
                tokenToReturn = NextImpl();
            }
            // Depth is maintained here (not in NextImpl) so that pushed-back tokens are counted consistently.
            if (tokenToReturn.Type == JsonToken.TokenType.StartObject)
            {
                ObjectDepth++;
            }
            else if (tokenToReturn.Type == JsonToken.TokenType.EndObject)
            {
                ObjectDepth--;
            }
            return tokenToReturn;
        }

        /// <summary>
        /// Returns the next JSON token in the stream, when requested by the base class. (The <see cref="Next"/> method delegates
        /// to this if it doesn't have a buffered token.)
        /// </summary>
        /// <exception cref="InvalidOperationException">This method is called after an EndDocument token has been returned</exception>
        /// <exception cref="InvalidJsonException">The input text does not comply with RFC 7159</exception>
        protected abstract JsonToken NextImpl();

        /// <summary>
        /// Skips the value we're about to read. This must only be called immediately after reading a property name.
        /// If the value is an object or an array, the complete object/array is skipped.
        /// </summary>
        internal void SkipValue()
        {
            // We'll assume that Next() makes sure that the end objects and end arrays are all valid.
            // All we care about is the total nesting depth we need to close.
            int depth = 0;

            // do/while rather than while loop so that we read at least one token.
            do
            {
                var token = Next();
                switch (token.Type)
                {
                    case JsonToken.TokenType.EndArray:
                    case JsonToken.TokenType.EndObject:
                        depth--;
                        break;
                    case JsonToken.TokenType.StartArray:
                    case JsonToken.TokenType.StartObject:
                        depth++;
                        break;
                }
            } while (depth != 0);
        }

        /// <summary>
        /// Tokenizer which first exhausts a list of tokens, then consults another tokenizer.
        /// </summary>
        private sealed class JsonReplayTokenizer : JsonTokenizer
        {
            private readonly IList<JsonToken> tokens;
            private readonly JsonTokenizer nextTokenizer;
            // Index of the next entry of "tokens" to replay; equals tokens.Count once the list is exhausted.
            private int nextTokenIndex;

            internal JsonReplayTokenizer(IList<JsonToken> tokens, JsonTokenizer nextTokenizer)
            {
                this.tokens = tokens;
                this.nextTokenizer = nextTokenizer;
            }

            // FIXME: Object depth not maintained...
            /// <summary>
            /// Returns the next replayed token, or delegates to the continuation tokenizer's
            /// <see cref="JsonTokenizer.Next"/> once every replayed token has been returned.
            /// </summary>
            protected override JsonToken NextImpl()
            {
                if (nextTokenIndex >= tokens.Count)
                {
                    return nextTokenizer.Next();
                }
                return tokens[nextTokenIndex++];
            }
        }

        /// <summary>
        /// Tokenizer which does all the *real* work of parsing JSON.
        /// </summary>
        private sealed class JsonTextTokenizer : JsonTokenizer
        {
            // The set of states in which a value is valid next token.
            private static readonly State ValueStates = State.ArrayStart | State.ArrayAfterComma | State.ObjectAfterColon | State.StartOfDocument;

            // Stack of currently open containers; the bottom entry is always ContainerType.Document.
            private readonly Stack<ContainerType> containerStack = new Stack<ContainerType>();
            private readonly PushBackReader reader;
            private State state;

            internal JsonTextTokenizer(TextReader reader)
            {
                this.reader = new PushBackReader(reader);
                state = State.StartOfDocument;
                containerStack.Push(ContainerType.Document);
            }

            /// <remarks>
            /// This method essentially just loops through characters skipping whitespace, validating and
            /// changing state (e.g. from ObjectBeforeColon to ObjectAfterColon)
            /// until it reaches something which will be a genuine token (e.g. a start object, or a value) at which point
            /// it returns the token. Although the method is large, it would be relatively hard to break down further... most
            /// of it is the large switch statement, which sometimes returns and sometimes doesn't.
            /// </remarks>
            protected override JsonToken NextImpl()
            {
                if (state == State.ReaderExhausted)
                {
                    throw new InvalidOperationException("Next() called after end of document");
                }
                while (true)
                {
                    var next = reader.Read();
                    switch (next)
                    {
                        case -1:
                            ValidateState(State.ExpectedEndOfDocument, "Unexpected end of document in state: ");
                            state = State.ReaderExhausted;
                            return JsonToken.EndDocument;

                        // Skip whitespace between tokens
                        case ' ':
                        case '\t':
                        case '\r':
                        case '\n':
                            break;
                        case ':':
                            ValidateState(State.ObjectBeforeColon, "Invalid state to read a colon: ");
                            state = State.ObjectAfterColon;
                            break;
                        case ',':
                            ValidateState(State.ObjectAfterProperty | State.ArrayAfterValue, "Invalid state to read a comma: ");
                            state = state == State.ObjectAfterProperty ? State.ObjectAfterComma : State.ArrayAfterComma;
                            break;
                        case '"':
                            string stringValue = ReadString();
                            // A string where a property is expected is a name; anywhere else it must be a value.
                            if ((state & (State.ObjectStart | State.ObjectAfterComma)) != 0)
                            {
                                state = State.ObjectBeforeColon;
                                return JsonToken.Name(stringValue);
                            }
                            else
                            {
                                ValidateAndModifyStateForValue("Invalid state to read a double quote: ");
                                return JsonToken.Value(stringValue);
                            }
                        case '{':
                            ValidateState(ValueStates, "Invalid state to read an open brace: ");
                            state = State.ObjectStart;
                            containerStack.Push(ContainerType.Object);
                            return JsonToken.StartObject;
                        case '}':
                            ValidateState(State.ObjectAfterProperty | State.ObjectStart, "Invalid state to read a close brace: ");
                            PopContainer();
                            return JsonToken.EndObject;
                        case '[':
                            ValidateState(ValueStates, "Invalid state to read an open square bracket: ");
                            state = State.ArrayStart;
                            containerStack.Push(ContainerType.Array);
                            return JsonToken.StartArray;
                        case ']':
                            ValidateState(State.ArrayAfterValue | State.ArrayStart, "Invalid state to read a close square bracket: ");
                            PopContainer();
                            return JsonToken.EndArray;
                        case 'n': // Start of null
                            ConsumeLiteral("null");
                            ValidateAndModifyStateForValue("Invalid state to read a null literal: ");
                            return JsonToken.Null;
                        case 't': // Start of true
                            ConsumeLiteral("true");
                            ValidateAndModifyStateForValue("Invalid state to read a true literal: ");
                            return JsonToken.True;
                        case 'f': // Start of false
                            ConsumeLiteral("false");
                            ValidateAndModifyStateForValue("Invalid state to read a false literal: ");
                            return JsonToken.False;
                        case '-': // Start of a number
                        case '0':
                        case '1':
                        case '2':
                        case '3':
                        case '4':
                        case '5':
                        case '6':
                        case '7':
                        case '8':
                        case '9':
                            double number = ReadNumber((char) next);
                            ValidateAndModifyStateForValue("Invalid state to read a number token: ");
                            return JsonToken.Value(number);
                        default:
                            // Created via the reader, like every other parse error, so that any future
                            // location tracking in CreateException applies here too.
                            throw reader.CreateException($"Invalid first character of token: {(char) next}");
                    }
                }
            }

            /// <summary>
            /// Throws an <see cref="InvalidJsonException"/> whose message is <paramref name="errorPrefix"/>
            /// followed by the current state, unless the current state is one of <paramref name="validStates"/>.
            /// </summary>
            private void ValidateState(State validStates, string errorPrefix)
            {
                if ((validStates & state) == 0)
                {
                    throw reader.CreateException(errorPrefix + state);
                }
            }

            /// <summary>
            /// Reads a string token. It is assumed that the opening " has already been read.
            /// </summary>
            private string ReadString()
            {
                //builder will not be released in case of an exception, but this is not a problem and we will create new on next Acquire
                var builder = StringBuilderCache.Acquire();
                bool haveHighSurrogate = false;
                while (true)
                {
                    char c = reader.ReadOrFail("Unexpected end of text while reading string");
                    // Raw control characters are rejected; this check runs before escape processing,
                    // so escaped forms (e.g. \n or \u000a) are still accepted.
                    if (c < ' ')
                    {
                        throw reader.CreateException(string.Format(CultureInfo.InvariantCulture, "Invalid character in string literal: U+{0:x4}", (int) c));
                    }
                    if (c == '"')
                    {
                        if (haveHighSurrogate)
                        {
                            throw reader.CreateException("Invalid use of surrogate pair code units");
                        }
                        return StringBuilderCache.GetStringAndRelease(builder);
                    }
                    if (c == '\\')
                    {
                        c = ReadEscapedCharacter();
                    }
                    // TODO: Consider only allowing surrogate pairs that are either both escaped,
                    // or both not escaped. It would be a very odd text stream that contained a "lone" high surrogate
                    // followed by an escaped low surrogate or vice versa... and that couldn't even be represented in UTF-8.
                    // A low surrogate is valid exactly when the previous code unit was a high surrogate.
                    if (haveHighSurrogate != char.IsLowSurrogate(c))
                    {
                        throw reader.CreateException("Invalid use of surrogate pair code units");
                    }
                    haveHighSurrogate = char.IsHighSurrogate(c);
                    builder.Append(c);
                }
            }

            /// <summary>
            /// Reads an escaped character. It is assumed that the leading backslash has already been read.
            /// </summary>
            private char ReadEscapedCharacter()
            {
                char c = reader.ReadOrFail("Unexpected end of text while reading character escape sequence");
                return c switch
                {
                    'n' => '\n',
                    '\\' => '\\',
                    'b' => '\b',
                    'f' => '\f',
                    'r' => '\r',
                    't' => '\t',
                    '"' => '"',
                    '/' => '/',
                    'u' => ReadUnicodeEscape(),
                    _ => throw reader.CreateException(string.Format(CultureInfo.InvariantCulture, "Invalid character in character escape sequence: U+{0:x4}", (int)c)),
                };
            }

            /// <summary>
            /// Reads an escaped Unicode 4-nybble hex sequence. It is assumed that the leading \u has already been read.
            /// </summary>
            private char ReadUnicodeEscape()
            {
                int result = 0;
                for (int i = 0; i < 4; i++)
                {
                    char c = reader.ReadOrFail("Unexpected end of text while reading Unicode escape sequence");
                    int nybble;
                    if (c >= '0' && c <= '9')
                    {
                        nybble = c - '0';
                    }
                    else if (c >= 'a' && c <= 'f')
                    {
                        nybble = c - 'a' + 10;
                    }
                    else if (c >= 'A' && c <= 'F')
                    {
                        nybble = c - 'A' + 10;
                    }
                    else
                    {
                        throw reader.CreateException(string.Format(CultureInfo.InvariantCulture, "Invalid character in character escape sequence: U+{0:x4}", (int) c));
                    }
                    result = (result << 4) + nybble;
                }
                return (char) result;
            }

            /// <summary>
            /// Consumes a text-only literal, throwing an exception if the read text doesn't match it.
            /// It is assumed that the first letter of the literal has already been read.
            /// </summary>
            private void ConsumeLiteral(string text)
            {
                for (int i = 1; i < text.Length; i++)
                {
                    int next = reader.Read();
                    if (next != text[i])
                    {
                        // Only check for "end of text" when we've detected that the character differs from the
                        // expected one.
                        var message = next == -1
                            ? $"Unexpected end of text while reading literal token {text}"
                            : $"Unexpected character while reading literal token {text}";
                        throw reader.CreateException(message);
                    }
                }
            }

            /// <summary>
            /// Reads a number token and parses it as a double.
            /// </summary>
            /// <param name="initialCharacter">The first character of the number ('-' or a digit), which has
            /// already been consumed from the reader.</param>
            /// <returns>The parsed value.</returns>
            /// <exception cref="InvalidJsonException">The text is not a valid numeric literal, or the value
            /// is out of the range of a double.</exception>
            private double ReadNumber(char initialCharacter)
            {
                //builder will not be released in case of an exception, but this is not a problem and we will create new on next Acquire
                var builder = StringBuilderCache.Acquire();
                if (initialCharacter == '-')
                {
                    builder.Append('-');
                }
                else
                {
                    // Let ReadInt see (and validate) the first digit itself.
                    reader.PushBack(initialCharacter);
                }
                // Each method returns the character it read that doesn't belong in that part,
                // so we know what to do next, including pushing the character back at the end.
                // -1 is returned for "end of text".
                int next = ReadInt(builder);
                if (next == '.')
                {
                    next = ReadFrac(builder);
                }
                if (next == 'e' || next == 'E')
                {
                    next = ReadExp(builder);
                }
                // If we read a character which wasn't part of the number, push it back so we can read it again
                // to parse the next token.
                if (next != -1)
                {
                    reader.PushBack((char) next);
                }

                // TODO: What exception should we throw if the value can't be represented as a double?
                var builderValue = StringBuilderCache.GetStringAndRelease(builder);
                try
                {
                    double result = double.Parse(builderValue,
                        NumberStyles.AllowLeadingSign | NumberStyles.AllowDecimalPoint | NumberStyles.AllowExponent,
                        CultureInfo.InvariantCulture);

                    // .NET Core 3.0 and later returns infinity if the number is too large or small to be represented.
                    // For compatibility with other Protobuf implementations the tokenizer should still throw.
                    if (double.IsInfinity(result))
                    {
                        throw reader.CreateException("Numeric value out of range: " + builderValue);
                    }

                    return result;
                }
                catch (OverflowException)
                {
                    throw reader.CreateException("Numeric value out of range: " + builderValue);
                }
            }

            /// <summary>
            /// Copies an integer into a StringBuilder.
            /// </summary>
            /// <param name="builder">The builder to read the number into</param>
            /// <returns>The character following the integer, or -1 for end-of-text.</returns>
            private int ReadInt(StringBuilder builder)
            {
                char first = reader.ReadOrFail("Invalid numeric literal");
                if (first < '0' || first > '9')
                {
                    throw reader.CreateException("Invalid numeric literal");
                }
                builder.Append(first);
                int next = ConsumeDigits(builder, out int digitCount);
                // A leading zero may not be followed by further digits.
                if (first == '0' && digitCount != 0)
                {
                    throw reader.CreateException("Invalid numeric literal: leading 0 for non-zero value.");
                }
                return next;
            }

            /// <summary>
            /// Copies the fractional part of a number into a StringBuilder, assuming reader is positioned after a period.
            /// </summary>
            /// <param name="builder">The builder to read the number into</param>
            /// <returns>The character following the fractional part, or -1 for end-of-text.</returns>
            private int ReadFrac(StringBuilder builder)
            {
                builder.Append('.'); // Already consumed this
                int next = ConsumeDigits(builder, out int digitCount);
                if (digitCount == 0)
                {
                    throw reader.CreateException("Invalid numeric literal: fraction with no trailing digits");
                }
                return next;
            }

            /// <summary>
            /// Copies the exponent part of a number into a StringBuilder, with an assumption that the reader is already positioned after the "e".
            /// </summary>
            /// <param name="builder">The builder to read the number into</param>
            /// <returns>The character following the exponent, or -1 for end-of-text.</returns>
            private int ReadExp(StringBuilder builder)
            {
                builder.Append('E'); // Already consumed this (or 'e')
                int next = reader.Read();
                if (next == -1)
                {
                    throw reader.CreateException("Invalid numeric literal: exponent with no trailing digits");
                }
                if (next == '-' || next == '+')
                {
                    builder.Append((char) next);
                }
                else
                {
                    // Not a sign, so it should be the first exponent digit; let ConsumeDigits handle it.
                    reader.PushBack((char) next);
                }
                next = ConsumeDigits(builder, out int digitCount);
                if (digitCount == 0)
                {
                    throw reader.CreateException("Invalid numeric literal: exponent without value");
                }
                return next;
            }

            /// <summary>
            /// Copies a sequence of digits into a StringBuilder.
            /// </summary>
            /// <param name="builder">The builder to read the number into</param>
            /// <param name="count">The number of digits appended to the builder</param>
            /// <returns>The character following the digits, or -1 for end-of-text.</returns>
            private int ConsumeDigits(StringBuilder builder, out int count)
            {
                count = 0;
                while (true)
                {
                    int next = reader.Read();
                    if (next == -1 || next < '0' || next > '9')
                    {
                        return next;
                    }
                    count++;
                    builder.Append((char) next);
                }
            }

            /// <summary>
            /// Validates that we're in a valid state to read a value (using the given error prefix if necessary)
            /// and changes the state to the appropriate one, e.g. ObjectAfterColon to ObjectAfterProperty.
            /// </summary>
            private void ValidateAndModifyStateForValue(string errorPrefix)
            {
                ValidateState(ValueStates, errorPrefix);
                switch (state)
                {
                    case State.StartOfDocument:
                        state = State.ExpectedEndOfDocument;
                        return;
                    case State.ObjectAfterColon:
                        state = State.ObjectAfterProperty;
                        return;
                    case State.ArrayStart:
                    case State.ArrayAfterComma:
                        state = State.ArrayAfterValue;
                        return;
                    default:
                        throw new InvalidOperationException("ValidateAndModifyStateForValue does not handle all value states (and should)");
                }
            }

            /// <summary>
            /// Pops the top-most container, and sets the state to the appropriate one for the end of a value
            /// in the parent container.
            /// </summary>
            private void PopContainer()
            {
                containerStack.Pop();
                var parent = containerStack.Peek();
                state = parent switch
                {
                    ContainerType.Object => State.ObjectAfterProperty,
                    ContainerType.Array => State.ArrayAfterValue,
                    ContainerType.Document => State.ExpectedEndOfDocument,
                    _ => throw new InvalidOperationException("Unexpected container type: " + parent),
                };
            }

            private enum ContainerType
            {
                Document, Object, Array
            }

            /// <summary>
            /// Possible states of the tokenizer.
            /// </summary>
            /// <remarks>
            /// <para>This is a flags enum purely so we can simply and efficiently represent a set of valid states
            /// for checking.</para>
            /// <para>
            /// Each is documented with an example,
            /// where ^ represents the current position within the text stream. The examples all use string values,
            /// but could be any value, including nested objects/arrays.
            /// The complete state of the tokenizer also includes a stack to indicate the contexts (arrays/objects).
            /// Any additional notional state of "AfterValue" indicates that a value has been completed, at which
            /// point there's an immediate transition to ExpectedEndOfDocument, ObjectAfterProperty or ArrayAfterValue.
            /// </para>
            /// <para>
            /// These states were derived manually by reading RFC 7159 carefully.
            /// </para>
            /// </remarks>
            [Flags]
            private enum State
            {
                /// <summary>
                /// ^ { "foo": "bar" }
                /// Before the value in a document. Next states: ObjectStart, ArrayStart, "AfterValue"
                /// </summary>
                StartOfDocument = 1 << 0,
                /// <summary>
                /// { "foo": "bar" } ^
                /// After the value in a document. Next states: ReaderExhausted
                /// </summary>
                ExpectedEndOfDocument = 1 << 1,
                /// <summary>
                /// { "foo": "bar" } ^ (and already read to the end of the reader)
                /// Terminal state.
                /// </summary>
                ReaderExhausted = 1 << 2,
                /// <summary>
                /// { ^ "foo": "bar" }
                /// Before the *first* property in an object.
                /// Next states:
                ///   "AfterValue" (empty object)
                ///   ObjectBeforeColon (read a name)
                /// </summary>
                ObjectStart = 1 << 3,
                /// <summary>
                /// { "foo" ^ : "bar", "x": "y" }
                /// Next state: ObjectAfterColon
                /// </summary>
                ObjectBeforeColon = 1 << 4,
                /// <summary>
                /// { "foo" : ^ "bar", "x": "y" }
                /// After the colon of a property, so expecting the property's value.
                /// Next states:
                ///   "AfterValue" (value is simple)
                ///   ObjectStart (value is object)
                ///   ArrayStart (value is array)
                /// </summary>
                ObjectAfterColon = 1 << 5,
                /// <summary>
                /// { "foo" : "bar" ^ , "x" : "y" }
                /// At the end of a property, so expecting either a comma or end-of-object
                /// Next states: ObjectAfterComma or "AfterValue"
                /// </summary>
                ObjectAfterProperty = 1 << 6,
                /// <summary>
                /// { "foo":"bar", ^ "x":"y" }
                /// Read the comma after the previous property, so expecting another property.
                /// This is like ObjectStart, but closing brace isn't valid here
                /// Next state: ObjectBeforeColon.
                /// </summary>
                ObjectAfterComma = 1 << 7,
                /// <summary>
                /// [ ^ "foo", "bar" ]
                /// Before the *first* value in an array.
                /// Next states:
                ///   "AfterValue" (read a value)
                ///   "AfterValue" (end of array; will pop stack)
                /// </summary>
                ArrayStart = 1 << 8,
                /// <summary>
                /// [ "foo" ^ , "bar" ]
                /// After any value in an array, so expecting either a comma or end-of-array
                /// Next states: ArrayAfterComma or "AfterValue"
                /// </summary>
                ArrayAfterValue = 1 << 9,
                /// <summary>
                /// [ "foo", ^ "bar" ]
                /// After a comma in an array, so there *must* be another value (simple or complex).
                /// Next states: "AfterValue" (simple value), ObjectStart, ArrayStart
                /// </summary>
                ArrayAfterComma = 1 << 10
            }

            /// <summary>
            /// Wrapper around a text reader allowing small amounts of buffering and location handling.
            /// </summary>
            private sealed class PushBackReader
            {
                // TODO: Add locations for errors etc.

                private readonly TextReader reader;

                internal PushBackReader(TextReader reader)
                {
                    // TODO: Wrap the reader in a BufferedReader?
                    this.reader = reader;
                }

                /// <summary>
                /// The buffered next character, if we have one, or -1 if there is no buffered character.
                /// </summary>
                private int nextChar = -1;

                /// <summary>
                /// Returns the next character in the stream, or -1 if we have reached the end of the stream.
                /// </summary>
                internal int Read()
                {
                    if (nextChar != -1)
                    {
                        int tmp = nextChar;
                        nextChar = -1;
                        return tmp;
                    }
                    return reader.Read();
                }

                /// <summary>
                /// Reads the next character from the underlying reader, throwing an <see cref="InvalidJsonException" />
                /// with the specified message if there are no more characters available.
                /// </summary>
                internal char ReadOrFail(string messageOnFailure)
                {
                    int next = Read();
                    if (next == -1)
                    {
                        throw CreateException(messageOnFailure);
                    }
                    return (char) next;
                }

                /// <summary>
                /// Buffers a single character so that the next call to <see cref="Read"/> returns it.
                /// </summary>
                /// <exception cref="InvalidOperationException">A character is already buffered.</exception>
                internal void PushBack(char c)
                {
                    if (nextChar != -1)
                    {
                        throw new InvalidOperationException("Cannot push back when already buffering a character");
                    }
                    nextChar = c;
                }

                /// <summary>
                /// Creates a new exception appropriate for the current state of the reader.
                /// </summary>
                internal InvalidJsonException CreateException(string message)
                {
                    // TODO: Keep track of and use the location.
                    return new InvalidJsonException(message);
                }
            }

            /// <summary>
            /// Provide a cached reusable instance of stringbuilder per thread.
            /// Copied from https://github.com/dotnet/runtime/blob/main/src/libraries/Common/src/System/Text/StringBuilderCache.cs
            /// </summary>
            private static class StringBuilderCache
            {
                private const int MaxCachedStringBuilderSize = 360;
                private const int DefaultStringBuilderCapacity = 16; // == StringBuilder.DefaultCapacity

                [ThreadStatic]
                private static StringBuilder cachedInstance;

                /// <summary>Get a StringBuilder for the specified capacity.</summary>
                /// <remarks>If a StringBuilder of an appropriate size is cached, it will be returned and the cache emptied.</remarks>
                public static StringBuilder Acquire(int capacity = DefaultStringBuilderCapacity)
                {
                    if (capacity <= MaxCachedStringBuilderSize)
                    {
                        StringBuilder sb = cachedInstance;
                        if (sb != null)
                        {
                            // Avoid stringbuilder block fragmentation by getting a new StringBuilder
                            // when the requested size is larger than the current capacity
                            if (capacity <= sb.Capacity)
                            {
                                cachedInstance = null;
                                sb.Clear();
                                return sb;
                            }
                        }
                    }

                    return new StringBuilder(capacity);
                }

                /// <summary>Place the specified builder in the cache if it is not too big.</summary>
                private static void Release(StringBuilder sb)
                {
                    if (sb.Capacity <= MaxCachedStringBuilderSize)
                    {
                        // Keep whichever of the cached and released builders has the larger capacity.
                        cachedInstance = cachedInstance?.Capacity >= sb.Capacity ? cachedInstance : sb;
                    }
                }

                /// <summary>ToString() the stringbuilder, Release it to the cache, and return the resulting string.</summary>
                public static string GetStringAndRelease(StringBuilder sb)
                {
                    string result = sb.ToString();
                    Release(sb);
                    return result;
                }
            }
        }
    }
}