• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 #region Copyright notice and license
2 // Protocol Buffers - Google's data interchange format
3 // Copyright 2008 Google Inc.  All rights reserved.
4 //
5 // Use of this source code is governed by a BSD-style
6 // license that can be found in the LICENSE file or at
7 // https://developers.google.com/open-source/licenses/bsd
8 #endregion
9 
10 using System;
11 using System.Collections.Generic;
12 using System.Globalization;
13 using System.IO;
14 using System.Text;
15 
16 namespace Google.Protobuf
17 {
18     /// <summary>
19     /// Simple but strict JSON tokenizer, rigidly following RFC 7159.
20     /// </summary>
21     /// <remarks>
22     /// <para>
23     /// This tokenizer is stateful, and only returns "useful" tokens - names, values etc.
24     /// It does not create tokens for the separator between names and values, or for the comma
25     /// between values. It validates the token stream as it goes - so callers can assume that the
26     /// tokens it produces are appropriate. For example, it would never produce "start object, end array."
27     /// </para>
    /// <para>Implementation details: the base class handles single token push-back and object depth tracking; derived classes supply the tokens themselves.</para>
29     /// <para>Not thread-safe.</para>
30     /// </remarks>
31     internal abstract class JsonTokenizer
32     {
33         private JsonToken bufferedToken;
34 
35         /// <summary>
36         ///  Creates a tokenizer that reads from the given text reader.
37         /// </summary>
FromTextReader(TextReader reader)38         internal static JsonTokenizer FromTextReader(TextReader reader)
39         {
40             return new JsonTextTokenizer(reader);
41         }
42 
43         /// <summary>
44         /// Creates a tokenizer that first replays the given list of tokens, then continues reading
45         /// from another tokenizer. Note that if the returned tokenizer is "pushed back", that does not push back
46         /// on the continuation tokenizer, or vice versa. Care should be taken when using this method - it was
47         /// created for the sake of Any parsing.
48         /// </summary>
FromReplayedTokens(IList<JsonToken> tokens, JsonTokenizer continuation)49         internal static JsonTokenizer FromReplayedTokens(IList<JsonToken> tokens, JsonTokenizer continuation)
50         {
51             return new JsonReplayTokenizer(tokens, continuation);
52         }
53 
        /// <summary>
        /// Returns the depth of the stack, purely in objects (not collections).
        /// Informally, this is the number of remaining unclosed '{' characters we have.
        /// </summary>
        /// <remarks>
        /// <see cref="Next"/> increments this for each StartObject token it returns and decrements it for
        /// each EndObject token; <see cref="PushBack"/> applies the opposite adjustment so that a
        /// pushed-back token is not counted twice when it is returned again.
        /// </remarks>
        internal int ObjectDepth { get; private set; }
59 
60         // TODO: Why do we allow a different token to be pushed back? It might be better to always remember the previous
61         // token returned, and allow a parameterless Rewind() method (which could only be called once, just like the current PushBack).
PushBack(JsonToken token)62         internal void PushBack(JsonToken token)
63         {
64             if (bufferedToken != null)
65             {
66                 throw new InvalidOperationException("Can't push back twice");
67             }
68             bufferedToken = token;
69             if (token.Type == JsonToken.TokenType.StartObject)
70             {
71                 ObjectDepth--;
72             }
73             else if (token.Type == JsonToken.TokenType.EndObject)
74             {
75                 ObjectDepth++;
76             }
77         }
78 
79         /// <summary>
80         /// Returns the next JSON token in the stream. An EndDocument token is returned to indicate the end of the stream,
81         /// after which point <c>Next()</c> should not be called again.
82         /// </summary>
83         /// <remarks>This implementation provides single-token buffering, and calls <see cref="NextImpl"/> if there is no buffered token.</remarks>
84         /// <returns>The next token in the stream. This is never null.</returns>
85         /// <exception cref="InvalidOperationException">This method is called after an EndDocument token has been returned</exception>
86         /// <exception cref="InvalidJsonException">The input text does not comply with RFC 7159</exception>
Next()87         internal JsonToken Next()
88         {
89             JsonToken tokenToReturn;
90             if (bufferedToken != null)
91             {
92                 tokenToReturn = bufferedToken;
93                 bufferedToken = null;
94             }
95             else
96             {
97                 tokenToReturn = NextImpl();
98             }
99             if (tokenToReturn.Type == JsonToken.TokenType.StartObject)
100             {
101                 ObjectDepth++;
102             }
103             else if (tokenToReturn.Type == JsonToken.TokenType.EndObject)
104             {
105                 ObjectDepth--;
106             }
107             return tokenToReturn;
108         }
109 
        /// <summary>
        /// Returns the next JSON token in the stream, when requested by the base class. (The <see cref="Next"/> method delegates
        /// to this if it doesn't have a buffered token.)
        /// </summary>
        /// <remarks>
        /// Implementations do not need to handle push-back or object depth; <see cref="Next"/> does both
        /// around the call to this method.
        /// </remarks>
        /// <returns>The next token. <see cref="Next"/> reads the token's type immediately, so this must not be null.</returns>
        /// <exception cref="InvalidOperationException">This method is called after an EndDocument token has been returned</exception>
        /// <exception cref="InvalidJsonException">The input text does not comply with RFC 7159</exception>
        protected abstract JsonToken NextImpl();
117 
118         /// <summary>
119         /// Skips the value we're about to read. This must only be called immediately after reading a property name.
120         /// If the value is an object or an array, the complete object/array is skipped.
121         /// </summary>
SkipValue()122         internal void SkipValue()
123         {
124             // We'll assume that Next() makes sure that the end objects and end arrays are all valid.
125             // All we care about is the total nesting depth we need to close.
126             int depth = 0;
127 
128             // do/while rather than while loop so that we read at least one token.
129             do
130             {
131                 var token = Next();
132                 switch (token.Type)
133                 {
134                     case JsonToken.TokenType.EndArray:
135                     case JsonToken.TokenType.EndObject:
136                         depth--;
137                         break;
138                     case JsonToken.TokenType.StartArray:
139                     case JsonToken.TokenType.StartObject:
140                         depth++;
141                         break;
142                 }
143             } while (depth != 0);
144         }
145 
146         /// <summary>
147         /// Tokenizer which first exhausts a list of tokens, then consults another tokenizer.
148         /// </summary>
149         private class JsonReplayTokenizer : JsonTokenizer
150         {
151             private readonly IList<JsonToken> tokens;
152             private readonly JsonTokenizer nextTokenizer;
153             private int nextTokenIndex;
154 
JsonReplayTokenizer(IList<JsonToken> tokens, JsonTokenizer nextTokenizer)155             internal JsonReplayTokenizer(IList<JsonToken> tokens, JsonTokenizer nextTokenizer)
156             {
157                 this.tokens = tokens;
158                 this.nextTokenizer = nextTokenizer;
159             }
160 
161             // FIXME: Object depth not maintained...
NextImpl()162             protected override JsonToken NextImpl()
163             {
164                 if (nextTokenIndex >= tokens.Count)
165                 {
166                     return nextTokenizer.Next();
167                 }
168                 return tokens[nextTokenIndex++];
169             }
170         }
171 
172         /// <summary>
173         /// Tokenizer which does all the *real* work of parsing JSON.
174         /// </summary>
175         private sealed class JsonTextTokenizer : JsonTokenizer
176         {
177             // The set of states in which a value is valid next token.
178             private static readonly State ValueStates = State.ArrayStart | State.ArrayAfterComma | State.ObjectAfterColon | State.StartOfDocument;
179 
180             private readonly Stack<ContainerType> containerStack = new Stack<ContainerType>();
181             private readonly PushBackReader reader;
182             private State state;
183 
JsonTextTokenizer(TextReader reader)184             internal JsonTextTokenizer(TextReader reader)
185             {
186                 this.reader = new PushBackReader(reader);
187                 state = State.StartOfDocument;
188                 containerStack.Push(ContainerType.Document);
189             }
190 
191             /// <remarks>
192             /// This method essentially just loops through characters skipping whitespace, validating and
193             /// changing state (e.g. from ObjectBeforeColon to ObjectAfterColon)
194             /// until it reaches something which will be a genuine token (e.g. a start object, or a value) at which point
195             /// it returns the token. Although the method is large, it would be relatively hard to break down further... most
196             /// of it is the large switch statement, which sometimes returns and sometimes doesn't.
197             /// </remarks>
NextImpl()198             protected override JsonToken NextImpl()
199             {
200                 if (state == State.ReaderExhausted)
201                 {
202                     throw new InvalidOperationException("Next() called after end of document");
203                 }
204                 while (true)
205                 {
206                     var next = reader.Read();
207                     switch (next)
208                     {
209                         case -1:
210                             ValidateState(State.ExpectedEndOfDocument, "Unexpected end of document in state: ");
211                             state = State.ReaderExhausted;
212                             return JsonToken.EndDocument;
213 
214                         // Skip whitespace between tokens
215                         case ' ':
216                         case '\t':
217                         case '\r':
218                         case '\n':
219                             break;
220                         case ':':
221                             ValidateState(State.ObjectBeforeColon, "Invalid state to read a colon: ");
222                             state = State.ObjectAfterColon;
223                             break;
224                         case ',':
225                             ValidateState(State.ObjectAfterProperty | State.ArrayAfterValue, "Invalid state to read a comma: ");
226                             state = state == State.ObjectAfterProperty ? State.ObjectAfterComma : State.ArrayAfterComma;
227                             break;
228                         case '"':
229                             string stringValue = ReadString();
230                             if ((state & (State.ObjectStart | State.ObjectAfterComma)) != 0)
231                             {
232                                 state = State.ObjectBeforeColon;
233                                 return JsonToken.Name(stringValue);
234                             }
235                             else
236                             {
237                                 ValidateAndModifyStateForValue("Invalid state to read a double quote: ");
238                                 return JsonToken.Value(stringValue);
239                             }
240                         case '{':
241                             ValidateState(ValueStates, "Invalid state to read an open brace: ");
242                             state = State.ObjectStart;
243                             containerStack.Push(ContainerType.Object);
244                             return JsonToken.StartObject;
245                         case '}':
246                             ValidateState(State.ObjectAfterProperty | State.ObjectStart, "Invalid state to read a close brace: ");
247                             PopContainer();
248                             return JsonToken.EndObject;
249                         case '[':
250                             ValidateState(ValueStates, "Invalid state to read an open square bracket: ");
251                             state = State.ArrayStart;
252                             containerStack.Push(ContainerType.Array);
253                             return JsonToken.StartArray;
254                         case ']':
255                             ValidateState(State.ArrayAfterValue | State.ArrayStart, "Invalid state to read a close square bracket: ");
256                             PopContainer();
257                             return JsonToken.EndArray;
258                         case 'n': // Start of null
259                             ConsumeLiteral("null");
260                             ValidateAndModifyStateForValue("Invalid state to read a null literal: ");
261                             return JsonToken.Null;
262                         case 't': // Start of true
263                             ConsumeLiteral("true");
264                             ValidateAndModifyStateForValue("Invalid state to read a true literal: ");
265                             return JsonToken.True;
266                         case 'f': // Start of false
267                             ConsumeLiteral("false");
268                             ValidateAndModifyStateForValue("Invalid state to read a false literal: ");
269                             return JsonToken.False;
270                         case '-': // Start of a number
271                         case '0':
272                         case '1':
273                         case '2':
274                         case '3':
275                         case '4':
276                         case '5':
277                         case '6':
278                         case '7':
279                         case '8':
280                         case '9':
281                             double number = ReadNumber((char) next);
282                             ValidateAndModifyStateForValue("Invalid state to read a number token: ");
283                             return JsonToken.Value(number);
284                         default:
285                             throw new InvalidJsonException($"Invalid first character of token: {(char) next}");
286                     }
287                 }
288             }
289 
ValidateState(State validStates, string errorPrefix)290             private void ValidateState(State validStates, string errorPrefix)
291             {
292                 if ((validStates & state) == 0)
293                 {
294                     throw reader.CreateException(errorPrefix + state);
295                 }
296             }
297 
298             /// <summary>
299             /// Reads a string token. It is assumed that the opening " has already been read.
300             /// </summary>
ReadString()301             private string ReadString()
302             {
303                 //builder will not be released in case of an exception, but this is not a problem and we will create new on next Acquire
304                 var builder = StringBuilderCache.Acquire();
305                 bool haveHighSurrogate = false;
306                 while (true)
307                 {
308                     char c = reader.ReadOrFail("Unexpected end of text while reading string");
309                     if (c < ' ')
310                     {
311                         throw reader.CreateException(string.Format(CultureInfo.InvariantCulture, "Invalid character in string literal: U+{0:x4}", (int) c));
312                     }
313                     if (c == '"')
314                     {
315                         if (haveHighSurrogate)
316                         {
317                             throw reader.CreateException("Invalid use of surrogate pair code units");
318                         }
319                         return StringBuilderCache.GetStringAndRelease(builder);
320                     }
321                     if (c == '\\')
322                     {
323                         c = ReadEscapedCharacter();
324                     }
325                     // TODO: Consider only allowing surrogate pairs that are either both escaped,
326                     // or both not escaped. It would be a very odd text stream that contained a "lone" high surrogate
327                     // followed by an escaped low surrogate or vice versa... and that couldn't even be represented in UTF-8.
328                     if (haveHighSurrogate != char.IsLowSurrogate(c))
329                     {
330                         throw reader.CreateException("Invalid use of surrogate pair code units");
331                     }
332                     haveHighSurrogate = char.IsHighSurrogate(c);
333                     builder.Append(c);
334                 }
335             }
336 
337             /// <summary>
338             /// Reads an escaped character. It is assumed that the leading backslash has already been read.
339             /// </summary>
ReadEscapedCharacter()340             private char ReadEscapedCharacter()
341             {
342                 char c = reader.ReadOrFail("Unexpected end of text while reading character escape sequence");
343                 return c switch
344                 {
345                     'n' => '\n',
346                     '\\' => '\\',
347                     'b' => '\b',
348                     'f' => '\f',
349                     'r' => '\r',
350                     't' => '\t',
351                     '"' => '"',
352                     '/' => '/',
353                     'u' => ReadUnicodeEscape(),
354                     _ => throw reader.CreateException(string.Format(CultureInfo.InvariantCulture, "Invalid character in character escape sequence: U+{0:x4}", (int)c)),
355                 };
356             }
357 
358             /// <summary>
359             /// Reads an escaped Unicode 4-nybble hex sequence. It is assumed that the leading \u has already been read.
360             /// </summary>
ReadUnicodeEscape()361             private char ReadUnicodeEscape()
362             {
363                 int result = 0;
364                 for (int i = 0; i < 4; i++)
365                 {
366                     char c = reader.ReadOrFail("Unexpected end of text while reading Unicode escape sequence");
367                     int nybble;
368                     if (c >= '0' && c <= '9')
369                     {
370                         nybble = c - '0';
371                     }
372                     else if (c >= 'a' && c <= 'f')
373                     {
374                         nybble = c - 'a' + 10;
375                     }
376                     else if (c >= 'A' && c <= 'F')
377                     {
378                         nybble = c - 'A' + 10;
379                     }
380                     else
381                     {
382                         throw reader.CreateException(string.Format(CultureInfo.InvariantCulture, "Invalid character in character escape sequence: U+{0:x4}", (int) c));
383                     }
384                     result = (result << 4) + nybble;
385                 }
386                 return (char) result;
387             }
388 
389             /// <summary>
390             /// Consumes a text-only literal, throwing an exception if the read text doesn't match it.
391             /// It is assumed that the first letter of the literal has already been read.
392             /// </summary>
ConsumeLiteral(string text)393             private void ConsumeLiteral(string text)
394             {
395                 for (int i = 1; i < text.Length; i++)
396                 {
397                     int next = reader.Read();
398                     if (next != text[i])
399                     {
400                         // Only check for "end of text" when we've detected that the character differs from the
401                         // expected one.
402                         var message = next == -1
403                             ? $"Unexpected end of text while reading literal token {text}"
404                             : $"Unexpected character while reading literal token {text}";
405                         throw reader.CreateException(message);
406                     }
407                 }
408             }
409 
ReadNumber(char initialCharacter)410             private double ReadNumber(char initialCharacter)
411             {
412                 //builder will not be released in case of an exception, but this is not a problem and we will create new on next Acquire
413                 var builder = StringBuilderCache.Acquire();
414                 if (initialCharacter == '-')
415                 {
416                     builder.Append("-");
417                 }
418                 else
419                 {
420                     reader.PushBack(initialCharacter);
421                 }
422                 // Each method returns the character it read that doesn't belong in that part,
423                 // so we know what to do next, including pushing the character back at the end.
424                 // null is returned for "end of text".
425                 int next = ReadInt(builder);
426                 if (next == '.')
427                 {
428                     next = ReadFrac(builder);
429                 }
430                 if (next == 'e' || next == 'E')
431                 {
432                     next = ReadExp(builder);
433                 }
434                 // If we read a character which wasn't part of the number, push it back so we can read it again
435                 // to parse the next token.
436                 if (next != -1)
437                 {
438                     reader.PushBack((char) next);
439                 }
440 
441                 // TODO: What exception should we throw if the value can't be represented as a double?
442                 var builderValue = StringBuilderCache.GetStringAndRelease(builder);
443                 try
444                 {
445                     double result = double.Parse(builderValue,
446                         NumberStyles.AllowLeadingSign | NumberStyles.AllowDecimalPoint | NumberStyles.AllowExponent,
447                         CultureInfo.InvariantCulture);
448 
449                     // .NET Core 3.0 and later returns infinity if the number is too large or small to be represented.
450                     // For compatibility with other Protobuf implementations the tokenizer should still throw.
451                     if (double.IsInfinity(result))
452                     {
453                         throw reader.CreateException("Numeric value out of range: " + builderValue);
454                     }
455 
456                     return result;
457                 }
458                 catch (OverflowException)
459                 {
460                     throw reader.CreateException("Numeric value out of range: " + builderValue);
461                 }
462             }
463 
464             /// <summary>
465             /// Copies an integer into a StringBuilder.
466             /// </summary>
467             /// <param name="builder">The builder to read the number into</param>
468             /// <returns>The character following the integer, or -1 for end-of-text.</returns>
ReadInt(StringBuilder builder)469             private int ReadInt(StringBuilder builder)
470             {
471                 char first = reader.ReadOrFail("Invalid numeric literal");
472                 if (first < '0' || first > '9')
473                 {
474                     throw reader.CreateException("Invalid numeric literal");
475                 }
476                 builder.Append(first);
477                 int next = ConsumeDigits(builder, out int digitCount);
478                 if (first == '0' && digitCount != 0)
479                 {
480                     throw reader.CreateException("Invalid numeric literal: leading 0 for non-zero value.");
481                 }
482                 return next;
483             }
484 
485             /// <summary>
486             /// Copies the fractional part of an integer into a StringBuilder, assuming reader is positioned after a period.
487             /// </summary>
488             /// <param name="builder">The builder to read the number into</param>
489             /// <returns>The character following the fractional part, or -1 for end-of-text.</returns>
ReadFrac(StringBuilder builder)490             private int ReadFrac(StringBuilder builder)
491             {
492                 builder.Append('.'); // Already consumed this
493                 int next = ConsumeDigits(builder, out int digitCount);
494                 if (digitCount == 0)
495                 {
496                     throw reader.CreateException("Invalid numeric literal: fraction with no trailing digits");
497                 }
498                 return next;
499             }
500 
501             /// <summary>
502             /// Copies the exponent part of a number into a StringBuilder, with an assumption that the reader is already positioned after the "e".
503             /// </summary>
504             /// <param name="builder">The builder to read the number into</param>
505             /// <returns>The character following the exponent, or -1 for end-of-text.</returns>
ReadExp(StringBuilder builder)506             private int ReadExp(StringBuilder builder)
507             {
508                 builder.Append('E'); // Already consumed this (or 'e')
509                 int next = reader.Read();
510                 if (next == -1)
511                 {
512                     throw reader.CreateException("Invalid numeric literal: exponent with no trailing digits");
513                 }
514                 if (next == '-' || next == '+')
515                 {
516                     builder.Append((char) next);
517                 }
518                 else
519                 {
520                     reader.PushBack((char) next);
521                 }
522                 next = ConsumeDigits(builder, out int digitCount);
523                 if (digitCount == 0)
524                 {
525                     throw reader.CreateException("Invalid numeric literal: exponent without value");
526                 }
527                 return next;
528             }
529 
530             /// <summary>
531             /// Copies a sequence of digits into a StringBuilder.
532             /// </summary>
533             /// <param name="builder">The builder to read the number into</param>
534             /// <param name="count">The number of digits appended to the builder</param>
535             /// <returns>The character following the digits, or -1 for end-of-text.</returns>
ConsumeDigits(StringBuilder builder, out int count)536             private int ConsumeDigits(StringBuilder builder, out int count)
537             {
538                 count = 0;
539                 while (true)
540                 {
541                     int next = reader.Read();
542                     if (next == -1 || next < '0' || next > '9')
543                     {
544                         return next;
545                     }
546                     count++;
547                     builder.Append((char) next);
548                 }
549             }
550 
551             /// <summary>
552             /// Validates that we're in a valid state to read a value (using the given error prefix if necessary)
553             /// and changes the state to the appropriate one, e.g. ObjectAfterColon to ObjectAfterProperty.
554             /// </summary>
ValidateAndModifyStateForValue(string errorPrefix)555             private void ValidateAndModifyStateForValue(string errorPrefix)
556             {
557                 ValidateState(ValueStates, errorPrefix);
558                 switch (state)
559                 {
560                     case State.StartOfDocument:
561                         state = State.ExpectedEndOfDocument;
562                         return;
563                     case State.ObjectAfterColon:
564                         state = State.ObjectAfterProperty;
565                         return;
566                     case State.ArrayStart:
567                     case State.ArrayAfterComma:
568                         state = State.ArrayAfterValue;
569                         return;
570                     default:
571                         throw new InvalidOperationException("ValidateAndModifyStateForValue does not handle all value states (and should)");
572                 }
573             }
574 
575             /// <summary>
576             /// Pops the top-most container, and sets the state to the appropriate one for the end of a value
577             /// in the parent container.
578             /// </summary>
PopContainer()579             private void PopContainer()
580             {
581                 containerStack.Pop();
582                 var parent = containerStack.Peek();
583                 state = parent switch
584                 {
585                     ContainerType.Object => State.ObjectAfterProperty,
586                     ContainerType.Array => State.ArrayAfterValue,
587                     ContainerType.Document => State.ExpectedEndOfDocument,
588                     _ => throw new InvalidOperationException("Unexpected container type: " + parent),
589                 };
590             }
591 
            /// <summary>
            /// The kinds of entry held on the container stack. Document is pushed once by the constructor;
            /// Object and Array are pushed when an opening brace or square bracket is read.
            /// </summary>
            private enum ContainerType
            {
                Document, Object, Array
            }
596 
/// <summary>
/// Possible states of the tokenizer.
/// </summary>
/// <remarks>
/// <para>This is a flags enum purely so we can simply and efficiently represent a set of valid states
/// for checking.</para>
/// <para>
/// Each is documented with an example,
/// where ^ represents the current position within the text stream. The examples all use string values,
/// but could be any value, including nested objects/arrays.
/// The complete state of the tokenizer also includes a stack to indicate the contexts (arrays/objects).
/// An additional notional state of "AfterValue" indicates that a value has been completed, at which
/// point there's an immediate transition to ExpectedEndOfDocument, ObjectAfterProperty or ArrayAfterValue.
/// </para>
/// <para>
/// These states were derived manually by reading RFC 7159 carefully.
/// </para>
/// </remarks>
[Flags]
private enum State
{
    /// <summary>
    /// ^ { "foo": "bar" }
    /// Before the value in a document. Next states: ObjectStart, ArrayStart, "AfterValue"
    /// </summary>
    StartOfDocument = 1 << 0,
    /// <summary>
    /// { "foo": "bar" } ^
    /// After the value in a document. Next states: ReaderExhausted
    /// </summary>
    ExpectedEndOfDocument = 1 << 1,
    /// <summary>
    /// { "foo": "bar" } ^ (and already read to the end of the reader)
    /// Terminal state.
    /// </summary>
    ReaderExhausted = 1 << 2,
    /// <summary>
    /// { ^ "foo": "bar" }
    /// Before the *first* property in an object.
    /// Next states:
    /// "AfterValue" (empty object)
    /// ObjectBeforeColon (read a name)
    /// </summary>
    ObjectStart = 1 << 3,
    /// <summary>
    /// { "foo" ^ : "bar", "x": "y" }
    /// After a property name, so expecting the colon that separates it from its value.
    /// Next state: ObjectAfterColon
    /// </summary>
    ObjectBeforeColon = 1 << 4,
    /// <summary>
    /// { "foo" : ^ "bar", "x": "y" }
    /// After the colon following a property name, so expecting that property's value.
    /// Next states:
    /// "AfterValue" (value is simple)
    /// ObjectStart (value is object)
    /// ArrayStart (value is array)
    /// </summary>
    ObjectAfterColon = 1 << 5,
    /// <summary>
    /// { "foo" : "bar" ^ , "x" : "y" }
    /// At the end of a property, so expecting either a comma or end-of-object.
    /// Next states: ObjectAfterComma (read a comma) or "AfterValue" (end of object)
    /// </summary>
    ObjectAfterProperty = 1 << 6,
    /// <summary>
    /// { "foo":"bar", ^ "x":"y" }
    /// Read the comma after the previous property, so expecting another property.
    /// This is like ObjectStart, but closing brace isn't valid here.
    /// Next state: ObjectBeforeColon.
    /// </summary>
    ObjectAfterComma = 1 << 7,
    /// <summary>
    /// [ ^ "foo", "bar" ]
    /// Before the *first* value in an array.
    /// Next states:
    /// "AfterValue" (read a simple value)
    /// ObjectStart (value is object)
    /// ArrayStart (value is array)
    /// "AfterValue" (end of array; will pop stack)
    /// </summary>
    ArrayStart = 1 << 8,
    /// <summary>
    /// [ "foo" ^ , "bar" ]
    /// After any value in an array, so expecting either a comma or end-of-array.
    /// Next states: ArrayAfterComma (read a comma) or "AfterValue" (end of array)
    /// </summary>
    ArrayAfterValue = 1 << 9,
    /// <summary>
    /// [ "foo", ^ "bar" ]
    /// After a comma in an array, so there *must* be another value (simple or complex).
    /// Next states: "AfterValue" (simple value), ObjectStart, ArrayStart
    /// </summary>
    ArrayAfterComma = 1 << 10
}
690 
/// <summary>
/// Wraps a <see cref="TextReader"/> and adds a single character of push-back on top of it.
/// Location tracking is planned but not implemented yet (see the TODO notes).
/// </summary>
private class PushBackReader
{
    // TODO: Add locations for errors etc.

    /// <summary>
    /// The one pushed-back character waiting to be read again, or -1 when nothing is buffered.
    /// </summary>
    private int nextChar = -1;

    private readonly TextReader reader;

    internal PushBackReader(TextReader reader)
    {
        // TODO: Wrap the reader in a BufferedReader?
        this.reader = reader;
    }

    /// <summary>
    /// Returns the pushed-back character if there is one (clearing the buffer); otherwise returns
    /// whatever the wrapped reader's <c>Read()</c> returns, which is -1 at the end of the stream.
    /// </summary>
    internal int Read()
    {
        if (nextChar == -1)
        {
            // Nothing buffered: go straight to the wrapped reader.
            return reader.Read();
        }

        int buffered = nextChar;
        nextChar = -1;
        return buffered;
    }

    /// <summary>
    /// Reads the next character via <see cref="Read"/>, throwing an <see cref="InvalidJsonException" />
    /// with the specified message if there are no more characters available.
    /// </summary>
    internal char ReadOrFail(string messageOnFailure)
    {
        int value = Read();
        if (value != -1)
        {
            return (char) value;
        }
        throw CreateException(messageOnFailure);
    }

    /// <summary>
    /// Buffers <paramref name="c"/> so that the next call to <see cref="Read"/> returns it.
    /// Only one character can be buffered at a time; a second push-back before the first has been
    /// read throws <see cref="InvalidOperationException"/>.
    /// </summary>
    internal void PushBack(char c)
    {
        bool alreadyBuffering = nextChar != -1;
        if (alreadyBuffering)
        {
            throw new InvalidOperationException("Cannot push back when already buffering a character");
        }
        nextChar = c;
    }

    /// <summary>
    /// Builds the exception to throw for invalid JSON encountered by this reader.
    /// </summary>
    // TODO: Keep track of and use the location.
    internal InvalidJsonException CreateException(string message) => new InvalidJsonException(message);
}
757 
/// <summary>
/// Keeps at most one reusable <see cref="StringBuilder"/> per thread, so that short-lived builders
/// do not have to be allocated each time.
/// Copied from https://github.com/dotnet/runtime/blob/main/src/libraries/Common/src/System/Text/StringBuilderCache.cs
/// </summary>
private static class StringBuilderCache
{
    private const int DefaultStringBuilderCapacity = 16; // == StringBuilder.DefaultCapacity
    private const int MaxCachedStringBuilderSize = 360;

    // [ThreadStatic] gives each thread its own slot, so no locking is needed around it.
    [ThreadStatic]
    private static StringBuilder cachedInstance;

    /// <summary>Get a StringBuilder for the specified capacity.</summary>
    /// <remarks>
    /// The cached builder is handed out (cleared, and removed from the cache) only when the request
    /// is within the cacheable size and the cached builder is already at least that big. In every
    /// other case a brand new builder is created and the cache is left untouched.
    /// </remarks>
    public static StringBuilder Acquire(int capacity = DefaultStringBuilderCapacity)
    {
        // Requests above the cacheable size never consult the cache at all.
        StringBuilder candidate = capacity <= MaxCachedStringBuilderSize ? cachedInstance : null;

        // Avoid stringbuilder block fragmentation: a cached builder smaller than the request
        // is not reused, a fresh one of the right size is created instead.
        if (candidate == null || candidate.Capacity < capacity)
        {
            return new StringBuilder(capacity);
        }

        cachedInstance = null;
        candidate.Clear();
        return candidate;
    }

    /// <summary>Place the specified builder in the cache if it is not too big.</summary>
    /// <remarks>
    /// When a builder is already cached, the one with the larger capacity is kept; on a tie the
    /// already-cached builder stays.
    /// </remarks>
    private static void Release(StringBuilder sb)
    {
        if (sb.Capacity > MaxCachedStringBuilderSize)
        {
            return;
        }

        StringBuilder current = cachedInstance;
        if (current == null || current.Capacity < sb.Capacity)
        {
            cachedInstance = sb;
        }
    }

    /// <summary>ToString() the stringbuilder, Release it to the cache, and return the resulting string.</summary>
    public static string GetStringAndRelease(StringBuilder sb)
    {
        string text = sb.ToString();
        Release(sb);
        return text;
    }
}
810         }
811     }
812 }
813