• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2010 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package android.util;
18 
19 import java.io.Closeable;
20 import java.io.EOFException;
21 import java.io.IOException;
22 import java.io.Reader;
23 import java.util.ArrayList;
24 import java.util.List;
25 import libcore.internal.StringPool;
26 
27 /**
28  * Reads a JSON (<a href="http://www.ietf.org/rfc/rfc4627.txt">RFC 4627</a>)
29  * encoded value as a stream of tokens. This stream includes both literal
30  * values (strings, numbers, booleans, and nulls) as well as the begin and
31  * end delimiters of objects and arrays. The tokens are traversed in
32  * depth-first order, the same order that they appear in the JSON document.
33  * Within JSON objects, name/value pairs are represented by a single token.
34  *
35  * <h3>Parsing JSON</h3>
36  * To create a recursive descent parser for your own JSON streams, first create
37  * an entry point method that creates a {@code JsonReader}.
38  *
39  * <p>Next, create handler methods for each structure in your JSON text. You'll
40  * need a method for each object type and for each array type.
41  * <ul>
42  *   <li>Within <strong>array handling</strong> methods, first call {@link
43  *       #beginArray} to consume the array's opening bracket. Then create a
44  *       while loop that accumulates values, terminating when {@link #hasNext}
45  *       is false. Finally, read the array's closing bracket by calling {@link
46  *       #endArray}.
47  *   <li>Within <strong>object handling</strong> methods, first call {@link
48  *       #beginObject} to consume the object's opening brace. Then create a
49  *       while loop that assigns values to local variables based on their name.
50  *       This loop should terminate when {@link #hasNext} is false. Finally,
51  *       read the object's closing brace by calling {@link #endObject}.
52  * </ul>
53  * <p>When a nested object or array is encountered, delegate to the
54  * corresponding handler method.
55  *
56  * <p>When an unknown name is encountered, strict parsers should fail with an
57  * exception. Lenient parsers should call {@link #skipValue()} to recursively
58  * skip the value's nested tokens, which may otherwise conflict.
59  *
60  * <p>If a value may be null, you should first check using {@link #peek()}.
61  * Null literals can be consumed using either {@link #nextNull()} or {@link
62  * #skipValue()}.
63  *
64  * <h3>Example</h3>
65  * Suppose we'd like to parse a stream of messages such as the following: <pre> {@code
66  * [
67  *   {
68  *     "id": 912345678901,
69  *     "text": "How do I read JSON on Android?",
70  *     "geo": null,
71  *     "user": {
72  *       "name": "android_newb",
73  *       "followers_count": 41
74  *      }
75  *   },
76  *   {
77  *     "id": 912345678902,
78  *     "text": "@android_newb just use android.util.JsonReader!",
79  *     "geo": [50.454722, -104.606667],
80  *     "user": {
81  *       "name": "jesse",
82  *       "followers_count": 2
83  *     }
84  *   }
85  * ]}</pre>
86  * This code implements the parser for the above structure: <pre>   {@code
87  *
88  *   public List<Message> readJsonStream(InputStream in) throws IOException {
89  *     JsonReader reader = new JsonReader(new InputStreamReader(in, "UTF-8"));
90  *     try {
91  *       return readMessagesArray(reader);
92  *     } finally {
93  *       reader.close();
94  *     }
95  *   }
96  *
97  *   public List<Message> readMessagesArray(JsonReader reader) throws IOException {
98  *     List<Message> messages = new ArrayList<Message>();
99  *
100  *     reader.beginArray();
101  *     while (reader.hasNext()) {
102  *       messages.add(readMessage(reader));
103  *     }
104  *     reader.endArray();
105  *     return messages;
106  *   }
107  *
108  *   public Message readMessage(JsonReader reader) throws IOException {
109  *     long id = -1;
110  *     String text = null;
111  *     User user = null;
112  *     List<Double> geo = null;
113  *
114  *     reader.beginObject();
115  *     while (reader.hasNext()) {
116  *       String name = reader.nextName();
117  *       if (name.equals("id")) {
118  *         id = reader.nextLong();
119  *       } else if (name.equals("text")) {
120  *         text = reader.nextString();
121  *       } else if (name.equals("geo") && reader.peek() != JsonToken.NULL) {
122  *         geo = readDoublesArray(reader);
123  *       } else if (name.equals("user")) {
124  *         user = readUser(reader);
125  *       } else {
126  *         reader.skipValue();
127  *       }
128  *     }
129  *     reader.endObject();
130  *     return new Message(id, text, user, geo);
131  *   }
132  *
133  *   public List<Double> readDoublesArray(JsonReader reader) throws IOException {
134  *     List<Double> doubles = new ArrayList<Double>();
135  *
136  *     reader.beginArray();
137  *     while (reader.hasNext()) {
138  *       doubles.add(reader.nextDouble());
139  *     }
140  *     reader.endArray();
141  *     return doubles;
142  *   }
143  *
144  *   public User readUser(JsonReader reader) throws IOException {
145  *     String username = null;
146  *     int followersCount = -1;
147  *
148  *     reader.beginObject();
149  *     while (reader.hasNext()) {
150  *       String name = reader.nextName();
151  *       if (name.equals("name")) {
152  *         username = reader.nextString();
153  *       } else if (name.equals("followers_count")) {
154  *         followersCount = reader.nextInt();
155  *       } else {
156  *         reader.skipValue();
157  *       }
158  *     }
159  *     reader.endObject();
160  *     return new User(username, followersCount);
161  *   }}</pre>
162  *
163  * <h3>Number Handling</h3>
164  * This reader permits numeric values to be read as strings and string values to
165  * be read as numbers. For example, both elements of the JSON array {@code
166  * [1, "1"]} may be read using either {@link #nextInt} or {@link #nextString}.
167  * This behavior is intended to prevent lossy numeric conversions: double is
168  * JavaScript's only numeric type and very large values like {@code
169  * 9007199254740993} cannot be represented exactly on that platform. To minimize
170  * precision loss, extremely large values should be written and read as strings
171  * in JSON.
172  *
173  * <p>Each {@code JsonReader} may be used to read a single JSON stream. Instances
174  * of this class are not thread safe.
175  */
176 public final class JsonReader implements Closeable {
177 
    // Canonical spellings of the boolean literals; nextBoolean() checks the pending value against TRUE.
    private static final String TRUE = "true";
    private static final String FALSE = "false";

    /** Pool consulted by nextString(char) when a string can be taken straight from the buffer. */
    private final StringPool stringPool = new StringPool();

    /** The input JSON. */
    private final Reader in;

    /** True to accept non-spec compliant JSON; toggled through setLenient(boolean). */
    private boolean lenient = false;

    /**
     * Use a manual buffer to easily read and unread upcoming characters, and
     * also so we can create strings without an intermediate StringBuilder.
     * We decode literals directly out of this buffer, so it must be at least as
     * long as the longest token that can be reported as a number.
     *
     * <p>{@code pos} is the index of the next character to read and {@code
     * limit} is the end (exclusive) of the valid characters; fillBuffer(int)
     * compacts the unread tail to index 0 before reading more input.
     */
    private final char[] buffer = new char[1024];
    private int pos = 0;
    private int limit = 0;

    /*
     * The line and column of the first character in the buffer. Both start at 1
     * and are advanced by fillBuffer(int) as consumed characters are discarded.
     */
    private int bufferStartLine = 1;
    private int bufferStartColumn = 1;

    /** Scope stack; the instance initializer below seeds it with EMPTY_DOCUMENT. */
    private final List<JsonScope> stack = new ArrayList<JsonScope>();
    {
        push(JsonScope.EMPTY_DOCUMENT);
    }

    /**
     * The type of the next token to be returned by {@link #peek} and {@link
     * #advance}. If null, peek() will assign a value.
     */
    private JsonToken token;

    /** The text of the next name. Cleared by advance(). */
    private String name;

    /*
     * For the next literal value, we may have the text value, or the position
     * and length in the buffer.
     */
    private String value;
    private int valuePos;
    private int valueLength;

    /** True if we're currently handling a skipValue() call. */
    private boolean skipping = false;
229 
230     /**
231      * Creates a new instance that reads a JSON-encoded stream from {@code in}.
232      */
JsonReader(Reader in)233     public JsonReader(Reader in) {
234         if (in == null) {
235             throw new NullPointerException("in == null");
236         }
237         this.in = in;
238     }
239 
240     /**
241      * Configure this parser to be  be liberal in what it accepts. By default,
242      * this parser is strict and only accepts JSON as specified by <a
243      * href="http://www.ietf.org/rfc/rfc4627.txt">RFC 4627</a>. Setting the
244      * parser to lenient causes it to ignore the following syntax errors:
245      *
246      * <ul>
247      *   <li>End of line comments starting with {@code //} or {@code #} and
248      *       ending with a newline character.
249      *   <li>C-style comments starting with {@code /*} and ending with
250      *       {@code *}{@code /}. Such comments may not be nested.
251      *   <li>Names that are unquoted or {@code 'single quoted'}.
252      *   <li>Strings that are unquoted or {@code 'single quoted'}.
253      *   <li>Array elements separated by {@code ;} instead of {@code ,}.
254      *   <li>Unnecessary array separators. These are interpreted as if null
255      *       was the omitted value.
256      *   <li>Names and values separated by {@code =} or {@code =>} instead of
257      *       {@code :}.
258      *   <li>Name/value pairs separated by {@code ;} instead of {@code ,}.
259      * </ul>
260      */
setLenient(boolean lenient)261     public void setLenient(boolean lenient) {
262         this.lenient = lenient;
263     }
264 
265     /**
266      * Returns true if this parser is liberal in what it accepts.
267      */
isLenient()268     public boolean isLenient() {
269         return lenient;
270     }
271 
272     /**
273      * Consumes the next token from the JSON stream and asserts that it is the
274      * beginning of a new array.
275      */
beginArray()276     public void beginArray() throws IOException {
277         expect(JsonToken.BEGIN_ARRAY);
278     }
279 
280     /**
281      * Consumes the next token from the JSON stream and asserts that it is the
282      * end of the current array.
283      */
endArray()284     public void endArray() throws IOException {
285         expect(JsonToken.END_ARRAY);
286     }
287 
288     /**
289      * Consumes the next token from the JSON stream and asserts that it is the
290      * beginning of a new object.
291      */
beginObject()292     public void beginObject() throws IOException {
293         expect(JsonToken.BEGIN_OBJECT);
294     }
295 
296     /**
297      * Consumes the next token from the JSON stream and asserts that it is the
298      * end of the current array.
299      */
endObject()300     public void endObject() throws IOException {
301         expect(JsonToken.END_OBJECT);
302     }
303 
304     /**
305      * Consumes {@code expected}.
306      */
expect(JsonToken expected)307     private void expect(JsonToken expected) throws IOException {
308         peek();
309         if (token != expected) {
310             throw new IllegalStateException("Expected " + expected + " but was " + peek());
311         }
312         advance();
313     }
314 
315     /**
316      * Returns true if the current array or object has another element.
317      */
hasNext()318     public boolean hasNext() throws IOException {
319         peek();
320         return token != JsonToken.END_OBJECT && token != JsonToken.END_ARRAY;
321     }
322 
323     /**
324      * Returns the type of the next token without consuming it.
325      */
peek()326     public JsonToken peek() throws IOException {
327         if (token != null) {
328           return token;
329         }
330 
331         switch (peekStack()) {
332             case EMPTY_DOCUMENT:
333                 replaceTop(JsonScope.NONEMPTY_DOCUMENT);
334                 JsonToken firstToken = nextValue();
335                 if (!lenient && token != JsonToken.BEGIN_ARRAY && token != JsonToken.BEGIN_OBJECT) {
336                     throw new IOException(
337                             "Expected JSON document to start with '[' or '{' but was " + token);
338                 }
339                 return firstToken;
340             case EMPTY_ARRAY:
341                 return nextInArray(true);
342             case NONEMPTY_ARRAY:
343                 return nextInArray(false);
344             case EMPTY_OBJECT:
345                 return nextInObject(true);
346             case DANGLING_NAME:
347                 return objectValue();
348             case NONEMPTY_OBJECT:
349                 return nextInObject(false);
350             case NONEMPTY_DOCUMENT:
351                 try {
352                     JsonToken token = nextValue();
353                     if (lenient) {
354                         return token;
355                     }
356                     throw syntaxError("Expected EOF");
357                 } catch (EOFException e) {
358                     return token = JsonToken.END_DOCUMENT; // TODO: avoid throwing here?
359                 }
360             case CLOSED:
361                 throw new IllegalStateException("JsonReader is closed");
362             default:
363                 throw new AssertionError();
364         }
365     }
366 
367     /**
368      * Advances the cursor in the JSON stream to the next token.
369      */
advance()370     private JsonToken advance() throws IOException {
371         peek();
372 
373         JsonToken result = token;
374         token = null;
375         value = null;
376         name = null;
377         return result;
378     }
379 
380     /**
381      * Returns the next token, a {@link JsonToken#NAME property name}, and
382      * consumes it.
383      *
384      * @throws IOException if the next token in the stream is not a property
385      *     name.
386      */
nextName()387     public String nextName() throws IOException {
388         peek();
389         if (token != JsonToken.NAME) {
390             throw new IllegalStateException("Expected a name but was " + peek());
391         }
392         String result = name;
393         advance();
394         return result;
395     }
396 
397     /**
398      * Returns the {@link JsonToken#STRING string} value of the next token,
399      * consuming it. If the next token is a number, this method will return its
400      * string form.
401      *
402      * @throws IllegalStateException if the next token is not a string or if
403      *     this reader is closed.
404      */
nextString()405     public String nextString() throws IOException {
406         peek();
407         if (token != JsonToken.STRING && token != JsonToken.NUMBER) {
408             throw new IllegalStateException("Expected a string but was " + peek());
409         }
410 
411         String result = value;
412         advance();
413         return result;
414     }
415 
416     /**
417      * Returns the {@link JsonToken#BOOLEAN boolean} value of the next token,
418      * consuming it.
419      *
420      * @throws IllegalStateException if the next token is not a boolean or if
421      *     this reader is closed.
422      */
nextBoolean()423     public boolean nextBoolean() throws IOException {
424         peek();
425         if (token != JsonToken.BOOLEAN) {
426             throw new IllegalStateException("Expected a boolean but was " + token);
427         }
428 
429         boolean result = (value == TRUE);
430         advance();
431         return result;
432     }
433 
434     /**
435      * Consumes the next token from the JSON stream and asserts that it is a
436      * literal null.
437      *
438      * @throws IllegalStateException if the next token is not null or if this
439      *     reader is closed.
440      */
nextNull()441     public void nextNull() throws IOException {
442         peek();
443         if (token != JsonToken.NULL) {
444             throw new IllegalStateException("Expected null but was " + token);
445         }
446 
447         advance();
448     }
449 
450     /**
451      * Returns the {@link JsonToken#NUMBER double} value of the next token,
452      * consuming it. If the next token is a string, this method will attempt to
453      * parse it as a double using {@link Double#parseDouble(String)}.
454      *
455      * @throws IllegalStateException if the next token is not a literal value.
456      */
nextDouble()457     public double nextDouble() throws IOException {
458         peek();
459         if (token != JsonToken.STRING && token != JsonToken.NUMBER) {
460             throw new IllegalStateException("Expected a double but was " + token);
461         }
462 
463         double result = Double.parseDouble(value);
464         advance();
465         return result;
466     }
467 
468     /**
469      * Returns the {@link JsonToken#NUMBER long} value of the next token,
470      * consuming it. If the next token is a string, this method will attempt to
471      * parse it as a long. If the next token's numeric value cannot be exactly
472      * represented by a Java {@code long}, this method throws.
473      *
474      * @throws IllegalStateException if the next token is not a literal value.
475      * @throws NumberFormatException if the next literal value cannot be parsed
476      *     as a number, or exactly represented as a long.
477      */
nextLong()478     public long nextLong() throws IOException {
479         peek();
480         if (token != JsonToken.STRING && token != JsonToken.NUMBER) {
481             throw new IllegalStateException("Expected a long but was " + token);
482         }
483 
484         long result;
485         try {
486             result = Long.parseLong(value);
487         } catch (NumberFormatException ignored) {
488             double asDouble = Double.parseDouble(value); // don't catch this NumberFormatException
489             result = (long) asDouble;
490             if ((double) result != asDouble) {
491                 throw new NumberFormatException(value);
492             }
493         }
494 
495         advance();
496         return result;
497     }
498 
499     /**
500      * Returns the {@link JsonToken#NUMBER int} value of the next token,
501      * consuming it. If the next token is a string, this method will attempt to
502      * parse it as an int. If the next token's numeric value cannot be exactly
503      * represented by a Java {@code int}, this method throws.
504      *
505      * @throws IllegalStateException if the next token is not a literal value.
506      * @throws NumberFormatException if the next literal value cannot be parsed
507      *     as a number, or exactly represented as an int.
508      */
nextInt()509     public int nextInt() throws IOException {
510         peek();
511         if (token != JsonToken.STRING && token != JsonToken.NUMBER) {
512             throw new IllegalStateException("Expected an int but was " + token);
513         }
514 
515         int result;
516         try {
517             result = Integer.parseInt(value);
518         } catch (NumberFormatException ignored) {
519             double asDouble = Double.parseDouble(value); // don't catch this NumberFormatException
520             result = (int) asDouble;
521             if ((double) result != asDouble) {
522                 throw new NumberFormatException(value);
523             }
524         }
525 
526         advance();
527         return result;
528     }
529 
530     /**
531      * Closes this JSON reader and the underlying {@link Reader}.
532      */
close()533     public void close() throws IOException {
534         value = null;
535         token = null;
536         stack.clear();
537         stack.add(JsonScope.CLOSED);
538         in.close();
539     }
540 
541     /**
542      * Skips the next value recursively. If it is an object or array, all nested
543      * elements are skipped. This method is intended for use when the JSON token
544      * stream contains unrecognized or unhandled values.
545      */
skipValue()546     public void skipValue() throws IOException {
547         skipping = true;
548         try {
549             int count = 0;
550             do {
551                 JsonToken token = advance();
552                 if (token == JsonToken.BEGIN_ARRAY || token == JsonToken.BEGIN_OBJECT) {
553                     count++;
554                 } else if (token == JsonToken.END_ARRAY || token == JsonToken.END_OBJECT) {
555                     count--;
556                 }
557             } while (count != 0);
558         } finally {
559             skipping = false;
560         }
561     }
562 
peekStack()563     private JsonScope peekStack() {
564         return stack.get(stack.size() - 1);
565     }
566 
pop()567     private JsonScope pop() {
568         return stack.remove(stack.size() - 1);
569     }
570 
push(JsonScope newTop)571     private void push(JsonScope newTop) {
572         stack.add(newTop);
573     }
574 
575     /**
576      * Replace the value on the top of the stack with the given value.
577      */
replaceTop(JsonScope newTop)578     private void replaceTop(JsonScope newTop) {
579         stack.set(stack.size() - 1, newTop);
580     }
581 
nextInArray(boolean firstElement)582     private JsonToken nextInArray(boolean firstElement) throws IOException {
583         if (firstElement) {
584             replaceTop(JsonScope.NONEMPTY_ARRAY);
585         } else {
586             /* Look for a comma before each element after the first element. */
587             switch (nextNonWhitespace()) {
588                 case ']':
589                     pop();
590                     return token = JsonToken.END_ARRAY;
591                 case ';':
592                     checkLenient(); // fall-through
593                 case ',':
594                     break;
595                 default:
596                     throw syntaxError("Unterminated array");
597             }
598         }
599 
600         switch (nextNonWhitespace()) {
601             case ']':
602                 if (firstElement) {
603                     pop();
604                     return token = JsonToken.END_ARRAY;
605                 }
606                 // fall-through to handle ",]"
607             case ';':
608             case ',':
609                 /* In lenient mode, a 0-length literal means 'null' */
610                 checkLenient();
611                 pos--;
612                 value = "null";
613                 return token = JsonToken.NULL;
614             default:
615                 pos--;
616                 return nextValue();
617         }
618     }
619 
nextInObject(boolean firstElement)620     private JsonToken nextInObject(boolean firstElement) throws IOException {
621         /*
622          * Read delimiters. Either a comma/semicolon separating this and the
623          * previous name-value pair, or a close brace to denote the end of the
624          * object.
625          */
626         if (firstElement) {
627             /* Peek to see if this is the empty object. */
628             switch (nextNonWhitespace()) {
629                 case '}':
630                     pop();
631                     return token = JsonToken.END_OBJECT;
632                 default:
633                     pos--;
634             }
635         } else {
636             switch (nextNonWhitespace()) {
637                 case '}':
638                     pop();
639                     return token = JsonToken.END_OBJECT;
640                 case ';':
641                 case ',':
642                     break;
643                 default:
644                     throw syntaxError("Unterminated object");
645             }
646         }
647 
648         /* Read the name. */
649         int quote = nextNonWhitespace();
650         switch (quote) {
651             case '\'':
652                 checkLenient(); // fall-through
653             case '"':
654                 name = nextString((char) quote);
655                 break;
656             default:
657                 checkLenient();
658                 pos--;
659                 name = nextLiteral(false);
660                 if (name.isEmpty()) {
661                     throw syntaxError("Expected name");
662                 }
663         }
664 
665         replaceTop(JsonScope.DANGLING_NAME);
666         return token = JsonToken.NAME;
667     }
668 
objectValue()669     private JsonToken objectValue() throws IOException {
670         /*
671          * Read the name/value separator. Usually a colon ':'. In lenient mode
672          * we also accept an equals sign '=', or an arrow "=>".
673          */
674         switch (nextNonWhitespace()) {
675             case ':':
676                 break;
677             case '=':
678                 checkLenient();
679                 if ((pos < limit || fillBuffer(1)) && buffer[pos] == '>') {
680                     pos++;
681                 }
682                 break;
683             default:
684                 throw syntaxError("Expected ':'");
685         }
686 
687         replaceTop(JsonScope.NONEMPTY_OBJECT);
688         return nextValue();
689     }
690 
nextValue()691     private JsonToken nextValue() throws IOException {
692         int c = nextNonWhitespace();
693         switch (c) {
694             case '{':
695                 push(JsonScope.EMPTY_OBJECT);
696                 return token = JsonToken.BEGIN_OBJECT;
697 
698             case '[':
699                 push(JsonScope.EMPTY_ARRAY);
700                 return token = JsonToken.BEGIN_ARRAY;
701 
702             case '\'':
703                 checkLenient(); // fall-through
704             case '"':
705                 value = nextString((char) c);
706                 return token = JsonToken.STRING;
707 
708             default:
709                 pos--;
710                 return readLiteral();
711         }
712     }
713 
714     /**
715      * Returns true once {@code limit - pos >= minimum}. If the data is
716      * exhausted before that many characters are available, this returns
717      * false.
718      */
fillBuffer(int minimum)719     private boolean fillBuffer(int minimum) throws IOException {
720         // Before clobbering the old characters, update where buffer starts
721         for (int i = 0; i < pos; i++) {
722             if (buffer[i] == '\n') {
723                 bufferStartLine++;
724                 bufferStartColumn = 1;
725             } else {
726                 bufferStartColumn++;
727             }
728         }
729 
730         if (limit != pos) {
731             limit -= pos;
732             System.arraycopy(buffer, pos, buffer, 0, limit);
733         } else {
734             limit = 0;
735         }
736 
737         pos = 0;
738         int total;
739         while ((total = in.read(buffer, limit, buffer.length - limit)) != -1) {
740             limit += total;
741 
742             // if this is the first read, consume an optional byte order mark (BOM) if it exists
743                 if (bufferStartLine == 1 && bufferStartColumn == 1
744                         && limit > 0 && buffer[0] == '\ufeff') {
745                 pos++;
746                 bufferStartColumn--;
747             }
748 
749             if (limit >= minimum) {
750                 return true;
751             }
752         }
753         return false;
754     }
755 
getLineNumber()756     private int getLineNumber() {
757         int result = bufferStartLine;
758         for (int i = 0; i < pos; i++) {
759             if (buffer[i] == '\n') {
760                 result++;
761             }
762         }
763         return result;
764     }
765 
getColumnNumber()766     private int getColumnNumber() {
767         int result = bufferStartColumn;
768         for (int i = 0; i < pos; i++) {
769             if (buffer[i] == '\n') {
770                 result = 1;
771             } else {
772                 result++;
773             }
774         }
775         return result;
776     }
777 
nextNonWhitespace()778     private int nextNonWhitespace() throws IOException {
779         while (pos < limit || fillBuffer(1)) {
780             int c = buffer[pos++];
781             switch (c) {
782                 case '\t':
783                 case ' ':
784                 case '\n':
785                 case '\r':
786                     continue;
787 
788                 case '/':
789                     if (pos == limit && !fillBuffer(1)) {
790                         return c;
791                     }
792 
793                     checkLenient();
794                     char peek = buffer[pos];
795                     switch (peek) {
796                         case '*':
797                             // skip a /* c-style comment */
798                             pos++;
799                             if (!skipTo("*/")) {
800                                 throw syntaxError("Unterminated comment");
801                             }
802                             pos += 2;
803                             continue;
804 
805                         case '/':
806                             // skip a // end-of-line comment
807                             pos++;
808                             skipToEndOfLine();
809                             continue;
810 
811                         default:
812                             return c;
813                     }
814 
815                 case '#':
816                     /*
817                      * Skip a # hash end-of-line comment. The JSON RFC doesn't
818                      * specify this behaviour, but it's required to parse
819                      * existing documents. See http://b/2571423.
820                      */
821                     checkLenient();
822                     skipToEndOfLine();
823                     continue;
824 
825                 default:
826                     return c;
827             }
828         }
829 
830         throw new EOFException("End of input");
831     }
832 
checkLenient()833     private void checkLenient() throws IOException {
834         if (!lenient) {
835             throw syntaxError("Use JsonReader.setLenient(true) to accept malformed JSON");
836         }
837     }
838 
839     /**
840      * Advances the position until after the next newline character. If the line
841      * is terminated by "\r\n", the '\n' must be consumed as whitespace by the
842      * caller.
843      */
skipToEndOfLine()844     private void skipToEndOfLine() throws IOException {
845         while (pos < limit || fillBuffer(1)) {
846             char c = buffer[pos++];
847             if (c == '\r' || c == '\n') {
848                 break;
849             }
850         }
851     }
852 
skipTo(String toFind)853     private boolean skipTo(String toFind) throws IOException {
854         outer:
855         for (; pos + toFind.length() <= limit || fillBuffer(toFind.length()); pos++) {
856             for (int c = 0; c < toFind.length(); c++) {
857                 if (buffer[pos + c] != toFind.charAt(c)) {
858                     continue outer;
859                 }
860             }
861             return true;
862         }
863         return false;
864     }
865 
866     /**
867      * Returns the string up to but not including {@code quote}, unescaping any
868      * character escape sequences encountered along the way. The opening quote
869      * should have already been read. This consumes the closing quote, but does
870      * not include it in the returned string.
871      *
872      * @param quote either ' or ".
873      * @throws NumberFormatException if any unicode escape sequences are
874      *     malformed.
875      */
nextString(char quote)876     private String nextString(char quote) throws IOException {
877         StringBuilder builder = null;
878         do {
879             /* the index of the first character not yet appended to the builder. */
880             int start = pos;
881             while (pos < limit) {
882                 int c = buffer[pos++];
883 
884                 if (c == quote) {
885                     if (skipping) {
886                         return "skipped!";
887                     } else if (builder == null) {
888                         return stringPool.get(buffer, start, pos - start - 1);
889                     } else {
890                         builder.append(buffer, start, pos - start - 1);
891                         return builder.toString();
892                     }
893 
894                 } else if (c == '\\') {
895                     if (builder == null) {
896                         builder = new StringBuilder();
897                     }
898                     builder.append(buffer, start, pos - start - 1);
899                     builder.append(readEscapeCharacter());
900                     start = pos;
901                 }
902             }
903 
904             if (builder == null) {
905                 builder = new StringBuilder();
906             }
907             builder.append(buffer, start, pos - start);
908         } while (fillBuffer(1));
909 
910         throw syntaxError("Unterminated string");
911     }
912 
913     /**
914      * Reads the value up to but not including any delimiter characters. This
915      * does not consume the delimiter character.
916      *
917      * @param assignOffsetsOnly true for this method to only set the valuePos
918      *     and valueLength fields and return a null result. This only works if
919      *     the literal is short; a string is returned otherwise.
920      */
nextLiteral(boolean assignOffsetsOnly)921     private String nextLiteral(boolean assignOffsetsOnly) throws IOException {
922         StringBuilder builder = null;
923         valuePos = -1;
924         valueLength = 0;
925         int i = 0;
926 
927         findNonLiteralCharacter:
928         while (true) {
929             for (; pos + i < limit; i++) {
930                 switch (buffer[pos + i]) {
931                 case '/':
932                 case '\\':
933                 case ';':
934                 case '#':
935                 case '=':
936                     checkLenient(); // fall-through
937                 case '{':
938                 case '}':
939                 case '[':
940                 case ']':
941                 case ':':
942                 case ',':
943                 case ' ':
944                 case '\t':
945                 case '\f':
946                 case '\r':
947                 case '\n':
948                     break findNonLiteralCharacter;
949                 }
950             }
951 
952             /*
953              * Attempt to load the entire literal into the buffer at once. If
954              * we run out of input, add a non-literal character at the end so
955              * that decoding doesn't need to do bounds checks.
956              */
957             if (i < buffer.length) {
958                 if (fillBuffer(i + 1)) {
959                     continue;
960                 } else {
961                     buffer[limit] = '\0';
962                     break;
963                 }
964             }
965 
966             // use a StringBuilder when the value is too long. It must be an unquoted string.
967             if (builder == null) {
968                 builder = new StringBuilder();
969             }
970             builder.append(buffer, pos, i);
971             valueLength += i;
972             pos += i;
973             i = 0;
974             if (!fillBuffer(1)) {
975                 break;
976             }
977         }
978 
979         String result;
980         if (assignOffsetsOnly && builder == null) {
981             valuePos = pos;
982             result = null;
983         } else if (skipping) {
984             result = "skipped!";
985         } else if (builder == null) {
986             result = stringPool.get(buffer, pos, i);
987         } else {
988             builder.append(buffer, pos, i);
989             result = builder.toString();
990         }
991         valueLength += i;
992         pos += i;
993         return result;
994     }
995 
toString()996     @Override public String toString() {
997         return getClass().getSimpleName() + " near " + getSnippet();
998     }
999 
1000     /**
1001      * Unescapes the character identified by the character or characters that
1002      * immediately follow a backslash. The backslash '\' should have already
1003      * been read. This supports both unicode escapes "u000A" and two-character
1004      * escapes "\n".
1005      *
1006      * @throws NumberFormatException if any unicode escape sequences are
1007      *     malformed.
1008      */
readEscapeCharacter()1009     private char readEscapeCharacter() throws IOException {
1010         if (pos == limit && !fillBuffer(1)) {
1011             throw syntaxError("Unterminated escape sequence");
1012         }
1013 
1014         char escaped = buffer[pos++];
1015         switch (escaped) {
1016             case 'u':
1017                 if (pos + 4 > limit && !fillBuffer(4)) {
1018                     throw syntaxError("Unterminated escape sequence");
1019                 }
1020                 String hex = stringPool.get(buffer, pos, 4);
1021                 pos += 4;
1022                 return (char) Integer.parseInt(hex, 16);
1023 
1024             case 't':
1025                 return '\t';
1026 
1027             case 'b':
1028                 return '\b';
1029 
1030             case 'n':
1031                 return '\n';
1032 
1033             case 'r':
1034                 return '\r';
1035 
1036             case 'f':
1037                 return '\f';
1038 
1039             case '\'':
1040             case '"':
1041             case '\\':
1042             default:
1043                 return escaped;
1044         }
1045     }
1046 
1047     /**
1048      * Reads a null, boolean, numeric or unquoted string literal value.
1049      */
readLiteral()1050     private JsonToken readLiteral() throws IOException {
1051         value = nextLiteral(true);
1052         if (valueLength == 0) {
1053             throw syntaxError("Expected literal value");
1054         }
1055         token = decodeLiteral();
1056         if (token == JsonToken.STRING) {
1057           checkLenient();
1058         }
1059         return token;
1060     }
1061 
1062     /**
1063      * Assigns {@code nextToken} based on the value of {@code nextValue}.
1064      */
decodeLiteral()1065     private JsonToken decodeLiteral() throws IOException {
1066         if (valuePos == -1) {
1067             // it was too long to fit in the buffer so it can only be a string
1068             return JsonToken.STRING;
1069         } else if (valueLength == 4
1070                 && ('n' == buffer[valuePos    ] || 'N' == buffer[valuePos    ])
1071                 && ('u' == buffer[valuePos + 1] || 'U' == buffer[valuePos + 1])
1072                 && ('l' == buffer[valuePos + 2] || 'L' == buffer[valuePos + 2])
1073                 && ('l' == buffer[valuePos + 3] || 'L' == buffer[valuePos + 3])) {
1074             value = "null";
1075             return JsonToken.NULL;
1076         } else if (valueLength == 4
1077                 && ('t' == buffer[valuePos    ] || 'T' == buffer[valuePos    ])
1078                 && ('r' == buffer[valuePos + 1] || 'R' == buffer[valuePos + 1])
1079                 && ('u' == buffer[valuePos + 2] || 'U' == buffer[valuePos + 2])
1080                 && ('e' == buffer[valuePos + 3] || 'E' == buffer[valuePos + 3])) {
1081             value = TRUE;
1082             return JsonToken.BOOLEAN;
1083         } else if (valueLength == 5
1084                 && ('f' == buffer[valuePos    ] || 'F' == buffer[valuePos    ])
1085                 && ('a' == buffer[valuePos + 1] || 'A' == buffer[valuePos + 1])
1086                 && ('l' == buffer[valuePos + 2] || 'L' == buffer[valuePos + 2])
1087                 && ('s' == buffer[valuePos + 3] || 'S' == buffer[valuePos + 3])
1088                 && ('e' == buffer[valuePos + 4] || 'E' == buffer[valuePos + 4])) {
1089             value = FALSE;
1090             return JsonToken.BOOLEAN;
1091         } else {
1092             value = stringPool.get(buffer, valuePos, valueLength);
1093             return decodeNumber(buffer, valuePos, valueLength);
1094         }
1095     }
1096 
1097     /**
1098      * Determine whether the characters is a JSON number. Numbers are of the
1099      * form -12.34e+56. Fractional and exponential parts are optional. Leading
1100      * zeroes are not allowed in the value or exponential part, but are allowed
1101      * in the fraction.
1102      */
decodeNumber(char[] chars, int offset, int length)1103     private JsonToken decodeNumber(char[] chars, int offset, int length) {
1104         int i = offset;
1105         int c = chars[i];
1106 
1107         if (c == '-') {
1108             c = chars[++i];
1109         }
1110 
1111         if (c == '0') {
1112             c = chars[++i];
1113         } else if (c >= '1' && c <= '9') {
1114             c = chars[++i];
1115             while (c >= '0' && c <= '9') {
1116                 c = chars[++i];
1117             }
1118         } else {
1119             return JsonToken.STRING;
1120         }
1121 
1122         if (c == '.') {
1123             c = chars[++i];
1124             while (c >= '0' && c <= '9') {
1125                 c = chars[++i];
1126             }
1127         }
1128 
1129         if (c == 'e' || c == 'E') {
1130             c = chars[++i];
1131             if (c == '+' || c == '-') {
1132                 c = chars[++i];
1133             }
1134             if (c >= '0' && c <= '9') {
1135                 c = chars[++i];
1136                 while (c >= '0' && c <= '9') {
1137                     c = chars[++i];
1138                 }
1139             } else {
1140                 return JsonToken.STRING;
1141             }
1142         }
1143 
1144         if (i == offset + length) {
1145             return JsonToken.NUMBER;
1146         } else {
1147             return JsonToken.STRING;
1148         }
1149     }
1150 
1151     /**
1152      * Throws a new IO exception with the given message and a context snippet
1153      * with this reader's content.
1154      */
syntaxError(String message)1155     private IOException syntaxError(String message) throws IOException {
1156         throw new MalformedJsonException(message
1157                 + " at line " + getLineNumber() + " column " + getColumnNumber());
1158     }
1159 
getSnippet()1160     private CharSequence getSnippet() {
1161         StringBuilder snippet = new StringBuilder();
1162         int beforePos = Math.min(pos, 20);
1163         snippet.append(buffer, pos - beforePos, beforePos);
1164         int afterPos = Math.min(limit - pos, 20);
1165         snippet.append(buffer, pos, afterPos);
1166         return snippet;
1167     }
1168 }
1169