/*
 * Copyright (C) 2010 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package android.util;

import com.android.internal.util.StringPool;

import java.io.Closeable;
import java.io.EOFException;
import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.List;


/**
 * Reads a JSON (<a href="http://www.ietf.org/rfc/rfc4627.txt">RFC 4627</a>)
 * encoded value as a stream of tokens. This stream includes both literal
 * values (strings, numbers, booleans, and nulls) as well as the begin and
 * end delimiters of objects and arrays. The tokens are traversed in
 * depth-first order, the same order that they appear in the JSON document.
 * Within JSON objects, name/value pairs are represented by a single token.
 *
 * <h3>Parsing JSON</h3>
 * To create a recursive descent parser for your own JSON streams, first create
 * an entry point method that creates a {@code JsonReader}.
 *
 * <p>Next, create handler methods for each structure in your JSON text. You'll
 * need a method for each object type and for each array type.
 * <ul>
 *   <li>Within <strong>array handling</strong> methods, first call {@link
 *       #beginArray} to consume the array's opening bracket. Then create a
 *       while loop that accumulates values, terminating when {@link #hasNext}
 *       is false. Finally, read the array's closing bracket by calling {@link
 *       #endArray}.
 *   <li>Within <strong>object handling</strong> methods, first call {@link
 *       #beginObject} to consume the object's opening brace. Then create a
 *       while loop that assigns values to local variables based on their name.
 *       This loop should terminate when {@link #hasNext} is false. Finally,
 *       read the object's closing brace by calling {@link #endObject}.
 * </ul>
 * <p>When a nested object or array is encountered, delegate to the
 * corresponding handler method.
 *
 * <p>When an unknown name is encountered, strict parsers should fail with an
 * exception. Lenient parsers should call {@link #skipValue()} to recursively
 * skip the value's nested tokens, which may otherwise conflict.
 *
 * <p>If a value may be null, you should first check using {@link #peek()}.
 * Null literals can be consumed using either {@link #nextNull()} or {@link
 * #skipValue()}.
 *
 * <h3>Example</h3>
 * Suppose we'd like to parse a stream of messages such as the following: <pre> {@code
 * [
 *   {
 *     "id": 912345678901,
 *     "text": "How do I read JSON on Android?",
 *     "geo": null,
 *     "user": {
 *       "name": "android_newb",
 *       "followers_count": 41
 *      }
 *   },
 *   {
 *     "id": 912345678902,
 *     "text": "@android_newb just use android.util.JsonReader!",
 *     "geo": [50.454722, -104.606667],
 *     "user": {
 *       "name": "jesse",
 *       "followers_count": 2
 *     }
 *   }
 * ]}</pre>
 * This code implements the parser for the above structure: <pre>   {@code
 *
 *   public List<Message> readJsonStream(InputStream in) throws IOException {
 *     JsonReader reader = new JsonReader(new InputStreamReader(in, "UTF-8"));
 *     try {
 *       return readMessagesArray(reader);
 *     } finally {
 *       reader.close();
 *     }
 *   }
 *
 *   public List<Message> readMessagesArray(JsonReader reader) throws IOException {
 *     List<Message> messages = new ArrayList<Message>();
 *
 *     reader.beginArray();
 *     while (reader.hasNext()) {
 *       messages.add(readMessage(reader));
 *     }
 *     reader.endArray();
 *     return messages;
 *   }
 *
 *   public Message readMessage(JsonReader reader) throws IOException {
 *     long id = -1;
 *     String text = null;
 *     User user = null;
 *     List<Double> geo = null;
 *
 *     reader.beginObject();
 *     while (reader.hasNext()) {
 *       String name = reader.nextName();
 *       if (name.equals("id")) {
 *         id = reader.nextLong();
 *       } else if (name.equals("text")) {
 *         text = reader.nextString();
 *       } else if (name.equals("geo") && reader.peek() != JsonToken.NULL) {
 *         geo = readDoublesArray(reader);
 *       } else if (name.equals("user")) {
 *         user = readUser(reader);
 *       } else {
 *         reader.skipValue();
 *       }
 *     }
 *     reader.endObject();
 *     return new Message(id, text, user, geo);
 *   }
 *
 *   public List<Double> readDoublesArray(JsonReader reader) throws IOException {
 *     List<Double> doubles = new ArrayList<Double>();
 *
 *     reader.beginArray();
 *     while (reader.hasNext()) {
 *       doubles.add(reader.nextDouble());
 *     }
 *     reader.endArray();
 *     return doubles;
 *   }
 *
 *   public User readUser(JsonReader reader) throws IOException {
 *     String username = null;
 *     int followersCount = -1;
 *
 *     reader.beginObject();
 *     while (reader.hasNext()) {
 *       String name = reader.nextName();
 *       if (name.equals("name")) {
 *         username = reader.nextString();
 *       } else if (name.equals("followers_count")) {
 *         followersCount = reader.nextInt();
 *       } else {
 *         reader.skipValue();
 *       }
 *     }
 *     reader.endObject();
 *     return new User(username, followersCount);
 *   }}</pre>
 *
 * <h3>Number Handling</h3>
 * This reader permits numeric values to be read as strings and string values to
 * be read as numbers. For example, both elements of the JSON array {@code
 * [1, "1"]} may be read using either {@link #nextInt} or {@link #nextString}.
 * This behavior is intended to prevent lossy numeric conversions: double is
 * JavaScript's only numeric type and very large values like {@code
 * 9007199254740993} cannot be represented exactly on that platform. To minimize
 * precision loss, extremely large values should be written and read as strings
 * in JSON.
 *
 * <p>Each {@code JsonReader} may be used to read a single JSON stream. Instances
 * of this class are not thread safe.
 */
public final class JsonReader implements Closeable {

    /*
     * Canonical boolean texts. decodeLiteral() assigns exactly these instances
     * to 'value', which is why nextBoolean() may compare by reference.
     */
    private static final String TRUE = "true";
    private static final String FALSE = "false";

    private final StringPool stringPool = new StringPool();

    /** The input JSON. */
    private final Reader in;

    /** True to accept non-spec compliant JSON */
    private boolean lenient = false;

    /**
     * Use a manual buffer to easily read and unread upcoming characters, and
     * also so we can create strings without an intermediate StringBuilder.
     * We decode literals directly out of this buffer, so it must be at least as
     * long as the longest token that can be reported as a number.
     */
    private final char[] buffer = new char[1024];
    private int pos = 0;
    private int limit = 0;

    /*
     * The offset of the first character in the buffer.
     */
    private int bufferStartLine = 1;
    private int bufferStartColumn = 1;

    private final List<JsonScope> stack = new ArrayList<JsonScope>();
    {
        push(JsonScope.EMPTY_DOCUMENT);
    }

    /**
     * The type of the next token to be returned by {@link #peek} and {@link
     * #advance}. If null, peek() will assign a value.
     */
    private JsonToken token;

    /** The text of the next name. */
    private String name;

    /*
     * For the next literal value, we may have the text value, or the position
     * and length in the buffer.
     */
    private String value;
    private int valuePos;
    private int valueLength;

    /** True if we're currently handling a skipValue() call. */
    private boolean skipping = false;

    /**
     * Creates a new instance that reads a JSON-encoded stream from {@code in}.
     *
     * @param in the source of JSON text; must not be null.
     * @throws NullPointerException if {@code in} is null.
     */
    public JsonReader(Reader in) {
        if (in == null) {
            throw new NullPointerException("in == null");
        }
        this.in = in;
    }

    /**
     * Configure this parser to be liberal in what it accepts. By default,
     * this parser is strict and only accepts JSON as specified by <a
     * href="http://www.ietf.org/rfc/rfc4627.txt">RFC 4627</a>. Setting the
     * parser to lenient causes it to ignore the following syntax errors:
     *
     * <ul>
     *   <li>End of line comments starting with {@code //} or {@code #} and
     *       ending with a newline character.
     *   <li>C-style comments starting with {@code /*} and ending with
     *       {@code *}{@code /}. Such comments may not be nested.
     *   <li>Names that are unquoted or {@code 'single quoted'}.
     *   <li>Strings that are unquoted or {@code 'single quoted'}.
     *   <li>Array elements separated by {@code ;} instead of {@code ,}.
     *   <li>Unnecessary array separators. These are interpreted as if null
     *       was the omitted value.
     *   <li>Names and values separated by {@code =} or {@code =>} instead of
     *       {@code :}.
     *   <li>Name/value pairs separated by {@code ;} instead of {@code ,}.
     * </ul>
     */
    public void setLenient(boolean lenient) {
        this.lenient = lenient;
    }

    /**
     * Returns true if this parser is liberal in what it accepts.
     */
    public boolean isLenient() {
        return lenient;
    }

    /**
     * Consumes the next token from the JSON stream and asserts that it is the
     * beginning of a new array.
     */
    public void beginArray() throws IOException {
        expect(JsonToken.BEGIN_ARRAY);
    }

    /**
     * Consumes the next token from the JSON stream and asserts that it is the
     * end of the current array.
     */
    public void endArray() throws IOException {
        expect(JsonToken.END_ARRAY);
    }

    /**
     * Consumes the next token from the JSON stream and asserts that it is the
     * beginning of a new object.
     */
    public void beginObject() throws IOException {
        expect(JsonToken.BEGIN_OBJECT);
    }

    /**
     * Consumes the next token from the JSON stream and asserts that it is the
     * end of the current object.
     */
    public void endObject() throws IOException {
        expect(JsonToken.END_OBJECT);
    }

    /**
     * Consumes {@code expected}.
     *
     * @throws IllegalStateException if the next token is not {@code expected}.
     */
    private void expect(JsonToken expected) throws IOException {
        peek();
        if (token != expected) {
            throw new IllegalStateException("Expected " + expected + " but was " + peek());
        }
        advance();
    }

    /**
     * Returns true if the current array or object has another element.
     */
    public boolean hasNext() throws IOException {
        peek();
        return token != JsonToken.END_OBJECT && token != JsonToken.END_ARRAY;
    }

    /**
     * Returns the type of the next token without consuming it.
     *
     * <p>The token is decoded lazily: if one is already pending it is returned
     * as is, otherwise the scope on top of the stack decides how the upcoming
     * characters are interpreted.
     *
     * @throws IllegalStateException if this reader is closed.
     */
    public JsonToken peek() throws IOException {
        if (token != null) {
            return token;
        }

        switch (peekStack()) {
            case EMPTY_DOCUMENT:
                replaceTop(JsonScope.NONEMPTY_DOCUMENT);
                JsonToken firstToken = nextValue();
                // Strict mode only permits an array or an object at the top level.
                if (!lenient && token != JsonToken.BEGIN_ARRAY && token != JsonToken.BEGIN_OBJECT) {
                    throw new IOException(
                            "Expected JSON document to start with '[' or '{' but was " + token);
                }
                return firstToken;
            case EMPTY_ARRAY:
                return nextInArray(true);
            case NONEMPTY_ARRAY:
                return nextInArray(false);
            case EMPTY_OBJECT:
                return nextInObject(true);
            case DANGLING_NAME:
                return objectValue();
            case NONEMPTY_OBJECT:
                return nextInObject(false);
            case NONEMPTY_DOCUMENT:
                try {
                    // Only lenient mode tolerates further values after the top-level value.
                    JsonToken trailing = nextValue();
                    if (lenient) {
                        return trailing;
                    }
                    throw syntaxError("Expected EOF");
                } catch (EOFException e) {
                    return token = JsonToken.END_DOCUMENT; // TODO: avoid throwing here?
                }
            case CLOSED:
                throw new IllegalStateException("JsonReader is closed");
            default:
                throw new AssertionError();
        }
    }

    /**
     * Advances the cursor in the JSON stream to the next token.
     *
     * @return the token that was pending before the cursor moved.
     */
    private JsonToken advance() throws IOException {
        peek();

        JsonToken result = token;
        token = null;
        value = null;
        name = null;
        return result;
    }

    /**
     * Returns the next token, a {@link JsonToken#NAME property name}, and
     * consumes it.
     *
     * @throws IllegalStateException if the next token in the stream is not a
     *     property name or if this reader is closed.
     */
    public String nextName() throws IOException {
        peek();
        if (token != JsonToken.NAME) {
            throw new IllegalStateException("Expected a name but was " + peek());
        }
        String result = name;
        advance();
        return result;
    }

    /**
     * Returns the {@link JsonToken#STRING string} value of the next token,
     * consuming it. If the next token is a number, this method will return its
     * string form.
     *
     * @throws IllegalStateException if the next token is not a string or if
     *     this reader is closed.
     */
    public String nextString() throws IOException {
        peek();
        if (token != JsonToken.STRING && token != JsonToken.NUMBER) {
            throw new IllegalStateException("Expected a string but was " + peek());
        }

        String result = value;
        advance();
        return result;
    }

    /**
     * Returns the {@link JsonToken#BOOLEAN boolean} value of the next token,
     * consuming it.
     *
     * @throws IllegalStateException if the next token is not a boolean or if
     *     this reader is closed.
     */
    public boolean nextBoolean() throws IOException {
        peek();
        if (token != JsonToken.BOOLEAN) {
            throw new IllegalStateException("Expected a boolean but was " + token);
        }

        // Reference comparison is intentional: decodeLiteral() stores the TRUE
        // or FALSE constant itself in 'value' for boolean tokens.
        boolean result = (value == TRUE);
        advance();
        return result;
    }

    /**
     * Consumes the next token from the JSON stream and asserts that it is a
     * literal null.
     *
     * @throws IllegalStateException if the next token is not null or if this
     *     reader is closed.
     */
    public void nextNull() throws IOException {
        peek();
        if (token != JsonToken.NULL) {
            throw new IllegalStateException("Expected null but was " + token);
        }

        advance();
    }

    /**
     * Returns the {@link JsonToken#NUMBER double} value of the next token,
     * consuming it. If the next token is a string, this method will attempt to
     * parse it as a double using {@link Double#parseDouble(String)}.
     *
     * @throws IllegalStateException if the next token is not a literal value.
     */
    public double nextDouble() throws IOException {
        peek();
        if (token != JsonToken.STRING && token != JsonToken.NUMBER) {
            throw new IllegalStateException("Expected a double but was " + token);
        }

        double result = Double.parseDouble(value);
        advance();
        return result;
    }

    /**
     * Returns the {@link JsonToken#NUMBER long} value of the next token,
     * consuming it. If the next token is a string, this method will attempt to
     * parse it as a long. If the next token's numeric value cannot be exactly
     * represented by a Java {@code long}, this method throws.
     *
     * @throws IllegalStateException if the next token is not a literal value.
     * @throws NumberFormatException if the next literal value cannot be parsed
     *     as a number, or exactly represented as a long.
     */
    public long nextLong() throws IOException {
        peek();
        if (token != JsonToken.STRING && token != JsonToken.NUMBER) {
            throw new IllegalStateException("Expected a long but was " + token);
        }

        long result;
        try {
            result = Long.parseLong(value);
        } catch (NumberFormatException ignored) {
            // Fall back to a double so that forms like "1.0" or "1e3" are accepted,
            // then verify that the conversion to long was lossless.
            double asDouble = Double.parseDouble(value); // don't catch this NumberFormatException
            result = (long) asDouble;
            if ((double) result != asDouble) {
                throw new NumberFormatException(value);
            }
        }

        advance();
        return result;
    }

    /**
     * Returns the {@link JsonToken#NUMBER int} value of the next token,
     * consuming it. If the next token is a string, this method will attempt to
     * parse it as an int. If the next token's numeric value cannot be exactly
     * represented by a Java {@code int}, this method throws.
     *
     * @throws IllegalStateException if the next token is not a literal value.
     * @throws NumberFormatException if the next literal value cannot be parsed
     *     as a number, or exactly represented as an int.
     */
    public int nextInt() throws IOException {
        peek();
        if (token != JsonToken.STRING && token != JsonToken.NUMBER) {
            throw new IllegalStateException("Expected an int but was " + token);
        }

        int result;
        try {
            result = Integer.parseInt(value);
        } catch (NumberFormatException ignored) {
            // Fall back to a double so that forms like "1.0" or "1e3" are accepted,
            // then verify that the conversion to int was lossless.
            double asDouble = Double.parseDouble(value); // don't catch this NumberFormatException
            result = (int) asDouble;
            if ((double) result != asDouble) {
                throw new NumberFormatException(value);
            }
        }

        advance();
        return result;
    }

    /**
     * Closes this JSON reader and the underlying {@link Reader}.
     */
    public void close() throws IOException {
        value = null;
        token = null;
        // Leave a single CLOSED scope so that later peek() calls fail fast.
        stack.clear();
        stack.add(JsonScope.CLOSED);
        in.close();
    }

    /**
     * Skips the next value recursively. If it is an object or array, all nested
     * elements are skipped. This method is intended for use when the JSON token
     * stream contains unrecognized or unhandled values.
     *
     * @throws IllegalStateException if there is no element left to skip.
     */
    public void skipValue() throws IOException {
        skipping = true;
        try {
            if (!hasNext() || peek() == JsonToken.END_DOCUMENT) {
                throw new IllegalStateException("No element left to skip");
            }
            // Nesting depth relative to the value being skipped; done once it is back to zero.
            int count = 0;
            do {
                JsonToken skipped = advance();
                if (skipped == JsonToken.BEGIN_ARRAY || skipped == JsonToken.BEGIN_OBJECT) {
                    count++;
                } else if (skipped == JsonToken.END_ARRAY || skipped == JsonToken.END_OBJECT) {
                    count--;
                }
            } while (count != 0);
        } finally {
            skipping = false;
        }
    }

    /** Returns the innermost scope without removing it. */
    private JsonScope peekStack() {
        return stack.get(stack.size() - 1);
    }

    /** Removes and returns the innermost scope. */
    private JsonScope pop() {
        return stack.remove(stack.size() - 1);
    }

    /** Enters a new innermost scope. */
    private void push(JsonScope newTop) {
        stack.add(newTop);
    }

    /**
     * Replace the value on the top of the stack with the given value.
     */
    private void replaceTop(JsonScope newTop) {
        stack.set(stack.size() - 1, newTop);
    }

    /**
     * Decodes the next token while inside an array.
     *
     * @param firstElement true if no element of the array has been read yet,
     *     in which case no leading separator is expected.
     */
    private JsonToken nextInArray(boolean firstElement) throws IOException {
        if (firstElement) {
            replaceTop(JsonScope.NONEMPTY_ARRAY);
        } else {
            /* Look for a comma before each element after the first element. */
            switch (nextNonWhitespace()) {
                case ']':
                    pop();
                    return token = JsonToken.END_ARRAY;
                case ';':
                    checkLenient(); // fall-through
                case ',':
                    break;
                default:
                    throw syntaxError("Unterminated array");
            }
        }

        switch (nextNonWhitespace()) {
            case ']':
                if (firstElement) {
                    pop();
                    return token = JsonToken.END_ARRAY;
                }
                // fall-through to handle ",]"
            case ';':
            case ',':
                /* In lenient mode, a 0-length literal means 'null' */
                checkLenient();
                pos--;
                value = "null";
                return token = JsonToken.NULL;
            default:
                pos--;
                return nextValue();
        }
    }

    /**
     * Decodes the next token while inside an object: either the closing brace
     * or the name of the next name/value pair.
     *
     * @param firstElement true if no pair of the object has been read yet,
     *     in which case no leading separator is expected.
     */
    private JsonToken nextInObject(boolean firstElement) throws IOException {
        /*
         * Read delimiters. Either a comma/semicolon separating this and the
         * previous name-value pair, or a close brace to denote the end of the
         * object.
         */
        if (firstElement) {
            /* Peek to see if this is the empty object. */
            switch (nextNonWhitespace()) {
                case '}':
                    pop();
                    return token = JsonToken.END_OBJECT;
                default:
                    pos--;
            }
        } else {
            switch (nextNonWhitespace()) {
                case '}':
                    pop();
                    return token = JsonToken.END_OBJECT;
                case ';':
                    // A semicolon separator is only documented for lenient mode,
                    // matching how nextInArray() treats it.
                    checkLenient(); // fall-through
                case ',':
                    break;
                default:
                    throw syntaxError("Unterminated object");
            }
        }

        /* Read the name. */
        int quote = nextNonWhitespace();
        switch (quote) {
            case '\'':
                checkLenient(); // fall-through
            case '"':
                name = nextString((char) quote);
                break;
            default:
                checkLenient();
                pos--;
                name = nextLiteral(false);
                if (name.isEmpty()) {
                    throw syntaxError("Expected name");
                }
        }

        replaceTop(JsonScope.DANGLING_NAME);
        return token = JsonToken.NAME;
    }

    /**
     * Consumes the separator that follows a property name and decodes the
     * value after it.
     */
    private JsonToken objectValue() throws IOException {
        /*
         * Read the name/value separator. Usually a colon ':'. In lenient mode
         * we also accept an equals sign '=', or an arrow "=>".
         */
        switch (nextNonWhitespace()) {
            case ':':
                break;
            case '=':
                checkLenient();
                if ((pos < limit || fillBuffer(1)) && buffer[pos] == '>') {
                    pos++;
                }
                break;
            default:
                throw syntaxError("Expected ':'");
        }

        replaceTop(JsonScope.NONEMPTY_OBJECT);
        return nextValue();
    }

    /**
     * Decodes the value that starts at the next non-whitespace character: the
     * start of an object or array, a quoted string, or a literal.
     */
    private JsonToken nextValue() throws IOException {
        int c = nextNonWhitespace();
        switch (c) {
            case '{':
                push(JsonScope.EMPTY_OBJECT);
                return token = JsonToken.BEGIN_OBJECT;

            case '[':
                push(JsonScope.EMPTY_ARRAY);
                return token = JsonToken.BEGIN_ARRAY;

            case '\'':
                checkLenient(); // fall-through
            case '"':
                value = nextString((char) c);
                return token = JsonToken.STRING;

            default:
                pos--;
                return readLiteral();
        }
    }

    /**
     * Returns true once {@code limit - pos >= minimum}. If the data is
     * exhausted before that many characters are available, this returns
     * false.
     *
     * <p>Unconsumed characters are moved to the start of the buffer, so
     * {@code pos} is reset and any index into the buffer taken before this
     * call is invalidated.
     */
    private boolean fillBuffer(int minimum) throws IOException {
        // Before clobbering the old characters, update where buffer starts
        for (int i = 0; i < pos; i++) {
            if (buffer[i] == '\n') {
                bufferStartLine++;
                bufferStartColumn = 1;
            } else {
                bufferStartColumn++;
            }
        }

        if (limit != pos) {
            limit -= pos;
            System.arraycopy(buffer, pos, buffer, 0, limit);
        } else {
            limit = 0;
        }

        pos = 0;
        int total;
        while ((total = in.read(buffer, limit, buffer.length - limit)) != -1) {
            limit += total;

            // if this is the first read, consume an optional byte order mark (BOM) if it exists
            if (bufferStartLine == 1 && bufferStartColumn == 1
                    && limit > 0 && buffer[0] == '\ufeff') {
                pos++;
                bufferStartColumn--;
            }

            // Measure from pos rather than from 0: a consumed BOM must not count
            // towards the characters promised to the caller.
            if (limit - pos >= minimum) {
                return true;
            }
        }
        return false;
    }

    /** Returns the line number of the current position, counting from 1. */
    private int getLineNumber() {
        int result = bufferStartLine;
        for (int i = 0; i < pos; i++) {
            if (buffer[i] == '\n') {
                result++;
            }
        }
        return result;
    }

    /** Returns the column number of the current position within its line. */
    private int getColumnNumber() {
        int result = bufferStartColumn;
        for (int i = 0; i < pos; i++) {
            if (buffer[i] == '\n') {
                result = 1;
            } else {
                result++;
            }
        }
        return result;
    }

    /**
     * Consumes and returns the next character that is neither whitespace nor
     * part of a comment. Comments are only skipped in lenient mode; in strict
     * mode encountering one is a syntax error.
     *
     * <p>On return the character is still stored at {@code buffer[pos - 1]},
     * so callers may unread it with {@code pos--}.
     *
     * @throws EOFException if the input is exhausted first.
     */
    private int nextNonWhitespace() throws IOException {
        while (pos < limit || fillBuffer(1)) {
            int c = buffer[pos++];
            switch (c) {
                case '\t':
                case ' ':
                case '\n':
                case '\r':
                    continue;

                case '/':
                    if (pos == limit) {
                        // Push the '/' back before refilling so that it survives the
                        // buffer compaction; callers rely on being able to unread it.
                        pos--;
                        boolean charsLoaded = fillBuffer(2);
                        pos++; // consume the '/' again
                        if (!charsLoaded) {
                            return c;
                        }
                    }

                    checkLenient();
                    char peek = buffer[pos];
                    switch (peek) {
                        case '*':
                            // skip a /* c-style comment */
                            pos++;
                            if (!skipTo("*/")) {
                                throw syntaxError("Unterminated comment");
                            }
                            pos += 2;
                            continue;

                        case '/':
                            // skip a // end-of-line comment
                            pos++;
                            skipToEndOfLine();
                            continue;

                        default:
                            return c;
                    }

                case '#':
                    /*
                     * Skip a # hash end-of-line comment. The JSON RFC doesn't
                     * specify this behaviour, but it's required to parse
                     * existing documents. See http://b/2571423.
                     */
                    checkLenient();
                    skipToEndOfLine();
                    continue;

                default:
                    return c;
            }
        }

        throw new EOFException("End of input");
    }

    /** Fails with a syntax error unless this reader is lenient. */
    private void checkLenient() throws IOException {
        if (!lenient) {
            throw syntaxError("Use JsonReader.setLenient(true) to accept malformed JSON");
        }
    }

    /**
     * Advances the position until after the next newline character. If the line
     * is terminated by "\r\n", the '\n' must be consumed as whitespace by the
     * caller.
     */
    private void skipToEndOfLine() throws IOException {
        while (pos < limit || fillBuffer(1)) {
            char c = buffer[pos++];
            if (c == '\r' || c == '\n') {
                break;
            }
        }
    }

    /**
     * Advances the position to the start of the next occurrence of {@code
     * toFind}, without consuming it.
     *
     * @return true if {@code toFind} was found, false if the input ended first.
     */
    private boolean skipTo(String toFind) throws IOException {
        outer:
        for (; pos + toFind.length() <= limit || fillBuffer(toFind.length()); pos++) {
            for (int c = 0; c < toFind.length(); c++) {
                if (buffer[pos + c] != toFind.charAt(c)) {
                    continue outer;
                }
            }
            return true;
        }
        return false;
    }

    /**
     * Returns the string up to but not including {@code quote}, unescaping any
     * character escape sequences encountered along the way. The opening quote
     * should have already been read. This consumes the closing quote, but does
     * not include it in the returned string.
     *
     * @param quote either ' or ".
     * @throws NumberFormatException if any unicode escape sequences are
     *     malformed.
     */
    private String nextString(char quote) throws IOException {
        StringBuilder builder = null;
        do {
            /* the index of the first character not yet appended to the builder. */
            int start = pos;
            while (pos < limit) {
                int c = buffer[pos++];

                if (c == quote) {
                    if (skipping) {
                        return "skipped!";
                    } else if (builder == null) {
                        return stringPool.get(buffer, start, pos - start - 1);
                    } else {
                        builder.append(buffer, start, pos - start - 1);
                        return builder.toString();
                    }

                } else if (c == '\\') {
                    if (builder == null) {
                        builder = new StringBuilder();
                    }
                    builder.append(buffer, start, pos - start - 1);
                    builder.append(readEscapeCharacter());
                    // readEscapeCharacter() may have refilled the buffer, so restart from pos.
                    start = pos;
                }
            }

            // The buffer ran out mid-string: save what we have before it is refilled.
            if (builder == null) {
                builder = new StringBuilder();
            }
            builder.append(buffer, start, pos - start);
        } while (fillBuffer(1));

        throw syntaxError("Unterminated string");
    }

    /**
     * Reads the value up to but not including any delimiter characters. This
     * does not consume the delimiter character.
     *
     * @param assignOffsetsOnly true for this method to only set the valuePos
     *     and valueLength fields and return a null result. This only works if
     *     the literal is short; a string is returned otherwise.
     */
    private String nextLiteral(boolean assignOffsetsOnly) throws IOException {
        StringBuilder builder = null;
        valuePos = -1;
        valueLength = 0;
        int i = 0;

        findNonLiteralCharacter:
        while (true) {
            for (; pos + i < limit; i++) {
                switch (buffer[pos + i]) {
                    case '/':
                    case '\\':
                    case ';':
                    case '#':
                    case '=':
                        checkLenient(); // fall-through
                    case '{':
                    case '}':
                    case '[':
                    case ']':
                    case ':':
                    case ',':
                    case ' ':
                    case '\t':
                    case '\f':
                    case '\r':
                    case '\n':
                        break findNonLiteralCharacter;
                }
            }

            /*
             * Attempt to load the entire literal into the buffer at once. If
             * we run out of input, add a non-literal character at the end so
             * that decoding doesn't need to do bounds checks.
             */
            if (i < buffer.length) {
                if (fillBuffer(i + 1)) {
                    continue;
                } else {
                    buffer[limit] = '\0';
                    break;
                }
            }

            // use a StringBuilder when the value is too long. It must be an unquoted string.
            if (builder == null) {
                builder = new StringBuilder();
            }
            builder.append(buffer, pos, i);
            valueLength += i;
            pos += i;
            i = 0;
            if (!fillBuffer(1)) {
                break;
            }
        }

        String result;
        if (assignOffsetsOnly && builder == null) {
            valuePos = pos;
            result = null;
        } else if (skipping) {
            result = "skipped!";
        } else if (builder == null) {
            result = stringPool.get(buffer, pos, i);
        } else {
            builder.append(buffer, pos, i);
            result = builder.toString();
        }
        valueLength += i;
        pos += i;
        return result;
    }

    /**
     * Returns the simple class name followed by a short snippet of the buffered
     * input around the current position.
     */
    @Override public String toString() {
        return getClass().getSimpleName() + " near " + getSnippet();
    }

    /**
     * Unescapes the character identified by the character or characters that
     * immediately follow a backslash. The backslash '\' should have already
     * been read. This supports both unicode escapes "u000A" and two-character
     * escapes "\n".
     *
     * @throws NumberFormatException if any unicode escape sequences are
     *     malformed.
     */
    private char readEscapeCharacter() throws IOException {
        if (pos == limit && !fillBuffer(1)) {
            throw syntaxError("Unterminated escape sequence");
        }

        char escaped = buffer[pos++];
        switch (escaped) {
            case 'u':
                if (pos + 4 > limit && !fillBuffer(4)) {
                    throw syntaxError("Unterminated escape sequence");
                }
                String hex = stringPool.get(buffer, pos, 4);
                pos += 4;
                return (char) Integer.parseInt(hex, 16);

            case 't':
                return '\t';

            case 'b':
                return '\b';

            case 'n':
                return '\n';

            case 'r':
                return '\r';

            case 'f':
                return '\f';

            case '\'':
            case '"':
            case '\\':
            default:
                // Any other escaped character stands for itself.
                return escaped;
        }
    }

    /**
     * Reads a null, boolean, numeric or unquoted string literal value.
     */
    private JsonToken readLiteral() throws IOException {
        value = nextLiteral(true);
        if (valueLength == 0) {
            throw syntaxError("Expected literal value");
        }
        token = decodeLiteral();
        if (token == JsonToken.STRING) {
            // Unquoted strings are only permitted in lenient mode.
            checkLenient();
        }
        return token;
    }

    /**
     * Classifies the literal most recently read by {@code nextLiteral(true)}
     * and returns its token type. For literals that fit in the buffer this also
     * assigns {@code value}; the keywords null, true and false are matched
     * case-insensitively.
     */
    private JsonToken decodeLiteral() throws IOException {
        if (valuePos == -1) {
            // it was too long to fit in the buffer so it can only be a string
            return JsonToken.STRING;
        } else if (valueLength == 4
                && ('n' == buffer[valuePos    ] || 'N' == buffer[valuePos    ])
                && ('u' == buffer[valuePos + 1] || 'U' == buffer[valuePos + 1])
                && ('l' == buffer[valuePos + 2] || 'L' == buffer[valuePos + 2])
                && ('l' == buffer[valuePos + 3] || 'L' == buffer[valuePos + 3])) {
            value = "null";
            return JsonToken.NULL;
        } else if (valueLength == 4
                && ('t' == buffer[valuePos    ] || 'T' == buffer[valuePos    ])
                && ('r' == buffer[valuePos + 1] || 'R' == buffer[valuePos + 1])
                && ('u' == buffer[valuePos + 2] || 'U' == buffer[valuePos + 2])
                && ('e' == buffer[valuePos + 3] || 'E' == buffer[valuePos + 3])) {
            value = TRUE;
            return JsonToken.BOOLEAN;
        } else if (valueLength == 5
                && ('f' == buffer[valuePos    ] || 'F' == buffer[valuePos    ])
                && ('a' == buffer[valuePos + 1] || 'A' == buffer[valuePos + 1])
                && ('l' == buffer[valuePos + 2] || 'L' == buffer[valuePos + 2])
                && ('s' == buffer[valuePos + 3] || 'S' == buffer[valuePos + 3])
                && ('e' == buffer[valuePos + 4] || 'E' == buffer[valuePos + 4])) {
            value = FALSE;
            return JsonToken.BOOLEAN;
        } else {
            value = stringPool.get(buffer, valuePos, valueLength);
            return decodeNumber(buffer, valuePos, valueLength);
        }
    }

    /**
     * Determine whether the characters is a JSON number. Numbers are of the
     * form -12.34e+56. Fractional and exponential parts are optional. Leading
     * zeroes are not allowed in the value or exponential part, but are allowed
     * in the fraction.
     *
     * <p>This reads one character beyond the literal, relying on the delimiter
     * (or the terminator written by {@code nextLiteral}) that follows it in
     * the buffer.
     *
     * @return {@link JsonToken#NUMBER} if the whole literal matches the number
     *     form, {@link JsonToken#STRING} otherwise.
     */
    private JsonToken decodeNumber(char[] chars, int offset, int length) {
        int i = offset;
        int c = chars[i];

        if (c == '-') {
            c = chars[++i];
        }

        if (c == '0') {
            c = chars[++i];
        } else if (c >= '1' && c <= '9') {
            c = chars[++i];
            while (c >= '0' && c <= '9') {
                c = chars[++i];
            }
        } else {
            return JsonToken.STRING;
        }

        if (c == '.') {
            c = chars[++i];
            while (c >= '0' && c <= '9') {
                c = chars[++i];
            }
        }

        if (c == 'e' || c == 'E') {
            c = chars[++i];
            if (c == '+' || c == '-') {
                c = chars[++i];
            }
            if (c >= '0' && c <= '9') {
                c = chars[++i];
                while (c >= '0' && c <= '9') {
                    c = chars[++i];
                }
            } else {
                return JsonToken.STRING;
            }
        }

        // Only a number if the scan consumed exactly the whole literal.
        if (i == offset + length) {
            return JsonToken.NUMBER;
        } else {
            return JsonToken.STRING;
        }
    }

    /**
     * Throws a new IO exception with the given message and the line and column
     * of the current position. The declared return type lets callers write
     * {@code throw syntaxError(...)}; this method never returns normally.
     */
    private IOException syntaxError(String message) throws IOException {
        throw new MalformedJsonException(message
                + " at line " + getLineNumber() + " column " + getColumnNumber());
    }

    /**
     * Returns up to 20 buffered characters before the current position followed
     * by up to 20 buffered characters after it.
     */
    private CharSequence getSnippet() {
        StringBuilder snippet = new StringBuilder();
        int beforePos = Math.min(pos, 20);
        snippet.append(buffer, pos - beforePos, beforePos);
        int afterPos = Math.min(limit - pos, 20);
        snippet.append(buffer, pos, afterPos);
        return snippet;
    }
}