/*
 * Copyright (C) 2010 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package android.util;

import java.io.Closeable;
import java.io.EOFException;
import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.List;
import libcore.internal.StringPool;

/**
 * Reads a JSON (<a href="http://www.ietf.org/rfc/rfc4627.txt">RFC 4627</a>)
 * encoded value as a stream of tokens. This stream includes both literal
 * values (strings, numbers, booleans, and nulls) as well as the begin and
 * end delimiters of objects and arrays. The tokens are traversed in
 * depth-first order, the same order that they appear in the JSON document.
 * Within JSON objects, name/value pairs are represented by a single token.
 *
 * <h3>Parsing JSON</h3>
 * To create a recursive descent parser for your own JSON streams, first create
 * an entry point method that creates a {@code JsonReader}.
 *
 * <p>Next, create handler methods for each structure in your JSON text. You'll
 * need a method for each object type and for each array type.
 * <ul>
 *   <li>Within <strong>array handling</strong> methods, first call {@link
 *       #beginArray} to consume the array's opening bracket. Then create a
 *       while loop that accumulates values, terminating when {@link #hasNext}
 *       is false. Finally, read the array's closing bracket by calling {@link
 *       #endArray}.
 *   <li>Within <strong>object handling</strong> methods, first call {@link
 *       #beginObject} to consume the object's opening brace. Then create a
 *       while loop that assigns values to local variables based on their name.
 *       This loop should terminate when {@link #hasNext} is false. Finally,
 *       read the object's closing brace by calling {@link #endObject}.
 * </ul>
 * <p>When a nested object or array is encountered, delegate to the
 * corresponding handler method.
 *
 * <p>When an unknown name is encountered, strict parsers should fail with an
 * exception. Lenient parsers should call {@link #skipValue()} to recursively
 * skip the value's nested tokens, which may otherwise conflict.
 *
 * <p>If a value may be null, you should first check using {@link #peek()}.
 * Null literals can be consumed using either {@link #nextNull()} or {@link
 * #skipValue()}.
 *
 * <h3>Example</h3>
 * Suppose we'd like to parse a stream of messages such as the following: <pre> {@code
 * [
 *   {
 *     "id": 912345678901,
 *     "text": "How do I read JSON on Android?",
 *     "geo": null,
 *     "user": {
 *       "name": "android_newb",
 *       "followers_count": 41
 *      }
 *   },
 *   {
 *     "id": 912345678902,
 *     "text": "@android_newb just use android.util.JsonReader!",
 *     "geo": [50.454722, -104.606667],
 *     "user": {
 *       "name": "jesse",
 *       "followers_count": 2
 *     }
 *   }
 * ]}</pre>
 * This code implements the parser for the above structure: <pre>   {@code
 *
 *   public List<Message> readJsonStream(InputStream in) throws IOException {
 *     JsonReader reader = new JsonReader(new InputStreamReader(in, "UTF-8"));
 *     try {
 *       return readMessagesArray(reader);
 *     } finally {
 *       reader.close();
 *     }
 *   }
 *
 *   public List<Message> readMessagesArray(JsonReader reader) throws IOException {
 *     List<Message> messages = new ArrayList<Message>();
 *
 *     reader.beginArray();
 *     while (reader.hasNext()) {
 *       messages.add(readMessage(reader));
 *     }
 *     reader.endArray();
 *     return messages;
 *   }
 *
 *   public Message readMessage(JsonReader reader) throws IOException {
 *     long id = -1;
 *     String text = null;
 *     User user = null;
 *     List<Double> geo = null;
 *
 *     reader.beginObject();
 *     while (reader.hasNext()) {
 *       String name = reader.nextName();
 *       if (name.equals("id")) {
 *         id = reader.nextLong();
 *       } else if (name.equals("text")) {
 *         text = reader.nextString();
 *       } else if (name.equals("geo") && reader.peek() != JsonToken.NULL) {
 *         geo = readDoublesArray(reader);
 *       } else if (name.equals("user")) {
 *         user = readUser(reader);
 *       } else {
 *         reader.skipValue();
 *       }
 *     }
 *     reader.endObject();
 *     return new Message(id, text, user, geo);
 *   }
 *
 *   public List<Double> readDoublesArray(JsonReader reader) throws IOException {
 *     List<Double> doubles = new ArrayList<Double>();
 *
 *     reader.beginArray();
 *     while (reader.hasNext()) {
 *       doubles.add(reader.nextDouble());
 *     }
 *     reader.endArray();
 *     return doubles;
 *   }
 *
 *   public User readUser(JsonReader reader) throws IOException {
 *     String username = null;
 *     int followersCount = -1;
 *
 *     reader.beginObject();
 *     while (reader.hasNext()) {
 *       String name = reader.nextName();
 *       if (name.equals("name")) {
 *         username = reader.nextString();
 *       } else if (name.equals("followers_count")) {
 *         followersCount = reader.nextInt();
 *       } else {
 *         reader.skipValue();
 *       }
 *     }
 *     reader.endObject();
 *     return new User(username, followersCount);
 *   }}</pre>
 *
 * <h3>Number Handling</h3>
 * This reader permits numeric values to be read as strings and string values to
 * be read as numbers. For example, both elements of the JSON array {@code
 * [1, "1"]} may be read using either {@link #nextInt} or {@link #nextString}.
 * This behavior is intended to prevent lossy numeric conversions: double is
 * JavaScript's only numeric type and very large values like {@code
 * 9007199254740993} cannot be represented exactly on that platform. To minimize
 * precision loss, extremely large values should be written and read as strings
 * in JSON.
 *
 * <p>Each {@code JsonReader} may be used to read a single JSON stream. Instances
 * of this class are not thread safe.
 */
public final class JsonReader implements Closeable {

    /*
     * Canonical boolean texts. decodeLiteral() assigns these exact instances to
     * 'value', which lets nextBoolean() compare by identity.
     */
    private static final String TRUE = "true";
    private static final String FALSE = "false";

    private final StringPool stringPool = new StringPool();

    /** The input JSON. */
    private final Reader in;

    /** True to accept non-spec compliant JSON */
    private boolean lenient = false;

    /**
     * Use a manual buffer to easily read and unread upcoming characters, and
     * also so we can create strings without an intermediate StringBuilder.
     * We decode literals directly out of this buffer, so it must be at least as
     * long as the longest token that can be reported as a number.
     */
    private final char[] buffer = new char[1024];
    private int pos = 0;
    private int limit = 0;

    /*
     * The offset of the first character in the buffer.
     */
    private int bufferStartLine = 1;
    private int bufferStartColumn = 1;

    private final List<JsonScope> stack = new ArrayList<JsonScope>();
    {
        push(JsonScope.EMPTY_DOCUMENT);
    }

    /**
     * The type of the next token to be returned by {@link #peek} and {@link
     * #advance}. If null, peek() will assign a value.
     */
    private JsonToken token;

    /** The text of the next name. */
    private String name;

    /*
     * For the next literal value, we may have the text value, or the position
     * and length in the buffer.
     */
    private String value;
    private int valuePos;
    private int valueLength;

    /** True if we're currently handling a skipValue() call. */
    private boolean skipping = false;

    /**
     * Creates a new instance that reads a JSON-encoded stream from {@code in}.
     *
     * @param in the character source to read from; must not be null.
     * @throws NullPointerException if {@code in} is null.
     */
    public JsonReader(Reader in) {
        if (in == null) {
            throw new NullPointerException("in == null");
        }
        this.in = in;
    }

    /**
     * Configure this parser to be liberal in what it accepts. By default,
     * this parser is strict and only accepts JSON as specified by <a
     * href="http://www.ietf.org/rfc/rfc4627.txt">RFC 4627</a>. Setting the
     * parser to lenient causes it to ignore the following syntax errors:
     *
     * <ul>
     *   <li>End of line comments starting with {@code //} or {@code #} and
     *       ending with a newline character.
     *   <li>C-style comments starting with {@code /*} and ending with
     *       {@code *}{@code /}. Such comments may not be nested.
     *   <li>Names that are unquoted or {@code 'single quoted'}.
     *   <li>Strings that are unquoted or {@code 'single quoted'}.
     *   <li>Array elements separated by {@code ;} instead of {@code ,}.
     *   <li>Unnecessary array separators. These are interpreted as if null
     *       was the omitted value.
     *   <li>Names and values separated by {@code =} or {@code =>} instead of
     *       {@code :}.
     *   <li>Name/value pairs separated by {@code ;} instead of {@code ,}.
     * </ul>
     */
    public void setLenient(boolean lenient) {
        this.lenient = lenient;
    }

    /**
     * Returns true if this parser is liberal in what it accepts.
     */
    public boolean isLenient() {
        return lenient;
    }

    /**
     * Consumes the next token from the JSON stream and asserts that it is the
     * beginning of a new array.
     *
     * @throws IllegalStateException if the next token is not the beginning of
     *     an array.
     */
    public void beginArray() throws IOException {
        expect(JsonToken.BEGIN_ARRAY);
    }

    /**
     * Consumes the next token from the JSON stream and asserts that it is the
     * end of the current array.
     *
     * @throws IllegalStateException if the next token is not the end of an
     *     array.
     */
    public void endArray() throws IOException {
        expect(JsonToken.END_ARRAY);
    }

    /**
     * Consumes the next token from the JSON stream and asserts that it is the
     * beginning of a new object.
     *
     * @throws IllegalStateException if the next token is not the beginning of
     *     an object.
     */
    public void beginObject() throws IOException {
        expect(JsonToken.BEGIN_OBJECT);
    }

    /**
     * Consumes the next token from the JSON stream and asserts that it is the
     * end of the current object.
     *
     * @throws IllegalStateException if the next token is not the end of an
     *     object.
     */
    public void endObject() throws IOException {
        expect(JsonToken.END_OBJECT);
    }

    /**
     * Consumes {@code expected}.
     *
     * @throws IllegalStateException if the next token is not {@code expected}.
     */
    private void expect(JsonToken expected) throws IOException {
        peek();
        if (token != expected) {
            throw new IllegalStateException("Expected " + expected + " but was " + peek());
        }
        advance();
    }

    /**
     * Returns true if the current array or object has another element.
     */
    public boolean hasNext() throws IOException {
        peek();
        return token != JsonToken.END_OBJECT && token != JsonToken.END_ARRAY;
    }

    /**
     * Returns the type of the next token without consuming it.
     *
     * @throws IllegalStateException if this reader is closed.
     */
    public JsonToken peek() throws IOException {
        if (token != null) {
            return token;
        }

        switch (peekStack()) {
            case EMPTY_DOCUMENT:
                replaceTop(JsonScope.NONEMPTY_DOCUMENT);
                JsonToken firstToken = nextValue();
                if (!lenient && token != JsonToken.BEGIN_ARRAY && token != JsonToken.BEGIN_OBJECT) {
                    throw new IOException(
                            "Expected JSON document to start with '[' or '{' but was " + token);
                }
                return firstToken;
            case EMPTY_ARRAY:
                return nextInArray(true);
            case NONEMPTY_ARRAY:
                return nextInArray(false);
            case EMPTY_OBJECT:
                return nextInObject(true);
            case DANGLING_NAME:
                return objectValue();
            case NONEMPTY_OBJECT:
                return nextInObject(false);
            case NONEMPTY_DOCUMENT:
                try {
                    // Only lenient readers may see a second top-level value.
                    JsonToken trailingToken = nextValue();
                    if (lenient) {
                        return trailingToken;
                    }
                    throw syntaxError("Expected EOF");
                } catch (EOFException e) {
                    return token = JsonToken.END_DOCUMENT; // TODO: avoid throwing here?
                }
            case CLOSED:
                throw new IllegalStateException("JsonReader is closed");
            default:
                throw new AssertionError();
        }
    }

    /**
     * Advances the cursor in the JSON stream to the next token.
     *
     * @return the token that was consumed.
     */
    private JsonToken advance() throws IOException {
        peek();

        JsonToken result = token;
        token = null;
        value = null;
        name = null;
        return result;
    }

    /**
     * Returns the next token, a {@link JsonToken#NAME property name}, and
     * consumes it.
     *
     * @throws IllegalStateException if the next token in the stream is not a
     *     property name.
     */
    public String nextName() throws IOException {
        peek();
        if (token != JsonToken.NAME) {
            throw new IllegalStateException("Expected a name but was " + peek());
        }
        String result = name;
        advance();
        return result;
    }

    /**
     * Returns the {@link JsonToken#STRING string} value of the next token,
     * consuming it. If the next token is a number, this method will return its
     * string form.
     *
     * @throws IllegalStateException if the next token is not a string or if
     *     this reader is closed.
     */
    public String nextString() throws IOException {
        peek();
        if (token != JsonToken.STRING && token != JsonToken.NUMBER) {
            throw new IllegalStateException("Expected a string but was " + peek());
        }

        String result = value;
        advance();
        return result;
    }

    /**
     * Returns the {@link JsonToken#BOOLEAN boolean} value of the next token,
     * consuming it.
     *
     * @throws IllegalStateException if the next token is not a boolean or if
     *     this reader is closed.
     */
    public boolean nextBoolean() throws IOException {
        peek();
        if (token != JsonToken.BOOLEAN) {
            throw new IllegalStateException("Expected a boolean but was " + token);
        }

        // Identity comparison is intentional: decodeLiteral() stores the TRUE
        // or FALSE constant itself in 'value' for every BOOLEAN token.
        boolean result = (value == TRUE);
        advance();
        return result;
    }

    /**
     * Consumes the next token from the JSON stream and asserts that it is a
     * literal null.
     *
     * @throws IllegalStateException if the next token is not null or if this
     *     reader is closed.
     */
    public void nextNull() throws IOException {
        peek();
        if (token != JsonToken.NULL) {
            throw new IllegalStateException("Expected null but was " + token);
        }

        advance();
    }

    /**
     * Returns the {@link JsonToken#NUMBER double} value of the next token,
     * consuming it. If the next token is a string, this method will attempt to
     * parse it as a double using {@link Double#parseDouble(String)}.
     *
     * @throws IllegalStateException if the next token is not a literal value.
     */
    public double nextDouble() throws IOException {
        peek();
        if (token != JsonToken.STRING && token != JsonToken.NUMBER) {
            throw new IllegalStateException("Expected a double but was " + token);
        }

        double result = Double.parseDouble(value);
        advance();
        return result;
    }

    /**
     * Returns the {@link JsonToken#NUMBER long} value of the next token,
     * consuming it. If the next token is a string, this method will attempt to
     * parse it as a long. If the next token's numeric value cannot be exactly
     * represented by a Java {@code long}, this method throws.
     *
     * @throws IllegalStateException if the next token is not a literal value.
     * @throws NumberFormatException if the next literal value cannot be parsed
     *     as a number, or exactly represented as a long.
     */
    public long nextLong() throws IOException {
        peek();
        if (token != JsonToken.STRING && token != JsonToken.NUMBER) {
            throw new IllegalStateException("Expected a long but was " + token);
        }

        long result;
        try {
            result = Long.parseLong(value);
        } catch (NumberFormatException ignored) {
            // Not a plain integer (e.g. "1.0" or "1e3"): accept it only if the
            // double round-trips through long without loss.
            double asDouble = Double.parseDouble(value); // don't catch this NumberFormatException
            result = (long) asDouble;
            if ((double) result != asDouble) {
                throw new NumberFormatException(value);
            }
        }

        advance();
        return result;
    }

    /**
     * Returns the {@link JsonToken#NUMBER int} value of the next token,
     * consuming it. If the next token is a string, this method will attempt to
     * parse it as an int. If the next token's numeric value cannot be exactly
     * represented by a Java {@code int}, this method throws.
     *
     * @throws IllegalStateException if the next token is not a literal value.
     * @throws NumberFormatException if the next literal value cannot be parsed
     *     as a number, or exactly represented as an int.
     */
    public int nextInt() throws IOException {
        peek();
        if (token != JsonToken.STRING && token != JsonToken.NUMBER) {
            throw new IllegalStateException("Expected an int but was " + token);
        }

        int result;
        try {
            result = Integer.parseInt(value);
        } catch (NumberFormatException ignored) {
            // Not a plain integer (e.g. "1.0" or "1e3"): accept it only if the
            // double round-trips through int without loss.
            double asDouble = Double.parseDouble(value); // don't catch this NumberFormatException
            result = (int) asDouble;
            if ((double) result != asDouble) {
                throw new NumberFormatException(value);
            }
        }

        advance();
        return result;
    }

    /**
     * Closes this JSON reader and the underlying {@link Reader}. Afterwards
     * {@link #peek} throws {@link IllegalStateException}.
     */
    public void close() throws IOException {
        value = null;
        token = null;
        stack.clear();
        stack.add(JsonScope.CLOSED);
        in.close();
    }

    /**
     * Skips the next value recursively. If it is an object or array, all nested
     * elements are skipped. This method is intended for use when the JSON token
     * stream contains unrecognized or unhandled values.
     */
    public void skipValue() throws IOException {
        skipping = true;
        try {
            // Nesting depth relative to the value being skipped; the value is
            // fully consumed once every opened array/object has been closed.
            int count = 0;
            do {
                JsonToken token = advance();
                if (token == JsonToken.BEGIN_ARRAY || token == JsonToken.BEGIN_OBJECT) {
                    count++;
                } else if (token == JsonToken.END_ARRAY || token == JsonToken.END_OBJECT) {
                    count--;
                }
            } while (count != 0);
        } finally {
            skipping = false;
        }
    }

    /** Returns the scope on top of the stack without removing it. */
    private JsonScope peekStack() {
        return stack.get(stack.size() - 1);
    }

    /** Removes and returns the scope on top of the stack. */
    private JsonScope pop() {
        return stack.remove(stack.size() - 1);
    }

    /** Pushes {@code newTop} onto the scope stack. */
    private void push(JsonScope newTop) {
        stack.add(newTop);
    }

    /**
     * Replace the value on the top of the stack with the given value.
     */
    private void replaceTop(JsonScope newTop) {
        stack.set(stack.size() - 1, newTop);
    }

    /**
     * Reads the next token inside an array: either the closing bracket or the
     * next element, preceded by its separator for every element but the first.
     *
     * @param firstElement true if no element of this array has been read yet.
     */
    private JsonToken nextInArray(boolean firstElement) throws IOException {
        if (firstElement) {
            replaceTop(JsonScope.NONEMPTY_ARRAY);
        } else {
            /* Look for a comma before each element after the first element. */
            switch (nextNonWhitespace()) {
                case ']':
                    pop();
                    return token = JsonToken.END_ARRAY;
                case ';':
                    checkLenient(); // fall-through
                case ',':
                    break;
                default:
                    throw syntaxError("Unterminated array");
            }
        }

        switch (nextNonWhitespace()) {
            case ']':
                if (firstElement) {
                    pop();
                    return token = JsonToken.END_ARRAY;
                }
                // fall-through to handle ",]"
            case ';':
            case ',':
                /* In lenient mode, a 0-length literal means 'null' */
                checkLenient();
                pos--;
                value = "null";
                return token = JsonToken.NULL;
            default:
                pos--;
                return nextValue();
        }
    }

    /**
     * Reads the next token inside an object: either the closing brace or the
     * next property name, preceded by its separator for every pair but the
     * first.
     *
     * @param firstElement true if no name/value pair of this object has been
     *     read yet.
     */
    private JsonToken nextInObject(boolean firstElement) throws IOException {
        /*
         * Read delimiters. Either a comma/semicolon separating this and the
         * previous name-value pair, or a close brace to denote the end of the
         * object.
         */
        if (firstElement) {
            /* Peek to see if this is the empty object. */
            switch (nextNonWhitespace()) {
                case '}':
                    pop();
                    return token = JsonToken.END_OBJECT;
                default:
                    pos--;
            }
        } else {
            switch (nextNonWhitespace()) {
                case '}':
                    pop();
                    return token = JsonToken.END_OBJECT;
                case ';':
                    // A semicolon separator is a lenient-only extension, just
                    // as it is between array elements.
                    checkLenient(); // fall-through
                case ',':
                    break;
                default:
                    throw syntaxError("Unterminated object");
            }
        }

        /* Read the name. */
        int quote = nextNonWhitespace();
        switch (quote) {
            case '\'':
                checkLenient(); // fall-through
            case '"':
                name = nextString((char) quote);
                break;
            default:
                checkLenient();
                pos--;
                name = nextLiteral(false);
                if (name.isEmpty()) {
                    throw syntaxError("Expected name");
                }
        }

        replaceTop(JsonScope.DANGLING_NAME);
        return token = JsonToken.NAME;
    }

    /**
     * Reads the separator that follows a property name and then the property's
     * value.
     */
    private JsonToken objectValue() throws IOException {
        /*
         * Read the name/value separator. Usually a colon ':'. In lenient mode
         * we also accept an equals sign '=', or an arrow "=>".
         */
        switch (nextNonWhitespace()) {
            case ':':
                break;
            case '=':
                checkLenient();
                if ((pos < limit || fillBuffer(1)) && buffer[pos] == '>') {
                    pos++;
                }
                break;
            default:
                throw syntaxError("Expected ':'");
        }

        replaceTop(JsonScope.NONEMPTY_OBJECT);
        return nextValue();
    }

    /**
     * Reads the start of the next value: an opening brace or bracket, a quoted
     * string, or an unquoted literal.
     */
    private JsonToken nextValue() throws IOException {
        int c = nextNonWhitespace();
        switch (c) {
            case '{':
                push(JsonScope.EMPTY_OBJECT);
                return token = JsonToken.BEGIN_OBJECT;

            case '[':
                push(JsonScope.EMPTY_ARRAY);
                return token = JsonToken.BEGIN_ARRAY;

            case '\'':
                checkLenient(); // fall-through
            case '"':
                value = nextString((char) c);
                return token = JsonToken.STRING;

            default:
                pos--;
                return readLiteral();
        }
    }

    /**
     * Returns true once {@code limit - pos >= minimum}. If the data is
     * exhausted before that many characters are available, this returns
     * false.
     */
    private boolean fillBuffer(int minimum) throws IOException {
        // Before clobbering the old characters, update where buffer starts
        for (int i = 0; i < pos; i++) {
            if (buffer[i] == '\n') {
                bufferStartLine++;
                bufferStartColumn = 1;
            } else {
                bufferStartColumn++;
            }
        }

        // Slide the unread characters to the front of the buffer.
        if (limit != pos) {
            limit -= pos;
            System.arraycopy(buffer, pos, buffer, 0, limit);
        } else {
            limit = 0;
        }

        pos = 0;
        int total;
        while ((total = in.read(buffer, limit, buffer.length - limit)) != -1) {
            limit += total;

            // if this is the first read, consume an optional byte order mark (BOM) if it exists
            if (bufferStartLine == 1 && bufferStartColumn == 1
                    && limit > 0 && buffer[0] == '\ufeff') {
                pos++;
                bufferStartColumn--;
            }

            // Measure from pos rather than from 0 so that a consumed BOM is
            // not counted as an available character.
            if (limit - pos >= minimum) {
                return true;
            }
        }
        return false;
    }

    /** Returns the line number of the character at {@code pos}. */
    private int getLineNumber() {
        int result = bufferStartLine;
        for (int i = 0; i < pos; i++) {
            if (buffer[i] == '\n') {
                result++;
            }
        }
        return result;
    }

    /** Returns the column number of the character at {@code pos}. */
    private int getColumnNumber() {
        int result = bufferStartColumn;
        for (int i = 0; i < pos; i++) {
            if (buffer[i] == '\n') {
                result = 1;
            } else {
                result++;
            }
        }
        return result;
    }

    /**
     * Consumes and returns the next character that is neither whitespace nor
     * part of a comment. Comments are only skipped in lenient mode; in strict
     * mode they are reported as syntax errors.
     *
     * @throws EOFException if the input is exhausted first.
     */
    private int nextNonWhitespace() throws IOException {
        while (pos < limit || fillBuffer(1)) {
            int c = buffer[pos++];
            switch (c) {
                case '\t':
                case ' ':
                case '\n':
                case '\r':
                    continue;

                case '/':
                    if (pos == limit && !fillBuffer(1)) {
                        return c;
                    }

                    checkLenient();
                    char peek = buffer[pos];
                    switch (peek) {
                        case '*':
                            // skip a /* c-style comment */
                            pos++;
                            if (!skipTo("*/")) {
                                throw syntaxError("Unterminated comment");
                            }
                            pos += 2;
                            continue;

                        case '/':
                            // skip a // end-of-line comment
                            pos++;
                            skipToEndOfLine();
                            continue;

                        default:
                            return c;
                    }

                case '#':
                    /*
                     * Skip a # hash end-of-line comment. The JSON RFC doesn't
                     * specify this behaviour, but it's required to parse
                     * existing documents. See http://b/2571423.
                     */
                    checkLenient();
                    skipToEndOfLine();
                    continue;

                default:
                    return c;
            }
        }

        throw new EOFException("End of input");
    }

    /**
     * Throws a syntax error unless this reader is lenient. Called wherever a
     * non-standard construct has been encountered.
     */
    private void checkLenient() throws IOException {
        if (!lenient) {
            throw syntaxError("Use JsonReader.setLenient(true) to accept malformed JSON");
        }
    }

    /**
     * Advances the position until after the next newline character. If the line
     * is terminated by "\r\n", the '\n' must be consumed as whitespace by the
     * caller.
     */
    private void skipToEndOfLine() throws IOException {
        while (pos < limit || fillBuffer(1)) {
            char c = buffer[pos++];
            if (c == '\r' || c == '\n') {
                break;
            }
        }
    }

    /**
     * Advances {@code pos} to the start of the next occurrence of
     * {@code toFind}, without consuming it.
     *
     * @return true if {@code toFind} was found; false if the input ended first.
     */
    private boolean skipTo(String toFind) throws IOException {
        outer:
        for (; pos + toFind.length() <= limit || fillBuffer(toFind.length()); pos++) {
            for (int c = 0; c < toFind.length(); c++) {
                if (buffer[pos + c] != toFind.charAt(c)) {
                    continue outer;
                }
            }
            return true;
        }
        return false;
    }

    /**
     * Returns the string up to but not including {@code quote}, unescaping any
     * character escape sequences encountered along the way. The opening quote
     * should have already been read. This consumes the closing quote, but does
     * not include it in the returned string.
     *
     * <p>While a {@link #skipValue} call is in progress the placeholder
     * {@code "skipped!"} is returned instead of the actual text.
     *
     * @param quote either ' or ".
     * @throws NumberFormatException if any unicode escape sequences are
     *     malformed.
     */
    private String nextString(char quote) throws IOException {
        StringBuilder builder = null;
        do {
            /* the index of the first character not yet appended to the builder. */
            int start = pos;
            while (pos < limit) {
                int c = buffer[pos++];

                if (c == quote) {
                    if (skipping) {
                        return "skipped!";
                    } else if (builder == null) {
                        return stringPool.get(buffer, start, pos - start - 1);
                    } else {
                        builder.append(buffer, start, pos - start - 1);
                        return builder.toString();
                    }

                } else if (c == '\\') {
                    if (builder == null) {
                        builder = new StringBuilder();
                    }
                    builder.append(buffer, start, pos - start - 1);
                    builder.append(readEscapeCharacter());
                    start = pos;
                }
            }

            // The string continues past the buffered data: save what we have
            // before the buffer is refilled.
            if (builder == null) {
                builder = new StringBuilder();
            }
            builder.append(buffer, start, pos - start);
        } while (fillBuffer(1));

        throw syntaxError("Unterminated string");
    }

    /**
     * Reads the value up to but not including any delimiter characters. This
     * does not consume the delimiter character.
     *
     * @param assignOffsetsOnly true for this method to only set the valuePos
     *     and valueLength fields and return a null result. This only works if
     *     the literal is short; a string is returned otherwise.
     */
    private String nextLiteral(boolean assignOffsetsOnly) throws IOException {
        StringBuilder builder = null;
        valuePos = -1;
        valueLength = 0;
        int i = 0;

        findNonLiteralCharacter:
        while (true) {
            for (; pos + i < limit; i++) {
                switch (buffer[pos + i]) {
                    case '/':
                    case '\\':
                    case ';':
                    case '#':
                    case '=':
                        checkLenient(); // fall-through
                    case '{':
                    case '}':
                    case '[':
                    case ']':
                    case ':':
                    case ',':
                    case ' ':
                    case '\t':
                    case '\f':
                    case '\r':
                    case '\n':
                        break findNonLiteralCharacter;
                }
            }

            /*
             * Attempt to load the entire literal into the buffer at once. If
             * we run out of input, add a non-literal character at the end so
             * that decoding doesn't need to do bounds checks.
             */
            if (i < buffer.length) {
                if (fillBuffer(i + 1)) {
                    continue;
                } else {
                    buffer[limit] = '\0';
                    break;
                }
            }

            // use a StringBuilder when the value is too long. It must be an unquoted string.
            if (builder == null) {
                builder = new StringBuilder();
            }
            builder.append(buffer, pos, i);
            valueLength += i;
            pos += i;
            i = 0;
            if (!fillBuffer(1)) {
                break;
            }
        }

        String result;
        if (assignOffsetsOnly && builder == null) {
            valuePos = pos;
            result = null;
        } else if (skipping) {
            result = "skipped!";
        } else if (builder == null) {
            result = stringPool.get(buffer, pos, i);
        } else {
            builder.append(buffer, pos, i);
            result = builder.toString();
        }
        valueLength += i;
        pos += i;
        return result;
    }

    /**
     * Returns this class's simple name followed by a snippet of the buffered
     * input surrounding the current position.
     */
    @Override public String toString() {
        return getClass().getSimpleName() + " near " + getSnippet();
    }

    /**
     * Unescapes the character identified by the character or characters that
     * immediately follow a backslash. The backslash '\' should have already
     * been read. This supports both unicode escapes "u000A" and two-character
     * escapes "\n".
     *
     * @throws NumberFormatException if any unicode escape sequences are
     *     malformed.
     */
    private char readEscapeCharacter() throws IOException {
        if (pos == limit && !fillBuffer(1)) {
            throw syntaxError("Unterminated escape sequence");
        }

        char escaped = buffer[pos++];
        switch (escaped) {
            case 'u':
                if (pos + 4 > limit && !fillBuffer(4)) {
                    throw syntaxError("Unterminated escape sequence");
                }
                String hex = stringPool.get(buffer, pos, 4);
                pos += 4;
                return (char) Integer.parseInt(hex, 16);

            case 't':
                return '\t';

            case 'b':
                return '\b';

            case 'n':
                return '\n';

            case 'r':
                return '\r';

            case 'f':
                return '\f';

            case '\'':
            case '"':
            case '\\':
            default:
                // Any other escaped character stands for itself.
                return escaped;
        }
    }

    /**
     * Reads a null, boolean, numeric or unquoted string literal value.
     */
    private JsonToken readLiteral() throws IOException {
        value = nextLiteral(true);
        if (valueLength == 0) {
            throw syntaxError("Expected literal value");
        }
        token = decodeLiteral();
        if (token == JsonToken.STRING) {
            // Unquoted strings are only permitted in lenient mode.
            checkLenient();
        }
        return token;
    }

    /**
     * Classifies the literal located at {@code valuePos}/{@code valueLength} in
     * the buffer and assigns its text to {@code value}. The keywords
     * {@code null}, {@code true} and {@code false} are matched
     * case-insensitively.
     *
     * @return the token type of the literal.
     */
    private JsonToken decodeLiteral() throws IOException {
        if (valuePos == -1) {
            // it was too long to fit in the buffer so it can only be a string
            return JsonToken.STRING;
        } else if (valueLength == 4
                && ('n' == buffer[valuePos] || 'N' == buffer[valuePos])
                && ('u' == buffer[valuePos + 1] || 'U' == buffer[valuePos + 1])
                && ('l' == buffer[valuePos + 2] || 'L' == buffer[valuePos + 2])
                && ('l' == buffer[valuePos + 3] || 'L' == buffer[valuePos + 3])) {
            value = "null";
            return JsonToken.NULL;
        } else if (valueLength == 4
                && ('t' == buffer[valuePos] || 'T' == buffer[valuePos])
                && ('r' == buffer[valuePos + 1] || 'R' == buffer[valuePos + 1])
                && ('u' == buffer[valuePos + 2] || 'U' == buffer[valuePos + 2])
                && ('e' == buffer[valuePos + 3] || 'E' == buffer[valuePos + 3])) {
            value = TRUE;
            return JsonToken.BOOLEAN;
        } else if (valueLength == 5
                && ('f' == buffer[valuePos] || 'F' == buffer[valuePos])
                && ('a' == buffer[valuePos + 1] || 'A' == buffer[valuePos + 1])
                && ('l' == buffer[valuePos + 2] || 'L' == buffer[valuePos + 2])
                && ('s' == buffer[valuePos + 3] || 'S' == buffer[valuePos + 3])
                && ('e' == buffer[valuePos + 4] || 'E' == buffer[valuePos + 4])) {
            value = FALSE;
            return JsonToken.BOOLEAN;
        } else {
            value = stringPool.get(buffer, valuePos, valueLength);
            return decodeNumber(buffer, valuePos, valueLength);
        }
    }

    /**
     * Determine whether the characters is a JSON number. Numbers are of the
     * form -12.34e+56. Fractional and exponential parts are optional. Leading
     * zeroes are not allowed in the integer part, but are accepted in the
     * fraction and in the exponent.
     *
     * <p>This reads one character past each character it accepts without
     * checking bounds, so {@code chars[offset + length]} must be a valid index.
     *
     * @return {@link JsonToken#NUMBER} if the whole range is a number,
     *     {@link JsonToken#STRING} otherwise.
     */
    private JsonToken decodeNumber(char[] chars, int offset, int length) {
        int i = offset;
        int c = chars[i];

        if (c == '-') {
            c = chars[++i];
        }

        if (c == '0') {
            c = chars[++i];
        } else if (c >= '1' && c <= '9') {
            c = chars[++i];
            while (c >= '0' && c <= '9') {
                c = chars[++i];
            }
        } else {
            return JsonToken.STRING;
        }

        if (c == '.') {
            c = chars[++i];
            while (c >= '0' && c <= '9') {
                c = chars[++i];
            }
        }

        if (c == 'e' || c == 'E') {
            c = chars[++i];
            if (c == '+' || c == '-') {
                c = chars[++i];
            }
            if (c >= '0' && c <= '9') {
                c = chars[++i];
                while (c >= '0' && c <= '9') {
                    c = chars[++i];
                }
            } else {
                return JsonToken.STRING;
            }
        }

        // It is a number only if the grammar consumed the literal exactly.
        if (i == offset + length) {
            return JsonToken.NUMBER;
        } else {
            return JsonToken.STRING;
        }
    }

    /**
     * Throws a new IO exception with the given message and the line and column
     * of the current position. This never returns normally; the return type
     * only exists so that callers can write {@code throw syntaxError(...)}.
     */
    private IOException syntaxError(String message) throws IOException {
        throw new MalformedJsonException(message
                + " at line " + getLineNumber() + " column " + getColumnNumber());
    }

    /**
     * Returns up to 20 buffered characters before the current position followed
     * by up to 20 buffered characters after it.
     */
    private CharSequence getSnippet() {
        StringBuilder snippet = new StringBuilder();
        int beforePos = Math.min(pos, 20);
        snippet.append(buffer, pos - beforePos, beforePos);
        int afterPos = Math.min(limit - pos, 20);
        snippet.append(buffer, pos, afterPos);
        return snippet;
    }
}