1 /* 2 * Copyright (C) 2010 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package org.json; 18 19 import android.compat.annotation.UnsupportedAppUsage; 20 21 // Note: this class was written without inspecting the non-free org.json sourcecode. 22 23 /** 24 * Parses a JSON (<a href="http://www.ietf.org/rfc/rfc4627.txt">RFC 4627</a>) 25 * encoded string into the corresponding object. Most clients of 26 * this class will use only need the {@link #JSONTokener(String) constructor} 27 * and {@link #nextValue} method. Example usage: <pre> 28 * String json = "{" 29 * + " \"query\": \"Pizza\", " 30 * + " \"locations\": [ 94043, 90210 ] " 31 * + "}"; 32 * 33 * JSONObject object = (JSONObject) new JSONTokener(json).nextValue(); 34 * String query = object.getString("query"); 35 * JSONArray locations = object.getJSONArray("locations");</pre> 36 * 37 * <p>For best interoperability and performance use JSON that complies with 38 * RFC 4627, such as that generated by {@link JSONStringer}. For legacy reasons 39 * this parser is lenient, so a successful parse does not indicate that the 40 * input string was valid JSON. All of the following syntax errors will be 41 * ignored: 42 * <ul> 43 * <li>End of line comments starting with {@code //} or {@code #} and ending 44 * with a newline character. 45 * <li>C-style comments starting with {@code /*} and ending with 46 * {@code *}{@code /}. Such comments may not be nested. 47 * <li>Strings that are unquoted or {@code 'single quoted'}. 48 * <li>Hexadecimal integers prefixed with {@code 0x} or {@code 0X}. 49 * <li>Octal integers prefixed with {@code 0}. 50 * <li>Array elements separated by {@code ;}. 51 * <li>Unnecessary array separators. These are interpreted as if null was the 52 * omitted value. 53 * <li>Key-value pairs separated by {@code =} or {@code =>}. 54 * <li>Key-value pairs separated by {@code ;}. 55 * </ul> 56 * 57 * <p>Each tokener may be used to parse a single JSON string. Instances of this 58 * class are not thread safe. Although this class is nonfinal, it was not 59 * designed for inheritance and should not be subclassed. In particular, 60 * self-use by overrideable methods is not specified. See <i>Effective Java</i> 61 * Item 17, "Design and Document or inheritance or else prohibit it" for further 62 * information. 63 */ 64 public class JSONTokener { 65 66 /** The input JSON. */ 67 @UnsupportedAppUsage 68 private final String in; 69 70 /** 71 * The index of the next character to be returned by {@link #next}. When 72 * the input is exhausted, this equals the input's length. 73 */ 74 @UnsupportedAppUsage 75 private int pos; 76 77 /** 78 * @param in JSON encoded string. Null is not permitted and will yield a 79 * tokener that throws {@code NullPointerExceptions} when methods are 80 * called. 81 */ JSONTokener(String in)82 public JSONTokener(String in) { 83 // consume an optional byte order mark (BOM) if it exists 84 if (in != null && in.startsWith("\ufeff")) { 85 in = in.substring(1); 86 } 87 this.in = in; 88 } 89 90 /** 91 * Returns the next value from the input. 92 * 93 * @return a {@link JSONObject}, {@link JSONArray}, String, Boolean, 94 * Integer, Long, Double or {@link JSONObject#NULL}. 95 * @throws JSONException if the input is malformed. 96 */ nextValue()97 public Object nextValue() throws JSONException { 98 int c = nextCleanInternal(); 99 switch (c) { 100 case -1: 101 throw syntaxError("End of input"); 102 103 case '{': 104 return readObject(); 105 106 case '[': 107 return readArray(); 108 109 case '\'': 110 case '"': 111 return nextString((char) c); 112 113 default: 114 pos--; 115 return readLiteral(); 116 } 117 } 118 119 @UnsupportedAppUsage nextCleanInternal()120 private int nextCleanInternal() throws JSONException { 121 final int inLength = in.length(); 122 123 while (pos < inLength) { 124 int c = in.charAt(pos++); 125 switch (c) { 126 case '\t': 127 case ' ': 128 case '\n': 129 case '\r': 130 continue; 131 132 case '/': 133 if (pos == inLength) { 134 return c; 135 } 136 137 char peek = in.charAt(pos); 138 switch (peek) { 139 case '*': 140 // skip a /* c-style comment */ 141 pos++; 142 int commentEnd = in.indexOf("*/", pos); 143 if (commentEnd == -1) { 144 throw syntaxError("Unterminated comment"); 145 } 146 pos = commentEnd + 2; 147 continue; 148 149 case '/': 150 // skip a // end-of-line comment 151 pos++; 152 skipToEndOfLine(); 153 continue; 154 155 default: 156 return c; 157 } 158 159 case '#': 160 /* 161 * Skip a # hash end-of-line comment. The JSON RFC doesn't 162 * specify this behavior, but it's required to parse 163 * existing documents. See http://b/2571423. 164 */ 165 skipToEndOfLine(); 166 continue; 167 168 default: 169 return c; 170 } 171 } 172 173 return -1; 174 } 175 176 /** 177 * Advances the position until after the next newline character. If the line 178 * is terminated by "\r\n", the '\n' must be consumed as whitespace by the 179 * caller. 180 */ 181 @UnsupportedAppUsage skipToEndOfLine()182 private void skipToEndOfLine() { 183 final int inLength = in.length(); 184 185 for (; pos < inLength; pos++) { 186 char c = in.charAt(pos); 187 if (c == '\r' || c == '\n') { 188 pos++; 189 break; 190 } 191 } 192 } 193 194 /** 195 * Returns the string up to but not including {@code quote}, unescaping any 196 * character escape sequences encountered along the way. The opening quote 197 * should have already been read. This consumes the closing quote, but does 198 * not include it in the returned string. 199 * 200 * @param quote either ' or ". 201 */ nextString(char quote)202 public String nextString(char quote) throws JSONException { 203 /* 204 * For strings that are free of escape sequences, we can just extract 205 * the result as a substring of the input. But if we encounter an escape 206 * sequence, we need to use a StringBuilder to compose the result. 207 */ 208 StringBuilder builder = null; 209 210 /* the index of the first character not yet appended to the builder. */ 211 int start = pos; 212 213 final int inLength = in.length(); 214 215 while (pos < inLength) { 216 int c = in.charAt(pos++); 217 if (c == quote) { 218 if (builder == null) { 219 return in.substring(start, pos - 1); 220 } else { 221 builder.append(in, start, pos - 1); 222 return builder.toString(); 223 } 224 } 225 226 if (c == '\\') { 227 if (pos == inLength) { 228 throw syntaxError("Unterminated escape sequence"); 229 } 230 if (builder == null) { 231 builder = new StringBuilder(); 232 } 233 builder.append(in, start, pos - 1); 234 builder.append(readEscapeCharacter()); 235 start = pos; 236 } 237 } 238 239 throw syntaxError("Unterminated string"); 240 } 241 242 /** 243 * Unescapes the character identified by the character or characters that 244 * immediately follow a backslash. The backslash '\' should have already 245 * been read. This supports both unicode escapes "u000A" and two-character 246 * escapes "\n". 247 */ 248 @UnsupportedAppUsage readEscapeCharacter()249 private char readEscapeCharacter() throws JSONException { 250 char escaped = in.charAt(pos++); 251 switch (escaped) { 252 case 'u': 253 if (pos + 4 > in.length()) { 254 throw syntaxError("Unterminated escape sequence"); 255 } 256 String hex = in.substring(pos, pos + 4); 257 pos += 4; 258 try { 259 return (char) Integer.parseInt(hex, 16); 260 } catch (NumberFormatException nfe) { 261 throw syntaxError("Invalid escape sequence: " + hex); 262 } 263 264 case 't': 265 return '\t'; 266 267 case 'b': 268 return '\b'; 269 270 case 'n': 271 return '\n'; 272 273 case 'r': 274 return '\r'; 275 276 case 'f': 277 return '\f'; 278 279 case '\'': 280 case '"': 281 case '\\': 282 default: 283 return escaped; 284 } 285 } 286 287 /** 288 * Reads a null, boolean, numeric or unquoted string literal value. Numeric 289 * values will be returned as an Integer, Long, or Double, in that order of 290 * preference. 291 */ 292 @UnsupportedAppUsage readLiteral()293 private Object readLiteral() throws JSONException { 294 String literal = nextToInternal("{}[]/\\:,=;# \t\f"); 295 296 if (literal.length() == 0) { 297 throw syntaxError("Expected literal value"); 298 } else if ("null".equalsIgnoreCase(literal)) { 299 return JSONObject.NULL; 300 } else if ("true".equalsIgnoreCase(literal)) { 301 return Boolean.TRUE; 302 } else if ("false".equalsIgnoreCase(literal)) { 303 return Boolean.FALSE; 304 } 305 306 /* try to parse as an integral type... */ 307 if (literal.indexOf('.') == -1) { 308 int base = 10; 309 String number = literal; 310 if (number.startsWith("0x") || number.startsWith("0X")) { 311 number = number.substring(2); 312 base = 16; 313 } else if (number.startsWith("0") && number.length() > 1) { 314 number = number.substring(1); 315 base = 8; 316 } 317 try { 318 long longValue = Long.parseLong(number, base); 319 if (longValue <= Integer.MAX_VALUE && longValue >= Integer.MIN_VALUE) { 320 return (int) longValue; 321 } else { 322 return longValue; 323 } 324 } catch (NumberFormatException e) { 325 /* 326 * This only happens for integral numbers greater than 327 * Long.MAX_VALUE, numbers in exponential form (5e-10) and 328 * unquoted strings. Fall through to try floating point. 329 */ 330 } 331 } 332 333 /* ...next try to parse as a floating point... */ 334 try { 335 return Double.valueOf(literal); 336 } catch (NumberFormatException ignored) { 337 } 338 339 /* ... finally give up. We have an unquoted string */ 340 return literal; 341 } 342 343 /** 344 * Returns the string up to but not including any of the given characters or 345 * a newline character. This does not consume the excluded character. 346 */ 347 @UnsupportedAppUsage nextToInternal(String excluded)348 private String nextToInternal(String excluded) { 349 final int inLength = in.length(); 350 351 int start = pos; 352 for (; pos < inLength; pos++) { 353 char c = in.charAt(pos); 354 if (c == '\r' || c == '\n' || excluded.indexOf(c) != -1) { 355 return in.substring(start, pos); 356 } 357 } 358 return in.substring(start); 359 } 360 361 /** 362 * Reads a sequence of key/value pairs and the trailing closing brace '}' of 363 * an object. The opening brace '{' should have already been read. 364 */ 365 @UnsupportedAppUsage readObject()366 private JSONObject readObject() throws JSONException { 367 JSONObject result = new JSONObject(); 368 369 /* Peek to see if this is the empty object. */ 370 int first = nextCleanInternal(); 371 if (first == '}') { 372 return result; 373 } else if (first != -1) { 374 pos--; 375 } 376 377 final int inLength = in.length(); 378 379 while (true) { 380 Object name = nextValue(); 381 if (!(name instanceof String)) { 382 if (name == null) { 383 throw syntaxError("Names cannot be null"); 384 } else { 385 throw syntaxError("Names must be strings, but " + name 386 + " is of type " + name.getClass().getName()); 387 } 388 } 389 390 /* 391 * Expect the name/value separator to be either a colon ':', an 392 * equals sign '=', or an arrow "=>". The last two are bogus but we 393 * include them because that's what the original implementation did. 394 */ 395 int separator = nextCleanInternal(); 396 if (separator != ':' && separator != '=') { 397 throw syntaxError("Expected ':' after " + name); 398 } 399 if (pos < inLength && in.charAt(pos) == '>') { 400 pos++; 401 } 402 403 result.put((String) name, nextValue()); 404 405 switch (nextCleanInternal()) { 406 case '}': 407 return result; 408 case ';': 409 case ',': 410 continue; 411 default: 412 throw syntaxError("Unterminated object"); 413 } 414 } 415 } 416 417 /** 418 * Reads a sequence of values and the trailing closing brace ']' of an 419 * array. The opening brace '[' should have already been read. Note that 420 * "[]" yields an empty array, but "[,]" returns a two-element array 421 * equivalent to "[null,null]". 422 */ 423 @UnsupportedAppUsage readArray()424 private JSONArray readArray() throws JSONException { 425 JSONArray result = new JSONArray(); 426 427 /* to cover input that ends with ",]". */ 428 boolean hasTrailingSeparator = false; 429 430 while (true) { 431 switch (nextCleanInternal()) { 432 case -1: 433 throw syntaxError("Unterminated array"); 434 case ']': 435 if (hasTrailingSeparator) { 436 result.put(null); 437 } 438 return result; 439 case ',': 440 case ';': 441 /* A separator without a value first means "null". */ 442 result.put(null); 443 hasTrailingSeparator = true; 444 continue; 445 default: 446 pos--; 447 } 448 449 result.put(nextValue()); 450 451 switch (nextCleanInternal()) { 452 case ']': 453 return result; 454 case ',': 455 case ';': 456 hasTrailingSeparator = true; 457 continue; 458 default: 459 throw syntaxError("Unterminated array"); 460 } 461 } 462 } 463 464 /** 465 * Returns an exception containing the given message plus the current 466 * position and the entire input string. 467 */ syntaxError(String message)468 public JSONException syntaxError(String message) { 469 return new JSONException(message + this); 470 } 471 472 /** 473 * Returns the current position and the entire input string. 474 */ toString()475 @Override public String toString() { 476 // consistent with the original implementation 477 return " at character " + pos + " of " + in; 478 } 479 480 /* 481 * Legacy APIs. 482 * 483 * None of the methods below are on the critical path of parsing JSON 484 * documents. They exist only because they were exposed by the original 485 * implementation and may be used by some clients. 486 */ 487 488 /** 489 * Returns true until the input has been exhausted. 490 */ more()491 public boolean more() { 492 return pos < in.length(); 493 } 494 495 /** 496 * Returns the next available character, or the null character '\0' if all 497 * input has been exhausted. The return value of this method is ambiguous 498 * for JSON strings that contain the character '\0'. 499 */ next()500 public char next() { 501 return pos < in.length() ? in.charAt(pos++) : '\0'; 502 } 503 504 /** 505 * Returns the next available character if it equals {@code c}. Otherwise an 506 * exception is thrown. 507 */ next(char c)508 public char next(char c) throws JSONException { 509 char result = next(); 510 if (result != c) { 511 throw syntaxError("Expected " + c + " but was " + result); 512 } 513 return result; 514 } 515 516 /** 517 * Returns the next character that is not whitespace and does not belong to 518 * a comment. If the input is exhausted before such a character can be 519 * found, the null character '\0' is returned. The return value of this 520 * method is ambiguous for JSON strings that contain the character '\0'. 521 */ nextClean()522 public char nextClean() throws JSONException { 523 int nextCleanInt = nextCleanInternal(); 524 return nextCleanInt == -1 ? '\0' : (char) nextCleanInt; 525 } 526 527 /** 528 * Returns the next {@code length} characters of the input. 529 * 530 * @throws JSONException if the remaining input is not long enough to 531 * satisfy this request. 532 */ next(int length)533 public String next(int length) throws JSONException { 534 if (pos + length > in.length()) { 535 throw syntaxError(length + " is out of bounds"); 536 } 537 String result = in.substring(pos, pos + length); 538 pos += length; 539 return result; 540 } 541 542 /** 543 * Returns the {@link String#trim trimmed} string holding the characters up 544 * to but not including the first of: 545 * <ul> 546 * <li>any character in {@code excluded} 547 * <li>a newline character '\n' 548 * <li>a carriage return '\r' 549 * </ul> 550 * 551 * @return a possibly-empty string 552 */ nextTo(String excluded)553 public String nextTo(String excluded) { 554 if (excluded == null) { 555 throw new NullPointerException("excluded == null"); 556 } 557 return nextToInternal(excluded).trim(); 558 } 559 560 /** 561 * Equivalent to {@code nextTo(String.valueOf(excluded))}. 562 */ nextTo(char excluded)563 public String nextTo(char excluded) { 564 return nextToInternal(String.valueOf(excluded)).trim(); 565 } 566 567 /** 568 * Advances past all input up to and including the next occurrence of 569 * {@code thru}. If the remaining input doesn't contain {@code thru}, the 570 * input is exhausted. 571 */ skipPast(String thru)572 public void skipPast(String thru) { 573 int thruStart = in.indexOf(thru, pos); 574 pos = thruStart == -1 ? in.length() : (thruStart + thru.length()); 575 } 576 577 /** 578 * Advances past all input up to but not including the next occurrence of 579 * {@code to}. If the remaining input doesn't contain {@code to}, the input 580 * is unchanged. 581 */ skipTo(char to)582 public char skipTo(char to) { 583 int index = in.indexOf(to, pos); 584 if (index != -1) { 585 pos = index; 586 return to; 587 } else { 588 return '\0'; 589 } 590 } 591 592 /** 593 * Unreads the most recent character of input. If no input characters have 594 * been read, the input is unchanged. 595 */ back()596 public void back() { 597 if (--pos == -1) { 598 pos = 0; 599 } 600 } 601 602 /** 603 * Returns the integer [0..15] value for the given hex character, or -1 604 * for non-hex input. 605 * 606 * @param hex a character in the ranges [0-9], [A-F] or [a-f]. Any other 607 * character will yield a -1 result. 608 */ dehexchar(char hex)609 public static int dehexchar(char hex) { 610 if (hex >= '0' && hex <= '9') { 611 return hex - '0'; 612 } else if (hex >= 'A' && hex <= 'F') { 613 return hex - 'A' + 10; 614 } else if (hex >= 'a' && hex <= 'f') { 615 return hex - 'a' + 10; 616 } else { 617 return -1; 618 } 619 } 620 } 621