• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2010 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package org.json;
18 
19 // Note: this class was written without inspecting the non-free org.json sourcecode.
20 
21 /**
22  * Parses a JSON (<a href="http://www.ietf.org/rfc/rfc4627.txt">RFC 4627</a>)
23  * encoded string into the corresponding object. Most clients of
24  * this class will use only need the {@link #JSONTokener(String) constructor}
25  * and {@link #nextValue} method. Example usage: <pre>
26  * String json = "{"
27  *         + "  \"query\": \"Pizza\", "
28  *         + "  \"locations\": [ 94043, 90210 ] "
29  *         + "}";
30  *
31  * JSONObject object = (JSONObject) new JSONTokener(json).nextValue();
32  * String query = object.getString("query");
33  * JSONArray locations = object.getJSONArray("locations");</pre>
34  *
35  * <p>For best interoperability and performance use JSON that complies with
36  * RFC 4627, such as that generated by {@link JSONStringer}. For legacy reasons
37  * this parser is lenient, so a successful parse does not indicate that the
38  * input string was valid JSON. All of the following syntax errors will be
39  * ignored:
40  * <ul>
41  *   <li>End of line comments starting with {@code //} or {@code #} and ending
42  *       with a newline character.
43  *   <li>C-style comments starting with {@code /*} and ending with
44  *       {@code *}{@code /}. Such comments may not be nested.
45  *   <li>Strings that are unquoted or {@code 'single quoted'}.
46  *   <li>Hexadecimal integers prefixed with {@code 0x} or {@code 0X}.
47  *   <li>Octal integers prefixed with {@code 0}.
48  *   <li>Array elements separated by {@code ;}.
49  *   <li>Unnecessary array separators. These are interpreted as if null was the
50  *       omitted value.
51  *   <li>Key-value pairs separated by {@code =} or {@code =>}.
52  *   <li>Key-value pairs separated by {@code ;}.
53  * </ul>
54  *
55  * <p>Each tokener may be used to parse a single JSON string. Instances of this
56  * class are not thread safe. Although this class is nonfinal, it was not
57  * designed for inheritance and should not be subclassed. In particular,
58  * self-use by overrideable methods is not specified. See <i>Effective Java</i>
59  * Item 17, "Design and Document or inheritance or else prohibit it" for further
60  * information.
61  */
62 public class JSONTokener {
63 
64     /** The input JSON. */
65     private final String in;
66 
67     /**
68      * The index of the next character to be returned by {@link #next}. When
69      * the input is exhausted, this equals the input's length.
70      */
71     private int pos;
72 
73     /**
74      * @param in JSON encoded string. Null is not permitted and will yield a
75      *     tokener that throws {@code NullPointerExceptions} when methods are
76      *     called.
77      */
JSONTokener(String in)78     public JSONTokener(String in) {
79         // consume an optional byte order mark (BOM) if it exists
80         if (in != null && in.startsWith("\ufeff")) {
81             in = in.substring(1);
82         }
83         this.in = in;
84     }
85 
86     /**
87      * Returns the next value from the input.
88      *
89      * @return a {@link JSONObject}, {@link JSONArray}, String, Boolean,
90      *     Integer, Long, Double or {@link JSONObject#NULL}.
91      * @throws JSONException if the input is malformed.
92      */
nextValue()93     public Object nextValue() throws JSONException {
94         int c = nextCleanInternal();
95         switch (c) {
96             case -1:
97                 throw syntaxError("End of input");
98 
99             case '{':
100                 return readObject();
101 
102             case '[':
103                 return readArray();
104 
105             case '\'':
106             case '"':
107                 return nextString((char) c);
108 
109             default:
110                 pos--;
111                 return readLiteral();
112         }
113     }
114 
nextCleanInternal()115     private int nextCleanInternal() throws JSONException {
116         while (pos < in.length()) {
117             int c = in.charAt(pos++);
118             switch (c) {
119                 case '\t':
120                 case ' ':
121                 case '\n':
122                 case '\r':
123                     continue;
124 
125                 case '/':
126                     if (pos == in.length()) {
127                         return c;
128                     }
129 
130                     char peek = in.charAt(pos);
131                     switch (peek) {
132                         case '*':
133                             // skip a /* c-style comment */
134                             pos++;
135                             int commentEnd = in.indexOf("*/", pos);
136                             if (commentEnd == -1) {
137                                 throw syntaxError("Unterminated comment");
138                             }
139                             pos = commentEnd + 2;
140                             continue;
141 
142                         case '/':
143                             // skip a // end-of-line comment
144                             pos++;
145                             skipToEndOfLine();
146                             continue;
147 
148                         default:
149                             return c;
150                     }
151 
152                 case '#':
153                     /*
154                      * Skip a # hash end-of-line comment. The JSON RFC doesn't
155                      * specify this behavior, but it's required to parse
156                      * existing documents. See http://b/2571423.
157                      */
158                     skipToEndOfLine();
159                     continue;
160 
161                 default:
162                     return c;
163             }
164         }
165 
166         return -1;
167     }
168 
169     /**
170      * Advances the position until after the next newline character. If the line
171      * is terminated by "\r\n", the '\n' must be consumed as whitespace by the
172      * caller.
173      */
skipToEndOfLine()174     private void skipToEndOfLine() {
175         for (; pos < in.length(); pos++) {
176             char c = in.charAt(pos);
177             if (c == '\r' || c == '\n') {
178                 pos++;
179                 break;
180             }
181         }
182     }
183 
184     /**
185      * Returns the string up to but not including {@code quote}, unescaping any
186      * character escape sequences encountered along the way. The opening quote
187      * should have already been read. This consumes the closing quote, but does
188      * not include it in the returned string.
189      *
190      * @param quote either ' or ".
191      * @throws NumberFormatException if any unicode escape sequences are
192      *     malformed.
193      */
nextString(char quote)194     public String nextString(char quote) throws JSONException {
195         /*
196          * For strings that are free of escape sequences, we can just extract
197          * the result as a substring of the input. But if we encounter an escape
198          * sequence, we need to use a StringBuilder to compose the result.
199          */
200         StringBuilder builder = null;
201 
202         /* the index of the first character not yet appended to the builder. */
203         int start = pos;
204 
205         while (pos < in.length()) {
206             int c = in.charAt(pos++);
207             if (c == quote) {
208                 if (builder == null) {
209                     // a new string avoids leaking memory
210                     return new String(in.substring(start, pos - 1));
211                 } else {
212                     builder.append(in, start, pos - 1);
213                     return builder.toString();
214                 }
215             }
216 
217             if (c == '\\') {
218                 if (pos == in.length()) {
219                     throw syntaxError("Unterminated escape sequence");
220                 }
221                 if (builder == null) {
222                     builder = new StringBuilder();
223                 }
224                 builder.append(in, start, pos - 1);
225                 builder.append(readEscapeCharacter());
226                 start = pos;
227             }
228         }
229 
230         throw syntaxError("Unterminated string");
231     }
232 
233     /**
234      * Unescapes the character identified by the character or characters that
235      * immediately follow a backslash. The backslash '\' should have already
236      * been read. This supports both unicode escapes "u000A" and two-character
237      * escapes "\n".
238      *
239      * @throws NumberFormatException if any unicode escape sequences are
240      *     malformed.
241      */
readEscapeCharacter()242     private char readEscapeCharacter() throws JSONException {
243         char escaped = in.charAt(pos++);
244         switch (escaped) {
245             case 'u':
246                 if (pos + 4 > in.length()) {
247                     throw syntaxError("Unterminated escape sequence");
248                 }
249                 String hex = in.substring(pos, pos + 4);
250                 pos += 4;
251                 return (char) Integer.parseInt(hex, 16);
252 
253             case 't':
254                 return '\t';
255 
256             case 'b':
257                 return '\b';
258 
259             case 'n':
260                 return '\n';
261 
262             case 'r':
263                 return '\r';
264 
265             case 'f':
266                 return '\f';
267 
268             case '\'':
269             case '"':
270             case '\\':
271             default:
272                 return escaped;
273         }
274     }
275 
276     /**
277      * Reads a null, boolean, numeric or unquoted string literal value. Numeric
278      * values will be returned as an Integer, Long, or Double, in that order of
279      * preference.
280      */
readLiteral()281     private Object readLiteral() throws JSONException {
282         String literal = nextToInternal("{}[]/\\:,=;# \t\f");
283 
284         if (literal.length() == 0) {
285             throw syntaxError("Expected literal value");
286         } else if ("null".equalsIgnoreCase(literal)) {
287             return JSONObject.NULL;
288         } else if ("true".equalsIgnoreCase(literal)) {
289             return Boolean.TRUE;
290         } else if ("false".equalsIgnoreCase(literal)) {
291             return Boolean.FALSE;
292         }
293 
294         /* try to parse as an integral type... */
295         if (literal.indexOf('.') == -1) {
296             int base = 10;
297             String number = literal;
298             if (number.startsWith("0x") || number.startsWith("0X")) {
299                 number = number.substring(2);
300                 base = 16;
301             } else if (number.startsWith("0") && number.length() > 1) {
302                 number = number.substring(1);
303                 base = 8;
304             }
305             try {
306                 long longValue = Long.parseLong(number, base);
307                 if (longValue <= Integer.MAX_VALUE && longValue >= Integer.MIN_VALUE) {
308                     return (int) longValue;
309                 } else {
310                     return longValue;
311                 }
312             } catch (NumberFormatException e) {
313                 /*
314                  * This only happens for integral numbers greater than
315                  * Long.MAX_VALUE, numbers in exponential form (5e-10) and
316                  * unquoted strings. Fall through to try floating point.
317                  */
318             }
319         }
320 
321         /* ...next try to parse as a floating point... */
322         try {
323             return Double.valueOf(literal);
324         } catch (NumberFormatException ignored) {
325         }
326 
327         /* ... finally give up. We have an unquoted string */
328         return new String(literal); // a new string avoids leaking memory
329     }
330 
331     /**
332      * Returns the string up to but not including any of the given characters or
333      * a newline character. This does not consume the excluded character.
334      */
nextToInternal(String excluded)335     private String nextToInternal(String excluded) {
336         int start = pos;
337         for (; pos < in.length(); pos++) {
338             char c = in.charAt(pos);
339             if (c == '\r' || c == '\n' || excluded.indexOf(c) != -1) {
340                 return in.substring(start, pos);
341             }
342         }
343         return in.substring(start);
344     }
345 
346     /**
347      * Reads a sequence of key/value pairs and the trailing closing brace '}' of
348      * an object. The opening brace '{' should have already been read.
349      */
readObject()350     private JSONObject readObject() throws JSONException {
351         JSONObject result = new JSONObject();
352 
353         /* Peek to see if this is the empty object. */
354         int first = nextCleanInternal();
355         if (first == '}') {
356             return result;
357         } else if (first != -1) {
358             pos--;
359         }
360 
361         while (true) {
362             Object name = nextValue();
363             if (!(name instanceof String)) {
364                 if (name == null) {
365                     throw syntaxError("Names cannot be null");
366                 } else {
367                     throw syntaxError("Names must be strings, but " + name
368                             + " is of type " + name.getClass().getName());
369                 }
370             }
371 
372             /*
373              * Expect the name/value separator to be either a colon ':', an
374              * equals sign '=', or an arrow "=>". The last two are bogus but we
375              * include them because that's what the original implementation did.
376              */
377             int separator = nextCleanInternal();
378             if (separator != ':' && separator != '=') {
379                 throw syntaxError("Expected ':' after " + name);
380             }
381             if (pos < in.length() && in.charAt(pos) == '>') {
382                 pos++;
383             }
384 
385             result.put((String) name, nextValue());
386 
387             switch (nextCleanInternal()) {
388                 case '}':
389                     return result;
390                 case ';':
391                 case ',':
392                     continue;
393                 default:
394                     throw syntaxError("Unterminated object");
395             }
396         }
397     }
398 
399     /**
400      * Reads a sequence of values and the trailing closing brace ']' of an
401      * array. The opening brace '[' should have already been read. Note that
402      * "[]" yields an empty array, but "[,]" returns a two-element array
403      * equivalent to "[null,null]".
404      */
readArray()405     private JSONArray readArray() throws JSONException {
406         JSONArray result = new JSONArray();
407 
408         /* to cover input that ends with ",]". */
409         boolean hasTrailingSeparator = false;
410 
411         while (true) {
412             switch (nextCleanInternal()) {
413                 case -1:
414                     throw syntaxError("Unterminated array");
415                 case ']':
416                     if (hasTrailingSeparator) {
417                         result.put(null);
418                     }
419                     return result;
420                 case ',':
421                 case ';':
422                     /* A separator without a value first means "null". */
423                     result.put(null);
424                     hasTrailingSeparator = true;
425                     continue;
426                 default:
427                     pos--;
428             }
429 
430             result.put(nextValue());
431 
432             switch (nextCleanInternal()) {
433                 case ']':
434                     return result;
435                 case ',':
436                 case ';':
437                     hasTrailingSeparator = true;
438                     continue;
439                 default:
440                     throw syntaxError("Unterminated array");
441             }
442         }
443     }
444 
445     /**
446      * Returns an exception containing the given message plus the current
447      * position and the entire input string.
448      */
syntaxError(String message)449     public JSONException syntaxError(String message) {
450         return new JSONException(message + this);
451     }
452 
453     /**
454      * Returns the current position and the entire input string.
455      */
toString()456     @Override public String toString() {
457         // consistent with the original implementation
458         return " at character " + pos + " of " + in;
459     }
460 
461     /*
462      * Legacy APIs.
463      *
464      * None of the methods below are on the critical path of parsing JSON
465      * documents. They exist only because they were exposed by the original
466      * implementation and may be used by some clients.
467      */
468 
469     /**
470      * Returns true until the input has been exhausted.
471      */
more()472     public boolean more() {
473         return pos < in.length();
474     }
475 
476     /**
477      * Returns the next available character, or the null character '\0' if all
478      * input has been exhausted. The return value of this method is ambiguous
479      * for JSON strings that contain the character '\0'.
480      */
next()481     public char next() {
482         return pos < in.length() ? in.charAt(pos++) : '\0';
483     }
484 
485     /**
486      * Returns the next available character if it equals {@code c}. Otherwise an
487      * exception is thrown.
488      */
next(char c)489     public char next(char c) throws JSONException {
490         char result = next();
491         if (result != c) {
492             throw syntaxError("Expected " + c + " but was " + result);
493         }
494         return result;
495     }
496 
497     /**
498      * Returns the next character that is not whitespace and does not belong to
499      * a comment. If the input is exhausted before such a character can be
500      * found, the null character '\0' is returned. The return value of this
501      * method is ambiguous for JSON strings that contain the character '\0'.
502      */
nextClean()503     public char nextClean() throws JSONException {
504         int nextCleanInt = nextCleanInternal();
505         return nextCleanInt == -1 ? '\0' : (char) nextCleanInt;
506     }
507 
508     /**
509      * Returns the next {@code length} characters of the input.
510      *
511      * <p>The returned string shares its backing character array with this
512      * tokener's input string. If a reference to the returned string may be held
513      * indefinitely, you should use {@code new String(result)} to copy it first
514      * to avoid memory leaks.
515      *
516      * @throws JSONException if the remaining input is not long enough to
517      *     satisfy this request.
518      */
next(int length)519     public String next(int length) throws JSONException {
520         if (pos + length > in.length()) {
521             throw syntaxError(length + " is out of bounds");
522         }
523         String result = in.substring(pos, pos + length);
524         pos += length;
525         return result;
526     }
527 
528     /**
529      * Returns the {@link String#trim trimmed} string holding the characters up
530      * to but not including the first of:
531      * <ul>
532      *   <li>any character in {@code excluded}
533      *   <li>a newline character '\n'
534      *   <li>a carriage return '\r'
535      * </ul>
536      *
537      * <p>The returned string shares its backing character array with this
538      * tokener's input string. If a reference to the returned string may be held
539      * indefinitely, you should use {@code new String(result)} to copy it first
540      * to avoid memory leaks.
541      *
542      * @return a possibly-empty string
543      */
nextTo(String excluded)544     public String nextTo(String excluded) {
545         if (excluded == null) {
546             throw new NullPointerException("excluded == null");
547         }
548         return nextToInternal(excluded).trim();
549     }
550 
551     /**
552      * Equivalent to {@code nextTo(String.valueOf(excluded))}.
553      */
nextTo(char excluded)554     public String nextTo(char excluded) {
555         return nextToInternal(String.valueOf(excluded)).trim();
556     }
557 
558     /**
559      * Advances past all input up to and including the next occurrence of
560      * {@code thru}. If the remaining input doesn't contain {@code thru}, the
561      * input is exhausted.
562      */
skipPast(String thru)563     public void skipPast(String thru) {
564         int thruStart = in.indexOf(thru, pos);
565         pos = thruStart == -1 ? in.length() : (thruStart + thru.length());
566     }
567 
568     /**
569      * Advances past all input up to but not including the next occurrence of
570      * {@code to}. If the remaining input doesn't contain {@code to}, the input
571      * is unchanged.
572      */
skipTo(char to)573     public char skipTo(char to) {
574         int index = in.indexOf(to, pos);
575         if (index != -1) {
576             pos = index;
577             return to;
578         } else {
579             return '\0';
580         }
581     }
582 
583     /**
584      * Unreads the most recent character of input. If no input characters have
585      * been read, the input is unchanged.
586      */
back()587     public void back() {
588         if (--pos == -1) {
589             pos = 0;
590         }
591     }
592 
593     /**
594      * Returns the integer [0..15] value for the given hex character, or -1
595      * for non-hex input.
596      *
597      * @param hex a character in the ranges [0-9], [A-F] or [a-f]. Any other
598      *     character will yield a -1 result.
599      */
dehexchar(char hex)600     public static int dehexchar(char hex) {
601         if (hex >= '0' && hex <= '9') {
602             return hex - '0';
603         } else if (hex >= 'A' && hex <= 'F') {
604             return hex - 'A' + 10;
605         } else if (hex >= 'a' && hex <= 'f') {
606             return hex - 'a' + 10;
607         } else {
608             return -1;
609         }
610     }
611 }
612